Example #1
        public void Deserialize2()
        {
            var path = Path.Combine(this.ModelDirectory, "mmod_human_face_detector.dat");

            using (var loss = LossMmod.Deserialize(File.ReadAllBytes(path)))
                Assert.Equal(21, loss.NumLayers);
        }
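
The byte-array overload exercised above is convenient when the model ships as an embedded resource. When a file path is at hand, the path overload (also used in Example #8 below) loads the same network directly; a minimal sketch, assuming the detector file lives in the test's model directory:

        public void DeserializeFromPathSketch()
        {
            // Hypothetical sketch: load the face detector straight from disk.
            var path = Path.Combine(this.ModelDirectory, "mmod_human_face_detector.dat");

            using (var loss = LossMmod.Deserialize(path))
                Assert.Equal(21, loss.NumLayers);
        }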
Example #2
        /// <summary>
        /// Initializes a new instance of the <see cref="FaceRecognition"/> class with the directory path that stores model files.
        /// </summary>
        /// <param name="directory">The directory path that stores model files.</param>
        /// <exception cref="FileNotFoundException">The model file is not found.</exception>
        /// <exception cref="DirectoryNotFoundException">The specified directory path is not found.</exception>
        private FaceRecognition(string directory)
        {
            if (!Directory.Exists(directory))
            {
                throw new DirectoryNotFoundException(directory);
            }

            var predictor68PointModel = Path.Combine(directory, FaceRecognitionModels.GetPosePredictorModelLocation());

            if (!File.Exists(predictor68PointModel))
            {
                throw new FileNotFoundException(predictor68PointModel);
            }

            var predictor5PointModel = Path.Combine(directory, FaceRecognitionModels.GetPosePredictorFivePointModelLocation());

            if (!File.Exists(predictor5PointModel))
            {
                throw new FileNotFoundException(predictor5PointModel);
            }

            var cnnFaceDetectionModel = Path.Combine(directory, FaceRecognitionModels.GetCnnFaceDetectorModelLocation());

            if (!File.Exists(cnnFaceDetectionModel))
            {
                throw new FileNotFoundException(cnnFaceDetectionModel);
            }

            var faceRecognitionModel = Path.Combine(directory, FaceRecognitionModels.GetFaceRecognitionModelLocation());

            if (!File.Exists(faceRecognitionModel))
            {
                throw new FileNotFoundException(faceRecognitionModel);
            }

            this._FaceDetector?.Dispose();
            this._FaceDetector = DlibDotNet.Dlib.GetFrontalFaceDetector();

            this._PosePredictor68Point?.Dispose();
            this._PosePredictor68Point = ShapePredictor.Deserialize(predictor68PointModel);

            this._PosePredictor5Point?.Dispose();
            this._PosePredictor5Point = ShapePredictor.Deserialize(predictor5PointModel);

            this._CnnFaceDetector?.Dispose();
            this._CnnFaceDetector = LossMmod.Deserialize(cnnFaceDetectionModel);

            this._FaceEncoder?.Dispose();
            this._FaceEncoder = LossMetric.Deserialize(faceRecognitionModel);

            var predictor194PointModel = Path.Combine(directory, FaceRecognitionModels.GetPosePredictor194PointModelLocation());

            if (File.Exists(predictor194PointModel))
            {
                this._PosePredictor194Point?.Dispose();
                this._PosePredictor194Point = ShapePredictor.Deserialize(predictor194PointModel);
            }
        }
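
The constructor above is private, so instances are created through a public factory. A minimal usage sketch, assuming a FaceRecognition.Create(directory) factory and a local "models" directory holding the four required .dat files checked above:

            // Hypothetical sketch: Create and the "models" path are assumptions.
            using (var faceRecognition = FaceRecognition.Create("models"))
            {
                // ... load images, detect faces, compute encodings ...
            }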
Example #3
        public static IEnumerable <IEnumerable <MModRect> > DetectMulti(LossMmod net, IEnumerable <Image> images, int upsampleNumTimes, int batchSize = 128)
        {
            var dimgs    = new List <Matrix <RgbPixel> >();
            var allRects = new List <IEnumerable <MModRect> >();

            using (var pyr = new PyramidDown(2))
            {
                // Copy the data into dlib based objects.  The matrices must stay alive
                // until the batched detection below has run, so they are disposed at the
                // end of the method rather than by a 'using' block here.
                foreach (var matrix in images)
                {
                    var image = new Matrix <RgbPixel>();

                    var type = matrix.Matrix.MatrixElementType;
                    switch (type)
                    {
                    case MatrixElementTypes.UInt8:
                    case MatrixElementTypes.RgbPixel:
                        DlibDotNet.Dlib.AssignImage(matrix.Matrix, image);
                        break;

                    default:
                        image.Dispose();
                        throw new NotSupportedException("Unsupported image type, must be 8bit gray or RGB image.");
                    }

                    // Upsampling the image will allow us to detect smaller faces but will
                    // cause the program to use more RAM and run longer.
                    for (var i = 0; i < upsampleNumTimes; i++)
                    {
                        DlibDotNet.Dlib.PyramidUp(image);
                    }

                    dimgs.Add(image);
                }

                // The batched detector requires all images in a batch to share the same dimensions.
                for (var i = 1; i < dimgs.Count; i++)
                {
                    if (dimgs[i - 1].Columns != dimgs[i].Columns || dimgs[i - 1].Rows != dimgs[i].Rows)
                    {
                        throw new ArgumentException("Images in list must all have the same dimensions.");
                    }
                }

                // Run the detector once over the whole batch and scale the detection
                // locations back to the original image sizes.
                var dets = net.Operator(dimgs, (ulong)batchSize);
                foreach (var det in dets)
                {
                    var rects = new List <MModRect>();
                    foreach (var d in det)
                    {
                        var drect = pyr.RectDown(new DRectangle(d.Rect), (uint)upsampleNumTimes);
                        d.Rect = new Rectangle((int)drect.Left, (int)drect.Top, (int)drect.Right, (int)drect.Bottom);
                        rects.Add(d);
                    }

                    allRects.Add(rects);
                }
            }

            foreach (var image in dimgs)
            {
                image.Dispose();
            }

            return(allRects);
        }
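
A usage sketch for the batched helper above: load several equally sized images and run a single batched forward pass. FaceRecognition.LoadImageFile, the model path, and the file names are assumptions:

            // Hypothetical sketch: batch-detect faces in two images of identical size.
            using (var net = LossMmod.Deserialize("mmod_human_face_detector.dat"))
                using (var img1 = FaceRecognition.LoadImageFile("group1.jpg"))
                    using (var img2 = FaceRecognition.LoadImageFile("group2.jpg"))
                    {
                        var batches = DetectMulti(net, new[] { img1, img2 }, 1);
                        foreach (var rects in batches)
                        {
                            Console.WriteLine($"faces found: {rects.Count()}");
                        }
                    }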
Example #4
        public void Create()
        {
            var networkIds = Enumerable.Range(0, 4);

            foreach (var networkId in networkIds)
            {
                using (var loss = new LossMmod(networkId))
                    Assert.True(!loss.IsDisposed);
            }
        }
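
The networkId passed to the constructor appears to select one of the network types predefined in the native layer (compare net.NetworkType in Example #6). A complementary sketch checking the disposed flag after an explicit Dispose:

            // Hypothetical sketch: IsDisposed flips once Dispose has run.
            var loss = new LossMmod(0);
            Assert.False(loss.IsDisposed);
            loss.Dispose();
            Assert.True(loss.IsDisposed);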
Example #5
        private static void Main()
        {
            try
            {
                // You can get this file from http://dlib.net/files/mmod_front_and_rear_end_vehicle_detector.dat.bz2
                // This network was produced by the dnn_mmod_train_find_cars_ex.cpp example program.
                // As you can see, the file also includes a separately trained shape_predictor.  To see
                // a generic example of how to train those refer to train_shape_predictor_ex.cpp.
                using (var deserialize = new ProxyDeserialize("mmod_front_and_rear_end_vehicle_detector.dat"))
                    using (var net = LossMmod.Deserialize(deserialize, 1))
                        using (var sp = ShapePredictor.Deserialize(deserialize))
                            using (var img = Dlib.LoadImageAsMatrix <RgbPixel>("mmod_cars_test_image2.jpg"))
                                using (var win = new ImageWindow())
                                {
                                    win.SetImage(img);

                                    // Run the detector on the image and show us the output.
                                    var dets = net.Operator(img).First();
                                    foreach (var d in dets)
                                    {
                                        // We use a shape_predictor to refine the exact shape and location of the detection
                                        // box.  This shape_predictor is trained to simply output the 4 corner points of
                                        // the box.  So all we do is make a rectangle that tightly contains those 4 points
                                        // and that rectangle is our refined detection position.
                                        var fd   = sp.Detect(img, d);
                                        var rect = Rectangle.Empty;
                                        for (var j = 0u; j < fd.Parts; ++j)
                                        {
                                            rect += fd.GetPart(j);
                                        }

                                        if (d.Label == "rear")
                                        {
                                            win.AddOverlay(rect, new RgbPixel(255, 0, 0), d.Label);
                                        }
                                        else
                                        {
                                            win.AddOverlay(rect, new RgbPixel(255, 255, 0), d.Label);
                                        }
                                    }

                                    Console.WriteLine("Hit enter to end program");
                                    Console.ReadKey();
                                }
            }
            catch (ImageLoadException ile)
            {
                Console.WriteLine(ile.Message);
                Console.WriteLine("The test image is located in the examples folder.  So you should run this program from a sub folder so that the relative path is correct.");
            }
            catch (Exception e)
            {
                Console.WriteLine(e);
            }
        }
Example #6
        public static void SetAllBnRunningStatsWindowSizes(LossMmod net, uint newWindowSize)
        {
            if (net == null)
            {
                throw new ArgumentNullException(nameof(net));
            }

            net.ThrowIfDisposed();

            var ret = NativeMethods.set_all_bn_running_stats_window_sizes_loss_mmod(net.NativePtr, net.NetworkType, newWindowSize);

            if (ret == NativeMethods.ErrorType.DnnNotSupportNetworkType)
            {
                throw new NotSupportNetworkTypeException(net.NetworkType);
            }
        }
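
dlib's dnn_mmod_ex.cpp calls the C++ counterpart, set_all_bn_running_stats_window_sizes(net, 1000), before training so that batch-normalization running statistics are averaged over a longer window. A sketch of the equivalent call through this wrapper (the network id 0 is an assumption):

            // Hypothetical sketch: widen the BN running-stats window before training starts.
            using (var net = new LossMmod(0))
            {
                SetAllBnRunningStatsWindowSizes(net, 1000);
                // ... construct a DnnTrainer<LossMmod> and train as in Example #12 ...
            }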
Example #7
        public static IEnumerable <MModRect> Detect(LossMmod net, Image image, int upsampleNumTimes)
        {
            using (var pyr = new PyramidDown(2))
            {
                var rects = new List <MModRect>();

                // Copy the data into dlib based objects
                using (var matrix = new Matrix <RgbPixel>())
                {
                    var type = image.Mode;
                    switch (type)
                    {
                    case Mode.Greyscale:
                    case Mode.Rgb:
                        DlibDotNet.Dlib.AssignImage(image.Matrix, matrix);
                        break;

                    default:
                        throw new NotSupportedException("Unsupported image type, must be 8bit gray or RGB image.");
                    }

                    // Upsampling the image will allow us to detect smaller faces but will cause the
                    // program to use more RAM and run longer.
                    var levels = upsampleNumTimes;
                    while (levels > 0)
                    {
                        levels--;
                        DlibDotNet.Dlib.PyramidUp <PyramidDown>(matrix, 2);
                    }

                    var dets = net.Operator(matrix);

                    // Scale the detection locations back to the original image size
                    // if the image was upscaled.
                    foreach (var d in dets.First())
                    {
                        var drect = pyr.RectDown(new DRectangle(d.Rect), (uint)upsampleNumTimes);
                        d.Rect = new Rectangle((int)drect.Left, (int)drect.Top, (int)drect.Right, (int)drect.Bottom);
                        rects.Add(d);
                    }

                    return(rects);
                }
            }
        }
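
A single-image usage sketch for the helper above; the model path, the image-loading call, and the file name are assumptions:

            // Hypothetical sketch: detect faces in one image, upsampling once.
            using (var net = LossMmod.Deserialize("mmod_human_face_detector.dat"))
                using (var img = FaceRecognition.LoadImageFile("portrait.jpg"))
                {
                    foreach (var d in Detect(net, img, 1))
                    {
                        Console.WriteLine(d.Rect);
                    }
                }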
Example #8
        public void Operator()
        {
            var image = this.GetDataFile("Lenna.jpg");
            var path  = Path.Combine(this.ModelDirectory, "mmod_human_face_detector.dat");

            using (var net1 = LossMmod.Deserialize(path))
                using (var net2 = LossMmod.Deserialize(File.ReadAllBytes(path)))
                    using (var matrix = Dlib.LoadImageAsMatrix <RgbPixel>(image.FullName))
                        using (var ret1 = net1.Operator(matrix))
                            using (var ret2 = net2.Operator(matrix))
                            {
                                Assert.Equal(1, ret1.Count);
                                Assert.Equal(1, ret2.Count);

                                var r1 = ret1[0].ToArray();
                                var r2 = ret2[0].ToArray();

                                Assert.Equal(r1.Length, r2.Length);
                                Assert.Equal(r1[0].Rect.Left, r2[0].Rect.Left);
                                Assert.Equal(r1[0].Rect.Right, r2[0].Rect.Right);
                                Assert.Equal(r1[0].Rect.Top, r2[0].Rect.Top);
                                Assert.Equal(r1[0].Rect.Bottom, r2[0].Rect.Bottom);
                            }
        }
Example #9
        private static int Main(string[] args)
        {
            if (args.Length < 1)
            {
                Console.WriteLine("To run this program you need a copy of the PASCAL VOC2012 dataset.");
                Console.WriteLine();
                Console.WriteLine("You call this program like this: ");
                Console.WriteLine("./dnn_instance_segmentation_train_ex /path/to/VOC2012 [det-minibatch-size] [seg-minibatch-size] [class-1] [class-2] [class-3] ...");
                return(1);
            }

            try
            {
                Console.WriteLine("\nSCANNING PASCAL VOC2012 DATASET");
                Console.WriteLine();

                var listing = PascalVOC2012.GetPascalVoc2012TrainListing(args[0]).ToArray();
                Console.WriteLine($"images in entire dataset: {listing.Length}");
                if (listing.Length == 0)
                {
                    Console.WriteLine("Didn't find the VOC2012 dataset. ");
                    return(1);
                }

                // mini-batches smaller than the default can be used with GPUs having less memory
                var argc             = args.Length;
                var detMiniBatchSize = argc >= 2 ? int.Parse(args[1]) : 35;
                var segMiniBatchSize = argc >= 3 ? int.Parse(args[2]) : 100;
                Console.WriteLine($"det mini-batch size: {detMiniBatchSize}");
                Console.WriteLine($"seg mini-batch size: {segMiniBatchSize}");

                var desiredClassLabels = new List <string>();
                for (var arg = 3; arg < argc; ++arg)
                {
                    desiredClassLabels.Add(args[arg]);
                }

                if (!desiredClassLabels.Any())
                {
                    desiredClassLabels.Add("bicycle");
                    desiredClassLabels.Add("car");
                    desiredClassLabels.Add("cat");
                }

                Console.Write("desired classlabels:");
                foreach (var desiredClassLabel in desiredClassLabels)
                {
                    Console.Write($" {desiredClassLabel}");
                }
                Console.WriteLine();

                // extract the MMOD rects
                Console.Write("\nExtracting all truth instances...");
                var truthInstances = LoadAllTruthInstances(listing);
                Console.WriteLine(" Done!");
                Console.WriteLine();


                if (listing.Length != truthInstances.Count)
                {
                    throw new ApplicationException();
                }

                var originalTruthImages = new List <TruthImage>();
                for (int i = 0, end = listing.Length; i < end; ++i)
                {
                    originalTruthImages.Add(new TruthImage
                    {
                        Info           = listing[i],
                        TruthInstances = truthInstances[i]
                    });
                }


                var truthImagesFilteredByClass = FilterBasedOnClassLabel(originalTruthImages, desiredClassLabels);

                Console.WriteLine($"images in dataset filtered by class: {truthImagesFilteredByClass.Count}");

                IgnoreSomeTruthBoxes(truthImagesFilteredByClass);
                var truthImages = FilterImagesWithNoTruth(truthImagesFilteredByClass);

                Console.WriteLine($"images in dataset after ignoring some truth boxes: {truthImages.Count}");

                // First train an object detector network (loss_mmod).
                Console.WriteLine("\nTraining detector network:");
                var detNet = TrainDetectionNetwork(truthImages, (uint)detMiniBatchSize);

                // Then train mask predictors (segmentation).
                var segNetsByClass = new Dictionary <string, LossMulticlassLogPerPixel>();

                // This flag controls if a separate mask predictor is trained for each class.
                // Note that it would also be possible to train a separate mask predictor for
                // class groups, each containing somehow similar classes -- for example, one
                // mask predictor for cars and buses, another for cats and dogs, and so on.
                const bool separateSegNetForEachClass = true;


                if (separateSegNetForEachClass)
                {
                    foreach (var classLabel in desiredClassLabels)
                    {
                        // Consider only the truth images belonging to this class.
                        var classImages = FilterBasedOnClassLabel(truthImages, new[] { classLabel });

                        Console.WriteLine($"\nTraining segmentation network for class {classLabel}:");
                        segNetsByClass[classLabel] = TrainSegmentationNetwork(classImages, (uint)segMiniBatchSize, classLabel);
                    }
                }
                else
                {
                    Console.WriteLine("Training a single segmentation network:");
                    segNetsByClass[""] = TrainSegmentationNetwork(truthImages, (uint)segMiniBatchSize, "");
                }

                Console.WriteLine("Saving networks");
                using (var proxy = new ProxySerialize(InstanceSegmentationNetFilename))
                {
                    LossMmod.Serialize(proxy, detNet);
                    segNetsByClass.Serialize(proxy, 4);
                }
            }
            catch (Exception e)
            {
                Console.WriteLine(e);
            }

            return(0);
        }
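
The save block at the end of Main is the mirror image of the load block in Example #10: the same file is reopened with ProxyDeserialize and the objects are read back in the order they were written. A round-trip sketch using only calls that appear in these examples (the trailing integer is assumed to select the registered network type):

            // Sketch: read back what the block above wrote, in the same order.
            using (var deserialize = new ProxyDeserialize(InstanceSegmentationNetFilename))
                using (var detNet = LossMmod.Deserialize(deserialize, 4))
                {
                    var segNetsByClass = new Dictionary <string, LossMulticlassLogPerPixel>();
                    segNetsByClass.Deserialize(deserialize, 4);
                    // ... run inference as in Example #10 ...
                }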
Example #10
        private static void Main(string[] args)
        {
            if (args.Length != 1)
            {
                Console.WriteLine("You call this program like this: ");
                Console.WriteLine("./dnn_instance_segmentation_train_ex /path/to/images");
                Console.WriteLine();
                Console.WriteLine($"You will also need a trained '{InstanceSegmentationNetFilename}' file.");
                Console.WriteLine("You can either train it yourself (see example program");
                Console.WriteLine("dnn_instance_segmentation_train_ex), or download a");
                Console.WriteLine($"copy from here: http://dlib.net/files/{InstanceSegmentationNetFilename}");
                return;
            }

            try
            {
                // Read the file containing the trained network from the working directory.
                using (var deserialize = new ProxyDeserialize(InstanceSegmentationNetFilename))
                    using (var detNet = LossMmod.Deserialize(deserialize, 4))
                    {
                        var segNetsByClass = new Dictionary <string, LossMulticlassLogPerPixel>();
                        segNetsByClass.Deserialize(deserialize, 4);

                        // Show inference results in a window.
                        using (var win = new ImageWindow())
                        {
                            // Find supported image files.
                            var files = Directory.GetFiles(args[0])
                                        .Where(s => s.EndsWith(".jpeg") || s.EndsWith(".jpg") || s.EndsWith(".png")).ToArray();

                            using (var rnd = new Rand())
                            {
                                Console.WriteLine($"Found {files.Length} images, processing...");
                                foreach (var file in files.Select(s => new FileInfo(s)))
                                {
                                    // Load the input image.
                                    using (var inputImage = Dlib.LoadImageAsMatrix <RgbPixel>(file.FullName))
                                    {
                                        // Run the detector to find the object instances in the input image.
                                        using (var output = detNet.Operator(inputImage))
                                        {
                                            var instances = output.First().ToList();
                                            instances.Sort((lhs, rhs) => (int)lhs.Rect.Area - (int)rhs.Rect.Area);

                                            using (var rgbLabelImage = new Matrix <RgbPixel>())
                                            {
                                                rgbLabelImage.SetSize(inputImage.Rows, inputImage.Columns);
                                                rgbLabelImage.Assign(Enumerable.Range(0, rgbLabelImage.Size).Select(i => new RgbPixel(0, 0, 0)).ToArray());

                                                var foundSomething = false;
                                                foreach (var instance in instances)
                                                {
                                                    if (!foundSomething)
                                                    {
                                                        Console.Write("Found ");
                                                        foundSomething = true;
                                                    }
                                                    else
                                                    {
                                                        Console.Write(", ");
                                                    }

                                                    Console.Write(instance.Label);

                                                    var croppingRect = GetCroppingRect(instance.Rect);
                                                    using (var dims = new ChipDims(SegDim, SegDim))
                                                        using (var chipDetails = new ChipDetails(croppingRect, dims))
                                                            using (var inputChip = Dlib.ExtractImageChip <RgbPixel>(inputImage, chipDetails, InterpolationTypes.Bilinear))
                                                            {
                                                                if (!segNetsByClass.TryGetValue(instance.Label, out var i))
                                                                {
                                                                    // per-class segmentation net not found, so we must be using the same net for all classes
                                                                    // (see bool separate_seg_net_for_each_class in dnn_instance_segmentation_train_ex.cpp)
                                                                    if (segNetsByClass.Count != 1)
                                                                    {
                                                                        throw new ApplicationException();
                                                                    }
                                                                    if (!string.IsNullOrEmpty(segNetsByClass.First().Key))
                                                                    {
                                                                        throw new ApplicationException();
                                                                    }
                                                                }

                                                                var segNet = i != null
                                                               ? i                             // use the segmentation net trained for this class
                                                               : segNetsByClass.First().Value; // use the same segmentation net for all classes

                                                                using (var mask = segNet.Operator(inputChip))
                                                                {
                                                                    var randomColor = new RgbPixel(
                                                                        rnd.GetRandom8BitNumber(),
                                                                        rnd.GetRandom8BitNumber(),
                                                                        rnd.GetRandom8BitNumber()
                                                                        );

                                                                    using (var resizedMask = new Matrix <ushort>((int)chipDetails.Rect.Height, (int)chipDetails.Rect.Width))
                                                                    {
                                                                        Dlib.ResizeImage(mask.First(), resizedMask);

                                                                        for (int r = 0, nr = resizedMask.Rows; r < nr; ++r)
                                                                        {
                                                                            for (int c = 0, nc = resizedMask.Columns; c < nc; ++c)
                                                                            {
                                                                                if (resizedMask[r, c] != 0)
                                                                                {
                                                                                    var y = (int)(chipDetails.Rect.Top + r);
                                                                                    var x = (int)(chipDetails.Rect.Left + c);
                                                                                    if (y >= 0 && y < rgbLabelImage.Rows && x >= 0 && x < rgbLabelImage.Columns)
                                                                                    {
                                                                                        rgbLabelImage[y, x] = randomColor;
                                                                                    }
                                                                                }
                                                                            }
                                                                        }
                                                                    }

                                                                    var voc2012Class = PascalVOC2012.FindVoc2012Class(instance.Label);
                                                                    Dlib.DrawRectangle(rgbLabelImage, instance.Rect, voc2012Class.RgbLabel, 1u);
                                                                }
                                                            }
                                                }

                                                instances.DisposeElement();

                                                using (var tmp = Dlib.JoinRows(inputImage, rgbLabelImage))
                                                {
                                                    // Show the input image on the left, and the predicted RGB labels on the right.
                                                    win.SetImage(tmp);

                                                    if (instances.Any())
                                                    {
                                                        Console.Write($" in {file.Name} - hit enter to process the next image");
                                                        Console.ReadKey();
                                                    }
                                                }
                                            }
                                        }
                                    }
                                }
                            }
                        }

                        foreach (var kvp in segNetsByClass)
                        {
                            kvp.Value.Dispose();
                        }
                    }
            }
            catch (Exception e)
            {
                Console.WriteLine(e);
            }
        }
Example #11
        private static void Main(string[] args)
        {
            try
            {
                if (args.Length < 2)
                {
                    Console.WriteLine("Call this program like this:");
                    Console.WriteLine("./dnn_mmod_dog_hipsterizer mmod_dog_hipsterizer.dat faces/dogs.jpg");
                    Console.WriteLine("You can get the mmod_dog_hipsterizer.dat file from:");
                    Console.WriteLine("http://dlib.net/files/mmod_dog_hipsterizer.dat.bz2");
                    return;
                }

                // load the models as well as glasses and mustache.
                using (var deserialize = new ProxyDeserialize(args[0]))
                    using (var net = LossMmod.Deserialize(deserialize))
                        using (var sp = ShapePredictor.Deserialize(deserialize))
                            using (var glasses = Matrix <RgbAlphaPixel> .Deserialize(deserialize))
                                using (var mustache = Matrix <RgbAlphaPixel> .Deserialize(deserialize))
                                {
                                    Dlib.PyramidUp(glasses);
                                    Dlib.PyramidUp(mustache);

                                    using (var win1 = new ImageWindow(glasses))
                                        using (var win2 = new ImageWindow(mustache))
                                            using (var winWireframe = new ImageWindow())
                                                using (var winHipster = new ImageWindow())
                                                {
                                                    // Now process each image, find dogs, and hipsterize them by drawing glasses and a
                                                    // mustache on each dog :)
                                                    for (var i = 1; i < args.Length; ++i)
                                                    {
                                                        using (var img = Dlib.LoadImageAsMatrix <RgbPixel>(args[i]))
                                                        {
                                                            // Upsampling the image will allow us to find smaller dog faces but will use more
                                                            // computational resources.
                                                            //pyramid_up(img);
                                                            var dets = net.Operator(img).First();
                                                            winWireframe.ClearOverlay();
                                                            winWireframe.SetImage(img);

                                                            // We will also draw a wireframe on each dog's face so you can see where the
                                                            // shape_predictor is identifying face landmarks.
                                                            var lines = new List <ImageWindow.OverlayLine>();
                                                            foreach (var d in dets)
                                                            {
                                                                // get the landmarks for this dog's face
                                                                var shape = sp.Detect(img, d.Rect);

                                                                var color    = new RgbPixel(0, 255, 0);
                                                                var top      = shape.GetPart(0);
                                                                var leftEar  = shape.GetPart(1);
                                                                var leftEye  = shape.GetPart(2);
                                                                var nose     = shape.GetPart(3);
                                                                var rightEar = shape.GetPart(4);
                                                                var rightEye = shape.GetPart(5);

                                                                // The locations of the left and right ends of the mustache.
                                                                var leftMustache  = 1.3 * (leftEye - rightEye) / 2 + nose;
                                                                var rightMustache = 1.3 * (rightEye - leftEye) / 2 + nose;

                                                                // Draw the glasses onto the image.
                                                                var from = new[]
                                                                {
                                                                    2 * new Point(176, 36), 2 * new Point(59, 35)
                                                                };
                                                                var to = new[]
                                                                {
                                                                    leftEye, rightEye
                                                                };
                                                                using (var transform = Dlib.FindSimilarityTransform(from, to))
                                                                    for (uint r = 0, nr = (uint)glasses.Rows; r < nr; ++r)
                                                                    {
                                                                        for (uint c = 0, nc = (uint)glasses.Columns; c < nc; ++c)
                                                                        {
                                                                            var p = (Point)transform.Operator(new DPoint(c, r));
                                                                            if (Dlib.GetRect(img).Contains(p))
                                                                            {
                                                                                var rgb = img[p.Y, p.X];
                                                                                Dlib.AssignPixel(ref rgb, glasses[(int)r, (int)c]);
                                                                                img[p.Y, p.X] = rgb;
                                                                            }
                                                                        }
                                                                    }

                                                                // Draw the mustache onto the image right under the dog's nose.
                                                                var mustacheRect = Dlib.GetRect(mustache);
                                                                from = new[]
                                                                {
                                                                    mustacheRect.TopLeft, mustacheRect.TopRight
                                                                };
                                                                to = new[]
                                                                {
                                                                    rightMustache, leftMustache
                                                                };
                                                                using (var transform = Dlib.FindSimilarityTransform(from, to))
                                                                    for (uint r = 0, nr = (uint)mustache.Rows; r < nr; ++r)
                                                                    {
                                                                        for (uint c = 0, nc = (uint)mustache.Columns; c < nc; ++c)
                                                                        {
                                                                            var p = (Point)transform.Operator(new DPoint(c, r));
                                                                            if (Dlib.GetRect(img).Contains(p))
                                                                            {
                                                                                var rgb = img[p.Y, p.X];
                                                                                Dlib.AssignPixel(ref rgb, mustache[(int)r, (int)c]);
                                                                                img[p.Y, p.X] = rgb;
                                                                            }
                                                                        }
                                                                    }

                                                                // Record the lines needed for the face wire frame.
                                                                lines.Add(new ImageWindow.OverlayLine(leftEye, nose, color));
                                                                lines.Add(new ImageWindow.OverlayLine(nose, rightEye, color));
                                                                lines.Add(new ImageWindow.OverlayLine(rightEye, leftEye, color));
                                                                lines.Add(new ImageWindow.OverlayLine(rightEye, rightEar, color));
                                                                lines.Add(new ImageWindow.OverlayLine(rightEar, top, color));
                                                                lines.Add(new ImageWindow.OverlayLine(top, leftEar, color));
                                                                lines.Add(new ImageWindow.OverlayLine(leftEar, leftEye, color));

                                                                winWireframe.AddOverlay(lines);
                                                                winHipster.SetImage(img);
                                                            }

                                                            Console.WriteLine("Hit enter to process the next image.");
                                                            Console.ReadKey();
                                                        }
                                                    }
                                                }
                                }
            }
            catch (Exception e)
            {
                Console.WriteLine(e);
            }
        }
Example #12
        private static void Main(string[] args)
        {
            try
            {
                if (args.Length != 1)
                {
                    Console.WriteLine("Give the path to a folder containing training.xml and testing.xml files.");
                    Console.WriteLine("This example program is specifically designed to run on the dlib vehicle ");
                    Console.WriteLine("detection dataset, which is available at this URL: ");
                    Console.WriteLine("   http://dlib.net/files/data/dlib_rear_end_vehicles_v1.tar");
                    Console.WriteLine();
                    Console.WriteLine("So download that dataset, extract it somewhere, and then run this program");
                    Console.WriteLine("with the dlib_rear_end_vehicles folder as an argument.  E.g. if you extract");
                    Console.WriteLine("the dataset to the current folder then you should run this example program");
                    Console.WriteLine("by typing: ");
                    Console.WriteLine("   ./dnn_mmod_train_find_cars_ex dlib_rear_end_vehicles");
                    Console.WriteLine();
                    Console.WriteLine("It takes about a day to finish if run on a high end GPU like a 1080ti.");
                    Console.WriteLine();
                    return;
                }

                var dataDirectory = args[0];

                IList <Matrix <RgbPixel> > imagesTrain;
                IList <Matrix <RgbPixel> > imagesTest;
                IList <IList <MModRect> >  boxesTrain;
                IList <IList <MModRect> >  boxesTest;
                Dlib.LoadImageDataset(dataDirectory + "/training.xml", out imagesTrain, out boxesTrain);
                Dlib.LoadImageDataset(dataDirectory + "/testing.xml", out imagesTest, out boxesTest);

                // When I was creating the dlib vehicle detection dataset I had to label all the cars
                // in each image.  MMOD requires all cars to be labeled, since any unlabeled part of an
                // image is implicitly assumed to be not a car, and the algorithm will use it as
                // negative training data.  So every car must be labeled, either with a normal
                // rectangle or an "ignore" rectangle that tells MMOD to simply ignore it (i.e. neither
                // treat it as a thing to detect nor as negative training data).
                //
                // In our present case, many images contain very tiny cars in the distance, ones that
                // are essentially just dark smudges.  It's not reasonable to expect the CNN
                // architecture we defined to detect such vehicles.  However, I erred on the side of
                // having more complete annotations when creating the dataset.  So when I labeled these
                // images I labeled many of these really difficult cases as vehicles to detect.
                //
                // So the first thing we are going to do is clean up our dataset a little bit.  In
                // particular, we are going to mark boxes smaller than 35*35 pixels as ignore since
                // only really small and blurry cars appear at those sizes.  We will also mark boxes
                // that are heavily overlapped by another box as ignore.  We do this because we want to
                // allow for stronger non-maximum suppression logic in the learned detector, since that
                // will help make it easier to learn a good detector.
                //
                // To explain this non-max suppression idea further it's important to understand how
                // the detector works.  Essentially, sliding window detectors scan all image locations
                // and ask "is there a car here?".  If there really is a car in a specific location in
                // an image then usually many slightly different sliding window locations will produce
                // high detection scores, indicating that there is a car at those locations.  If we
                // just stopped there then each car would produce multiple detections.  But that isn't
                // what we want.  We want each car to produce just one detection.  So it's common for
                // detectors to include "non-maximum suppression" logic which simply takes the
                // strongest detection and then deletes all detections "close to" the strongest.  This
                // is a simple post-processing step that can eliminate duplicate detections.  However,
                // we have to define what "close to" means.  We can do this by looking at your training
                // data and checking how close the closest target boxes are to each other, and then
                // picking a "close to" measure that doesn't suppress those target boxes but is
                // otherwise as tight as possible.  This is exactly what the mmod_options object does
                // by default.
                //
                // Importantly, this means that if your training dataset contains an image with two
                // target boxes that really overlap a whole lot, then the non-maximum suppression
                // "close to" measure will be configured to allow detections to really overlap a whole
                // lot.  On the other hand, if your dataset didn't contain any overlapped boxes at all,
                // then the non-max suppression logic would be configured to filter out any boxes that
                // overlapped at all, and thus would be performing a much stronger non-max suppression.
                //
                // Why does this matter?  Well, remember that we want to avoid duplicate detections.
                // If non-max suppression just kills everything in a really wide area around a car then
                // the CNN doesn't really need to learn anything about avoiding duplicate detections.
                // However, if non-max suppression only suppresses a tiny area around each detection
                // then the CNN will need to learn to output small detection scores for those areas of
                // the image not suppressed.  The smaller the non-max suppression region the more the
                // CNN has to learn and the more difficult the learning problem will become.  This is
                // why we remove highly overlapped objects from the training dataset.  That is, we do
                // it so the non-max suppression logic will be able to be reasonably effective.  Here
                // we are ensuring that any boxes that are entirely contained by another are
                // suppressed.  We also ensure that boxes with an intersection over union of 0.5 or
                // greater are suppressed.  This will improve the resulting detector since it will be
                // able to use more aggressive non-max suppression settings.

                var numOverlappedIgnoredTest = 0;
                foreach (var v in boxesTest)
                {
                    using (var overlap = new TestBoxOverlap(0.50, 0.95))
                        numOverlappedIgnoredTest += IgnoreOverlappedBoxes(v, overlap);
                }

                var numOverlappedIgnored = 0;
                var numAdditionalIgnored = 0;

                foreach (var v in boxesTrain)
                {
                    using (var overlap = new TestBoxOverlap(0.50, 0.95))
                        numOverlappedIgnored += IgnoreOverlappedBoxes(v, overlap);
                    foreach (var bb in v)
                    {
                        if (bb.Rect.Width < 35 && bb.Rect.Height < 35)
                        {
                            if (!bb.Ignore)
                            {
                                bb.Ignore = true;
                                ++numAdditionalIgnored;
                            }
                        }

                        // The dlib vehicle detection dataset doesn't contain any detections with
                        // really extreme aspect ratios.  However, some datasets do, often because of
                        // bad labeling.  So it's a good idea to check for that and either eliminate
                        // those boxes or set them to ignore.  Although, this depends on your
                        // application.
                        //
                        // For instance, if your dataset has boxes with an aspect ratio
                        // of 10 then you should think about what that means for the network
                        // architecture.  Does the receptive field even cover the entirety of the box
                        // in those cases?  Do you care about these boxes?  Are they labeling errors?
                        // I find that many people will download some dataset from the internet and
                        // just take it as given.  They run it through some training algorithm and take
                        // the dataset as unchallengeable truth.  But many datasets are full of
                        // labeling errors.  There are also a lot of datasets that aren't full of
                        // errors, but are annotated in a sloppy and inconsistent way.  Fixing those
                        // errors and inconsistencies can often greatly improve models trained from
                        // such data.  It's almost always worth the time to try and improve your
                        // training dataset.
                        //
                        // In any case, my point is that there are other types of dataset cleaning you
                        // could put here.  What exactly you need depends on your application.  But you
                        // should carefully consider it and not take your dataset as a given.  The work
                        // of creating a good detector is largely about creating a high quality
                        // training dataset.
                    }
                }

                // When modifying a dataset like this, it's a really good idea to print a log of how
                // many boxes you ignored.  It's easy to accidentally ignore a huge block of data, so
                // you should always look and see that things are doing what you expect.
                Console.WriteLine($"num_overlapped_ignored: {numOverlappedIgnored}");
                Console.WriteLine($"num_additional_ignored: {numAdditionalIgnored}");
                Console.WriteLine($"num_overlapped_ignored_test: {numOverlappedIgnoredTest}");


                Console.WriteLine($"num training images: {imagesTrain.Count()}");
                Console.WriteLine($"num testing images: {imagesTest.Count()}");


                // Our vehicle detection dataset has basically 3 different types of boxes.  Square
                // boxes, tall and skinny boxes (e.g. semi trucks), and short and wide boxes (e.g.
                // sedans).  Here we are telling the MMOD algorithm that a vehicle is recognizable as
                // long as the longest box side is at least 70 pixels long and the shortest box side is
                // at least 30 pixels long.  mmod_options will use these parameters to decide how large
                // each of the sliding windows needs to be so as to be able to detect all the vehicles.
                // Since our dataset has basically these 3 different aspect ratios, it will decide to
                // use 3 different sliding windows.  This means the final con layer in the network will
                // have 3 filters, one for each of these aspect ratios.
                //
                // Another thing to consider when setting the sliding window size is the "stride" of
                // your network.  The network we defined above downsamples the image by a factor of 8x
                // in the first few layers.  So when the sliding windows are scanning the image, they
                // are stepping over it with a stride of 8 pixels.  If you set the sliding window size
                // too small then the stride will become an issue.  For instance, if you set the
                // sliding window size to 4 pixels, then it means a 4x4 window will be moved by 8
                // pixels at a time when scanning. This is obviously a problem since 75% of the image
                // won't even be visited by the sliding window.  So you need to set the window size to
                // be big enough relative to the stride of your network.  In our case, the windows are
                // at least 30 pixels in length, so being moved by 8 pixel steps is fine.
                using (var options = new MModOptions(boxesTrain, 70, 30))
                {
                    // This setting is very important and dataset specific.  The vehicle detection dataset
                    // contains boxes that are marked as "ignore", as we discussed above.  Some of them are
                    // ignored because we set ignore to true in the above code.  However, the xml files
                    // also contained a lot of ignore boxes.  Some of them are large boxes that encompass
                    // large parts of an image and the intention is to have everything inside those boxes
                    // be ignored.  Therefore, we need to tell the MMOD algorithm to do that, which we do
                    // by setting options.overlaps_ignore appropriately.
                    //
                    // But first, we need to understand exactly what this option does.  The MMOD loss
                    // is essentially counting the number of false alarms + missed detections produced by
                    // the detector for each image.  During training, the code is running the detector on
                    // each image in a mini-batch and looking at its output and counting the number of
                    // mistakes.  The optimizer tries to find parameters settings that minimize the number
                    // of detector mistakes.
                    //
                    // This overlaps_ignore option allows you to tell the loss that some outputs from the
                    // detector should be totally ignored, as if they never happened.  In particular, if a
                    // detection overlaps a box in the training data with ignore==true then that detection
                    // is ignored.  This overlap is determined by calling
                    // options.overlaps_ignore(the_detection, the_ignored_training_box).  If it returns
                    // true then that detection is ignored.
                    //
                    // You should read the documentation for test_box_overlap, the class type for
                    // overlaps_ignore for full details.  However, the gist is that the default behavior is
                    // to only consider boxes as overlapping if their intersection over union is > 0.5.
                    // However, the dlib vehicle detection dataset contains large boxes that are meant to
                    // mask out large areas of an image.  So intersection over union isn't an appropriate
                    // way to measure "overlaps with box" in this case.  We want any box that is contained
                    // inside one of these big regions to be ignored, even if the detection box is really
                    // small.  So we set overlaps_ignore to behave that way with this line.
                    options.OverlapsIgnore = new TestBoxOverlap(0.5, 0.95);

                    using (var net = new LossMmod(options, 3))
                    {
                        // The final layer of the network must be a con layer that contains
                        // options.detector_windows.size() filters.  This is because these final filters are
                        // what perform the final "sliding window" detection in the network.  For the dlib
                        // vehicle dataset, there will be 3 sliding window detectors, so we will be setting
                        // num_filters to 3 here.
                        var detectorWindows = options.DetectorWindows.ToArray();
                        using (var subnet = net.GetSubnet())
                            using (var details = subnet.GetLayerDetails())
                            {
                                details.SetNumFilters(detectorWindows.Length);

                                using (var trainer = new DnnTrainer <LossMmod>(net))
                                {
                                    trainer.SetLearningRate(0.1);
                                    trainer.BeVerbose();


                                    // While training, we are going to use early stopping.  That is, we will be checking
                                    // how good the detector is performing on our test data and when it stops getting
                                    // better on the test data we will drop the learning rate.  We will keep doing that
                                    // until the learning rate is less than 1e-4.   These two settings tell the trainer to
                                    // do that.  Essentially, we are setting the first argument to infinity, and only the
                                    // test iterations without progress threshold will matter.  In particular, it says that
                                    // once we observe 1000 testing mini-batches where the test loss clearly isn't
                                    // decreasing we will lower the learning rate.
                                    trainer.SetIterationsWithoutProgressThreshold(50000);
                                    trainer.SetTestIterationsWithoutProgressThreshold(1000);

                                    const string syncFilename = "mmod_cars_sync";
                                    trainer.SetSynchronizationFile(syncFilename, 5 * 60);



                                    IEnumerable <Matrix <RgbPixel> >      mini_batch_samples;
                                    IEnumerable <IEnumerable <MModRect> > mini_batch_labels;
                                    using (var cropper = new RandomCropper())
                                    {
                                        cropper.SetSeed(0);
                                        cropper.SetChipDims(350, 350);
                                        // Usually you want to give the cropper whatever min sizes you passed to the
                                        // mmod_options constructor, or very slightly smaller sizes, which is what we do here.
                                        cropper.SetMinObjectSize(69, 28);
                                        cropper.MaxRotationDegrees = 2;

                                        using (var rnd = new Rand())
                                        {
                                            // Log the training parameters to the console
                                            Console.WriteLine($"{trainer}{cropper}");

                                            var cnt = 1;
                                            // Run the trainer until the learning rate gets small.
                                            while (trainer.GetLearningRate() >= 1e-4)
                                            {
                                                // Every 30 mini-batches we do a testing mini-batch.
                                                if (cnt % 30 != 0 || !imagesTest.Any())
                                                {
                                                    cropper.Operator(87, imagesTrain, boxesTrain, out mini_batch_samples, out mini_batch_labels);
                                                    // We can also randomly jitter the colors and that often helps a detector
                                                    // generalize better to new images.
                                                    foreach (var img in mini_batch_samples)
                                                    {
                                                        Dlib.DisturbColors(img, rnd);
                                                    }

                                                    // It's a good idea to, at least once, put code here that displays the images
                                                    // and boxes the random cropper is generating.  You should look at them and
                                                    // think about if the output makes sense for your problem.  Most of the time
                                                    // it will be fine, but sometimes you will realize that the pattern of cropping
                                                    // isn't really appropriate for your problem and you will need to make some
                                                    // change to how the mini-batches are being generated.  Maybe you will tweak
                                                    // some of the cropper's settings, or write your own entirely separate code to
                                                    // create mini-batches.  But either way, if you don't look you will never know.
                                                    // An easy way to do this is to create a dlib::image_window to display the
                                                    // images and boxes.
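                                                    // A minimal commented-out sketch of that idea, using the ImageWindow
                                                    // type seen elsewhere in this file ("dbgWin" is just an illustrative
                                                    // name); enable it temporarily to eyeball a few crops:
                                                    //
                                                    //using (var dbgWin = new ImageWindow())
                                                    //    foreach (var pair in mini_batch_samples.Zip(mini_batch_labels, Tuple.Create))
                                                    //    {
                                                    //        dbgWin.ClearOverlay();
                                                    //        dbgWin.SetImage(pair.Item1);
                                                    //        foreach (var box in pair.Item2)
                                                    //            dbgWin.AddOverlay(box);
                                                    //        Console.ReadKey();
                                                    //    }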

                                                    LossMmod.TrainOneStep(trainer, mini_batch_samples, mini_batch_labels);

                                                    mini_batch_samples.DisposeElement();
                                                    mini_batch_labels.DisposeElement();
                                                }
                                                else
                                                {
                                                    cropper.Operator(87, imagesTest, boxesTest, out mini_batch_samples, out mini_batch_labels);
                                                    // We can also randomly jitter the colors and that often helps a detector
                                                    // generalize better to new images.
                                                    foreach (var img in mini_batch_samples)
                                                    {
                                                        Dlib.DisturbColors(img, rnd);
                                                    }

                                                    LossMmod.TestOneStep(trainer, mini_batch_samples, mini_batch_labels);

                                                    mini_batch_samples.DisposeElement();
                                                    mini_batch_labels.DisposeElement();
                                                }
                                                ++cnt;
                                            }
                                            // wait for training threads to stop
                                            trainer.GetNet();
                                            Console.WriteLine("done training");

                                            // Save the network to disk
                                            net.Clean();
                                            LossMmod.Serialize(net, "mmod_rear_end_vehicle_detector.dat");


                                            // It's a really good idea to print the training parameters.  This is because you will
                                            // invariably be running multiple rounds of training and should be logging the output
                                            // to a file.  This print statement will include many of the training parameters in
                                            // your log.
                                            Console.WriteLine($"{trainer}{cropper}");

                                            Console.WriteLine($"\nsync_filename: {syncFilename}");
                                            Console.WriteLine($"num training images: {imagesTrain.Count()}");
                                            using (var _ = new TestBoxOverlap())
                                                using (var matrix = Dlib.TestObjectDetectionFunction(net, imagesTrain, boxesTrain, _, 0, options.OverlapsIgnore))
                                                    Console.WriteLine($"training results: {matrix}");
                                            // Upsampling the data will allow the detector to find smaller cars.  Recall that
                                            // we configured it to use a sliding window nominally 70 pixels in size.  So upsampling
                                            // here will let it find things nominally 35 pixels in size.  Although we include a
                                            // limit of 1800*1800 here which means "don't upsample an image if it's already larger
                                            // than 1800*1800".  We do this so we don't run out of RAM, which is a concern because
                                            // some of the images in the dlib vehicle dataset are really high resolution.
                                            Dlib.UpsampleImageDataset(2, imagesTrain, boxesTrain, 1800 * 1800);
                                            using (var _ = new TestBoxOverlap())
                                                using (var matrix = Dlib.TestObjectDetectionFunction(net, imagesTrain, boxesTrain, _, 0, options.OverlapsIgnore))
                                                    Console.WriteLine($"training upsampled results: {matrix}");


                                            Console.WriteLine("num testing images: {images_test.Count()}");
                                            using (var _ = new TestBoxOverlap())
                                                using (var matrix = Dlib.TestObjectDetectionFunction(net, imagesTest, boxesTest, _, 0, options.OverlapsIgnore))
                                                    Console.WriteLine($"testing results: {matrix}");
                                            Dlib.UpsampleImageDataset(2, imagesTest, boxesTest, 1800 * 1800);
                                            using (var _ = new TestBoxOverlap())
                                                using (var matrix = Dlib.TestObjectDetectionFunction(net, imagesTest, boxesTest, _, 0, options.OverlapsIgnore))
                                                    Console.WriteLine($"testing upsampled results: {matrix}");

                                            /*
                                             *  This program takes many hours to execute on a high end GPU.  It took about a day to
                                             *  train on a NVIDIA 1080ti.  The resulting model file is available at
                                             *      http://dlib.net/files/mmod_rear_end_vehicle_detector.dat.bz2
                                             *  It should be noted that this file on dlib.net has a dlib::shape_predictor appended
                                             *  onto the end of it (see dnn_mmod_find_cars_ex.cpp for an example of its use).  This
                                             *  explains why the model file on dlib.net is larger than the
                                             *  mmod_rear_end_vehicle_detector.dat output by this program.
                                             *
                                             *  You can see some videos of this vehicle detector running on YouTube:
                                             *      https://www.youtube.com/watch?v=4B3bzmxMAZU
                                             *      https://www.youtube.com/watch?v=bP2SUo5vSlc
                                             *
                                             *  Also, the training and testing accuracies were:
                                             *      num training images: 2217
                                             *      training results: 0.990738 0.736431 0.736073
                                             *      training upsampled results: 0.986837 0.937694 0.936912
                                             *      num testing images: 135
                                             *      testing results: 0.988827 0.471372 0.470806
                                             *      testing upsampled results: 0.987879 0.651132 0.650399
                                             */
                                        }
                                    }
                                }
                            }
                    }
                }
            }
            catch (Exception e)
            {
                Console.WriteLine(e);
            }
        }
Example #13
0
        private static void Main()
        {
            try
            {
                // You can get this file from http://dlib.net/files/mmod_rear_end_vehicle_detector.dat.bz2
                // This network was produced by the dnn_mmod_train_find_cars_ex.cpp example program.
                // As you can see, the file also includes a separately trained shape_predictor.  To see
                // a generic example of how to train those refer to train_shape_predictor_ex.cpp.
                using (var deserialize = new ProxyDeserialize("mmod_rear_end_vehicle_detector.dat"))
                    using (var net = LossMmod.Deserialize(deserialize, 1))
                        using (var sp = ShapePredictor.Deserialize(deserialize))
                            using (var img = Dlib.LoadImageAsMatrix <RgbPixel>("mmod_cars_test_image.jpg"))
                                using (var win = new ImageWindow())
                                {
                                    win.SetImage(img);

                                    // Run the detector on the image and show us the output.
                                    var dets = net.Operator(img).First();
                                    foreach (var d in dets)
                                    {
                                        // We use a shape_predictor to refine the exact shape and location of the detection
                                        // box.  This shape_predictor is trained to simply output the 4 corner points of
                                        // the box.  So all we do is make a rectangle that tightly contains those 4 points
                                        // and that rectangle is our refined detection position.
                                        var fd   = sp.Detect(img, d);
                                        var rect = Rectangle.Empty;
                                        for (var j = 0u; j < fd.Parts; ++j)
                                        {
                                            rect += fd.GetPart(j);
                                        }

                                        win.AddOverlay(rect, new RgbPixel(255, 0, 0));
                                    }



                                    Console.WriteLine("Hit enter to view the intermediate processing steps");
                                    Console.ReadKey();


                                    // Now let's look at how the detector works.  The high level processing steps look like:
                                    //   1. Create an image pyramid and pack the pyramid into one big image.  We call this
                                    //      image the "tiled pyramid".
                                    //   2. Run the tiled pyramid image through the CNN.  The CNN outputs a new image where
                                    //      bright pixels in the output image indicate the presence of cars.
                                    //   3. Find pixels in the CNN's output image with a value > 0.  Those locations are your
                                    //      preliminary car detections.
                                    //   4. Perform non-maximum suppression on the preliminary detections to produce the
                                    //      final output.
                                    //
                                    // We will be plotting the images from steps 1 and 2 so you can visualize what's
                                    // happening.  For the CNN's output image, we will use the jet colormap so that "bright"
                                    // outputs, i.e. pixels with big values, appear in red and "dim" outputs appear as a
                                    // cold blue color.  To do this we pick a range of CNN output values for the color
                                    // mapping.  The specific values don't matter.  They are just selected to give a nice
                                    // looking output image.
                                    const float lower = -2.5f;
                                    const float upper = 0.0f;
                                    Console.WriteLine($"jet color mapping range:  lower={lower}  upper={upper}");



                                    // Create a tiled pyramid image and display it on the screen.
                                    // In the C++ example the pyramid type is extracted from the network itself:
                                    //   using pyramid_type = std::remove_reference<decltype(input_layer(net))>::type::pyramid_type;
                                    // Here we construct the matching InputRgbImagePyramid<PyramidDown> input layer
                                    // directly and tell CreateTiledPyramid to build the pyramid with that type.
                                    using (var inputLayer = new InputRgbImagePyramid <PyramidDown>(6))
                                    {
                                        net.TryGetInputLayer(inputLayer);

                                        var padding      = inputLayer.GetPyramidPadding();
                                        var outerPadding = inputLayer.GetPyramidOuterPadding();
                                        Dlib.CreateTiledPyramid <RgbPixel, PyramidDown>(img,
                                                                                        padding,
                                                                                        outerPadding,
                                                                                        6,
                                                                                        out var tiledImg,
                                                                                        out var rects);

                                        using (var winpyr = new ImageWindow(tiledImg, "Tiled pyramid"))
                                        {
                                            // This CNN detector represents a sliding window detector with 3 sliding windows.  Each
                                            // of the 3 windows has a different aspect ratio, allowing it to find vehicles which
                                            // are either tall and skinny, squarish, or short and wide.  The aspect ratio of a
                                            // detection is determined by which channel in the output image triggers the detection.
                                            // Here we are just going to max pool the channels together to get one final image for
                                            // our display.  In this image, a pixel will be bright if any of the sliding window
                                            // detectors thinks there is a car at that location.
                                            using (var subnet = net.GetSubnet())
                                            {
                                                var output = subnet.Output;
                                                Console.WriteLine($"Number of channels in final tensor image: {output.K}");
                                                var networkOutput = Dlib.ImagePlane(output);
                                                for (var k = 1; k < output.K; k++)
                                                {
                                                    using (var tmpNetworkOutput = Dlib.ImagePlane(output, 0, k))
                                                    {
                                                        var maxPointWise = Dlib.MaxPointWise(networkOutput, tmpNetworkOutput);
                                                        networkOutput.Dispose();
                                                        networkOutput = maxPointWise;
                                                    }
                                                }
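                                                // (A hedged aside: with networkOutput in hand, step 3 from the list above
                                                // is conceptually just a scan for positive responses, something like:
                                                //
                                                //   for (var r = 0; r < networkOutput.Rows; ++r)
                                                //       for (var c = 0; c < networkOutput.Columns; ++c)
                                                //           if (networkOutput[r, c] > 0)
                                                //           { /* preliminary detection centered at (c, r) */ }
                                                //
                                                // The real loss layer additionally maps each hit back to image coordinates
                                                // before step 4's non-max suppression.)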

                                                // We will also upsample the CNN's output image.  The CNN we defined has an 8x
                                                // downsampling layer at the beginning. In the code below we are going to overlay this
                                                // CNN output image on top of the raw input image.  To make that look nice it helps to
                                                // upsample the CNN output image back to the same resolution as the input image, which
                                                // we do here.
                                                var networkOutputScale = img.Columns / (double)networkOutput.Columns;
                                                Dlib.ResizeImage(networkOutput, networkOutputScale);


                                                // Display the network's output as a color image.
                                                using (var jet = Dlib.Jet(networkOutput, upper, lower))
                                                    using (var winOutput = new ImageWindow(jet, "Output tensor from the network"))
                                                    {
                                                        // Also, overlay network_output on top of the tiled image pyramid and display it.
                                                        for (var r = 0; r < tiledImg.Rows; ++r)
                                                        {
                                                            for (var c = 0; c < tiledImg.Columns; ++c)
                                                            {
                                                                var tmp = new DPoint(c, r);
                                                                tmp = Dlib.InputTensorToOutputTensor(net, tmp);
                                                                var dp = networkOutputScale * tmp;
                                                                tmp = new DPoint((int)dp.X, (int)dp.Y);
                                                                if (Dlib.GetRect(networkOutput).Contains((int)tmp.X, (int)tmp.Y))
                                                                {
                                                                    var val = networkOutput[(int)tmp.Y, (int)tmp.X];

                                                                    // alpha blend the network output pixel with the RGB image to make our
                                                                    // overlay.
                                                                    var p = new RgbAlphaPixel();
                                                                    Dlib.AssignPixel(ref p, Dlib.ColormapJet(val, lower, upper));
                                                                    p.Alpha = 120;

                                                                    var rgb = new RgbPixel();
                                                                    Dlib.AssignPixel(ref rgb, p);
                                                                    tiledImg[r, c] = rgb;
                                                                }
                                                            }
                                                        }

                                                        // If you look at this image you can see that the vehicles have bright red blobs on
                                                        // them.  That's the CNN saying "there is a car here!".  You will also notice there is
                                                        // a certain scale at which it finds cars.  They have to be not too big or too small,
                                                        // which is why we have an image pyramid.  The pyramid allows us to find cars of all
                                                        // scales.
                                                        using (var winPyrOverlay = new ImageWindow(tiledImg, "Detection scores on image pyramid"))
                                                        {
                                                            // Finally, we can collapse the pyramid back into the original image.  The CNN doesn't
                                                            // actually do this step, since it's enough to threshold the tiled pyramid image to get
                                                            // the detections.  However, it makes a nice visualization and clearly indicates that
                                                            // the detector is firing for all the cars.
                                                            using (var collapsed = new Matrix <float>(img.Rows, img.Columns))
                                                                using (var inputTensor = new ResizableTensor())
                                                                {
                                                                    inputLayer.ToTensor(img, 1, inputTensor);
                                                                    for (var r = 0; r < collapsed.Rows; ++r)
                                                                    {
                                                                        for (var c = 0; c < collapsed.Columns; ++c)
                                                                        {
                                                                            // Loop over a bunch of scale values and look up what part of network_output
                                                                            // corresponds to the point(c,r) in the original image, then take the max
                                                                            // detection score over all the scales and save it at pixel point(c,r).
                                                                            var maxScore = -1e30f;
                                                                            for (double scale = 1; scale > 0.2; scale *= 5.0 / 6.0)
                                                                            {
                                                                                // Map from input image coordinates to tiled pyramid coordinates.
                                                                                var tensorSpace = inputLayer.ImageSpaceToTensorSpace(inputTensor, scale, new DRectangle(new DPoint(c, r)));
                                                                                var tmp         = tensorSpace.Center;

                                                                                // Now map from pyramid coordinates to network_output coordinates.
                                                                                var dp = networkOutputScale * Dlib.InputTensorToOutputTensor(net, tmp);
                                                                                tmp = new DPoint((int)dp.X, (int)dp.Y);

                                                                                if (Dlib.GetRect(networkOutput).Contains((int)tmp.X, (int)tmp.Y))
                                                                                {
                                                                                    var val = networkOutput[(int)tmp.Y, (int)tmp.X];
                                                                                    if (val > maxScore)
                                                                                    {
                                                                                        maxScore = val;
                                                                                    }
                                                                                }
                                                                            }

                                                                            collapsed[r, c] = maxScore;

                                                                            // Also blend the scores into the original input image so we can view it as
                                                                            // an overlay on the cars.
                                                                            var p = new RgbAlphaPixel();
                                                                            Dlib.AssignPixel(ref p, Dlib.ColormapJet(maxScore, lower, upper));
                                                                            p.Alpha = 120;

                                                                            var rgb = new RgbPixel();
                                                                            Dlib.AssignPixel(ref rgb, p);
                                                                            img[r, c] = rgb;
                                                                        }
                                                                    }

                                                                    using (var jet2 = Dlib.Jet(collapsed, upper, lower))
                                                                        using (var winCollapsed = new ImageWindow(jet2, "Collapsed output tensor from the network"))
                                                                            using (var winImgAndSal = new ImageWindow(img, "Collapsed detection scores on raw image"))
                                                                            {
                                                                                Console.WriteLine("Hit enter to end program");
                                                                                Console.ReadKey();
                                                                            }
                                                                }
                                                        }
                                                    }
                                            }
                                        }
                                    }
                                }
            }
            catch (Exception e)
            {
                Console.WriteLine(e);
            }
        }
Example #14
0
        public static Matrix <double> TestObjectDetectionFunction <T>(LossMmod detector,
                                                                      IEnumerable <Matrix <T> > images,
                                                                      IEnumerable <IEnumerable <MModRect> > truthDets,
                                                                      TestBoxOverlap overlapTester       = null,
                                                                      double adjustThreshold             = 0,
                                                                      TestBoxOverlap overlapIgnoreTester = null)
            where T : struct
        {
            if (detector == null)
            {
                throw new ArgumentNullException(nameof(detector));
            }
            if (images == null)
            {
                throw new ArgumentNullException(nameof(images));
            }
            if (truthDets == null)
            {
                throw new ArgumentNullException(nameof(truthDets));
            }

            detector.ThrowIfDisposed();
            images.ThrowIfDisposed();
            truthDets.ThrowIfDisposed();

            var disposeOverlapTester       = overlapTester == null;
            var disposeOverlapIgnoreTester = overlapIgnoreTester == null;

            try
            {
                if (disposeOverlapTester)
                {
                    overlapTester = new TestBoxOverlap();
                }
                if (disposeOverlapIgnoreTester)
                {
                    overlapIgnoreTester = new TestBoxOverlap();
                }

                using (var matrixVector = new StdVector <Matrix <T> >(images))
                    using (var disposer = new EnumerableDisposer <StdVector <MModRect> >(truthDets.Select(r => new StdVector <MModRect>(r))))
                        using (var detsVector = new StdVector <StdVector <MModRect> >(disposer.Collection))
                            using (new EnumerableDisposer <StdVector <MModRect> >(detsVector))
                            {
                                var type = detector.NetworkType;
                                Matrix <T> .TryParse <T>(out var elementTypes);

                                var matrix = images.FirstOrDefault();
                                var ret    = NativeMethods.test_object_detection_function_net(type,
                                                                                              detector.NativePtr,
                                                                                              elementTypes.ToNativeMatrixElementType(),
                                                                                              matrixVector.NativePtr,
                                                                                              matrix.TemplateRows,
                                                                                              matrix.TemplateColumns,
                                                                                              detsVector.NativePtr,
                                                                                              overlapTester.NativePtr,
                                                                                              adjustThreshold,
                                                                                              overlapIgnoreTester.NativePtr,
                                                                                              out var result);
                                switch (ret)
                                {
                                case NativeMethods.ErrorType.MatrixElementTypeNotSupport:
                                    throw new ArgumentException($"{elementTypes} is not supported.");

                                case NativeMethods.ErrorType.DnnNotSupportNetworkType:
                                    throw new NotSupportNetworkTypeException(type);
                                }
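                                // Following dlib's test_object_detection_function, the three numbers in
                                // "result" are the precision, the recall, and the mean average precision.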

                                return new Matrix <double>(result, 1, 3);
                            }
            }
            finally
            {
                if (disposeOverlapTester)
                {
                    overlapTester?.Dispose();
                }
                if (disposeOverlapIgnoreTester)
                {
                    overlapIgnoreTester?.Dispose();
                }
            }
        }
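        // A usage sketch (hedged; "net", "imagesTest" and "boxesTest" stand in for objects
        // like the ones built in the training examples above):
        //
        //   using (var metrics = Dlib.TestObjectDetectionFunction(net, imagesTest, boxesTest))
        //       Console.WriteLine($"precision / recall / average precision: {metrics}");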
Example #15
0
        private static void Main(string[] args)
        {
            try
            {
                // In this example we are going to train a face detector based on the
                // small faces dataset in the examples/faces directory.  So the first
                // thing we do is load that dataset.  This means you need to supply the
                // path to this faces folder as a command line argument so we will know
                // where it is.
                if (args.Length != 1)
                {
                    Console.WriteLine("Give the path to the examples/faces directory as the argument to this");
                    Console.WriteLine("program.  For example, if you are in the examples folder then execute ");
                    Console.WriteLine("this program by running: ");
                    Console.WriteLine("   ./dnn_mmod_ex faces");
                    return;
                }

                var facesDirectory = args[0];

                // The faces directory contains a training dataset and a separate
                // testing dataset.  The training data consists of 4 images, each
                // annotated with rectangles that bound each human face.  The idea is
                // to use this training data to learn to identify human faces in new
                // images.
                //
                // Once you have trained an object detector it is always important to
                // test it on data it wasn't trained on.  Therefore, we will also load
                // a separate testing set of 5 images.  Once we have a face detector
                // created from the training data we will see how well it works by
                // running it on the testing images.
                //
                // So here we create the variables that will hold our dataset.
                // images_train will hold the 4 training images and face_boxes_train
                // holds the locations of the faces in the training images.  So for
                // example, the image images_train[0] has the faces given by the
                // rectangles in face_boxes_train[0].
                IList <Matrix <RgbPixel> > imagesTrain;
                IList <Matrix <RgbPixel> > imagesTest;
                IList <IList <MModRect> >  faceBoxesTrain;
                IList <IList <MModRect> >  faceBoxesTest;

                // Now we load the data.  These XML files list the images in each dataset
                // and also contain the positions of the face boxes.  Obviously you can use
                // any kind of input format you like so long as you store the data into
                // images_train and face_boxes_train.  But for convenience dlib comes with
                // tools for creating and loading XML image datasets.  Here you see how to
                // load the data.  To create the XML files you can use the imglab tool which
                // can be found in the tools/imglab folder.  It is a simple graphical tool
                // for labeling objects in images with boxes.  To see how to use it read the
                // tools/imglab/README.txt file.
                Dlib.LoadImageDataset(facesDirectory + "/training.xml", out imagesTrain, out faceBoxesTrain);
                Dlib.LoadImageDataset(facesDirectory + "/testing.xml", out imagesTest, out faceBoxesTest);

                Console.WriteLine($"num training images: {imagesTrain.Count()}");
                Console.WriteLine($"num testing images:  {imagesTest.Count()}");


                // The MMOD algorithm has some options you can set to control its behavior.  However,
                // you can also call the constructor with your training annotations and a "target
                // object size" and it will automatically configure itself in a reasonable way for your
                // problem.  Here we are saying that faces are still recognizably faces when they are
                // 40x40 pixels in size.  You should generally pick the smallest size where this is
                // true.  Based on this information the mmod_options constructor will automatically
                // pick a good sliding window width and height.  It will also automatically set the
                // non-max-suppression parameters to something reasonable.  For further details see the
                // mmod_options documentation.
                using (var options = new MModOptions(faceBoxesTrain, 40, 40))
                {
                    // The detector will automatically decide to use multiple sliding windows if needed.
                    // For the face data, only one is needed however.
                    var detectorWindows = options.DetectorWindows.ToArray();
                    Console.WriteLine($"num detector windows: {detectorWindows.Length}");
                    foreach (var w in detectorWindows)
                    {
                        Console.WriteLine($"detector window width by height: {w.Width} x {w.Height}");
                    }

                    Console.WriteLine($"overlap NMS IOU thresh:             {options.OverlapsNms.GetIouThresh()}");
                    Console.WriteLine($"overlap NMS percent covered thresh: {options.OverlapsNms.GetPercentCoveredThresh()}");

                    // Now we are ready to create our network and trainer.
                    using (var net = new LossMmod(options, 2))
                    {
                        // The MMOD loss requires that the number of filters in the final network layer equal
                        // options.detector_windows.size().  So we set that here as well.
                        using (var subnet = net.GetSubnet())
                            using (var details = subnet.GetLayerDetails())
                            {
                                details.SetNumFilters(detectorWindows.Length);
                                using (var trainer = new DnnTrainer <LossMmod>(net))
                                {
                                    trainer.SetLearningRate(0.1);
                                    trainer.BeVerbose();
                                    trainer.SetSynchronizationFile("mmod_sync", 5 * 60);
                                    trainer.SetIterationsWithoutProgressThreshold(300);

                                    // Now let's train the network.  We are going to use mini-batches of 150
                                    // images.   The images are random crops from our training set (see
                                    // random_cropper_ex.cpp for a discussion of the random_cropper).
                                    IEnumerable <Matrix <RgbPixel> >      miniBatchSamples;
                                    IEnumerable <IEnumerable <MModRect> > miniBatchLabels;

                                    using (var cropper = new RandomCropper())
                                        using (var chipDims = new ChipDims(200, 200))
                                        {
                                            cropper.ChipDims = chipDims;
                                            // Usually you want to give the cropper whatever min sizes you passed to the
                                            // mmod_options constructor, which is what we do here.
                                            cropper.SetMinObjectSize(40, 40);

                                            using (var rnd = new Rand())
                                            {
                                                // Run the trainer until the learning rate gets small.  This will probably take several
                                                // hours.
                                                while (trainer.GetLearningRate() >= 1e-4)
                                                {
                                                    cropper.Operator(150, imagesTrain, faceBoxesTrain, out miniBatchSamples, out miniBatchLabels);
                                                    // We can also randomly jitter the colors and that often helps a detector
                                                    // generalize better to new images.
                                                    foreach (var img in miniBatchSamples)
                                                    {
                                                        Dlib.DisturbColors(img, rnd);
                                                    }

                                                    LossMmod.TrainOneStep(trainer, miniBatchSamples, miniBatchLabels);

                                                    miniBatchSamples.DisposeElement();
                                                    miniBatchLabels.DisposeElement();
                                                }
                                                // wait for training threads to stop
                                                trainer.GetNet();
                                                Console.WriteLine("done training");

                                                // Save the network to disk
                                                net.Clean();
                                                LossMmod.Serialize(net, "mmod_network.dat");


                                                // Now that we have a face detector we can test it.  The first statement tests it
                                                // on the training data.  It will print the precision, recall, and then average precision.
                                                // This statement should indicate that the network works perfectly on the
                                                // training data.
                                                using (var matrix = Dlib.TestObjectDetectionFunction(net, imagesTrain, faceBoxesTrain))
                                                    Console.WriteLine($"training results: {matrix}");
                                                // However, to get an idea if it really worked without overfitting we need to run
                                                // it on images it wasn't trained on.  The next line does this.   Happily,
                                                // this statement indicates that the detector finds most of the faces in the
                                                // testing data.
                                                using (var matrix = Dlib.TestObjectDetectionFunction(net, imagesTest, faceBoxesTest))
                                                    Console.WriteLine($"testing results:  {matrix}");


                                                // If you are running many experiments, it's also useful to log the settings used
                                                // during the training experiment.  This statement will print the settings we used to
                                                // the screen.
                                                Console.WriteLine($"{trainer}{cropper}");

                                                // Now lets run the detector on the testing images and look at the outputs.
                                                using (var win = new ImageWindow())
                                                    foreach (var img in imagesTest)
                                                    {
                                                        Dlib.PyramidUp(img);
                                                        var dets = net.Operator(img);
                                                        win.ClearOverlay();
                                                        win.SetImage(img);
                                                        foreach (var d in dets[0])
                                                        {
                                                            win.AddOverlay(d);
                                                        }

                                                        Console.ReadKey();

                                                        foreach (var det in dets)
                                                        {
                                                            foreach (var d in det)
                                                            {
                                                                d.Dispose();
                                                            }
                                                        }
                                                    }

                                                // Now that you finished this example, you should read dnn_mmod_train_find_cars_ex.cpp,
                                                // which is a more advanced example.  It discusses many issues surrounding properly
                                                // setting the MMOD parameters and creating a good training dataset.
                                            }
                                        }
                                }
                            }
                    }

                    detectorWindows.DisposeElement();
                }
            }
            catch (Exception e)
            {
                Console.WriteLine(e);
            }
        }