Code Example #1
File: Program.cs Project: zhuxb711/DlibDotNet
        private static void Main()
        {
            try
            {
                // You can get this file from http://dlib.net/files/mmod_front_and_rear_end_vehicle_detector.dat.bz2
                // This network was produced by the dnn_mmod_train_find_cars_ex.cpp example program.
                // As you can see, the file also includes a separately trained shape_predictor.  To see
                // a generic example of how to train those refer to train_shape_predictor_ex.cpp.
                using (var deserialize = new ProxyDeserialize("mmod_front_and_rear_end_vehicle_detector.dat"))
                    using (var net = LossMmod.Deserialize(deserialize, 1))
                        using (var sp = ShapePredictor.Deserialize(deserialize))
                            using (var img = Dlib.LoadImageAsMatrix <RgbPixel>("mmod_cars_test_image2.jpg"))
                                using (var win = new ImageWindow())
                                {
                                    win.SetImage(img);

                                    // Run the detector on the image and show us the output.
                                    var dets = net.Operator(img).First();
                                    foreach (var d in dets)
                                    {
                                        // We use a shape_predictor to refine the exact shape and location of the detection
                                        // box.  This shape_predictor is trained to simply output the 4 corner points of
                                        // the box.  So all we do is make a rectangle that tightly contains those 4 points
                                        // and that rectangle is our refined detection position.
                                        var fd   = sp.Detect(img, d);
                                        var rect = Rectangle.Empty;
                                        for (var j = 0u; j < fd.Parts; ++j)
                                        {
                                            rect += fd.GetPart(j);
                                        }

                                        if (d.Label == "rear")
                                        {
                                            win.AddOverlay(rect, new RgbPixel(255, 0, 0), d.Label);
                                        }
                                        else
                                        {
                                            win.AddOverlay(rect, new RgbPixel(255, 255, 0), d.Label);
                                        }
                                    }

                                    Console.WriteLine("Hit enter to end program");
                                    Console.ReadKey();
                                }
            }
            catch (ImageLoadException ile)
            {
                Console.WriteLine(ile.Message);
                Console.WriteLine("The test image is located in the examples folder.  So you should run this program from a sub folder so that the relative path is correct.");
            }
            catch (Exception e)
            {
                Console.WriteLine(e);
            }
        }
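
Note: the corner-point refinement inside the detection loop above can be factored into a small helper. A minimal sketch, reusing only DlibDotNet calls already shown in this example (the DetectionUtils class and RefineDetection name are ours, not part of the library):

using DlibDotNet;

internal static class DetectionUtils
{
    // Tightest rectangle containing the shape predictor's corner points
    // for one MMOD detection (same logic as the loop body above).
    public static Rectangle RefineDetection(ShapePredictor sp, Matrix<RgbPixel> img, MModRect d)
    {
        var fd = sp.Detect(img, d);
        var rect = Rectangle.Empty;
        for (var j = 0u; j < fd.Parts; ++j)
            rect += fd.GetPart(j);
        return rect;
    }
}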
Code Example #2
        private static void Main(string[] args)
        {
            if (args.Length == 0)
            {
                Console.WriteLine("Give some image files as arguments to this program.");
                return;
            }

            using (var win = new ImageWindow())
                using (var detector = FrontalFaceDetector.GetFrontalFaceDetector())
                    foreach (var file in args)
                    {
                        using (var img = Dlib.LoadImage <byte>(file))
                        {
                            Dlib.PyramidUp(img);

                            var dets = detector.Detect(img);
                            Console.WriteLine($"Number of faces detected: {dets.Length}");

                            win.ClearOverlay();
                            win.SetImage(img);
                            win.AddOverlay(dets, new RgbPixel { Red = 255 });

                            Console.WriteLine("hit enter to process next frame");
                            Console.ReadKey();
                        }
                    }
        }
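
Note: Dlib.PyramidUp doubles the image in each dimension, so the rectangles in dets are expressed in the upsampled coordinate system. To draw them on the original frame you would halve each coordinate. A minimal sketch, assuming DlibDotNet's Rectangle exposes a (left, top, right, bottom) constructor:

// Map a detection from the 2x-upsampled image back to the original frame.
static Rectangle MapToOriginal(Rectangle r)
{
    return new Rectangle(r.Left / 2, r.Top / 2, r.Right / 2, r.Bottom / 2);
}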
Code Example #3
File: Program.cs Project: tnw513/DlibDotNet
        private static void Main(string[] args)
        {
            if (args.Length < 2)
            {
                Console.WriteLine("Call this program like this:");
                Console.WriteLine("./dnn_mmod_face_detection_ex mmod_human_face_detector.dat faces/*.jpg");
                Console.WriteLine("You can get the mmod_human_face_detector.dat file from:");
                Console.WriteLine("http://dlib.net/files/mmod_human_face_detector.dat.bz2");
                return;
            }

            using (var net = DlibDotNet.Dnn.LossMmod.Deserialize(args[0]))
            {
                //image_window win;
                using (var win = new ImageWindow())
                    for (var index = 1; index < args.Length; index++)
                    {
                        using (var tmp = Dlib.LoadImage <RgbPixel>(args[index]))
                            using (var img = new Matrix <RgbPixel>(tmp))
                            {
                                // Upsampling the image will allow us to detect smaller faces but will cause the
                                // program to use more RAM and run longer.
                                while (img.Size < 1800 * 1800)
                                {
                                    Dlib.PyramidUp(img);
                                }

                                // Note that you can process a bunch of images in a std::vector at once and it runs
                                // much faster, since this will form mini-batches of images and therefore get
                                // better parallelism out of your GPU hardware.  However, all the images must be
                                // the same size.  To avoid this requirement on images being the same size we
                                // process them individually in this example.
                                using (var dets = net.Operator(img))
                                    foreach (var det in dets)
                                    {
                                        win.ClearOverlay();
                                        win.SetImage(img);
                                        foreach (var d in det)
                                        {
                                            win.AddOverlay(d);
                                        }
                                    }

                                Console.WriteLine("Hit enter to process the next image.");
                                Console.ReadKey();
                            }
                    }
            }
        }
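
The comment about mini-batching can be sketched as follows. This is only a sketch: it assumes net.Operator has an overload accepting a sequence of matrices (mirroring the single-image call above), that img1..img3 are Matrix<RgbPixel> instances with identical dimensions, and that System.Linq is imported for Count():

var batch = new[] { img1, img2, img3 };      // all images must be the same size
using (var allDets = net.Operator(batch))
{
    var index = 0;
    foreach (var dets in allDets)            // one detection list per input image
        Console.WriteLine($"image {index++}: {dets.Count()} detections");
}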
Code Example #4
File: Program.cs Project: wyd1520/DlibDotNet
        private static void Main(string[] args)
        {
            if (args.Length != 1)
            {
                Console.WriteLine("Call this program like this: ");
                Console.WriteLine("VideoTracking.exe <path of video_frames directory>");
                return;
            }

            var path  = args[0];
            var files = new DirectoryInfo(path).GetFiles("*.jpg").Select(info => info.FullName).ToList();

            files.Sort();

            if (files.Count == 0)
            {
                Console.WriteLine($"No images found in {path}");
                return;
            }

            using (var win = new ImageWindow())
                using (var tracker = new CorrelationTracker())
                {
                    var firstFile = files.First();
                    using (var img = Dlib.LoadImage <byte>(firstFile))
                        using (var rect = DRectangle.CenteredRect(93, 110, 38, 86))
                            tracker.StartTrack(img, rect);

                    foreach (var file in files.GetRange(1, files.Count - 1))
                    {
                        using (var img = Dlib.LoadImage <byte>(file))
                        {
                            tracker.Update(img);

                            win.SetImage(img);
                            win.ClearOverlay();

                            using (var pos = tracker.GetPosition())
                                win.AddOverlay(pos);

                            Console.WriteLine("hit enter to process next frame");
                            Console.ReadKey();
                        }
                    }
                }
        }
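
In dlib, CorrelationTracker.Update returns the tracker's confidence (its peak-to-sidelobe ratio), and a low value usually means the tracker has drifted off the target. A minimal sketch of checking it inside the loop above; the 7.0 threshold is arbitrary and only for illustration:

var confidence = tracker.Update(img);
if (confidence < 7.0)   // arbitrary threshold; tune it for your footage
    Console.WriteLine($"Low tracking confidence ({confidence:F1}); consider re-running StartTrack.");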
Code Example #5
        private static void Main(string[] args)
        {
            try
            {
                if (args.Length == 0)
                {
                    Console.WriteLine("Give an image dataset XML file to run this program.");
                    Console.WriteLine("For example, if you are running from the examples folder then run this program by typing");
                    Console.WriteLine("   ./RandomCropper faces/training.xml");
                    return;
                }

                // First let's load a dataset
                IEnumerable <Matrix <RgbPixel> >      images;
                IEnumerable <IEnumerable <MModRect> > boxes;
                Dlib.LoadImageDataset(args[0], out images, out boxes);

                // Here we make our random_cropper.  It has a number of options.
                var cropper = new DlibDotNet.ImageTransforms.RandomCropper();
                // We can tell it how big we want the cropped images to be.
                cropper.ChipDims = new ChipDims(400, 400);
                // Also, when doing cropping, it will map the object annotations from the
                // dataset to the cropped image as well as perform random scale jittering.
                // You can tell it how much scale jittering you would like by saying "please
                // make the objects in the crops have a min and max size of such and such".
                // You do that by calling these two functions.  Here we are saying we want the
                // objects in our crops to be no more than 0.8*400 pixels in height and width.
                cropper.MaxObjectSize = 0.8;
                // And also that they shouldn't be too small. Specifically, each object's smallest
                // dimension (i.e. height or width) should be at least 60 pixels and at least one of
                // the dimensions must be at least 80 pixels.  So the smallest objects the cropper will
                // output will be either 80x60 or 60x80.
                cropper.MinObjectLengthLongDim  = 80;
                cropper.MinObjectLengthShortDim = 60;
                // The cropper can also randomly mirror and rotate crops, which we ask it to
                // perform as well.
                cropper.RandomlyFlip       = true;
                cropper.MaxRotationDegrees = 50;
                // This fraction of crops are from random parts of images, rather than being centered
                // on some object.
                cropper.BackgroundCropsFraction = 0.2;

                // Now ask the cropper to generate a bunch of crops.  The output is stored in
                // crops and crop_boxes.
                IEnumerable <Matrix <RgbPixel> >      crops;
                IEnumerable <IEnumerable <MModRect> > cropBoxes;
                // Make 1000 crops.
                cropper.Operator(1000, images, boxes, out crops, out cropBoxes);

                // Finally, let's look at the results
                var cropList      = crops?.ToArray() ?? new Matrix <RgbPixel> [0];
                var cropBoxesList = cropBoxes?.ToArray() ?? new IEnumerable <MModRect> [0];
                using (var win = new ImageWindow())
                    for (var i = 0; i < cropList.Length; ++i)
                    {
                        win.ClearOverlay();
                        win.SetImage(cropList[i]);
                        foreach (var b in cropBoxesList[i])
                        {
                            // Note that mmod_rect has an ignore field.  If an object was labeled
                            // ignore in boxes then it will still be labeled as ignore in
                            // crop_boxes.  Moreover, objects that are not well contained within
                            // the crop are also set to ignore.
                            var rect = b.Rect;
                            if (b.Ignore)
                            {
                                win.AddOverlay(rect, new RgbPixel { Red = 255, Blue = 255 }); // draw ignored boxes as magenta
                            }
                            else
                            {
                                win.AddOverlay(rect, new RgbPixel { Red = 255 }); // draw other boxes as red
                            }
                        }

                        Console.WriteLine("Hit enter to view the next random crop.");
                        Console.ReadKey();
                    }
            }
            catch (Exception e)
            {
                Console.WriteLine(e);
            }
        }
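
If you want to inspect the crops outside the image window, you can also write them to disk. A minimal sketch using Dlib.SaveJpeg; the crop_*.jpg naming is our own convention:

for (var i = 0; i < cropList.Length; ++i)
{
    // Save each generated crop next to the executable for later review.
    Dlib.SaveJpeg(cropList[i], $"crop_{i:D4}.jpg");
}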
Code Example #6
        private static void Main(string[] args)
        {
            if (args.Length != 1)
            {
                Console.WriteLine("Call this program like this: ");
                Console.WriteLine("VideoTracking.exe <path of video_frames directory>");
                return;
            }

            var path  = args[0];
            var files = new DirectoryInfo(path).GetFiles("*.jpg").Select(info => info.FullName).ToList();



            files.Sort();

            if (files.Count == 0)
            {
                Console.WriteLine($"No images found in {path}");
                return;
            }


            // Define the capture source: the camera.  Note that on Windows you
            // need to pass VideoCaptureAPIs.DSHOW.
            var cap = new VideoCapture(0, VideoCaptureAPIs.DSHOW);

            // Or capture from a video file instead of the camera:
            //var cap = new VideoCapture("video.webm");

            // Check whether the capture device opened successfully
            if (!cap.IsOpened())
            {
                Console.WriteLine("Unable to connect to camera");
                return;
            }

            Mat temp    = null;
            var tracker = new CorrelationTracker();

            int init = 0;

            // Create the display window
            using (var win = new ImageWindow())
            {
                Console.WriteLine("对象追踪程序启动");
                Console.WriteLine("选择命令行为当前窗口,通过按键选择需要追踪的区域Width: [A,Z] Height:[S,X] X:[right,left] Y:[up,down] ,点击Enter开始追踪");
                Console.WriteLine("注意:切换命令行窗口输入法为英文输入状态");
                // Select the object to track
                while (!win.IsClosed())
                {
                    // Grab one frame
                    temp = cap.RetrieveMat();


                    if (temp == null)
                    {
                        Console.WriteLine("图像获取错误!");
                        return;
                    }

                    var array = new byte[temp.Width * temp.Height * temp.ElemSize()];
                    Marshal.Copy(temp.Data, array, 0, array.Length);
                    using (var cimg = Dlib.LoadImageData <BgrPixel>(array, (uint)temp.Height, (uint)temp.Width, (uint)(temp.Width * temp.ElemSize())))
                    {
                        init++;
                        if (init > 1)
                        {
                            var KK = Console.ReadKey();
                            if (KK.Key == ConsoleKey.Enter)
                            {
                                Console.WriteLine("开始追踪目标!");

                                // Pin down the tracking position
                                var rect2 = DRectangle.CenteredRect(a_X, a_Y, a_W, a_H);
                                // Start tracking
                                tracker.StartTrack(cimg, rect2);
                                win.SetImage(cimg);
                                win.ClearOverlay();
                                win.AddOverlay(rect2);
                                break;
                            }

                            // Adjust the tracking region
                            if (KK.Key == ConsoleKey.RightArrow || KK.Key == ConsoleKey.LeftArrow || KK.Key == ConsoleKey.UpArrow || KK.Key == ConsoleKey.DownArrow || KK.Key == ConsoleKey.A || KK.Key == ConsoleKey.Z || KK.Key == ConsoleKey.S || KK.Key == ConsoleKey.X)
                            {
                                if (KK.Key == ConsoleKey.RightArrow)
                                {
                                    a_X++;
                                    if (a_X > cimg.Rect.Width - a_W)
                                    {
                                        a_X = cimg.Rect.Width - a_W;
                                    }
                                }
                                if (KK.Key == ConsoleKey.LeftArrow)
                                {
                                    a_X--;
                                    if (a_X < 0)
                                    {
                                        a_X = 0;
                                    }
                                }

                                if (KK.Key == ConsoleKey.UpArrow)
                                {
                                    a_Y--;
                                    if (a_Y < 0)
                                    {
                                        a_Y = 0;
                                    }
                                }
                                if (KK.Key == ConsoleKey.DownArrow)
                                {
                                    a_Y++;
                                    if (a_Y > cimg.Rect.Height - a_H)
                                    {
                                        a_Y = cimg.Rect.Height - a_H;
                                    }
                                }

                                if (KK.Key == ConsoleKey.A)
                                {
                                    a_W++;
                                    if (a_W >= cimg.Rect.Width - a_X)
                                    {
                                        a_W = cimg.Rect.Width - a_X;
                                    }
                                }
                                if (KK.Key == ConsoleKey.Z)
                                {
                                    a_W--;
                                    if (a_W < 10)
                                    {
                                        a_W = 10;
                                    }
                                }
                                if (KK.Key == ConsoleKey.S)
                                {
                                    a_H++;
                                    if (a_H > cimg.Rect.Height - a_Y)
                                    {
                                        a_H = cimg.Rect.Height - a_Y;
                                    }
                                }
                                if (KK.Key == ConsoleKey.X)
                                {
                                    a_H--;
                                    if (a_H < 10)
                                    {
                                        a_H = 10;
                                    }
                                }
                            }
                        }

                        var rect = DRectangle.CenteredRect(a_X, a_Y, a_W, a_H);

                        Console.WriteLine("Set RECT:" + a_X + " " + a_Y + " " + a_W + " " + a_H);

                        // Show the image
                        win.SetImage(cimg);
                        win.ClearOverlay();
                        // Show the selection rectangle
                        win.AddOverlay(rect);
                    }
                }

                // Track the selected object
                while (!win.IsClosed())
                {
                    // Grab one frame
                    temp = cap.RetrieveMat();


                    if (temp == null)
                    {
                        Console.WriteLine("图像获取错误!");
                        return;
                    }

                    var array = new byte[temp.Width * temp.Height * temp.ElemSize()];
                    Marshal.Copy(temp.Data, array, 0, array.Length);
                    using (var cimg = Dlib.LoadImageData <BgrPixel>(array, (uint)temp.Height, (uint)temp.Width, (uint)(temp.Width * temp.ElemSize())))
                    {
                        // Update the tracker with the new frame
                        tracker.Update(cimg);

                        win.SetImage(cimg);
                        win.ClearOverlay();

                        // Get the tracked target's current position
                        DRectangle rect = tracker.GetPosition();
                        win.AddOverlay(rect);


                        Console.WriteLine("OBJ RECT:" + (int)rect.Left + " " + (int)rect.Top + " " + (int)rect.Width + " " + (int)rect.Height);

                        System.Threading.Thread.Sleep(100);
                    }
                }
            }



            Console.WriteLine("任意键退出");
            Console.ReadKey();
        }
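
The Mat-to-dlib conversion appears verbatim in both loops above, so it can be pulled into one extension method. A minimal sketch (ToDlibImage is our name; note that, unlike dlib's C++ cv_image wrapper, this copies the pixel data, so the result does not share memory with the Mat):

using System.Runtime.InteropServices;
using DlibDotNet;
using OpenCvSharp;

internal static class MatExtensions
{
    // Copy a BGR OpenCV Mat into a dlib Array2D<BgrPixel>.
    public static Array2D<BgrPixel> ToDlibImage(this Mat mat)
    {
        var array = new byte[mat.Width * mat.Height * mat.ElemSize()];
        Marshal.Copy(mat.Data, array, 0, array.Length);
        return Dlib.LoadImageData<BgrPixel>(array, (uint)mat.Height, (uint)mat.Width,
                                            (uint)(mat.Width * mat.ElemSize()));
    }
}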
Code Example #7
File: FaceLoginController.cs Project: TrojanOlx/AI
        public async Task <ActionResult> Login([FromBody] InputFaceModel model)
        {
            RequestFaceModel request = new RequestFaceModel()
            {
                Status  = 500,
                Message = null
            };
            var filePath = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "FaceImages", model.user_name);

            if (!Directory.Exists(filePath))
            {
                request.Enum = RequestEnum.Failed;
                Console.WriteLine(request.Message);
                Thread.Sleep(5000);
                return(Ok(request));
            }
            FaceContrast faceContrast = new FaceContrast(filePath);

            VideoCapture cap = null;

            try
            {
                if (model.rmtp_url == "0")
                {
                    cap = new VideoCapture(0);
                }
                else
                {
                    cap = new VideoCapture(model.rmtp_url);
                }


                var flag     = false;
                var faceFlag = false;

                var bioFlag = false;

                QueueFixedLength <double> leftEarQueue  = new QueueFixedLength <double>(10);
                QueueFixedLength <double> rightEarQueue = new QueueFixedLength <double>(10);
                QueueFixedLength <double> mouthQueue    = new QueueFixedLength <double>(20);
                bool leftEarFlag  = false;
                bool rightEarFlag = false;
                bool mouthFlag    = false;
                using (var sp = ShapePredictor.Deserialize(Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "ShapeModel", "shape_predictor_5_face_landmarks.dat")))
                    using (var win = new ImageWindow())
                    {
                        // Load face detection and pose estimation models.
                        using (var detector = Dlib.GetFrontalFaceDetector())
                            using (var net = LossMetric.Deserialize(Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "ShapeModel", "dlib_face_recognition_resnet_model_v1.dat")))
                                using (var poseModel = ShapePredictor.Deserialize(Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "ShapeModel", "shape_predictor_68_face_landmarks.dat")))
                                {
                                    var ti = true;

                                    System.Timers.Timer t = new System.Timers.Timer(30000);
                                    t.Elapsed += new System.Timers.ElapsedEventHandler((object source, System.Timers.ElapsedEventArgs e) =>
                                    {
                                        ti = false;
                                    });

                                    t.AutoReset = false;
                                    t.Enabled   = true;

                                    // Grab and process frames until the 30-second timer expires.
                                    while (/*!win.IsClosed() &&*/ ti)
                                    {
                                        try
                                        {
                                            // Grab a frame
                                            var temp = new Mat();
                                            if (!cap.Read(temp))
                                            {
                                                break;
                                            }

                                            // Convert OpenCV's Mat into something dlib can work with.  Unlike
                                            // dlib's C++ cv_image wrapper, this port copies the pixel data into
                                            // a managed array first, so cimg does not share memory with temp
                                            // and stays valid even after temp is released.
                                            var array = new byte[temp.Width * temp.Height * temp.ElemSize()];
                                            Marshal.Copy(temp.Data, array, 0, array.Length);
                                            using (var cimg = Dlib.LoadImageData <RgbPixel>(array, (uint)temp.Height, (uint)temp.Width, (uint)(temp.Width * temp.ElemSize())))
                                            {
                                                // Detect faces
                                                var faces = detector.Operator(cimg);
                                                // Find the pose of each face.
                                                var shapes = new List <FullObjectDetection>();
                                                for (var i = 0; i < faces.Length; ++i)
                                                {
                                                    var det = poseModel.Detect(cimg, faces[i]);
                                                    shapes.Add(det);
                                                }

                                                if (shapes.Count > 0)
                                                {
                                                    // Liveness detection

                                                    if (!bioFlag)
                                                    {
                                                        bioFlag = BioAssay(shapes[0], ref leftEarQueue, ref rightEarQueue, ref mouthQueue, ref leftEarFlag, ref rightEarFlag, ref mouthFlag);
                                                    }
                                                }


                                                if (!faceFlag)
                                                {
                                                    foreach (var face in faces)
                                                    {
                                                        var shape                   = sp.Detect(cimg, face);
                                                        var faceChipDetail          = Dlib.GetFaceChipDetails(shape, 150, 0.25);
                                                        Matrix <RgbPixel> rgbPixels = new Matrix <RgbPixel>(cimg);
                                                        var faceChip                = Dlib.ExtractImageChip <RgbPixel>(rgbPixels, faceChipDetail);
                                                        var faceDescriptors         = net.Operator(faceChip);
                                                        faceFlag = faceContrast.Contrast(faceDescriptors);
                                                    }
                                                }
                                                Console.WriteLine(model.user_name + ":" + faceFlag);
                                                if (bioFlag && faceFlag)
                                                {
                                                    flag = bioFlag && faceFlag;
                                                    if (flag)
                                                    {
                                                        break;
                                                    }
                                                }

                                                // Display it all on the screen
                                                win.ClearOverlay();
                                                win.SetImage(cimg);
                                                var lines = Dlib.RenderFaceDetections(shapes);
                                                win.AddOverlay(faces, new RgbPixel { Red = 72, Green = 118, Blue = 255 });
                                                win.AddOverlay(lines);
                                                foreach (var line in lines)
                                                {
                                                    line.Dispose();
                                                }
                                            }
                                        }
                                        catch (Exception ex)
                                        {
                                            request.Message = ex.ToString();
                                            break;
                                        }
                                    }
                                }
                    }

                if (flag)
                {
                    request.Enum = RequestEnum.Succeed;
                }
                else
                {
                    request.Enum = RequestEnum.Failed;
                }
            }
            catch (Exception ex)
            {
                request.Message = ex.ToString();
            }
            finally
            {
                if (cap != null)
                {
                    cap.Dispose();
                }
            }
            Console.WriteLine(request.Message);
            return(Ok(request));
        }
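
FaceContrast is not shown in this controller. A common way to implement its Contrast check is to compare the 128-D descriptors produced by dlib_face_recognition_resnet_model_v1 using Euclidean distance, with the 0.6 threshold from dlib's face recognition example. A minimal sketch under that assumption:

// Two descriptors are treated as the same person when their
// Euclidean distance is below 0.6 (dlib's suggested threshold).
private static bool IsSamePerson(Matrix<float> known, Matrix<float> candidate)
{
    using (var diff = known - candidate)
        return Dlib.Length(diff) < 0.6;
}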
Code Example #8
File: Program.cs Project: zhuxb711/DlibDotNet
        private static void Main(string[] args)
        {
            try
            {
                // In this example we are going to train a face detector based on the
                // small faces dataset in the examples/faces directory.  So the first
                // thing we do is load that dataset.  This means you need to supply the
                // path to this faces folder as a command line argument so we will know
                // where it is.
                if (args.Length != 1)
                {
                    Console.WriteLine("Give the path to the examples/faces directory as the argument to this");
                    Console.WriteLine("program.  For example, if you are in the examples folder then execute ");
                    Console.WriteLine("this program by running: ");
                    Console.WriteLine("   ./fhog_object_detector_ex faces");
                    Console.WriteLine();
                    return;
                }

                var facesDirectory = args[0];
                // The faces directory contains a training dataset and a separate
                // testing dataset.  The training data consists of 4 images, each
                // annotated with rectangles that bound each human face.  The idea is
                // to use this training data to learn to identify human faces in new
                // images.
                //
                // Once you have trained an object detector it is always important to
                // test it on data it wasn't trained on.  Therefore, we will also load
                // a separate testing set of 5 images.  Once we have a face detector
                // created from the training data we will see how well it works by
                // running it on the testing images.
                //
                // So here we create the variables that will hold our dataset.
                // images_train will hold the 4 training images and face_boxes_train
                // holds the locations of the faces in the training images.  So for
                // example, the image images_train[0] has the faces given by the
                // rectangles in face_boxes_train[0].
                IList <Matrix <byte> >     tmpImagesTrain;
                IList <Matrix <byte> >     tmpImagesTest;
                IList <IList <Rectangle> > tmpFaceBoxesTrain;
                IList <IList <Rectangle> > tmpFaceBoxesTest;

                // Now we load the data.  These XML files list the images in each
                // dataset and also contain the positions of the face boxes.  Obviously
                // you can use any kind of input format you like so long as you store
                // the data into images_train and face_boxes_train.  But for convenience
                // dlib comes with tools for creating and loading XML image dataset
                // files.  Here you see how to load the data.  To create the XML files
                // you can use the imglab tool which can be found in the tools/imglab
                // folder.  It is a simple graphical tool for labeling objects in images
                // with boxes.  To see how to use it read the tools/imglab/README.txt
                // file.
                Dlib.LoadImageDataset(Path.Combine(facesDirectory, "training.xml"), out tmpImagesTrain, out tmpFaceBoxesTrain);
                Dlib.LoadImageDataset(Path.Combine(facesDirectory, "testing.xml"), out tmpImagesTest, out tmpFaceBoxesTest);

                // Now we do a little bit of pre-processing.  This is optional but for
                // this training data it improves the results.  The first thing we do is
                // increase the size of the images by a factor of two.  We do this
                // because it will allow us to detect smaller faces than otherwise would
                // be practical (since the faces are all now twice as big).  Note that,
                // in addition to resizing the images, these functions also make the
                // appropriate adjustments to the face boxes so that they still fall on
                // top of the faces after the images are resized.
                var imageTrain     = new List <Matrix <byte> >(tmpImagesTrain);
                var faceBoxesTrain = new List <IList <Rectangle> >(tmpFaceBoxesTrain);
                Dlib.UpsampleImageDataset(2, imageTrain, faceBoxesTrain);
                var imageTest     = new List <Matrix <byte> >(tmpImagesTest);
                var faceBoxesTest = new List <IList <Rectangle> >(tmpFaceBoxesTest);
                Dlib.UpsampleImageDataset(2, imageTest, faceBoxesTest);

                // Since human faces are generally left-right symmetric we can increase
                // our training dataset by adding mirrored versions of each image back
                // into images_train.  So this next step doubles the size of our
                // training dataset.  Again, this is obviously optional but is useful in
                // many object detection tasks.
                Dlib.AddImageLeftRightFlips(imageTrain, faceBoxesTrain);
                Console.WriteLine($"num training images: {imageTrain.Count()}");
                Console.WriteLine($"num testing images:  {imageTest.Count()}");


                // Finally we get to the training code.  dlib contains a number of
                // object detectors.  This typedef tells it that you want to use the one
                // based on Felzenszwalb's version of the Histogram of Oriented
                // Gradients (commonly called HOG) detector.  The 6 means that you want
                // it to use an image pyramid that downsamples the image at a ratio of
                // 5/6.  Recall that HOG detectors work by creating an image pyramid and
                // then running the detector over each pyramid level in a sliding window
                // fashion.
                using (var scanner = new ScanFHogPyramid <PyramidDown, DefaultFHogFeatureExtractor>(6))
                {
                    // The sliding window detector will be 80 pixels wide and 80 pixels tall.
                    scanner.SetDetectionWindowSize(80, 80);

                    using (var trainer = new StructuralObjectDetectionTrainer <ScanFHogPyramid <PyramidDown, DefaultFHogFeatureExtractor> >(scanner))
                    {
                        // Set this to the number of processing cores on your machine.
                        trainer.SetNumThreads(4);
                        // The trainer is a kind of support vector machine and therefore has the usual SVM
                        // C parameter.  In general, a bigger C encourages it to fit the training data
                        // better but might lead to overfitting.  You must find the best C value
                        // empirically by checking how well the trained detector works on a test set of
                        // images you haven't trained on.  Don't just leave the value set at 1.  Try a few
                        // different C values and see what works best for your data.
                        trainer.SetC(1);
                        // We can tell the trainer to print its progress to the console if we want.
                        trainer.BeVerbose();
                        // The trainer will run until the "risk gap" is less than 0.01.  Smaller values
                        // make the trainer solve the SVM optimization problem more accurately but will
                        // take longer to train.  For most problems a value in the range of 0.1 to 0.01 is
                        // plenty accurate.  Also, when in verbose mode the risk gap is printed on each
                        // iteration so you can see how close it is to finishing the training.
                        trainer.SetEpsilon(0.01);


                        // Now we run the trainer.  For this example, it should take on the order of 10
                        // seconds to train.
                        var detector = trainer.Train(imageTrain, faceBoxesTrain);

                        // Now that we have a face detector we can test it.  The first statement tests it
                        // on the training data.  It will print the precision, recall, and then average precision.
                        using (var matrix = Dlib.TestObjectDetectionFunction(detector, imageTrain, faceBoxesTrain))
                            Console.WriteLine($"training results: {matrix}");
                        // However, to get an idea if it really worked without overfitting we need to run
                        // it on images it wasn't trained on.  The next line does this.  Happily, we see
                        // that the object detector works perfectly on the testing images.
                        using (var matrix = Dlib.TestObjectDetectionFunction(detector, imageTest, faceBoxesTest))
                            Console.WriteLine($"testing results: {matrix}");

                        // If you have read any papers that use HOG you have probably seen the nice looking
                        // "sticks" visualization of a learned HOG detector.  This next line creates a
                        // window with such a visualization of our detector.  It should look somewhat like
                        // a face.
                        using (var fhog = Dlib.DrawFHog(detector))
                            using (var hogwin = new ImageWindow(fhog, "Learned fHOG detector"))
                            {
                                // Now for the really fun part.  Let's display the testing images on the screen and
                                // show the output of the face detector overlaid on each image.  You will see that
                                // it finds all the faces without false alarming on any non-faces.
                                using (var win = new ImageWindow())
                                    for (var i = 0; i < imageTest.Count; ++i)
                                    {
                                        // Run the detector and get the face detections.
                                        var dets = detector.Operator(imageTest[i]);
                                        win.ClearOverlay();
                                        win.SetImage(imageTest[i]);
                                        win.AddOverlay(dets, new RgbPixel(255, 0, 0));
                                        Console.WriteLine("Hit enter to process the next image...");
                                        Console.ReadKey();
                                        Console.WriteLine("");
                                    }
                            }


                        // Like everything in dlib, you can save your detector to disk using the
                        // serialize() function.
                        detector.Serialize("face_detector.svm");

                        // Then you can recall it using the deserialize() function.
                        using (var tmp = new ScanFHogPyramid <PyramidDown, DefaultFHogFeatureExtractor>(6))
                            using (var detector2 = new ObjectDetector <ScanFHogPyramid <PyramidDown, DefaultFHogFeatureExtractor> >(tmp))
                                detector2.Deserialize("face_detector.svm");



                        // Now let's talk about some optional features of this training tool as well as some
                        // important points you should understand.
                        //
                        // The first thing that should be pointed out is that, since this is a sliding
                        // window classifier, it can't output an arbitrary rectangle as a detection.  In
                        // this example our sliding window is 80 by 80 pixels and is run over an image
                        // pyramid.  This means that it can only output detections that are at least 80 by
                        // 80 pixels in size (recall that this is why we upsampled the images after loading
                        // them).  It also means that the aspect ratio of the outputs is 1.  So if,
                        // for example, you had a box in your training data that was 200 pixels by 10
                        // pixels then it would simply be impossible for the detector to learn to detect
                        // it.  Similarly, if you had a really small box it would be unable to learn to
                        // detect it.
                        //
                        // So the training code performs an input validation check on the training data and
                        // will throw an exception if it detects any boxes that are impossible to detect
                        // given your setting of scanning window size and image pyramid resolution.  You
                        // can use a statement like:
                        //   remove_unobtainable_rectangles(trainer, images_train, face_boxes_train)
                        // to automatically discard these impossible boxes from your training dataset
                        // before running the trainer.  This will avoid getting the "impossible box"
                        // exception.  However, I would recommend you be careful that you are not throwing
                        // away truth boxes you really care about.  The remove_unobtainable_rectangles()
                        // will return the set of removed rectangles so you can visually inspect them and
                        // make sure you are OK that they are being removed.
                        //
                        // Next, note that any location in the images not marked with a truth box is
                        // implicitly treated as a negative example.  This means that when creating
                        // training data it is critical that you label all the objects you want to detect.
                        // So for example, if you are making a face detector then you must mark all the
                        // faces in each image.  However, sometimes there are objects in images you are
                        // unsure about or simply don't care if the detector identifies or not.  For these
                        // objects you can pass in a set of "ignore boxes" as a third argument to the
                        // trainer.train() function.  The trainer will simply disregard any detections that
                        // happen to hit these boxes.
                        //
                        // Another useful thing you can do is evaluate multiple HOG detectors together. The
                        // benefit of this is increased testing speed since it avoids recomputing the HOG
                        // features for each run of the detector.  You do this by storing your detectors
                        // into a std::vector and then invoking evaluate_detectors() like so:
                        var myDetectors = new List <ObjectDetector <ScanFHogPyramid <PyramidDown, DefaultFHogFeatureExtractor> > >();
                        myDetectors.Add(detector);
                        var dect2 = Dlib.EvaluateDetectors(myDetectors, imageTrain[0]);
                        //
                        //
                        // Finally, you can add a nuclear norm regularizer to the SVM trainer.  Doing has
                        // two benefits.  First, it can cause the learned HOG detector to be composed of
                        // separable filters and therefore makes it execute faster when detecting objects.
                        // It can also help with generalization since it tends to make the learned HOG
                        // filters smoother.  To enable this option you call the following function before
                        // you create the trainer object:
                        //    scanner.set_nuclear_norm_regularization_strength(1.0);
                        // The argument determines how important it is to have a small nuclear norm.  A
                        // bigger regularization strength means it is more important.  The smaller the
                        // nuclear norm the smoother and faster the learned HOG filters will be, but if the
                        // regularization strength value is too large then the SVM will not fit the data
                        // well.  This is analogous to giving a C value that is too small.
                        //
                        // You can see how many separable filters are inside your detector like so:
                        Console.WriteLine($"num filters: {Dlib.NumSeparableFilters(detector)}");
                        // You can also control how many filters there are by explicitly thresholding the
                        // singular values of the filters like this:
                        using (var newDetector = Dlib.ThresholdFilterSingularValues(detector, 0.1))
                        {
                        }
                        // That removes filter components with singular values less than 0.1.  The bigger
                        // this number the fewer separable filters you will have and the faster the
                        // detector will run.  However, a large enough threshold will hurt detection
                        // accuracy.
                    }
                }
            }
            catch (Exception e)
            {
                Console.WriteLine(e);
            }
        }
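
Once face_detector.svm has been serialized, a separate program can load and run it without retraining, using the same deserialize pattern shown above. A minimal sketch; some_image.jpg is a hypothetical input file:

using (var scanner = new ScanFHogPyramid<PyramidDown, DefaultFHogFeatureExtractor>(6))
using (var detector = new ObjectDetector<ScanFHogPyramid<PyramidDown, DefaultFHogFeatureExtractor>>(scanner))
using (var img = Dlib.LoadImageAsMatrix<byte>("some_image.jpg"))
{
    detector.Deserialize("face_detector.svm");
    foreach (var faceRect in detector.Operator(img))
        Console.WriteLine($"face found at {faceRect}");
}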
Code Example #9
        private static void Main()
        {
            try
            {
                // You can get this file from http://dlib.net/files/mmod_rear_end_vehicle_detector.dat.bz2
                // This network was produced by the dnn_mmod_train_find_cars_ex.cpp example program.
                // As you can see, the file also includes a separately trained shape_predictor.  To see
                // a generic example of how to train those refer to train_shape_predictor_ex.cpp.
                using (var deserialize = new ProxyDeserialize("mmod_rear_end_vehicle_detector.dat"))
                    using (var net = LossMmod.Deserialize(deserialize, 1))
                        using (var sp = ShapePredictor.Deserialize(deserialize))
                            using (var img = Dlib.LoadImageAsMatrix <RgbPixel>("mmod_cars_test_image.jpg"))
                                using (var win = new ImageWindow())
                                {
                                    win.SetImage(img);

                                    // Run the detector on the image and show us the output.
                                    var dets = net.Operator(img).First();
                                    foreach (var d in dets)
                                    {
                                        // We use a shape_predictor to refine the exact shape and location of the detection
                                        // box.  This shape_predictor is trained to simply output the 4 corner points of
                                        // the box.  So all we do is make a rectangle that tightly contains those 4 points
                                        // and that rectangle is our refined detection position.
                                        var fd   = sp.Detect(img, d);
                                        var rect = Rectangle.Empty;
                                        for (var j = 0u; j < fd.Parts; ++j)
                                        {
                                            rect += fd.GetPart(j);
                                        }

                                        win.AddOverlay(rect, new RgbPixel(255, 0, 0));
                                    }



                                    Console.WriteLine("Hit enter to view the intermediate processing steps");
                                    Console.ReadKey();


                                    // Now let's look at how the detector works.  The high level processing steps look like:
                                    //   1. Create an image pyramid and pack the pyramid into one big image.  We call this
                                    //      image the "tiled pyramid".
                                    //   2. Run the tiled pyramid image through the CNN.  The CNN outputs a new image where
                                    //      bright pixels in the output image indicate the presence of cars.
                                    //   3. Find pixels in the CNN's output image with a value > 0.  Those locations are your
                                    //      preliminary car detections.
                                    //   4. Perform non-maximum suppression on the preliminary detections to produce the
                                    //      final output.
                                    //
                                    // We will be plotting the images from steps 1 and 2 so you can visualize what's
                                    // happening.  For the CNN's output image, we will use the jet colormap so that "bright"
                                    // outputs, i.e. pixels with big values, appear in red and "dim" outputs appear as a
                                    // cold blue color.  To do this we pick a range of CNN output values for the color
                                    // mapping.  The specific values don't matter.  They are just selected to give a nice
                                    // looking output image.
                                    const float lower = -2.5f;
                                    const float upper = 0.0f;
                                    Console.WriteLine($"jet color mapping range:  lower={lower}  upper={upper}");



                                    // Create a tiled pyramid image and display it on the screen.
                                    // Get the type of pyramid the CNN used
                                    //using pyramid_type = std::remove_reference < decltype(input_layer(net)) >::type::pyramid_type;
                                    // And tell create_tiled_pyramid to create the pyramid using that pyramid type.
                                    using (var inputLayer = new InputRgbImagePyramid <PyramidDown>(6))
                                    {
                                        net.TryGetInputLayer(inputLayer);

                                        var padding      = inputLayer.GetPyramidPadding();
                                        var outerPadding = inputLayer.GetPyramidOuterPadding();
                                        Dlib.CreateTiledPyramid <RgbPixel, PyramidDown>(img,
                                                                                        padding,
                                                                                        outerPadding,
                                                                                        6,
                                                                                        out var tiledImg,
                                                                                        out var rects);

                                        using (var winpyr = new ImageWindow(tiledImg, "Tiled pyramid"))
                                        {
                                            // This CNN detector represents a sliding window detector with 3 sliding windows.  Each
                                            // of the 3 windows has a different aspect ratio, allowing it to find vehicles which
                                            // are either tall and skinny, squarish, or short and wide.  The aspect ratio of a
                                            // detection is determined by which channel in the output image triggers the detection.
                                            // Here we are just going to max pool the channels together to get one final image for
                                            // our display.  In this image, a pixel will be bright if any of the sliding window
                                            // detectors thinks there is a car at that location.
                                            using (var subnet = net.GetSubnet())
                                            {
                                                var output = subnet.Output;
                                                Console.WriteLine($"Number of channels in final tensor image: {output.K}");
                                                var networkOutput = Dlib.ImagePlane(output);
                                                for (var k = 1; k < output.K; k++)
                                                {
                                                    using (var tmpNetworkOutput = Dlib.ImagePlane(output, 0, k))
                                                    {
                                                        var maxPointWise = Dlib.MaxPointWise(networkOutput, tmpNetworkOutput);
                                                        networkOutput.Dispose();
                                                        networkOutput = maxPointWise;
                                                    }
                                                }

                                                // We will also upsample the CNN's output image.  The CNN we defined has an 8x
                                                // downsampling layer at the beginning. In the code below we are going to overlay this
                                                // CNN output image on top of the raw input image.  To make that look nice it helps to
                                                // upsample the CNN output image back to the same resolution as the input image, which
                                                // we do here.
                                                var networkOutputScale = img.Columns / (double)networkOutput.Columns;
                                                Dlib.ResizeImage(networkOutput, networkOutputScale);


                                                // Display the network's output as a color image.
                                                using (var jet = Dlib.Jet(networkOutput, upper, lower))
                                                    using (var winOutput = new ImageWindow(jet, "Output tensor from the network"))
                                                    {
                                                        // Also, overlay network_output on top of the tiled image pyramid and display it.
                                                        for (var r = 0; r < tiledImg.Rows; ++r)
                                                        {
                                                            for (var c = 0; c < tiledImg.Columns; ++c)
                                                            {
                                                                var tmp = new DPoint(c, r);
                                                                tmp = Dlib.InputTensorToOutputTensor(net, tmp);
                                                                var dp = networkOutputScale * tmp;
                                                                tmp = new DPoint((int)dp.X, (int)dp.Y);
                                                                if (Dlib.GetRect(networkOutput).Contains((int)tmp.X, (int)tmp.Y))
                                                                {
                                                                    var val = networkOutput[(int)tmp.Y, (int)tmp.X];

                                                                    // alpha blend the network output pixel with the RGB image to make our
                                                                    // overlay.
                                                                    var p = new RgbAlphaPixel();
                                                                    Dlib.AssignPixel(ref p, Dlib.ColormapJet(val, lower, upper));
                                                                    p.Alpha = 120;

                                                                    var rgb = new RgbPixel();
                                                                    Dlib.AssignPixel(ref rgb, p);
                                                                    tiledImg[r, c] = rgb;
                                                                }
                                                            }
                                                        }

                                                        // If you look at this image you can see that the vehicles have bright red blobs on
                                                        // them.  That's the CNN saying "there is a car here!".  You will also notice there is
                                                        // a certain scale at which it finds cars.  They have to be not too big or too small,
                                                        // which is why we have an image pyramid.  The pyramid allows us to find cars of all
                                                        // scales.
                                                        using (var winPyrOverlay = new ImageWindow(tiledImg, "Detection scores on image pyramid"))
                                                        {
                                                            // Finally, we can collapse the pyramid back into the original image.  The CNN doesn't
                                                            // actually do this step, since it's enough to threshold the tiled pyramid image to get
                                                            // the detections.  However, it makes a nice visualization and clearly indicates that
                                                            // the detector is firing for all the cars.
                                                            using (var collapsed = new Matrix <float>(img.Rows, img.Columns))
                                                                using (var inputTensor = new ResizableTensor())
                                                                {
                                                                    inputLayer.ToTensor(img, 1, inputTensor);
                                                                    for (var r = 0; r < collapsed.Rows; ++r)
                                                                    {
                                                                        for (var c = 0; c < collapsed.Columns; ++c)
                                                                        {
                                                                            // Loop over a bunch of scale values and look up what part of network_output
                                                                            // corresponds to the point(c,r) in the original image, then take the max
                                                                            // detection score over all the scales and save it at pixel point(c,r).
                                                                            var maxScore = -1e30f;
                                                                            for (double scale = 1; scale > 0.2; scale *= 5.0 / 6.0)
                                                                            {
                                                                                // Map from input image coordinates to tiled pyramid coordinates.
                                                                                var tensorSpace = inputLayer.ImageSpaceToTensorSpace(inputTensor, scale, new DRectangle(new DPoint(c, r)));
                                                                                var tmp         = tensorSpace.Center;

                                                                                // Now map from pyramid coordinates to network_output coordinates.
                                                                                var dp = networkOutputScale * Dlib.InputTensorToOutputTensor(net, tmp);
                                                                                tmp = new DPoint((int)dp.X, (int)dp.Y);

                                                                                if (Dlib.GetRect(networkOutput).Contains((int)tmp.X, (int)tmp.Y))
                                                                                {
                                                                                    var val = networkOutput[(int)tmp.Y, (int)tmp.X];
                                                                                    if (val > maxScore)
                                                                                    {
                                                                                        maxScore = val;
                                                                                    }
                                                                                }
                                                                            }

                                                                            collapsed[r, c] = maxScore;

                                                                            // Also blend the scores into the original input image so we can view it as
                                                                            // an overlay on the cars.
                                                                            var p = new RgbAlphaPixel();
                                                                            Dlib.AssignPixel(ref p, Dlib.ColormapJet(maxScore, lower, upper));
                                                                            p.Alpha = 120;

                                                                            var rgb = new RgbPixel();
                                                                            Dlib.AssignPixel(ref rgb, p);
                                                                            img[r, c] = rgb;
                                                                        }
                                                                    }

                                                                    using (var jet2 = Dlib.Jet(collapsed, upper, lower))
                                                                        using (var winCollapsed = new ImageWindow(jet2, "Collapsed output tensor from the network"))
                                                                            using (var winImgAndSal = new ImageWindow(img, "Collapsed detection scores on raw image"))
                                                                            {
                                                                                Console.WriteLine("Hit enter to end program");
                                                                                Console.ReadKey();
                                                                            }
                                                                }
                                                        }
                                                    }
                                            }
                                        }
                                    }
                                }
            }
            catch (Exception e)
            {
                Console.WriteLine(e);
            }
        }
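
The comment in the collapse step above notes that thresholding the score image is enough to get detections.  A minimal sketch of that step, reusing the collapsed matrix from the loop above; the 0f cutoff is an illustrative assumption (dlib's MMOD loss treats scores above zero as detections by default), not code from the original example:

        // Sketch only: collect the pixels of the collapsed score image whose
        // score clears the cutoff.  Real detections come from net.Operator(),
        // which also performs non-max suppression before returning rectangles.
        var hits = new List<Point>();
        for (var r = 0; r < collapsed.Rows; ++r)
            for (var c = 0; c < collapsed.Columns; ++c)
                if (collapsed[r, c] > 0f)
                    hits.Add(new Point(c, r));
        Console.WriteLine($"pixels above the detection threshold: {hits.Count}");
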
Code Example #10
        private static void Main()
        {
            using (var img = new Array2D <byte>(400, 400))
                using (var ht = new DlibDotNet.HoughTransform(300))
                    using (var win = new ImageWindow())
                        using (var win2 = new ImageWindow())
                        {
                            var angle1 = 0d;
                            var angle2 = 0d;

                            while (true)
                            {
                                angle1 += Math.PI / 130;
                                angle2 += Math.PI / 400;

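                                // Generate a line segment that rotates around the image center:
                                // angle1 orbits the pivot point around the center while angle2
                                // spins the line about that pivot, so the Hough peak sweeps
                                // through transform space as the animation runs.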
                                var rect = img.Rect;
                                var cent = rect.Center;
                                var arc  = Point.Rotate(cent, cent + new Point(90, 0), angle1 * 180 / Math.PI);
                                var tmp2 = arc + new Point(500, 0);
                                var tmp3 = arc - new Point(500, 0);
                                var l    = Point.Rotate(arc, tmp2, angle2 * 180 / Math.PI);
                                var r    = Point.Rotate(arc, tmp3, angle2 * 180 / Math.PI);

                                Dlib.AssignAllPixels(img, 0);
                                Dlib.DrawLine(img, l, r, 255);

                                using (var himg = new Array2D <int>())
                                {
                                    var offset = new Point(50, 50);
                                    var hrect  = Dlib.GetRect(ht);
                                    var box    = Rectangle.Translate(hrect, offset);

                                    // Now let's compute the hough transform for a subwindow in the image.  In
                                    // particular, we run it on the 300x300 subwindow with an upper left corner at the
                                    // pixel point(50,50).  The output is stored in himg.
                                    ht.Operator(img, box, himg);

                                    // Now that we have the transformed image, the Hough image pixel with the largest
                                    // value should indicate where the line is.  So we find the coordinates of the
                                    // largest pixel:
                                    using (var mat = Dlib.Mat(himg))
                                    {
                                        var p = Dlib.MaxPoint(mat);

                                        // And then ask the ht object for the line segment in the original image that
                                        // corresponds to this point in Hough transform space.
                                        var line = ht.GetLine(p);

                                        // Finally, let's display all these things on the screen.  We copy the original
                                        // input image into a color image and then draw the detected line on top in red.
                                        using (var temp = new Array2D <RgbPixel>())
                                        {
                                            Dlib.AssignImage(img, temp);

                                            var p1 = line.Item1 + offset;
                                            var p2 = line.Item2 + offset;

                                            Dlib.DrawLine(temp, p1, p2, new RgbPixel
                                            {
                                                Red = 255
                                            });
                                            win.ClearOverlay();
                                            win.SetImage(temp);

                                            // Also show the subwindow we ran the Hough transform on as a green box.  You will
                                            // see that the detected line is exactly contained within this box and also
                                            // overlaps the original line.
                                            win.AddOverlay(box, new RgbPixel
                                            {
                                                Green = 255
                                            });

                                            using (var jet = Dlib.Jet(himg))
                                                win2.SetImage(jet);
                                        }
                                    }
                                }
                            }
                        }
        }
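
The loop above keeps only the single brightest Hough pixel.  If an image may contain several lines, one possible extension is to collect every strong Hough pixel and convert each one back to a line segment.  This is a sketch only: the cutoff of 100 votes is an assumption, and real code would also need non-maximum suppression so that one line does not produce a cluster of peaks.

        // Sketch: every Hough-space pixel above a vote cutoff becomes a candidate line.
        var peaks = new List<Point>();
        using (var mat = Dlib.Mat(himg))
        {
            for (var r = 0; r < mat.Rows; ++r)
                for (var c = 0; c < mat.Columns; ++c)
                    if (mat[r, c] > 100)
                        peaks.Add(new Point(c, r));
        }

        foreach (var peak in peaks)
        {
            var segment = ht.GetLine(peak);

            // The endpoints are in subwindow coordinates, so shift them by the
            // same offset used above before drawing.
            var p1 = segment.Item1 + offset;
            var p2 = segment.Item2 + offset;
            // ... draw the p1-p2 segment exactly as above ...
        }
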
Code Example #11
File: Program.cs Project: TrojanOlx/AI
        private static void Main()
        {
            try
            {
                //var cap = new VideoCapture("https://js.live-play.acgvideo.com/live-js/890069/live_30947419_1716018.flv?wsSecret=2cee8a379a871fa8dbf714ba9d16e8a4&wsTime=1548240723&trid=4f64a0ae5e2444938cfdd109a54c6e1c&sig=no&platform=web&pSession=yR3bsQk1-SCY4-4QGi-K7EG-AsbTiwbX7tZF");
                var cap = new VideoCapture(0);
                if (!cap.IsOpened())
                {
                    Console.WriteLine("Unable to connect to camera");
                    return;
                }

                using (var win = new ImageWindow())
                {
                    // Load face detection and pose estimation models.
                    using (var detector = Dlib.GetFrontalFaceDetector())
                        using (var poseModel = ShapePredictor.Deserialize("shape_predictor_68_face_landmarks.dat"))
                        {
                            // Grab and process frames until the user closes the main window.
                            while (!win.IsClosed())
                            {
                                // Grab a frame
                                var temp = new Mat();
                                if (!cap.Read(temp))
                                {
                                    break;
                                }

                                // Turn OpenCV's Mat into something dlib can deal with.  Unlike the
                                // original C++ example, which wraps the Mat's memory without copying,
                                // this code first copies the pixel data into a managed byte array, so
                                // cimg owns its own copy of the frame and cannot end up with dangling
                                // pointers into temp.  Note that OpenCV frames are BGR, so loading
                                // them as RgbPixel swaps the red and blue channels (see the BgrPixel
                                // variant in a later example).
                                var array = new byte[temp.Width * temp.Height * temp.ElemSize()];
                                Marshal.Copy(temp.Data, array, 0, array.Length);
                                using (var cimg = Dlib.LoadImageData <RgbPixel>(array, (uint)temp.Height, (uint)temp.Width, (uint)(temp.Width * temp.ElemSize())))
                                {
                                    // Detect faces
                                    var faces = detector.Operator(cimg);
                                    // Find the pose of each face.
                                    var shapes = new List <FullObjectDetection>();
                                    for (var i = 0; i < faces.Length; ++i)
                                    {
                                        var det = poseModel.Detect(cimg, faces[i]);
                                        Console.WriteLine(faces[i].Left);
                                        shapes.Add(det);
                                    }

                                    // Display it all on the screen
                                    win.ClearOverlay();
                                    win.SetImage(cimg);
                                    var lines = Dlib.RenderFaceDetections(shapes);
                                    win.AddOverlay(faces, new RgbPixel {
                                        Red = 255
                                    });
                                    win.AddOverlay(lines);
                                    foreach (var line in lines)
                                    {
                                        line.Dispose();
                                    }
                                }
                            }
                        }
                }
            }
            // If deserialization fails, you are missing dlib's default face landmarking
            // model file.  You can get it from:
            // http://dlib.net/files/shape_predictor_68_face_landmarks.dat.bz2
            catch (Exception e)
            {
                Console.WriteLine(e.Message);
            }
        }
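
One caveat about this loop: it allocates a new OpenCV Mat for every frame and never disposes it, so unmanaged frame buffers accumulate until finalizers run.  A minimal sketch of the same frame grab with deterministic cleanup (same APIs as above):

        while (!win.IsClosed())
        {
            // Mat holds unmanaged memory, so dispose each frame as soon as
            // we are done with it instead of waiting for the finalizer.
            using (var temp = new Mat())
            {
                if (!cap.Read(temp))
                    break;

                // ... copy the pixel data and run the detector exactly as above ...
            }
        }
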
Code Example #12
File: Program.cs Project: zhuxb711/DlibDotNet
        private static void Main(string[] args)
        {
            try
            {
                if (args.Length != 2)
                {
                    Console.WriteLine("Call this program like this:");
                    Console.WriteLine("./dnn_mmod_dog_hipsterizer mmod_dog_hipsterizer.dat faces/dogs.jpg");
                    Console.WriteLine("You can get the mmod_dog_hipsterizer.dat file from:");
                    Console.WriteLine("http://dlib.net/files/mmod_dog_hipsterizer.dat.bz2");
                    return;
                }

                // load the models as well as glasses and mustache.
                using (var deserialize = new ProxyDeserialize(args[0]))
                    using (var net = LossMmod.Deserialize(deserialize))
                        using (var sp = ShapePredictor.Deserialize(deserialize))
                            using (var glasses = Matrix <RgbAlphaPixel> .Deserialize(deserialize))
                                using (var mustache = Matrix <RgbAlphaPixel> .Deserialize(deserialize))
                                {
                                    Dlib.PyramidUp(glasses);
                                    Dlib.PyramidUp(mustache);

                                    using (var win1 = new ImageWindow(glasses))
                                        using (var win2 = new ImageWindow(mustache))
                                            using (var winWireframe = new ImageWindow())
                                                using (var winHipster = new ImageWindow())
                                                {
                                                    // Now process each image, find dogs, and hipsterize them by drawing glasses and a
                                                    // mustache on each dog :)
                                                    for (var i = 1; i < args.Length; ++i)
                                                    {
                                                        using (var img = Dlib.LoadImageAsMatrix <RgbPixel>(args[i]))
                                                        {
                                                            // Upsampling the image will allow us to find smaller dog faces but will use more
                                                            // computational resources.
                                                            //pyramid_up(img);
                                                            var dets = net.Operator(img).First();
                                                            winWireframe.ClearOverlay();
                                                            winWireframe.SetImage(img);

                                                            // We will also draw a wireframe on each dog's face so you can see where the
                                                            // shape_predictor is identifying face landmarks.
                                                            var lines = new List <ImageWindow.OverlayLine>();
                                                            foreach (var d in dets)
                                                            {
                                                                // get the landmarks for this dog's face
                                                                var shape = sp.Detect(img, d.Rect);

                                                                var color    = new RgbPixel(0, 255, 0);
                                                                var top      = shape.GetPart(0);
                                                                var leftEar  = shape.GetPart(1);
                                                                var leftEye  = shape.GetPart(2);
                                                                var nose     = shape.GetPart(3);
                                                                var rightEar = shape.GetPart(4);
                                                                var rightEye = shape.GetPart(5);

                                                                // The locations of the left and right ends of the mustache.
                                                                var leftMustache  = 1.3 * (leftEye - rightEye) / 2 + nose;
                                                                var rightMustache = 1.3 * (rightEye - leftEye) / 2 + nose;

                                                                // Draw the glasses onto the image.
                                                                var from = new[]
                                                                {
                                                                    2 * new Point(176, 36), 2 * new Point(59, 35)
                                                                };
                                                                var to = new[]
                                                                {
                                                                    leftEye, rightEye
                                                                };
                                                                using (var transform = Dlib.FindSimilarityTransform(from, to))
                                                                    for (uint r = 0, nr = (uint)glasses.Rows; r < nr; ++r)
                                                                    {
                                                                        for (uint c = 0, nc = (uint)glasses.Columns; c < nc; ++c)
                                                                        {
                                                                            var p = (Point)transform.Operator(new DPoint(c, r));
                                                                            if (Dlib.GetRect(img).Contains(p))
                                                                            {
                                                                                var rgb = img[p.Y, p.X];
                                                                                Dlib.AssignPixel(ref rgb, glasses[(int)r, (int)c]);
                                                                                img[p.Y, p.X] = rgb;
                                                                            }
                                                                        }
                                                                    }

                                                                // Draw the mustache onto the image right under the dog's nose.
                                                                var mustacheRect = Dlib.GetRect(mustache);
                                                                from = new[]
                                                                {
                                                                    mustacheRect.TopLeft, mustacheRect.TopRight
                                                                };
                                                                to = new[]
                                                                {
                                                                    rightMustache, leftMustache
                                                                };
                                                                using (var transform = Dlib.FindSimilarityTransform(from, to))
                                                                    for (uint r = 0, nr = (uint)mustache.Rows; r < nr; ++r)
                                                                    {
                                                                        for (uint c = 0, nc = (uint)mustache.Columns; c < nc; ++c)
                                                                        {
                                                                            var p = (Point)transform.Operator(new DPoint(c, r));
                                                                            if (Dlib.GetRect(img).Contains(p))
                                                                            {
                                                                                var rgb = img[p.Y, p.X];
                                                                                Dlib.AssignPixel(ref rgb, mustache[(int)r, (int)c]);
                                                                                img[p.Y, p.X] = rgb;
                                                                            }
                                                                        }
                                                                    }

                                                                // Record the lines needed for the face wire frame.
                                                                lines.Add(new ImageWindow.OverlayLine(leftEye, nose, color));
                                                                lines.Add(new ImageWindow.OverlayLine(nose, rightEye, color));
                                                                lines.Add(new ImageWindow.OverlayLine(rightEye, leftEye, color));
                                                                lines.Add(new ImageWindow.OverlayLine(rightEye, rightEar, color));
                                                                lines.Add(new ImageWindow.OverlayLine(rightEar, top, color));
                                                                lines.Add(new ImageWindow.OverlayLine(top, leftEar, color));
                                                                lines.Add(new ImageWindow.OverlayLine(leftEar, leftEye, color));

                                                                winWireframe.AddOverlay(lines);
                                                                winHipster.SetImage(img);
                                                            }

                                                            Console.WriteLine("Hit enter to process the next image.");
                                                            Console.ReadKey();
                                                        }
                                                    }
                                                }
                                }
            }
            catch (Exception e)
            {
                Console.WriteLine(e);
            }
        }
Code Example #13
        private static void Main()
        {
            try
            {
                var cap = new VideoCapture(0);
                //var cap = new VideoCapture("20090124_WeeklyAddress.ogv.360p.webm");
                if (!cap.IsOpened())
                {
                    Console.WriteLine("Unable to connect to camera");
                    return;
                }

                using (var win = new ImageWindow())
                {
                    // Load face detection and pose estimation models.
                    using (var detector = Dlib.GetFrontalFaceDetector())
                        using (var poseModel = ShapePredictor.Deserialize("shape_predictor_68_face_landmarks.dat"))
                        {
                            // Grab and process frames until the main window is closed by the user.
                            while (!win.IsClosed())
                            {
                                // Grab a frame
                                var temp = new Mat();
                                if (!cap.Read(temp))
                                {
                                    break;
                                }

                                // Turn OpenCV's Mat into something dlib can deal with.  Unlike the
                                // original C++ example, which wraps the Mat's memory directly, this
                                // code copies the pixel data into a managed byte array first, so cimg
                                // owns its own copy of the frame and there is no danger of dangling
                                // pointers if temp reallocates.  OpenCV frames are BGR, so loading
                                // them as RgbPixel swaps the red and blue channels.
                                var array = new byte[temp.Width * temp.Height * temp.ElemSize()];
                                Marshal.Copy(temp.Data, array, 0, array.Length);
                                using (var cimg = Dlib.LoadImageData <RgbPixel>(array, (uint)temp.Height, (uint)temp.Width, (uint)(temp.Width * temp.ElemSize())))
                                {
                                    // Detect faces
                                    var faces = detector.Detect(cimg);
                                    // Find the pose of each face.
                                    var shapes = new List <FullObjectDetection>();
                                    for (var i = 0; i < faces.Length; ++i)
                                    {
                                        var det = poseModel.Detect(cimg, faces[i]);
                                        shapes.Add(det);
                                    }

                                    // Display it all on the screen
                                    win.ClearOverlay();
                                    win.SetImage(cimg);
                                    var lines = Dlib.RenderFaceDetections(shapes);
                                    win.AddOverlay(lines);

                                    foreach (var line in lines)
                                    {
                                        line.Dispose();
                                    }
                                }
                            }
                        }
                }
            }
            // If deserialization fails, you are missing dlib's default face landmarking
            // model file.  You can get it from:
            // http://dlib.net/files/shape_predictor_68_face_landmarks.dat.bz2
            catch (Exception e)
            {
                Console.WriteLine(e.Message);
            }
        }
Code Example #14
        private static void Main()
        {
            try
            {
                // Capture images from the camera.  Note that on Windows you need
                // to select VideoCaptureAPIs.DSHOW.
                var cap = new VideoCapture(0, VideoCaptureAPIs.DSHOW);

                // Or capture from a video file instead:
                //var cap = new VideoCapture("video.webm");

                // Make sure the capture device is open.
                if (!cap.IsOpened())
                {
                    Console.WriteLine("Unable to connect to camera");
                    return;
                }

                Mat temp = null;

                // Create the display window.
                using (var win = new ImageWindow())
                {
                    // Load the face detection and landmarking models.
                    using (var detector = Dlib.GetFrontalFaceDetector())
                        using (var poseModel = ShapePredictor.Deserialize("shape_predictor_68_face_landmarks.dat"))
                        {
                            // Grab and process frames until the main window is closed.
                            while (!win.IsClosed())
                            {
                                //System.Threading.Thread.Sleep(100);

                                // Grab a frame.
                                temp = cap.RetrieveMat();
                                if (temp == null || temp.Empty())
                                {
                                    break;
                                }

                                // Convert the OpenCV image data into dlib's image format.
                                var array = new byte[temp.Width * temp.Height * temp.ElemSize()];
                                Marshal.Copy(temp.Data, array, 0, array.Length);
                                using (var cimg = Dlib.LoadImageData <BgrPixel>(array, (uint)temp.Height, (uint)temp.Width, (uint)(temp.Width * temp.ElemSize())))
                                {
                                    // Detect faces.
                                    var faces = detector.Operator(cimg);
                                    // Find the landmarks for each face.
                                    var shapes = new List <FullObjectDetection>();
                                    for (var i = 0; i < faces.Length; ++i)
                                    {
                                        var det = poseModel.Detect(cimg, faces[i]);
                                        shapes.Add(det);
                                    }

                                    // Display it all on the screen.
                                    win.ClearOverlay();
                                    win.SetImage(cimg);
                                    var lines = Dlib.RenderFaceDetections(shapes);
                                    win.AddOverlay(lines);

                                    foreach (var line in lines)
                                    {
                                        line.Dispose();
                                    }
                                }
                            }
                        }
                }
            }
            // If deserialization fails, download the landmarking model from
            // http://dlib.net/files/shape_predictor_68_face_landmarks.dat.bz2
            catch (Exception e)
            {
                Console.WriteLine(e.Message);
            }
        }
Code Example #15
        private static void Main(string[] args)
        {
            if (args.Length != 1)
            {
                Console.WriteLine("Run this example by invoking it like this: ");
                Console.WriteLine("   ./DnnFaceRecognition faces/bald_guys.jpg");
                Console.WriteLine("You will also need to get the face landmarking model file as well as ");
                Console.WriteLine("the face recognition model file.  Download and then decompress these files from: ");
                Console.WriteLine("http://dlib.net/files/shape_predictor_5_face_landmarks.dat.bz2");
                Console.WriteLine("http://dlib.net/files/dlib_face_recognition_resnet_model_v1.dat.bz2");
                return;
            }

            // The first thing we are going to do is load all our models.  First, since we need to
            // find faces in the image we will need a face detector:
            using (var detector = FrontalFaceDetector.GetFrontalFaceDetector())
                // We will also use a face landmarking model to align faces to a standard pose:  (see face_landmark_detection_ex.cpp for an introduction)
                using (var sp = new ShapePredictor("shape_predictor_5_face_landmarks.dat"))
                    // And finally we load the DNN responsible for face recognition.
                    using (var net = DlibDotNet.Dnn.LossMetric.Deserialize("dlib_face_recognition_resnet_model_v1.dat"))

                        using (var img = Dlib.LoadImage <RgbPixel>(args[0]))
                            using (var mat = new Matrix <RgbPixel>(img))

                                // Display the raw image on the screen
                                using (var win = new ImageWindow(img))
                                {
                                    // Run the face detector on the image of our action heroes, and for each face extract a
                                    // copy that has been normalized to 150x150 pixels in size and appropriately rotated
                                    // and centered.
                                    var faces = new List <Matrix <RgbPixel> >();
                                    foreach (var face in detector.Detect(img))
                                    {
                                        var shape          = sp.Detect(img, face);
                                        var faceChipDetail = Dlib.GetFaceChipDetails(shape, 150, 0.25);
                                        var faceChip       = Dlib.ExtractImageChip <RgbPixel>(mat, faceChipDetail);

                                        faces.Add(faceChip);

                                        // Also put some boxes on the faces so we can see that the detector is finding
                                        // them.
                                        win.AddOverlay(face);
                                    }

                                    if (!faces.Any())
                                    {
                                        Console.WriteLine("No faces found in image!");
                                        return;
                                    }

                                    // This call asks the DNN to convert each face image in faces into a 128D vector.
                                    // In this 128D vector space, images from the same person will be close to each other
                                    // but vectors from different people will be far apart.  So we can use these vectors to
                                    // identify if a pair of images are from the same person or from different people.
                                    var faceDescriptors = net.Operator(faces);

                                    // In particular, one simple thing we can do is face clustering.  This next bit of code
                                    // creates a graph of connected faces and then uses the Chinese whispers graph clustering
                                    // algorithm to identify how many people there are and which faces belong to whom.
                                    var edges = new List <SamplePair>();
                                    for (uint i = 0; i < faceDescriptors.Count; ++i)
                                    {
                                        for (var j = i; j < faceDescriptors.Count; ++j)
                                        {
                                            // Faces are connected in the graph if they are close enough.  Here we check if
                                            // the distance between two face descriptors is less than 0.6, which is the
                                            // decision threshold the network was trained to use.  Although you can
                                            // certainly use any other threshold you find useful.
                                            var diff = faceDescriptors[i] - faceDescriptors[j];
                                            if (Dlib.Length(diff) < 0.6)
                                            {
                                                edges.Add(new SamplePair(i, j));
                                            }
                                        }
                                    }

                                    Dlib.ChineseWhispers(edges, 100, out var numClusters, out var labels);

                                    // This will correctly indicate that there are 4 people in the image.
                                    Console.WriteLine($"number of people found in the image: {numClusters}");

                                    // Now let's display the face clustering results on the screen.  You will see that it
                                    // correctly grouped all the faces.
                                    var winClusters = new List <ImageWindow>();
                                    for (var i = 0; i < numClusters; i++)
                                    {
                                        winClusters.Add(new ImageWindow());
                                    }
                                    var tileImages = new List <Matrix <RgbPixel> >();
                                    for (var clusterId = 0ul; clusterId < numClusters; ++clusterId)
                                    {
                                        var temp = new List <Matrix <RgbPixel> >();
                                        for (var j = 0; j < labels.Length; ++j)
                                        {
                                            if (clusterId == labels[j])
                                            {
                                                temp.Add(faces[j]);
                                            }
                                        }

                                        winClusters[(int)clusterId].Title = $"face cluster {clusterId}";
                                        var tileImage = Dlib.TileImages(temp);
                                        tileImages.Add(tileImage);
                                        winClusters[(int)clusterId].SetImage(tileImage);
                                    }

                                    // Finally, let's print one of the face descriptors to the screen.
                                    using (var trans = Dlib.Trans(faceDescriptors[0]))
                                    {
                                        Console.WriteLine($"face descriptor for one face: {trans}");

                                        // It should also be noted that face recognition accuracy can be improved if jittering
                                        // is used when creating face descriptors.  In particular, to get 99.38% on the LFW
                                        // benchmark you need to use the jitter_image() routine to compute the descriptors,
                                        // like so:
                                        var jitterImages = JitterImage(faces[0]).ToArray();
                                        var ret          = net.Operator(jitterImages);
                                        using (var m = Dlib.Mat(ret))
                                            using (var faceDescriptor = Dlib.Mean <float>(m))
                                                using (var t = Dlib.Trans(faceDescriptor))
                                                {
                                                    Console.WriteLine($"jittered face descriptor for one face: {t}");

                                                    // If you use the model without jittering, as we did when clustering the bald guys, it
                                                    // gets an accuracy of 99.13% on the LFW benchmark.  So jittering makes the whole
                                                    // procedure a little more accurate but makes face descriptor calculation slower.

                                                    Console.WriteLine("hit enter to terminate");
                                                    Console.ReadKey();

                                                    foreach (var jitterImage in jitterImages)
                                                    {
                                                        jitterImage.Dispose();
                                                    }

                                                    foreach (var tileImage in tileImages)
                                                    {
                                                        tileImage.Dispose();
                                                    }

                                                    foreach (var edge in edges)
                                                    {
                                                        edge.Dispose();
                                                    }

                                                    foreach (var descriptor in faceDescriptors)
                                                    {
                                                        descriptor.Dispose();
                                                    }

                                                    foreach (var face in faces)
                                                    {
                                                        face.Dispose();
                                                    }
                                                }
                                    }
                                }
        }
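
The JitterImage helper called near the end is not defined in this snippet.  A minimal sketch of what it might look like, assuming DlibDotNet exposes Dlib.JitterImage and a Rand type wrapping dlib's jitter_image; the dlib C++ example computes 100 randomly jittered copies of the face chip and averages their descriptors:

        // Hypothetical helper, mirroring dlib's C++ face recognition example:
        // produce 100 randomly jittered copies of a face chip so that their
        // descriptors can be averaged.  Assumes Dlib.JitterImage(Matrix, Rand)
        // exists in this DlibDotNet build.
        private static IEnumerable<Matrix<RgbPixel>> JitterImage(Matrix<RgbPixel> img)
        {
            var rnd = new Rand();
            for (var i = 0; i < 100; ++i)
                yield return Dlib.JitterImage(img, rnd);
        }
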
Code Example #16
        public void AddOverlay()
        {
            if (!this.CanGuiDebug)
            {
                Console.WriteLine("Build and run as Release mode if you wanna show Gui!!");
                return;
            }

            var path  = this.GetDataFile("Lenna.bmp");
            var tests = new[]
            {
                new { Type = ImageTypes.HsiPixel, ExpectResult = true },
                new { Type = ImageTypes.LabPixel, ExpectResult = true },
                new { Type = ImageTypes.BgrPixel, ExpectResult = true },
                new { Type = ImageTypes.RgbPixel, ExpectResult = true },
                new { Type = ImageTypes.RgbAlphaPixel, ExpectResult = true },
                new { Type = ImageTypes.UInt8, ExpectResult = true },
                new { Type = ImageTypes.UInt16, ExpectResult = true },
                new { Type = ImageTypes.UInt32, ExpectResult = true },
                new { Type = ImageTypes.Int8, ExpectResult = true },
                new { Type = ImageTypes.Int16, ExpectResult = true },
                new { Type = ImageTypes.Int32, ExpectResult = true },
                new { Type = ImageTypes.Float, ExpectResult = true },
                new { Type = ImageTypes.Double, ExpectResult = true }
            };

            foreach (var test in tests)
            {
                try
                {
                    var rect  = new Rectangle(10, 10, 100, 100);
                    var array = Array2D.Array2DTest.CreateArray2DHelp(test.Type, path.FullName);
                    using (var window = new ImageWindow(array))
                    {
                        switch (test.Type)
                        {
                        case ImageTypes.UInt8:
                            window.AddOverlay(rect, (byte)0, test.Type.ToString());
                            break;

                        case ImageTypes.UInt16:
                            window.AddOverlay(rect, (ushort)0, test.Type.ToString());
                            break;

                        case ImageTypes.UInt32:
                            window.AddOverlay(rect, 0u, test.Type.ToString());
                            break;

                        case ImageTypes.Int8:
                            window.AddOverlay(rect, (sbyte)0, test.Type.ToString());
                            break;

                        case ImageTypes.Int16:
                            window.AddOverlay(rect, (short)0, test.Type.ToString());
                            break;

                        case ImageTypes.Int32:
                            window.AddOverlay(rect, 0, test.Type.ToString());
                            break;

                        case ImageTypes.Float:
                            window.AddOverlay(rect, 0f, test.Type.ToString());
                            break;

                        case ImageTypes.Double:
                            window.AddOverlay(rect, 0d, test.Type.ToString());
                            break;

                        case ImageTypes.RgbAlphaPixel:
                            window.AddOverlay(rect, new RgbAlphaPixel(127, 0, 0, 0), test.Type.ToString());
                            break;

                        case ImageTypes.RgbPixel:
                            window.AddOverlay(rect, new RgbPixel(0, 0, 0), test.Type.ToString());
                            break;

                        case ImageTypes.HsiPixel:
                            window.AddOverlay(rect, new HsiPixel(0, 0, 0), test.Type.ToString());
                            break;

                        case ImageTypes.LabPixel:
                            window.AddOverlay(rect, new LabPixel(0, 0, 0), test.Type.ToString());
                            break;
                        }

                        window.WaitUntilClosed();
                    }
                }
                catch (Exception e)
                {
                    Console.WriteLine(e.StackTrace);
                    Console.WriteLine($"Failed to create ImageWindow from Array2D Type: {test.Type}");
                    throw;
                }
            }
        }
Code Example #17
        private static void Main(string[] args)
        {
            if (args.Length == 0)
            {
                Console.WriteLine("Give some image files as arguments to this program.");
                Console.WriteLine("Call this program like this:");
                Console.WriteLine("./face_landmark_detection_ex shape_predictor_68_face_landmarks.dat faces/*.jpg");
                Console.WriteLine("You can get the shape_predictor_68_face_landmarks.dat file from:");
                Console.WriteLine("http://dlib.net/files/shape_predictor_68_face_landmarks.dat.bz2");
                return;
            }

            using (var win = new ImageWindow())
                using (var winFaces = new ImageWindow())
                    using (var detector = Dlib.GetFrontalFaceDetector())
                        using (var sp = ShapePredictor.Deserialize(args[0]))
                            foreach (var file in args.ToList().GetRange(1, args.Length - 1))
                            {
                                Console.WriteLine($"processing image {file}");

                                using (var img = Dlib.LoadImage <RgbPixel>(file))
                                {
                                    Dlib.PyramidUp(img);

                                    var dets = detector.Operator(img);
                                    Console.WriteLine($"Number of faces detected: {dets.Length}");

                                    var shapes = new List <FullObjectDetection>();
                                    foreach (var rect in dets)
                                    {
                                        var shape = sp.Detect(img, rect);
                                        Console.WriteLine($"number of parts: {shape.Parts}");
                                        if (shape.Parts > 2)
                                        {
                                            Console.WriteLine($"pixel position of first part:  {shape.GetPart(0)}");
                                            Console.WriteLine($"pixel position of second part: {shape.GetPart(1)}");
                                            shapes.Add(shape);
                                        }
                                    }

                                    win.ClearOverlay();
                                    win.SetImage(img);

                                    if (shapes.Any())
                                    {
                                        var lines = Dlib.RenderFaceDetections(shapes);
                                        win.AddOverlay(lines);

                                        foreach (var l in lines)
                                        {
                                            l.Dispose();
                                        }

                                        var chipLocations = Dlib.GetFaceChipDetails(shapes);
                                        using (var faceChips = Dlib.ExtractImageChips <RgbPixel>(img, chipLocations))
                                            using (var tileImage = Dlib.TileImages(faceChips))
                                                winFaces.SetImage(tileImage);

                                        foreach (var c in chipLocations)
                                        {
                                            c.Dispose();
                                        }
                                    }

                                    Console.WriteLine("hit enter to process next frame");
                                    Console.ReadKey();

                                    foreach (var s in shapes)
                                    {
                                        s.Dispose();
                                    }
                                }
                            }
        }
Code Example #18
File: Program.cs Project: zhuxb711/DlibDotNet
        private static void Main(string[] args)
        {
            try
            {
                // In this example we are going to train a face detector based on the
                // small faces dataset in the examples/faces directory.  So the first
                // thing we do is load that dataset.  This means you need to supply the
                // path to this faces folder as a command line argument so we will know
                // where it is.
                if (args.Length != 1)
                {
                    Console.WriteLine("Give the path to the examples/faces directory as the argument to this");
                    Console.WriteLine("program.  For example, if you are in the examples folder then execute ");
                    Console.WriteLine("this program by running: ");
                    Console.WriteLine("   ./dnn_mmod_ex faces");
                    return;
                }

                var facesDirectory = args[0];

                // The faces directory contains a training dataset and a separate
                // testing dataset.  The training data consists of 4 images, each
                // annotated with rectangles that bound each human face.  The idea is
                // to use this training data to learn to identify human faces in new
                // images.
                //
                // Once you have trained an object detector it is always important to
                // test it on data it wasn't trained on.  Therefore, we will also load
                // a separate testing set of 5 images.  Once we have a face detector
                // created from the training data we will see how well it works by
                // running it on the testing images.
                //
                // So here we create the variables that will hold our dataset.
                // images_train will hold the 4 training images and face_boxes_train
                // holds the locations of the faces in the training images.  So for
                // example, the image images_train[0] has the faces given by the
                // rectangles in face_boxes_train[0].
                IList <Matrix <RgbPixel> > imagesTrain;
                IList <Matrix <RgbPixel> > imagesTest;
                IList <IList <MModRect> >  faceBoxesTrain;
                IList <IList <MModRect> >  faceBoxesTest;

                // Now we load the data.  These XML files list the images in each dataset
                // and also contain the positions of the face boxes.  Obviously you can use
                // any kind of input format you like so long as you store the data into
                // images_train and face_boxes_train.  But for convenience dlib comes with
                // tools for creating and loading XML image datasets.  Here you see how to
                // load the data.  To create the XML files you can use the imglab tool which
                // can be found in the tools/imglab folder.  It is a simple graphical tool
                // for labeling objects in images with boxes.  To see how to use it read the
                // tools/imglab/README.txt file.
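                // For reference, these XML files look roughly like the sketch below
                // (abridged; the file name and box coordinates are illustrative):
                //
                //   <dataset>
                //     <images>
                //       <image file='2008_001009.jpg'>
                //         <box top='30' left='60' width='50' height='50'/>
                //       </image>
                //       ...
                //     </images>
                //   </dataset>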
                Dlib.LoadImageDataset(facesDirectory + "/training.xml", out imagesTrain, out faceBoxesTrain);
                Dlib.LoadImageDataset(facesDirectory + "/testing.xml", out imagesTest, out faceBoxesTest);

                Console.WriteLine($"num training images: {imagesTrain.Count()}");
                Console.WriteLine($"num testing images:  {imagesTest.Count()}");


                // The MMOD algorithm has some options you can set to control its behavior.  However,
                // you can also call the constructor with your training annotations and a "target
                // object size" and it will automatically configure itself in a reasonable way for your
                // problem.  Here we are saying that faces are still recognizably faces when they are
                // 40x40 pixels in size.  You should generally pick the smallest size where this is
                // true.  Based on this information the mmod_options constructor will automatically
                // pick a good sliding window width and height.  It will also automatically set the
                // non-max-suppression parameters to something reasonable.  For further details see the
                // mmod_options documentation.
                using (var options = new MModOptions(faceBoxesTrain, 40, 40))
                {
                    // The detector will automatically decide to use multiple sliding windows if needed.
                    // For the face data, however, only one is needed.
                    var detectorWindows = options.DetectorWindows.ToArray();
                    Console.WriteLine($"num detector windows: {detectorWindows.Length}");
                    foreach (var w in detectorWindows)
                    {
                        Console.WriteLine($"detector window width by height: {w.Width} x {w.Height}");
                    }

                    Console.WriteLine($"overlap NMS IOU thresh:             {options.OverlapsNms.GetIouThresh()}");
                    Console.WriteLine($"overlap NMS percent covered thresh: {options.OverlapsNms.GetPercentCoveredThresh()}");

                    // Now we are ready to create our network and trainer.
                    using (var net = new LossMmod(options, 2))
                    {
                        // The MMOD loss requires that the number of filters in the final network layer equal
                        // options.detector_windows.size().  So we set that here as well.
                        using (var subnet = net.GetSubnet())
                            using (var details = subnet.GetLayerDetails())
                            {
                                details.SetNumFilters(detectorWindows.Length);
                                using (var trainer = new DnnTrainer <LossMmod>(net))
                                {
                                    trainer.SetLearningRate(0.1);
                                    trainer.BeVerbose();
                                    // Save training state to "mmod_sync" every 5 minutes so an
                                    // interrupted run can resume where it left off.
                                    trainer.SetSynchronizationFile("mmod_sync", 5 * 60);
                                    // Shrink the learning rate once 300 mini-batches pass without
                                    // progress on the training loss.
                                    trainer.SetIterationsWithoutProgressThreshold(300);

                                    // Now let's train the network.  We are going to use mini-batches of 150
                                    // images.   The images are random crops from our training set (see
                                    // random_cropper_ex.cpp for a discussion of the random_cropper).
                                    IEnumerable <Matrix <RgbPixel> > miniBatchSamples;
                                    IEnumerable <IEnumerable <MModRect> > miniBatchLabels;

                                    using (var cropper = new RandomCropper())
                                        using (var chipDims = new ChipDims(200, 200))
                                        {
                                            cropper.ChipDims = chipDims;
                                            // Usually you want to give the cropper whatever min sizes you passed to the
                                            // mmod_options constructor, which is what we do here.
                                            cropper.SetMinObjectSize(40, 40);
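                                            // The cropper yields ChipDims-sized crops (200x200 here) sampled
                                            // around the labeled boxes, with random translation, scaling, and
                                            // left-right flips, so every mini-batch sees fresh variations.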

                                            using (var rnd = new Rand())
                                            {
                                                // Run the trainer until the learning rate gets small.  This will probably take several
                                                // hours.
                                                while (trainer.GetLearningRate() >= 1e-4)
                                                {
                                                    cropper.Operator(150, imagesTrain, faceBoxesTrain, out miniBatchSamples, out miniBatchLabels);
                                                    // We can also randomly jitter the colors and that often helps a detector
                                                    // generalize better to new images.
                                                    foreach (var img in miniBatchSamples)
                                                    {
                                                        Dlib.DisturbColors(img, rnd);
                                                    }

                                                    LossMmod.TrainOneStep(trainer, miniBatchSamples, miniBatchLabels);

                                                    miniBatchSamples.DisposeElement();
                                                    miniBatchLabels.DisposeElement();
                                                }
                                                // wait for training threads to stop
                                                trainer.GetNet();
                                                Console.WriteLine("done training");

                                                // Save the network to disk.  Clean() strips transient
                                                // training state first, which makes the serialized file
                                                // considerably smaller.
                                                net.Clean();
                                                LossMmod.Serialize(net, "mmod_network.dat");
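                                                // A rough sketch of how the saved file could be loaded back
                                                // later (assuming Deserialize takes the same network type id,
                                                // 2, that was used to construct the net above):
                                                //
                                                //   using (var deserialize = new ProxyDeserialize("mmod_network.dat"))
                                                //   using (var net2 = LossMmod.Deserialize(deserialize, 2))
                                                //   {
                                                //       // net2.Operator(img) now runs the trained detector
                                                //   }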


                                                // Now that we have a face detector we can test it.  The first statement tests it
                                                // on the training data.  It will print the precision, recall, and then average precision.
                                                // This statement should indicate that the network works perfectly on the
                                                // training data.
                                                using (var matrix = Dlib.TestObjectDetectionFunction(net, imagesTrain, faceBoxesTrain))
                                                    Console.WriteLine($"training results: {matrix}");
                                                // However, to get an idea if it really worked without overfitting we need to run
                                                // it on images it wasn't trained on.  The next line does this.   Happily,
                                                // this statement indicates that the detector finds most of the faces in the
                                                // testing data.
                                                using (var matrix = Dlib.TestObjectDetectionFunction(net, imagesTest, faceBoxesTest))
                                                    Console.WriteLine($"testing results:  {matrix}");


                                                // If you are running many experiments, it's also useful to log the settings used
                                                // during the training experiment.  This statement will print the settings we used to
                                                // the screen.
                                                Console.WriteLine($"{trainer}{cropper}");

                                                // Now let's run the detector on the testing images and look at the outputs.
                                                using (var win = new ImageWindow())
                                                    foreach (var img in imagesTest)
                                                    {
                                                        // Upsample the image; this lets the detector find
                                                        // smaller faces at the cost of more computation.
                                                        Dlib.PyramidUp(img);
                                                        var dets = net.Operator(img);
                                                        win.ClearOverlay();
                                                        win.SetImage(img);
                                                        foreach (var d in dets[0])
                                                        {
                                                            win.AddOverlay(d);
                                                        }

                                                        Console.WriteLine("Hit enter to view the next testing image");
                                                        Console.ReadKey();

                                                        foreach (var det in dets)
                                                        {
                                                            foreach (var d in det)
                                                            {
                                                                d.Dispose();
                                                            }
                                                        }
                                                    }

                                                // Now that you finished this example, you should read dnn_mmod_train_find_cars_ex.cpp,
                                                // which is a more advanced example.  It discusses many issues surrounding properly
                                                // setting the MMOD parameters and creating a good training dataset.
                                            }
                                        }
                                }
                            }
                    }

                    detectorWindows.DisposeElement();
                }
            }
            catch (Exception e)
            {
                Console.WriteLine(e);
            }
        }