private static void Main() { try { // You can get this file from http://dlib.net/files/mmod_front_and_rear_end_vehicle_detector.dat.bz2 // This network was produced by the dnn_mmod_train_find_cars_ex.cpp example program. // As you can see, the file also includes a separately trained shape_predictor. To see // a generic example of how to train those refer to train_shape_predictor_ex.cpp. using (var deserialize = new ProxyDeserialize("mmod_front_and_rear_end_vehicle_detector.dat")) using (var net = LossMmod.Deserialize(deserialize, 1)) using (var sp = ShapePredictor.Deserialize(deserialize)) using (var img = Dlib.LoadImageAsMatrix <RgbPixel>("mmod_cars_test_image2.jpg")) using (var win = new ImageWindow()) { win.SetImage(img); // Run the detector on the image and show us the output. var dets = net.Operator(img).First(); foreach (var d in dets) { // We use a shape_predictor to refine the exact shape and location of the detection // box. This shape_predictor is trained to simply output the 4 corner points of // the box. So all we do is make a rectangle that tightly contains those 4 points // and that rectangle is our refined detection position. var fd = sp.Detect(img, d); var rect = Rectangle.Empty; for (var j = 0u; j < fd.Parts; ++j) { rect += fd.GetPart(j); } if (d.Label == "rear") { win.AddOverlay(rect, new RgbPixel(255, 0, 0), d.Label); } else { win.AddOverlay(rect, new RgbPixel(255, 255, 0), d.Label); } } Console.WriteLine("Hit enter to end program"); Console.ReadKey(); } } catch (ImageLoadException ile) { Console.WriteLine(ile.Message); Console.WriteLine("The test image is located in the examples folder. So you should run this program from a sub folder so that the relative path is correct."); } catch (Exception e) { Console.WriteLine(e); } }
private static void Main(string[] args) { if (args.Length == 0) { Console.WriteLine("Give some image files as arguments to this program."); return; } using (var win = new ImageWindow()) using (var detector = FrontalFaceDetector.GetFrontalFaceDetector()) foreach (var file in args) { using (var img = Dlib.LoadImage <byte>(file)) { Dlib.PyramidUp(img); var dets = detector.Detect(img); Console.WriteLine($"Number of faces detected: {dets.Length}"); win.ClearOverlay(); win.SetImage(img); win.AddOverlay(dets, new RgbPixel { Red = 255 }); Console.WriteLine("hit enter to process next frame"); Console.ReadKey(); } } }
private static void Main(string[] args) { if (args.Length != 2) { Console.WriteLine("Call this program like this:"); Console.WriteLine("./dnn_mmod_face_detection_ex mmod_human_face_detector.dat faces/*.jpg"); Console.WriteLine("You can get the mmod_human_face_detector.dat file from:"); Console.WriteLine("http://dlib.net/files/mmod_human_face_detector.dat.bz2"); return; } using (var net = DlibDotNet.Dnn.LossMmod.Deserialize(args[0])) { //image_window win; using (var win = new ImageWindow()) for (var index = 1; index < args.Length; index++) { using (var tmp = Dlib.LoadImage <RgbPixel>(args[index])) using (var img = new Matrix <RgbPixel>(tmp)) { // Upsampling the image will allow us to detect smaller faces but will cause the // program to use more RAM and run longer. while (img.Size < 1800 * 1800) { Dlib.PyramidUp(img); } // Note that you can process a bunch of images in a std::vector at once and it runs // much faster, since this will form mini-batches of images and therefore get // better parallelism out of your GPU hardware. However, all the images must be // the same size. To avoid this requirement on images being the same size we // process them individually in this example. using (var dets = net.Operator(img)) foreach (var det in dets) { win.ClearOverlay(); win.SetImage(img); foreach (var d in det) { win.AddOverlay(d); } } Console.WriteLine("Hit enter to process the next image."); Console.ReadKey(); } } } }
private static void Main(string[] args) { if (args.Length != 1) { Console.WriteLine("Call this program like this: "); Console.WriteLine("VideoTracking.exe <path of video_frames directory>"); return; } var path = args[0]; var files = new DirectoryInfo(path).GetFiles("*.jpg").Select(info => info.FullName).ToList(); files.Sort(); if (files.Count == 0) { Console.WriteLine($"No images found in {path}"); return; } using (var win = new ImageWindow()) using (var tracker = new CorrelationTracker()) { var firstFile = files.First(); using (var img = Dlib.LoadImage <byte>(firstFile)) using (var rect = DRectangle.CenteredRect(93, 110, 38, 86)) tracker.StartTrack(img, rect); foreach (var file in files.GetRange(1, files.Count - 1)) { using (var img = Dlib.LoadImage <byte>(file)) { tracker.Update(img); win.SetImage(img); win.ClearOverlay(); using (var pos = tracker.GetPosition()) win.AddOverlay(pos); Console.WriteLine("hit enter to process next frame"); Console.ReadKey(); } } } }
private static void Main(string[] args) { try { if (args.Length == 0) { Console.WriteLine("Give an image dataset XML file to run this program."); Console.WriteLine("For example, if you are running from the examples folder then run this program by typing"); Console.WriteLine(" ./RandomCropper faces/training.xml"); return; } // First lets load a dataset IEnumerable <Matrix <RgbPixel> > images; IEnumerable <IEnumerable <MModRect> > boxes; Dlib.LoadImageDataset(args[0], out images, out boxes); // Here we make our random_cropper. It has a number of options. var cropper = new DlibDotNet.ImageTransforms.RandomCropper(); // We can tell it how big we want the cropped images to be. cropper.ChipDims = new ChipDims(400, 400); // Also, when doing cropping, it will map the object annotations from the // dataset to the cropped image as well as perform random scale jittering. // You can tell it how much scale jittering you would like by saying "please // make the objects in the crops have a min and max size of such and such". // You do that by calling these two functions. Here we are saying we want the // objects in our crops to be no more than 0.8*400 pixels in height and width. cropper.MaxObjectSize = 0.8; // And also that they shouldn't be too small. Specifically, each object's smallest // dimension (i.e. height or width) should be at least 60 pixels and at least one of // the dimensions must be at least 80 pixels. So the smallest objects the cropper will // output will be either 80x60 or 60x80. cropper.MinObjectLengthLongDim = 80; cropper.MinObjectLengthShortDim = 60; // The cropper can also randomly mirror and rotate crops, which we ask it to // perform as well. cropper.RandomlyFlip = true; cropper.MaxRotationDegrees = 50; // This fraction of crops are from random parts of images, rather than being centered // on some object. cropper.BackgroundCropsFraction = 0.2; // Now ask the cropper to generate a bunch of crops. The output is stored in // crops and crop_boxes. IEnumerable <Matrix <RgbPixel> > crops; IEnumerable <IEnumerable <MModRect> > cropBoxes; // Make 1000 crops. cropper.Operator(1000, images, boxes, out crops, out cropBoxes); // Finally, lets look at the results var cropList = crops?.ToArray() ?? new Matrix <RgbPixel> [0]; var cropBoxesList = cropBoxes?.ToArray() ?? new IEnumerable <MModRect> [0]; using (var win = new ImageWindow()) for (var i = 0; i < cropList.Count(); ++i) { win.ClearOverlay(); win.SetImage(cropList[i]); foreach (var b in cropBoxesList[i]) { // Note that mmod_rect has an ignore field. If an object was labeled // ignore in boxes then it will still be labeled as ignore in // crop_boxes. Moreover, objects that are not well contained within // the crop are also set to ignore. var rect = b.Rect; if (b.Ignore) { win.AddOverlay(rect, new RgbPixel { Red = 255, Blue = 255 }); // draw ignored boxes as orange } else { win.AddOverlay(rect, new RgbPixel { Red = 255 }); // draw other boxes as red } } Console.WriteLine("Hit enter to view the next random crop."); Console.ReadKey(); } } catch (Exception e) { Console.WriteLine(e); } }
private static void Main(string[] args) { if (args.Length != 1) { Console.WriteLine("Call this program like this: "); Console.WriteLine("VideoTracking.exe <path of video_frames directory>"); return; } var path = args[0]; var files = new DirectoryInfo(path).GetFiles("*.jpg").Select(info => info.FullName).ToList(); files.Sort(); if (files.Count == 0) { Console.WriteLine($"No images found in {path}"); return; } // Capture from the camera; note that on Windows you need to use VideoCaptureAPIs.DSHOW var cap = new VideoCapture(0, VideoCaptureAPIs.DSHOW); // Alternatively, capture from a video file //var cap = new VideoCapture("video.webm"); // Check whether the capture device is open if (!cap.IsOpened()) { Console.WriteLine("Unable to connect to camera"); return; } Mat temp = null; var tracker = new CorrelationTracker(); int init = 0; // Create the display window using (var win = new ImageWindow()) { Console.WriteLine("Object tracking program started"); Console.WriteLine("Make the console window active and use the keys to select the region to track. Width: [A,Z] Height: [S,X] X: [right,left] Y: [up,down]. Press Enter to start tracking."); Console.WriteLine("Note: switch the console window's input method to English."); // Select the object to track while (!win.IsClosed()) { // Grab one frame temp = cap.RetrieveMat();// new Mat(); if (temp == null) { Console.WriteLine("Failed to grab a frame!"); return; } var array = new byte[temp.Width * temp.Height * temp.ElemSize()]; Marshal.Copy(temp.Data, array, 0, array.Length); using (var cimg = Dlib.LoadImageData <BgrPixel>(array, (uint)temp.Height, (uint)temp.Width, (uint)(temp.Width * temp.ElemSize()))) { init++; if (init > 1) { var KK = Console.ReadKey(); if (KK.Key == ConsoleKey.Enter) { Console.WriteLine("Start tracking the target!"); // Set the tracking position var rect2 = DRectangle.CenteredRect(a_X, a_Y, a_W, a_H); // Start tracking tracker.StartTrack(cimg, rect2); win.SetImage(cimg); win.ClearOverlay(); win.AddOverlay(rect2); break; } // Adjust the tracking region if (KK.Key == ConsoleKey.RightArrow || KK.Key == ConsoleKey.LeftArrow || KK.Key == ConsoleKey.UpArrow || KK.Key == ConsoleKey.DownArrow || KK.Key == ConsoleKey.A || KK.Key == ConsoleKey.Z || KK.Key == ConsoleKey.S || KK.Key == ConsoleKey.X) { if (KK.Key == ConsoleKey.RightArrow) { a_X++; if (a_X > cimg.Rect.Width - a_W) { a_X = cimg.Rect.Width - a_W; } } if (KK.Key == ConsoleKey.LeftArrow) { a_X--; if (a_X < 0) { a_X = 0; } } if (KK.Key == ConsoleKey.UpArrow) { a_Y--; if (a_Y < 0) { a_Y = 0; } } if (KK.Key == ConsoleKey.DownArrow) { a_Y++; if (a_Y > cimg.Rect.Height - a_H) { a_Y = cimg.Rect.Height - a_H; } } if (KK.Key == ConsoleKey.A) { a_W++; if (a_W >= cimg.Rect.Width - a_X) { a_W = cimg.Rect.Width - a_X; } } if (KK.Key == ConsoleKey.Z) { a_W--; if (a_W < 10) { a_W = 10; } } if (KK.Key == ConsoleKey.S) { a_H++; if (a_H > cimg.Rect.Height - a_Y) { a_H = cimg.Rect.Height - a_Y; } } if (KK.Key == ConsoleKey.X) { a_H--; if (a_H < 10) { a_H = 10; } } } } var rect = DRectangle.CenteredRect(a_X, a_Y, a_W, a_H); Console.WriteLine("Set RECT:" + a_X + " " + a_Y + " " + a_W + " " + a_H); // Display the image win.SetImage(cimg); win.ClearOverlay(); // Draw the selection rectangle win.AddOverlay(rect); } } // Track the selected object while (!win.IsClosed()) { // Grab one frame temp = cap.RetrieveMat();// new Mat(); if (temp == null) { Console.WriteLine("Failed to grab a frame!"); return; } var array = new byte[temp.Width * temp.Height * temp.ElemSize()]; Marshal.Copy(temp.Data, array, 0, array.Length); using (var cimg = Dlib.LoadImageData <BgrPixel>(array, (uint)temp.Height, (uint)temp.Width, (uint)(temp.Width * temp.ElemSize()))) { // Update the tracker with the new frame tracker.Update(cimg); win.SetImage(cimg); win.ClearOverlay(); // Get the tracked target position DRectangle rect = tracker.GetPosition(); win.AddOverlay(rect); Console.WriteLine("OBJ RECT:" + (int)rect.Left + " " + (int)rect.Top + " " + (int)rect.Width + " " + (int)rect.Height); 
System.Threading.Thread.Sleep(100); } } } Console.WriteLine("Press any key to exit"); Console.ReadKey(); }
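The interactive tracking example above reads and writes a_X, a_Y, a_W and a_H without declaring them. A minimal sketch of the assumed class-level fields follows; the names are taken from the snippet, while the initial values are arbitrary and simply place a small starting rectangle in the frame.

// Assumed selection-rectangle state for the interactive tracking example above.
// These fields are not shown in the snippet; the initial values are arbitrary.
private static double a_X = 100;   // center X of the selection rectangle
private static double a_Y = 100;   // center Y of the selection rectangle
private static double a_W = 80;    // selection rectangle width
private static double a_H = 80;    // selection rectangle height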
public async Task <ActionResult> Login([FromBody] InputFaceModel model) { RequestFaceModel request = new RequestFaceModel() { Status = 500, Message = null }; var filePath = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "FaceImages", model.user_name); if (!Directory.Exists(filePath)) { request.Enum = RequestEnum.Failed; Console.WriteLine(request.Message); Thread.Sleep(5000); return(Ok(request)); } FaceContrast faceContrast = new FaceContrast(filePath); VideoCapture cap = null; try { if (model.rmtp_url == "0") { cap = new VideoCapture(0); } else { cap = new VideoCapture(model.rmtp_url); } var flag = false; var faceFlag = false; var bioFlag = false; QueueFixedLength <double> leftEarQueue = new QueueFixedLength <double>(10); QueueFixedLength <double> rightEarQueue = new QueueFixedLength <double>(10); QueueFixedLength <double> mouthQueue = new QueueFixedLength <double>(20); bool leftEarFlag = false; bool rightEarFlag = false; bool mouthFlag = false; using (var sp = ShapePredictor.Deserialize(Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "ShapeModel", "shape_predictor_5_face_landmarks.dat"))) using (var win = new ImageWindow()) { // Load face detection and pose estimation models. using (var detector = Dlib.GetFrontalFaceDetector()) using (var net = LossMetric.Deserialize(Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "ShapeModel", "dlib_face_recognition_resnet_model_v1.dat"))) using (var poseModel = ShapePredictor.Deserialize(Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "ShapeModel", "shape_predictor_68_face_landmarks.dat"))) { var ti = true; System.Timers.Timer t = new System.Timers.Timer(30000); t.Elapsed += new System.Timers.ElapsedEventHandler((object source, System.Timers.ElapsedEventArgs e) => { ti = false; }); t.AutoReset = false; t.Enabled = true; // Grab and process frames until the 30-second timer expires. while (/*!win.IsClosed() &&*/ ti) { try { // Grab a frame var temp = new Mat(); if (!cap.Read(temp)) { break; } // Turn OpenCV's Mat into something dlib can deal with. Note that this just // wraps the Mat object, it doesn't copy anything. So cimg is only valid as // long as temp is valid. Also don't do anything to temp that would cause it // to reallocate the memory which stores the image as that will make cimg // contain dangling pointers. This basically means you shouldn't modify temp // while using cimg. var array = new byte[temp.Width * temp.Height * temp.ElemSize()]; Marshal.Copy(temp.Data, array, 0, array.Length); using (var cimg = Dlib.LoadImageData <RgbPixel>(array, (uint)temp.Height, (uint)temp.Width, (uint)(temp.Width * temp.ElemSize()))) { // Detect faces var faces = detector.Operator(cimg); // Find the pose of each face. 
var shapes = new List <FullObjectDetection>(); for (var i = 0; i < faces.Length; ++i) { var det = poseModel.Detect(cimg, faces[i]); shapes.Add(det); } if (shapes.Count > 0) { // Liveness detection if (!bioFlag) { bioFlag = BioAssay(shapes[0], ref leftEarQueue, ref rightEarQueue, ref mouthQueue, ref leftEarFlag, ref rightEarFlag, ref mouthFlag); } } if (!faceFlag) { foreach (var face in faces) { var shape = sp.Detect(cimg, face); var faceChipDetail = Dlib.GetFaceChipDetails(shape, 150, 0.25); Matrix <RgbPixel> rgbPixels = new Matrix <RgbPixel>(cimg); var faceChip = Dlib.ExtractImageChip <RgbPixel>(rgbPixels, faceChipDetail); var faceDescriptors = net.Operator(faceChip); faceFlag = faceContrast.Contrast(faceDescriptors); } } Console.WriteLine(model.user_name + ":" + faceFlag); if (bioFlag && faceFlag) { flag = bioFlag && faceFlag; if (flag) { break; } } // Display it all on the screen win.ClearOverlay(); win.SetImage(cimg); var lines = Dlib.RenderFaceDetections(shapes); win.AddOverlay(faces, new RgbPixel { Red = 72, Green = 118, Blue = 255 }); win.AddOverlay(lines); foreach (var line in lines) { line.Dispose(); } } } catch (Exception ex) { request.Message = ex.ToString(); break; } } } } if (flag) { request.Enum = RequestEnum.Succeed; } else { request.Enum = RequestEnum.Failed; } } catch (Exception ex) { request.Message = ex.ToString(); } finally { if (cap != null) { cap.Dispose(); } } Console.WriteLine(request.Message); return(Ok(request)); }
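QueueFixedLength<T>, FaceContrast and BioAssay are project-specific helpers that are not included in this snippet. As a rough idea of what the fixed-length queue used for the eye/mouth aspect-ratio history might look like, here is a sketch; this is an assumption, not the project's actual implementation.

using System.Collections.Generic;

// Sketch of a fixed-length queue as used for the liveness check above.
// Once the queue holds maxLength samples, enqueuing a new value drops the oldest one,
// so the queue always contains the most recent window of aspect-ratio values.
public class QueueFixedLength<T> : Queue<T>
{
    private readonly int _maxLength;

    public QueueFixedLength(int maxLength)
    {
        this._maxLength = maxLength;
    }

    public new void Enqueue(T item)
    {
        // Discard the oldest sample when the window is full.
        if (this.Count >= this._maxLength)
            this.Dequeue();

        base.Enqueue(item);
    }
}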
private static void Main(string[] args) { try { // In this example we are going to train a face detector based on the // small faces dataset in the examples/faces directory. So the first // thing we do is load that dataset. This means you need to supply the // path to this faces folder as a command line argument so we will know // where it is. if (args.Length != 1) { Console.WriteLine("Give the path to the examples/faces directory as the argument to this"); Console.WriteLine("program. For example, if you are in the examples folder then execute "); Console.WriteLine("this program by running: "); Console.WriteLine(" ./fhog_object_detector_ex faces"); Console.WriteLine(); return; } var facesDirectory = args[0]; // The faces directory contains a training dataset and a separate // testing dataset. The training data consists of 4 images, each // annotated with rectangles that bound each human face. The idea is // to use this training data to learn to identify human faces in new // images. // // Once you have trained an object detector it is always important to // test it on data it wasn't trained on. Therefore, we will also load // a separate testing set of 5 images. Once we have a face detector // created from the training data we will see how well it works by // running it on the testing images. // // So here we create the variables that will hold our dataset. // images_train will hold the 4 training images and face_boxes_train // holds the locations of the faces in the training images. So for // example, the image images_train[0] has the faces given by the // rectangles in face_boxes_train[0]. IList <Matrix <byte> > tmpImagesTrain; IList <Matrix <byte> > tmpImagesTest; IList <IList <Rectangle> > tmpFaceBoxesTrain; IList <IList <Rectangle> > tmpFaceBoxesTest; // Now we load the data. These XML files list the images in each // dataset and also contain the positions of the face boxes. Obviously // you can use any kind of input format you like so long as you store // the data into images_train and face_boxes_train. But for convenience // dlib comes with tools for creating and loading XML image dataset // files. Here you see how to load the data. To create the XML files // you can use the imglab tool which can be found in the tools/imglab // folder. It is a simple graphical tool for labeling objects in images // with boxes. To see how to use it read the tools/imglab/README.txt // file. Dlib.LoadImageDataset(Path.Combine(facesDirectory, "training.xml"), out tmpImagesTrain, out tmpFaceBoxesTrain); Dlib.LoadImageDataset(Path.Combine(facesDirectory, "testing.xml"), out tmpImagesTest, out tmpFaceBoxesTest); // Now we do a little bit of pre-processing. This is optional but for // this training data it improves the results. The first thing we do is // increase the size of the images by a factor of two. We do this // because it will allow us to detect smaller faces than otherwise would // be practical (since the faces are all now twice as big). Note that, // in addition to resizing the images, these functions also make the // appropriate adjustments to the face boxes so that they still fall on // top of the faces after the images are resized. 
var imageTrain = new List <Matrix <byte> >(tmpImagesTrain); var faceBoxesTrain = new List <IList <Rectangle> >(tmpFaceBoxesTrain); Dlib.UpsampleImageDataset(2, imageTrain, faceBoxesTrain); var imageTest = new List <Matrix <byte> >(tmpImagesTest); var faceBoxesTest = new List <IList <Rectangle> >(tmpFaceBoxesTest); Dlib.UpsampleImageDataset(2, imageTest, faceBoxesTest); // Since human faces are generally left-right symmetric we can increase // our training dataset by adding mirrored versions of each image back // into images_train. So this next step doubles the size of our // training dataset. Again, this is obviously optional but is useful in // many object detection tasks. Dlib.AddImageLeftRightFlips(imageTrain, faceBoxesTrain); Console.WriteLine($"num training images: {imageTrain.Count()}"); Console.WriteLine($"num testing images: {imageTest.Count()}"); // Finally we get to the training code. dlib contains a number of // object detectors. This typedef tells it that you want to use the one // based on Felzenszwalb's version of the Histogram of Oriented // Gradients (commonly called HOG) detector. The 6 means that you want // it to use an image pyramid that downsamples the image at a ratio of // 5/6. Recall that HOG detectors work by creating an image pyramid and // then running the detector over each pyramid level in a sliding window // fashion. using (var scanner = new ScanFHogPyramid <PyramidDown, DefaultFHogFeatureExtractor>(6)) { // The sliding window detector will be 80 pixels wide and 80 pixels tall. scanner.SetDetectionWindowSize(80, 80); using (var trainer = new StructuralObjectDetectionTrainer <ScanFHogPyramid <PyramidDown, DefaultFHogFeatureExtractor> >(scanner)) { // Set this to the number of processing cores on your machine. trainer.SetNumThreads(4); // The trainer is a kind of support vector machine and therefore has the usual SVM // C parameter. In general, a bigger C encourages it to fit the training data // better but might lead to overfitting. You must find the best C value // empirically by checking how well the trained detector works on a test set of // images you haven't trained on. Don't just leave the value set at 1. Try a few // different C values and see what works best for your data. trainer.SetC(1); // We can tell the trainer to print it's progress to the console if we want. trainer.BeVerbose(); // The trainer will run until the "risk gap" is less than 0.01. Smaller values // make the trainer solve the SVM optimization problem more accurately but will // take longer to train. For most problems a value in the range of 0.1 to 0.01 is // plenty accurate. Also, when in verbose mode the risk gap is printed on each // iteration so you can see how close it is to finishing the training. trainer.SetEpsilon(0.01); // Now we run the trainer. For this example, it should take on the order of 10 // seconds to train. var detector = trainer.Train(imageTrain, faceBoxesTrain); // Now that we have a face detector we can test it. The first statement tests it // on the training data. It will print the precision, recall, and then average precision. using (var matrix = Dlib.TestObjectDetectionFunction(detector, imageTrain, faceBoxesTrain)) Console.WriteLine($"training results: {matrix}"); // However, to get an idea if it really worked without overfitting we need to run // it on images it wasn't trained on. The next line does this. Happily, we see // that the object detector works perfectly on the testing images. 
using (var matrix = Dlib.TestObjectDetectionFunction(detector, imageTest, faceBoxesTest)) Console.WriteLine($"testing results: {matrix}"); // If you have read any papers that use HOG you have probably seen the nice looking // "sticks" visualization of a learned HOG detector. This next line creates a // window with such a visualization of our detector. It should look somewhat like // a face. using (var fhog = Dlib.DrawFHog(detector)) using (var hogwin = new ImageWindow(fhog, "Learned fHOG detector")) { // Now for the really fun part. Let's display the testing images on the screen and // show the output of the face detector overlaid on each image. You will see that // it finds all the faces without false alarming on any non-faces. using (var win = new ImageWindow()) for (var i = 0; i < imageTest.Count; ++i) { // Run the detector and get the face detections. var dets = detector.Operator(imageTest[i]); win.ClearOverlay(); win.SetImage(imageTest[i]); win.AddOverlay(dets, new RgbPixel(255, 0, 0)); Console.WriteLine("Hit enter to process the next image..."); Console.ReadKey(); Console.WriteLine(""); } } // Like everything in dlib, you can save your detector to disk using the // serialize() function. detector.Serialize("face_detector.svm"); // Then you can recall it using the deserialize() function. using (var tmp = new ScanFHogPyramid <PyramidDown, DefaultFHogFeatureExtractor>(6)) using (var detector2 = new ObjectDetector <ScanFHogPyramid <PyramidDown, DefaultFHogFeatureExtractor> >(tmp)) detector2.Deserialize("face_detector.svm"); // Now let's talk about some optional features of this training tool as well as some // important points you should understand. // // The first thing that should be pointed out is that, since this is a sliding // window classifier, it can't output an arbitrary rectangle as a detection. In // this example our sliding window is 80 by 80 pixels and is run over an image // pyramid. This means that it can only output detections that are at least 80 by // 80 pixels in size (recall that this is why we upsampled the images after loading // them). It also means that the aspect ratio of the outputs is 1. So if, // for example, you had a box in your training data that was 200 pixels by 10 // pixels then it would simply be impossible for the detector to learn to detect // it. Similarly, if you had a really small box it would be unable to learn to // detect it. // // So the training code performs an input validation check on the training data and // will throw an exception if it detects any boxes that are impossible to detect // given your setting of scanning window size and image pyramid resolution. You // can use a statement like: // remove_unobtainable_rectangles(trainer, images_train, face_boxes_train) // to automatically discard these impossible boxes from your training dataset // before running the trainer. This will avoid getting the "impossible box" // exception. However, I would recommend you be careful that you are not throwing // away truth boxes you really care about. The remove_unobtainable_rectangles() // will return the set of removed rectangles so you can visually inspect them and // make sure you are OK that they are being removed. // // Next, note that any location in the images not marked with a truth box is // implicitly treated as a negative example. This means that when creating // training data it is critical that you label all the objects you want to detect. 
// So for example, if you are making a face detector then you must mark all the // faces in each image. However, sometimes there are objects in images you are // unsure about or simply don't care if the detector identifies or not. For these // objects you can pass in a set of "ignore boxes" as a third argument to the // trainer.train() function. The trainer will simply disregard any detections that // happen to hit these boxes. // // Another useful thing you can do is evaluate multiple HOG detectors together. The // benefit of this is increased testing speed since it avoids recomputing the HOG // features for each run of the detector. You do this by storing your detectors // into a std::vector and then invoking evaluate_detectors() like so: var myDetectors = new List <ObjectDetector <ScanFHogPyramid <PyramidDown, DefaultFHogFeatureExtractor> > >(); myDetectors.Add(detector); var dect2 = Dlib.EvaluateDetectors(myDetectors, imageTrain[0]); // // // Finally, you can add a nuclear norm regularizer to the SVM trainer. Doing has // two benefits. First, it can cause the learned HOG detector to be composed of // separable filters and therefore makes it execute faster when detecting objects. // It can also help with generalization since it tends to make the learned HOG // filters smoother. To enable this option you call the following function before // you create the trainer object: // scanner.set_nuclear_norm_regularization_strength(1.0); // The argument determines how important it is to have a small nuclear norm. A // bigger regularization strength means it is more important. The smaller the // nuclear norm the smoother and faster the learned HOG filters will be, but if the // regularization strength value is too large then the SVM will not fit the data // well. This is analogous to giving a C value that is too small. // // You can see how many separable filters are inside your detector like so: Console.WriteLine($"num filters: {Dlib.NumSeparableFilters(detector)}"); // You can also control how many filters there are by explicitly thresholding the // singular values of the filters like this: using (var newDetector = Dlib.ThresholdFilterSingularValues(detector, 0.1)) { } // That removes filter components with singular values less than 0.1. The bigger // this number the fewer separable filters you will have and the faster the // detector will run. However, a large enough threshold will hurt detection // accuracy. } } } catch (Exception e) { Console.WriteLine(e); } }
private static void Main() { try { // You can get this file from http://dlib.net/files/mmod_rear_end_vehicle_detector.dat.bz2 // This network was produced by the dnn_mmod_train_find_cars_ex.cpp example program. // As you can see, the file also includes a separately trained shape_predictor. To see // a generic example of how to train those refer to train_shape_predictor_ex.cpp. using (var deserialize = new ProxyDeserialize("mmod_rear_end_vehicle_detector.dat")) using (var net = LossMmod.Deserialize(deserialize, 1)) using (var sp = ShapePredictor.Deserialize(deserialize)) using (var img = Dlib.LoadImageAsMatrix <RgbPixel>("mmod_cars_test_image.jpg")) using (var win = new ImageWindow()) { win.SetImage(img); // Run the detector on the image and show us the output. var dets = net.Operator(img).First(); foreach (var d in dets) { // We use a shape_predictor to refine the exact shape and location of the detection // box. This shape_predictor is trained to simply output the 4 corner points of // the box. So all we do is make a rectangle that tightly contains those 4 points // and that rectangle is our refined detection position. var fd = sp.Detect(img, d); var rect = Rectangle.Empty; for (var j = 0u; j < fd.Parts; ++j) { rect += fd.GetPart(j); } win.AddOverlay(rect, new RgbPixel(255, 0, 0)); } Console.WriteLine("Hit enter to view the intermediate processing steps"); Console.ReadKey(); // Now let's look at how the detector works. The high level processing steps look like: // 1. Create an image pyramid and pack the pyramid into one big image. We call this // image the "tiled pyramid". // 2. Run the tiled pyramid image through the CNN. The CNN outputs a new image where // bright pixels in the output image indicate the presence of cars. // 3. Find pixels in the CNN's output image with a value > 0. Those locations are your // preliminary car detections. // 4. Perform non-maximum suppression on the preliminary detections to produce the // final output. // // We will be plotting the images from steps 1 and 2 so you can visualize what's // happening. For the CNN's output image, we will use the jet colormap so that "bright" // outputs, i.e. pixels with big values, appear in red and "dim" outputs appear as a // cold blue color. To do this we pick a range of CNN output values for the color // mapping. The specific values don't matter. They are just selected to give a nice // looking output image. const float lower = -2.5f; const float upper = 0.0f; Console.WriteLine($"jet color mapping range: lower={lower} upper={upper}"); // Create a tiled pyramid image and display it on the screen. // Get the type of pyramid the CNN used //using pyramid_type = std::remove_reference < decltype(input_layer(net)) >::type::pyramid_type; // And tell create_tiled_pyramid to create the pyramid using that pyramid type. using (var inputLayer = new InputRgbImagePyramid <PyramidDown>(6)) { net.TryGetInputLayer(inputLayer); var padding = inputLayer.GetPyramidPadding(); var outerPadding = inputLayer.GetPyramidOuterPadding(); Dlib.CreateTiledPyramid <RgbPixel, PyramidDown>(img, padding, outerPadding, 6, out var tiledImg, out var rects); using (var winpyr = new ImageWindow(tiledImg, "Tiled pyramid")) { // This CNN detector represents a sliding window detector with 3 sliding windows. Each // of the 3 windows has a different aspect ratio, allowing it to find vehicles which // are either tall and skinny, squarish, or short and wide. The aspect ratio of a // detection is determined by which channel in the output image triggers the detection. 
// Here we are just going to max pool the channels together to get one final image for // our display. In this image, a pixel will be bright if any of the sliding window // detectors thinks there is a car at that location. using (var subnet = net.GetSubnet()) { var output = subnet.Output; Console.WriteLine($"Number of channels in final tensor image: {output.K}"); var networkOutput = Dlib.ImagePlane(output); for (var k = 1; k < output.K; k++) { using (var tmpNetworkOutput = Dlib.ImagePlane(output, 0, k)) { var maxPointWise = Dlib.MaxPointWise(networkOutput, tmpNetworkOutput); networkOutput.Dispose(); networkOutput = maxPointWise; } } // We will also upsample the CNN's output image. The CNN we defined has an 8x // downsampling layer at the beginning. In the code below we are going to overlay this // CNN output image on top of the raw input image. To make that look nice it helps to // upsample the CNN output image back to the same resolution as the input image, which // we do here. var networkOutputScale = img.Columns / (double)networkOutput.Columns; Dlib.ResizeImage(networkOutput, networkOutputScale); // Display the network's output as a color image. using (var jet = Dlib.Jet(networkOutput, upper, lower)) using (var winOutput = new ImageWindow(jet, "Output tensor from the network")) { // Also, overlay network_output on top of the tiled image pyramid and display it. for (var r = 0; r < tiledImg.Rows; ++r) { for (var c = 0; c < tiledImg.Columns; ++c) { var tmp = new DPoint(c, r); tmp = Dlib.InputTensorToOutputTensor(net, tmp); var dp = networkOutputScale * tmp; tmp = new DPoint((int)dp.X, (int)dp.Y); if (Dlib.GetRect(networkOutput).Contains((int)tmp.X, (int)tmp.Y)) { var val = networkOutput[(int)tmp.Y, (int)tmp.X]; // alpha blend the network output pixel with the RGB image to make our // overlay. var p = new RgbAlphaPixel(); Dlib.AssignPixel(ref p, Dlib.ColormapJet(val, lower, upper)); p.Alpha = 120; var rgb = new RgbPixel(); Dlib.AssignPixel(ref rgb, p); tiledImg[r, c] = rgb; } } } // If you look at this image you can see that the vehicles have bright red blobs on // them. That's the CNN saying "there is a car here!". You will also notice there is // a certain scale at which it finds cars. They have to be not too big or too small, // which is why we have an image pyramid. The pyramid allows us to find cars of all // scales. using (var winPyrOverlay = new ImageWindow(tiledImg, "Detection scores on image pyramid")) { // Finally, we can collapse the pyramid back into the original image. The CNN doesn't // actually do this step, since it's enough to threshold the tiled pyramid image to get // the detections. However, it makes a nice visualization and clearly indicates that // the detector is firing for all the cars. using (var collapsed = new Matrix <float>(img.Rows, img.Columns)) using (var inputTensor = new ResizableTensor()) { inputLayer.ToTensor(img, 1, inputTensor); for (var r = 0; r < collapsed.Rows; ++r) { for (var c = 0; c < collapsed.Columns; ++c) { // Loop over a bunch of scale values and look up what part of network_output // corresponds to the point(c,r) in the original image, then take the max // detection score over all the scales and save it at pixel point(c,r). var maxScore = -1e30f; for (double scale = 1; scale > 0.2; scale *= 5.0 / 6.0) { // Map from input image coordinates to tiled pyramid coordinates. 
var tensorSpace = inputLayer.ImageSpaceToTensorSpace(inputTensor, scale, new DRectangle(new DPoint(c, r))); var tmp = tensorSpace.Center; // Now map from pyramid coordinates to network_output coordinates. var dp = networkOutputScale * Dlib.InputTensorToOutputTensor(net, tmp); tmp = new DPoint((int)dp.X, (int)dp.Y); if (Dlib.GetRect(networkOutput).Contains((int)tmp.X, (int)tmp.Y)) { var val = networkOutput[(int)tmp.Y, (int)tmp.X]; if (val > maxScore) { maxScore = val; } } } collapsed[r, c] = maxScore; // Also blend the scores into the original input image so we can view it as // an overlay on the cars. var p = new RgbAlphaPixel(); Dlib.AssignPixel(ref p, Dlib.ColormapJet(maxScore, lower, upper)); p.Alpha = 120; var rgb = new RgbPixel(); Dlib.AssignPixel(ref rgb, p); img[r, c] = rgb; } } using (var jet2 = Dlib.Jet(collapsed, upper, lower)) using (var winCollapsed = new ImageWindow(jet2, "Collapsed output tensor from the network")) using (var winImgAndSal = new ImageWindow(img, "Collapsed detection scores on raw image")) { Console.WriteLine("Hit enter to end program"); Console.ReadKey(); } } } } } } } } } catch (Exception e) { Console.WriteLine(e); } }
private static void Main() { using (var img = new Array2D <byte>(400, 400)) using (var ht = new DlibDotNet.HoughTransform(300)) using (var win = new ImageWindow()) using (var win2 = new ImageWindow()) { var angle1 = 0d; var angle2 = 0d; while (true) { angle1 += Math.PI / 130; angle2 += Math.PI / 400; var rect = img.Rect; var cent = rect.Center; var arc = Point.Rotate(cent, cent + new Point(90, 0), angle1 * 180 / Math.PI); var tmp2 = arc + new Point(500, 0); var tmp3 = arc - new Point(500, 0); var l = Point.Rotate(arc, tmp2, angle2 * 180 / Math.PI); var r = Point.Rotate(arc, tmp3, angle2 * 180 / Math.PI); Dlib.AssignAllPixels(img, 0); Dlib.DrawLine(img, l, r, 255); using (var himg = new Array2D <int>()) { var offset = new Point(50, 50); var hrect = Dlib.GetRect(ht); var box = Rectangle.Translate(hrect, offset); // Now let's compute the hough transform for a subwindow in the image. In // particular, we run it on the 300x300 subwindow with an upper left corner at the // pixel point(50,50). The output is stored in himg. ht.Operator(img, box, himg); // Now that we have the transformed image, the Hough image pixel with the largest // value should indicate where the line is. So we find the coordinates of the // largest pixel: using (var mat = Dlib.Mat(himg)) { var p = Dlib.MaxPoint(mat); // And then ask the ht object for the line segment in the original image that // corresponds to this point in Hough transform space. var line = ht.GetLine(p); // Finally, let's display all these things on the screen. We copy the original // input image into a color image and then draw the detected line on top in red. using (var temp = new Array2D <RgbPixel>()) { Dlib.AssignImage(img, temp); var p1 = line.Item1 + offset; var p2 = line.Item2 + offset; Dlib.DrawLine(temp, p1, p2, new RgbPixel { Red = 255 }); win.ClearOverlay(); win.SetImage(temp); // Also show the subwindow we ran the Hough transform on as a green box. You will // see that the detected line is exactly contained within this box and also // overlaps the original line. win.AddOverlay(box, new RgbPixel { Green = 255 }); using (var jet = Dlib.Jet(himg)) win2.SetImage(jet); } } } } } }
private static void Main() { try { //var cap = new VideoCapture(0); //var cap = new VideoCapture("https://js.live-play.acgvideo.com/live-js/890069/live_30947419_1716018.flv?wsSecret=2cee8a379a871fa8dbf714ba9d16e8a4&wsTime=1548240723&trid=4f64a0ae5e2444938cfdd109a54c6e1c&sig=no&platform=web&pSession=yR3bsQk1-SCY4-4QGi-K7EG-AsbTiwbX7tZF"); var cap = new VideoCapture(0); if (!cap.IsOpened()) { Console.WriteLine("Unable to connect to camera"); return; } using (var win = new ImageWindow()) { // Load face detection and pose estimation models. using (var detector = Dlib.GetFrontalFaceDetector()) using (var poseModel = ShapePredictor.Deserialize("shape_predictor_68_face_landmarks.dat")) { // Grab and process frames until the main window is closed by the user. while (!win.IsClosed()) { // Grab a frame var temp = new Mat(); if (!cap.Read(temp)) { break; } // Turn OpenCV's Mat into something dlib can deal with. Note that this just // wraps the Mat object, it doesn't copy anything. So cimg is only valid as // long as temp is valid. Also don't do anything to temp that would cause it // to reallocate the memory which stores the image as that will make cimg // contain dangling pointers. This basically means you shouldn't modify temp // while using cimg. var array = new byte[temp.Width * temp.Height * temp.ElemSize()]; Marshal.Copy(temp.Data, array, 0, array.Length); using (var cimg = Dlib.LoadImageData <RgbPixel>(array, (uint)temp.Height, (uint)temp.Width, (uint)(temp.Width * temp.ElemSize()))) { // Detect faces var faces = detector.Operator(cimg); // Find the pose of each face. var shapes = new List <FullObjectDetection>(); for (var i = 0; i < faces.Length; ++i) { var det = poseModel.Detect(cimg, faces[i]); Console.WriteLine(faces[i].Left); shapes.Add(det); } // Display it all on the screen win.ClearOverlay(); win.SetImage(cimg); var lines = Dlib.RenderFaceDetections(shapes); win.AddOverlay(faces, new RgbPixel { Red = 255 }); win.AddOverlay(lines); foreach (var line in lines) { line.Dispose(); } } } } } } //catch (serialization_error&e) //{ // cout << "You need dlib's default face landmarking model file to run this example." << endl; // cout << "You can get it from the following URL: " << endl; // cout << " http://dlib.net/files/shape_predictor_68_face_landmarks.dat.bz2" << endl; // cout << endl << e.what() << endl; //} catch (Exception e) { Console.WriteLine(e.Message); } }
private static void Main(string[] args) { try { if (args.Length != 2) { Console.WriteLine("Call this program like this:"); Console.WriteLine("./dnn_mmod_dog_hipsterizer mmod_dog_hipsterizer.dat faces/dogs.jpg"); Console.WriteLine("You can get the mmod_dog_hipsterizer.dat file from:"); Console.WriteLine("http://dlib.net/files/mmod_dog_hipsterizer.dat.bz2"); return; } // load the models as well as glasses and mustache. using (var deserialize = new ProxyDeserialize(args[0])) using (var net = LossMmod.Deserialize(deserialize)) using (var sp = ShapePredictor.Deserialize(deserialize)) using (var glasses = Matrix <RgbAlphaPixel> .Deserialize(deserialize)) using (var mustache = Matrix <RgbAlphaPixel> .Deserialize(deserialize)) { Dlib.PyramidUp(glasses); Dlib.PyramidUp(mustache); using (var win1 = new ImageWindow(glasses)) using (var win2 = new ImageWindow(mustache)) using (var winWireframe = new ImageWindow()) using (var winHipster = new ImageWindow()) { // Now process each image, find dogs, and hipsterize them by drawing glasses and a // mustache on each dog :) for (var i = 1; i < args.Length; ++i) { using (var img = Dlib.LoadImageAsMatrix <RgbPixel>(args[i])) { // Upsampling the image will allow us to find smaller dog faces but will use more // computational resources. //pyramid_up(img); var dets = net.Operator(img).First(); winWireframe.ClearOverlay(); winWireframe.SetImage(img); // We will also draw a wireframe on each dog's face so you can see where the // shape_predictor is identifying face landmarks. var lines = new List <ImageWindow.OverlayLine>(); foreach (var d in dets) { // get the landmarks for this dog's face var shape = sp.Detect(img, d.Rect); var color = new RgbPixel(0, 255, 0); var top = shape.GetPart(0); var leftEar = shape.GetPart(1); var leftEye = shape.GetPart(2); var nose = shape.GetPart(3); var rightEar = shape.GetPart(4); var rightEye = shape.GetPart(5); // The locations of the left and right ends of the mustache. var leftMustache = 1.3 * (leftEye - rightEye) / 2 + nose; var rightMustache = 1.3 * (rightEye - leftEye) / 2 + nose; // Draw the glasses onto the image. var from = new[] { 2 * new Point(176, 36), 2 * new Point(59, 35) }; var to = new[] { leftEye, rightEye }; using (var transform = Dlib.FindSimilarityTransform(from, to)) for (uint r = 0, nr = (uint)glasses.Rows; r < nr; ++r) { for (uint c = 0, nc = (uint)glasses.Columns; c < nc; ++c) { var p = (Point)transform.Operator(new DPoint(c, r)); if (Dlib.GetRect(img).Contains(p)) { var rgb = img[p.Y, p.X]; Dlib.AssignPixel(ref rgb, glasses[(int)r, (int)c]); img[p.Y, p.X] = rgb; } } } // Draw the mustache onto the image right under the dog's nose. var mustacheRect = Dlib.GetRect(mustache); from = new[] { mustacheRect.TopLeft, mustacheRect.TopRight }; to = new[] { rightMustache, leftMustache }; using (var transform = Dlib.FindSimilarityTransform(from, to)) for (uint r = 0, nr = (uint)mustache.Rows; r < nr; ++r) { for (uint c = 0, nc = (uint)mustache.Columns; c < nc; ++c) { var p = (Point)transform.Operator(new DPoint(c, r)); if (Dlib.GetRect(img).Contains(p)) { var rgb = img[p.Y, p.X]; Dlib.AssignPixel(ref rgb, mustache[(int)r, (int)c]); img[p.Y, p.X] = rgb; } } } // Record the lines needed for the face wire frame. 
lines.Add(new ImageWindow.OverlayLine(leftEye, nose, color)); lines.Add(new ImageWindow.OverlayLine(nose, rightEye, color)); lines.Add(new ImageWindow.OverlayLine(rightEye, leftEye, color)); lines.Add(new ImageWindow.OverlayLine(rightEye, rightEar, color)); lines.Add(new ImageWindow.OverlayLine(rightEar, top, color)); lines.Add(new ImageWindow.OverlayLine(top, leftEar, color)); lines.Add(new ImageWindow.OverlayLine(leftEar, leftEye, color)); winWireframe.AddOverlay(lines); winHipster.SetImage(img); } Console.WriteLine("Hit enter to process the next image."); Console.ReadKey(); } } } } } catch (Exception e) { Console.WriteLine(e); } }
private static void Main() { try { var cap = new VideoCapture(0); //var cap = new VideoCapture("20090124_WeeklyAddress.ogv.360p.webm"); if (!cap.IsOpened()) { Console.WriteLine("Unable to connect to camera"); return; } using (var win = new ImageWindow()) { // Load face detection and pose estimation models. using (var detector = Dlib.GetFrontalFaceDetector()) using (var poseModel = ShapePredictor.Deserialize("shape_predictor_68_face_landmarks.dat")) { // Grab and process frames until the main window is closed by the user. while (!win.IsClosed()) { // Grab a frame var temp = new Mat(); if (!cap.Read(temp)) { break; } // Turn OpenCV's Mat into something dlib can deal with. Note that this just // wraps the Mat object, it doesn't copy anything. So cimg is only valid as // long as temp is valid. Also don't do anything to temp that would cause it // to reallocate the memory which stores the image as that will make cimg // contain dangling pointers. This basically means you shouldn't modify temp // while using cimg. var array = new byte[temp.Width * temp.Height * temp.ElemSize()]; Marshal.Copy(temp.Data, array, 0, array.Length); using (var cimg = Dlib.LoadImageData <RgbPixel>(array, (uint)temp.Height, (uint)temp.Width, (uint)(temp.Width * temp.ElemSize()))) { // Detect faces var faces = detector.Detect(cimg); // Find the pose of each face. var shapes = new List <FullObjectDetection>(); for (var i = 0; i < faces.Length; ++i) { var det = poseModel.Detect(cimg, faces[i]); shapes.Add(det); } // Display it all on the screen win.ClearOverlay(); win.SetImage(cimg); var lines = Dlib.RenderFaceDetections(shapes); win.AddOverlay(lines); foreach (var line in lines) { line.Dispose(); } } } } } } //catch (serialization_error&e) //{ // cout << "You need dlib's default face landmarking model file to run this example." << endl; // cout << "You can get it from the following URL: " << endl; // cout << " http://dlib.net/files/shape_predictor_68_face_landmarks.dat.bz2" << endl; // cout << endl << e.what() << endl; //} catch (Exception e) { Console.WriteLine(e.Message); } }
private static void Main() { try { // Capture from the camera; note that on Windows you need to use VideoCaptureAPIs.DSHOW var cap = new VideoCapture(0, VideoCaptureAPIs.DSHOW); // Alternatively, capture from a video file //var cap = new VideoCapture("video.webm"); // Check whether the capture device is open if (!cap.IsOpened()) { Console.WriteLine("Unable to connect to camera"); return; } Mat temp = null; // Create the display window using (var win = new ImageWindow()) { // Load the face detection and landmarking models using (var detector = Dlib.GetFrontalFaceDetector()) using (var poseModel = ShapePredictor.Deserialize("shape_predictor_68_face_landmarks.dat")) { // Loop until the main window is closed while (!win.IsClosed()) { //System.Threading.Thread.Sleep(100); // Grab one frame temp = cap.RetrieveMat();// new Mat(); if (temp == null) { break; } // Convert the OpenCV image data to the dlib image format var array = new byte[temp.Width * temp.Height * temp.ElemSize()]; Marshal.Copy(temp.Data, array, 0, array.Length); using (var cimg = Dlib.LoadImageData <BgrPixel>(array, (uint)temp.Height, (uint)temp.Width, (uint)(temp.Width * temp.ElemSize()))) { // Detect faces var faces = detector.Operator(cimg); // Find the landmarks for each face var shapes = new List <FullObjectDetection>(); for (var i = 0; i < faces.Length; ++i) { var det = poseModel.Detect(cimg, faces[i]); shapes.Add(det); } // Display it all on the screen win.ClearOverlay(); win.SetImage(cimg); var lines = Dlib.RenderFaceDetections(shapes); win.AddOverlay(lines); foreach (var line in lines) { line.Dispose(); } } } } } } //catch (serialization_error&e) //{ // cout << "You need to download the face landmarking model from http://dlib.net/files/shape_predictor_68_face_landmarks.dat.bz2" << endl; // cout << endl << e.what() << endl; //} catch (Exception e) { Console.WriteLine(e.Message); } }
private static void Main(string[] args) { if (args.Length != 1) { Console.WriteLine("Run this example by invoking it like this: "); Console.WriteLine(" ./DnnFaceRecognition faces/bald_guys.jpg"); Console.WriteLine("You will also need to get the face landmarking model file as well as "); Console.WriteLine("the face recognition model file. Download and then decompress these files from: "); Console.WriteLine("http://dlib.net/files/shape_predictor_5_face_landmarks.dat.bz2"); Console.WriteLine("http://dlib.net/files/dlib_face_recognition_resnet_model_v1.dat.bz2"); return; } // The first thing we are going to do is load all our models. First, since we need to // find faces in the image we will need a face detector: using (var detector = FrontalFaceDetector.GetFrontalFaceDetector()) // We will also use a face landmarking model to align faces to a standard pose: (see face_landmark_detection_ex.cpp for an introduction) using (var sp = new ShapePredictor("shape_predictor_5_face_landmarks.dat")) // And finally we load the DNN responsible for face recognition. using (var net = DlibDotNet.Dnn.LossMetric.Deserialize("dlib_face_recognition_resnet_model_v1.dat")) using (var img = Dlib.LoadImage <RgbPixel>(args[0])) using (var mat = new Matrix <RgbPixel>(img)) // Display the raw image on the screen using (var win = new ImageWindow(img)) { // Run the face detector on the image of our action heroes, and for each face extract a // copy that has been normalized to 150x150 pixels in size and appropriately rotated // and centered. var faces = new List <Matrix <RgbPixel> >(); foreach (var face in detector.Detect(img)) { var shape = sp.Detect(img, face); var faceChipDetail = Dlib.GetFaceChipDetails(shape, 150, 0.25); var faceChip = Dlib.ExtractImageChip <RgbPixel>(mat, faceChipDetail); //faces.Add(move(face_chip)); faces.Add(faceChip); // Also put some boxes on the faces so we can see that the detector is finding // them. win.AddOverlay(face); } if (!faces.Any()) { Console.WriteLine("No faces found in image!"); return; } // This call asks the DNN to convert each face image in faces into a 128D vector. // In this 128D vector space, images from the same person will be close to each other // but vectors from different people will be far apart. So we can use these vectors to // identify if a pair of images are from the same person or from different people. var faceDescriptors = net.Operator(faces); // In particular, one simple thing we can do is face clustering. This next bit of code // creates a graph of connected faces and then uses the Chinese whispers graph clustering // algorithm to identify how many people there are and which faces belong to whom. var edges = new List <SamplePair>(); for (uint i = 0; i < faceDescriptors.Count; ++i) { for (var j = i; j < faceDescriptors.Count; ++j) { // Faces are connected in the graph if they are close enough. Here we check if // the distance between two face descriptors is less than 0.6, which is the // decision threshold the network was trained to use. Although you can // certainly use any other threshold you find useful. var diff = faceDescriptors[i] - faceDescriptors[j]; if (Dlib.Length(diff) < 0.6) { edges.Add(new SamplePair(i, j)); } } } Dlib.ChineseWhispers(edges, 100, out var numClusters, out var labels); // This will correctly indicate that there are 4 people in the image. Console.WriteLine($"number of people found in the image: {numClusters}"); // Now let's display the face clustering results on the screen. You will see that it // correctly grouped all the faces. 
var winClusters = new List <ImageWindow>(); for (var i = 0; i < numClusters; i++) { winClusters.Add(new ImageWindow()); } var tileImages = new List <Matrix <RgbPixel> >(); for (var clusterId = 0ul; clusterId < numClusters; ++clusterId) { var temp = new List <Matrix <RgbPixel> >(); for (var j = 0; j < labels.Length; ++j) { if (clusterId == labels[j]) { temp.Add(faces[j]); } } winClusters[(int)clusterId].Title = $"face cluster {clusterId}"; var tileImage = Dlib.TileImages(temp); tileImages.Add(tileImage); winClusters[(int)clusterId].SetImage(tileImage); } // Finally, let's print one of the face descriptors to the screen. using (var trans = Dlib.Trans(faceDescriptors[0])) { Console.WriteLine($"face descriptor for one face: {trans}"); // It should also be noted that face recognition accuracy can be improved if jittering // is used when creating face descriptors. In particular, to get 99.38% on the LFW // benchmark you need to use the jitter_image() routine to compute the descriptors, // like so: var jitterImages = JitterImage(faces[0]).ToArray(); var ret = net.Operator(jitterImages); using (var m = Dlib.Mat(ret)) using (var faceDescriptor = Dlib.Mean <float>(m)) using (var t = Dlib.Trans(faceDescriptor)) { Console.WriteLine($"jittered face descriptor for one face: {t}"); // If you use the model without jittering, as we did when clustering the bald guys, it // gets an accuracy of 99.13% on the LFW benchmark. So jittering makes the whole // procedure a little more accurate but makes face descriptor calculation slower. Console.WriteLine("hit enter to terminate"); Console.ReadKey(); foreach (var jitterImage in jitterImages) { jitterImage.Dispose(); } foreach (var tileImage in tileImages) { tileImage.Dispose(); } foreach (var edge in edges) { edge.Dispose(); } foreach (var descriptor in faceDescriptors) { descriptor.Dispose(); } foreach (var face in faces) { face.Dispose(); } } } } }
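The jittering step above calls a JitterImage helper that is not included in this snippet. In dlib's C++ example the corresponding jitter_image() routine produces 100 randomly jittered copies of the face chip; assuming DlibDotNet exposes Dlib.JitterImage(Matrix<RgbPixel>, Rand) as in the upstream DnnFaceRecognition sample, the helper could be sketched as follows.

// Sketch of the JitterImage helper used above. It yields 100 copies of the input
// face chip, each slightly zoomed, rotated and translated, and randomly mirrored
// left to right. Assumes Dlib.JitterImage(Matrix<RgbPixel>, Rand) is available.
private static IEnumerable<Matrix<RgbPixel>> JitterImage(Matrix<RgbPixel> img)
{
    using (var rnd = new Rand())
        for (var i = 0; i < 100; ++i)
            yield return Dlib.JitterImage(img, rnd);
}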
public void AddOverlay() { if (!this.CanGuiDebug) { Console.WriteLine("Build and run in Release mode if you want to show the GUI."); return; } var path = this.GetDataFile("Lenna.bmp"); var tests = new[] { new { Type = ImageTypes.HsiPixel, ExpectResult = true }, new { Type = ImageTypes.LabPixel, ExpectResult = true }, new { Type = ImageTypes.BgrPixel, ExpectResult = true }, new { Type = ImageTypes.RgbPixel, ExpectResult = true }, new { Type = ImageTypes.RgbAlphaPixel, ExpectResult = true }, new { Type = ImageTypes.UInt8, ExpectResult = true }, new { Type = ImageTypes.UInt16, ExpectResult = true }, new { Type = ImageTypes.UInt32, ExpectResult = true }, new { Type = ImageTypes.Int8, ExpectResult = true }, new { Type = ImageTypes.Int16, ExpectResult = true }, new { Type = ImageTypes.Int32, ExpectResult = true }, new { Type = ImageTypes.Float, ExpectResult = true }, new { Type = ImageTypes.Double, ExpectResult = true } }; foreach (var test in tests) { try { var rect = new Rectangle(10, 10, 100, 100); var array = Array2D.Array2DTest.CreateArray2DHelp(test.Type, path.FullName); using (var window = new ImageWindow(array)) { switch (test.Type) { case ImageTypes.UInt8: window.AddOverlay(rect, (byte)0, test.Type.ToString()); break; case ImageTypes.UInt16: window.AddOverlay(rect, (ushort)0, test.Type.ToString()); break; case ImageTypes.UInt32: window.AddOverlay(rect, 0u, test.Type.ToString()); break; case ImageTypes.Int8: window.AddOverlay(rect, (sbyte)0, test.Type.ToString()); break; case ImageTypes.Int16: window.AddOverlay(rect, (short)0, test.Type.ToString()); break; case ImageTypes.Int32: window.AddOverlay(rect, 0, test.Type.ToString()); break; case ImageTypes.Float: window.AddOverlay(rect, 0f, test.Type.ToString()); break; case ImageTypes.Double: window.AddOverlay(rect, 0d, test.Type.ToString()); break; case ImageTypes.RgbAlphaPixel: window.AddOverlay(rect, new RgbAlphaPixel(127, 0, 0, 0), test.Type.ToString()); break; case ImageTypes.RgbPixel: window.AddOverlay(rect, new RgbPixel(0, 0, 0), test.Type.ToString()); break; case ImageTypes.HsiPixel: window.AddOverlay(rect, new HsiPixel(0, 0, 0), test.Type.ToString()); break; case ImageTypes.LabPixel: window.AddOverlay(rect, new LabPixel(0, 0, 0), test.Type.ToString()); break; } window.WaitUntilClosed(); } } catch (Exception e) { Console.WriteLine(e.StackTrace); Console.WriteLine($"Failed to create ImageWindow from Array2D Type: {test.Type}"); throw; } } }
private static void Main(string[] args) { if (args.Length == 0) { Console.WriteLine("Give some image files as arguments to this program."); Console.WriteLine("Call this program like this:"); Console.WriteLine("./face_landmark_detection_ex shape_predictor_68_face_landmarks.dat faces/*.jpg"); Console.WriteLine("You can get the shape_predictor_68_face_landmarks.dat file from:"); Console.WriteLine("http://dlib.net/files/shape_predictor_68_face_landmarks.dat.bz2"); return; } using (var win = new ImageWindow()) using (var winFaces = new ImageWindow()) using (var detector = Dlib.GetFrontalFaceDetector()) using (var sp = ShapePredictor.Deserialize(args[0])) foreach (var file in args.ToList().GetRange(1, args.Length - 1)) { Console.WriteLine($"processing image {file}"); using (var img = Dlib.LoadImage <RgbPixel>(file)) { Dlib.PyramidUp(img); var dets = detector.Operator(img); Console.WriteLine($"Number of faces detected: {dets.Length}"); var shapes = new List <FullObjectDetection>(); foreach (var rect in dets) { var shape = sp.Detect(img, rect); Console.WriteLine($"number of parts: {shape.Parts}"); if (shape.Parts > 2) { Console.WriteLine($"pixel position of first part: {shape.GetPart(0)}"); Console.WriteLine($"pixel position of second part: {shape.GetPart(1)}"); shapes.Add(shape); } } win.ClearOverlay(); win.SetImage(img); if (shapes.Any()) { var lines = Dlib.RenderFaceDetections(shapes); win.AddOverlay(lines); foreach (var l in lines) { l.Dispose(); } var chipLocations = Dlib.GetFaceChipDetails(shapes); using (var faceChips = Dlib.ExtractImageChips <RgbPixel>(img, chipLocations)) using (var tileImage = Dlib.TileImages(faceChips)) winFaces.SetImage(tileImage); foreach (var c in chipLocations) { c.Dispose(); } } Console.WriteLine("hit enter to process next frame"); Console.ReadKey(); foreach (var s in shapes) { s.Dispose(); } } } }
private static void Main(string[] args)
{
    try
    {
        // In this example we are going to train a face detector based on the
        // small faces dataset in the examples/faces directory. So the first
        // thing we do is load that dataset. This means you need to supply the
        // path to this faces folder as a command line argument so we will know
        // where it is.
        if (args.Length != 1)
        {
            Console.WriteLine("Give the path to the examples/faces directory as the argument to this");
            Console.WriteLine("program. For example, if you are in the examples folder then execute ");
            Console.WriteLine("this program by running: ");
            Console.WriteLine("   ./dnn_mmod_ex faces");
            return;
        }

        var facesDirectory = args[0];

        // The faces directory contains a training dataset and a separate
        // testing dataset. The training data consists of 4 images, each
        // annotated with rectangles that bound each human face. The idea is
        // to use this training data to learn to identify human faces in new
        // images.
        //
        // Once you have trained an object detector it is always important to
        // test it on data it wasn't trained on. Therefore, we will also load
        // a separate testing set of 5 images. Once we have a face detector
        // created from the training data we will see how well it works by
        // running it on the testing images.
        //
        // So here we create the variables that will hold our dataset.
        // images_train will hold the 4 training images and face_boxes_train
        // holds the locations of the faces in the training images. So for
        // example, the image images_train[0] has the faces given by the
        // rectangles in face_boxes_train[0].
        IList<Matrix<RgbPixel>> imagesTrain;
        IList<Matrix<RgbPixel>> imagesTest;
        IList<IList<MModRect>> faceBoxesTrain;
        IList<IList<MModRect>> faceBoxesTest;

        // Now we load the data. These XML files list the images in each dataset
        // and also contain the positions of the face boxes. Obviously you can use
        // any kind of input format you like so long as you store the data into
        // images_train and face_boxes_train. But for convenience dlib comes with
        // tools for creating and loading XML image datasets. Here you see how to
        // load the data. To create the XML files you can use the imglab tool which
        // can be found in the tools/imglab folder. It is a simple graphical tool
        // for labeling objects in images with boxes. To see how to use it read the
        // tools/imglab/README.txt file.
        Dlib.LoadImageDataset(facesDirectory + "/training.xml", out imagesTrain, out faceBoxesTrain);
        Dlib.LoadImageDataset(facesDirectory + "/testing.xml", out imagesTest, out faceBoxesTest);

        Console.WriteLine($"num training images: {imagesTrain.Count()}");
        Console.WriteLine($"num testing images:  {imagesTest.Count()}");

        // The MMOD algorithm has some options you can set to control its behavior. However,
        // you can also call the constructor with your training annotations and a "target
        // object size" and it will automatically configure itself in a reasonable way for your
        // problem. Here we are saying that faces are still recognizably faces when they are
        // 40x40 pixels in size. You should generally pick the smallest size where this is
        // true. Based on this information the mmod_options constructor will automatically
        // pick a good sliding window width and height. It will also automatically set the
        // non-max-suppression parameters to something reasonable. For further details see the
        // mmod_options documentation.
        using (var options = new MModOptions(faceBoxesTrain, 40, 40))
        {
            // The detector will automatically decide to use multiple sliding windows if needed.
            // For the face data, only one is needed however.
            var detectorWindows = options.DetectorWindows.ToArray();
            Console.WriteLine($"num detector windows: {detectorWindows.Length}");
            foreach (var w in detectorWindows)
            {
                Console.WriteLine($"detector window width by height: {w.Width} x {w.Height}");
            }

            Console.WriteLine($"overlap NMS IOU thresh:             {options.OverlapsNms.GetIouThresh()}");
            Console.WriteLine($"overlap NMS percent covered thresh: {options.OverlapsNms.GetPercentCoveredThresh()}");

            // Now we are ready to create our network and trainer.
            using (var net = new LossMmod(options, 2))
            {
                // The MMOD loss requires that the number of filters in the final network layer equal
                // options.detector_windows.size(). So we set that here as well.
                using (var subnet = net.GetSubnet())
                using (var details = subnet.GetLayerDetails())
                {
                    details.SetNumFilters(detectorWindows.Length);

                    using (var trainer = new DnnTrainer<LossMmod>(net))
                    {
                        trainer.SetLearningRate(0.1);
                        trainer.BeVerbose();
                        trainer.SetSynchronizationFile("mmod_sync", 5 * 60);
                        trainer.SetIterationsWithoutProgressThreshold(300);

                        // Now let's train the network. We are going to use mini-batches of 150
                        // images. The images are random crops from our training set (see
                        // random_cropper_ex.cpp for a discussion of the random_cropper).
                        IEnumerable<Matrix<RgbPixel>> miniBatchSamples;
                        //IEnumerable<IEnumerable<RgbPixel>> mini_batch_labels;
                        IEnumerable<IEnumerable<MModRect>> miniBatchLabels;

                        using (var cropper = new RandomCropper())
                        using (var chipDims = new ChipDims(200, 200))
                        {
                            cropper.ChipDims = chipDims;

                            // Usually you want to give the cropper whatever min sizes you passed to the
                            // mmod_options constructor, which is what we do here.
                            cropper.SetMinObjectSize(40, 40);

                            using (var rnd = new Rand())
                            {
                                // Run the trainer until the learning rate gets small. This will probably take
                                // several hours.
                                while (trainer.GetLearningRate() >= 1e-4)
                                {
                                    cropper.Operator(150, imagesTrain, faceBoxesTrain, out miniBatchSamples, out miniBatchLabels);

                                    // We can also randomly jitter the colors and that often helps a detector
                                    // generalize better to new images.
                                    foreach (var img in miniBatchSamples)
                                    {
                                        Dlib.DisturbColors(img, rnd);
                                    }

                                    LossMmod.TrainOneStep(trainer, miniBatchSamples, miniBatchLabels);

                                    miniBatchSamples.DisposeElement();
                                    miniBatchLabels.DisposeElement();
                                }

                                // Wait for training threads to stop.
                                trainer.GetNet();
                                Console.WriteLine("done training");

                                // Save the network to disk.
                                net.Clean();
                                LossMmod.Serialize(net, "mmod_network.dat");

                                // Now that we have a face detector we can test it. The first statement tests it
                                // on the training data. It will print the precision, recall, and then average precision.
                                // This statement should indicate that the network works perfectly on the
                                // training data.
                                using (var matrix = Dlib.TestObjectDetectionFunction(net, imagesTrain, faceBoxesTrain))
                                    Console.WriteLine($"training results: {matrix}");

                                // However, to get an idea if it really worked without overfitting we need to run
                                // it on images it wasn't trained on. The next line does this. Happily,
                                // this statement indicates that the detector finds most of the faces in the
                                // testing data.
                                using (var matrix = Dlib.TestObjectDetectionFunction(net, imagesTest, faceBoxesTest))
                                    Console.WriteLine($"testing results:  {matrix}");

                                // If you are running many experiments, it's also useful to log the settings used
                                // during the training experiment. This statement will print the settings we used to
                                // the screen.
Console.WriteLine($"{trainer}{cropper}"); // Now lets run the detector on the testing images and look at the outputs. using (var win = new ImageWindow()) foreach (var img in imagesTest) { Dlib.PyramidUp(img); var dets = net.Operator(img); win.ClearOverlay(); win.SetImage(img); foreach (var d in dets[0]) { win.AddOverlay(d); } Console.ReadKey(); foreach (var det in dets) { foreach (var d in det) { d.Dispose(); } } } // Now that you finished this example, you should read dnn_mmod_train_find_cars_ex.cpp, // which is a more advanced example. It discusses many issues surrounding properly // setting the MMOD parameters and creating a good training dataset. } } } } } detectorWindows.DisposeElement(); } } catch (Exception e) { Console.WriteLine(e); } }