/// <summary>
/// Entry point. Registers three sub commands on a <see cref="CommandLineApplication"/>:
/// 'generate' (extract the helen zips, detect one face per annotated image and emit a
/// dlib-style XML dataset), 'train' (train a ShapePredictor from that XML) and
/// 'demo' (run a trained model on a single image and save annotated output).
/// </summary>
/// <param name="args">Raw command line arguments; dispatched by <c>app.Execute</c>.</param>
private static void Main(string[] args)
{
    var app = new CommandLineApplication(false);
    app.Name = nameof(HelenTraining);
    app.Description = "The program for training helen dataset";
    app.HelpOption("-h|--help");

    app.Command("generate", command =>
    {
        command.HelpOption("-?|-h|--help");
        var paddingOption = command.Option("-p|--padding", "padding of detected face", CommandOptionType.SingleValue);
        var modelsOption = command.Option("-m|--model", "model files directory path", CommandOptionType.SingleValue);
        command.OnExecute(() =>
        {
            // Validate all options up front; any failure prints help and exits non-zero.
            if (!modelsOption.HasValue())
            {
                Console.WriteLine("model option is missing");
                app.ShowHelp();
                return -1;
            }

            if (!paddingOption.HasValue())
            {
                Console.WriteLine("padding option is missing");
                app.ShowHelp();
                return -1;
            }

            var directory = modelsOption.Value();
            if (!Directory.Exists(directory))
            {
                Console.WriteLine($"'{directory}' is not found");
                app.ShowHelp();
                return -1;
            }

            if (!int.TryParse(paddingOption.Value(), out var padding))
            {
                Console.WriteLine($"padding '{paddingOption.Value()}' is not integer");
                app.ShowHelp();
                return -1;
            }

            Console.WriteLine($"Model: {directory}");
            Console.WriteLine($"Padding: {padding}");

            _FaceRecognition = FaceRecognition.Create(directory);

            // The helen dataset ships as one annotation archive plus five image archives.
            const string extractPath = "helen";
            var zips = new[]
            {
                new { Zip = "annotation.zip", IsImage = false, Directory = "annotation" },
                new { Zip = "helen_1.zip",    IsImage = true,  Directory = "helen_1" },
                new { Zip = "helen_2.zip",    IsImage = true,  Directory = "helen_2" },
                new { Zip = "helen_3.zip",    IsImage = true,  Directory = "helen_3" },
                new { Zip = "helen_4.zip",    IsImage = true,  Directory = "helen_4" },
                new { Zip = "helen_5.zip",    IsImage = true,  Directory = "helen_5" }
            };

            Directory.CreateDirectory(extractPath);

            // Extract each archive only once; presence of the target directory marks it done.
            foreach (var zip in zips)
            {
                if (!Directory.Exists(Path.Combine(extractPath, zip.Directory)))
                {
                    ZipFile.ExtractToDirectory(zip.Zip, extractPath);
                }
            }

            var annotation = zips.FirstOrDefault(arg => !arg.IsImage);
            var imageZips = zips.Where(arg => arg.IsImage).ToArray();
            if (annotation == null)
            {
                return -1;
            }

            var images = new List<Image>();
            foreach (var file in Directory.EnumerateFiles(Path.Combine(extractPath, annotation.Directory)))
            {
                Console.WriteLine($"Process: '{file}'");

                // Annotation format: first line is the image file name (without extension),
                // each following line is "x , y" for one landmark.
                var txt = File.ReadAllLines(file);
                var filename = txt[0];
                // BUG FIX: was the literal "(unknown).jpg" (filename unused), so no
                // image could ever be found; build the jpg name from the annotation.
                var jpg = $"{filename}.jpg";

                foreach (var imageZip in imageZips)
                {
                    var found = false;
                    var path = Path.Combine(extractPath, imageZip.Directory, jpg);
                    if (File.Exists(path))
                    {
                        found = true;

                        using (var fi = FaceRecognition.LoadImageFile(path))
                        {
                            // Only keep images with exactly one detected face; otherwise the
                            // landmarks cannot be associated unambiguously.
                            var locations = _FaceRecognition.FaceLocations(fi, 1, Model.Hog).ToArray();
                            if (locations.Length != 1)
                            {
                                Console.WriteLine($"\t'{path}' has {locations.Length} faces.");
                            }
                            else
                            {
                                var location = locations.First();

                                // Landmark coordinates are written as floats; truncate to int.
                                var parts = new List<Part>();
                                for (var i = 1; i < txt.Length; i++)
                                {
                                    var tmp = txt[i].Split(',').Select(s => s.Trim()).Select(float.Parse).Select(s => (int)s).ToArray();
                                    parts.Add(new Part { X = tmp[0], Y = tmp[1], Name = $"{i - 1}" });
                                }

                                // Grow the detected box by 'padding' on every side.
                                var image = new Image
                                {
                                    File = Path.Combine(imageZip.Directory, jpg),
                                    Box = new Box
                                    {
                                        Left = location.Left - padding,
                                        Top = location.Top - padding,
                                        Width = location.Right - location.Left + 1 + padding * 2,
                                        Height = location.Bottom - location.Top + 1 + padding * 2,
                                        Part = parts.ToArray()
                                    }
                                };

                                // Save a visual check image (red box + green landmark dots).
                                using (var bitmap = System.Drawing.Image.FromFile(path))
                                {
                                    var b = image.Box;
                                    using (var g = Graphics.FromImage(bitmap))
                                    {
                                        using (var p = new Pen(Color.Red, bitmap.Width / 400f))
                                            g.DrawRectangle(p, b.Left, b.Top, b.Width, b.Height);

                                        foreach (var part in b.Part)
                                        {
                                            g.FillEllipse(Brushes.GreenYellow, part.X, part.Y, 5, 5);
                                        }
                                    }

                                    var result = Path.Combine(extractPath, "Result");
                                    Directory.CreateDirectory(result);
                                    bitmap.Save(Path.Combine(result, jpg), ImageFormat.Jpeg);
                                }

                                images.Add(image);
                            }
                        }
                    }

                    // Each annotation belongs to exactly one image archive; stop at first hit.
                    if (found)
                    {
                        break;
                    }
                }
            }

            // Serialize the dataset as dlib-compatible XML (BOM-less UTF-8, with the
            // stylesheet processing instruction that imglab emits).
            var dataset = new Dataset
            {
                Name = "helen dataset",
                Comment = "Created by Takuya Takeuchi.",
                Images = images.ToArray()
            };

            var settings = new XmlWriterSettings();
            using (var sw = new StreamWriter(Path.Combine(extractPath, "helen-dataset.xml"), false, new System.Text.UTF8Encoding(false)))
            using (var writer = XmlWriter.Create(sw, settings))
            {
                writer.WriteProcessingInstruction("xml-stylesheet", @"type=""text/xsl"" href=""image_metadata_stylesheet.xsl""");
                var serializer = new XmlSerializer(typeof(Dataset));
                serializer.Serialize(writer, dataset);
            }

            return 0;
        });
    });

    app.Command("train", command =>
    {
        command.HelpOption("-?|-h|--help");
        var threadOption = command.Option("-t|--threads", "number of threads", CommandOptionType.SingleValue);
        var xmlOption = command.Option("-x|--xml", "generated xml file from helen dataset", CommandOptionType.SingleValue);
        command.OnExecute(() =>
        {
            if (!xmlOption.HasValue())
            {
                Console.WriteLine("xml option is missing");
                app.ShowHelp();
                return -1;
            }

            if (!threadOption.HasValue())
            {
                Console.WriteLine("thread option is missing");
                app.ShowHelp();
                return -1;
            }

            var xmlFile = xmlOption.Value();
            if (!File.Exists(xmlFile))
            {
                Console.WriteLine($"'{xmlFile}' is not found");
                app.ShowHelp();
                return -1;
            }

            if (!uint.TryParse(threadOption.Value(), out var thread))
            {
                Console.WriteLine($"thread '{threadOption.Value()}' is not integer");
                app.ShowHelp();
                return -1;
            }

            Dlib.LoadImageDataset(xmlFile, out Array<Array2D<byte>> imagesTrain, out var facesTrain);

            using (var trainer = new ShapePredictorTrainer())
            {
                trainer.NumThreads = thread;
                trainer.BeVerbose();

                Console.WriteLine("Start training");
                using (var predictor = trainer.Train(imagesTrain, facesTrain))
                {
                    Console.WriteLine("Finish training");

                    // Write <xml-name>.dat next to the input XML.
                    var directory = Path.GetDirectoryName(xmlFile);
                    var output = Path.Combine(directory, $"{Path.GetFileNameWithoutExtension(xmlFile)}.dat");
                    ShapePredictor.Serialize(predictor, output);
                }
            }

            return 0;
        });
    });

    app.Command("demo", command =>
    {
        command.HelpOption("-?|-h|--help");
        var imageOption = command.Option("-i|--image", "test image file", CommandOptionType.SingleValue);
        var modelOption = command.Option("-m|--model", "model file", CommandOptionType.SingleValue);
        var directoryOption = command.Option("-d|--directory", "model files directory path", CommandOptionType.SingleValue);
        command.OnExecute(() =>
        {
            if (!imageOption.HasValue())
            {
                Console.WriteLine("image option is missing");
                app.ShowHelp();
                return -1;
            }

            if (!directoryOption.HasValue())
            {
                Console.WriteLine("directory option is missing");
                app.ShowHelp();
                return -1;
            }

            if (!modelOption.HasValue())
            {
                Console.WriteLine("model option is missing");
                app.ShowHelp();
                return -1;
            }

            var modelFile = modelOption.Value();
            if (!File.Exists(modelFile))
            {
                Console.WriteLine($"'{modelFile}' is not found");
                app.ShowHelp();
                return -1;
            }

            var imageFile = imageOption.Value();
            if (!File.Exists(imageFile))
            {
                Console.WriteLine($"'{imageFile}' is not found");
                app.ShowHelp();
                return -1;
            }

            var directory = directoryOption.Value();
            if (!Directory.Exists(directory))
            {
                Console.WriteLine($"'{directory}' is not found");
                app.ShowHelp();
                return -1;
            }

            _FaceRecognition = FaceRecognition.Create(directory);

            using (var predictor = ShapePredictor.Deserialize(modelFile))
            using (var image = FaceRecognition.LoadImageFile(imageFile))
            using (var mat = Dlib.LoadImageAsMatrix<RgbPixel>(imageFile))
            using (var bitmap = (Bitmap)System.Drawing.Image.FromFile(imageFile))
            using (var white = new Bitmap(bitmap.Width, bitmap.Height))
            using (var g = Graphics.FromImage(bitmap))
            using (var gw = Graphics.FromImage(white))
            {
                var loc = _FaceRecognition.FaceLocations(image).FirstOrDefault();
                if (loc == null)
                {
                    Console.WriteLine("No face is detected");
                    return 0;
                }

                var b = new DlibDotNet.Rectangle(loc.Left, loc.Top, loc.Right, loc.Bottom);
                var detection = predictor.Detect(mat, b);

                // Draw the face box on both the photo and a white index sheet.
                // (Consistency fix: was 'loc.Left' mixed with 'b.*'; b is built from loc,
                // so the value is identical, but use b for all four coordinates.)
                using (var p = new Pen(Color.Red, bitmap.Width / 200f))
                {
                    g.DrawRectangle(p, b.Left, b.Top, b.Width, b.Height);
                    gw.Clear(Color.White);
                    gw.DrawRectangle(p, b.Left, b.Top, b.Width, b.Height);
                }

                // Landmarks: filled dots on the photo, numbered labels on the white sheet.
                for (int i = 0, parts = (int)detection.Parts; i < parts; i++)
                {
                    var part = detection.GetPart((uint)i);
                    g.FillEllipse(Brushes.GreenYellow, part.X, part.Y, 15, 15);
                    gw.DrawString($"{i}", SystemFonts.DefaultFont, Brushes.Black, part.X, part.Y);
                }

                bitmap.Save("demo.jpg", ImageFormat.Jpeg);
                white.Save("white.jpg", ImageFormat.Jpeg);
            }

            return 0;
        });
    });

    app.Execute(args);
}
/// <summary>
/// Trains a shape_predictor on the small faces dataset in examples/faces,
/// evaluates it on the separate testing set, and serializes the model to "sp.dat".
/// Port of dlib's train_shape_predictor_ex example.
/// </summary>
/// <param name="args">Single argument: path to the examples/faces directory.</param>
private static void Main(string[] args)
{
    try
    {
        // We need the path to the faces folder on the command line so we know
        // where the training/testing XML files and images live.
        if (args.Length != 1)
        {
            Console.WriteLine("Give the path to the examples/faces directory as the argument to this");
            Console.WriteLine("program. For example, if you are in the examples folder then execute ");
            Console.WriteLine("this program by running: ");
            Console.WriteLine("   ./train_shape_predictor_ex faces");
            Console.WriteLine();
            return;
        }

        var facesDirectory = args[0];

        // The faces directory contains a training dataset (4 images annotated
        // with face rectangles and 68 landmarks each) and a separate testing
        // dataset (5 images). We train on the former and measure accuracy on
        // the latter, since a model should always be tested on data it wasn't
        // trained on.
        //
        // images*: the pixel data. faces*: the face boxes and landmark poses,
        // parallel to the images (facesTrain[0] annotates imagesTrain[0]).
        // (Consistency fix: locals renamed from snake_case faces_train/faces_test
        // to camelCase, matching imagesTrain/imagesTest.)
        Array<Array2D<byte>> imagesTrain;
        Array<Array2D<byte>> imagesTest;
        IList<IList<FullObjectDetection>> facesTrain, facesTest;

        // The XML files list the images in each dataset and contain the
        // positions of the face boxes and landmarks (called parts in the XML).
        // dlib's imglab tool (tools/imglab) can create such files; see
        // tools/imglab/README.txt for how to use it.
        Dlib.LoadImageDataset(Path.Combine(facesDirectory, "training_with_face_landmarks.xml"), out imagesTrain, out facesTrain);
        Dlib.LoadImageDataset(Path.Combine(facesDirectory, "testing_with_face_landmarks.xml"), out imagesTest, out facesTest);

        // Now make the object responsible for training the model.
        using (var trainer = new ShapePredictorTrainer())
        {
            // The trainer has many tunable parameters (see the
            // shape_predictor_trainer documentation and Kazemi's paper). We
            // deviate from the defaults because the dataset is tiny:
            // a high oversampling amount (300) effectively boosts the
            // training set size.
            trainer.OverSamplingAmount = 300;

            // Reduce model capacity: stronger regularization (smaller nu)
            // and shallower trees.
            trainer.Nu = 0.05d;
            trainer.TreeDepth = 2;

            // Parts of the training process are parallelized; use 2 threads.
            trainer.NumThreads = 2;

            // Print status messages so we can see training progress.
            trainer.BeVerbose();

            // Now finally generate the shape model.
            using (var sp = trainer.Train(imagesTrain, facesTrain))
            {
                // TestShapePredictor measures the average distance between the
                // predicted landmarks and the ground truth, scaled here by the
                // interocular distance (customary for face landmarking).
                Console.WriteLine($"mean training error: {Dlib.TestShapePredictor(sp, imagesTrain, facesTrain, GetInterocularDistances(facesTrain))}");

                // The real test: accuracy on held-out data. With this tiny
                // training set it won't be extremely high, but training on a
                // large landmarking dataset yields state-of-the-art results
                // (see the Kazemi paper).
                Console.WriteLine($"mean testing error: {Dlib.TestShapePredictor(sp, imagesTest, facesTest, GetInterocularDistances(facesTest))}");

                // Finally, save the model to disk so we can use it later.
                ShapePredictor.Serialize(sp, "sp.dat");
            }
        }
    }
    catch (Exception e)
    {
        Console.WriteLine("\nexception thrown!");
        Console.WriteLine(e.Message);
    }
}