public async Task<MatrixFloatDto[]> GetFaceDescriptors(string filename, System.Drawing.Rectangle[] faces)
{
    var chips = new List<Matrix<RgbPixel>>();
    using var img = await DlibHelpers.LoadRotatedImage(imageRotationService, filename);

    foreach (var face in faces.Select(x => new Rectangle(x.Left, x.Top, x.Right, x.Bottom)))
    {
        // detect landmarks
        var shape = predictor.Detect(img, face);

        // extract the normalized and rotated 150x150 face chip
        var faceChipDetail = Dlib.GetFaceChipDetails(shape, 150, 0.25);
        var faceChip = Dlib.ExtractImageChip<RgbPixel>(img, faceChipDetail);

        // convert the chip to a matrix and store it
        chips.Add(new Matrix<RgbPixel>(faceChip));
    }

    if (!chips.Any())
    {
        return Array.Empty<MatrixFloatDto>();
    }

    // put each face in a 128D embedding space;
    // similar faces will be placed close together
    var descriptors = dnn.Operator(chips);

    return descriptors
        .Select(x => new MatrixFloatDto
        {
            Data = x.ToArray(),
            Row = x.Rows,
            Columns = x.Columns,
        })
        .ToArray();
}
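A minimal usage sketch for GetFaceDescriptors, assuming the containing class wires up its predictor and dnn fields from the standard dlib model files (the file names, the faceService instance, and the sample rectangle are illustrative, not part of the original):

// Hypothetical wiring; ShapePredictor.Deserialize and LossMetric.Deserialize
// are the DlibDotNet loaders for the standard dlib model files.
using var predictor = ShapePredictor.Deserialize("shape_predictor_68_face_landmarks.dat");
using var dnn = LossMetric.Deserialize("dlib_face_recognition_resnet_model_v1.dat");

// One previously detected face region (coordinates are made up for the example).
var faceRects = new[] { new System.Drawing.Rectangle(80, 60, 150, 150) };
var descriptors = await faceService.GetFaceDescriptors("photo.jpg", faceRects);
Console.WriteLine($"{descriptors.Length} face(s), descriptor length {descriptors[0].Data.Length}"); // 128 per face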
public static IEnumerable<IEnumerable<Matrix<double>>> BatchComputeFaceDescriptors(LossMetric net, IList<Image> batchImages, IList<IEnumerable<FullObjectDetection>> batchFaces, int numJitters)
{
    if (batchImages.Count != batchFaces.Count)
    {
        throw new ArgumentException("The array of images and the array of arrays of locations must be of the same size");
    }

    foreach (var faces in batchFaces)
    {
        foreach (var f in faces)
        {
            if (f.Parts != 68 && f.Parts != 5)
            {
                throw new ArgumentException("The full_object_detection must use the iBUG 300W 68 point face landmark style or dlib's 5 point style.");
            }
        }
    }

    // extract a normalized 150x150 chip for every face in every image
    var faceChips = new List<Matrix<RgbPixel>>();
    for (var i = 0; i < batchImages.Count; ++i)
    {
        var faces = batchFaces[i];
        var img = batchImages[i];

        var dets = new List<ChipDetails>();
        foreach (var f in faces)
        {
            dets.Add(DlibDotNet.Dlib.GetFaceChipDetails(f, 150, 0.25));
        }

        var thisImageFaceChips = DlibDotNet.Dlib.ExtractImageChips<RgbPixel>(img.Matrix, dets);
        faceChips.AddRange(thisImageFaceChips);
    }

    var faceDescriptors = new List<List<Matrix<double>>>();
    if (numJitters <= 1)
    {
        // extract descriptors and convert from float vectors to double vectors
        var descriptors = net.Operator(faceChips, 16);
        var list = descriptors.ToArray();
        var index = 0;
        for (var i = 0; i < batchFaces.Count; ++i)
        {
            faceDescriptors.Add(new List<Matrix<double>>());
            for (var j = 0; j < batchFaces[i].Count(); ++j)
            {
                faceDescriptors[i].Add(DlibDotNet.Dlib.MatrixCast<double>(list[index++]));
            }
        }

        if (index != list.Length)
        {
            throw new ApplicationException();
        }
    }
    else
    {
        // extract descriptors: embed several jittered copies of each chip
        // and average them into a single, more stable descriptor
        var index = 0;
        for (var i = 0; i < batchFaces.Count; ++i)
        {
            // note: the inner list must be created here, otherwise faceDescriptors[i] throws
            faceDescriptors.Add(new List<Matrix<double>>());
            for (var j = 0; j < batchFaces[i].Count(); ++j)
            {
                var tmp = JitterImage(faceChips[index++], numJitters);
                var tmp2 = net.Operator(tmp, 16);
                var mat = DlibDotNet.Dlib.Mat(tmp2);
                var r = DlibDotNet.Dlib.Mean<double>(mat);
                faceDescriptors[i].Add(r);
            }
        }

        if (index != faceChips.Count)
        {
            throw new ApplicationException();
        }
    }

    return faceDescriptors;
}
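BatchComputeFaceDescriptors calls a JitterImage helper that is not shown above. A plausible sketch, modeled on dlib's jitter_image idiom via DlibDotNet's Dlib.JitterImage wrapper (the static Rand field and the exact method shape are assumptions):

// Assumed helper: make numJitters copies of img, each slightly zoomed, rotated,
// translated, and randomly mirrored by Dlib.JitterImage; averaging the embeddings
// of these copies is what makes the jittered descriptor more stable.
private static readonly Rand Rnd = new Rand();

private static IEnumerable<Matrix<RgbPixel>> JitterImage(Matrix<RgbPixel> img, int numJitters)
{
    var crops = new List<Matrix<RgbPixel>>();
    for (var i = 0; i < numJitters; ++i)
    {
        crops.Add(Dlib.JitterImage(img, Rnd));
    }
    return crops;
}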
private static void Main()
{
    try
    {
        // The API for doing metric learning is very similar to the API for
        // multi-class classification. In fact, the inputs are the same: a bunch of
        // labeled objects. So here we create our dataset. We make up some simple
        // vectors and label them with the integers 1, 2, 3, 4. The specific values of
        // the integer labels don't matter.
        var samples = new List<Matrix<double>>();
        var labels = new List<uint>();

        // class 1 training vectors
        samples.Add(new Matrix<double>(new MatrixTemplateSizeParameter(0, 1), new double[] { 1, 0, 0, 0, 0, 0, 0, 0 })); labels.Add(1);
        samples.Add(new Matrix<double>(new MatrixTemplateSizeParameter(0, 1), new double[] { 0, 1, 0, 0, 0, 0, 0, 0 })); labels.Add(1);

        // class 2 training vectors
        samples.Add(new Matrix<double>(new MatrixTemplateSizeParameter(0, 1), new double[] { 0, 0, 1, 0, 0, 0, 0, 0 })); labels.Add(2);
        samples.Add(new Matrix<double>(new MatrixTemplateSizeParameter(0, 1), new double[] { 0, 0, 0, 1, 0, 0, 0, 0 })); labels.Add(2);

        // class 3 training vectors
        samples.Add(new Matrix<double>(new MatrixTemplateSizeParameter(0, 1), new double[] { 0, 0, 0, 0, 1, 0, 0, 0 })); labels.Add(3);
        samples.Add(new Matrix<double>(new MatrixTemplateSizeParameter(0, 1), new double[] { 0, 0, 0, 0, 0, 1, 0, 0 })); labels.Add(3);

        // class 4 training vectors
        samples.Add(new Matrix<double>(new MatrixTemplateSizeParameter(0, 1), new double[] { 0, 0, 0, 0, 0, 0, 1, 0 })); labels.Add(4);
        samples.Add(new Matrix<double>(new MatrixTemplateSizeParameter(0, 1), new double[] { 0, 0, 0, 0, 0, 0, 0, 1 })); labels.Add(4);

        // Make a network that simply learns a linear mapping from 8D vectors to 2D vectors.
        using (var net = new LossMetric(1))
        using (var trainer = new DnnTrainer<LossMetric>(net))
        {
            trainer.SetLearningRate(0.1);

            // It should be emphasized that it's really important that each mini-batch contain
            // multiple instances of each class of object. This is because the metric learning
            // algorithm needs to consider pairs of objects that should be close as well as pairs
            // of objects that should be far apart during each training step. Here we just keep
            // training on the same small batch, so this constraint is trivially satisfied.
            while (trainer.GetLearningRate() >= 1e-4)
            {
                LossMetric.TrainOneStep(trainer, samples, labels);
            }

            // Wait for training threads to stop
            trainer.GetNet().Dispose();
            Console.WriteLine("done training");

            // Run all the samples through the network to get their 2D vector embeddings.
            var embedded = net.Operator(samples);

            // Print the embedding for each sample to the screen. If you look at the
            // outputs carefully you should notice that they are grouped together in 2D
            // space according to their label.
            for (var i = 0; i < embedded.Count(); ++i)
            {
                using (var trans = Dlib.Trans(embedded[i]))
                    Console.Write($"label: {labels[i]}\t{trans}");
            }

            // Now, check if the embedding puts things with the same labels near each other and
            // things with different labels far apart.
            var numRight = 0;
            var numWrong = 0;
            for (var i = 0; i < embedded.Count(); ++i)
            {
                for (var j = i + 1; j < embedded.Count(); ++j)
                {
                    if (labels[i] == labels[j])
                    {
                        // The loss_metric layer will cause things with the same label to be less
                        // than net.loss_details().get_distance_threshold() distance from each
                        // other. So we can use that distance value as our testing threshold for
                        // "being near to each other".
                        if (Dlib.Length(embedded[i] - embedded[j]) < net.GetLossDetails().GetDistanceThreshold())
                        {
                            ++numRight;
                        }
                        else
                        {
                            ++numWrong;
                        }
                    }
                    else
                    {
                        if (Dlib.Length(embedded[i] - embedded[j]) >= net.GetLossDetails().GetDistanceThreshold())
                        {
                            ++numRight;
                        }
                        else
                        {
                            ++numWrong;
                        }
                    }
                }
            }

            Console.WriteLine($"num_right: {numRight}");
            Console.WriteLine($"num_wrong: {numWrong}");
        }
    }
    catch (Exception e)
    {
        Console.WriteLine(e);
    }
}
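After the training loop finishes, the learned mapping can be persisted so the embedding does not have to be retrained on every run. A minimal sketch, assuming LossMetric exposes the same Clean/Serialize/Deserialize pattern as DlibDotNet's other Loss* network types (the file name is hypothetical):

// Persist the trained embedding network and reload it later.
// "metric_network.dat" is an illustrative file name; Clean() drops
// transient training state before serialization.
net.Clean();
LossMetric.Serialize(net, "metric_network.dat");

using var reloaded = LossMetric.Deserialize("metric_network.dat");
var embeddedAgain = reloaded.Operator(samples); // same 2D embeddings as before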
public async Task ProcessAsync(string[] inputFilenames)
{
    var chips = new List<Matrix<RgbPixel>>();
    var faces = new List<Rectangle>();
    var filenames = new List<string>();
    var jsonFilename = inputFilenames.First() + ".json";

    foreach (var inputFilename in inputFilenames)
    {
        if (!File.Exists(inputFilename))
        {
            break;
        }

        // skip the detection work when the results have already been cached
        if (File.Exists(jsonFilename))
        {
            continue;
        }

        // load the image
        using var img = await DlibHelpers.LoadRotatedImage(imageRotationService, inputFilename);

        // detect all faces
        foreach (var face in detector.Operator(img))
        {
            // detect landmarks
            var shape = predictor.Detect(img, face);

            // extract the normalized and rotated 150x150 face chip
            var faceChipDetail = Dlib.GetFaceChipDetails(shape, 150, 0.25);
            var faceChip = Dlib.ExtractImageChip<RgbPixel>(img, faceChipDetail);

            // convert the chip to a matrix and store it
            chips.Add(new Matrix<RgbPixel>(faceChip));
            faces.Add(face);
            filenames.Add(inputFilename);
        }
    }

    if (!File.Exists(jsonFilename))
    {
        var ffd = new FoundFacesData
        {
            Faces = faces,
            Filenames = filenames,
        };

        if (chips.Any())
        {
            // put each face in a 128D embedding space;
            // similar faces will be placed close together
            OutputLabels<Matrix<float>> descriptors = dnn.Operator(chips);
            ffd.Descriptors = descriptors.ToList();
        }
        else
        {
            ffd.Descriptors = new List<Matrix<float>>(0);
        }

        var dto = new FoundFacesDataDto
        {
            Faces = ffd.Faces
                .Select(f => new RectangleDto
                {
                    Bottom = f.Bottom,
                    Left = f.Left,
                    Top = f.Top,
                    Right = f.Right,
                })
                .ToList(),
            Filenames = ffd.Filenames,
            Descriptors = ffd.Descriptors
                .Select(x => new MatrixFloatDto
                {
                    Data = x.ToArray(),
                    Row = x.Rows,
                    Columns = x.Columns,
                })
                .ToList()
        };

        File.WriteAllText(jsonFilename, JsonConvert.SerializeObject(dto));
    }

    FoundFacesData items;
    using (var r = new StreamReader(jsonFilename))
    {
        var json = r.ReadToEnd();
        var itemsdto = JsonConvert.DeserializeObject<FoundFacesDataDto>(json);
        items = new FoundFacesData
        {
            Faces = itemsdto.Faces.Select(f => new Rectangle(f.Left, f.Top, f.Right, f.Bottom)).ToList(),
            Filenames = itemsdto.Filenames.ToList(),
            Descriptors = itemsdto.Descriptors.Select(d => new Matrix<float>(d.Data, d.Row, d.Columns)).ToList(),
        };
    }

    if (items.Faces.Count <= 0)
    {
        return;
    }

    // compare each face with all other faces
    var edges = new List<SamplePair>();
    for (var i = 0; i < items.Descriptors.Count; ++i)
    {
        for (var j = i; j < items.Descriptors.Count; ++j)
        {
            // record every pair of two similar faces;
            // faces are similar if they are less than 0.4 apart in the 128D embedding space
            if (Dlib.Length(items.Descriptors[i] - items.Descriptors[j]) < 0.4)
            {
                edges.Add(new SamplePair((uint)i, (uint)j));
            }
        }
    }

    // use the chinese whispers algorithm to find all face clusters
    Dlib.ChineseWhispers(edges, 100, out var clusters, out var labels);
    // Console.WriteLine($" Found {clusters} unique person(s) in the image");

    // draw a rectangle on each face using its cluster color
    for (var i = 0; i < items.Faces.Count; i++)
    {
        var color = palette[0];
        if (labels[i] < palette.Length)
        {
            color = palette[labels[i]];
        }

        if (!File.Exists(items.Filenames[i] + $"_x{labels[i]}.jpg"))
        {
            using var img2 = await DlibHelpers.LoadRotatedImage(imageRotationService, items.Filenames[i]);
            Dlib.SaveJpeg(img2, items.Filenames[i] + $"_x{labels[i]}.jpg", 25);
        }

        using var img = Dlib.LoadImage<RgbPixel>(items.Filenames[i] + $"_x{labels[i]}.jpg");
        Dlib.DrawRectangle(img, items.Faces[i], color: color, thickness: 4);
        Dlib.SaveJpeg(img, items.Filenames[i] + $"_x{labels[i]}.jpg", 25);
    }
}
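ProcessAsync indexes into a palette field that is not defined in the listing. Any fixed set of distinct RgbPixel values works; a minimal sketch (the specific colors are arbitrary):

// Hypothetical cluster color palette; any distinct RgbPixel values will do.
// Index 0 doubles as the fallback color for clusters beyond the palette.
private static readonly RgbPixel[] palette =
{
    new RgbPixel(255, 255, 255), // cluster 0 / fallback
    new RgbPixel(255, 0, 0),     // cluster 1
    new RgbPixel(0, 255, 0),     // cluster 2
    new RgbPixel(0, 0, 255),     // cluster 3
    new RgbPixel(255, 255, 0),   // cluster 4
};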