/// <summary>
/// Picks the <c>Offset</c> for this kernel and then encodes it into the command buffer through the base
/// implementation. The operation is out-of-place.
/// With padding enabled, the offset is derived from the source and destination sizes, the strides and the
/// kernel size, mirroring the way TensorFlow computes its padding (see the header comments in
/// https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/ops/nn.py).
/// Without padding, the offset is simply half the kernel size on each axis.
/// </summary>
/// <param name="commandBuffer">A valid MTLCommandBuffer to receive the encoded filter.</param>
/// <param name="sourceImage">A valid MPSImage object containing the source image.</param>
/// <param name="destinationImage">A valid MPSImage to be overwritten by the result; it may not alias <paramref name="sourceImage"/>.</param>
public override void EncodeToCommandBuffer(IMTLCommandBuffer commandBuffer, MPSImage sourceImage, MPSImage destinationImage)
{
    var kernelOffset = new MPSOffset { Z = 0 };

    if (!padding)
    {
        // No padding: start half a kernel in from the top-left corner.
        kernelOffset.X = (nint)(KernelWidth / 2);
        kernelOffset.Y = (nint)(KernelHeight / 2);
    }
    else
    {
        // Total padding needed per axis so the strided kernel covers the whole source.
        var totalPadY = ((destinationImage.Height - 1) * StrideInPixelsY + KernelHeight - sourceImage.Height);
        var totalPadX = ((destinationImage.Width - 1) * StrideInPixelsX + KernelWidth - sourceImage.Width);

        // Half of the total goes to the top/left edge.
        var padTop = totalPadY / 2;
        var padLeft = totalPadX / 2;

        kernelOffset.X = (nint)(KernelWidth / 2 - padLeft);
        kernelOffset.Y = (nint)(KernelHeight / 2 - padTop);
    }

    Offset = kernelOffset;
    base.EncodeToCommandBuffer(commandBuffer, sourceImage, destinationImage);
}
/// <summary>
/// Encodes every layer of the network into a fresh command buffer, commits it, and reads the predicted
/// digit back in a completion handler.
/// </summary>
/// <param name="inputImage">Image the network runs on; when null, <c>SrcImage</c> is used instead.</param>
/// <param name="imageNum">Index of the test image being evaluated. The call blocks until the GPU finishes only when this is 9999 or <paramref name="inputImage"/> is null.</param>
/// <param name="correctLabel">Expected label for the image; when the prediction matches it, <c>Atomics.Increment</c> is called.</param>
/// <returns>
/// The label written by the completion handler. When the call does not wait for completion, the handler
/// has usually not run yet and the placeholder value 99 is returned.
/// </returns>
public virtual uint Forward (MPSImage inputImage = null, int imageNum = 9999, int correctLabel = 10)
{
    uint label = 99;

    // Get command buffer to use in MetalPerformanceShaders.
    using (var commandBuffer = commandQueue.CommandBuffer ())
    {
        // The softmax output is stored in this image; it lives until the completion handler has read it.
        var finalLayer = new MPSImage (commandBuffer.Device, DID);

        // Encode layers to the Metal command buffer, falling back to the built-in source image.
        layer.EncodeToCommandBuffer (commandBuffer, inputImage ?? SrcImage, dstImage);
        softmax.EncodeToCommandBuffer (commandBuffer, dstImage, finalLayer);

        // Read the label the moment the GPU is done and compare it to the expected output.
        commandBuffer.AddCompletedHandler (buffer =>
        {
            try
            {
                label = GetLabel (finalLayer);
                if (correctLabel == label)
                {
                    Atomics.Increment ();
                }
            }
            finally
            {
                // Nothing reads the output image after this point; release it deterministically
                // instead of leaving one texture per call to the finalizer.
                finalLayer.Dispose ();
            }
        });

        // Commit the command buffer to run on the GPU, and wait only when the caller needs the result now.
        commandBuffer.Commit ();
        if (imageNum == 9999 || inputImage == null)
        {
            commandBuffer.WaitUntilCompleted ();
        }
    }

    return label;
}
/// <summary>
/// Copies the output probabilities of <paramref name="finalLayer"/> to the CPU, converts them from
/// 16-bit to 32-bit floats and returns the index of the largest one.
/// </summary>
/// <param name="finalLayer">Output image of the network holding one probability per digit.</param>
/// <returns>
/// The digit (0-9) with the highest probability, or 10 when no probability is greater than zero.
/// </returns>
public uint GetLabel(MPSImage finalLayer)
{
    // Only 10 labels are produced, but each texture slice carries 4 half-float channels
    // (the original note gives the format as RGBAFloat16), so 3 slices yield 3 * 4 = 12 values.
    var resultHalfArray = Enumerable.Repeat((ushort)6, 12).ToArray();
    var resultFloatArray = Enumerable.Repeat(0.3f, 10).ToArray();

    // Pin both arrays for the native calls; the handles are released in finally blocks so an
    // exception from Metal or vImage cannot leave the arrays pinned forever.
    var resultHalfArrayHandle = GCHandle.Alloc(resultHalfArray, GCHandleType.Pinned);
    try
    {
        var resultFloatArrayHandle = GCHandle.Alloc(resultFloatArray, GCHandleType.Pinned);
        try
        {
            var resultHalfArrayPtr = resultHalfArrayHandle.AddrOfPinnedObject();
            var resultFloatArrayPtr = resultFloatArrayHandle.AddrOfPinnedObject();

            // Read the single pixel of each of the 3 slices, 4 half-floats at a time.
            for (uint i = 0; i <= 2; i++)
            {
                finalLayer.Texture.GetBytes(resultHalfArrayPtr + 4 * (int)i * sizeof(ushort),
                                            sizeof(ushort) * 1 * 4,
                                            sizeof(ushort) * 1 * 1 * 4,
                                            new MTLRegion(new MTLOrigin(0, 0, 0), new MTLSize(1, 1, 1)),
                                            0,
                                            i);
            }

            // vImage converts the first 10 half-floats to 32-bit floats so they can be compared in C#.
            var fullResultVImagebuf = new vImageBuffer
            {
                Data = resultFloatArrayPtr,
                Height = 1,
                Width = 10,
                BytesPerRow = 10 * 4
            };
            var halfResultVImagebuf = new vImageBuffer
            {
                Data = resultHalfArrayPtr,
                Height = 1,
                Width = 10,
                BytesPerRow = 10 * 2
            };

            // NOTE(review): on failure the scan below still runs on whatever is in the float buffer
            // (presumably the 0.3f placeholders) — confirm whether that is the intended fallback.
            if (Planar16FtoPlanarF(ref halfResultVImagebuf, ref fullResultVImagebuf, 0) != vImageError.NoError)
            {
                Console.WriteLine("Error in vImage");
            }
        }
        finally
        {
            resultFloatArrayHandle.Free();
        }
    }
    finally
    {
        resultHalfArrayHandle.Free();
    }

    // Linear scan for the label with the highest probability.
    float max = 0f;
    uint mostProbableDigit = 10;
    for (uint i = 0; i <= 9; i++)
    {
        if (max < resultFloatArray[i])
        {
            max = resultFloatArray[i];
            mostProbableDigit = i;
        }
    }

    return mostProbableDigit;
}
/// <summary>
/// Builds a training batch by drawing <paramref name="batchSize"/> random samples (with repetition
/// possible) from the loaded image and label data.
/// </summary>
/// <param name="device">Metal device on which the images and loss labels are created.</param>
/// <param name="batchSize">Number of samples to draw.</param>
/// <returns>
/// The input images and, at the same positions, the loss-label states holding each sample's
/// one-hot encoded label.
/// </returns>
public (NSArray<MPSImage> Inputs, NSArray<MPSState> Losses) GetRandomBatch(IMTLDevice device, int batchSize)
{
    // Length of the one-hot label vector handed to the loss descriptor.
    const int labelSlotCount = 12;

    var imageDescriptor = MPSImageDescriptor.GetImageDescriptor(
        MPSImageFeatureChannelFormat.Unorm8,
        ImageSize,
        ImageSize,
        1,
        1,
        MTLTextureUsage.ShaderWrite | MTLTextureUsage.ShaderRead);

    var inputs = new List<MPSImage>();
    var losses = new List<MPSState>();

    unsafe
    {
        fixed (byte* imageBytes = imagesData)
        fixed (byte* labelBytes = labelsData)
        {
            for (var sample = 0; sample < batchSize; sample++)
            {
                var pick = random.Next(numImages);

                // Image: copy the picked sample's pixels, which follow the file prefix.
                var image = new MPSImage(device, imageDescriptor) { Label = "TrainImage" + sample };
                inputs.Add(image);
                var pixels = imageBytes + ImagesPrefixSize + pick * ImageSize * ImageSize;
                image.WriteBytes((IntPtr)pixels, MPSDataLayout.HeightPerWidthPerFeatureChannels, 0);

                // Label: one byte per sample after the prefix, expanded into a one-hot vector.
                var labelAddress = labelBytes + LabelsPrefixSize + pick;
                var oneHot = new float[labelSlotCount];
                oneHot[*labelAddress] = 1;

                fixed (void* oneHotBytes = oneHot)
                {
                    using var labelData = NSData.FromBytes((IntPtr)oneHotBytes, labelSlotCount * sizeof(float));
                    var lossDescriptor = MPSCnnLossDataDescriptor.Create(
                        labelData,
                        MPSDataLayout.HeightPerWidthPerFeatureChannels,
                        new MTLSize(1, 1, labelSlotCount));
                    losses.Add(new MPSCnnLossLabels(device, lossDescriptor));
                }
            }
        }
    }

    var inputArray = NSArray<MPSImage>.FromNSObjects(inputs.ToArray());
    var lossArray = NSArray<MPSState>.FromNSObjects(losses.ToArray());
    return (inputArray, lossArray);
}
/// <summary>
/// Encodes every layer of the deep network (conv1, pool, conv2, pool, fc1, fc2, softmax) into a fresh
/// command buffer, commits it, and reads the predicted digit back in a completion handler.
/// </summary>
/// <param name="inputImage">Image the network runs on; when null, <c>SrcImage</c> is used instead.</param>
/// <param name="imageNum">Index of the test image being evaluated. The call blocks until the GPU finishes only when this is 9999 or <paramref name="inputImage"/> is null.</param>
/// <param name="correctLabel">Expected label for the image; when the prediction matches it, <c>Atomics.Increment</c> is called.</param>
/// <returns>
/// The label written by the completion handler. When the call does not wait for completion, the handler
/// has usually not run yet and the placeholder value 99 is returned.
/// </returns>
public override uint Forward(MPSImage inputImage = null, int imageNum = 9999, int correctLabel = 10)
{
    uint label = 99;

    // Get command buffer to use in MetalPerformanceShaders.
    using (var commandBuffer = commandQueue.CommandBuffer())
    {
        // The softmax output is stored in this image; it lives until the completion handler has read it.
        var finalLayer = new MPSImage(commandBuffer.Device, DID);

        // Encode layers to the Metal command buffer, falling back to the built-in source image.
        conv1.EncodeToCommandBuffer(commandBuffer, inputImage ?? SrcImage, c1Image);
        pool.EncodeToCommandBuffer(commandBuffer, c1Image, p1Image);
        conv2.EncodeToCommandBuffer(commandBuffer, p1Image, c2Image);
        pool.EncodeToCommandBuffer(commandBuffer, c2Image, p2Image);
        fc1.EncodeToCommandBuffer(commandBuffer, p2Image, fc1Image);
        fc2.EncodeToCommandBuffer(commandBuffer, fc1Image, dstImage);
        softmax.EncodeToCommandBuffer(commandBuffer, dstImage, finalLayer);

        // Read the label the moment the GPU is done and compare it to the expected output.
        commandBuffer.AddCompletedHandler(buffer =>
        {
            try
            {
                label = GetLabel(finalLayer);
                if (correctLabel == label)
                {
                    Atomics.Increment();
                }
            }
            finally
            {
                // Nothing reads the output image after this point; release it deterministically
                // instead of leaving one texture per call to the finalizer.
                finalLayer.Dispose();
            }
        });

        // Commit the command buffer to run on the GPU, and wait only when the caller needs the result now.
        commandBuffer.Commit();
        if (imageNum == 9999 || inputImage == null)
        {
            commandBuffer.WaitUntilCompleted();
        }
    }

    return label;
}
/// <summary>
/// Runs the active network on one image of the test set: the image's pixels are uploaded into a new
/// source image, which is then passed to <c>runningNet.Forward</c>.
/// </summary>
/// <param name="imageNum">Index of the test image to evaluate (0 to 9999 for the 10,000-image test set).</param>
/// <param name="correctLabel">The correct label for that image, forwarded to the network for scoring.</param>
void Inference(int imageNum, int correctLabel)
{
    // Offset of this image's first pixel inside the test-set buffer.
    int pixelOffset = imageNum * mnistInputNumPixels;

    // Fresh input-layer image for this pass; it is not disposed in this method.
    var networkInput = new MPSImage(device, runningNet.SID);

    // Upload the whole image into the source texture.
    var wholeImage = new MTLRegion(
        new MTLOrigin(0, 0, 0),
        new MTLSize((nint)mnistInputWidth, mnistInputHeight, 1));
    networkInput.Texture.ReplaceRegion(
        region: wholeImage,
        level: 0,
        slice: 0,
        pixelBytes: Mnistdata.Images + pixelOffset,
        bytesPerRow: mnistInputWidth,
        bytesPerImage: 0);

    // Forward pass; the returned label is not used here.
    runningNet.Forward(networkInput, imageNum, correctLabel);
}
/// <summary>
/// Creates the single-layer network: one fully-connected layer over the whole input image followed by
/// a softmax.
/// </summary>
/// <param name="commandQueueIn">Command queue kept for later forward passes; its device is used to create all resources.</param>
public MnistFullLayerNeuralNetwork(IMTLCommandQueue commandQueueIn)
{
    const int inputSide = 28;      // kernel covers the full 28x28 input
    const int digitClasses = 10;   // one output channel per digit

    // Keep the queue and its device around.
    commandQueue = commandQueueIn;
    device = commandQueueIn.Device;

    // Source and destination images built from their descriptors.
    SrcImage = new MPSImage(device, SID);
    dstImage = new MPSImage(device, DID);

    // A fully-connected layer expressed as a convolution spanning the whole image.
    // Clip rect and offset are not set here.
    layer = SlimMPSCnnFullyConnected.Create(
        kernelWidth: inputSide,
        kernelHeight: inputSide,
        inputFeatureChannels: 1,
        outputFeatureChannels: digitClasses,
        neuronFilter: null,
        device: device,
        kernelParamsBinaryName: "NN");

    // Softmax applied at the end so a clear label can be read out.
    softmax = new MPSCnnSoftMax(device);
}
/// <summary>
/// Creates the single-layer network: one fully-connected layer over the whole input image followed by
/// a softmax.
/// </summary>
/// <param name="commandQueueIn">Command queue kept for later forward passes; its device is used to create all resources.</param>
public MnistFullLayerNeuralNetwork (IMTLCommandQueue commandQueueIn)
{
    const int inputSide = 28;      // kernel covers the full 28x28 input
    const int digitClasses = 10;   // one output channel per digit

    // Keep the queue and its device around.
    commandQueue = commandQueueIn;
    device = commandQueueIn.Device;

    // Source and destination images built from their descriptors.
    SrcImage = new MPSImage (device, SID);
    dstImage = new MPSImage (device, DID);

    // A fully-connected layer expressed as a convolution spanning the whole image.
    // Clip rect and offset are not set here.
    layer = SlimMPSCnnFullyConnected.Create (
        kernelWidth: inputSide,
        kernelHeight: inputSide,
        inputFeatureChannels: 1,
        outputFeatureChannels: digitClasses,
        neuronFilter: null,
        device: device,
        kernelParamsBinaryName: "NN");

    // Softmax applied at the end so a clear label can be read out.
    softmax = new MPSCnnSoftMax (device);
}
/// <summary>
/// Creates the deep convolutional network on top of the base network: a shared 2x2 max-pooling layer,
/// a ReLU neuron, the intermediate images, two 5x5 convolutions and two fully-connected layers.
/// </summary>
/// <param name="commandQueueIn">Command queue passed to the base constructor; its device is used to create all layers and images.</param>
public MnistDeepConvNeuralNetwork(IMTLCommandQueue commandQueueIn) : base(commandQueueIn)
{
    // Local handle on the device, only needed while initializing.
    var gpu = commandQueueIn.Device;

    // 2x2 max pooling with stride 2, offset by one pixel and clamping at the edges.
    var maxPool = new MPSCnnPoolingMax(gpu, 2, 2, 2, 2);
    maxPool.Offset = new MPSOffset { X = 1, Y = 1, Z = 0 };
    maxPool.EdgeMode = MPSImageEdgeMode.Clamp;
    pool = maxPool;

    relu = new MPSCnnNeuronReLU(gpu, 0);

    // Intermediate images, one per layer output, built from their descriptors.
    c1Image = new MPSImage(gpu, c1id);
    p1Image = new MPSImage(gpu, p1id);
    c2Image = new MPSImage(gpu, c2id);
    p2Image = new MPSImage(gpu, p2id);
    fc1Image = new MPSImage(gpu, fc1id);

    // Convolution layers: 5x5 kernels, stride 1, padded, ReLU activation.
    conv1 = SlimMPSCnnConvolution.Create(
        kernelWidth: 5,
        kernelHeight: 5,
        inputFeatureChannels: 1,
        outputFeatureChannels: 32,
        neuronFilter: relu,
        device: gpu,
        kernelParamsBinaryName: "conv1",
        padding: true,
        strideX: 1,
        strideY: 1,
        destinationFeatureChannelOffset: 0,
        groupNum: 1);
    conv2 = SlimMPSCnnConvolution.Create(
        kernelWidth: 5,
        kernelHeight: 5,
        inputFeatureChannels: 32,
        outputFeatureChannels: 64,
        neuronFilter: relu,
        device: gpu,
        kernelParamsBinaryName: "conv2",
        padding: true,
        strideX: 1,
        strideY: 1,
        destinationFeatureChannelOffset: 0,
        groupNum: 1);

    // First fully-connected layer: 7x7 kernel over 64 channels, 1024 outputs.
    fc1 = SlimMPSCnnFullyConnected.Create(
        kernelWidth: 7,
        kernelHeight: 7,
        inputFeatureChannels: 64,
        outputFeatureChannels: 1024,
        neuronFilter: null,
        device: gpu,
        kernelParamsBinaryName: "fc1",
        destinationFeatureChannelOffset: 0);

    // Second fully-connected layer: same as a 1x1 convolution producing 1x1x10 from 1x1x1024.
    fc2 = SlimMPSCnnFullyConnected.Create(
        kernelWidth: 1,
        kernelHeight: 1,
        inputFeatureChannels: 1024,
        outputFeatureChannels: 10,
        neuronFilter: null,
        device: gpu,
        kernelParamsBinaryName: "fc2");
}
/// <summary>
/// Builds a <see cref="UIKit.UIImage"/> visualising the contents of an MPS image.
/// <list type="bullet">
/// <item>Float32 or Unorm8 with 3 or 1 feature channels: rendered as an RGB or grayscale bitmap.</item>
/// <item>Float32 or Unorm8 with any other channel count and a 1x1 size: rendered through <c>DrawCells</c>, one value per channel.</item>
/// <item>Anything else: a placeholder image with the channel count and format drawn as red text.</item>
/// </list>
/// </summary>
/// <param name="mpsImage">The image to visualise.</param>
/// <returns>A UIImage representing <paramref name="mpsImage"/>.</returns>
public static unsafe UIKit.UIImage GetUIImage(MPSImage mpsImage)
{
    var width = (int)mpsImage.Width;
    var height = (int)mpsImage.Height;
    var nfc = (int)mpsImage.FeatureChannels;
    var cellSize = 44;

    var isFloat32 = mpsImage.FeatureChannelFormat == MPSImageFeatureChannelFormat.Float32;
    var isUnorm8 = mpsImage.FeatureChannelFormat == MPSImageFeatureChannelFormat.Unorm8;
    var isColorOrGray = nfc == 3 || nfc == 1;
    var isSinglePixel = width == 1 && height == 1;

    if (isFloat32 && isColorOrGray)
    {
        var data = new float[width * height * nfc];
        fixed (float* dataPointer = data)
        {
            mpsImage.ReadBytes((IntPtr)dataPointer, MPSDataLayout.HeightPerWidthPerFeatureChannels, 0);
        }
        // Floats are converted to bytes with ClampRGBA32Float as each channel is written.
        return RenderBitmap(width, height, nfc, i => ClampRGBA32Float(data[i]));
    }

    if (isUnorm8 && isColorOrGray)
    {
        var data = new byte[width * height * nfc];
        fixed (byte* dataPointer = data)
        {
            mpsImage.ReadBytes((IntPtr)dataPointer, MPSDataLayout.HeightPerWidthPerFeatureChannels, 0);
        }
        return RenderBitmap(width, height, nfc, i => data[i]);
    }

    if (isFloat32 && isSinglePixel)
    {
        var data = new float[width * height * nfc];
        fixed (void* dataPointer = data)
        {
            mpsImage.ReadBytes((IntPtr)dataPointer, MPSDataLayout.HeightPerWidthPerFeatureChannels, 0);
        }
        return DrawCells(nfc, cellSize, data);
    }

    if (isUnorm8 && isSinglePixel)
    {
        var data = new byte[width * height * nfc];
        fixed (void* dataPointer = data)
        {
            mpsImage.ReadBytes((IntPtr)dataPointer, MPSDataLayout.HeightPerWidthPerFeatureChannels, 0);
        }
        // Scale bytes to the 0..1 range before drawing.
        return DrawCells(nfc, cellSize, data.Select(x => x / 255.0f).ToArray());
    }

    // Unsupported format: draw "<channels><format>?" as a placeholder, at cell size for 1x1 images.
    if (isSinglePixel)
    {
        width = cellSize;
        height = cellSize;
    }
    UIGraphics.BeginImageContext(new CoreGraphics.CGSize(width, height));
    try
    {
        UIColor.Red.SetColor();
        var m = $"{mpsImage.FeatureChannels}{mpsImage.FeatureChannelFormat}?";
        m.DrawString(new CoreGraphics.CGPoint(0, 0), UIFont.SystemFontOfSize(8));
        return UIGraphics.GetImageFromCurrentImageContext();
    }
    finally
    {
        // Always pop the context, even if drawing throws.
        UIGraphics.EndImageContext();
    }
}

/// <summary>
/// Renders interleaved channel data into a 4-bytes-per-pixel bitmap (skipped first byte, then three colour bytes).
/// </summary>
/// <param name="width">Bitmap width in pixels.</param>
/// <param name="height">Bitmap height in pixels.</param>
/// <param name="nfc">Channels per pixel: 3 writes the channels in reverse order, anything else repeats the pixel's single channel.</param>
/// <param name="sample">Returns the byte value of the channel at the given flat index.</param>
private static unsafe UIKit.UIImage RenderBitmap(int width, int height, int nfc, Func<int, byte> sample)
{
    var obytesPerRow = 4 * width;
    using var cs = CoreGraphics.CGColorSpace.CreateDeviceRGB();
    using var bc = new CoreGraphics.CGBitmapContext(null, width, height, 8, obytesPerRow, cs, CoreGraphics.CGImageAlphaInfo.NoneSkipFirst);

    var p = (byte*)bc.Data;
    for (var y = 0; y < height; y++)
    {
        for (var x = 0; x < width; x++)
        {
            var first = (y * width + x) * nfc;
            *p++ = 255; // skipped byte (NoneSkipFirst)
            if (nfc == 3)
            {
                *p++ = sample(first + 2);
                *p++ = sample(first + 1);
                *p++ = sample(first + 0);
            }
            else
            {
                var g = sample(first);
                *p++ = g;
                *p++ = g;
                *p++ = g;
            }
        }
    }

    // The UIImage is created from the CGImage; our managed reference is disposed on return.
    using var cgimage = bc.ToImage();
    return UIImage.FromImage(cgimage);
}
/// <summary>
/// Runs the active network on one image of the test set: the image's pixels are uploaded into a new
/// source image, which is then passed to <c>runningNet.Forward</c>.
/// </summary>
/// <param name="imageNum">Index of the test image to evaluate (0 to 9999 for the 10,000-image test set).</param>
/// <param name="correctLabel">The correct label for that image, forwarded to the network for scoring.</param>
void Inference (int imageNum, int correctLabel)
{
    // Offset of this image's first pixel inside the test-set buffer.
    int pixelOffset = imageNum * mnistInputNumPixels;

    // Fresh input-layer image for this pass; it is not disposed in this method.
    var networkInput = new MPSImage (device, runningNet.SID);

    // Upload the whole image into the source texture.
    var wholeImage = new MTLRegion (
        new MTLOrigin (0, 0, 0),
        new MTLSize ((nint)mnistInputWidth, mnistInputHeight, 1));
    networkInput.Texture.ReplaceRegion (
        region: wholeImage,
        level: 0,
        slice: 0,
        pixelBytes: Mnistdata.Images + pixelOffset,
        bytesPerRow: mnistInputWidth,
        bytesPerImage: 0);

    // Forward pass; the returned label is not used here.
    runningNet.Forward (networkInput, imageNum, correctLabel);
}
/// <summary>
/// Picks the <c>Offset</c> for this kernel and then encodes it into the command buffer through the base
/// implementation. The operation is out-of-place.
/// With padding enabled, the offset is derived from the source and destination sizes, the strides and the
/// kernel size, mirroring the way TensorFlow computes its padding (see the header comments in
/// https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/ops/nn.py).
/// Without padding, the offset is simply half the kernel size on each axis.
/// </summary>
/// <param name="commandBuffer">A valid MTLCommandBuffer to receive the encoded filter.</param>
/// <param name="sourceImage">A valid MPSImage object containing the source image.</param>
/// <param name="destinationImage">A valid MPSImage to be overwritten by the result; it may not alias <paramref name="sourceImage"/>.</param>
public override void EncodeToCommandBuffer (IMTLCommandBuffer commandBuffer, MPSImage sourceImage, MPSImage destinationImage)
{
    var kernelOffset = new MPSOffset { Z = 0 };

    if (!padding)
    {
        // No padding: start half a kernel in from the top-left corner.
        kernelOffset.X = (nint)(KernelWidth / 2);
        kernelOffset.Y = (nint)(KernelHeight / 2);
    }
    else
    {
        // Total padding needed per axis so the strided kernel covers the whole source.
        var totalPadY = ((destinationImage.Height - 1) * StrideInPixelsY + KernelHeight - sourceImage.Height);
        var totalPadX = ((destinationImage.Width - 1) * StrideInPixelsX + KernelWidth - sourceImage.Width);

        // Half of the total goes to the top/left edge.
        var padTop = totalPadY / 2;
        var padLeft = totalPadX / 2;

        kernelOffset.X = (nint)(KernelWidth / 2 - padLeft);
        kernelOffset.Y = (nint)(KernelHeight / 2 - padTop);
    }

    Offset = kernelOffset;
    base.EncodeToCommandBuffer (commandBuffer, sourceImage, destinationImage);
}
/// <summary>
/// Copies the output probabilities of <paramref name="finalLayer"/> to the CPU, converts them from
/// 16-bit to 32-bit floats and returns the index of the largest one.
/// </summary>
/// <param name="finalLayer">Output image of the network holding one probability per digit.</param>
/// <returns>
/// The digit (0-9) with the highest probability, or 10 when no probability is greater than zero.
/// </returns>
public uint GetLabel (MPSImage finalLayer)
{
    // Only 10 labels are produced, but each texture slice carries 4 half-float channels
    // (the original note gives the format as RGBAFloat16), so 3 slices yield 3 * 4 = 12 values.
    var resultHalfArray = Enumerable.Repeat ((ushort)6, 12).ToArray ();
    var resultFloatArray = Enumerable.Repeat (0.3f, 10).ToArray ();

    // Pin both arrays for the native calls; the handles are released in finally blocks so an
    // exception from Metal or vImage cannot leave the arrays pinned forever.
    var resultHalfArrayHandle = GCHandle.Alloc (resultHalfArray, GCHandleType.Pinned);
    try
    {
        var resultFloatArrayHandle = GCHandle.Alloc (resultFloatArray, GCHandleType.Pinned);
        try
        {
            var resultHalfArrayPtr = resultHalfArrayHandle.AddrOfPinnedObject ();
            var resultFloatArrayPtr = resultFloatArrayHandle.AddrOfPinnedObject ();

            // Read the single pixel of each of the 3 slices, 4 half-floats at a time.
            for (uint i = 0; i <= 2; i++)
            {
                finalLayer.Texture.GetBytes (resultHalfArrayPtr + 4 * (int)i * sizeof (ushort),
                                             sizeof (ushort) * 1 * 4,
                                             sizeof (ushort) * 1 * 1 * 4,
                                             new MTLRegion (new MTLOrigin (0, 0, 0), new MTLSize (1, 1, 1)),
                                             0,
                                             i);
            }

            // vImage converts the first 10 half-floats to 32-bit floats so they can be compared in C#.
            var fullResultVImagebuf = new vImageBuffer
            {
                Data = resultFloatArrayPtr,
                Height = 1,
                Width = 10,
                BytesPerRow = 10 * 4
            };
            var halfResultVImagebuf = new vImageBuffer
            {
                Data = resultHalfArrayPtr,
                Height = 1,
                Width = 10,
                BytesPerRow = 10 * 2
            };

            // NOTE(review): on failure the scan below still runs on whatever is in the float buffer
            // (presumably the 0.3f placeholders) — confirm whether that is the intended fallback.
            if (Planar16FtoPlanarF (ref halfResultVImagebuf, ref fullResultVImagebuf, 0) != vImageError.NoError)
            {
                Console.WriteLine ("Error in vImage");
            }
        }
        finally
        {
            resultFloatArrayHandle.Free ();
        }
    }
    finally
    {
        resultHalfArrayHandle.Free ();
    }

    // Linear scan for the label with the highest probability.
    float max = 0f;
    uint mostProbableDigit = 10;
    for (uint i = 0; i <= 9; i++)
    {
        if (max < resultFloatArray [i])
        {
            max = resultFloatArray [i];
            mostProbableDigit = i;
        }
    }

    return mostProbableDigit;
}