/// <summary>
/// Loads a set of images and labels.
/// </summary>
/// <param name="imagePath">The path to the file containing the images.</param>
/// <param name="labelPath">The path to the file containing the labels.</param>
/// <returns>An array where each element consists of an image and a label.</returns>
/// <exception cref="InvalidDataException">
/// The image file header declares a negative image count, a negative dimension, or an image size that does not fit in an <see cref="int"/>.
/// </exception>
/// <exception cref="EndOfStreamException">
/// One of the files ends before the header, an image or a label could be read completely.
/// </exception>
/// <remarks>
/// The MNIST file formats are described at the bottom of this web page: http://yann.lecun.com/exdb/mnist/
/// </remarks>
private static LabelledSample[] LoadMnistSetFromFile(string imagePath, string labelPath)
{
    const int imageHeaderSize = 16;
    const int labelHeaderSize = 8;

    //
    // Open the image file
    //
    using (FileStream imageStream = File.Open(imagePath, FileMode.Open, FileAccess.Read, FileShare.Read))
    {
        //
        // Read the header to find out how many images there are and the dimensions of each image.
        // Once the header has been read completely, the stream is positioned at the first image,
        // so no explicit seek is required.
        //
        byte[] headerBuffer = new byte[imageHeaderSize];
        FillBuffer(imageStream, headerBuffer, imageHeaderSize);

        int imageCount = ReadBigEndianInt32(headerBuffer, 4);
        int height = ReadBigEndianInt32(headerBuffer, 8);
        int width = ReadBigEndianInt32(headerBuffer, 12);

        if (imageCount < 0 || height < 0 || width < 0)
        {
            throw new InvalidDataException(
                "The image file '" + imagePath + "' has a corrupt header (negative image count or dimension).");
        }

        //
        // Compute the image size in 64 bits so that an oversized header is reported instead of silently wrapping around.
        //
        long pixelCount = (long)width * height;
        if (pixelCount > int.MaxValue)
        {
            throw new InvalidDataException(
                "The image file '" + imagePath + "' declares images that are too large to load.");
        }

        int singleImageSize = (int)pixelCount;

        //
        // Construct the resulting array of labelled images
        //
        var result = new LabelledSample[imageCount];

        //
        // Open the file with the labels
        //
        using (FileStream labelStream = File.Open(labelPath, FileMode.Open, FileAccess.Read, FileShare.Read))
        {
            //
            // Skip to where the label data begins
            //
            labelStream.Seek(labelHeaderSize, SeekOrigin.Begin);

            //
            // Read the images and their corresponding labels
            //
            for (int index = 0; index < imageCount; index++)
            {
                var imageData = new byte[singleImageSize];
                FillBuffer(imageStream, imageData, singleImageSize);

                //
                // ReadByte signals the end of the stream with -1, which must not be mistaken for a label.
                //
                int label = labelStream.ReadByte();
                if (label < 0)
                {
                    throw new EndOfStreamException(
                        "The label file '" + labelPath + "' contains fewer labels than the image file contains images.");
                }

                result[index] = new LabelledSample(label, imageData);
            }
        }

        //
        // Return the labelled images
        //
        return result;
    }
}

/// <summary>
/// Reads exactly <paramref name="count"/> bytes from <paramref name="stream"/> into the beginning of <paramref name="buffer"/>.
/// </summary>
/// <param name="stream">The stream to read from.</param>
/// <param name="buffer">The buffer receiving the bytes, starting at index 0.</param>
/// <param name="count">The number of bytes to read.</param>
/// <exception cref="EndOfStreamException">The stream ends before <paramref name="count"/> bytes were read.</exception>
private static void FillBuffer(Stream stream, byte[] buffer, int count)
{
    int offset = 0;
    while (offset < count)
    {
        //
        // A single Read call may return fewer bytes than requested, so keep reading until the buffer is full.
        //
        int bytesRead = stream.Read(buffer, offset, count - offset);
        if (bytesRead <= 0)
        {
            throw new EndOfStreamException(
                "Unexpected end of stream: expected " + count + " bytes but only " + offset + " were available.");
        }

        offset += bytesRead;
    }
}
/// <summary>
/// Produces a batch of randomly chosen XOR samples.
/// </summary>
/// <param name="count">How many samples to produce.</param>
/// <returns>
/// The produced samples. A sample is labeled 1 when exactly one of its two bits is set and 0 otherwise.
/// </returns>
/// <remarks>
/// The visualization mechanism in the GUI assumes data to consist of square pictures, so each bit is stored twice,
/// giving every sample a size of 4. Because the sample data is a <c>byte[]</c>, a set bit is stored as
/// <see cref="byte.MaxValue"/> and a cleared bit as <see cref="byte.MinValue"/>.
/// </remarks>
private LabelledSample[] GenerateSamples(int count)
{
    var rng = ThreadSafeRandom.GetThreadRandom();
    var samples = new LabelledSample[count];

    for (int n = 0; n < count; n++)
    {
        // Draw the two input bits.
        bool first = rng.NextDouble() > 0.5;
        bool second = rng.NextDouble() > 0.5;

        // Per the original author's note, the current neural net design cannot recognize an all-zero data
        // vector because no activations can result from it. The workaround is to replace the (0, 0) sample
        // with (1, 1), which carries the same label (0).
        if (!(first || second))
        {
            first = true;
            second = true;
        }

        // Each bit occupies two adjacent bytes of the sample data.
        byte firstValue = first ? byte.MaxValue : byte.MinValue;
        byte secondValue = second ? byte.MaxValue : byte.MinValue;
        var pixels = new byte[] { firstValue, firstValue, secondValue, secondValue };

        // For booleans, inequality is exclusive-or.
        int label;
        if (first != second)
        {
            label = 1;
        }
        else
        {
            label = 0;
        }

        samples[n] = new LabelledSample(label, pixels);
    }

    return samples;
}
/// <summary>
/// Gets the training set.
/// </summary>
/// <returns>
/// An array of samples to use for training. Every loaded sample appears ten times in a row.
/// </returns>
/// <remarks>
/// Each copy is a new <c>LabelledSample</c> built from the label and sample data of the loaded sample;
/// the sample data is handed to the constructor as-is and is not cloned here.
/// </remarks>
public LabelledSample[] GetTrainingSet()
{
    // Load the training data from disk.
    var loaded = LoadData();

    // According to the original author, the training set currently holds only 10 images,
    // so every image is repeated to make the set slightly larger.
    int copiesPerSample = 10;
    var enlarged = new LabelledSample[loaded.Length * copiesPerSample];

    int writeIndex = 0;
    for (int sourceIndex = 0; writeIndex < enlarged.Length; sourceIndex++)
    {
        var source = loaded[sourceIndex];

        for (int copy = 0; copy < copiesPerSample; copy++)
        {
            enlarged[writeIndex] = new LabelledSample(source.Label, source.SampleData);
            writeIndex++;
        }
    }

    return enlarged;
}