/// <summary>
/// Sample: builds an extracted dataset from the MNIST training images and labels,
/// splits it record-wise into two slices and prints every block of the first slice
/// to the console, over and over again.
/// </summary>
/// <remarks>
/// The printing loop has no exit condition, so this method never returns.
/// </remarks>
private static void SampleLoadExtractIterate()
{
    SigmaEnvironment sigma = SigmaEnvironment.Create("test");

    sigma.Prepare();

    // Disabled alternative: the same pipeline built on the iris CSV data.
    //var irisReader = new CsvRecordReader(new MultiSource(new FileSource("iris.data"), new UrlSource("http://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data")));
    //IRecordExtractor irisExtractor = irisReader.Extractor("inputs2", new[] { 0, 3 }, "targets2", 4).AddValueMapping(4, "Iris-setosa", "Iris-versicolor", "Iris-virginica");
    //irisExtractor = irisExtractor.Preprocess(new OneHotPreprocessor(sectionName: "targets2", minValue: 0, maxValue: 2), new NormalisingPreprocessor(sectionNames: "inputs2", minInputValue: 0, maxInputValue: 6));

    // Images: 16 header bytes, then one 28 * 28 byte record per image.
    CompressedSource imageSource = new CompressedSource(
        new MultiSource(
            new FileSource("train-images-idx3-ubyte.gz"),
            new UrlSource("http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz")));
    ByteRecordReader imageReader = new ByteRecordReader(headerLengthBytes: 16, recordSizeBytes: 28 * 28, source: imageSource);
    var rawImageExtractor = imageReader.Extractor("inputs", new[] { 0L, 0L }, new[] { 28L, 28L });
    IRecordExtractor imageExtractor = rawImageExtractor.Preprocess(new NormalisingPreprocessor(0, 255));

    // Labels: 8 header bytes, then one byte per record, one-hot encoded over 0..9.
    CompressedSource labelSource = new CompressedSource(
        new MultiSource(
            new FileSource("train-labels-idx1-ubyte.gz"),
            new UrlSource("http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz")));
    ByteRecordReader labelReader = new ByteRecordReader(headerLengthBytes: 8, recordSizeBytes: 1, source: labelSource);
    var rawLabelExtractor = labelReader.Extractor("targets", new[] { 0L }, new[] { 1L });
    IRecordExtractor labelExtractor = rawLabelExtractor.Preprocess(new OneHotPreprocessor(minValue: 0, maxValue: 9));

    IComputationHandler handler = new CpuFloat32Handler();

    ExtractedDataset dataset = new ExtractedDataset("mnist-training", ExtractedDataset.BlockSizeAuto, imageExtractor, labelExtractor);

    // First slice is used for training, second for validation.
    IDataset[] parts = dataset.SplitRecordwise(0.8, 0.2);
    IDataset trainingData = parts[0];
    IDataset validationData = parts[1];

    MinibatchIterator trainingIterator = new MinibatchIterator(1, trainingData);
    // Constructed but not consumed anywhere in this sample.
    MinibatchIterator validationIterator = new MinibatchIterator(1, validationData);

    // Endless demo loop: print each training block, pausing before and after the output.
    for (;;)
    {
        foreach (var block in trainingIterator.Yield(handler, sigma))
        {
            Thread.Sleep(100);

            PrintFormattedBlock(block, PrintUtils.AsciiGreyscalePalette);

            Thread.Sleep(1000);
        }
    }

    // Disabled scratch code: initialiser experiments and cache invalidation.
    //Random random = new Random();
    //INDArray array = new ADNDArray<float>(3, 1, 2, 2);
    //new GaussianInitialiser(0.05, 0.05).Initialise(array, Handler, random);
    //Console.WriteLine(array);
    //new ConstantValueInitialiser(1).Initialise(array, Handler, random);
    //Console.WriteLine(array);
    //dataset.InvalidateAndClearCaches();
}
/// <summary>
/// Sample: iterates twice over the first record-wise slice of the MNIST training images
/// and writes the elapsed time of each pass to the console, so both passes can be compared.
/// </summary>
private static void SampleCachedFastIteration()
{
    SigmaEnvironment sigma = SigmaEnvironment.Create("test");

    IDataSource dataSource = new CompressedSource(
        new MultiSource(
            new FileSource("train-images-idx3-ubyte.gz"),
            new UrlSource("http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz")));

    // 16 header bytes, then one 28 * 28 byte record per image.
    ByteRecordReader imageReader = new ByteRecordReader(headerLengthBytes: 16, recordSizeBytes: 28 * 28, source: dataSource);
    var rawImageExtractor = imageReader.Extractor("inputs", new[] { 0L, 0L }, new[] { 28L, 28L });
    IRecordExtractor imageExtractor = rawImageExtractor.Preprocess(new NormalisingPreprocessor(0, 255));

    IDataset dataset = new ExtractedDataset("mnist-training", ExtractedDataset.BlockSizeAuto, imageExtractor);
    IDataset[] parts = dataset.SplitRecordwise(0.8, 0.2);
    IDataset trainingData = parts[0];

    // The timer is started before the iterator is created, so construction counts towards the first pass.
    Stopwatch timer = Stopwatch.StartNew();

    IDataIterator iterator = new MinibatchIterator(10, trainingData);

    // First pass: blocks are only enumerated, not printed.
    foreach (var block in iterator.Yield(new CpuFloat32Handler(), sigma))
    {
        //PrintFormattedBlock(block, PrintUtils.AsciiGreyscalePalette);
    }

    string firstPassReport = "\nFirst iteration took " + timer.Elapsed + "\n+=+ Iterating over dataset again +=+ Dramatic pause...";
    Console.Write(firstPassReport);

    // Purely cosmetic pause: one dot every 500 ms for each element of the range.
    foreach (var tick in ArrayUtils.Range(1, 10).ToList())
    {
        Thread.Sleep(500);
        Console.Write(".");
    }

    timer.Restart();

    // Second pass over the same iterator, with a fresh handler.
    foreach (var block in iterator.Yield(new CpuFloat32Handler(), sigma))
    {
        //PrintFormattedBlock(block, PrintUtils.AsciiGreyscalePalette);
    }

    string secondPassReport = "Second iteration took " + timer.Elapsed;
    Console.WriteLine(secondPassReport);
}
// Verifies MinibatchIterator.Yield: it must throw ArgumentNullException when the handler
// and/or the environment is null, and the first "inputs" value of every yielded block must
// be one of the expected values.
//
// minibatchSize: the minibatch size handed to the iterator under test.
//
// The temporary CSV file and the dataset are released in a finally block, so a failing
// assertion no longer leaves them behind.
public void TestMinibatchIteratorYield(int minibatchSize)
{
    string filename = ".unittestfile" + nameof(TestMinibatchIteratorYield);

    CreateCsvTempFile(filename);

    ExtractedDataset dataset = null;

    try
    {
        SigmaEnvironment.Clear();

        FileSource source = new FileSource(filename, Path.GetTempPath());
        CsvRecordExtractor extractor = (CsvRecordExtractor) new CsvRecordReader(source).Extractor(new CsvRecordExtractor(new Dictionary<string, int[][]> { ["inputs"] = new[] { new[] { 0 } } }));

        // Path.Combine avoids the doubled / mixed directory separator produced by
        // concatenating "/" onto the temp path.
        string cacheDirectory = Path.Combine(Path.GetTempPath(), nameof(TestMinibatchIteratorYield));
        dataset = new ExtractedDataset("test", 1, new DiskCacheProvider(cacheDirectory), true, extractor);

        MinibatchIterator iterator = new MinibatchIterator(minibatchSize, dataset);
        IComputationHandler handler = new CpuFloat32Handler();
        SigmaEnvironment sigma = SigmaEnvironment.Create("test");

        // MoveNext is called explicitly; presumably Yield is lazy and only validates its
        // arguments once enumeration starts - TODO confirm.
        Assert.Throws<ArgumentNullException>(() => iterator.Yield(null, null).GetEnumerator().MoveNext());
        Assert.Throws<ArgumentNullException>(() => iterator.Yield(handler, null).GetEnumerator().MoveNext());
        Assert.Throws<ArgumentNullException>(() => iterator.Yield(null, sigma).GetEnumerator().MoveNext());

        float[] expectedValues = { 5.1f, 4.9f, 4.7f };
        int index = 0;

        foreach (var block in iterator.Yield(handler, sigma))
        {
            // Stop after 21 checked blocks; the intent is to pass over the data several
            // times to ensure consistency.
            if (index++ > 20)
            {
                break;
            }

            Assert.Contains(block["inputs"].GetValue<float>(0, 0, 0), expectedValues);
        }
    }
    finally
    {
        // Same cleanup order as before: dispose the dataset first, then delete the file.
        dataset?.Dispose();

        DeleteTempFile(filename);
    }
}