// Builds a convolutional classifier: two conv/pool/ReLU stages followed by two
// dropout-regularised fully-connected layers and a log-softmax output layer.
// The model is paired with a ClassNLLCriterion and outputIsClassIndices is set to true.
public static void BuildCnn(IAllocator allocator, SeedSource seedSource, int batchSize, bool useCudnn, out Sequential model, out ICriterion criterion, out bool outputIsClassIndices)
{
    var dtype = DType.Float32;
    var hiddenUnits = 1000;
    var classCount = 10;

    // The input is viewed as { batch, 1, height, width }; both spatial extents are MnistParser.ImageSize.
    var imageHeight = MnistParser.ImageSize;
    var imageWidth = MnistParser.ImageSize;
    var imageShape = new long[] { batchSize, 1, imageHeight, imageWidth };

    model = new Sequential();
    model.Add(new ViewLayer(imageShape));

    // Each stage reports the sizes of its output, which become the input sizes of the next one.
    var featureShape = AddCnnLayer(allocator, seedSource, dtype, model, imageShape, 20, useCudnn);
    featureShape = AddCnnLayer(allocator, seedSource, dtype, model, featureShape, 40, useCudnn);

    // Collapse every non-batch dimension so the fully-connected layers see a 2-D view.
    var flattenedSize = featureShape[1] * featureShape[2] * featureShape[3];
    model.Add(new ViewLayer(batchSize, flattenedSize));

    // Hidden fully-connected block.
    model.Add(new DropoutLayer(allocator, seedSource, dtype, 0.5f, batchSize, flattenedSize));
    model.Add(new LinearLayer(allocator, seedSource, dtype, (int)flattenedSize, hiddenUnits, batchSize));
    model.Add(new ReLULayer(allocator, dtype, batchSize, hiddenUnits));

    // Output fully-connected block.
    model.Add(new DropoutLayer(allocator, seedSource, dtype, 0.5f, batchSize, hiddenUnits));
    model.Add(new LinearLayer(allocator, seedSource, dtype, hiddenUnits, classCount, batchSize));
    model.Add(LayerBuilder.BuildLogSoftMax(allocator, dtype, batchSize, classCount, useCudnn));

    criterion = new ClassNLLCriterion(allocator, batchSize, classCount);

    // Tells the caller that this criterion works with class indices.
    outputIsClassIndices = true;
}
/// <summary>
/// Randomly initialises a linear layer's parameters. Both arrays are passed to
/// <c>Ops.RandomUniform</c> with the range -b .. b, where b = 1 / sqrt(weights.Shape[1]).
/// </summary>
/// <param name="seedSource">Seed source handed to both random fills.</param>
/// <param name="weights">Weight array; its second dimension determines the range.</param>
/// <param name="bias">Bias array; filled with the same range as the weights.</param>
private void InitWeightsLinear(SeedSource seedSource, NDArray weights, NDArray bias)
{
    float bound = 1.0f / (float)Math.Sqrt(weights.Shape[1]);
    float lower = -bound;

    // Weights are filled before bias so the values drawn from seedSource keep their order.
    Ops.RandomUniform(weights, seedSource, lower, bound);
    Ops.RandomUniform(bias, seedSource, lower, bound);
}
/// <summary>
/// Base constructor for 2-D convolution layers. Allocates the weight, bias, gradient,
/// gradInput and activation arrays, records the input/output sizes, and fills weight
/// and bias via <c>Ops.RandomUniform</c>.
/// </summary>
/// <param name="allocator">Allocator used for every array created here.</param>
/// <param name="seedSource">Seed source used for the weight and bias initialisation.</param>
/// <param name="elementType">Element type of every array created here.</param>
/// <param name="batchSize">First entry of the recorded input sizes.</param>
/// <param name="inputWidth">Fourth entry of the recorded input sizes.</param>
/// <param name="inputHeight">Third entry of the recorded input sizes.</param>
/// <param name="nInputPlane">Second entry of the recorded input sizes.</param>
/// <param name="nOutputPlane">Number of rows of the weight and bias arrays.</param>
/// <param name="cd">Convolution descriptor; its kW and kH size the weight array.</param>
public Conv2Layer(IAllocator allocator, SeedSource seedSource, DType elementType, int batchSize, int inputWidth, int inputHeight, int nInputPlane, int nOutputPlane, ConvolutionDesc2d cd)
{
    this.cd = cd;

    // Number of weight columns per output plane; also drives the initialisation range below.
    var fanIn = nInputPlane * cd.kW * cd.kH;

    // Parameters, followed by gradient buffers that mirror the parameter shapes.
    this.weight = new NDArray(allocator, elementType, nOutputPlane, fanIn);
    this.bias = new NDArray(allocator, elementType, nOutputPlane, 1);
    this.gradWeight = new NDArray(allocator, elementType, this.weight.Shape);
    this.gradBias = new NDArray(allocator, elementType, this.bias.Shape);

    // Input-side bookkeeping and buffer.
    inputSizes = new long[] { batchSize, nInputPlane, inputHeight, inputWidth };
    this.gradInput = new NDArray(allocator, elementType, inputSizes);

    // Output-side bookkeeping and buffer; the sizes are computed by SpatialConvolutionMM.
    outputSizes = SpatialConvolutionMM.OutputSize(inputSizes, weight.Shape, cd);
    this.activation = new NDArray(allocator, elementType, outputSizes);
    this.OutputSizes = outputSizes;

    // Weight is filled before bias so the values drawn from seedSource keep their order.
    var bound = 1.0f / (float)Math.Sqrt(fanIn);
    Ops.RandomUniform(weight, seedSource, -bound, bound);
    Ops.RandomUniform(bias, seedSource, -bound, bound);
}
/// <summary>
/// Randomly initialises a linear layer's parameters. Both tensors are passed to
/// <c>Ops.RandomUniform</c> with the range -b .. b, where b = 1 / sqrt(weights.Sizes[1]).
/// </summary>
/// <param name="seedSource">Seed source handed to both random fills.</param>
/// <param name="weights">Weight tensor; its second dimension determines the range.</param>
/// <param name="bias">Bias tensor; filled with the same range as the weights.</param>
private void InitWeightsLinear(SeedSource seedSource, Tensor weights, Tensor bias)
{
    float bound = 1.0f / (float)Math.Sqrt(weights.Sizes[1]);
    float lower = -bound;

    // Weights are filled before bias so the values drawn from seedSource keep their order.
    Ops.RandomUniform(weights, seedSource, lower, bound);
    Ops.RandomUniform(bias, seedSource, lower, bound);
}
/// <summary>
/// Creates the cuDNN variant of the convolution layer. After the base constructor has
/// allocated and initialised the parameters, they are re-viewed in the layout used for
/// the cuDNN calls, and a single workspace large enough for the forward, backward-filter
/// and backward-data queries is allocated.
/// </summary>
public Conv2Cudnn(IAllocator allocator, SeedSource seedSource, DType elementType, int batchSize, int inputWidth, int inputHeight, int nInputPlane, int nOutputPlane, ConvolutionDesc2d cd)
    : base(allocator, seedSource, elementType, batchSize, inputWidth, inputHeight, nInputPlane, nOutputPlane, cd)
{
    // Reshape weight and bias - CuDNN expects the dimensions to be structured slightly differently.
    // The gradient buffers are re-viewed to match the new parameter shapes.
    this.weight = ViewReplace(this.weight, nOutputPlane, nInputPlane, cd.kH, cd.kW);
    this.bias = ViewReplace(this.bias, 1, nOutputPlane, 1, 1);
    this.gradWeight = ViewReplace(this.gradWeight, this.weight.Shape);
    this.gradBias = ViewReplace(this.gradBias, this.bias.Shape);

    // The same input description is used by all three workspace queries.
    var inputShape = new TensorShape(elementType, new long[] { batchSize, nInputPlane, inputHeight, inputWidth });

    var forwardSize = DNN.GetConvolutionForwardWorkspaceSize(
        allocator, fwdAlgo, cd,
        inputShape, new TensorShape(weight), new TensorShape(activation));

    var backwardFilterSize = DNN.GetConvolutionBackwardFilterWorkspaceSize(
        allocator, bwdFilterAlgo, cd,
        inputShape, new TensorShape(activation), new TensorShape(weight));

    var backwardDataSize = DNN.GetConvolutionBackwardDataWorkspaceSize(
        allocator, bwdDataAlgo, cd,
        new TensorShape(weight), new TensorShape(activation), inputShape);

    // One shared workspace, sized for the most demanding of the three queries.
    var workspaceSize = Math.Max(Math.Max(forwardSize, backwardFilterSize), backwardDataSize);
    this.workspace = (CudaStorage)allocator.Allocate(DType.UInt8, workspaceSize);
}
/// <summary>
/// Creates the plain CUDA variant of the convolution layer. On top of what the base
/// constructor allocates, it creates the finput and fgradInput buffers, both sized by
/// <c>SpatialConvolution.FInputSize</c> from the layer's input and output sizes.
/// </summary>
public Conv2Cuda(IAllocator allocator, SeedSource seedSource, DType elementType, int batchSize, int inputWidth, int inputHeight, int nInputPlane, int nOutputPlane, ConvolutionDesc2d cd)
    : base(allocator, seedSource, elementType, batchSize, inputWidth, inputHeight, nInputPlane, nOutputPlane, cd)
{
    // inputSizes and outputSizes have already been set by the base constructor.
    var bufferShape = TensorSharp.CUDA.SpatialConvolution.FInputSize(inputSizes, outputSizes, cd);

    this.finput = new NDArray(allocator, elementType, bufferShape);
    this.fgradInput = new NDArray(allocator, elementType, bufferShape);
}
/// <summary>
/// Allocates a Float32 tensor of the requested shape on <c>Global.Device</c> and fills it
/// using variance scaling controlled by <c>Scale</c>, <c>Mode</c>, <c>Distribution</c>
/// and the optional <c>Seed</c>.
/// </summary>
/// <param name="shape">
/// Dimensions of the tensor. At least two are required because shape[0] and shape[1] are
/// read unconditionally; the product of any further dimensions multiplies both fan values.
/// </param>
/// <returns>
/// The new tensor. If <c>Distribution</c> is neither "uniform" nor "normal", no random
/// fill is performed and the tensor is returned as allocated.
/// </returns>
public override Tensor Operator(params long[] shape)
{
    Tensor tensor = new Tensor(Global.Device, DType.Float32, shape);

    // Product of every dimension past the first two; stays 1 when there are none.
    var hwScale = 1.0f;
    for (int i = 2; i < tensor.DimensionCount; ++i)
    {
        hwScale *= shape[i];
    }

    var fanIn = shape[1] * hwScale;
    var fanOut = shape[0] * hwScale;

    // An unrecognised Mode leaves the factor at its default of 1.
    var factor = 1.0f;
    switch (Mode)
    {
        case "fan_avg":
            factor = Scale / Math.Max(1, (fanIn + fanOut) / 2.0f);
            break;
        case "fan_in":
            factor = Scale / Math.Max(1, fanIn);
            break;
        case "fan_out":
            factor = Scale / Math.Max(1, fanOut);
            break;
    }

    // Build exactly one seed source: seeded when a Seed is configured, default otherwise.
    SeedSource seedSource = Seed.HasValue ? new SeedSource(Seed.Value) : new SeedSource();

    switch (Distribution)
    {
        case "uniform":
            float limit = (float)Math.Sqrt(3f * factor);
            Ops.RandomUniform(tensor, seedSource, -limit, limit);
            break;
        case "normal":
            float stddev = (float)Math.Sqrt(factor) / 0.87962566103423978f;
            Ops.RandomNormal(tensor, seedSource, 0, stddev);
            break;
    }

    return tensor;
}
/// <summary>
/// Sets this object up for the given generator: creates or resets its random-seed input
/// slot via <c>InputSlot.CreateOrResetRequiredMutating</c>, then resets the seed-related
/// fields to their default values.
/// </summary>
/// <param name="generator">Generator passed through to the input-slot helper.</param>
public void Initialize(Generator generator)
{
    // Inputs
    InputSlot.CreateOrResetRequiredMutating<IRandom>(ref randomSeedInputSlot, generator);

    // Field defaults: numerical seed source with seed 0 and empty seed text,
    // using the XorShift128Plus random type.
    seedSource = SeedSource.Numerical;
    seedNumber = 0;
    seedText = "";
    randomType = RandomType.XorShift128Plus;
}
/// <summary>
/// Creates a dropout layer: stores the configuration and allocates the activation,
/// gradInput and noise tensors, all with the same shape and element type.
/// </summary>
/// <param name="allocator">Allocator used for the three tensors; also stored on the layer.</param>
/// <param name="seedSource">Seed source stored on the layer.</param>
/// <param name="elementType">Element type of the three tensors; also stored on the layer.</param>
/// <param name="pRemove">Stored as given (presumably the probability of dropping a unit; confirm against the forward pass).</param>
/// <param name="shape">Shape shared by the activation, gradInput and noise tensors.</param>
public DropoutLayer(IAllocator allocator, SeedSource seedSource, DType elementType, float pRemove, params long[] shape)
{
    // Configuration.
    this.allocator = allocator;
    this.elementType = elementType;
    this.seedSource = seedSource;
    this.pRemove = pRemove;

    // Working buffers.
    this.activation = new Tensor(allocator, elementType, shape);
    this.gradInput = new Tensor(allocator, elementType, shape);
    this.noise = new Tensor(allocator, elementType, shape);
}
/// <summary>
/// Allocates a Float32 tensor of the requested shape on <c>Global.Device</c> and fills it
/// via <c>Ops.RandomUniform</c> using <c>MinVal</c> and <c>MaxVal</c>.
/// </summary>
/// <param name="shape">Dimensions of the tensor to create.</param>
/// <returns>The newly allocated and filled tensor.</returns>
public override Tensor Operator(params long[] shape)
{
    // Build exactly one seed source: seeded when a Seed is configured, default otherwise.
    SeedSource seedSource = Seed.HasValue ? new SeedSource(Seed.Value) : new SeedSource();

    Tensor tensor = new Tensor(Global.Device, DType.Float32, shape);
    Ops.RandomUniform(tensor, seedSource, MinVal, MaxVal);
    return tensor;
}
/// <summary>
/// Creates a fully-connected layer: allocates the parameter, gradient and per-batch
/// tensors and randomly initialises weights and bias through <c>InitWeightsLinear</c>.
/// </summary>
/// <param name="allocator">Allocator used for every tensor created here.</param>
/// <param name="seedSource">Seed source used for the parameter initialisation.</param>
/// <param name="elementType">Element type of every tensor created here.</param>
/// <param name="nInput">First dimension of the weight tensors; second dimension of gradInput.</param>
/// <param name="nOutput">Second dimension of the weight, bias and activation tensors.</param>
/// <param name="batchSize">First dimension of the activation and gradInput tensors.</param>
public LinearLayer(IAllocator allocator, SeedSource seedSource, DType elementType, int nInput, int nOutput, int batchSize)
{
    this.nOutput = nOutput;
    this.batchSize = batchSize;

    // Parameters, each paired with a gradient buffer of the same shape.
    this.weights = new Tensor(allocator, elementType, nInput, nOutput);
    this.gradWeights = new Tensor(allocator, elementType, nInput, nOutput);
    this.bias = new Tensor(allocator, elementType, 1, nOutput);
    this.gradBias = new Tensor(allocator, elementType, 1, nOutput);

    // Per-batch buffers.
    this.activation = new Tensor(allocator, elementType, batchSize, nOutput);
    this.gradInput = new Tensor(allocator, elementType, batchSize, nInput);

    InitWeightsLinear(seedSource, this.weights, this.bias);
}
// Appends one convolution -> pooling -> ReLU stage to the model and returns the
// pooling layer's output sizes, which the caller can feed into the next stage.
private static long[] AddCnnLayer(IAllocator allocator, SeedSource seedSource, DType elementType, Sequential model, long[] inputSizes, int nOutputPlane, bool useCudnn)
{
    // inputSizes is unpacked to match BuildConvLayer's parameter order:
    // [0] -> batchSize, [3] -> inputWidth, [2] -> inputHeight, [1] -> nInputPlane.
    var batch = (int)inputSizes[0];
    var width = (int)inputSizes[3];
    var height = (int)inputSizes[2];
    var planes = (int)inputSizes[1];

    var convDesc = new ConvolutionDesc2d(5, 5, 1, 1, 0, 0);
    var convLayer = LayerBuilder.BuildConvLayer(allocator, seedSource, elementType, batch, width, height, planes, nOutputPlane, convDesc, useCudnn);
    model.Add(convLayer);

    var poolDesc = new ConvolutionDesc2d(2, 2, 1, 1, 0, 0);
    var poolLayer = LayerBuilder.BuildPoolLayer(allocator, elementType, convLayer.OutputSizes, poolDesc, useCudnn);
    model.Add(poolLayer);

    // The activation layer works on the pooled output, so it takes the same sizes.
    var stageOutput = poolLayer.OutputSizes;
    model.Add(new ReLULayer(allocator, elementType, stageOutput));

    return stageOutput;
}
// Builds a two-layer perceptron: Linear -> Sigmoid -> Linear -> Sigmoid, paired with an
// MSE criterion. outputIsClassIndices is set to false. useCudnn is not read by this method.
public static void BuildMLP(IAllocator allocator, SeedSource seedSource, int batchSize, bool useCudnn, out Sequential model, out ICriterion criterion, out bool outputIsClassIndices)
{
    var dtype = DType.Float32;
    int hiddenUnits = 100;
    int pixelCount = MnistParser.ImageSize * MnistParser.ImageSize;
    int classCount = MnistParser.LabelCount;

    model = new Sequential();

    // View each batch as (batchSize, pixelCount).
    model.Add(new ViewLayer(batchSize, pixelCount));

    // Hidden layer.
    model.Add(new LinearLayer(allocator, seedSource, dtype, pixelCount, hiddenUnits, batchSize));
    model.Add(new SigmoidLayer(allocator, dtype, batchSize, hiddenUnits));

    // Output layer.
    model.Add(new LinearLayer(allocator, seedSource, dtype, hiddenUnits, classCount, batchSize));
    model.Add(new SigmoidLayer(allocator, dtype, batchSize, classCount));

    criterion = new MSECriterion(allocator, batchSize, classCount);

    // Output is class (pseudo-)probabilities, not class indices.
    outputIsClassIndices = false;
}
// Builds a two-layer perceptron with a log-softmax output: Linear -> Sigmoid -> Linear ->
// LogSoftMax, paired with a ClassNLLCriterion. outputIsClassIndices is set to true.
public static void BuildMLPSoftmax(IAllocator allocator, SeedSource seedSource, int batchSize, bool useCudnn, out Sequential model, out ICriterion criterion, out bool outputIsClassIndices)
{
    var dtype = DType.Float32;
    int hiddenUnits = 100;
    int pixelCount = MnistParser.ImageSize * MnistParser.ImageSize;
    int classCount = MnistParser.LabelCount;

    model = new Sequential();

    // View each batch as (batchSize, pixelCount).
    model.Add(new ViewLayer(batchSize, pixelCount));

    // Hidden layer.
    model.Add(new LinearLayer(allocator, seedSource, dtype, pixelCount, hiddenUnits, batchSize));
    model.Add(new SigmoidLayer(allocator, dtype, batchSize, hiddenUnits));

    // Output layer; the log-softmax implementation is selected by LayerBuilder.
    model.Add(new LinearLayer(allocator, seedSource, dtype, hiddenUnits, classCount, batchSize));
    model.Add(LayerBuilder.BuildLogSoftMax(allocator, dtype, batchSize, classCount, useCudnn));

    criterion = new ClassNLLCriterion(allocator, batchSize, classCount);

    // Tells the caller that this criterion works with class indices.
    outputIsClassIndices = true;
}
/// <summary>
/// Creates the convolution layer implementation that matches the allocator:
/// <c>Conv2Cpu</c> for a <c>CpuAllocator</c>; for a <c>CudaAllocator</c>, <c>Conv2Cudnn</c>
/// when <paramref name="useCudnn"/> is true and <c>Conv2Cuda</c> otherwise.
/// All other arguments are forwarded unchanged to the chosen constructor.
/// </summary>
/// <param name="allocator">Allocator whose runtime type selects the implementation.</param>
/// <param name="useCudnn">Only consulted when the allocator is a <c>CudaAllocator</c>.</param>
/// <returns>The newly constructed layer.</returns>
/// <exception cref="ArgumentNullException"><paramref name="allocator"/> is null.</exception>
/// <exception cref="NotSupportedException">The allocator is of any other type.</exception>
public static Conv2Layer BuildConvLayer(IAllocator allocator, SeedSource seedSource, DType elementType, int batchSize, int inputWidth, int inputHeight, int nInputPlane, int nOutputPlane, ConvolutionDesc2d cd, bool useCudnn = false)
{
    // Without this guard a null allocator fails both type checks and then
    // dereferences null while building the "not supported" message.
    if (allocator == null)
    {
        throw new ArgumentNullException("allocator");
    }

    if (allocator is CpuAllocator)
    {
        return new Conv2Cpu(allocator, seedSource, elementType, batchSize, inputWidth, inputHeight, nInputPlane, nOutputPlane, cd);
    }

    if (allocator is CudaAllocator)
    {
        if (useCudnn)
        {
            return new Conv2Cudnn(allocator, seedSource, elementType, batchSize, inputWidth, inputHeight, nInputPlane, nOutputPlane, cd);
        }

        return new Conv2Cuda(allocator, seedSource, elementType, batchSize, inputWidth, inputHeight, nInputPlane, nOutputPlane, cd);
    }

    throw new NotSupportedException("Allocator type " + allocator.GetType() + " not supported");
}
/// <summary>
/// Creates a <see cref="Variable"/> backed by a <see cref="FillExpression"/> whose fill
/// callback calls <c>Ops.RandomCauchy</c> on the result tensor.
/// </summary>
/// <param name="seedSource">Seed source handed to <c>Ops.RandomCauchy</c>.</param>
/// <param name="median">Scalar for the median; evaluated inside the fill callback, not when this method is called.</param>
/// <param name="sigma">Scalar for sigma; evaluated inside the fill callback, not when this method is called.</param>
/// <param name="allocator">Allocator passed to the fill expression.</param>
/// <param name="type">Element type passed to the fill expression.</param>
/// <param name="sizes">Sizes passed to the fill expression.</param>
/// <returns>A variable wrapping the fill expression.</returns>
public static Variable RandomCauchy(SeedSource seedSource, ScalarVar median, ScalarVar sigma, IAllocator allocator, DType type, params long[] sizes)
{
    var fill = new FillExpression(
        allocator,
        type,
        sizes,
        target => Ops.RandomCauchy(target, seedSource, median.Evaluate(), sigma.Evaluate()));

    return new Variable(fill);
}
/// <summary>
/// Creates a <see cref="Variable"/> backed by a <see cref="FillExpression"/> whose fill
/// callback calls <c>Ops.RandomExponential</c> on the result tensor.
/// </summary>
/// <param name="seedSource">Seed source handed to <c>Ops.RandomExponential</c>.</param>
/// <param name="lambda">Scalar for lambda; evaluated inside the fill callback, not when this method is called.</param>
/// <param name="allocator">Allocator passed to the fill expression.</param>
/// <param name="type">Element type passed to the fill expression.</param>
/// <param name="sizes">Sizes passed to the fill expression.</param>
/// <returns>A variable wrapping the fill expression.</returns>
public static Variable RandomExponential(SeedSource seedSource, ScalarVar lambda, IAllocator allocator, DType type, params long[] sizes)
{
    var fill = new FillExpression(
        allocator,
        type,
        sizes,
        target => Ops.RandomExponential(target, seedSource, lambda.Evaluate()));

    return new Variable(fill);
}
/// <summary>
/// Creates a <see cref="Variable"/> backed by a <see cref="FillExpression"/> whose fill
/// callback calls <c>Ops.RandomNormal</c> on the result tensor.
/// </summary>
/// <param name="seedSource">Seed source handed to <c>Ops.RandomNormal</c>.</param>
/// <param name="mean">Scalar for the mean; evaluated inside the fill callback, not when this method is called.</param>
/// <param name="stdv">Scalar for the standard deviation; evaluated inside the fill callback, not when this method is called.</param>
/// <param name="allocator">Allocator passed to the fill expression.</param>
/// <param name="type">Element type passed to the fill expression.</param>
/// <param name="sizes">Sizes passed to the fill expression.</param>
/// <returns>A variable wrapping the fill expression.</returns>
public static Variable RandomNormal(SeedSource seedSource, ScalarVar mean, ScalarVar stdv, IAllocator allocator, DType type, params long[] sizes)
{
    var fill = new FillExpression(
        allocator,
        type,
        sizes,
        target => Ops.RandomNormal(target, seedSource, mean.Evaluate(), stdv.Evaluate()));

    return new Variable(fill);
}
/// <summary>
/// Creates a <see cref="Variable"/> backed by a <see cref="FillExpression"/> whose fill
/// callback calls <c>Ops.RandomUniform</c> on the result tensor.
/// </summary>
/// <param name="seedSource">Seed source handed to <c>Ops.RandomUniform</c>.</param>
/// <param name="min">Scalar for the minimum; evaluated inside the fill callback, not when this method is called.</param>
/// <param name="max">Scalar for the maximum; evaluated inside the fill callback, not when this method is called.</param>
/// <param name="allocator">Allocator passed to the fill expression.</param>
/// <param name="type">Element type passed to the fill expression.</param>
/// <param name="sizes">Sizes passed to the fill expression.</param>
/// <returns>A variable wrapping the fill expression.</returns>
public static Variable RandomUniform(SeedSource seedSource, ScalarVar min, ScalarVar max, IAllocator allocator, DType type, params long[] sizes)
{
    var fill = new FillExpression(
        allocator,
        type,
        sizes,
        target => Ops.RandomUniform(target, seedSource, min.Evaluate(), max.Evaluate()));

    return new Variable(fill);
}
/// <summary>
/// Creates a <see cref="Variable"/> backed by a <see cref="FillExpression"/> whose fill
/// callback calls <c>Ops.RandomBernoulli</c> on the result tensor.
/// </summary>
/// <param name="seedSource">Seed source handed to <c>Ops.RandomBernoulli</c>.</param>
/// <param name="p">Scalar for p; evaluated inside the fill callback, not when this method is called.</param>
/// <param name="allocator">Allocator passed to the fill expression.</param>
/// <param name="type">Element type passed to the fill expression.</param>
/// <param name="sizes">Sizes passed to the fill expression.</param>
/// <returns>A variable wrapping the fill expression.</returns>
public static Variable RandomBernoulli(SeedSource seedSource, ScalarVar p, IAllocator allocator, DType type, params long[] sizes)
{
    var fill = new FillExpression(
        allocator,
        type,
        sizes,
        target => Ops.RandomBernoulli(target, seedSource, p.Evaluate()));

    return new Variable(fill);
}
/// <summary>
/// Creates a <see cref="TVar"/> backed by a <see cref="FillExpression"/> whose fill
/// callback calls <c>Ops.RandomGeometric</c> on the result tensor.
/// </summary>
/// <param name="seedSource">Seed source handed to <c>Ops.RandomGeometric</c>.</param>
/// <param name="p">Scalar for p; evaluated inside the fill callback, not when this method is called.</param>
/// <param name="allocator">Allocator passed to the fill expression.</param>
/// <param name="type">Element type passed to the fill expression.</param>
/// <param name="sizes">Sizes passed to the fill expression.</param>
/// <returns>A variable wrapping the fill expression.</returns>
public static TVar RandomGeometric(SeedSource seedSource, SVar p, IAllocator allocator, DType type, params long[] sizes)
{
    var fill = new FillExpression(
        allocator,
        type,
        sizes,
        target => Ops.RandomGeometric(target, seedSource, p.Evaluate()));

    return new TVar(fill);
}
// End of configuration options
//##########################################################################

// Entry point: creates the allocator selected by AccMode, loads the MNIST data sets,
// builds the model selected by MType, then alternates training and evaluation epochs.
static void Main(string[] args)
{
    // Init TensorSharp: anything other than CPU mode goes through a CUDA context on device 0.
    IAllocator allocator;
    if (AccMode != AccelMode.Cpu)
    {
        var cudaContext = new TSCudaContext();
        cudaContext.Precompile(Console.Write);
        cudaContext.CleanUnusedPTX();
        allocator = new CudaAllocator(cudaContext, 0);
    }
    else
    {
        allocator = new CpuAllocator();
    }

    // Fixed seed so that training runs are repeatable.
    var seedSource = new SeedSource(42);

    // Load data
    if (string.IsNullOrEmpty(MnistFolder))
    {
        throw new ApplicationException("MnistFolder should be set to the path containing the MNIST data set");
    }

    Console.WriteLine("loading data sets");
    DataSet trainingSet, testingSet;
    using (new SimpleTimer("data set loading done in {0}ms"))
    {
        MnistDataSetBuilder.BuildDataSets(allocator, MnistFolder, TRAINING_SIZE, TESTING_SIZE, out trainingSet, out testingSet);
    }

    // Construct the model, loss function and optimizer
    int pixelCount = MnistParser.ImageSize * MnistParser.ImageSize;
    var useCudnn = AccMode == AccelMode.Cudnn;

    Sequential model;
    ICriterion criterion;
    bool useTargetClasses;

    switch (MType)
    {
        case ModelType.MLP:
            ModelBuilder.BuildMLP(allocator, seedSource, BatchSize, useCudnn, out model, out criterion, out useTargetClasses);
            break;

        case ModelType.MLPSoftmax:
            ModelBuilder.BuildMLPSoftmax(allocator, seedSource, BatchSize, useCudnn, out model, out criterion, out useTargetClasses);
            break;

        case ModelType.Cnn:
            ModelBuilder.BuildCnn(allocator, seedSource, BatchSize, useCudnn, out model, out criterion, out useTargetClasses);
            break;

        default:
            throw new InvalidOperationException("Unrecognized model type " + MType);
    }

    var optimizer = new SgdOptimizer(sgdConfig);

    // Train the model, evaluating against the test set after every epoch.
    const int epochCount = 50;
    for (int epoch = 0; epoch < epochCount; ++epoch)
    {
        TrainEpoch(model, criterion, optimizer, trainingSet, pixelCount, useTargetClasses);
        EvaluateModel(model, testingSet, pixelCount);
    }
}