// Renders the array as a numpy-style bracketed grid.
// When debug level 1 is enabled, also dumps class/shape/stride diagnostics and a
// copy-pasteable np.array(...) expression to the console.
public override string ToString() {
    var nargs = new int[Shape.Length]; // NOTE(review): appears unused in this method — candidate for removal
    var strides = Utils.Shape2Strides(Shape); // row-major strides derived from the current shape
    Counters.Reset();
    string result = "";
    // Length of the innermost (fastest-varying) row: the whole Count for a 1-D
    // array, otherwise the second-to-last stride.
    var last = strides.Length == 1 ? Count : strides[strides.Length - 2];
    string before, after;
    List <Type> listValues = GetData.ToList();
    if (Utils.IsDebugLvl1) {
        // Debug dump: class name, shape/stride/ownership info, a numpy
        // reconstruction string, and the data-access/method-call counters.
        StringBuilder sb = new StringBuilder();
        sb.AppendLine($"Class:{GetType().Name,-20}");
        if (Shape.Length > 1 || Shape[0] != 1) {
            sb.AppendLine($"Shape:({Shape.Glue()}) Version:{GetHashCode(),10}");
            sb.AppendLine($"Strides:({Strides.Glue()})");
            sb.AppendLine($"OwnData:({OwnData})");
        }
        string dbg = $" : np.array([{listValues.Glue(",")}], dtype={OpsT.dtype}).reshape({Shape.Glue(",")})";
        var nd = $"NDArray<{typeof(Type).Name}>";
        sb.AppendLine($"{nd,-20} {Shape.Glue("x")}{dbg}");
        sb.AppendLine($"Counters. DataAccess:{Counters.Data} / MethCall:{Counters.MethCall}");
        Console.WriteLine(sb);
    }
    // Column width selection: ml0 = widest default rendering of any element,
    // ml1 = widest fixed-point (F8) rendering.
    var ml0 = listValues.Select(v => $"{v}").Max(v => v.Length);
    var ml1 = listValues.Select(v => $"{v:F8}").Max(v => v.Length);
    string fmt = $"{{0,{ml0 + 2}}}";
    // If default formatting is much wider than F8 (e.g. long scientific notation),
    // switch the whole grid to F8 for compactness.
    if (ml0 > ml1 + 3) { fmt = $"{{0,{ml1 + 2}:F8}}"; }
    for (int idx = 0; idx < Count; ++idx) {
        after = before = "";
        // Brackets are only relevant at the first/last element of an innermost row.
        if (idx % last == 0 || idx % last == last - 1) {
            before = idx != 0 ? " " : "[";
            after = idx == Count - 1 ? "]" : "";
            // Walk the outer dimensions from innermost to outermost: open a
            // bracket at each dimension start, pad for alignment otherwise;
            // close a bracket at each dimension end.
            for (int l = strides.Length - 2; l >= 0; --l) {
                if (idx % strides[l] == 0) { before += "["; } else { before = " " + before; }
                if (idx % strides[l] == strides[l] - 1) { after += "]"; }
            }
        }
        result += idx % last == 0 ? before : "";
        var val = listValues[idx];
        result += string.Format(fmt, val);
        // Close the row with a newline; an extra blank line separates
        // higher-dimensional slabs (when more than one bracket closed).
        result += idx % last == last - 1 ? after + "\n" : "";
        result += after.Length > 1 && idx != Count - 1 ? "\n" : "";
    }
    if (Utils.IsDebugNo) {
        // Trim the trailing newline when debug output is fully disabled.
        result = result.Substring(0, result.Length - 1);
    }
    return(result);
}
// Replaces the array's shape in place and resets the per-dimension index scratch.
// Bug fix: strides must be recomputed from the NEW shape. The previous code
// (Strides = Strides.ToArray()) only copied the old strides, so element indexing
// after a reshape silently used stale strides. ToString() shows the intended
// helper: Utils.Shape2Strides.
public void ChangeShape(int[] shape) {
    Shape = shape;
    Strides = Utils.Shape2Strides(Shape); // recompute row-major strides for the new shape
    Indices = new int[Shape.Length];
}
// Builds an RNN graph node with cuDNN-compatible descriptors.
// x:        input variable with layout (seqLength, batch, inputSize) — rank is validated below.
// ty:       RNN flavor (e.g. LSTM/GRU) supplying the cuDNN mode.
// dropout:  applied only while training; forced to 0.0 otherwise.
// Creates Y (seqLength, batch, hiddenSize), the deferred weight parameter W,
// the hidden/cell state variables, and one shared TensorDescriptor per step.
public Rnn(RnnType ty, Variable <T> x, int numLayers, int hiddenSize, bool isTraining = true, double dropout = 0.0, ulong dropoutSeed = 1337UL) {
    Type = ty;
    IsTraining = isTraining;
    NumLayers = numLayers;
    HiddenSize = hiddenSize;
    Dropout = isTraining ? dropout : 0.0; // no dropout at inference time
    DropoutSeed = dropoutSeed;
    // X shape (seqLength, batch, inputSize)
    X = x;
    Util.EnsureEqual(3, X.Shape.Rank, "Input layout: (seqLength, batch, inputSize)");
    Util.EnsureTrue(X.Shape[0] >= 0, "Input layout: (seqLength, batch, inputSize)");
    Util.EnsureTrue(X.Shape[1] >= 0, "Input layout: (seqLength, batch, inputSize)");
    Util.EnsureTrue(X.Shape[2] >= 0, "Input layout: (seqLength, batch, inputSize)");
    SeqLength = (int)X.Shape[0];
    MiniBatch = (int)X.Shape[1];
    InputSize = (int)X.Shape[2];
    // Y Shape (seqLength, batch, hiddenSize)
    Y = Variable <T>(PartialShape.Create(SeqLength, MiniBatch, HiddenSize));
    // W shape will be determined during initialization (queried from cuDNN)
    W = Parameter <T>();
    // state variables: (numLayers, batch, hiddenSize) for h and c
    var shape = PartialShape.Create(NumLayers, MiniBatch, HiddenSize);
    var strides = Strides.Create(shape[1] * shape[2], shape[2], 1); // inner change most
    HX = Variable <T>(shape);
    CX = Variable <T>(shape);
    HY = Variable <T>(shape);
    CY = Variable <T>(shape);
    StateDesc = new TensorDescriptor();
    StateDesc.SetND(Dnn.DataTypeOf <T>(), shape.AsInt32Array, strides.AsInt32Array);
    // xDesc is an array, for each step; every step has the same shape, so one
    // descriptor instance is repeated SeqLength times.
    shape = PartialShape.Create(MiniBatch, InputSize, 1);
    strides = Strides.Create(shape[1] * shape[2], shape[2], 1);
    var xDesc = new TensorDescriptor();
    xDesc.SetND(Dnn.DataTypeOf <T>(), shape.AsInt32Array, strides.AsInt32Array);
    XDesc = Enumerable.Repeat(xDesc, SeqLength).ToArray();
    // yDesc is an array, for each step (same sharing scheme as xDesc)
    shape = PartialShape.Create(MiniBatch, HiddenSize, 1);
    strides = Strides.Create(shape[1] * shape[2], shape[2], 1);
    var yDesc = new TensorDescriptor();
    yDesc.SetND(Dnn.DataTypeOf <T>(), shape.AsInt32Array, strides.AsInt32Array);
    YDesc = Enumerable.Repeat(yDesc, SeqLength).ToArray();
    // construct the graph: inputs, output, and auxiliary buffers cuDNN needs
    AddInput(X);
    AddInput(W);
    AddOutput(Y);
    AddAuxVar(HX);
    AddAuxVar(CX);
    AddAuxVar(HY);
    AddAuxVar(CY);
    AddAuxVar(DropoutStates);
    AddAuxVar(Workspace);
    AddAuxVar(ReserveSpace);
}
// GPU-side initialization: configures the dropout and RNN descriptors, asks
// cuDNN for the flattened parameter-buffer size, allocates W (and its gradient
// when training), then initializes every linear layer's weight matrix and bias.
// Call order matters: descriptors must be fully set before size queries.
public override void Initialize(Executor executor) {
    var context = executor.Context.ToGpuContext();
    var dnn = context.Dnn;
    // dropout: allocate the cuDNN dropout state buffer and bind it to the descriptor
    var dropoutDesc = executor.DropoutDescDict[DropoutDesc];
    IntPtr dropoutStatesSize;
    dnn.DropoutGetStatesSize(out dropoutStatesSize);
    var dropoutStates = executor.GetTensor(DropoutStates, Shape.Create(dropoutStatesSize.ToInt64()));
    dropoutDesc.Set(dnn, (float)Dropout, dropoutStates.Buffer.Ptr, dropoutStatesSize, DropoutSeed);
    // rnn descriptor
    var rnnDesc = executor.RnnDescDict[RnnDesc];
    var mode = RnnType.Mode;
    rnnDesc.Set(HiddenSize, NumLayers, dropoutDesc, RNNInputMode.LINEAR_INPUT, DirectionMode.UNIDIRECTIONAL, mode, Dnn.DataTypeOf <T>());
    // initialize weight, once only, using minibatch size 1
    var shape = PartialShape.Create(1, InputSize, 1); // first dimension does not affect the weight shape and size TODO test all, tested only for LSTM
    var strides = Strides.Create(shape[1] * shape[2], shape[2], 1);
    var xDesc = new TensorDescriptor();
    xDesc.SetND(Dnn.DataTypeOf <T>(), shape.AsInt32Array, strides.AsInt32Array);
    var wDesc = executor.FilterDescDict[WDesc];
    // Query cuDNN for the total parameter size; it must be a whole number of T elements.
    IntPtr weightsSize;
    dnn.GetRNNParamsSize(rnnDesc, xDesc, out weightsSize, Dnn.DataTypeOf <T>());
    Util.EnsureTrue(weightsSize.ToInt64() % Gpu.SizeOf <T>() == 0);
    var shapeW = Shape.Create(weightsSize.ToInt64() / Alea.Gpu.SizeOf <T>());
    wDesc.SetND(Dnn.DataTypeOf <T>(), TensorFormat.CUDNN_TENSOR_NCHW, new [] { (int)shapeW[0], 1, 1 });
    // since we are using cuDNN, we'd better make sure these varaibles are allocated
    executor.GetTensor(W, shapeW);
    if (IsTraining) { executor.GetGradient(W, shapeW); }
    // init weights: cuDNN packs all layers' matrices/biases into the single flat
    // W buffer; locate each sub-region via filterDesc and initialize it in place.
    var numLinearLayers = RnnType.NumLinLayers;
    using (var filterDesc = new FilterDescriptor()) {
        var w = executor.GetTensor(W);
        var filterDimA = new int[3];
        for (var layer = 0; layer < NumLayers; ++layer) {
            for (var linLayerId = 0; linLayerId < numLinearLayers; ++linLayerId) {
                int nbDims;
                DataType dataType;
                TensorFormat format;
                deviceptr <T> linLayerMat;
                // Get a device pointer to this linear layer's weight matrix inside W.
                dnn.GetRNNLinLayerMatrixParams(rnnDesc, layer, xDesc, wDesc, w.Buffer.Ptr, linLayerId, filterDesc, out linLayerMat);
                filterDesc.GetND(out dataType, out format, out nbDims, filterDimA);
                var length = filterDimA.Aggregate(ScalarOps.Mul); // element count = product of filter dims
                var linLayerMatBuffer = new Buffer <T>(context.Device, w.Memory, new Layout(Shape.Create(length)), linLayerMat);
                var linLayerMatTensor = new Tensor <T>(linLayerMatBuffer);
                // Random-normal init scaled by 1/sqrt(hidden+input) — presumably a
                // Glorot-style scheme; TODO confirm against the training recipe.
                context.Assign(linLayerMatTensor, RandomNormal <T>(Shape.Create(length)) / (Math.Sqrt(HiddenSize + InputSize).AsScalar <T>()));
                deviceptr <T> linLayerBias;
                // Same lookup for the bias region of this linear layer.
                dnn.GetRNNLinLayerBiasParams(rnnDesc, layer, xDesc, wDesc, w.Buffer.Ptr, linLayerId, filterDesc, out linLayerBias);
                filterDesc.GetND(out dataType, out format, out nbDims, filterDimA);
                length = filterDimA.Aggregate(ScalarOps.Mul);
                var linLayerBiasBuffer = new Buffer <T>(context.Device, w.Memory, new Layout(Shape.Create(length)), linLayerBias);
                var linLayerBiasTensor = new Tensor <T>(linLayerBiasBuffer);
                // Bias initialization is delegated to the RNN flavor (e.g. LSTM forget gate).
                RnnType.InitBias(context, layer, linLayerId, linLayerBiasTensor);
            }
        }
    }
    base.Initialize(executor);
}
// Per-execution descriptor bundle for a dynamic RNN: reads the actual runtime
// shape of X (seqLength, batch inferred from the bound tensor), allocates the
// output/state tensors, builds per-step tensor descriptors, and sizes the
// cuDNN workspace plus (when training) the reserve space and gradients.
public RnnDescr(Executor executor, RnnDynamic <T> rnn) {
    var context = executor.Context.ToGpuContext();
    var dnn = context.Dnn;
    Rnn = rnn;
    var x = executor.GetTensor(Rnn.X);
    // Runtime dimensions come from the bound input tensor, not the symbolic shape.
    SeqLength = (int)x.Shape[0];
    MiniBatch = (int)x.Shape[1];
    var shape = Shape.Create(SeqLength, MiniBatch, Rnn.HiddenSize);
    executor.GetTensor(Rnn.Y, shape); // ensure the output tensor is allocated
    // state variables: (numLayers, batch, hiddenSize)
    shape = Shape.Create(Rnn.NumLayers, MiniBatch, Rnn.HiddenSize);
    var strides = Strides.Create(shape[1] * shape[2], shape[2], 1); // inner change most
    executor.GetTensor(Rnn.HX, shape);
    executor.GetTensor(Rnn.CX, shape);
    executor.GetTensor(Rnn.HY, shape);
    executor.GetTensor(Rnn.CY, shape);
    StateDesc = new TensorDescriptor();
    StateDesc.SetND(Dnn.DataTypeOf <T>(), shape.AsInt32Array, strides.AsInt32Array);
    // xDesc is an array, for each step — one descriptor instance shared by all steps
    shape = Shape.Create(MiniBatch, rnn.InputSize, 1);
    strides = Strides.Create(shape[1] * shape[2], shape[2], 1);
    var xDesc = new TensorDescriptor();
    xDesc.SetND(Dnn.DataTypeOf <T>(), shape.AsInt32Array, strides.AsInt32Array);
    XDesc = Enumerable.Repeat(xDesc, SeqLength).ToArray();
    // yDesc is an array, for each step
    shape = Shape.Create(MiniBatch, rnn.HiddenSize, 1);
    strides = Strides.Create(shape[1] * shape[2], shape[2], 1);
    var yDesc = new TensorDescriptor();
    yDesc.SetND(Dnn.DataTypeOf <T>(), shape.AsInt32Array, strides.AsInt32Array);
    YDesc = Enumerable.Repeat(yDesc, SeqLength).ToArray();
    // workspace and reserved space: sizes queried from cuDNN for this seqLength
    var rnnDesc = executor.RnnDescDict[rnn.RnnDesc];
    IntPtr workSize;
    dnn.GetRNNWorkspaceSize(rnnDesc, SeqLength, XDesc, out workSize);
    executor.GetTensor(Rnn.Workspace, Shape.Create(workSize.ToInt64()));
    if (Rnn.IsTraining) {
        // Training additionally needs the reserve space (kept between forward
        // and backward passes) and gradient buffers for X, Y, HX, CX.
        IntPtr reserveSize;
        dnn.GetRNNTrainingReserveSize(rnnDesc, SeqLength, XDesc, out reserveSize);
        executor.GetTensor(Rnn.ReserveSpace, Shape.Create(reserveSize.ToInt64()));
        executor.GetGradient(Rnn.X, x.Shape);
        executor.GetGradient(Rnn.Y, Shape.Create(SeqLength, MiniBatch, Rnn.HiddenSize));
        executor.GetGradient(Rnn.HX, Shape.Create(Rnn.NumLayers, MiniBatch, Rnn.HiddenSize));
        executor.GetGradient(Rnn.CX, Shape.Create(Rnn.NumLayers, MiniBatch, Rnn.HiddenSize));
    }
}
// Single-step RNN cell: like the full Rnn node but with sequence length fixed
// to 1 (XDesc/YDesc have exactly one element). Sets up state/dropout/RNN
// descriptors, allocates the workspace, and sizes the flattened weight buffer W.
// w: the shared weight parameter variable; its shape is determined here from
// the cuDNN params-size query.
public RnnCell(Executor executor, RnnType rnnType, Variable <T> w, int inputSize, int batch, int hiddenSize, int numLayers, bool isTraining, double dropoutProbability, ulong dropoutSeed = 1337UL) {
    IsTraining = isTraining;
    BatchSize = batch;
    InputSize = inputSize;
    HiddenSize = hiddenSize;
    NumLayers = numLayers;
    RnnType = rnnType;
    W = w;
    var context = executor.Context.ToGpuContext();
    var dnn = context.Dnn;
    // state variables: (numLayers, batch, hiddenSize)
    var shape = Shape.Create(numLayers, batch, hiddenSize);
    var strides = Strides.Create(shape[1] * shape[2], shape[2], 1); // inner change most
    StateDesc.SetND(Dnn.DataTypeOf <T>(), shape.AsInt32Array, strides.AsInt32Array);
    // xDesc is an array of one element because we do only one step
    shape = Shape.Create(batch, inputSize, 1);
    strides = Strides.Create(shape[1] * shape[2], shape[2], 1);
    var xDesc = new TensorDescriptor();
    xDesc.SetND(Dnn.DataTypeOf <T>(), shape.AsInt32Array, strides.AsInt32Array);
    XDesc = Enumerable.Repeat(xDesc, 1).ToArray();
    // yDesc is an array of one element because we do only one step
    shape = Shape.Create(batch, hiddenSize, 1);
    strides = Strides.Create(shape[1] * shape[2], shape[2], 1);
    var yDesc = new TensorDescriptor();
    yDesc.SetND(Dnn.DataTypeOf <T>(), shape.AsInt32Array, strides.AsInt32Array);
    YDesc = Enumerable.Repeat(yDesc, 1).ToArray();
    // dropout: allocate the cuDNN state buffer and bind it to the descriptor
    IntPtr dropoutStatesSize;
    dnn.DropoutGetStatesSize(out dropoutStatesSize);
    DropoutStates = executor.Context.Device.Allocate <byte>(Shape.Create(dropoutStatesSize.ToInt64()));
    DropoutDesc.Set(dnn, (float)dropoutProbability, DropoutStates.Buffer.Ptr, dropoutStatesSize, dropoutSeed);
    var mode = rnnType.Mode;
    RnnDesc.Set(hiddenSize, numLayers, DropoutDesc, RNNInputMode.LINEAR_INPUT, DirectionMode.UNIDIRECTIONAL, mode, Dnn.DataTypeOf <T>());
    // workspace sized for a single step
    IntPtr workSize;
    dnn.GetRNNWorkspaceSize(RnnDesc, 1, XDesc, out workSize);
    Workspace = executor.Context.Device.Allocate <byte>(Shape.Create(workSize.ToInt64()));
    if (isTraining) {
        // Only the reserve SIZE is recorded here; allocation is deferred
        // (see the commented-out line — NOTE(review): confirm who allocates it).
        IntPtr reserveSize;
        dnn.GetRNNTrainingReserveSize(RnnDesc, 1, XDesc, out reserveSize);
        ReserveSize = reserveSize.ToInt64();
        //ReserveSpace = executor.AttentionState.Device.Allocate<byte>(Shape.Create(reserveSize.ToInt64()));
    }
    // Query total parameter size and shape W accordingly; must divide evenly into T elements.
    IntPtr weightsSize;
    dnn.GetRNNParamsSize(RnnDesc, xDesc, out weightsSize, Dnn.DataTypeOf <T>());
    Util.EnsureTrue(weightsSize.ToInt64() % Gpu.SizeOf <T>() == 0);
    var shapeW = Shape.Create(weightsSize.ToInt64() / Alea.Gpu.SizeOf <T>());
    WDesc.SetND(Dnn.DataTypeOf <T>(), TensorFormat.CUDNN_TENSOR_NCHW, new[] { (int)shapeW[0], 1, 1 });
    executor.GetTensor(W, shapeW);
    if (isTraining) { executor.GetGradient(W, shapeW); }
}