/// <summary>
/// Creates a new tensor with the same shape and configuration as this one,
/// with every element set to zero.
/// </summary>
public FloatTensor createZerosTensorLike()
{
    var zeros = this.emptyTensorCopy();
    zeros.Zero_();
    return zeros;
}
/// <summary>
/// Creates a new tensor with the same shape and configuration as this one,
/// with every element set to one (zeroed first, then 1 added in place).
/// </summary>
public FloatTensor createOnesTensorLike()
{
    var ones = this.emptyTensorCopy();
    ones.Zero_();
    ones.Add((float) 1, true);
    return ones;
}
// parameters are overrides
/// <summary>
/// Returns a copy of this tensor's data. If <paramref name="result"/> is null,
/// HookAutograd either reuses a previously-created "copy" child or allocates a
/// new output tensor, wiring it into the autograd graph.
/// </summary>
public FloatTensor Copy(FloatTensor result = null)
{
    // HookAutograd assigns through the ref parameter and returns the same tensor.
    var output = HookAutograd(ref result, "copy", false);
    output.Zero_();
    output.Add(this, inline: true);
    return output;
}
/// <summary>
/// Allocates a fresh tensor with identical shape, buffers, device placement and
/// autograd flags as this one, zeroed out. Not hooked into the autograd graph
/// (creation_op is recorded as "emptyTensorCopy" only).
/// </summary>
internal FloatTensor emptyTensorCopy()
{
    var clone = factory.Create(
        _shape: this.shape,
        _data: data,
        _dataBuffer: dataBuffer,
        _shapeBuffer: shapeBuffer,
        _shader: shader,
        _copyData: true,
        _dataOnGpu: dataOnGpu,
        _autograd: autograd,
        _keepgrads: keepgrads,
        _creation_op: "emptyTensorCopy");

    // Contents are irrelevant (it's an "empty" copy) — zero so callers start clean.
    clone.Zero_();
    return clone;
}
/// <summary>
/// In-place elementwise division on the GPU: this /= tensor.
/// No-op when the data is not on the GPU. Dividing a tensor by itself is
/// short-circuited to filling it with ones instead of dispatching the kernel.
/// </summary>
public void DivElemGPU_(FloatTensor tensor)
{
    Debug.LogFormat("<color=blue>FloatTensor.DivElemGPU_ dataOnGpu: {0}</color>", dataOnGpu);

    if (!dataOnGpu)
    {
        return;
    }

    if (tensor.id == this.id)
    {
        // x / x == 1 elementwise; avoid reading and writing the same buffer in the kernel.
        tensor.Zero_();
        tensor.Add(1, inline: true);
        return;
    }

    shader.SetBuffer(DivElemKernel_, "DivElemDataA_", dataBuffer);
    shader.SetBuffer(DivElemKernel_, "DivElemDataB_", tensor.dataBuffer);
    shader.Dispatch(DivElemKernel_, this.size, 1, 1);
}
/// <summary>
/// Hooks a forward operation into the dynamic autograd graph and returns the
/// output tensor for that operation. Reuses a previously-allocated child tensor
/// when the same op was already run with the same parents/scalar (memory reuse),
/// otherwise allocates a new result and wires up creators/children bookkeeping.
/// </summary>
/// <param name="result">Optional pre-allocated output; if null one is found or created.</param>
/// <param name="creation_op">Name of the forward op (also used to match reusable children).</param>
/// <param name="inline">Inline ops mutate "this" and get no graph node.</param>
/// <param name="scalar_input">Scalar operand, or -1 when the op has none (sentinel).</param>
/// <param name="tensor_inputs">Additional tensor parents beyond "this".</param>
/// <param name="resultShape">Explicit output shape; when set, data is freshly allocated.</param>
/// <param name="resultData">Explicit output data; ignored when resultShape is set.</param>
/// <param name="indices">Non-differentiable IntTensor inputs recorded in int_creators.</param>
public FloatTensor HookGraph(ref FloatTensor result, string creation_op, bool inline,
    float scalar_input = -1, FloatTensor[] tensor_inputs = null, int[] resultShape = null,
    float[] resultData = null, IntTensor[] indices = null)
{
    // no dynamic graph for inline operations
    if (inline)
    {
        return (this);
    }

    bool autograd_pre_initialized = false;

    // if we don't override with a result tensor being passed in, let's first look to see if we can reuse one
    // from a previous operation - if not - we'll create our own.
    if (result == null)
    {
        bool child_pre_initialized = false;
        int child_index = 0;

        // iterate through all children to see if any were created using the same parameters and creation_op
        // as is currently being requested
        for (int i = 0; i < this.children_indices.Count; i++)
        {
            FloatTensor child = factory.Get(children_indices[i]);

            if (child.creation_op == creation_op)
            {
                // if this creation_op requires no parameters - then we only have to match
                // on the creation_op itself - which we have already done.
                if (scalar_input == -1 && (tensor_inputs == null || tensor_inputs.Length == 0))
                {
                    child_pre_initialized = true;
                    child_index = children_indices[i];
                    break;
                }

                // since there are paremeters - now this child must match all parameters exactly
                bool keep_looking = false;

                // NOTE(review): when scalar_input != -1 but the child has <= 1 creators,
                // keep_looking stays false and the child can match without the scalar being
                // compared — confirm this is intended.
                if (scalar_input != -1)
                {
                    if (child.creators.Count > 1)
                    {
                        if (factory.Get(child.creators[1]).data[0] != scalar_input)
                        {
                            keep_looking = true;
                        }
                    }
                }

                // every tensor parent must appear among the child's creators
                if (tensor_inputs != null && tensor_inputs.Length >= 1)
                {
                    foreach (FloatTensor tensor in tensor_inputs)
                    {
                        if (!child.creators.Contains(tensor.id))
                        {
                            keep_looking = true;
                        }
                    }
                }

                if (keep_looking)
                {
                    continue;
                }

                // found a child that matches all parameters
                child_pre_initialized = true;
                child_index = children_indices[i];
                break;
            }
        }

        if (child_pre_initialized)
        {
            // reuse the existing child's memory; zero it so the forward op starts fresh
            autograd_pre_initialized = true;
            result = factory.Get(child_index);
            result.Zero_();
        }
        else
        {
            // result only tracks gradients if ALL tensor parents do
            bool resultAutograd = autograd;
            if (tensor_inputs != null)
            {
                foreach (FloatTensor tensor in tensor_inputs)
                {
                    resultAutograd = tensor.autograd && resultAutograd;
                }
            }

            if (resultShape == null)
            {
                resultShape = this.shape;

                if (resultData == null)
                {
                    resultData = this.data;
                }
            }
            else
            {
                // if shape is passed in - initialize a new dataset with that shape
                resultData = null;
            }

            result = factory.Create(
                _shape: resultShape,
                _data: resultData,
                _dataBuffer: dataBuffer,
                _shapeBuffer: shapeBuffer,
                _shader: shader,
                _copyData: true,
                _dataOnGpu: dataOnGpu,
                _autograd: resultAutograd, // if either tensor doesn't have gradients
                _keepgrads: keepgrads,     // neither does the result. This might not end up being
                _creation_op: creation_op); // a good decision in the long run. We'll see.

            if (this.dataOnGpu)
            {
                result.Gpu(shader);
            }
        }
    }

    if (autograd_pre_initialized)
    {
        // reusing an existing graph node: just reset the backprop counters on all parties
        this.ResetAutogradCounts();
        result.ResetAutogradCounts();

        if (tensor_inputs != null)
        {
            foreach (FloatTensor tensor in tensor_inputs)
            {
                tensor.ResetAutogradCounts();
            }
        }
    }
    else
    {
        // fresh graph node: register "this" as a creator and result as a child
        result.InitGraph();
        result.creators.Add(this.id);
        result.creation_op = creation_op;

        children_indices.Add(result.Id);
        children_counts.Add(0);

        // hook autograd one parents - one scalar: the scalar is wrapped in a 1-element
        // tensor so it can be recorded as a creator like any other parent
        if (scalar_input != -1)
        {
            result.creators.Add(factory.Create(
                _shape: new int[] {1},
                _data: new float[] {scalar_input},
                _dataBuffer: dataBuffer,
                _shapeBuffer: shapeBuffer,
                _shader: shader,
                _copyData: true,
                _dataOnGpu: dataOnGpu,
                _autograd: autograd,
                _keepgrads: keepgrads,
                _creation_op: creation_op).id);
        }

        // hook autograd - two parents
        if (tensor_inputs != null)
        {
            foreach (FloatTensor tensor in tensor_inputs)
            {
                result.creators.Add(tensor.id);
                tensor.children_indices.Add(result.Id);
                tensor.children_counts.Add(0);
            }
        }

        // special storage for the graph so that we can know which indices of the parent to
        // backprop into. note that int_creators are expected to be non-differentiable and so we do
        // not backprop into them directly
        if (indices != null && indices.Length > 0)
        {
            if (result.int_creators.Count == 0)
            {
                foreach (IntTensor ind in indices)
                {
                    result.int_creators.Add(ind.Id);
                }
            }
            else if (result.int_creators.Count == indices.Length)
            {
                // TODO: after dynamic graph works for IntTensor you should be able to simply check to see if
                // the ids are the same - but at the time of writing we always creating new IntTensors so that
                // wouldn't work yet.
            }
            else
            {
                throw new Exception("Something is wrong... int_creators already existed but had the wrong length");
            }
        }

        // TODO: this is just used so that eventually if any inline operation was run on "indices" to change it
        // (before backpropagating), we could trigger a warning that backprop will be broken.
        //indices.children_indices.Add(result.id);
    }

    return (result);
}
/// <summary>
/// Backpropagates a gradient through this tensor and recursively into its creators,
/// dispatching on creation_op to apply the correct local derivative. Gradients from
/// multiple children are accumulated into this.Grad; children_counts guards against
/// backpropagating twice from the same child.
/// </summary>
/// <param name="grad">Incoming gradient; defaults to a ones tensor when null (loss root).</param>
/// <param name="grad_origin">The child tensor this gradient arrived from, if any.</param>
public void Backward(FloatTensor grad = null, FloatTensor grad_origin = null)
{
    if (autograd)
    {
        // root of backprop: seed with a gradient of ones
        if (grad == null)
        {
            Debug.Log("Grad not Found... Creating Gradient of 1s");
            grad = this.createOnesTensorLike();
            grad.Autograd = false;
        }

        // each child may only deliver its gradient once per backward pass
        if (grad_origin != null)
        {
            int child_index = children_indices.IndexOf(grad_origin.Id);
            if (children_counts[child_index] > 0)
            {
                throw new InvalidOperationException("Can't backprop more than once.");
            }
            else
            {
                children_counts[child_index] += 1;
            }
        }

        if (this.Grad == null)
        {
            // first gradient to arrive is stored directly
            this.Grad = grad;
        }
        else
        {
            if (this.Grad.id == grad.id)
            {
                // same tensor object — nothing to accumulate
            }
            else
            {
                // accumulate subsequent gradients in place
                this.Grad.Add(grad, inline: true);
            }
        }

        // grads must not have grads of their own
        if (this.Grad.autograd == true)
        {
            throw new InvalidOperationException("Sorry, grads cannot have grads");
        }

        // RULES FOR AUTOGRAD:
        // 1) if you need to use "this" for calculating a gradient, copy it first and set autograd to false (see sigmoid)
        // 2) if you use a method in your backprop logic that doesn't hook into the dynamic graph yet, backprop
        // will not work!!! Make sure there's a "hookautograd" function in every method you use for backprop.
        // 3) whenever backpropping into a method where the forward prop involved a scalar (such as scalar
        // multiplication), current implementations assume you will NOT backprop into the scalar itself.
        // 4) Because of rule (2), do NOT use "emptyTensorCopy" at all in backprop unless you know what you're
        // doing.
        // 5) I will be especially strict about Unit tests for all backprop logic as this is the most complex
        // piece of functionality we have. Furthermore, most errors go completely undetected (not discovered
        // by runtime errors). Autograd bugs just make convergence go slowly and sub-optimally.
        // 6) If you use a forward propagation tensor to backprop, you MUST remember to turn off autograd
        // when backpropagating (see "mm" below for example). Otherwise, it will cause autograd to break because
        // whatever child you select will think it needs to wait for another gradient before backpropagating.
        // 7) In the "view" backprop method, you'll notice that we set parent.grad = null. This keeps grads from
        // accumulating when forward and backprop is called multiple times. However, it doesn't cause any new
        // memory allocation.

        // only continue backpropping if there's something to backprop into
        // only continue backpropping if all gradients (from children) are accounted for
        // override waiting for children if "backprop" was called on this variable directly
        if (this.creators != null && this.creators.Count > 0 &&
            (grad_origin == null || AllAutogradChildrenAccountedFor()))
        {
            if (creation_op == "abs")
            {
                // d|x|/dx = x/|x| = sign(x); here computed as x / |x| * grad
                FloatTensor c = this.Copy(autograd: false);
                var parent = factory.Get(creators[0]);
                parent.Backward(parent.Div(c).Mul(grad));
            }
            else if (creation_op == "add_elem")
            {
                // addition passes the gradient through unchanged to both parents
                factory.Get(creators[0]).Backward(grad, this);
                factory.Get(creators[1]).Backward(grad, this);
            }
            else if (creation_op == "add_scalar")
            {
                factory.Get(creators[0]).Backward(grad, this);
            }
            else if (creation_op.Contains("concatenate_"))
            {
                // slice the gradient back apart along the concat dim, one slice per parent
                int dim = int.Parse(creation_op.Split('_')[1]);
                for (int i = 0; i < creators.Count; i++)
                {
                    FloatTensor slice = grad.IndexSelect(factory.ctrl.intTensorFactory.Get(int_creators[i]), dim);
                    factory.Get(creators[i]).Backward(slice);
                }
            }
            else if (creation_op == "contiguous")
            {
                factory.Get(creators[0]).Backward(this.Grad.Copy(autograd: this.Grad.Autograd), this);
            }
            else if (creation_op == "copy")
            {
                factory.Get(creators[0]).Backward(grad, this);
            }
            else if (creation_op == "div_elem")
            {
                // d(x/y)/dx = 1/y ; d(x/y)/dy = -x/y^2
                FloatTensor x = factory.Get(creators[0]);
                FloatTensor y = factory.Get(creators[1]);

                // NOTE(review): grad_origin ("this") is not passed to x.Backward/y.Backward here,
                // unlike most other branches — confirm whether that's intentional.
                x.Backward(grad.Div(y));

                FloatTensor y2 = y.Pow(2);
                FloatTensor xn = x.Neg();
                FloatTensor xny2 = xn.Div(y2);
                FloatTensor gradxny2 = grad.Mul(xny2);
                y.Backward(gradxny2);
            }
            else if (creation_op == "div_scalar")
            {
                factory.Get(creators[0]).Backward(grad.Div(factory.Get(creators[1]).data[0]), this);
            }
            else if (creation_op == "emptyTensorCopy_Hooked")
            {
                factory.Get(creators[0]).Backward(grad, this);
            }
            else if (creation_op == "expand")
            {
                var parent = factory.Get(creators[0]);
                parent.Grad = null;

                FloatTensor local_grad = grad.Copy(autograd: grad.Autograd);

                // sum the gradient back down over every dim that was expanded from size 1
                var grad_shape = new int[shape.Length];
                for (int i = 0; i < grad.shape.Length; i++)
                {
                    grad_shape[i] = grad.shape[i];
                }

                for (int i = 0; i < shape.Length; i++)
                {
                    grad_shape[i] = parent.shape[i];
                    if (parent.shape[i] == 1 && shape[i] > 1)
                    {
                        local_grad = local_grad.Sum(i).View(grad_shape);
                    }
                }

                parent.Backward(local_grad, this);
            }
            else if (creation_op.Contains("shaped_index_select"))
            {
                // scatter the gradient back to the selected indices of the parent
                FloatTensor parent = factory.Get(creators[0]);
                IntTensor indices = factory.ctrl.intTensorFactory.Get(int_creators[0]);
                FloatTensor back_grad = parent.emptyTensorCopy(hook_graph: true);
                back_grad.autograd = false;
                back_grad.Zero_();

                FloatTensor out_grad = back_grad.IndexAdd(indices, -1, grad);
                parent.Backward(out_grad);
            }
            else if (creation_op.Contains("index_select"))
            {
                FloatTensor parent = factory.Get(creators[0]);
                IntTensor indices = factory.ctrl.intTensorFactory.Get(int_creators[0]);
                int dim = int.Parse(creation_op.Split('_')[2]);
                FloatTensor back_grad = parent.emptyTensorCopy(hook_graph: true);
                back_grad.autograd = false;

                FloatTensor out_grad = back_grad.IndexAdd(indices, dim, grad);
                parent.Backward(out_grad);
            }
            else if (creation_op == "log")
            {
                // d(log x)/dx = 1/x
                FloatTensor x = factory.Get(creators[0]).Copy(autograd: false);
                factory.Get(creators[0]).Backward(grad.Mul(x.Pow(-1)), this);
            }
            else if (creation_op == "mul_elem")
            {
                // d(x*y)/dx = y ; d(x*y)/dy = x
                factory.Get(creators[0]).Backward(grad.Mul(factory.Get(creators[1])), this);
                factory.Get(creators[1]).Backward(grad.Mul(factory.Get(creators[0])), this);
            }
            else if (creation_op == "mul_scalar")
            {
                factory.Get(creators[0]).Backward(grad.Mul(factory.Get(creators[1]).data[0]), this);
            }
            else if (creation_op == "mm")
            {
                // d(A.B)/dA = grad . B^T ; d(A.B)/dB = A^T . grad
                // autograd is disabled on the transposed copies per RULE 6 above
                FloatTensor x = factory.Get(creators[1]).Transpose();
                x.autograd = false;
                FloatTensor y = factory.Get(creators[0]).Transpose();
                y.autograd = false;
                factory.Get(creators[0]).Backward(grad.MM(x), this);
                factory.Get(creators[1]).Backward(y.MM(grad), this);
            }
            else if (creation_op == "neg")
            {
                factory.Get(creators[0]).Backward(grad.Neg(), this);
            }
            else if (creation_op == "pow_scalar")
            {
                // NOTE(review): this computes n * x * grad, but d(x^n)/dx = n * x^(n-1);
                // the two only agree for n == 2 — verify against unit tests / upstream.
                FloatTensor x = factory.Get(creators[0]).Copy(autograd: false);
                factory.Get(creators[0]).Backward(x.Mul(grad).Mul(factory.Get(creators[1]).Data[0]), this);
            }
            else if (creation_op == "relu")
            {
                // TODO: replace with simple comparison and multiplication (should be 2 liner)
                // gradient mask: 1 where the forward output was positive, 0 elsewhere
                FloatTensor c = this.Copy(autograd: false);
                FloatTensor output = c;
                var dimSize = 1;
                for (var i = 0; i < output.Shape.Length; ++i)
                {
                    dimSize *= output.Shape[i];
                }

                var gradInput = output.Copy(autograd: false);
                gradInput.Zero_();

                var nCpu = SystemInfo.processorCount;
                Parallel.For(0, nCpu, workerId =>
                {
                    var max = dimSize * (workerId + 1) / nCpu;
                    for (var i = dimSize * workerId / nCpu; i < max; i++)
                    {
                        if (output.Data[i] > 0)
                        {
                            gradInput.Data[i] = 1;
                        }
                        else
                        {
                            gradInput.Data[i] = 0;
                        }
                    }
                });

                factory.Get(creators[0]).Backward((gradInput).Mul(grad), this);
            }
            else if (creation_op == "sub_elem")
            {
                // d(x-y)/dx = 1 ; d(x-y)/dy = -1
                factory.Get(creators[0]).Backward(grad, this);
                factory.Get(creators[1]).Backward(grad.Neg(), this);
            }
            else if (creation_op == "sub_scalar")
            {
                factory.Get(creators[0]).Backward(grad, this);
            }
            else if (creation_op == "sigmoid")
            {
                // d(sigmoid)/dx = sigmoid * (1 - sigmoid); "this" holds the forward output
                FloatTensor self_nograd = this.Copy(autograd: false);
                factory.Get(creators[0]).Backward(self_nograd.Neg().Add(1f).Mul(self_nograd).Mul(grad), this);
            }
            else if (creation_op.Contains("softmax-"))
            {
                // softmax grad: out * (gradOut - sum(out * gradOut)) along the softmax dim
                // NOTE(review): local "c" below is never used — looks like leftover code.
                FloatTensor c = this.Copy(autograd: false);
                var dim = int.Parse(creation_op.Split('-')[1]);
                FloatTensor output = this;
                FloatTensor gradOutput = grad;
                if (!output.IsContiguous() || !gradOutput.IsContiguous())
                {
                    throw new NotImplementedException(
                        "Softmax Gradient does not support non-contiguous tensors at the moment!");
                }

                var outerSize = 1;
                var innerSize = 1;
                var dimSize = output.Shape[dim];
                for (var i = 0; i < dim; ++i)
                {
                    outerSize *= output.Shape[i];
                }

                for (var i = dim + 1; i < output.Shape.Length; ++i)
                {
                    innerSize *= output.Shape[i];
                }

                var dimStride = innerSize;
                var outerStride = dimSize * dimStride;
                var gradInput = output.Copy(autograd: false);
                var nCpu = SystemInfo.processorCount;
                Parallel.For(0, nCpu, workerId =>
                {
                    var max = (outerSize * innerSize) * (workerId + 1) / nCpu;
                    for (var i = (outerSize * innerSize) * workerId / nCpu; i < max; i++)
                    {
                        int outerIdx = i / innerSize;
                        int innerIdx = i % innerSize;

                        // works for contiguous!!
                        var index = outerIdx * outerStride + innerIdx;

                        float sum = 0;
                        for (var d = 0; d < dimSize; d++)
                        {
                            sum += output.Data[index + d * dimStride] * gradOutput.Data[index + d * dimStride];
                        }

                        for (var d = 0; d < dimSize; d++)
                        {
                            gradInput.Data[index + d * dimStride] = output.Data[index + d * dimStride] *
                                                                    (gradOutput.Data[index + d * dimStride] - sum);
                        }
                    }
                });

                gradInput.Autograd = false;
                factory.Get(creators[0]).Backward(gradInput, this);
            }
            else if (creation_op.Contains("sum"))
            {
                // undo the reduction: view the gradient with the summed dim as size 1
                // (or all dims as 1 for a full reduction) and expand back to parent shape
                FloatTensor parent = factory.Get(creators[0]);
                parent.Grad = null;
                int dim = int.Parse(creation_op.Split('_')[1]);
                if (dim >= 0)
                {
                    int[] view_shape = (int[]) parent.shape.Clone();
                    view_shape[dim] = 1;
                    parent.Backward(grad.View(view_shape).Expand(parent.shape).Contiguous());
                }
                else
                {
                    int[] view_shape = (int[]) parent.shape.Clone();
                    for (int i = 0; i < parent.shape.Length; i++)
                    {
                        view_shape[i] = 1;
                    }

                    parent.Backward(grad.View(view_shape).Expand(parent.shape).Contiguous());
                }
            }
            else if (creation_op == "transpose")
            {
                factory.Get(creators[0]).Backward(grad.Transpose());
            }
            else if (creation_op == "tanh")
            {
                // d(tanh)/dx = 1 - tanh^2; "this" holds the forward output
                FloatTensor c = this.Copy(autograd: false);
                factory.Get(creators[0]).Backward(c.Pow(2).Neg().Add(1f).Mul(grad), this);
            }
            else if (creation_op.Contains("view_"))
            {
                FloatTensor parent = factory.Get(creators[0]);
                parent.Grad = null; // prevents gradient from simply being added to the previous gradient
                // instead the backpropagated gradient is set to a new value.
                parent.Backward(this.Grad.View(parent.shape));
            }
            else
            {
                Debug.Log("Autograd couldn't find matching operation for:" + creation_op);
            }
        }
    }
    else
    {
        Debug.Log("Autograd off - skipping backprop at tensor:" + id + " with creation_op:" + creation_op);
    }
}
// hook autograd one parents - one scalar
/// <summary>
/// Graph hook for ops with one tensor parent ("this") and one scalar operand.
/// Reuses an existing child tensor when the same op was previously run with the
/// same scalar; otherwise allocates a result and a 1-element tensor wrapping the
/// scalar, recording both as creators.
/// </summary>
/// <param name="result">Optional pre-allocated output; found or created when null.</param>
/// <param name="x">The scalar operand.</param>
/// <param name="creation_op">Name of the forward op.</param>
/// <param name="inline">Inline ops mutate "this" and get no graph node.</param>
public FloatTensor HookAutograd(ref FloatTensor result, float x, string creation_op, bool inline)
{
    if (inline)
    {
        return (this);
    }

    bool autograd_pre_initialized = false;

    if (result == null)
    {
        bool child_pre_initialized = false;
        int child_index = 0;
        if (this.children_indices.Count > 0)
        {
            // look for an existing child created by the same op with the same scalar
            // NOTE(review): no "break" on match — if several children match, the LAST
            // one wins. Confirm whether that is intentional (siblings do the same).
            for (int i = 0; i < this.children_indices.Count; i++)
            {
                FloatTensor temp = factory.Get(children_indices[i]);
                if (temp.creation_op == creation_op)
                {
                    if (temp.creators.Count > 1)
                    {
                        // creators[1] is the 1-element tensor wrapping the scalar
                        FloatTensor temp2 = factory.Get(temp.creators[1]);
                        if (temp2.data[0] == x)
                        {
                            child_pre_initialized = true;
                            child_index = children_indices[i];
                        }
                    }
                }
            }
        }

        if (child_pre_initialized)
        {
            // reuse the existing output's memory, zeroed for the new forward pass
            autograd_pre_initialized = true;
            result = factory.Get(child_index);
            result.Zero_();
        }
        else
        {
            result = factory.Create(
                _shape: this.shape,
                _data: data,
                _dataBuffer: dataBuffer,
                _shapeBuffer: shapeBuffer,
                _shader: shader,
                _copyData: true,
                _dataOnGpu: dataOnGpu,
                _autograd: autograd,
                _keepgrads: keepgrads,
                _creation_op: creation_op);
        }
    }

    if (autograd_pre_initialized)
    {
        // existing node: just reset backprop counters
        this.ResetAutogradCounts();
        result.ResetAutogradCounts();
    }
    else
    {
        // wrap the scalar in a 1-element tensor so it can be recorded as a creator
        FloatTensor new_child = factory.Create(
            _shape: new int[] {1},
            _data: new float[] {x},
            _dataBuffer: dataBuffer,
            _shapeBuffer: shapeBuffer,
            _shader: shader,
            _copyData: true,
            _dataOnGpu: dataOnGpu,
            _autograd: autograd,
            _keepgrads: keepgrads,
            _creation_op: creation_op);

        result.InitGraph();
        result.creators.Add(this.id);
        result.creators.Add(new_child.id);
        result.creation_op = creation_op;

        children_indices.Add(result.Id);
        children_counts.Add(0);
    }

    return (result);
}
// hook autograd single parent
/// <summary>
/// Graph hook for ops with a single tensor parent ("this"). Reuses an existing
/// child created by the same op when possible, otherwise allocates a new result
/// (optionally with an explicit shape) and records "this" as its creator.
/// </summary>
/// <param name="result">Optional pre-allocated output; found or created when null.</param>
/// <param name="creation_op">Name of the forward op (also the reuse-matching key).</param>
/// <param name="inline">Inline ops mutate "this" and get no graph node.</param>
/// <param name="resultShape">Explicit output shape; when set, data is freshly allocated.</param>
public FloatTensor HookAutograd(ref FloatTensor result, string creation_op, bool inline = false,
    int[] resultShape = null)
{
    if (inline)
    {
        return (this);
    }

    bool autograd_pre_initialized = false;

    if (result == null)
    {
        bool child_pre_initialized = false;
        int child_index = 0;
        if (this.children_indices.Count > 0)
        {
            // match purely on creation_op (this op has no other parameters)
            // NOTE(review): no "break" on match — the LAST matching child wins.
            for (int i = 0; i < this.children_indices.Count; i++)
            {
                if (factory.Get(children_indices[i]).creation_op == creation_op)
                {
                    child_pre_initialized = true;
                    child_index = children_indices[i];
                }
            }
        }

        if (child_pre_initialized)
        {
            // reuse the existing output's memory, zeroed for the new forward pass
            autograd_pre_initialized = true;
            result = factory.Get(child_index);
            result.Zero_();
        }
        else
        {
            if (resultShape != null)
            {
                // initializes an empty tensor with the requested shape
                result = factory.Create(
                    _shape: resultShape,
                    _dataOnGpu: dataOnGpu,
                    _autograd: autograd,
                    _keepgrads: keepgrads,
                    _creation_op: creation_op);
            }
            else
            {
                // initializes a tensor with identical shape/buffers to "this"
                result = factory.Create(
                    _shape: this.shape,
                    _data: data,
                    _dataBuffer: dataBuffer,
                    _shapeBuffer: shapeBuffer,
                    _shader: shader,
                    _copyData: true,
                    _dataOnGpu: dataOnGpu,
                    _autograd: autograd,
                    _keepgrads: keepgrads,
                    _creation_op: creation_op);
            }
        }
    }

    if (autograd_pre_initialized)
    {
        // existing node: just reset backprop counters
        this.ResetAutogradCounts();
        result.ResetAutogradCounts();
    }
    else
    {
        // fresh node: wire creator/child bookkeeping
        result.InitGraph();
        result.creators.Add(this.id);
        result.creation_op = creation_op;
        children_indices.Add(result.Id);
        children_counts.Add(0);
    }

    return (result);
}
// hook autograd two parents
/// <summary>
/// Graph hook for ops with two tensor parents ("this" and <paramref name="x"/>).
/// Reuses an existing child created by the same op with the same second parent
/// when possible; otherwise allocates a result and records both parents as
/// creators (and the result as a child of both).
/// </summary>
/// <param name="result">Optional pre-allocated output; found or created when null.</param>
/// <param name="x">The second tensor parent.</param>
/// <param name="creation_op">Name of the forward op (also the reuse-matching key).</param>
/// <param name="inline">Inline ops mutate "this" and get no graph node.</param>
/// <param name="resultShape">Explicit output shape; when set, data is freshly allocated.</param>
public FloatTensor HookAutograd(ref FloatTensor result, ref FloatTensor x, string creation_op,
    bool inline = false, int[] resultShape = null)
{
    if (inline)
    {
        return (this);
    }

    // checks to see if the input has been seen previously. If so, then it assumes
    // that we should just use the previous computation graph instead of initializing
    // a new result. The assumption here is that if the same tensors are used to perform
    // the same operation, then they should output to the same memory instead of allocating
    // new memory.
    bool autograd_pre_initialized = false;

    if (result == null)
    {
        bool child_pre_initialized = false;
        int child_index = 0;
        if (this.children_indices.Count > 0)
        {
            // iterate through children
            // NOTE(review): no "break" on match — the LAST matching child wins.
            for (int i = 0; i < this.children_indices.Count; i++)
            {
                FloatTensor temp = factory.Get(children_indices[i]);

                // if a child was created using the same op as the one currently being called
                // and the child was also created using the same tensor as x
                // then it's exactly the same operation and we can re-use variables.
                if (temp.creation_op == creation_op && temp.creators.Contains(x.id))
                {
                    child_pre_initialized = true;
                    child_index = children_indices[i];
                }
            }
        }

        if (child_pre_initialized)
        {
            // reuse the existing output's memory, zeroed for the new forward pass
            autograd_pre_initialized = true;
            result = factory.Get(child_index);
            result.Zero_();
        }
        else
        {
            if (resultShape != null)
            {
                // initializes an empty tensor with new shape
                result = factory.Create(
                    _shape: resultShape,
                    _dataOnGpu: dataOnGpu,
                    _autograd: x.autograd && autograd,
                    _keepgrads: keepgrads,
                    _creation_op: creation_op);
            }
            else
            {
                // initializes an empty tensor with identical shape
                result = factory.Create(
                    _shape: this.shape,
                    _data: data,
                    _dataBuffer: dataBuffer,
                    _shapeBuffer: shapeBuffer,
                    _shader: shader,
                    _copyData: true,
                    _dataOnGpu: dataOnGpu,
                    _autograd: x.autograd && autograd, // if either tensor doesn't have gradients
                    _keepgrads: keepgrads,             // neither does the result. This might not end up being
                    _creation_op: creation_op);        // a good decision in the long run. We'll see.
            }

            // this is sortof a backup check. In theory, the result tensor should have been
            // initialized correctly.
            if (this.dataOnGpu)
            {
                result.Gpu(shader);
            }
        }
    }

    if (autograd_pre_initialized)
    {
        // existing node: reset backprop counters on all three parties
        this.ResetAutogradCounts();
        result.ResetAutogradCounts();
        x.ResetAutogradCounts();
    }
    else
    {
        // fresh node: record both parents and register the result as a child of each
        result.InitGraph();
        result.creators.Add(this.id);
        result.creators.Add(x.id);
        result.creation_op = creation_op;

        children_indices.Add(result.Id);
        children_counts.Add(0);

        x.children_indices.Add(result.Id);
        x.children_counts.Add(0);

        this.sibling = x.id;
    }

    return (result);
}