/// <summary>
/// Assigns <paramref name="rValue"/> to <paramref name="lValue"/> element by element
/// using two-dimensional (row, column) indexing.
/// </summary>
/// <typeparam name="T">Element type of both values.</typeparam>
/// <param name="lValue">Destination; its layout shape supplies the row and column counts.</param>
/// <param name="rValue">Source; its reader is requested for the destination's shape.</param>
/// <returns>
/// <c>true</c> when the current context is a GPU or CPU context and the assignment was issued;
/// <c>false</c> for any other context type.
/// </returns>
public bool AssignByRank2<T>(ILValue<T> lValue, IRValue<T> rValue)
{
    var shape = lValue.Layout.Shape;
    var rowCount = shape[0];
    var colCount = shape[1];

    // Reader and writer are resolved up front, before the context type is inspected.
    var readAt = rValue.BufferReader.GetReader2(shape);
    var writeAt = lValue.Buffer.Writer2;

    switch (Context.Type)
    {
        case ContextType.Gpu:
        {
            var gpuStream = Context.ToGpuContext().Stream;
            var total = rowCount * colCount;

            // The stream iterates a flat index range; each flat index is
            // decomposed back into its (row, column) pair.
            gpuStream.For(0L, total, flat =>
            {
                var r = flat / colCount;
                var c = flat % colCount;
                writeAt(r, c, readAt(r, c));
            });
            return true;
        }

        case ContextType.Cpu:
        {
            for (var r = 0L; r < rowCount; ++r)
            {
                for (var c = 0L; c < colCount; ++c)
                {
                    writeAt(r, c, readAt(r, c));
                }
            }
            return true;
        }

        default:
            return false;
    }
}
/// <summary>
/// Copies the contents of <paramref name="srcTensor"/> into <paramref name="dstTensor"/>.
/// </summary>
/// <typeparam name="T">Element type of both tensors.</typeparam>
/// <param name="context">Context the copy is issued from; decides whether a GPU stream is used.</param>
/// <param name="dstTensor">Destination tensor.</param>
/// <param name="srcTensor">Source tensor; must have the same layout shape as the destination.</param>
/// <returns>
/// The task returned by the CPU context's <c>Assign</c> when both tensors are on the CPU device;
/// otherwise a task wrapping the memory copy, which has already been started and waited on.
/// </returns>
/// <exception cref="InvalidOperationException">The two layout shapes differ.</exception>
/// <exception cref="NotImplementedException">
/// The tensors are not both on the CPU device and their layout strides differ.
/// </exception>
public static Task Copy<T>(this Context context, Tensor<T> dstTensor, Tensor<T> srcTensor)
{
    var sameShape = srcTensor.Layout.Shape.SequenceEqual(dstTensor.Layout.Shape);
    if (!sameShape)
    {
        throw new InvalidOperationException($"Copy require same shape! dst({dstTensor.Shape}) src({srcTensor.Shape})");
    }

    // When both tensors are on the CPU device a raw copy is not used;
    // the work is delegated to the CPU context's Assign instead.
    var bothOnCpu = dstTensor.Device == Device.CpuDevice && srcTensor.Device == Device.CpuDevice;
    if (bothOnCpu)
    {
        return Context.CpuContext.Assign(dstTensor, srcTensor);
    }

    // Only identical strides are supported for a raw copy. Whether the buffer
    // lengths are actually correct cannot be verified at this point.
    if (!srcTensor.Layout.Strides.SequenceEqual(dstTensor.Layout.Strides))
    {
        throw new NotImplementedException();
    }

    var dstBuffer = dstTensor.Memory.Memory;
    var srcBuffer = srcTensor.Memory.Memory;
    var dstStart = dstTensor.Memory.Offset;
    var srcStart = srcTensor.Memory.Offset;

    // Copy no more than the shorter of the two memory regions.
    var count = Math.Min(dstTensor.Memory.Length, srcTensor.Memory.Length);

    // The stream-based copy is used only when the context is a GPU context
    // and its device matches at least one of the two tensors.
    var useGpuStream = context.Type == ContextType.Gpu
                       && (context.Device == dstTensor.Device || context.Device == srcTensor.Device);

    Action copy;
    if (useGpuStream)
    {
        var gpuStream = context.ToGpuContext().Stream;
        copy = () => Memory.XiangCopy2(gpuStream, srcBuffer, srcStart, dstBuffer, dstStart, count);
    }
    else
    {
        copy = () => Memory.XiangCopy(srcBuffer, srcStart, dstBuffer, dstStart, count);
    }

    // TODO:@RDE
    // The task is started and waited on here, so the caller blocks until the copy has run.
    var copyTask = new Task(copy);
    copyTask.Start();
    copyTask.Wait();
    return copyTask;
}
/// <summary>
/// Assigns <paramref name="rValue"/> to <paramref name="lValue"/> element by element
/// using three-dimensional indexing.
/// </summary>
/// <typeparam name="T">Element type of both values.</typeparam>
/// <param name="lValue">Destination; the first three entries of its layout shape give the extents.</param>
/// <param name="rValue">Source; its reader is requested for the destination's shape.</param>
/// <returns>
/// <c>true</c> when the current context is a GPU or CPU context and the assignment was issued;
/// <c>false</c> for any other context type.
/// </returns>
public bool AssignByRank3<T>(ILValue<T> lValue, IRValue<T> rValue)
{
    var shape = lValue.Layout.Shape;
    var dim0 = shape[0];
    var dim1 = shape[1];
    var dim2 = shape[2];

    // Reader and writer are resolved up front, before the context type is inspected.
    var readAt = rValue.BufferReader.GetReader3(shape);
    var writeAt = lValue.Buffer.Writer3;

    switch (Context.Type)
    {
        case ContextType.Gpu:
        {
            var gpuStream = Context.ToGpuContext().Stream;

            // Number of elements in one slice along the first dimension.
            var planeSize = dim1 * dim2;

            // The stream iterates a flat index range; each flat index is
            // split into (x, y, z) with the last dimension varying fastest.
            gpuStream.For(0L, dim0 * planeSize, flat =>
            {
                var inPlane = flat % planeSize;
                var x = flat / planeSize;
                var y = inPlane / dim2;
                var z = inPlane % dim2;
                writeAt(x, y, z, readAt(x, y, z));
            });
            return true;
        }

        case ContextType.Cpu:
        {
            for (var x = 0L; x < dim0; ++x)
            {
                for (var y = 0L; y < dim1; ++y)
                {
                    for (var z = 0L; z < dim2; ++z)
                    {
                        writeAt(x, y, z, readAt(x, y, z));
                    }
                }
            }
            return true;
        }

        default:
            return false;
    }
}
/// <summary>
/// Assigns <paramref name="rValue"/> to <paramref name="lValue"/> through a single flat index,
/// using the flat reader and flat writer of the two values.
/// </summary>
/// <typeparam name="T">Element type of both values.</typeparam>
/// <param name="lValue">Destination; <c>Layout.Shape.Length</c> bounds the flat index range.</param>
/// <param name="rValue">Source; its flat reader is requested for the destination's shape.</param>
/// <returns>
/// <c>true</c> when the current context is a GPU or CPU context and the assignment was issued;
/// <c>false</c> for any other context type.
/// </returns>
public bool AssignByFlat1<T>(ILValue<T> lValue, IRValue<T> rValue)
{
    var shape = lValue.Layout.Shape;

    // NOTE(review): presumably Shape.Length is the total element count rather
    // than the number of dimensions - confirm against the Shape type.
    var count = shape.Length;

    // Reader and writer are resolved up front, before the context type is inspected.
    var readAt = rValue.BufferReader.GetFlatReader1(shape);
    var writeAt = lValue.Buffer.FlatWriter1;

    switch (Context.Type)
    {
        case ContextType.Gpu:
        {
            var gpuStream = Context.ToGpuContext().Stream;
            gpuStream.For(0L, count, index => writeAt(index, readAt(index)));
            return true;
        }

        case ContextType.Cpu:
        {
            for (var index = 0L; index < count; ++index)
            {
                writeAt(index, readAt(index));
            }
            return true;
        }

        default:
            return false;
    }
}