/// <summary>
/// Walks every shader stage, uploads the enabled constant buffers that the
/// stage's shader declares, and stores each buffer key in the pipeline state.
/// </summary>
/// <param name="Vmm">Memory manager used to resolve physical and host addresses.</param>
/// <param name="State">Pipeline state whose <c>ConstBufferKeys</c> table is updated.</param>
/// <param name="Keys">One shader key per stage, used to query constant buffer usage.</param>
private void UploadConstBuffers(NvGpuVmm Vmm, GalPipelineState State, long[] Keys)
{
    int StageIndex = 0;

    while (StageIndex < Keys.Length)
    {
        foreach (ShaderDeclInfo Usage in Gpu.Renderer.Shader.GetConstBufferUsage(Keys[StageIndex]))
        {
            ConstBuffer Entry = ConstBuffers[StageIndex][Usage.Cbuf];

            // Disabled bindings are neither uploaded nor recorded in the state.
            if (Entry.Enabled)
            {
                long PhysicalKey = Vmm.GetPhysicalAddress(Entry.Position);

                bool NeedsUpload = QueryKeyUpload(Vmm, PhysicalKey, Entry.Size, NvGpuBufferType.ConstBuffer);

                if (NeedsUpload)
                {
                    IntPtr HostPointer = Vmm.GetHostAddress(Entry.Position, Entry.Size);

                    Gpu.Renderer.Buffer.SetData(PhysicalKey, Entry.Size, HostPointer);
                }

                State.ConstBufferKeys[StageIndex][Usage.Cbuf] = PhysicalKey;
            }
        }

        StageIndex++;
    }
}
/// <summary>
/// Handles a constant buffer bind method: decodes the stage from the method
/// number and the slot/enable flag from the first argument, makes sure the
/// renderer has a buffer for the current address and size registers, and
/// stores the binding in <c>ConstBuffers</c>.
/// </summary>
/// <param name="Vmm">Memory manager used to resolve the buffer's physical address.</param>
/// <param name="PBEntry">Push buffer entry carrying the method number and its arguments.</param>
private void CbBind(NvGpuVmm Vmm, NvGpuPBEntry PBEntry)
{
    // Bind methods start at 0x904 and are spaced 8 apart, one per stage.
    int Stage = (PBEntry.Method - 0x904) >> 3;

    int Packed = PBEntry.Arguments[0];

    // Bit 0 is the enable flag, bits 4..8 hold the slot index.
    bool Enabled = (Packed & 1) != 0;
    int Index = (Packed >> 4) & 0x1f;

    long Position = MakeInt64From2xInt32(NvGpuEngine3dReg.ConstBufferAddress);
    long CbKey = Vmm.GetPhysicalAddress(Position);
    int Size = ReadRegister(NvGpuEngine3dReg.ConstBufferSize);

    bool AlreadyCached = Gpu.Renderer.Buffer.IsCached(CbKey, Size);

    if (!AlreadyCached)
    {
        Gpu.Renderer.Buffer.Create(CbKey, Size);
    }

    ConstBuffer[] StageBuffers = ConstBuffers[Stage];
    ConstBuffer Current = StageBuffers[Index];

    bool Unchanged = Current.Position == Position &&
                     Current.Enabled == Enabled &&
                     Current.Size == Size;

    if (Unchanged)
    {
        return;
    }

    // Write through the array element so the stored entry itself is updated.
    StageBuffers[Index].Position = Position;
    StageBuffers[Index].Enabled = Enabled;
    StageBuffers[Index].Size = Size;
}
/// <summary>
/// For each of the five shader program slots that is enabled, reads the data of
/// every enabled constant buffer from memory and hands it to the renderer.
/// </summary>
/// <param name="Memory">Memory from which the constant buffer bytes are read.</param>
private void UploadUniforms(AMemory Memory)
{
    long BasePosition = MakeInt64From2xInt32(NvGpuEngine3dReg.ShaderAddress);

    int Index = 0;

    while (Index < 5)
    {
        // The per-program register banks are 0x10 apart; the loop starts at bank 1.
        int BankOffset = (Index + 1) * 0x10;

        int Control = ReadRegister(NvGpuEngine3dReg.ShaderNControl + BankOffset);
        int Offset = ReadRegister(NvGpuEngine3dReg.ShaderNOffset + BankOffset);

        // Note: Vertex Program (B) is always enabled.
        bool Enable = (Control & 1) != 0 || Index == 0;

        if (Enable)
        {
            long ShaderPosition = BasePosition + (uint)Offset;

            for (int Slot = 0; Slot < ConstBuffers.Length; Slot++)
            {
                ConstBuffer Entry = ConstBuffers[Slot];

                if (!Entry.Enabled)
                {
                    continue;
                }

                long CbPosition = Entry.Position + Index * Entry.Size;

                byte[] Data = AMemoryHelper.ReadBytes(Memory, CbPosition, (uint)Entry.Size);

                Gpu.Renderer.SetConstBuffer(ShaderPosition, Slot, Data);
            }
        }

        Index++;
    }
}
/// <summary>
/// Uploads the enabled constant buffers declared by each stage's shader when
/// their memory region is reported as modified, and records the buffer keys in
/// the pipeline state. The whole operation is bracketed by profiler calls.
/// </summary>
/// <param name="vmm">Memory manager used to resolve addresses and read buffer contents.</param>
/// <param name="state">Pipeline state whose <c>ConstBufferKeys</c> table is updated.</param>
/// <param name="keys">One shader key per stage, used to query constant buffer usage.</param>
private void UploadConstBuffers(NvGpuVmm vmm, GalPipelineState state, long[] keys)
{
    Profile.Begin(Profiles.GPU.Engine3d.UploadConstBuffers);

    int stageIndex = 0;

    while (stageIndex < keys.Length)
    {
        foreach (CBufferDescriptor usage in _gpu.Renderer.Shader.GetConstBufferUsage(keys[stageIndex]))
        {
            ConstBuffer entry = _constBuffers[stageIndex][usage.Slot];

            // Disabled bindings are neither uploaded nor recorded in the state.
            if (entry.Enabled)
            {
                long physicalKey = vmm.GetPhysicalAddress(entry.Position);

                bool modified = _gpu.ResourceManager.MemoryRegionModified(
                    vmm,
                    physicalKey,
                    entry.Size,
                    NvGpuBufferType.ConstBuffer);

                if (modified)
                {
                    // Use the host pointer when one is available; otherwise copy the bytes out.
                    bool hasHostPointer = vmm.TryGetHostAddress(entry.Position, entry.Size, out IntPtr hostPointer);

                    if (hasHostPointer)
                    {
                        _gpu.Renderer.Buffer.SetData(physicalKey, entry.Size, hostPointer);
                    }
                    else
                    {
                        _gpu.Renderer.Buffer.SetData(physicalKey, vmm.ReadBytes(entry.Position, entry.Size));
                    }
                }

                state.ConstBufferKeys[stageIndex][usage.Slot] = physicalKey;
            }
        }

        stageIndex++;
    }

    Profile.End(Profiles.GPU.Engine3d.UploadConstBuffers);
}
/// <summary>
/// Handles a constant buffer bind method: decodes the stage from the method
/// number and the slot/enable flag from the call argument, makes sure the
/// renderer has a buffer for the current address and size registers, and
/// stores the binding in <c>_constBuffers</c>.
/// </summary>
/// <param name="vmm">Memory manager used to resolve the buffer's physical address.</param>
/// <param name="methCall">Method call carrying the method number and its argument.</param>
private void CbBind(NvGpuVmm vmm, GpuMethodCall methCall)
{
    // Bind methods start at 0x904 and are spaced 8 apart, one per stage.
    int stage = (methCall.Method - 0x904) >> 3;

    int packed = methCall.Argument;

    // Bit 0 is the enable flag, bits 4..8 hold the slot index.
    bool enabled = (packed & 1) != 0;
    int index = (packed >> 4) & 0x1f;

    long position = MakeInt64From2xInt32(NvGpuEngine3dReg.ConstBufferAddress);
    long cbKey = vmm.GetPhysicalAddress(position);
    int size = ReadRegister(NvGpuEngine3dReg.ConstBufferSize);

    bool alreadyCached = _gpu.Renderer.Buffer.IsCached(cbKey, size);

    if (!alreadyCached)
    {
        _gpu.Renderer.Buffer.Create(cbKey, size);
    }

    ConstBuffer[] stageBuffers = _constBuffers[stage];
    ConstBuffer current = stageBuffers[index];

    bool unchanged = current.Position == position &&
                     current.Enabled == enabled &&
                     current.Size == size;

    if (unchanged)
    {
        return;
    }

    // Write through the array element so the stored entry itself is updated.
    stageBuffers[index].Position = position;
    stageBuffers[index].Enabled = enabled;
    stageBuffers[index].Size = size;
}
/// <summary>
/// Creates the 3D engine: allocates the register file, registers the method
/// handlers, and allocates the constant buffer table and frame buffer set.
/// </summary>
/// <param name="Gpu">GPU instance this engine belongs to.</param>
public NvGpuEngine3d(NvGpu Gpu)
{
    this.Gpu = Gpu;

    Registers = new int[0xe00];

    Methods = new Dictionary<int, NvGpuMethod>();

    // Registers the same handler for Count method numbers, starting at Meth
    // and spaced Stride apart.
    void AddMethod(int Meth, int Count, int Stride, NvGpuMethod Method)
    {
        for (int Added = 0; Added < Count; Added++)
        {
            Methods.Add(Meth + Added * Stride, Method);
        }
    }

    AddMethod(0x585,  1, 1, VertexEndGl);
    AddMethod(0x674,  1, 1, ClearBuffers);
    AddMethod(0x6c3,  1, 1, QueryControl);
    AddMethod(0x8e4, 16, 1, CbData);
    AddMethod(0x904,  5, 8, CbBind);

    // 6 rows of 18 constant buffer entries each.
    ConstBuffers = new ConstBuffer[6][];

    int Row = 0;

    while (Row < ConstBuffers.Length)
    {
        ConstBuffers[Row] = new ConstBuffer[18];

        Row++;
    }

    FrameBuffers = new HashSet<long>();
}
/// <summary>
/// Uploads the enabled constant buffers declared by each stage's shader when
/// their memory region is reported as modified, and records the buffer keys in
/// the pipeline state.
/// </summary>
/// <param name="Vmm">Memory manager used to resolve addresses and read buffer contents.</param>
/// <param name="State">Pipeline state whose <c>ConstBufferKeys</c> table is updated.</param>
/// <param name="Keys">One shader key per stage, used to query constant buffer usage.</param>
private void UploadConstBuffers(NvGpuVmm Vmm, GalPipelineState State, long[] Keys)
{
    int StageIndex = 0;

    while (StageIndex < Keys.Length)
    {
        foreach (ShaderDeclInfo Usage in Gpu.Renderer.Shader.GetConstBufferUsage(Keys[StageIndex]))
        {
            ConstBuffer Entry = ConstBuffers[StageIndex][Usage.Cbuf];

            // Disabled bindings are neither uploaded nor recorded in the state.
            if (Entry.Enabled)
            {
                long PhysicalKey = Vmm.GetPhysicalAddress(Entry.Position);

                bool Modified = Gpu.ResourceManager.MemoryRegionModified(
                    Vmm,
                    PhysicalKey,
                    Entry.Size,
                    NvGpuBufferType.ConstBuffer);

                if (Modified)
                {
                    // Use the host pointer when one is available; otherwise copy the bytes out.
                    bool HasHostPointer = Vmm.TryGetHostAddress(Entry.Position, Entry.Size, out IntPtr HostPointer);

                    if (HasHostPointer)
                    {
                        Gpu.Renderer.Buffer.SetData(PhysicalKey, Entry.Size, HostPointer);
                    }
                    else
                    {
                        Gpu.Renderer.Buffer.SetData(PhysicalKey, Vmm.ReadBytes(Entry.Position, Entry.Size));
                    }
                }

                State.ConstBufferKeys[StageIndex][Usage.Cbuf] = PhysicalKey;
            }
        }

        StageIndex++;
    }
}
/// <summary>
/// For each of the five shader program slots that is enabled, passes the host
/// address and size of every enabled constant buffer of that slot to the
/// renderer's shader interface.
/// </summary>
/// <param name="Vmm">Memory manager used to obtain host addresses of the buffers.</param>
private void UploadUniforms(NvGpuVmm Vmm)
{
    long BasePosition = MakeInt64From2xInt32(NvGpuEngine3dReg.ShaderAddress);

    int Index = 0;

    while (Index < 5)
    {
        // The per-program register banks are 0x10 apart; the loop starts at bank 1.
        int BankOffset = (Index + 1) * 0x10;

        int Control = ReadRegister(NvGpuEngine3dReg.ShaderNControl + BankOffset);
        int Offset = ReadRegister(NvGpuEngine3dReg.ShaderNOffset + BankOffset);

        // Note: Vertex Program (B) is always enabled.
        bool Enable = (Control & 1) != 0 || Index == 0;

        if (Enable)
        {
            long ShaderPosition = BasePosition + (uint)Offset;

            for (int Slot = 0; Slot < ConstBuffers[Index].Length; Slot++)
            {
                ConstBuffer Entry = ConstBuffers[Index][Slot];

                if (!Entry.Enabled)
                {
                    continue;
                }

                IntPtr DataAddress = Vmm.GetHostAddress(Entry.Position, Entry.Size);

                Gpu.Renderer.Shader.SetConstBuffer(ShaderPosition, Slot, Entry.Size, DataAddress);
            }
        }

        Index++;
    }
}
/// <summary>
/// Creates the 3D engine: allocates the register file, registers the method
/// handlers, allocates the constant buffer table and the per-buffer-type
/// uploaded key lists, and writes default colour mask and blend registers.
/// </summary>
/// <param name="Gpu">GPU instance this engine belongs to.</param>
public NvGpuEngine3d(NvGpu Gpu)
{
    this.Gpu = Gpu;

    Registers = new int[0xe00];

    Methods = new Dictionary<int, NvGpuMethod>();

    // Registers the same handler for Count method numbers, starting at Meth
    // and spaced Stride apart.
    void AddMethod(int Meth, int Count, int Stride, NvGpuMethod Method)
    {
        for (int Added = 0; Added < Count; Added++)
        {
            Methods.Add(Meth + Added * Stride, Method);
        }
    }

    AddMethod(0x585,  1, 1, VertexEndGl);
    AddMethod(0x674,  1, 1, ClearBuffers);
    AddMethod(0x6c3,  1, 1, QueryControl);
    AddMethod(0x8e4, 16, 1, CbData);
    AddMethod(0x904,  5, 8, CbBind);

    // 6 rows of 18 constant buffer entries each.
    ConstBuffers = new ConstBuffer[6][];

    int Row = 0;

    while (Row < ConstBuffers.Length)
    {
        ConstBuffers[Row] = new ConstBuffer[18];

        Row++;
    }

    // One key list per buffer type.
    UploadedKeys = new List<long>[(int)NvGpuBufferType.Count];

    int TypeIndex = 0;

    while (TypeIndex < UploadedKeys.Length)
    {
        UploadedKeys[TypeIndex] = new List<long>();

        TypeIndex++;
    }

    // Ensure that all components are enabled by default.
    // FIXME: Is this correct?
    WriteRegister(NvGpuEngine3dReg.ColorMaskN, 0x1111);

    // Default blend state per render target: FuncAdd with source One and
    // destination Zero, for both RGB and alpha. Register banks are 8 apart.
    for (int Target = 0; Target < GalPipelineState.RenderTargetsCount; Target++)
    {
        int BankOffset = Target * 8;

        WriteRegister(NvGpuEngine3dReg.IBlendNEquationRgb   + BankOffset, (int)GalBlendEquation.FuncAdd);
        WriteRegister(NvGpuEngine3dReg.IBlendNFuncSrcRgb    + BankOffset, (int)GalBlendFactor.One);
        WriteRegister(NvGpuEngine3dReg.IBlendNFuncDstRgb    + BankOffset, (int)GalBlendFactor.Zero);
        WriteRegister(NvGpuEngine3dReg.IBlendNEquationAlpha + BankOffset, (int)GalBlendEquation.FuncAdd);
        WriteRegister(NvGpuEngine3dReg.IBlendNFuncSrcAlpha  + BankOffset, (int)GalBlendFactor.One);
        WriteRegister(NvGpuEngine3dReg.IBlendNFuncDstAlpha  + BankOffset, (int)GalBlendFactor.Zero);
    }
}
/// <summary>
/// Creates the 3D engine: allocates the register file, registers the method
/// handlers, allocates the constant buffer table, and writes default colour
/// mask, sRGB, front face and blend registers.
/// </summary>
/// <param name="gpu">GPU instance this engine belongs to.</param>
public NvGpuEngine3d(NvGpu gpu)
{
    _gpu = gpu;

    Registers = new int[0xe00];

    _methods = new Dictionary<int, NvGpuMethod>();

    // Registers the same handler for count method numbers, starting at meth
    // and spaced stride apart.
    void AddMethod(int meth, int count, int stride, NvGpuMethod method)
    {
        for (int added = 0; added < count; added++)
        {
            _methods.Add(meth + added * stride, method);
        }
    }

    AddMethod(0x585,  1, 1, VertexEndGl);
    AddMethod(0x674,  1, 1, ClearBuffers);
    AddMethod(0x6c3,  1, 1, QueryControl);
    AddMethod(0x8e4, 16, 1, CbData);
    AddMethod(0x904,  5, 8, CbBind);

    // 6 rows of 18 constant buffer entries each.
    _constBuffers = new ConstBuffer[6][];

    int row = 0;

    while (row < _constBuffers.Length)
    {
        _constBuffers[row] = new ConstBuffer[18];

        row++;
    }

    // Ensure that all components are enabled by default.
    // FIXME: Is this correct?
    WriteRegister(NvGpuEngine3dReg.ColorMaskN, 0x1111);

    WriteRegister(NvGpuEngine3dReg.FrameBufferSrgb, 1);

    WriteRegister(NvGpuEngine3dReg.FrontFace, (int)GalFrontFace.Cw);

    // Default blend state per render target: FuncAdd with source One and
    // destination Zero, for both RGB and alpha. Register banks are 8 apart.
    for (int target = 0; target < GalPipelineState.RenderTargetsCount; target++)
    {
        int bankOffset = target * 8;

        WriteRegister(NvGpuEngine3dReg.IBlendNEquationRgb   + bankOffset, (int)GalBlendEquation.FuncAdd);
        WriteRegister(NvGpuEngine3dReg.IBlendNFuncSrcRgb    + bankOffset, (int)GalBlendFactor.One);
        WriteRegister(NvGpuEngine3dReg.IBlendNFuncDstRgb    + bankOffset, (int)GalBlendFactor.Zero);
        WriteRegister(NvGpuEngine3dReg.IBlendNEquationAlpha + bankOffset, (int)GalBlendEquation.FuncAdd);
        WriteRegister(NvGpuEngine3dReg.IBlendNFuncSrcAlpha  + bankOffset, (int)GalBlendFactor.One);
        WriteRegister(NvGpuEngine3dReg.IBlendNFuncDstAlpha  + bankOffset, (int)GalBlendFactor.Zero);
    }
}
/// <summary>
/// Creates the 3D engine: allocates the register file, registers the method
/// handlers, allocates the constant buffer table and the per-buffer-type
/// uploaded key lists, and writes the default colour mask register.
/// </summary>
/// <param name="Gpu">GPU instance this engine belongs to.</param>
public NvGpuEngine3d(NvGpu Gpu)
{
    this.Gpu = Gpu;

    Registers = new int[0xe00];

    Methods = new Dictionary<int, NvGpuMethod>();

    // Registers the same handler for Count method numbers, starting at Meth
    // and spaced Stride apart.
    void AddMethod(int Meth, int Count, int Stride, NvGpuMethod Method)
    {
        for (int Added = 0; Added < Count; Added++)
        {
            Methods.Add(Meth + Added * Stride, Method);
        }
    }

    AddMethod(0x585,  1, 1, VertexEndGl);
    AddMethod(0x674,  1, 1, ClearBuffers);
    AddMethod(0x6c3,  1, 1, QueryControl);
    AddMethod(0x8e4, 16, 1, CbData);
    AddMethod(0x904,  5, 8, CbBind);

    // 6 rows of 18 constant buffer entries each.
    ConstBuffers = new ConstBuffer[6][];

    int Row = 0;

    while (Row < ConstBuffers.Length)
    {
        ConstBuffers[Row] = new ConstBuffer[18];

        Row++;
    }

    // One key list per buffer type.
    UploadedKeys = new List<long>[(int)NvGpuBufferType.Count];

    int TypeIndex = 0;

    while (TypeIndex < UploadedKeys.Length)
    {
        UploadedKeys[TypeIndex] = new List<long>();

        TypeIndex++;
    }

    // Ensure that all components are enabled by default.
    // FIXME: Is this correct?
    WriteRegister(NvGpuEngine3dReg.ColorMaskN, 0x1111);
}
/// <summary>
/// Visits every slot of the pipeline state's constant buffer key table, uploads
/// the matching constant buffer when it is enabled and the upload query says
/// so, and stores the buffer position as the key for that slot.
/// </summary>
/// <param name="Vmm">Memory manager used to obtain host addresses of the buffers.</param>
/// <param name="State">Pipeline state whose <c>ConstBufferKeys</c> table is filled in.</param>
private void UploadConstBuffers(NvGpuVmm Vmm, GalPipelineState State)
{
    int StageIndex = 0;

    while (StageIndex < State.ConstBufferKeys.Length)
    {
        int Slot = 0;

        while (Slot < State.ConstBufferKeys[StageIndex].Length)
        {
            ConstBuffer Entry = ConstBuffers[StageIndex][Slot];

            long Key = Entry.Position;

            // The upload query is only consulted for enabled buffers.
            bool NeedsUpload = Entry.Enabled &&
                               QueryKeyUpload(Vmm, Key, Entry.Size, NvGpuBufferType.ConstBuffer);

            if (NeedsUpload)
            {
                IntPtr HostPointer = Vmm.GetHostAddress(Key, Entry.Size);

                Gpu.Renderer.Buffer.SetData(Key, Entry.Size, HostPointer);
            }

            // The key is recorded for every slot, enabled or not.
            State.ConstBufferKeys[StageIndex][Slot] = Key;

            Slot++;
        }

        StageIndex++;
    }
}
/// <summary>
/// Runs the GPU t-SNE iteration on the given data table and returns the
/// resulting low-dimensional coordinates.
/// </summary>
/// <param name="X">
/// Input table with one row per data point. The column count is taken from the
/// first row. When <c>MetricType</c> is 1 the table is passed to
/// <c>NormalizeTable</c> before upload.
/// </param>
/// <returns>
/// One row per input row: three values when the output dimension is 3, two
/// when it is 2, one when it is 1. The result is passed through
/// <c>PcaNormalize.DoNormalize</c> when <c>AutoNormalize</c> is set.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="X"/> is null.</exception>
/// <exception cref="ArgumentException"><paramref name="X"/> has no rows.</exception>
public float[][] Fit(float[][] X)
{
    // Reject unusable input before any GPU resource is created.
    if (X == null)
    {
        throw new ArgumentNullException(nameof(X));
    }

    if (X.Length == 0)
    {
        throw new ArgumentException("The input table must contain at least one row.", nameof(X));
    }

    int exaggerationLength = (int)(MaxEpochs * ExaggerationRatio);
    int N = X.Length;

    // Everything released in the finally block is declared up front so that it
    // can be disposed no matter where a failure occurs.
    Buffer Y2Buf = null;
    Buffer Y3Buf = null;
    Buffer Y3StagingBuf = null;
    Buffer Y2StagingBuf = null;
    Buffer v2Buf = null;
    Buffer v3Buf = null;
    ComputeShader csOneStep = null;
    ComputeShader csSumUp = null;

    float[][] Y;

    try
    {
        gpu = new GpuDevice();
        cc = gpu.CreateConstantBuffer<TsneMapConstants>(0);

        cc.c.columns = X[0].Length;
        cc.c.N = N;
        cc.c.outDim = OutDim;
        cc.c.metricType = MetricType;

        #region Initialize Y
        if (cc.c.outDim <= 2)
        {
            Y2Buf = gpu.CreateBufferRW(N, 8, 3);
            Y2StagingBuf = gpu.CreateStagingBuffer(Y2Buf);
            v2Buf = gpu.CreateBufferRW(N, 2 * 8, 5);
        }
        else
        {
            Y3Buf = gpu.CreateBufferRW(N, 12, 4);
            Y3StagingBuf = gpu.CreateStagingBuffer(Y3Buf);
            v3Buf = gpu.CreateBufferRW(N, 2 * 12, 6);
        }

        float rang = 0.05f;
        // Fixed seed: the initial coordinates are the same on every run.
        Random rGenerator = new Random(435243);

        if (cc.c.outDim <= 2)
        {
            using (var ws = gpu.NewWriteStream(v2Buf))
            {
                for (int row = 0; row < N; row++)
                {
                    ws.Write<float>(0, 1, 0, 1);
                }
            }

            using (var ws = gpu.NewWriteStream(Y2Buf))
            {
                for (int row = 0; row < N; row++)
                {
                    for (int col = 0; col < cc.c.outDim; col++)
                    {
                        ws.Write((float)(rang * rGenerator.NextDouble() - rang / 2));
                    }

                    // A 1-D map still occupies a two-float record; pad the second value.
                    if (cc.c.outDim == 1)
                    {
                        ws.Write(0.0f);
                    }
                }
            }
        }
        else
        {
            using (var ws = gpu.NewWriteStream(v3Buf))
            {
                for (int row = 0; row < N; row++)
                {
                    ws.Write<float>(0, 1, 0, 1, 0, 1);
                }
            }

            using (var ws = gpu.NewWriteStream(Y3Buf))
            {
                for (int row = 0; row < N; row++)
                {
                    for (int col = 0; col < cc.c.outDim; col++)
                    {
                        ws.Write((float)(rang * rGenerator.NextDouble() - rang / 2));
                    }
                }
            }
        }
        #endregion

        #region Upload data table and initialize the distance matrix
        // Used to aggregate values created by parallel threads.
        // The size of groupMaxBuf must be large enough to hold a float value for each thread started in parallel.
        // Notice: gpu.Run(k) will start k*GROUP_SIZE threads.
        int gpSize = Math.Max(GpuGroupSize, MaxGroupNumber * GroupSize);
        gpSize = Math.Max(gpSize, MaxGroupNumberHyp * GroupSizeHyp);
        groupMaxBuf = gpu.CreateBufferRW(gpSize, 4, 7);

        resultBuf = gpu.CreateBufferRW(3, 4, 2); // to receive the total changes.
        resultStaging = gpu.CreateStagingBuffer(resultBuf);

        tableBuf = gpu.CreateBufferRO(N * cc.c.columns, 4, 0);

        if (MetricType == 1)
        {
            NormalizeTable(X);
        }

        gpu.WriteMarix(tableBuf, X, true);

        const int MinCpuDimension = 100; // minimal dimension to trigger CPU caching.
        const int MaxDimension = 64;     // maximal dimension (table columns) for fast EuclideanNoCache shader. Must be the same as MAX_DIMENSION.
        const int MaxDimensionS = 32;    // maximal dimension (table columns) for fast EuclideanNoCache shader. Must be the same as MAX_DIMENSIONs.

        if (N <= CacheLimit)
        {
            cachingMode = CachingMode.OnGpu;
        }
        else if ((cc.c.columns > MinCpuDimension) && ((double)N * N * 4) < ((double)MaxCpuCacheSize * 1024.0 * 1024.0))
        {
            cachingMode = CachingMode.OnCpu;
        }
        else if (cc.c.columns < MaxDimensionS)
        {
            cachingMode = CachingMode.OnFlySmS;
        }
        else if (cc.c.columns < MaxDimension)
        {
            cachingMode = CachingMode.OnFlySm;
        }
        else
        {
            cachingMode = CachingMode.OnFly;
        }
        #endregion

        cc.c.targetH = (float)Math.Log(PerplexityRatio * N);

        if (cachingMode == CachingMode.OnGpu)
        {
            CalculateP();
        }
        else if (cachingMode == CachingMode.OnCpu)
        {
            InitializePCpu();
        }
        else // (cachingMode == CachingMode.OnFly[Sm,SmS])
        {
            InitializeP();
        }

        using (var sd = gpu.LoadShader("TsneDx.CalculateSumQ.cso"))
        {
            gpu.SetShader(sd);
            cc.c.groupNumber = 256;

            for (int i = 0; i < N; i += cc.c.groupNumber)
            {
                cc.c.blockIdx = i;
                cc.Upload();
                gpu.Run(cc.c.groupNumber);
            }

            // A final dispatch with blockIdx = -1 follows the per-block passes.
            cc.c.blockIdx = -1;
            cc.Upload();
            gpu.Run();
        }

        var sdNames = new Dictionary<CachingMode, string>()
        {
            { CachingMode.OnGpu, "TsneDx.OneStep.cso" },
            { CachingMode.OnCpu, "TsneDx.OneStepCpuCache.cso" },
            { CachingMode.OnFly, "TsneDx.OneStepNoCache.cso" },
            { CachingMode.OnFlySm, "TsneDx.FastStep.cso" },
            { CachingMode.OnFlySmS, "TsneDx.FastStepS.cso" },
        };

        csOneStep = gpu.LoadShader(sdNames[cachingMode]);
        csSumUp = gpu.LoadShader("TsneDx.OneStepSumUp.cso");

        int stepCounter = 0;

        while (true)
        {
            // Choose the factor applied to P for this step.
            if (stepCounter < exaggerationLength)
            {
                if (ExaggerationSmoothen)
                {
                    int len = (int)(0.9 * MaxEpochs);

                    if (stepCounter < len)
                    {
                        double t = (double)stepCounter / len;
                        t = Math.Sqrt(Math.Sqrt(t));
                        cc.c.PFactor = (float)((1 - t) * ExaggerationFactor + t);
                    }
                    else
                    {
                        cc.c.PFactor = 1.0f;
                    }
                }
                else
                {
                    cc.c.PFactor = (float)ExaggerationFactor;
                }
            }
            else
            {
                cc.c.PFactor = 1.0f;
            }

            gpu.SetShader(csOneStep);

            if (cachingMode == CachingMode.OnGpu)
            {
                cc.c.groupNumber = MaxGroupNumber;

                // Notice: cc.c.groupNumber*GroupSize must fit into groupMax[].
                for (int bIdx = 0; bIdx < N; bIdx += cc.c.groupNumber * GroupSize)
                {
                    cc.c.blockIdx = bIdx;
                    cc.Upload();
                    gpu.Run(cc.c.groupNumber);
                }

                cc.c.groupNumber = MaxGroupNumber * GroupSize;
            }
            else if (cachingMode == CachingMode.OnCpu)
            {
                int bSize = MaxGroupNumberHyp * GroupSizeHyp;
                cc.c.groupNumber = MaxGroupNumberHyp;

                for (int bIdx = 0; bIdx < N; bIdx += bSize)
                {
                    gpu.WriteArray(cpuP, bIdx, Math.Min(N, bIdx + bSize), P2Buf);
                    cc.c.blockIdx = bIdx;
                    cc.Upload();
                    gpu.Run(cc.c.groupNumber);
                }

                cc.c.groupNumber = Math.Min(N, bSize);
            }
            else if ((cachingMode == CachingMode.OnFlySm) || (cachingMode == CachingMode.OnFlySmS))
            {
                const int GrSize = 64; // This value must match that of GR_SIZE in TsneMap.hlsl.
                cc.c.groupNumber = MaxGroupNumber;

                for (int bIdx = 0; bIdx < N; bIdx += cc.c.groupNumber * GrSize)
                {
                    cc.c.blockIdx = bIdx;
                    cc.Upload();
                    gpu.Run(cc.c.groupNumber);
                }

                cc.c.groupNumber = cc.c.groupNumber * GrSize;
            }
            else // cachingMode==CachingMode.OnFly
            {
                cc.c.groupNumber = 128;

                for (int bIdx = 0; bIdx < N; bIdx += cc.c.groupNumber)
                {
                    cc.c.blockIdx = bIdx;
                    cc.Upload();
                    gpu.Run(cc.c.groupNumber);
                }
            }

            // Notice: cc.c.groupNumber must be number of partial sumQ_next, which add up to sumQ for the next step.
            gpu.SetShader(csSumUp);
            cc.Upload();
            gpu.Run();

            currentVariation = gpu.ReadRange<float>(resultStaging, resultBuf, 3)[2] / N;

            cc.c.mom = (float)((stepCounter < (MaxEpochs * momentumSwitch)) ? momentum : finalMomentum);
            stepCounter++;

            // Progress output: a dot every 10 steps, a line break every 500.
            if (stepCounter % 10 == 0)
            {
                Console.Write('.');
            }

            if (stepCounter % 500 == 0)
            {
                Console.WriteLine();
            }

            // Stop at the epoch limit, or once the variation falls below the
            // threshold after the exaggeration phase.
            if ((stepCounter >= MaxEpochs) || ((stepCounter >= (2 + exaggerationLength)) && (currentVariation < stopVariation)))
            {
                break;
            }
        }

        Console.WriteLine();

        Y = new float[N][];

        using (var rs = gpu.NewReadStream((cc.c.outDim == 3) ? Y3StagingBuf : Y2StagingBuf, (cc.c.outDim == 3) ? Y3Buf : Y2Buf))
        {
            int outVDim = (cc.c.outDim == 3) ? 3 : 2;

            for (int row = 0; row < N; row++)
            {
                Y[row] = rs.ReadRange<float>(outVDim);
            }
        }

        // 1-D maps are stored as two-float records; keep only the first value.
        if (cc.c.outDim == 1)
        {
            for (int i = 0; i < N; i++)
            {
                Y[i] = new float[] { Y[i][0] };
            }
        }
    }
    finally
    {
        // Release GPU resources on both the success and the failure path.
        // Entries that were never created are passed as null, exactly as the
        // unused Y2*/Y3* buffers already are on the success path.
        TsneDx.SafeDispose(csSumUp, csOneStep, PBuf, P2Buf, distanceBuf, tableBuf, resultBuf, resultStaging, groupMaxBuf, Y3Buf, Y3StagingBuf, v3Buf, Y2Buf, Y2StagingBuf, v2Buf, cc, gpu);
    }

    return AutoNormalize ? PcaNormalize.DoNormalize(Y) : Y;
}