/// <summary>
/// Fills the N x N jagged array <c>cpuP</c> (N is read from <c>cc.c.N</c>) in four stages:
/// 1. runs the "PartialDistance2" shader in blocks of rows and unpacks the returned
///    strict lower triangle into <c>cpuP</c>;
/// 2. rescales all entries so the largest one equals <c>DistanceScale</c>, mirrors the
///    lower triangle into the upper one and zeroes the diagonal;
/// 3. pushes the rows through the "Dist2Affinity" shader band by band, reading the
///    results back into the same rows;
/// 4. symmetrizes (<c>P[i][j] + P[j][i]</c>), normalizes by twice the upper-triangle sum,
///    floors every off-diagonal entry at <c>eps</c> and sets the diagonal to 1.
/// Also (re)creates the GPU buffers <c>PBuf</c> and <c>P2Buf</c>.
/// </summary>
/// <exception cref="System.Exception">
/// Thrown when the largest value read back in stage 1 is zero, or when the
/// symmetrized sum in stage 4 is zero.
/// </exception>
void InitializePCpu() {
    int N = cc.c.N;
    const float DistanceScale = 100.0f;
    const float eps = 2.22e-16f;

    // Private gate for the parallel reductions below. Locking on `this` would share
    // the monitor with any outside code that happens to lock on this instance.
    object gate = new object();

    int bandSize = Math.Min(N, MaxGroupNumberHyp * GroupSizeHyp);
    // NOTE(review): the trailing arguments (4, 1) / (4, 7) are presumably element
    // stride in bytes and shader slot index - confirm against the gpu helper.
    PBuf = gpu.CreateBufferRW(bandSize * N, 4, 1);
    P2Buf = gpu.CreateBufferDynamic(bandSize * N, 4, 7); // dynamic buffer for fast uploading. Linked to Pcpu[] on HLSL.

    int blockSize = 128; // Calculate so many rows per dispatch.

    cpuP = new float[N][];
    for (int i = 0; i < N; i++) {
        cpuP[i] = new float[N];
    }

    // Stage 1: read the packed strict lower triangle, one block of rows per dispatch.
    using (var distanceBuf = gpu.CreateBufferRW(blockSize * N, 4, 0))
    using (var stagingBuf = gpu.CreateStagingBuffer(distanceBuf))
    using (var sd = gpu.LoadShader("TsneDx.PartialDistance2.cso")) {
        gpu.SetShader(sd);
        for (int iBlock = 0; iBlock < N; iBlock += blockSize) {
            cc.c.blockIdx = iBlock;
            cc.Upload();
            gpu.Run(blockSize);

            int iBlock2 = Math.Min(iBlock + blockSize, N);
            // Row r contributes r entries, so the block holds
            // sum(r, r = iBlock .. iBlock2-1) = (iBlock2 - iBlock) * (iBlock + iBlock2 - 1) / 2
            // values. The factored form equals
            // (iBlock2*(iBlock2-1) - iBlock*(iBlock-1)) / 2 but keeps the
            // intermediate products small, so it does not rely on int wraparound.
            int blockLen = (iBlock2 - iBlock) * (iBlock + iBlock2 - 1) / 2;
            float[] ret = gpu.ReadRange<float>(stagingBuf, distanceBuf, blockLen);

            int idx = 0;
            for (int row = iBlock; row < iBlock2; row++) {
                // The first `row` slots of cpuP[row] receive the entries for columns 0..row-1.
                Array.Copy(ret, idx, cpuP[row], 0, row);
                idx += row;
            }
        }
    }

    // Largest entry over rows 1..N-1 (row 0 has no lower-triangle entries).
    double distanceFactor = double.MinValue;
    MT.For(1, N, i => {
        float maxV = cpuP[i].Max();
        lock (gate) {
            distanceFactor = Math.Max(distanceFactor, maxV);
        }
    });

    if (distanceFactor == 0) {
        throw new System.Exception("Distance metric degenerated: all components are zero.");
    }

    // Stage 2: scale the distance to the manageable range [0, 100.0] to avoid
    // degradation with the exp function.
    distanceFactor = DistanceScale / distanceFactor;
    MT.For(1, N, i => {
        for (int j = 0; j < i; j++) {
            cpuP[i][j] = (float)(cpuP[i][j] * distanceFactor);
        }
    });

    // Mirror the lower triangle into the upper one. Each iteration reads only
    // row i's lower part and writes only column i of earlier rows.
    MT.For(0, N, i => {
        for (int j = 0; j < i; j++) {
            cpuP[j][i] = cpuP[i][j];
        }
        cpuP[i][i] = 0;
    });

    // Stage 3: transform the rows on the GPU, bSize rows per dispatch.
    int bSize = MaxGroupNumberHyp * GroupSizeHyp;
    using (var sd = gpu.LoadShader("TsneDx.Dist2Affinity.cso"))
    using (var stagingBuf = gpu.CreateStagingBuffer(PBuf)) {
        gpu.SetShader(sd);
        for (int iBlock = 0; iBlock < N; iBlock += bSize) {
            cc.c.blockIdx = iBlock;
            cc.Upload();
            int iBlock2 = Math.Min(N, iBlock + bSize);

            using (var ws = gpu.NewWriteStream(PBuf)) {
                for (int row = iBlock; row < iBlock2; row++) {
                    ws.WriteRange(cpuP[row]);
                }
            }

            gpu.Run(MaxGroupNumberHyp);

            using (var rs = gpu.NewReadStream(stagingBuf, PBuf)) {
                for (int row = iBlock; row < iBlock2; row++) {
                    rs.ReadRange(cpuP[row], 0, N);
                }
            }
        }
    }

    // Stage 4: symmetrize into the upper triangle and accumulate its total.
    double sum = 0;
    MT.For(0, N, i => {
        double sum2 = 0.0;
        for (int j = i + 1; j < N; j++) {
            cpuP[i][j] += cpuP[j][i];
            sum2 += cpuP[i][j];
        }
        lock (gate) {
            sum += sum2;
        }
    });

    if (sum == 0) {
        throw new System.Exception("Perplexity too small!");
    }

    // The upper triangle holds each symmetric pair once; doubling gives the
    // total over both triangles.
    sum *= 2;
    MT.For(0, N, i => {
        for (int j = i + 1; j < N; j++) {
            // Normalize, floor at eps, and mirror back to the lower triangle.
            cpuP[i][j] = (float)Math.Max(cpuP[i][j] / sum, eps);
            cpuP[j][i] = cpuP[i][j];
        }
        cpuP[i][i] = 1.0f;
    });
}