/// <summary>
/// Embeds the rows of <paramref name="X"/> into <c>OutDim</c> dimensions by driving the
/// TsneDx compute shaders: uploads the table, prepares the P matrix according to the selected
/// caching mode, then iterates the "one step" shader until <c>MaxEpochs</c> is reached or the
/// per-point variation reported by the GPU falls below <c>stopVariation</c>.
/// </summary>
/// <param name="X">
/// Input table with one row per data point. The column count is taken from <c>X[0]</c>.
/// NOTE(review): when <c>MetricType == 1</c> the table is passed to <c>NormalizeTable</c>
/// before upload, which presumably modifies it in place - confirm against that helper.
/// </param>
/// <returns>
/// One row per input row. Rows hold 3 values when <c>OutDim</c> is 3, 2 values when it is 2
/// and 1 value when it is 1. The result is passed through <c>PcaNormalize.DoNormalize</c>
/// when <c>AutoNormalize</c> is set.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="X"/> is null.</exception>
/// <exception cref="ArgumentException"><paramref name="X"/> contains no rows.</exception>
public float[][] Fit(float[][] X)
{
    // Validate before touching the GPU so that bad input cannot leak a device.
    if (X == null)
    {
        throw new ArgumentNullException("X");
    }
    if (X.Length == 0)
    {
        throw new ArgumentException("The input table must contain at least one row.", "X");
    }

    int exaggerationLength = (int)(MaxEpochs * ExaggerationRatio);
    int N = X.Length;

    // Everything that has to be released is declared ahead of the try block so that the
    // finally block can dispose whatever has been created so far.
    Buffer Y2Buf = null;
    Buffer Y3Buf = null;
    Buffer Y3StagingBuf = null;
    Buffer Y2StagingBuf = null;
    Buffer v2Buf = null;
    Buffer v3Buf = null;
    ComputeShader csOneStep = null;
    ComputeShader csSumUp = null;
    float[][] Y;

    gpu = new GpuDevice();
    try
    {
        cc = gpu.CreateConstantBuffer<TsneMapConstants>(0);
        cc.c.columns = X[0].Length;
        cc.c.N = N;
        cc.c.outDim = OutDim;
        cc.c.metricType = MetricType;

        // A single flag decides between the 2-float and the 3-float buffer set, so that
        // creation, initialisation and read-back always agree on which buffers exist.
        bool useY3 = cc.c.outDim > 2;

        #region Initialize Y
        if (!useY3)
        {
            Y2Buf = gpu.CreateBufferRW(N, 8, 3);
            Y2StagingBuf = gpu.CreateStagingBuffer(Y2Buf);
            v2Buf = gpu.CreateBufferRW(N, 2 * 8, 5);
        }
        else
        {
            Y3Buf = gpu.CreateBufferRW(N, 12, 4);
            Y3StagingBuf = gpu.CreateStagingBuffer(Y3Buf);
            v3Buf = gpu.CreateBufferRW(N, 2 * 12, 6);
        }

        float rang = 0.05f;
        Random rGenerator = new Random(435243); // fixed seed: the initial layout is reproducible.

        if (!useY3)
        {
            using (var ws = gpu.NewWriteStream(v2Buf))
            {
                for (int row = 0; row < N; row++)
                {
                    ws.Write<float>(0, 1, 0, 1);
                }
            }

            using (var ws = gpu.NewWriteStream(Y2Buf))
            {
                for (int row = 0; row < N; row++)
                {
                    for (int col = 0; col < cc.c.outDim; col++)
                    {
                        ws.Write((float)(rang * rGenerator.NextDouble() - rang / 2));
                    }

                    // A 1-D embedding still occupies a 2-float row; pad the second slot.
                    if (cc.c.outDim == 1)
                    {
                        ws.Write(0.0f);
                    }
                }
            }
        }
        else
        {
            using (var ws = gpu.NewWriteStream(v3Buf))
            {
                for (int row = 0; row < N; row++)
                {
                    ws.Write<float>(0, 1, 0, 1, 0, 1);
                }
            }

            using (var ws = gpu.NewWriteStream(Y3Buf))
            {
                for (int row = 0; row < N; row++)
                {
                    for (int col = 0; col < cc.c.outDim; col++)
                    {
                        ws.Write((float)(rang * rGenerator.NextDouble() - rang / 2));
                    }
                }
            }
        }
        #endregion

        #region Upload data table and initialize the distance matrix
        // groupMaxBuf is used to aggregate values created by parallel threads.
        // Its size must be large enough to hold a float value for each thread started in parallel.
        // Notice: gpu.Run(k) will start k*GROUP_SIZE threads.
        int gpSize = Math.Max(GpuGroupSize, MaxGroupNumber * GroupSize);
        gpSize = Math.Max(gpSize, MaxGroupNumberHyp * GroupSizeHyp);
        groupMaxBuf = gpu.CreateBufferRW(gpSize, 4, 7);

        resultBuf = gpu.CreateBufferRW(3, 4, 2); // to receive the total changes.
        resultStaging = gpu.CreateStagingBuffer(resultBuf);

        tableBuf = gpu.CreateBufferRO(N * cc.c.columns, 4, 0);
        if (MetricType == 1)
        {
            NormalizeTable(X);
        }
        gpu.WriteMarix(tableBuf, X, true);

        const int MinCpuDimension = 100; // minimal dimension to trigger CPU caching.
        const int MaxDimension = 64;     // maximal dimension (table columns) for fast EuclideanNoCache shader. Must be the same as MAX_DIMENSION.
        const int MaxDimensionS = 32;    // maximal dimension (table columns) for fast EuclideanNoCache shader. Must be the same as MAX_DIMENSIONs.

        if (N <= CacheLimit)
        {
            cachingMode = CachingMode.OnGpu;
        }
        else if ((cc.c.columns > MinCpuDimension)
            && ((double)N * N * 4) < ((double)MaxCpuCacheSize * 1024.0 * 1024.0))
        {
            // The N*N float matrix fits into the CPU cache budget (MaxCpuCacheSize is scaled by 1024*1024).
            cachingMode = CachingMode.OnCpu;
        }
        else if (cc.c.columns < MaxDimensionS)
        {
            cachingMode = CachingMode.OnFlySmS;
        }
        else if (cc.c.columns < MaxDimension)
        {
            cachingMode = CachingMode.OnFlySm;
        }
        else
        {
            cachingMode = CachingMode.OnFly;
        }
        #endregion

        cc.c.targetH = (float)Math.Log(PerplexityRatio * N);

        if (cachingMode == CachingMode.OnGpu)
        {
            CalculateP();
        }
        else if (cachingMode == CachingMode.OnCpu)
        {
            InitializePCpu();
        }
        else // (cachingMode == CachingMode.OnFly[Sm,SmS])
        {
            InitializeP();
        }

        using (var sd = gpu.LoadShader("TsneDx.CalculateSumQ.cso"))
        {
            gpu.SetShader(sd);
            cc.c.groupNumber = 256;
            for (int i = 0; i < N; i += cc.c.groupNumber)
            {
                cc.c.blockIdx = i;
                cc.Upload();
                gpu.Run(cc.c.groupNumber);
            }

            // A final dispatch with blockIdx == -1 follows the per-block passes.
            cc.c.blockIdx = -1;
            cc.Upload();
            gpu.Run();
        }

        var sdNames = new Dictionary<CachingMode, string>()
        {
            { CachingMode.OnGpu, "TsneDx.OneStep.cso" },
            { CachingMode.OnCpu, "TsneDx.OneStepCpuCache.cso" },
            { CachingMode.OnFly, "TsneDx.OneStepNoCache.cso" },
            { CachingMode.OnFlySm, "TsneDx.FastStep.cso" },
            { CachingMode.OnFlySmS, "TsneDx.FastStepS.cso" },
        };
        csOneStep = gpu.LoadShader(sdNames[cachingMode]);
        csSumUp = gpu.LoadShader("TsneDx.OneStepSumUp.cso");

        int stepCounter = 0;
        while (true)
        {
            // Choose the factor applied to P for this step (early exaggeration).
            if (stepCounter < exaggerationLength)
            {
                if (ExaggerationSmoothen)
                {
                    int len = (int)(0.9 * MaxEpochs);
                    if (stepCounter < len)
                    {
                        // Blend from ExaggerationFactor down to 1 along t^(1/4).
                        double t = (double)stepCounter / len;
                        t = Math.Sqrt(Math.Sqrt(t));
                        cc.c.PFactor = (float)((1 - t) * ExaggerationFactor + t);
                    }
                    else
                    {
                        cc.c.PFactor = 1.0f;
                    }
                }
                else
                {
                    cc.c.PFactor = (float)ExaggerationFactor;
                }
            }
            else
            {
                cc.c.PFactor = 1.0f;
            }

            gpu.SetShader(csOneStep);

            if (cachingMode == CachingMode.OnGpu)
            {
                cc.c.groupNumber = MaxGroupNumber;
                // Notice: cc.c.groupNumber*GroupSize must fit into groupMax[].
                for (int bIdx = 0; bIdx < N; bIdx += cc.c.groupNumber * GroupSize)
                {
                    cc.c.blockIdx = bIdx;
                    cc.Upload();
                    gpu.Run(cc.c.groupNumber);
                }
                cc.c.groupNumber = MaxGroupNumber * GroupSize;
            }
            else if (cachingMode == CachingMode.OnCpu)
            {
                int bSize = MaxGroupNumberHyp * GroupSizeHyp;
                cc.c.groupNumber = MaxGroupNumberHyp;
                for (int bIdx = 0; bIdx < N; bIdx += bSize)
                {
                    // Stream the next slice of the CPU-side P cache to the GPU before each dispatch.
                    gpu.WriteArray(cpuP, bIdx, Math.Min(N, bIdx + bSize), P2Buf);
                    cc.c.blockIdx = bIdx;
                    cc.Upload();
                    gpu.Run(cc.c.groupNumber);
                }
                cc.c.groupNumber = Math.Min(N, bSize);
            }
            else if ((cachingMode == CachingMode.OnFlySm) || (cachingMode == CachingMode.OnFlySmS))
            {
                const int GrSize = 64; // This value must match that of GR_SIZE in TsneMap.hlsl.
                cc.c.groupNumber = MaxGroupNumber;
                for (int bIdx = 0; bIdx < N; bIdx += cc.c.groupNumber * GrSize)
                {
                    cc.c.blockIdx = bIdx;
                    cc.Upload();
                    gpu.Run(cc.c.groupNumber);
                }
                cc.c.groupNumber = cc.c.groupNumber * GrSize;
            }
            else // cachingMode==CachingMode.OnFly
            {
                cc.c.groupNumber = 128;
                for (int bIdx = 0; bIdx < N; bIdx += cc.c.groupNumber)
                {
                    cc.c.blockIdx = bIdx;
                    cc.Upload();
                    gpu.Run(cc.c.groupNumber);
                }
            }

            // Notice: cc.c.groupNumber must be the number of partial sumQ_next values,
            // which add up to sumQ for the next step.
            gpu.SetShader(csSumUp);
            cc.Upload();
            gpu.Run();

            // The third value of the result buffer, divided by N, is the stopping measure.
            currentVariation = gpu.ReadRange<float>(resultStaging, resultBuf, 3)[2] / N;

            // The momentum chosen here is uploaded with the next step's constants.
            cc.c.mom = (float)((stepCounter < (MaxEpochs * momentumSwitch)) ? momentum : finalMomentum);
            stepCounter++;

            // Progress indicator: a dot every 10 steps, a line break every 500.
            if (stepCounter % 10 == 0)
            {
                Console.Write('.');
            }
            if (stepCounter % 500 == 0)
            {
                Console.WriteLine();
            }

            // Stop at the epoch limit, or once past the exaggeration phase (plus two steps)
            // when the variation has dropped below the threshold.
            if ((stepCounter >= MaxEpochs)
                || ((stepCounter >= (2 + exaggerationLength)) && (currentVariation < stopVariation)))
            {
                break;
            }
        }
        Console.WriteLine();

        // Read the final layout back from whichever buffer set was created above.
        Y = new float[N][];
        using (var rs = gpu.NewReadStream(useY3 ? Y3StagingBuf : Y2StagingBuf, useY3 ? Y3Buf : Y2Buf))
        {
            int outVDim = useY3 ? 3 : 2;
            for (int row = 0; row < N; row++)
            {
                Y[row] = rs.ReadRange<float>(outVDim);
            }
        }

        // 1-D results were stored in 2-float rows; drop the padding value.
        if (cc.c.outDim == 1)
        {
            for (int i = 0; i < N; i++)
            {
                Y[i] = new float[] { Y[i][0] };
            }
        }
    }
    finally
    {
        // Release the GPU resources on both the success and the failure path. Entries that
        // were never created are still null here, exactly as the unused buffer set always was.
        TsneDx.SafeDispose(csSumUp, csOneStep, PBuf, P2Buf, distanceBuf, tableBuf, resultBuf, resultStaging,
            groupMaxBuf, Y3Buf, Y3StagingBuf, v3Buf, Y2Buf, Y2StagingBuf, v2Buf, cc, gpu);
    }

    return AutoNormalize ? PcaNormalize.DoNormalize(Y) : Y;
}
/// <summary>
/// Prepares the GPU-resident P matrix: first fills <c>distanceBuf</c> with the
/// CreateDistanceCache shader, then allocates <c>PBuf</c> and runs the CalculatePFromCache
/// and CalculateP shaders over it. Reads the point count from <c>cc.c.N</c>.
/// </summary>
void CalculateP()
{
    int pointCount = cc.c.N;

    // (N*N - N) / 2 entries: the number of unordered pairs of distinct rows.
    distanceBuf = gpu.CreateBufferRW((pointCount * pointCount - pointCount) / 2, 4, 0);

    using (var cacheShader = gpu.LoadShader("TsneDx.CreateDistanceCache.cso"))
    {
        gpu.SetShader(cacheShader);

        const int GroupsPerDispatch = 256;
        int blockStart = 0;
        while (blockStart < pointCount)
        {
            cc.c.blockIdx = blockStart;
            cc.Upload();
            gpu.Run(GroupsPerDispatch);
            blockStart += GroupsPerDispatch;
        }
    }

    PBuf = gpu.CreateBufferRW(pointCount * pointCount, 4, 1);
    cc.c.chacedP = true;

    using (var pShader = gpu.LoadShader("TsneDx.CalculateP.cso"))
    {
        // Calculate the squared distance matrix into P.
        using (var fromCacheShader = gpu.LoadShader("TsneDx.CalculatePFromCache.cso"))
        {
            gpu.SetShader(fromCacheShader);
            gpu.Run(64);
        }

        gpu.SetShader(pShader);

        // cmd 4: normalize and symmetrize the distance matrix.
        cc.c.cmd = 4;
        cc.Upload();
        gpu.Run();

        // cmd 2: convert the matrix to affinities, one block of rows per dispatch.
        cc.c.cmd = 2;
        cc.c.groupNumber = 4;
        int blockIndex = 0;
        while (blockIndex < pointCount)
        {
            cc.c.blockIdx = blockIndex;
            cc.Upload();
            gpu.Run(cc.c.groupNumber);
            blockIndex += cc.c.groupNumber * GpuGroupSize;
        }

        // cmd 3: normalize and symmetrize the affinity matrix.
        gpu.SetShader(pShader);
        cc.c.cmd = 3;
        cc.Upload();
        gpu.Run();
    }
}