Exemplo n.º 1
0
        /// <summary>
        /// Runs the GPU t-SNE iteration on the rows of <paramref name="X"/> and returns the resulting map.
        /// </summary>
        /// <param name="X">
        /// Input table with one row per data point. The column count is taken from the first row.
        /// When <c>MetricType == 1</c> the table is passed to <c>NormalizeTable</c> before it is uploaded.
        /// </param>
        /// <returns>
        /// One coordinate array per input row (1, 2 or 3 values depending on the output dimension),
        /// passed through <c>PcaNormalize.DoNormalize</c> when <c>AutoNormalize</c> is set.
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="X"/> is null.</exception>
        /// <exception cref="ArgumentException"><paramref name="X"/> contains no rows.</exception>
        public float[][] Fit(float[][] X)
        {
            // Validate before any GPU resource is created; X[0] is dereferenced below.
            if (X == null)
            {
                throw new ArgumentNullException("X");
            }
            if (X.Length == 0)
            {
                throw new ArgumentException("The input table must contain at least one row.", "X");
            }

            int exaggerationLength = (int)(MaxEpochs * ExaggerationRatio);
            int N = X.Length;

            // Declared outside the try block so that the finally block can release whatever
            // has been created, even when an exception interrupts the run half way through.
            Buffer        Y2Buf        = null;
            Buffer        Y3Buf        = null;
            Buffer        Y3StagingBuf = null;
            Buffer        Y2StagingBuf = null;
            Buffer        v2Buf        = null;
            Buffer        v3Buf        = null;
            ComputeShader csOneStep    = null;
            ComputeShader csSumUp      = null;
            float[][]     Y            = new float[N][];

            try
            {
                gpu = new GpuDevice();
                cc  = gpu.CreateConstantBuffer <TsneMapConstants>(0);

                cc.c.columns    = X[0].Length;
                cc.c.N          = N;
                cc.c.outDim     = OutDim;
                cc.c.metricType = MetricType;

                // Single source of truth for choosing between the 2-component and the
                // 3-component buffers; used for allocation, initialization and read-back.
                bool use2D = cc.c.outDim <= 2;

                #region Initialize Y
                float  rang       = 0.05f;
                Random rGenerator = new Random(435243); // fixed seed: the initial layout is reproducible.

                if (use2D)
                {
                    Y2Buf        = gpu.CreateBufferRW(N, 8, 3);
                    Y2StagingBuf = gpu.CreateStagingBuffer(Y2Buf);
                    v2Buf        = gpu.CreateBufferRW(N, 2 * 8, 5);

                    using (var ws = gpu.NewWriteStream(v2Buf)) {
                        for (int row = 0; row < N; row++)
                        {
                            ws.Write <float>(0, 1, 0, 1);
                        }
                    }

                    using (var ws = gpu.NewWriteStream(Y2Buf)) {
                        for (int row = 0; row < N; row++)
                        {
                            for (int col = 0; col < cc.c.outDim; col++)
                            {
                                ws.Write((float)(rang * rGenerator.NextDouble() - rang / 2));
                            }
                            if (cc.c.outDim == 1)
                            {
                                // Pad the unused second component so each row still holds two floats.
                                ws.Write(0.0f);
                            }
                        }
                    }
                }
                else
                {
                    Y3Buf        = gpu.CreateBufferRW(N, 12, 4);
                    Y3StagingBuf = gpu.CreateStagingBuffer(Y3Buf);
                    v3Buf        = gpu.CreateBufferRW(N, 2 * 12, 6);

                    using (var ws = gpu.NewWriteStream(v3Buf)) {
                        for (int row = 0; row < N; row++)
                        {
                            ws.Write <float>(0, 1, 0, 1, 0, 1);
                        }
                    }

                    using (var ws = gpu.NewWriteStream(Y3Buf)) {
                        for (int row = 0; row < N; row++)
                        {
                            for (int col = 0; col < cc.c.outDim; col++)
                            {
                                ws.Write((float)(rang * rGenerator.NextDouble() - rang / 2));
                            }
                        }
                    }
                }
                #endregion

                #region Upload data table and initialize the distance matrix

                // Used to aggregate values created by parallel threads.
                // The size of groupMaxBuf must be large enough to hold a float value for each thread started in parallel.
                // Notice: gpu.Run(k) will start k*GROUP_SIZE threads.
                int gpSize = Math.Max(GpuGroupSize, MaxGroupNumber * GroupSize);
                gpSize      = Math.Max(gpSize, MaxGroupNumberHyp * GroupSizeHyp);
                groupMaxBuf = gpu.CreateBufferRW(gpSize, 4, 7);

                resultBuf     = gpu.CreateBufferRW(3, 4, 2); // to receive the total changes.
                resultStaging = gpu.CreateStagingBuffer(resultBuf);

                tableBuf = gpu.CreateBufferRO(N * cc.c.columns, 4, 0);
                if (MetricType == 1)
                {
                    NormalizeTable(X);
                }
                gpu.WriteMarix(tableBuf, X, true);

                const int MinCpuDimension = 100; // minimal dimension to trigger CPU caching.
                const int MaxDimension    = 64;  // column limit for the FastStep shader. Must be the same as MAX_DIMENSION.
                const int MaxDimensionS   = 32;  // column limit for the FastStepS shader. Must be the same as MAX_DIMENSIONs.
                if (N <= CacheLimit)
                {
                    cachingMode = CachingMode.OnGpu;
                }
                else if ((cc.c.columns > MinCpuDimension) && ((double)N * N * 4) < ((double)MaxCpuCacheSize * 1024.0 * 1024.0))
                {
                    cachingMode = CachingMode.OnCpu;
                }
                else if (cc.c.columns < MaxDimensionS)
                {
                    cachingMode = CachingMode.OnFlySmS;
                }
                else if (cc.c.columns < MaxDimension)
                {
                    cachingMode = CachingMode.OnFlySm;
                }
                else
                {
                    cachingMode = CachingMode.OnFly;
                }
                #endregion

                cc.c.targetH = (float)Math.Log(PerplexityRatio * N);
                if (cachingMode == CachingMode.OnGpu)
                {
                    CalculateP();
                }
                else if (cachingMode == CachingMode.OnCpu)
                {
                    InitializePCpu();
                }
                else     // (cachingMode == CachingMode.OnFly[Sm,SmS])
                {
                    InitializeP();
                }

                using (var sd = gpu.LoadShader("TsneDx.CalculateSumQ.cso")) {
                    gpu.SetShader(sd);
                    cc.c.groupNumber = 256;
                    for (int i = 0; i < N; i += cc.c.groupNumber)
                    {
                        cc.c.blockIdx = i;
                        cc.Upload();
                        gpu.Run(cc.c.groupNumber);
                    }
                    // A final pass with blockIdx == -1 follows the per-block passes.
                    cc.c.blockIdx = -1;
                    cc.Upload();
                    gpu.Run();
                }

                var sdNames = new Dictionary <CachingMode, string>()
                {
                    { CachingMode.OnGpu, "TsneDx.OneStep.cso" },
                    { CachingMode.OnCpu, "TsneDx.OneStepCpuCache.cso" },
                    { CachingMode.OnFly, "TsneDx.OneStepNoCache.cso" },
                    { CachingMode.OnFlySm, "TsneDx.FastStep.cso" },
                    { CachingMode.OnFlySmS, "TsneDx.FastStepS.cso" },
                };

                csOneStep = gpu.LoadShader(sdNames[cachingMode]);
                csSumUp   = gpu.LoadShader("TsneDx.OneStepSumUp.cso");
                int stepCounter = 0;

                while (true)
                {
                    // Choose the exaggeration factor applied to P for this step.
                    if (stepCounter < exaggerationLength)
                    {
                        if (ExaggerationSmoothen)
                        {
                            int len = (int)(0.9 * MaxEpochs);
                            if (stepCounter < len)
                            {
                                // Blend from ExaggerationFactor towards 1 along t^(1/4).
                                double t = (double)stepCounter / len;
                                t            = Math.Sqrt(Math.Sqrt(t));
                                cc.c.PFactor = (float)((1 - t) * ExaggerationFactor + t);
                            }
                            else
                            {
                                cc.c.PFactor = 1.0f;
                            }
                        }
                        else
                        {
                            cc.c.PFactor = (float)ExaggerationFactor;
                        }
                    }
                    else
                    {
                        cc.c.PFactor = 1.0f;
                    }

                    gpu.SetShader(csOneStep);

                    if (cachingMode == CachingMode.OnGpu)
                    {
                        cc.c.groupNumber = MaxGroupNumber;
                        // Notice: cc.c.groupNumber*GroupSize must fit into groupMax[].
                        for (int bIdx = 0; bIdx < N; bIdx += cc.c.groupNumber * GroupSize)
                        {
                            cc.c.blockIdx = bIdx;
                            cc.Upload();
                            gpu.Run(cc.c.groupNumber);
                        }
                        cc.c.groupNumber = MaxGroupNumber * GroupSize;
                    }
                    else if (cachingMode == CachingMode.OnCpu)
                    {
                        int bSize = MaxGroupNumberHyp * GroupSizeHyp;
                        cc.c.groupNumber = MaxGroupNumberHyp;
                        for (int bIdx = 0; bIdx < N; bIdx += bSize)
                        {
                            // Stream the next slice of the CPU-side P cache to the GPU before running the block.
                            gpu.WriteArray(cpuP, bIdx, Math.Min(N, bIdx + bSize), P2Buf);
                            cc.c.blockIdx = bIdx;
                            cc.Upload();
                            gpu.Run(cc.c.groupNumber);
                        }
                        cc.c.groupNumber = Math.Min(N, bSize);
                    }
                    else if ((cachingMode == CachingMode.OnFlySm) || (cachingMode == CachingMode.OnFlySmS))
                    {
                        const int GrSize = 64;  // This value must match that of GR_SIZE in TsneMap.hlsl.
                        cc.c.groupNumber = MaxGroupNumber;
                        for (int bIdx = 0; bIdx < N; bIdx += cc.c.groupNumber * GrSize)
                        {
                            cc.c.blockIdx = bIdx;
                            cc.Upload();
                            gpu.Run(cc.c.groupNumber);
                        }
                        cc.c.groupNumber = cc.c.groupNumber * GrSize;
                    }
                    else     // cachingMode==CachingMode.OnFly
                    {
                        cc.c.groupNumber = 128;
                        for (int bIdx = 0; bIdx < N; bIdx += cc.c.groupNumber)
                        {
                            cc.c.blockIdx = bIdx;
                            cc.Upload();
                            gpu.Run(cc.c.groupNumber);
                        }
                    }

                    // Notice: cc.c.groupNumber must be number of partial sumQ_next, which add up to sumQ for the next step.
                    gpu.SetShader(csSumUp);
                    cc.Upload();
                    gpu.Run();

                    currentVariation = gpu.ReadRange <float>(resultStaging, resultBuf, 3)[2] / N;

                    // Set after this step's uploads, so the value reaches the GPU with the next cc.Upload().
                    cc.c.mom = (float)((stepCounter < (MaxEpochs * momentumSwitch)) ? momentum : finalMomentum);
                    stepCounter++;
                    if (stepCounter % 10 == 0)
                    {
                        Console.Write('.');
                    }
                    if (stepCounter % 500 == 0)
                    {
                        Console.WriteLine();
                    }

                    // Stop at the epoch limit, or once past the exaggeration phase when the variation is small enough.
                    if ((stepCounter >= MaxEpochs) || ((stepCounter >= (2 + exaggerationLength)) && (currentVariation < stopVariation)))
                    {
                        break;
                    }
                }
                Console.WriteLine();

                // Read the map back from whichever buffer pair was allocated above.
                using (var rs = gpu.NewReadStream(use2D ? Y2StagingBuf : Y3StagingBuf, use2D ? Y2Buf : Y3Buf)) {
                    int outVDim = use2D ? 2 : 3;
                    for (int row = 0; row < N; row++)
                    {
                        Y[row] = rs.ReadRange <float>(outVDim);
                    }
                }

                if (cc.c.outDim == 1)
                {
                    // Drop the padding component that was added for 1-dimensional maps.
                    for (int i = 0; i < N; i++)
                    {
                        Y[i] = new float[] { Y[i][0] };
                    }
                }
            }
            finally
            {
                // Several of these are always null on a given run (only one of the Y2/Y3 sets is
                // allocated), exactly as in the success-only cleanup this replaces.
                TsneDx.SafeDispose(csSumUp, csOneStep, PBuf, P2Buf, distanceBuf, tableBuf, resultBuf,
                                   resultStaging, groupMaxBuf, Y3Buf, Y3StagingBuf, v3Buf, Y2Buf, Y2StagingBuf, v2Buf, cc, gpu);
            }

            return AutoNormalize ? PcaNormalize.DoNormalize(Y) : Y;
        }
Exemplo n.º 2
0
        /// <summary>
        /// Creates the GPU distance cache and the P buffer, then runs the shader passes that
        /// fill and normalize P. Reads the point count from <c>cc.c.N</c> and leaves the results
        /// in the <c>distanceBuf</c> and <c>PBuf</c> fields.
        /// </summary>
        void CalculateP()
        {
            const int DistanceGroupCount = 256;
            int       N                  = cc.c.N;

            // (N*N - N)/2 elements: the number of unordered pairs of distinct rows.
            int pairCount = (N * N - N) / 2;
            distanceBuf = gpu.CreateBufferRW(pairCount, 4, 0);

            using (var cacheShader = gpu.LoadShader("TsneDx.CreateDistanceCache.cso"))
            {
                gpu.SetShader(cacheShader);

                int blockStart = 0;
                while (blockStart < N)
                {
                    cc.c.blockIdx = blockStart;
                    cc.Upload();
                    gpu.Run(DistanceGroupCount);
                    blockStart += DistanceGroupCount;
                }
            }

            PBuf         = gpu.CreateBufferRW(N * N, 4, 1);
            cc.c.chacedP = true;

            using (var pShader = gpu.LoadShader("TsneDx.CalculateP.cso"))
            {
                // First pass: fill P with the squared distances taken from the cache.
                using (var fromCacheShader = gpu.LoadShader("TsneDx.CalculatePFromCache.cso"))
                {
                    gpu.SetShader(fromCacheShader);
                    gpu.Run(64);
                }

                gpu.SetShader(pShader);

                // Command 4: normalize and symmetrize the distance matrix.
                cc.c.cmd = 4;
                cc.Upload();
                gpu.Run();

                // Command 2: convert the matrix to affinities, one block of rows per dispatch.
                cc.c.cmd         = 2;
                cc.c.groupNumber = 4;

                int rowStart = 0;
                while (rowStart < N)
                {
                    cc.c.blockIdx = rowStart;
                    cc.Upload();
                    gpu.Run(cc.c.groupNumber);
                    rowStart += cc.c.groupNumber * GpuGroupSize;
                }

                // Command 3: normalize and symmetrize the affinity matrix.
                gpu.SetShader(pShader);
                cc.c.cmd = 3;
                cc.Upload();
                gpu.Run();
            }
        }