Example #1
 public ConstBuffer(GpuDevice gpu, int slot)
 {
     buffer = new Buffer(gpu.device, (Marshal.SizeOf(typeof(T)) / 16 + 1) * 16,
                         ResourceUsage.Default, BindFlags.ConstantBuffer, CpuAccessFlags.None, ResourceOptionFlags.None, 0);
     gpu.Context.ComputeShader.SetConstantBuffer(slot, buffer);
     this.gpu = gpu;
 }
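Direct3D 11 requires constant buffer sizes to be multiples of 16 bytes, which is why the constructor above rounds Marshal.SizeOf(typeof(T)) up before creating the buffer. A minimal standalone sketch of that size calculation; the struct MyConstants and the class name are hypothetical, not part of TsneDx:

 using System;
 using System.Runtime.InteropServices;

 struct MyConstants { public int rows, columns, eigenCount, groupNumber; }

 static class ConstBufferSizeDemo
 {
     static void Main()
     {
         int raw    = Marshal.SizeOf(typeof(MyConstants));  // 16 bytes for this struct
         int padded = (raw / 16 + 1) * 16;                   // same rounding as ConstBuffer<T> above
         Console.WriteLine($"raw={raw}, padded={padded}");   // note: an already-aligned size still gains 16 bytes
     }
 }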
Example #2
 public ReadDataStream(GpuDevice device, Buffer dataBuffer, Buffer stagingBuffer)
 {
     this.device        = device;
     this.stagingBuffer = stagingBuffer;
     device.Context.CopyResource(dataBuffer, stagingBuffer);
     device.Context.MapSubresource(stagingBuffer, 0, SharpDX.Direct3D11.MapMode.Read, MapFlags.None, out ds);
 }
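The constructor above wraps the standard Direct3D 11 read-back pattern: copy a GPU-only buffer into a staging buffer, then map the staging buffer for CPU reads. A fragment sketch of the same pattern in raw SharpDX, under the assumptions noted in the comments (not part of TsneDx):

 // Assumptions: 'device' is a SharpDX.Direct3D11.Device, 'dataBuffer' is a GPU buffer, and
 // 'stagingBuffer' was created with ResourceUsage.Staging and CpuAccessFlags.Read at the same size.
 // Namespaces assumed: SharpDX (DataStream) and SharpDX.Direct3D11 (MapMode, MapFlags).
 var context = device.ImmediateContext;
 context.CopyResource(dataBuffer, stagingBuffer);                    // GPU -> staging copy
 context.MapSubresource(stagingBuffer, 0, MapMode.Read, MapFlags.None, out DataStream ds);
 float[] values = ds.ReadRange<float>(3);                            // read three floats on the CPU
 context.UnmapSubresource(stagingBuffer, 0);                         // unmap when done; ReadDataStream presumably does this in its Dispose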
Example #3
        // Reduce the given matrix to its top PC components.
        public float[][] DoPca(float[][] A, int eigenCount)
        {
            GpuDevice gpu = new GpuDevice();
            var       cc  = gpu.CreateConstBuffer <PcaConstants>(0);

            bool transposing = (A.Length > A[0].Length);

            cc.c.eigenCount = eigenCount;
            cc.c.rows       = transposing ? A[0].Length : A.Length;
            cc.c.columns    = (!transposing) ? A[0].Length : A.Length;

            var resultBuf     = gpu.CreateBufferRW(3, 4, 1); // read back each iteration to get the current eigenvalue estimate.
            var resultStaging = gpu.CreateStagingBuffer(resultBuf);

            Buffer tableBuf = gpu.CreateBufferRO(cc.c.rows * cc.c.columns, 4, 0);

            double[] colMean = new double[A[0].Length];
            Parallel.For(0, A[0].Length, col => {
                colMean[col] = 0.0;
                for (int row = 0; row < A.Length; row++)
                {
                    colMean[col] += A[row][col];
                }
                colMean[col] /= A.Length;
            });

            using (var ds = gpu.NewWriteStream(tableBuf)) {
                float[] buf = new float[cc.c.rows * cc.c.columns];
                if (transposing)
                {
                    Parallel.For(0, cc.c.columns, col => {
                        int offset = col * cc.c.rows;
                        for (int row = 0; row < cc.c.rows; row++)
                        {
                            buf[offset + row] = (float)(A[col][row] - colMean[row]);
                        }
                    });
                }
                else
                {
                    Parallel.For(0, cc.c.columns, col => {
                        int offset = col * cc.c.rows;
                        for (int row = 0; row < cc.c.rows; row++)
                        {
                            buf[offset + row] = (float)(A[row][col] - colMean[col]);
                        }
                    });
                }
                ds.WriteRange(buf);
            }

            cc.c.covFactor = transposing ? 1.0f / (cc.c.columns - 1) : 1.0f / (cc.c.rows - 1);
            Buffer covBuf = gpu.CreateBufferRW(cc.c.rows * cc.c.rows, 4, 0);

            using (var shader = gpu.LoadShader("TsneDx.PcaCreateCovMatrix.cso")) {
                gpu.SetShader(shader);
                cc.c.groupNumber = 256;
                for (int iBlock = 0; iBlock < cc.c.rows; iBlock += cc.c.groupNumber)
                {
                    cc.c.iBlock = iBlock;
                    cc.Upload();
                    gpu.Run(cc.c.groupNumber);
                }
            }

            var eVectorBuf     = gpu.CreateBufferRW(cc.c.rows, 4, 2);
            var eVectorStaging = gpu.CreateStagingBuffer(eVectorBuf);
            var eVector2Buf    = gpu.CreateBufferRW(cc.c.rows, 4, 3);

            var sdInit   = gpu.LoadShader("TsneDx.PcaInitIteration.cso");
            var sdStep   = gpu.LoadShader("TsneDx.PcaIterateOneStep.cso");
            var sdNorm   = gpu.LoadShader("TsneDx.PcaCalculateNormal.cso");
            var sdAdjCov = gpu.LoadShader("TsneDx.PcaAdjustCovMatrix.cso");

            gpu.SetShader(sdInit);
            cc.c.eigenIdx = 0;
            cc.Upload();
            gpu.Run();

            float preEigen = 1e30f;
            float newEigen = 0;

            float[][] eVectors = new float[eigenCount][];
            double[]  eValues  = new double[eigenCount];

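            // Extract the dominant eigenvectors one at a time: iterate until the eigenvalue estimate
            // converges, then deflate the covariance matrix (PcaAdjustCovMatrix) before the next one.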
            for (int eigenIdx = 0; eigenIdx < eigenCount; eigenIdx++)
            {
                cc.c.groupNumber = 256;
                cc.Upload();

                for (int repeat = 0; repeat < MAX_ITERATION; repeat++)
                {
                    gpu.SetShader(sdStep);
                    gpu.Run(cc.c.groupNumber);
                    gpu.SetShader(sdNorm);
                    gpu.Run(1);
                    newEigen = gpu.ReadFloat(resultStaging, resultBuf);
                    double delta = Math.Abs((newEigen - preEigen) / preEigen);
                    if (delta < epsilon)
                    {
                        break;
                    }
                    preEigen = newEigen;
                }

                eValues[eigenIdx] = (double)newEigen;

                // An eigenvector with an extremely small eigenvalue (effectively 0.0) is ignored and stops the calculation.
                if (Math.Abs(eValues[eigenIdx] / eValues[0]) < stopEpsilon)
                {
                    Array.Resize(ref eValues, eigenIdx);
                    Array.Resize(ref eVectors, eigenIdx);
                    break;
                }

                eVectors[eigenIdx] = new float[cc.c.rows];
                Array.Copy(gpu.ReadRange <float>(eVectorStaging, eVectorBuf, cc.c.rows), eVectors[eigenIdx], cc.c.rows);

                if (eigenIdx == (eigenCount - 1))
                {
                    break;
                }

                // Adjust the covariance matrix.
                gpu.SetShader(sdAdjCov);
                cc.c.groupNumber = 128;
                cc.Upload();
                gpu.Run(cc.c.groupNumber);

                // Initialize the iteration loop for the next eigen-vector.
                gpu.SetShader(sdInit);
                cc.c.eigenIdx = eigenIdx + 1;
                cc.Upload();
                gpu.Run();
                //CmdSynchronize();
            }

            if (!transposing)
            {
                using (var shader = gpu.LoadShader("TsneDx.PcaTransposeEigenvectors.cso")) {
                    int      eRows      = eVectors.Length;
                    int      eColumns   = eVectors[0].Length;
                    Buffer   eigenList1 = gpu.CreateBufferRO(eRows * eColumns, 4, 1);
                    double[] S          = eValues.Select(x => 1.0 / Math.Sqrt(Math.Abs(x * (eVectors[0].Length - 1)))).ToArray();
                    float[]  eVector1   = new float[eRows * eColumns];
                    for (int row = 0; row < eRows; row++)
                    {
                        for (int col = 0; col < eColumns; col++)
                        {
                            eVector1[row * eColumns + col] = (float)(S[row] * eVectors[row][col]);
                        }
                    }

                    using (var ds = gpu.NewWriteStream(eigenList1))
                        ds.WriteRange(eVector1);

                    Buffer eigenList2 = gpu.CreateBufferRW(eVectors.Length * cc.c.columns, 4, 4);
                    gpu.SetShader(shader);
                    cc.c.groupNumber = 128;
                    cc.c.eigenCount  = eVectors.Length;
                    cc.Upload();
                    gpu.Run(cc.c.groupNumber);

                    float[] eVectors2 = gpu.ReadRange <float>(eigenList2, eVectors.Length * cc.c.columns);
                    eVectors = new float[eVectors.Length][];
                    for (int row = 0; row < eVectors.Length; row++)
                    {
                        eVectors[row] = new float[cc.c.columns];
                    }
                    Parallel.For(0, eVectors.Length, row => {
                        Array.Copy(eVectors2, row * cc.c.columns, eVectors[row], 0, cc.c.columns);
                    });

                    TsneDx.SafeDispose(eigenList1, eigenList2);
                }
            }

            float[][] B = null;
            cc.c.rows       = A.Length;
            cc.c.columns    = A[0].Length;
            cc.c.eigenCount = eVectors.Length;
            cc.Upload();

            if (transposing)
            {
                // The tableBuf on the GPU is in the wrong matrix order; re-upload it here in the required order.
                TsneDx.SafeDispose(tableBuf);
                tableBuf = gpu.CreateBufferRO(cc.c.rows * cc.c.columns, 4, 0);
                Parallel.For(0, cc.c.rows, row => {
                    for (int col = 0; col < cc.c.columns; col++)
                    {
                        A[row][col] -= (float)colMean[col];
                    }
                });
                gpu.WriteMarix(tableBuf, A);
            }

            Buffer eigenTable = gpu.CreateBufferRO(cc.c.eigenCount * cc.c.columns, 4, 1);

            gpu.WriteMarix(eigenTable, eVectors);

            TsneDx.SafeDispose(resultBuf);
            resultBuf = gpu.CreateBufferRW(cc.c.rows * cc.c.eigenCount, 4, 1);

            using (var shader = gpu.LoadShader("TsneDx.PcaReduceMatrix.cso")) {
                try {
                    gpu.SetShader(shader);
                    const int GROUP_NR = 256;
                    gpu.Run(GROUP_NR);

                    float[] buf = gpu.ReadRange <float>(resultBuf, cc.c.rows * cc.c.eigenCount);
                    B = new float[cc.c.rows][];
                    for (int row = 0; row < cc.c.rows; row++)
                    {
                        B[row] = new float[cc.c.eigenCount];
                    }
                    Parallel.For(0, cc.c.rows, row => {
                        Array.Copy(buf, row * cc.c.eigenCount, B[row], 0, cc.c.eigenCount);
                    });
                } catch (SharpDX.SharpDXException ex) {
                    Console.WriteLine("GPU operation timed out (" + ex.Message + "): please try to increase the TDR value.");
                }
            }

            TsneDx.SafeDispose(eigenTable, sdInit, sdStep, sdNorm, sdAdjCov, eVectorBuf,
                               eVectorStaging, eVector2Buf, resultBuf, resultStaging, covBuf, tableBuf, cc, gpu);
            return(B);
        }
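The shader loop in DoPca (PcaInitIteration, PcaIterateOneStep, PcaCalculateNormal, PcaAdjustCovMatrix) appears to implement power iteration with deflation on the covariance matrix. For reference, a minimal CPU sketch of that scheme using the same relative-change stopping test; TopEigen is a hypothetical helper, not part of TsneDx:

 using System;
 using System.Linq;

 static class PowerIterationSketch
 {
     public static (double[] values, double[][] vectors) TopEigen(
         double[][] C, int k, int maxIter = 1000, double eps = 1e-8)
     {
         int n = C.Length;
         var values  = new double[k];
         var vectors = new double[k][];
         for (int e = 0; e < k; e++)
         {
             var v = Enumerable.Repeat(1.0 / Math.Sqrt(n), n).ToArray();  // start from a unit vector
             double lambda = 0, prev = 1e30;
             for (int it = 0; it < maxIter; it++)
             {
                 var w = new double[n];                                   // w = C * v
                 for (int i = 0; i < n; i++)
                     for (int j = 0; j < n; j++)
                         w[i] += C[i][j] * v[j];
                 lambda = Math.Sqrt(w.Sum(x => x * x));                   // eigenvalue estimate = |C v| for unit v
                 if (lambda <= 0) break;                                  // degenerate case: no dominant direction left
                 for (int i = 0; i < n; i++) v[i] = w[i] / lambda;        // re-normalize
                 if (Math.Abs((lambda - prev) / prev) < eps) break;       // same relative-change test as DoPca
                 prev = lambda;
             }
             values[e]  = lambda;
             vectors[e] = v;
             for (int i = 0; i < n; i++)                                  // deflation: C -= lambda * v * v^T
                 for (int j = 0; j < n; j++)
                     C[i][j] -= lambda * v[i] * v[j];
         }
         return (values, vectors);
     }
 }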
Example #4
        public float[][] Fit(float[][] X)
        {
            int exaggerationLength = (int)(MaxEpochs * ExaggerationRatio);

            gpu = new GpuDevice();
            cc  = gpu.CreateConstantBuffer <TsneMapConstants>(0);

            int N = X.Length;

            cc.c.columns    = X[0].Length;
            cc.c.N          = N;
            cc.c.outDim     = OutDim;
            cc.c.metricType = MetricType;

            #region Initialize Y
            Buffer Y2Buf        = null;
            Buffer Y3Buf        = null;
            Buffer Y3StagingBuf = null;
            Buffer Y2StagingBuf = null;
            Buffer v2Buf        = null;
            Buffer v3Buf        = null;

            if (cc.c.outDim <= 2)
            {
                Y2Buf        = gpu.CreateBufferRW(N, 8, 3);
                Y2StagingBuf = gpu.CreateStagingBuffer(Y2Buf);
                v2Buf        = gpu.CreateBufferRW(N, 2 * 8, 5);
            }
            else
            {
                Y3Buf        = gpu.CreateBufferRW(N, 12, 4);
                Y3StagingBuf = gpu.CreateStagingBuffer(Y3Buf);
                v3Buf        = gpu.CreateBufferRW(N, 2 * 12, 6);
            }

            float  rang       = 0.05f;
            Random rGenerator = new Random(435243);

            if (cc.c.outDim <= 2)
            {
                using (var ws = gpu.NewWriteStream(v2Buf)) {
                    for (int row = 0; row < N; row++)
                    {
                        ws.Write <float>(0, 1, 0, 1);
                    }
                }

                using (var ws = gpu.NewWriteStream(Y2Buf)) {
                    for (int row = 0; row < N; row++)
                    {
                        for (int col = 0; col < cc.c.outDim; col++)
                        {
                            ws.Write((float)(rang * rGenerator.NextDouble() - rang / 2));
                        }
                        if (cc.c.outDim == 1)
                        {
                            ws.Write(0.0f);
                        }
                    }
                }
            }
            else
            {
                using (var ws = gpu.NewWriteStream(v3Buf)) {
                    for (int row = 0; row < N; row++)
                    {
                        ws.Write <float>(0, 1, 0, 1, 0, 1);
                    }
                }
                using (var ws = gpu.NewWriteStream(Y3Buf)) {
                    for (int row = 0; row < N; row++)
                    {
                        for (int col = 0; col < cc.c.outDim; col++)
                        {
                            ws.Write((float)(rang * rGenerator.NextDouble() - rang / 2));
                        }
                    }
                }
            }
            #endregion

            #region Upload data table and initialize the distance matrix

            // Used to aggregate values created by parallel threads.
            // The size of groupMaxBuf must be large enough to hold a float value for each thread started in parallel.
            // Notice: gpu.Run(k) will start k*GROUP_SIZE threads.
            int gpSize = Math.Max(GpuGroupSize, MaxGroupNumber * GroupSize);
            gpSize      = Math.Max(gpSize, MaxGroupNumberHyp * GroupSizeHyp);
            groupMaxBuf = gpu.CreateBufferRW(gpSize, 4, 7);

            resultBuf     = gpu.CreateBufferRW(3, 4, 2); // to receive the total changes.
            resultStaging = gpu.CreateStagingBuffer(resultBuf);

            tableBuf = gpu.CreateBufferRO(N * cc.c.columns, 4, 0);
            if (MetricType == 1)
            {
                NormalizeTable(X);
            }
            gpu.WriteMarix(tableBuf, X, true);

            const int MinCpuDimension = 100; // minimal dimension to trigger CPU caching.
            const int MaxDimension    = 64;  // maximal dimension (table columns) for fast EuclideanNoCache shader. Must be the same as MAX_DIMENSION.
            const int MaxDimensionS   = 32;  // maximal dimension (table columns) for fast EuclideanNoCache shader. Must be the same as MAX_DIMENSIONs.
            if (N <= CacheLimit)
            {
                cachingMode = CachingMode.OnGpu;
            }
            else
            {
                if ((cc.c.columns > MinCpuDimension) && ((double)N * N * 4) < ((double)MaxCpuCacheSize * 1024.0 * 1024.0))
                {
                    cachingMode = CachingMode.OnCpu;
                }
                else
                {
                    if (cc.c.columns < MaxDimensionS)
                    {
                        cachingMode = CachingMode.OnFlySmS;
                    }
                    else if (cc.c.columns < MaxDimension)
                    {
                        cachingMode = CachingMode.OnFlySm;
                    }
                    else
                    {
                        cachingMode = CachingMode.OnFly;
                    }
                }
            }
            #endregion

            cc.c.targetH = (float)Math.Log(PerplexityRatio * N);
            if (cachingMode == CachingMode.OnGpu)
            {
                CalculateP();
            }
            else if (cachingMode == CachingMode.OnCpu)
            {
                InitializePCpu();
            }
            else     // (cachingMode == CachingMode.OnFly[Sm,SmS])
            {
                InitializeP();
            }

            using (var sd = gpu.LoadShader("TsneDx.CalculateSumQ.cso")) {
                gpu.SetShader(sd);
                cc.c.groupNumber = 256;
                for (int i = 0; i < N; i += cc.c.groupNumber)
                {
                    cc.c.blockIdx = i;
                    cc.Upload();
                    gpu.Run(cc.c.groupNumber);
                }
                cc.c.blockIdx = -1;
                cc.Upload();
                gpu.Run();
            }

            var sdNames = new Dictionary <CachingMode, string>()
            {
                { CachingMode.OnGpu, "TsneDx.OneStep.cso" },
                { CachingMode.OnCpu, "TsneDx.OneStepCpuCache.cso" },
                { CachingMode.OnFly, "TsneDx.OneStepNoCache.cso" },
                { CachingMode.OnFlySm, "TsneDx.FastStep.cso" },
                { CachingMode.OnFlySmS, "TsneDx.FastStepS.cso" },
            };

            ComputeShader csOneStep   = gpu.LoadShader(sdNames[cachingMode]);
            ComputeShader csSumUp     = gpu.LoadShader("TsneDx.OneStepSumUp.cso");
            int           stepCounter = 0;

            while (true)
            {
                if (stepCounter < exaggerationLength)
                {
                    if (ExaggerationSmoothen)
                    {
                        int len = (int)(0.9 * MaxEpochs);
                        if (stepCounter < len)
                        {
                            double t = (double)stepCounter / len;
                            t            = Math.Sqrt(Math.Sqrt(t));
                            cc.c.PFactor = (float)((1 - t) * ExaggerationFactor + t);
                        }
                        else
                        {
                            cc.c.PFactor = 1.0f;
                        }
                    }
                    else
                    {
                        cc.c.PFactor = (float)ExaggerationFactor;
                    }
                }
                else
                {
                    cc.c.PFactor = 1.0f;
                }

                gpu.SetShader(csOneStep);

                if (cachingMode == CachingMode.OnGpu)
                {
                    cc.c.groupNumber = MaxGroupNumber;
                    // Notice: cc.c.groupNumber*GroupSize must fit into groupMax[].
                    for (int bIdx = 0; bIdx < N; bIdx += cc.c.groupNumber * GroupSize)
                    {
                        cc.c.blockIdx = bIdx;
                        cc.Upload();
                        gpu.Run(cc.c.groupNumber);
                    }
                    cc.c.groupNumber = MaxGroupNumber * GroupSize;
                }
                else if (cachingMode == CachingMode.OnCpu)
                {
                    int bSize = MaxGroupNumberHyp * GroupSizeHyp;
                    cc.c.groupNumber = MaxGroupNumberHyp;
                    for (int bIdx = 0; bIdx < N; bIdx += bSize)
                    {
                        gpu.WriteArray(cpuP, bIdx, Math.Min(N, bIdx + bSize), P2Buf);
                        cc.c.blockIdx = bIdx;
                        cc.Upload();
                        gpu.Run(cc.c.groupNumber);
                    }
                    cc.c.groupNumber = Math.Min(N, bSize);
                }
                else if ((cachingMode == CachingMode.OnFlySm) || (cachingMode == CachingMode.OnFlySmS))
                {
                    const int GrSize = 64;  // This value must match that of GR_SIZE in TsneMap.hlsl.
                    cc.c.groupNumber = MaxGroupNumber;
                    for (int bIdx = 0; bIdx < N; bIdx += cc.c.groupNumber * GrSize)
                    {
                        cc.c.blockIdx = bIdx;
                        cc.Upload();
                        gpu.Run(cc.c.groupNumber);
                    }
                    cc.c.groupNumber = cc.c.groupNumber * GrSize;
                }
                else     // cachingMode==CachingMode.OnFly
                {
                    cc.c.groupNumber = 128;
                    for (int bIdx = 0; bIdx < N; bIdx += cc.c.groupNumber)
                    {
                        cc.c.blockIdx = bIdx;
                        cc.Upload();
                        gpu.Run(cc.c.groupNumber);
                    }
                }

                // Notice: cc.c.groupNumber must be the number of partial sumQ_next values, which add up to sumQ for the next step.
                gpu.SetShader(csSumUp);
                cc.Upload();
                gpu.Run();

                currentVariation = gpu.ReadRange <float>(resultStaging, resultBuf, 3)[2] / N;

                cc.c.mom = (float)((stepCounter < (MaxEpochs * momentumSwitch)) ? momentum : finalMomentum);
                stepCounter++;
                if (stepCounter % 10 == 0)
                {
                    Console.Write('.');
                }
                if (stepCounter % 500 == 0)
                {
                    Console.WriteLine();
                }
                if ((stepCounter >= MaxEpochs) || ((stepCounter >= (2 + exaggerationLength)) && (currentVariation < stopVariation)))
                {
                    break;
                }
            }
            Console.WriteLine();

            float[][] Y = new float[N][];
            using (var rs = gpu.NewReadStream((cc.c.outDim == 3) ? Y3StagingBuf : Y2StagingBuf, (cc.c.outDim == 3) ? Y3Buf : Y2Buf)) {
                int outVDim = (cc.c.outDim == 3) ? 3 : 2;
                for (int row = 0; row < N; row++)
                {
                    Y[row] = rs.ReadRange <float>(outVDim);
                }
            }

            if (cc.c.outDim == 1)
            {
                for (int i = 0; i < N; i++)
                {
                    Y[i] = new float[] { Y[i][0] };
                }
            }

            TsneDx.SafeDispose(csSumUp, csOneStep, PBuf, P2Buf, distanceBuf, tableBuf, resultBuf,
                               resultStaging, groupMaxBuf, Y3Buf, Y3StagingBuf, v3Buf, Y2Buf, Y2StagingBuf, v2Buf, cc, gpu);

            return(AutoNormalize ? PcaNormalize.DoNormalize(Y) : Y);
        }
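The early-exaggeration branch at the top of the training loop reduces to a single schedule function. A standalone sketch of that schedule for the ExaggerationSmoothen case; PFactorAt is a hypothetical helper, not part of TsneDx:

 // While stepCounter < exaggerationLength, P is scaled by PFactor, which decays from
 // ExaggerationFactor toward 1 with t = (step / (0.9 * MaxEpochs))^(1/4).
 using System;

 static class ExaggerationSchedule
 {
     public static float PFactorAt(int step, int maxEpochs, int exaggerationLength, double exaggerationFactor)
     {
         if (step >= exaggerationLength) return 1.0f;          // exaggeration phase is over
         int len = (int)(0.9 * maxEpochs);
         if (step >= len) return 1.0f;
         double t = Math.Sqrt(Math.Sqrt((double)step / len));  // t = (step/len)^(1/4)
         return (float)((1 - t) * exaggerationFactor + t);     // blend from ExaggerationFactor down to 1
     }
 }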