Beispiel #1
0
        // Reduce give matrix to top PC components.
        public float[][] DoPca(float[][] A, int eigenCount)
        {
            GpuDevice gpu = new GpuDevice();
            var       cc  = gpu.CreateConstBuffer <PcaConstants>(0);

            bool transposing = (A.Length > A[0].Length);

            cc.c.eigenCount = eigenCount;
            cc.c.rows       = transposing ? A[0].Length : A.Length;
            cc.c.columns    = (!transposing) ? A[0].Length : A.Length;

            var resultBuf     = gpu.CreateBufferRW(3, 4, 1); // to receive the total changes.
            var resultStaging = gpu.CreateStagingBuffer(resultBuf);

            Buffer tableBuf = gpu.CreateBufferRO(cc.c.rows * cc.c.columns, 4, 0);

            double[] colMean = new double[A[0].Length];
            Parallel.For(0, A[0].Length, col => {
                colMean[col] = 0.0;
                for (int row = 0; row < A.Length; row++)
                {
                    colMean[col] += A[row][col];
                }
                colMean[col] /= A.Length;
            });

            using (var ds = gpu.NewWriteStream(tableBuf)) {
                float[] buf = new float[cc.c.rows * cc.c.columns];
                if (transposing)
                {
                    Parallel.For(0, cc.c.columns, col => {
                        int offset = col * cc.c.rows;
                        for (int row = 0; row < cc.c.rows; row++)
                        {
                            buf[offset + row] = (float)(A[col][row] - colMean[row]);
                        }
                    });
                }
                else
                {
                    Parallel.For(0, cc.c.columns, col => {
                        int offset = col * cc.c.rows;
                        for (int row = 0; row < cc.c.rows; row++)
                        {
                            buf[offset + row] = (float)(A[row][col] - colMean[col]);
                        }
                    });
                }
                ds.WriteRange(buf);
            }

            cc.c.covFactor = transposing ? 1.0f / (cc.c.columns - 1) : 1.0f / (cc.c.rows - 1);
            Buffer covBuf = gpu.CreateBufferRW(cc.c.rows * cc.c.rows, 4, 0);

            using (var shader = gpu.LoadShader("TsneDx.PcaCreateCovMatrix.cso")) {
                gpu.SetShader(shader);
                cc.c.groupNumber = 256;
                for (int iBlock = 0; iBlock < cc.c.rows; iBlock += cc.c.groupNumber)
                {
                    cc.c.iBlock = iBlock;
                    cc.Upload();
                    gpu.Run(cc.c.groupNumber);
                }
            }

            var eVectorBuf     = gpu.CreateBufferRW(cc.c.rows, 4, 2);
            var eVectorStaging = gpu.CreateStagingBuffer(eVectorBuf);
            var eVector2Buf    = gpu.CreateBufferRW(cc.c.rows, 4, 3);

            var sdInit   = gpu.LoadShader("TsneDx.PcaInitIteration.cso");
            var sdStep   = gpu.LoadShader("TsneDx.PcaIterateOneStep.cso");
            var sdNorm   = gpu.LoadShader("TsneDx.PcaCalculateNormal.cso");
            var sdAdjCov = gpu.LoadShader("TsneDx.PcaAdjustCovMatrix.cso");

            gpu.SetShader(sdInit);
            cc.c.eigenIdx = 0;
            cc.Upload();
            gpu.Run();

            float preEigen = 1e30f;
            float newEigen = 0;

            float[][] eVectors = new float[eigenCount][];
            double[]  eValues  = new double[eigenCount];

            for (int eigenIdx = 0; eigenIdx < eigenCount; eigenIdx++)
            {
                cc.c.groupNumber = 256;
                cc.Upload();

                for (int repeat = 0; repeat < MAX_ITERATION; repeat++)
                {
                    gpu.SetShader(sdStep);
                    gpu.Run(cc.c.groupNumber);
                    gpu.SetShader(sdNorm);
                    gpu.Run(1);
                    newEigen = gpu.ReadFloat(resultStaging, resultBuf);
                    double delta = Math.Abs((newEigen - preEigen) / preEigen);
                    if (delta < epsilon)
                    {
                        break;
                    }
                    preEigen = newEigen;
                }

                eValues[eigenIdx] = (double)newEigen;

                // Eigenvector with extrem small eigenvalue (i.e. 0.0) will be ignored and stop the calculation.
                if (Math.Abs(eValues[eigenIdx] / eValues[0]) < stopEpsilon)
                {
                    Array.Resize(ref eValues, eigenIdx);
                    Array.Resize(ref eVectors, eigenIdx);
                    break;
                }

                eVectors[eigenIdx] = new float[cc.c.rows];
                Array.Copy(gpu.ReadRange <float>(eVectorStaging, eVectorBuf, cc.c.rows), eVectors[eigenIdx], cc.c.rows);

                if (eigenIdx == (eigenCount - 1))
                {
                    break;
                }

                // Adjust the covariance matrix.
                gpu.SetShader(sdAdjCov);
                cc.c.groupNumber = 128;
                cc.Upload();
                gpu.Run(cc.c.groupNumber);

                // Initialize the iteration loop for the next eigen-vector.
                gpu.SetShader(sdInit);
                cc.c.eigenIdx = eigenIdx + 1;
                cc.Upload();
                gpu.Run();
                //CmdSynchronize();
            }

            if (!transposing)
            {
                using (var shader = gpu.LoadShader("TsneDx.PcaTransposeEigenvectors.cso")) {
                    int      eRows      = eVectors.Length;
                    int      eColumns   = eVectors[0].Length;
                    Buffer   eigenList1 = gpu.CreateBufferRO(eRows * eColumns, 4, 1);
                    double[] S          = eValues.Select(x => 1.0 / Math.Sqrt(Math.Abs(x * (eVectors[0].Length - 1)))).ToArray();
                    float[]  eVector1   = new float[eRows * eColumns];
                    for (int row = 0; row < eRows; row++)
                    {
                        for (int col = 0; col < eColumns; col++)
                        {
                            eVector1[row * eColumns + col] = (float)(S[row] * eVectors[row][col]);
                        }
                    }

                    using (var ds = gpu.NewWriteStream(eigenList1))
                        ds.WriteRange(eVector1);

                    Buffer eigenList2 = gpu.CreateBufferRW(eVectors.Length * cc.c.columns, 4, 4);
                    gpu.SetShader(shader);
                    cc.c.groupNumber = 128;
                    cc.c.eigenCount  = eVectors.Length;
                    cc.Upload();
                    gpu.Run(cc.c.groupNumber);

                    float[] eVectors2 = gpu.ReadRange <float>(eigenList2, eVectors.Length * cc.c.columns);
                    eVectors = new float[eVectors.Length][];
                    for (int row = 0; row < eVectors.Length; row++)
                    {
                        eVectors[row] = new float[cc.c.columns];
                    }
                    Parallel.For(0, eVectors.Length, row => {
                        Array.Copy(eVectors2, row * cc.c.columns, eVectors[row], 0, cc.c.columns);
                    });

                    TsneDx.SafeDispose(eigenList1, eigenList2);
                }
            }

            float[][] B = null;
            cc.c.rows       = A.Length;
            cc.c.columns    = A[0].Length;
            cc.c.eigenCount = eVectors.Length;
            cc.Upload();

            if (transposing)
            {
                // The tableBuf on GPU is in wrong matrix order. We need to upload the tableBuf in needed order here.
                TsneDx.SafeDispose(tableBuf);
                tableBuf = gpu.CreateBufferRO(cc.c.rows * cc.c.columns, 4, 0);
                Parallel.For(0, cc.c.rows, row => {
                    for (int col = 0; col < cc.c.columns; col++)
                    {
                        A[row][col] -= (float)colMean[col];
                    }
                });
                gpu.WriteMarix(tableBuf, A);
            }

            Buffer eigenTable = gpu.CreateBufferRO(cc.c.eigenCount * cc.c.columns, 4, 1);

            gpu.WriteMarix(eigenTable, eVectors);

            TsneDx.SafeDispose(resultBuf);
            resultBuf = gpu.CreateBufferRW(cc.c.rows * cc.c.eigenCount, 4, 1);

            using (var shader = gpu.LoadShader("TsneDx.PcaReduceMatrix.cso")) {
                try {
                    gpu.SetShader(shader);
                    const int GROUP_NR = 256;
                    gpu.Run(GROUP_NR);

                    float[] buf = gpu.ReadRange <float>(resultBuf, cc.c.rows * cc.c.eigenCount);
                    B = new float[cc.c.rows][];
                    for (int row = 0; row < cc.c.rows; row++)
                    {
                        B[row] = new float[cc.c.eigenCount];
                    }
                    Parallel.For(0, cc.c.rows, row => {
                        Array.Copy(buf, row * cc.c.eigenCount, B[row], 0, cc.c.eigenCount);
                    });
                } catch (SharpDX.SharpDXException ex) {
                    string msg = ex.Message;
                    Console.WriteLine("GPU operation timeouted: Please try to enlarge the TDR value");
                }
            }

            TsneDx.SafeDispose(eigenTable, sdInit, sdStep, sdNorm, sdAdjCov, eVectorBuf,
                               eVectorStaging, eVector2Buf, resultBuf, resultStaging, covBuf, tableBuf, cc, gpu);
            return(B);
        }