private void CreateShaders(DevicePipelineStateCache pipelineStateCache)
        {
            // Instantiates one native shader object per stage found in the effect bytecode.
            // Nothing to build when no effect bytecode has been assigned.
            if (effectBytecode == null)
            {
                return;
            }

            foreach (var stageBytecode in effectBytecode.Stages)
            {
                // TODO CACHE Shaders with a bytecode hash
                switch (stageBytecode.Stage)
                {
                case ShaderStage.Vertex:
                    vertexShader = pipelineStateCache.VertexShaderCache.Instantiate(stageBytecode);
                    // The input signature holds no GPU data, so an existing one survives
                    // a device reset and is only captured the first time through.
                    if (inputSignature == null)
                    {
                        inputSignature = stageBytecode;
                    }
                    break;

                case ShaderStage.Domain:
                    domainShader = pipelineStateCache.DomainShaderCache.Instantiate(stageBytecode);
                    break;

                case ShaderStage.Hull:
                    hullShader = pipelineStateCache.HullShaderCache.Instantiate(stageBytecode);
                    break;

                case ShaderStage.Geometry:
                {
                    var effectReflection = effectBytecode.Reflection;
                    var declarations     = effectReflection.ShaderStreamOutputDeclarations;

                    if (declarations == null || declarations.Count <= 0)
                    {
                        // Plain geometry shader: goes through the shared cache.
                        geometryShader = pipelineStateCache.GeometryShaderCache.Instantiate(stageBytecode);
                        break;
                    }

                    // Stream-output geometry shader: accumulate the byte stride of every stream.
                    var strideByStream = new List<int>();
                    foreach (var declaration in declarations)
                    {
                        // Grow the list with zeroes until the declared stream index is addressable.
                        while (strideByStream.Count < (declaration.Stream + 1))
                        {
                            strideByStream.Add(0);
                        }

                        strideByStream[declaration.Stream] += declaration.ComponentCount * sizeof(float);
                    }

                    // TODO CREATE StreamOutputElement from bytecode.Reflection.ShaderStreamOutputDeclarations
                    var soElements = new SharpDX.Direct3D11.StreamOutputElement[0];

                    // TODO GRAPHICS REFACTOR better cache
                    geometryShader = new SharpDX.Direct3D11.GeometryShader(
                        GraphicsDevice.NativeDevice,
                        stageBytecode,
                        soElements,
                        strideByStream.ToArray(),
                        effectReflection.StreamOutputRasterizedStream);
                    break;
                }

                case ShaderStage.Pixel:
                    pixelShader = pipelineStateCache.PixelShaderCache.Instantiate(stageBytecode);
                    break;

                case ShaderStage.Compute:
                    computeShader = pipelineStateCache.ComputeShaderCache.Instantiate(stageBytecode);
                    break;
                }
            }
        }
Esempio n. 2
0
        /// <summary>
        /// Creates a compute kernel from compiled compute-shader bytecode.
        /// In builds without the <c>DIRECTX</c> symbol the constructor does nothing.
        /// </summary>
        /// <param name="device">Graphics device whose <c>Handle</c> must be a <c>SharpDX.Direct3D11.Device</c>.</param>
        /// <param name="byteCodes">Compiled compute-shader bytecode.</param>
        /// <exception cref="System.ArgumentNullException">
        /// <paramref name="device"/> or <paramref name="byteCodes"/> is null (DIRECTX builds only).
        /// </exception>
        /// <exception cref="System.ArgumentException">
        /// <c>device.Handle</c> is not a Direct3D 11 device (DIRECTX builds only).
        /// </exception>
        public ComputeKernel(GraphicsDevice device, byte[] byteCodes)
        {
#if DIRECTX
            if (device == null)
            {
                throw new System.ArgumentNullException("device");
            }
            if (byteCodes == null)
            {
                throw new System.ArgumentNullException("byteCodes");
            }

            // `as` yields null when the handle is not a D3D11 device; fail here with a clear
            // message instead of handing a null device to the ComputeShader constructor.
            var nativeDevice = device.Handle as SharpDX.Direct3D11.Device;
            if (nativeDevice == null)
            {
                throw new System.ArgumentException("device.Handle is not a SharpDX.Direct3D11.Device.", "device");
            }

            shader = new SharpDX.Direct3D11.ComputeShader(nativeDevice, byteCodes);
#endif
        }
Esempio n. 3
0
        private void CreateShaders(DevicePipelineStateCache pipelineStateCache)
        {
            // Instantiates one native shader object per stage found in the effect bytecode.
            // Nothing to build when no effect bytecode has been assigned.
            if (effectBytecode == null)
            {
                return;
            }

            foreach (var stageBytecode in effectBytecode.Stages)
            {
                // TODO CACHE Shaders with a bytecode hash
                switch (stageBytecode.Stage)
                {
                case ShaderStage.Vertex:
                    vertexShader = pipelineStateCache.VertexShaderCache.Instantiate(stageBytecode);
                    // The input signature holds no GPU data, so an existing one survives
                    // a device reset and is only captured the first time through.
                    if (inputSignature == null)
                    {
                        inputSignature = stageBytecode;
                    }
                    break;

                case ShaderStage.Domain:
                    domainShader = pipelineStateCache.DomainShaderCache.Instantiate(stageBytecode);
                    break;

                case ShaderStage.Hull:
                    hullShader = pipelineStateCache.HullShaderCache.Instantiate(stageBytecode);
                    break;

                case ShaderStage.Geometry:
                {
                    var effectReflection = effectBytecode.Reflection;
                    var declarations     = effectReflection.ShaderStreamOutputDeclarations;

                    if (declarations == null || declarations.Count <= 0)
                    {
                        // Plain geometry shader: goes through the shared cache.
                        geometryShader = pipelineStateCache.GeometryShaderCache.Instantiate(stageBytecode);
                        break;
                    }

                    // Translate every reflected stream-output declaration into its native element.
                    var nativeElements = new List<SharpDX.Direct3D11.StreamOutputElement>(declarations.Count);
                    foreach (var declaration in declarations)
                    {
                        nativeElements.Add(new SharpDX.Direct3D11.StreamOutputElement()
                        {
                            Stream         = declaration.Stream,
                            SemanticIndex  = declaration.SemanticIndex,
                            SemanticName   = declaration.SemanticName,
                            StartComponent = declaration.StartComponent,
                            ComponentCount = declaration.ComponentCount,
                            OutputSlot     = declaration.OutputSlot
                        });
                    }

                    // TODO GRAPHICS REFACTOR better cache
                    geometryShader = new SharpDX.Direct3D11.GeometryShader(
                        GraphicsDevice.NativeDevice,
                        stageBytecode,
                        nativeElements.ToArray(),
                        effectReflection.StreamOutputStrides,
                        effectReflection.StreamOutputRasterizedStream);
                    break;
                }

                case ShaderStage.Pixel:
                    pixelShader = pipelineStateCache.PixelShaderCache.Instantiate(stageBytecode);
                    break;

                case ShaderStage.Compute:
                    computeShader = pipelineStateCache.ComputeShaderCache.Instantiate(stageBytecode);
                    break;
                }
            }
        }
Esempio n. 4
0
        /// <summary>
        /// Runs the GPU t-SNE embedding on the data table <paramref name="X"/> and returns one
        /// output row per input row.
        /// </summary>
        /// <param name="X">
        /// Input table, one data point per row. The column count is taken from the first row;
        /// the remaining rows are presumably expected to have the same length (not checked here).
        /// When <c>MetricType == 1</c> the table is passed to <c>NormalizeTable</c> before upload.
        /// </param>
        /// <returns>
        /// An array of <c>X.Length</c> rows holding the embedding coordinates (1 value per row when
        /// <c>OutDim == 1</c>, otherwise 2 or 3), passed through <c>PcaNormalize.DoNormalize</c>
        /// when <c>AutoNormalize</c> is set.
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="X"/> is null.</exception>
        /// <exception cref="ArgumentException"><paramref name="X"/> has no rows or its first row is null.</exception>
        public float[][] Fit(float[][] X)
        {
            // Validate before any GPU object is created: the code below dereferences X[0], and
            // failing there would leave the freshly created device and buffers undisposed.
            if (X == null)
            {
                throw new ArgumentNullException("X");
            }
            if (X.Length == 0 || X[0] == null)
            {
                throw new ArgumentException("The input table must contain at least one non-null row.", "X");
            }

            // Number of initial epochs during which the exaggeration factor is applied.
            int exaggerationLength = (int)(MaxEpochs * ExaggerationRatio);

            gpu = new GpuDevice();
            cc  = gpu.CreateConstantBuffer<TsneMapConstants>(0);

            int N = X.Length;

            cc.c.columns    = X[0].Length;
            cc.c.N          = N;
            cc.c.outDim     = OutDim;
            cc.c.metricType = MetricType;

            #region Initialize Y
            // Only one of the 2-component / 3-component buffer sets is allocated; the other stays null.
            Buffer Y2Buf        = null;
            Buffer Y3Buf        = null;
            Buffer Y3StagingBuf = null;
            Buffer Y2StagingBuf = null;
            Buffer v2Buf        = null;
            Buffer v3Buf        = null;

            if (cc.c.outDim <= 2)
            {
                Y2Buf        = gpu.CreateBufferRW(N, 8, 3);
                Y2StagingBuf = gpu.CreateStagingBuffer(Y2Buf);
                v2Buf        = gpu.CreateBufferRW(N, 2 * 8, 5);
            }
            else
            {
                Y3Buf        = gpu.CreateBufferRW(N, 12, 4);
                Y3StagingBuf = gpu.CreateStagingBuffer(Y3Buf);
                v3Buf        = gpu.CreateBufferRW(N, 2 * 12, 6);
            }

            float  rang       = 0.05f;
            Random rGenerator = new Random(435243); // fixed seed: the initial layout is reproducible

            if (cc.c.outDim <= 2)
            {
                // Every row of v2Buf starts with the pattern 0,1,0,1.
                using (var ws = gpu.NewWriteStream(v2Buf)) {
                    for (int row = 0; row < N; row++)
                    {
                        ws.Write<float>(0, 1, 0, 1);
                    }
                }

                // Random start positions in [-rang/2, rang/2).
                using (var ws = gpu.NewWriteStream(Y2Buf)) {
                    for (int row = 0; row < N; row++)
                    {
                        for (int col = 0; col < cc.c.outDim; col++)
                        {
                            ws.Write((float)(rang * rGenerator.NextDouble() - rang / 2));
                        }
                        if (cc.c.outDim == 1)
                        {
                            // Pad the second component so that each row still holds two floats.
                            ws.Write(0.0f);
                        }
                    }
                }
            }
            else
            {
                // Every row of v3Buf starts with the pattern 0,1,0,1,0,1.
                using (var ws = gpu.NewWriteStream(v3Buf)) {
                    for (int row = 0; row < N; row++)
                    {
                        ws.Write<float>(0, 1, 0, 1, 0, 1);
                    }
                }
                using (var ws = gpu.NewWriteStream(Y3Buf)) {
                    for (int row = 0; row < N; row++)
                    {
                        for (int col = 0; col < cc.c.outDim; col++)
                        {
                            ws.Write((float)(rang * rGenerator.NextDouble() - rang / 2));
                        }
                    }
                }
            }
            #endregion

            #region Upload data table and initialize the distance matrix

            // Used to aggregate values created by parallel threads.
            // The size of groupMaxBuf must be large enough to hold a float value for each thread started in parallel.
            // Notice: gpu.Run(k) will start k*GROUP_SIZE threads.
            int gpSize = Math.Max(GpuGroupSize, MaxGroupNumber * GroupSize);
            gpSize      = Math.Max(gpSize, MaxGroupNumberHyp * GroupSizeHyp);
            groupMaxBuf = gpu.CreateBufferRW(gpSize, 4, 7);

            resultBuf     = gpu.CreateBufferRW(3, 4, 2); // to receive the total changes.
            resultStaging = gpu.CreateStagingBuffer(resultBuf);

            tableBuf = gpu.CreateBufferRO(N * cc.c.columns, 4, 0);
            if (MetricType == 1)
            {
                NormalizeTable(X);
            }
            gpu.WriteMarix(tableBuf, X, true);

            const int MinCpuDimension = 100; // minimal dimension to trigger CPU caching.
            const int MaxDimension    = 64;  // maximal dimension (table columns) for the FastStep shader. Must be the same as MAX_DIMENSION.
            const int MaxDimensionS   = 32;  // maximal dimension (table columns) for the FastStepS shader. Must be the same as MAX_DIMENSIONs.

            // Pick the caching strategy from the table size and column count.
            if (N <= CacheLimit)
            {
                cachingMode = CachingMode.OnGpu;
            }
            else
            {
                // The N*N*4 byte estimate is computed in double to avoid integer overflow.
                if ((cc.c.columns > MinCpuDimension) && ((double)N * N * 4) < ((double)MaxCpuCacheSize * 1024.0 * 1024.0))
                {
                    cachingMode = CachingMode.OnCpu;
                }
                else
                {
                    if (cc.c.columns < MaxDimensionS)
                    {
                        cachingMode = CachingMode.OnFlySmS;
                    }
                    else if (cc.c.columns < MaxDimension)
                    {
                        cachingMode = CachingMode.OnFlySm;
                    }
                    else
                    {
                        cachingMode = CachingMode.OnFly;
                    }
                }
            }
            #endregion

            cc.c.targetH = (float)Math.Log(PerplexityRatio * N);
            if (cachingMode == CachingMode.OnGpu)
            {
                CalculateP();
            }
            else if (cachingMode == CachingMode.OnCpu)
            {
                InitializePCpu();
            }
            else     // (cachingMode == CachingMode.OnFly[Sm,SmS])
            {
                InitializeP();
            }

            // Run CalculateSumQ block by block, then once more with blockIdx = -1.
            using (var sd = gpu.LoadShader("TsneDx.CalculateSumQ.cso")) {
                gpu.SetShader(sd);
                cc.c.groupNumber = 256;
                for (int i = 0; i < N; i += cc.c.groupNumber)
                {
                    cc.c.blockIdx = i;
                    cc.Upload();
                    gpu.Run(cc.c.groupNumber);
                }
                cc.c.blockIdx = -1;
                cc.Upload();
                gpu.Run();
            }

            // Step shader used for each caching mode.
            var sdNames = new Dictionary<CachingMode, string>()
            {
                { CachingMode.OnGpu, "TsneDx.OneStep.cso" },
                { CachingMode.OnCpu, "TsneDx.OneStepCpuCache.cso" },
                { CachingMode.OnFly, "TsneDx.OneStepNoCache.cso" },
                { CachingMode.OnFlySm, "TsneDx.FastStep.cso" },
                { CachingMode.OnFlySmS, "TsneDx.FastStepS.cso" },
            };

            ComputeShader csOneStep   = gpu.LoadShader(sdNames[cachingMode]);
            ComputeShader csSumUp     = gpu.LoadShader("TsneDx.OneStepSumUp.cso");
            int           stepCounter = 0;

            while (true)
            {
                // Choose the P factor for this epoch: exaggerated early on, 1 afterwards.
                if (stepCounter < exaggerationLength)
                {
                    if (ExaggerationSmoothen)
                    {
                        int len = (int)(0.9 * MaxEpochs);
                        if (stepCounter < len)
                        {
                            // Blend from ExaggerationFactor towards 1 along the fourth root of the progress.
                            double t = (double)stepCounter / len;
                            t            = Math.Sqrt(Math.Sqrt(t));
                            cc.c.PFactor = (float)((1 - t) * ExaggerationFactor + t);
                        }
                        else
                        {
                            cc.c.PFactor = 1.0f;
                        }
                    }
                    else
                    {
                        cc.c.PFactor = (float)ExaggerationFactor;
                    }
                }
                else
                {
                    cc.c.PFactor = 1.0f;
                }

                gpu.SetShader(csOneStep);

                if (cachingMode == CachingMode.OnGpu)
                {
                    cc.c.groupNumber = MaxGroupNumber;
                    // Notice: cc.c.groupNumber*GroupSize must fit into groupMax[].
                    for (int bIdx = 0; bIdx < N; bIdx += cc.c.groupNumber * GroupSize)
                    {
                        cc.c.blockIdx = bIdx;
                        cc.Upload();
                        gpu.Run(cc.c.groupNumber);
                    }
                    cc.c.groupNumber = MaxGroupNumber * GroupSize;
                }
                else if (cachingMode == CachingMode.OnCpu)
                {
                    int bSize = MaxGroupNumberHyp * GroupSizeHyp;
                    cc.c.groupNumber = MaxGroupNumberHyp;
                    for (int bIdx = 0; bIdx < N; bIdx += bSize)
                    {
                        // Upload the CPU-cached rows for this block before running the step on it.
                        gpu.WriteArray(cpuP, bIdx, Math.Min(N, bIdx + bSize), P2Buf);
                        cc.c.blockIdx = bIdx;
                        cc.Upload();
                        gpu.Run(cc.c.groupNumber);
                    }
                    cc.c.groupNumber = Math.Min(N, bSize);
                }
                else if ((cachingMode == CachingMode.OnFlySm) || (cachingMode == CachingMode.OnFlySmS))
                {
                    const int GrSize = 64;  // This value must match that of GR_SIZE in TsneMap.hlsl.
                    cc.c.groupNumber = MaxGroupNumber;
                    for (int bIdx = 0; bIdx < N; bIdx += cc.c.groupNumber * GrSize)
                    {
                        cc.c.blockIdx = bIdx;
                        cc.Upload();
                        gpu.Run(cc.c.groupNumber);
                    }
                    cc.c.groupNumber = cc.c.groupNumber * GrSize;
                }
                else     // cachingMode==CachingMode.OnFly
                {
                    cc.c.groupNumber = 128;
                    for (int bIdx = 0; bIdx < N; bIdx += cc.c.groupNumber)
                    {
                        cc.c.blockIdx = bIdx;
                        cc.Upload();
                        gpu.Run(cc.c.groupNumber);
                    }
                }

                // Notice: cc.c.groupNumber must be number of partial sumQ_next, which add up to sumQ for the next step.
                gpu.SetShader(csSumUp);
                cc.Upload();
                gpu.Run();

                // The third value of the result buffer, averaged over N, is this epoch's variation.
                currentVariation = gpu.ReadRange<float>(resultStaging, resultBuf, 3)[2] / N;

                cc.c.mom = (float)((stepCounter < (MaxEpochs * momentumSwitch)) ? momentum : finalMomentum);
                stepCounter++;

                // Console progress: a dot every 10 epochs, a line break every 500.
                if (stepCounter % 10 == 0)
                {
                    Console.Write('.');
                }
                if (stepCounter % 500 == 0)
                {
                    Console.WriteLine();
                }

                // Stop at the epoch limit, or once past the exaggeration phase with a small enough variation.
                if ((stepCounter >= MaxEpochs) || ((stepCounter >= (2 + exaggerationLength)) && (currentVariation < stopVariation)))
                {
                    break;
                }
            }
            Console.WriteLine();

            // NOTE(review): allocation above branches on outDim <= 2 while the read-back below tests
            // outDim == 3; the two only agree for outDim in 1..3 - confirm that other values are rejected upstream.
            float[][] Y = new float[N][];
            using (var rs = gpu.NewReadStream((cc.c.outDim == 3) ? Y3StagingBuf : Y2StagingBuf, (cc.c.outDim == 3) ? Y3Buf : Y2Buf)) {
                int outVDim = (cc.c.outDim == 3) ? 3 : 2;
                for (int row = 0; row < N; row++)
                {
                    Y[row] = rs.ReadRange<float>(outVDim);
                }
            }

            if (cc.c.outDim == 1)
            {
                // Drop the padding component that was written for one-dimensional output.
                for (int i = 0; i < N; i++)
                {
                    Y[i] = new float[] { Y[i][0] };
                }
            }

            TsneDx.SafeDispose(csSumUp, csOneStep, PBuf, P2Buf, distanceBuf, tableBuf, resultBuf,
                               resultStaging, groupMaxBuf, Y3Buf, Y3StagingBuf, v3Buf, Y2Buf, Y2StagingBuf, v2Buf, cc, gpu);

            return(AutoNormalize ? PcaNormalize.DoNormalize(Y) : Y);
        }