/// <summary>
        /// Sets up a CUDA intersection device for <paramref name="scene"/>: creates the
        /// ray-buffer queues, picks the CUDA context, loads the "IntersectLBvh" kernel from
        /// its PTX file, creates the device-side ray/hit variables and caches the scene's
        /// vertices, triangle infos and BVH data in fields.
        /// </summary>
        /// <param name="scene">Scene whose vertices, triangles and BVH data are captured.</param>
        /// <param name="ctx">
        /// Context to reuse. When null, a new <c>NVContext</c> is created around a
        /// <c>CudaContext</c> for the device id returned by <c>CudaContext.GetMaxGflopsDeviceId()</c>.
        /// </param>
        public CudaIntersectionDevice(RayEngineScene scene, NVContext ctx)
            : base(scene)
        {
            wallclock = new Stopwatch();
            todoRayBuffers = new ConcurrentQueue<Tuple<int, RayBuffer>>();

            // The list of "done" queues starts out with exactly one empty queue.
            var doneQueues = new List<ConcurrentQueue<RayBuffer>>();
            doneQueues.Add(new ConcurrentQueue<RayBuffer>());
            doneRayBuffers = doneQueues;

            started = false;

            // Reuse the caller's context when one is supplied, otherwise create our own.
            cudaContext = ctx != null
                ? ctx
                : new NVContext() { Context = new CudaContext(CudaContext.GetMaxGflopsDeviceId()) };

            // NOTE(review): absolute, machine-specific path - the kernel only loads on a
            // machine that has this exact file; consider making it configurable.
            const string ptxPath = @"G:\Git\RayDen\CudaMegaRay\x64\Release\kernel.cu.ptx";
            using (var ptxReader = new StreamReader(ptxPath))
            {
                intersectKernel = cudaContext.Context.LoadKernelPTX(ptxReader.BaseStream, "IntersectLBvh");
            }

            rays = new CudaDeviceVariable<RayData>(RayBuffer.RayBufferSize);
            hits = new CudaDeviceVariable<RayHit>(RayBuffer.RayBufferSize);
            verts = scene.Vertices.ToArray();

            // Gather the host-side triangle and BVH arrays first, then publish them to the fields.
            var triangleInfos = scene.Triangles.Select(triangle => triangle.GetInfo()).ToArray();
            var bvhNodes = new BvhDataAdapter(scene).GetMpData();

            bvh = bvhNodes;
            trianglesCount = triangleInfos.Length;
            tris = triangleInfos;
            nodesCount = bvhNodes.Length;

            // The factor 32 is presumably the per-node size in bytes - TODO confirm.
            var bvhMegabytes = (bvhNodes.Length * 32f) / (1024f * 1024f);
            Tracer.TraceLine("BVH Data Size {0:F3} MBytes", bvhMegabytes);
        }
        /// <summary>
        /// Sets up a CUDA intersection device for <paramref name="scene"/>: creates the
        /// input/output ray-buffer collections, picks the CUDA context, loads the "Intersect"
        /// kernel from its PTX file, creates the device-side ray/hit variables and caches the
        /// scene's vertices, triangle infos and BVH data in fields.
        /// </summary>
        /// <param name="scene">Scene whose vertices, triangles and BVH data are captured.</param>
        /// <param name="ctx">
        /// Context to reuse. When null, a new <c>NVContext</c> is created around a
        /// <c>CudaContext</c> for the device id returned by <c>CudaContext.GetMaxGflopsDeviceId()</c>.
        /// </param>
        public DadeCudaIntersectionDevice(RayEngineScene scene, NVContext ctx)
            : base(scene)
        {
            this.scene = scene;
            wallclock = new Stopwatch();
            todoRayBuffers = new InputRayBufferCollection();
            doneRayBuffers = new OutputRayBufferCollection();
            started = false;

            // Reuse the caller's context when one is supplied, otherwise create our own.
            cudaContext = ctx != null
                ? ctx
                : new NVContext() { Context = new CudaContext(CudaContext.GetMaxGflopsDeviceId()) };

            // NOTE(review): absolute, machine-specific path - the kernel only loads on a
            // machine that has this exact file; consider making it configurable.
            const string ptxPath = @"G:\Git\RayDen\CudaMegaRay\x64\Release\Intersection.cu.ptx";
            using (var ptxReader = new StreamReader(ptxPath))
            {
                intersectKernel = cudaContext.Context.LoadKernelPTX(ptxReader.BaseStream, "Intersect");
            }

            rays = new CudaDeviceVariable<RayData>(RayBuffer.RayBufferSize);
            hits = new CudaDeviceVariable<RayHit>(RayBuffer.RayBufferSize);
            verts = scene.Vertices.ToArray();
            tris = scene.Triangles.Select(triangle => triangle.GetInfo()).ToArray();

            // Cached BVH data is used only when caching is enabled AND the scene carries a cache.
            var useCachedBvh = GlobalConfiguration.Instance.UseSceneCaching && scene.Cache != null;
            if (!useCachedBvh)
            {
                var builtNodes = new BvhDataAdapter(scene).BuildData();
                bvh = builtNodes;
                nodesCount = builtNodes.Length;
            }
            else
            {
                bvh = scene.Cache.BvhData;
                nodesCount = scene.Cache.BvhData.Length;
            }

            // The factor 32 is presumably the per-node size in bytes - TODO confirm.
            Tracer.TraceLine("BVH Data Size {0:F3} MBytes", (nodesCount * 32f) / (1024f * 1024f));
        }
        /// <summary>
        /// Sets up an OpenCL intersection device for <paramref name="scene"/>: initializes the
        /// OpenCL context with the PBRT BVH kernel source, builds the BVH data, creates the
        /// vertex / triangle / tree / ray / hit compute buffers and binds them as the
        /// arguments of the "Intersect" kernel.
        /// </summary>
        /// <param name="scene">Scene whose vertices and triangles are used to build the buffers.</param>
        /// <param name="lowLatency">
        /// When true the ray and hit buffers are sized <c>RayBuffer.RayBufferSize / 8</c>
        /// instead of <c>RayBuffer.RayBufferSize</c>.
        /// </param>
        /// <param name="index">Not read by this constructor.</param>
        public ClIntersectionDevice(RayEngineScene scene, bool lowLatency, int index)
            : base(scene) {
            wallclock = new Stopwatch();
            this.todoRayBuffers = new ConcurrentQueue<RayBuffer>();
            this.doneRayBuffers = new ConcurrentQueue<RayBuffer>();
            this.started = false;
            clContext = new ClDeviceContext() { KernelSrc = Kernels.PbrtBVHKernel };
            clContext.Initialize();
            clContext.SetupDevice("Intersect");

            var rayBufferSize = lowLatency ? (RayBuffer.RayBufferSize / 8) : RayBuffer.RayBufferSize;
            var sceneVertices = scene.Vertices.ToArray();
            // Only the length of this array is used (for the size trace below); the triangles
            // that get uploaded come from the BVH build (triData).
            var sceneTriangles = scene.Triangles.ToArray();
            // The factor 12 is presumably the per-element size in bytes - TODO confirm.
            Tracer.TraceLine("Vertices Data Size {0:F3} MBytes", (sceneVertices.Length * 12f) / (1024f * 1024f));
            Tracer.TraceLine("Indexes Data Size {0:F3} MBytes", (sceneTriangles.Length * 12f) / (1024f * 1024f));
            var da = new BvhDataAdapter(scene);

            // Stopwatch instead of DateTime.UtcNow subtraction: it is monotonic and has a
            // much finer resolution than the system clock, so the reported build time is
            // not distorted by clock adjustments or the coarse wall-clock tick.
            var buildTimer = Stopwatch.StartNew();
            TriangleDataInfo[] triData;
            var treeData = da.BuildLData(out triData);
            var dc = treeData.Count(item => item.IsLeaf);
            Tracer.TraceLine("Bvh Leaf nodes {0}", dc);
            verts = new ComputeBuffer<Point>(clContext.context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, sceneVertices);
            tris = new ComputeBuffer<TriangleInfo>(clContext.context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, triData.Select(item=>item.GetInfo()).ToArray());

            // The factor 32 is presumably the per-node size in bytes - TODO confirm.
            Tracer.TraceLine("BVH Data Size {0:F3} MBytes" ,(treeData.Length*32f) / (1024f*1024f));
            tree = new ComputeBuffer<LNode>(clContext.context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, treeData);
            rays = new ComputeBuffer<RayData>(clContext.context, ComputeMemoryFlags.ReadOnly, rayBufferSize);
            rayHits = new ComputeBuffer<RayHit>(clContext.context, ComputeMemoryFlags.WriteOnly, rayBufferSize);
            Tracer.TraceLine("Bvh Build and Load Time {0}", buildTimer.Elapsed);

            // Kernel argument slots: 0 rays, 1 hits, 2 vertices, 3 triangles,
            // 4 triangle count, 5 node count, 6 BVH nodes.
            clContext.kernel.SetMemoryArgument(0, rays);
            clContext.kernel.SetMemoryArgument(1, rayHits);
            clContext.kernel.SetMemoryArgument(2, verts);
            clContext.kernel.SetMemoryArgument(3, tris);
            clContext.kernel.SetValueArgument(4, (uint)tris.Count);
            clContext.kernel.SetValueArgument(5, (uint)tree.Count);
            clContext.kernel.SetMemoryArgument(6, tree);
        }
        /// <summary>
        /// Switches this device to a new scene: forwards to the base implementation, then
        /// refreshes the cached vertex array, triangle infos and BVH data from the scene.
        /// </summary>
        /// <param name="scn">New scene; it is cast to <c>RayEngineScene</c>.</param>
        public override void SetScene(IRayEngineScene scn)
        {
            base.SetScene(scn);
            Tracer.TraceLine("Setting scene");

            scene = (RayEngineScene)scn;
            verts = scene.Vertices.ToArray();
            tris = scene.Triangles.Select(triangle => triangle.GetInfo()).ToArray();

            // Cached BVH data is used only when caching is enabled AND the scene carries a cache.
            var useCachedBvh = GlobalConfiguration.Instance.UseSceneCaching && scene.Cache != null;
            if (useCachedBvh)
            {
                bvh = scene.Cache.BvhData;
                nodesCount = scene.Cache.BvhData.Length;
                return;
            }

            // No usable cache: build the BVH data from the scene.
            var builtNodes = new BvhDataAdapter(scene).BuildData();
            bvh = builtNodes;
            nodesCount = builtNodes.Length;
        }
        /// <summary>
        /// Sets up a two-level OpenCL intersection device for <paramref name="scene"/>:
        /// initializes the OpenCL context with the two-level BVH kernel source, builds one
        /// bottom-level BVH per mesh plus a top-level BVH over the mesh bounds, creates the
        /// compute buffers and binds them as arguments of the bottom-level and top-level kernels.
        /// </summary>
        /// <param name="scene">Scene whose vertices, triangles and meshes are used to build the buffers.</param>
        /// <param name="lowLatency">
        /// When true the ray and hit buffers are sized <c>RayBuffer.RayBufferSize / 8</c>
        /// instead of <c>RayBuffer.RayBufferSize</c>.
        /// </param>
        /// <param name="index">Not read by this constructor.</param>
        public TwoLevelOpenCLIntersectionDevice(RayEngineScene scene, bool lowLatency, int index)
            : base(scene) {
            wallclock = new Stopwatch();
            this.todoRayBuffers = new ConcurrentQueue<RayBuffer>();
            this.doneRayBuffers = new ConcurrentQueue<RayBuffer>();
            this.started = false;
            // NOTE(review): this value is never used below; kept because evaluating it
            // touches the platform/device lists - confirm whether it can be removed.
            var device = ComputePlatform.Platforms[0].Devices[0];
            clContext = new ClDeviceContext() { KernelSrc = Kernels.LuxBVHKernel2L };
            clContext.Initialize();
            clContext.SetupDevice("Intersect","TLIntersect");

            var rayBufferSize = lowLatency ? (RayBuffer.RayBufferSize / 8) : RayBuffer.RayBufferSize;
            var sceneVertices = scene.Vertices.ToArray();
            var sceneTriangles = scene.Triangles.Select(item=>item.GetInfo()).ToArray();
            // The factor 12 is presumably the per-element size in bytes - TODO confirm.
            Tracer.TraceLine("Vertices Data Size {0:F3} MBytes", (sceneVertices.Length * 12f) / (1024f * 1024f));
            Tracer.TraceLine("Indexes Data Size {0:F3} MBytes", (sceneTriangles.Length * 12f) / (1024f * 1024f));

            verts = new ComputeBuffer<Point>(clContext.context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, sceneVertices);
            tris = new ComputeBuffer<TriangleInfo>(clContext.context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, sceneTriangles);
            tlhits = new ComputeBuffer<TLHit>(clContext.context, ComputeMemoryFlags.WriteOnly, rayBufferSize);

            // Stopwatch instead of DateTime.UtcNow subtraction: it is monotonic and has a
            // much finer resolution than the system clock, so the reported build time is
            // not distorted by clock adjustments or the coarse wall-clock tick.
            var buildTimer = Stopwatch.StartNew();

            var da = new BvhDataAdapter(scene);
            var meshes = scene.Meshes.Cast<TriangleMeshInfo>().ToArray();

            // Build one bottom-level tree per mesh; all trees are concatenated into a single
            // list and each mesh keeps a reference to its own tree in its profile.
            List<GpuSmitsBVHNode> bottomLevelData = new List<GpuSmitsBVHNode>();
            foreach (var triangleMesh in meshes)
            {
                var data = da.BuildData(triangleMesh);
                bottomLevelData.AddRange(data);
                triangleMesh.MeshProfile.BvhData = data;
            }

            // The top-level tree is built over the per-mesh bounds.
            var bounds = meshes.Select(item => item.Bounds).ToArray();
            var topLevelData = da.BuildTopData(bounds);

            // One Primitive record per mesh, carrying a BVH offset and a triangle index.
            long bvhOffset = 0;
            long to = 0;
            List<Primitive> ps = new List<Primitive>();
            foreach (var triangleMeshInfo in meshes)
            {
                ps.Add(new Primitive()
                    {
                        bvhOffset = (uint) bvhOffset,
                        index = (uint) Math.Max(0,to-1)
                    });
                // NOTE(review): this accumulates EndTriangle across meshes; that is only a
                // running triangle offset if EndTriangle is a per-mesh count - verify.
                to += triangleMeshInfo.EndTriangle;

                // NOTE(review): the offset advances by Length-1 per mesh, while all Length
                // nodes of each mesh were appended to bottomLevelData above - verify this
                // matches the kernel's indexing.
                bvhOffset += triangleMeshInfo.MeshProfile.BvhData.Length-1;
            }

            topLeveltree = new ComputeBuffer<GpuSmitsBVHNode>(clContext.context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, topLevelData);
            bottomLevelTrees = new ComputeBuffer<GpuSmitsBVHNode>(clContext.context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, bottomLevelData.ToArray()); 
            this.prims  = new ComputeBuffer<Primitive>(clContext.context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, ps.ToArray());
            rays = new ComputeBuffer<RayData>(clContext.context, ComputeMemoryFlags.ReadWrite, rayBufferSize);
            rayHits = new ComputeBuffer<RayHit>(clContext.context, ComputeMemoryFlags.ReadWrite, rayBufferSize);
            Tracer.TraceLine("Bvh Build and Load Time {0}", buildTimer.Elapsed);

            // Bottom-level kernel argument slots: 0 rays, 1 hits, 2 vertices, 3 primitives,
            // 4 triangles, 5 triangle count, 6 bottom-level node count, 7 bottom-level nodes.
            BottomLevelKernel.SetMemoryArgument(0, rays);
            BottomLevelKernel.SetMemoryArgument(1, rayHits);
            BottomLevelKernel.SetMemoryArgument(2, verts);
            BottomLevelKernel.SetMemoryArgument(3, prims);

            BottomLevelKernel.SetMemoryArgument(4, tris);
            BottomLevelKernel.SetValueArgument(5, (uint)tris.Count);
            BottomLevelKernel.SetValueArgument(6, (uint)bottomLevelTrees.Count);//nodeCount
            BottomLevelKernel.SetMemoryArgument(7, bottomLevelTrees);


            // Top-level kernel argument slots: 0 rays, 1 top-level hits,
            // 2 top-level node count, 3 top-level nodes.
            TopLevelKernel.SetMemoryArgument(0, rays);
            TopLevelKernel.SetMemoryArgument(1, tlhits);
            TopLevelKernel.SetValueArgument(2, (uint)topLevelData.Length);
            TopLevelKernel.SetMemoryArgument(3, topLeveltree);
        }