Example #1
0
        /// <summary>
        /// Query the system for P2P-capable GPUs, returning up to nMax device ordinals.
        /// </summary>
        /// <param name="nMax">Specifies the maximum number of GPU ordinals to return.</param>
        /// <returns>The list of device ordinals whose P2P info contains 'P2P Capable = YES'.</returns>
        private List<int> getGpus(int nMax)
        {
            CudaDnn<float> cuda  = new CudaDnn<float>(0);
            List<int>      rgGpu = new List<int>();

            try
            {
                int nDevCount = cuda.GetDeviceCount();

                for (int i = 0; i < nDevCount; i++)
                {
                    // Only devices reporting peer-to-peer capability are collected.
                    string strP2PInfo = cuda.GetDeviceP2PInfo(i);

                    if (strP2PInfo.Contains("P2P Capable = YES"))
                    {
                        rgGpu.Add(i);
                    }

                    if (rgGpu.Count == nMax)
                    {
                        break;
                    }
                }
            }
            finally
            {
                // Always release the low-level kernel state, even if a device query throws.
                cuda.Dispose();
            }

            return rgGpu;
        }
Example #2
0
        /// <summary>
        /// Entry point: creates the log and CudaDnn connection, then runs the sample set.
        /// </summary>
        /// <param name="args">Not used.</param>
        static void Main(string[] args)
        {
            // Create the output log used.
            Log log = new Log("Test");

            log.OnWriteLine += Log_OnWriteLine;

            // Create the CudaDnn connection used.  NOTE: only one CudaDnn connection is needed
            // per thread for each instance creates and manages its own low-level kernel state
            // which includes all memory allocated etc.  All memory handles allocated should
            // be used with the CudaDnn that allocated the memory.
            CudaDnn<float> cuda = new CudaDnn<float>(0, DEVINIT.CUBLAS | DEVINIT.CURAND);

            log.WriteLine("CudaDnn created.");

            try
            {
                // Run super simple sample.
                runSuperSimpleSample(cuda, log);

                // Run Blob sample #1
                runSimpleBlobExample1(cuda, log);

                // Run Blob sample #2
                runSimpleBlobExample2(cuda, log);

                // Run Blob sample #3
                runSimpleBlobExample3(cuda, log);
            }
            finally
            {
                // Release all GPU memory and other state data used,
                // even if one of the samples throws.
                cuda.Dispose();
            }
        }
Example #3
0
 /// <summary>
 /// Releases resources by optionally resetting the GPU device state.
 /// </summary>
 protected virtual void dispose()
 {
     if (!m_bResetOnCleanUp)
         return;

     // A minimal connection (DEVINIT.NONE) is enough to reset the device.
     CudaDnn <float> cuda = new CudaDnn <float>(0, DEVINIT.NONE);
     cuda.ResetDevice();
     cuda.Dispose();
 }
Example #4
0
        /// <summary>
        /// Releases resources: optionally resets the GPU device, then restores
        /// the thread's original culture when one was saved.
        /// </summary>
        protected virtual void dispose()
        {
            if (m_bResetOnCleanUp)
            {
                // A minimal connection (DEVINIT.NONE) is enough to reset the device.
                CudaDnn <float> cudaReset = new CudaDnn <float>(0, DEVINIT.NONE);
                cudaReset.ResetDevice();
                cudaReset.Dispose();
            }

            // Restore the previously saved culture, if any.
            if (m_defaultCulture != null)
                Thread.CurrentThread.CurrentCulture = m_defaultCulture;
        }
Example #5
0
        /// <summary>
        /// Main demonstration function: imports an ONNX model into MyCaffe format,
        /// prints the resulting model description, then exports it back to ONNX.
        /// </summary>
        /// <param name="args">Not used.</param>
        static void Main(string[] args)
        {
            // Get the ONNX file to import.
            string strOnnxModelUrl = "https://github.com/onnx/models/raw/master/vision/classification/alexnet/model/bvlcalexnet-9.onnx";
            string strOnnxFile     = downloadFile(strOnnxModelUrl);

            // Create the MyCaffe conversion control
            MyCaffeConversionControl<float> convert = new MyCaffeConversionControl<float>();
            CudaDnn<float> cuda = new CudaDnn<float>(0);
            Log            log  = new Log("Onnx Test");

            try
            {
                // Convert an ONNX model file into the MyCaffe model description prototxt and weight protobuf.
                MyCaffeModelData modeldata = convert.ConvertOnnxToMyCaffeFromFile(cuda, log, strOnnxFile);

                // Use the model description prototxt (same format used by CAFFE)...
                string strModelDesc = modeldata.ModelDescription;

                // And weights in binary protbuf format (same format used by CAFFE)...
                byte[] rgWeights = modeldata.Weights;
                // along with the solver descriptor of your choice to use the model.

                Console.WriteLine("================================");
                Console.WriteLine("IMPORT: Model imported from *.onnx");
                Console.WriteLine("================================");
                Console.WriteLine(strModelDesc);
                Console.WriteLine("--done--");

                // Convert a MyCaffe model file (and weights) into the equivalent ONNX model file.
                Console.WriteLine("================================");
                Console.WriteLine("EXPORT: Model exported to *.onnx");
                Console.WriteLine("================================");

                // Path.Combine avoids hand-building the path with a separator literal.
                string strOnnxOutFile = Path.Combine(TestDataPath, "bvlc_allexnet.onnx");

                if (File.Exists(strOnnxOutFile))
                {
                    File.Delete(strOnnxOutFile);
                }

                // Convert the MyCaffe model file (and weights) back into a new ONNX model file.
                convert.ConvertMyCaffeToOnnxFile(cuda, log, modeldata, strOnnxOutFile);
                Console.WriteLine("Exported model to '" + strOnnxOutFile + "'.");
            }
            finally
            {
                // Cleanup - release the GPU state and conversion control even on error.
                cuda.Dispose();
                convert.Dispose();
            }

            Console.WriteLine("Press any key to continue...");
            Console.ReadKey();
        }
Example #6
0
        /// <summary>
        /// Background worker that enumerates the names of all GPUs in the system.
        /// </summary>
        /// <param name="sender">Specifies the background worker.</param>
        /// <param name="e">Specifies the event args; e.Result receives the List&lt;string&gt; of device names.</param>
        private void m_bwInit_DoWork(object sender, DoWorkEventArgs e)
        {
            List<string> rgstrGpu = new List<string>();

            // Setup the GPU menu with all GPU's in the system and
            //  select the first GPU as the default for testing.
            CudaDnn<float> cuda = new CudaDnn<float>(0);

            try
            {
                int nDeviceCount = cuda.GetDeviceCount();

                for (int i = 0; i < nDeviceCount; i++)
                {
                    rgstrGpu.Add(cuda.GetDeviceName(i));
                }
            }
            finally
            {
                // Release the connection even if a device query throws.
                cuda.Dispose();
            }

            e.Result = rgstrGpu;
        }
Example #7
0
 /// <summary>
 /// Releases the CudaDnn connection owned by this object.
 /// </summary>
 protected virtual void dispose()
 {
     // Guard against double-dispose: m_cuda is nulled after the first call,
     // so a second call is a harmless no-op instead of a NullReferenceException.
     if (m_cuda != null)
     {
         m_cuda.Dispose();
         m_cuda = null;
     }
 }
Example #8
0
        /// <summary>
        /// Demonstrates allocating GPU memory, carving it into ten 100-item memory
        /// pointers, filling each region with a distinct value and verifying the contents.
        /// </summary>
        /// <param name="args">Not used.</param>
        static void Main(string[] args)
        {
            // This memory will reside on the GPU.
            long hGpuMem = 0;

            Console.WriteLine("Creating CudaCuDnn...");
            CudaDnn <float> cuda = new CudaDnn <float>(0);

            try
            {
                // Dump basic information about device 0.
                Console.WriteLine(cuda.GetDeviceName(0));
                Console.WriteLine(cuda.GetDeviceP2PInfo(0));
                Console.WriteLine(cuda.GetDeviceInfo(0, true));

                // You must first allocate the GPU memory to use.
                // Below we will allocate an array of 1000 float values.
                Console.WriteLine("Allocate 1000 items...");
                hGpuMem = cuda.AllocMemory(1000);
                cuda.set(1000, hGpuMem, 0.0);

                // Carve the allocation into ten 100-item windows, filling
                // window i with the value (i + 1).
                Console.WriteLine("Create memory pointers...");
                List <long> rghGpuMem = new List <long>();

                for (int i = 0; i < 10; i++)
                {
                    long hPtr = cuda.CreateMemoryPointer(hGpuMem, i * 100, 100);
                    cuda.set(100, hPtr, (double)(i + 1));
                    rghGpuMem.Add(hPtr);
                }

                // Read each window back and confirm its length and contents.
                Console.WriteLine("Test memory...");
                for (int i = 0; i < 10; i++)
                {
                    float[] rgData = cuda.GetMemoryFloat(rghGpuMem[i]);

                    if (rgData.Length != 100)
                        throw new Exception("The data length should = 100!");

                    for (int j = 0; j < 100; j++)
                    {
                        if (rgData[j] != (float)(i + 1))
                            throw new Exception("The data at index " + j.ToString() + " is not correct!");
                    }
                }

                Console.WriteLine("Memory test passed successfully!");
            }
            catch (Exception excpt)
            {
                Console.WriteLine("ERROR: " + excpt.Message);
            }
            finally
            {
                // Clean-up and release all GPU memory used.
                if (hGpuMem != 0)
                {
                    cuda.FreeMemory(hGpuMem);
                    hGpuMem = 0;
                }

                cuda.Dispose();
            }

            Console.WriteLine("Press any key to exit.");
            Console.Read();
        }
Example #9
0
        /// <summary>
        /// Entry point: sets up a MemoryDataLayer, then runs three forward passes
        /// verifying that the layer advances its internal index (and so the label)
        /// by the batch size after each pass.
        /// </summary>
        /// <param name="args">Not used.</param>
        static void Main(string[] args)
        {
            // Create the output log used.
            Log log = new Log("Test");
            // Create the CudaDnn connection used.  NOTE: only one CudaDnn connection is needed
            // per thread for each instance creates and manages its own low-level kernel state
            // which includes all memory allocated etc.  All memory handles allocated should
            // be used with the CudaDnn that allocated the memory.
            CudaDnn<float>         cuda      = new CudaDnn<float>(0, DEVINIT.CUBLAS | DEVINIT.CURAND);
            MemoryDataLayer<float> layer     = createMemoryDataLayer(cuda, log);
            List<Datum>            rgData    = dataSetter();
            Blob<float>            blobData  = new Blob<float>(cuda, log);
            Blob<float>            blobLabel = new Blob<float>(cuda, log);
            BlobCollection<float>  colBottom = new BlobCollection<float>();
            BlobCollection<float>  colTop    = new BlobCollection<float>();

            // Set the top blob for MemoryDataLayers only have tops (e.g. no bottoms).
            colTop.Add(blobData);
            colTop.Add(blobLabel);

            layer.Setup(colBottom, colTop);
            layer.AddDatumVector(rgData);

            // The memory data layer advances its internal index by the batch size after
            // each forward completes, so pass n should produce label n.
            string[] rgstrPassNames = { "First", "Second", "Third" };
            for (int nPass = 0; nPass < rgstrPassNames.Length; nPass++)
            {
                runForwardPass(layer, colBottom, colTop, log, nPass, rgstrPassNames[nPass]);
            }

            layer.Dispose();
            blobData.Dispose();
            blobLabel.Dispose();
            cuda.Dispose();

            Console.WriteLine("Press any key...");
            Console.ReadKey();
        }

        /// <summary>
        /// Runs one forward pass and verifies the data values and the expected label.
        /// </summary>
        /// <param name="layer">Specifies the memory data layer under test.</param>
        /// <param name="colBottom">Specifies the (empty) bottom blob collection.</param>
        /// <param name="colTop">Specifies the top blob collection receiving data and label.</param>
        /// <param name="log">Specifies the log used for the CHECK verifications.</param>
        /// <param name="nExpectedLabel">Specifies the label value expected on this pass.</param>
        /// <param name="strPassName">Specifies the pass name used in the console output.</param>
        private static void runForwardPass(MemoryDataLayer<float> layer, BlobCollection<float> colBottom, BlobCollection<float> colTop, Log log, int nExpectedLabel, string strPassName)
        {
            layer.Forward(colBottom, colTop);

            float[] rgData  = colTop[0].mutable_cpu_data;
            float[] rgLabel = colTop[1].mutable_cpu_data;

            log.CHECK_EQ(rgData.Length, 60, "There should be 60 data items.");
            for (int i = 0; i < rgData.Length; i++)
            {
                log.CHECK_EQ(rgData[i], 10, "The data value should = 10.");
            }

            log.CHECK_EQ(rgLabel.Length, 1, "There should only be one label, for the batch size = 1.");
            log.CHECK_EQ(rgLabel[0], nExpectedLabel, "The label of the first item should = " + nExpectedLabel.ToString() + ".");
            Console.WriteLine(strPassName + " Pass - label = " + rgLabel[0].ToString());
        }
Example #10
0
        /// <summary>
        /// Worker-thread body for one GPU in a multi-GPU training run: creates this
        /// thread's CudaDnn connection, takes ownership of the NCCL handle created by
        /// the main kernel, builds a worker solver mirroring rank 0's parameters, and
        /// steps the solver until completion or cancellation.
        /// </summary>
        /// <param name="sender">Specifies the worker that raised the event (not used directly).</param>
        /// <param name="e">Specifies the action state; e.Arg carries the SolverInfo and e.DeviceID the GPU ordinal.</param>
        private void Worker_DoWork(object sender, ActionStateArgs <T> e)
        {
            SolverInfo <T> info = e.Arg as SolverInfo <T>;
            NCCL <T>       nccl = null;

            // Each worker thread owns its own CudaDnn connection, bound to its device.
            m_cuda = new common.CudaDnn <T>(e.DeviceID, DEVINIT.CUBLAS | DEVINIT.CURAND, null, info.CudaPath);

            try
            {
                Solver <T> rank0 = info.Rank0;
                Log        log   = new Log("Worker solver for DeviceID = " + e.DeviceID.ToString());

                //-----------------------------------------
                //  Transfer the NCCL handle from the
                //  main kernel that created it to the
                //  one used by the CudaDnn on this thread.
                //
                //  After the copy, this thread will 'own'
                //  the nccl and be responsible for its
                //  destruction.
                //-----------------------------------------
                long hNccl = m_cuda.KernelCopyNccl(info.KernelHandle, info.NcclHandle);

                // Create solver and install callbacks
                // The worker solver clones rank 0's parameters but runs on this device.
                SolverParameter param = rank0.parameter.Clone();
                param.device_id = e.DeviceID;
                param.type      = rank0.parameter.type;
                Solver <T> solver = Solver <T> .Create(m_cuda, log, param, rank0.CancelEvent, null, null, rank0.Database, null, rank0.solver_count, info.SolverRank);

                // Signal the coordinator that this worker has started, then verify
                // the created solver matches rank 0's type.
                info.StartedEvent.Set();
                log.CHECK_EQ((int)solver.type, (int)rank0.type, "The solver types should be the same.");

                //-----------------------------------------
                //  Turn off logging for all other
                //  operations on the worker thread.
                //-----------------------------------------
                log.Enable = false;

                nccl = new NCCL <T>(m_cuda, log, solver, e.DeviceID, hNccl, info.GradientReadyEvents);

                info.InitializedEvent.Set();
                m_cuda.SynchronizeDevice();

                // Wait for either cancellation (any rank0 cancel handle) or the
                // AllCreatedEvent (last entry) signaling every worker is ready.
                List <WaitHandle> rgWait = new List <WaitHandle>();
                rgWait.AddRange(rank0.CancelEvent.Handles);
                rgWait.Add(info.AllCreatedEvent);

                int nWait = WaitHandle.WaitAny(rgWait.ToArray());
                if (nWait < rgWait.Count - 1)
                {
                    // A cancel handle fired before all workers were created - bail out.
                    return;
                }

                // Synchronize initial weights across all ranks before stepping.
                nccl.Broadcast();

                int nIterations = param.max_iter - solver.iter;
                if (info.IterationOverride > 0)
                {
                    nIterations = info.IterationOverride;
                }

                solver.Step(nIterations);
                solver.Dispose();
            }
            catch (Exception excpt)
            {
                // Report the failure to the coordinator rather than crashing the thread.
                info.Error = excpt;
                info.ErrorEvent.Set();
            }
            finally
            {
                // This thread owns the nccl after the kernel copy, so it must destroy it.
                if (nccl != null)
                {
                    nccl.Dispose();
                }

                m_cuda.Dispose();
                m_cuda = null;
            }
        }