/// <summary>
        /// Creates a conjugate gradient solver that runs on a single GPU.
        /// </summary>
        /// <param name="count">Number of elements; used as the length of every work vector.</param>
        /// <param name="maxNonZeroCount">
        /// Multiplied by <paramref name="count"/> to size the matrix value and column-index storage
        /// (presumably the maximum number of non-zero entries per row; confirm against the callers).
        /// </param>
        /// <param name="_minIteration">Forwarded unchanged to the base constructor.</param>
        /// <param name="_maxIteration">Forwarded unchanged to the base constructor.</param>
        /// <param name="allowableResidual">Forwarded unchanged to the base constructor.</param>
        /// <exception cref="System.OverflowException">
        /// <paramref name="count"/> * <paramref name="maxNonZeroCount"/> (or <paramref name="count"/> + 1)
        /// does not fit in an <see cref="int"/>.
        /// </exception>
        public ConjugateGradientSingleGpu(int count, int maxNonZeroCount, int _minIteration, int _maxIteration, double allowableResidual)
            : base(count, maxNonZeroCount, _minIteration, _maxIteration, allowableResidual)
        {
            // Compute the storage sizes first, with overflow checking, so an oversized
            // problem fails here instead of silently wrapping around and allocating
            // wrongly sized vectors (and before any CUDA object has been created).
            int matrixCapacity = checked(count * maxNonZeroCount);
            int rowOffsetCount = checked(count + 1);

            // Prepare CUDA for use.
            cublas = CreateBlas();
            cusparse = CreateSparse();
            matDescr = CreateMatDescr();

            // Initialize the matrix storage: values, column indices and row offsets.
            vectorA = new VectorDouble(matrixCapacity);
            vectorColumnIndeces = new VectorInt(matrixCapacity);
            vectorRowOffsets = new VectorInt(rowOffsetCount);

            // Initialize the work vectors, one entry per element.
            vectorX = new VectorDouble(count);
            vectorB = new VectorDouble(count);
            vectorAp = new VectorDouble(count);
            vectorP = new VectorDouble(count);
            vectorR = new VectorDouble(count);
        }
        /// <summary>
        /// Creates a conjugate gradient solver that runs on multiple GPUs.
        /// </summary>
        /// <remarks>
        /// The elements are split into contiguous ranges, one per device. Every device receives
        /// <c>Count / deviceCount</c> elements and the last device additionally takes the remainder.
        /// Per-device resources are created inside a <c>Parallel.For</c>, so an exception thrown
        /// while creating them (including an overflow of the checked size computations) surfaces
        /// wrapped in an <see cref="AggregateException"/>.
        /// </remarks>
        /// <param name="count">Number of elements.</param>
        /// <param name="maxNonZeroCount">
        /// Multiplied by the per-device element count to size the matrix value and column-index storage
        /// (presumably the maximum number of non-zero entries per row; confirm against the callers).
        /// </param>
        /// <param name="_minIteration">Forwarded unchanged to the base constructor.</param>
        /// <param name="_maxIteration">Forwarded unchanged to the base constructor.</param>
        /// <param name="allowableResidual">Forwarded unchanged to the base constructor.</param>
        /// <exception cref="InvalidOperationException">No device is reported by <c>GetDeviceCount</c>.</exception>
        public ConjugateGradientParallelGpu(int count, int maxNonZeroCount, int _minIteration, int _maxIteration, double allowableResidual)
            : base(count, maxNonZeroCount, _minIteration, _maxIteration, allowableResidual)
        {
            // Get the number of devices.
            deviceCount = GetDeviceCount();

            // Without at least one device nothing below would be allocated and the
            // offset table would be left inconsistent, so fail fast instead.
            if (deviceCount <= 0)
            {
                throw new InvalidOperationException("No GPU device is available for the multi-GPU conjugate gradient solver.");
            }

            // Build the index of the first element handled by each device.
            // Entry [deviceCount] is pinned to Count, so the last device also
            // absorbs the remainder of the integer division.
            offsetsForDevice = new int[deviceCount + 1];
            offsetsForDevice[0] = 0;
            int elementsPerDevice = Count / deviceCount;
            for (int i = 1; i < deviceCount; i++)
            {
                offsetsForDevice[i] = offsetsForDevice[i - 1] + elementsPerDevice;
            }
            offsetsForDevice[deviceCount] = Count;

            // Initialize the per-device dot product results.
            resultsDot = new double[deviceCount];

            // Host-side buffer covering all elements, plus one minJ/maxJ slot per device.
            // NOTE(review): presumably used to exchange vector data between devices; confirm against the solver loop.
            bufferHost = new double[Count];
            minJ = new int[deviceCount];
            maxJ = new int[deviceCount];

            // Initialize the per-device arrays; the slots are filled in below.
            cublas = new IntPtr[deviceCount];
            cusparse = new IntPtr[deviceCount];
            matDescr = new IntPtr[deviceCount];
            vectorElements = new VectorDouble[deviceCount];
            vectorColumnIndeces = new VectorInt[deviceCount];
            vectorRowOffsets = new VectorInt[deviceCount];
            vectorX = new VectorDouble[deviceCount];
            vectorB = new VectorDouble[deviceCount];
            vectorAp = new VectorDouble[deviceCount];
            vectorP = new VectorDouble[deviceCount];
            vectorR = new VectorDouble[deviceCount];

            // On every device.
            Parallel.For(0, deviceCount, deviceID =>
            {
                // Select the device before creating anything for it.
                SetDevice(deviceID);

                var countForDevice = CountForDevice(deviceID);

                // Checked so an oversized problem throws instead of silently wrapping
                // around and allocating wrongly sized vectors.
                var matrixCapacity = checked(countForDevice * maxNonZeroCount);
                var rowOffsetCount = checked(countForDevice + 1);

                // Prepare CUDA for use.
                cublas[deviceID] = CreateBlas();
                cusparse[deviceID] = CreateSparse();
                matDescr[deviceID] = CreateMatDescr();

                // Initialize the matrix storage for this device's share of the elements.
                vectorElements[deviceID] = new VectorDouble(matrixCapacity);
                vectorColumnIndeces[deviceID] = new VectorInt(matrixCapacity);
                vectorRowOffsets[deviceID] = new VectorInt(rowOffsetCount);

                // Initialize the work vectors.
                vectorX[deviceID] = new VectorDouble(countForDevice);
                vectorB[deviceID] = new VectorDouble(countForDevice);
                vectorAp[deviceID] = new VectorDouble(countForDevice);
                // NOTE(review): unlike its siblings, P is sized with the full element count,
                // not the per-device count. Presumably each device needs the whole vector
                // for its matrix-vector product; confirm this is intentional.
                vectorP[deviceID] = new VectorDouble(count);
                vectorR[deviceID] = new VectorDouble(countForDevice);
            });
        }