/// <summary>
/// Computes a structural-similarity (SSIM) score between two GPU images and
/// collapses it to a single number.
/// </summary>
/// <remarks>
/// Every intermediate result is written into the buffers carried by
/// <paramref name="b"/>, so no matrices are allocated by this method itself.
/// The SSIM map is evaluated element-wise as
/// <c>((2*Mu1*Mu2 + C1) * (2*Sigma12 + C2)) / ((Mu1^2 + Mu2^2 + C1) * (Sigma1^2 + Sigma2^2 + C2))</c>,
/// where the Mu/Sigma terms are obtained by applying <c>b.GaussianFilter</c>.
/// The map is then reduced with <c>CudaInvoke.AbsSum</c>; each returned component
/// is divided by the pixel count of the map, and the Euclidean norm of those
/// per-component means is returned.
/// </remarks>
/// <param name="VI1">First input image.</param>
/// <param name="VI2">Second input image; must have the same size as <paramref name="VI1"/>.</param>
/// <param name="b">
/// Holder of the filter and of all scratch/output matrices used by the computation.
/// NOTE(review): <c>b.Ones</c> is presumably an all-ones matrix so that
/// <c>AddWeighted(x, 2, Ones, C, 0, dst)</c> yields <c>2*x + C</c> - confirm against
/// the code that builds <c>MSSIMGpuParam</c>.
/// </param>
/// <param name="C1">Constant added to the mean (luminance) terms of the SSIM formula.</param>
/// <param name="C2">Constant added to the variance/covariance terms of the SSIM formula.</param>
/// <returns>Square root of the sum of squared per-component means of the SSIM map.</returns>
/// <exception cref="ArgumentException">
/// Thrown when <paramref name="VI1"/> and <paramref name="VI2"/> differ in size.
/// </exception>
public static double MSSIMGpu(GpuMat VI1, GpuMat VI2, MSSIMGpuParam b, double C1 = defaultC1, double C2 = defaultC2)
{
    if (!VI1.Size.Equals(VI2.Size))
    {
        throw new ArgumentException("VI1 and VI2 must have the same size.", nameof(VI2));
    }

    // The stream wraps an unmanaged resource; dispose it once the queued work has finished.
    using (var gStream = new Stream())
    {
        // Element-wise squares and cross product of the inputs.
        CudaInvoke.Multiply(VI1, VI1, b.I1_2, stream: gStream);
        CudaInvoke.Multiply(VI2, VI2, b.I2_2, stream: gStream);
        CudaInvoke.Multiply(VI1, VI2, b.I1_I2, stream: gStream);

        // Filtered inputs (local means) and their products.
        b.GaussianFilter.Apply(VI1, b.Mu1, stream: gStream);
        b.GaussianFilter.Apply(VI2, b.Mu2, stream: gStream);
        CudaInvoke.Multiply(b.Mu1, b.Mu1, b.Mu1_2, stream: gStream);
        CudaInvoke.Multiply(b.Mu2, b.Mu2, b.Mu2_2, stream: gStream);
        CudaInvoke.Multiply(b.Mu1, b.Mu2, b.Mu1_Mu2, stream: gStream);

        // Sigma terms: filter(x*y) - mu_x*mu_y, computed in place.
        b.GaussianFilter.Apply(b.I1_2, b.Sigma1_2, stream: gStream);
        CudaInvoke.Subtract(b.Sigma1_2, b.Mu1_2, b.Sigma1_2, stream: gStream);
        b.GaussianFilter.Apply(b.I2_2, b.Sigma2_2, stream: gStream);
        CudaInvoke.Subtract(b.Sigma2_2, b.Mu2_2, b.Sigma2_2, stream: gStream);
        b.GaussianFilter.Apply(b.I1_I2, b.Sigma12, stream: gStream);
        CudaInvoke.Subtract(b.Sigma12, b.Mu1_Mu2, b.Sigma12, stream: gStream);

        // Numerator: T3 = (2*Mu1_Mu2 + C1) * (2*Sigma12 + C2).
        CudaInvoke.AddWeighted(b.Mu1_Mu2, 2, b.Ones, C1, 0, b.T1, stream: gStream);
        CudaInvoke.AddWeighted(b.Sigma12, 2, b.Ones, C2, 0, b.T2, stream: gStream);
        CudaInvoke.Multiply(b.T1, b.T2, b.T3, stream: gStream);

        // Denominator: T1 = (Mu1_2 + Mu2_2 + C1) * (Sigma1_2 + Sigma2_2 + C2).
        // The second factor must land in T2; writing it to T1 would discard the
        // first factor and multiply by the stale numerator term left in T2.
        CudaInvoke.AddWeighted(b.Mu1_2, 1, b.Mu2_2, 1, C1, b.T1, stream: gStream);
        CudaInvoke.AddWeighted(b.Sigma1_2, 1, b.Sigma2_2, 1, C2, b.T2, stream: gStream);
        CudaInvoke.Multiply(b.T1, b.T2, b.T1, stream: gStream);

        CudaInvoke.Divide(b.T3, b.T1, b.SSIM_map, stream: gStream);

        // Everything above was only queued on the stream; block until SSIM_map is ready.
        gStream.WaitForCompletion();
    }

    var mssim = CudaInvoke.AbsSum(b.SSIM_map);

    // Computed once, in double, so the product cannot overflow int for very large maps.
    var mapSize = b.SSIM_map.Size;
    double pixelCount = (double)mapSize.Height * mapSize.Width;

    var ret = 0.0;
    foreach (var v in mssim.ToArray())
    {
        var mean = v / pixelCount;
        ret += mean * mean;
    }

    return Math.Sqrt(ret);
}