快速高斯模糊算法-白红宇

快速高斯模糊算法

阅读量：6417 次

发布时间：2019-06-23

本文共 4612 字，大约阅读时间需要 15 分钟。

刚才发现一份快速高斯模糊的实现。

源地址为：

作者信息为:  Fast Gaussian Blur v1.3 by Mario Klingemann

processing源码: 效果图：

转为C语言实现版本。 代码如下:

// Fast Gaussian Blur v1.3// by Mario Klingemann 
     
      // C version updated and performance optimization by tntmonks(http://tntmonks.cnblogs.com)// One of my first steps with Processing. I am a fan// of blurring. Especially as you can use blurred images// as a base for other effects. So this is something I// might get back to in later experiments.//// What you see is an attempt to implement a Gaussian Blur algorithm// which is exact but fast. I think that this one should be// relatively fast because it uses a special trick by first// making a horizontal blur on the original image and afterwards// making a vertical blur on the pre-processed image. This// is a mathematical correct thing to do and reduces the// calculation a lot.//// In order to avoid the overhead of function calls I unrolled// the whole convolution routine in one method. This may not// look nice, but brings a huge performance boost.////// v1.1: I replaced some multiplications by additions//       and added aome minor pre-caclulations.//       Also add correct rounding for float->int conversion//// v1.2: I completely got rid of all floating point calculations//       and speeded up the whole process by using a//       precalculated multiplication table. Unfortunately//       a precalculated division table was becoming too//       huge. But maybe there is some way to even speed//       up the divisions.//// v1.3: Fixed a bug that caused blurs that start at y>0//	 to go wrong. Thanks to Jeroen Schellekens for //       finding it!void GaussianBlur(unsigned char* img, unsigned  int x, unsigned int y, unsigned int w, unsigned int h, unsigned int comp, unsigned int radius){	unsigned int i, j ;	radius = min(max(1, radius), 248); 	unsigned int kernelSize = 1 + radius * 2; 	unsigned int* kernel = (unsigned int*)malloc(kernelSize* sizeof(unsigned int));	memset(kernel, 0, kernelSize* sizeof(unsigned int)); 	unsigned int(*mult)[256] = (unsigned int(*)[256])malloc(kernelSize * 256 * sizeof(unsigned int)); 	memset(mult, 0, kernelSize * 256 * sizeof(unsigned int));	unsigned	int sum = 0;	for (i = 1; i < radius; i++){		unsigned int szi = radius - i;		kernel[radius + i] = kernel[szi] = szi*szi;		sum += kernel[szi] + kernel[szi];		for (j = 0; j < 256; j++){			mult[radius + i][j] = mult[szi][j] = kernel[szi] * j;		}	}	kernel[radius] = radius*radius;	sum += kernel[radius];	for (j = 0; j < 256; j++){		 		mult[radius][j] = kernel[radius] * j;	}	unsigned int   cr, cg, cb;	unsigned int   xl, yl, yi, ym, riw;	unsigned int   read, ri, p,   n;	unsigned	int imgWidth = w;	unsigned	int imgHeight = h;	unsigned	int imageSize = imgWidth*imgHeight;	unsigned char *	rgb = (unsigned char *)malloc(sizeof(unsigned char) * imageSize * 3);	unsigned char *	r = rgb;	unsigned char *	g = rgb + imageSize;	unsigned char *	b = rgb + imageSize * 2;	unsigned char *	rgb2 = (unsigned char *)malloc(sizeof(unsigned char) * imageSize * 3);	unsigned char *	r2 = rgb2;	unsigned char *	g2 = rgb2 + imageSize;	unsigned char *	b2 = rgb2 + imageSize * 2; 	for (size_t yh = 0; yh < imgHeight; ++yh) { 		for (size_t xw = 0; xw < imgWidth; ++xw) {			n = xw + yh* imgWidth;			p = n*comp;			r[n] = img[p];			g[n] = img[p + 1];			b[n] = img[p + 2];		}	}		x = max(0, x);	y = max(0, y);	w = x + w - max(0, (x + w) - imgWidth);	h = y + h - max(0, (y + h) - imgHeight);	yi = y*imgWidth; 	for (yl = y; yl < h; yl++){ 		for (xl = x; xl < w; xl++){			cb = cg = cr = sum = 0;			ri = xl - radius; 			for (i = 0; i < kernelSize; i++){				read = ri + i;				if (read >= x && read < w)				{					read += yi;					cr += mult[i][r[read]];					cg += mult[i][g[read]];					cb += mult[i][b[read]];					sum += kernel[i];				}			}			ri = yi + xl;			r2[ri] = cr / sum;			g2[ri] = cg / sum;			b2[ri] = cb / sum;		}		yi += imgWidth;	}	yi = y*imgWidth; 	for (yl = y; yl < h; yl++){		ym = yl - radius;		riw = ym*imgWidth; 		for (xl = x; xl < w; xl++){			cb = cg = cr = sum = 0;			ri = ym;			read = xl + riw; 			for (i = 0; i < kernelSize; i++){				if (ri < h && ri >= y)				{					cr += mult[i][r2[read]];					cg += mult[i][g2[read]];					cb += mult[i][b2[read]];					sum += kernel[i];				}				ri++;				read += imgWidth;			}			p = (xl + yi)*comp;			img[p] = (unsigned char)(cr / sum);			img[p + 1] = (unsigned char)(cg / sum);			img[p + 2] = (unsigned char)(cb / sum);		}		yi += imgWidth;	}	free(rgb);	free(rgb2);	free(kernel);	free(mult);}

　　该代码，将二维数组进一步优化后可提升一定的速度。

在博主机子上测试一张5000x3000的图像，模糊半径为10的情况下，耗时4s.

转载于:https://www.cnblogs.com/cpuimage/p/4899649.html

你可能感兴趣的文章

iOS学习之Objective-C 2.0 运行时系统编程

查看>>

Exchange2007-Exchange2010升级-06 数据库高可用组的创建

查看>>

phpHiveAdmin是如何通过Hive/Hadoop工作的

查看>>

双向链表内结点的删除（4）

查看>>