// // Created by YY on 2018/10/30. // #include "libyuv.h" #include #include #include #define y(r,g,b) (((66 * r + 129 * g + 25 * b + 128) >> 8) + 16) #define u(r,g,b) (((-38 * r - 74 * g + 112 * b + 128) >> 8) + 128) #define v(r,g,b) (((112 * r - 94 * g - 18 * b + 128) >> 8) + 128) #define color(x) ((unsigned char)((x < 0) ? 0 : ((x > 255) ? 255 : x))) /** * type 0-3位表示b的偏移量 * 4-7位表示g的偏移量 * 8-11位表示r的偏移量 * 12-15位表示rgba一个像素所占的byte * 16-19位表示yuv的类型,0为420sp,1为420p */ void rgbaToYuv(int width,int height,unsigned char * rgb,unsigned char * yuv,int type) { const int frameSize = width * height; const int yuvType=(type&0x10000)>>16; const int byteRgba=(type&0x0F000)>>12; const int rShift=(type&0x00F00)>>8; const int gShift=(type&0x000F0)>>4; const int bShift= (type&0x0000F); const int uIndex=0; const int vIndex=yuvType; //yuvType为1表示YUV420p,为0表示420sp int yIndex = 0; int uvIndex[2]={frameSize,frameSize+frameSize/4}; unsigned char R, G, B, Y, U, V; unsigned int index = 0; for (int j = 0; j < height; j++) { for (int i = 0; i < width; i++) { index = j * width + i; R = rgb[index*byteRgba+rShift]&0xFF; G = rgb[index*byteRgba+gShift]&0xFF; B = rgb[index*byteRgba+bShift]&0xFF; Y = y(R,G,B); U = u(R,G,B); V = v(R,G,B); yuv[yIndex++] = color(Y); if (j % 2 == 0 && index % 2 == 0) { yuv[uvIndex[uIndex]++] =color(U); yuv[uvIndex[vIndex]++] =color(V); } } } } void rgbaToYuvSIMD(int width,int height,unsigned char * rgb,unsigned char * yuv,int type) { const int frameSize = width * height; const int yuvType=(type&0x30000)>>16; // const int byteRgba=(type&0x0F000)>>12; const int rShift=(type&0x00F00)>>8; const int gShift=(type&0x000F0)>>4; const int bShift= (type&0x0000F); //const int uIndex=0; //const int vIndex=yuvType; //yuvType为1表示YUV420p,为0表示420sp int yIndex = 0; // int uvIndex[2]={frameSize,frameSize+frameSize/4}; int n = 0; int uIndex, vIndex; int uvIndexStep; if (yuvType == 0) { uIndex = frameSize; vIndex = frameSize + 1; uvIndexStep = 2; } else if (yuvType == 1) { uIndex = frameSize; vIndex = frameSize+frameSize/4; uvIndexStep = 1; } else if (yuvType == 2) { vIndex = frameSize; uIndex = frameSize + 1; uvIndexStep = 2; } else if (yuvType == 3) { vIndex = frameSize; uIndex = frameSize+frameSize/4; uvIndexStep = 1; } uint8x8_t Y_RFac = vdup_n_u8(66); uint8x8_t Y_GFac = vdup_n_u8(129); uint8x8_t Y_BFac = vdup_n_u8(25); uint8x8_t Y_Add1Fac = vdup_n_u8(128); uint8x8_t Y_Add2Fac = vdup_n_u8(16); int16x8_t U_RFac = vdupq_n_s16(-38); int16x8_t U_GFac = vdupq_n_s16(-74); int16x8_t U_BFac = vdupq_n_s16(112); int16x8_t U_AddFac = vdupq_n_s16(128); int16x8_t V_RFac = vdupq_n_s16(112); int16x8_t V_GFac = vdupq_n_s16(-94); int16x8_t V_BFac = vdupq_n_s16(-18); for (int j = 0; j < height; j++) { for (int i = 0; i < width; i += 8) { // 每次处理32字节(8像素) uint8x8x4_t a = vld4_u8(rgb + n); // 步长为4交叉存入4个向量寄存器 n += 32; // Y分量(((66 * r + 129 * g + 25 * b + 128) >> 8) + 16) uint8x8_t result; uint16x8_t temp = vmull_u8(a.val[rShift], Y_RFac); temp = vmlal_u8 (temp, a.val[gShift], Y_GFac); temp = vmlal_u8(temp, a.val[bShift], Y_BFac); temp = vaddw_u8(temp, Y_Add1Fac); result = vshrn_n_u16(temp, 8); result = vqadd_u8(result, Y_Add2Fac); vst1_u8(yuv + yIndex, result); // 从向量寄存器回存8字节Y分量(8像素) yIndex += 8; if ((j % 2 == 0) && (i % 2 == 0)) { int16_t uBuffer[8]; int16_t vBuffer[8]; // U分量(((-38 * r - 74 * g + 112 * b + 128) >> 8) + 128) int16x8_t U; int16x8_t ur = {vget_lane_u8(a.val[rShift], 0), vget_lane_u8(a.val[rShift], 1), vget_lane_u8(a.val[rShift], 2), vget_lane_u8(a.val[rShift], 3), vget_lane_u8(a.val[rShift], 4), vget_lane_u8(a.val[rShift], 5), vget_lane_u8(a.val[rShift], 6), vget_lane_u8(a.val[rShift], 7)}; int16x8_t ug = {vget_lane_u8(a.val[gShift], 0), vget_lane_u8(a.val[gShift], 1), vget_lane_u8(a.val[gShift], 2), vget_lane_u8(a.val[gShift], 3), vget_lane_u8(a.val[gShift], 4), vget_lane_u8(a.val[gShift], 5), vget_lane_u8(a.val[gShift], 6), vget_lane_u8(a.val[gShift], 7)}; int16x8_t ub = {vget_lane_u8(a.val[bShift], 0), vget_lane_u8(a.val[bShift], 1), vget_lane_u8(a.val[bShift], 2), vget_lane_u8(a.val[bShift], 3), vget_lane_u8(a.val[bShift], 4), vget_lane_u8(a.val[bShift], 5), vget_lane_u8(a.val[bShift], 6), vget_lane_u8(a.val[bShift], 7)}; U = vmulq_s16(ur, U_RFac); U = vmlaq_s16(U, ug, U_GFac); U = vmlaq_s16(U, ub, U_BFac); U = vaddq_s16(U, U_AddFac); U = vshrq_n_s16(U, 8); U = vaddq_s16(U, U_AddFac); // 范围[16 - 240] vst1q_s16(uBuffer, U); // V分量(((112 * r - 94 * g - 18 * b + 128) >> 8) + 128) int16x8_t V; V = vmulq_s16(ur, V_RFac); V = vmlaq_s16(V, ug, V_GFac); V = vmlaq_s16(V, ub, V_BFac); V = vaddq_s16(V, U_AddFac); V = vshrq_n_s16(V, 8); V = vaddq_s16(V, U_AddFac); // 范围[16 - 240] vst1q_s16(vBuffer, V); yuv[uIndex] = uBuffer[0]; uIndex += uvIndexStep; yuv[uIndex] = uBuffer[2]; uIndex += uvIndexStep; yuv[uIndex] = uBuffer[4]; uIndex += uvIndexStep; yuv[uIndex] = uBuffer[6]; uIndex += uvIndexStep; yuv[vIndex] = vBuffer[0]; vIndex += uvIndexStep; yuv[vIndex] = vBuffer[2]; vIndex += uvIndexStep; yuv[vIndex] = vBuffer[4]; vIndex += uvIndexStep; yuv[vIndex] = vBuffer[6]; vIndex += uvIndexStep; } } } }