  1. /*
  2. * Copyright (c) 2012 The WebM project authors. All Rights Reserved.
  3. *
  4. * Use of this source code is governed by a BSD-style license
  5. * that can be found in the LICENSE file in the root of the source
  6. * tree. An additional intellectual property rights grant can be found
  7. * in the file PATENTS. All contributing project authors may
  8. * be found in the AUTHORS file in the root of the source tree.
  9. */
  10. /* MFQE: Multiframe Quality Enhancement
  11. * In rate limited situations keyframes may cause significant visual artifacts
* commonly referred to as "popping." This file implements a postprocessing
* algorithm which blends data from the preceding frame when there is no
  14. * motion and the q from the previous frame is lower which indicates that it is
  15. * higher quality.
  16. */
  17. #include "./vp8_rtcd.h"
  18. #include "./vpx_dsp_rtcd.h"
  19. #include "vp8/common/postproc.h"
  20. #include "vpx_dsp/variance.h"
  21. #include "vpx_mem/vpx_mem.h"
  22. #include "vpx_scale/yv12config.h"
  23. #include <limits.h>
  24. #include <stdlib.h>
  25. static void filter_by_weight(unsigned char *src, int src_stride,
  26. unsigned char *dst, int dst_stride, int block_size,
  27. int src_weight) {
  28. int dst_weight = (1 << MFQE_PRECISION) - src_weight;
  29. int rounding_bit = 1 << (MFQE_PRECISION - 1);
  30. int r, c;
  31. for (r = 0; r < block_size; ++r) {
  32. for (c = 0; c < block_size; ++c) {
  33. dst[c] = (src[c] * src_weight + dst[c] * dst_weight + rounding_bit) >>
  34. MFQE_PRECISION;
  35. }
  36. src += src_stride;
  37. dst += dst_stride;
  38. }
  39. }
  40. void vp8_filter_by_weight16x16_c(unsigned char *src, int src_stride,
  41. unsigned char *dst, int dst_stride,
  42. int src_weight) {
  43. filter_by_weight(src, src_stride, dst, dst_stride, 16, src_weight);
  44. }
  45. void vp8_filter_by_weight8x8_c(unsigned char *src, int src_stride,
  46. unsigned char *dst, int dst_stride,
  47. int src_weight) {
  48. filter_by_weight(src, src_stride, dst, dst_stride, 8, src_weight);
  49. }
  50. void vp8_filter_by_weight4x4_c(unsigned char *src, int src_stride,
  51. unsigned char *dst, int dst_stride,
  52. int src_weight) {
  53. filter_by_weight(src, src_stride, dst, dst_stride, 4, src_weight);
  54. }
  55. static void apply_ifactor(unsigned char *y_src, int y_src_stride,
  56. unsigned char *y_dst, int y_dst_stride,
  57. unsigned char *u_src, unsigned char *v_src,
  58. int uv_src_stride, unsigned char *u_dst,
  59. unsigned char *v_dst, int uv_dst_stride,
  60. int block_size, int src_weight) {
  61. if (block_size == 16) {
  62. vp8_filter_by_weight16x16(y_src, y_src_stride, y_dst, y_dst_stride,
  63. src_weight);
  64. vp8_filter_by_weight8x8(u_src, uv_src_stride, u_dst, uv_dst_stride,
  65. src_weight);
  66. vp8_filter_by_weight8x8(v_src, uv_src_stride, v_dst, uv_dst_stride,
  67. src_weight);
  68. } else /* if (block_size == 8) */
  69. {
  70. vp8_filter_by_weight8x8(y_src, y_src_stride, y_dst, y_dst_stride,
  71. src_weight);
  72. vp8_filter_by_weight4x4(u_src, uv_src_stride, u_dst, uv_dst_stride,
  73. src_weight);
  74. vp8_filter_by_weight4x4(v_src, uv_src_stride, v_dst, uv_dst_stride,
  75. src_weight);
  76. }
  77. }
  78. static unsigned int int_sqrt(unsigned int x) {
  79. unsigned int y = x;
  80. unsigned int guess;
  81. int p = 1;
  82. while (y >>= 1) p++;
  83. p >>= 1;
  84. guess = 0;
  85. while (p >= 0) {
  86. guess |= (1 << p);
  87. if (x < guess * guess) guess -= (1 << p);
  88. p--;
  89. }
  90. /* choose between guess or guess+1 */
  91. return guess + (guess * guess + guess + 1 <= x);
  92. }
  93. #define USE_SSD
/* Decide whether one block of the frame to be shown should be blended
 * with the co-located block held in the destination planes, and apply
 * either the blend or a plain copy of the source pixels.
 *
 * blksize       luma block size; only 16 and 8 are supported.
 * qcurr, qprev  base Q index of the current and previous shown frames.
 * y/u/v         source planes (current frame), with y_stride/uv_stride.
 * yd/ud/vd      destination planes, with yd_stride/uvd_stride.
 *               NOTE(review): these presumably contain the previous
 *               frame's pixels on entry (the else branch below is
 *               labeled "copy from previous frame") — confirm in caller.
 */
static void multiframe_quality_enhance_block(
    int blksize, /* Currently only values supported are 16, 8 */
    int qcurr, int qprev, unsigned char *y, unsigned char *u, unsigned char *v,
    int y_stride, int uv_stride, unsigned char *yd, unsigned char *ud,
    unsigned char *vd, int yd_stride, int uvd_stride) {
  static const unsigned char VP8_ZEROS[16] = { 0, 0, 0, 0, 0, 0, 0, 0,
                                               0, 0, 0, 0, 0, 0, 0, 0 };
  int uvblksize = blksize >> 1;
  int qdiff = qcurr - qprev;

  int i;
  unsigned char *up;
  unsigned char *udp;
  unsigned char *vp;
  unsigned char *vdp;

  unsigned int act, actd, sad, usad, vsad, sse, thr, thrsq, actrisk;

  if (blksize == 16) {
    /* Activity = variance against an all-zero block, normalized per
     * pixel with rounding (16x16 has 256 pixels, hence +128 >> 8). */
    actd = (vpx_variance16x16(yd, yd_stride, VP8_ZEROS, 0, &sse) + 128) >> 8;
    act = (vpx_variance16x16(y, y_stride, VP8_ZEROS, 0, &sse) + 128) >> 8;
#ifdef USE_SSD
    /* With USE_SSD, "sad"/"usad"/"vsad" actually hold the per-pixel SSE
     * between the two blocks (the variance call's sse out-parameter). */
    vpx_variance16x16(y, y_stride, yd, yd_stride, &sse);
    sad = (sse + 128) >> 8;
    vpx_variance8x8(u, uv_stride, ud, uvd_stride, &sse);
    usad = (sse + 32) >> 6; /* 8x8 = 64 pixels */
    vpx_variance8x8(v, uv_stride, vd, uvd_stride, &sse);
    vsad = (sse + 32) >> 6;
#else
    sad = (vpx_sad16x16(y, y_stride, yd, yd_stride) + 128) >> 8;
    usad = (vpx_sad8x8(u, uv_stride, ud, uvd_stride) + 32) >> 6;
    vsad = (vpx_sad8x8(v, uv_stride, vd, uvd_stride) + 32) >> 6;
#endif
  } else /* if (blksize == 8) */
  {
    /* Same measurements for an 8x8 luma / 4x4 chroma block. */
    actd = (vpx_variance8x8(yd, yd_stride, VP8_ZEROS, 0, &sse) + 32) >> 6;
    act = (vpx_variance8x8(y, y_stride, VP8_ZEROS, 0, &sse) + 32) >> 6;
#ifdef USE_SSD
    vpx_variance8x8(y, y_stride, yd, yd_stride, &sse);
    sad = (sse + 32) >> 6;
    vpx_variance4x4(u, uv_stride, ud, uvd_stride, &sse);
    usad = (sse + 8) >> 4; /* 4x4 = 16 pixels */
    vpx_variance4x4(v, uv_stride, vd, uvd_stride, &sse);
    vsad = (sse + 8) >> 4;
#else
    sad = (vpx_sad8x8(y, y_stride, yd, yd_stride) + 32) >> 6;
    usad = (vpx_sad4x4(u, uv_stride, ud, uvd_stride) + 8) >> 4;
    vsad = (vpx_sad4x4(v, uv_stride, vd, uvd_stride) + 8) >> 4;
#endif
  }

  /* Blending a much "busier" destination block would inject high
   * frequencies; treat that as a risk and skip the blend. */
  actrisk = (actd > act * 5);

  /* thr = qdiff/16 + log2(act) + log4(qprev) */
  thr = (qdiff >> 4);
  while (actd >>= 1) thr++;
  while (qprev >>= 2) thr++;

#ifdef USE_SSD
  thrsq = thr * thr;
  /* NOTE(review): when thr == 0, thrsq == 0 and this unsigned compare is
   * always false, so the division by thr below cannot be reached. */
  if (sad < thrsq &&
      /* additional checks for color mismatch and excessive addition of
       * high-frequencies */
      4 * usad < thrsq && 4 * vsad < thrsq && !actrisk)
#else
  if (sad < thr &&
      /* additional checks for color mismatch and excessive addition of
       * high-frequencies */
      2 * usad < thr && 2 * vsad < thr && !actrisk)
#endif
  {
    int ifactor;
#ifdef USE_SSD
    /* TODO: optimize this later to not need sqr root */
    /* Convert per-pixel SSE back to a SAD-like magnitude. */
    sad = int_sqrt(sad);
#endif
    /* Blend weight grows with the frame difference and is halved for
     * every additional 32 steps of Q difference. */
    ifactor = (sad << MFQE_PRECISION) / thr;
    ifactor >>= (qdiff >> 5);

    if (ifactor) {
      apply_ifactor(y, y_stride, yd, yd_stride, u, v, uv_stride, ud, vd,
                    uvd_stride, blksize, ifactor);
    }
  } else /* else implicitly copy from previous frame */
  {
    /* Block failed the similarity checks: overwrite the destination with
     * the current frame's pixels instead of blending. */
    if (blksize == 16) {
      vp8_copy_mem16x16(y, y_stride, yd, yd_stride);
      vp8_copy_mem8x8(u, uv_stride, ud, uvd_stride);
      vp8_copy_mem8x8(v, uv_stride, vd, uvd_stride);
    } else /* if (blksize == 8) */
    {
      vp8_copy_mem8x8(y, y_stride, yd, yd_stride);
      /* No 4x4 copy helper exists: copy the chroma rows by hand. */
      for (up = u, udp = ud, i = 0; i < uvblksize;
           ++i, up += uv_stride, udp += uvd_stride) {
        memcpy(udp, up, uvblksize);
      }
      for (vp = v, vdp = vd, i = 0; i < uvblksize;
           ++i, vp += uv_stride, vdp += uvd_stride) {
        memcpy(vdp, vp, uvblksize);
      }
    }
  }
}
/* Classify the four 8x8 quadrants of an inter macroblock as candidates
 * for MFQE blending. map[0..3] is set to 1 for each quadrant whose
 * motion is small enough (components <= 2), 0 otherwise.
 * Returns the number of qualifying quadrants (0..4).
 */
static int qualify_inter_mb(const MODE_INFO *mode_info_context, int *map) {
  if (mode_info_context->mbmi.mb_skip_coeff) {
    /* Skipped MB carries no residual: treat every quadrant as static. */
    map[0] = map[1] = map[2] = map[3] = 1;
  } else if (mode_info_context->mbmi.mode == SPLITMV) {
    /* ndx[q] lists the four 4x4 sub-block indices inside quadrant q. */
    static int ndx[4][4] = {
      { 0, 1, 4, 5 }, { 2, 3, 6, 7 }, { 8, 9, 12, 13 }, { 10, 11, 14, 15 }
    };
    int i, j;
    for (i = 0; i < 4; ++i) {
      map[i] = 1;
      /* NOTE(review): the loop condition tests map[j], not map[i], and
       * the MV components are compared without abs(), unlike the
       * whole-MB branch below. Both look unintended but are preserved
       * here — confirm against upstream before changing behavior. */
      for (j = 0; j < 4 && map[j]; ++j) {
        map[i] &= (mode_info_context->bmi[ndx[i][j]].mv.as_mv.row <= 2 &&
                   mode_info_context->bmi[ndx[i][j]].mv.as_mv.col <= 2);
      }
    }
  } else {
    /* Whole-MB inter mode: all quadrants share the MB motion vector,
     * and intra modes (<= B_PRED) never qualify. */
    map[0] = map[1] = map[2] = map[3] =
        (mode_info_context->mbmi.mode > B_PRED &&
         abs(mode_info_context->mbmi.mv.as_mv.row) <= 2 &&
         abs(mode_info_context->mbmi.mv.as_mv.col) <= 2);
  }
  return (map[0] + map[1] + map[2] + map[3]);
}
/* MFQE entry point: post-process cm->frame_to_show into
 * cm->post_proc_buffer macroblock by macroblock. Static (low-motion)
 * blocks may be blended with data already in the post-proc buffer; all
 * other blocks are copied through unchanged.
 */
void vp8_multiframe_quality_enhance(VP8_COMMON *cm) {
  YV12_BUFFER_CONFIG *show = cm->frame_to_show;
  YV12_BUFFER_CONFIG *dest = &cm->post_proc_buffer;

  FRAME_TYPE frame_type = cm->frame_type;
  /* Point at base of Mb MODE_INFO list has motion vectors etc */
  const MODE_INFO *mode_info_context = cm->show_frame_mi;
  int mb_row;
  int mb_col;
  int totmap, map[4];
  int qcurr = cm->base_qindex;
  int qprev = cm->postproc_state.last_base_qindex;

  unsigned char *y_ptr, *u_ptr, *v_ptr;
  unsigned char *yd_ptr, *ud_ptr, *vd_ptr;

  /* Set up the buffer pointers */
  y_ptr = show->y_buffer;
  u_ptr = show->u_buffer;
  v_ptr = show->v_buffer;
  yd_ptr = dest->y_buffer;
  ud_ptr = dest->u_buffer;
  vd_ptr = dest->v_buffer;

  /* postprocess each macro block */
  for (mb_row = 0; mb_row < cm->mb_rows; ++mb_row) {
    for (mb_col = 0; mb_col < cm->mb_cols; ++mb_col) {
      /* if motion is high there will likely be no benefit */
      if (frame_type == INTER_FRAME) {
        totmap = qualify_inter_mb(mode_info_context, map);
      } else {
        /* Key frames carry no motion: every quadrant qualifies. */
        totmap = (frame_type == KEY_FRAME ? 4 : 0);
      }
      if (totmap) {
        if (totmap < 4) {
          /* Only some quadrants qualify: process at 8x8 granularity,
           * offsetting into each quadrant (8 luma / 4 chroma pixels). */
          int i, j;
          for (i = 0; i < 2; ++i) {
            for (j = 0; j < 2; ++j) {
              if (map[i * 2 + j]) {
                multiframe_quality_enhance_block(
                    8, qcurr, qprev, y_ptr + 8 * (i * show->y_stride + j),
                    u_ptr + 4 * (i * show->uv_stride + j),
                    v_ptr + 4 * (i * show->uv_stride + j), show->y_stride,
                    show->uv_stride, yd_ptr + 8 * (i * dest->y_stride + j),
                    ud_ptr + 4 * (i * dest->uv_stride + j),
                    vd_ptr + 4 * (i * dest->uv_stride + j), dest->y_stride,
                    dest->uv_stride);
              } else {
                /* copy a 8x8 block */
                int k;
                unsigned char *up = u_ptr + 4 * (i * show->uv_stride + j);
                unsigned char *udp = ud_ptr + 4 * (i * dest->uv_stride + j);
                unsigned char *vp = v_ptr + 4 * (i * show->uv_stride + j);
                unsigned char *vdp = vd_ptr + 4 * (i * dest->uv_stride + j);

                vp8_copy_mem8x8(
                    y_ptr + 8 * (i * show->y_stride + j), show->y_stride,
                    yd_ptr + 8 * (i * dest->y_stride + j), dest->y_stride);
                /* 4x4 chroma copied by hand (no 4x4 copy helper). */
                for (k = 0; k < 4; ++k, up += show->uv_stride,
                    udp += dest->uv_stride, vp += show->uv_stride,
                    vdp += dest->uv_stride) {
                  memcpy(udp, up, 4);
                  memcpy(vdp, vp, 4);
                }
              }
            }
          }
        } else /* totmap = 4 */
        {
          /* Whole MB qualifies: process as a single 16x16 block. */
          multiframe_quality_enhance_block(
              16, qcurr, qprev, y_ptr, u_ptr, v_ptr, show->y_stride,
              show->uv_stride, yd_ptr, ud_ptr, vd_ptr, dest->y_stride,
              dest->uv_stride);
        }
      } else {
        /* Nothing qualifies: pass the current pixels straight through. */
        vp8_copy_mem16x16(y_ptr, show->y_stride, yd_ptr, dest->y_stride);
        vp8_copy_mem8x8(u_ptr, show->uv_stride, ud_ptr, dest->uv_stride);
        vp8_copy_mem8x8(v_ptr, show->uv_stride, vd_ptr, dest->uv_stride);
      }
      /* Advance one macroblock to the right (16 luma / 8 chroma cols). */
      y_ptr += 16;
      u_ptr += 8;
      v_ptr += 8;
      yd_ptr += 16;
      ud_ptr += 8;
      vd_ptr += 8;
      mode_info_context++; /* step to next MB */
    }
    /* Rewind to the row start, then step down one macroblock row. */
    y_ptr += show->y_stride * 16 - 16 * cm->mb_cols;
    u_ptr += show->uv_stride * 8 - 8 * cm->mb_cols;
    v_ptr += show->uv_stride * 8 - 8 * cm->mb_cols;
    yd_ptr += dest->y_stride * 16 - 16 * cm->mb_cols;
    ud_ptr += dest->uv_stride * 8 - 8 * cm->mb_cols;
    vd_ptr += dest->uv_stride * 8 - 8 * cm->mb_cols;

    mode_info_context++; /* Skip border mb */
  }
}