encodemb.c (15 KB)
  1. /*
  2. * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  3. *
  4. * Use of this source code is governed by a BSD-style license
  5. * that can be found in the LICENSE file in the root of the source
  6. * tree. An additional intellectual property rights grant can be found
  7. * in the file PATENTS. All contributing project authors may
  8. * be found in the AUTHORS file in the root of the source tree.
  9. */
  10. #include "./vpx_dsp_rtcd.h"
  11. #include "vpx_config.h"
  12. #include "vp8_rtcd.h"
  13. #include "encodemb.h"
  14. #include "vp8/common/reconinter.h"
  15. #include "vp8/encoder/quantize.h"
  16. #include "tokenize.h"
  17. #include "vp8/common/invtrans.h"
  18. #include "vpx_mem/vpx_mem.h"
  19. #include "rdopt.h"
  20. void vp8_subtract_b(BLOCK *be, BLOCKD *bd, int pitch) {
  21. unsigned char *src_ptr = (*(be->base_src) + be->src);
  22. short *diff_ptr = be->src_diff;
  23. unsigned char *pred_ptr = bd->predictor;
  24. int src_stride = be->src_stride;
  25. vpx_subtract_block(4, 4, diff_ptr, pitch, src_ptr, src_stride, pred_ptr,
  26. pitch);
  27. }
  28. void vp8_subtract_mbuv(short *diff, unsigned char *usrc, unsigned char *vsrc,
  29. int src_stride, unsigned char *upred,
  30. unsigned char *vpred, int pred_stride) {
  31. short *udiff = diff + 256;
  32. short *vdiff = diff + 320;
  33. vpx_subtract_block(8, 8, udiff, 8, usrc, src_stride, upred, pred_stride);
  34. vpx_subtract_block(8, 8, vdiff, 8, vsrc, src_stride, vpred, pred_stride);
  35. }
/* Compute the 16x16 luma residual (src - pred) into diff (stride 16). */
void vp8_subtract_mby(short *diff, unsigned char *src, int src_stride,
                      unsigned char *pred, int pred_stride) {
  vpx_subtract_block(16, 16, diff, 16, src, src_stride, pred, pred_stride);
}
/* Compute the luma and chroma residuals for a whole macroblock.  Both
 * calls write into x->src_diff; vp8_subtract_mbuv places the chroma
 * residuals at its own fixed offsets (+256/+320) within that buffer. */
static void vp8_subtract_mb(MACROBLOCK *x) {
  BLOCK *b = &x->block[0];
  vp8_subtract_mby(x->src_diff, *(b->base_src), b->src_stride,
                   x->e_mbd.dst.y_buffer, x->e_mbd.dst.y_stride);
  vp8_subtract_mbuv(x->src_diff, x->src.u_buffer, x->src.v_buffer,
                    x->src.uv_stride, x->e_mbd.dst.u_buffer,
                    x->e_mbd.dst.v_buffer, x->e_mbd.dst.uv_stride);
}
  48. static void build_dcblock(MACROBLOCK *x) {
  49. short *src_diff_ptr = &x->src_diff[384];
  50. int i;
  51. for (i = 0; i < 16; ++i) {
  52. src_diff_ptr[i] = x->coeff[i * 16];
  53. }
  54. }
  55. void vp8_transform_mbuv(MACROBLOCK *x) {
  56. int i;
  57. for (i = 16; i < 24; i += 2) {
  58. x->short_fdct8x4(&x->block[i].src_diff[0], &x->block[i].coeff[0], 16);
  59. }
  60. }
  61. void vp8_transform_intra_mby(MACROBLOCK *x) {
  62. int i;
  63. for (i = 0; i < 16; i += 2) {
  64. x->short_fdct8x4(&x->block[i].src_diff[0], &x->block[i].coeff[0], 32);
  65. }
  66. /* build dc block from 16 y dc values */
  67. build_dcblock(x);
  68. /* do 2nd order transform on the dc block */
  69. x->short_walsh4x4(&x->block[24].src_diff[0], &x->block[24].coeff[0], 8);
  70. }
  71. static void transform_mb(MACROBLOCK *x) {
  72. int i;
  73. for (i = 0; i < 16; i += 2) {
  74. x->short_fdct8x4(&x->block[i].src_diff[0], &x->block[i].coeff[0], 32);
  75. }
  76. /* build dc block from 16 y dc values */
  77. if (x->e_mbd.mode_info_context->mbmi.mode != SPLITMV) build_dcblock(x);
  78. for (i = 16; i < 24; i += 2) {
  79. x->short_fdct8x4(&x->block[i].src_diff[0], &x->block[i].coeff[0], 16);
  80. }
  81. /* do 2nd order transform on the dc block */
  82. if (x->e_mbd.mode_info_context->mbmi.mode != SPLITMV) {
  83. x->short_walsh4x4(&x->block[24].src_diff[0], &x->block[24].coeff[0], 8);
  84. }
  85. }
  86. static void transform_mby(MACROBLOCK *x) {
  87. int i;
  88. for (i = 0; i < 16; i += 2) {
  89. x->short_fdct8x4(&x->block[i].src_diff[0], &x->block[i].coeff[0], 32);
  90. }
  91. /* build dc block from 16 y dc values */
  92. if (x->e_mbd.mode_info_context->mbmi.mode != SPLITMV) {
  93. build_dcblock(x);
  94. x->short_walsh4x4(&x->block[24].src_diff[0], &x->block[24].coeff[0], 8);
  95. }
  96. }
/* Tie-breaker used when two RD costs compare exactly equal: keeps only the
 * low 8 bits of the rounded rate term.  The DM and D arguments are
 * deliberately unused. */
#define RDTRUNC(RM, DM, R, D) ((128 + (R) * (RM)) & 0xFF)
typedef struct vp8_token_state vp8_token_state;
/* One node of the trellis built by optimize_b(): the cost of the best path
 * from this coefficient to the end of block, for one rounding choice. */
struct vp8_token_state {
  int rate;         /* total rate (bits cost) of the path from here to EOB */
  int error;        /* total squared reconstruction error of that path */
  signed char next; /* index of the next coefficient on the path (16 = EOB) */
  signed char token;/* token emitted for this coefficient */
  short qc;         /* quantized coefficient value for this choice */
};
/* TODO: experiments to find optimal multiple numbers */
#define Y1_RD_MULT 4
#define UV_RD_MULT 2
#define Y2_RD_MULT 16
/* Per-plane-type rate multiplier, indexed by the `type` argument of
 * optimize_b() (Y no-DC, Y2, UV, Y with-DC). */
static const int plane_rd_mult[4] = { Y1_RD_MULT, Y2_RD_MULT, UV_RD_MULT,
                                      Y1_RD_MULT };
/* Trellis (Viterbi) optimization of the quantized coefficients of one 4x4
 * block.  For every nonzero coefficient two states are tracked: keep the
 * quantizer's value, or move it one step toward zero.  A backward dynamic-
 * programming pass accumulates rate and distortion for each path, and the
 * cheapest path is written back into qcoeff/dqcoeff along with a possibly
 * shortened EOB.  The entropy contexts *a and *l are updated to reflect
 * whether the block still has any coefficients.
 *
 *   mb   - encoder macroblock state (coefficients, costs, RD multipliers)
 *   ib   - block index within the macroblock (0-15 Y, 16-23 UV, 24 Y2)
 *   type - plane type, selects token cost tables and rate multiplier
 *   a, l - above/left entropy contexts for this block (updated on return)
 */
static void optimize_b(MACROBLOCK *mb, int ib, int type, ENTROPY_CONTEXT *a,
                       ENTROPY_CONTEXT *l) {
  BLOCK *b;
  BLOCKD *d;
  vp8_token_state tokens[17][2]; /* one node pair per coefficient + sentinel */
  unsigned best_mask[2];         /* per-position best-successor bit masks */
  const short *dequant_ptr;
  const short *coeff_ptr;
  short *qcoeff_ptr;
  short *dqcoeff_ptr;
  int eob;
  int i0;
  int rc;
  int x;
  int sz = 0;
  int next;
  int rdmult;
  int rddiv;
  int final_eob;
  int rd_cost0;
  int rd_cost1;
  int rate0;
  int rate1;
  int error0;
  int error1;
  int t0;
  int t1;
  int best;
  int band;
  int pt;
  int i;
  int err_mult = plane_rd_mult[type];
  b = &mb->block[ib];
  d = &mb->e_mbd.block[ib];
  dequant_ptr = d->dequant;
  coeff_ptr = b->coeff;
  qcoeff_ptr = d->qcoeff;
  dqcoeff_ptr = d->dqcoeff;
  /* Plane types with a separate 2nd-order DC block start at coefficient 1. */
  i0 = !type;
  eob = *d->eob;
  /* Now set up a Viterbi trellis to evaluate alternative roundings. */
  rdmult = mb->rdmult * err_mult;
  if (mb->e_mbd.mode_info_context->mbmi.ref_frame == INTRA_FRAME) {
    rdmult = (rdmult * 9) >> 4;
  }
  rddiv = mb->rddiv;
  best_mask[0] = best_mask[1] = 0;
  /* Initialize the sentinel node of the trellis. */
  tokens[eob][0].rate = 0;
  tokens[eob][0].error = 0;
  tokens[eob][0].next = 16;
  tokens[eob][0].token = DCT_EOB_TOKEN;
  tokens[eob][0].qc = 0;
  *(tokens[eob] + 1) = *(tokens[eob] + 0);
  next = eob;
  /* Backward pass: from the last coefficient down to the first, fill in
   * the cost of the two rounding choices at each nonzero position. */
  for (i = eob; i-- > i0;) {
    int base_bits;
    int d2;
    int dx;
    rc = vp8_default_zig_zag1d[i];
    x = qcoeff_ptr[rc];
    /* Only add a trellis state for non-zero coefficients. */
    if (x) {
      int shortcut = 0;
      error0 = tokens[next][0].error;
      error1 = tokens[next][1].error;
      /* Evaluate the first possibility for this state. */
      rate0 = tokens[next][0].rate;
      rate1 = tokens[next][1].rate;
      t0 = (vp8_dct_value_tokens_ptr + x)->Token;
      /* Consider both possible successor states. */
      if (next < 16) {
        band = vp8_coef_bands[i + 1];
        pt = vp8_prev_token_class[t0];
        rate0 += mb->token_costs[type][band][pt][tokens[next][0].token];
        rate1 += mb->token_costs[type][band][pt][tokens[next][1].token];
      }
      rd_cost0 = RDCOST(rdmult, rddiv, rate0, error0);
      rd_cost1 = RDCOST(rdmult, rddiv, rate1, error1);
      if (rd_cost0 == rd_cost1) {
        /* Deterministic tie-break so both paths stay comparable. */
        rd_cost0 = RDTRUNC(rdmult, rddiv, rate0, error0);
        rd_cost1 = RDTRUNC(rdmult, rddiv, rate1, error1);
      }
      /* And pick the best. */
      best = rd_cost1 < rd_cost0;
      base_bits = *(vp8_dct_value_cost_ptr + x);
      dx = dqcoeff_ptr[rc] - coeff_ptr[rc];
      d2 = dx * dx;
      tokens[i][0].rate = base_bits + (best ? rate1 : rate0);
      tokens[i][0].error = d2 + (best ? error1 : error0);
      tokens[i][0].next = next;
      tokens[i][0].token = t0;
      tokens[i][0].qc = x;
      best_mask[0] |= best << i;
      /* Evaluate the second possibility for this state. */
      rate0 = tokens[next][0].rate;
      rate1 = tokens[next][1].rate;
      /* Moving toward zero is only worth considering when the reduced
       * magnitude still reconstructs within one quantizer step of the
       * original coefficient. */
      if ((abs(x) * dequant_ptr[rc] > abs(coeff_ptr[rc])) &&
          (abs(x) * dequant_ptr[rc] < abs(coeff_ptr[rc]) + dequant_ptr[rc])) {
        shortcut = 1;
      } else {
        shortcut = 0;
      }
      if (shortcut) {
        sz = -(x < 0);
        x -= 2 * sz + 1; /* step |x| down by one, preserving the sign */
      }
      /* Consider both possible successor states. */
      if (!x) {
        /* If we reduced this coefficient to zero, check to see if
         * we need to move the EOB back here.
         */
        t0 =
            tokens[next][0].token == DCT_EOB_TOKEN ? DCT_EOB_TOKEN : ZERO_TOKEN;
        t1 =
            tokens[next][1].token == DCT_EOB_TOKEN ? DCT_EOB_TOKEN : ZERO_TOKEN;
      } else {
        t0 = t1 = (vp8_dct_value_tokens_ptr + x)->Token;
      }
      if (next < 16) {
        band = vp8_coef_bands[i + 1];
        if (t0 != DCT_EOB_TOKEN) {
          pt = vp8_prev_token_class[t0];
          rate0 += mb->token_costs[type][band][pt][tokens[next][0].token];
        }
        if (t1 != DCT_EOB_TOKEN) {
          pt = vp8_prev_token_class[t1];
          rate1 += mb->token_costs[type][band][pt][tokens[next][1].token];
        }
      }
      rd_cost0 = RDCOST(rdmult, rddiv, rate0, error0);
      rd_cost1 = RDCOST(rdmult, rddiv, rate1, error1);
      if (rd_cost0 == rd_cost1) {
        rd_cost0 = RDTRUNC(rdmult, rddiv, rate0, error0);
        rd_cost1 = RDTRUNC(rdmult, rddiv, rate1, error1);
      }
      /* And pick the best. */
      best = rd_cost1 < rd_cost0;
      base_bits = *(vp8_dct_value_cost_ptr + x);
      if (shortcut) {
        /* Reducing |x| by one changes the reconstruction error by one
         * dequant step, applied with the coefficient's sign. */
        dx -= (dequant_ptr[rc] + sz) ^ sz;
        d2 = dx * dx;
      }
      tokens[i][1].rate = base_bits + (best ? rate1 : rate0);
      tokens[i][1].error = d2 + (best ? error1 : error0);
      tokens[i][1].next = next;
      tokens[i][1].token = best ? t1 : t0;
      tokens[i][1].qc = x;
      best_mask[1] |= best << i;
      /* Finally, make this the new head of the trellis. */
      next = i;
    }
    /* There's no choice to make for a zero coefficient, so we don't
     * add a new trellis node, but we do need to update the costs.
     */
    else {
      band = vp8_coef_bands[i + 1];
      t0 = tokens[next][0].token;
      t1 = tokens[next][1].token;
      /* Update the cost of each path if we're past the EOB token. */
      if (t0 != DCT_EOB_TOKEN) {
        tokens[next][0].rate += mb->token_costs[type][band][0][t0];
        tokens[next][0].token = ZERO_TOKEN;
      }
      if (t1 != DCT_EOB_TOKEN) {
        tokens[next][1].rate += mb->token_costs[type][band][0][t1];
        tokens[next][1].token = ZERO_TOKEN;
      }
      /* Don't update next, because we didn't add a new node. */
    }
  }
  /* Now pick the best path through the whole trellis.  Here i == i0 - 1,
   * so band is the one for the first coded coefficient position. */
  band = vp8_coef_bands[i + 1];
  VP8_COMBINEENTROPYCONTEXTS(pt, *a, *l);
  rate0 = tokens[next][0].rate;
  rate1 = tokens[next][1].rate;
  error0 = tokens[next][0].error;
  error1 = tokens[next][1].error;
  t0 = tokens[next][0].token;
  t1 = tokens[next][1].token;
  rate0 += mb->token_costs[type][band][pt][t0];
  rate1 += mb->token_costs[type][band][pt][t1];
  rd_cost0 = RDCOST(rdmult, rddiv, rate0, error0);
  rd_cost1 = RDCOST(rdmult, rddiv, rate1, error1);
  if (rd_cost0 == rd_cost1) {
    rd_cost0 = RDTRUNC(rdmult, rddiv, rate0, error0);
    rd_cost1 = RDTRUNC(rdmult, rddiv, rate1, error1);
  }
  best = rd_cost1 < rd_cost0;
  final_eob = i0 - 1;
  /* Forward pass: walk the winning path and write the chosen coefficient
   * values back, tracking the last nonzero position. */
  for (i = next; i < eob; i = next) {
    x = tokens[i][best].qc;
    if (x) final_eob = i;
    rc = vp8_default_zig_zag1d[i];
    qcoeff_ptr[rc] = x;
    dqcoeff_ptr[rc] = x * dequant_ptr[rc];
    next = tokens[i][best].next;
    best = (best_mask[best] >> i) & 1;
  }
  final_eob++;
  /* Context is "has coefficients" relative to where this plane starts. */
  *a = *l = (final_eob != !type);
  *d->eob = (char)final_eob;
}
  315. static void check_reset_2nd_coeffs(MACROBLOCKD *x, int type, ENTROPY_CONTEXT *a,
  316. ENTROPY_CONTEXT *l) {
  317. int sum = 0;
  318. int i;
  319. BLOCKD *bd = &x->block[24];
  320. if (bd->dequant[0] >= 35 && bd->dequant[1] >= 35) return;
  321. for (i = 0; i < (*bd->eob); ++i) {
  322. int coef = bd->dqcoeff[vp8_default_zig_zag1d[i]];
  323. sum += (coef >= 0) ? coef : -coef;
  324. if (sum >= 35) return;
  325. }
  326. /**************************************************************************
  327. our inverse hadamard transform effectively is weighted sum of all 16 inputs
  328. with weight either 1 or -1. It has a last stage scaling of (sum+3)>>3. And
  329. dc only idct is (dc+4)>>3. So if all the sums are between -35 and 29, the
  330. output after inverse wht and idct will be all zero. A sum of absolute value
  331. smaller than 35 guarantees all 16 different (+1/-1) weighted sums in wht
  332. fall between -35 and +35.
  333. **************************************************************************/
  334. if (sum < 35) {
  335. for (i = 0; i < (*bd->eob); ++i) {
  336. int rc = vp8_default_zig_zag1d[i];
  337. bd->qcoeff[rc] = 0;
  338. bd->dqcoeff[rc] = 0;
  339. }
  340. *bd->eob = 0;
  341. *a = *l = (*bd->eob != !type);
  342. }
  343. }
  344. static void optimize_mb(MACROBLOCK *x) {
  345. int b;
  346. int type;
  347. int has_2nd_order;
  348. ENTROPY_CONTEXT_PLANES t_above, t_left;
  349. ENTROPY_CONTEXT *ta;
  350. ENTROPY_CONTEXT *tl;
  351. memcpy(&t_above, x->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES));
  352. memcpy(&t_left, x->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES));
  353. ta = (ENTROPY_CONTEXT *)&t_above;
  354. tl = (ENTROPY_CONTEXT *)&t_left;
  355. has_2nd_order = (x->e_mbd.mode_info_context->mbmi.mode != B_PRED &&
  356. x->e_mbd.mode_info_context->mbmi.mode != SPLITMV);
  357. type = has_2nd_order ? PLANE_TYPE_Y_NO_DC : PLANE_TYPE_Y_WITH_DC;
  358. for (b = 0; b < 16; ++b) {
  359. optimize_b(x, b, type, ta + vp8_block2above[b], tl + vp8_block2left[b]);
  360. }
  361. for (b = 16; b < 24; ++b) {
  362. optimize_b(x, b, PLANE_TYPE_UV, ta + vp8_block2above[b],
  363. tl + vp8_block2left[b]);
  364. }
  365. if (has_2nd_order) {
  366. b = 24;
  367. optimize_b(x, b, PLANE_TYPE_Y2, ta + vp8_block2above[b],
  368. tl + vp8_block2left[b]);
  369. check_reset_2nd_coeffs(&x->e_mbd, PLANE_TYPE_Y2, ta + vp8_block2above[b],
  370. tl + vp8_block2left[b]);
  371. }
  372. }
  373. void vp8_optimize_mby(MACROBLOCK *x) {
  374. int b;
  375. int type;
  376. int has_2nd_order;
  377. ENTROPY_CONTEXT_PLANES t_above, t_left;
  378. ENTROPY_CONTEXT *ta;
  379. ENTROPY_CONTEXT *tl;
  380. if (!x->e_mbd.above_context) return;
  381. if (!x->e_mbd.left_context) return;
  382. memcpy(&t_above, x->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES));
  383. memcpy(&t_left, x->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES));
  384. ta = (ENTROPY_CONTEXT *)&t_above;
  385. tl = (ENTROPY_CONTEXT *)&t_left;
  386. has_2nd_order = (x->e_mbd.mode_info_context->mbmi.mode != B_PRED &&
  387. x->e_mbd.mode_info_context->mbmi.mode != SPLITMV);
  388. type = has_2nd_order ? PLANE_TYPE_Y_NO_DC : PLANE_TYPE_Y_WITH_DC;
  389. for (b = 0; b < 16; ++b) {
  390. optimize_b(x, b, type, ta + vp8_block2above[b], tl + vp8_block2left[b]);
  391. }
  392. if (has_2nd_order) {
  393. b = 24;
  394. optimize_b(x, b, PLANE_TYPE_Y2, ta + vp8_block2above[b],
  395. tl + vp8_block2left[b]);
  396. check_reset_2nd_coeffs(&x->e_mbd, PLANE_TYPE_Y2, ta + vp8_block2above[b],
  397. tl + vp8_block2left[b]);
  398. }
  399. }
  400. void vp8_optimize_mbuv(MACROBLOCK *x) {
  401. int b;
  402. ENTROPY_CONTEXT_PLANES t_above, t_left;
  403. ENTROPY_CONTEXT *ta;
  404. ENTROPY_CONTEXT *tl;
  405. if (!x->e_mbd.above_context) return;
  406. if (!x->e_mbd.left_context) return;
  407. memcpy(&t_above, x->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES));
  408. memcpy(&t_left, x->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES));
  409. ta = (ENTROPY_CONTEXT *)&t_above;
  410. tl = (ENTROPY_CONTEXT *)&t_left;
  411. for (b = 16; b < 24; ++b) {
  412. optimize_b(x, b, PLANE_TYPE_UV, ta + vp8_block2above[b],
  413. tl + vp8_block2left[b]);
  414. }
  415. }
/* Encode one inter-coded macroblock: build the motion-compensated
 * prediction, compute residuals, forward transform, quantize, and
 * optionally run the trellis coefficient optimization. */
void vp8_encode_inter16x16(MACROBLOCK *x) {
  vp8_build_inter_predictors_mb(&x->e_mbd);
  vp8_subtract_mb(x);
  transform_mb(x);
  vp8_quantize_mb(x);
  if (x->optimize) optimize_mb(x);
}
/* this function is used by first pass only */
/* Luma-only inter encode: predict, subtract, transform, quantize, and
 * reconstruct (inverse transform) the Y plane of one macroblock. */
void vp8_encode_inter16x16y(MACROBLOCK *x) {
  BLOCK *b = &x->block[0];
  vp8_build_inter16x16_predictors_mby(&x->e_mbd, x->e_mbd.dst.y_buffer,
                                      x->e_mbd.dst.y_stride);
  vp8_subtract_mby(x->src_diff, *(b->base_src), b->src_stride,
                   x->e_mbd.dst.y_buffer, x->e_mbd.dst.y_stride);
  transform_mby(x);
  vp8_quantize_mby(x);
  vp8_inverse_transform_mby(&x->e_mbd);
}