/* threading.c (28 KB) */
  1. /*
  2. * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  3. *
  4. * Use of this source code is governed by a BSD-style license
  5. * that can be found in the LICENSE file in the root of the source
  6. * tree. An additional intellectual property rights grant can be found
  7. * in the file PATENTS. All contributing project authors may
  8. * be found in the AUTHORS file in the root of the source tree.
  9. */
  10. #include "vpx_config.h"
  11. #include "vp8_rtcd.h"
  12. #if !defined(WIN32) && CONFIG_OS_SUPPORT == 1
  13. #include <unistd.h>
  14. #endif
  15. #include "onyxd_int.h"
  16. #include "vpx_mem/vpx_mem.h"
  17. #include "vp8/common/threading.h"
  18. #include "vp8/common/loopfilter.h"
  19. #include "vp8/common/extend.h"
  20. #include "vpx_ports/vpx_timer.h"
  21. #include "detokenize.h"
  22. #include "vp8/common/reconintra4x4.h"
  23. #include "vp8/common/reconinter.h"
  24. #include "vp8/common/reconintra.h"
  25. #include "vp8/common/setupintrarecon.h"
  26. #if CONFIG_ERROR_CONCEALMENT
  27. #include "error_concealment.h"
  28. #endif
  29. #define CALLOC_ARRAY(p, n) CHECK_MEM_ERROR((p), vpx_calloc(sizeof(*(p)), (n)))
  30. #define CALLOC_ARRAY_ALIGNED(p, n, algn) \
  31. do { \
  32. CHECK_MEM_ERROR((p), vpx_memalign((algn), sizeof(*(p)) * (n))); \
  33. memset((p), 0, (n) * sizeof(*(p))); \
  34. } while (0)
  35. void vp8_mb_init_dequantizer(VP8D_COMP *pbi, MACROBLOCKD *xd);
  36. static void setup_decoding_thread_data(VP8D_COMP *pbi, MACROBLOCKD *xd,
  37. MB_ROW_DEC *mbrd, int count) {
  38. VP8_COMMON *const pc = &pbi->common;
  39. int i;
  40. for (i = 0; i < count; ++i) {
  41. MACROBLOCKD *mbd = &mbrd[i].mbd;
  42. mbd->subpixel_predict = xd->subpixel_predict;
  43. mbd->subpixel_predict8x4 = xd->subpixel_predict8x4;
  44. mbd->subpixel_predict8x8 = xd->subpixel_predict8x8;
  45. mbd->subpixel_predict16x16 = xd->subpixel_predict16x16;
  46. mbd->mode_info_context = pc->mi + pc->mode_info_stride * (i + 1);
  47. mbd->mode_info_stride = pc->mode_info_stride;
  48. mbd->frame_type = pc->frame_type;
  49. mbd->pre = xd->pre;
  50. mbd->dst = xd->dst;
  51. mbd->segmentation_enabled = xd->segmentation_enabled;
  52. mbd->mb_segement_abs_delta = xd->mb_segement_abs_delta;
  53. memcpy(mbd->segment_feature_data, xd->segment_feature_data,
  54. sizeof(xd->segment_feature_data));
  55. /*signed char ref_lf_deltas[MAX_REF_LF_DELTAS];*/
  56. memcpy(mbd->ref_lf_deltas, xd->ref_lf_deltas, sizeof(xd->ref_lf_deltas));
  57. /*signed char mode_lf_deltas[MAX_MODE_LF_DELTAS];*/
  58. memcpy(mbd->mode_lf_deltas, xd->mode_lf_deltas, sizeof(xd->mode_lf_deltas));
  59. /*unsigned char mode_ref_lf_delta_enabled;
  60. unsigned char mode_ref_lf_delta_update;*/
  61. mbd->mode_ref_lf_delta_enabled = xd->mode_ref_lf_delta_enabled;
  62. mbd->mode_ref_lf_delta_update = xd->mode_ref_lf_delta_update;
  63. mbd->current_bc = &pbi->mbc[0];
  64. memcpy(mbd->dequant_y1_dc, xd->dequant_y1_dc, sizeof(xd->dequant_y1_dc));
  65. memcpy(mbd->dequant_y1, xd->dequant_y1, sizeof(xd->dequant_y1));
  66. memcpy(mbd->dequant_y2, xd->dequant_y2, sizeof(xd->dequant_y2));
  67. memcpy(mbd->dequant_uv, xd->dequant_uv, sizeof(xd->dequant_uv));
  68. mbd->fullpixel_mask = 0xffffffff;
  69. if (pc->full_pixel) mbd->fullpixel_mask = 0xfffffff8;
  70. }
  71. for (i = 0; i < pc->mb_rows; ++i) pbi->mt_current_mb_col[i] = -1;
  72. }
  73. static void mt_decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd,
  74. unsigned int mb_idx) {
  75. MB_PREDICTION_MODE mode;
  76. int i;
  77. #if CONFIG_ERROR_CONCEALMENT
  78. int corruption_detected = 0;
  79. #else
  80. (void)mb_idx;
  81. #endif
  82. if (xd->mode_info_context->mbmi.mb_skip_coeff) {
  83. vp8_reset_mb_tokens_context(xd);
  84. } else if (!vp8dx_bool_error(xd->current_bc)) {
  85. int eobtotal;
  86. eobtotal = vp8_decode_mb_tokens(pbi, xd);
  87. /* Special case: Force the loopfilter to skip when eobtotal is zero */
  88. xd->mode_info_context->mbmi.mb_skip_coeff = (eobtotal == 0);
  89. }
  90. mode = xd->mode_info_context->mbmi.mode;
  91. if (xd->segmentation_enabled) vp8_mb_init_dequantizer(pbi, xd);
  92. #if CONFIG_ERROR_CONCEALMENT
  93. if (pbi->ec_active) {
  94. int throw_residual;
  95. /* When we have independent partitions we can apply residual even
  96. * though other partitions within the frame are corrupt.
  97. */
  98. throw_residual =
  99. (!pbi->independent_partitions && pbi->frame_corrupt_residual);
  100. throw_residual = (throw_residual || vp8dx_bool_error(xd->current_bc));
  101. if ((mb_idx >= pbi->mvs_corrupt_from_mb || throw_residual)) {
  102. /* MB with corrupt residuals or corrupt mode/motion vectors.
  103. * Better to use the predictor as reconstruction.
  104. */
  105. pbi->frame_corrupt_residual = 1;
  106. memset(xd->qcoeff, 0, sizeof(xd->qcoeff));
  107. corruption_detected = 1;
  108. /* force idct to be skipped for B_PRED and use the
  109. * prediction only for reconstruction
  110. * */
  111. memset(xd->eobs, 0, 25);
  112. }
  113. }
  114. #endif
  115. /* do prediction */
  116. if (xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME) {
  117. vp8_build_intra_predictors_mbuv_s(
  118. xd, xd->recon_above[1], xd->recon_above[2], xd->recon_left[1],
  119. xd->recon_left[2], xd->recon_left_stride[1], xd->dst.u_buffer,
  120. xd->dst.v_buffer, xd->dst.uv_stride);
  121. if (mode != B_PRED) {
  122. vp8_build_intra_predictors_mby_s(
  123. xd, xd->recon_above[0], xd->recon_left[0], xd->recon_left_stride[0],
  124. xd->dst.y_buffer, xd->dst.y_stride);
  125. } else {
  126. short *DQC = xd->dequant_y1;
  127. int dst_stride = xd->dst.y_stride;
  128. /* clear out residual eob info */
  129. if (xd->mode_info_context->mbmi.mb_skip_coeff) memset(xd->eobs, 0, 25);
  130. intra_prediction_down_copy(xd, xd->recon_above[0] + 16);
  131. for (i = 0; i < 16; ++i) {
  132. BLOCKD *b = &xd->block[i];
  133. unsigned char *dst = xd->dst.y_buffer + b->offset;
  134. B_PREDICTION_MODE b_mode = xd->mode_info_context->bmi[i].as_mode;
  135. unsigned char *Above;
  136. unsigned char *yleft;
  137. int left_stride;
  138. unsigned char top_left;
  139. /*Caution: For some b_mode, it needs 8 pixels (4 above + 4
  140. * above-right).*/
  141. if (i < 4 && pbi->common.filter_level) {
  142. Above = xd->recon_above[0] + b->offset;
  143. } else {
  144. Above = dst - dst_stride;
  145. }
  146. if (i % 4 == 0 && pbi->common.filter_level) {
  147. yleft = xd->recon_left[0] + i;
  148. left_stride = 1;
  149. } else {
  150. yleft = dst - 1;
  151. left_stride = dst_stride;
  152. }
  153. if ((i == 4 || i == 8 || i == 12) && pbi->common.filter_level) {
  154. top_left = *(xd->recon_left[0] + i - 1);
  155. } else {
  156. top_left = Above[-1];
  157. }
  158. vp8_intra4x4_predict(Above, yleft, left_stride, b_mode, dst, dst_stride,
  159. top_left);
  160. if (xd->eobs[i]) {
  161. if (xd->eobs[i] > 1) {
  162. vp8_dequant_idct_add(b->qcoeff, DQC, dst, dst_stride);
  163. } else {
  164. vp8_dc_only_idct_add(b->qcoeff[0] * DQC[0], dst, dst_stride, dst,
  165. dst_stride);
  166. memset(b->qcoeff, 0, 2 * sizeof(b->qcoeff[0]));
  167. }
  168. }
  169. }
  170. }
  171. } else {
  172. vp8_build_inter_predictors_mb(xd);
  173. }
  174. #if CONFIG_ERROR_CONCEALMENT
  175. if (corruption_detected) {
  176. return;
  177. }
  178. #endif
  179. if (!xd->mode_info_context->mbmi.mb_skip_coeff) {
  180. /* dequantization and idct */
  181. if (mode != B_PRED) {
  182. short *DQC = xd->dequant_y1;
  183. if (mode != SPLITMV) {
  184. BLOCKD *b = &xd->block[24];
  185. /* do 2nd order transform on the dc block */
  186. if (xd->eobs[24] > 1) {
  187. vp8_dequantize_b(b, xd->dequant_y2);
  188. vp8_short_inv_walsh4x4(&b->dqcoeff[0], xd->qcoeff);
  189. memset(b->qcoeff, 0, 16 * sizeof(b->qcoeff[0]));
  190. } else {
  191. b->dqcoeff[0] = b->qcoeff[0] * xd->dequant_y2[0];
  192. vp8_short_inv_walsh4x4_1(&b->dqcoeff[0], xd->qcoeff);
  193. memset(b->qcoeff, 0, 2 * sizeof(b->qcoeff[0]));
  194. }
  195. /* override the dc dequant constant in order to preserve the
  196. * dc components
  197. */
  198. DQC = xd->dequant_y1_dc;
  199. }
  200. vp8_dequant_idct_add_y_block(xd->qcoeff, DQC, xd->dst.y_buffer,
  201. xd->dst.y_stride, xd->eobs);
  202. }
  203. vp8_dequant_idct_add_uv_block(xd->qcoeff + 16 * 16, xd->dequant_uv,
  204. xd->dst.u_buffer, xd->dst.v_buffer,
  205. xd->dst.uv_stride, xd->eobs + 16);
  206. }
  207. }
  208. static void mt_decode_mb_rows(VP8D_COMP *pbi, MACROBLOCKD *xd,
  209. int start_mb_row) {
  210. volatile const int *last_row_current_mb_col;
  211. volatile int *current_mb_col;
  212. int mb_row;
  213. VP8_COMMON *pc = &pbi->common;
  214. const int nsync = pbi->sync_range;
  215. const int first_row_no_sync_above = pc->mb_cols + nsync;
  216. int num_part = 1 << pbi->common.multi_token_partition;
  217. int last_mb_row = start_mb_row;
  218. YV12_BUFFER_CONFIG *yv12_fb_new = pbi->dec_fb_ref[INTRA_FRAME];
  219. YV12_BUFFER_CONFIG *yv12_fb_lst = pbi->dec_fb_ref[LAST_FRAME];
  220. int recon_y_stride = yv12_fb_new->y_stride;
  221. int recon_uv_stride = yv12_fb_new->uv_stride;
  222. unsigned char *ref_buffer[MAX_REF_FRAMES][3];
  223. unsigned char *dst_buffer[3];
  224. int i;
  225. int ref_fb_corrupted[MAX_REF_FRAMES];
  226. ref_fb_corrupted[INTRA_FRAME] = 0;
  227. for (i = 1; i < MAX_REF_FRAMES; ++i) {
  228. YV12_BUFFER_CONFIG *this_fb = pbi->dec_fb_ref[i];
  229. ref_buffer[i][0] = this_fb->y_buffer;
  230. ref_buffer[i][1] = this_fb->u_buffer;
  231. ref_buffer[i][2] = this_fb->v_buffer;
  232. ref_fb_corrupted[i] = this_fb->corrupted;
  233. }
  234. dst_buffer[0] = yv12_fb_new->y_buffer;
  235. dst_buffer[1] = yv12_fb_new->u_buffer;
  236. dst_buffer[2] = yv12_fb_new->v_buffer;
  237. xd->up_available = (start_mb_row != 0);
  238. for (mb_row = start_mb_row; mb_row < pc->mb_rows;
  239. mb_row += (pbi->decoding_thread_count + 1)) {
  240. int recon_yoffset, recon_uvoffset;
  241. int mb_col;
  242. int filter_level;
  243. loop_filter_info_n *lfi_n = &pc->lf_info;
  244. /* save last row processed by this thread */
  245. last_mb_row = mb_row;
  246. /* select bool coder for current partition */
  247. xd->current_bc = &pbi->mbc[mb_row % num_part];
  248. if (mb_row > 0) {
  249. last_row_current_mb_col = &pbi->mt_current_mb_col[mb_row - 1];
  250. } else {
  251. last_row_current_mb_col = &first_row_no_sync_above;
  252. }
  253. current_mb_col = &pbi->mt_current_mb_col[mb_row];
  254. recon_yoffset = mb_row * recon_y_stride * 16;
  255. recon_uvoffset = mb_row * recon_uv_stride * 8;
  256. /* reset contexts */
  257. xd->above_context = pc->above_context;
  258. memset(xd->left_context, 0, sizeof(ENTROPY_CONTEXT_PLANES));
  259. xd->left_available = 0;
  260. xd->mb_to_top_edge = -((mb_row * 16)) << 3;
  261. xd->mb_to_bottom_edge = ((pc->mb_rows - 1 - mb_row) * 16) << 3;
  262. if (pbi->common.filter_level) {
  263. xd->recon_above[0] = pbi->mt_yabove_row[mb_row] + 0 * 16 + 32;
  264. xd->recon_above[1] = pbi->mt_uabove_row[mb_row] + 0 * 8 + 16;
  265. xd->recon_above[2] = pbi->mt_vabove_row[mb_row] + 0 * 8 + 16;
  266. xd->recon_left[0] = pbi->mt_yleft_col[mb_row];
  267. xd->recon_left[1] = pbi->mt_uleft_col[mb_row];
  268. xd->recon_left[2] = pbi->mt_vleft_col[mb_row];
  269. /* TODO: move to outside row loop */
  270. xd->recon_left_stride[0] = 1;
  271. xd->recon_left_stride[1] = 1;
  272. } else {
  273. xd->recon_above[0] = dst_buffer[0] + recon_yoffset;
  274. xd->recon_above[1] = dst_buffer[1] + recon_uvoffset;
  275. xd->recon_above[2] = dst_buffer[2] + recon_uvoffset;
  276. xd->recon_left[0] = xd->recon_above[0] - 1;
  277. xd->recon_left[1] = xd->recon_above[1] - 1;
  278. xd->recon_left[2] = xd->recon_above[2] - 1;
  279. xd->recon_above[0] -= xd->dst.y_stride;
  280. xd->recon_above[1] -= xd->dst.uv_stride;
  281. xd->recon_above[2] -= xd->dst.uv_stride;
  282. /* TODO: move to outside row loop */
  283. xd->recon_left_stride[0] = xd->dst.y_stride;
  284. xd->recon_left_stride[1] = xd->dst.uv_stride;
  285. setup_intra_recon_left(xd->recon_left[0], xd->recon_left[1],
  286. xd->recon_left[2], xd->dst.y_stride,
  287. xd->dst.uv_stride);
  288. }
  289. for (mb_col = 0; mb_col < pc->mb_cols; mb_col++) {
  290. *current_mb_col = mb_col - 1;
  291. if ((mb_col & (nsync - 1)) == 0) {
  292. while (mb_col > (*last_row_current_mb_col - nsync)) {
  293. x86_pause_hint();
  294. thread_sleep(1);
  295. }
  296. }
  297. /* Distance of MB to the various image edges.
  298. * These are specified to 8th pel as they are always
  299. * compared to values that are in 1/8th pel units.
  300. */
  301. xd->mb_to_left_edge = -((mb_col * 16) << 3);
  302. xd->mb_to_right_edge = ((pc->mb_cols - 1 - mb_col) * 16) << 3;
  303. #if CONFIG_ERROR_CONCEALMENT
  304. {
  305. int corrupt_residual =
  306. (!pbi->independent_partitions && pbi->frame_corrupt_residual) ||
  307. vp8dx_bool_error(xd->current_bc);
  308. if (pbi->ec_active &&
  309. (xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME) &&
  310. corrupt_residual) {
  311. /* We have an intra block with corrupt
  312. * coefficients, better to conceal with an inter
  313. * block.
  314. * Interpolate MVs from neighboring MBs
  315. *
  316. * Note that for the first mb with corrupt
  317. * residual in a frame, we might not discover
  318. * that before decoding the residual. That
  319. * happens after this check, and therefore no
  320. * inter concealment will be done.
  321. */
  322. vp8_interpolate_motion(xd, mb_row, mb_col, pc->mb_rows, pc->mb_cols);
  323. }
  324. }
  325. #endif
  326. xd->dst.y_buffer = dst_buffer[0] + recon_yoffset;
  327. xd->dst.u_buffer = dst_buffer[1] + recon_uvoffset;
  328. xd->dst.v_buffer = dst_buffer[2] + recon_uvoffset;
  329. xd->pre.y_buffer =
  330. ref_buffer[xd->mode_info_context->mbmi.ref_frame][0] + recon_yoffset;
  331. xd->pre.u_buffer =
  332. ref_buffer[xd->mode_info_context->mbmi.ref_frame][1] + recon_uvoffset;
  333. xd->pre.v_buffer =
  334. ref_buffer[xd->mode_info_context->mbmi.ref_frame][2] + recon_uvoffset;
  335. /* propagate errors from reference frames */
  336. xd->corrupted |= ref_fb_corrupted[xd->mode_info_context->mbmi.ref_frame];
  337. mt_decode_macroblock(pbi, xd, 0);
  338. xd->left_available = 1;
  339. /* check if the boolean decoder has suffered an error */
  340. xd->corrupted |= vp8dx_bool_error(xd->current_bc);
  341. xd->recon_above[0] += 16;
  342. xd->recon_above[1] += 8;
  343. xd->recon_above[2] += 8;
  344. if (!pbi->common.filter_level) {
  345. xd->recon_left[0] += 16;
  346. xd->recon_left[1] += 8;
  347. xd->recon_left[2] += 8;
  348. }
  349. if (pbi->common.filter_level) {
  350. int skip_lf = (xd->mode_info_context->mbmi.mode != B_PRED &&
  351. xd->mode_info_context->mbmi.mode != SPLITMV &&
  352. xd->mode_info_context->mbmi.mb_skip_coeff);
  353. const int mode_index =
  354. lfi_n->mode_lf_lut[xd->mode_info_context->mbmi.mode];
  355. const int seg = xd->mode_info_context->mbmi.segment_id;
  356. const int ref_frame = xd->mode_info_context->mbmi.ref_frame;
  357. filter_level = lfi_n->lvl[seg][ref_frame][mode_index];
  358. if (mb_row != pc->mb_rows - 1) {
  359. /* Save decoded MB last row data for next-row decoding */
  360. memcpy((pbi->mt_yabove_row[mb_row + 1] + 32 + mb_col * 16),
  361. (xd->dst.y_buffer + 15 * recon_y_stride), 16);
  362. memcpy((pbi->mt_uabove_row[mb_row + 1] + 16 + mb_col * 8),
  363. (xd->dst.u_buffer + 7 * recon_uv_stride), 8);
  364. memcpy((pbi->mt_vabove_row[mb_row + 1] + 16 + mb_col * 8),
  365. (xd->dst.v_buffer + 7 * recon_uv_stride), 8);
  366. }
  367. /* save left_col for next MB decoding */
  368. if (mb_col != pc->mb_cols - 1) {
  369. MODE_INFO *next = xd->mode_info_context + 1;
  370. if (next->mbmi.ref_frame == INTRA_FRAME) {
  371. for (i = 0; i < 16; ++i) {
  372. pbi->mt_yleft_col[mb_row][i] =
  373. xd->dst.y_buffer[i * recon_y_stride + 15];
  374. }
  375. for (i = 0; i < 8; ++i) {
  376. pbi->mt_uleft_col[mb_row][i] =
  377. xd->dst.u_buffer[i * recon_uv_stride + 7];
  378. pbi->mt_vleft_col[mb_row][i] =
  379. xd->dst.v_buffer[i * recon_uv_stride + 7];
  380. }
  381. }
  382. }
  383. /* loopfilter on this macroblock. */
  384. if (filter_level) {
  385. if (pc->filter_type == NORMAL_LOOPFILTER) {
  386. loop_filter_info lfi;
  387. FRAME_TYPE frame_type = pc->frame_type;
  388. const int hev_index = lfi_n->hev_thr_lut[frame_type][filter_level];
  389. lfi.mblim = lfi_n->mblim[filter_level];
  390. lfi.blim = lfi_n->blim[filter_level];
  391. lfi.lim = lfi_n->lim[filter_level];
  392. lfi.hev_thr = lfi_n->hev_thr[hev_index];
  393. if (mb_col > 0)
  394. vp8_loop_filter_mbv(xd->dst.y_buffer, xd->dst.u_buffer,
  395. xd->dst.v_buffer, recon_y_stride,
  396. recon_uv_stride, &lfi);
  397. if (!skip_lf)
  398. vp8_loop_filter_bv(xd->dst.y_buffer, xd->dst.u_buffer,
  399. xd->dst.v_buffer, recon_y_stride,
  400. recon_uv_stride, &lfi);
  401. /* don't apply across umv border */
  402. if (mb_row > 0)
  403. vp8_loop_filter_mbh(xd->dst.y_buffer, xd->dst.u_buffer,
  404. xd->dst.v_buffer, recon_y_stride,
  405. recon_uv_stride, &lfi);
  406. if (!skip_lf)
  407. vp8_loop_filter_bh(xd->dst.y_buffer, xd->dst.u_buffer,
  408. xd->dst.v_buffer, recon_y_stride,
  409. recon_uv_stride, &lfi);
  410. } else {
  411. if (mb_col > 0)
  412. vp8_loop_filter_simple_mbv(xd->dst.y_buffer, recon_y_stride,
  413. lfi_n->mblim[filter_level]);
  414. if (!skip_lf)
  415. vp8_loop_filter_simple_bv(xd->dst.y_buffer, recon_y_stride,
  416. lfi_n->blim[filter_level]);
  417. /* don't apply across umv border */
  418. if (mb_row > 0)
  419. vp8_loop_filter_simple_mbh(xd->dst.y_buffer, recon_y_stride,
  420. lfi_n->mblim[filter_level]);
  421. if (!skip_lf)
  422. vp8_loop_filter_simple_bh(xd->dst.y_buffer, recon_y_stride,
  423. lfi_n->blim[filter_level]);
  424. }
  425. }
  426. }
  427. recon_yoffset += 16;
  428. recon_uvoffset += 8;
  429. ++xd->mode_info_context; /* next mb */
  430. xd->above_context++;
  431. }
  432. /* adjust to the next row of mbs */
  433. if (pbi->common.filter_level) {
  434. if (mb_row != pc->mb_rows - 1) {
  435. int lasty = yv12_fb_lst->y_width + VP8BORDERINPIXELS;
  436. int lastuv = (yv12_fb_lst->y_width >> 1) + (VP8BORDERINPIXELS >> 1);
  437. for (i = 0; i < 4; ++i) {
  438. pbi->mt_yabove_row[mb_row + 1][lasty + i] =
  439. pbi->mt_yabove_row[mb_row + 1][lasty - 1];
  440. pbi->mt_uabove_row[mb_row + 1][lastuv + i] =
  441. pbi->mt_uabove_row[mb_row + 1][lastuv - 1];
  442. pbi->mt_vabove_row[mb_row + 1][lastuv + i] =
  443. pbi->mt_vabove_row[mb_row + 1][lastuv - 1];
  444. }
  445. }
  446. } else {
  447. vp8_extend_mb_row(yv12_fb_new, xd->dst.y_buffer + 16,
  448. xd->dst.u_buffer + 8, xd->dst.v_buffer + 8);
  449. }
  450. /* last MB of row is ready just after extension is done */
  451. *current_mb_col = mb_col + nsync;
  452. ++xd->mode_info_context; /* skip prediction column */
  453. xd->up_available = 1;
  454. /* since we have multithread */
  455. xd->mode_info_context += xd->mode_info_stride * pbi->decoding_thread_count;
  456. }
  457. /* signal end of frame decoding if this thread processed the last mb_row */
  458. if (last_mb_row == (pc->mb_rows - 1)) sem_post(&pbi->h_event_end_decoding);
  459. }
  460. static THREAD_FUNCTION thread_decoding_proc(void *p_data) {
  461. int ithread = ((DECODETHREAD_DATA *)p_data)->ithread;
  462. VP8D_COMP *pbi = (VP8D_COMP *)(((DECODETHREAD_DATA *)p_data)->ptr1);
  463. MB_ROW_DEC *mbrd = (MB_ROW_DEC *)(((DECODETHREAD_DATA *)p_data)->ptr2);
  464. ENTROPY_CONTEXT_PLANES mb_row_left_context;
  465. while (1) {
  466. if (pbi->b_multithreaded_rd == 0) break;
  467. if (sem_wait(&pbi->h_event_start_decoding[ithread]) == 0) {
  468. if (pbi->b_multithreaded_rd == 0) {
  469. break;
  470. } else {
  471. MACROBLOCKD *xd = &mbrd->mbd;
  472. xd->left_context = &mb_row_left_context;
  473. mt_decode_mb_rows(pbi, xd, ithread + 1);
  474. }
  475. }
  476. }
  477. return 0;
  478. }
  479. void vp8_decoder_create_threads(VP8D_COMP *pbi) {
  480. int core_count = 0;
  481. unsigned int ithread;
  482. pbi->b_multithreaded_rd = 0;
  483. pbi->allocated_decoding_thread_count = 0;
  484. /* limit decoding threads to the max number of token partitions */
  485. core_count = (pbi->max_threads > 8) ? 8 : pbi->max_threads;
  486. /* limit decoding threads to the available cores */
  487. if (core_count > pbi->common.processor_core_count) {
  488. core_count = pbi->common.processor_core_count;
  489. }
  490. if (core_count > 1) {
  491. pbi->b_multithreaded_rd = 1;
  492. pbi->decoding_thread_count = core_count - 1;
  493. CALLOC_ARRAY(pbi->h_decoding_thread, pbi->decoding_thread_count);
  494. CALLOC_ARRAY(pbi->h_event_start_decoding, pbi->decoding_thread_count);
  495. CALLOC_ARRAY_ALIGNED(pbi->mb_row_di, pbi->decoding_thread_count, 32);
  496. CALLOC_ARRAY(pbi->de_thread_data, pbi->decoding_thread_count);
  497. if (sem_init(&pbi->h_event_end_decoding, 0, 0)) {
  498. vpx_internal_error(&pbi->common.error, VPX_CODEC_MEM_ERROR,
  499. "Failed to initialize semaphore");
  500. }
  501. for (ithread = 0; ithread < pbi->decoding_thread_count; ++ithread) {
  502. if (sem_init(&pbi->h_event_start_decoding[ithread], 0, 0)) break;
  503. vp8_setup_block_dptrs(&pbi->mb_row_di[ithread].mbd);
  504. pbi->de_thread_data[ithread].ithread = ithread;
  505. pbi->de_thread_data[ithread].ptr1 = (void *)pbi;
  506. pbi->de_thread_data[ithread].ptr2 = (void *)&pbi->mb_row_di[ithread];
  507. if (pthread_create(&pbi->h_decoding_thread[ithread], 0,
  508. thread_decoding_proc, &pbi->de_thread_data[ithread])) {
  509. sem_destroy(&pbi->h_event_start_decoding[ithread]);
  510. break;
  511. }
  512. }
  513. pbi->allocated_decoding_thread_count = ithread;
  514. if (pbi->allocated_decoding_thread_count !=
  515. (int)pbi->decoding_thread_count) {
  516. /* the remainder of cleanup cases will be handled in
  517. * vp8_decoder_remove_threads(). */
  518. if (pbi->allocated_decoding_thread_count == 0) {
  519. sem_destroy(&pbi->h_event_end_decoding);
  520. }
  521. vpx_internal_error(&pbi->common.error, VPX_CODEC_MEM_ERROR,
  522. "Failed to create threads");
  523. }
  524. }
  525. }
  526. void vp8mt_de_alloc_temp_buffers(VP8D_COMP *pbi, int mb_rows) {
  527. int i;
  528. vpx_free(pbi->mt_current_mb_col);
  529. pbi->mt_current_mb_col = NULL;
  530. /* Free above_row buffers. */
  531. if (pbi->mt_yabove_row) {
  532. for (i = 0; i < mb_rows; ++i) {
  533. vpx_free(pbi->mt_yabove_row[i]);
  534. pbi->mt_yabove_row[i] = NULL;
  535. }
  536. vpx_free(pbi->mt_yabove_row);
  537. pbi->mt_yabove_row = NULL;
  538. }
  539. if (pbi->mt_uabove_row) {
  540. for (i = 0; i < mb_rows; ++i) {
  541. vpx_free(pbi->mt_uabove_row[i]);
  542. pbi->mt_uabove_row[i] = NULL;
  543. }
  544. vpx_free(pbi->mt_uabove_row);
  545. pbi->mt_uabove_row = NULL;
  546. }
  547. if (pbi->mt_vabove_row) {
  548. for (i = 0; i < mb_rows; ++i) {
  549. vpx_free(pbi->mt_vabove_row[i]);
  550. pbi->mt_vabove_row[i] = NULL;
  551. }
  552. vpx_free(pbi->mt_vabove_row);
  553. pbi->mt_vabove_row = NULL;
  554. }
  555. /* Free left_col buffers. */
  556. if (pbi->mt_yleft_col) {
  557. for (i = 0; i < mb_rows; ++i) {
  558. vpx_free(pbi->mt_yleft_col[i]);
  559. pbi->mt_yleft_col[i] = NULL;
  560. }
  561. vpx_free(pbi->mt_yleft_col);
  562. pbi->mt_yleft_col = NULL;
  563. }
  564. if (pbi->mt_uleft_col) {
  565. for (i = 0; i < mb_rows; ++i) {
  566. vpx_free(pbi->mt_uleft_col[i]);
  567. pbi->mt_uleft_col[i] = NULL;
  568. }
  569. vpx_free(pbi->mt_uleft_col);
  570. pbi->mt_uleft_col = NULL;
  571. }
  572. if (pbi->mt_vleft_col) {
  573. for (i = 0; i < mb_rows; ++i) {
  574. vpx_free(pbi->mt_vleft_col[i]);
  575. pbi->mt_vleft_col[i] = NULL;
  576. }
  577. vpx_free(pbi->mt_vleft_col);
  578. pbi->mt_vleft_col = NULL;
  579. }
  580. }
  581. void vp8mt_alloc_temp_buffers(VP8D_COMP *pbi, int width, int prev_mb_rows) {
  582. VP8_COMMON *const pc = &pbi->common;
  583. int i;
  584. int uv_width;
  585. if (pbi->b_multithreaded_rd) {
  586. vp8mt_de_alloc_temp_buffers(pbi, prev_mb_rows);
  587. /* our internal buffers are always multiples of 16 */
  588. if ((width & 0xf) != 0) width += 16 - (width & 0xf);
  589. if (width < 640) {
  590. pbi->sync_range = 1;
  591. } else if (width <= 1280) {
  592. pbi->sync_range = 8;
  593. } else if (width <= 2560) {
  594. pbi->sync_range = 16;
  595. } else {
  596. pbi->sync_range = 32;
  597. }
  598. uv_width = width >> 1;
  599. /* Allocate an int for each mb row. */
  600. CALLOC_ARRAY(pbi->mt_current_mb_col, pc->mb_rows);
  601. /* Allocate memory for above_row buffers. */
  602. CALLOC_ARRAY(pbi->mt_yabove_row, pc->mb_rows);
  603. for (i = 0; i < pc->mb_rows; ++i)
  604. CHECK_MEM_ERROR(pbi->mt_yabove_row[i],
  605. vpx_memalign(16, sizeof(unsigned char) *
  606. (width + (VP8BORDERINPIXELS << 1))));
  607. CALLOC_ARRAY(pbi->mt_uabove_row, pc->mb_rows);
  608. for (i = 0; i < pc->mb_rows; ++i)
  609. CHECK_MEM_ERROR(pbi->mt_uabove_row[i],
  610. vpx_memalign(16, sizeof(unsigned char) *
  611. (uv_width + VP8BORDERINPIXELS)));
  612. CALLOC_ARRAY(pbi->mt_vabove_row, pc->mb_rows);
  613. for (i = 0; i < pc->mb_rows; ++i)
  614. CHECK_MEM_ERROR(pbi->mt_vabove_row[i],
  615. vpx_memalign(16, sizeof(unsigned char) *
  616. (uv_width + VP8BORDERINPIXELS)));
  617. /* Allocate memory for left_col buffers. */
  618. CALLOC_ARRAY(pbi->mt_yleft_col, pc->mb_rows);
  619. for (i = 0; i < pc->mb_rows; ++i)
  620. CHECK_MEM_ERROR(pbi->mt_yleft_col[i],
  621. vpx_calloc(sizeof(unsigned char) * 16, 1));
  622. CALLOC_ARRAY(pbi->mt_uleft_col, pc->mb_rows);
  623. for (i = 0; i < pc->mb_rows; ++i)
  624. CHECK_MEM_ERROR(pbi->mt_uleft_col[i],
  625. vpx_calloc(sizeof(unsigned char) * 8, 1));
  626. CALLOC_ARRAY(pbi->mt_vleft_col, pc->mb_rows);
  627. for (i = 0; i < pc->mb_rows; ++i)
  628. CHECK_MEM_ERROR(pbi->mt_vleft_col[i],
  629. vpx_calloc(sizeof(unsigned char) * 8, 1));
  630. }
  631. }
  632. void vp8_decoder_remove_threads(VP8D_COMP *pbi) {
  633. /* shutdown MB Decoding thread; */
  634. if (pbi->b_multithreaded_rd) {
  635. int i;
  636. pbi->b_multithreaded_rd = 0;
  637. /* allow all threads to exit */
  638. for (i = 0; i < pbi->allocated_decoding_thread_count; ++i) {
  639. sem_post(&pbi->h_event_start_decoding[i]);
  640. pthread_join(pbi->h_decoding_thread[i], NULL);
  641. }
  642. for (i = 0; i < pbi->allocated_decoding_thread_count; ++i) {
  643. sem_destroy(&pbi->h_event_start_decoding[i]);
  644. }
  645. if (pbi->allocated_decoding_thread_count) {
  646. sem_destroy(&pbi->h_event_end_decoding);
  647. }
  648. vpx_free(pbi->h_decoding_thread);
  649. pbi->h_decoding_thread = NULL;
  650. vpx_free(pbi->h_event_start_decoding);
  651. pbi->h_event_start_decoding = NULL;
  652. vpx_free(pbi->mb_row_di);
  653. pbi->mb_row_di = NULL;
  654. vpx_free(pbi->de_thread_data);
  655. pbi->de_thread_data = NULL;
  656. vp8mt_de_alloc_temp_buffers(pbi, pbi->common.mb_rows);
  657. }
  658. }
  659. void vp8mt_decode_mb_rows(VP8D_COMP *pbi, MACROBLOCKD *xd) {
  660. VP8_COMMON *pc = &pbi->common;
  661. unsigned int i;
  662. int j;
  663. int filter_level = pc->filter_level;
  664. YV12_BUFFER_CONFIG *yv12_fb_new = pbi->dec_fb_ref[INTRA_FRAME];
  665. if (filter_level) {
  666. /* Set above_row buffer to 127 for decoding first MB row */
  667. memset(pbi->mt_yabove_row[0] + VP8BORDERINPIXELS - 1, 127,
  668. yv12_fb_new->y_width + 5);
  669. memset(pbi->mt_uabove_row[0] + (VP8BORDERINPIXELS >> 1) - 1, 127,
  670. (yv12_fb_new->y_width >> 1) + 5);
  671. memset(pbi->mt_vabove_row[0] + (VP8BORDERINPIXELS >> 1) - 1, 127,
  672. (yv12_fb_new->y_width >> 1) + 5);
  673. for (j = 1; j < pc->mb_rows; ++j) {
  674. memset(pbi->mt_yabove_row[j] + VP8BORDERINPIXELS - 1, (unsigned char)129,
  675. 1);
  676. memset(pbi->mt_uabove_row[j] + (VP8BORDERINPIXELS >> 1) - 1,
  677. (unsigned char)129, 1);
  678. memset(pbi->mt_vabove_row[j] + (VP8BORDERINPIXELS >> 1) - 1,
  679. (unsigned char)129, 1);
  680. }
  681. /* Set left_col to 129 initially */
  682. for (j = 0; j < pc->mb_rows; ++j) {
  683. memset(pbi->mt_yleft_col[j], (unsigned char)129, 16);
  684. memset(pbi->mt_uleft_col[j], (unsigned char)129, 8);
  685. memset(pbi->mt_vleft_col[j], (unsigned char)129, 8);
  686. }
  687. /* Initialize the loop filter for this frame. */
  688. vp8_loop_filter_frame_init(pc, &pbi->mb, filter_level);
  689. } else {
  690. vp8_setup_intra_recon_top_line(yv12_fb_new);
  691. }
  692. setup_decoding_thread_data(pbi, xd, pbi->mb_row_di,
  693. pbi->decoding_thread_count);
  694. for (i = 0; i < pbi->decoding_thread_count; ++i) {
  695. sem_post(&pbi->h_event_start_decoding[i]);
  696. }
  697. mt_decode_mb_rows(pbi, xd, 0);
  698. sem_wait(&pbi->h_event_end_decoding); /* add back for each frame */
  699. }