vp9_pickmode.c

/*
 * Copyright (c) 2014 The WebM project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */

#include <assert.h>
#include <limits.h>
#include <math.h>
#include <stdio.h>

#include "./vp9_rtcd.h"
#include "./vpx_dsp_rtcd.h"

#include "vpx/vpx_codec.h"
#include "vpx_dsp/vpx_dsp_common.h"
#include "vpx_mem/vpx_mem.h"
#include "vpx_ports/mem.h"

#include "vp9/common/vp9_blockd.h"
#include "vp9/common/vp9_common.h"
#include "vp9/common/vp9_mvref_common.h"
#include "vp9/common/vp9_pred_common.h"
#include "vp9/common/vp9_reconinter.h"
#include "vp9/common/vp9_reconintra.h"
#include "vp9/common/vp9_scan.h"

#include "vp9/encoder/vp9_cost.h"
#include "vp9/encoder/vp9_encoder.h"
#include "vp9/encoder/vp9_pickmode.h"
#include "vp9/encoder/vp9_ratectrl.h"
#include "vp9/encoder/vp9_rd.h"

typedef struct {
  uint8_t *data;
  int stride;
  int in_use;
} PRED_BUFFER;

static const int pos_shift_16x16[4][4] = {
  { 9, 10, 13, 14 }, { 11, 12, 15, 16 }, { 17, 18, 21, 22 }, { 19, 20, 23, 24 }
};

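// Non-RD (real-time) counterpart of the mv reference search: scans the
// spatial neighbors of the current block for candidate mvs that use
// ref_frame, optionally fetches the scaled base-layer mv for SVC, and fills
// mv_ref_list. The return value is a constant-motion hint derived from the
// two nearest neighbors.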
static int mv_refs_rt(VP9_COMP *cpi, const VP9_COMMON *cm, const MACROBLOCK *x,
                      const MACROBLOCKD *xd, const TileInfo *const tile,
                      MODE_INFO *mi, MV_REFERENCE_FRAME ref_frame,
                      int_mv *mv_ref_list, int_mv *base_mv, int mi_row,
                      int mi_col, int use_base_mv) {
  const int *ref_sign_bias = cm->ref_frame_sign_bias;
  int i, refmv_count = 0;

  const POSITION *const mv_ref_search = mv_ref_blocks[mi->sb_type];

  int different_ref_found = 0;
  int context_counter = 0;
  int const_motion = 0;

  // Blank the reference vector list
  memset(mv_ref_list, 0, sizeof(*mv_ref_list) * MAX_MV_REF_CANDIDATES);

  // The nearest 2 blocks are treated differently
  // if the size < 8x8 we get the mv from the bmi substructure,
  // and we also need to keep a mode count.
  for (i = 0; i < 2; ++i) {
    const POSITION *const mv_ref = &mv_ref_search[i];
    if (is_inside(tile, mi_col, mi_row, cm->mi_rows, mv_ref)) {
      const MODE_INFO *const candidate_mi =
          xd->mi[mv_ref->col + mv_ref->row * xd->mi_stride];
      // Keep counts for entropy encoding.
      context_counter += mode_2_counter[candidate_mi->mode];
      different_ref_found = 1;

      if (candidate_mi->ref_frame[0] == ref_frame)
        ADD_MV_REF_LIST(get_sub_block_mv(candidate_mi, 0, mv_ref->col, -1),
                        refmv_count, mv_ref_list, Done);
    }
  }

  const_motion = 1;

  // Check the rest of the neighbors in much the same way
  // as before except we don't need to keep track of sub blocks or
  // mode counts.
  for (; i < MVREF_NEIGHBOURS && !refmv_count; ++i) {
    const POSITION *const mv_ref = &mv_ref_search[i];
    if (is_inside(tile, mi_col, mi_row, cm->mi_rows, mv_ref)) {
      const MODE_INFO *const candidate_mi =
          xd->mi[mv_ref->col + mv_ref->row * xd->mi_stride];
      different_ref_found = 1;

      if (candidate_mi->ref_frame[0] == ref_frame)
        ADD_MV_REF_LIST(candidate_mi->mv[0], refmv_count, mv_ref_list, Done);
    }
  }

  // Since we couldn't find 2 mvs from the same reference frame
  // go back through the neighbors and find motion vectors from
  // different reference frames.
  if (different_ref_found && !refmv_count) {
    for (i = 0; i < MVREF_NEIGHBOURS; ++i) {
      const POSITION *mv_ref = &mv_ref_search[i];
      if (is_inside(tile, mi_col, mi_row, cm->mi_rows, mv_ref)) {
        const MODE_INFO *const candidate_mi =
            xd->mi[mv_ref->col + mv_ref->row * xd->mi_stride];

        // If the candidate is INTRA we don't want to consider its mv.
        IF_DIFF_REF_FRAME_ADD_MV(candidate_mi, ref_frame, ref_sign_bias,
                                 refmv_count, mv_ref_list, Done);
      }
    }
  }

  if (use_base_mv &&
      !cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame &&
      ref_frame == LAST_FRAME) {
    // Get base layer mv.
    MV_REF *candidate =
        &cm->prev_frame
             ->mvs[(mi_col >> 1) + (mi_row >> 1) * (cm->mi_cols >> 1)];
    if (candidate->mv[0].as_int != INVALID_MV) {
      base_mv->as_mv.row = (candidate->mv[0].as_mv.row * 2);
      base_mv->as_mv.col = (candidate->mv[0].as_mv.col * 2);
      clamp_mv_ref(&base_mv->as_mv, xd);
    } else {
      base_mv->as_int = INVALID_MV;
    }
  }

Done:

  x->mbmi_ext->mode_context[ref_frame] = counter_to_context[context_counter];

  // Clamp vectors
  for (i = 0; i < MAX_MV_REF_CANDIDATES; ++i)
    clamp_mv_ref(&mv_ref_list[i].as_mv, xd);

  return const_motion;
}

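// Full-pel motion search (optionally centered on the base-layer mv) followed
// by sub-pel refinement for NEWMV. Writes the resulting mv and its rate cost;
// returns 0 when the mv/mode rate alone already exceeds best_rd_sofar.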
static int combined_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
                                  BLOCK_SIZE bsize, int mi_row, int mi_col,
                                  int_mv *tmp_mv, int *rate_mv,
                                  int64_t best_rd_sofar, int use_base_mv) {
  MACROBLOCKD *xd = &x->e_mbd;
  MODE_INFO *mi = xd->mi[0];
  struct buf_2d backup_yv12[MAX_MB_PLANE] = { { 0, 0 } };
  const int step_param = cpi->sf.mv.fullpel_search_step_param;
  const int sadpb = x->sadperbit16;
  MV mvp_full;
  const int ref = mi->ref_frame[0];
  const MV ref_mv = x->mbmi_ext->ref_mvs[ref][0].as_mv;
  MV center_mv;
  uint32_t dis;
  int rate_mode;
  const MvLimits tmp_mv_limits = x->mv_limits;
  int rv = 0;
  int cost_list[5];
  const YV12_BUFFER_CONFIG *scaled_ref_frame =
      vp9_get_scaled_ref_frame(cpi, ref);
  if (scaled_ref_frame) {
    int i;
    // Swap out the reference frame for a version that's been scaled to
    // match the resolution of the current frame, allowing the existing
    // motion search code to be used without additional modifications.
    for (i = 0; i < MAX_MB_PLANE; i++) backup_yv12[i] = xd->plane[i].pre[0];
    vp9_setup_pre_planes(xd, 0, scaled_ref_frame, mi_row, mi_col, NULL);
  }
  vp9_set_mv_search_range(&x->mv_limits, &ref_mv);

  assert(x->mv_best_ref_index[ref] <= 2);
  if (x->mv_best_ref_index[ref] < 2)
    mvp_full = x->mbmi_ext->ref_mvs[ref][x->mv_best_ref_index[ref]].as_mv;
  else
    mvp_full = x->pred_mv[ref];

  mvp_full.col >>= 3;
  mvp_full.row >>= 3;

  if (!use_base_mv)
    center_mv = ref_mv;
  else
    center_mv = tmp_mv->as_mv;

  vp9_full_pixel_search(cpi, x, bsize, &mvp_full, step_param, sadpb,
                        cond_cost_list(cpi, cost_list), &center_mv,
                        &tmp_mv->as_mv, INT_MAX, 0);

  x->mv_limits = tmp_mv_limits;

  // calculate the bit cost on motion vector
  mvp_full.row = tmp_mv->as_mv.row * 8;
  mvp_full.col = tmp_mv->as_mv.col * 8;

  *rate_mv = vp9_mv_bit_cost(&mvp_full, &ref_mv, x->nmvjointcost, x->mvcost,
                             MV_COST_WEIGHT);

  rate_mode =
      cpi->inter_mode_cost[x->mbmi_ext->mode_context[ref]][INTER_OFFSET(NEWMV)];
  rv =
      !(RDCOST(x->rdmult, x->rddiv, (*rate_mv + rate_mode), 0) > best_rd_sofar);

  if (rv) {
    const int subpel_force_stop = use_base_mv && cpi->sf.base_mv_aggressive
                                      ? 2
                                      : cpi->sf.mv.subpel_force_stop;
    cpi->find_fractional_mv_step(
        x, &tmp_mv->as_mv, &ref_mv, cpi->common.allow_high_precision_mv,
        x->errorperbit, &cpi->fn_ptr[bsize], subpel_force_stop,
        cpi->sf.mv.subpel_iters_per_step, cond_cost_list(cpi, cost_list),
        x->nmvjointcost, x->mvcost, &dis, &x->pred_sse[ref], NULL, 0, 0);
    *rate_mv = vp9_mv_bit_cost(&tmp_mv->as_mv, &ref_mv, x->nmvjointcost,
                               x->mvcost, MV_COST_WEIGHT);
  }

  if (scaled_ref_frame) {
    int i;
    for (i = 0; i < MAX_MB_PLANE; i++) xd->plane[i].pre[0] = backup_yv12[i];
  }
  return rv;
}

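// Computes SSE, sum and variance for the whole block in block_size (8x8)
// steps and stores the per-8x8 results for the transform skipping tests
// that follow.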
static void block_variance(const uint8_t *src, int src_stride,
                           const uint8_t *ref, int ref_stride, int w, int h,
                           unsigned int *sse, int *sum, int block_size,
#if CONFIG_VP9_HIGHBITDEPTH
                           int use_highbitdepth, vpx_bit_depth_t bd,
#endif
                           uint32_t *sse8x8, int *sum8x8, uint32_t *var8x8) {
  int i, j, k = 0;

  *sse = 0;
  *sum = 0;

  for (i = 0; i < h; i += block_size) {
    for (j = 0; j < w; j += block_size) {
#if CONFIG_VP9_HIGHBITDEPTH
      if (use_highbitdepth) {
        switch (bd) {
          case VPX_BITS_8:
            vpx_highbd_8_get8x8var(src + src_stride * i + j, src_stride,
                                   ref + ref_stride * i + j, ref_stride,
                                   &sse8x8[k], &sum8x8[k]);
            break;
          case VPX_BITS_10:
            vpx_highbd_10_get8x8var(src + src_stride * i + j, src_stride,
                                    ref + ref_stride * i + j, ref_stride,
                                    &sse8x8[k], &sum8x8[k]);
            break;
          case VPX_BITS_12:
            vpx_highbd_12_get8x8var(src + src_stride * i + j, src_stride,
                                    ref + ref_stride * i + j, ref_stride,
                                    &sse8x8[k], &sum8x8[k]);
            break;
        }
      } else {
        vpx_get8x8var(src + src_stride * i + j, src_stride,
                      ref + ref_stride * i + j, ref_stride, &sse8x8[k],
                      &sum8x8[k]);
      }
#else
      vpx_get8x8var(src + src_stride * i + j, src_stride,
                    ref + ref_stride * i + j, ref_stride, &sse8x8[k],
                    &sum8x8[k]);
#endif
      *sse += sse8x8[k];
      *sum += sum8x8[k];
      var8x8[k] = sse8x8[k] - (uint32_t)(((int64_t)sum8x8[k] * sum8x8[k]) >> 6);
      k++;
    }
  }
}

static void calculate_variance(int bw, int bh, TX_SIZE tx_size,
                               unsigned int *sse_i, int *sum_i,
                               unsigned int *var_o, unsigned int *sse_o,
                               int *sum_o) {
  const BLOCK_SIZE unit_size = txsize_to_bsize[tx_size];
  const int nw = 1 << (bw - b_width_log2_lookup[unit_size]);
  const int nh = 1 << (bh - b_height_log2_lookup[unit_size]);
  int i, j, k = 0;

  for (i = 0; i < nh; i += 2) {
    for (j = 0; j < nw; j += 2) {
      sse_o[k] = sse_i[i * nw + j] + sse_i[i * nw + j + 1] +
                 sse_i[(i + 1) * nw + j] + sse_i[(i + 1) * nw + j + 1];
      sum_o[k] = sum_i[i * nw + j] + sum_i[i * nw + j + 1] +
                 sum_i[(i + 1) * nw + j] + sum_i[(i + 1) * nw + j + 1];
      var_o[k] = sse_o[k] - (uint32_t)(((int64_t)sum_o[k] * sum_o[k]) >>
                                       (b_width_log2_lookup[unit_size] +
                                        b_height_log2_lookup[unit_size] + 6));
      k++;
    }
  }
}

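// Variance-based rate/distortion model for the Y plane of large partitions:
// picks a transform size, runs per-transform-block skip tests, and sets
// *early_term when the chroma planes also look skippable.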
static void model_rd_for_sb_y_large(VP9_COMP *cpi, BLOCK_SIZE bsize,
                                    MACROBLOCK *x, MACROBLOCKD *xd,
                                    int *out_rate_sum, int64_t *out_dist_sum,
                                    unsigned int *var_y, unsigned int *sse_y,
                                    int mi_row, int mi_col, int *early_term) {
  // Note our transform coeffs are 8 times an orthogonal transform.
  // Hence quantizer step is also 8 times. To get effective quantizer
  // we need to divide by 8 before sending to modeling function.
  unsigned int sse;
  int rate;
  int64_t dist;
  struct macroblock_plane *const p = &x->plane[0];
  struct macroblockd_plane *const pd = &xd->plane[0];
  const uint32_t dc_quant = pd->dequant[0];
  const uint32_t ac_quant = pd->dequant[1];
  const int64_t dc_thr = dc_quant * dc_quant >> 6;
  const int64_t ac_thr = ac_quant * ac_quant >> 6;
  unsigned int var;
  int sum;
  int skip_dc = 0;

  const int bw = b_width_log2_lookup[bsize];
  const int bh = b_height_log2_lookup[bsize];
  const int num8x8 = 1 << (bw + bh - 2);
  unsigned int sse8x8[64] = { 0 };
  int sum8x8[64] = { 0 };
  unsigned int var8x8[64] = { 0 };
  TX_SIZE tx_size;
  int i, k;
#if CONFIG_VP9_HIGHBITDEPTH
  const vpx_bit_depth_t bd = cpi->common.bit_depth;
#endif

  // Calculate variance for whole partition, and also save 8x8 blocks' variance
  // to be used in following transform skipping test.
  block_variance(p->src.buf, p->src.stride, pd->dst.buf, pd->dst.stride,
                 4 << bw, 4 << bh, &sse, &sum, 8,
#if CONFIG_VP9_HIGHBITDEPTH
                 cpi->common.use_highbitdepth, bd,
#endif
                 sse8x8, sum8x8, var8x8);
  var = sse - (unsigned int)(((int64_t)sum * sum) >> (bw + bh + 4));

  *var_y = var;
  *sse_y = sse;

  if (cpi->common.tx_mode == TX_MODE_SELECT) {
    if (sse > (var << 2))
      tx_size = VPXMIN(max_txsize_lookup[bsize],
                       tx_mode_to_biggest_tx_size[cpi->common.tx_mode]);
    else
      tx_size = TX_8X8;

    if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ &&
        cyclic_refresh_segment_id_boosted(xd->mi[0]->segment_id))
      tx_size = TX_8X8;
    else if (tx_size > TX_16X16)
      tx_size = TX_16X16;
  } else {
    tx_size = VPXMIN(max_txsize_lookup[bsize],
                     tx_mode_to_biggest_tx_size[cpi->common.tx_mode]);
  }

  assert(tx_size >= TX_8X8);
  xd->mi[0]->tx_size = tx_size;

  // Evaluate if the partition block is a skippable block in Y plane.
  {
    unsigned int sse16x16[16] = { 0 };
    int sum16x16[16] = { 0 };
    unsigned int var16x16[16] = { 0 };
    const int num16x16 = num8x8 >> 2;

    unsigned int sse32x32[4] = { 0 };
    int sum32x32[4] = { 0 };
    unsigned int var32x32[4] = { 0 };
    const int num32x32 = num8x8 >> 4;

    int ac_test = 1;
    int dc_test = 1;
    const int num = (tx_size == TX_8X8)
                        ? num8x8
                        : ((tx_size == TX_16X16) ? num16x16 : num32x32);
    const unsigned int *sse_tx =
        (tx_size == TX_8X8) ? sse8x8
                            : ((tx_size == TX_16X16) ? sse16x16 : sse32x32);
    const unsigned int *var_tx =
        (tx_size == TX_8X8) ? var8x8
                            : ((tx_size == TX_16X16) ? var16x16 : var32x32);

    // Calculate variance if tx_size > TX_8X8
    if (tx_size >= TX_16X16)
      calculate_variance(bw, bh, TX_8X8, sse8x8, sum8x8, var16x16, sse16x16,
                         sum16x16);
    if (tx_size == TX_32X32)
      calculate_variance(bw, bh, TX_16X16, sse16x16, sum16x16, var32x32,
                         sse32x32, sum32x32);

    // Skipping test
    x->skip_txfm[0] = SKIP_TXFM_NONE;
    for (k = 0; k < num; k++)
      // Check if all ac coefficients can be quantized to zero.
      if (!(var_tx[k] < ac_thr || var == 0)) {
        ac_test = 0;
        break;
      }

    for (k = 0; k < num; k++)
      // Check if dc coefficient can be quantized to zero.
      if (!(sse_tx[k] - var_tx[k] < dc_thr || sse == var)) {
        dc_test = 0;
        break;
      }

    if (ac_test) {
      x->skip_txfm[0] = SKIP_TXFM_AC_ONLY;

      if (dc_test) x->skip_txfm[0] = SKIP_TXFM_AC_DC;
    } else if (dc_test) {
      skip_dc = 1;
    }
  }
  if (x->skip_txfm[0] == SKIP_TXFM_AC_DC) {
    int skip_uv[2] = { 0 };
    unsigned int var_uv[2];
    unsigned int sse_uv[2];

    *out_rate_sum = 0;
    *out_dist_sum = sse << 4;

    // Transform skipping test in UV planes.
    for (i = 1; i <= 2; i++) {
      struct macroblock_plane *const p = &x->plane[i];
      struct macroblockd_plane *const pd = &xd->plane[i];
      const TX_SIZE uv_tx_size = get_uv_tx_size(xd->mi[0], pd);
      const BLOCK_SIZE unit_size = txsize_to_bsize[uv_tx_size];
      const BLOCK_SIZE uv_bsize = get_plane_block_size(bsize, pd);
      const int uv_bw = b_width_log2_lookup[uv_bsize];
      const int uv_bh = b_height_log2_lookup[uv_bsize];
      const int sf = (uv_bw - b_width_log2_lookup[unit_size]) +
                     (uv_bh - b_height_log2_lookup[unit_size]);
      const uint32_t uv_dc_thr = pd->dequant[0] * pd->dequant[0] >> (6 - sf);
      const uint32_t uv_ac_thr = pd->dequant[1] * pd->dequant[1] >> (6 - sf);
      int j = i - 1;

      vp9_build_inter_predictors_sbp(xd, mi_row, mi_col, bsize, i);
      var_uv[j] = cpi->fn_ptr[uv_bsize].vf(
          p->src.buf, p->src.stride, pd->dst.buf, pd->dst.stride, &sse_uv[j]);

      if ((var_uv[j] < uv_ac_thr || var_uv[j] == 0) &&
          (sse_uv[j] - var_uv[j] < uv_dc_thr || sse_uv[j] == var_uv[j]))
        skip_uv[j] = 1;
      else
        break;
    }

    // If the transforms in the YUV planes are skippable, the mode search
    // checks fewer inter modes and doesn't check intra modes.
    if (skip_uv[0] & skip_uv[1]) {
      *early_term = 1;
    }

    return;
  }

  if (!skip_dc) {
#if CONFIG_VP9_HIGHBITDEPTH
    vp9_model_rd_from_var_lapndz(sse - var, num_pels_log2_lookup[bsize],
                                 dc_quant >> (xd->bd - 5), &rate, &dist);
#else
    vp9_model_rd_from_var_lapndz(sse - var, num_pels_log2_lookup[bsize],
                                 dc_quant >> 3, &rate, &dist);
#endif  // CONFIG_VP9_HIGHBITDEPTH
  }

  if (!skip_dc) {
    *out_rate_sum = rate >> 1;
    *out_dist_sum = dist << 3;
  } else {
    *out_rate_sum = 0;
    *out_dist_sum = (sse - var) << 4;
  }

#if CONFIG_VP9_HIGHBITDEPTH
  vp9_model_rd_from_var_lapndz(var, num_pels_log2_lookup[bsize],
                               ac_quant >> (xd->bd - 5), &rate, &dist);
#else
  vp9_model_rd_from_var_lapndz(var, num_pels_log2_lookup[bsize], ac_quant >> 3,
                               &rate, &dist);
#endif  // CONFIG_VP9_HIGHBITDEPTH

  *out_rate_sum += rate;
  *out_dist_sum += dist << 4;
}

static void model_rd_for_sb_y(VP9_COMP *cpi, BLOCK_SIZE bsize, MACROBLOCK *x,
                              MACROBLOCKD *xd, int *out_rate_sum,
                              int64_t *out_dist_sum, unsigned int *var_y,
                              unsigned int *sse_y) {
  // Note our transform coeffs are 8 times an orthogonal transform.
  // Hence quantizer step is also 8 times. To get effective quantizer
  // we need to divide by 8 before sending to modeling function.
  unsigned int sse;
  int rate;
  int64_t dist;
  struct macroblock_plane *const p = &x->plane[0];
  struct macroblockd_plane *const pd = &xd->plane[0];
  const int64_t dc_thr = p->quant_thred[0] >> 6;
  const int64_t ac_thr = p->quant_thred[1] >> 6;
  const uint32_t dc_quant = pd->dequant[0];
  const uint32_t ac_quant = pd->dequant[1];
  unsigned int var = cpi->fn_ptr[bsize].vf(p->src.buf, p->src.stride,
                                           pd->dst.buf, pd->dst.stride, &sse);
  int skip_dc = 0;

  *var_y = var;
  *sse_y = sse;

  if (cpi->common.tx_mode == TX_MODE_SELECT) {
    if (sse > (var << 2))
      xd->mi[0]->tx_size =
          VPXMIN(max_txsize_lookup[bsize],
                 tx_mode_to_biggest_tx_size[cpi->common.tx_mode]);
    else
      xd->mi[0]->tx_size = TX_8X8;

    if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ &&
        cyclic_refresh_segment_id_boosted(xd->mi[0]->segment_id))
      xd->mi[0]->tx_size = TX_8X8;
    else if (xd->mi[0]->tx_size > TX_16X16)
      xd->mi[0]->tx_size = TX_16X16;
  } else {
    xd->mi[0]->tx_size =
        VPXMIN(max_txsize_lookup[bsize],
               tx_mode_to_biggest_tx_size[cpi->common.tx_mode]);
  }

  // Evaluate if the partition block is a skippable block in Y plane.
  {
    const BLOCK_SIZE unit_size = txsize_to_bsize[xd->mi[0]->tx_size];
    const unsigned int num_blk_log2 =
        (b_width_log2_lookup[bsize] - b_width_log2_lookup[unit_size]) +
        (b_height_log2_lookup[bsize] - b_height_log2_lookup[unit_size]);
    const unsigned int sse_tx = sse >> num_blk_log2;
    const unsigned int var_tx = var >> num_blk_log2;

    x->skip_txfm[0] = SKIP_TXFM_NONE;
    // Check if all ac coefficients can be quantized to zero.
    if (var_tx < ac_thr || var == 0) {
      x->skip_txfm[0] = SKIP_TXFM_AC_ONLY;
      // Check if dc coefficient can be quantized to zero.
      if (sse_tx - var_tx < dc_thr || sse == var)
        x->skip_txfm[0] = SKIP_TXFM_AC_DC;
    } else {
      if (sse_tx - var_tx < dc_thr || sse == var) skip_dc = 1;
    }
  }

  if (x->skip_txfm[0] == SKIP_TXFM_AC_DC) {
    *out_rate_sum = 0;
    *out_dist_sum = sse << 4;
    return;
  }

  if (!skip_dc) {
#if CONFIG_VP9_HIGHBITDEPTH
    vp9_model_rd_from_var_lapndz(sse - var, num_pels_log2_lookup[bsize],
                                 dc_quant >> (xd->bd - 5), &rate, &dist);
#else
    vp9_model_rd_from_var_lapndz(sse - var, num_pels_log2_lookup[bsize],
                                 dc_quant >> 3, &rate, &dist);
#endif  // CONFIG_VP9_HIGHBITDEPTH
  }

  if (!skip_dc) {
    *out_rate_sum = rate >> 1;
    *out_dist_sum = dist << 3;
  } else {
    *out_rate_sum = 0;
    *out_dist_sum = (sse - var) << 4;
  }

#if CONFIG_VP9_HIGHBITDEPTH
  vp9_model_rd_from_var_lapndz(var, num_pels_log2_lookup[bsize],
                               ac_quant >> (xd->bd - 5), &rate, &dist);
#else
  vp9_model_rd_from_var_lapndz(var, num_pels_log2_lookup[bsize], ac_quant >> 3,
                               &rate, &dist);
#endif  // CONFIG_VP9_HIGHBITDEPTH

  *out_rate_sum += rate;
  *out_dist_sum += dist << 4;
}

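// Measures Y-plane rate/distortion by transforming and quantizing the
// residual (Hadamard for 8x8/16x16, forward 4x4 transform otherwise). The
// high bit depth build falls back to the variance model above.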
#if CONFIG_VP9_HIGHBITDEPTH
static void block_yrd(VP9_COMP *cpi, MACROBLOCK *x, RD_COST *this_rdc,
                      int *skippable, int64_t *sse, BLOCK_SIZE bsize,
                      TX_SIZE tx_size) {
  MACROBLOCKD *xd = &x->e_mbd;
  unsigned int var_y, sse_y;

  (void)tx_size;
  model_rd_for_sb_y(cpi, bsize, x, xd, &this_rdc->rate, &this_rdc->dist, &var_y,
                    &sse_y);
  *sse = INT_MAX;
  *skippable = 0;
  return;
}
#else
static void block_yrd(VP9_COMP *cpi, MACROBLOCK *x, RD_COST *this_rdc,
                      int *skippable, int64_t *sse, BLOCK_SIZE bsize,
                      TX_SIZE tx_size) {
  MACROBLOCKD *xd = &x->e_mbd;
  const struct macroblockd_plane *pd = &xd->plane[0];
  struct macroblock_plane *const p = &x->plane[0];
  const int num_4x4_w = num_4x4_blocks_wide_lookup[bsize];
  const int num_4x4_h = num_4x4_blocks_high_lookup[bsize];
  const int step = 1 << (tx_size << 1);
  const int block_step = (1 << tx_size);
  int block = 0, r, c;
  const int max_blocks_wide =
      num_4x4_w + (xd->mb_to_right_edge >= 0 ? 0 : xd->mb_to_right_edge >> 5);
  const int max_blocks_high =
      num_4x4_h + (xd->mb_to_bottom_edge >= 0 ? 0 : xd->mb_to_bottom_edge >> 5);
  int eob_cost = 0;
  const int bw = 4 * num_4x4_w;
  const int bh = 4 * num_4x4_h;

  (void)cpi;

  // The max tx_size passed in is TX_16X16.
  assert(tx_size != TX_32X32);

  vpx_subtract_block(bh, bw, p->src_diff, bw, p->src.buf, p->src.stride,
                     pd->dst.buf, pd->dst.stride);
  *skippable = 1;
  // Keep track of the row and column of the blocks we use so that we know
  // if we are in the unrestricted motion border.
  for (r = 0; r < max_blocks_high; r += block_step) {
    for (c = 0; c < num_4x4_w; c += block_step) {
      if (c < max_blocks_wide) {
        const scan_order *const scan_order = &vp9_default_scan_orders[tx_size];
        tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
        tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
        tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
        uint16_t *const eob = &p->eobs[block];
        const int diff_stride = bw;
        const int16_t *src_diff;
        src_diff = &p->src_diff[(r * diff_stride + c) << 2];

        switch (tx_size) {
          case TX_16X16:
            vpx_hadamard_16x16(src_diff, diff_stride, (int16_t *)coeff);
            vp9_quantize_fp(coeff, 256, x->skip_block, p->zbin, p->round_fp,
                            p->quant_fp, p->quant_shift, qcoeff, dqcoeff,
                            pd->dequant, eob, scan_order->scan,
                            scan_order->iscan);
            break;
          case TX_8X8:
            vpx_hadamard_8x8(src_diff, diff_stride, (int16_t *)coeff);
            vp9_quantize_fp(coeff, 64, x->skip_block, p->zbin, p->round_fp,
                            p->quant_fp, p->quant_shift, qcoeff, dqcoeff,
                            pd->dequant, eob, scan_order->scan,
                            scan_order->iscan);
            break;
          case TX_4X4:
            x->fwd_txm4x4(src_diff, coeff, diff_stride);
            vp9_quantize_fp(coeff, 16, x->skip_block, p->zbin, p->round_fp,
                            p->quant_fp, p->quant_shift, qcoeff, dqcoeff,
                            pd->dequant, eob, scan_order->scan,
                            scan_order->iscan);
            break;
          default: assert(0); break;
        }
        *skippable &= (*eob == 0);
        eob_cost += 1;
      }
      block += step;
    }
  }

  this_rdc->rate = 0;
  if (*sse < INT64_MAX) {
    *sse = (*sse << 6) >> 2;
    if (*skippable) {
      this_rdc->dist = *sse;
      return;
    }
  }

  block = 0;
  this_rdc->dist = 0;
  for (r = 0; r < max_blocks_high; r += block_step) {
    for (c = 0; c < num_4x4_w; c += block_step) {
      if (c < max_blocks_wide) {
        tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
        tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
        tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
        uint16_t *const eob = &p->eobs[block];

        if (*eob == 1)
          this_rdc->rate += (int)abs(qcoeff[0]);
        else if (*eob > 1)
          this_rdc->rate += vpx_satd((const int16_t *)qcoeff, step << 4);

        this_rdc->dist += vp9_block_error_fp(coeff, dqcoeff, step << 4) >> 2;
      }
      block += step;
    }
  }

  // If skippable is set, rate gets clobbered later.
  this_rdc->rate <<= (2 + VP9_PROB_COST_SHIFT);
  this_rdc->rate += (eob_cost << VP9_PROB_COST_SHIFT);
}
#endif

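// Chroma counterpart of the variance-based rd model; planes whose
// color_sensitivity flag is not set are skipped.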
static void model_rd_for_sb_uv(VP9_COMP *cpi, BLOCK_SIZE plane_bsize,
                               MACROBLOCK *x, MACROBLOCKD *xd,
                               RD_COST *this_rdc, unsigned int *var_y,
                               unsigned int *sse_y, int start_plane,
                               int stop_plane) {
  // Note our transform coeffs are 8 times an orthogonal transform.
  // Hence quantizer step is also 8 times. To get effective quantizer
  // we need to divide by 8 before sending to modeling function.
  unsigned int sse;
  int rate;
  int64_t dist;
  int i;
#if CONFIG_VP9_HIGHBITDEPTH
  uint64_t tot_var = *var_y;
  uint64_t tot_sse = *sse_y;
#else
  uint32_t tot_var = *var_y;
  uint32_t tot_sse = *sse_y;
#endif

  this_rdc->rate = 0;
  this_rdc->dist = 0;

  for (i = start_plane; i <= stop_plane; ++i) {
    struct macroblock_plane *const p = &x->plane[i];
    struct macroblockd_plane *const pd = &xd->plane[i];
    const uint32_t dc_quant = pd->dequant[0];
    const uint32_t ac_quant = pd->dequant[1];
    const BLOCK_SIZE bs = plane_bsize;
    unsigned int var;
    if (!x->color_sensitivity[i - 1]) continue;

    var = cpi->fn_ptr[bs].vf(p->src.buf, p->src.stride, pd->dst.buf,
                             pd->dst.stride, &sse);
    assert(sse >= var);
    tot_var += var;
    tot_sse += sse;

#if CONFIG_VP9_HIGHBITDEPTH
    vp9_model_rd_from_var_lapndz(sse - var, num_pels_log2_lookup[bs],
                                 dc_quant >> (xd->bd - 5), &rate, &dist);
#else
    vp9_model_rd_from_var_lapndz(sse - var, num_pels_log2_lookup[bs],
                                 dc_quant >> 3, &rate, &dist);
#endif  // CONFIG_VP9_HIGHBITDEPTH

    this_rdc->rate += rate >> 1;
    this_rdc->dist += dist << 3;

#if CONFIG_VP9_HIGHBITDEPTH
    vp9_model_rd_from_var_lapndz(var, num_pels_log2_lookup[bs],
                                 ac_quant >> (xd->bd - 5), &rate, &dist);
#else
    vp9_model_rd_from_var_lapndz(var, num_pels_log2_lookup[bs], ac_quant >> 3,
                                 &rate, &dist);
#endif  // CONFIG_VP9_HIGHBITDEPTH

    this_rdc->rate += rate;
    this_rdc->dist += dist << 4;
  }

#if CONFIG_VP9_HIGHBITDEPTH
  *var_y = tot_var > UINT32_MAX ? UINT32_MAX : (uint32_t)tot_var;
  *sse_y = tot_sse > UINT32_MAX ? UINT32_MAX : (uint32_t)tot_sse;
#else
  *var_y = tot_var;
  *sse_y = tot_sse;
#endif
}

static int get_pred_buffer(PRED_BUFFER *p, int len) {
  int i;

  for (i = 0; i < len; i++) {
    if (!p[i].in_use) {
      p[i].in_use = 1;
      return i;
    }
  }
  return -1;
}

static void free_pred_buffer(PRED_BUFFER *p) {
  if (p != NULL) p->in_use = 0;
}

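// Encode breakout: if the Y (and then U/V) AC variance and DC error fall
// below thresholds derived from the dequant values and x->encode_breakout,
// mark the block as skippable and return the skip rate/distortion.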
static void encode_breakout_test(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize,
                                 int mi_row, int mi_col,
                                 MV_REFERENCE_FRAME ref_frame,
                                 PREDICTION_MODE this_mode, unsigned int var_y,
                                 unsigned int sse_y,
                                 struct buf_2d yv12_mb[][MAX_MB_PLANE],
                                 int *rate, int64_t *dist) {
  MACROBLOCKD *xd = &x->e_mbd;
  MODE_INFO *const mi = xd->mi[0];
  const BLOCK_SIZE uv_size = get_plane_block_size(bsize, &xd->plane[1]);
  unsigned int var = var_y, sse = sse_y;
  // Skipping threshold for ac.
  unsigned int thresh_ac;
  // Skipping threshold for dc.
  unsigned int thresh_dc;
  int motion_low = 1;
  if (mi->mv[0].as_mv.row > 64 || mi->mv[0].as_mv.row < -64 ||
      mi->mv[0].as_mv.col > 64 || mi->mv[0].as_mv.col < -64)
    motion_low = 0;
  if (x->encode_breakout > 0 && motion_low == 1) {
    // Set a maximum for threshold to avoid big PSNR loss in low bit rate
    // case. Use extreme low threshold for static frames to limit
    // skipping.
    const unsigned int max_thresh = 36000;
    // The encode_breakout input
    const unsigned int min_thresh =
        VPXMIN(((unsigned int)x->encode_breakout << 4), max_thresh);
#if CONFIG_VP9_HIGHBITDEPTH
    const int shift = (xd->bd << 1) - 16;
#endif

    // Calculate threshold according to dequant value.
    thresh_ac = (xd->plane[0].dequant[1] * xd->plane[0].dequant[1]) >> 3;
#if CONFIG_VP9_HIGHBITDEPTH
    if ((xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) && shift > 0) {
      thresh_ac = ROUND_POWER_OF_TWO(thresh_ac, shift);
    }
#endif  // CONFIG_VP9_HIGHBITDEPTH
    thresh_ac = clamp(thresh_ac, min_thresh, max_thresh);

    // Adjust ac threshold according to partition size.
    thresh_ac >>=
        8 - (b_width_log2_lookup[bsize] + b_height_log2_lookup[bsize]);

    thresh_dc = (xd->plane[0].dequant[0] * xd->plane[0].dequant[0] >> 6);
#if CONFIG_VP9_HIGHBITDEPTH
    if ((xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) && shift > 0) {
      thresh_dc = ROUND_POWER_OF_TWO(thresh_dc, shift);
    }
#endif  // CONFIG_VP9_HIGHBITDEPTH
  } else {
    thresh_ac = 0;
    thresh_dc = 0;
  }

  // Y skipping condition checking for ac and dc.
  if (var <= thresh_ac && (sse - var) <= thresh_dc) {
    unsigned int sse_u, sse_v;
    unsigned int var_u, var_v;
    unsigned int thresh_ac_uv = thresh_ac;
    unsigned int thresh_dc_uv = thresh_dc;
    if (x->sb_is_skin) {
      thresh_ac_uv = 0;
      thresh_dc_uv = 0;
    }

    // Skip UV prediction unless breakout is zero (lossless) to save
    // computation with low impact on the result
    if (x->encode_breakout == 0) {
      xd->plane[1].pre[0] = yv12_mb[ref_frame][1];
      xd->plane[2].pre[0] = yv12_mb[ref_frame][2];
      vp9_build_inter_predictors_sbuv(xd, mi_row, mi_col, bsize);
    }

    var_u = cpi->fn_ptr[uv_size].vf(x->plane[1].src.buf, x->plane[1].src.stride,
                                    xd->plane[1].dst.buf,
                                    xd->plane[1].dst.stride, &sse_u);

    // U skipping condition checking
    if (((var_u << 2) <= thresh_ac_uv) && (sse_u - var_u <= thresh_dc_uv)) {
      var_v = cpi->fn_ptr[uv_size].vf(
          x->plane[2].src.buf, x->plane[2].src.stride, xd->plane[2].dst.buf,
          xd->plane[2].dst.stride, &sse_v);

      // V skipping condition checking
      if (((var_v << 2) <= thresh_ac_uv) && (sse_v - var_v <= thresh_dc_uv)) {
        x->skip = 1;

        // The cost of skip bit needs to be added.
        *rate = cpi->inter_mode_cost[x->mbmi_ext->mode_context[ref_frame]]
                                    [INTER_OFFSET(this_mode)];

        // More on this part of rate
        // rate += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 1);

        // Scaling factor for SSE from spatial domain to frequency
        // domain is 16. Adjust distortion accordingly.
        // TODO(yunqingwang): In this function, only y-plane dist is
        // calculated.
        *dist = (sse << 4);  // + ((sse_u + sse_v) << 4);

        // *disable_skip = 1;
      }
    }
  }
}

struct estimate_block_intra_args {
  VP9_COMP *cpi;
  MACROBLOCK *x;
  PREDICTION_MODE mode;
  int skippable;
  RD_COST *rdc;
};

static void estimate_block_intra(int plane, int block, int row, int col,
                                 BLOCK_SIZE plane_bsize, TX_SIZE tx_size,
                                 void *arg) {
  struct estimate_block_intra_args *const args = arg;
  VP9_COMP *const cpi = args->cpi;
  MACROBLOCK *const x = args->x;
  MACROBLOCKD *const xd = &x->e_mbd;
  struct macroblock_plane *const p = &x->plane[0];
  struct macroblockd_plane *const pd = &xd->plane[0];
  const BLOCK_SIZE bsize_tx = txsize_to_bsize[tx_size];
  uint8_t *const src_buf_base = p->src.buf;
  uint8_t *const dst_buf_base = pd->dst.buf;
  const int src_stride = p->src.stride;
  const int dst_stride = pd->dst.stride;
  RD_COST this_rdc;

  (void)block;

  p->src.buf = &src_buf_base[4 * (row * src_stride + col)];
  pd->dst.buf = &dst_buf_base[4 * (row * dst_stride + col)];
  // Use source buffer as an approximation for the fully reconstructed buffer.
  vp9_predict_intra_block(xd, b_width_log2_lookup[plane_bsize], tx_size,
                          args->mode, x->skip_encode ? p->src.buf : pd->dst.buf,
                          x->skip_encode ? src_stride : dst_stride, pd->dst.buf,
                          dst_stride, col, row, plane);

  if (plane == 0) {
    int64_t this_sse = INT64_MAX;
    // TODO(jingning): This needs further refactoring.
    block_yrd(cpi, x, &this_rdc, &args->skippable, &this_sse, bsize_tx,
              VPXMIN(tx_size, TX_16X16));
  } else {
    unsigned int var = 0;
    unsigned int sse = 0;
    model_rd_for_sb_uv(cpi, plane_bsize, x, xd, &this_rdc, &var, &sse, plane,
                       plane);
  }

  p->src.buf = src_buf_base;
  pd->dst.buf = dst_buf_base;
  args->rdc->rate += this_rdc.rate;
  args->rdc->dist += this_rdc.dist;
}

static const THR_MODES mode_idx[MAX_REF_FRAMES - 1][4] = {
  { THR_DC, THR_V_PRED, THR_H_PRED, THR_TM },
  { THR_NEARESTMV, THR_NEARMV, THR_ZEROMV, THR_NEWMV },
  { THR_NEARESTG, THR_NEARG, THR_ZEROG, THR_NEWG },
};

static const PREDICTION_MODE intra_mode_list[] = { DC_PRED, V_PRED, H_PRED,
                                                   TM_PRED };

static int mode_offset(const PREDICTION_MODE mode) {
  if (mode >= NEARESTMV) {
    return INTER_OFFSET(mode);
  } else {
    switch (mode) {
      case DC_PRED: return 0;
      case V_PRED: return 1;
      case H_PRED: return 2;
      case TM_PRED: return 3;
      default: return -1;
    }
  }
}

static INLINE void update_thresh_freq_fact(
    VP9_COMP *cpi, TileDataEnc *tile_data, int source_variance,
    BLOCK_SIZE bsize, MV_REFERENCE_FRAME ref_frame, THR_MODES best_mode_idx,
    PREDICTION_MODE mode) {
  THR_MODES thr_mode_idx = mode_idx[ref_frame][mode_offset(mode)];
  int *freq_fact = &tile_data->thresh_freq_fact[bsize][thr_mode_idx];
  if (thr_mode_idx == best_mode_idx)
    *freq_fact -= (*freq_fact >> 4);
  else if (cpi->sf.limit_newmv_early_exit && mode == NEWMV &&
           ref_frame == LAST_FRAME && source_variance < 5) {
    *freq_fact = VPXMIN(*freq_fact + RD_THRESH_INC, 32);
  } else {
    *freq_fact = VPXMIN(*freq_fact + RD_THRESH_INC,
                        cpi->sf.adaptive_rd_thresh * RD_THRESH_MAX_FACT);
  }
}

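// Fast intra mode selection for non-RD coding: evaluates DC_PRED, V_PRED and
// H_PRED with the estimated block costs and stores the best in rd_cost.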
void vp9_pick_intra_mode(VP9_COMP *cpi, MACROBLOCK *x, RD_COST *rd_cost,
                         BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MODE_INFO *const mi = xd->mi[0];
  RD_COST this_rdc, best_rdc;
  PREDICTION_MODE this_mode;
  struct estimate_block_intra_args args = { cpi, x, DC_PRED, 1, 0 };
  const TX_SIZE intra_tx_size =
      VPXMIN(max_txsize_lookup[bsize],
             tx_mode_to_biggest_tx_size[cpi->common.tx_mode]);
  MODE_INFO *const mic = xd->mi[0];
  int *bmode_costs;
  const MODE_INFO *above_mi = xd->above_mi;
  const MODE_INFO *left_mi = xd->left_mi;
  const PREDICTION_MODE A = vp9_above_block_mode(mic, above_mi, 0);
  const PREDICTION_MODE L = vp9_left_block_mode(mic, left_mi, 0);
  bmode_costs = cpi->y_mode_costs[A][L];

  (void)ctx;
  vp9_rd_cost_reset(&best_rdc);
  vp9_rd_cost_reset(&this_rdc);

  mi->ref_frame[0] = INTRA_FRAME;
  // Initialize interp_filter here so we do not have to check for inter block
  // modes in get_pred_context_switchable_interp()
  mi->interp_filter = SWITCHABLE_FILTERS;

  mi->mv[0].as_int = INVALID_MV;
  mi->uv_mode = DC_PRED;
  memset(x->skip_txfm, 0, sizeof(x->skip_txfm));

  // Change the limit of this loop to add other intra prediction
  // mode tests.
  for (this_mode = DC_PRED; this_mode <= H_PRED; ++this_mode) {
    this_rdc.dist = this_rdc.rate = 0;
    args.mode = this_mode;
    args.skippable = 1;
    args.rdc = &this_rdc;
    mi->tx_size = intra_tx_size;
    vp9_foreach_transformed_block_in_plane(xd, bsize, 0, estimate_block_intra,
                                           &args);
    if (args.skippable) {
      x->skip_txfm[0] = SKIP_TXFM_AC_DC;
      this_rdc.rate = vp9_cost_bit(vp9_get_skip_prob(&cpi->common, xd), 1);
    } else {
      x->skip_txfm[0] = SKIP_TXFM_NONE;
      this_rdc.rate += vp9_cost_bit(vp9_get_skip_prob(&cpi->common, xd), 0);
    }
    this_rdc.rate += bmode_costs[this_mode];
    this_rdc.rdcost = RDCOST(x->rdmult, x->rddiv, this_rdc.rate, this_rdc.dist);

    if (this_rdc.rdcost < best_rdc.rdcost) {
      best_rdc = this_rdc;
      mi->mode = this_mode;
    }
  }

  *rd_cost = best_rdc;
}

static void init_ref_frame_cost(VP9_COMMON *const cm, MACROBLOCKD *const xd,
                                int ref_frame_cost[MAX_REF_FRAMES]) {
  vpx_prob intra_inter_p = vp9_get_intra_inter_prob(cm, xd);
  vpx_prob ref_single_p1 = vp9_get_pred_prob_single_ref_p1(cm, xd);
  vpx_prob ref_single_p2 = vp9_get_pred_prob_single_ref_p2(cm, xd);

  ref_frame_cost[INTRA_FRAME] = vp9_cost_bit(intra_inter_p, 0);
  ref_frame_cost[LAST_FRAME] = ref_frame_cost[GOLDEN_FRAME] =
      ref_frame_cost[ALTREF_FRAME] = vp9_cost_bit(intra_inter_p, 1);

  ref_frame_cost[LAST_FRAME] += vp9_cost_bit(ref_single_p1, 0);
  ref_frame_cost[GOLDEN_FRAME] += vp9_cost_bit(ref_single_p1, 1);
  ref_frame_cost[ALTREF_FRAME] += vp9_cost_bit(ref_single_p1, 1);
  ref_frame_cost[GOLDEN_FRAME] += vp9_cost_bit(ref_single_p2, 0);
  ref_frame_cost[ALTREF_FRAME] += vp9_cost_bit(ref_single_p2, 1);
}

typedef struct {
  MV_REFERENCE_FRAME ref_frame;
  PREDICTION_MODE pred_mode;
} REF_MODE;

#define RT_INTER_MODES 8
static const REF_MODE ref_mode_set[RT_INTER_MODES] = {
  { LAST_FRAME, ZEROMV },   { LAST_FRAME, NEARESTMV },
  { GOLDEN_FRAME, ZEROMV }, { LAST_FRAME, NEARMV },
  { LAST_FRAME, NEWMV },    { GOLDEN_FRAME, NEARESTMV },
  { GOLDEN_FRAME, NEARMV }, { GOLDEN_FRAME, NEWMV }
};
static const REF_MODE ref_mode_set_svc[RT_INTER_MODES] = {
  { LAST_FRAME, ZEROMV },      { GOLDEN_FRAME, ZEROMV },
  { LAST_FRAME, NEARESTMV },   { LAST_FRAME, NEARMV },
  { GOLDEN_FRAME, NEARESTMV }, { GOLDEN_FRAME, NEARMV },
  { LAST_FRAME, NEWMV },       { GOLDEN_FRAME, NEWMV }
};

static int set_intra_cost_penalty(const VP9_COMP *const cpi, BLOCK_SIZE bsize) {
  const VP9_COMMON *const cm = &cpi->common;
  // Reduce the intra cost penalty for small blocks (<=16x16).
  int reduction_fac =
      (bsize <= BLOCK_16X16) ? ((bsize <= BLOCK_8X8) ? 4 : 2) : 0;
  if (cpi->noise_estimate.enabled && cpi->noise_estimate.level == kHigh)
    // Don't reduce intra cost penalty if estimated noise level is high.
    reduction_fac = 0;
  return vp9_get_intra_cost_penalty(cm->base_qindex, cm->y_dc_delta_q,
                                    cm->bit_depth) >>
         reduction_fac;
}

static INLINE void find_predictors(
    VP9_COMP *cpi, MACROBLOCK *x, MV_REFERENCE_FRAME ref_frame,
    int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES],
    int const_motion[MAX_REF_FRAMES], int *ref_frame_skip_mask,
    const int flag_list[4], TileDataEnc *tile_data, int mi_row, int mi_col,
    struct buf_2d yv12_mb[4][MAX_MB_PLANE], BLOCK_SIZE bsize,
    int force_skip_low_temp_var) {
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_buffer(cpi, ref_frame);
  TileInfo *const tile_info = &tile_data->tile_info;
  // TODO(jingning) placeholder for inter-frame non-RD mode decision.
  x->pred_mv_sad[ref_frame] = INT_MAX;
  frame_mv[NEWMV][ref_frame].as_int = INVALID_MV;
  frame_mv[ZEROMV][ref_frame].as_int = 0;
  // this needs various further optimizations. to be continued..
  if ((cpi->ref_frame_flags & flag_list[ref_frame]) && (yv12 != NULL)) {
    int_mv *const candidates = x->mbmi_ext->ref_mvs[ref_frame];
    const struct scale_factors *const sf = &cm->frame_refs[ref_frame - 1].sf;
    vp9_setup_pred_block(xd, yv12_mb[ref_frame], yv12, mi_row, mi_col, sf, sf);
    if (cm->use_prev_frame_mvs) {
      vp9_find_mv_refs(cm, xd, xd->mi[0], ref_frame, candidates, mi_row, mi_col,
                       x->mbmi_ext->mode_context);
    } else {
      const_motion[ref_frame] =
          mv_refs_rt(cpi, cm, x, xd, tile_info, xd->mi[0], ref_frame,
                     candidates, &frame_mv[NEWMV][ref_frame], mi_row, mi_col,
                     (int)(cpi->svc.use_base_mv && cpi->svc.spatial_layer_id));
    }
    vp9_find_best_ref_mvs(xd, cm->allow_high_precision_mv, candidates,
                          &frame_mv[NEARESTMV][ref_frame],
                          &frame_mv[NEARMV][ref_frame]);
    // Early exit for golden frame if force_skip_low_temp_var is set.
    if (!vp9_is_scaled(sf) && bsize >= BLOCK_8X8 &&
        !(force_skip_low_temp_var && ref_frame == GOLDEN_FRAME)) {
      vp9_mv_pred(cpi, x, yv12_mb[ref_frame][0].buf, yv12->y_stride, ref_frame,
                  bsize);
    }
  } else {
    *ref_frame_skip_mask |= (1 << ref_frame);
  }
}

static void vp9_NEWMV_diff_bias(const NOISE_ESTIMATE *ne, MACROBLOCKD *xd,
                                PREDICTION_MODE this_mode, RD_COST *this_rdc,
                                BLOCK_SIZE bsize, int mv_row, int mv_col,
                                int is_last_frame) {
  // Bias against MVs associated with NEWMV mode that are very different from
  // top/left neighbors.
  if (this_mode == NEWMV) {
    int al_mv_average_row;
    int al_mv_average_col;
    int left_row, left_col;
    int row_diff, col_diff;
    int above_mv_valid = 0;
    int left_mv_valid = 0;
    int above_row = 0;
    int above_col = 0;
    if (xd->above_mi) {
      above_mv_valid = xd->above_mi->mv[0].as_int != INVALID_MV;
      above_row = xd->above_mi->mv[0].as_mv.row;
      above_col = xd->above_mi->mv[0].as_mv.col;
    }
    if (xd->left_mi) {
      left_mv_valid = xd->left_mi->mv[0].as_int != INVALID_MV;
      left_row = xd->left_mi->mv[0].as_mv.row;
      left_col = xd->left_mi->mv[0].as_mv.col;
    }
    if (above_mv_valid && left_mv_valid) {
      al_mv_average_row = (above_row + left_row + 1) >> 1;
      al_mv_average_col = (above_col + left_col + 1) >> 1;
    } else if (above_mv_valid) {
      al_mv_average_row = above_row;
      al_mv_average_col = above_col;
    } else if (left_mv_valid) {
      al_mv_average_row = left_row;
      al_mv_average_col = left_col;
    } else {
      al_mv_average_row = al_mv_average_col = 0;
    }
    row_diff = (al_mv_average_row - mv_row);
    col_diff = (al_mv_average_col - mv_col);
    if (row_diff > 48 || row_diff < -48 || col_diff > 48 || col_diff < -48) {
      if (bsize > BLOCK_32X32)
        this_rdc->rdcost = this_rdc->rdcost << 1;
      else
        this_rdc->rdcost = 3 * this_rdc->rdcost >> 1;
    }
  }
  // If noise estimation is enabled, and estimated level is above threshold,
  // add a bias to LAST reference with small motion, for large blocks.
  if (ne->enabled && ne->level >= kMedium && bsize >= BLOCK_32X32 &&
      is_last_frame && mv_row < 8 && mv_row > -8 && mv_col < 8 && mv_col > -8) {
    this_rdc->rdcost = 7 * this_rdc->rdcost >> 3;
  }
}

#if CONFIG_VP9_TEMPORAL_DENOISING
static void vp9_pickmode_ctx_den_update(
    VP9_PICKMODE_CTX_DEN *ctx_den, int64_t zero_last_cost_orig,
    int ref_frame_cost[MAX_REF_FRAMES],
    int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES], int reuse_inter_pred,
    TX_SIZE best_tx_size, PREDICTION_MODE best_mode,
    MV_REFERENCE_FRAME best_ref_frame, INTERP_FILTER best_pred_filter,
    uint8_t best_mode_skip_txfm) {
  ctx_den->zero_last_cost_orig = zero_last_cost_orig;
  ctx_den->ref_frame_cost = ref_frame_cost;
  ctx_den->frame_mv = frame_mv;
  ctx_den->reuse_inter_pred = reuse_inter_pred;
  ctx_den->best_tx_size = best_tx_size;
  ctx_den->best_mode = best_mode;
  ctx_den->best_ref_frame = best_ref_frame;
  ctx_den->best_pred_filter = best_pred_filter;
  ctx_den->best_mode_skip_txfm = best_mode_skip_txfm;
}

static void recheck_zeromv_after_denoising(
    VP9_COMP *cpi, MODE_INFO *const mi, MACROBLOCK *x, MACROBLOCKD *const xd,
    VP9_DENOISER_DECISION decision, VP9_PICKMODE_CTX_DEN *ctx_den,
    struct buf_2d yv12_mb[4][MAX_MB_PLANE], RD_COST *best_rdc, BLOCK_SIZE bsize,
    int mi_row, int mi_col) {
  // If INTRA or GOLDEN reference was selected, re-evaluate ZEROMV on
  // denoised result. Only do this under noise conditions, and if rdcost of
  // ZEROMV on original source is not significantly higher than rdcost of best
  // mode.
  if (cpi->noise_estimate.enabled && cpi->noise_estimate.level > kLow &&
      ctx_den->zero_last_cost_orig < (best_rdc->rdcost << 3) &&
      ((ctx_den->best_ref_frame == INTRA_FRAME && decision >= FILTER_BLOCK) ||
       (ctx_den->best_ref_frame == GOLDEN_FRAME &&
        decision == FILTER_ZEROMV_BLOCK))) {
    // Check if we should pick ZEROMV on denoised signal.
    int rate = 0;
    int64_t dist = 0;
    uint32_t var_y = UINT_MAX;
    uint32_t sse_y = UINT_MAX;
    RD_COST this_rdc;
    mi->mode = ZEROMV;
    mi->ref_frame[0] = LAST_FRAME;
    mi->ref_frame[1] = NONE;
    mi->mv[0].as_int = 0;
    mi->interp_filter = EIGHTTAP;
    xd->plane[0].pre[0] = yv12_mb[LAST_FRAME][0];
    vp9_build_inter_predictors_sby(xd, mi_row, mi_col, bsize);
    model_rd_for_sb_y(cpi, bsize, x, xd, &rate, &dist, &var_y, &sse_y);
    this_rdc.rate = rate + ctx_den->ref_frame_cost[LAST_FRAME] +
                    cpi->inter_mode_cost[x->mbmi_ext->mode_context[LAST_FRAME]]
                                        [INTER_OFFSET(ZEROMV)];
    this_rdc.dist = dist;
    this_rdc.rdcost = RDCOST(x->rdmult, x->rddiv, rate, dist);
    // Switch to ZEROMV if the rdcost for ZEROMV on denoised source
    // is lower than best_ref mode (on original source).
    if (this_rdc.rdcost > best_rdc->rdcost) {
      this_rdc = *best_rdc;
      mi->mode = ctx_den->best_mode;
      mi->ref_frame[0] = ctx_den->best_ref_frame;
      mi->interp_filter = ctx_den->best_pred_filter;
      if (ctx_den->best_ref_frame == INTRA_FRAME)
        mi->mv[0].as_int = INVALID_MV;
      else if (ctx_den->best_ref_frame == GOLDEN_FRAME) {
        mi->mv[0].as_int =
            ctx_den->frame_mv[ctx_den->best_mode][ctx_den->best_ref_frame]
                .as_int;
        if (ctx_den->reuse_inter_pred) {
          xd->plane[0].pre[0] = yv12_mb[GOLDEN_FRAME][0];
          vp9_build_inter_predictors_sby(xd, mi_row, mi_col, bsize);
        }
      }
      mi->tx_size = ctx_den->best_tx_size;
      x->skip_txfm[0] = ctx_den->best_mode_skip_txfm;
    } else {
      ctx_den->best_ref_frame = LAST_FRAME;
      *best_rdc = this_rdc;
    }
  }
}
#endif  // CONFIG_VP9_TEMPORAL_DENOISING

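// Looks up the low-temporal-variance flags computed for the 64x64 superblock
// and returns whether the current block, given its size and offset within the
// superblock, has been flagged.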
  1149. static INLINE int get_force_skip_low_temp_var(uint8_t *variance_low, int mi_row,
  1150. int mi_col, BLOCK_SIZE bsize) {
  1151. const int i = (mi_row & 0x7) >> 1;
  1152. const int j = (mi_col & 0x7) >> 1;
  1153. int force_skip_low_temp_var = 0;
  1154. // Set force_skip_low_temp_var based on the block size and block offset.
  1155. if (bsize == BLOCK_64X64) {
  1156. force_skip_low_temp_var = variance_low[0];
  1157. } else if (bsize == BLOCK_64X32) {
  1158. if (!(mi_col & 0x7) && !(mi_row & 0x7)) {
  1159. force_skip_low_temp_var = variance_low[1];
  1160. } else if (!(mi_col & 0x7) && (mi_row & 0x7)) {
  1161. force_skip_low_temp_var = variance_low[2];
  1162. }
  1163. } else if (bsize == BLOCK_32X64) {
  1164. if (!(mi_col & 0x7) && !(mi_row & 0x7)) {
  1165. force_skip_low_temp_var = variance_low[3];
  1166. } else if ((mi_col & 0x7) && !(mi_row & 0x7)) {
  1167. force_skip_low_temp_var = variance_low[4];
  1168. }
  1169. } else if (bsize == BLOCK_32X32) {
  1170. if (!(mi_col & 0x7) && !(mi_row & 0x7)) {
  1171. force_skip_low_temp_var = variance_low[5];
  1172. } else if ((mi_col & 0x7) && !(mi_row & 0x7)) {
  1173. force_skip_low_temp_var = variance_low[6];
  1174. } else if (!(mi_col & 0x7) && (mi_row & 0x7)) {
  1175. force_skip_low_temp_var = variance_low[7];
  1176. } else if ((mi_col & 0x7) && (mi_row & 0x7)) {
  1177. force_skip_low_temp_var = variance_low[8];
  1178. }
  1179. } else if (bsize == BLOCK_16X16) {
  1180. force_skip_low_temp_var = variance_low[pos_shift_16x16[i][j]];
  1181. } else if (bsize == BLOCK_32X16) {
  1182. // The col shift index for the second 16x16 block.
  1183. const int j2 = ((mi_col + 2) & 0x7) >> 1;
  1184. // Only if each 16x16 block inside has low temporal variance.
  1185. force_skip_low_temp_var = variance_low[pos_shift_16x16[i][j]] &&
  1186. variance_low[pos_shift_16x16[i][j2]];
  1187. } else if (bsize == BLOCK_16X32) {
  1188. // The row shift index for the second 16x16 block.
  1189. const int i2 = ((mi_row + 2) & 0x7) >> 1;
  1190. force_skip_low_temp_var = variance_low[pos_shift_16x16[i][j]] &&
  1191. variance_low[pos_shift_16x16[i2][j]];
  1192. }
  1193. return force_skip_low_temp_var;
  1194. }
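// Non-RD inter mode decision for a whole block. Rough flow:
//   1. Set up per-reference predictors and candidate MVs (find_predictors).
//   2. Loop over the (mode, reference) pairs in ref_mode_set[] /
//      ref_mode_set_svc[], running a motion search for NEWMV, an optional
//      interpolation filter search, and a model-based or transform-based RD
//      estimate for each surviving candidate.
//   3. Optionally evaluate a small set of intra modes.
//   4. With the temporal denoiser enabled, denoise the block and possibly
//      fall back to ZEROMV/LAST on the denoised result.
//   5. Update the adaptive RD threshold factors and return the best RD cost.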
  1195. void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
  1196. int mi_row, int mi_col, RD_COST *rd_cost,
  1197. BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx) {
  1198. VP9_COMMON *const cm = &cpi->common;
  1199. SPEED_FEATURES *const sf = &cpi->sf;
  1200. const SVC *const svc = &cpi->svc;
  1201. MACROBLOCKD *const xd = &x->e_mbd;
  1202. MODE_INFO *const mi = xd->mi[0];
  1203. struct macroblockd_plane *const pd = &xd->plane[0];
  1204. PREDICTION_MODE best_mode = ZEROMV;
  1205. MV_REFERENCE_FRAME ref_frame, best_ref_frame = LAST_FRAME;
  1206. MV_REFERENCE_FRAME usable_ref_frame;
  1207. TX_SIZE best_tx_size = TX_SIZES;
  1208. INTERP_FILTER best_pred_filter = EIGHTTAP;
  1209. int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES];
  1210. struct buf_2d yv12_mb[4][MAX_MB_PLANE];
  1211. static const int flag_list[4] = { 0, VP9_LAST_FLAG, VP9_GOLD_FLAG,
  1212. VP9_ALT_FLAG };
  1213. RD_COST this_rdc, best_rdc;
  1214. uint8_t skip_txfm = SKIP_TXFM_NONE, best_mode_skip_txfm = SKIP_TXFM_NONE;
// var_y and sse_y are saved to be used in the skip check (encode breakout)
// below.
  1216. unsigned int var_y = UINT_MAX;
  1217. unsigned int sse_y = UINT_MAX;
  1218. const int intra_cost_penalty = set_intra_cost_penalty(cpi, bsize);
  1219. int64_t inter_mode_thresh =
  1220. RDCOST(x->rdmult, x->rddiv, intra_cost_penalty, 0);
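// inter_mode_thresh is the RD cost of spending intra_cost_penalty extra bits
// with zero distortion; the intra search below is only run when the best
// inter cost exceeds this value.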
  1221. const int *const rd_threshes = cpi->rd.threshes[mi->segment_id][bsize];
  1222. const int *const rd_thresh_freq_fact = tile_data->thresh_freq_fact[bsize];
  1223. INTERP_FILTER filter_ref;
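// When the frame-level filter is SWITCHABLE, enable the per-block
// interpolation filter search on a chessboard pattern that alternates with
// block position and frame index, so roughly half the blocks pay for the
// search on any given frame.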
  1224. const int bsl = mi_width_log2_lookup[bsize];
  1225. const int pred_filter_search =
  1226. cm->interp_filter == SWITCHABLE
  1227. ? (((mi_row + mi_col) >> bsl) +
  1228. get_chessboard_index(cm->current_video_frame)) &
  1229. 0x1
  1230. : 0;
  1231. int const_motion[MAX_REF_FRAMES] = { 0 };
  1232. const int bh = num_4x4_blocks_high_lookup[bsize] << 2;
  1233. const int bw = num_4x4_blocks_wide_lookup[bsize] << 2;
// For speed 6, the result of the interpolation filter search is reused later
// in the actual encoding process.
// tmp[3] points to the dst buffer, and the other 3 point to allocated buffers.
  1237. PRED_BUFFER tmp[4];
  1238. DECLARE_ALIGNED(16, uint8_t, pred_buf[3 * 64 * 64]);
  1239. #if CONFIG_VP9_HIGHBITDEPTH
  1240. DECLARE_ALIGNED(16, uint16_t, pred_buf_16[3 * 64 * 64]);
  1241. #endif
  1242. struct buf_2d orig_dst = pd->dst;
  1243. PRED_BUFFER *best_pred = NULL;
  1244. PRED_BUFFER *this_mode_pred = NULL;
  1245. const int pixels_in_block = bh * bw;
  1246. int reuse_inter_pred = cpi->sf.reuse_inter_pred_sby && ctx->pred_pixel_ready;
  1247. int ref_frame_skip_mask = 0;
  1248. int idx;
  1249. int best_pred_sad = INT_MAX;
  1250. int best_early_term = 0;
  1251. int ref_frame_cost[MAX_REF_FRAMES];
  1252. int svc_force_zero_mode[3] = { 0 };
  1253. int perform_intra_pred = 1;
  1254. int use_golden_nonzeromv = 1;
  1255. int force_skip_low_temp_var = 0;
  1256. #if CONFIG_VP9_TEMPORAL_DENOISING
  1257. VP9_PICKMODE_CTX_DEN ctx_den;
  1258. int64_t zero_last_cost_orig = INT64_MAX;
  1259. #endif
  1260. init_ref_frame_cost(cm, xd, ref_frame_cost);
  1261. if (reuse_inter_pred) {
  1262. int i;
  1263. for (i = 0; i < 3; i++) {
  1264. #if CONFIG_VP9_HIGHBITDEPTH
  1265. if (cm->use_highbitdepth)
  1266. tmp[i].data = CONVERT_TO_BYTEPTR(&pred_buf_16[pixels_in_block * i]);
  1267. else
  1268. tmp[i].data = &pred_buf[pixels_in_block * i];
  1269. #else
  1270. tmp[i].data = &pred_buf[pixels_in_block * i];
  1271. #endif // CONFIG_VP9_HIGHBITDEPTH
  1272. tmp[i].stride = bw;
  1273. tmp[i].in_use = 0;
  1274. }
  1275. tmp[3].data = pd->dst.buf;
  1276. tmp[3].stride = pd->dst.stride;
  1277. tmp[3].in_use = 0;
  1278. }
  1279. x->skip_encode = cpi->sf.skip_encode_frame && x->q_index < QIDX_SKIP_THRESH;
  1280. x->skip = 0;
// Instead of using vp9_get_pred_context_switchable_interp(xd) to assign
// filter_ref, we use a less strict condition on assigning filter_ref.
// This is to reduce the probability of entering the flow of not assigning
// filter_ref and then skipping the filter search.
  1285. if (xd->above_mi && is_inter_block(xd->above_mi))
  1286. filter_ref = xd->above_mi->interp_filter;
  1287. else if (xd->left_mi && is_inter_block(xd->left_mi))
  1288. filter_ref = xd->left_mi->interp_filter;
  1289. else
  1290. filter_ref = cm->interp_filter;
  1291. // initialize mode decisions
  1292. vp9_rd_cost_reset(&best_rdc);
  1293. vp9_rd_cost_reset(rd_cost);
  1294. mi->sb_type = bsize;
  1295. mi->ref_frame[0] = NONE;
  1296. mi->ref_frame[1] = NONE;
  1297. mi->tx_size =
  1298. VPXMIN(max_txsize_lookup[bsize], tx_mode_to_biggest_tx_size[cm->tx_mode]);
  1299. if (sf->short_circuit_flat_blocks || sf->limit_newmv_early_exit) {
  1300. #if CONFIG_VP9_HIGHBITDEPTH
  1301. if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
  1302. x->source_variance = vp9_high_get_sby_perpixel_variance(
  1303. cpi, &x->plane[0].src, bsize, xd->bd);
  1304. else
  1305. #endif // CONFIG_VP9_HIGHBITDEPTH
  1306. x->source_variance =
  1307. vp9_get_sby_perpixel_variance(cpi, &x->plane[0].src, bsize);
  1308. }
  1309. #if CONFIG_VP9_TEMPORAL_DENOISING
  1310. if (cpi->oxcf.noise_sensitivity > 0 &&
  1311. cpi->denoiser.denoising_level > kDenLowLow) {
  1312. vp9_denoiser_reset_frame_stats(ctx);
  1313. }
  1314. #endif
  1315. if (cpi->rc.frames_since_golden == 0 && !cpi->use_svc) {
  1316. usable_ref_frame = LAST_FRAME;
  1317. } else {
  1318. usable_ref_frame = GOLDEN_FRAME;
  1319. }
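// If the golden frame was refreshed on the previous frame
// (frames_since_golden == 0) and SVC is not used, GOLDEN is presumably
// identical to LAST, so only LAST is searched.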
// For SVC mode, on spatial_layer_id > 0: if the reference has a different
// scale, constrain the inter mode search to test only zero motion.
  1322. if (cpi->use_svc && svc->force_zero_mode_spatial_ref &&
  1323. cpi->svc.spatial_layer_id > 0) {
  1324. if (cpi->ref_frame_flags & flag_list[LAST_FRAME]) {
  1325. struct scale_factors *const sf = &cm->frame_refs[LAST_FRAME - 1].sf;
  1326. if (vp9_is_scaled(sf)) svc_force_zero_mode[LAST_FRAME - 1] = 1;
  1327. }
  1328. if (cpi->ref_frame_flags & flag_list[GOLDEN_FRAME]) {
  1329. struct scale_factors *const sf = &cm->frame_refs[GOLDEN_FRAME - 1].sf;
  1330. if (vp9_is_scaled(sf)) svc_force_zero_mode[GOLDEN_FRAME - 1] = 1;
  1331. }
  1332. }
  1333. if (cpi->sf.short_circuit_low_temp_var) {
  1334. force_skip_low_temp_var =
  1335. get_force_skip_low_temp_var(&x->variance_low[0], mi_row, mi_col, bsize);
  1336. }
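// use_golden_nonzeromv is cleared when GOLDEN is unavailable, forced to zero
// motion for this SVC layer, or short-circuited due to low temporal variance;
// it gates the best_pred_sad computation further below (see the comment
// there).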
  1337. if (!((cpi->ref_frame_flags & flag_list[GOLDEN_FRAME]) &&
  1338. !svc_force_zero_mode[GOLDEN_FRAME - 1] && !force_skip_low_temp_var))
  1339. use_golden_nonzeromv = 0;
  1340. for (ref_frame = LAST_FRAME; ref_frame <= usable_ref_frame; ++ref_frame) {
  1341. find_predictors(cpi, x, ref_frame, frame_mv, const_motion,
  1342. &ref_frame_skip_mask, flag_list, tile_data, mi_row, mi_col,
  1343. yv12_mb, bsize, force_skip_low_temp_var);
  1344. }
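// Main mode loop: each ref_mode_set[] / ref_mode_set_svc[] entry pairs one of
// the RT_INTER_MODES prediction modes with a reference frame; candidates are
// pruned by the flags, masks and RD thresholds checked at the top of the loop
// body.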
  1345. for (idx = 0; idx < RT_INTER_MODES; ++idx) {
  1346. int rate_mv = 0;
  1347. int mode_rd_thresh;
  1348. int mode_index;
  1349. int i;
  1350. int64_t this_sse;
  1351. int is_skippable;
  1352. int this_early_term = 0;
  1353. PREDICTION_MODE this_mode = ref_mode_set[idx].pred_mode;
  1354. if (cpi->use_svc) this_mode = ref_mode_set_svc[idx].pred_mode;
  1355. if (sf->short_circuit_flat_blocks && x->source_variance == 0 &&
  1356. this_mode != NEARESTMV) {
  1357. continue;
  1358. }
  1359. if (!(cpi->sf.inter_mode_mask[bsize] & (1 << this_mode))) continue;
  1360. ref_frame = ref_mode_set[idx].ref_frame;
  1361. if (cpi->use_svc) {
  1362. ref_frame = ref_mode_set_svc[idx].ref_frame;
  1363. }
  1364. if (!(cpi->ref_frame_flags & flag_list[ref_frame])) continue;
  1365. if (const_motion[ref_frame] && this_mode == NEARMV) continue;
  1366. // Skip non-zeromv mode search for golden frame if force_skip_low_temp_var
  1367. // is set. If nearestmv for golden frame is 0, zeromv mode will be skipped
  1368. // later.
  1369. if (force_skip_low_temp_var && ref_frame == GOLDEN_FRAME &&
  1370. frame_mv[this_mode][ref_frame].as_int != 0) {
  1371. continue;
  1372. }
  1373. if (cpi->sf.short_circuit_low_temp_var == 2 && force_skip_low_temp_var &&
  1374. ref_frame == LAST_FRAME && this_mode == NEWMV) {
  1375. continue;
  1376. }
  1377. if (cpi->use_svc) {
  1378. if (svc_force_zero_mode[ref_frame - 1] &&
  1379. frame_mv[this_mode][ref_frame].as_int != 0)
  1380. continue;
  1381. }
  1382. if (!force_skip_low_temp_var &&
  1383. !(frame_mv[this_mode][ref_frame].as_int == 0 &&
  1384. ref_frame == LAST_FRAME)) {
  1385. i = (ref_frame == LAST_FRAME) ? GOLDEN_FRAME : LAST_FRAME;
  1386. if ((cpi->ref_frame_flags & flag_list[i]) && sf->reference_masking)
  1387. if (x->pred_mv_sad[ref_frame] > (x->pred_mv_sad[i] << 1))
  1388. ref_frame_skip_mask |= (1 << ref_frame);
  1389. }
  1390. if (ref_frame_skip_mask & (1 << ref_frame)) continue;
  1391. // Select prediction reference frames.
  1392. for (i = 0; i < MAX_MB_PLANE; i++)
  1393. xd->plane[i].pre[0] = yv12_mb[ref_frame][i];
  1394. mi->ref_frame[0] = ref_frame;
  1395. set_ref_ptrs(cm, xd, ref_frame, NONE);
  1396. mode_index = mode_idx[ref_frame][INTER_OFFSET(this_mode)];
  1397. mode_rd_thresh = best_mode_skip_txfm ? rd_threshes[mode_index] << 1
  1398. : rd_threshes[mode_index];
  1399. // Increase mode_rd_thresh value for GOLDEN_FRAME for improved encoding
  1400. // speed with little/no subjective quality loss.
  1401. if (cpi->sf.bias_golden && ref_frame == GOLDEN_FRAME &&
  1402. cpi->rc.frames_since_golden > 4)
  1403. mode_rd_thresh = mode_rd_thresh << 3;
  1404. if (rd_less_than_thresh(best_rdc.rdcost, mode_rd_thresh,
  1405. rd_thresh_freq_fact[mode_index]))
  1406. continue;
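// NEWMV handling takes one of three paths: (a) for GOLDEN/ALTREF in non-SVC
// CBR mode, a cheap integer-projection motion estimate followed by a
// fractional refinement, (b) for SVC enhancement layers with use_base_mv, a
// search seeded by the base-layer MV when its SAD looks good, and (c) the
// default combined (full-pel plus sub-pel) motion search.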
  1407. if (this_mode == NEWMV) {
  1408. if (ref_frame > LAST_FRAME && !cpi->use_svc &&
  1409. cpi->oxcf.rc_mode == VPX_CBR) {
  1410. int tmp_sad;
  1411. uint32_t dis;
  1412. int cost_list[5];
  1413. if (bsize < BLOCK_16X16) continue;
  1414. tmp_sad = vp9_int_pro_motion_estimation(cpi, x, bsize, mi_row, mi_col);
  1415. if (tmp_sad > x->pred_mv_sad[LAST_FRAME]) continue;
  1416. if (tmp_sad + (num_pels_log2_lookup[bsize] << 4) > best_pred_sad)
  1417. continue;
  1418. frame_mv[NEWMV][ref_frame].as_int = mi->mv[0].as_int;
  1419. rate_mv = vp9_mv_bit_cost(&frame_mv[NEWMV][ref_frame].as_mv,
  1420. &x->mbmi_ext->ref_mvs[ref_frame][0].as_mv,
  1421. x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
  1422. frame_mv[NEWMV][ref_frame].as_mv.row >>= 3;
  1423. frame_mv[NEWMV][ref_frame].as_mv.col >>= 3;
  1424. cpi->find_fractional_mv_step(
  1425. x, &frame_mv[NEWMV][ref_frame].as_mv,
  1426. &x->mbmi_ext->ref_mvs[ref_frame][0].as_mv,
  1427. cpi->common.allow_high_precision_mv, x->errorperbit,
  1428. &cpi->fn_ptr[bsize], cpi->sf.mv.subpel_force_stop,
  1429. cpi->sf.mv.subpel_iters_per_step, cond_cost_list(cpi, cost_list),
  1430. x->nmvjointcost, x->mvcost, &dis, &x->pred_sse[ref_frame], NULL, 0,
  1431. 0);
  1432. } else if (svc->use_base_mv && svc->spatial_layer_id) {
  1433. if (frame_mv[NEWMV][ref_frame].as_int != INVALID_MV) {
  1434. const int pre_stride = xd->plane[0].pre[0].stride;
  1435. int base_mv_sad = INT_MAX;
  1436. const float base_mv_bias = sf->base_mv_aggressive ? 1.5f : 1.0f;
  1437. const uint8_t *const pre_buf =
  1438. xd->plane[0].pre[0].buf +
  1439. (frame_mv[NEWMV][ref_frame].as_mv.row >> 3) * pre_stride +
  1440. (frame_mv[NEWMV][ref_frame].as_mv.col >> 3);
  1441. base_mv_sad = cpi->fn_ptr[bsize].sdf(
  1442. x->plane[0].src.buf, x->plane[0].src.stride, pre_buf, pre_stride);
  1443. if (base_mv_sad < (int)(base_mv_bias * x->pred_mv_sad[ref_frame])) {
  1444. // Base layer mv is good.
  1445. if (!combined_motion_search(cpi, x, bsize, mi_row, mi_col,
  1446. &frame_mv[NEWMV][ref_frame], &rate_mv,
  1447. best_rdc.rdcost, 1)) {
  1448. continue;
  1449. }
  1450. } else if (!combined_motion_search(cpi, x, bsize, mi_row, mi_col,
  1451. &frame_mv[NEWMV][ref_frame],
  1452. &rate_mv, best_rdc.rdcost, 0)) {
  1453. continue;
  1454. }
  1455. } else if (!combined_motion_search(cpi, x, bsize, mi_row, mi_col,
  1456. &frame_mv[NEWMV][ref_frame],
  1457. &rate_mv, best_rdc.rdcost, 0)) {
  1458. continue;
  1459. }
  1460. } else if (!combined_motion_search(cpi, x, bsize, mi_row, mi_col,
  1461. &frame_mv[NEWMV][ref_frame], &rate_mv,
  1462. best_rdc.rdcost, 0)) {
  1463. continue;
  1464. }
  1465. }
// If use_golden_nonzeromv is false, NEWMV mode is skipped for golden, so
// there is no need to compute best_pred_sad, which is only used to skip
// golden NEWMV.
  1468. if (use_golden_nonzeromv && this_mode == NEWMV && ref_frame == LAST_FRAME &&
  1469. frame_mv[NEWMV][LAST_FRAME].as_int != INVALID_MV) {
  1470. const int pre_stride = xd->plane[0].pre[0].stride;
  1471. const uint8_t *const pre_buf =
  1472. xd->plane[0].pre[0].buf +
  1473. (frame_mv[NEWMV][LAST_FRAME].as_mv.row >> 3) * pre_stride +
  1474. (frame_mv[NEWMV][LAST_FRAME].as_mv.col >> 3);
  1475. best_pred_sad = cpi->fn_ptr[bsize].sdf(
  1476. x->plane[0].src.buf, x->plane[0].src.stride, pre_buf, pre_stride);
  1477. x->pred_mv_sad[LAST_FRAME] = best_pred_sad;
  1478. }
  1479. if (this_mode != NEARESTMV &&
  1480. frame_mv[this_mode][ref_frame].as_int ==
  1481. frame_mv[NEARESTMV][ref_frame].as_int)
  1482. continue;
  1483. mi->mode = this_mode;
  1484. mi->mv[0].as_int = frame_mv[this_mode][ref_frame].as_int;
// Search for the best prediction filter type, when the resulting
// motion vector is at sub-pixel accuracy level for the luma component, i.e.,
// the last three bits are non-zero.
  1488. if (reuse_inter_pred) {
  1489. if (!this_mode_pred) {
  1490. this_mode_pred = &tmp[3];
  1491. } else {
  1492. this_mode_pred = &tmp[get_pred_buffer(tmp, 3)];
  1493. pd->dst.buf = this_mode_pred->data;
  1494. pd->dst.stride = bw;
  1495. }
  1496. }
  1497. if ((this_mode == NEWMV || filter_ref == SWITCHABLE) &&
  1498. pred_filter_search &&
  1499. (ref_frame == LAST_FRAME ||
  1500. (ref_frame == GOLDEN_FRAME &&
  1501. (cpi->use_svc || cpi->oxcf.rc_mode == VPX_VBR))) &&
  1502. (((mi->mv[0].as_mv.row | mi->mv[0].as_mv.col) & 0x07) != 0)) {
  1503. int pf_rate[3];
  1504. int64_t pf_dist[3];
  1505. unsigned int pf_var[3];
  1506. unsigned int pf_sse[3];
  1507. TX_SIZE pf_tx_size[3];
  1508. int64_t best_cost = INT64_MAX;
  1509. INTERP_FILTER best_filter = SWITCHABLE, filter;
  1510. PRED_BUFFER *current_pred = this_mode_pred;
  1511. for (filter = EIGHTTAP; filter <= EIGHTTAP_SMOOTH; ++filter) {
  1512. int64_t cost;
  1513. mi->interp_filter = filter;
  1514. vp9_build_inter_predictors_sby(xd, mi_row, mi_col, bsize);
  1515. model_rd_for_sb_y(cpi, bsize, x, xd, &pf_rate[filter], &pf_dist[filter],
  1516. &pf_var[filter], &pf_sse[filter]);
  1517. pf_rate[filter] += vp9_get_switchable_rate(cpi, xd);
  1518. cost = RDCOST(x->rdmult, x->rddiv, pf_rate[filter], pf_dist[filter]);
  1519. pf_tx_size[filter] = mi->tx_size;
  1520. if (cost < best_cost) {
  1521. best_filter = filter;
  1522. best_cost = cost;
  1523. skip_txfm = x->skip_txfm[0];
  1524. if (reuse_inter_pred) {
  1525. if (this_mode_pred != current_pred) {
  1526. free_pred_buffer(this_mode_pred);
  1527. this_mode_pred = current_pred;
  1528. }
  1529. current_pred = &tmp[get_pred_buffer(tmp, 3)];
  1530. pd->dst.buf = current_pred->data;
  1531. pd->dst.stride = bw;
  1532. }
  1533. }
  1534. }
  1535. if (reuse_inter_pred && this_mode_pred != current_pred)
  1536. free_pred_buffer(current_pred);
  1537. mi->interp_filter = best_filter;
  1538. mi->tx_size = pf_tx_size[best_filter];
  1539. this_rdc.rate = pf_rate[best_filter];
  1540. this_rdc.dist = pf_dist[best_filter];
  1541. var_y = pf_var[best_filter];
  1542. sse_y = pf_sse[best_filter];
  1543. x->skip_txfm[0] = skip_txfm;
  1544. if (reuse_inter_pred) {
  1545. pd->dst.buf = this_mode_pred->data;
  1546. pd->dst.stride = this_mode_pred->stride;
  1547. }
  1548. } else {
  1549. // TODO(jackychen): the low-bitdepth condition causes a segfault in
  1550. // high-bitdepth builds.
  1551. // https://bugs.chromium.org/p/webm/issues/detail?id=1250
  1552. #if CONFIG_VP9_HIGHBITDEPTH
  1553. const int large_block = bsize > BLOCK_32X32;
  1554. #else
  1555. const int large_block =
  1556. x->sb_is_skin ? bsize > BLOCK_32X32 : bsize >= BLOCK_32X32;
  1557. #endif
  1558. mi->interp_filter = (filter_ref == SWITCHABLE) ? EIGHTTAP : filter_ref;
  1559. vp9_build_inter_predictors_sby(xd, mi_row, mi_col, bsize);
// For large partition blocks, extra testing is done to allow early
// termination.
  1561. if (cpi->oxcf.rc_mode == VPX_CBR && large_block &&
  1562. !cyclic_refresh_segment_id_boosted(xd->mi[0]->segment_id) &&
  1563. cm->base_qindex) {
  1564. model_rd_for_sb_y_large(cpi, bsize, x, xd, &this_rdc.rate,
  1565. &this_rdc.dist, &var_y, &sse_y, mi_row, mi_col,
  1566. &this_early_term);
  1567. } else {
  1568. model_rd_for_sb_y(cpi, bsize, x, xd, &this_rdc.rate, &this_rdc.dist,
  1569. &var_y, &sse_y);
  1570. }
  1571. }
  1572. if (!this_early_term) {
  1573. this_sse = (int64_t)sse_y;
  1574. block_yrd(cpi, x, &this_rdc, &is_skippable, &this_sse, bsize,
  1575. VPXMIN(mi->tx_size, TX_16X16));
  1576. x->skip_txfm[0] = is_skippable;
  1577. if (is_skippable) {
  1578. this_rdc.rate = vp9_cost_bit(vp9_get_skip_prob(cm, xd), 1);
  1579. } else {
  1580. if (RDCOST(x->rdmult, x->rddiv, this_rdc.rate, this_rdc.dist) <
  1581. RDCOST(x->rdmult, x->rddiv, 0, this_sse)) {
  1582. this_rdc.rate += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 0);
  1583. } else {
  1584. this_rdc.rate = vp9_cost_bit(vp9_get_skip_prob(cm, xd), 1);
  1585. this_rdc.dist = this_sse;
  1586. x->skip_txfm[0] = SKIP_TXFM_AC_DC;
  1587. }
  1588. }
  1589. if (cm->interp_filter == SWITCHABLE) {
  1590. if ((mi->mv[0].as_mv.row | mi->mv[0].as_mv.col) & 0x07)
  1591. this_rdc.rate += vp9_get_switchable_rate(cpi, xd);
  1592. }
  1593. } else {
  1594. this_rdc.rate += cm->interp_filter == SWITCHABLE
  1595. ? vp9_get_switchable_rate(cpi, xd)
  1596. : 0;
  1597. this_rdc.rate += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 1);
  1598. }
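// If either chroma plane has been flagged as perceptually significant
// (color_sensitivity), add a model-based chroma rate/distortion estimate,
// presumably so that chroma-heavy blocks are not scored on luma alone.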
  1599. if (x->color_sensitivity[0] || x->color_sensitivity[1]) {
  1600. RD_COST rdc_uv;
  1601. const BLOCK_SIZE uv_bsize = get_plane_block_size(bsize, &xd->plane[1]);
  1602. if (x->color_sensitivity[0])
  1603. vp9_build_inter_predictors_sbp(xd, mi_row, mi_col, bsize, 1);
  1604. if (x->color_sensitivity[1])
  1605. vp9_build_inter_predictors_sbp(xd, mi_row, mi_col, bsize, 2);
  1606. model_rd_for_sb_uv(cpi, uv_bsize, x, xd, &rdc_uv, &var_y, &sse_y, 1, 2);
  1607. this_rdc.rate += rdc_uv.rate;
  1608. this_rdc.dist += rdc_uv.dist;
  1609. }
  1610. this_rdc.rate += rate_mv;
  1611. this_rdc.rate += cpi->inter_mode_cost[x->mbmi_ext->mode_context[ref_frame]]
  1612. [INTER_OFFSET(this_mode)];
  1613. this_rdc.rate += ref_frame_cost[ref_frame];
  1614. this_rdc.rdcost = RDCOST(x->rdmult, x->rddiv, this_rdc.rate, this_rdc.dist);
// Bias against NEWMV that is very different from its neighbors, and bias
// toward small-motion LAST_FRAME for noisy input.
  1617. if (cpi->oxcf.rc_mode == VPX_CBR && cpi->oxcf.speed >= 5 &&
  1618. cpi->oxcf.content != VP9E_CONTENT_SCREEN) {
  1619. vp9_NEWMV_diff_bias(&cpi->noise_estimate, xd, this_mode, &this_rdc, bsize,
  1620. frame_mv[this_mode][ref_frame].as_mv.row,
  1621. frame_mv[this_mode][ref_frame].as_mv.col,
  1622. ref_frame == LAST_FRAME);
  1623. }
// Skip check (encode breakout): test whether this block can be
// reconstructed by prediction only.
  1626. if (cpi->allow_encode_breakout) {
  1627. encode_breakout_test(cpi, x, bsize, mi_row, mi_col, ref_frame, this_mode,
  1628. var_y, sse_y, yv12_mb, &this_rdc.rate,
  1629. &this_rdc.dist);
  1630. if (x->skip) {
  1631. this_rdc.rate += rate_mv;
  1632. this_rdc.rdcost =
  1633. RDCOST(x->rdmult, x->rddiv, this_rdc.rate, this_rdc.dist);
  1634. }
  1635. }
  1636. #if CONFIG_VP9_TEMPORAL_DENOISING
  1637. if (cpi->oxcf.noise_sensitivity > 0 &&
  1638. cpi->denoiser.denoising_level > kDenLowLow) {
  1639. vp9_denoiser_update_frame_stats(mi, sse_y, this_mode, ctx);
  1640. // Keep track of zero_last cost.
  1641. if (ref_frame == LAST_FRAME && frame_mv[this_mode][ref_frame].as_int == 0)
  1642. zero_last_cost_orig = this_rdc.rdcost;
  1643. }
  1644. #else
  1645. (void)ctx;
  1646. #endif
  1647. if (this_rdc.rdcost < best_rdc.rdcost || x->skip) {
  1648. best_rdc = this_rdc;
  1649. best_mode = this_mode;
  1650. best_pred_filter = mi->interp_filter;
  1651. best_tx_size = mi->tx_size;
  1652. best_ref_frame = ref_frame;
  1653. best_mode_skip_txfm = x->skip_txfm[0];
  1654. best_early_term = this_early_term;
  1655. if (reuse_inter_pred) {
  1656. free_pred_buffer(best_pred);
  1657. best_pred = this_mode_pred;
  1658. }
  1659. } else {
  1660. if (reuse_inter_pred) free_pred_buffer(this_mode_pred);
  1661. }
  1662. if (x->skip) break;
  1663. // If early termination flag is 1 and at least 2 modes are checked,
  1664. // the mode search is terminated.
  1665. if (best_early_term && idx > 0) {
  1666. x->skip = 1;
  1667. break;
  1668. }
  1669. }
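// Commit the best inter mode found above to mi; it may still be replaced by
// an intra mode in the search below, or by ZEROMV/LAST after denoising.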
  1670. mi->mode = best_mode;
  1671. mi->interp_filter = best_pred_filter;
  1672. mi->tx_size = best_tx_size;
  1673. mi->ref_frame[0] = best_ref_frame;
  1674. mi->mv[0].as_int = frame_mv[best_mode][best_ref_frame].as_int;
  1675. xd->mi[0]->bmi[0].as_mv[0].as_int = mi->mv[0].as_int;
  1676. x->skip_txfm[0] = best_mode_skip_txfm;
// For a spatial enhancement layer: perform intra prediction only if the base
// layer is chosen as the reference. Always perform intra prediction if
// LAST is the only reference or if is_key_frame is set.
  1680. if (cpi->svc.spatial_layer_id) {
  1681. perform_intra_pred =
  1682. cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame ||
  1683. !(cpi->ref_frame_flags & flag_list[GOLDEN_FRAME]) ||
  1684. (!cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame &&
  1685. svc_force_zero_mode[best_ref_frame - 1]);
  1686. inter_mode_thresh = (inter_mode_thresh << 1) + inter_mode_thresh;
  1687. }
// Perform the intra prediction search if the best inter RD cost is above a
// certain threshold.
  1690. if ((!force_skip_low_temp_var || bsize < BLOCK_32X32) && perform_intra_pred &&
  1691. (best_rdc.rdcost == INT64_MAX ||
  1692. (!x->skip && best_rdc.rdcost > inter_mode_thresh &&
  1693. bsize <= cpi->sf.max_intra_bsize))) {
  1694. struct estimate_block_intra_args args = { cpi, x, DC_PRED, 1, 0 };
  1695. int i;
  1696. TX_SIZE best_intra_tx_size = TX_SIZES;
  1697. TX_SIZE intra_tx_size =
  1698. VPXMIN(max_txsize_lookup[bsize],
  1699. tx_mode_to_biggest_tx_size[cpi->common.tx_mode]);
  1700. if (cpi->oxcf.content != VP9E_CONTENT_SCREEN && intra_tx_size > TX_16X16)
  1701. intra_tx_size = TX_16X16;
  1702. if (reuse_inter_pred && best_pred != NULL) {
  1703. if (best_pred->data == orig_dst.buf) {
  1704. this_mode_pred = &tmp[get_pred_buffer(tmp, 3)];
  1705. #if CONFIG_VP9_HIGHBITDEPTH
  1706. if (cm->use_highbitdepth)
  1707. vpx_highbd_convolve_copy(best_pred->data, best_pred->stride,
  1708. this_mode_pred->data, this_mode_pred->stride,
  1709. NULL, 0, NULL, 0, bw, bh, xd->bd);
  1710. else
  1711. vpx_convolve_copy(best_pred->data, best_pred->stride,
  1712. this_mode_pred->data, this_mode_pred->stride, NULL,
  1713. 0, NULL, 0, bw, bh);
  1714. #else
  1715. vpx_convolve_copy(best_pred->data, best_pred->stride,
  1716. this_mode_pred->data, this_mode_pred->stride, NULL, 0,
  1717. NULL, 0, bw, bh);
  1718. #endif // CONFIG_VP9_HIGHBITDEPTH
  1719. best_pred = this_mode_pred;
  1720. }
  1721. }
  1722. pd->dst = orig_dst;
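// Evaluate the candidate intra modes listed in intra_mode_list[] (defined
// earlier in this file), estimating the rate/distortion per transform block
// via estimate_block_intra().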
  1723. for (i = 0; i < 4; ++i) {
  1724. const PREDICTION_MODE this_mode = intra_mode_list[i];
  1725. THR_MODES mode_index = mode_idx[INTRA_FRAME][mode_offset(this_mode)];
  1726. int mode_rd_thresh = rd_threshes[mode_index];
  1727. if (sf->short_circuit_flat_blocks && x->source_variance == 0 &&
  1728. this_mode != DC_PRED) {
  1729. continue;
  1730. }
  1731. if (!((1 << this_mode) & cpi->sf.intra_y_mode_bsize_mask[bsize]))
  1732. continue;
  1733. if (rd_less_than_thresh(best_rdc.rdcost, mode_rd_thresh,
  1734. rd_thresh_freq_fact[mode_index]))
  1735. continue;
  1736. mi->mode = this_mode;
  1737. mi->ref_frame[0] = INTRA_FRAME;
  1738. this_rdc.dist = this_rdc.rate = 0;
  1739. args.mode = this_mode;
  1740. args.skippable = 1;
  1741. args.rdc = &this_rdc;
  1742. mi->tx_size = intra_tx_size;
  1743. vp9_foreach_transformed_block_in_plane(xd, bsize, 0, estimate_block_intra,
  1744. &args);
// Check the skip cost here since skippable is not set for uv; this
// mirrors the behavior used by inter.
  1747. if (args.skippable) {
  1748. x->skip_txfm[0] = SKIP_TXFM_AC_DC;
  1749. this_rdc.rate = vp9_cost_bit(vp9_get_skip_prob(&cpi->common, xd), 1);
  1750. } else {
  1751. x->skip_txfm[0] = SKIP_TXFM_NONE;
  1752. this_rdc.rate += vp9_cost_bit(vp9_get_skip_prob(&cpi->common, xd), 0);
  1753. }
  1754. // Inter and intra RD will mismatch in scale for non-screen content.
  1755. if (cpi->oxcf.content == VP9E_CONTENT_SCREEN) {
  1756. if (x->color_sensitivity[0])
  1757. vp9_foreach_transformed_block_in_plane(xd, bsize, 1,
  1758. estimate_block_intra, &args);
  1759. if (x->color_sensitivity[1])
  1760. vp9_foreach_transformed_block_in_plane(xd, bsize, 2,
  1761. estimate_block_intra, &args);
  1762. }
  1763. this_rdc.rate += cpi->mbmode_cost[this_mode];
  1764. this_rdc.rate += ref_frame_cost[INTRA_FRAME];
  1765. this_rdc.rate += intra_cost_penalty;
  1766. this_rdc.rdcost =
  1767. RDCOST(x->rdmult, x->rddiv, this_rdc.rate, this_rdc.dist);
  1768. if (this_rdc.rdcost < best_rdc.rdcost) {
  1769. best_rdc = this_rdc;
  1770. best_mode = this_mode;
  1771. best_intra_tx_size = mi->tx_size;
  1772. best_ref_frame = INTRA_FRAME;
  1773. mi->uv_mode = this_mode;
  1774. mi->mv[0].as_int = INVALID_MV;
  1775. best_mode_skip_txfm = x->skip_txfm[0];
  1776. }
  1777. }
  1778. // Reset mb_mode_info to the best inter mode.
  1779. if (best_ref_frame != INTRA_FRAME) {
  1780. mi->tx_size = best_tx_size;
  1781. } else {
  1782. mi->tx_size = best_intra_tx_size;
  1783. }
  1784. }
  1785. pd->dst = orig_dst;
  1786. mi->mode = best_mode;
  1787. mi->ref_frame[0] = best_ref_frame;
  1788. x->skip_txfm[0] = best_mode_skip_txfm;
  1789. if (!is_inter_block(mi)) {
  1790. mi->interp_filter = SWITCHABLE_FILTERS;
  1791. }
  1792. if (reuse_inter_pred && best_pred != NULL) {
  1793. if (best_pred->data != orig_dst.buf && is_inter_mode(mi->mode)) {
  1794. #if CONFIG_VP9_HIGHBITDEPTH
  1795. if (cm->use_highbitdepth)
  1796. vpx_highbd_convolve_copy(best_pred->data, best_pred->stride,
  1797. pd->dst.buf, pd->dst.stride, NULL, 0, NULL, 0,
  1798. bw, bh, xd->bd);
  1799. else
  1800. vpx_convolve_copy(best_pred->data, best_pred->stride, pd->dst.buf,
  1801. pd->dst.stride, NULL, 0, NULL, 0, bw, bh);
  1802. #else
  1803. vpx_convolve_copy(best_pred->data, best_pred->stride, pd->dst.buf,
  1804. pd->dst.stride, NULL, 0, NULL, 0, bw, bh);
  1805. #endif // CONFIG_VP9_HIGHBITDEPTH
  1806. }
  1807. }
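// With the temporal denoiser active (and no pending resize or denoiser
// reset), denoise the block and, depending on the denoiser decision, possibly
// switch the final mode to ZEROMV on LAST via
// recheck_zeromv_after_denoising() above.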
  1808. #if CONFIG_VP9_TEMPORAL_DENOISING
  1809. if (cpi->oxcf.noise_sensitivity > 0 && cpi->resize_pending == 0 &&
  1810. cpi->denoiser.denoising_level > kDenLowLow && cpi->denoiser.reset == 0) {
  1811. VP9_DENOISER_DECISION decision = COPY_BLOCK;
  1812. vp9_pickmode_ctx_den_update(&ctx_den, zero_last_cost_orig, ref_frame_cost,
  1813. frame_mv, reuse_inter_pred, best_tx_size,
  1814. best_mode, best_ref_frame, best_pred_filter,
  1815. best_mode_skip_txfm);
  1816. vp9_denoiser_denoise(cpi, x, mi_row, mi_col, bsize, ctx, &decision);
  1817. recheck_zeromv_after_denoising(cpi, mi, x, xd, decision, &ctx_den, yv12_mb,
  1818. &best_rdc, bsize, mi_row, mi_col);
  1819. best_ref_frame = ctx_den.best_ref_frame;
  1820. }
  1821. #endif
  1822. if (cpi->sf.adaptive_rd_thresh) {
  1823. THR_MODES best_mode_idx = mode_idx[best_ref_frame][mode_offset(mi->mode)];
  1824. if (best_ref_frame == INTRA_FRAME) {
  1825. // Only consider the modes that are included in the intra_mode_list.
  1826. int intra_modes = sizeof(intra_mode_list) / sizeof(PREDICTION_MODE);
  1827. int i;
  1828. // TODO(yunqingwang): Check intra mode mask and only update freq_fact
  1829. // for those valid modes.
  1830. for (i = 0; i < intra_modes; i++) {
  1831. update_thresh_freq_fact(cpi, tile_data, x->source_variance, bsize,
  1832. INTRA_FRAME, best_mode_idx, intra_mode_list[i]);
  1833. }
  1834. } else {
  1835. for (ref_frame = LAST_FRAME; ref_frame <= GOLDEN_FRAME; ++ref_frame) {
  1836. PREDICTION_MODE this_mode;
  1837. if (best_ref_frame != ref_frame) continue;
  1838. for (this_mode = NEARESTMV; this_mode <= NEWMV; ++this_mode) {
  1839. update_thresh_freq_fact(cpi, tile_data, x->source_variance, bsize,
  1840. ref_frame, best_mode_idx, this_mode);
  1841. }
  1842. }
  1843. }
  1844. }
  1845. *rd_cost = best_rdc;
  1846. }
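// Non-RD mode decision for sub-8x8 partitions. For each of LAST and GOLDEN,
// every 4x4/4x8/8x4 sub-block tries NEARESTMV through NEWMV (with a full-pel
// plus fractional search for NEWMV), and the reference with the lowest
// accumulated RD cost provides the final per-sub-block modes and MVs.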
  1847. void vp9_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, int mi_row,
  1848. int mi_col, RD_COST *rd_cost, BLOCK_SIZE bsize,
  1849. PICK_MODE_CONTEXT *ctx) {
  1850. VP9_COMMON *const cm = &cpi->common;
  1851. SPEED_FEATURES *const sf = &cpi->sf;
  1852. MACROBLOCKD *const xd = &x->e_mbd;
  1853. MODE_INFO *const mi = xd->mi[0];
  1854. MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;
  1855. const struct segmentation *const seg = &cm->seg;
  1856. MV_REFERENCE_FRAME ref_frame, second_ref_frame = NONE;
  1857. MV_REFERENCE_FRAME best_ref_frame = NONE;
  1858. unsigned char segment_id = mi->segment_id;
  1859. struct buf_2d yv12_mb[4][MAX_MB_PLANE];
  1860. static const int flag_list[4] = { 0, VP9_LAST_FLAG, VP9_GOLD_FLAG,
  1861. VP9_ALT_FLAG };
  1862. int64_t best_rd = INT64_MAX;
  1863. b_mode_info bsi[MAX_REF_FRAMES][4];
  1864. int ref_frame_skip_mask = 0;
  1865. const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
  1866. const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
  1867. int idx, idy;
  1868. x->skip_encode = sf->skip_encode_frame && x->q_index < QIDX_SKIP_THRESH;
  1869. ctx->pred_pixel_ready = 0;
  1870. for (ref_frame = LAST_FRAME; ref_frame <= GOLDEN_FRAME; ++ref_frame) {
  1871. const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_buffer(cpi, ref_frame);
  1872. int_mv dummy_mv[2];
  1873. x->pred_mv_sad[ref_frame] = INT_MAX;
  1874. if ((cpi->ref_frame_flags & flag_list[ref_frame]) && (yv12 != NULL)) {
  1875. int_mv *const candidates = mbmi_ext->ref_mvs[ref_frame];
  1876. const struct scale_factors *const sf = &cm->frame_refs[ref_frame - 1].sf;
  1877. vp9_setup_pred_block(xd, yv12_mb[ref_frame], yv12, mi_row, mi_col, sf,
  1878. sf);
  1879. vp9_find_mv_refs(cm, xd, xd->mi[0], ref_frame, candidates, mi_row, mi_col,
  1880. mbmi_ext->mode_context);
  1881. vp9_find_best_ref_mvs(xd, cm->allow_high_precision_mv, candidates,
  1882. &dummy_mv[0], &dummy_mv[1]);
  1883. } else {
  1884. ref_frame_skip_mask |= (1 << ref_frame);
  1885. }
  1886. }
  1887. mi->sb_type = bsize;
  1888. mi->tx_size = TX_4X4;
  1889. mi->uv_mode = DC_PRED;
  1890. mi->ref_frame[0] = LAST_FRAME;
  1891. mi->ref_frame[1] = NONE;
  1892. mi->interp_filter =
  1893. cm->interp_filter == SWITCHABLE ? EIGHTTAP : cm->interp_filter;
  1894. for (ref_frame = LAST_FRAME; ref_frame <= GOLDEN_FRAME; ++ref_frame) {
  1895. int64_t this_rd = 0;
  1896. int plane;
  1897. if (ref_frame_skip_mask & (1 << ref_frame)) continue;
  1898. #if CONFIG_BETTER_HW_COMPATIBILITY
  1899. if ((bsize == BLOCK_8X4 || bsize == BLOCK_4X8) && ref_frame > INTRA_FRAME &&
  1900. vp9_is_scaled(&cm->frame_refs[ref_frame - 1].sf))
  1901. continue;
  1902. #endif
// TODO(jingning, agrange): Scaled reference frames are not supported for
// sub8x8 blocks. Is this supported now?
  1905. if (ref_frame > INTRA_FRAME &&
  1906. vp9_is_scaled(&cm->frame_refs[ref_frame - 1].sf))
  1907. continue;
// If the segment reference frame feature is enabled,
// then do nothing if the current ref frame is not allowed.
  1910. if (segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME) &&
  1911. get_segdata(seg, segment_id, SEG_LVL_REF_FRAME) != (int)ref_frame)
  1912. continue;
  1913. mi->ref_frame[0] = ref_frame;
  1914. x->skip = 0;
  1915. set_ref_ptrs(cm, xd, ref_frame, second_ref_frame);
  1916. // Select prediction reference frames.
  1917. for (plane = 0; plane < MAX_MB_PLANE; plane++)
  1918. xd->plane[plane].pre[0] = yv12_mb[ref_frame][plane];
  1919. for (idy = 0; idy < 2; idy += num_4x4_blocks_high) {
  1920. for (idx = 0; idx < 2; idx += num_4x4_blocks_wide) {
  1921. int_mv b_mv[MB_MODE_COUNT];
  1922. int64_t b_best_rd = INT64_MAX;
  1923. const int i = idy * 2 + idx;
  1924. PREDICTION_MODE this_mode;
  1925. RD_COST this_rdc;
  1926. unsigned int var_y, sse_y;
  1927. struct macroblock_plane *p = &x->plane[0];
  1928. struct macroblockd_plane *pd = &xd->plane[0];
  1929. const struct buf_2d orig_src = p->src;
  1930. const struct buf_2d orig_dst = pd->dst;
  1931. struct buf_2d orig_pre[2];
  1932. memcpy(orig_pre, xd->plane[0].pre, sizeof(orig_pre));
// Set buffer pointers for the sub8x8 motion search.
  1934. p->src.buf =
  1935. &p->src.buf[vp9_raster_block_offset(BLOCK_8X8, i, p->src.stride)];
  1936. pd->dst.buf =
  1937. &pd->dst.buf[vp9_raster_block_offset(BLOCK_8X8, i, pd->dst.stride)];
  1938. pd->pre[0].buf =
  1939. &pd->pre[0]
  1940. .buf[vp9_raster_block_offset(BLOCK_8X8, i, pd->pre[0].stride)];
  1941. b_mv[ZEROMV].as_int = 0;
  1942. b_mv[NEWMV].as_int = INVALID_MV;
  1943. vp9_append_sub8x8_mvs_for_idx(cm, xd, i, 0, mi_row, mi_col,
  1944. &b_mv[NEARESTMV], &b_mv[NEARMV],
  1945. mbmi_ext->mode_context);
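// Try NEARESTMV, NEARMV, ZEROMV and NEWMV for this sub-block; b_mv[] holds
// the candidate MV for each mode (NEWMV starts as INVALID_MV; the searched MV
// is written directly into bmi[i] below).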
  1946. for (this_mode = NEARESTMV; this_mode <= NEWMV; ++this_mode) {
  1947. int b_rate = 0;
  1948. xd->mi[0]->bmi[i].as_mv[0].as_int = b_mv[this_mode].as_int;
  1949. if (this_mode == NEWMV) {
  1950. const int step_param = cpi->sf.mv.fullpel_search_step_param;
  1951. MV mvp_full;
  1952. MV tmp_mv;
  1953. int cost_list[5];
  1954. const MvLimits tmp_mv_limits = x->mv_limits;
  1955. uint32_t dummy_dist;
  1956. if (i == 0) {
  1957. mvp_full.row = b_mv[NEARESTMV].as_mv.row >> 3;
  1958. mvp_full.col = b_mv[NEARESTMV].as_mv.col >> 3;
  1959. } else {
  1960. mvp_full.row = xd->mi[0]->bmi[0].as_mv[0].as_mv.row >> 3;
  1961. mvp_full.col = xd->mi[0]->bmi[0].as_mv[0].as_mv.col >> 3;
  1962. }
  1963. vp9_set_mv_search_range(&x->mv_limits,
  1964. &mbmi_ext->ref_mvs[0]->as_mv);
  1965. vp9_full_pixel_search(cpi, x, bsize, &mvp_full, step_param,
  1966. x->sadperbit4, cond_cost_list(cpi, cost_list),
  1967. &mbmi_ext->ref_mvs[ref_frame][0].as_mv,
  1968. &tmp_mv, INT_MAX, 0);
  1969. x->mv_limits = tmp_mv_limits;
// Calculate the bit cost of the motion vector.
  1971. mvp_full.row = tmp_mv.row * 8;
  1972. mvp_full.col = tmp_mv.col * 8;
  1973. b_rate += vp9_mv_bit_cost(
  1974. &mvp_full, &mbmi_ext->ref_mvs[ref_frame][0].as_mv,
  1975. x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
  1976. b_rate += cpi->inter_mode_cost[x->mbmi_ext->mode_context[ref_frame]]
  1977. [INTER_OFFSET(NEWMV)];
  1978. if (RDCOST(x->rdmult, x->rddiv, b_rate, 0) > b_best_rd) continue;
  1979. cpi->find_fractional_mv_step(
  1980. x, &tmp_mv, &mbmi_ext->ref_mvs[ref_frame][0].as_mv,
  1981. cpi->common.allow_high_precision_mv, x->errorperbit,
  1982. &cpi->fn_ptr[bsize], cpi->sf.mv.subpel_force_stop,
  1983. cpi->sf.mv.subpel_iters_per_step,
  1984. cond_cost_list(cpi, cost_list), x->nmvjointcost, x->mvcost,
  1985. &dummy_dist, &x->pred_sse[ref_frame], NULL, 0, 0);
  1986. xd->mi[0]->bmi[i].as_mv[0].as_mv = tmp_mv;
  1987. } else {
  1988. b_rate += cpi->inter_mode_cost[x->mbmi_ext->mode_context[ref_frame]]
  1989. [INTER_OFFSET(this_mode)];
  1990. }
  1991. #if CONFIG_VP9_HIGHBITDEPTH
  1992. if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
  1993. vp9_highbd_build_inter_predictor(
  1994. pd->pre[0].buf, pd->pre[0].stride, pd->dst.buf, pd->dst.stride,
  1995. &xd->mi[0]->bmi[i].as_mv[0].as_mv, &xd->block_refs[0]->sf,
  1996. 4 * num_4x4_blocks_wide, 4 * num_4x4_blocks_high, 0,
  1997. vp9_filter_kernels[mi->interp_filter], MV_PRECISION_Q3,
  1998. mi_col * MI_SIZE + 4 * (i & 0x01),
  1999. mi_row * MI_SIZE + 4 * (i >> 1), xd->bd);
  2000. } else {
  2001. #endif
  2002. vp9_build_inter_predictor(
  2003. pd->pre[0].buf, pd->pre[0].stride, pd->dst.buf, pd->dst.stride,
  2004. &xd->mi[0]->bmi[i].as_mv[0].as_mv, &xd->block_refs[0]->sf,
  2005. 4 * num_4x4_blocks_wide, 4 * num_4x4_blocks_high, 0,
  2006. vp9_filter_kernels[mi->interp_filter], MV_PRECISION_Q3,
  2007. mi_col * MI_SIZE + 4 * (i & 0x01),
  2008. mi_row * MI_SIZE + 4 * (i >> 1));
  2009. #if CONFIG_VP9_HIGHBITDEPTH
  2010. }
  2011. #endif
  2012. model_rd_for_sb_y(cpi, bsize, x, xd, &this_rdc.rate, &this_rdc.dist,
  2013. &var_y, &sse_y);
  2014. this_rdc.rate += b_rate;
  2015. this_rdc.rdcost =
  2016. RDCOST(x->rdmult, x->rddiv, this_rdc.rate, this_rdc.dist);
  2017. if (this_rdc.rdcost < b_best_rd) {
  2018. b_best_rd = this_rdc.rdcost;
  2019. bsi[ref_frame][i].as_mode = this_mode;
  2020. bsi[ref_frame][i].as_mv[0].as_mv = xd->mi[0]->bmi[i].as_mv[0].as_mv;
  2021. }
  2022. } // mode search
  2023. // restore source and prediction buffer pointers.
  2024. p->src = orig_src;
  2025. pd->pre[0] = orig_pre[0];
  2026. pd->dst = orig_dst;
  2027. this_rd += b_best_rd;
  2028. xd->mi[0]->bmi[i] = bsi[ref_frame][i];
  2029. if (num_4x4_blocks_wide > 1) xd->mi[0]->bmi[i + 1] = xd->mi[0]->bmi[i];
  2030. if (num_4x4_blocks_high > 1) xd->mi[0]->bmi[i + 2] = xd->mi[0]->bmi[i];
  2031. }
  2032. } // loop through sub8x8 blocks
  2033. if (this_rd < best_rd) {
  2034. best_rd = this_rd;
  2035. best_ref_frame = ref_frame;
  2036. }
  2037. } // reference frames
  2038. mi->tx_size = TX_4X4;
  2039. mi->ref_frame[0] = best_ref_frame;
  2040. for (idy = 0; idy < 2; idy += num_4x4_blocks_high) {
  2041. for (idx = 0; idx < 2; idx += num_4x4_blocks_wide) {
  2042. const int block = idy * 2 + idx;
  2043. xd->mi[0]->bmi[block] = bsi[best_ref_frame][block];
  2044. if (num_4x4_blocks_wide > 1)
  2045. xd->mi[0]->bmi[block + 1] = bsi[best_ref_frame][block];
  2046. if (num_4x4_blocks_high > 1)
  2047. xd->mi[0]->bmi[block + 2] = bsi[best_ref_frame][block];
  2048. }
  2049. }
  2050. mi->mode = xd->mi[0]->bmi[3].as_mode;
  2051. ctx->mic = *(xd->mi[0]);
  2052. ctx->mbmi_ext = *x->mbmi_ext;
  2053. ctx->skip_txfm[0] = SKIP_TXFM_NONE;
  2054. ctx->skip = 0;
  2055. // Dummy assignment for speed -5. No effect in speed -6.
  2056. rd_cost->rdcost = best_rd;
  2057. }