vp9_mcomp.c 92 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923192419251926192719281929193019311932193319341935193619371938193919401941194219431944194519461947194819491950195119521953195419551956195719581959196019611962196319641965196619671968196919701971197219731974197519761977197819791980198119821983198419851986198719881989199019911992199319941995199619971998199920002001200220032004200520062007200820092010201120122013201420152016201720182019202020212022202320242025202620272028202920302031203220332034203520362037203820392040204120422043204420452046204720482049205020512052205320542055205620572058205920602061206220632064206520662067206820692070207120722073207420752076207720782079208020812082208320842085208620872088208920902091209220932094209520962097209820992100210121022103210421052106210721082109211021112112211321142115211621172118211921202121212221232124212521262127212821292130213121322133213421352136213721382139214021412142214321442145214621472148214921502151215221532154215521562157215821592160216121622163216421652166216721682169217021712172217321742175217621772178217921802181218221832184218521862187218821892190219121922193219421952196219721982199220022012202220322042205220622072208220922102211221222132214221522162217221822192220222122222223222422252226222722282229223022312232223322342235223622372238223922402241224222432244224522462247224822492250225122522253225422552256225722582259226022612262226322642265226622672268226922702271227222732274227522762277227822792280228122822283228422852286228722882289229022912292229322942295229622972298229923002301230223032304230523062307230823092310231123122313231423152316231723182319232023212322232323242325232623272328232923302331233223332334233523362337233823392340234123422343234423452346234723482349235023512352235323542355235623572358235923602361236223632364236523662367236823692370237123722373237423752376237723782379238023812382238323842385238623872388238923902391239223932394239523962397239823992400240124022403240424052406240724082409241024112412241324142415241624172418241924202421242224232424242524262427242824292430243124322433243424352436243724382439244024412442
  1. /*
  2. * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  3. *
  4. * Use of this source code is governed by a BSD-style license
  5. * that can be found in the LICENSE file in the root of the source
  6. * tree. An additional intellectual property rights grant can be found
  7. * in the file PATENTS. All contributing project authors may
  8. * be found in the AUTHORS file in the root of the source tree.
  9. */
  10. #include <assert.h>
  11. #include <limits.h>
  12. #include <math.h>
  13. #include <stdio.h>
  14. #include "./vpx_config.h"
  15. #include "./vpx_dsp_rtcd.h"
  16. #include "vpx_dsp/vpx_dsp_common.h"
  17. #include "vpx_mem/vpx_mem.h"
  18. #include "vpx_ports/mem.h"
  19. #include "vp9/common/vp9_common.h"
  20. #include "vp9/common/vp9_reconinter.h"
  21. #include "vp9/encoder/vp9_encoder.h"
  22. #include "vp9/encoder/vp9_mcomp.h"
  23. // #define NEW_DIAMOND_SEARCH
  24. static INLINE const uint8_t *get_buf_from_mv(const struct buf_2d *buf,
  25. const MV *mv) {
  26. return &buf->buf[mv->row * buf->stride + mv->col];
  27. }
  28. void vp9_set_mv_search_range(MvLimits *mv_limits, const MV *mv) {
  29. int col_min = (mv->col >> 3) - MAX_FULL_PEL_VAL + (mv->col & 7 ? 1 : 0);
  30. int row_min = (mv->row >> 3) - MAX_FULL_PEL_VAL + (mv->row & 7 ? 1 : 0);
  31. int col_max = (mv->col >> 3) + MAX_FULL_PEL_VAL;
  32. int row_max = (mv->row >> 3) + MAX_FULL_PEL_VAL;
  33. col_min = VPXMAX(col_min, (MV_LOW >> 3) + 1);
  34. row_min = VPXMAX(row_min, (MV_LOW >> 3) + 1);
  35. col_max = VPXMIN(col_max, (MV_UPP >> 3) - 1);
  36. row_max = VPXMIN(row_max, (MV_UPP >> 3) - 1);
  37. // Get intersection of UMV window and valid MV window to reduce # of checks
  38. // in diamond search.
  39. if (mv_limits->col_min < col_min) mv_limits->col_min = col_min;
  40. if (mv_limits->col_max > col_max) mv_limits->col_max = col_max;
  41. if (mv_limits->row_min < row_min) mv_limits->row_min = row_min;
  42. if (mv_limits->row_max > row_max) mv_limits->row_max = row_max;
  43. }
  44. int vp9_init_search_range(int size) {
  45. int sr = 0;
  46. // Minimum search size no matter what the passed in value.
  47. size = VPXMAX(16, size);
  48. while ((size << sr) < MAX_FULL_PEL_VAL) sr++;
  49. sr = VPXMIN(sr, MAX_MVSEARCH_STEPS - 2);
  50. return sr;
  51. }
  52. static INLINE int mv_cost(const MV *mv, const int *joint_cost,
  53. int *const comp_cost[2]) {
  54. assert(mv->row >= -MV_MAX && mv->row < MV_MAX);
  55. assert(mv->col >= -MV_MAX && mv->col < MV_MAX);
  56. return joint_cost[vp9_get_mv_joint(mv)] + comp_cost[0][mv->row] +
  57. comp_cost[1][mv->col];
  58. }
  59. int vp9_mv_bit_cost(const MV *mv, const MV *ref, const int *mvjcost,
  60. int *mvcost[2], int weight) {
  61. const MV diff = { mv->row - ref->row, mv->col - ref->col };
  62. return ROUND_POWER_OF_TWO(mv_cost(&diff, mvjcost, mvcost) * weight, 7);
  63. }
  64. #define PIXEL_TRANSFORM_ERROR_SCALE 4
  65. static int mv_err_cost(const MV *mv, const MV *ref, const int *mvjcost,
  66. int *mvcost[2], int error_per_bit) {
  67. if (mvcost) {
  68. const MV diff = { mv->row - ref->row, mv->col - ref->col };
  69. // This product sits at a 32-bit ceiling right now and any additional
  70. // accuracy in either bit cost or error cost will cause it to overflow.
  71. return ROUND_POWER_OF_TWO(
  72. (unsigned)mv_cost(&diff, mvjcost, mvcost) * error_per_bit,
  73. RDDIV_BITS + VP9_PROB_COST_SHIFT - RD_EPB_SHIFT +
  74. PIXEL_TRANSFORM_ERROR_SCALE);
  75. }
  76. return 0;
  77. }
  78. static int mvsad_err_cost(const MACROBLOCK *x, const MV *mv, const MV *ref,
  79. int sad_per_bit) {
  80. const MV diff = { mv->row - ref->row, mv->col - ref->col };
  81. return ROUND_POWER_OF_TWO(
  82. (unsigned)mv_cost(&diff, x->nmvjointsadcost, x->nmvsadcost) * sad_per_bit,
  83. VP9_PROB_COST_SHIFT);
  84. }
  85. void vp9_init_dsmotion_compensation(search_site_config *cfg, int stride) {
  86. int len;
  87. int ss_count = 0;
  88. for (len = MAX_FIRST_STEP; len > 0; len /= 2) {
  89. // Generate offsets for 4 search sites per step.
  90. const MV ss_mvs[] = { { -len, 0 }, { len, 0 }, { 0, -len }, { 0, len } };
  91. int i;
  92. for (i = 0; i < 4; ++i, ++ss_count) {
  93. cfg->ss_mv[ss_count] = ss_mvs[i];
  94. cfg->ss_os[ss_count] = ss_mvs[i].row * stride + ss_mvs[i].col;
  95. }
  96. }
  97. cfg->searches_per_step = 4;
  98. cfg->total_steps = ss_count / cfg->searches_per_step;
  99. }
  100. void vp9_init3smotion_compensation(search_site_config *cfg, int stride) {
  101. int len;
  102. int ss_count = 0;
  103. for (len = MAX_FIRST_STEP; len > 0; len /= 2) {
  104. // Generate offsets for 8 search sites per step.
  105. const MV ss_mvs[8] = { { -len, 0 }, { len, 0 }, { 0, -len },
  106. { 0, len }, { -len, -len }, { -len, len },
  107. { len, -len }, { len, len } };
  108. int i;
  109. for (i = 0; i < 8; ++i, ++ss_count) {
  110. cfg->ss_mv[ss_count] = ss_mvs[i];
  111. cfg->ss_os[ss_count] = ss_mvs[i].row * stride + ss_mvs[i].col;
  112. }
  113. }
  114. cfg->searches_per_step = 8;
  115. cfg->total_steps = ss_count / cfg->searches_per_step;
  116. }
  117. /* Estimated (square) error cost of a motion vector (r,c). The 14 scale comes
  118. * from the same math as in mv_err_cost(). */
  119. #define MVC(r, c) \
  120. (mvcost \
  121. ? ((unsigned)(mvjcost[((r) != rr) * 2 + ((c) != rc)] + \
  122. mvcost[0][((r)-rr)] + mvcost[1][((c)-rc)]) * \
  123. error_per_bit + \
  124. 8192) >> \
  125. 14 \
  126. : 0)
  127. // convert motion vector component to offset for sv[a]f calc
  128. static INLINE int sp(int x) { return x & 7; }
  129. static INLINE const uint8_t *pre(const uint8_t *buf, int stride, int r, int c) {
  130. return &buf[(r >> 3) * stride + (c >> 3)];
  131. }
  132. #if CONFIG_VP9_HIGHBITDEPTH
  133. /* checks if (r, c) has better score than previous best */
  134. #define CHECK_BETTER(v, r, c) \
  135. if (c >= minc && c <= maxc && r >= minr && r <= maxr) { \
  136. int64_t tmpmse; \
  137. if (second_pred == NULL) { \
  138. thismse = vfp->svf(pre(y, y_stride, r, c), y_stride, sp(c), sp(r), z, \
  139. src_stride, &sse); \
  140. } else { \
  141. thismse = vfp->svaf(pre(y, y_stride, r, c), y_stride, sp(c), sp(r), z, \
  142. src_stride, &sse, second_pred); \
  143. } \
  144. tmpmse = thismse; \
  145. tmpmse += MVC(r, c); \
  146. if (tmpmse >= INT_MAX) { \
  147. v = INT_MAX; \
  148. } else if ((v = (uint32_t)tmpmse) < besterr) { \
  149. besterr = v; \
  150. br = r; \
  151. bc = c; \
  152. *distortion = thismse; \
  153. *sse1 = sse; \
  154. } \
  155. } else { \
  156. v = INT_MAX; \
  157. }
  158. #else
  159. /* checks if (r, c) has better score than previous best */
  160. #define CHECK_BETTER(v, r, c) \
  161. if (c >= minc && c <= maxc && r >= minr && r <= maxr) { \
  162. if (second_pred == NULL) \
  163. thismse = vfp->svf(pre(y, y_stride, r, c), y_stride, sp(c), sp(r), z, \
  164. src_stride, &sse); \
  165. else \
  166. thismse = vfp->svaf(pre(y, y_stride, r, c), y_stride, sp(c), sp(r), z, \
  167. src_stride, &sse, second_pred); \
  168. if ((v = MVC(r, c) + thismse) < besterr) { \
  169. besterr = v; \
  170. br = r; \
  171. bc = c; \
  172. *distortion = thismse; \
  173. *sse1 = sse; \
  174. } \
  175. } else { \
  176. v = INT_MAX; \
  177. }
  178. #endif
  179. #define FIRST_LEVEL_CHECKS \
  180. { \
  181. unsigned int left, right, up, down, diag; \
  182. CHECK_BETTER(left, tr, tc - hstep); \
  183. CHECK_BETTER(right, tr, tc + hstep); \
  184. CHECK_BETTER(up, tr - hstep, tc); \
  185. CHECK_BETTER(down, tr + hstep, tc); \
  186. whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2); \
  187. switch (whichdir) { \
  188. case 0: CHECK_BETTER(diag, tr - hstep, tc - hstep); break; \
  189. case 1: CHECK_BETTER(diag, tr - hstep, tc + hstep); break; \
  190. case 2: CHECK_BETTER(diag, tr + hstep, tc - hstep); break; \
  191. case 3: CHECK_BETTER(diag, tr + hstep, tc + hstep); break; \
  192. } \
  193. }
  194. #define SECOND_LEVEL_CHECKS \
  195. { \
  196. int kr, kc; \
  197. unsigned int second; \
  198. if (tr != br && tc != bc) { \
  199. kr = br - tr; \
  200. kc = bc - tc; \
  201. CHECK_BETTER(second, tr + kr, tc + 2 * kc); \
  202. CHECK_BETTER(second, tr + 2 * kr, tc + kc); \
  203. } else if (tr == br && tc != bc) { \
  204. kc = bc - tc; \
  205. CHECK_BETTER(second, tr + hstep, tc + 2 * kc); \
  206. CHECK_BETTER(second, tr - hstep, tc + 2 * kc); \
  207. switch (whichdir) { \
  208. case 0: \
  209. case 1: CHECK_BETTER(second, tr + hstep, tc + kc); break; \
  210. case 2: \
  211. case 3: CHECK_BETTER(second, tr - hstep, tc + kc); break; \
  212. } \
  213. } else if (tr != br && tc == bc) { \
  214. kr = br - tr; \
  215. CHECK_BETTER(second, tr + 2 * kr, tc + hstep); \
  216. CHECK_BETTER(second, tr + 2 * kr, tc - hstep); \
  217. switch (whichdir) { \
  218. case 0: \
  219. case 2: CHECK_BETTER(second, tr + kr, tc + hstep); break; \
  220. case 1: \
  221. case 3: CHECK_BETTER(second, tr + kr, tc - hstep); break; \
  222. } \
  223. } \
  224. }
  225. // TODO(yunqingwang): SECOND_LEVEL_CHECKS_BEST was a rewrote of
  226. // SECOND_LEVEL_CHECKS, and SECOND_LEVEL_CHECKS should be rewritten
  227. // later in the same way.
  228. #define SECOND_LEVEL_CHECKS_BEST \
  229. { \
  230. unsigned int second; \
  231. int br0 = br; \
  232. int bc0 = bc; \
  233. assert(tr == br || tc == bc); \
  234. if (tr == br && tc != bc) { \
  235. kc = bc - tc; \
  236. } else if (tr != br && tc == bc) { \
  237. kr = br - tr; \
  238. } \
  239. CHECK_BETTER(second, br0 + kr, bc0); \
  240. CHECK_BETTER(second, br0, bc0 + kc); \
  241. if (br0 != br || bc0 != bc) { \
  242. CHECK_BETTER(second, br0 + kr, bc0 + kc); \
  243. } \
  244. }
  245. #define SETUP_SUBPEL_SEARCH \
  246. const uint8_t *const z = x->plane[0].src.buf; \
  247. const int src_stride = x->plane[0].src.stride; \
  248. const MACROBLOCKD *xd = &x->e_mbd; \
  249. unsigned int besterr = INT_MAX; \
  250. unsigned int sse; \
  251. unsigned int whichdir; \
  252. int thismse; \
  253. const unsigned int halfiters = iters_per_step; \
  254. const unsigned int quarteriters = iters_per_step; \
  255. const unsigned int eighthiters = iters_per_step; \
  256. const int y_stride = xd->plane[0].pre[0].stride; \
  257. const int offset = bestmv->row * y_stride + bestmv->col; \
  258. const uint8_t *const y = xd->plane[0].pre[0].buf; \
  259. \
  260. int rr = ref_mv->row; \
  261. int rc = ref_mv->col; \
  262. int br = bestmv->row * 8; \
  263. int bc = bestmv->col * 8; \
  264. int hstep = 4; \
  265. const int minc = VPXMAX(x->mv_limits.col_min * 8, ref_mv->col - MV_MAX); \
  266. const int maxc = VPXMIN(x->mv_limits.col_max * 8, ref_mv->col + MV_MAX); \
  267. const int minr = VPXMAX(x->mv_limits.row_min * 8, ref_mv->row - MV_MAX); \
  268. const int maxr = VPXMIN(x->mv_limits.row_max * 8, ref_mv->row + MV_MAX); \
  269. int tr = br; \
  270. int tc = bc; \
  271. \
  272. bestmv->row *= 8; \
  273. bestmv->col *= 8;
  274. static unsigned int setup_center_error(
  275. const MACROBLOCKD *xd, const MV *bestmv, const MV *ref_mv,
  276. int error_per_bit, const vp9_variance_fn_ptr_t *vfp,
  277. const uint8_t *const src, const int src_stride, const uint8_t *const y,
  278. int y_stride, const uint8_t *second_pred, int w, int h, int offset,
  279. int *mvjcost, int *mvcost[2], uint32_t *sse1, uint32_t *distortion) {
  280. #if CONFIG_VP9_HIGHBITDEPTH
  281. uint64_t besterr;
  282. if (second_pred != NULL) {
  283. if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
  284. DECLARE_ALIGNED(16, uint16_t, comp_pred16[64 * 64]);
  285. vpx_highbd_comp_avg_pred(comp_pred16, second_pred, w, h, y + offset,
  286. y_stride);
  287. besterr =
  288. vfp->vf(CONVERT_TO_BYTEPTR(comp_pred16), w, src, src_stride, sse1);
  289. } else {
  290. DECLARE_ALIGNED(16, uint8_t, comp_pred[64 * 64]);
  291. vpx_comp_avg_pred(comp_pred, second_pred, w, h, y + offset, y_stride);
  292. besterr = vfp->vf(comp_pred, w, src, src_stride, sse1);
  293. }
  294. } else {
  295. besterr = vfp->vf(y + offset, y_stride, src, src_stride, sse1);
  296. }
  297. *distortion = (uint32_t)besterr;
  298. besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit);
  299. if (besterr >= UINT_MAX) return UINT_MAX;
  300. return (uint32_t)besterr;
  301. #else
  302. uint32_t besterr;
  303. (void)xd;
  304. if (second_pred != NULL) {
  305. DECLARE_ALIGNED(16, uint8_t, comp_pred[64 * 64]);
  306. vpx_comp_avg_pred(comp_pred, second_pred, w, h, y + offset, y_stride);
  307. besterr = vfp->vf(comp_pred, w, src, src_stride, sse1);
  308. } else {
  309. besterr = vfp->vf(y + offset, y_stride, src, src_stride, sse1);
  310. }
  311. *distortion = besterr;
  312. besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit);
  313. return besterr;
  314. #endif // CONFIG_VP9_HIGHBITDEPTH
  315. }
  316. static INLINE int divide_and_round(const int n, const int d) {
  317. return ((n < 0) ^ (d < 0)) ? ((n - d / 2) / d) : ((n + d / 2) / d);
  318. }
  319. static INLINE int is_cost_list_wellbehaved(int *cost_list) {
  320. return cost_list[0] < cost_list[1] && cost_list[0] < cost_list[2] &&
  321. cost_list[0] < cost_list[3] && cost_list[0] < cost_list[4];
  322. }
  323. // Returns surface minima estimate at given precision in 1/2^n bits.
  324. // Assume a model for the cost surface: S = A(x - x0)^2 + B(y - y0)^2 + C
  325. // For a given set of costs S0, S1, S2, S3, S4 at points
  326. // (y, x) = (0, 0), (0, -1), (1, 0), (0, 1) and (-1, 0) respectively,
  327. // the solution for the location of the minima (x0, y0) is given by:
  328. // x0 = 1/2 (S1 - S3)/(S1 + S3 - 2*S0),
  329. // y0 = 1/2 (S4 - S2)/(S4 + S2 - 2*S0).
  330. // The code below is an integerized version of that.
  331. static void get_cost_surf_min(int *cost_list, int *ir, int *ic, int bits) {
  332. *ic = divide_and_round((cost_list[1] - cost_list[3]) * (1 << (bits - 1)),
  333. (cost_list[1] - 2 * cost_list[0] + cost_list[3]));
  334. *ir = divide_and_round((cost_list[4] - cost_list[2]) * (1 << (bits - 1)),
  335. (cost_list[4] - 2 * cost_list[0] + cost_list[2]));
  336. }
  337. uint32_t vp9_skip_sub_pixel_tree(const MACROBLOCK *x, MV *bestmv,
  338. const MV *ref_mv, int allow_hp,
  339. int error_per_bit,
  340. const vp9_variance_fn_ptr_t *vfp,
  341. int forced_stop, int iters_per_step,
  342. int *cost_list, int *mvjcost, int *mvcost[2],
  343. uint32_t *distortion, uint32_t *sse1,
  344. const uint8_t *second_pred, int w, int h) {
  345. SETUP_SUBPEL_SEARCH;
  346. besterr = setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp, z,
  347. src_stride, y, y_stride, second_pred, w, h,
  348. offset, mvjcost, mvcost, sse1, distortion);
  349. (void)halfiters;
  350. (void)quarteriters;
  351. (void)eighthiters;
  352. (void)whichdir;
  353. (void)allow_hp;
  354. (void)forced_stop;
  355. (void)hstep;
  356. (void)rr;
  357. (void)rc;
  358. (void)minr;
  359. (void)minc;
  360. (void)maxr;
  361. (void)maxc;
  362. (void)tr;
  363. (void)tc;
  364. (void)sse;
  365. (void)thismse;
  366. (void)cost_list;
  367. if ((abs(bestmv->col - ref_mv->col) > (MAX_FULL_PEL_VAL << 3)) ||
  368. (abs(bestmv->row - ref_mv->row) > (MAX_FULL_PEL_VAL << 3)))
  369. return UINT_MAX;
  370. return besterr;
  371. }
  372. uint32_t vp9_find_best_sub_pixel_tree_pruned_evenmore(
  373. const MACROBLOCK *x, MV *bestmv, const MV *ref_mv, int allow_hp,
  374. int error_per_bit, const vp9_variance_fn_ptr_t *vfp, int forced_stop,
  375. int iters_per_step, int *cost_list, int *mvjcost, int *mvcost[2],
  376. uint32_t *distortion, uint32_t *sse1, const uint8_t *second_pred, int w,
  377. int h) {
  378. SETUP_SUBPEL_SEARCH;
  379. besterr = setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp, z,
  380. src_stride, y, y_stride, second_pred, w, h,
  381. offset, mvjcost, mvcost, sse1, distortion);
  382. (void)halfiters;
  383. (void)quarteriters;
  384. (void)eighthiters;
  385. (void)whichdir;
  386. (void)allow_hp;
  387. (void)forced_stop;
  388. (void)hstep;
  389. if (cost_list && cost_list[0] != INT_MAX && cost_list[1] != INT_MAX &&
  390. cost_list[2] != INT_MAX && cost_list[3] != INT_MAX &&
  391. cost_list[4] != INT_MAX && is_cost_list_wellbehaved(cost_list)) {
  392. int ir, ic;
  393. unsigned int minpt;
  394. get_cost_surf_min(cost_list, &ir, &ic, 2);
  395. if (ir != 0 || ic != 0) {
  396. CHECK_BETTER(minpt, tr + 2 * ir, tc + 2 * ic);
  397. }
  398. } else {
  399. FIRST_LEVEL_CHECKS;
  400. if (halfiters > 1) {
  401. SECOND_LEVEL_CHECKS;
  402. }
  403. tr = br;
  404. tc = bc;
  405. // Each subsequent iteration checks at least one point in common with
  406. // the last iteration could be 2 ( if diag selected) 1/4 pel
  407. // Note forced_stop: 0 - full, 1 - qtr only, 2 - half only
  408. if (forced_stop != 2) {
  409. hstep >>= 1;
  410. FIRST_LEVEL_CHECKS;
  411. if (quarteriters > 1) {
  412. SECOND_LEVEL_CHECKS;
  413. }
  414. }
  415. }
  416. tr = br;
  417. tc = bc;
  418. if (allow_hp && use_mv_hp(ref_mv) && forced_stop == 0) {
  419. hstep >>= 1;
  420. FIRST_LEVEL_CHECKS;
  421. if (eighthiters > 1) {
  422. SECOND_LEVEL_CHECKS;
  423. }
  424. }
  425. bestmv->row = br;
  426. bestmv->col = bc;
  427. if ((abs(bestmv->col - ref_mv->col) > (MAX_FULL_PEL_VAL << 3)) ||
  428. (abs(bestmv->row - ref_mv->row) > (MAX_FULL_PEL_VAL << 3)))
  429. return INT_MAX;
  430. return besterr;
  431. }
  432. uint32_t vp9_find_best_sub_pixel_tree_pruned_more(
  433. const MACROBLOCK *x, MV *bestmv, const MV *ref_mv, int allow_hp,
  434. int error_per_bit, const vp9_variance_fn_ptr_t *vfp, int forced_stop,
  435. int iters_per_step, int *cost_list, int *mvjcost, int *mvcost[2],
  436. uint32_t *distortion, uint32_t *sse1, const uint8_t *second_pred, int w,
  437. int h) {
  438. SETUP_SUBPEL_SEARCH;
  439. besterr = setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp, z,
  440. src_stride, y, y_stride, second_pred, w, h,
  441. offset, mvjcost, mvcost, sse1, distortion);
  442. if (cost_list && cost_list[0] != INT_MAX && cost_list[1] != INT_MAX &&
  443. cost_list[2] != INT_MAX && cost_list[3] != INT_MAX &&
  444. cost_list[4] != INT_MAX && is_cost_list_wellbehaved(cost_list)) {
  445. unsigned int minpt;
  446. int ir, ic;
  447. get_cost_surf_min(cost_list, &ir, &ic, 1);
  448. if (ir != 0 || ic != 0) {
  449. CHECK_BETTER(minpt, tr + ir * hstep, tc + ic * hstep);
  450. }
  451. } else {
  452. FIRST_LEVEL_CHECKS;
  453. if (halfiters > 1) {
  454. SECOND_LEVEL_CHECKS;
  455. }
  456. }
  457. // Each subsequent iteration checks at least one point in common with
  458. // the last iteration could be 2 ( if diag selected) 1/4 pel
  459. // Note forced_stop: 0 - full, 1 - qtr only, 2 - half only
  460. if (forced_stop != 2) {
  461. tr = br;
  462. tc = bc;
  463. hstep >>= 1;
  464. FIRST_LEVEL_CHECKS;
  465. if (quarteriters > 1) {
  466. SECOND_LEVEL_CHECKS;
  467. }
  468. }
  469. if (allow_hp && use_mv_hp(ref_mv) && forced_stop == 0) {
  470. tr = br;
  471. tc = bc;
  472. hstep >>= 1;
  473. FIRST_LEVEL_CHECKS;
  474. if (eighthiters > 1) {
  475. SECOND_LEVEL_CHECKS;
  476. }
  477. }
  478. // These lines insure static analysis doesn't warn that
  479. // tr and tc aren't used after the above point.
  480. (void)tr;
  481. (void)tc;
  482. bestmv->row = br;
  483. bestmv->col = bc;
  484. if ((abs(bestmv->col - ref_mv->col) > (MAX_FULL_PEL_VAL << 3)) ||
  485. (abs(bestmv->row - ref_mv->row) > (MAX_FULL_PEL_VAL << 3)))
  486. return UINT_MAX;
  487. return besterr;
  488. }
  489. uint32_t vp9_find_best_sub_pixel_tree_pruned(
  490. const MACROBLOCK *x, MV *bestmv, const MV *ref_mv, int allow_hp,
  491. int error_per_bit, const vp9_variance_fn_ptr_t *vfp, int forced_stop,
  492. int iters_per_step, int *cost_list, int *mvjcost, int *mvcost[2],
  493. uint32_t *distortion, uint32_t *sse1, const uint8_t *second_pred, int w,
  494. int h) {
  495. SETUP_SUBPEL_SEARCH;
  496. besterr = setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp, z,
  497. src_stride, y, y_stride, second_pred, w, h,
  498. offset, mvjcost, mvcost, sse1, distortion);
  499. if (cost_list && cost_list[0] != INT_MAX && cost_list[1] != INT_MAX &&
  500. cost_list[2] != INT_MAX && cost_list[3] != INT_MAX &&
  501. cost_list[4] != INT_MAX) {
  502. unsigned int left, right, up, down, diag;
  503. whichdir = (cost_list[1] < cost_list[3] ? 0 : 1) +
  504. (cost_list[2] < cost_list[4] ? 0 : 2);
  505. switch (whichdir) {
  506. case 0:
  507. CHECK_BETTER(left, tr, tc - hstep);
  508. CHECK_BETTER(down, tr + hstep, tc);
  509. CHECK_BETTER(diag, tr + hstep, tc - hstep);
  510. break;
  511. case 1:
  512. CHECK_BETTER(right, tr, tc + hstep);
  513. CHECK_BETTER(down, tr + hstep, tc);
  514. CHECK_BETTER(diag, tr + hstep, tc + hstep);
  515. break;
  516. case 2:
  517. CHECK_BETTER(left, tr, tc - hstep);
  518. CHECK_BETTER(up, tr - hstep, tc);
  519. CHECK_BETTER(diag, tr - hstep, tc - hstep);
  520. break;
  521. case 3:
  522. CHECK_BETTER(right, tr, tc + hstep);
  523. CHECK_BETTER(up, tr - hstep, tc);
  524. CHECK_BETTER(diag, tr - hstep, tc + hstep);
  525. break;
  526. }
  527. } else {
  528. FIRST_LEVEL_CHECKS;
  529. if (halfiters > 1) {
  530. SECOND_LEVEL_CHECKS;
  531. }
  532. }
  533. tr = br;
  534. tc = bc;
  535. // Each subsequent iteration checks at least one point in common with
  536. // the last iteration could be 2 ( if diag selected) 1/4 pel
  537. // Note forced_stop: 0 - full, 1 - qtr only, 2 - half only
  538. if (forced_stop != 2) {
  539. hstep >>= 1;
  540. FIRST_LEVEL_CHECKS;
  541. if (quarteriters > 1) {
  542. SECOND_LEVEL_CHECKS;
  543. }
  544. tr = br;
  545. tc = bc;
  546. }
  547. if (allow_hp && use_mv_hp(ref_mv) && forced_stop == 0) {
  548. hstep >>= 1;
  549. FIRST_LEVEL_CHECKS;
  550. if (eighthiters > 1) {
  551. SECOND_LEVEL_CHECKS;
  552. }
  553. tr = br;
  554. tc = bc;
  555. }
  556. // These lines insure static analysis doesn't warn that
  557. // tr and tc aren't used after the above point.
  558. (void)tr;
  559. (void)tc;
  560. bestmv->row = br;
  561. bestmv->col = bc;
  562. if ((abs(bestmv->col - ref_mv->col) > (MAX_FULL_PEL_VAL << 3)) ||
  563. (abs(bestmv->row - ref_mv->row) > (MAX_FULL_PEL_VAL << 3)))
  564. return INT_MAX;
  565. return besterr;
  566. }
  567. /* clang-format off */
  568. static const MV search_step_table[12] = {
  569. // left, right, up, down
  570. { 0, -4 }, { 0, 4 }, { -4, 0 }, { 4, 0 },
  571. { 0, -2 }, { 0, 2 }, { -2, 0 }, { 2, 0 },
  572. { 0, -1 }, { 0, 1 }, { -1, 0 }, { 1, 0 }
  573. };
  574. /* clang-format on */
  575. uint32_t vp9_find_best_sub_pixel_tree(
  576. const MACROBLOCK *x, MV *bestmv, const MV *ref_mv, int allow_hp,
  577. int error_per_bit, const vp9_variance_fn_ptr_t *vfp, int forced_stop,
  578. int iters_per_step, int *cost_list, int *mvjcost, int *mvcost[2],
  579. uint32_t *distortion, uint32_t *sse1, const uint8_t *second_pred, int w,
  580. int h) {
  581. const uint8_t *const z = x->plane[0].src.buf;
  582. const uint8_t *const src_address = z;
  583. const int src_stride = x->plane[0].src.stride;
  584. const MACROBLOCKD *xd = &x->e_mbd;
  585. unsigned int besterr = INT_MAX;
  586. unsigned int sse;
  587. int thismse;
  588. const int y_stride = xd->plane[0].pre[0].stride;
  589. const int offset = bestmv->row * y_stride + bestmv->col;
  590. const uint8_t *const y = xd->plane[0].pre[0].buf;
  591. int rr = ref_mv->row;
  592. int rc = ref_mv->col;
  593. int br = bestmv->row * 8;
  594. int bc = bestmv->col * 8;
  595. int hstep = 4;
  596. int iter, round = 3 - forced_stop;
  597. const int minc = VPXMAX(x->mv_limits.col_min * 8, ref_mv->col - MV_MAX);
  598. const int maxc = VPXMIN(x->mv_limits.col_max * 8, ref_mv->col + MV_MAX);
  599. const int minr = VPXMAX(x->mv_limits.row_min * 8, ref_mv->row - MV_MAX);
  600. const int maxr = VPXMIN(x->mv_limits.row_max * 8, ref_mv->row + MV_MAX);
  601. int tr = br;
  602. int tc = bc;
  603. const MV *search_step = search_step_table;
  604. int idx, best_idx = -1;
  605. unsigned int cost_array[5];
  606. int kr, kc;
  607. if (!(allow_hp && use_mv_hp(ref_mv)))
  608. if (round == 3) round = 2;
  609. bestmv->row *= 8;
  610. bestmv->col *= 8;
  611. besterr = setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp, z,
  612. src_stride, y, y_stride, second_pred, w, h,
  613. offset, mvjcost, mvcost, sse1, distortion);
  614. (void)cost_list; // to silence compiler warning
  615. for (iter = 0; iter < round; ++iter) {
  616. // Check vertical and horizontal sub-pixel positions.
  617. for (idx = 0; idx < 4; ++idx) {
  618. tr = br + search_step[idx].row;
  619. tc = bc + search_step[idx].col;
  620. if (tc >= minc && tc <= maxc && tr >= minr && tr <= maxr) {
  621. const uint8_t *const pre_address = y + (tr >> 3) * y_stride + (tc >> 3);
  622. MV this_mv;
  623. this_mv.row = tr;
  624. this_mv.col = tc;
  625. if (second_pred == NULL)
  626. thismse = vfp->svf(pre_address, y_stride, sp(tc), sp(tr), src_address,
  627. src_stride, &sse);
  628. else
  629. thismse = vfp->svaf(pre_address, y_stride, sp(tc), sp(tr),
  630. src_address, src_stride, &sse, second_pred);
  631. cost_array[idx] = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost,
  632. mvcost, error_per_bit);
  633. if (cost_array[idx] < besterr) {
  634. best_idx = idx;
  635. besterr = cost_array[idx];
  636. *distortion = thismse;
  637. *sse1 = sse;
  638. }
  639. } else {
  640. cost_array[idx] = INT_MAX;
  641. }
  642. }
  643. // Check diagonal sub-pixel position
  644. kc = (cost_array[0] <= cost_array[1] ? -hstep : hstep);
  645. kr = (cost_array[2] <= cost_array[3] ? -hstep : hstep);
  646. tc = bc + kc;
  647. tr = br + kr;
  648. if (tc >= minc && tc <= maxc && tr >= minr && tr <= maxr) {
  649. const uint8_t *const pre_address = y + (tr >> 3) * y_stride + (tc >> 3);
  650. MV this_mv = { tr, tc };
  651. if (second_pred == NULL)
  652. thismse = vfp->svf(pre_address, y_stride, sp(tc), sp(tr), src_address,
  653. src_stride, &sse);
  654. else
  655. thismse = vfp->svaf(pre_address, y_stride, sp(tc), sp(tr), src_address,
  656. src_stride, &sse, second_pred);
  657. cost_array[4] = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost,
  658. error_per_bit);
  659. if (cost_array[4] < besterr) {
  660. best_idx = 4;
  661. besterr = cost_array[4];
  662. *distortion = thismse;
  663. *sse1 = sse;
  664. }
  665. } else {
  666. cost_array[idx] = INT_MAX;
  667. }
  668. if (best_idx < 4 && best_idx >= 0) {
  669. br += search_step[best_idx].row;
  670. bc += search_step[best_idx].col;
  671. } else if (best_idx == 4) {
  672. br = tr;
  673. bc = tc;
  674. }
  675. if (iters_per_step > 1 && best_idx != -1) SECOND_LEVEL_CHECKS_BEST;
  676. tr = br;
  677. tc = bc;
  678. search_step += 4;
  679. hstep >>= 1;
  680. best_idx = -1;
  681. }
  682. // Each subsequent iteration checks at least one point in common with
  683. // the last iteration could be 2 ( if diag selected) 1/4 pel
  684. // These lines insure static analysis doesn't warn that
  685. // tr and tc aren't used after the above point.
  686. (void)tr;
  687. (void)tc;
  688. bestmv->row = br;
  689. bestmv->col = bc;
  690. if ((abs(bestmv->col - ref_mv->col) > (MAX_FULL_PEL_VAL << 3)) ||
  691. (abs(bestmv->row - ref_mv->row) > (MAX_FULL_PEL_VAL << 3)))
  692. return INT_MAX;
  693. return besterr;
  694. }
  695. #undef MVC
  696. #undef CHECK_BETTER
  697. static INLINE int check_bounds(const MvLimits *mv_limits, int row, int col,
  698. int range) {
  699. return ((row - range) >= mv_limits->row_min) &
  700. ((row + range) <= mv_limits->row_max) &
  701. ((col - range) >= mv_limits->col_min) &
  702. ((col + range) <= mv_limits->col_max);
  703. }
  704. static INLINE int is_mv_in(const MvLimits *mv_limits, const MV *mv) {
  705. return (mv->col >= mv_limits->col_min) && (mv->col <= mv_limits->col_max) &&
  706. (mv->row >= mv_limits->row_min) && (mv->row <= mv_limits->row_max);
  707. }
  708. #define CHECK_BETTER \
  709. { \
  710. if (thissad < bestsad) { \
  711. if (use_mvcost) \
  712. thissad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit); \
  713. if (thissad < bestsad) { \
  714. bestsad = thissad; \
  715. best_site = i; \
  716. } \
  717. } \
  718. }
  719. #define MAX_PATTERN_SCALES 11
  720. #define MAX_PATTERN_CANDIDATES 8 // max number of canddiates per scale
  721. #define PATTERN_CANDIDATES_REF 3 // number of refinement candidates
  722. // Calculate and return a sad+mvcost list around an integer best pel.
  723. static INLINE void calc_int_cost_list(const MACROBLOCK *x, const MV *ref_mv,
  724. int sadpb,
  725. const vp9_variance_fn_ptr_t *fn_ptr,
  726. const MV *best_mv, int *cost_list) {
  727. static const MV neighbors[4] = { { 0, -1 }, { 1, 0 }, { 0, 1 }, { -1, 0 } };
  728. const struct buf_2d *const what = &x->plane[0].src;
  729. const struct buf_2d *const in_what = &x->e_mbd.plane[0].pre[0];
  730. const MV fcenter_mv = { ref_mv->row >> 3, ref_mv->col >> 3 };
  731. int br = best_mv->row;
  732. int bc = best_mv->col;
  733. MV this_mv;
  734. int i;
  735. unsigned int sse;
  736. this_mv.row = br;
  737. this_mv.col = bc;
  738. cost_list[0] =
  739. fn_ptr->vf(what->buf, what->stride, get_buf_from_mv(in_what, &this_mv),
  740. in_what->stride, &sse) +
  741. mvsad_err_cost(x, &this_mv, &fcenter_mv, sadpb);
  742. if (check_bounds(&x->mv_limits, br, bc, 1)) {
  743. for (i = 0; i < 4; i++) {
  744. const MV this_mv = { br + neighbors[i].row, bc + neighbors[i].col };
  745. cost_list[i + 1] = fn_ptr->vf(what->buf, what->stride,
  746. get_buf_from_mv(in_what, &this_mv),
  747. in_what->stride, &sse) +
  748. mv_err_cost(&this_mv, &fcenter_mv, x->nmvjointcost,
  749. x->mvcost, x->errorperbit);
  750. }
  751. } else {
  752. for (i = 0; i < 4; i++) {
  753. const MV this_mv = { br + neighbors[i].row, bc + neighbors[i].col };
  754. if (!is_mv_in(&x->mv_limits, &this_mv))
  755. cost_list[i + 1] = INT_MAX;
  756. else
  757. cost_list[i + 1] = fn_ptr->vf(what->buf, what->stride,
  758. get_buf_from_mv(in_what, &this_mv),
  759. in_what->stride, &sse) +
  760. mv_err_cost(&this_mv, &fcenter_mv, x->nmvjointcost,
  761. x->mvcost, x->errorperbit);
  762. }
  763. }
  764. }
  765. // Generic pattern search function that searches over multiple scales.
  766. // Each scale can have a different number of candidates and shape of
  767. // candidates as indicated in the num_candidates and candidates arrays
  768. // passed into this function
  769. //
  770. static int vp9_pattern_search(const MACROBLOCK *x, MV *ref_mv, int search_param,
  771. int sad_per_bit, int do_init_search,
  772. int *cost_list, const vp9_variance_fn_ptr_t *vfp,
  773. int use_mvcost, const MV *center_mv, MV *best_mv,
  774. const int num_candidates[MAX_PATTERN_SCALES],
  775. const MV candidates[MAX_PATTERN_SCALES]
  776. [MAX_PATTERN_CANDIDATES]) {
  777. const MACROBLOCKD *const xd = &x->e_mbd;
  778. static const int search_param_to_steps[MAX_MVSEARCH_STEPS] = {
  779. 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
  780. };
  781. int i, s, t;
  782. const struct buf_2d *const what = &x->plane[0].src;
  783. const struct buf_2d *const in_what = &xd->plane[0].pre[0];
  784. int br, bc;
  785. int bestsad = INT_MAX;
  786. int thissad;
  787. int k = -1;
  788. const MV fcenter_mv = { center_mv->row >> 3, center_mv->col >> 3 };
  789. int best_init_s = search_param_to_steps[search_param];
  790. // adjust ref_mv to make sure it is within MV range
  791. clamp_mv(ref_mv, x->mv_limits.col_min, x->mv_limits.col_max,
  792. x->mv_limits.row_min, x->mv_limits.row_max);
  793. br = ref_mv->row;
  794. bc = ref_mv->col;
  795. // Work out the start point for the search
  796. bestsad = vfp->sdf(what->buf, what->stride, get_buf_from_mv(in_what, ref_mv),
  797. in_what->stride) +
  798. mvsad_err_cost(x, ref_mv, &fcenter_mv, sad_per_bit);
  799. // Search all possible scales upto the search param around the center point
  800. // pick the scale of the point that is best as the starting scale of
  801. // further steps around it.
  802. if (do_init_search) {
  803. s = best_init_s;
  804. best_init_s = -1;
  805. for (t = 0; t <= s; ++t) {
  806. int best_site = -1;
  807. if (check_bounds(&x->mv_limits, br, bc, 1 << t)) {
  808. for (i = 0; i < num_candidates[t]; i++) {
  809. const MV this_mv = { br + candidates[t][i].row,
  810. bc + candidates[t][i].col };
  811. thissad =
  812. vfp->sdf(what->buf, what->stride,
  813. get_buf_from_mv(in_what, &this_mv), in_what->stride);
  814. CHECK_BETTER
  815. }
  816. } else {
  817. for (i = 0; i < num_candidates[t]; i++) {
  818. const MV this_mv = { br + candidates[t][i].row,
  819. bc + candidates[t][i].col };
  820. if (!is_mv_in(&x->mv_limits, &this_mv)) continue;
  821. thissad =
  822. vfp->sdf(what->buf, what->stride,
  823. get_buf_from_mv(in_what, &this_mv), in_what->stride);
  824. CHECK_BETTER
  825. }
  826. }
  827. if (best_site == -1) {
  828. continue;
  829. } else {
  830. best_init_s = t;
  831. k = best_site;
  832. }
  833. }
  834. if (best_init_s != -1) {
  835. br += candidates[best_init_s][k].row;
  836. bc += candidates[best_init_s][k].col;
  837. }
  838. }
  839. // If the center point is still the best, just skip this and move to
  840. // the refinement step.
  841. if (best_init_s != -1) {
  842. int best_site = -1;
  843. s = best_init_s;
  844. do {
  845. // No need to search all 6 points the 1st time if initial search was used
  846. if (!do_init_search || s != best_init_s) {
  847. if (check_bounds(&x->mv_limits, br, bc, 1 << s)) {
  848. for (i = 0; i < num_candidates[s]; i++) {
  849. const MV this_mv = { br + candidates[s][i].row,
  850. bc + candidates[s][i].col };
  851. thissad =
  852. vfp->sdf(what->buf, what->stride,
  853. get_buf_from_mv(in_what, &this_mv), in_what->stride);
  854. CHECK_BETTER
  855. }
  856. } else {
  857. for (i = 0; i < num_candidates[s]; i++) {
  858. const MV this_mv = { br + candidates[s][i].row,
  859. bc + candidates[s][i].col };
  860. if (!is_mv_in(&x->mv_limits, &this_mv)) continue;
  861. thissad =
  862. vfp->sdf(what->buf, what->stride,
  863. get_buf_from_mv(in_what, &this_mv), in_what->stride);
  864. CHECK_BETTER
  865. }
  866. }
  867. if (best_site == -1) {
  868. continue;
  869. } else {
  870. br += candidates[s][best_site].row;
  871. bc += candidates[s][best_site].col;
  872. k = best_site;
  873. }
  874. }
  875. do {
  876. int next_chkpts_indices[PATTERN_CANDIDATES_REF];
  877. best_site = -1;
  878. next_chkpts_indices[0] = (k == 0) ? num_candidates[s] - 1 : k - 1;
  879. next_chkpts_indices[1] = k;
  880. next_chkpts_indices[2] = (k == num_candidates[s] - 1) ? 0 : k + 1;
  881. if (check_bounds(&x->mv_limits, br, bc, 1 << s)) {
  882. for (i = 0; i < PATTERN_CANDIDATES_REF; i++) {
  883. const MV this_mv = {
  884. br + candidates[s][next_chkpts_indices[i]].row,
  885. bc + candidates[s][next_chkpts_indices[i]].col
  886. };
  887. thissad =
  888. vfp->sdf(what->buf, what->stride,
  889. get_buf_from_mv(in_what, &this_mv), in_what->stride);
  890. CHECK_BETTER
  891. }
  892. } else {
  893. for (i = 0; i < PATTERN_CANDIDATES_REF; i++) {
  894. const MV this_mv = {
  895. br + candidates[s][next_chkpts_indices[i]].row,
  896. bc + candidates[s][next_chkpts_indices[i]].col
  897. };
  898. if (!is_mv_in(&x->mv_limits, &this_mv)) continue;
  899. thissad =
  900. vfp->sdf(what->buf, what->stride,
  901. get_buf_from_mv(in_what, &this_mv), in_what->stride);
  902. CHECK_BETTER
  903. }
  904. }
  905. if (best_site != -1) {
  906. k = next_chkpts_indices[best_site];
  907. br += candidates[s][k].row;
  908. bc += candidates[s][k].col;
  909. }
  910. } while (best_site != -1);
  911. } while (s--);
  912. }
  913. // Returns the one-away integer pel sad values around the best as follows:
  914. // cost_list[0]: cost at the best integer pel
  915. // cost_list[1]: cost at delta {0, -1} (left) from the best integer pel
  916. // cost_list[2]: cost at delta { 1, 0} (bottom) from the best integer pel
  917. // cost_list[3]: cost at delta { 0, 1} (right) from the best integer pel
  918. // cost_list[4]: cost at delta {-1, 0} (top) from the best integer pel
  919. if (cost_list) {
  920. const MV best_mv = { br, bc };
  921. calc_int_cost_list(x, &fcenter_mv, sad_per_bit, vfp, &best_mv, cost_list);
  922. }
  923. best_mv->row = br;
  924. best_mv->col = bc;
  925. return bestsad;
  926. }
  927. // A specialized function where the smallest scale search candidates
  928. // are 4 1-away neighbors, and cost_list is non-null
  929. // TODO(debargha): Merge this function with the one above. Also remove
  930. // use_mvcost option since it is always 1, to save unnecessary branches.
  931. static int vp9_pattern_search_sad(
  932. const MACROBLOCK *x, MV *ref_mv, int search_param, int sad_per_bit,
  933. int do_init_search, int *cost_list, const vp9_variance_fn_ptr_t *vfp,
  934. int use_mvcost, const MV *center_mv, MV *best_mv,
  935. const int num_candidates[MAX_PATTERN_SCALES],
  936. const MV candidates[MAX_PATTERN_SCALES][MAX_PATTERN_CANDIDATES]) {
  937. const MACROBLOCKD *const xd = &x->e_mbd;
  938. static const int search_param_to_steps[MAX_MVSEARCH_STEPS] = {
  939. 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
  940. };
  941. int i, s, t;
  942. const struct buf_2d *const what = &x->plane[0].src;
  943. const struct buf_2d *const in_what = &xd->plane[0].pre[0];
  944. int br, bc;
  945. int bestsad = INT_MAX;
  946. int thissad;
  947. int k = -1;
  948. const MV fcenter_mv = { center_mv->row >> 3, center_mv->col >> 3 };
  949. int best_init_s = search_param_to_steps[search_param];
  950. // adjust ref_mv to make sure it is within MV range
  951. clamp_mv(ref_mv, x->mv_limits.col_min, x->mv_limits.col_max,
  952. x->mv_limits.row_min, x->mv_limits.row_max);
  953. br = ref_mv->row;
  954. bc = ref_mv->col;
  955. if (cost_list != NULL) {
  956. cost_list[0] = cost_list[1] = cost_list[2] = cost_list[3] = cost_list[4] =
  957. INT_MAX;
  958. }
  959. // Work out the start point for the search
  960. bestsad = vfp->sdf(what->buf, what->stride, get_buf_from_mv(in_what, ref_mv),
  961. in_what->stride) +
  962. mvsad_err_cost(x, ref_mv, &fcenter_mv, sad_per_bit);
  963. // Search all possible scales upto the search param around the center point
  964. // pick the scale of the point that is best as the starting scale of
  965. // further steps around it.
  966. if (do_init_search) {
  967. s = best_init_s;
  968. best_init_s = -1;
  969. for (t = 0; t <= s; ++t) {
  970. int best_site = -1;
  971. if (check_bounds(&x->mv_limits, br, bc, 1 << t)) {
  972. for (i = 0; i < num_candidates[t]; i++) {
  973. const MV this_mv = { br + candidates[t][i].row,
  974. bc + candidates[t][i].col };
  975. thissad =
  976. vfp->sdf(what->buf, what->stride,
  977. get_buf_from_mv(in_what, &this_mv), in_what->stride);
  978. CHECK_BETTER
  979. }
  980. } else {
  981. for (i = 0; i < num_candidates[t]; i++) {
  982. const MV this_mv = { br + candidates[t][i].row,
  983. bc + candidates[t][i].col };
  984. if (!is_mv_in(&x->mv_limits, &this_mv)) continue;
  985. thissad =
  986. vfp->sdf(what->buf, what->stride,
  987. get_buf_from_mv(in_what, &this_mv), in_what->stride);
  988. CHECK_BETTER
  989. }
  990. }
  991. if (best_site == -1) {
  992. continue;
  993. } else {
  994. best_init_s = t;
  995. k = best_site;
  996. }
  997. }
  998. if (best_init_s != -1) {
  999. br += candidates[best_init_s][k].row;
  1000. bc += candidates[best_init_s][k].col;
  1001. }
  1002. }
  1003. // If the center point is still the best, just skip this and move to
  1004. // the refinement step.
  1005. if (best_init_s != -1) {
  1006. int do_sad = (num_candidates[0] == 4 && cost_list != NULL);
  1007. int best_site = -1;
  1008. s = best_init_s;
  1009. for (; s >= do_sad; s--) {
  1010. if (!do_init_search || s != best_init_s) {
  1011. if (check_bounds(&x->mv_limits, br, bc, 1 << s)) {
  1012. for (i = 0; i < num_candidates[s]; i++) {
  1013. const MV this_mv = { br + candidates[s][i].row,
  1014. bc + candidates[s][i].col };
  1015. thissad =
  1016. vfp->sdf(what->buf, what->stride,
  1017. get_buf_from_mv(in_what, &this_mv), in_what->stride);
  1018. CHECK_BETTER
  1019. }
  1020. } else {
  1021. for (i = 0; i < num_candidates[s]; i++) {
  1022. const MV this_mv = { br + candidates[s][i].row,
  1023. bc + candidates[s][i].col };
  1024. if (!is_mv_in(&x->mv_limits, &this_mv)) continue;
  1025. thissad =
  1026. vfp->sdf(what->buf, what->stride,
  1027. get_buf_from_mv(in_what, &this_mv), in_what->stride);
  1028. CHECK_BETTER
  1029. }
  1030. }
  1031. if (best_site == -1) {
  1032. continue;
  1033. } else {
  1034. br += candidates[s][best_site].row;
  1035. bc += candidates[s][best_site].col;
  1036. k = best_site;
  1037. }
  1038. }
  1039. do {
  1040. int next_chkpts_indices[PATTERN_CANDIDATES_REF];
  1041. best_site = -1;
  1042. next_chkpts_indices[0] = (k == 0) ? num_candidates[s] - 1 : k - 1;
  1043. next_chkpts_indices[1] = k;
  1044. next_chkpts_indices[2] = (k == num_candidates[s] - 1) ? 0 : k + 1;
  1045. if (check_bounds(&x->mv_limits, br, bc, 1 << s)) {
  1046. for (i = 0; i < PATTERN_CANDIDATES_REF; i++) {
  1047. const MV this_mv = {
  1048. br + candidates[s][next_chkpts_indices[i]].row,
  1049. bc + candidates[s][next_chkpts_indices[i]].col
  1050. };
  1051. thissad =
  1052. vfp->sdf(what->buf, what->stride,
  1053. get_buf_from_mv(in_what, &this_mv), in_what->stride);
  1054. CHECK_BETTER
  1055. }
  1056. } else {
  1057. for (i = 0; i < PATTERN_CANDIDATES_REF; i++) {
  1058. const MV this_mv = {
  1059. br + candidates[s][next_chkpts_indices[i]].row,
  1060. bc + candidates[s][next_chkpts_indices[i]].col
  1061. };
  1062. if (!is_mv_in(&x->mv_limits, &this_mv)) continue;
  1063. thissad =
  1064. vfp->sdf(what->buf, what->stride,
  1065. get_buf_from_mv(in_what, &this_mv), in_what->stride);
  1066. CHECK_BETTER
  1067. }
  1068. }
  1069. if (best_site != -1) {
  1070. k = next_chkpts_indices[best_site];
  1071. br += candidates[s][k].row;
  1072. bc += candidates[s][k].col;
  1073. }
  1074. } while (best_site != -1);
  1075. }
  1076. // Note: If we enter the if below, then cost_list must be non-NULL.
  1077. if (s == 0) {
  1078. cost_list[0] = bestsad;
  1079. if (!do_init_search || s != best_init_s) {
  1080. if (check_bounds(&x->mv_limits, br, bc, 1 << s)) {
  1081. for (i = 0; i < num_candidates[s]; i++) {
  1082. const MV this_mv = { br + candidates[s][i].row,
  1083. bc + candidates[s][i].col };
  1084. cost_list[i + 1] = thissad =
  1085. vfp->sdf(what->buf, what->stride,
  1086. get_buf_from_mv(in_what, &this_mv), in_what->stride);
  1087. CHECK_BETTER
  1088. }
  1089. } else {
  1090. for (i = 0; i < num_candidates[s]; i++) {
  1091. const MV this_mv = { br + candidates[s][i].row,
  1092. bc + candidates[s][i].col };
  1093. if (!is_mv_in(&x->mv_limits, &this_mv)) continue;
  1094. cost_list[i + 1] = thissad =
  1095. vfp->sdf(what->buf, what->stride,
  1096. get_buf_from_mv(in_what, &this_mv), in_what->stride);
  1097. CHECK_BETTER
  1098. }
  1099. }
  1100. if (best_site != -1) {
  1101. br += candidates[s][best_site].row;
  1102. bc += candidates[s][best_site].col;
  1103. k = best_site;
  1104. }
  1105. }
  1106. while (best_site != -1) {
  1107. int next_chkpts_indices[PATTERN_CANDIDATES_REF];
  1108. best_site = -1;
  1109. next_chkpts_indices[0] = (k == 0) ? num_candidates[s] - 1 : k - 1;
  1110. next_chkpts_indices[1] = k;
  1111. next_chkpts_indices[2] = (k == num_candidates[s] - 1) ? 0 : k + 1;
  1112. cost_list[1] = cost_list[2] = cost_list[3] = cost_list[4] = INT_MAX;
  1113. cost_list[((k + 2) % 4) + 1] = cost_list[0];
  1114. cost_list[0] = bestsad;
  1115. if (check_bounds(&x->mv_limits, br, bc, 1 << s)) {
  1116. for (i = 0; i < PATTERN_CANDIDATES_REF; i++) {
  1117. const MV this_mv = {
  1118. br + candidates[s][next_chkpts_indices[i]].row,
  1119. bc + candidates[s][next_chkpts_indices[i]].col
  1120. };
  1121. cost_list[next_chkpts_indices[i] + 1] = thissad =
  1122. vfp->sdf(what->buf, what->stride,
  1123. get_buf_from_mv(in_what, &this_mv), in_what->stride);
  1124. CHECK_BETTER
  1125. }
  1126. } else {
  1127. for (i = 0; i < PATTERN_CANDIDATES_REF; i++) {
  1128. const MV this_mv = {
  1129. br + candidates[s][next_chkpts_indices[i]].row,
  1130. bc + candidates[s][next_chkpts_indices[i]].col
  1131. };
  1132. if (!is_mv_in(&x->mv_limits, &this_mv)) {
  1133. cost_list[next_chkpts_indices[i] + 1] = INT_MAX;
  1134. continue;
  1135. }
  1136. cost_list[next_chkpts_indices[i] + 1] = thissad =
  1137. vfp->sdf(what->buf, what->stride,
  1138. get_buf_from_mv(in_what, &this_mv), in_what->stride);
  1139. CHECK_BETTER
  1140. }
  1141. }
  1142. if (best_site != -1) {
  1143. k = next_chkpts_indices[best_site];
  1144. br += candidates[s][k].row;
  1145. bc += candidates[s][k].col;
  1146. }
  1147. }
  1148. }
  1149. }
  1150. // Returns the one-away integer pel sad values around the best as follows:
  1151. // cost_list[0]: sad at the best integer pel
  1152. // cost_list[1]: sad at delta {0, -1} (left) from the best integer pel
  1153. // cost_list[2]: sad at delta { 1, 0} (bottom) from the best integer pel
  1154. // cost_list[3]: sad at delta { 0, 1} (right) from the best integer pel
  1155. // cost_list[4]: sad at delta {-1, 0} (top) from the best integer pel
  1156. if (cost_list) {
  1157. static const MV neighbors[4] = { { 0, -1 }, { 1, 0 }, { 0, 1 }, { -1, 0 } };
  1158. if (cost_list[0] == INT_MAX) {
  1159. cost_list[0] = bestsad;
  1160. if (check_bounds(&x->mv_limits, br, bc, 1)) {
  1161. for (i = 0; i < 4; i++) {
  1162. const MV this_mv = { br + neighbors[i].row, bc + neighbors[i].col };
  1163. cost_list[i + 1] =
  1164. vfp->sdf(what->buf, what->stride,
  1165. get_buf_from_mv(in_what, &this_mv), in_what->stride);
  1166. }
  1167. } else {
  1168. for (i = 0; i < 4; i++) {
  1169. const MV this_mv = { br + neighbors[i].row, bc + neighbors[i].col };
  1170. if (!is_mv_in(&x->mv_limits, &this_mv))
  1171. cost_list[i + 1] = INT_MAX;
  1172. else
  1173. cost_list[i + 1] =
  1174. vfp->sdf(what->buf, what->stride,
  1175. get_buf_from_mv(in_what, &this_mv), in_what->stride);
  1176. }
  1177. }
  1178. } else {
  1179. if (use_mvcost) {
  1180. for (i = 0; i < 4; i++) {
  1181. const MV this_mv = { br + neighbors[i].row, bc + neighbors[i].col };
  1182. if (cost_list[i + 1] != INT_MAX) {
  1183. cost_list[i + 1] +=
  1184. mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit);
  1185. }
  1186. }
  1187. }
  1188. }
  1189. }
  1190. best_mv->row = br;
  1191. best_mv->col = bc;
  1192. return bestsad;
  1193. }
  1194. int vp9_get_mvpred_var(const MACROBLOCK *x, const MV *best_mv,
  1195. const MV *center_mv, const vp9_variance_fn_ptr_t *vfp,
  1196. int use_mvcost) {
  1197. const MACROBLOCKD *const xd = &x->e_mbd;
  1198. const struct buf_2d *const what = &x->plane[0].src;
  1199. const struct buf_2d *const in_what = &xd->plane[0].pre[0];
  1200. const MV mv = { best_mv->row * 8, best_mv->col * 8 };
  1201. uint32_t unused;
  1202. #if CONFIG_VP9_HIGHBITDEPTH
  1203. uint64_t err =
  1204. vfp->vf(what->buf, what->stride, get_buf_from_mv(in_what, best_mv),
  1205. in_what->stride, &unused);
  1206. err += (use_mvcost ? mv_err_cost(&mv, center_mv, x->nmvjointcost, x->mvcost,
  1207. x->errorperbit)
  1208. : 0);
  1209. if (err >= INT_MAX) return INT_MAX;
  1210. return (int)err;
  1211. #else
  1212. return vfp->vf(what->buf, what->stride, get_buf_from_mv(in_what, best_mv),
  1213. in_what->stride, &unused) +
  1214. (use_mvcost ? mv_err_cost(&mv, center_mv, x->nmvjointcost, x->mvcost,
  1215. x->errorperbit)
  1216. : 0);
  1217. #endif
  1218. }
  1219. int vp9_get_mvpred_av_var(const MACROBLOCK *x, const MV *best_mv,
  1220. const MV *center_mv, const uint8_t *second_pred,
  1221. const vp9_variance_fn_ptr_t *vfp, int use_mvcost) {
  1222. const MACROBLOCKD *const xd = &x->e_mbd;
  1223. const struct buf_2d *const what = &x->plane[0].src;
  1224. const struct buf_2d *const in_what = &xd->plane[0].pre[0];
  1225. const MV mv = { best_mv->row * 8, best_mv->col * 8 };
  1226. unsigned int unused;
  1227. return vfp->svaf(get_buf_from_mv(in_what, best_mv), in_what->stride, 0, 0,
  1228. what->buf, what->stride, &unused, second_pred) +
  1229. (use_mvcost ? mv_err_cost(&mv, center_mv, x->nmvjointcost, x->mvcost,
  1230. x->errorperbit)
  1231. : 0);
  1232. }
  1233. static int hex_search(const MACROBLOCK *x, MV *ref_mv, int search_param,
  1234. int sad_per_bit, int do_init_search, int *cost_list,
  1235. const vp9_variance_fn_ptr_t *vfp, int use_mvcost,
  1236. const MV *center_mv, MV *best_mv) {
  1237. // First scale has 8-closest points, the rest have 6 points in hex shape
  1238. // at increasing scales
  1239. static const int hex_num_candidates[MAX_PATTERN_SCALES] = { 8, 6, 6, 6, 6, 6,
  1240. 6, 6, 6, 6, 6 };
  1241. // Note that the largest candidate step at each scale is 2^scale
  1242. /* clang-format off */
  1243. static const MV hex_candidates[MAX_PATTERN_SCALES][MAX_PATTERN_CANDIDATES] = {
  1244. { { -1, -1 }, { 0, -1 }, { 1, -1 }, { 1, 0 }, { 1, 1 }, { 0, 1 }, { -1, 1 },
  1245. { -1, 0 } },
  1246. { { -1, -2 }, { 1, -2 }, { 2, 0 }, { 1, 2 }, { -1, 2 }, { -2, 0 } },
  1247. { { -2, -4 }, { 2, -4 }, { 4, 0 }, { 2, 4 }, { -2, 4 }, { -4, 0 } },
  1248. { { -4, -8 }, { 4, -8 }, { 8, 0 }, { 4, 8 }, { -4, 8 }, { -8, 0 } },
  1249. { { -8, -16 }, { 8, -16 }, { 16, 0 }, { 8, 16 }, { -8, 16 }, { -16, 0 } },
  1250. { { -16, -32 }, { 16, -32 }, { 32, 0 }, { 16, 32 }, { -16, 32 },
  1251. { -32, 0 } },
  1252. { { -32, -64 }, { 32, -64 }, { 64, 0 }, { 32, 64 }, { -32, 64 },
  1253. { -64, 0 } },
  1254. { { -64, -128 }, { 64, -128 }, { 128, 0 }, { 64, 128 }, { -64, 128 },
  1255. { -128, 0 } },
  1256. { { -128, -256 }, { 128, -256 }, { 256, 0 }, { 128, 256 }, { -128, 256 },
  1257. { -256, 0 } },
  1258. { { -256, -512 }, { 256, -512 }, { 512, 0 }, { 256, 512 }, { -256, 512 },
  1259. { -512, 0 } },
  1260. { { -512, -1024 }, { 512, -1024 }, { 1024, 0 }, { 512, 1024 },
  1261. { -512, 1024 }, { -1024, 0 } }
  1262. };
  1263. /* clang-format on */
  1264. return vp9_pattern_search(
  1265. x, ref_mv, search_param, sad_per_bit, do_init_search, cost_list, vfp,
  1266. use_mvcost, center_mv, best_mv, hex_num_candidates, hex_candidates);
  1267. }
  1268. static int bigdia_search(const MACROBLOCK *x, MV *ref_mv, int search_param,
  1269. int sad_per_bit, int do_init_search, int *cost_list,
  1270. const vp9_variance_fn_ptr_t *vfp, int use_mvcost,
  1271. const MV *center_mv, MV *best_mv) {
  1272. // First scale has 4-closest points, the rest have 8 points in diamond
  1273. // shape at increasing scales
  1274. static const int bigdia_num_candidates[MAX_PATTERN_SCALES] = {
  1275. 4, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
  1276. };
  1277. // Note that the largest candidate step at each scale is 2^scale
  1278. /* clang-format off */
  1279. static const MV
  1280. bigdia_candidates[MAX_PATTERN_SCALES][MAX_PATTERN_CANDIDATES] = {
  1281. { { 0, -1 }, { 1, 0 }, { 0, 1 }, { -1, 0 } },
  1282. { { -1, -1 }, { 0, -2 }, { 1, -1 }, { 2, 0 }, { 1, 1 }, { 0, 2 },
  1283. { -1, 1 }, { -2, 0 } },
  1284. { { -2, -2 }, { 0, -4 }, { 2, -2 }, { 4, 0 }, { 2, 2 }, { 0, 4 },
  1285. { -2, 2 }, { -4, 0 } },
  1286. { { -4, -4 }, { 0, -8 }, { 4, -4 }, { 8, 0 }, { 4, 4 }, { 0, 8 },
  1287. { -4, 4 }, { -8, 0 } },
  1288. { { -8, -8 }, { 0, -16 }, { 8, -8 }, { 16, 0 }, { 8, 8 }, { 0, 16 },
  1289. { -8, 8 }, { -16, 0 } },
  1290. { { -16, -16 }, { 0, -32 }, { 16, -16 }, { 32, 0 }, { 16, 16 },
  1291. { 0, 32 }, { -16, 16 }, { -32, 0 } },
  1292. { { -32, -32 }, { 0, -64 }, { 32, -32 }, { 64, 0 }, { 32, 32 },
  1293. { 0, 64 }, { -32, 32 }, { -64, 0 } },
  1294. { { -64, -64 }, { 0, -128 }, { 64, -64 }, { 128, 0 }, { 64, 64 },
  1295. { 0, 128 }, { -64, 64 }, { -128, 0 } },
  1296. { { -128, -128 }, { 0, -256 }, { 128, -128 }, { 256, 0 }, { 128, 128 },
  1297. { 0, 256 }, { -128, 128 }, { -256, 0 } },
  1298. { { -256, -256 }, { 0, -512 }, { 256, -256 }, { 512, 0 }, { 256, 256 },
  1299. { 0, 512 }, { -256, 256 }, { -512, 0 } },
  1300. { { -512, -512 }, { 0, -1024 }, { 512, -512 }, { 1024, 0 },
  1301. { 512, 512 }, { 0, 1024 }, { -512, 512 }, { -1024, 0 } }
  1302. };
  1303. /* clang-format on */
  1304. return vp9_pattern_search_sad(
  1305. x, ref_mv, search_param, sad_per_bit, do_init_search, cost_list, vfp,
  1306. use_mvcost, center_mv, best_mv, bigdia_num_candidates, bigdia_candidates);
  1307. }
  1308. static int square_search(const MACROBLOCK *x, MV *ref_mv, int search_param,
  1309. int sad_per_bit, int do_init_search, int *cost_list,
  1310. const vp9_variance_fn_ptr_t *vfp, int use_mvcost,
  1311. const MV *center_mv, MV *best_mv) {
  1312. // All scales have 8 closest points in square shape
  1313. static const int square_num_candidates[MAX_PATTERN_SCALES] = {
  1314. 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
  1315. };
  1316. // Note that the largest candidate step at each scale is 2^scale
  1317. /* clang-format off */
  1318. static const MV
  1319. square_candidates[MAX_PATTERN_SCALES][MAX_PATTERN_CANDIDATES] = {
  1320. { { -1, -1 }, { 0, -1 }, { 1, -1 }, { 1, 0 }, { 1, 1 }, { 0, 1 },
  1321. { -1, 1 }, { -1, 0 } },
  1322. { { -2, -2 }, { 0, -2 }, { 2, -2 }, { 2, 0 }, { 2, 2 }, { 0, 2 },
  1323. { -2, 2 }, { -2, 0 } },
  1324. { { -4, -4 }, { 0, -4 }, { 4, -4 }, { 4, 0 }, { 4, 4 }, { 0, 4 },
  1325. { -4, 4 }, { -4, 0 } },
  1326. { { -8, -8 }, { 0, -8 }, { 8, -8 }, { 8, 0 }, { 8, 8 }, { 0, 8 },
  1327. { -8, 8 }, { -8, 0 } },
  1328. { { -16, -16 }, { 0, -16 }, { 16, -16 }, { 16, 0 }, { 16, 16 },
  1329. { 0, 16 }, { -16, 16 }, { -16, 0 } },
  1330. { { -32, -32 }, { 0, -32 }, { 32, -32 }, { 32, 0 }, { 32, 32 },
  1331. { 0, 32 }, { -32, 32 }, { -32, 0 } },
  1332. { { -64, -64 }, { 0, -64 }, { 64, -64 }, { 64, 0 }, { 64, 64 },
  1333. { 0, 64 }, { -64, 64 }, { -64, 0 } },
  1334. { { -128, -128 }, { 0, -128 }, { 128, -128 }, { 128, 0 }, { 128, 128 },
  1335. { 0, 128 }, { -128, 128 }, { -128, 0 } },
  1336. { { -256, -256 }, { 0, -256 }, { 256, -256 }, { 256, 0 }, { 256, 256 },
  1337. { 0, 256 }, { -256, 256 }, { -256, 0 } },
  1338. { { -512, -512 }, { 0, -512 }, { 512, -512 }, { 512, 0 }, { 512, 512 },
  1339. { 0, 512 }, { -512, 512 }, { -512, 0 } },
  1340. { { -1024, -1024 }, { 0, -1024 }, { 1024, -1024 }, { 1024, 0 },
  1341. { 1024, 1024 }, { 0, 1024 }, { -1024, 1024 }, { -1024, 0 } }
  1342. };
  1343. /* clang-format on */
  1344. return vp9_pattern_search(
  1345. x, ref_mv, search_param, sad_per_bit, do_init_search, cost_list, vfp,
  1346. use_mvcost, center_mv, best_mv, square_num_candidates, square_candidates);
  1347. }
  1348. static int fast_hex_search(const MACROBLOCK *x, MV *ref_mv, int search_param,
  1349. int sad_per_bit,
  1350. int do_init_search, // must be zero for fast_hex
  1351. int *cost_list, const vp9_variance_fn_ptr_t *vfp,
  1352. int use_mvcost, const MV *center_mv, MV *best_mv) {
  1353. return hex_search(x, ref_mv, VPXMAX(MAX_MVSEARCH_STEPS - 2, search_param),
  1354. sad_per_bit, do_init_search, cost_list, vfp, use_mvcost,
  1355. center_mv, best_mv);
  1356. }
  1357. static int fast_dia_search(const MACROBLOCK *x, MV *ref_mv, int search_param,
  1358. int sad_per_bit, int do_init_search, int *cost_list,
  1359. const vp9_variance_fn_ptr_t *vfp, int use_mvcost,
  1360. const MV *center_mv, MV *best_mv) {
  1361. return bigdia_search(x, ref_mv, VPXMAX(MAX_MVSEARCH_STEPS - 2, search_param),
  1362. sad_per_bit, do_init_search, cost_list, vfp, use_mvcost,
  1363. center_mv, best_mv);
  1364. }
  1365. #undef CHECK_BETTER
  1366. // Exhuastive motion search around a given centre position with a given
  1367. // step size.
  1368. static int exhuastive_mesh_search(const MACROBLOCK *x, MV *ref_mv, MV *best_mv,
  1369. int range, int step, int sad_per_bit,
  1370. const vp9_variance_fn_ptr_t *fn_ptr,
  1371. const MV *center_mv) {
  1372. const MACROBLOCKD *const xd = &x->e_mbd;
  1373. const struct buf_2d *const what = &x->plane[0].src;
  1374. const struct buf_2d *const in_what = &xd->plane[0].pre[0];
  1375. MV fcenter_mv = { center_mv->row, center_mv->col };
  1376. unsigned int best_sad = INT_MAX;
  1377. int r, c, i;
  1378. int start_col, end_col, start_row, end_row;
  1379. int col_step = (step > 1) ? step : 4;
  1380. assert(step >= 1);
  1381. clamp_mv(&fcenter_mv, x->mv_limits.col_min, x->mv_limits.col_max,
  1382. x->mv_limits.row_min, x->mv_limits.row_max);
  1383. *best_mv = fcenter_mv;
  1384. best_sad =
  1385. fn_ptr->sdf(what->buf, what->stride,
  1386. get_buf_from_mv(in_what, &fcenter_mv), in_what->stride) +
  1387. mvsad_err_cost(x, &fcenter_mv, ref_mv, sad_per_bit);
  1388. start_row = VPXMAX(-range, x->mv_limits.row_min - fcenter_mv.row);
  1389. start_col = VPXMAX(-range, x->mv_limits.col_min - fcenter_mv.col);
  1390. end_row = VPXMIN(range, x->mv_limits.row_max - fcenter_mv.row);
  1391. end_col = VPXMIN(range, x->mv_limits.col_max - fcenter_mv.col);
  1392. for (r = start_row; r <= end_row; r += step) {
  1393. for (c = start_col; c <= end_col; c += col_step) {
  1394. // Step > 1 means we are not checking every location in this pass.
  1395. if (step > 1) {
  1396. const MV mv = { fcenter_mv.row + r, fcenter_mv.col + c };
  1397. unsigned int sad =
  1398. fn_ptr->sdf(what->buf, what->stride, get_buf_from_mv(in_what, &mv),
  1399. in_what->stride);
  1400. if (sad < best_sad) {
  1401. sad += mvsad_err_cost(x, &mv, ref_mv, sad_per_bit);
  1402. if (sad < best_sad) {
  1403. best_sad = sad;
  1404. *best_mv = mv;
  1405. }
  1406. }
  1407. } else {
  1408. // 4 sads in a single call if we are checking every location
  1409. if (c + 3 <= end_col) {
  1410. unsigned int sads[4];
  1411. const uint8_t *addrs[4];
  1412. for (i = 0; i < 4; ++i) {
  1413. const MV mv = { fcenter_mv.row + r, fcenter_mv.col + c + i };
  1414. addrs[i] = get_buf_from_mv(in_what, &mv);
  1415. }
  1416. fn_ptr->sdx4df(what->buf, what->stride, addrs, in_what->stride, sads);
  1417. for (i = 0; i < 4; ++i) {
  1418. if (sads[i] < best_sad) {
  1419. const MV mv = { fcenter_mv.row + r, fcenter_mv.col + c + i };
  1420. const unsigned int sad =
  1421. sads[i] + mvsad_err_cost(x, &mv, ref_mv, sad_per_bit);
  1422. if (sad < best_sad) {
  1423. best_sad = sad;
  1424. *best_mv = mv;
  1425. }
  1426. }
  1427. }
  1428. } else {
  1429. for (i = 0; i < end_col - c; ++i) {
  1430. const MV mv = { fcenter_mv.row + r, fcenter_mv.col + c + i };
  1431. unsigned int sad =
  1432. fn_ptr->sdf(what->buf, what->stride,
  1433. get_buf_from_mv(in_what, &mv), in_what->stride);
  1434. if (sad < best_sad) {
  1435. sad += mvsad_err_cost(x, &mv, ref_mv, sad_per_bit);
  1436. if (sad < best_sad) {
  1437. best_sad = sad;
  1438. *best_mv = mv;
  1439. }
  1440. }
  1441. }
  1442. }
  1443. }
  1444. }
  1445. }
  1446. return best_sad;
  1447. }
  1448. int vp9_diamond_search_sad_c(const MACROBLOCK *x, const search_site_config *cfg,
  1449. MV *ref_mv, MV *best_mv, int search_param,
  1450. int sad_per_bit, int *num00,
  1451. const vp9_variance_fn_ptr_t *fn_ptr,
  1452. const MV *center_mv) {
  1453. int i, j, step;
  1454. const MACROBLOCKD *const xd = &x->e_mbd;
  1455. uint8_t *what = x->plane[0].src.buf;
  1456. const int what_stride = x->plane[0].src.stride;
  1457. const uint8_t *in_what;
  1458. const int in_what_stride = xd->plane[0].pre[0].stride;
  1459. const uint8_t *best_address;
  1460. unsigned int bestsad = INT_MAX;
  1461. int best_site = -1;
  1462. int last_site = -1;
  1463. int ref_row;
  1464. int ref_col;
  1465. // search_param determines the length of the initial step and hence the number
  1466. // of iterations.
  1467. // 0 = initial step (MAX_FIRST_STEP) pel
  1468. // 1 = (MAX_FIRST_STEP/2) pel,
  1469. // 2 = (MAX_FIRST_STEP/4) pel...
  1470. // const search_site *ss = &cfg->ss[search_param * cfg->searches_per_step];
  1471. const MV *ss_mv = &cfg->ss_mv[search_param * cfg->searches_per_step];
  1472. const intptr_t *ss_os = &cfg->ss_os[search_param * cfg->searches_per_step];
  1473. const int tot_steps = cfg->total_steps - search_param;
  1474. const MV fcenter_mv = { center_mv->row >> 3, center_mv->col >> 3 };
  1475. clamp_mv(ref_mv, x->mv_limits.col_min, x->mv_limits.col_max,
  1476. x->mv_limits.row_min, x->mv_limits.row_max);
  1477. ref_row = ref_mv->row;
  1478. ref_col = ref_mv->col;
  1479. *num00 = 0;
  1480. best_mv->row = ref_row;
  1481. best_mv->col = ref_col;
  1482. // Work out the start point for the search
  1483. in_what = xd->plane[0].pre[0].buf + ref_row * in_what_stride + ref_col;
  1484. best_address = in_what;
  1485. // Check the starting position
  1486. bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride) +
  1487. mvsad_err_cost(x, best_mv, &fcenter_mv, sad_per_bit);
  1488. i = 0;
  1489. for (step = 0; step < tot_steps; step++) {
  1490. int all_in = 1, t;
  1491. // All_in is true if every one of the points we are checking are within
  1492. // the bounds of the image.
  1493. all_in &= ((best_mv->row + ss_mv[i].row) > x->mv_limits.row_min);
  1494. all_in &= ((best_mv->row + ss_mv[i + 1].row) < x->mv_limits.row_max);
  1495. all_in &= ((best_mv->col + ss_mv[i + 2].col) > x->mv_limits.col_min);
  1496. all_in &= ((best_mv->col + ss_mv[i + 3].col) < x->mv_limits.col_max);
  1497. // If all the pixels are within the bounds we don't check whether the
  1498. // search point is valid in this loop, otherwise we check each point
  1499. // for validity..
  1500. if (all_in) {
  1501. unsigned int sad_array[4];
  1502. for (j = 0; j < cfg->searches_per_step; j += 4) {
  1503. unsigned char const *block_offset[4];
  1504. for (t = 0; t < 4; t++) block_offset[t] = ss_os[i + t] + best_address;
  1505. fn_ptr->sdx4df(what, what_stride, block_offset, in_what_stride,
  1506. sad_array);
  1507. for (t = 0; t < 4; t++, i++) {
  1508. if (sad_array[t] < bestsad) {
  1509. const MV this_mv = { best_mv->row + ss_mv[i].row,
  1510. best_mv->col + ss_mv[i].col };
  1511. sad_array[t] +=
  1512. mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit);
  1513. if (sad_array[t] < bestsad) {
  1514. bestsad = sad_array[t];
  1515. best_site = i;
  1516. }
  1517. }
  1518. }
  1519. }
  1520. } else {
  1521. for (j = 0; j < cfg->searches_per_step; j++) {
  1522. // Trap illegal vectors
  1523. const MV this_mv = { best_mv->row + ss_mv[i].row,
  1524. best_mv->col + ss_mv[i].col };
  1525. if (is_mv_in(&x->mv_limits, &this_mv)) {
  1526. const uint8_t *const check_here = ss_os[i] + best_address;
  1527. unsigned int thissad =
  1528. fn_ptr->sdf(what, what_stride, check_here, in_what_stride);
  1529. if (thissad < bestsad) {
  1530. thissad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit);
  1531. if (thissad < bestsad) {
  1532. bestsad = thissad;
  1533. best_site = i;
  1534. }
  1535. }
  1536. }
  1537. i++;
  1538. }
  1539. }
  1540. if (best_site != last_site) {
  1541. best_mv->row += ss_mv[best_site].row;
  1542. best_mv->col += ss_mv[best_site].col;
  1543. best_address += ss_os[best_site];
  1544. last_site = best_site;
  1545. #if defined(NEW_DIAMOND_SEARCH)
  1546. while (1) {
  1547. const MV this_mv = { best_mv->row + ss_mv[best_site].row,
  1548. best_mv->col + ss_mv[best_site].col };
  1549. if (is_mv_in(&x->mv_limits, &this_mv)) {
  1550. const uint8_t *const check_here = ss_os[best_site] + best_address;
  1551. unsigned int thissad =
  1552. fn_ptr->sdf(what, what_stride, check_here, in_what_stride);
  1553. if (thissad < bestsad) {
  1554. thissad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit);
  1555. if (thissad < bestsad) {
  1556. bestsad = thissad;
  1557. best_mv->row += ss_mv[best_site].row;
  1558. best_mv->col += ss_mv[best_site].col;
  1559. best_address += ss_os[best_site];
  1560. continue;
  1561. }
  1562. }
  1563. }
  1564. break;
  1565. }
  1566. #endif
  1567. } else if (best_address == in_what) {
  1568. (*num00)++;
  1569. }
  1570. }
  1571. return bestsad;
  1572. }
  1573. static int vector_match(int16_t *ref, int16_t *src, int bwl) {
  1574. int best_sad = INT_MAX;
  1575. int this_sad;
  1576. int d;
  1577. int center, offset = 0;
  1578. int bw = 4 << bwl; // redundant variable, to be changed in the experiments.
  1579. for (d = 0; d <= bw; d += 16) {
  1580. this_sad = vpx_vector_var(&ref[d], src, bwl);
  1581. if (this_sad < best_sad) {
  1582. best_sad = this_sad;
  1583. offset = d;
  1584. }
  1585. }
  1586. center = offset;
  1587. for (d = -8; d <= 8; d += 16) {
  1588. int this_pos = offset + d;
  1589. // check limit
  1590. if (this_pos < 0 || this_pos > bw) continue;
  1591. this_sad = vpx_vector_var(&ref[this_pos], src, bwl);
  1592. if (this_sad < best_sad) {
  1593. best_sad = this_sad;
  1594. center = this_pos;
  1595. }
  1596. }
  1597. offset = center;
  1598. for (d = -4; d <= 4; d += 8) {
  1599. int this_pos = offset + d;
  1600. // check limit
  1601. if (this_pos < 0 || this_pos > bw) continue;
  1602. this_sad = vpx_vector_var(&ref[this_pos], src, bwl);
  1603. if (this_sad < best_sad) {
  1604. best_sad = this_sad;
  1605. center = this_pos;
  1606. }
  1607. }
  1608. offset = center;
  1609. for (d = -2; d <= 2; d += 4) {
  1610. int this_pos = offset + d;
  1611. // check limit
  1612. if (this_pos < 0 || this_pos > bw) continue;
  1613. this_sad = vpx_vector_var(&ref[this_pos], src, bwl);
  1614. if (this_sad < best_sad) {
  1615. best_sad = this_sad;
  1616. center = this_pos;
  1617. }
  1618. }
  1619. offset = center;
  1620. for (d = -1; d <= 1; d += 2) {
  1621. int this_pos = offset + d;
  1622. // check limit
  1623. if (this_pos < 0 || this_pos > bw) continue;
  1624. this_sad = vpx_vector_var(&ref[this_pos], src, bwl);
  1625. if (this_sad < best_sad) {
  1626. best_sad = this_sad;
  1627. center = this_pos;
  1628. }
  1629. }
  1630. return (center - (bw >> 1));
  1631. }
  1632. static const MV search_pos[4] = {
  1633. { -1, 0 }, { 0, -1 }, { 0, 1 }, { 1, 0 },
  1634. };
  1635. unsigned int vp9_int_pro_motion_estimation(const VP9_COMP *cpi, MACROBLOCK *x,
  1636. BLOCK_SIZE bsize, int mi_row,
  1637. int mi_col) {
  1638. MACROBLOCKD *xd = &x->e_mbd;
  1639. MODE_INFO *mi = xd->mi[0];
  1640. struct buf_2d backup_yv12[MAX_MB_PLANE] = { { 0, 0 } };
  1641. DECLARE_ALIGNED(16, int16_t, hbuf[128]);
  1642. DECLARE_ALIGNED(16, int16_t, vbuf[128]);
  1643. DECLARE_ALIGNED(16, int16_t, src_hbuf[64]);
  1644. DECLARE_ALIGNED(16, int16_t, src_vbuf[64]);
  1645. int idx;
  1646. const int bw = 4 << b_width_log2_lookup[bsize];
  1647. const int bh = 4 << b_height_log2_lookup[bsize];
  1648. const int search_width = bw << 1;
  1649. const int search_height = bh << 1;
  1650. const int src_stride = x->plane[0].src.stride;
  1651. const int ref_stride = xd->plane[0].pre[0].stride;
  1652. uint8_t const *ref_buf, *src_buf;
  1653. MV *tmp_mv = &xd->mi[0]->mv[0].as_mv;
  1654. unsigned int best_sad, tmp_sad, this_sad[4];
  1655. MV this_mv;
  1656. const int norm_factor = 3 + (bw >> 5);
  1657. const YV12_BUFFER_CONFIG *scaled_ref_frame =
  1658. vp9_get_scaled_ref_frame(cpi, mi->ref_frame[0]);
  1659. if (scaled_ref_frame) {
  1660. int i;
  1661. // Swap out the reference frame for a version that's been scaled to
  1662. // match the resolution of the current frame, allowing the existing
  1663. // motion search code to be used without additional modifications.
  1664. for (i = 0; i < MAX_MB_PLANE; i++) backup_yv12[i] = xd->plane[i].pre[0];
  1665. vp9_setup_pre_planes(xd, 0, scaled_ref_frame, mi_row, mi_col, NULL);
  1666. }
  1667. #if CONFIG_VP9_HIGHBITDEPTH
  1668. {
  1669. unsigned int this_sad;
  1670. tmp_mv->row = 0;
  1671. tmp_mv->col = 0;
  1672. this_sad = cpi->fn_ptr[bsize].sdf(x->plane[0].src.buf, src_stride,
  1673. xd->plane[0].pre[0].buf, ref_stride);
  1674. if (scaled_ref_frame) {
  1675. int i;
  1676. for (i = 0; i < MAX_MB_PLANE; i++) xd->plane[i].pre[0] = backup_yv12[i];
  1677. }
  1678. return this_sad;
  1679. }
  1680. #endif
  1681. // Set up prediction 1-D reference set
  1682. ref_buf = xd->plane[0].pre[0].buf - (bw >> 1);
  1683. for (idx = 0; idx < search_width; idx += 16) {
  1684. vpx_int_pro_row(&hbuf[idx], ref_buf, ref_stride, bh);
  1685. ref_buf += 16;
  1686. }
  1687. ref_buf = xd->plane[0].pre[0].buf - (bh >> 1) * ref_stride;
  1688. for (idx = 0; idx < search_height; ++idx) {
  1689. vbuf[idx] = vpx_int_pro_col(ref_buf, bw) >> norm_factor;
  1690. ref_buf += ref_stride;
  1691. }
  1692. // Set up src 1-D reference set
  1693. for (idx = 0; idx < bw; idx += 16) {
  1694. src_buf = x->plane[0].src.buf + idx;
  1695. vpx_int_pro_row(&src_hbuf[idx], src_buf, src_stride, bh);
  1696. }
  1697. src_buf = x->plane[0].src.buf;
  1698. for (idx = 0; idx < bh; ++idx) {
  1699. src_vbuf[idx] = vpx_int_pro_col(src_buf, bw) >> norm_factor;
  1700. src_buf += src_stride;
  1701. }
  1702. // Find the best match per 1-D search
  1703. tmp_mv->col = vector_match(hbuf, src_hbuf, b_width_log2_lookup[bsize]);
  1704. tmp_mv->row = vector_match(vbuf, src_vbuf, b_height_log2_lookup[bsize]);
  1705. this_mv = *tmp_mv;
  1706. src_buf = x->plane[0].src.buf;
  1707. ref_buf = xd->plane[0].pre[0].buf + this_mv.row * ref_stride + this_mv.col;
  1708. best_sad = cpi->fn_ptr[bsize].sdf(src_buf, src_stride, ref_buf, ref_stride);
  1709. {
  1710. const uint8_t *const pos[4] = {
  1711. ref_buf - ref_stride, ref_buf - 1, ref_buf + 1, ref_buf + ref_stride,
  1712. };
  1713. cpi->fn_ptr[bsize].sdx4df(src_buf, src_stride, pos, ref_stride, this_sad);
  1714. }
  1715. for (idx = 0; idx < 4; ++idx) {
  1716. if (this_sad[idx] < best_sad) {
  1717. best_sad = this_sad[idx];
  1718. tmp_mv->row = search_pos[idx].row + this_mv.row;
  1719. tmp_mv->col = search_pos[idx].col + this_mv.col;
  1720. }
  1721. }
  1722. if (this_sad[0] < this_sad[3])
  1723. this_mv.row -= 1;
  1724. else
  1725. this_mv.row += 1;
  1726. if (this_sad[1] < this_sad[2])
  1727. this_mv.col -= 1;
  1728. else
  1729. this_mv.col += 1;
  1730. ref_buf = xd->plane[0].pre[0].buf + this_mv.row * ref_stride + this_mv.col;
  1731. tmp_sad = cpi->fn_ptr[bsize].sdf(src_buf, src_stride, ref_buf, ref_stride);
  1732. if (best_sad > tmp_sad) {
  1733. *tmp_mv = this_mv;
  1734. best_sad = tmp_sad;
  1735. }
  1736. tmp_mv->row *= 8;
  1737. tmp_mv->col *= 8;
  1738. if (scaled_ref_frame) {
  1739. int i;
  1740. for (i = 0; i < MAX_MB_PLANE; i++) xd->plane[i].pre[0] = backup_yv12[i];
  1741. }
  1742. return best_sad;
  1743. }
  1744. // Runs sequence of diamond searches in smaller steps for RD.
  1745. /* do_refine: If last step (1-away) of n-step search doesn't pick the center
  1746. point as the best match, we will do a final 1-away diamond
  1747. refining search */
  1748. static int full_pixel_diamond(const VP9_COMP *cpi, MACROBLOCK *x, MV *mvp_full,
  1749. int step_param, int sadpb, int further_steps,
  1750. int do_refine, int *cost_list,
  1751. const vp9_variance_fn_ptr_t *fn_ptr,
  1752. const MV *ref_mv, MV *dst_mv) {
  1753. MV temp_mv;
  1754. int thissme, n, num00 = 0;
  1755. int bestsme = cpi->diamond_search_sad(x, &cpi->ss_cfg, mvp_full, &temp_mv,
  1756. step_param, sadpb, &n, fn_ptr, ref_mv);
  1757. if (bestsme < INT_MAX)
  1758. bestsme = vp9_get_mvpred_var(x, &temp_mv, ref_mv, fn_ptr, 1);
  1759. *dst_mv = temp_mv;
  1760. // If there won't be more n-step search, check to see if refining search is
  1761. // needed.
  1762. if (n > further_steps) do_refine = 0;
  1763. while (n < further_steps) {
  1764. ++n;
  1765. if (num00) {
  1766. num00--;
  1767. } else {
  1768. thissme = cpi->diamond_search_sad(x, &cpi->ss_cfg, mvp_full, &temp_mv,
  1769. step_param + n, sadpb, &num00, fn_ptr,
  1770. ref_mv);
  1771. if (thissme < INT_MAX)
  1772. thissme = vp9_get_mvpred_var(x, &temp_mv, ref_mv, fn_ptr, 1);
  1773. // check to see if refining search is needed.
  1774. if (num00 > further_steps - n) do_refine = 0;
  1775. if (thissme < bestsme) {
  1776. bestsme = thissme;
  1777. *dst_mv = temp_mv;
  1778. }
  1779. }
  1780. }
  1781. // final 1-away diamond refining search
  1782. if (do_refine) {
  1783. const int search_range = 8;
  1784. MV best_mv = *dst_mv;
  1785. thissme = vp9_refining_search_sad(x, &best_mv, sadpb, search_range, fn_ptr,
  1786. ref_mv);
  1787. if (thissme < INT_MAX)
  1788. thissme = vp9_get_mvpred_var(x, &best_mv, ref_mv, fn_ptr, 1);
  1789. if (thissme < bestsme) {
  1790. bestsme = thissme;
  1791. *dst_mv = best_mv;
  1792. }
  1793. }
  1794. // Return cost list.
  1795. if (cost_list) {
  1796. calc_int_cost_list(x, ref_mv, sadpb, fn_ptr, dst_mv, cost_list);
  1797. }
  1798. return bestsme;
  1799. }
  1800. #define MIN_RANGE 7
  1801. #define MAX_RANGE 256
  1802. #define MIN_INTERVAL 1
  1803. // Runs an limited range exhaustive mesh search using a pattern set
  1804. // according to the encode speed profile.
  1805. static int full_pixel_exhaustive(VP9_COMP *cpi, MACROBLOCK *x,
  1806. MV *centre_mv_full, int sadpb, int *cost_list,
  1807. const vp9_variance_fn_ptr_t *fn_ptr,
  1808. const MV *ref_mv, MV *dst_mv) {
  1809. const SPEED_FEATURES *const sf = &cpi->sf;
  1810. MV temp_mv = { centre_mv_full->row, centre_mv_full->col };
  1811. MV f_ref_mv = { ref_mv->row >> 3, ref_mv->col >> 3 };
  1812. int bestsme;
  1813. int i;
  1814. int interval = sf->mesh_patterns[0].interval;
  1815. int range = sf->mesh_patterns[0].range;
  1816. int baseline_interval_divisor;
  1817. // Keep track of number of exhaustive calls (this frame in this thread).
  1818. ++(*x->ex_search_count_ptr);
  1819. // Trap illegal values for interval and range for this function.
  1820. if ((range < MIN_RANGE) || (range > MAX_RANGE) || (interval < MIN_INTERVAL) ||
  1821. (interval > range))
  1822. return INT_MAX;
  1823. baseline_interval_divisor = range / interval;
  1824. // Check size of proposed first range against magnitude of the centre
  1825. // value used as a starting point.
  1826. range = VPXMAX(range, (5 * VPXMAX(abs(temp_mv.row), abs(temp_mv.col))) / 4);
  1827. range = VPXMIN(range, MAX_RANGE);
  1828. interval = VPXMAX(interval, range / baseline_interval_divisor);
  1829. // initial search
  1830. bestsme = exhuastive_mesh_search(x, &f_ref_mv, &temp_mv, range, interval,
  1831. sadpb, fn_ptr, &temp_mv);
  1832. if ((interval > MIN_INTERVAL) && (range > MIN_RANGE)) {
  1833. // Progressive searches with range and step size decreasing each time
  1834. // till we reach a step size of 1. Then break out.
  1835. for (i = 1; i < MAX_MESH_STEP; ++i) {
  1836. // First pass with coarser step and longer range
  1837. bestsme = exhuastive_mesh_search(
  1838. x, &f_ref_mv, &temp_mv, sf->mesh_patterns[i].range,
  1839. sf->mesh_patterns[i].interval, sadpb, fn_ptr, &temp_mv);
  1840. if (sf->mesh_patterns[i].interval == 1) break;
  1841. }
  1842. }
  1843. if (bestsme < INT_MAX)
  1844. bestsme = vp9_get_mvpred_var(x, &temp_mv, ref_mv, fn_ptr, 1);
  1845. *dst_mv = temp_mv;
  1846. // Return cost list.
  1847. if (cost_list) {
  1848. calc_int_cost_list(x, ref_mv, sadpb, fn_ptr, dst_mv, cost_list);
  1849. }
  1850. return bestsme;
  1851. }
  1852. int vp9_full_search_sad_c(const MACROBLOCK *x, const MV *ref_mv,
  1853. int sad_per_bit, int distance,
  1854. const vp9_variance_fn_ptr_t *fn_ptr,
  1855. const MV *center_mv, MV *best_mv) {
  1856. int r, c;
  1857. const MACROBLOCKD *const xd = &x->e_mbd;
  1858. const struct buf_2d *const what = &x->plane[0].src;
  1859. const struct buf_2d *const in_what = &xd->plane[0].pre[0];
  1860. const int row_min = VPXMAX(ref_mv->row - distance, x->mv_limits.row_min);
  1861. const int row_max = VPXMIN(ref_mv->row + distance, x->mv_limits.row_max);
  1862. const int col_min = VPXMAX(ref_mv->col - distance, x->mv_limits.col_min);
  1863. const int col_max = VPXMIN(ref_mv->col + distance, x->mv_limits.col_max);
  1864. const MV fcenter_mv = { center_mv->row >> 3, center_mv->col >> 3 };
  1865. int best_sad =
  1866. fn_ptr->sdf(what->buf, what->stride, get_buf_from_mv(in_what, ref_mv),
  1867. in_what->stride) +
  1868. mvsad_err_cost(x, ref_mv, &fcenter_mv, sad_per_bit);
  1869. *best_mv = *ref_mv;
  1870. for (r = row_min; r < row_max; ++r) {
  1871. for (c = col_min; c < col_max; ++c) {
  1872. const MV mv = { r, c };
  1873. const int sad =
  1874. fn_ptr->sdf(what->buf, what->stride, get_buf_from_mv(in_what, &mv),
  1875. in_what->stride) +
  1876. mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit);
  1877. if (sad < best_sad) {
  1878. best_sad = sad;
  1879. *best_mv = mv;
  1880. }
  1881. }
  1882. }
  1883. return best_sad;
  1884. }
  1885. int vp9_full_search_sadx3(const MACROBLOCK *x, const MV *ref_mv,
  1886. int sad_per_bit, int distance,
  1887. const vp9_variance_fn_ptr_t *fn_ptr,
  1888. const MV *center_mv, MV *best_mv) {
  1889. int r;
  1890. const MACROBLOCKD *const xd = &x->e_mbd;
  1891. const struct buf_2d *const what = &x->plane[0].src;
  1892. const struct buf_2d *const in_what = &xd->plane[0].pre[0];
  1893. const int row_min = VPXMAX(ref_mv->row - distance, x->mv_limits.row_min);
  1894. const int row_max = VPXMIN(ref_mv->row + distance, x->mv_limits.row_max);
  1895. const int col_min = VPXMAX(ref_mv->col - distance, x->mv_limits.col_min);
  1896. const int col_max = VPXMIN(ref_mv->col + distance, x->mv_limits.col_max);
  1897. const MV fcenter_mv = { center_mv->row >> 3, center_mv->col >> 3 };
  1898. unsigned int best_sad =
  1899. fn_ptr->sdf(what->buf, what->stride, get_buf_from_mv(in_what, ref_mv),
  1900. in_what->stride) +
  1901. mvsad_err_cost(x, ref_mv, &fcenter_mv, sad_per_bit);
  1902. *best_mv = *ref_mv;
  1903. for (r = row_min; r < row_max; ++r) {
  1904. int c = col_min;
  1905. const uint8_t *check_here = &in_what->buf[r * in_what->stride + c];
  1906. if (fn_ptr->sdx3f != NULL) {
  1907. while ((c + 2) < col_max) {
  1908. int i;
  1909. DECLARE_ALIGNED(16, uint32_t, sads[3]);
  1910. fn_ptr->sdx3f(what->buf, what->stride, check_here, in_what->stride,
  1911. sads);
  1912. for (i = 0; i < 3; ++i) {
  1913. unsigned int sad = sads[i];
  1914. if (sad < best_sad) {
  1915. const MV mv = { r, c };
  1916. sad += mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit);
  1917. if (sad < best_sad) {
  1918. best_sad = sad;
  1919. *best_mv = mv;
  1920. }
  1921. }
  1922. ++check_here;
  1923. ++c;
  1924. }
  1925. }
  1926. }
  1927. while (c < col_max) {
  1928. unsigned int sad =
  1929. fn_ptr->sdf(what->buf, what->stride, check_here, in_what->stride);
  1930. if (sad < best_sad) {
  1931. const MV mv = { r, c };
  1932. sad += mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit);
  1933. if (sad < best_sad) {
  1934. best_sad = sad;
  1935. *best_mv = mv;
  1936. }
  1937. }
  1938. ++check_here;
  1939. ++c;
  1940. }
  1941. }
  1942. return best_sad;
  1943. }
  1944. int vp9_full_search_sadx8(const MACROBLOCK *x, const MV *ref_mv,
  1945. int sad_per_bit, int distance,
  1946. const vp9_variance_fn_ptr_t *fn_ptr,
  1947. const MV *center_mv, MV *best_mv) {
  1948. int r;
  1949. const MACROBLOCKD *const xd = &x->e_mbd;
  1950. const struct buf_2d *const what = &x->plane[0].src;
  1951. const struct buf_2d *const in_what = &xd->plane[0].pre[0];
  1952. const int row_min = VPXMAX(ref_mv->row - distance, x->mv_limits.row_min);
  1953. const int row_max = VPXMIN(ref_mv->row + distance, x->mv_limits.row_max);
  1954. const int col_min = VPXMAX(ref_mv->col - distance, x->mv_limits.col_min);
  1955. const int col_max = VPXMIN(ref_mv->col + distance, x->mv_limits.col_max);
  1956. const MV fcenter_mv = { center_mv->row >> 3, center_mv->col >> 3 };
  1957. unsigned int best_sad =
  1958. fn_ptr->sdf(what->buf, what->stride, get_buf_from_mv(in_what, ref_mv),
  1959. in_what->stride) +
  1960. mvsad_err_cost(x, ref_mv, &fcenter_mv, sad_per_bit);
  1961. *best_mv = *ref_mv;
  1962. for (r = row_min; r < row_max; ++r) {
  1963. int c = col_min;
  1964. const uint8_t *check_here = &in_what->buf[r * in_what->stride + c];
  1965. if (fn_ptr->sdx8f != NULL) {
  1966. while ((c + 7) < col_max) {
  1967. int i;
  1968. DECLARE_ALIGNED(16, uint32_t, sads[8]);
  1969. fn_ptr->sdx8f(what->buf, what->stride, check_here, in_what->stride,
  1970. sads);
  1971. for (i = 0; i < 8; ++i) {
  1972. unsigned int sad = sads[i];
  1973. if (sad < best_sad) {
  1974. const MV mv = { r, c };
  1975. sad += mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit);
  1976. if (sad < best_sad) {
  1977. best_sad = sad;
  1978. *best_mv = mv;
  1979. }
  1980. }
  1981. ++check_here;
  1982. ++c;
  1983. }
  1984. }
  1985. }
  1986. if (fn_ptr->sdx3f != NULL) {
  1987. while ((c + 2) < col_max) {
  1988. int i;
  1989. DECLARE_ALIGNED(16, uint32_t, sads[3]);
  1990. fn_ptr->sdx3f(what->buf, what->stride, check_here, in_what->stride,
  1991. sads);
  1992. for (i = 0; i < 3; ++i) {
  1993. unsigned int sad = sads[i];
  1994. if (sad < best_sad) {
  1995. const MV mv = { r, c };
  1996. sad += mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit);
  1997. if (sad < best_sad) {
  1998. best_sad = sad;
  1999. *best_mv = mv;
  2000. }
  2001. }
  2002. ++check_here;
  2003. ++c;
  2004. }
  2005. }
  2006. }
  2007. while (c < col_max) {
  2008. unsigned int sad =
  2009. fn_ptr->sdf(what->buf, what->stride, check_here, in_what->stride);
  2010. if (sad < best_sad) {
  2011. const MV mv = { r, c };
  2012. sad += mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit);
  2013. if (sad < best_sad) {
  2014. best_sad = sad;
  2015. *best_mv = mv;
  2016. }
  2017. }
  2018. ++check_here;
  2019. ++c;
  2020. }
  2021. }
  2022. return best_sad;
  2023. }
  2024. int vp9_refining_search_sad(const MACROBLOCK *x, MV *ref_mv, int error_per_bit,
  2025. int search_range,
  2026. const vp9_variance_fn_ptr_t *fn_ptr,
  2027. const MV *center_mv) {
  2028. const MACROBLOCKD *const xd = &x->e_mbd;
  2029. const MV neighbors[4] = { { -1, 0 }, { 0, -1 }, { 0, 1 }, { 1, 0 } };
  2030. const struct buf_2d *const what = &x->plane[0].src;
  2031. const struct buf_2d *const in_what = &xd->plane[0].pre[0];
  2032. const MV fcenter_mv = { center_mv->row >> 3, center_mv->col >> 3 };
  2033. const uint8_t *best_address = get_buf_from_mv(in_what, ref_mv);
  2034. unsigned int best_sad =
  2035. fn_ptr->sdf(what->buf, what->stride, best_address, in_what->stride) +
  2036. mvsad_err_cost(x, ref_mv, &fcenter_mv, error_per_bit);
  2037. int i, j;
  2038. for (i = 0; i < search_range; i++) {
  2039. int best_site = -1;
  2040. const int all_in = ((ref_mv->row - 1) > x->mv_limits.row_min) &
  2041. ((ref_mv->row + 1) < x->mv_limits.row_max) &
  2042. ((ref_mv->col - 1) > x->mv_limits.col_min) &
  2043. ((ref_mv->col + 1) < x->mv_limits.col_max);
  2044. if (all_in) {
  2045. unsigned int sads[4];
  2046. const uint8_t *const positions[4] = { best_address - in_what->stride,
  2047. best_address - 1, best_address + 1,
  2048. best_address + in_what->stride };
  2049. fn_ptr->sdx4df(what->buf, what->stride, positions, in_what->stride, sads);
  2050. for (j = 0; j < 4; ++j) {
  2051. if (sads[j] < best_sad) {
  2052. const MV mv = { ref_mv->row + neighbors[j].row,
  2053. ref_mv->col + neighbors[j].col };
  2054. sads[j] += mvsad_err_cost(x, &mv, &fcenter_mv, error_per_bit);
  2055. if (sads[j] < best_sad) {
  2056. best_sad = sads[j];
  2057. best_site = j;
  2058. }
  2059. }
  2060. }
  2061. } else {
  2062. for (j = 0; j < 4; ++j) {
  2063. const MV mv = { ref_mv->row + neighbors[j].row,
  2064. ref_mv->col + neighbors[j].col };
  2065. if (is_mv_in(&x->mv_limits, &mv)) {
  2066. unsigned int sad =
  2067. fn_ptr->sdf(what->buf, what->stride,
  2068. get_buf_from_mv(in_what, &mv), in_what->stride);
  2069. if (sad < best_sad) {
  2070. sad += mvsad_err_cost(x, &mv, &fcenter_mv, error_per_bit);
  2071. if (sad < best_sad) {
  2072. best_sad = sad;
  2073. best_site = j;
  2074. }
  2075. }
  2076. }
  2077. }
  2078. }
  2079. if (best_site == -1) {
  2080. break;
  2081. } else {
  2082. ref_mv->row += neighbors[best_site].row;
  2083. ref_mv->col += neighbors[best_site].col;
  2084. best_address = get_buf_from_mv(in_what, ref_mv);
  2085. }
  2086. }
  2087. return best_sad;
  2088. }
  2089. // This function is called when we do joint motion search in comp_inter_inter
  2090. // mode.
  2091. int vp9_refining_search_8p_c(const MACROBLOCK *x, MV *ref_mv, int error_per_bit,
  2092. int search_range,
  2093. const vp9_variance_fn_ptr_t *fn_ptr,
  2094. const MV *center_mv, const uint8_t *second_pred) {
  2095. const MV neighbors[8] = { { -1, 0 }, { 0, -1 }, { 0, 1 }, { 1, 0 },
  2096. { -1, -1 }, { 1, -1 }, { -1, 1 }, { 1, 1 } };
  2097. const MACROBLOCKD *const xd = &x->e_mbd;
  2098. const struct buf_2d *const what = &x->plane[0].src;
  2099. const struct buf_2d *const in_what = &xd->plane[0].pre[0];
  2100. const MV fcenter_mv = { center_mv->row >> 3, center_mv->col >> 3 };
  2101. unsigned int best_sad =
  2102. fn_ptr->sdaf(what->buf, what->stride, get_buf_from_mv(in_what, ref_mv),
  2103. in_what->stride, second_pred) +
  2104. mvsad_err_cost(x, ref_mv, &fcenter_mv, error_per_bit);
  2105. int i, j;
  2106. for (i = 0; i < search_range; ++i) {
  2107. int best_site = -1;
  2108. for (j = 0; j < 8; ++j) {
  2109. const MV mv = { ref_mv->row + neighbors[j].row,
  2110. ref_mv->col + neighbors[j].col };
  2111. if (is_mv_in(&x->mv_limits, &mv)) {
  2112. unsigned int sad =
  2113. fn_ptr->sdaf(what->buf, what->stride, get_buf_from_mv(in_what, &mv),
  2114. in_what->stride, second_pred);
  2115. if (sad < best_sad) {
  2116. sad += mvsad_err_cost(x, &mv, &fcenter_mv, error_per_bit);
  2117. if (sad < best_sad) {
  2118. best_sad = sad;
  2119. best_site = j;
  2120. }
  2121. }
  2122. }
  2123. }
  2124. if (best_site == -1) {
  2125. break;
  2126. } else {
  2127. ref_mv->row += neighbors[best_site].row;
  2128. ref_mv->col += neighbors[best_site].col;
  2129. }
  2130. }
  2131. return best_sad;
  2132. }
  2133. #define MIN_EX_SEARCH_LIMIT 128
  2134. static int is_exhaustive_allowed(VP9_COMP *cpi, MACROBLOCK *x) {
  2135. const SPEED_FEATURES *const sf = &cpi->sf;
  2136. const int max_ex =
  2137. VPXMAX(MIN_EX_SEARCH_LIMIT,
  2138. (*x->m_search_count_ptr * sf->max_exaustive_pct) / 100);
  2139. return sf->allow_exhaustive_searches &&
  2140. (sf->exhaustive_searches_thresh < INT_MAX) &&
  2141. (*x->ex_search_count_ptr <= max_ex) && !cpi->rc.is_src_frame_alt_ref;
  2142. }
  2143. int vp9_full_pixel_search(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize,
  2144. MV *mvp_full, int step_param, int error_per_bit,
  2145. int *cost_list, const MV *ref_mv, MV *tmp_mv,
  2146. int var_max, int rd) {
  2147. const SPEED_FEATURES *const sf = &cpi->sf;
  2148. const SEARCH_METHODS method = sf->mv.search_method;
  2149. vp9_variance_fn_ptr_t *fn_ptr = &cpi->fn_ptr[bsize];
  2150. int var = 0;
  2151. if (cost_list) {
  2152. cost_list[0] = INT_MAX;
  2153. cost_list[1] = INT_MAX;
  2154. cost_list[2] = INT_MAX;
  2155. cost_list[3] = INT_MAX;
  2156. cost_list[4] = INT_MAX;
  2157. }
  2158. // Keep track of number of searches (this frame in this thread).
  2159. ++(*x->m_search_count_ptr);
  2160. switch (method) {
  2161. case FAST_DIAMOND:
  2162. var = fast_dia_search(x, mvp_full, step_param, error_per_bit, 0,
  2163. cost_list, fn_ptr, 1, ref_mv, tmp_mv);
  2164. break;
  2165. case FAST_HEX:
  2166. var = fast_hex_search(x, mvp_full, step_param, error_per_bit, 0,
  2167. cost_list, fn_ptr, 1, ref_mv, tmp_mv);
  2168. break;
  2169. case HEX:
  2170. var = hex_search(x, mvp_full, step_param, error_per_bit, 1, cost_list,
  2171. fn_ptr, 1, ref_mv, tmp_mv);
  2172. break;
  2173. case SQUARE:
  2174. var = square_search(x, mvp_full, step_param, error_per_bit, 1, cost_list,
  2175. fn_ptr, 1, ref_mv, tmp_mv);
  2176. break;
  2177. case BIGDIA:
  2178. var = bigdia_search(x, mvp_full, step_param, error_per_bit, 1, cost_list,
  2179. fn_ptr, 1, ref_mv, tmp_mv);
  2180. break;
  2181. case NSTEP:
  2182. var = full_pixel_diamond(cpi, x, mvp_full, step_param, error_per_bit,
  2183. MAX_MVSEARCH_STEPS - 1 - step_param, 1,
  2184. cost_list, fn_ptr, ref_mv, tmp_mv);
  2185. // Should we allow a follow on exhaustive search?
  2186. if (is_exhaustive_allowed(cpi, x)) {
  2187. int64_t exhuastive_thr = sf->exhaustive_searches_thresh;
  2188. exhuastive_thr >>=
  2189. 8 - (b_width_log2_lookup[bsize] + b_height_log2_lookup[bsize]);
  2190. // Threshold variance for an exhaustive full search.
  2191. if (var > exhuastive_thr) {
  2192. int var_ex;
  2193. MV tmp_mv_ex;
  2194. var_ex = full_pixel_exhaustive(cpi, x, tmp_mv, error_per_bit,
  2195. cost_list, fn_ptr, ref_mv, &tmp_mv_ex);
  2196. if (var_ex < var) {
  2197. var = var_ex;
  2198. *tmp_mv = tmp_mv_ex;
  2199. }
  2200. }
  2201. }
  2202. break;
  2203. default: assert(0 && "Invalid search method.");
  2204. }
  2205. if (method != NSTEP && rd && var < var_max)
  2206. var = vp9_get_mvpred_var(x, tmp_mv, ref_mv, fn_ptr, 1);
  2207. return var;
  2208. }