vp9_mcomp.c

  1. /*
  2. * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  3. *
  4. * Use of this source code is governed by a BSD-style license
  5. * that can be found in the LICENSE file in the root of the source
  6. * tree. An additional intellectual property rights grant can be found
  7. * in the file PATENTS. All contributing project authors may
  8. * be found in the AUTHORS file in the root of the source tree.
  9. */
  10. #include <assert.h>
  11. #include <limits.h>
  12. #include <math.h>
  13. #include <stdio.h>
  14. #include "./vpx_config.h"
  15. #include "./vpx_dsp_rtcd.h"
  16. #include "vpx_dsp/vpx_dsp_common.h"
  17. #include "vpx_mem/vpx_mem.h"
  18. #include "vpx_ports/mem.h"
  19. #include "vp9/common/vp9_common.h"
  20. #include "vp9/common/vp9_mvref_common.h"
  21. #include "vp9/common/vp9_reconinter.h"
  22. #include "vp9/encoder/vp9_encoder.h"
  23. #include "vp9/encoder/vp9_mcomp.h"
  24. // #define NEW_DIAMOND_SEARCH
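  // Clamps the caller's full-pel MV limits to the intersection of the UMV
  // window and the range reachable from *mv (+/-MAX_FULL_PEL_VAL, kept inside
  // the codable MV_LOW/MV_UPP range); the limits are only ever tightened here.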
  25. void vp9_set_mv_search_range(MvLimits *mv_limits, const MV *mv) {
  26. int col_min = (mv->col >> 3) - MAX_FULL_PEL_VAL + (mv->col & 7 ? 1 : 0);
  27. int row_min = (mv->row >> 3) - MAX_FULL_PEL_VAL + (mv->row & 7 ? 1 : 0);
  28. int col_max = (mv->col >> 3) + MAX_FULL_PEL_VAL;
  29. int row_max = (mv->row >> 3) + MAX_FULL_PEL_VAL;
  30. col_min = VPXMAX(col_min, (MV_LOW >> 3) + 1);
  31. row_min = VPXMAX(row_min, (MV_LOW >> 3) + 1);
  32. col_max = VPXMIN(col_max, (MV_UPP >> 3) - 1);
  33. row_max = VPXMIN(row_max, (MV_UPP >> 3) - 1);
  34. // Get intersection of UMV window and valid MV window to reduce # of checks
  35. // in diamond search.
  36. if (mv_limits->col_min < col_min) mv_limits->col_min = col_min;
  37. if (mv_limits->col_max > col_max) mv_limits->col_max = col_max;
  38. if (mv_limits->row_min < row_min) mv_limits->row_min = row_min;
  39. if (mv_limits->row_max > row_max) mv_limits->row_max = row_max;
  40. }
  41. void vp9_set_subpel_mv_search_range(MvLimits *subpel_mv_limits,
  42. const MvLimits *umv_window_limits,
  43. const MV *ref_mv) {
  44. subpel_mv_limits->col_min = VPXMAX(umv_window_limits->col_min * 8,
  45. ref_mv->col - MAX_FULL_PEL_VAL * 8);
  46. subpel_mv_limits->col_max = VPXMIN(umv_window_limits->col_max * 8,
  47. ref_mv->col + MAX_FULL_PEL_VAL * 8);
  48. subpel_mv_limits->row_min = VPXMAX(umv_window_limits->row_min * 8,
  49. ref_mv->row - MAX_FULL_PEL_VAL * 8);
  50. subpel_mv_limits->row_max = VPXMIN(umv_window_limits->row_max * 8,
  51. ref_mv->row + MAX_FULL_PEL_VAL * 8);
  52. subpel_mv_limits->col_min = VPXMAX(MV_LOW + 1, subpel_mv_limits->col_min);
  53. subpel_mv_limits->col_max = VPXMIN(MV_UPP - 1, subpel_mv_limits->col_max);
  54. subpel_mv_limits->row_min = VPXMAX(MV_LOW + 1, subpel_mv_limits->row_min);
  55. subpel_mv_limits->row_max = VPXMIN(MV_UPP - 1, subpel_mv_limits->row_max);
  56. }
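  // vp9_init_search_range() counts how many doublings of the (clamped) block
  // size are needed to reach MAX_FULL_PEL_VAL, capped at MAX_MVSEARCH_STEPS - 2;
  // callers typically use the result as the step parameter of the searches below.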
  57. int vp9_init_search_range(int size) {
  58. int sr = 0;
  59. // Minimum search size no matter what value was passed in.
  60. size = VPXMAX(16, size);
  61. while ((size << sr) < MAX_FULL_PEL_VAL) sr++;
  62. sr = VPXMIN(sr, MAX_MVSEARCH_STEPS - 2);
  63. return sr;
  64. }
  65. static INLINE int mv_cost(const MV *mv, const int *joint_cost,
  66. int *const comp_cost[2]) {
  67. assert(mv->row >= -MV_MAX && mv->row < MV_MAX);
  68. assert(mv->col >= -MV_MAX && mv->col < MV_MAX);
  69. return joint_cost[vp9_get_mv_joint(mv)] + comp_cost[0][mv->row] +
  70. comp_cost[1][mv->col];
  71. }
  72. int vp9_mv_bit_cost(const MV *mv, const MV *ref, const int *mvjcost,
  73. int *mvcost[2], int weight) {
  74. const MV diff = { mv->row - ref->row, mv->col - ref->col };
  75. return ROUND_POWER_OF_TWO(mv_cost(&diff, mvjcost, mvcost) * weight, 7);
  76. }
  77. #define PIXEL_TRANSFORM_ERROR_SCALE 4
  78. static int mv_err_cost(const MV *mv, const MV *ref, const int *mvjcost,
  79. int *mvcost[2], int error_per_bit) {
  80. if (mvcost) {
  81. const MV diff = { mv->row - ref->row, mv->col - ref->col };
  82. return (int)ROUND64_POWER_OF_TWO(
  83. (int64_t)mv_cost(&diff, mvjcost, mvcost) * error_per_bit,
  84. RDDIV_BITS + VP9_PROB_COST_SHIFT - RD_EPB_SHIFT +
  85. PIXEL_TRANSFORM_ERROR_SCALE);
  86. }
  87. return 0;
  88. }
  89. static int mvsad_err_cost(const MACROBLOCK *x, const MV *mv, const MV *ref,
  90. int sad_per_bit) {
  91. const MV diff = { mv->row - ref->row, mv->col - ref->col };
  92. return ROUND_POWER_OF_TWO(
  93. (unsigned)mv_cost(&diff, x->nmvjointsadcost, x->nmvsadcost) * sad_per_bit,
  94. VP9_PROB_COST_SHIFT);
  95. }
  96. void vp9_init_dsmotion_compensation(search_site_config *cfg, int stride) {
  97. int len;
  98. int ss_count = 0;
  99. for (len = MAX_FIRST_STEP; len > 0; len /= 2) {
  100. // Generate offsets for 4 search sites per step.
  101. const MV ss_mvs[] = { { -len, 0 }, { len, 0 }, { 0, -len }, { 0, len } };
  102. int i;
  103. for (i = 0; i < 4; ++i, ++ss_count) {
  104. cfg->ss_mv[ss_count] = ss_mvs[i];
  105. cfg->ss_os[ss_count] = ss_mvs[i].row * stride + ss_mvs[i].col;
  106. }
  107. }
  108. cfg->searches_per_step = 4;
  109. cfg->total_steps = ss_count / cfg->searches_per_step;
  110. }
  111. void vp9_init3smotion_compensation(search_site_config *cfg, int stride) {
  112. int len;
  113. int ss_count = 0;
  114. for (len = MAX_FIRST_STEP; len > 0; len /= 2) {
  115. // Generate offsets for 8 search sites per step.
  116. const MV ss_mvs[8] = { { -len, 0 }, { len, 0 }, { 0, -len },
  117. { 0, len }, { -len, -len }, { -len, len },
  118. { len, -len }, { len, len } };
  119. int i;
  120. for (i = 0; i < 8; ++i, ++ss_count) {
  121. cfg->ss_mv[ss_count] = ss_mvs[i];
  122. cfg->ss_os[ss_count] = ss_mvs[i].row * stride + ss_mvs[i].col;
  123. }
  124. }
  125. cfg->searches_per_step = 8;
  126. cfg->total_steps = ss_count / cfg->searches_per_step;
  127. }
  128. // convert motion vector component to offset for sv[a]f calc
  129. static INLINE int sp(int x) { return x & 7; }
  130. static INLINE const uint8_t *pre(const uint8_t *buf, int stride, int r, int c) {
  131. return &buf[(r >> 3) * stride + (c >> 3)];
  132. }
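  // Example: c = 21 in 1/8-pel units -> pre() addresses full-pel column 2
  // (21 >> 3) and sp() returns subpel filter phase 5 (21 & 7).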
  133. #if CONFIG_VP9_HIGHBITDEPTH
  134. /* checks if (r, c) has better score than previous best */
  135. #define CHECK_BETTER(v, r, c) \
  136. if (c >= minc && c <= maxc && r >= minr && r <= maxr) { \
  137. int64_t tmpmse; \
  138. const MV mv = { r, c }; \
  139. const MV ref_mv = { rr, rc }; \
  140. if (second_pred == NULL) { \
  141. thismse = vfp->svf(pre(y, y_stride, r, c), y_stride, sp(c), sp(r), z, \
  142. src_stride, &sse); \
  143. } else { \
  144. thismse = vfp->svaf(pre(y, y_stride, r, c), y_stride, sp(c), sp(r), z, \
  145. src_stride, &sse, second_pred); \
  146. } \
  147. tmpmse = thismse; \
  148. tmpmse += mv_err_cost(&mv, &ref_mv, mvjcost, mvcost, error_per_bit); \
  149. if (tmpmse >= INT_MAX) { \
  150. v = INT_MAX; \
  151. } else if ((v = (uint32_t)tmpmse) < besterr) { \
  152. besterr = v; \
  153. br = r; \
  154. bc = c; \
  155. *distortion = thismse; \
  156. *sse1 = sse; \
  157. } \
  158. } else { \
  159. v = INT_MAX; \
  160. }
  161. #else
  162. /* checks if (r, c) has better score than previous best */
  163. #define CHECK_BETTER(v, r, c) \
  164. if (c >= minc && c <= maxc && r >= minr && r <= maxr) { \
  165. const MV mv = { r, c }; \
  166. const MV ref_mv = { rr, rc }; \
  167. if (second_pred == NULL) \
  168. thismse = vfp->svf(pre(y, y_stride, r, c), y_stride, sp(c), sp(r), z, \
  169. src_stride, &sse); \
  170. else \
  171. thismse = vfp->svaf(pre(y, y_stride, r, c), y_stride, sp(c), sp(r), z, \
  172. src_stride, &sse, second_pred); \
  173. if ((v = mv_err_cost(&mv, &ref_mv, mvjcost, mvcost, error_per_bit) + \
  174. thismse) < besterr) { \
  175. besterr = v; \
  176. br = r; \
  177. bc = c; \
  178. *distortion = thismse; \
  179. *sse1 = sse; \
  180. } \
  181. } else { \
  182. v = INT_MAX; \
  183. }
  184. #endif
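  // FIRST_LEVEL_CHECKS probes the four +/-hstep neighbors (left, right, up,
  // down) of (tr, tc) and then the single diagonal toward whichever horizontal
  // and vertical neighbors were cheaper.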
  185. #define FIRST_LEVEL_CHECKS \
  186. { \
  187. unsigned int left, right, up, down, diag; \
  188. CHECK_BETTER(left, tr, tc - hstep); \
  189. CHECK_BETTER(right, tr, tc + hstep); \
  190. CHECK_BETTER(up, tr - hstep, tc); \
  191. CHECK_BETTER(down, tr + hstep, tc); \
  192. whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2); \
  193. switch (whichdir) { \
  194. case 0: CHECK_BETTER(diag, tr - hstep, tc - hstep); break; \
  195. case 1: CHECK_BETTER(diag, tr - hstep, tc + hstep); break; \
  196. case 2: CHECK_BETTER(diag, tr + hstep, tc - hstep); break; \
  197. case 3: CHECK_BETTER(diag, tr + hstep, tc + hstep); break; \
  198. } \
  199. }
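  // SECOND_LEVEL_CHECKS refines around the best point found above, probing
  // extra points chosen from the direction in which (br, bc) moved away from
  // the previous center (tr, tc).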
  200. #define SECOND_LEVEL_CHECKS \
  201. { \
  202. int kr, kc; \
  203. unsigned int second; \
  204. if (tr != br && tc != bc) { \
  205. kr = br - tr; \
  206. kc = bc - tc; \
  207. CHECK_BETTER(second, tr + kr, tc + 2 * kc); \
  208. CHECK_BETTER(second, tr + 2 * kr, tc + kc); \
  209. } else if (tr == br && tc != bc) { \
  210. kc = bc - tc; \
  211. CHECK_BETTER(second, tr + hstep, tc + 2 * kc); \
  212. CHECK_BETTER(second, tr - hstep, tc + 2 * kc); \
  213. switch (whichdir) { \
  214. case 0: \
  215. case 1: CHECK_BETTER(second, tr + hstep, tc + kc); break; \
  216. case 2: \
  217. case 3: CHECK_BETTER(second, tr - hstep, tc + kc); break; \
  218. } \
  219. } else if (tr != br && tc == bc) { \
  220. kr = br - tr; \
  221. CHECK_BETTER(second, tr + 2 * kr, tc + hstep); \
  222. CHECK_BETTER(second, tr + 2 * kr, tc - hstep); \
  223. switch (whichdir) { \
  224. case 0: \
  225. case 2: CHECK_BETTER(second, tr + kr, tc + hstep); break; \
  226. case 1: \
  227. case 3: CHECK_BETTER(second, tr + kr, tc - hstep); break; \
  228. } \
  229. } \
  230. }
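  // SETUP_SUBPEL_SEARCH declares the locals shared by the sub-pel search
  // routines below, clamps the probe range via vp9_set_subpel_mv_search_range()
  // and converts *bestmv from full-pel to 1/8-pel units.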
  231. #define SETUP_SUBPEL_SEARCH \
  232. const uint8_t *const z = x->plane[0].src.buf; \
  233. const int src_stride = x->plane[0].src.stride; \
  234. const MACROBLOCKD *xd = &x->e_mbd; \
  235. unsigned int besterr = UINT_MAX; \
  236. unsigned int sse; \
  237. unsigned int whichdir; \
  238. int thismse; \
  239. const unsigned int halfiters = iters_per_step; \
  240. const unsigned int quarteriters = iters_per_step; \
  241. const unsigned int eighthiters = iters_per_step; \
  242. const int y_stride = xd->plane[0].pre[0].stride; \
  243. const int offset = bestmv->row * y_stride + bestmv->col; \
  244. const uint8_t *const y = xd->plane[0].pre[0].buf; \
  245. \
  246. int rr = ref_mv->row; \
  247. int rc = ref_mv->col; \
  248. int br = bestmv->row * 8; \
  249. int bc = bestmv->col * 8; \
  250. int hstep = 4; \
  251. int minc, maxc, minr, maxr; \
  252. int tr = br; \
  253. int tc = bc; \
  254. MvLimits subpel_mv_limits; \
  255. \
  256. vp9_set_subpel_mv_search_range(&subpel_mv_limits, &x->mv_limits, ref_mv); \
  257. minc = subpel_mv_limits.col_min; \
  258. maxc = subpel_mv_limits.col_max; \
  259. minr = subpel_mv_limits.row_min; \
  260. maxr = subpel_mv_limits.row_max; \
  261. \
  262. bestmv->row *= 8; \
  263. bestmv->col *= 8;
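  // setup_center_error() seeds besterr with the error at the starting full-pel
  // position: the variance of the prediction (averaged with second_pred for
  // compound prediction) plus the rate cost of bestmv relative to ref_mv.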
  264. static unsigned int setup_center_error(
  265. const MACROBLOCKD *xd, const MV *bestmv, const MV *ref_mv,
  266. int error_per_bit, const vp9_variance_fn_ptr_t *vfp,
  267. const uint8_t *const src, const int src_stride, const uint8_t *const y,
  268. int y_stride, const uint8_t *second_pred, int w, int h, int offset,
  269. int *mvjcost, int *mvcost[2], uint32_t *sse1, uint32_t *distortion) {
  270. #if CONFIG_VP9_HIGHBITDEPTH
  271. uint64_t besterr;
  272. if (second_pred != NULL) {
  273. if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
  274. DECLARE_ALIGNED(16, uint16_t, comp_pred16[64 * 64]);
  275. vpx_highbd_comp_avg_pred(comp_pred16, CONVERT_TO_SHORTPTR(second_pred), w,
  276. h, CONVERT_TO_SHORTPTR(y + offset), y_stride);
  277. besterr =
  278. vfp->vf(CONVERT_TO_BYTEPTR(comp_pred16), w, src, src_stride, sse1);
  279. } else {
  280. DECLARE_ALIGNED(16, uint8_t, comp_pred[64 * 64]);
  281. vpx_comp_avg_pred(comp_pred, second_pred, w, h, y + offset, y_stride);
  282. besterr = vfp->vf(comp_pred, w, src, src_stride, sse1);
  283. }
  284. } else {
  285. besterr = vfp->vf(y + offset, y_stride, src, src_stride, sse1);
  286. }
  287. *distortion = (uint32_t)besterr;
  288. besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit);
  289. if (besterr >= UINT_MAX) return UINT_MAX;
  290. return (uint32_t)besterr;
  291. #else
  292. uint32_t besterr;
  293. (void)xd;
  294. if (second_pred != NULL) {
  295. DECLARE_ALIGNED(16, uint8_t, comp_pred[64 * 64]);
  296. vpx_comp_avg_pred(comp_pred, second_pred, w, h, y + offset, y_stride);
  297. besterr = vfp->vf(comp_pred, w, src, src_stride, sse1);
  298. } else {
  299. besterr = vfp->vf(y + offset, y_stride, src, src_stride, sse1);
  300. }
  301. *distortion = besterr;
  302. besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit);
  303. return besterr;
  304. #endif // CONFIG_VP9_HIGHBITDEPTH
  305. }
  306. static INLINE int64_t divide_and_round(const int64_t n, const int64_t d) {
  307. return ((n < 0) ^ (d < 0)) ? ((n - d / 2) / d) : ((n + d / 2) / d);
  308. }
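  // Rounds to nearest with halves away from zero, e.g. divide_and_round(5, 2)
  // == 3 and divide_and_round(-5, 2) == -3.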
  309. static INLINE int is_cost_list_wellbehaved(int *cost_list) {
  310. return cost_list[0] < cost_list[1] && cost_list[0] < cost_list[2] &&
  311. cost_list[0] < cost_list[3] && cost_list[0] < cost_list[4];
  312. }
  313. // Returns the surface minimum estimate at the given precision: *ir and *ic
  // are expressed in units of 1/2^n pel.
  314. // Assume a model for the cost surface: S = A(x - x0)^2 + B(y - y0)^2 + C
  315. // For a given set of costs S0, S1, S2, S3, S4 at points
  316. // (y, x) = (0, 0), (0, -1), (1, 0), (0, 1) and (-1, 0) respectively,
  317. // the solution for the location of the minima (x0, y0) is given by:
  318. // x0 = 1/2 (S1 - S3)/(S1 + S3 - 2*S0),
  319. // y0 = 1/2 (S4 - S2)/(S4 + S2 - 2*S0).
  320. // The code below is an integerized version of that.
  321. static void get_cost_surf_min(int *cost_list, int *ir, int *ic, int bits) {
  322. const int64_t x0 = (int64_t)cost_list[1] - cost_list[3];
  323. const int64_t y0 = cost_list[1] - 2 * (int64_t)cost_list[0] + cost_list[3];
  324. const int64_t x1 = (int64_t)cost_list[4] - cost_list[2];
  325. const int64_t y1 = cost_list[4] - 2 * (int64_t)cost_list[0] + cost_list[2];
  326. const int b = 1 << (bits - 1);
  327. *ic = (int)divide_and_round(x0 * b, y0);
  328. *ir = (int)divide_and_round(x1 * b, y1);
  329. }
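  // Worked example (illustrative values): cost_list = {100, 120, 130, 160, 150}
  // and bits = 2 give x0 = -40, y0 = 80, x1 = 20, y1 = 80, so *ic = -1 and
  // *ir = 1, i.e. the estimated minimum lies 1/4 pel left of and 1/4 pel below
  // the center point.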
  330. uint32_t vp9_skip_sub_pixel_tree(
  331. const MACROBLOCK *x, MV *bestmv, const MV *ref_mv, int allow_hp,
  332. int error_per_bit, const vp9_variance_fn_ptr_t *vfp, int forced_stop,
  333. int iters_per_step, int *cost_list, int *mvjcost, int *mvcost[2],
  334. uint32_t *distortion, uint32_t *sse1, const uint8_t *second_pred, int w,
  335. int h, int use_accurate_subpel_search) {
  336. SETUP_SUBPEL_SEARCH;
  337. besterr = setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp, z,
  338. src_stride, y, y_stride, second_pred, w, h,
  339. offset, mvjcost, mvcost, sse1, distortion);
  340. (void)halfiters;
  341. (void)quarteriters;
  342. (void)eighthiters;
  343. (void)whichdir;
  344. (void)allow_hp;
  345. (void)forced_stop;
  346. (void)hstep;
  347. (void)rr;
  348. (void)rc;
  349. (void)minr;
  350. (void)minc;
  351. (void)maxr;
  352. (void)maxc;
  353. (void)tr;
  354. (void)tc;
  355. (void)sse;
  356. (void)thismse;
  357. (void)cost_list;
  358. (void)use_accurate_subpel_search;
  359. return besterr;
  360. }
  361. uint32_t vp9_find_best_sub_pixel_tree_pruned_evenmore(
  362. const MACROBLOCK *x, MV *bestmv, const MV *ref_mv, int allow_hp,
  363. int error_per_bit, const vp9_variance_fn_ptr_t *vfp, int forced_stop,
  364. int iters_per_step, int *cost_list, int *mvjcost, int *mvcost[2],
  365. uint32_t *distortion, uint32_t *sse1, const uint8_t *second_pred, int w,
  366. int h, int use_accurate_subpel_search) {
  367. SETUP_SUBPEL_SEARCH;
  368. besterr = setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp, z,
  369. src_stride, y, y_stride, second_pred, w, h,
  370. offset, mvjcost, mvcost, sse1, distortion);
  371. (void)halfiters;
  372. (void)quarteriters;
  373. (void)eighthiters;
  374. (void)whichdir;
  375. (void)allow_hp;
  376. (void)forced_stop;
  377. (void)hstep;
  378. (void)use_accurate_subpel_search;
  379. if (cost_list && cost_list[0] != INT_MAX && cost_list[1] != INT_MAX &&
  380. cost_list[2] != INT_MAX && cost_list[3] != INT_MAX &&
  381. cost_list[4] != INT_MAX && is_cost_list_wellbehaved(cost_list)) {
  382. int ir, ic;
  383. unsigned int minpt = INT_MAX;
  384. get_cost_surf_min(cost_list, &ir, &ic, 2);
  385. if (ir != 0 || ic != 0) {
  386. CHECK_BETTER(minpt, tr + 2 * ir, tc + 2 * ic);
  387. }
  388. } else {
  389. FIRST_LEVEL_CHECKS;
  390. if (halfiters > 1) {
  391. SECOND_LEVEL_CHECKS;
  392. }
  393. tr = br;
  394. tc = bc;
  395. // Each subsequent iteration checks at least one point in common with
  396. // the last iteration, or two points if the diagonal was selected.
  397. // Note forced_stop: 0 - full, 1 - qtr only, 2 - half only
  398. if (forced_stop != 2) {
  399. hstep >>= 1;
  400. FIRST_LEVEL_CHECKS;
  401. if (quarteriters > 1) {
  402. SECOND_LEVEL_CHECKS;
  403. }
  404. }
  405. }
  406. tr = br;
  407. tc = bc;
  408. if (allow_hp && use_mv_hp(ref_mv) && forced_stop == 0) {
  409. hstep >>= 1;
  410. FIRST_LEVEL_CHECKS;
  411. if (eighthiters > 1) {
  412. SECOND_LEVEL_CHECKS;
  413. }
  414. }
  415. bestmv->row = br;
  416. bestmv->col = bc;
  417. return besterr;
  418. }
  419. uint32_t vp9_find_best_sub_pixel_tree_pruned_more(
  420. const MACROBLOCK *x, MV *bestmv, const MV *ref_mv, int allow_hp,
  421. int error_per_bit, const vp9_variance_fn_ptr_t *vfp, int forced_stop,
  422. int iters_per_step, int *cost_list, int *mvjcost, int *mvcost[2],
  423. uint32_t *distortion, uint32_t *sse1, const uint8_t *second_pred, int w,
  424. int h, int use_accurate_subpel_search) {
  425. SETUP_SUBPEL_SEARCH;
  426. (void)use_accurate_subpel_search;
  427. besterr = setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp, z,
  428. src_stride, y, y_stride, second_pred, w, h,
  429. offset, mvjcost, mvcost, sse1, distortion);
  430. if (cost_list && cost_list[0] != INT_MAX && cost_list[1] != INT_MAX &&
  431. cost_list[2] != INT_MAX && cost_list[3] != INT_MAX &&
  432. cost_list[4] != INT_MAX && is_cost_list_wellbehaved(cost_list)) {
  433. unsigned int minpt;
  434. int ir, ic;
  435. get_cost_surf_min(cost_list, &ir, &ic, 1);
  436. if (ir != 0 || ic != 0) {
  437. CHECK_BETTER(minpt, tr + ir * hstep, tc + ic * hstep);
  438. }
  439. } else {
  440. FIRST_LEVEL_CHECKS;
  441. if (halfiters > 1) {
  442. SECOND_LEVEL_CHECKS;
  443. }
  444. }
  445. // Each subsequent iteration checks at least one point in common with
  446. // the last iteration, or two points if the diagonal was selected.
  447. // Note forced_stop: 0 - full, 1 - qtr only, 2 - half only
  448. if (forced_stop != 2) {
  449. tr = br;
  450. tc = bc;
  451. hstep >>= 1;
  452. FIRST_LEVEL_CHECKS;
  453. if (quarteriters > 1) {
  454. SECOND_LEVEL_CHECKS;
  455. }
  456. }
  457. if (allow_hp && use_mv_hp(ref_mv) && forced_stop == 0) {
  458. tr = br;
  459. tc = bc;
  460. hstep >>= 1;
  461. FIRST_LEVEL_CHECKS;
  462. if (eighthiters > 1) {
  463. SECOND_LEVEL_CHECKS;
  464. }
  465. }
  466. // These lines ensure static analysis doesn't warn that
  467. // tr and tc aren't used after the above point.
  468. (void)tr;
  469. (void)tc;
  470. bestmv->row = br;
  471. bestmv->col = bc;
  472. return besterr;
  473. }
  474. uint32_t vp9_find_best_sub_pixel_tree_pruned(
  475. const MACROBLOCK *x, MV *bestmv, const MV *ref_mv, int allow_hp,
  476. int error_per_bit, const vp9_variance_fn_ptr_t *vfp, int forced_stop,
  477. int iters_per_step, int *cost_list, int *mvjcost, int *mvcost[2],
  478. uint32_t *distortion, uint32_t *sse1, const uint8_t *second_pred, int w,
  479. int h, int use_accurate_subpel_search) {
  480. SETUP_SUBPEL_SEARCH;
  481. (void)use_accurate_subpel_search;
  482. besterr = setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp, z,
  483. src_stride, y, y_stride, second_pred, w, h,
  484. offset, mvjcost, mvcost, sse1, distortion);
  485. if (cost_list && cost_list[0] != INT_MAX && cost_list[1] != INT_MAX &&
  486. cost_list[2] != INT_MAX && cost_list[3] != INT_MAX &&
  487. cost_list[4] != INT_MAX) {
  488. unsigned int left, right, up, down, diag;
  489. whichdir = (cost_list[1] < cost_list[3] ? 0 : 1) +
  490. (cost_list[2] < cost_list[4] ? 0 : 2);
  491. switch (whichdir) {
  492. case 0:
  493. CHECK_BETTER(left, tr, tc - hstep);
  494. CHECK_BETTER(down, tr + hstep, tc);
  495. CHECK_BETTER(diag, tr + hstep, tc - hstep);
  496. break;
  497. case 1:
  498. CHECK_BETTER(right, tr, tc + hstep);
  499. CHECK_BETTER(down, tr + hstep, tc);
  500. CHECK_BETTER(diag, tr + hstep, tc + hstep);
  501. break;
  502. case 2:
  503. CHECK_BETTER(left, tr, tc - hstep);
  504. CHECK_BETTER(up, tr - hstep, tc);
  505. CHECK_BETTER(diag, tr - hstep, tc - hstep);
  506. break;
  507. case 3:
  508. CHECK_BETTER(right, tr, tc + hstep);
  509. CHECK_BETTER(up, tr - hstep, tc);
  510. CHECK_BETTER(diag, tr - hstep, tc + hstep);
  511. break;
  512. }
  513. } else {
  514. FIRST_LEVEL_CHECKS;
  515. if (halfiters > 1) {
  516. SECOND_LEVEL_CHECKS;
  517. }
  518. }
  519. tr = br;
  520. tc = bc;
  521. // Each subsequent iteration checks at least one point in common with
  522. // the last iteration, or two points if the diagonal was selected.
  523. // Note forced_stop: 0 - full, 1 - qtr only, 2 - half only
  524. if (forced_stop != 2) {
  525. hstep >>= 1;
  526. FIRST_LEVEL_CHECKS;
  527. if (quarteriters > 1) {
  528. SECOND_LEVEL_CHECKS;
  529. }
  530. tr = br;
  531. tc = bc;
  532. }
  533. if (allow_hp && use_mv_hp(ref_mv) && forced_stop == 0) {
  534. hstep >>= 1;
  535. FIRST_LEVEL_CHECKS;
  536. if (eighthiters > 1) {
  537. SECOND_LEVEL_CHECKS;
  538. }
  539. tr = br;
  540. tc = bc;
  541. }
  542. // These lines ensure static analysis doesn't warn that
  543. // tr and tc aren't used after the above point.
  544. (void)tr;
  545. (void)tc;
  546. bestmv->row = br;
  547. bestmv->col = bc;
  548. return besterr;
  549. }
  550. /* clang-format off */
  551. static const MV search_step_table[12] = {
  552. // left, right, up, down
  553. { 0, -4 }, { 0, 4 }, { -4, 0 }, { 4, 0 },
  554. { 0, -2 }, { 0, 2 }, { -2, 0 }, { 2, 0 },
  555. { 0, -1 }, { 0, 1 }, { -1, 0 }, { 1, 0 }
  556. };
  557. /* clang-format on */
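  // Each row of four offsets above (in 1/8-pel units) serves one refinement
  // round: +/-4 for the half-pel pass, +/-2 for quarter-pel, +/-1 for eighth-pel.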
  558. static int accurate_sub_pel_search(
  559. const MACROBLOCKD *xd, const MV *this_mv, const struct scale_factors *sf,
  560. const InterpKernel *kernel, const vp9_variance_fn_ptr_t *vfp,
  561. const uint8_t *const src_address, const int src_stride,
  562. const uint8_t *const pre_address, int y_stride, const uint8_t *second_pred,
  563. int w, int h, uint32_t *sse) {
  564. #if CONFIG_VP9_HIGHBITDEPTH
  565. uint64_t besterr;
  566. assert(sf->x_step_q4 == 16 && sf->y_step_q4 == 16);
  567. assert(w != 0 && h != 0);
  568. if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
  569. DECLARE_ALIGNED(16, uint16_t, pred16[64 * 64]);
  570. vp9_highbd_build_inter_predictor(CONVERT_TO_SHORTPTR(pre_address), y_stride,
  571. pred16, w, this_mv, sf, w, h, 0, kernel,
  572. MV_PRECISION_Q3, 0, 0, xd->bd);
  573. if (second_pred != NULL) {
  574. DECLARE_ALIGNED(16, uint16_t, comp_pred16[64 * 64]);
  575. vpx_highbd_comp_avg_pred(comp_pred16, CONVERT_TO_SHORTPTR(second_pred), w,
  576. h, pred16, w);
  577. besterr = vfp->vf(CONVERT_TO_BYTEPTR(comp_pred16), w, src_address,
  578. src_stride, sse);
  579. } else {
  580. besterr =
  581. vfp->vf(CONVERT_TO_BYTEPTR(pred16), w, src_address, src_stride, sse);
  582. }
  583. } else {
  584. DECLARE_ALIGNED(16, uint8_t, pred[64 * 64]);
  585. vp9_build_inter_predictor(pre_address, y_stride, pred, w, this_mv, sf, w, h,
  586. 0, kernel, MV_PRECISION_Q3, 0, 0);
  587. if (second_pred != NULL) {
  588. DECLARE_ALIGNED(16, uint8_t, comp_pred[64 * 64]);
  589. vpx_comp_avg_pred(comp_pred, second_pred, w, h, pred, w);
  590. besterr = vfp->vf(comp_pred, w, src_address, src_stride, sse);
  591. } else {
  592. besterr = vfp->vf(pred, w, src_address, src_stride, sse);
  593. }
  594. }
  595. if (besterr >= UINT_MAX) return UINT_MAX;
  596. return (int)besterr;
  597. #else
  598. int besterr;
  599. DECLARE_ALIGNED(16, uint8_t, pred[64 * 64]);
  600. assert(sf->x_step_q4 == 16 && sf->y_step_q4 == 16);
  601. assert(w != 0 && h != 0);
  602. (void)xd;
  603. vp9_build_inter_predictor(pre_address, y_stride, pred, w, this_mv, sf, w, h,
  604. 0, kernel, MV_PRECISION_Q3, 0, 0);
  605. if (second_pred != NULL) {
  606. DECLARE_ALIGNED(16, uint8_t, comp_pred[64 * 64]);
  607. vpx_comp_avg_pred(comp_pred, second_pred, w, h, pred, w);
  608. besterr = vfp->vf(comp_pred, w, src_address, src_stride, sse);
  609. } else {
  610. besterr = vfp->vf(pred, w, src_address, src_stride, sse);
  611. }
  612. return besterr;
  613. #endif // CONFIG_VP9_HIGHBITDEPTH
  614. }
  615. // TODO(yunqing): this part can be further refactored.
  616. #if CONFIG_VP9_HIGHBITDEPTH
  617. /* checks if (r, c) has better score than previous best */
  618. #define CHECK_BETTER1(v, r, c) \
  619. if (c >= minc && c <= maxc && r >= minr && r <= maxr) { \
  620. int64_t tmpmse; \
  621. const MV mv = { r, c }; \
  622. const MV ref_mv = { rr, rc }; \
  623. thismse = \
  624. accurate_sub_pel_search(xd, &mv, x->me_sf, kernel, vfp, z, src_stride, \
  625. y, y_stride, second_pred, w, h, &sse); \
  626. tmpmse = thismse; \
  627. tmpmse += mv_err_cost(&mv, &ref_mv, mvjcost, mvcost, error_per_bit); \
  628. if (tmpmse >= INT_MAX) { \
  629. v = INT_MAX; \
  630. } else if ((v = (uint32_t)tmpmse) < besterr) { \
  631. besterr = v; \
  632. br = r; \
  633. bc = c; \
  634. *distortion = thismse; \
  635. *sse1 = sse; \
  636. } \
  637. } else { \
  638. v = INT_MAX; \
  639. }
  640. #else
  641. /* checks if (r, c) has better score than previous best */
  642. #define CHECK_BETTER1(v, r, c) \
  643. if (c >= minc && c <= maxc && r >= minr && r <= maxr) { \
  644. const MV mv = { r, c }; \
  645. const MV ref_mv = { rr, rc }; \
  646. thismse = \
  647. accurate_sub_pel_search(xd, &mv, x->me_sf, kernel, vfp, z, src_stride, \
  648. y, y_stride, second_pred, w, h, &sse); \
  649. if ((v = mv_err_cost(&mv, &ref_mv, mvjcost, mvcost, error_per_bit) + \
  650. thismse) < besterr) { \
  651. besterr = v; \
  652. br = r; \
  653. bc = c; \
  654. *distortion = thismse; \
  655. *sse1 = sse; \
  656. } \
  657. } else { \
  658. v = INT_MAX; \
  659. }
  660. #endif
  661. uint32_t vp9_find_best_sub_pixel_tree(
  662. const MACROBLOCK *x, MV *bestmv, const MV *ref_mv, int allow_hp,
  663. int error_per_bit, const vp9_variance_fn_ptr_t *vfp, int forced_stop,
  664. int iters_per_step, int *cost_list, int *mvjcost, int *mvcost[2],
  665. uint32_t *distortion, uint32_t *sse1, const uint8_t *second_pred, int w,
  666. int h, int use_accurate_subpel_search) {
  667. const uint8_t *const z = x->plane[0].src.buf;
  668. const uint8_t *const src_address = z;
  669. const int src_stride = x->plane[0].src.stride;
  670. const MACROBLOCKD *xd = &x->e_mbd;
  671. unsigned int besterr = UINT_MAX;
  672. unsigned int sse;
  673. int thismse;
  674. const int y_stride = xd->plane[0].pre[0].stride;
  675. const int offset = bestmv->row * y_stride + bestmv->col;
  676. const uint8_t *const y = xd->plane[0].pre[0].buf;
  677. int rr = ref_mv->row;
  678. int rc = ref_mv->col;
  679. int br = bestmv->row * 8;
  680. int bc = bestmv->col * 8;
  681. int hstep = 4;
  682. int iter, round = 3 - forced_stop;
  683. int minc, maxc, minr, maxr;
  684. int tr = br;
  685. int tc = bc;
  686. const MV *search_step = search_step_table;
  687. int idx, best_idx = -1;
  688. unsigned int cost_array[5];
  689. int kr, kc;
  690. MvLimits subpel_mv_limits;
  691. // TODO(yunqing): need to add 4-tap filter optimization to speed up the
  692. // encoder.
  693. const InterpKernel *kernel =
  694. (use_accurate_subpel_search > 0)
  695. ? ((use_accurate_subpel_search == USE_4_TAPS)
  696. ? vp9_filter_kernels[FOURTAP]
  697. : ((use_accurate_subpel_search == USE_8_TAPS)
  698. ? vp9_filter_kernels[EIGHTTAP]
  699. : vp9_filter_kernels[EIGHTTAP_SHARP]))
  700. : vp9_filter_kernels[BILINEAR];
  701. vp9_set_subpel_mv_search_range(&subpel_mv_limits, &x->mv_limits, ref_mv);
  702. minc = subpel_mv_limits.col_min;
  703. maxc = subpel_mv_limits.col_max;
  704. minr = subpel_mv_limits.row_min;
  705. maxr = subpel_mv_limits.row_max;
  706. if (!(allow_hp && use_mv_hp(ref_mv)))
  707. if (round == 3) round = 2;
  708. bestmv->row *= 8;
  709. bestmv->col *= 8;
  710. besterr = setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp, z,
  711. src_stride, y, y_stride, second_pred, w, h,
  712. offset, mvjcost, mvcost, sse1, distortion);
  713. (void)cost_list; // to silence compiler warning
  714. for (iter = 0; iter < round; ++iter) {
  715. // Check vertical and horizontal sub-pixel positions.
  716. for (idx = 0; idx < 4; ++idx) {
  717. tr = br + search_step[idx].row;
  718. tc = bc + search_step[idx].col;
  719. if (tc >= minc && tc <= maxc && tr >= minr && tr <= maxr) {
  720. MV this_mv;
  721. this_mv.row = tr;
  722. this_mv.col = tc;
  723. if (use_accurate_subpel_search) {
  724. thismse = accurate_sub_pel_search(xd, &this_mv, x->me_sf, kernel, vfp,
  725. src_address, src_stride, y,
  726. y_stride, second_pred, w, h, &sse);
  727. } else {
  728. const uint8_t *const pre_address =
  729. y + (tr >> 3) * y_stride + (tc >> 3);
  730. if (second_pred == NULL)
  731. thismse = vfp->svf(pre_address, y_stride, sp(tc), sp(tr),
  732. src_address, src_stride, &sse);
  733. else
  734. thismse = vfp->svaf(pre_address, y_stride, sp(tc), sp(tr),
  735. src_address, src_stride, &sse, second_pred);
  736. }
  737. cost_array[idx] = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost,
  738. mvcost, error_per_bit);
  739. if (cost_array[idx] < besterr) {
  740. best_idx = idx;
  741. besterr = cost_array[idx];
  742. *distortion = thismse;
  743. *sse1 = sse;
  744. }
  745. } else {
  746. cost_array[idx] = UINT_MAX;
  747. }
  748. }
  749. // Check diagonal sub-pixel position
  750. kc = (cost_array[0] <= cost_array[1] ? -hstep : hstep);
  751. kr = (cost_array[2] <= cost_array[3] ? -hstep : hstep);
  752. tc = bc + kc;
  753. tr = br + kr;
  754. if (tc >= minc && tc <= maxc && tr >= minr && tr <= maxr) {
  755. MV this_mv = { tr, tc };
  756. if (use_accurate_subpel_search) {
  757. thismse = accurate_sub_pel_search(xd, &this_mv, x->me_sf, kernel, vfp,
  758. src_address, src_stride, y, y_stride,
  759. second_pred, w, h, &sse);
  760. } else {
  761. const uint8_t *const pre_address = y + (tr >> 3) * y_stride + (tc >> 3);
  762. if (second_pred == NULL)
  763. thismse = vfp->svf(pre_address, y_stride, sp(tc), sp(tr), src_address,
  764. src_stride, &sse);
  765. else
  766. thismse = vfp->svaf(pre_address, y_stride, sp(tc), sp(tr),
  767. src_address, src_stride, &sse, second_pred);
  768. }
  769. cost_array[4] = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost,
  770. error_per_bit);
  771. if (cost_array[4] < besterr) {
  772. best_idx = 4;
  773. besterr = cost_array[4];
  774. *distortion = thismse;
  775. *sse1 = sse;
  776. }
  777. } else {
  778. cost_array[idx] = UINT_MAX;
  779. }
  780. if (best_idx < 4 && best_idx >= 0) {
  781. br += search_step[best_idx].row;
  782. bc += search_step[best_idx].col;
  783. } else if (best_idx == 4) {
  784. br = tr;
  785. bc = tc;
  786. }
  787. if (iters_per_step > 0 && best_idx != -1) {
  788. unsigned int second;
  789. const int br0 = br;
  790. const int bc0 = bc;
  791. assert(tr == br || tc == bc);
  792. if (tr == br && tc != bc) {
  793. kc = bc - tc;
  794. if (iters_per_step == 1) {
  795. if (use_accurate_subpel_search) {
  796. CHECK_BETTER1(second, br0, bc0 + kc);
  797. } else {
  798. CHECK_BETTER(second, br0, bc0 + kc);
  799. }
  800. }
  801. } else if (tr != br && tc == bc) {
  802. kr = br - tr;
  803. if (iters_per_step == 1) {
  804. if (use_accurate_subpel_search) {
  805. CHECK_BETTER1(second, br0 + kr, bc0);
  806. } else {
  807. CHECK_BETTER(second, br0 + kr, bc0);
  808. }
  809. }
  810. }
  811. if (iters_per_step > 1) {
  812. if (use_accurate_subpel_search) {
  813. CHECK_BETTER1(second, br0 + kr, bc0);
  814. CHECK_BETTER1(second, br0, bc0 + kc);
  815. if (br0 != br || bc0 != bc) {
  816. CHECK_BETTER1(second, br0 + kr, bc0 + kc);
  817. }
  818. } else {
  819. CHECK_BETTER(second, br0 + kr, bc0);
  820. CHECK_BETTER(second, br0, bc0 + kc);
  821. if (br0 != br || bc0 != bc) {
  822. CHECK_BETTER(second, br0 + kr, bc0 + kc);
  823. }
  824. }
  825. }
  826. }
  827. search_step += 4;
  828. hstep >>= 1;
  829. best_idx = -1;
  830. }
  831. // Each subsequent iteration checks at least one point in common with
  832. // the last iteration, or two points if the diagonal was selected.
  833. // These lines ensure static analysis doesn't warn that
  834. // tr and tc aren't used after the above point.
  835. (void)tr;
  836. (void)tc;
  837. bestmv->row = br;
  838. bestmv->col = bc;
  839. return besterr;
  840. }
  841. #undef CHECK_BETTER
  842. #undef CHECK_BETTER1
  843. static INLINE int check_bounds(const MvLimits *mv_limits, int row, int col,
  844. int range) {
  845. return ((row - range) >= mv_limits->row_min) &
  846. ((row + range) <= mv_limits->row_max) &
  847. ((col - range) >= mv_limits->col_min) &
  848. ((col + range) <= mv_limits->col_max);
  849. }
  850. static INLINE int is_mv_in(const MvLimits *mv_limits, const MV *mv) {
  851. return (mv->col >= mv_limits->col_min) && (mv->col <= mv_limits->col_max) &&
  852. (mv->row >= mv_limits->row_min) && (mv->row <= mv_limits->row_max);
  853. }
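  // check_bounds() reports whether the whole +/-range neighborhood of
  // (row, col) lies inside mv_limits, so the search loops can skip the
  // per-candidate is_mv_in() test when it returns nonzero.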
  854. #define CHECK_BETTER \
  855. { \
  856. if (thissad < bestsad) { \
  857. if (use_mvcost) \
  858. thissad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit); \
  859. if (thissad < bestsad) { \
  860. bestsad = thissad; \
  861. best_site = i; \
  862. } \
  863. } \
  864. }
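  // Two-stage test: the raw SAD is compared first, and the MV rate cost is
  // added (and the comparison repeated) only when the raw SAD already beats
  // the current best, which avoids needless mvsad_err_cost() calls.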
  865. #define MAX_PATTERN_SCALES 11
  866. #define MAX_PATTERN_CANDIDATES 8 // max number of candidates per scale
  867. #define PATTERN_CANDIDATES_REF 3 // number of refinement candidates
  868. // Calculate and return a sad+mvcost list around an integer best pel.
  869. static INLINE void calc_int_cost_list(const MACROBLOCK *x, const MV *ref_mv,
  870. int sadpb,
  871. const vp9_variance_fn_ptr_t *fn_ptr,
  872. const MV *best_mv, int *cost_list) {
  873. static const MV neighbors[4] = { { 0, -1 }, { 1, 0 }, { 0, 1 }, { -1, 0 } };
  874. const struct buf_2d *const what = &x->plane[0].src;
  875. const struct buf_2d *const in_what = &x->e_mbd.plane[0].pre[0];
  876. const MV fcenter_mv = { ref_mv->row >> 3, ref_mv->col >> 3 };
  877. int br = best_mv->row;
  878. int bc = best_mv->col;
  879. MV this_mv;
  880. int i;
  881. unsigned int sse;
  882. this_mv.row = br;
  883. this_mv.col = bc;
  884. cost_list[0] =
  885. fn_ptr->vf(what->buf, what->stride, get_buf_from_mv(in_what, &this_mv),
  886. in_what->stride, &sse) +
  887. mvsad_err_cost(x, &this_mv, &fcenter_mv, sadpb);
  888. if (check_bounds(&x->mv_limits, br, bc, 1)) {
  889. for (i = 0; i < 4; i++) {
  890. const MV this_mv = { br + neighbors[i].row, bc + neighbors[i].col };
  891. cost_list[i + 1] = fn_ptr->vf(what->buf, what->stride,
  892. get_buf_from_mv(in_what, &this_mv),
  893. in_what->stride, &sse) +
  894. mv_err_cost(&this_mv, &fcenter_mv, x->nmvjointcost,
  895. x->mvcost, x->errorperbit);
  896. }
  897. } else {
  898. for (i = 0; i < 4; i++) {
  899. const MV this_mv = { br + neighbors[i].row, bc + neighbors[i].col };
  900. if (!is_mv_in(&x->mv_limits, &this_mv))
  901. cost_list[i + 1] = INT_MAX;
  902. else
  903. cost_list[i + 1] = fn_ptr->vf(what->buf, what->stride,
  904. get_buf_from_mv(in_what, &this_mv),
  905. in_what->stride, &sse) +
  906. mv_err_cost(&this_mv, &fcenter_mv, x->nmvjointcost,
  907. x->mvcost, x->errorperbit);
  908. }
  909. }
  910. }
  911. // Generic pattern search function that searches over multiple scales.
  912. // Each scale can have a different number of candidates and shape of
  913. // candidates as indicated in the num_candidates and candidates arrays
  914. // passed into this function
  915. //
  916. static int vp9_pattern_search(
  917. const MACROBLOCK *x, MV *ref_mv, int search_param, int sad_per_bit,
  918. int do_init_search, int *cost_list, const vp9_variance_fn_ptr_t *vfp,
  919. int use_mvcost, const MV *center_mv, MV *best_mv,
  920. const int num_candidates[MAX_PATTERN_SCALES],
  921. const MV candidates[MAX_PATTERN_SCALES][MAX_PATTERN_CANDIDATES]) {
  922. const MACROBLOCKD *const xd = &x->e_mbd;
  923. static const int search_param_to_steps[MAX_MVSEARCH_STEPS] = {
  924. 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
  925. };
  926. int i, s, t;
  927. const struct buf_2d *const what = &x->plane[0].src;
  928. const struct buf_2d *const in_what = &xd->plane[0].pre[0];
  929. int br, bc;
  930. int bestsad = INT_MAX;
  931. int thissad;
  932. int k = -1;
  933. const MV fcenter_mv = { center_mv->row >> 3, center_mv->col >> 3 };
  934. int best_init_s = search_param_to_steps[search_param];
  935. // adjust ref_mv to make sure it is within MV range
  936. clamp_mv(ref_mv, x->mv_limits.col_min, x->mv_limits.col_max,
  937. x->mv_limits.row_min, x->mv_limits.row_max);
  938. br = ref_mv->row;
  939. bc = ref_mv->col;
  940. // Work out the start point for the search
  941. bestsad = vfp->sdf(what->buf, what->stride, get_buf_from_mv(in_what, ref_mv),
  942. in_what->stride) +
  943. mvsad_err_cost(x, ref_mv, &fcenter_mv, sad_per_bit);
  944. // Search all possible scales up to the search param around the center point.
  945. // Pick the scale of the best point found as the starting scale for
  946. // further steps around it.
  947. if (do_init_search) {
  948. s = best_init_s;
  949. best_init_s = -1;
  950. for (t = 0; t <= s; ++t) {
  951. int best_site = -1;
  952. if (check_bounds(&x->mv_limits, br, bc, 1 << t)) {
  953. for (i = 0; i < num_candidates[t]; i++) {
  954. const MV this_mv = { br + candidates[t][i].row,
  955. bc + candidates[t][i].col };
  956. thissad =
  957. vfp->sdf(what->buf, what->stride,
  958. get_buf_from_mv(in_what, &this_mv), in_what->stride);
  959. CHECK_BETTER
  960. }
  961. } else {
  962. for (i = 0; i < num_candidates[t]; i++) {
  963. const MV this_mv = { br + candidates[t][i].row,
  964. bc + candidates[t][i].col };
  965. if (!is_mv_in(&x->mv_limits, &this_mv)) continue;
  966. thissad =
  967. vfp->sdf(what->buf, what->stride,
  968. get_buf_from_mv(in_what, &this_mv), in_what->stride);
  969. CHECK_BETTER
  970. }
  971. }
  972. if (best_site == -1) {
  973. continue;
  974. } else {
  975. best_init_s = t;
  976. k = best_site;
  977. }
  978. }
  979. if (best_init_s != -1) {
  980. br += candidates[best_init_s][k].row;
  981. bc += candidates[best_init_s][k].col;
  982. }
  983. }
  984. // If the center point is still the best, just skip this and move to
  985. // the refinement step.
  986. if (best_init_s != -1) {
  987. int best_site = -1;
  988. s = best_init_s;
  989. do {
  990. // No need to search all 6 points the 1st time if initial search was used
  991. if (!do_init_search || s != best_init_s) {
  992. if (check_bounds(&x->mv_limits, br, bc, 1 << s)) {
  993. for (i = 0; i < num_candidates[s]; i++) {
  994. const MV this_mv = { br + candidates[s][i].row,
  995. bc + candidates[s][i].col };
  996. thissad =
  997. vfp->sdf(what->buf, what->stride,
  998. get_buf_from_mv(in_what, &this_mv), in_what->stride);
  999. CHECK_BETTER
  1000. }
  1001. } else {
  1002. for (i = 0; i < num_candidates[s]; i++) {
  1003. const MV this_mv = { br + candidates[s][i].row,
  1004. bc + candidates[s][i].col };
  1005. if (!is_mv_in(&x->mv_limits, &this_mv)) continue;
  1006. thissad =
  1007. vfp->sdf(what->buf, what->stride,
  1008. get_buf_from_mv(in_what, &this_mv), in_what->stride);
  1009. CHECK_BETTER
  1010. }
  1011. }
  1012. if (best_site == -1) {
  1013. continue;
  1014. } else {
  1015. br += candidates[s][best_site].row;
  1016. bc += candidates[s][best_site].col;
  1017. k = best_site;
  1018. }
  1019. }
  1020. do {
  1021. int next_chkpts_indices[PATTERN_CANDIDATES_REF];
  1022. best_site = -1;
  1023. next_chkpts_indices[0] = (k == 0) ? num_candidates[s] - 1 : k - 1;
  1024. next_chkpts_indices[1] = k;
  1025. next_chkpts_indices[2] = (k == num_candidates[s] - 1) ? 0 : k + 1;
  1026. if (check_bounds(&x->mv_limits, br, bc, 1 << s)) {
  1027. for (i = 0; i < PATTERN_CANDIDATES_REF; i++) {
  1028. const MV this_mv = {
  1029. br + candidates[s][next_chkpts_indices[i]].row,
  1030. bc + candidates[s][next_chkpts_indices[i]].col
  1031. };
  1032. thissad =
  1033. vfp->sdf(what->buf, what->stride,
  1034. get_buf_from_mv(in_what, &this_mv), in_what->stride);
  1035. CHECK_BETTER
  1036. }
  1037. } else {
  1038. for (i = 0; i < PATTERN_CANDIDATES_REF; i++) {
  1039. const MV this_mv = {
  1040. br + candidates[s][next_chkpts_indices[i]].row,
  1041. bc + candidates[s][next_chkpts_indices[i]].col
  1042. };
  1043. if (!is_mv_in(&x->mv_limits, &this_mv)) continue;
  1044. thissad =
  1045. vfp->sdf(what->buf, what->stride,
  1046. get_buf_from_mv(in_what, &this_mv), in_what->stride);
  1047. CHECK_BETTER
  1048. }
  1049. }
  1050. if (best_site != -1) {
  1051. k = next_chkpts_indices[best_site];
  1052. br += candidates[s][k].row;
  1053. bc += candidates[s][k].col;
  1054. }
  1055. } while (best_site != -1);
  1056. } while (s--);
  1057. }
  1058. // Returns the one-away integer pel sad values around the best as follows:
  1059. // cost_list[0]: cost at the best integer pel
  1060. // cost_list[1]: cost at delta {0, -1} (left) from the best integer pel
  1061. // cost_list[2]: cost at delta { 1, 0} (bottom) from the best integer pel
  1062. // cost_list[3]: cost at delta { 0, 1} (right) from the best integer pel
  1063. // cost_list[4]: cost at delta {-1, 0} (top) from the best integer pel
  1064. if (cost_list) {
  1065. const MV best_mv = { br, bc };
  1066. calc_int_cost_list(x, &fcenter_mv, sad_per_bit, vfp, &best_mv, cost_list);
  1067. }
  1068. best_mv->row = br;
  1069. best_mv->col = bc;
  1070. return bestsad;
  1071. }
  1072. // A specialized function where the smallest-scale search candidates
  1073. // are the four 1-away neighbors and cost_list is non-null.
  1074. // TODO(debargha): Merge this function with the one above. Also remove
  1075. // use_mvcost option since it is always 1, to save unnecessary branches.
  1076. static int vp9_pattern_search_sad(
  1077. const MACROBLOCK *x, MV *ref_mv, int search_param, int sad_per_bit,
  1078. int do_init_search, int *cost_list, const vp9_variance_fn_ptr_t *vfp,
  1079. int use_mvcost, const MV *center_mv, MV *best_mv,
  1080. const int num_candidates[MAX_PATTERN_SCALES],
  1081. const MV candidates[MAX_PATTERN_SCALES][MAX_PATTERN_CANDIDATES]) {
  1082. const MACROBLOCKD *const xd = &x->e_mbd;
  1083. static const int search_param_to_steps[MAX_MVSEARCH_STEPS] = {
  1084. 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
  1085. };
  1086. int i, s, t;
  1087. const struct buf_2d *const what = &x->plane[0].src;
  1088. const struct buf_2d *const in_what = &xd->plane[0].pre[0];
  1089. int br, bc;
  1090. int bestsad = INT_MAX;
  1091. int thissad;
  1092. int k = -1;
  1093. const MV fcenter_mv = { center_mv->row >> 3, center_mv->col >> 3 };
  1094. int best_init_s = search_param_to_steps[search_param];
  1095. // adjust ref_mv to make sure it is within MV range
  1096. clamp_mv(ref_mv, x->mv_limits.col_min, x->mv_limits.col_max,
  1097. x->mv_limits.row_min, x->mv_limits.row_max);
  1098. br = ref_mv->row;
  1099. bc = ref_mv->col;
  1100. if (cost_list != NULL) {
  1101. cost_list[0] = cost_list[1] = cost_list[2] = cost_list[3] = cost_list[4] =
  1102. INT_MAX;
  1103. }
  1104. // Work out the start point for the search
  1105. bestsad = vfp->sdf(what->buf, what->stride, get_buf_from_mv(in_what, ref_mv),
  1106. in_what->stride) +
  1107. mvsad_err_cost(x, ref_mv, &fcenter_mv, sad_per_bit);
  1108. // Search all possible scales up to the search param around the center point.
  1109. // Pick the scale of the best point found as the starting scale for
  1110. // further steps around it.
  1111. if (do_init_search) {
  1112. s = best_init_s;
  1113. best_init_s = -1;
  1114. for (t = 0; t <= s; ++t) {
  1115. int best_site = -1;
  1116. if (check_bounds(&x->mv_limits, br, bc, 1 << t)) {
  1117. for (i = 0; i < num_candidates[t]; i++) {
  1118. const MV this_mv = { br + candidates[t][i].row,
  1119. bc + candidates[t][i].col };
  1120. thissad =
  1121. vfp->sdf(what->buf, what->stride,
  1122. get_buf_from_mv(in_what, &this_mv), in_what->stride);
  1123. CHECK_BETTER
  1124. }
  1125. } else {
  1126. for (i = 0; i < num_candidates[t]; i++) {
  1127. const MV this_mv = { br + candidates[t][i].row,
  1128. bc + candidates[t][i].col };
  1129. if (!is_mv_in(&x->mv_limits, &this_mv)) continue;
  1130. thissad =
  1131. vfp->sdf(what->buf, what->stride,
  1132. get_buf_from_mv(in_what, &this_mv), in_what->stride);
  1133. CHECK_BETTER
  1134. }
  1135. }
  1136. if (best_site == -1) {
  1137. continue;
  1138. } else {
  1139. best_init_s = t;
  1140. k = best_site;
  1141. }
  1142. }
  1143. if (best_init_s != -1) {
  1144. br += candidates[best_init_s][k].row;
  1145. bc += candidates[best_init_s][k].col;
  1146. }
  1147. }
  1148. // If the center point is still the best, just skip this and move to
  1149. // the refinement step.
  1150. if (best_init_s != -1) {
  1151. int do_sad = (num_candidates[0] == 4 && cost_list != NULL);
  1152. int best_site = -1;
  1153. s = best_init_s;
  1154. for (; s >= do_sad; s--) {
  1155. if (!do_init_search || s != best_init_s) {
  1156. if (check_bounds(&x->mv_limits, br, bc, 1 << s)) {
  1157. for (i = 0; i < num_candidates[s]; i++) {
  1158. const MV this_mv = { br + candidates[s][i].row,
  1159. bc + candidates[s][i].col };
  1160. thissad =
  1161. vfp->sdf(what->buf, what->stride,
  1162. get_buf_from_mv(in_what, &this_mv), in_what->stride);
  1163. CHECK_BETTER
  1164. }
  1165. } else {
  1166. for (i = 0; i < num_candidates[s]; i++) {
  1167. const MV this_mv = { br + candidates[s][i].row,
  1168. bc + candidates[s][i].col };
  1169. if (!is_mv_in(&x->mv_limits, &this_mv)) continue;
  1170. thissad =
  1171. vfp->sdf(what->buf, what->stride,
  1172. get_buf_from_mv(in_what, &this_mv), in_what->stride);
  1173. CHECK_BETTER
  1174. }
  1175. }
  1176. if (best_site == -1) {
  1177. continue;
  1178. } else {
  1179. br += candidates[s][best_site].row;
  1180. bc += candidates[s][best_site].col;
  1181. k = best_site;
  1182. }
  1183. }
  1184. do {
  1185. int next_chkpts_indices[PATTERN_CANDIDATES_REF];
  1186. best_site = -1;
  1187. next_chkpts_indices[0] = (k == 0) ? num_candidates[s] - 1 : k - 1;
  1188. next_chkpts_indices[1] = k;
  1189. next_chkpts_indices[2] = (k == num_candidates[s] - 1) ? 0 : k + 1;
  1190. if (check_bounds(&x->mv_limits, br, bc, 1 << s)) {
  1191. for (i = 0; i < PATTERN_CANDIDATES_REF; i++) {
  1192. const MV this_mv = {
  1193. br + candidates[s][next_chkpts_indices[i]].row,
  1194. bc + candidates[s][next_chkpts_indices[i]].col
  1195. };
  1196. thissad =
  1197. vfp->sdf(what->buf, what->stride,
  1198. get_buf_from_mv(in_what, &this_mv), in_what->stride);
  1199. CHECK_BETTER
  1200. }
  1201. } else {
  1202. for (i = 0; i < PATTERN_CANDIDATES_REF; i++) {
  1203. const MV this_mv = {
  1204. br + candidates[s][next_chkpts_indices[i]].row,
  1205. bc + candidates[s][next_chkpts_indices[i]].col
  1206. };
  1207. if (!is_mv_in(&x->mv_limits, &this_mv)) continue;
  1208. thissad =
  1209. vfp->sdf(what->buf, what->stride,
  1210. get_buf_from_mv(in_what, &this_mv), in_what->stride);
  1211. CHECK_BETTER
  1212. }
  1213. }
  1214. if (best_site != -1) {
  1215. k = next_chkpts_indices[best_site];
  1216. br += candidates[s][k].row;
  1217. bc += candidates[s][k].col;
  1218. }
  1219. } while (best_site != -1);
  1220. }
  1221. // Note: If we enter the if below, then cost_list must be non-NULL.
  1222. if (s == 0) {
  1223. cost_list[0] = bestsad;
  1224. if (!do_init_search || s != best_init_s) {
  1225. if (check_bounds(&x->mv_limits, br, bc, 1 << s)) {
  1226. for (i = 0; i < num_candidates[s]; i++) {
  1227. const MV this_mv = { br + candidates[s][i].row,
  1228. bc + candidates[s][i].col };
  1229. cost_list[i + 1] = thissad =
  1230. vfp->sdf(what->buf, what->stride,
  1231. get_buf_from_mv(in_what, &this_mv), in_what->stride);
  1232. CHECK_BETTER
  1233. }
  1234. } else {
  1235. for (i = 0; i < num_candidates[s]; i++) {
  1236. const MV this_mv = { br + candidates[s][i].row,
  1237. bc + candidates[s][i].col };
  1238. if (!is_mv_in(&x->mv_limits, &this_mv)) continue;
  1239. cost_list[i + 1] = thissad =
  1240. vfp->sdf(what->buf, what->stride,
  1241. get_buf_from_mv(in_what, &this_mv), in_what->stride);
  1242. CHECK_BETTER
  1243. }
  1244. }
  1245. if (best_site != -1) {
  1246. br += candidates[s][best_site].row;
  1247. bc += candidates[s][best_site].col;
  1248. k = best_site;
  1249. }
  1250. }
  1251. while (best_site != -1) {
  1252. int next_chkpts_indices[PATTERN_CANDIDATES_REF];
  1253. best_site = -1;
  1254. next_chkpts_indices[0] = (k == 0) ? num_candidates[s] - 1 : k - 1;
  1255. next_chkpts_indices[1] = k;
  1256. next_chkpts_indices[2] = (k == num_candidates[s] - 1) ? 0 : k + 1;
  1257. cost_list[1] = cost_list[2] = cost_list[3] = cost_list[4] = INT_MAX;
  1258. cost_list[((k + 2) % 4) + 1] = cost_list[0];
  1259. cost_list[0] = bestsad;
  1260. if (check_bounds(&x->mv_limits, br, bc, 1 << s)) {
  1261. for (i = 0; i < PATTERN_CANDIDATES_REF; i++) {
  1262. const MV this_mv = {
  1263. br + candidates[s][next_chkpts_indices[i]].row,
  1264. bc + candidates[s][next_chkpts_indices[i]].col
  1265. };
  1266. cost_list[next_chkpts_indices[i] + 1] = thissad =
  1267. vfp->sdf(what->buf, what->stride,
  1268. get_buf_from_mv(in_what, &this_mv), in_what->stride);
  1269. CHECK_BETTER
  1270. }
  1271. } else {
  1272. for (i = 0; i < PATTERN_CANDIDATES_REF; i++) {
  1273. const MV this_mv = {
  1274. br + candidates[s][next_chkpts_indices[i]].row,
  1275. bc + candidates[s][next_chkpts_indices[i]].col
  1276. };
  1277. if (!is_mv_in(&x->mv_limits, &this_mv)) {
  1278. cost_list[next_chkpts_indices[i] + 1] = INT_MAX;
  1279. continue;
  1280. }
  1281. cost_list[next_chkpts_indices[i] + 1] = thissad =
  1282. vfp->sdf(what->buf, what->stride,
  1283. get_buf_from_mv(in_what, &this_mv), in_what->stride);
  1284. CHECK_BETTER
  1285. }
  1286. }
  1287. if (best_site != -1) {
  1288. k = next_chkpts_indices[best_site];
  1289. br += candidates[s][k].row;
  1290. bc += candidates[s][k].col;
  1291. }
  1292. }
  1293. }
  1294. }
  1295. // Returns the one-away integer pel sad values around the best as follows:
  1296. // cost_list[0]: sad at the best integer pel
  1297. // cost_list[1]: sad at delta {0, -1} (left) from the best integer pel
  1298. // cost_list[2]: sad at delta { 1, 0} (bottom) from the best integer pel
  1299. // cost_list[3]: sad at delta { 0, 1} (right) from the best integer pel
  1300. // cost_list[4]: sad at delta {-1, 0} (top) from the best integer pel
  1301. if (cost_list) {
  1302. static const MV neighbors[4] = { { 0, -1 }, { 1, 0 }, { 0, 1 }, { -1, 0 } };
  1303. if (cost_list[0] == INT_MAX) {
  1304. cost_list[0] = bestsad;
  1305. if (check_bounds(&x->mv_limits, br, bc, 1)) {
  1306. for (i = 0; i < 4; i++) {
  1307. const MV this_mv = { br + neighbors[i].row, bc + neighbors[i].col };
  1308. cost_list[i + 1] =
  1309. vfp->sdf(what->buf, what->stride,
  1310. get_buf_from_mv(in_what, &this_mv), in_what->stride);
  1311. }
  1312. } else {
  1313. for (i = 0; i < 4; i++) {
  1314. const MV this_mv = { br + neighbors[i].row, bc + neighbors[i].col };
  1315. if (!is_mv_in(&x->mv_limits, &this_mv))
  1316. cost_list[i + 1] = INT_MAX;
  1317. else
  1318. cost_list[i + 1] =
  1319. vfp->sdf(what->buf, what->stride,
  1320. get_buf_from_mv(in_what, &this_mv), in_what->stride);
  1321. }
  1322. }
  1323. } else {
  1324. if (use_mvcost) {
  1325. for (i = 0; i < 4; i++) {
  1326. const MV this_mv = { br + neighbors[i].row, bc + neighbors[i].col };
  1327. if (cost_list[i + 1] != INT_MAX) {
  1328. cost_list[i + 1] +=
  1329. mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit);
  1330. }
  1331. }
  1332. }
  1333. }
  1334. }
  1335. best_mv->row = br;
  1336. best_mv->col = bc;
  1337. return bestsad;
  1338. }
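// Illustrative note on the refinement loops above: after moving to pattern
// candidate k, only the three candidates angularly adjacent to the move
// direction are re-checked around the new centre, via next_chkpts_indices.
// For example, with an 8-point scale (num_candidates[s] == 8) and k == 0:
//
//   next_chkpts_indices == { 7, 0, 1 }  // wraps to the last candidate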
  1339. int vp9_get_mvpred_var(const MACROBLOCK *x, const MV *best_mv,
  1340. const MV *center_mv, const vp9_variance_fn_ptr_t *vfp,
  1341. int use_mvcost) {
  1342. const MACROBLOCKD *const xd = &x->e_mbd;
  1343. const struct buf_2d *const what = &x->plane[0].src;
  1344. const struct buf_2d *const in_what = &xd->plane[0].pre[0];
  1345. const MV mv = { best_mv->row * 8, best_mv->col * 8 };
  1346. uint32_t unused;
  1347. #if CONFIG_VP9_HIGHBITDEPTH
  1348. uint64_t err =
  1349. vfp->vf(what->buf, what->stride, get_buf_from_mv(in_what, best_mv),
  1350. in_what->stride, &unused);
  1351. err += (use_mvcost ? mv_err_cost(&mv, center_mv, x->nmvjointcost, x->mvcost,
  1352. x->errorperbit)
  1353. : 0);
  1354. if (err >= INT_MAX) return INT_MAX;
  1355. return (int)err;
  1356. #else
  1357. return vfp->vf(what->buf, what->stride, get_buf_from_mv(in_what, best_mv),
  1358. in_what->stride, &unused) +
  1359. (use_mvcost ? mv_err_cost(&mv, center_mv, x->nmvjointcost, x->mvcost,
  1360. x->errorperbit)
  1361. : 0);
  1362. #endif
  1363. }
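// Illustrative sketch: best_mv is in full-pel units, so it is promoted to the
// 1/8-pel units expected by mv_err_cost() by multiplying by 8, e.g.
//
//   const MV full_pel = { 3, -2 };
//   const MV eighth_pel = { 3 * 8, -2 * 8 };  // { 24, -16 }
//
// which is exactly what { best_mv->row * 8, best_mv->col * 8 } above computes.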
  1364. int vp9_get_mvpred_av_var(const MACROBLOCK *x, const MV *best_mv,
  1365. const MV *center_mv, const uint8_t *second_pred,
  1366. const vp9_variance_fn_ptr_t *vfp, int use_mvcost) {
  1367. const MACROBLOCKD *const xd = &x->e_mbd;
  1368. const struct buf_2d *const what = &x->plane[0].src;
  1369. const struct buf_2d *const in_what = &xd->plane[0].pre[0];
  1370. const MV mv = { best_mv->row * 8, best_mv->col * 8 };
  1371. unsigned int unused;
  1372. return vfp->svaf(get_buf_from_mv(in_what, best_mv), in_what->stride, 0, 0,
  1373. what->buf, what->stride, &unused, second_pred) +
  1374. (use_mvcost ? mv_err_cost(&mv, center_mv, x->nmvjointcost, x->mvcost,
  1375. x->errorperbit)
  1376. : 0);
  1377. }
  1378. static int hex_search(const MACROBLOCK *x, MV *ref_mv, int search_param,
  1379. int sad_per_bit, int do_init_search, int *cost_list,
  1380. const vp9_variance_fn_ptr_t *vfp, int use_mvcost,
  1381. const MV *center_mv, MV *best_mv) {
1382. // The first scale has the 8 closest points; the rest have 6 points in a hex
1383. // shape at increasing scales.
  1384. static const int hex_num_candidates[MAX_PATTERN_SCALES] = { 8, 6, 6, 6, 6, 6,
  1385. 6, 6, 6, 6, 6 };
  1386. // Note that the largest candidate step at each scale is 2^scale
  1387. /* clang-format off */
  1388. static const MV hex_candidates[MAX_PATTERN_SCALES][MAX_PATTERN_CANDIDATES] = {
  1389. { { -1, -1 }, { 0, -1 }, { 1, -1 }, { 1, 0 }, { 1, 1 }, { 0, 1 }, { -1, 1 },
  1390. { -1, 0 } },
  1391. { { -1, -2 }, { 1, -2 }, { 2, 0 }, { 1, 2 }, { -1, 2 }, { -2, 0 } },
  1392. { { -2, -4 }, { 2, -4 }, { 4, 0 }, { 2, 4 }, { -2, 4 }, { -4, 0 } },
  1393. { { -4, -8 }, { 4, -8 }, { 8, 0 }, { 4, 8 }, { -4, 8 }, { -8, 0 } },
  1394. { { -8, -16 }, { 8, -16 }, { 16, 0 }, { 8, 16 }, { -8, 16 }, { -16, 0 } },
  1395. { { -16, -32 }, { 16, -32 }, { 32, 0 }, { 16, 32 }, { -16, 32 },
  1396. { -32, 0 } },
  1397. { { -32, -64 }, { 32, -64 }, { 64, 0 }, { 32, 64 }, { -32, 64 },
  1398. { -64, 0 } },
  1399. { { -64, -128 }, { 64, -128 }, { 128, 0 }, { 64, 128 }, { -64, 128 },
  1400. { -128, 0 } },
  1401. { { -128, -256 }, { 128, -256 }, { 256, 0 }, { 128, 256 }, { -128, 256 },
  1402. { -256, 0 } },
  1403. { { -256, -512 }, { 256, -512 }, { 512, 0 }, { 256, 512 }, { -256, 512 },
  1404. { -512, 0 } },
  1405. { { -512, -1024 }, { 512, -1024 }, { 1024, 0 }, { 512, 1024 },
  1406. { -512, 1024 }, { -1024, 0 } }
  1407. };
  1408. /* clang-format on */
  1409. return vp9_pattern_search(
  1410. x, ref_mv, search_param, sad_per_bit, do_init_search, cost_list, vfp,
  1411. use_mvcost, center_mv, best_mv, hex_num_candidates, hex_candidates);
  1412. }
  1413. static int bigdia_search(const MACROBLOCK *x, MV *ref_mv, int search_param,
  1414. int sad_per_bit, int do_init_search, int *cost_list,
  1415. const vp9_variance_fn_ptr_t *vfp, int use_mvcost,
  1416. const MV *center_mv, MV *best_mv) {
1417. // The first scale has the 4 closest points; the rest have 8 points in a
1418. // diamond shape at increasing scales.
  1419. static const int bigdia_num_candidates[MAX_PATTERN_SCALES] = {
  1420. 4, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
  1421. };
  1422. // Note that the largest candidate step at each scale is 2^scale
  1423. /* clang-format off */
  1424. static const MV
  1425. bigdia_candidates[MAX_PATTERN_SCALES][MAX_PATTERN_CANDIDATES] = {
  1426. { { 0, -1 }, { 1, 0 }, { 0, 1 }, { -1, 0 } },
  1427. { { -1, -1 }, { 0, -2 }, { 1, -1 }, { 2, 0 }, { 1, 1 }, { 0, 2 },
  1428. { -1, 1 }, { -2, 0 } },
  1429. { { -2, -2 }, { 0, -4 }, { 2, -2 }, { 4, 0 }, { 2, 2 }, { 0, 4 },
  1430. { -2, 2 }, { -4, 0 } },
  1431. { { -4, -4 }, { 0, -8 }, { 4, -4 }, { 8, 0 }, { 4, 4 }, { 0, 8 },
  1432. { -4, 4 }, { -8, 0 } },
  1433. { { -8, -8 }, { 0, -16 }, { 8, -8 }, { 16, 0 }, { 8, 8 }, { 0, 16 },
  1434. { -8, 8 }, { -16, 0 } },
  1435. { { -16, -16 }, { 0, -32 }, { 16, -16 }, { 32, 0 }, { 16, 16 },
  1436. { 0, 32 }, { -16, 16 }, { -32, 0 } },
  1437. { { -32, -32 }, { 0, -64 }, { 32, -32 }, { 64, 0 }, { 32, 32 },
  1438. { 0, 64 }, { -32, 32 }, { -64, 0 } },
  1439. { { -64, -64 }, { 0, -128 }, { 64, -64 }, { 128, 0 }, { 64, 64 },
  1440. { 0, 128 }, { -64, 64 }, { -128, 0 } },
  1441. { { -128, -128 }, { 0, -256 }, { 128, -128 }, { 256, 0 }, { 128, 128 },
  1442. { 0, 256 }, { -128, 128 }, { -256, 0 } },
  1443. { { -256, -256 }, { 0, -512 }, { 256, -256 }, { 512, 0 }, { 256, 256 },
  1444. { 0, 512 }, { -256, 256 }, { -512, 0 } },
  1445. { { -512, -512 }, { 0, -1024 }, { 512, -512 }, { 1024, 0 },
  1446. { 512, 512 }, { 0, 1024 }, { -512, 512 }, { -1024, 0 } }
  1447. };
  1448. /* clang-format on */
  1449. return vp9_pattern_search_sad(
  1450. x, ref_mv, search_param, sad_per_bit, do_init_search, cost_list, vfp,
  1451. use_mvcost, center_mv, best_mv, bigdia_num_candidates, bigdia_candidates);
  1452. }
  1453. static int square_search(const MACROBLOCK *x, MV *ref_mv, int search_param,
  1454. int sad_per_bit, int do_init_search, int *cost_list,
  1455. const vp9_variance_fn_ptr_t *vfp, int use_mvcost,
  1456. const MV *center_mv, MV *best_mv) {
1457. // All scales have the 8 closest points in a square shape.
  1458. static const int square_num_candidates[MAX_PATTERN_SCALES] = {
  1459. 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
  1460. };
  1461. // Note that the largest candidate step at each scale is 2^scale
  1462. /* clang-format off */
  1463. static const MV
  1464. square_candidates[MAX_PATTERN_SCALES][MAX_PATTERN_CANDIDATES] = {
  1465. { { -1, -1 }, { 0, -1 }, { 1, -1 }, { 1, 0 }, { 1, 1 }, { 0, 1 },
  1466. { -1, 1 }, { -1, 0 } },
  1467. { { -2, -2 }, { 0, -2 }, { 2, -2 }, { 2, 0 }, { 2, 2 }, { 0, 2 },
  1468. { -2, 2 }, { -2, 0 } },
  1469. { { -4, -4 }, { 0, -4 }, { 4, -4 }, { 4, 0 }, { 4, 4 }, { 0, 4 },
  1470. { -4, 4 }, { -4, 0 } },
  1471. { { -8, -8 }, { 0, -8 }, { 8, -8 }, { 8, 0 }, { 8, 8 }, { 0, 8 },
  1472. { -8, 8 }, { -8, 0 } },
  1473. { { -16, -16 }, { 0, -16 }, { 16, -16 }, { 16, 0 }, { 16, 16 },
  1474. { 0, 16 }, { -16, 16 }, { -16, 0 } },
  1475. { { -32, -32 }, { 0, -32 }, { 32, -32 }, { 32, 0 }, { 32, 32 },
  1476. { 0, 32 }, { -32, 32 }, { -32, 0 } },
  1477. { { -64, -64 }, { 0, -64 }, { 64, -64 }, { 64, 0 }, { 64, 64 },
  1478. { 0, 64 }, { -64, 64 }, { -64, 0 } },
  1479. { { -128, -128 }, { 0, -128 }, { 128, -128 }, { 128, 0 }, { 128, 128 },
  1480. { 0, 128 }, { -128, 128 }, { -128, 0 } },
  1481. { { -256, -256 }, { 0, -256 }, { 256, -256 }, { 256, 0 }, { 256, 256 },
  1482. { 0, 256 }, { -256, 256 }, { -256, 0 } },
  1483. { { -512, -512 }, { 0, -512 }, { 512, -512 }, { 512, 0 }, { 512, 512 },
  1484. { 0, 512 }, { -512, 512 }, { -512, 0 } },
  1485. { { -1024, -1024 }, { 0, -1024 }, { 1024, -1024 }, { 1024, 0 },
  1486. { 1024, 1024 }, { 0, 1024 }, { -1024, 1024 }, { -1024, 0 } }
  1487. };
  1488. /* clang-format on */
  1489. return vp9_pattern_search(
  1490. x, ref_mv, search_param, sad_per_bit, do_init_search, cost_list, vfp,
  1491. use_mvcost, center_mv, best_mv, square_num_candidates, square_candidates);
  1492. }
  1493. static int fast_hex_search(const MACROBLOCK *x, MV *ref_mv, int search_param,
  1494. int sad_per_bit,
  1495. int do_init_search, // must be zero for fast_hex
  1496. int *cost_list, const vp9_variance_fn_ptr_t *vfp,
  1497. int use_mvcost, const MV *center_mv, MV *best_mv) {
  1498. return hex_search(x, ref_mv, VPXMAX(MAX_MVSEARCH_STEPS - 2, search_param),
  1499. sad_per_bit, do_init_search, cost_list, vfp, use_mvcost,
  1500. center_mv, best_mv);
  1501. }
  1502. static int fast_dia_search(const MACROBLOCK *x, MV *ref_mv, int search_param,
  1503. int sad_per_bit, int do_init_search, int *cost_list,
  1504. const vp9_variance_fn_ptr_t *vfp, int use_mvcost,
  1505. const MV *center_mv, MV *best_mv) {
  1506. return bigdia_search(x, ref_mv, VPXMAX(MAX_MVSEARCH_STEPS - 2, search_param),
  1507. sad_per_bit, do_init_search, cost_list, vfp, use_mvcost,
  1508. center_mv, best_mv);
  1509. }
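// Illustrative note (assuming MAX_MVSEARCH_STEPS == 11, as the 11-entry
// search_param_to_steps table above implies): the fast_* variants clamp the
// effective search_param to at least MAX_MVSEARCH_STEPS - 2 == 9, so only the
// one or two smallest pattern scales are ever visited, e.g.
//
//   VPXMAX(MAX_MVSEARCH_STEPS - 2, 3)  == 9   ->  search_param_to_steps[9]  == 1
//   VPXMAX(MAX_MVSEARCH_STEPS - 2, 10) == 10  ->  search_param_to_steps[10] == 0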
  1510. #undef CHECK_BETTER
1511. // Exhaustive motion search around a given centre position with a given
1512. // step size.
  1513. static int exhaustive_mesh_search(const MACROBLOCK *x, MV *ref_mv, MV *best_mv,
  1514. int range, int step, int sad_per_bit,
  1515. const vp9_variance_fn_ptr_t *fn_ptr,
  1516. const MV *center_mv) {
  1517. const MACROBLOCKD *const xd = &x->e_mbd;
  1518. const struct buf_2d *const what = &x->plane[0].src;
  1519. const struct buf_2d *const in_what = &xd->plane[0].pre[0];
  1520. MV fcenter_mv = { center_mv->row, center_mv->col };
  1521. unsigned int best_sad = INT_MAX;
  1522. int r, c, i;
  1523. int start_col, end_col, start_row, end_row;
  1524. int col_step = (step > 1) ? step : 4;
  1525. assert(step >= 1);
  1526. clamp_mv(&fcenter_mv, x->mv_limits.col_min, x->mv_limits.col_max,
  1527. x->mv_limits.row_min, x->mv_limits.row_max);
  1528. *best_mv = fcenter_mv;
  1529. best_sad =
  1530. fn_ptr->sdf(what->buf, what->stride,
  1531. get_buf_from_mv(in_what, &fcenter_mv), in_what->stride) +
  1532. mvsad_err_cost(x, &fcenter_mv, ref_mv, sad_per_bit);
  1533. start_row = VPXMAX(-range, x->mv_limits.row_min - fcenter_mv.row);
  1534. start_col = VPXMAX(-range, x->mv_limits.col_min - fcenter_mv.col);
  1535. end_row = VPXMIN(range, x->mv_limits.row_max - fcenter_mv.row);
  1536. end_col = VPXMIN(range, x->mv_limits.col_max - fcenter_mv.col);
  1537. for (r = start_row; r <= end_row; r += step) {
  1538. for (c = start_col; c <= end_col; c += col_step) {
  1539. // Step > 1 means we are not checking every location in this pass.
  1540. if (step > 1) {
  1541. const MV mv = { fcenter_mv.row + r, fcenter_mv.col + c };
  1542. unsigned int sad =
  1543. fn_ptr->sdf(what->buf, what->stride, get_buf_from_mv(in_what, &mv),
  1544. in_what->stride);
  1545. if (sad < best_sad) {
  1546. sad += mvsad_err_cost(x, &mv, ref_mv, sad_per_bit);
  1547. if (sad < best_sad) {
  1548. best_sad = sad;
  1549. *best_mv = mv;
  1550. }
  1551. }
  1552. } else {
  1553. // 4 sads in a single call if we are checking every location
  1554. if (c + 3 <= end_col) {
  1555. unsigned int sads[4];
  1556. const uint8_t *addrs[4];
  1557. for (i = 0; i < 4; ++i) {
  1558. const MV mv = { fcenter_mv.row + r, fcenter_mv.col + c + i };
  1559. addrs[i] = get_buf_from_mv(in_what, &mv);
  1560. }
  1561. fn_ptr->sdx4df(what->buf, what->stride, addrs, in_what->stride, sads);
  1562. for (i = 0; i < 4; ++i) {
  1563. if (sads[i] < best_sad) {
  1564. const MV mv = { fcenter_mv.row + r, fcenter_mv.col + c + i };
  1565. const unsigned int sad =
  1566. sads[i] + mvsad_err_cost(x, &mv, ref_mv, sad_per_bit);
  1567. if (sad < best_sad) {
  1568. best_sad = sad;
  1569. *best_mv = mv;
  1570. }
  1571. }
  1572. }
  1573. } else {
  1574. for (i = 0; i < end_col - c; ++i) {
  1575. const MV mv = { fcenter_mv.row + r, fcenter_mv.col + c + i };
  1576. unsigned int sad =
  1577. fn_ptr->sdf(what->buf, what->stride,
  1578. get_buf_from_mv(in_what, &mv), in_what->stride);
  1579. if (sad < best_sad) {
  1580. sad += mvsad_err_cost(x, &mv, ref_mv, sad_per_bit);
  1581. if (sad < best_sad) {
  1582. best_sad = sad;
  1583. *best_mv = mv;
  1584. }
  1585. }
  1586. }
  1587. }
  1588. }
  1589. }
  1590. }
  1591. return best_sad;
  1592. }
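// Illustrative trace (example values only, assuming the MV limits do not clip
// the window): with range == 7 and step == 1 the loops above visit rows
// r = -7 .. 7 one at a time and columns in groups of four (col_step == 4),
// i.e. c = -7, -3, 1, 5; each full group is covered by one sdx4df() call over
// c .. c + 3, and columns near end_col that do not form a full group fall back
// to individual sdf() calls.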
  1593. #define MIN_RANGE 7
  1594. #define MAX_RANGE 256
  1595. #define MIN_INTERVAL 1
  1596. #if CONFIG_NON_GREEDY_MV
  1597. #define LOG2_TABLE_SIZE 1024
  1598. static const int log2_table[LOG2_TABLE_SIZE] = {
  1599. 0, // This is a dummy value
  1600. 0, 1048576, 1661954, 2097152, 2434718, 2710530, 2943725,
  1601. 3145728, 3323907, 3483294, 3627477, 3759106, 3880192, 3992301,
  1602. 4096672, 4194304, 4286015, 4372483, 4454275, 4531870, 4605679,
  1603. 4676053, 4743299, 4807682, 4869436, 4928768, 4985861, 5040877,
  1604. 5093962, 5145248, 5194851, 5242880, 5289431, 5334591, 5378443,
  1605. 5421059, 5462508, 5502851, 5542146, 5580446, 5617800, 5654255,
  1606. 5689851, 5724629, 5758625, 5791875, 5824409, 5856258, 5887450,
  1607. 5918012, 5947969, 5977344, 6006160, 6034437, 6062195, 6089453,
  1608. 6116228, 6142538, 6168398, 6193824, 6218829, 6243427, 6267632,
  1609. 6291456, 6314910, 6338007, 6360756, 6383167, 6405252, 6427019,
  1610. 6448477, 6469635, 6490501, 6511084, 6531390, 6551427, 6571202,
  1611. 6590722, 6609993, 6629022, 6647815, 6666376, 6684713, 6702831,
  1612. 6720734, 6738427, 6755916, 6773205, 6790299, 6807201, 6823917,
  1613. 6840451, 6856805, 6872985, 6888993, 6904834, 6920510, 6936026,
  1614. 6951384, 6966588, 6981641, 6996545, 7011304, 7025920, 7040397,
  1615. 7054736, 7068940, 7083013, 7096956, 7110771, 7124461, 7138029,
  1616. 7151476, 7164804, 7178017, 7191114, 7204100, 7216974, 7229740,
  1617. 7242400, 7254954, 7267405, 7279754, 7292003, 7304154, 7316208,
  1618. 7328167, 7340032, 7351805, 7363486, 7375079, 7386583, 7398000,
  1619. 7409332, 7420579, 7431743, 7442826, 7453828, 7464751, 7475595,
  1620. 7486362, 7497053, 7507669, 7518211, 7528680, 7539077, 7549404,
  1621. 7559660, 7569847, 7579966, 7590017, 7600003, 7609923, 7619778,
  1622. 7629569, 7639298, 7648964, 7658569, 7668114, 7677598, 7687023,
  1623. 7696391, 7705700, 7714952, 7724149, 7733289, 7742375, 7751407,
  1624. 7760385, 7769310, 7778182, 7787003, 7795773, 7804492, 7813161,
  1625. 7821781, 7830352, 7838875, 7847350, 7855777, 7864158, 7872493,
  1626. 7880782, 7889027, 7897226, 7905381, 7913492, 7921561, 7929586,
  1627. 7937569, 7945510, 7953410, 7961268, 7969086, 7976864, 7984602,
  1628. 7992301, 7999960, 8007581, 8015164, 8022709, 8030217, 8037687,
  1629. 8045121, 8052519, 8059880, 8067206, 8074496, 8081752, 8088973,
  1630. 8096159, 8103312, 8110431, 8117516, 8124569, 8131589, 8138576,
  1631. 8145532, 8152455, 8159347, 8166208, 8173037, 8179836, 8186605,
  1632. 8193343, 8200052, 8206731, 8213380, 8220001, 8226593, 8233156,
  1633. 8239690, 8246197, 8252676, 8259127, 8265550, 8271947, 8278316,
  1634. 8284659, 8290976, 8297266, 8303530, 8309768, 8315981, 8322168,
  1635. 8328330, 8334467, 8340579, 8346667, 8352730, 8358769, 8364784,
  1636. 8370775, 8376743, 8382687, 8388608, 8394506, 8400381, 8406233,
  1637. 8412062, 8417870, 8423655, 8429418, 8435159, 8440878, 8446576,
  1638. 8452252, 8457908, 8463542, 8469155, 8474748, 8480319, 8485871,
  1639. 8491402, 8496913, 8502404, 8507875, 8513327, 8518759, 8524171,
  1640. 8529564, 8534938, 8540293, 8545629, 8550947, 8556245, 8561525,
  1641. 8566787, 8572031, 8577256, 8582464, 8587653, 8592825, 8597980,
  1642. 8603116, 8608236, 8613338, 8618423, 8623491, 8628542, 8633576,
  1643. 8638593, 8643594, 8648579, 8653547, 8658499, 8663434, 8668354,
  1644. 8673258, 8678145, 8683017, 8687874, 8692715, 8697540, 8702350,
  1645. 8707145, 8711925, 8716690, 8721439, 8726174, 8730894, 8735599,
  1646. 8740290, 8744967, 8749628, 8754276, 8758909, 8763528, 8768134,
  1647. 8772725, 8777302, 8781865, 8786415, 8790951, 8795474, 8799983,
  1648. 8804478, 8808961, 8813430, 8817886, 8822328, 8826758, 8831175,
  1649. 8835579, 8839970, 8844349, 8848715, 8853068, 8857409, 8861737,
  1650. 8866053, 8870357, 8874649, 8878928, 8883195, 8887451, 8891694,
  1651. 8895926, 8900145, 8904353, 8908550, 8912734, 8916908, 8921069,
  1652. 8925220, 8929358, 8933486, 8937603, 8941708, 8945802, 8949885,
  1653. 8953957, 8958018, 8962068, 8966108, 8970137, 8974155, 8978162,
  1654. 8982159, 8986145, 8990121, 8994086, 8998041, 9001986, 9005920,
  1655. 9009844, 9013758, 9017662, 9021556, 9025440, 9029314, 9033178,
  1656. 9037032, 9040877, 9044711, 9048536, 9052352, 9056157, 9059953,
  1657. 9063740, 9067517, 9071285, 9075044, 9078793, 9082533, 9086263,
  1658. 9089985, 9093697, 9097400, 9101095, 9104780, 9108456, 9112123,
  1659. 9115782, 9119431, 9123072, 9126704, 9130328, 9133943, 9137549,
  1660. 9141146, 9144735, 9148316, 9151888, 9155452, 9159007, 9162554,
  1661. 9166092, 9169623, 9173145, 9176659, 9180165, 9183663, 9187152,
  1662. 9190634, 9194108, 9197573, 9201031, 9204481, 9207923, 9211357,
  1663. 9214784, 9218202, 9221613, 9225017, 9228412, 9231800, 9235181,
  1664. 9238554, 9241919, 9245277, 9248628, 9251971, 9255307, 9258635,
  1665. 9261956, 9265270, 9268577, 9271876, 9275169, 9278454, 9281732,
  1666. 9285002, 9288266, 9291523, 9294773, 9298016, 9301252, 9304481,
  1667. 9307703, 9310918, 9314126, 9317328, 9320523, 9323711, 9326892,
  1668. 9330067, 9333235, 9336397, 9339552, 9342700, 9345842, 9348977,
  1669. 9352106, 9355228, 9358344, 9361454, 9364557, 9367654, 9370744,
  1670. 9373828, 9376906, 9379978, 9383043, 9386102, 9389155, 9392202,
  1671. 9395243, 9398278, 9401306, 9404329, 9407345, 9410356, 9413360,
  1672. 9416359, 9419351, 9422338, 9425319, 9428294, 9431263, 9434226,
  1673. 9437184, 9440136, 9443082, 9446022, 9448957, 9451886, 9454809,
  1674. 9457726, 9460638, 9463545, 9466446, 9469341, 9472231, 9475115,
  1675. 9477994, 9480867, 9483735, 9486597, 9489454, 9492306, 9495152,
  1676. 9497993, 9500828, 9503659, 9506484, 9509303, 9512118, 9514927,
  1677. 9517731, 9520530, 9523324, 9526112, 9528895, 9531674, 9534447,
  1678. 9537215, 9539978, 9542736, 9545489, 9548237, 9550980, 9553718,
  1679. 9556451, 9559179, 9561903, 9564621, 9567335, 9570043, 9572747,
  1680. 9575446, 9578140, 9580830, 9583514, 9586194, 9588869, 9591540,
  1681. 9594205, 9596866, 9599523, 9602174, 9604821, 9607464, 9610101,
  1682. 9612735, 9615363, 9617987, 9620607, 9623222, 9625832, 9628438,
  1683. 9631040, 9633637, 9636229, 9638818, 9641401, 9643981, 9646556,
  1684. 9649126, 9651692, 9654254, 9656812, 9659365, 9661914, 9664459,
  1685. 9666999, 9669535, 9672067, 9674594, 9677118, 9679637, 9682152,
  1686. 9684663, 9687169, 9689672, 9692170, 9694665, 9697155, 9699641,
  1687. 9702123, 9704601, 9707075, 9709545, 9712010, 9714472, 9716930,
  1688. 9719384, 9721834, 9724279, 9726721, 9729159, 9731593, 9734024,
  1689. 9736450, 9738872, 9741291, 9743705, 9746116, 9748523, 9750926,
  1690. 9753326, 9755721, 9758113, 9760501, 9762885, 9765266, 9767642,
  1691. 9770015, 9772385, 9774750, 9777112, 9779470, 9781825, 9784175,
  1692. 9786523, 9788866, 9791206, 9793543, 9795875, 9798204, 9800530,
  1693. 9802852, 9805170, 9807485, 9809797, 9812104, 9814409, 9816710,
  1694. 9819007, 9821301, 9823591, 9825878, 9828161, 9830441, 9832718,
  1695. 9834991, 9837261, 9839527, 9841790, 9844050, 9846306, 9848559,
  1696. 9850808, 9853054, 9855297, 9857537, 9859773, 9862006, 9864235,
  1697. 9866462, 9868685, 9870904, 9873121, 9875334, 9877544, 9879751,
  1698. 9881955, 9884155, 9886352, 9888546, 9890737, 9892925, 9895109,
  1699. 9897291, 9899469, 9901644, 9903816, 9905985, 9908150, 9910313,
  1700. 9912473, 9914629, 9916783, 9918933, 9921080, 9923225, 9925366,
  1701. 9927504, 9929639, 9931771, 9933900, 9936027, 9938150, 9940270,
  1702. 9942387, 9944502, 9946613, 9948721, 9950827, 9952929, 9955029,
  1703. 9957126, 9959219, 9961310, 9963398, 9965484, 9967566, 9969645,
  1704. 9971722, 9973796, 9975866, 9977934, 9980000, 9982062, 9984122,
  1705. 9986179, 9988233, 9990284, 9992332, 9994378, 9996421, 9998461,
  1706. 10000498, 10002533, 10004565, 10006594, 10008621, 10010644, 10012665,
  1707. 10014684, 10016700, 10018713, 10020723, 10022731, 10024736, 10026738,
  1708. 10028738, 10030735, 10032729, 10034721, 10036710, 10038697, 10040681,
  1709. 10042662, 10044641, 10046617, 10048591, 10050562, 10052530, 10054496,
  1710. 10056459, 10058420, 10060379, 10062334, 10064287, 10066238, 10068186,
  1711. 10070132, 10072075, 10074016, 10075954, 10077890, 10079823, 10081754,
  1712. 10083682, 10085608, 10087532, 10089453, 10091371, 10093287, 10095201,
  1713. 10097112, 10099021, 10100928, 10102832, 10104733, 10106633, 10108529,
  1714. 10110424, 10112316, 10114206, 10116093, 10117978, 10119861, 10121742,
  1715. 10123620, 10125495, 10127369, 10129240, 10131109, 10132975, 10134839,
  1716. 10136701, 10138561, 10140418, 10142273, 10144126, 10145976, 10147825,
  1717. 10149671, 10151514, 10153356, 10155195, 10157032, 10158867, 10160699,
  1718. 10162530, 10164358, 10166184, 10168007, 10169829, 10171648, 10173465,
  1719. 10175280, 10177093, 10178904, 10180712, 10182519, 10184323, 10186125,
  1720. 10187925, 10189722, 10191518, 10193311, 10195103, 10196892, 10198679,
  1721. 10200464, 10202247, 10204028, 10205806, 10207583, 10209357, 10211130,
  1722. 10212900, 10214668, 10216435, 10218199, 10219961, 10221721, 10223479,
  1723. 10225235, 10226989, 10228741, 10230491, 10232239, 10233985, 10235728,
  1724. 10237470, 10239210, 10240948, 10242684, 10244417, 10246149, 10247879,
  1725. 10249607, 10251333, 10253057, 10254779, 10256499, 10258217, 10259933,
  1726. 10261647, 10263360, 10265070, 10266778, 10268485, 10270189, 10271892,
  1727. 10273593, 10275292, 10276988, 10278683, 10280376, 10282068, 10283757,
  1728. 10285444, 10287130, 10288814, 10290495, 10292175, 10293853, 10295530,
  1729. 10297204, 10298876, 10300547, 10302216, 10303883, 10305548, 10307211,
  1730. 10308873, 10310532, 10312190, 10313846, 10315501, 10317153, 10318804,
  1731. 10320452, 10322099, 10323745, 10325388, 10327030, 10328670, 10330308,
  1732. 10331944, 10333578, 10335211, 10336842, 10338472, 10340099, 10341725,
  1733. 10343349, 10344971, 10346592, 10348210, 10349828, 10351443, 10353057,
  1734. 10354668, 10356279, 10357887, 10359494, 10361099, 10362702, 10364304,
  1735. 10365904, 10367502, 10369099, 10370694, 10372287, 10373879, 10375468,
  1736. 10377057, 10378643, 10380228, 10381811, 10383393, 10384973, 10386551,
  1737. 10388128, 10389703, 10391276, 10392848, 10394418, 10395986, 10397553,
  1738. 10399118, 10400682, 10402244, 10403804, 10405363, 10406920, 10408476,
  1739. 10410030, 10411582, 10413133, 10414682, 10416230, 10417776, 10419320,
  1740. 10420863, 10422404, 10423944, 10425482, 10427019, 10428554, 10430087,
  1741. 10431619, 10433149, 10434678, 10436206, 10437731, 10439256, 10440778,
  1742. 10442299, 10443819, 10445337, 10446854, 10448369, 10449882, 10451394,
  1743. 10452905, 10454414, 10455921, 10457427, 10458932, 10460435, 10461936,
  1744. 10463436, 10464935, 10466432, 10467927, 10469422, 10470914, 10472405,
  1745. 10473895, 10475383, 10476870, 10478355, 10479839, 10481322, 10482802,
  1746. 10484282,
  1747. };
  1748. #define LOG2_PRECISION 20
  1749. static int64_t log2_approximation(int64_t v) {
  1750. assert(v > 0);
  1751. if (v < LOG2_TABLE_SIZE) {
  1752. return log2_table[v];
  1753. } else {
  1754. // use linear approximation when v >= 2^10
  1755. const int slope =
  1756. 1477; // slope = 1 / (log(2) * 1024) * (1 << LOG2_PRECISION)
  1757. assert(LOG2_TABLE_SIZE == 1 << 10);
  1758. return slope * (v - LOG2_TABLE_SIZE) + (10 << LOG2_PRECISION);
  1759. }
  1760. }
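// Illustrative checks (example values, not executed encoder code): the
// function above returns log2(v) in Q-LOG2_PRECISION fixed point, so
//
//   log2_approximation(1)    == 0
//   log2_approximation(2)    == 1 << LOG2_PRECISION   // log2(2)    == 1
//   log2_approximation(1024) == 10 << LOG2_PRECISION  // log2(1024) == 10
//
// with values v >= LOG2_TABLE_SIZE handled by the linear extrapolation branch.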
  1761. int64_t vp9_nb_mvs_inconsistency(const MV *mv, const int_mv *nb_mvs,
  1762. int mv_num) {
  1763. int i;
  1764. int update = 0;
  1765. int64_t best_cost = 0;
  1766. vpx_clear_system_state();
  1767. for (i = 0; i < mv_num; ++i) {
  1768. if (nb_mvs[i].as_int != INVALID_MV) {
  1769. MV nb_mv = nb_mvs[i].as_mv;
  1770. const int64_t row_diff = abs(mv->row - nb_mv.row);
  1771. const int64_t col_diff = abs(mv->col - nb_mv.col);
  1772. const int64_t cost =
  1773. log2_approximation(1 + row_diff * row_diff + col_diff * col_diff);
  1774. if (update == 0) {
  1775. best_cost = cost;
  1776. update = 1;
  1777. } else {
  1778. best_cost = cost < best_cost ? cost : best_cost;
  1779. }
  1780. }
  1781. }
  1782. return best_cost;
  1783. }
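// Worked example (illustrative only): for mv = { 4, 4 } and a single valid
// neighbour at { 0, 0 }, row_diff == col_diff == 4, so the cost is
// log2_approximation(1 + 16 + 16) == log2_approximation(33), roughly
// 5.04 * (1 << LOG2_PRECISION). With several valid neighbours the minimum of
// the per-neighbour costs is returned; with none, the function returns 0.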
  1784. static int64_t exhaustive_mesh_search_new(const MACROBLOCK *x, MV *best_mv,
  1785. int range, int step,
  1786. const vp9_variance_fn_ptr_t *fn_ptr,
  1787. const MV *center_mv, int lambda,
  1788. const int_mv *nb_full_mvs,
  1789. int full_mv_num) {
  1790. const MACROBLOCKD *const xd = &x->e_mbd;
  1791. const struct buf_2d *const what = &x->plane[0].src;
  1792. const struct buf_2d *const in_what = &xd->plane[0].pre[0];
  1793. MV fcenter_mv = { center_mv->row, center_mv->col };
  1794. int64_t best_sad;
  1795. int r, c, i;
  1796. int start_col, end_col, start_row, end_row;
  1797. int col_step = (step > 1) ? step : 4;
  1798. assert(step >= 1);
  1799. clamp_mv(&fcenter_mv, x->mv_limits.col_min, x->mv_limits.col_max,
  1800. x->mv_limits.row_min, x->mv_limits.row_max);
  1801. *best_mv = fcenter_mv;
  1802. best_sad =
  1803. ((int64_t)fn_ptr->sdf(what->buf, what->stride,
  1804. get_buf_from_mv(in_what, &fcenter_mv),
  1805. in_what->stride)
  1806. << LOG2_PRECISION) +
  1807. lambda * vp9_nb_mvs_inconsistency(&fcenter_mv, nb_full_mvs, full_mv_num);
  1808. start_row = VPXMAX(-range, x->mv_limits.row_min - fcenter_mv.row);
  1809. start_col = VPXMAX(-range, x->mv_limits.col_min - fcenter_mv.col);
  1810. end_row = VPXMIN(range, x->mv_limits.row_max - fcenter_mv.row);
  1811. end_col = VPXMIN(range, x->mv_limits.col_max - fcenter_mv.col);
  1812. for (r = start_row; r <= end_row; r += step) {
  1813. for (c = start_col; c <= end_col; c += col_step) {
  1814. // Step > 1 means we are not checking every location in this pass.
  1815. if (step > 1) {
  1816. const MV mv = { fcenter_mv.row + r, fcenter_mv.col + c };
  1817. int64_t sad =
  1818. (int64_t)fn_ptr->sdf(what->buf, what->stride,
  1819. get_buf_from_mv(in_what, &mv), in_what->stride)
  1820. << LOG2_PRECISION;
  1821. if (sad < best_sad) {
  1822. sad +=
  1823. lambda * vp9_nb_mvs_inconsistency(&mv, nb_full_mvs, full_mv_num);
  1824. if (sad < best_sad) {
  1825. best_sad = sad;
  1826. *best_mv = mv;
  1827. }
  1828. }
  1829. } else {
  1830. // 4 sads in a single call if we are checking every location
  1831. if (c + 3 <= end_col) {
  1832. unsigned int sads[4];
  1833. const uint8_t *addrs[4];
  1834. for (i = 0; i < 4; ++i) {
  1835. const MV mv = { fcenter_mv.row + r, fcenter_mv.col + c + i };
  1836. addrs[i] = get_buf_from_mv(in_what, &mv);
  1837. }
  1838. fn_ptr->sdx4df(what->buf, what->stride, addrs, in_what->stride, sads);
  1839. for (i = 0; i < 4; ++i) {
  1840. int64_t sad = (int64_t)sads[i] << LOG2_PRECISION;
  1841. if (sad < best_sad) {
  1842. const MV mv = { fcenter_mv.row + r, fcenter_mv.col + c + i };
  1843. sad += lambda *
  1844. vp9_nb_mvs_inconsistency(&mv, nb_full_mvs, full_mv_num);
  1845. if (sad < best_sad) {
  1846. best_sad = sad;
  1847. *best_mv = mv;
  1848. }
  1849. }
  1850. }
  1851. } else {
  1852. for (i = 0; i < end_col - c; ++i) {
  1853. const MV mv = { fcenter_mv.row + r, fcenter_mv.col + c + i };
  1854. int64_t sad = (int64_t)fn_ptr->sdf(what->buf, what->stride,
  1855. get_buf_from_mv(in_what, &mv),
  1856. in_what->stride)
  1857. << LOG2_PRECISION;
  1858. if (sad < best_sad) {
  1859. sad += lambda *
  1860. vp9_nb_mvs_inconsistency(&mv, nb_full_mvs, full_mv_num);
  1861. if (sad < best_sad) {
  1862. best_sad = sad;
  1863. *best_mv = mv;
  1864. }
  1865. }
  1866. }
  1867. }
  1868. }
  1869. }
  1870. }
  1871. return best_sad;
  1872. }
  1873. static int64_t full_pixel_exhaustive_new(const VP9_COMP *cpi, MACROBLOCK *x,
  1874. MV *centre_mv_full,
  1875. const vp9_variance_fn_ptr_t *fn_ptr,
  1876. MV *dst_mv, int lambda,
  1877. const int_mv *nb_full_mvs,
  1878. int full_mv_num) {
  1879. const SPEED_FEATURES *const sf = &cpi->sf;
  1880. MV temp_mv = { centre_mv_full->row, centre_mv_full->col };
  1881. int64_t bestsme;
  1882. int i;
  1883. int interval = sf->mesh_patterns[0].interval;
  1884. int range = sf->mesh_patterns[0].range;
  1885. int baseline_interval_divisor;
  1886. const MV dummy_mv = { 0, 0 };
  1887. // Trap illegal values for interval and range for this function.
  1888. if ((range < MIN_RANGE) || (range > MAX_RANGE) || (interval < MIN_INTERVAL) ||
  1889. (interval > range)) {
  1890. printf("ERROR: invalid range\n");
  1891. assert(0);
  1892. }
  1893. baseline_interval_divisor = range / interval;
  1894. // Check size of proposed first range against magnitude of the centre
  1895. // value used as a starting point.
  1896. range = VPXMAX(range, (5 * VPXMAX(abs(temp_mv.row), abs(temp_mv.col))) / 4);
  1897. range = VPXMIN(range, MAX_RANGE);
  1898. interval = VPXMAX(interval, range / baseline_interval_divisor);
  1899. // initial search
  1900. bestsme =
  1901. exhaustive_mesh_search_new(x, &temp_mv, range, interval, fn_ptr, &temp_mv,
  1902. lambda, nb_full_mvs, full_mv_num);
  1903. if ((interval > MIN_INTERVAL) && (range > MIN_RANGE)) {
  1904. // Progressive searches with range and step size decreasing each time
  1905. // till we reach a step size of 1. Then break out.
  1906. for (i = 1; i < MAX_MESH_STEP; ++i) {
  1907. // First pass with coarser step and longer range
  1908. bestsme = exhaustive_mesh_search_new(
  1909. x, &temp_mv, sf->mesh_patterns[i].range,
  1910. sf->mesh_patterns[i].interval, fn_ptr, &temp_mv, lambda, nb_full_mvs,
  1911. full_mv_num);
  1912. if (sf->mesh_patterns[i].interval == 1) break;
  1913. }
  1914. }
  1915. bestsme = vp9_get_mvpred_var(x, &temp_mv, &dummy_mv, fn_ptr, 0);
  1916. *dst_mv = temp_mv;
  1917. return bestsme;
  1918. }
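// Illustrative trace of the range/interval adaptation in
// full_pixel_exhaustive_new above (the mesh_patterns values below are
// hypothetical, not taken from any particular speed profile): if
// mesh_patterns[0] were { range = 64, interval = 4 } and the centre were
// { 100, -40 }, then baseline_interval_divisor == 64 / 4 == 16,
// range == VPXMIN(VPXMAX(64, (5 * 100) / 4), MAX_RANGE) == 125 and
// interval == VPXMAX(4, 125 / 16) == 7 for the initial mesh pass.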
  1919. static double diamond_search_sad_new(const MACROBLOCK *x,
  1920. const search_site_config *cfg,
  1921. const MV *init_full_mv, MV *best_full_mv,
  1922. int search_param, int lambda, int *num00,
  1923. const vp9_variance_fn_ptr_t *fn_ptr,
  1924. const int_mv *nb_full_mvs,
  1925. int full_mv_num) {
  1926. int i, j, step;
  1927. const MACROBLOCKD *const xd = &x->e_mbd;
  1928. uint8_t *what = x->plane[0].src.buf;
  1929. const int what_stride = x->plane[0].src.stride;
  1930. const uint8_t *in_what;
  1931. const int in_what_stride = xd->plane[0].pre[0].stride;
  1932. const uint8_t *best_address;
  1933. double bestsad;
  1934. int best_site = -1;
  1935. int last_site = -1;
  1936. // search_param determines the length of the initial step and hence the number
  1937. // of iterations.
  1938. // 0 = initial step (MAX_FIRST_STEP) pel
  1939. // 1 = (MAX_FIRST_STEP/2) pel,
  1940. // 2 = (MAX_FIRST_STEP/4) pel...
  1941. // const search_site *ss = &cfg->ss[search_param * cfg->searches_per_step];
  1942. const MV *ss_mv = &cfg->ss_mv[search_param * cfg->searches_per_step];
  1943. const intptr_t *ss_os = &cfg->ss_os[search_param * cfg->searches_per_step];
  1944. const int tot_steps = cfg->total_steps - search_param;
  1945. vpx_clear_system_state();
  1946. *best_full_mv = *init_full_mv;
  1947. clamp_mv(best_full_mv, x->mv_limits.col_min, x->mv_limits.col_max,
  1948. x->mv_limits.row_min, x->mv_limits.row_max);
  1949. *num00 = 0;
  1950. // Work out the start point for the search
  1951. in_what = xd->plane[0].pre[0].buf + best_full_mv->row * in_what_stride +
  1952. best_full_mv->col;
  1953. best_address = in_what;
  1954. // Check the starting position
  1955. {
  1956. const double mv_dist =
  1957. fn_ptr->sdf(what, what_stride, in_what, in_what_stride);
  1958. const double mv_cost =
  1959. vp9_nb_mvs_inconsistency(best_full_mv, nb_full_mvs, full_mv_num) /
  1960. (double)(1 << LOG2_PRECISION);
  1961. bestsad = mv_dist + lambda * mv_cost;
  1962. }
  1963. i = 0;
  1964. for (step = 0; step < tot_steps; step++) {
  1965. int all_in = 1, t;
1966. // all_in is true if every one of the points we are checking is within
1967. // the bounds of the image.
  1968. all_in &= ((best_full_mv->row + ss_mv[i].row) > x->mv_limits.row_min);
  1969. all_in &= ((best_full_mv->row + ss_mv[i + 1].row) < x->mv_limits.row_max);
  1970. all_in &= ((best_full_mv->col + ss_mv[i + 2].col) > x->mv_limits.col_min);
  1971. all_in &= ((best_full_mv->col + ss_mv[i + 3].col) < x->mv_limits.col_max);
1972. // If all the pixels are within the bounds we don't check whether the
1973. // search point is valid in this loop; otherwise we check each point
1974. // for validity.
  1975. if (all_in) {
  1976. unsigned int sad_array[4];
  1977. for (j = 0; j < cfg->searches_per_step; j += 4) {
  1978. unsigned char const *block_offset[4];
  1979. for (t = 0; t < 4; t++) block_offset[t] = ss_os[i + t] + best_address;
  1980. fn_ptr->sdx4df(what, what_stride, block_offset, in_what_stride,
  1981. sad_array);
  1982. for (t = 0; t < 4; t++, i++) {
  1983. if (sad_array[t] < bestsad) {
  1984. const MV this_mv = { best_full_mv->row + ss_mv[i].row,
  1985. best_full_mv->col + ss_mv[i].col };
  1986. const double mv_dist = sad_array[t];
  1987. const double mv_cost =
  1988. vp9_nb_mvs_inconsistency(&this_mv, nb_full_mvs, full_mv_num) /
  1989. (double)(1 << LOG2_PRECISION);
  1990. double thissad = mv_dist + lambda * mv_cost;
  1991. if (thissad < bestsad) {
  1992. bestsad = thissad;
  1993. best_site = i;
  1994. }
  1995. }
  1996. }
  1997. }
  1998. } else {
  1999. for (j = 0; j < cfg->searches_per_step; j++) {
  2000. // Trap illegal vectors
  2001. const MV this_mv = { best_full_mv->row + ss_mv[i].row,
  2002. best_full_mv->col + ss_mv[i].col };
  2003. if (is_mv_in(&x->mv_limits, &this_mv)) {
  2004. const uint8_t *const check_here = ss_os[i] + best_address;
  2005. const double mv_dist =
  2006. fn_ptr->sdf(what, what_stride, check_here, in_what_stride);
  2007. if (mv_dist < bestsad) {
  2008. const double mv_cost =
  2009. vp9_nb_mvs_inconsistency(&this_mv, nb_full_mvs, full_mv_num) /
  2010. (double)(1 << LOG2_PRECISION);
  2011. double thissad = mv_dist + lambda * mv_cost;
  2012. if (thissad < bestsad) {
  2013. bestsad = thissad;
  2014. best_site = i;
  2015. }
  2016. }
  2017. }
  2018. i++;
  2019. }
  2020. }
  2021. if (best_site != last_site) {
  2022. best_full_mv->row += ss_mv[best_site].row;
  2023. best_full_mv->col += ss_mv[best_site].col;
  2024. best_address += ss_os[best_site];
  2025. last_site = best_site;
  2026. } else if (best_address == in_what) {
  2027. (*num00)++;
  2028. }
  2029. }
  2030. return bestsad;
  2031. }
  2032. void vp9_prepare_nb_full_mvs(const TplDepFrame *tpl_frame, int mi_row,
  2033. int mi_col, int rf_idx, BLOCK_SIZE bsize,
  2034. int_mv *nb_full_mvs) {
  2035. const int mi_width = num_8x8_blocks_wide_lookup[bsize];
  2036. const int mi_height = num_8x8_blocks_high_lookup[bsize];
  2037. const int dirs[NB_MVS_NUM][2] = { { -1, 0 }, { 0, -1 }, { 1, 0 }, { 0, 1 } };
  2038. int i;
  2039. for (i = 0; i < NB_MVS_NUM; ++i) {
  2040. int r = dirs[i][0] * mi_height;
  2041. int c = dirs[i][1] * mi_width;
  2042. if (mi_row + r >= 0 && mi_row + r < tpl_frame->mi_rows && mi_col + c >= 0 &&
  2043. mi_col + c < tpl_frame->mi_cols) {
  2044. const TplDepStats *tpl_ptr =
  2045. &tpl_frame
  2046. ->tpl_stats_ptr[(mi_row + r) * tpl_frame->stride + mi_col + c];
  2047. int_mv *mv =
  2048. get_pyramid_mv(tpl_frame, rf_idx, bsize, mi_row + r, mi_col + c);
  2049. if (tpl_ptr->ready[rf_idx]) {
  2050. nb_full_mvs[i].as_mv = get_full_mv(&mv->as_mv);
  2051. } else {
  2052. nb_full_mvs[i].as_int = INVALID_MV;
  2053. }
  2054. } else {
  2055. nb_full_mvs[i].as_int = INVALID_MV;
  2056. }
  2057. }
  2058. }
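// Illustrative note: the four neighbours gathered above are the blocks one
// block-height above, one block-width to the left of, below and to the right
// of the current block (dirs scaled by mi_height / mi_width). Neighbours that
// fall outside the frame, or whose TPL stats are not ready for rf_idx, are
// stored as INVALID_MV and later skipped by vp9_nb_mvs_inconsistency().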
  2059. #endif // CONFIG_NON_GREEDY_MV
  2060. int vp9_diamond_search_sad_c(const MACROBLOCK *x, const search_site_config *cfg,
  2061. MV *ref_mv, MV *best_mv, int search_param,
  2062. int sad_per_bit, int *num00,
  2063. const vp9_variance_fn_ptr_t *fn_ptr,
  2064. const MV *center_mv) {
  2065. int i, j, step;
  2066. const MACROBLOCKD *const xd = &x->e_mbd;
  2067. uint8_t *what = x->plane[0].src.buf;
  2068. const int what_stride = x->plane[0].src.stride;
  2069. const uint8_t *in_what;
  2070. const int in_what_stride = xd->plane[0].pre[0].stride;
  2071. const uint8_t *best_address;
  2072. unsigned int bestsad = INT_MAX;
  2073. int best_site = -1;
  2074. int last_site = -1;
  2075. int ref_row;
  2076. int ref_col;
  2077. // search_param determines the length of the initial step and hence the number
  2078. // of iterations.
  2079. // 0 = initial step (MAX_FIRST_STEP) pel
  2080. // 1 = (MAX_FIRST_STEP/2) pel,
  2081. // 2 = (MAX_FIRST_STEP/4) pel...
  2082. // const search_site *ss = &cfg->ss[search_param * cfg->searches_per_step];
  2083. const MV *ss_mv = &cfg->ss_mv[search_param * cfg->searches_per_step];
  2084. const intptr_t *ss_os = &cfg->ss_os[search_param * cfg->searches_per_step];
  2085. const int tot_steps = cfg->total_steps - search_param;
  2086. const MV fcenter_mv = { center_mv->row >> 3, center_mv->col >> 3 };
  2087. clamp_mv(ref_mv, x->mv_limits.col_min, x->mv_limits.col_max,
  2088. x->mv_limits.row_min, x->mv_limits.row_max);
  2089. ref_row = ref_mv->row;
  2090. ref_col = ref_mv->col;
  2091. *num00 = 0;
  2092. best_mv->row = ref_row;
  2093. best_mv->col = ref_col;
  2094. // Work out the start point for the search
  2095. in_what = xd->plane[0].pre[0].buf + ref_row * in_what_stride + ref_col;
  2096. best_address = in_what;
  2097. // Check the starting position
  2098. bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride) +
  2099. mvsad_err_cost(x, best_mv, &fcenter_mv, sad_per_bit);
  2100. i = 0;
  2101. for (step = 0; step < tot_steps; step++) {
  2102. int all_in = 1, t;
2103. // all_in is true if every one of the points we are checking is within
2104. // the bounds of the image.
  2105. all_in &= ((best_mv->row + ss_mv[i].row) > x->mv_limits.row_min);
  2106. all_in &= ((best_mv->row + ss_mv[i + 1].row) < x->mv_limits.row_max);
  2107. all_in &= ((best_mv->col + ss_mv[i + 2].col) > x->mv_limits.col_min);
  2108. all_in &= ((best_mv->col + ss_mv[i + 3].col) < x->mv_limits.col_max);
2109. // If all the pixels are within the bounds we don't check whether the
2110. // search point is valid in this loop; otherwise we check each point
2111. // for validity.
  2112. if (all_in) {
  2113. unsigned int sad_array[4];
  2114. for (j = 0; j < cfg->searches_per_step; j += 4) {
  2115. unsigned char const *block_offset[4];
  2116. for (t = 0; t < 4; t++) block_offset[t] = ss_os[i + t] + best_address;
  2117. fn_ptr->sdx4df(what, what_stride, block_offset, in_what_stride,
  2118. sad_array);
  2119. for (t = 0; t < 4; t++, i++) {
  2120. if (sad_array[t] < bestsad) {
  2121. const MV this_mv = { best_mv->row + ss_mv[i].row,
  2122. best_mv->col + ss_mv[i].col };
  2123. sad_array[t] +=
  2124. mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit);
  2125. if (sad_array[t] < bestsad) {
  2126. bestsad = sad_array[t];
  2127. best_site = i;
  2128. }
  2129. }
  2130. }
  2131. }
  2132. } else {
  2133. for (j = 0; j < cfg->searches_per_step; j++) {
  2134. // Trap illegal vectors
  2135. const MV this_mv = { best_mv->row + ss_mv[i].row,
  2136. best_mv->col + ss_mv[i].col };
  2137. if (is_mv_in(&x->mv_limits, &this_mv)) {
  2138. const uint8_t *const check_here = ss_os[i] + best_address;
  2139. unsigned int thissad =
  2140. fn_ptr->sdf(what, what_stride, check_here, in_what_stride);
  2141. if (thissad < bestsad) {
  2142. thissad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit);
  2143. if (thissad < bestsad) {
  2144. bestsad = thissad;
  2145. best_site = i;
  2146. }
  2147. }
  2148. }
  2149. i++;
  2150. }
  2151. }
  2152. if (best_site != last_site) {
  2153. best_mv->row += ss_mv[best_site].row;
  2154. best_mv->col += ss_mv[best_site].col;
  2155. best_address += ss_os[best_site];
  2156. last_site = best_site;
  2157. #if defined(NEW_DIAMOND_SEARCH)
  2158. while (1) {
  2159. const MV this_mv = { best_mv->row + ss_mv[best_site].row,
  2160. best_mv->col + ss_mv[best_site].col };
  2161. if (is_mv_in(&x->mv_limits, &this_mv)) {
  2162. const uint8_t *const check_here = ss_os[best_site] + best_address;
  2163. unsigned int thissad =
  2164. fn_ptr->sdf(what, what_stride, check_here, in_what_stride);
  2165. if (thissad < bestsad) {
  2166. thissad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit);
  2167. if (thissad < bestsad) {
  2168. bestsad = thissad;
  2169. best_mv->row += ss_mv[best_site].row;
  2170. best_mv->col += ss_mv[best_site].col;
  2171. best_address += ss_os[best_site];
  2172. continue;
  2173. }
  2174. }
  2175. }
  2176. break;
  2177. }
  2178. #endif
  2179. } else if (best_address == in_what) {
  2180. (*num00)++;
  2181. }
  2182. }
  2183. return bestsad;
  2184. }
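// Illustrative note: per the step-size comment above, search_param == p makes
// the first level examined use candidate offsets of about MAX_FIRST_STEP >> p
// pels and leaves cfg->total_steps - p refinement levels, e.g. p == 2 starts
// at (MAX_FIRST_STEP / 4)-pel offsets with two fewer levels than p == 0.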
  2185. static int vector_match(int16_t *ref, int16_t *src, int bwl) {
  2186. int best_sad = INT_MAX;
  2187. int this_sad;
  2188. int d;
  2189. int center, offset = 0;
  2190. int bw = 4 << bwl; // redundant variable, to be changed in the experiments.
  2191. for (d = 0; d <= bw; d += 16) {
  2192. this_sad = vpx_vector_var(&ref[d], src, bwl);
  2193. if (this_sad < best_sad) {
  2194. best_sad = this_sad;
  2195. offset = d;
  2196. }
  2197. }
  2198. center = offset;
  2199. for (d = -8; d <= 8; d += 16) {
  2200. int this_pos = offset + d;
  2201. // check limit
  2202. if (this_pos < 0 || this_pos > bw) continue;
  2203. this_sad = vpx_vector_var(&ref[this_pos], src, bwl);
  2204. if (this_sad < best_sad) {
  2205. best_sad = this_sad;
  2206. center = this_pos;
  2207. }
  2208. }
  2209. offset = center;
  2210. for (d = -4; d <= 4; d += 8) {
  2211. int this_pos = offset + d;
  2212. // check limit
  2213. if (this_pos < 0 || this_pos > bw) continue;
  2214. this_sad = vpx_vector_var(&ref[this_pos], src, bwl);
  2215. if (this_sad < best_sad) {
  2216. best_sad = this_sad;
  2217. center = this_pos;
  2218. }
  2219. }
  2220. offset = center;
  2221. for (d = -2; d <= 2; d += 4) {
  2222. int this_pos = offset + d;
  2223. // check limit
  2224. if (this_pos < 0 || this_pos > bw) continue;
  2225. this_sad = vpx_vector_var(&ref[this_pos], src, bwl);
  2226. if (this_sad < best_sad) {
  2227. best_sad = this_sad;
  2228. center = this_pos;
  2229. }
  2230. }
  2231. offset = center;
  2232. for (d = -1; d <= 1; d += 2) {
  2233. int this_pos = offset + d;
  2234. // check limit
  2235. if (this_pos < 0 || this_pos > bw) continue;
  2236. this_sad = vpx_vector_var(&ref[this_pos], src, bwl);
  2237. if (this_sad < best_sad) {
  2238. best_sad = this_sad;
  2239. center = this_pos;
  2240. }
  2241. }
  2242. return (center - (bw >> 1));
  2243. }
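// Illustrative trace (example values only): the 1-D match above is a
// coarse-to-fine search over offsets in [0, bw]. It first samples every 16th
// offset, then refines the best one with probes at +/-8, +/-4, +/-2 and +/-1,
// and finally re-centres so that a perfect alignment maps to zero:
//
//   e.g. bw == 64, best coarse offset 48, refined to 45,
//   return value == 45 - (64 >> 1) == 13 pels of displacement.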
  2244. static const MV search_pos[4] = {
  2245. { -1, 0 },
  2246. { 0, -1 },
  2247. { 0, 1 },
  2248. { 1, 0 },
  2249. };
  2250. unsigned int vp9_int_pro_motion_estimation(const VP9_COMP *cpi, MACROBLOCK *x,
  2251. BLOCK_SIZE bsize, int mi_row,
  2252. int mi_col, const MV *ref_mv) {
  2253. MACROBLOCKD *xd = &x->e_mbd;
  2254. MODE_INFO *mi = xd->mi[0];
  2255. struct buf_2d backup_yv12[MAX_MB_PLANE] = { { 0, 0 } };
  2256. DECLARE_ALIGNED(16, int16_t, hbuf[128]);
  2257. DECLARE_ALIGNED(16, int16_t, vbuf[128]);
  2258. DECLARE_ALIGNED(16, int16_t, src_hbuf[64]);
  2259. DECLARE_ALIGNED(16, int16_t, src_vbuf[64]);
  2260. int idx;
  2261. const int bw = 4 << b_width_log2_lookup[bsize];
  2262. const int bh = 4 << b_height_log2_lookup[bsize];
  2263. const int search_width = bw << 1;
  2264. const int search_height = bh << 1;
  2265. const int src_stride = x->plane[0].src.stride;
  2266. const int ref_stride = xd->plane[0].pre[0].stride;
  2267. uint8_t const *ref_buf, *src_buf;
  2268. MV *tmp_mv = &xd->mi[0]->mv[0].as_mv;
  2269. unsigned int best_sad, tmp_sad, this_sad[4];
  2270. MV this_mv;
  2271. const int norm_factor = 3 + (bw >> 5);
  2272. const YV12_BUFFER_CONFIG *scaled_ref_frame =
  2273. vp9_get_scaled_ref_frame(cpi, mi->ref_frame[0]);
  2274. MvLimits subpel_mv_limits;
  2275. if (scaled_ref_frame) {
  2276. int i;
  2277. // Swap out the reference frame for a version that's been scaled to
  2278. // match the resolution of the current frame, allowing the existing
  2279. // motion search code to be used without additional modifications.
  2280. for (i = 0; i < MAX_MB_PLANE; i++) backup_yv12[i] = xd->plane[i].pre[0];
  2281. vp9_setup_pre_planes(xd, 0, scaled_ref_frame, mi_row, mi_col, NULL);
  2282. }
  2283. #if CONFIG_VP9_HIGHBITDEPTH
  2284. // TODO(jingning): Implement integral projection functions for high bit-depth
  2285. // setting and remove this part of code.
  2286. if (xd->bd != 8) {
  2287. unsigned int this_sad;
  2288. tmp_mv->row = 0;
  2289. tmp_mv->col = 0;
  2290. this_sad = cpi->fn_ptr[bsize].sdf(x->plane[0].src.buf, src_stride,
  2291. xd->plane[0].pre[0].buf, ref_stride);
  2292. if (scaled_ref_frame) {
  2293. int i;
  2294. for (i = 0; i < MAX_MB_PLANE; i++) xd->plane[i].pre[0] = backup_yv12[i];
  2295. }
  2296. return this_sad;
  2297. }
  2298. #endif
  2299. // Set up prediction 1-D reference set
  2300. ref_buf = xd->plane[0].pre[0].buf - (bw >> 1);
  2301. for (idx = 0; idx < search_width; idx += 16) {
  2302. vpx_int_pro_row(&hbuf[idx], ref_buf, ref_stride, bh);
  2303. ref_buf += 16;
  2304. }
  2305. ref_buf = xd->plane[0].pre[0].buf - (bh >> 1) * ref_stride;
  2306. for (idx = 0; idx < search_height; ++idx) {
  2307. vbuf[idx] = vpx_int_pro_col(ref_buf, bw) >> norm_factor;
  2308. ref_buf += ref_stride;
  2309. }
  2310. // Set up src 1-D reference set
  2311. for (idx = 0; idx < bw; idx += 16) {
  2312. src_buf = x->plane[0].src.buf + idx;
  2313. vpx_int_pro_row(&src_hbuf[idx], src_buf, src_stride, bh);
  2314. }
  2315. src_buf = x->plane[0].src.buf;
  2316. for (idx = 0; idx < bh; ++idx) {
  2317. src_vbuf[idx] = vpx_int_pro_col(src_buf, bw) >> norm_factor;
  2318. src_buf += src_stride;
  2319. }
  2320. // Find the best match per 1-D search
  2321. tmp_mv->col = vector_match(hbuf, src_hbuf, b_width_log2_lookup[bsize]);
  2322. tmp_mv->row = vector_match(vbuf, src_vbuf, b_height_log2_lookup[bsize]);
  2323. this_mv = *tmp_mv;
  2324. src_buf = x->plane[0].src.buf;
  2325. ref_buf = xd->plane[0].pre[0].buf + this_mv.row * ref_stride + this_mv.col;
  2326. best_sad = cpi->fn_ptr[bsize].sdf(src_buf, src_stride, ref_buf, ref_stride);
  2327. {
  2328. const uint8_t *const pos[4] = {
  2329. ref_buf - ref_stride,
  2330. ref_buf - 1,
  2331. ref_buf + 1,
  2332. ref_buf + ref_stride,
  2333. };
  2334. cpi->fn_ptr[bsize].sdx4df(src_buf, src_stride, pos, ref_stride, this_sad);
  2335. }
  2336. for (idx = 0; idx < 4; ++idx) {
  2337. if (this_sad[idx] < best_sad) {
  2338. best_sad = this_sad[idx];
  2339. tmp_mv->row = search_pos[idx].row + this_mv.row;
  2340. tmp_mv->col = search_pos[idx].col + this_mv.col;
  2341. }
  2342. }
  2343. if (this_sad[0] < this_sad[3])
  2344. this_mv.row -= 1;
  2345. else
  2346. this_mv.row += 1;
  2347. if (this_sad[1] < this_sad[2])
  2348. this_mv.col -= 1;
  2349. else
  2350. this_mv.col += 1;
  2351. ref_buf = xd->plane[0].pre[0].buf + this_mv.row * ref_stride + this_mv.col;
  2352. tmp_sad = cpi->fn_ptr[bsize].sdf(src_buf, src_stride, ref_buf, ref_stride);
  2353. if (best_sad > tmp_sad) {
  2354. *tmp_mv = this_mv;
  2355. best_sad = tmp_sad;
  2356. }
  2357. tmp_mv->row *= 8;
  2358. tmp_mv->col *= 8;
  2359. vp9_set_subpel_mv_search_range(&subpel_mv_limits, &x->mv_limits, ref_mv);
  2360. clamp_mv(tmp_mv, subpel_mv_limits.col_min, subpel_mv_limits.col_max,
  2361. subpel_mv_limits.row_min, subpel_mv_limits.row_max);
  2362. if (scaled_ref_frame) {
  2363. int i;
  2364. for (i = 0; i < MAX_MB_PLANE; i++) xd->plane[i].pre[0] = backup_yv12[i];
  2365. }
  2366. return best_sad;
  2367. }
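// Illustrative summary (no substitute for the code above): the integral
// projection estimator collapses the reference window and the source block
// into per-column sums (hbuf / src_hbuf) and per-row sums (vbuf / src_vbuf),
// then vector_match() aligns the column projections to get tmp_mv->col and
// the row projections to get tmp_mv->row. A 4-neighbour sdx4df() check plus
// one extra probe refines the full-pel vector, which is finally scaled to
// 1/8-pel units (row *= 8, col *= 8) and clamped to the sub-pel MV limits.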
  2368. #if CONFIG_NON_GREEDY_MV
2369. // Runs a sequence of diamond searches in smaller steps for RD.
  2370. /* do_refine: If last step (1-away) of n-step search doesn't pick the center
  2371. point as the best match, we will do a final 1-away diamond
  2372. refining search */
  2373. double vp9_full_pixel_diamond_new(const VP9_COMP *cpi, MACROBLOCK *x,
  2374. MV *mvp_full, int step_param, int lambda,
  2375. int do_refine,
  2376. const vp9_variance_fn_ptr_t *fn_ptr,
  2377. const int_mv *nb_full_mvs, int full_mv_num,
  2378. MV *best_mv) {
  2379. int n, num00 = 0;
  2380. double thissme;
  2381. double bestsme;
  2382. const int further_steps = MAX_MVSEARCH_STEPS - 1 - step_param;
  2383. const MV center_mv = { 0, 0 };
  2384. vpx_clear_system_state();
  bestsme =
      diamond_search_sad_new(x, &cpi->ss_cfg, mvp_full, best_mv, step_param,
                             lambda, &n, fn_ptr, nb_full_mvs, full_mv_num);
  bestsme = vp9_get_mvpred_var(x, best_mv, &center_mv, fn_ptr, 0);

  // If there won't be more n-step search, check to see if refining search is
  // needed.
  if (n > further_steps) do_refine = 0;

  while (n < further_steps) {
    ++n;
    if (num00) {
      num00--;
    } else {
      MV temp_mv;
      thissme = diamond_search_sad_new(x, &cpi->ss_cfg, mvp_full, &temp_mv,
                                       step_param + n, lambda, &num00, fn_ptr,
                                       nb_full_mvs, full_mv_num);
      thissme = vp9_get_mvpred_var(x, &temp_mv, &center_mv, fn_ptr, 0);
      // check to see if refining search is needed.
      if (num00 > further_steps - n) do_refine = 0;

      if (thissme < bestsme) {
        bestsme = thissme;
        *best_mv = temp_mv;
      }
    }
  }

  // final 1-away diamond refining search
  if (do_refine) {
    const int search_range = 8;
    MV temp_mv = *best_mv;
    thissme = vp9_refining_search_sad_new(x, &temp_mv, lambda, search_range,
                                          fn_ptr, nb_full_mvs, full_mv_num);
    thissme = vp9_get_mvpred_var(x, &temp_mv, &center_mv, fn_ptr, 0);
    if (thissme < bestsme) {
      bestsme = thissme;
      *best_mv = temp_mv;
    }
  }

  bestsme = (double)full_pixel_exhaustive_new(cpi, x, best_mv, fn_ptr, best_mv,
                                              lambda, nb_full_mvs, full_mv_num);
  return bestsme;
}
#endif  // CONFIG_NON_GREEDY_MV
// Runs a sequence of diamond searches in smaller steps for RD.
/* do_refine: If the last step (1-away) of the n-step search doesn't pick the
              center point as the best match, we will do a final 1-away diamond
              refining search. */
static int full_pixel_diamond(const VP9_COMP *const cpi,
                              const MACROBLOCK *const x, MV *mvp_full,
                              int step_param, int sadpb, int further_steps,
                              int do_refine, int *cost_list,
                              const vp9_variance_fn_ptr_t *fn_ptr,
                              const MV *ref_mv, MV *dst_mv) {
  MV temp_mv;
  int thissme, n, num00 = 0;
  int bestsme = cpi->diamond_search_sad(x, &cpi->ss_cfg, mvp_full, &temp_mv,
                                        step_param, sadpb, &n, fn_ptr, ref_mv);
  if (bestsme < INT_MAX)
    bestsme = vp9_get_mvpred_var(x, &temp_mv, ref_mv, fn_ptr, 1);
  *dst_mv = temp_mv;

  // If there won't be more n-step search, check to see if refining search is
  // needed.
  if (n > further_steps) do_refine = 0;
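  // n counts the diamond stages already completed; num00 is the number of
  // upcoming stages the previous diamond search reported as redundant (the
  // center point stayed best), so those iterations only decrement the counter
  // instead of searching again.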
  while (n < further_steps) {
    ++n;

    if (num00) {
      num00--;
    } else {
      thissme = cpi->diamond_search_sad(x, &cpi->ss_cfg, mvp_full, &temp_mv,
                                        step_param + n, sadpb, &num00, fn_ptr,
                                        ref_mv);
      if (thissme < INT_MAX)
        thissme = vp9_get_mvpred_var(x, &temp_mv, ref_mv, fn_ptr, 1);

      // check to see if refining search is needed.
      if (num00 > further_steps - n) do_refine = 0;

      if (thissme < bestsme) {
        bestsme = thissme;
        *dst_mv = temp_mv;
      }
    }
  }

  // final 1-away diamond refining search
  if (do_refine) {
    const int search_range = 8;
    MV best_mv = *dst_mv;
    thissme = vp9_refining_search_sad(x, &best_mv, sadpb, search_range, fn_ptr,
                                      ref_mv);
    if (thissme < INT_MAX)
      thissme = vp9_get_mvpred_var(x, &best_mv, ref_mv, fn_ptr, 1);
    if (thissme < bestsme) {
      bestsme = thissme;
      *dst_mv = best_mv;
    }
  }

  // Return cost list.
  if (cost_list) {
    calc_int_cost_list(x, ref_mv, sadpb, fn_ptr, dst_mv, cost_list);
  }
  return bestsme;
}
// Runs a limited-range exhaustive mesh search using a pattern set
// according to the encode speed profile.
static int full_pixel_exhaustive(const VP9_COMP *const cpi,
                                 const MACROBLOCK *const x, MV *centre_mv_full,
                                 int sadpb, int *cost_list,
                                 const vp9_variance_fn_ptr_t *fn_ptr,
                                 const MV *ref_mv, MV *dst_mv) {
  const SPEED_FEATURES *const sf = &cpi->sf;
  MV temp_mv = { centre_mv_full->row, centre_mv_full->col };
  MV f_ref_mv = { ref_mv->row >> 3, ref_mv->col >> 3 };
  int bestsme;
  int i;
  int interval = sf->mesh_patterns[0].interval;
  int range = sf->mesh_patterns[0].range;
  int baseline_interval_divisor;

  // Trap illegal values for interval and range for this function.
  if ((range < MIN_RANGE) || (range > MAX_RANGE) || (interval < MIN_INTERVAL) ||
      (interval > range))
    return INT_MAX;

  baseline_interval_divisor = range / interval;

  // Check size of proposed first range against magnitude of the centre
  // value used as a starting point.
  range = VPXMAX(range, (5 * VPXMAX(abs(temp_mv.row), abs(temp_mv.col))) / 4);
  range = VPXMIN(range, MAX_RANGE);
  interval = VPXMAX(interval, range / baseline_interval_divisor);
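  // Illustrative example (pattern values are hypothetical, not taken from any
  // actual speed profile): with mesh_patterns of {range, interval} =
  // {64, 16}, {32, 8}, {16, 4}, {8, 1}, the first pass samples a 64-wide mesh
  // every 16 pels, and each later pass narrows the range and step around the
  // running best MV until the interval reaches 1.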
  // initial search
  bestsme = exhaustive_mesh_search(x, &f_ref_mv, &temp_mv, range, interval,
                                   sadpb, fn_ptr, &temp_mv);

  if ((interval > MIN_INTERVAL) && (range > MIN_RANGE)) {
    // Progressive searches with range and step size decreasing each time
    // till we reach a step size of 1. Then break out.
    for (i = 1; i < MAX_MESH_STEP; ++i) {
      // First pass with coarser step and longer range
      bestsme = exhaustive_mesh_search(
          x, &f_ref_mv, &temp_mv, sf->mesh_patterns[i].range,
          sf->mesh_patterns[i].interval, sadpb, fn_ptr, &temp_mv);

      if (sf->mesh_patterns[i].interval == 1) break;
    }
  }

  if (bestsme < INT_MAX)
    bestsme = vp9_get_mvpred_var(x, &temp_mv, ref_mv, fn_ptr, 1);
  *dst_mv = temp_mv;

  // Return cost list.
  if (cost_list) {
    calc_int_cost_list(x, ref_mv, sadpb, fn_ptr, dst_mv, cost_list);
  }
  return bestsme;
}
#if CONFIG_NON_GREEDY_MV
double vp9_refining_search_sad_new(const MACROBLOCK *x, MV *best_full_mv,
                                   int lambda, int search_range,
                                   const vp9_variance_fn_ptr_t *fn_ptr,
                                   const int_mv *nb_full_mvs, int full_mv_num) {
  const MACROBLOCKD *const xd = &x->e_mbd;
  const MV neighbors[4] = { { -1, 0 }, { 0, -1 }, { 0, 1 }, { 1, 0 } };
  const struct buf_2d *const what = &x->plane[0].src;
  const struct buf_2d *const in_what = &xd->plane[0].pre[0];
  const uint8_t *best_address = get_buf_from_mv(in_what, best_full_mv);
  double best_sad;
  int i, j;
  vpx_clear_system_state();
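  // Seed the search with the score of the starting MV: its SAD plus the
  // lambda-weighted inconsistency against the candidate MVs in nb_full_mvs.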
  {
    const double mv_dist =
        fn_ptr->sdf(what->buf, what->stride, best_address, in_what->stride);
    const double mv_cost =
        vp9_nb_mvs_inconsistency(best_full_mv, nb_full_mvs, full_mv_num) /
        (double)(1 << LOG2_PRECISION);
    best_sad = mv_dist + lambda * mv_cost;
  }

  for (i = 0; i < search_range; i++) {
    int best_site = -1;
    const int all_in = ((best_full_mv->row - 1) > x->mv_limits.row_min) &
                       ((best_full_mv->row + 1) < x->mv_limits.row_max) &
                       ((best_full_mv->col - 1) > x->mv_limits.col_min) &
                       ((best_full_mv->col + 1) < x->mv_limits.col_max);

    if (all_in) {
      unsigned int sads[4];
      const uint8_t *const positions[4] = { best_address - in_what->stride,
                                            best_address - 1, best_address + 1,
                                            best_address + in_what->stride };
      fn_ptr->sdx4df(what->buf, what->stride, positions, in_what->stride, sads);
      for (j = 0; j < 4; ++j) {
        const MV mv = { best_full_mv->row + neighbors[j].row,
                        best_full_mv->col + neighbors[j].col };
        const double mv_dist = sads[j];
        const double mv_cost =
            vp9_nb_mvs_inconsistency(&mv, nb_full_mvs, full_mv_num) /
            (double)(1 << LOG2_PRECISION);
        const double thissad = mv_dist + lambda * mv_cost;
        if (thissad < best_sad) {
          best_sad = thissad;
          best_site = j;
        }
      }
    } else {
      for (j = 0; j < 4; ++j) {
        const MV mv = { best_full_mv->row + neighbors[j].row,
                        best_full_mv->col + neighbors[j].col };
        if (is_mv_in(&x->mv_limits, &mv)) {
          const double mv_dist =
              fn_ptr->sdf(what->buf, what->stride,
                          get_buf_from_mv(in_what, &mv), in_what->stride);
          const double mv_cost =
              vp9_nb_mvs_inconsistency(&mv, nb_full_mvs, full_mv_num) /
              (double)(1 << LOG2_PRECISION);
          const double thissad = mv_dist + lambda * mv_cost;
          if (thissad < best_sad) {
            best_sad = thissad;
            best_site = j;
          }
        }
      }
    }

    if (best_site == -1) {
      break;
    } else {
      best_full_mv->row += neighbors[best_site].row;
      best_full_mv->col += neighbors[best_site].col;
      best_address = get_buf_from_mv(in_what, best_full_mv);
    }
  }
  return best_sad;
}
#endif  // CONFIG_NON_GREEDY_MV
int vp9_refining_search_sad(const MACROBLOCK *x, MV *ref_mv, int error_per_bit,
                            int search_range,
                            const vp9_variance_fn_ptr_t *fn_ptr,
                            const MV *center_mv) {
  const MACROBLOCKD *const xd = &x->e_mbd;
  const MV neighbors[4] = { { -1, 0 }, { 0, -1 }, { 0, 1 }, { 1, 0 } };
  const struct buf_2d *const what = &x->plane[0].src;
  const struct buf_2d *const in_what = &xd->plane[0].pre[0];
  const MV fcenter_mv = { center_mv->row >> 3, center_mv->col >> 3 };
  const uint8_t *best_address = get_buf_from_mv(in_what, ref_mv);
  unsigned int best_sad =
      fn_ptr->sdf(what->buf, what->stride, best_address, in_what->stride) +
      mvsad_err_cost(x, ref_mv, &fcenter_mv, error_per_bit);
  int i, j;

  for (i = 0; i < search_range; i++) {
    int best_site = -1;
    const int all_in = ((ref_mv->row - 1) > x->mv_limits.row_min) &
                       ((ref_mv->row + 1) < x->mv_limits.row_max) &
                       ((ref_mv->col - 1) > x->mv_limits.col_min) &
                       ((ref_mv->col + 1) < x->mv_limits.col_max);
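    // If all four neighbours lie strictly inside the MV limits, evaluate them
    // with one batched sdx4df call; otherwise fall back to testing each
    // in-bounds neighbour individually with sdf.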
    if (all_in) {
      unsigned int sads[4];
      const uint8_t *const positions[4] = { best_address - in_what->stride,
                                            best_address - 1, best_address + 1,
                                            best_address + in_what->stride };
      fn_ptr->sdx4df(what->buf, what->stride, positions, in_what->stride, sads);

      for (j = 0; j < 4; ++j) {
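        // Check the raw SAD first: adding the non-negative MV-rate term can
        // only increase it, so candidates that already lose on SAD alone are
        // rejected without computing mvsad_err_cost().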
        if (sads[j] < best_sad) {
          const MV mv = { ref_mv->row + neighbors[j].row,
                          ref_mv->col + neighbors[j].col };
          sads[j] += mvsad_err_cost(x, &mv, &fcenter_mv, error_per_bit);
          if (sads[j] < best_sad) {
            best_sad = sads[j];
            best_site = j;
          }
        }
      }
    } else {
      for (j = 0; j < 4; ++j) {
        const MV mv = { ref_mv->row + neighbors[j].row,
                        ref_mv->col + neighbors[j].col };

        if (is_mv_in(&x->mv_limits, &mv)) {
          unsigned int sad =
              fn_ptr->sdf(what->buf, what->stride,
                          get_buf_from_mv(in_what, &mv), in_what->stride);
          if (sad < best_sad) {
            sad += mvsad_err_cost(x, &mv, &fcenter_mv, error_per_bit);
            if (sad < best_sad) {
              best_sad = sad;
              best_site = j;
            }
          }
        }
      }
    }

    if (best_site == -1) {
      break;
    } else {
      ref_mv->row += neighbors[best_site].row;
      ref_mv->col += neighbors[best_site].col;
      best_address = get_buf_from_mv(in_what, ref_mv);
    }
  }

  return best_sad;
}
// This function is called when we do joint motion search in comp_inter_inter
// mode.
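// Note: sdaf() is the SAD-with-second-prediction function, i.e. the source
// block is compared against the average of the reference block and
// second_pred, matching what the compound (joint) predictor will actually use.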
int vp9_refining_search_8p_c(const MACROBLOCK *x, MV *ref_mv, int error_per_bit,
                             int search_range,
                             const vp9_variance_fn_ptr_t *fn_ptr,
                             const MV *center_mv, const uint8_t *second_pred) {
  const MV neighbors[8] = { { -1, 0 },  { 0, -1 }, { 0, 1 },  { 1, 0 },
                            { -1, -1 }, { 1, -1 }, { -1, 1 }, { 1, 1 } };
  const MACROBLOCKD *const xd = &x->e_mbd;
  const struct buf_2d *const what = &x->plane[0].src;
  const struct buf_2d *const in_what = &xd->plane[0].pre[0];
  const MV fcenter_mv = { center_mv->row >> 3, center_mv->col >> 3 };
  unsigned int best_sad = INT_MAX;
  int i, j;
  clamp_mv(ref_mv, x->mv_limits.col_min, x->mv_limits.col_max,
           x->mv_limits.row_min, x->mv_limits.row_max);
  best_sad =
      fn_ptr->sdaf(what->buf, what->stride, get_buf_from_mv(in_what, ref_mv),
                   in_what->stride, second_pred) +
      mvsad_err_cost(x, ref_mv, &fcenter_mv, error_per_bit);

  for (i = 0; i < search_range; ++i) {
    int best_site = -1;

    for (j = 0; j < 8; ++j) {
      const MV mv = { ref_mv->row + neighbors[j].row,
                      ref_mv->col + neighbors[j].col };

      if (is_mv_in(&x->mv_limits, &mv)) {
        unsigned int sad =
            fn_ptr->sdaf(what->buf, what->stride, get_buf_from_mv(in_what, &mv),
                         in_what->stride, second_pred);
        if (sad < best_sad) {
          sad += mvsad_err_cost(x, &mv, &fcenter_mv, error_per_bit);
          if (sad < best_sad) {
            best_sad = sad;
            best_site = j;
          }
        }
      }
    }

    if (best_site == -1) {
      break;
    } else {
      ref_mv->row += neighbors[best_site].row;
      ref_mv->col += neighbors[best_site].col;
    }
  }
  return best_sad;
}
int vp9_full_pixel_search(const VP9_COMP *const cpi, const MACROBLOCK *const x,
                          BLOCK_SIZE bsize, MV *mvp_full, int step_param,
                          int search_method, int error_per_bit, int *cost_list,
                          const MV *ref_mv, MV *tmp_mv, int var_max, int rd) {
  const SPEED_FEATURES *const sf = &cpi->sf;
  const SEARCH_METHODS method = (SEARCH_METHODS)search_method;
  const vp9_variance_fn_ptr_t *fn_ptr = &cpi->fn_ptr[bsize];
  int var = 0;
  int run_exhaustive_search = 0;

  if (cost_list) {
    cost_list[0] = INT_MAX;
    cost_list[1] = INT_MAX;
    cost_list[2] = INT_MAX;
    cost_list[3] = INT_MAX;
    cost_list[4] = INT_MAX;
  }
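  // cost_list holds the cost at the best full-pel MV plus its four immediate
  // neighbours (filled by calc_int_cost_list() inside the searches below);
  // entries left at INT_MAX mark positions that were never evaluated, which
  // the sub-pel search treats as unavailable.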
  switch (method) {
    case FAST_DIAMOND:
      var = fast_dia_search(x, mvp_full, step_param, error_per_bit, 0,
                            cost_list, fn_ptr, 1, ref_mv, tmp_mv);
      break;
    case FAST_HEX:
      var = fast_hex_search(x, mvp_full, step_param, error_per_bit, 0,
                            cost_list, fn_ptr, 1, ref_mv, tmp_mv);
      break;
    case HEX:
      var = hex_search(x, mvp_full, step_param, error_per_bit, 1, cost_list,
                       fn_ptr, 1, ref_mv, tmp_mv);
      break;
    case SQUARE:
      var = square_search(x, mvp_full, step_param, error_per_bit, 1, cost_list,
                          fn_ptr, 1, ref_mv, tmp_mv);
      break;
    case BIGDIA:
      var = bigdia_search(x, mvp_full, step_param, error_per_bit, 1, cost_list,
                          fn_ptr, 1, ref_mv, tmp_mv);
      break;
    case NSTEP:
    case MESH:
      var = full_pixel_diamond(cpi, x, mvp_full, step_param, error_per_bit,
                               MAX_MVSEARCH_STEPS - 1 - step_param, 1,
                               cost_list, fn_ptr, ref_mv, tmp_mv);
      break;
    default: assert(0 && "Unknown search method");
  }
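  // NSTEP escalates to the exhaustive mesh only when the diamond result looks
  // poor: the threshold is shifted right by 8 - (bwl + bhl) so it scales with
  // the block's pixel count (a 64x64 block uses exhaustive_searches_thresh
  // as-is, an 8x8 block roughly 1/64 of it). MESH always runs the mesh stage.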
  if (method == NSTEP) {
    if (sf->exhaustive_searches_thresh < INT_MAX &&
        !cpi->rc.is_src_frame_alt_ref) {
      const int64_t exhaustive_thr =
          sf->exhaustive_searches_thresh >>
          (8 - (b_width_log2_lookup[bsize] + b_height_log2_lookup[bsize]));
      if (var > exhaustive_thr) run_exhaustive_search = 1;
    }
  } else if (method == MESH) {
    run_exhaustive_search = 1;
  }

  if (run_exhaustive_search) {
    int var_ex;
    MV tmp_mv_ex;
    var_ex = full_pixel_exhaustive(cpi, x, tmp_mv, error_per_bit, cost_list,
                                   fn_ptr, ref_mv, &tmp_mv_ex);
    if (var_ex < var) {
      var = var_ex;
      *tmp_mv = tmp_mv_ex;
    }
  }

  if (method != NSTEP && method != MESH && rd && var < var_max)
    var = vp9_get_mvpred_var(x, tmp_mv, ref_mv, fn_ptr, 1);

  return var;
}
// Note(yunqingwang): The following 2 functions are only used in the motion
// vector unit test; they return the extreme motion vectors allowed by the MV
// limits.
#define COMMON_MV_TEST              \
  SETUP_SUBPEL_SEARCH;              \
                                    \
  (void)error_per_bit;              \
  (void)vfp;                        \
  (void)z;                          \
  (void)src_stride;                 \
  (void)y;                          \
  (void)y_stride;                   \
  (void)second_pred;                \
  (void)w;                          \
  (void)h;                          \
  (void)offset;                     \
  (void)mvjcost;                    \
  (void)mvcost;                     \
  (void)sse1;                       \
  (void)distortion;                 \
                                    \
  (void)halfiters;                  \
  (void)quarteriters;               \
  (void)eighthiters;                \
  (void)whichdir;                   \
  (void)allow_hp;                   \
  (void)forced_stop;                \
  (void)hstep;                      \
  (void)rr;                         \
  (void)rc;                         \
                                    \
  (void)tr;                         \
  (void)tc;                         \
  (void)sse;                        \
  (void)thismse;                    \
  (void)cost_list;                  \
  (void)use_accurate_subpel_search;
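// COMMON_MV_TEST leans on SETUP_SUBPEL_SEARCH (defined earlier in this file)
// to declare the usual sub-pel search locals, then casts them all to void so
// these test-only stubs compile without unused-variable warnings.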
// Return the maximum MV.
uint32_t vp9_return_max_sub_pixel_mv(
    const MACROBLOCK *x, MV *bestmv, const MV *ref_mv, int allow_hp,
    int error_per_bit, const vp9_variance_fn_ptr_t *vfp, int forced_stop,
    int iters_per_step, int *cost_list, int *mvjcost, int *mvcost[2],
    uint32_t *distortion, uint32_t *sse1, const uint8_t *second_pred, int w,
    int h, int use_accurate_subpel_search) {
  COMMON_MV_TEST;
  (void)minr;
  (void)minc;
  bestmv->row = maxr;
  bestmv->col = maxc;
  besterr = 0;
  // In the sub-pel motion search, if hp is not used, then the last bit of mv
  // has to be 0.
  lower_mv_precision(bestmv, allow_hp && use_mv_hp(ref_mv));
  return besterr;
}
// Return the minimum MV.
uint32_t vp9_return_min_sub_pixel_mv(
    const MACROBLOCK *x, MV *bestmv, const MV *ref_mv, int allow_hp,
    int error_per_bit, const vp9_variance_fn_ptr_t *vfp, int forced_stop,
    int iters_per_step, int *cost_list, int *mvjcost, int *mvcost[2],
    uint32_t *distortion, uint32_t *sse1, const uint8_t *second_pred, int w,
    int h, int use_accurate_subpel_search) {
  COMMON_MV_TEST;
  (void)maxr;
  (void)maxc;
  bestmv->row = minr;
  bestmv->col = minc;
  besterr = 0;
  // In the sub-pel motion search, if hp is not used, then the last bit of mv
  // has to be 0.
  lower_mv_precision(bestmv, allow_hp && use_mv_hp(ref_mv));
  return besterr;
}