mcomp.c 58 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913
  1. /*
  2. * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  3. *
  4. * Use of this source code is governed by a BSD-style license
  5. * that can be found in the LICENSE file in the root of the source
  6. * tree. An additional intellectual property rights grant can be found
  7. * in the file PATENTS. All contributing project authors may
  8. * be found in the AUTHORS file in the root of the source tree.
  9. */
  10. #include "./vp8_rtcd.h"
  11. #include "./vpx_dsp_rtcd.h"
  12. #include "onyx_int.h"
  13. #include "mcomp.h"
  14. #include "vpx_mem/vpx_mem.h"
  15. #include "vpx_config.h"
  16. #include <stdio.h>
  17. #include <limits.h>
  18. #include <math.h>
  19. #include "vp8/common/findnearmv.h"
  20. #include "vp8/common/common.h"
  21. #include "vpx_dsp/vpx_dsp_common.h"
  /* Counter tables that are compiled in only when VP8_ENTROPY_STATS is defined.
   * Nothing in the visible part of this file reads or writes them. */
  22. #ifdef VP8_ENTROPY_STATS
  23. static int mv_ref_ct[31][4][2];
  24. static int mv_mode_cts[4][2];
  25. #endif
  26. int vp8_mv_bit_cost(int_mv *mv, int_mv *ref, int *mvcost[2], int Weight) {
  27. /* MV costing is based on the distribution of vectors in the previous
  28. * frame and as such will tend to over state the cost of vectors. In
  29. * addition coding a new vector can have a knock on effect on the cost
  30. * of subsequent vectors and the quality of prediction from NEAR and
  31. * NEAREST for subsequent blocks. The "Weight" parameter allows, to a
  32. * limited extent, for some account to be taken of these factors.
  33. */
  34. return ((mvcost[0][(mv->as_mv.row - ref->as_mv.row) >> 1] +
  35. mvcost[1][(mv->as_mv.col - ref->as_mv.col) >> 1]) *
  36. Weight) >>
  37. 7;
  38. }
  39. static int mv_err_cost(int_mv *mv, int_mv *ref, int *mvcost[2],
  40. int error_per_bit) {
  41. /* Ignore mv costing if mvcost is NULL */
  42. if (mvcost) {
  43. return ((mvcost[0][(mv->as_mv.row - ref->as_mv.row) >> 1] +
  44. mvcost[1][(mv->as_mv.col - ref->as_mv.col) >> 1]) *
  45. error_per_bit +
  46. 128) >>
  47. 8;
  48. }
  49. return 0;
  50. }
  51. static int mvsad_err_cost(int_mv *mv, int_mv *ref, int *mvsadcost[2],
  52. int error_per_bit) {
  53. /* Calculate sad error cost on full pixel basis. */
  54. /* Ignore mv costing if mvsadcost is NULL */
  55. if (mvsadcost) {
  56. return ((mvsadcost[0][(mv->as_mv.row - ref->as_mv.row)] +
  57. mvsadcost[1][(mv->as_mv.col - ref->as_mv.col)]) *
  58. error_per_bit +
  59. 128) >>
  60. 8;
  61. }
  62. return 0;
  63. }
  64. void vp8_init_dsmotion_compensation(MACROBLOCK *x, int stride) {
  65. int Len;
  66. int search_site_count = 0;
  67. /* Generate offsets for 4 search sites per step. */
  68. Len = MAX_FIRST_STEP;
  69. x->ss[search_site_count].mv.col = 0;
  70. x->ss[search_site_count].mv.row = 0;
  71. x->ss[search_site_count].offset = 0;
  72. search_site_count++;
  73. while (Len > 0) {
  74. /* Compute offsets for search sites. */
  75. x->ss[search_site_count].mv.col = 0;
  76. x->ss[search_site_count].mv.row = -Len;
  77. x->ss[search_site_count].offset = -Len * stride;
  78. search_site_count++;
  79. /* Compute offsets for search sites. */
  80. x->ss[search_site_count].mv.col = 0;
  81. x->ss[search_site_count].mv.row = Len;
  82. x->ss[search_site_count].offset = Len * stride;
  83. search_site_count++;
  84. /* Compute offsets for search sites. */
  85. x->ss[search_site_count].mv.col = -Len;
  86. x->ss[search_site_count].mv.row = 0;
  87. x->ss[search_site_count].offset = -Len;
  88. search_site_count++;
  89. /* Compute offsets for search sites. */
  90. x->ss[search_site_count].mv.col = Len;
  91. x->ss[search_site_count].mv.row = 0;
  92. x->ss[search_site_count].offset = Len;
  93. search_site_count++;
  94. /* Contract. */
  95. Len /= 2;
  96. }
  97. x->ss_count = search_site_count;
  98. x->searches_per_step = 4;
  99. }
  100. void vp8_init3smotion_compensation(MACROBLOCK *x, int stride) {
  101. int Len;
  102. int search_site_count = 0;
  103. /* Generate offsets for 8 search sites per step. */
  104. Len = MAX_FIRST_STEP;
  105. x->ss[search_site_count].mv.col = 0;
  106. x->ss[search_site_count].mv.row = 0;
  107. x->ss[search_site_count].offset = 0;
  108. search_site_count++;
  109. while (Len > 0) {
  110. /* Compute offsets for search sites. */
  111. x->ss[search_site_count].mv.col = 0;
  112. x->ss[search_site_count].mv.row = -Len;
  113. x->ss[search_site_count].offset = -Len * stride;
  114. search_site_count++;
  115. /* Compute offsets for search sites. */
  116. x->ss[search_site_count].mv.col = 0;
  117. x->ss[search_site_count].mv.row = Len;
  118. x->ss[search_site_count].offset = Len * stride;
  119. search_site_count++;
  120. /* Compute offsets for search sites. */
  121. x->ss[search_site_count].mv.col = -Len;
  122. x->ss[search_site_count].mv.row = 0;
  123. x->ss[search_site_count].offset = -Len;
  124. search_site_count++;
  125. /* Compute offsets for search sites. */
  126. x->ss[search_site_count].mv.col = Len;
  127. x->ss[search_site_count].mv.row = 0;
  128. x->ss[search_site_count].offset = Len;
  129. search_site_count++;
  130. /* Compute offsets for search sites. */
  131. x->ss[search_site_count].mv.col = -Len;
  132. x->ss[search_site_count].mv.row = -Len;
  133. x->ss[search_site_count].offset = -Len * stride - Len;
  134. search_site_count++;
  135. /* Compute offsets for search sites. */
  136. x->ss[search_site_count].mv.col = Len;
  137. x->ss[search_site_count].mv.row = -Len;
  138. x->ss[search_site_count].offset = -Len * stride + Len;
  139. search_site_count++;
  140. /* Compute offsets for search sites. */
  141. x->ss[search_site_count].mv.col = -Len;
  142. x->ss[search_site_count].mv.row = Len;
  143. x->ss[search_site_count].offset = Len * stride - Len;
  144. search_site_count++;
  145. /* Compute offsets for search sites. */
  146. x->ss[search_site_count].mv.col = Len;
  147. x->ss[search_site_count].mv.row = Len;
  148. x->ss[search_site_count].offset = Len * stride + Len;
  149. search_site_count++;
  150. /* Contract. */
  151. Len /= 2;
  152. }
  153. x->ss_count = search_site_count;
  154. x->searches_per_step = 8;
  155. }
  156. /*
  157. * To avoid the penalty of reads that cross a cache line, the reference area
  158. * is preloaded into a small buffer that is aligned so that reads from it
  159. * never cross a cache line. This reduces the CPU cycles spent on reading
  160. * reference data in the sub-pixel filter functions.
  161. * TODO: Currently, since the sub-pixel search range here is -3 ~ 3, a 22 rows x
  162. * 32 cols area is copied, which is enough for a 16x16 macroblock. Later, for
  163. * SPLITMV, the area could be reduced.
  164. */
  /*
   * Helper macros for the iterative sub-pixel search that follows.
   *
   * None of them is self-contained: they expand to code that uses locals of the
   * enclosing function by name (mvcost, rr, rc, error_per_bit, y, y_stride,
   * offset, vfp, z, b, sse, minc, maxc, minr, maxr, thismse, besterr, br, bc,
   * distortion, sse1). The r / c arguments are expanded more than once, so they
   * must be free of side effects.
   */
  165. /* estimated cost of a motion vector (r,c): table lookup relative to
   * (rr, rc), scaled by error_per_bit and rounded ((v + 128) >> 8); evaluates
   * to 0 when mvcost is NULL */
  166. #define MVC(r, c) \
  167. (mvcost \
  168. ? ((mvcost[0][(r)-rr] + mvcost[1][(c)-rc]) * error_per_bit + 128) >> 8 \
  169. : 0)
  170. /* pointer to predictor base of a motion vector: (r) >> 2 and (c) >> 2 pick
   * the row / column, and `offset` is subtracted so the result is relative to
   * y */
  171. #define PRE(r, c) (y + (((r) >> 2) * y_stride + ((c) >> 2) - (offset)))
  172. /* convert motion vector component to offset for svf calc: the low two bits
   * of the component, doubled */
  173. #define SP(x) (((x)&3) << 1)
  174. /* calls the sub-pixel variance function vfp->svf for position (r,c); the SSE
   * is written to the local `sse` */
  175. #define DIST(r, c) \
  176. vfp->svf(PRE(r, c), y_stride, SP(c), SP(r), z, b->src_stride, &sse)
  /* Runs statement `s` when (r,c) lies inside [minr, maxr] x [minc, maxc] and
   * statement `e` otherwise. Expands to an unbraced if/else and appends a ';'
   * after `e`. */
  177. #define IFMVCV(r, c, s, e) \
  178. if (c >= minc && c <= maxc && r >= minr && r <= maxr) s else e;
  179. /* returns distortion + motion vector cost (not used in the visible part of
   * this file) */
  180. #define ERR(r, c) (MVC(r, c) + DIST(r, c))
  181. /* checks if (r,c) has better score than previous best: the score is stored
   * in `v`; when it is lower than besterr the best position (br, bc), besterr,
   * *distortion and *sse1 are updated. For a position outside the allowed
   * range `v` is set to UINT_MAX and nothing else changes. */
  182. #define CHECK_BETTER(v, r, c) \
  183. IFMVCV(r, c, \
  184. { \
  185. thismse = DIST(r, c); \
  186. if ((v = (MVC(r, c) + thismse)) < besterr) { \
  187. besterr = v; \
  188. br = r; \
  189. bc = c; \
  190. *distortion = thismse; \
  191. *sse1 = sse; \
  192. } \
  193. }, \
  194. v = UINT_MAX;)
  /*
   * Iterative sub-pixel refinement around *bestmv.
   *
   * On entry *bestmv is used as a whole-pixel offset into the predictor
   * (row * stride + col). The search itself runs on a grid with four steps per
   * pixel (br/bc and tr/tc): up to three rounds with a step of 2 (1/2 pel),
   * then up to three rounds with a step of 1 (1/4 pel). Each round tests the
   * left, right, up and down neighbours of the current centre plus one
   * diagonal picked from the lower-scoring horizontal and vertical neighbour.
   * A phase stops early when a round leaves the centre unchanged.
   *
   * Candidates are restricted to [minr, maxr] x [minc, maxc], which combine
   * the x->mv_*_min / x->mv_*_max limits with a window of
   * +/- ((1 << mvlong_width) - 1) around the halved ref_mv.
   *
   * Outputs: *bestmv is rewritten as (br * 2, bc * 2); *distortion and *sse1
   * receive the variance result and SSE of the winning position.
   *
   * Returns besterr (variance plus vector cost), or INT_MAX when the final
   * vector differs from ref_mv by more than (MAX_FULL_PEL_VAL << 3) in either
   * component.
   */
  195. int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
  196. int_mv *bestmv, int_mv *ref_mv,
  197. int error_per_bit,
  198. const vp8_variance_fn_ptr_t *vfp,
  199. int *mvcost[2], int *distortion,
  200. unsigned int *sse1) {
  /* z: start of the source block that candidates are compared against. */
  201. unsigned char *z = (*(b->base_src) + b->src);
  /* rr/rc: halved reference vector, the origin for the MVC() table lookups.
   * br/bc: best position so far; tr/tc: centre of the current round. */
  202. int rr = ref_mv->as_mv.row >> 1, rc = ref_mv->as_mv.col >> 1;
  203. int br = bestmv->as_mv.row * 4, bc = bestmv->as_mv.col * 4;
  204. int tr = br, tc = bc;
  205. unsigned int besterr;
  206. unsigned int left, right, up, down, diag;
  207. unsigned int sse;
  208. unsigned int whichdir;
  /* The loop conditions pre-decrement these, so 4 allows three rounds. */
  209. unsigned int halfiters = 4;
  210. unsigned int quarteriters = 4;
  211. int thismse;
  212. int minc = VPXMAX(x->mv_col_min * 4,
  213. (ref_mv->as_mv.col >> 1) - ((1 << mvlong_width) - 1));
  214. int maxc = VPXMIN(x->mv_col_max * 4,
  215. (ref_mv->as_mv.col >> 1) + ((1 << mvlong_width) - 1));
  216. int minr = VPXMAX(x->mv_row_min * 4,
  217. (ref_mv->as_mv.row >> 1) - ((1 << mvlong_width) - 1));
  218. int maxr = VPXMIN(x->mv_row_max * 4,
  219. (ref_mv->as_mv.row >> 1) + ((1 << mvlong_width) - 1));
  220. int y_stride;
  221. int offset;
  222. int pre_stride = x->e_mbd.pre.y_stride;
  223. unsigned char *base_pre = x->e_mbd.pre.y_buffer;
  /* x86 builds search in a copy of the reference area held in xd->y_buf
   * (stride 32); other builds read the reference frame directly. */
  224. #if ARCH_X86 || ARCH_X86_64
  225. MACROBLOCKD *xd = &x->e_mbd;
  226. unsigned char *y_0 = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride +
  227. bestmv->as_mv.col;
  228. unsigned char *y;
  229. int buf_r1, buf_r2, buf_c1;
  230. /* Clamping to avoid out-of-range data access */
  /* buf_r1 / buf_r2: rows copied above / below the block; buf_c1: columns
   * copied to its left. Each is 3 unless the mv limit is closer than that. */
  231. buf_r1 = ((bestmv->as_mv.row - 3) < x->mv_row_min)
  232. ? (bestmv->as_mv.row - x->mv_row_min)
  233. : 3;
  234. buf_r2 = ((bestmv->as_mv.row + 3) > x->mv_row_max)
  235. ? (x->mv_row_max - bestmv->as_mv.row)
  236. : 3;
  237. buf_c1 = ((bestmv->as_mv.col - 3) < x->mv_col_min)
  238. ? (bestmv->as_mv.col - x->mv_col_min)
  239. : 3;
  240. y_stride = 32;
  241. /* Copy to intermediate buffer before searching. */
  242. vfp->copymem(y_0 - buf_c1 - pre_stride * buf_r1, pre_stride, xd->y_buf,
  243. y_stride, 16 + buf_r1 + buf_r2);
  /* y: position inside the copy that corresponds to the start vector. */
  244. y = xd->y_buf + y_stride * buf_r1 + buf_c1;
  245. #else
  246. unsigned char *y = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride +
  247. bestmv->as_mv.col;
  248. y_stride = pre_stride;
  249. #endif
  /* Whole-pixel position of the start vector on the y_stride grid. PRE()
   * subtracts it, so PRE(br, bc) at the start position equals y. */
  250. offset = (bestmv->as_mv.row) * y_stride + bestmv->as_mv.col;
  251. /* central mv */
  252. bestmv->as_mv.row *= 8;
  253. bestmv->as_mv.col *= 8;
  254. /* calculate central point error */
  255. besterr = vfp->vf(y, y_stride, z, b->src_stride, sse1);
  256. *distortion = besterr;
  257. besterr += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit);
  258. /* TODO: Each subsequent iteration checks at least one point in common
  259. * with the last iteration could be 2 ( if diag selected)
  260. */
  261. while (--halfiters) {
  262. /* 1/2 pel */
  263. CHECK_BETTER(left, tr, tc - 2);
  264. CHECK_BETTER(right, tr, tc + 2);
  265. CHECK_BETTER(up, tr - 2, tc);
  266. CHECK_BETTER(down, tr + 2, tc);
  /* Bit 0: right is not worse than left... i.e. 0 = left lower, 1 = otherwise;
   * bit 1: 0 = up lower, 2 = otherwise. Selects the diagonal to test. */
  267. whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
  268. switch (whichdir) {
  269. case 0: CHECK_BETTER(diag, tr - 2, tc - 2); break;
  270. case 1: CHECK_BETTER(diag, tr - 2, tc + 2); break;
  271. case 2: CHECK_BETTER(diag, tr + 2, tc - 2); break;
  272. case 3: CHECK_BETTER(diag, tr + 2, tc + 2); break;
  273. }
  274. /* no reason to check the same one again. */
  275. if (tr == br && tc == bc) break;
  276. tr = br;
  277. tc = bc;
  278. }
  279. /* TODO: Each subsequent iteration checks at least one point in common
  280. * with the last iteration could be 2 ( if diag selected)
  281. */
  282. /* 1/4 pel */
  283. while (--quarteriters) {
  284. CHECK_BETTER(left, tr, tc - 1);
  285. CHECK_BETTER(right, tr, tc + 1);
  286. CHECK_BETTER(up, tr - 1, tc);
  287. CHECK_BETTER(down, tr + 1, tc);
  288. whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
  289. switch (whichdir) {
  290. case 0: CHECK_BETTER(diag, tr - 1, tc - 1); break;
  291. case 1: CHECK_BETTER(diag, tr - 1, tc + 1); break;
  292. case 2: CHECK_BETTER(diag, tr + 1, tc - 1); break;
  293. case 3: CHECK_BETTER(diag, tr + 1, tc + 1); break;
  294. }
  295. /* no reason to check the same one again. */
  296. if (tr == br && tc == bc) break;
  297. tr = br;
  298. tc = bc;
  299. }
  /* Convert the best grid position back to the units of *bestmv (the start
   * vector was scaled by 8 above, the grid by 4). */
  300. bestmv->as_mv.row = br * 2;
  301. bestmv->as_mv.col = bc * 2;
  /* Reject results that ended up too far from the reference vector. */
  302. if ((abs(bestmv->as_mv.col - ref_mv->as_mv.col) > (MAX_FULL_PEL_VAL << 3)) ||
  303. (abs(bestmv->as_mv.row - ref_mv->as_mv.row) > (MAX_FULL_PEL_VAL << 3))) {
  304. return INT_MAX;
  305. }
  306. return besterr;
  307. }
  /* The sub-pixel helper macros are only meant for the function above; drop
   * them here. CHECK_BETTER is defined again, with a different meaning, further
   * down in this file. */
  308. #undef MVC
  309. #undef PRE
  310. #undef SP
  311. #undef DIST
  312. #undef IFMVCV
  313. #undef ERR
  314. #undef CHECK_BETTER
  /*
   * Single-pass sub-pixel refinement of *bestmv: one half-pel stage followed by
   * one quarter-pel stage.
   *
   * On entry *bestmv is used as a whole-pixel offset into the predictor
   * (row * stride + col). It is then multiplied by 8, and all candidates are
   * expressed in those units: a half-pel neighbour differs by 4 and a
   * quarter-pel neighbour by 2. The low three bits of a component are what is
   * passed to vfp->svf as the sub-pixel offset.
   *
   * Each stage evaluates left, right, up and down of the stage's start vector
   * and then one diagonal, chosen from the lower-scoring horizontal and
   * vertical neighbour. A candidate replaces the current best only when its
   * score (svf result + mv_err_cost) is strictly lower.
   *
   * Outputs: *bestmv is the winning vector, *distortion the svf/vf result
   * without the vector cost and *sse1 the corresponding SSE.
   * Returns the best score (distortion plus vector cost).
   */
  315. int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
  316. int_mv *bestmv, int_mv *ref_mv,
  317. int error_per_bit,
  318. const vp8_variance_fn_ptr_t *vfp,
  319. int *mvcost[2], int *distortion,
  320. unsigned int *sse1) {
  321. int bestmse = INT_MAX;
  322. int_mv startmv;
  323. int_mv this_mv;
  /* z: start of the source block that candidates are compared against. */
  324. unsigned char *z = (*(b->base_src) + b->src);
  325. int left, right, up, down, diag;
  326. unsigned int sse;
  327. int whichdir;
  328. int thismse;
  329. int y_stride;
  330. int pre_stride = x->e_mbd.pre.y_stride;
  331. unsigned char *base_pre = x->e_mbd.pre.y_buffer;
  /* x86 builds search in a copy held in xd->y_buf (stride 32) that starts one
   * row above and one column left of the block; other builds read the
   * reference frame directly. */
  332. #if ARCH_X86 || ARCH_X86_64
  333. MACROBLOCKD *xd = &x->e_mbd;
  334. unsigned char *y_0 = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride +
  335. bestmv->as_mv.col;
  336. unsigned char *y;
  337. y_stride = 32;
  338. /* Copy 18 rows x 32 cols area to intermediate buffer before searching. */
  339. vfp->copymem(y_0 - 1 - pre_stride, pre_stride, xd->y_buf, y_stride, 18);
  340. y = xd->y_buf + y_stride + 1;
  341. #else
  342. unsigned char *y = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride +
  343. bestmv->as_mv.col;
  344. y_stride = pre_stride;
  345. #endif
  346. /* central mv */
  347. bestmv->as_mv.row *= 8;
  348. bestmv->as_mv.col *= 8;
  349. startmv = *bestmv;
  350. /* calculate central point error */
  351. bestmse = vfp->vf(y, y_stride, z, b->src_stride, sse1);
  352. *distortion = bestmse;
  353. bestmse += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit);
  354. /* go left then right and check error */
  355. this_mv.as_mv.row = startmv.as_mv.row;
  /* startmv is a multiple of 8 here, so (v - 8) | 4 equals v - 4. */
  356. this_mv.as_mv.col = ((startmv.as_mv.col - 8) | 4);
  357. /* "halfpix" horizontal variance */
  358. thismse = vfp->svf(y - 1, y_stride, 4, 0, z, b->src_stride, &sse);
  359. left = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
  360. if (left < bestmse) {
  361. *bestmv = this_mv;
  362. bestmse = left;
  363. *distortion = thismse;
  364. *sse1 = sse;
  365. }
  366. this_mv.as_mv.col += 8;
  367. /* "halfpix" horizontal variance */
  368. thismse = vfp->svf(y, y_stride, 4, 0, z, b->src_stride, &sse);
  369. right = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
  370. if (right < bestmse) {
  371. *bestmv = this_mv;
  372. bestmse = right;
  373. *distortion = thismse;
  374. *sse1 = sse;
  375. }
  376. /* go up then down and check error */
  377. this_mv.as_mv.col = startmv.as_mv.col;
  378. this_mv.as_mv.row = ((startmv.as_mv.row - 8) | 4);
  379. /* "halfpix" vertical variance */
  380. thismse = vfp->svf(y - y_stride, y_stride, 0, 4, z, b->src_stride, &sse);
  381. up = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
  382. if (up < bestmse) {
  383. *bestmv = this_mv;
  384. bestmse = up;
  385. *distortion = thismse;
  386. *sse1 = sse;
  387. }
  388. this_mv.as_mv.row += 8;
  389. /* "halfpix" vertical variance */
  390. thismse = vfp->svf(y, y_stride, 0, 4, z, b->src_stride, &sse);
  391. down = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
  392. if (down < bestmse) {
  393. *bestmv = this_mv;
  394. bestmse = down;
  395. *distortion = thismse;
  396. *sse1 = sse;
  397. }
  398. /* now check 1 more diagonal */
  /* 0 = up-left, 1 = up-right, 2 = down-left, 3 = down-right. */
  399. whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
  400. this_mv = startmv;
  401. switch (whichdir) {
  402. case 0:
  403. this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4;
  404. this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4;
  405. /* "halfpix" horizontal/vertical variance */
  406. thismse =
  407. vfp->svf(y - 1 - y_stride, y_stride, 4, 4, z, b->src_stride, &sse);
  408. break;
  409. case 1:
  410. this_mv.as_mv.col += 4;
  411. this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4;
  412. /* "halfpix" horizontal/vertical variance */
  413. thismse = vfp->svf(y - y_stride, y_stride, 4, 4, z, b->src_stride, &sse);
  414. break;
  415. case 2:
  416. this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4;
  417. this_mv.as_mv.row += 4;
  418. /* "halfpix" horizontal/vertical variance */
  419. thismse = vfp->svf(y - 1, y_stride, 4, 4, z, b->src_stride, &sse);
  420. break;
  421. case 3:
  422. default:
  423. this_mv.as_mv.col += 4;
  424. this_mv.as_mv.row += 4;
  425. /* "halfpix" horizontal/vertical variance */
  426. thismse = vfp->svf(y, y_stride, 4, 4, z, b->src_stride, &sse);
  427. break;
  428. }
  429. diag = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
  430. if (diag < bestmse) {
  431. *bestmv = this_mv;
  432. bestmse = diag;
  433. *distortion = thismse;
  434. *sse1 = sse;
  435. }
  436. /* time to check quarter pels. */
  /* If the half-pel winner lies above / left of the start, move the base
   * pointer one row / column back so that (component & 7) is the offset from
   * y. */
  437. if (bestmv->as_mv.row < startmv.as_mv.row) y -= y_stride;
  438. if (bestmv->as_mv.col < startmv.as_mv.col) y--;
  439. startmv = *bestmv;
  440. /* go left then right and check error */
  441. this_mv.as_mv.row = startmv.as_mv.row;
  /* With a non-zero sub-pixel column the left neighbour stays within the same
   * pixel; otherwise it is offset 6 of the previous pixel ((v - 8) | 6 equals
   * v - 2 when the low three bits of v are zero). */
  442. if (startmv.as_mv.col & 7) {
  443. this_mv.as_mv.col = startmv.as_mv.col - 2;
  444. thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7,
  445. this_mv.as_mv.row & 7, z, b->src_stride, &sse);
  446. } else {
  447. this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6;
  448. thismse = vfp->svf(y - 1, y_stride, 6, this_mv.as_mv.row & 7, z,
  449. b->src_stride, &sse);
  450. }
  451. left = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
  452. if (left < bestmse) {
  453. *bestmv = this_mv;
  454. bestmse = left;
  455. *distortion = thismse;
  456. *sse1 = sse;
  457. }
  /* From the left candidate (start - 2) to the right candidate (start + 2). */
  458. this_mv.as_mv.col += 4;
  459. thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7,
  460. z, b->src_stride, &sse);
  461. right = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
  462. if (right < bestmse) {
  463. *bestmv = this_mv;
  464. bestmse = right;
  465. *distortion = thismse;
  466. *sse1 = sse;
  467. }
  468. /* go up then down and check error */
  469. this_mv.as_mv.col = startmv.as_mv.col;
  470. if (startmv.as_mv.row & 7) {
  471. this_mv.as_mv.row = startmv.as_mv.row - 2;
  472. thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7,
  473. this_mv.as_mv.row & 7, z, b->src_stride, &sse);
  474. } else {
  475. this_mv.as_mv.row = (startmv.as_mv.row - 8) | 6;
  476. thismse = vfp->svf(y - y_stride, y_stride, this_mv.as_mv.col & 7, 6, z,
  477. b->src_stride, &sse);
  478. }
  479. up = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
  480. if (up < bestmse) {
  481. *bestmv = this_mv;
  482. bestmse = up;
  483. *distortion = thismse;
  484. *sse1 = sse;
  485. }
  /* From the up candidate (start - 2) to the down candidate (start + 2). */
  486. this_mv.as_mv.row += 4;
  487. thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7,
  488. z, b->src_stride, &sse);
  489. down = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
  490. if (down < bestmse) {
  491. *bestmv = this_mv;
  492. bestmse = down;
  493. *distortion = thismse;
  494. *sse1 = sse;
  495. }
  496. /* now check 1 more diagonal */
  /* 0 = up-left, 1 = up-right, 2 = down-left, 3 = down-right. Moves towards
   * negative row / column need the same "previous pixel" handling as above. */
  497. whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
  498. this_mv = startmv;
  499. switch (whichdir) {
  500. case 0:
  501. if (startmv.as_mv.row & 7) {
  502. this_mv.as_mv.row -= 2;
  503. if (startmv.as_mv.col & 7) {
  504. this_mv.as_mv.col -= 2;
  505. thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7,
  506. this_mv.as_mv.row & 7, z, b->src_stride, &sse);
  507. } else {
  508. this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6;
  509. thismse = vfp->svf(y - 1, y_stride, 6, this_mv.as_mv.row & 7, z,
  510. b->src_stride, &sse);
  511. }
  512. } else {
  513. this_mv.as_mv.row = (startmv.as_mv.row - 8) | 6;
  514. if (startmv.as_mv.col & 7) {
  515. this_mv.as_mv.col -= 2;
  516. thismse = vfp->svf(y - y_stride, y_stride, this_mv.as_mv.col & 7, 6,
  517. z, b->src_stride, &sse);
  518. } else {
  519. this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6;
  520. thismse = vfp->svf(y - y_stride - 1, y_stride, 6, 6, z, b->src_stride,
  521. &sse);
  522. }
  523. }
  524. break;
  525. case 1:
  526. this_mv.as_mv.col += 2;
  527. if (startmv.as_mv.row & 7) {
  528. this_mv.as_mv.row -= 2;
  529. thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7,
  530. this_mv.as_mv.row & 7, z, b->src_stride, &sse);
  531. } else {
  532. this_mv.as_mv.row = (startmv.as_mv.row - 8) | 6;
  533. thismse = vfp->svf(y - y_stride, y_stride, this_mv.as_mv.col & 7, 6, z,
  534. b->src_stride, &sse);
  535. }
  536. break;
  537. case 2:
  538. this_mv.as_mv.row += 2;
  539. if (startmv.as_mv.col & 7) {
  540. this_mv.as_mv.col -= 2;
  541. thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7,
  542. this_mv.as_mv.row & 7, z, b->src_stride, &sse);
  543. } else {
  544. this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6;
  545. thismse = vfp->svf(y - 1, y_stride, 6, this_mv.as_mv.row & 7, z,
  546. b->src_stride, &sse);
  547. }
  548. break;
  549. case 3:
  550. this_mv.as_mv.col += 2;
  551. this_mv.as_mv.row += 2;
  552. thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7,
  553. this_mv.as_mv.row & 7, z, b->src_stride, &sse);
  554. break;
  555. }
  556. diag = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
  557. if (diag < bestmse) {
  558. *bestmv = this_mv;
  559. bestmse = diag;
  560. *distortion = thismse;
  561. *sse1 = sse;
  562. }
  563. return bestmse;
  564. }
  /*
   * Half-pel refinement of *bestmv (no quarter-pel stage).
   *
   * On entry *bestmv is used as a whole-pixel offset into the predictor
   * (row * stride + col). It is then multiplied by 8, and the half-pel
   * candidates differ from it by 4 in those units.
   *
   * The centre is scored with vfp->vf; left, right, up and down and then one
   * diagonal (chosen from the lower-scoring horizontal and vertical neighbour)
   * are scored with vfp->svf using a sub-pixel offset of 4. A candidate replaces
   * the current best only when its score (variance + mv_err_cost) is strictly
   * lower.
   *
   * Outputs: *bestmv is the winning vector, *distortion the variance result
   * without the vector cost and *sse1 the corresponding SSE.
   * Returns the best score (distortion plus vector cost).
   */
  565. int vp8_find_best_half_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
  566. int_mv *bestmv, int_mv *ref_mv,
  567. int error_per_bit,
  568. const vp8_variance_fn_ptr_t *vfp,
  569. int *mvcost[2], int *distortion,
  570. unsigned int *sse1) {
  571. int bestmse = INT_MAX;
  572. int_mv startmv;
  573. int_mv this_mv;
  /* z: start of the source block that candidates are compared against. */
  574. unsigned char *z = (*(b->base_src) + b->src);
  575. int left, right, up, down, diag;
  576. unsigned int sse;
  577. int whichdir;
  578. int thismse;
  579. int y_stride;
  580. int pre_stride = x->e_mbd.pre.y_stride;
  581. unsigned char *base_pre = x->e_mbd.pre.y_buffer;
  /* x86 builds search in a copy held in xd->y_buf (stride 32) that starts one
   * row above and one column left of the block; other builds read the
   * reference frame directly. */
  582. #if ARCH_X86 || ARCH_X86_64
  583. MACROBLOCKD *xd = &x->e_mbd;
  584. unsigned char *y_0 = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride +
  585. bestmv->as_mv.col;
  586. unsigned char *y;
  587. y_stride = 32;
  588. /* Copy 18 rows x 32 cols area to intermediate buffer before searching. */
  589. vfp->copymem(y_0 - 1 - pre_stride, pre_stride, xd->y_buf, y_stride, 18);
  590. y = xd->y_buf + y_stride + 1;
  591. #else
  592. unsigned char *y = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride +
  593. bestmv->as_mv.col;
  594. y_stride = pre_stride;
  595. #endif
  596. /* central mv */
  597. bestmv->as_mv.row *= 8;
  598. bestmv->as_mv.col *= 8;
  599. startmv = *bestmv;
  600. /* calculate central point error */
  601. bestmse = vfp->vf(y, y_stride, z, b->src_stride, sse1);
  602. *distortion = bestmse;
  603. bestmse += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit);
  604. /* go left then right and check error */
  605. this_mv.as_mv.row = startmv.as_mv.row;
  /* startmv is a multiple of 8 here, so (v - 8) | 4 equals v - 4. */
  606. this_mv.as_mv.col = ((startmv.as_mv.col - 8) | 4);
  607. /* "halfpix" horizontal variance */
  608. thismse = vfp->svf(y - 1, y_stride, 4, 0, z, b->src_stride, &sse);
  609. left = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
  610. if (left < bestmse) {
  611. *bestmv = this_mv;
  612. bestmse = left;
  613. *distortion = thismse;
  614. *sse1 = sse;
  615. }
  616. this_mv.as_mv.col += 8;
  617. /* "halfpix" horizontal variance */
  618. thismse = vfp->svf(y, y_stride, 4, 0, z, b->src_stride, &sse);
  619. right = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
  620. if (right < bestmse) {
  621. *bestmv = this_mv;
  622. bestmse = right;
  623. *distortion = thismse;
  624. *sse1 = sse;
  625. }
  626. /* go up then down and check error */
  627. this_mv.as_mv.col = startmv.as_mv.col;
  628. this_mv.as_mv.row = ((startmv.as_mv.row - 8) | 4);
  629. /* "halfpix" vertical variance */
  630. thismse = vfp->svf(y - y_stride, y_stride, 0, 4, z, b->src_stride, &sse);
  631. up = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
  632. if (up < bestmse) {
  633. *bestmv = this_mv;
  634. bestmse = up;
  635. *distortion = thismse;
  636. *sse1 = sse;
  637. }
  638. this_mv.as_mv.row += 8;
  639. /* "halfpix" vertical variance */
  640. thismse = vfp->svf(y, y_stride, 0, 4, z, b->src_stride, &sse);
  641. down = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
  642. if (down < bestmse) {
  643. *bestmv = this_mv;
  644. bestmse = down;
  645. *distortion = thismse;
  646. *sse1 = sse;
  647. }
  648. /* now check 1 more diagonal - */
  /* 0 = up-left, 1 = up-right, 2 = down-left, 3 = down-right. */
  649. whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
  650. this_mv = startmv;
  651. switch (whichdir) {
  652. case 0:
  653. this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4;
  654. this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4;
  655. /* "halfpix" horizontal/vertical variance */
  656. thismse =
  657. vfp->svf(y - 1 - y_stride, y_stride, 4, 4, z, b->src_stride, &sse);
  658. break;
  659. case 1:
  660. this_mv.as_mv.col += 4;
  661. this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4;
  662. /* "halfpix" horizontal/vertical variance */
  663. thismse = vfp->svf(y - y_stride, y_stride, 4, 4, z, b->src_stride, &sse);
  664. break;
  665. case 2:
  666. this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4;
  667. this_mv.as_mv.row += 4;
  668. /* "halfpix" horizontal/vertical variance */
  669. thismse = vfp->svf(y - 1, y_stride, 4, 4, z, b->src_stride, &sse);
  670. break;
  671. case 3:
  672. default:
  673. this_mv.as_mv.col += 4;
  674. this_mv.as_mv.row += 4;
  675. /* "halfpix" horizontal/vertical variance */
  676. thismse = vfp->svf(y, y_stride, 4, 4, z, b->src_stride, &sse);
  677. break;
  678. }
  679. diag = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
  680. if (diag < bestmse) {
  681. *bestmv = this_mv;
  682. bestmse = diag;
  683. *distortion = thismse;
  684. *sse1 = sse;
  685. }
  686. return bestmse;
  687. }
  688. #define CHECK_BOUNDS(range) \
  689. { \
  690. all_in = 1; \
  691. all_in &= ((br - range) >= x->mv_row_min); \
  692. all_in &= ((br + range) <= x->mv_row_max); \
  693. all_in &= ((bc - range) >= x->mv_col_min); \
  694. all_in &= ((bc + range) <= x->mv_col_max); \
  695. }
  696. #define CHECK_POINT \
  697. { \
  698. if (this_mv.as_mv.col < x->mv_col_min) continue; \
  699. if (this_mv.as_mv.col > x->mv_col_max) continue; \
  700. if (this_mv.as_mv.row < x->mv_row_min) continue; \
  701. if (this_mv.as_mv.row > x->mv_row_max) continue; \
  702. }
  703. #define CHECK_BETTER \
  704. { \
  705. if (thissad < bestsad) { \
  706. thissad += \
  707. mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit); \
  708. if (thissad < bestsad) { \
  709. bestsad = thissad; \
  710. best_site = i; \
  711. } \
  712. } \
  713. }
  714. static const MV next_chkpts[6][3] = {
  715. { { -2, 0 }, { -1, -2 }, { 1, -2 } }, { { -1, -2 }, { 1, -2 }, { 2, 0 } },
  716. { { 1, -2 }, { 2, 0 }, { 1, 2 } }, { { 2, 0 }, { 1, 2 }, { -1, 2 } },
  717. { { 1, 2 }, { -1, 2 }, { -2, 0 } }, { { -1, 2 }, { -2, 0 }, { -1, -2 } }
  718. };
  719. int vp8_hex_search(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
  720. int_mv *best_mv, int search_param, int sad_per_bit,
  721. const vp8_variance_fn_ptr_t *vfp, int *mvsadcost[2],
  722. int *mvcost[2], int_mv *center_mv) {
  723. MV hex[6] = {
  724. { -1, -2 }, { 1, -2 }, { 2, 0 }, { 1, 2 }, { -1, 2 }, { -2, 0 }
  725. };
  726. MV neighbors[4] = { { 0, -1 }, { -1, 0 }, { 1, 0 }, { 0, 1 } };
  727. int i, j;
  728. unsigned char *what = (*(b->base_src) + b->src);
  729. int what_stride = b->src_stride;
  730. int pre_stride = x->e_mbd.pre.y_stride;
  731. unsigned char *base_pre = x->e_mbd.pre.y_buffer;
  732. int in_what_stride = pre_stride;
  733. int br, bc;
  734. int_mv this_mv;
  735. unsigned int bestsad;
  736. unsigned int thissad;
  737. unsigned char *base_offset;
  738. unsigned char *this_offset;
  739. int k = -1;
  740. int all_in;
  741. int best_site = -1;
  742. int hex_range = 127;
  743. int dia_range = 8;
  744. int_mv fcenter_mv;
  745. fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
  746. fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
  747. (void)mvcost;
  748. /* adjust ref_mv to make sure it is within MV range */
  749. vp8_clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min,
  750. x->mv_row_max);
  751. br = ref_mv->as_mv.row;
  752. bc = ref_mv->as_mv.col;
  753. /* Work out the start point for the search */
  754. base_offset = (unsigned char *)(base_pre + d->offset);
  755. this_offset = base_offset + (br * (pre_stride)) + bc;
  756. this_mv.as_mv.row = br;
  757. this_mv.as_mv.col = bc;
  758. bestsad = vfp->sdf(what, what_stride, this_offset, in_what_stride) +
  759. mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit);
  760. #if CONFIG_MULTI_RES_ENCODING
  761. /* Lower search range based on prediction info */
  762. if (search_param >= 6)
  763. goto cal_neighbors;
  764. else if (search_param >= 5)
  765. hex_range = 4;
  766. else if (search_param >= 4)
  767. hex_range = 6;
  768. else if (search_param >= 3)
  769. hex_range = 15;
  770. else if (search_param >= 2)
  771. hex_range = 31;
  772. else if (search_param >= 1)
  773. hex_range = 63;
  774. dia_range = 8;
  775. #else
  776. (void)search_param;
  777. #endif
  778. /* hex search */
  779. CHECK_BOUNDS(2)
  780. if (all_in) {
  781. for (i = 0; i < 6; ++i) {
  782. this_mv.as_mv.row = br + hex[i].row;
  783. this_mv.as_mv.col = bc + hex[i].col;
  784. this_offset = base_offset + (this_mv.as_mv.row * in_what_stride) +
  785. this_mv.as_mv.col;
  786. thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride);
  787. CHECK_BETTER
  788. }
  789. } else {
  790. for (i = 0; i < 6; ++i) {
  791. this_mv.as_mv.row = br + hex[i].row;
  792. this_mv.as_mv.col = bc + hex[i].col;
  793. CHECK_POINT
  794. this_offset = base_offset + (this_mv.as_mv.row * in_what_stride) +
  795. this_mv.as_mv.col;
  796. thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride);
  797. CHECK_BETTER
  798. }
  799. }
  800. if (best_site == -1) {
  801. goto cal_neighbors;
  802. } else {
  803. br += hex[best_site].row;
  804. bc += hex[best_site].col;
  805. k = best_site;
  806. }
  807. for (j = 1; j < hex_range; ++j) {
  808. best_site = -1;
  809. CHECK_BOUNDS(2)
  810. if (all_in) {
  811. for (i = 0; i < 3; ++i) {
  812. this_mv.as_mv.row = br + next_chkpts[k][i].row;
  813. this_mv.as_mv.col = bc + next_chkpts[k][i].col;
  814. this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) +
  815. this_mv.as_mv.col;
  816. thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride);
  817. CHECK_BETTER
  818. }
  819. } else {
  820. for (i = 0; i < 3; ++i) {
  821. this_mv.as_mv.row = br + next_chkpts[k][i].row;
  822. this_mv.as_mv.col = bc + next_chkpts[k][i].col;
  823. CHECK_POINT
  824. this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) +
  825. this_mv.as_mv.col;
  826. thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride);
  827. CHECK_BETTER
  828. }
  829. }
  830. if (best_site == -1) {
  831. break;
  832. } else {
  833. br += next_chkpts[k][best_site].row;
  834. bc += next_chkpts[k][best_site].col;
  835. k += 5 + best_site;
  836. if (k >= 12) {
  837. k -= 12;
  838. } else if (k >= 6) {
  839. k -= 6;
  840. }
  841. }
  842. }
  843. /* check 4 1-away neighbors */
  844. cal_neighbors:
  845. for (j = 0; j < dia_range; ++j) {
  846. best_site = -1;
  847. CHECK_BOUNDS(1)
  848. if (all_in) {
  849. for (i = 0; i < 4; ++i) {
  850. this_mv.as_mv.row = br + neighbors[i].row;
  851. this_mv.as_mv.col = bc + neighbors[i].col;
  852. this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) +
  853. this_mv.as_mv.col;
  854. thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride);
  855. CHECK_BETTER
  856. }
  857. } else {
  858. for (i = 0; i < 4; ++i) {
  859. this_mv.as_mv.row = br + neighbors[i].row;
  860. this_mv.as_mv.col = bc + neighbors[i].col;
  861. CHECK_POINT
  862. this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) +
  863. this_mv.as_mv.col;
  864. thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride);
  865. CHECK_BETTER
  866. }
  867. }
  868. if (best_site == -1) {
  869. break;
  870. } else {
  871. br += neighbors[best_site].row;
  872. bc += neighbors[best_site].col;
  873. }
  874. }
  875. best_mv->as_mv.row = br;
  876. best_mv->as_mv.col = bc;
  877. return bestsad;
  878. }
  879. #undef CHECK_BOUNDS
  880. #undef CHECK_POINT
  881. #undef CHECK_BETTER
  882. int vp8_diamond_search_sad_c(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
  883. int_mv *best_mv, int search_param, int sad_per_bit,
  884. int *num00, vp8_variance_fn_ptr_t *fn_ptr,
  885. int *mvcost[2], int_mv *center_mv) {
  886. int i, j, step;
  887. unsigned char *what = (*(b->base_src) + b->src);
  888. int what_stride = b->src_stride;
  889. unsigned char *in_what;
  890. int pre_stride = x->e_mbd.pre.y_stride;
  891. unsigned char *base_pre = x->e_mbd.pre.y_buffer;
  892. int in_what_stride = pre_stride;
  893. unsigned char *best_address;
  894. int tot_steps;
  895. int_mv this_mv;
  896. unsigned int bestsad;
  897. unsigned int thissad;
  898. int best_site = 0;
  899. int last_site = 0;
  900. int ref_row;
  901. int ref_col;
  902. int this_row_offset;
  903. int this_col_offset;
  904. search_site *ss;
  905. unsigned char *check_here;
  906. int *mvsadcost[2];
  907. int_mv fcenter_mv;
  908. mvsadcost[0] = x->mvsadcost[0];
  909. mvsadcost[1] = x->mvsadcost[1];
  910. fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
  911. fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
  912. vp8_clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min,
  913. x->mv_row_max);
  914. ref_row = ref_mv->as_mv.row;
  915. ref_col = ref_mv->as_mv.col;
  916. *num00 = 0;
  917. best_mv->as_mv.row = ref_row;
  918. best_mv->as_mv.col = ref_col;
  919. /* Work out the start point for the search */
  920. in_what = (unsigned char *)(base_pre + d->offset + (ref_row * pre_stride) +
  921. ref_col);
  922. best_address = in_what;
  923. /* Check the starting position */
  924. bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride) +
  925. mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);
  926. /* search_param determines the length of the initial step and hence
  927. * the number of iterations 0 = initial step (MAX_FIRST_STEP) pel :
  928. * 1 = (MAX_FIRST_STEP/2) pel, 2 = (MAX_FIRST_STEP/4) pel... etc.
  929. */
  930. ss = &x->ss[search_param * x->searches_per_step];
  931. tot_steps = (x->ss_count / x->searches_per_step) - search_param;
  932. i = 1;
  933. for (step = 0; step < tot_steps; ++step) {
  934. for (j = 0; j < x->searches_per_step; ++j) {
  935. /* Trap illegal vectors */
  936. this_row_offset = best_mv->as_mv.row + ss[i].mv.row;
  937. this_col_offset = best_mv->as_mv.col + ss[i].mv.col;
  938. if ((this_col_offset > x->mv_col_min) &&
  939. (this_col_offset < x->mv_col_max) &&
  940. (this_row_offset > x->mv_row_min) &&
  941. (this_row_offset < x->mv_row_max))
  942. {
  943. check_here = ss[i].offset + best_address;
  944. thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride);
  945. if (thissad < bestsad) {
  946. this_mv.as_mv.row = this_row_offset;
  947. this_mv.as_mv.col = this_col_offset;
  948. thissad +=
  949. mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit);
  950. if (thissad < bestsad) {
  951. bestsad = thissad;
  952. best_site = i;
  953. }
  954. }
  955. }
  956. i++;
  957. }
  958. if (best_site != last_site) {
  959. best_mv->as_mv.row += ss[best_site].mv.row;
  960. best_mv->as_mv.col += ss[best_site].mv.col;
  961. best_address += ss[best_site].offset;
  962. last_site = best_site;
  963. } else if (best_address == in_what) {
  964. (*num00)++;
  965. }
  966. }
  967. this_mv.as_mv.row = best_mv->as_mv.row << 3;
  968. this_mv.as_mv.col = best_mv->as_mv.col << 3;
  969. return fn_ptr->vf(what, what_stride, best_address, in_what_stride, &thissad) +
  970. mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
  971. }
  972. int vp8_diamond_search_sadx4(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
  973. int_mv *best_mv, int search_param, int sad_per_bit,
  974. int *num00, vp8_variance_fn_ptr_t *fn_ptr,
  975. int *mvcost[2], int_mv *center_mv) {
  976. int i, j, step;
  977. unsigned char *what = (*(b->base_src) + b->src);
  978. int what_stride = b->src_stride;
  979. unsigned char *in_what;
  980. int pre_stride = x->e_mbd.pre.y_stride;
  981. unsigned char *base_pre = x->e_mbd.pre.y_buffer;
  982. int in_what_stride = pre_stride;
  983. unsigned char *best_address;
  984. int tot_steps;
  985. int_mv this_mv;
  986. unsigned int bestsad;
  987. unsigned int thissad;
  988. int best_site = 0;
  989. int last_site = 0;
  990. int ref_row;
  991. int ref_col;
  992. int this_row_offset;
  993. int this_col_offset;
  994. search_site *ss;
  995. unsigned char *check_here;
  996. int *mvsadcost[2];
  997. int_mv fcenter_mv;
  998. mvsadcost[0] = x->mvsadcost[0];
  999. mvsadcost[1] = x->mvsadcost[1];
  1000. fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
  1001. fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
  1002. vp8_clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min,
  1003. x->mv_row_max);
  1004. ref_row = ref_mv->as_mv.row;
  1005. ref_col = ref_mv->as_mv.col;
  1006. *num00 = 0;
  1007. best_mv->as_mv.row = ref_row;
  1008. best_mv->as_mv.col = ref_col;
  1009. /* Work out the start point for the search */
  1010. in_what = (unsigned char *)(base_pre + d->offset + (ref_row * pre_stride) +
  1011. ref_col);
  1012. best_address = in_what;
  1013. /* Check the starting position */
  1014. bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride) +
  1015. mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);
  1016. /* search_param determines the length of the initial step and hence the
  1017. * number of iterations 0 = initial step (MAX_FIRST_STEP) pel : 1 =
  1018. * (MAX_FIRST_STEP/2) pel, 2 = (MAX_FIRST_STEP/4) pel... etc.
  1019. */
  1020. ss = &x->ss[search_param * x->searches_per_step];
  1021. tot_steps = (x->ss_count / x->searches_per_step) - search_param;
  1022. i = 1;
  1023. for (step = 0; step < tot_steps; ++step) {
  1024. int all_in = 1, t;
  1025. /* To know if all neighbor points are within the bounds, 4 bounds
  1026. * checking are enough instead of checking 4 bounds for each
  1027. * points.
  1028. */
  1029. all_in &= ((best_mv->as_mv.row + ss[i].mv.row) > x->mv_row_min);
  1030. all_in &= ((best_mv->as_mv.row + ss[i + 1].mv.row) < x->mv_row_max);
  1031. all_in &= ((best_mv->as_mv.col + ss[i + 2].mv.col) > x->mv_col_min);
  1032. all_in &= ((best_mv->as_mv.col + ss[i + 3].mv.col) < x->mv_col_max);
  1033. if (all_in) {
  1034. unsigned int sad_array[4];
  1035. for (j = 0; j < x->searches_per_step; j += 4) {
  1036. const unsigned char *block_offset[4];
  1037. for (t = 0; t < 4; ++t) {
  1038. block_offset[t] = ss[i + t].offset + best_address;
  1039. }
  1040. fn_ptr->sdx4df(what, what_stride, block_offset, in_what_stride,
  1041. sad_array);
  1042. for (t = 0; t < 4; t++, i++) {
  1043. if (sad_array[t] < bestsad) {
  1044. this_mv.as_mv.row = best_mv->as_mv.row + ss[i].mv.row;
  1045. this_mv.as_mv.col = best_mv->as_mv.col + ss[i].mv.col;
  1046. sad_array[t] +=
  1047. mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit);
  1048. if (sad_array[t] < bestsad) {
  1049. bestsad = sad_array[t];
  1050. best_site = i;
  1051. }
  1052. }
  1053. }
  1054. }
  1055. } else {
  1056. for (j = 0; j < x->searches_per_step; ++j) {
  1057. /* Trap illegal vectors */
  1058. this_row_offset = best_mv->as_mv.row + ss[i].mv.row;
  1059. this_col_offset = best_mv->as_mv.col + ss[i].mv.col;
  1060. if ((this_col_offset > x->mv_col_min) &&
  1061. (this_col_offset < x->mv_col_max) &&
  1062. (this_row_offset > x->mv_row_min) &&
  1063. (this_row_offset < x->mv_row_max)) {
  1064. check_here = ss[i].offset + best_address;
  1065. thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride);
  1066. if (thissad < bestsad) {
  1067. this_mv.as_mv.row = this_row_offset;
  1068. this_mv.as_mv.col = this_col_offset;
  1069. thissad +=
  1070. mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit);
  1071. if (thissad < bestsad) {
  1072. bestsad = thissad;
  1073. best_site = i;
  1074. }
  1075. }
  1076. }
  1077. i++;
  1078. }
  1079. }
  1080. if (best_site != last_site) {
  1081. best_mv->as_mv.row += ss[best_site].mv.row;
  1082. best_mv->as_mv.col += ss[best_site].mv.col;
  1083. best_address += ss[best_site].offset;
  1084. last_site = best_site;
  1085. } else if (best_address == in_what) {
  1086. (*num00)++;
  1087. }
  1088. }
  1089. this_mv.as_mv.row = best_mv->as_mv.row * 8;
  1090. this_mv.as_mv.col = best_mv->as_mv.col * 8;
  1091. return fn_ptr->vf(what, what_stride, best_address, in_what_stride, &thissad) +
  1092. mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
  1093. }
  1094. int vp8_full_search_sad_c(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
  1095. int sad_per_bit, int distance,
  1096. vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2],
  1097. int_mv *center_mv) {
  1098. unsigned char *what = (*(b->base_src) + b->src);
  1099. int what_stride = b->src_stride;
  1100. unsigned char *in_what;
  1101. int pre_stride = x->e_mbd.pre.y_stride;
  1102. unsigned char *base_pre = x->e_mbd.pre.y_buffer;
  1103. int in_what_stride = pre_stride;
  1104. int mv_stride = pre_stride;
  1105. unsigned char *bestaddress;
  1106. int_mv *best_mv = &d->bmi.mv;
  1107. int_mv this_mv;
  1108. unsigned int bestsad;
  1109. unsigned int thissad;
  1110. int r, c;
  1111. unsigned char *check_here;
  1112. int ref_row = ref_mv->as_mv.row;
  1113. int ref_col = ref_mv->as_mv.col;
  1114. int row_min = ref_row - distance;
  1115. int row_max = ref_row + distance;
  1116. int col_min = ref_col - distance;
  1117. int col_max = ref_col + distance;
  1118. int *mvsadcost[2];
  1119. int_mv fcenter_mv;
  1120. mvsadcost[0] = x->mvsadcost[0];
  1121. mvsadcost[1] = x->mvsadcost[1];
  1122. fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
  1123. fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
  1124. /* Work out the mid point for the search */
  1125. in_what = base_pre + d->offset;
  1126. bestaddress = in_what + (ref_row * pre_stride) + ref_col;
  1127. best_mv->as_mv.row = ref_row;
  1128. best_mv->as_mv.col = ref_col;
  1129. /* Baseline value at the centre */
  1130. bestsad = fn_ptr->sdf(what, what_stride, bestaddress, in_what_stride) +
  1131. mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);
  1132. /* Apply further limits to prevent us looking using vectors that
  1133. * stretch beyiond the UMV border
  1134. */
  1135. if (col_min < x->mv_col_min) col_min = x->mv_col_min;
  1136. if (col_max > x->mv_col_max) col_max = x->mv_col_max;
  1137. if (row_min < x->mv_row_min) row_min = x->mv_row_min;
  1138. if (row_max > x->mv_row_max) row_max = x->mv_row_max;
  1139. for (r = row_min; r < row_max; ++r) {
  1140. this_mv.as_mv.row = r;
  1141. check_here = r * mv_stride + in_what + col_min;
  1142. for (c = col_min; c < col_max; ++c) {
  1143. thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride);
  1144. this_mv.as_mv.col = c;
  1145. thissad += mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit);
  1146. if (thissad < bestsad) {
  1147. bestsad = thissad;
  1148. best_mv->as_mv.row = r;
  1149. best_mv->as_mv.col = c;
  1150. bestaddress = check_here;
  1151. }
  1152. check_here++;
  1153. }
  1154. }
  1155. this_mv.as_mv.row = best_mv->as_mv.row << 3;
  1156. this_mv.as_mv.col = best_mv->as_mv.col << 3;
  1157. return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, &thissad) +
  1158. mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
  1159. }
  1160. int vp8_full_search_sadx3(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
  1161. int sad_per_bit, int distance,
  1162. vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2],
  1163. int_mv *center_mv) {
  1164. unsigned char *what = (*(b->base_src) + b->src);
  1165. int what_stride = b->src_stride;
  1166. unsigned char *in_what;
  1167. int pre_stride = x->e_mbd.pre.y_stride;
  1168. unsigned char *base_pre = x->e_mbd.pre.y_buffer;
  1169. int in_what_stride = pre_stride;
  1170. int mv_stride = pre_stride;
  1171. unsigned char *bestaddress;
  1172. int_mv *best_mv = &d->bmi.mv;
  1173. int_mv this_mv;
  1174. unsigned int bestsad;
  1175. unsigned int thissad;
  1176. int r, c;
  1177. unsigned char *check_here;
  1178. int ref_row = ref_mv->as_mv.row;
  1179. int ref_col = ref_mv->as_mv.col;
  1180. int row_min = ref_row - distance;
  1181. int row_max = ref_row + distance;
  1182. int col_min = ref_col - distance;
  1183. int col_max = ref_col + distance;
  1184. unsigned int sad_array[3];
  1185. int *mvsadcost[2];
  1186. int_mv fcenter_mv;
  1187. mvsadcost[0] = x->mvsadcost[0];
  1188. mvsadcost[1] = x->mvsadcost[1];
  1189. fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
  1190. fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
  1191. /* Work out the mid point for the search */
  1192. in_what = base_pre + d->offset;
  1193. bestaddress = in_what + (ref_row * pre_stride) + ref_col;
  1194. best_mv->as_mv.row = ref_row;
  1195. best_mv->as_mv.col = ref_col;
  1196. /* Baseline value at the centre */
  1197. bestsad = fn_ptr->sdf(what, what_stride, bestaddress, in_what_stride) +
  1198. mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);
  1199. /* Apply further limits to prevent us looking using vectors that stretch
  1200. * beyond the UMV border
  1201. */
  1202. if (col_min < x->mv_col_min) col_min = x->mv_col_min;
  1203. if (col_max > x->mv_col_max) col_max = x->mv_col_max;
  1204. if (row_min < x->mv_row_min) row_min = x->mv_row_min;
  1205. if (row_max > x->mv_row_max) row_max = x->mv_row_max;
  1206. for (r = row_min; r < row_max; ++r) {
  1207. this_mv.as_mv.row = r;
  1208. check_here = r * mv_stride + in_what + col_min;
  1209. c = col_min;
  1210. while ((c + 2) < col_max) {
  1211. int i;
  1212. fn_ptr->sdx3f(what, what_stride, check_here, in_what_stride, sad_array);
  1213. for (i = 0; i < 3; ++i) {
  1214. thissad = sad_array[i];
  1215. if (thissad < bestsad) {
  1216. this_mv.as_mv.col = c;
  1217. thissad +=
  1218. mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit);
  1219. if (thissad < bestsad) {
  1220. bestsad = thissad;
  1221. best_mv->as_mv.row = r;
  1222. best_mv->as_mv.col = c;
  1223. bestaddress = check_here;
  1224. }
  1225. }
  1226. check_here++;
  1227. c++;
  1228. }
  1229. }
  1230. while (c < col_max) {
  1231. thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride);
  1232. if (thissad < bestsad) {
  1233. this_mv.as_mv.col = c;
  1234. thissad +=
  1235. mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit);
  1236. if (thissad < bestsad) {
  1237. bestsad = thissad;
  1238. best_mv->as_mv.row = r;
  1239. best_mv->as_mv.col = c;
  1240. bestaddress = check_here;
  1241. }
  1242. }
  1243. check_here++;
  1244. c++;
  1245. }
  1246. }
  1247. this_mv.as_mv.row = best_mv->as_mv.row << 3;
  1248. this_mv.as_mv.col = best_mv->as_mv.col << 3;
  1249. return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, &thissad) +
  1250. mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
  1251. }
  1252. int vp8_full_search_sadx8(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
  1253. int sad_per_bit, int distance,
  1254. vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2],
  1255. int_mv *center_mv) {
  1256. unsigned char *what = (*(b->base_src) + b->src);
  1257. int what_stride = b->src_stride;
  1258. int pre_stride = x->e_mbd.pre.y_stride;
  1259. unsigned char *base_pre = x->e_mbd.pre.y_buffer;
  1260. unsigned char *in_what;
  1261. int in_what_stride = pre_stride;
  1262. int mv_stride = pre_stride;
  1263. unsigned char *bestaddress;
  1264. int_mv *best_mv = &d->bmi.mv;
  1265. int_mv this_mv;
  1266. unsigned int bestsad;
  1267. unsigned int thissad;
  1268. int r, c;
  1269. unsigned char *check_here;
  1270. int ref_row = ref_mv->as_mv.row;
  1271. int ref_col = ref_mv->as_mv.col;
  1272. int row_min = ref_row - distance;
  1273. int row_max = ref_row + distance;
  1274. int col_min = ref_col - distance;
  1275. int col_max = ref_col + distance;
  1276. DECLARE_ALIGNED(16, unsigned int, sad_array8[8]);
  1277. unsigned int sad_array[3];
  1278. int *mvsadcost[2];
  1279. int_mv fcenter_mv;
  1280. mvsadcost[0] = x->mvsadcost[0];
  1281. mvsadcost[1] = x->mvsadcost[1];
  1282. fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
  1283. fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
  1284. /* Work out the mid point for the search */
  1285. in_what = base_pre + d->offset;
  1286. bestaddress = in_what + (ref_row * pre_stride) + ref_col;
  1287. best_mv->as_mv.row = ref_row;
  1288. best_mv->as_mv.col = ref_col;
  1289. /* Baseline value at the centre */
  1290. bestsad = fn_ptr->sdf(what, what_stride, bestaddress, in_what_stride) +
  1291. mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);
  1292. /* Apply further limits to prevent us looking using vectors that stretch
  1293. * beyond the UMV border
  1294. */
  1295. if (col_min < x->mv_col_min) col_min = x->mv_col_min;
  1296. if (col_max > x->mv_col_max) col_max = x->mv_col_max;
  1297. if (row_min < x->mv_row_min) row_min = x->mv_row_min;
  1298. if (row_max > x->mv_row_max) row_max = x->mv_row_max;
  1299. for (r = row_min; r < row_max; ++r) {
  1300. this_mv.as_mv.row = r;
  1301. check_here = r * mv_stride + in_what + col_min;
  1302. c = col_min;
  1303. while ((c + 7) < col_max) {
  1304. int i;
  1305. fn_ptr->sdx8f(what, what_stride, check_here, in_what_stride, sad_array8);
  1306. for (i = 0; i < 8; ++i) {
  1307. thissad = sad_array8[i];
  1308. if (thissad < bestsad) {
  1309. this_mv.as_mv.col = c;
  1310. thissad +=
  1311. mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit);
  1312. if (thissad < bestsad) {
  1313. bestsad = thissad;
  1314. best_mv->as_mv.row = r;
  1315. best_mv->as_mv.col = c;
  1316. bestaddress = check_here;
  1317. }
  1318. }
  1319. check_here++;
  1320. c++;
  1321. }
  1322. }
  1323. while ((c + 2) < col_max) {
  1324. int i;
  1325. fn_ptr->sdx3f(what, what_stride, check_here, in_what_stride, sad_array);
  1326. for (i = 0; i < 3; ++i) {
  1327. thissad = sad_array[i];
  1328. if (thissad < bestsad) {
  1329. this_mv.as_mv.col = c;
  1330. thissad +=
  1331. mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit);
  1332. if (thissad < bestsad) {
  1333. bestsad = thissad;
  1334. best_mv->as_mv.row = r;
  1335. best_mv->as_mv.col = c;
  1336. bestaddress = check_here;
  1337. }
  1338. }
  1339. check_here++;
  1340. c++;
  1341. }
  1342. }
  1343. while (c < col_max) {
  1344. thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride);
  1345. if (thissad < bestsad) {
  1346. this_mv.as_mv.col = c;
  1347. thissad +=
  1348. mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit);
  1349. if (thissad < bestsad) {
  1350. bestsad = thissad;
  1351. best_mv->as_mv.row = r;
  1352. best_mv->as_mv.col = c;
  1353. bestaddress = check_here;
  1354. }
  1355. }
  1356. check_here++;
  1357. c++;
  1358. }
  1359. }
  1360. this_mv.as_mv.row = best_mv->as_mv.row * 8;
  1361. this_mv.as_mv.col = best_mv->as_mv.col * 8;
  1362. return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, &thissad) +
  1363. mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
  1364. }
  1365. int vp8_refining_search_sad_c(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
  1366. int_mv *ref_mv, int error_per_bit,
  1367. int search_range, vp8_variance_fn_ptr_t *fn_ptr,
  1368. int *mvcost[2], int_mv *center_mv) {
  1369. MV neighbors[4] = { { -1, 0 }, { 0, -1 }, { 0, 1 }, { 1, 0 } };
  1370. int i, j;
  1371. short this_row_offset, this_col_offset;
  1372. int what_stride = b->src_stride;
  1373. int pre_stride = x->e_mbd.pre.y_stride;
  1374. unsigned char *base_pre = x->e_mbd.pre.y_buffer;
  1375. int in_what_stride = pre_stride;
  1376. unsigned char *what = (*(b->base_src) + b->src);
  1377. unsigned char *best_address =
  1378. (unsigned char *)(base_pre + d->offset +
  1379. (ref_mv->as_mv.row * pre_stride) + ref_mv->as_mv.col);
  1380. unsigned char *check_here;
  1381. int_mv this_mv;
  1382. unsigned int bestsad;
  1383. unsigned int thissad;
  1384. int *mvsadcost[2];
  1385. int_mv fcenter_mv;
  1386. mvsadcost[0] = x->mvsadcost[0];
  1387. mvsadcost[1] = x->mvsadcost[1];
  1388. fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
  1389. fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
  1390. bestsad = fn_ptr->sdf(what, what_stride, best_address, in_what_stride) +
  1391. mvsad_err_cost(ref_mv, &fcenter_mv, mvsadcost, error_per_bit);
  1392. for (i = 0; i < search_range; ++i) {
  1393. int best_site = -1;
  1394. for (j = 0; j < 4; ++j) {
  1395. this_row_offset = ref_mv->as_mv.row + neighbors[j].row;
  1396. this_col_offset = ref_mv->as_mv.col + neighbors[j].col;
  1397. if ((this_col_offset > x->mv_col_min) &&
  1398. (this_col_offset < x->mv_col_max) &&
  1399. (this_row_offset > x->mv_row_min) &&
  1400. (this_row_offset < x->mv_row_max)) {
  1401. check_here = (neighbors[j].row) * in_what_stride + neighbors[j].col +
  1402. best_address;
  1403. thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride);
  1404. if (thissad < bestsad) {
  1405. this_mv.as_mv.row = this_row_offset;
  1406. this_mv.as_mv.col = this_col_offset;
  1407. thissad +=
  1408. mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, error_per_bit);
  1409. if (thissad < bestsad) {
  1410. bestsad = thissad;
  1411. best_site = j;
  1412. }
  1413. }
  1414. }
  1415. }
  1416. if (best_site == -1) {
  1417. break;
  1418. } else {
  1419. ref_mv->as_mv.row += neighbors[best_site].row;
  1420. ref_mv->as_mv.col += neighbors[best_site].col;
  1421. best_address += (neighbors[best_site].row) * in_what_stride +
  1422. neighbors[best_site].col;
  1423. }
  1424. }
  1425. this_mv.as_mv.row = ref_mv->as_mv.row << 3;
  1426. this_mv.as_mv.col = ref_mv->as_mv.col << 3;
  1427. return fn_ptr->vf(what, what_stride, best_address, in_what_stride, &thissad) +
  1428. mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
  1429. }
  1430. int vp8_refining_search_sadx4(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
  1431. int_mv *ref_mv, int error_per_bit,
  1432. int search_range, vp8_variance_fn_ptr_t *fn_ptr,
  1433. int *mvcost[2], int_mv *center_mv) {
  1434. MV neighbors[4] = { { -1, 0 }, { 0, -1 }, { 0, 1 }, { 1, 0 } };
  1435. int i, j;
  1436. short this_row_offset, this_col_offset;
  1437. int what_stride = b->src_stride;
  1438. int pre_stride = x->e_mbd.pre.y_stride;
  1439. unsigned char *base_pre = x->e_mbd.pre.y_buffer;
  1440. int in_what_stride = pre_stride;
  1441. unsigned char *what = (*(b->base_src) + b->src);
  1442. unsigned char *best_address =
  1443. (unsigned char *)(base_pre + d->offset +
  1444. (ref_mv->as_mv.row * pre_stride) + ref_mv->as_mv.col);
  1445. unsigned char *check_here;
  1446. int_mv this_mv;
  1447. unsigned int bestsad;
  1448. unsigned int thissad;
  1449. int *mvsadcost[2];
  1450. int_mv fcenter_mv;
  1451. mvsadcost[0] = x->mvsadcost[0];
  1452. mvsadcost[1] = x->mvsadcost[1];
  1453. fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
  1454. fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
  1455. bestsad = fn_ptr->sdf(what, what_stride, best_address, in_what_stride) +
  1456. mvsad_err_cost(ref_mv, &fcenter_mv, mvsadcost, error_per_bit);
  1457. for (i = 0; i < search_range; ++i) {
  1458. int best_site = -1;
  1459. int all_in = 1;
  1460. all_in &= ((ref_mv->as_mv.row - 1) > x->mv_row_min);
  1461. all_in &= ((ref_mv->as_mv.row + 1) < x->mv_row_max);
  1462. all_in &= ((ref_mv->as_mv.col - 1) > x->mv_col_min);
  1463. all_in &= ((ref_mv->as_mv.col + 1) < x->mv_col_max);
  1464. if (all_in) {
  1465. unsigned int sad_array[4];
  1466. const unsigned char *block_offset[4];
  1467. block_offset[0] = best_address - in_what_stride;
  1468. block_offset[1] = best_address - 1;
  1469. block_offset[2] = best_address + 1;
  1470. block_offset[3] = best_address + in_what_stride;
  1471. fn_ptr->sdx4df(what, what_stride, block_offset, in_what_stride,
  1472. sad_array);
  1473. for (j = 0; j < 4; ++j) {
  1474. if (sad_array[j] < bestsad) {
  1475. this_mv.as_mv.row = ref_mv->as_mv.row + neighbors[j].row;
  1476. this_mv.as_mv.col = ref_mv->as_mv.col + neighbors[j].col;
  1477. sad_array[j] +=
  1478. mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, error_per_bit);
  1479. if (sad_array[j] < bestsad) {
  1480. bestsad = sad_array[j];
  1481. best_site = j;
  1482. }
  1483. }
  1484. }
  1485. } else {
  1486. for (j = 0; j < 4; ++j) {
  1487. this_row_offset = ref_mv->as_mv.row + neighbors[j].row;
  1488. this_col_offset = ref_mv->as_mv.col + neighbors[j].col;
  1489. if ((this_col_offset > x->mv_col_min) &&
  1490. (this_col_offset < x->mv_col_max) &&
  1491. (this_row_offset > x->mv_row_min) &&
  1492. (this_row_offset < x->mv_row_max)) {
  1493. check_here = (neighbors[j].row) * in_what_stride + neighbors[j].col +
  1494. best_address;
  1495. thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride);
  1496. if (thissad < bestsad) {
  1497. this_mv.as_mv.row = this_row_offset;
  1498. this_mv.as_mv.col = this_col_offset;
  1499. thissad +=
  1500. mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, error_per_bit);
  1501. if (thissad < bestsad) {
  1502. bestsad = thissad;
  1503. best_site = j;
  1504. }
  1505. }
  1506. }
  1507. }
  1508. }
  1509. if (best_site == -1) {
  1510. break;
  1511. } else {
  1512. ref_mv->as_mv.row += neighbors[best_site].row;
  1513. ref_mv->as_mv.col += neighbors[best_site].col;
  1514. best_address += (neighbors[best_site].row) * in_what_stride +
  1515. neighbors[best_site].col;
  1516. }
  1517. }
  1518. this_mv.as_mv.row = ref_mv->as_mv.row * 8;
  1519. this_mv.as_mv.col = ref_mv->as_mv.col * 8;
  1520. return fn_ptr->vf(what, what_stride, best_address, in_what_stride, &thissad) +
  1521. mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
  1522. }
  1523. #ifdef VP8_ENTROPY_STATS
  1524. void print_mode_context(void) {
  1525. FILE *f = fopen("modecont.c", "w");
  1526. int i, j;
  1527. fprintf(f, "#include \"entropy.h\"\n");
  1528. fprintf(f, "const int vp8_mode_contexts[6][4] =\n");
  1529. fprintf(f, "{\n");
  1530. for (j = 0; j < 6; ++j) {
  1531. fprintf(f, " { /* %d */\n", j);
  1532. fprintf(f, " ");
  1533. for (i = 0; i < 4; ++i) {
  1534. int overal_prob;
  1535. int this_prob;
  1536. int count;
  1537. /* Overall probs */
  1538. count = mv_mode_cts[i][0] + mv_mode_cts[i][1];
  1539. if (count)
  1540. overal_prob = 256 * mv_mode_cts[i][0] / count;
  1541. else
  1542. overal_prob = 128;
  1543. if (overal_prob == 0) overal_prob = 1;
  1544. /* context probs */
  1545. count = mv_ref_ct[j][i][0] + mv_ref_ct[j][i][1];
  1546. if (count)
  1547. this_prob = 256 * mv_ref_ct[j][i][0] / count;
  1548. else
  1549. this_prob = 128;
  1550. if (this_prob == 0) this_prob = 1;
  1551. fprintf(f, "%5d, ", this_prob);
  1552. }
  1553. fprintf(f, " },\n");
  1554. }
  1555. fprintf(f, "};\n");
  1556. fclose(f);
  1557. }
  1558. /* MV ref count VP8_ENTROPY_STATS stats code */
  1559. #ifdef VP8_ENTROPY_STATS
  1560. void init_mv_ref_counts() {
  1561. memset(mv_ref_ct, 0, sizeof(mv_ref_ct));
  1562. memset(mv_mode_cts, 0, sizeof(mv_mode_cts));
  1563. }
  1564. void accum_mv_refs(MB_PREDICTION_MODE m, const int ct[4]) {
  1565. if (m == ZEROMV) {
  1566. ++mv_ref_ct[ct[0]][0][0];
  1567. ++mv_mode_cts[0][0];
  1568. } else {
  1569. ++mv_ref_ct[ct[0]][0][1];
  1570. ++mv_mode_cts[0][1];
  1571. if (m == NEARESTMV) {
  1572. ++mv_ref_ct[ct[1]][1][0];
  1573. ++mv_mode_cts[1][0];
  1574. } else {
  1575. ++mv_ref_ct[ct[1]][1][1];
  1576. ++mv_mode_cts[1][1];
  1577. if (m == NEARMV) {
  1578. ++mv_ref_ct[ct[2]][2][0];
  1579. ++mv_mode_cts[2][0];
  1580. } else {
  1581. ++mv_ref_ct[ct[2]][2][1];
  1582. ++mv_mode_cts[2][1];
  1583. if (m == NEWMV) {
  1584. ++mv_ref_ct[ct[3]][3][0];
  1585. ++mv_mode_cts[3][0];
  1586. } else {
  1587. ++mv_ref_ct[ct[3]][3][1];
  1588. ++mv_mode_cts[3][1];
  1589. }
  1590. }
  1591. }
  1592. }
  1593. }
  1594. #endif /* END MV ref count VP8_ENTROPY_STATS stats code */
  1595. #endif