variance.c (27 KB)
  1. /*
  2. * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  3. *
  4. * Use of this source code is governed by a BSD-style license
  5. * that can be found in the LICENSE file in the root of the source
  6. * tree. An additional intellectual property rights grant can be found
  7. * in the file PATENTS. All contributing project authors may
  8. * be found in the AUTHORS file in the root of the source tree.
  9. */
  10. #include "./vpx_config.h"
  11. #include "./vpx_dsp_rtcd.h"
  12. #include "vpx_ports/mem.h"
  13. #include "vpx/vpx_integer.h"
  14. #include "vpx_dsp/variance.h"
// 2-tap bilinear filter kernels for the eight 1/8-pel subpixel positions.
// Each pair of taps sums to 128, the FILTER_WEIGHT referenced by the filter
// passes below, so filtered output keeps the input's dynamic range after
// ROUND_POWER_OF_TWO(..., FILTER_BITS).
static const uint8_t bilinear_filters[8][2] = {
  { 128, 0 }, { 112, 16 }, { 96, 32 }, { 80, 48 },
  { 64, 64 }, { 48, 80 }, { 32, 96 }, { 16, 112 },
};
  19. uint32_t vpx_get4x4sse_cs_c(const uint8_t *src_ptr, int src_stride,
  20. const uint8_t *ref_ptr, int ref_stride) {
  21. int distortion = 0;
  22. int r, c;
  23. for (r = 0; r < 4; ++r) {
  24. for (c = 0; c < 4; ++c) {
  25. int diff = src_ptr[c] - ref_ptr[c];
  26. distortion += diff * diff;
  27. }
  28. src_ptr += src_stride;
  29. ref_ptr += ref_stride;
  30. }
  31. return distortion;
  32. }
  33. uint32_t vpx_get_mb_ss_c(const int16_t *src_ptr) {
  34. unsigned int i, sum = 0;
  35. for (i = 0; i < 256; ++i) {
  36. sum += src_ptr[i] * src_ptr[i];
  37. }
  38. return sum;
  39. }
  40. static void variance(const uint8_t *src_ptr, int src_stride,
  41. const uint8_t *ref_ptr, int ref_stride, int w, int h,
  42. uint32_t *sse, int *sum) {
  43. int i, j;
  44. *sum = 0;
  45. *sse = 0;
  46. for (i = 0; i < h; ++i) {
  47. for (j = 0; j < w; ++j) {
  48. const int diff = src_ptr[j] - ref_ptr[j];
  49. *sum += diff;
  50. *sse += diff * diff;
  51. }
  52. src_ptr += src_stride;
  53. ref_ptr += ref_stride;
  54. }
  55. }
  56. // Applies a 1-D 2-tap bilinear filter to the source block in either horizontal
  57. // or vertical direction to produce the filtered output block. Used to implement
  58. // the first-pass of 2-D separable filter.
  59. //
  60. // Produces int16_t output to retain precision for the next pass. Two filter
  61. // taps should sum to FILTER_WEIGHT. pixel_step defines whether the filter is
  62. // applied horizontally (pixel_step = 1) or vertically (pixel_step = stride).
  63. // It defines the offset required to move from one input to the next.
  64. static void var_filter_block2d_bil_first_pass(
  65. const uint8_t *src_ptr, uint16_t *ref_ptr, unsigned int src_pixels_per_line,
  66. int pixel_step, unsigned int output_height, unsigned int output_width,
  67. const uint8_t *filter) {
  68. unsigned int i, j;
  69. for (i = 0; i < output_height; ++i) {
  70. for (j = 0; j < output_width; ++j) {
  71. ref_ptr[j] = ROUND_POWER_OF_TWO(
  72. (int)src_ptr[0] * filter[0] + (int)src_ptr[pixel_step] * filter[1],
  73. FILTER_BITS);
  74. ++src_ptr;
  75. }
  76. src_ptr += src_pixels_per_line - output_width;
  77. ref_ptr += output_width;
  78. }
  79. }
  80. // Applies a 1-D 2-tap bilinear filter to the source block in either horizontal
  81. // or vertical direction to produce the filtered output block. Used to implement
  82. // the second-pass of 2-D separable filter.
  83. //
  84. // Requires 16-bit input as produced by filter_block2d_bil_first_pass. Two
  85. // filter taps should sum to FILTER_WEIGHT. pixel_step defines whether the
  86. // filter is applied horizontally (pixel_step = 1) or vertically
  87. // (pixel_step = stride). It defines the offset required to move from one input
  88. // to the next. Output is 8-bit.
  89. static void var_filter_block2d_bil_second_pass(
  90. const uint16_t *src_ptr, uint8_t *ref_ptr, unsigned int src_pixels_per_line,
  91. unsigned int pixel_step, unsigned int output_height,
  92. unsigned int output_width, const uint8_t *filter) {
  93. unsigned int i, j;
  94. for (i = 0; i < output_height; ++i) {
  95. for (j = 0; j < output_width; ++j) {
  96. ref_ptr[j] = ROUND_POWER_OF_TWO(
  97. (int)src_ptr[0] * filter[0] + (int)src_ptr[pixel_step] * filter[1],
  98. FILTER_BITS);
  99. ++src_ptr;
  100. }
  101. src_ptr += src_pixels_per_line - output_width;
  102. ref_ptr += output_width;
  103. }
  104. }
// Emits vpx_variance<W>x<H>_c(): fills *sse with the sum of squared
// differences of the WxH block and returns the variance,
// sse - sum^2 / (W * H). The (int64_t) cast keeps sum * sum from
// overflowing for the larger block sizes.
#define VAR(W, H)                                                            \
  uint32_t vpx_variance##W##x##H##_c(const uint8_t *src_ptr, int src_stride, \
                                     const uint8_t *ref_ptr, int ref_stride, \
                                     uint32_t *sse) {                        \
    int sum;                                                                 \
    variance(src_ptr, src_stride, ref_ptr, ref_stride, W, H, sse, &sum);     \
    return *sse - (uint32_t)(((int64_t)sum * sum) / (W * H));                \
  }
// Emits vpx_sub_pixel_variance<W>x<H>_c(): interpolates the source block at
// the 1/8-pel offset (x_offset, y_offset) with the separable 2-tap bilinear
// filter (horizontal pass into fdata3, vertical pass into temp2), then
// returns the variance of the interpolated block against ref_ptr.
#define SUBPIX_VAR(W, H)                                                     \
  uint32_t vpx_sub_pixel_variance##W##x##H##_c(                              \
      const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset,    \
      const uint8_t *ref_ptr, int ref_stride, uint32_t *sse) {               \
    uint16_t fdata3[(H + 1) * W]; /* H+1 rows: vertical pass reads one extra */ \
    uint8_t temp2[H * W];                                                    \
                                                                             \
    var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_stride, 1, H + 1, \
                                      W, bilinear_filters[x_offset]);        \
    var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W,            \
                                       bilinear_filters[y_offset]);          \
                                                                             \
    return vpx_variance##W##x##H##_c(temp2, W, ref_ptr, ref_stride, sse);    \
  }
// Emits vpx_sub_pixel_avg_variance<W>x<H>_c(): like SUBPIX_VAR, but the
// interpolated block (temp2) is first averaged with second_pred into temp3
// before the variance against ref_ptr is computed. temp3 is 16-byte aligned
// (DECLARE_ALIGNED) to match the alignment expected by SIMD comp-avg
// implementations.
#define SUBPIX_AVG_VAR(W, H)                                                 \
  uint32_t vpx_sub_pixel_avg_variance##W##x##H##_c(                          \
      const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset,    \
      const uint8_t *ref_ptr, int ref_stride, uint32_t *sse,                 \
      const uint8_t *second_pred) {                                          \
    uint16_t fdata3[(H + 1) * W];                                            \
    uint8_t temp2[H * W];                                                    \
    DECLARE_ALIGNED(16, uint8_t, temp3[H * W]);                              \
                                                                             \
    var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_stride, 1, H + 1, \
                                      W, bilinear_filters[x_offset]);        \
    var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W,            \
                                       bilinear_filters[y_offset]);          \
                                                                             \
    vpx_comp_avg_pred_c(temp3, second_pred, W, H, temp2, W);                 \
                                                                             \
    return vpx_variance##W##x##H##_c(temp3, W, ref_ptr, ref_stride, sse);    \
  }
/* Identical to the variance call except it takes an additional parameter,
 * sum, and returns that value using pass-by-reference instead of returning
 * sse - sum^2 / (w * h).
 */
#define GET_VAR(W, H)                                                   \
  void vpx_get##W##x##H##var_c(const uint8_t *src_ptr, int src_stride,  \
                               const uint8_t *ref_ptr, int ref_stride,  \
                               uint32_t *sse, int *sum) {               \
    variance(src_ptr, src_stride, ref_ptr, ref_stride, W, H, sse, sum); \
  }
/* Identical to the variance call except it does not calculate the
 * sse - sum^2 / (w * h) term: it returns the raw sse in addition to writing
 * it through the passed-in pointer. The sum is computed but discarded.
 */
#define MSE(W, H)                                                         \
  uint32_t vpx_mse##W##x##H##_c(const uint8_t *src_ptr, int src_stride,   \
                                const uint8_t *ref_ptr, int ref_stride,   \
                                uint32_t *sse) {                          \
    int sum;                                                              \
    variance(src_ptr, src_stride, ref_ptr, ref_stride, W, H, sse, &sum);  \
    return *sse;                                                          \
  }
/* All three forms of the variance are available in the same sizes. */
#define VARIANCES(W, H) \
  VAR(W, H)             \
  SUBPIX_VAR(W, H)      \
  SUBPIX_AVG_VAR(W, H)

// Instantiate plain, sub-pixel, and sub-pixel-with-averaging variance
// functions for every supported block size, from 64x64 down to 4x4.
VARIANCES(64, 64)
VARIANCES(64, 32)
VARIANCES(32, 64)
VARIANCES(32, 32)
VARIANCES(32, 16)
VARIANCES(16, 32)
VARIANCES(16, 16)
VARIANCES(16, 8)
VARIANCES(8, 16)
VARIANCES(8, 8)
VARIANCES(8, 4)
VARIANCES(4, 8)
VARIANCES(4, 4)

// sse/sum accessors and plain MSE are only provided for the sizes the
// encoder actually uses.
GET_VAR(16, 16)
GET_VAR(8, 8)

MSE(16, 16)
MSE(16, 8)
MSE(8, 16)
MSE(8, 8)
  191. void vpx_comp_avg_pred_c(uint8_t *comp_pred, const uint8_t *pred, int width,
  192. int height, const uint8_t *ref, int ref_stride) {
  193. int i, j;
  194. for (i = 0; i < height; ++i) {
  195. for (j = 0; j < width; ++j) {
  196. const int tmp = pred[j] + ref[j];
  197. comp_pred[j] = ROUND_POWER_OF_TWO(tmp, 1);
  198. }
  199. comp_pred += width;
  200. pred += width;
  201. ref += ref_stride;
  202. }
  203. }
  204. #if CONFIG_VP9_HIGHBITDEPTH
  205. static void highbd_variance64(const uint8_t *src8_ptr, int src_stride,
  206. const uint8_t *ref8_ptr, int ref_stride, int w,
  207. int h, uint64_t *sse, int64_t *sum) {
  208. int i, j;
  209. uint16_t *src_ptr = CONVERT_TO_SHORTPTR(src8_ptr);
  210. uint16_t *ref_ptr = CONVERT_TO_SHORTPTR(ref8_ptr);
  211. *sum = 0;
  212. *sse = 0;
  213. for (i = 0; i < h; ++i) {
  214. for (j = 0; j < w; ++j) {
  215. const int diff = src_ptr[j] - ref_ptr[j];
  216. *sum += diff;
  217. *sse += diff * diff;
  218. }
  219. src_ptr += src_stride;
  220. ref_ptr += ref_stride;
  221. }
  222. }
  223. static void highbd_8_variance(const uint8_t *src8_ptr, int src_stride,
  224. const uint8_t *ref8_ptr, int ref_stride, int w,
  225. int h, uint32_t *sse, int *sum) {
  226. uint64_t sse_long = 0;
  227. int64_t sum_long = 0;
  228. highbd_variance64(src8_ptr, src_stride, ref8_ptr, ref_stride, w, h, &sse_long,
  229. &sum_long);
  230. *sse = (uint32_t)sse_long;
  231. *sum = (int)sum_long;
  232. }
  233. static void highbd_10_variance(const uint8_t *src8_ptr, int src_stride,
  234. const uint8_t *ref8_ptr, int ref_stride, int w,
  235. int h, uint32_t *sse, int *sum) {
  236. uint64_t sse_long = 0;
  237. int64_t sum_long = 0;
  238. highbd_variance64(src8_ptr, src_stride, ref8_ptr, ref_stride, w, h, &sse_long,
  239. &sum_long);
  240. *sse = (uint32_t)ROUND_POWER_OF_TWO(sse_long, 4);
  241. *sum = (int)ROUND_POWER_OF_TWO(sum_long, 2);
  242. }
  243. static void highbd_12_variance(const uint8_t *src8_ptr, int src_stride,
  244. const uint8_t *ref8_ptr, int ref_stride, int w,
  245. int h, uint32_t *sse, int *sum) {
  246. uint64_t sse_long = 0;
  247. int64_t sum_long = 0;
  248. highbd_variance64(src8_ptr, src_stride, ref8_ptr, ref_stride, w, h, &sse_long,
  249. &sum_long);
  250. *sse = (uint32_t)ROUND_POWER_OF_TWO(sse_long, 8);
  251. *sum = (int)ROUND_POWER_OF_TWO(sum_long, 4);
  252. }
// Emits the vpx_highbd_{8,10,12}_variance<W>x<H>_c() trio. The 10/12-bit
// variants clamp the result to zero: their sse/sum values are rounded during
// bit-depth rescaling, so sse - sum^2/(W*H) can come out slightly negative.
// The 8-bit variant uses unrounded accumulators and needs no clamp.
#define HIGHBD_VAR(W, H)                                                   \
  uint32_t vpx_highbd_8_variance##W##x##H##_c(                             \
      const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr,      \
      int ref_stride, uint32_t *sse) {                                     \
    int sum;                                                               \
    highbd_8_variance(src_ptr, src_stride, ref_ptr, ref_stride, W, H, sse, \
                      &sum);                                               \
    return *sse - (uint32_t)(((int64_t)sum * sum) / (W * H));              \
  }                                                                        \
                                                                           \
  uint32_t vpx_highbd_10_variance##W##x##H##_c(                            \
      const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr,      \
      int ref_stride, uint32_t *sse) {                                     \
    int sum;                                                               \
    int64_t var;                                                           \
    highbd_10_variance(src_ptr, src_stride, ref_ptr, ref_stride, W, H, sse, \
                       &sum);                                              \
    var = (int64_t)(*sse) - (((int64_t)sum * sum) / (W * H));              \
    return (var >= 0) ? (uint32_t)var : 0;                                 \
  }                                                                        \
                                                                           \
  uint32_t vpx_highbd_12_variance##W##x##H##_c(                            \
      const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr,      \
      int ref_stride, uint32_t *sse) {                                     \
    int sum;                                                               \
    int64_t var;                                                           \
    highbd_12_variance(src_ptr, src_stride, ref_ptr, ref_stride, W, H, sse, \
                       &sum);                                              \
    var = (int64_t)(*sse) - (((int64_t)sum * sum) / (W * H));              \
    return (var >= 0) ? (uint32_t)var : 0;                                 \
  }
// Emits vpx_highbd_{8,10,12}_get<S>x<S>var_c(): returns sse and sum via
// out-parameters instead of computing the variance (high-bitdepth analogue
// of GET_VAR).
#define HIGHBD_GET_VAR(S)                                                  \
  void vpx_highbd_8_get##S##x##S##var_c(                                   \
      const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr,      \
      int ref_stride, uint32_t *sse, int *sum) {                           \
    highbd_8_variance(src_ptr, src_stride, ref_ptr, ref_stride, S, S, sse, \
                      sum);                                                \
  }                                                                        \
                                                                           \
  void vpx_highbd_10_get##S##x##S##var_c(                                  \
      const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr,      \
      int ref_stride, uint32_t *sse, int *sum) {                           \
    highbd_10_variance(src_ptr, src_stride, ref_ptr, ref_stride, S, S, sse, \
                       sum);                                               \
  }                                                                        \
                                                                           \
  void vpx_highbd_12_get##S##x##S##var_c(                                  \
      const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr,      \
      int ref_stride, uint32_t *sse, int *sum) {                           \
    highbd_12_variance(src_ptr, src_stride, ref_ptr, ref_stride, S, S, sse, \
                       sum);                                               \
  }
// Emits vpx_highbd_{8,10,12}_mse<W>x<H>_c(): returns only the bit-depth
// rescaled sse; the sum is computed but discarded (high-bitdepth analogue
// of MSE).
#define HIGHBD_MSE(W, H)                                                   \
  uint32_t vpx_highbd_8_mse##W##x##H##_c(                                  \
      const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr,      \
      int ref_stride, uint32_t *sse) {                                     \
    int sum;                                                               \
    highbd_8_variance(src_ptr, src_stride, ref_ptr, ref_stride, W, H, sse, \
                      &sum);                                               \
    return *sse;                                                           \
  }                                                                        \
                                                                           \
  uint32_t vpx_highbd_10_mse##W##x##H##_c(                                 \
      const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr,      \
      int ref_stride, uint32_t *sse) {                                     \
    int sum;                                                               \
    highbd_10_variance(src_ptr, src_stride, ref_ptr, ref_stride, W, H, sse, \
                       &sum);                                              \
    return *sse;                                                           \
  }                                                                        \
                                                                           \
  uint32_t vpx_highbd_12_mse##W##x##H##_c(                                 \
      const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr,      \
      int ref_stride, uint32_t *sse) {                                     \
    int sum;                                                               \
    highbd_12_variance(src_ptr, src_stride, ref_ptr, ref_stride, W, H, sse, \
                       &sum);                                              \
    return *sse;                                                           \
  }
  332. static void highbd_var_filter_block2d_bil_first_pass(
  333. const uint8_t *src_ptr8, uint16_t *output_ptr,
  334. unsigned int src_pixels_per_line, int pixel_step,
  335. unsigned int output_height, unsigned int output_width,
  336. const uint8_t *filter) {
  337. unsigned int i, j;
  338. uint16_t *src_ptr = CONVERT_TO_SHORTPTR(src_ptr8);
  339. for (i = 0; i < output_height; ++i) {
  340. for (j = 0; j < output_width; ++j) {
  341. output_ptr[j] = ROUND_POWER_OF_TWO(
  342. (int)src_ptr[0] * filter[0] + (int)src_ptr[pixel_step] * filter[1],
  343. FILTER_BITS);
  344. ++src_ptr;
  345. }
  346. // Next row...
  347. src_ptr += src_pixels_per_line - output_width;
  348. output_ptr += output_width;
  349. }
  350. }
  351. static void highbd_var_filter_block2d_bil_second_pass(
  352. const uint16_t *src_ptr, uint16_t *output_ptr,
  353. unsigned int src_pixels_per_line, unsigned int pixel_step,
  354. unsigned int output_height, unsigned int output_width,
  355. const uint8_t *filter) {
  356. unsigned int i, j;
  357. for (i = 0; i < output_height; ++i) {
  358. for (j = 0; j < output_width; ++j) {
  359. output_ptr[j] = ROUND_POWER_OF_TWO(
  360. (int)src_ptr[0] * filter[0] + (int)src_ptr[pixel_step] * filter[1],
  361. FILTER_BITS);
  362. ++src_ptr;
  363. }
  364. src_ptr += src_pixels_per_line - output_width;
  365. output_ptr += output_width;
  366. }
  367. }
// Emits the vpx_highbd_{8,10,12}_sub_pixel_variance<W>x<H>_c() trio:
// bilinearly interpolate the high-bitdepth source at the 1/8-pel offset
// (x_offset, y_offset), then take the variance of the interpolated block
// (re-wrapped with CONVERT_TO_BYTEPTR) against ref_ptr at the matching
// bit depth.
#define HIGHBD_SUBPIX_VAR(W, H)                                              \
  uint32_t vpx_highbd_8_sub_pixel_variance##W##x##H##_c(                     \
      const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset,    \
      const uint8_t *ref_ptr, int ref_stride, uint32_t *sse) {               \
    uint16_t fdata3[(H + 1) * W];                                            \
    uint16_t temp2[H * W];                                                   \
                                                                             \
    highbd_var_filter_block2d_bil_first_pass(                                \
        src_ptr, fdata3, src_stride, 1, H + 1, W, bilinear_filters[x_offset]); \
    highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W,     \
                                              bilinear_filters[y_offset]);   \
                                                                             \
    return vpx_highbd_8_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), W,  \
                                              ref_ptr, ref_stride, sse);     \
  }                                                                          \
                                                                             \
  uint32_t vpx_highbd_10_sub_pixel_variance##W##x##H##_c(                    \
      const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset,    \
      const uint8_t *ref_ptr, int ref_stride, uint32_t *sse) {               \
    uint16_t fdata3[(H + 1) * W];                                            \
    uint16_t temp2[H * W];                                                   \
                                                                             \
    highbd_var_filter_block2d_bil_first_pass(                                \
        src_ptr, fdata3, src_stride, 1, H + 1, W, bilinear_filters[x_offset]); \
    highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W,     \
                                              bilinear_filters[y_offset]);   \
                                                                             \
    return vpx_highbd_10_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), W, \
                                               ref_ptr, ref_stride, sse);    \
  }                                                                          \
                                                                             \
  uint32_t vpx_highbd_12_sub_pixel_variance##W##x##H##_c(                    \
      const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset,    \
      const uint8_t *ref_ptr, int ref_stride, uint32_t *sse) {               \
    uint16_t fdata3[(H + 1) * W];                                            \
    uint16_t temp2[H * W];                                                   \
                                                                             \
    highbd_var_filter_block2d_bil_first_pass(                                \
        src_ptr, fdata3, src_stride, 1, H + 1, W, bilinear_filters[x_offset]); \
    highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W,     \
                                              bilinear_filters[y_offset]);   \
                                                                             \
    return vpx_highbd_12_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), W, \
                                               ref_ptr, ref_stride, sse);    \
  }
// Same as HIGHBD_SUBPIX_VAR, but the interpolated block (temp2) is first
// averaged with second_pred (a byte pointer wrapping a packed uint16 block)
// into the 16-byte-aligned temp3 before the variance is taken.
#define HIGHBD_SUBPIX_AVG_VAR(W, H)                                          \
  uint32_t vpx_highbd_8_sub_pixel_avg_variance##W##x##H##_c(                 \
      const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset,    \
      const uint8_t *ref_ptr, int ref_stride, uint32_t *sse,                 \
      const uint8_t *second_pred) {                                          \
    uint16_t fdata3[(H + 1) * W];                                            \
    uint16_t temp2[H * W];                                                   \
    DECLARE_ALIGNED(16, uint16_t, temp3[H * W]);                             \
                                                                             \
    highbd_var_filter_block2d_bil_first_pass(                                \
        src_ptr, fdata3, src_stride, 1, H + 1, W, bilinear_filters[x_offset]); \
    highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W,     \
                                              bilinear_filters[y_offset]);   \
                                                                             \
    vpx_highbd_comp_avg_pred_c(temp3, CONVERT_TO_SHORTPTR(second_pred), W,   \
                               H, temp2, W);                                 \
                                                                             \
    return vpx_highbd_8_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp3), W,  \
                                              ref_ptr, ref_stride, sse);     \
  }                                                                          \
                                                                             \
  uint32_t vpx_highbd_10_sub_pixel_avg_variance##W##x##H##_c(                \
      const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset,    \
      const uint8_t *ref_ptr, int ref_stride, uint32_t *sse,                 \
      const uint8_t *second_pred) {                                          \
    uint16_t fdata3[(H + 1) * W];                                            \
    uint16_t temp2[H * W];                                                   \
    DECLARE_ALIGNED(16, uint16_t, temp3[H * W]);                             \
                                                                             \
    highbd_var_filter_block2d_bil_first_pass(                                \
        src_ptr, fdata3, src_stride, 1, H + 1, W, bilinear_filters[x_offset]); \
    highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W,     \
                                              bilinear_filters[y_offset]);   \
                                                                             \
    vpx_highbd_comp_avg_pred_c(temp3, CONVERT_TO_SHORTPTR(second_pred), W,   \
                               H, temp2, W);                                 \
                                                                             \
    return vpx_highbd_10_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp3), W, \
                                               ref_ptr, ref_stride, sse);    \
  }                                                                          \
                                                                             \
  uint32_t vpx_highbd_12_sub_pixel_avg_variance##W##x##H##_c(                \
      const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset,    \
      const uint8_t *ref_ptr, int ref_stride, uint32_t *sse,                 \
      const uint8_t *second_pred) {                                          \
    uint16_t fdata3[(H + 1) * W];                                            \
    uint16_t temp2[H * W];                                                   \
    DECLARE_ALIGNED(16, uint16_t, temp3[H * W]);                             \
                                                                             \
    highbd_var_filter_block2d_bil_first_pass(                                \
        src_ptr, fdata3, src_stride, 1, H + 1, W, bilinear_filters[x_offset]); \
    highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W,     \
                                              bilinear_filters[y_offset]);   \
                                                                             \
    vpx_highbd_comp_avg_pred_c(temp3, CONVERT_TO_SHORTPTR(second_pred), W,   \
                               H, temp2, W);                                 \
                                                                             \
    return vpx_highbd_12_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp3), W, \
                                               ref_ptr, ref_stride, sse);    \
  }
/* All three forms of the variance are available in the same sizes. */
#define HIGHBD_VARIANCES(W, H) \
  HIGHBD_VAR(W, H)             \
  HIGHBD_SUBPIX_VAR(W, H)      \
  HIGHBD_SUBPIX_AVG_VAR(W, H)

// Instantiate the high-bitdepth plain, sub-pixel, and sub-pixel-with-
// averaging variance functions for every supported block size.
HIGHBD_VARIANCES(64, 64)
HIGHBD_VARIANCES(64, 32)
HIGHBD_VARIANCES(32, 64)
HIGHBD_VARIANCES(32, 32)
HIGHBD_VARIANCES(32, 16)
HIGHBD_VARIANCES(16, 32)
HIGHBD_VARIANCES(16, 16)
HIGHBD_VARIANCES(16, 8)
HIGHBD_VARIANCES(8, 16)
HIGHBD_VARIANCES(8, 8)
HIGHBD_VARIANCES(8, 4)
HIGHBD_VARIANCES(4, 8)
HIGHBD_VARIANCES(4, 4)

HIGHBD_GET_VAR(8)
HIGHBD_GET_VAR(16)

HIGHBD_MSE(16, 16)
HIGHBD_MSE(16, 8)
HIGHBD_MSE(8, 16)
HIGHBD_MSE(8, 8)
  497. void vpx_highbd_comp_avg_pred(uint16_t *comp_pred, const uint16_t *pred,
  498. int width, int height, const uint16_t *ref,
  499. int ref_stride) {
  500. int i, j;
  501. for (i = 0; i < height; ++i) {
  502. for (j = 0; j < width; ++j) {
  503. const int tmp = pred[j] + ref[j];
  504. comp_pred[j] = ROUND_POWER_OF_TWO(tmp, 1);
  505. }
  506. comp_pred += width;
  507. pred += width;
  508. ref += ref_stride;
  509. }
  510. }
  511. #endif // CONFIG_VP9_HIGHBITDEPTH