// convolve_test.cc
  1. /*
  2. * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  3. *
  4. * Use of this source code is governed by a BSD-style license
  5. * that can be found in the LICENSE file in the root of the source
  6. * tree. An additional intellectual property rights grant can be found
  7. * in the file PATENTS. All contributing project authors may
  8. * be found in the AUTHORS file in the root of the source tree.
  9. */
  10. #include <string.h>
  11. #include <tuple>
  12. #include "third_party/googletest/src/include/gtest/gtest.h"
  13. #include "./vp9_rtcd.h"
  14. #include "./vpx_config.h"
  15. #include "./vpx_dsp_rtcd.h"
  16. #include "test/acm_random.h"
  17. #include "test/clear_system_state.h"
  18. #include "test/register_state_check.h"
  19. #include "test/util.h"
  20. #include "vp9/common/vp9_common.h"
  21. #include "vp9/common/vp9_filter.h"
  22. #include "vpx_dsp/vpx_dsp_common.h"
  23. #include "vpx_dsp/vpx_filter.h"
  24. #include "vpx_mem/vpx_mem.h"
  25. #include "vpx_ports/mem.h"
  26. #include "vpx_ports/vpx_timer.h"
  27. namespace {
// Largest block dimension exercised by these tests (VP9's biggest block is
// 64x64); also sizes the reference code's scratch buffers.
static const unsigned int kMaxDimension = 64;

// Signature shared by every convolve function under test (copy, avg and the
// 8-tap variants, scaled and unscaled). x0_q4/x_step_q4 and y0_q4/y_step_q4
// are the subpixel phase/step parameters (the _q4 suffix suggests Q4 fixed
// point — confirm against vpx_dsp's convolve documentation).
typedef void (*ConvolveFunc)(const uint8_t *src, ptrdiff_t src_stride,
                             uint8_t *dst, ptrdiff_t dst_stride,
                             const InterpKernel *filter, int x0_q4,
                             int x_step_q4, int y0_q4, int y_step_q4, int w,
                             int h);

// Reference 2-D 8-tap filter wrapper; use_highbd selects the 8-bit path (0)
// or the high-bitdepth path (non-zero = bit depth).
typedef void (*WrapperFilterBlock2d8Func)(
    const uint8_t *src_ptr, const unsigned int src_stride,
    const int16_t *hfilter, const int16_t *vfilter, uint8_t *dst_ptr,
    unsigned int dst_stride, unsigned int output_width,
    unsigned int output_height, int use_highbd);
// Bundles one implementation's full set of convolve entry points so a single
// parameterized test can exercise all of them. For each pair, index [0] is
// the plain function and index [1] is the "_avg" (averaging) variant.
struct ConvolveFunctions {
  ConvolveFunctions(ConvolveFunc copy, ConvolveFunc avg, ConvolveFunc h8,
                    ConvolveFunc h8_avg, ConvolveFunc v8, ConvolveFunc v8_avg,
                    ConvolveFunc hv8, ConvolveFunc hv8_avg, ConvolveFunc sh8,
                    ConvolveFunc sh8_avg, ConvolveFunc sv8,
                    ConvolveFunc sv8_avg, ConvolveFunc shv8,
                    ConvolveFunc shv8_avg, int bd)
      : use_highbd_(bd) {
    copy_[0] = copy;
    copy_[1] = avg;
    h8_[0] = h8;
    h8_[1] = h8_avg;
    v8_[0] = v8;
    v8_[1] = v8_avg;
    hv8_[0] = hv8;
    hv8_[1] = hv8_avg;
    sh8_[0] = sh8;
    sh8_[1] = sh8_avg;
    sv8_[0] = sv8;
    sv8_[1] = sv8_avg;
    shv8_[0] = shv8;
    shv8_[1] = shv8_avg;
  }

  ConvolveFunc copy_[2];  // straight copy / copy-with-average
  ConvolveFunc h8_[2];    // horizontal 8-tap
  ConvolveFunc v8_[2];    // vertical 8-tap
  ConvolveFunc hv8_[2];   // horizontal then vertical 8-tap
  ConvolveFunc sh8_[2];   // scaled horiz
  ConvolveFunc sv8_[2];   // scaled vert
  ConvolveFunc shv8_[2];  // scaled horiz/vert
  int use_highbd_;  // 0 if high bitdepth not used, else the actual bit depth.
};
// Test parameter: (block width, block height, function table under test).
typedef std::tuple<int, int, const ConvolveFunctions *> ConvolveParam;

// Expands to the full list of VP9 block sizes (4x4 through 64x64) paired
// with a given function table, for INSTANTIATE_TEST_* lists.
#define ALL_SIZES(convolve_fn)                                            \
  make_tuple(4, 4, &convolve_fn), make_tuple(8, 4, &convolve_fn),         \
      make_tuple(4, 8, &convolve_fn), make_tuple(8, 8, &convolve_fn),     \
      make_tuple(16, 8, &convolve_fn), make_tuple(8, 16, &convolve_fn),   \
      make_tuple(16, 16, &convolve_fn), make_tuple(32, 16, &convolve_fn), \
      make_tuple(16, 32, &convolve_fn), make_tuple(32, 32, &convolve_fn), \
      make_tuple(64, 32, &convolve_fn), make_tuple(32, 64, &convolve_fn), \
      make_tuple(64, 64, &convolve_fn)

// Reference 8-tap subpixel filter, slightly modified to fit into this test.
// Filter taps sum to VP9_FILTER_WEIGHT; results are normalized by shifting
// right VP9_FILTER_SHIFT bits (128 == 1 << 7).
#define VP9_FILTER_WEIGHT 128
#define VP9_FILTER_SHIFT 7
  83. uint8_t clip_pixel(int x) { return x < 0 ? 0 : x > 255 ? 255 : x; }
// Reference separable 8-tap filter: a horizontal pass writes into a
// *transposed* intermediate buffer, then a vertical pass reads that buffer
// and writes dst. Used as the golden model for the optimized convolve code.
void filter_block2d_8_c(const uint8_t *src_ptr, const unsigned int src_stride,
                        const int16_t *hfilter, const int16_t *vfilter,
                        uint8_t *dst_ptr, unsigned int dst_stride,
                        unsigned int output_width, unsigned int output_height) {
  // Between passes, we use an intermediate buffer whose height is extended to
  // have enough horizontally filtered values as input for the vertical pass.
  // This buffer is allocated to be big enough for the largest block type we
  // support.
  const int kInterp_Extend = 4;  // 8-tap kernel: 4 pixels of context per side.
  const unsigned int intermediate_height =
      (kInterp_Extend - 1) + output_height + kInterp_Extend;
  unsigned int i, j;
  // Size of intermediate_buffer is max_intermediate_height * filter_max_width,
  // where max_intermediate_height = (kInterp_Extend - 1) + filter_max_height
  //                                 + kInterp_Extend
  //                               = 3 + 16 + 4
  //                               = 23
  // and filter_max_width = 16
  //
  // NOTE(review): the figures above look stale — with kMaxDimension = 64 the
  // buffer below is sized 71 * kMaxDimension, i.e. 3 + 64 + 4 = 71 rows.
  uint8_t intermediate_buffer[71 * kMaxDimension];
  vp9_zero(intermediate_buffer);
  // After finishing a row (output_width columns, each advancing the write
  // pointer by intermediate_height), this steps back to the next transposed
  // row's start.
  const int intermediate_next_stride =
      1 - static_cast<int>(intermediate_height * output_width);
  // Horizontal pass (src -> transposed intermediate).
  uint8_t *output_ptr = intermediate_buffer;
  const int src_next_row_stride = src_stride - output_width;
  // Back src up by 3 rows and 3 columns so the kernel's leading taps see
  // their context pixels.
  src_ptr -= (kInterp_Extend - 1) * src_stride + (kInterp_Extend - 1);
  for (i = 0; i < intermediate_height; ++i) {
    for (j = 0; j < output_width; ++j) {
      // Apply filter...
      const int temp = (src_ptr[0] * hfilter[0]) + (src_ptr[1] * hfilter[1]) +
                       (src_ptr[2] * hfilter[2]) + (src_ptr[3] * hfilter[3]) +
                       (src_ptr[4] * hfilter[4]) + (src_ptr[5] * hfilter[5]) +
                       (src_ptr[6] * hfilter[6]) + (src_ptr[7] * hfilter[7]) +
                       (VP9_FILTER_WEIGHT >> 1);  // Rounding
      // Normalize back to 0-255...
      *output_ptr = clip_pixel(temp >> VP9_FILTER_SHIFT);
      ++src_ptr;
      // Column-major write: stores the intermediate image transposed.
      output_ptr += intermediate_height;
    }
    src_ptr += src_next_row_stride;
    output_ptr += intermediate_next_stride;
  }
  // Vertical pass (transposed intermediate -> dst).
  src_ptr = intermediate_buffer;
  const int dst_next_row_stride = dst_stride - output_width;
  for (i = 0; i < output_height; ++i) {
    for (j = 0; j < output_width; ++j) {
      // Apply filter... (reading down a transposed column == vertical filter)
      const int temp = (src_ptr[0] * vfilter[0]) + (src_ptr[1] * vfilter[1]) +
                       (src_ptr[2] * vfilter[2]) + (src_ptr[3] * vfilter[3]) +
                       (src_ptr[4] * vfilter[4]) + (src_ptr[5] * vfilter[5]) +
                       (src_ptr[6] * vfilter[6]) + (src_ptr[7] * vfilter[7]) +
                       (VP9_FILTER_WEIGHT >> 1);  // Rounding
      // Normalize back to 0-255...
      *dst_ptr++ = clip_pixel(temp >> VP9_FILTER_SHIFT);
      src_ptr += intermediate_height;
    }
    src_ptr += intermediate_next_stride;
    dst_ptr += dst_next_row_stride;
  }
}
  146. void block2d_average_c(uint8_t *src, unsigned int src_stride,
  147. uint8_t *output_ptr, unsigned int output_stride,
  148. unsigned int output_width, unsigned int output_height) {
  149. unsigned int i, j;
  150. for (i = 0; i < output_height; ++i) {
  151. for (j = 0; j < output_width; ++j) {
  152. output_ptr[j] = (output_ptr[j] + src[i * src_stride + j] + 1) >> 1;
  153. }
  154. output_ptr += output_stride;
  155. }
  156. }
  157. void filter_average_block2d_8_c(const uint8_t *src_ptr,
  158. const unsigned int src_stride,
  159. const int16_t *hfilter, const int16_t *vfilter,
  160. uint8_t *dst_ptr, unsigned int dst_stride,
  161. unsigned int output_width,
  162. unsigned int output_height) {
  163. uint8_t tmp[kMaxDimension * kMaxDimension];
  164. assert(output_width <= kMaxDimension);
  165. assert(output_height <= kMaxDimension);
  166. filter_block2d_8_c(src_ptr, src_stride, hfilter, vfilter, tmp, 64,
  167. output_width, output_height);
  168. block2d_average_c(tmp, 64, dst_ptr, dst_stride, output_width, output_height);
  169. }
  170. #if CONFIG_VP9_HIGHBITDEPTH
// High-bitdepth twin of filter_block2d_8_c: identical two-pass structure
// (horizontal into a transposed intermediate, then vertical into dst) but on
// uint16_t pixels, clipped to the range implied by bit depth bd.
void highbd_filter_block2d_8_c(const uint16_t *src_ptr,
                               const unsigned int src_stride,
                               const int16_t *hfilter, const int16_t *vfilter,
                               uint16_t *dst_ptr, unsigned int dst_stride,
                               unsigned int output_width,
                               unsigned int output_height, int bd) {
  // Between passes, we use an intermediate buffer whose height is extended to
  // have enough horizontally filtered values as input for the vertical pass.
  // This buffer is allocated to be big enough for the largest block type we
  // support.
  const int kInterp_Extend = 4;  // 8-tap kernel: 4 pixels of context per side.
  const unsigned int intermediate_height =
      (kInterp_Extend - 1) + output_height + kInterp_Extend;
  /* Size of intermediate_buffer is max_intermediate_height * filter_max_width,
   * where max_intermediate_height = (kInterp_Extend - 1) + filter_max_height
   *                                 + kInterp_Extend
   *                               = 3 + 16 + 4
   *                               = 23
   * and filter_max_width = 16
   *
   * NOTE(review): the figures above look stale — with kMaxDimension = 64 the
   * buffer below is sized 71 * kMaxDimension, i.e. 3 + 64 + 4 = 71 rows.
   */
  uint16_t intermediate_buffer[71 * kMaxDimension];
  // Step back to the start of the next transposed row once a row of
  // output_width column-writes has completed.
  const int intermediate_next_stride =
      1 - static_cast<int>(intermediate_height * output_width);

  vp9_zero(intermediate_buffer);

  // Horizontal pass (src -> transposed intermediate).
  {
    uint16_t *output_ptr = intermediate_buffer;
    const int src_next_row_stride = src_stride - output_width;
    unsigned int i, j;
    // Back src up by 3 rows and 3 columns for the kernel's leading taps.
    src_ptr -= (kInterp_Extend - 1) * src_stride + (kInterp_Extend - 1);
    for (i = 0; i < intermediate_height; ++i) {
      for (j = 0; j < output_width; ++j) {
        // Apply filter...
        const int temp = (src_ptr[0] * hfilter[0]) + (src_ptr[1] * hfilter[1]) +
                         (src_ptr[2] * hfilter[2]) + (src_ptr[3] * hfilter[3]) +
                         (src_ptr[4] * hfilter[4]) + (src_ptr[5] * hfilter[5]) +
                         (src_ptr[6] * hfilter[6]) + (src_ptr[7] * hfilter[7]) +
                         (VP9_FILTER_WEIGHT >> 1);  // Rounding
        // Normalize back to 0-255...
        *output_ptr = clip_pixel_highbd(temp >> VP9_FILTER_SHIFT, bd);
        ++src_ptr;
        // Column-major write: stores the intermediate image transposed.
        output_ptr += intermediate_height;
      }
      src_ptr += src_next_row_stride;
      output_ptr += intermediate_next_stride;
    }
  }

  // Vertical pass (transposed intermediate -> dst).
  {
    uint16_t *src_ptr = intermediate_buffer;
    const int dst_next_row_stride = dst_stride - output_width;
    unsigned int i, j;
    for (i = 0; i < output_height; ++i) {
      for (j = 0; j < output_width; ++j) {
        // Apply filter... (down a transposed column == vertical filter)
        const int temp = (src_ptr[0] * vfilter[0]) + (src_ptr[1] * vfilter[1]) +
                         (src_ptr[2] * vfilter[2]) + (src_ptr[3] * vfilter[3]) +
                         (src_ptr[4] * vfilter[4]) + (src_ptr[5] * vfilter[5]) +
                         (src_ptr[6] * vfilter[6]) + (src_ptr[7] * vfilter[7]) +
                         (VP9_FILTER_WEIGHT >> 1);  // Rounding
        // Normalize back to 0-255...
        *dst_ptr++ = clip_pixel_highbd(temp >> VP9_FILTER_SHIFT, bd);
        src_ptr += intermediate_height;
      }
      src_ptr += intermediate_next_stride;
      dst_ptr += dst_next_row_stride;
    }
  }
}
  240. void highbd_block2d_average_c(uint16_t *src, unsigned int src_stride,
  241. uint16_t *output_ptr, unsigned int output_stride,
  242. unsigned int output_width,
  243. unsigned int output_height) {
  244. unsigned int i, j;
  245. for (i = 0; i < output_height; ++i) {
  246. for (j = 0; j < output_width; ++j) {
  247. output_ptr[j] = (output_ptr[j] + src[i * src_stride + j] + 1) >> 1;
  248. }
  249. output_ptr += output_stride;
  250. }
  251. }
  252. void highbd_filter_average_block2d_8_c(
  253. const uint16_t *src_ptr, const unsigned int src_stride,
  254. const int16_t *hfilter, const int16_t *vfilter, uint16_t *dst_ptr,
  255. unsigned int dst_stride, unsigned int output_width,
  256. unsigned int output_height, int bd) {
  257. uint16_t tmp[kMaxDimension * kMaxDimension];
  258. assert(output_width <= kMaxDimension);
  259. assert(output_height <= kMaxDimension);
  260. highbd_filter_block2d_8_c(src_ptr, src_stride, hfilter, vfilter, tmp, 64,
  261. output_width, output_height, bd);
  262. highbd_block2d_average_c(tmp, 64, dst_ptr, dst_stride, output_width,
  263. output_height);
  264. }
  265. #endif // CONFIG_VP9_HIGHBITDEPTH
// Dispatches to the 8-bit or high-bitdepth filter+average reference based on
// use_highbd (0 = 8-bit path; otherwise use_highbd is the bit depth).
void wrapper_filter_average_block2d_8_c(
    const uint8_t *src_ptr, const unsigned int src_stride,
    const int16_t *hfilter, const int16_t *vfilter, uint8_t *dst_ptr,
    unsigned int dst_stride, unsigned int output_width,
    unsigned int output_height, int use_highbd) {
#if CONFIG_VP9_HIGHBITDEPTH
  if (use_highbd == 0) {
    filter_average_block2d_8_c(src_ptr, src_stride, hfilter, vfilter, dst_ptr,
                               dst_stride, output_width, output_height);
  } else {
    // In highbd mode the uint8_t* arguments actually carry uint16_t buffers;
    // CAST_TO_SHORTPTR recovers the real pointer type.
    highbd_filter_average_block2d_8_c(CAST_TO_SHORTPTR(src_ptr), src_stride,
                                      hfilter, vfilter,
                                      CAST_TO_SHORTPTR(dst_ptr), dst_stride,
                                      output_width, output_height, use_highbd);
  }
#else
  // Highbd not compiled in: the caller must not request it.
  ASSERT_EQ(0, use_highbd);
  filter_average_block2d_8_c(src_ptr, src_stride, hfilter, vfilter, dst_ptr,
                             dst_stride, output_width, output_height);
#endif
}
// Dispatches to the 8-bit or high-bitdepth plain-filter reference based on
// use_highbd (0 = 8-bit path; otherwise use_highbd is the bit depth).
void wrapper_filter_block2d_8_c(const uint8_t *src_ptr,
                                const unsigned int src_stride,
                                const int16_t *hfilter, const int16_t *vfilter,
                                uint8_t *dst_ptr, unsigned int dst_stride,
                                unsigned int output_width,
                                unsigned int output_height, int use_highbd) {
#if CONFIG_VP9_HIGHBITDEPTH
  if (use_highbd == 0) {
    filter_block2d_8_c(src_ptr, src_stride, hfilter, vfilter, dst_ptr,
                       dst_stride, output_width, output_height);
  } else {
    // In highbd mode the uint8_t* arguments actually carry uint16_t buffers.
    highbd_filter_block2d_8_c(CAST_TO_SHORTPTR(src_ptr), src_stride, hfilter,
                              vfilter, CAST_TO_SHORTPTR(dst_ptr), dst_stride,
                              output_width, output_height, use_highbd);
  }
#else
  // Highbd not compiled in: the caller must not request it.
  ASSERT_EQ(0, use_highbd);
  filter_block2d_8_c(src_ptr, src_stride, hfilter, vfilter, dst_ptr, dst_stride,
                     output_width, output_height);
#endif
}
// Parameterized fixture: each instance tests one (width, height, function
// table) combination. Blocks are centered inside a 256x256 "outer" buffer so
// the functions under test can legally read context pixels past the block
// edges, and the border is filled with guard values to detect out-of-bounds
// writes.
class ConvolveTest : public ::testing::TestWithParam<ConvolveParam> {
 public:
  // Allocates the shared input/output buffers once per test case.
  static void SetUpTestCase() {
    // Force input_ to be unaligned, output to be 16 byte aligned.
    input_ = reinterpret_cast<uint8_t *>(
                 vpx_memalign(kDataAlignment, kInputBufferSize + 1)) +
             1;
    output_ = reinterpret_cast<uint8_t *>(
        vpx_memalign(kDataAlignment, kOutputBufferSize));
    output_ref_ = reinterpret_cast<uint8_t *>(
        vpx_memalign(kDataAlignment, kOutputBufferSize));
#if CONFIG_VP9_HIGHBITDEPTH
    input16_ = reinterpret_cast<uint16_t *>(vpx_memalign(
                   kDataAlignment, (kInputBufferSize + 1) * sizeof(uint16_t))) +
               1;
    output16_ = reinterpret_cast<uint16_t *>(
        vpx_memalign(kDataAlignment, (kOutputBufferSize) * sizeof(uint16_t)));
    output16_ref_ = reinterpret_cast<uint16_t *>(
        vpx_memalign(kDataAlignment, (kOutputBufferSize) * sizeof(uint16_t)));
#endif
  }

  // Clears SIMD/system state after each test.
  virtual void TearDown() { libvpx_test::ClearSystemState(); }

  // Releases the buffers from SetUpTestCase(). input_ / input16_ were offset
  // by +1 to force misalignment, so free the original allocation pointer.
  static void TearDownTestCase() {
    vpx_free(input_ - 1);
    input_ = NULL;
    vpx_free(output_);
    output_ = NULL;
    vpx_free(output_ref_);
    output_ref_ = NULL;
#if CONFIG_VP9_HIGHBITDEPTH
    vpx_free(input16_ - 1);
    input16_ = NULL;
    vpx_free(output16_);
    output16_ = NULL;
    vpx_free(output16_ref_);
    output16_ref_ = NULL;
#endif
  }

 protected:
  static const int kDataAlignment = 16;
  static const int kOuterBlockSize = 256;
  static const int kInputStride = kOuterBlockSize;
  static const int kOutputStride = kOuterBlockSize;
  static const int kInputBufferSize = kOuterBlockSize * kOuterBlockSize;
  static const int kOutputBufferSize = kOuterBlockSize * kOuterBlockSize;

  // Block dimensions come from the test parameter tuple.
  int Width() const { return GET_PARAM(0); }
  int Height() const { return GET_PARAM(1); }

  // Left edge of the centered block, rounded up to kDataAlignment.
  int BorderLeft() const {
    const int center = (kOuterBlockSize - Width()) / 2;
    return (center + (kDataAlignment - 1)) & ~(kDataAlignment - 1);
  }
  // Top edge of the centered block.
  int BorderTop() const { return (kOuterBlockSize - Height()) / 2; }

  // True if flat index i into the outer buffer falls outside the centered
  // block (i.e. in the guard border).
  bool IsIndexInBorder(int i) {
    return (i < BorderTop() * kOuterBlockSize ||
            i >= (BorderTop() + Height()) * kOuterBlockSize ||
            i % kOuterBlockSize < BorderLeft() ||
            i % kOuterBlockSize >= (BorderLeft() + Width()));
  }

  // Fills the output border with the max pixel value (guard), zeroes the
  // block interior, and fills the input with alternating max / pseudo-random
  // extreme values.
  virtual void SetUp() {
    UUT_ = GET_PARAM(2);
#if CONFIG_VP9_HIGHBITDEPTH
    // mask_ is the maximum pixel value for the configured bit depth.
    if (UUT_->use_highbd_ != 0) {
      mask_ = (1 << UUT_->use_highbd_) - 1;
    } else {
      mask_ = 255;
    }
#endif
    /* Set up guard blocks for an inner block centered in the outer block */
    for (int i = 0; i < kOutputBufferSize; ++i) {
      if (IsIndexInBorder(i)) {
        output_[i] = 255;
#if CONFIG_VP9_HIGHBITDEPTH
        output16_[i] = mask_;
#endif
      } else {
        output_[i] = 0;
#if CONFIG_VP9_HIGHBITDEPTH
        output16_[i] = 0;
#endif
      }
    }

    ::libvpx_test::ACMRandom prng;
    for (int i = 0; i < kInputBufferSize; ++i) {
      if (i & 1) {
        input_[i] = 255;
#if CONFIG_VP9_HIGHBITDEPTH
        input16_[i] = mask_;
#endif
      } else {
        input_[i] = prng.Rand8Extremes();
#if CONFIG_VP9_HIGHBITDEPTH
        input16_[i] = prng.Rand16() & mask_;
#endif
      }
    }
  }

  // Overwrites the entire input buffer with a constant pixel value.
  void SetConstantInput(int value) {
    memset(input_, value, kInputBufferSize);
#if CONFIG_VP9_HIGHBITDEPTH
    vpx_memset16(input16_, value, kInputBufferSize);
#endif
  }

  // Snapshots the output buffer so "_avg" results can be checked against the
  // pre-call contents.
  void CopyOutputToRef() {
    memcpy(output_ref_, output_, kOutputBufferSize);
#if CONFIG_VP9_HIGHBITDEPTH
    memcpy(output16_ref_, output16_,
           kOutputBufferSize * sizeof(output16_ref_[0]));
#endif
  }

  // Fails if anything wrote into the guard border of the 8-bit output.
  void CheckGuardBlocks() {
    for (int i = 0; i < kOutputBufferSize; ++i) {
      if (IsIndexInBorder(i)) {
        EXPECT_EQ(255, output_[i]);
      }
    }
  }

  // The pointer helpers below return the top-left pixel of the centered
  // block; in high-bitdepth mode the uint16_t buffer is smuggled through a
  // uint8_t* per libvpx convention (CAST_TO_BYTEPTR / CAST_TO_SHORTPTR).
  uint8_t *input() const {
    const int offset = BorderTop() * kOuterBlockSize + BorderLeft();
#if CONFIG_VP9_HIGHBITDEPTH
    if (UUT_->use_highbd_ == 0) {
      return input_ + offset;
    } else {
      return CAST_TO_BYTEPTR(input16_ + offset);
    }
#else
    return input_ + offset;
#endif
  }

  uint8_t *output() const {
    const int offset = BorderTop() * kOuterBlockSize + BorderLeft();
#if CONFIG_VP9_HIGHBITDEPTH
    if (UUT_->use_highbd_ == 0) {
      return output_ + offset;
    } else {
      return CAST_TO_BYTEPTR(output16_ + offset);
    }
#else
    return output_ + offset;
#endif
  }

  uint8_t *output_ref() const {
    const int offset = BorderTop() * kOuterBlockSize + BorderLeft();
#if CONFIG_VP9_HIGHBITDEPTH
    if (UUT_->use_highbd_ == 0) {
      return output_ref_ + offset;
    } else {
      return CAST_TO_BYTEPTR(output16_ref_ + offset);
    }
#else
    return output_ref_ + offset;
#endif
  }

  // Reads a pixel from either an 8-bit or (cast) 16-bit buffer.
  uint16_t lookup(uint8_t *list, int index) const {
#if CONFIG_VP9_HIGHBITDEPTH
    if (UUT_->use_highbd_ == 0) {
      return list[index];
    } else {
      return CAST_TO_SHORTPTR(list)[index];
    }
#else
    return list[index];
#endif
  }

  // Writes a pixel to either an 8-bit or (cast) 16-bit buffer.
  void assign_val(uint8_t *list, int index, uint16_t val) const {
#if CONFIG_VP9_HIGHBITDEPTH
    if (UUT_->use_highbd_ == 0) {
      list[index] = (uint8_t)val;
    } else {
      CAST_TO_SHORTPTR(list)[index] = val;
    }
#else
    list[index] = (uint8_t)val;
#endif
  }

  const ConvolveFunctions *UUT_;  // Function table under test.
  static uint8_t *input_;
  static uint8_t *output_;
  static uint8_t *output_ref_;
#if CONFIG_VP9_HIGHBITDEPTH
  static uint16_t *input16_;
  static uint16_t *output16_;
  static uint16_t *output16_ref_;
  int mask_;  // Max pixel value for the current bit depth.
#endif
};
// Definitions of the fixture's static buffers; allocated in SetUpTestCase()
// and released in TearDownTestCase().
uint8_t *ConvolveTest::input_ = NULL;
uint8_t *ConvolveTest::output_ = NULL;
uint8_t *ConvolveTest::output_ref_ = NULL;
#if CONFIG_VP9_HIGHBITDEPTH
uint16_t *ConvolveTest::input16_ = NULL;
uint16_t *ConvolveTest::output16_ = NULL;
uint16_t *ConvolveTest::output16_ref_ = NULL;
#endif
// Sanity check: the freshly initialized guard border must be intact.
TEST_P(ConvolveTest, GuardBlocks) { CheckGuardBlocks(); }
  502. TEST_P(ConvolveTest, DISABLED_Copy_Speed) {
  503. const uint8_t *const in = input();
  504. uint8_t *const out = output();
  505. const int kNumTests = 5000000;
  506. const int width = Width();
  507. const int height = Height();
  508. vpx_usec_timer timer;
  509. vpx_usec_timer_start(&timer);
  510. for (int n = 0; n < kNumTests; ++n) {
  511. UUT_->copy_[0](in, kInputStride, out, kOutputStride, NULL, 0, 0, 0, 0,
  512. width, height);
  513. }
  514. vpx_usec_timer_mark(&timer);
  515. const int elapsed_time = static_cast<int>(vpx_usec_timer_elapsed(&timer));
  516. printf("convolve_copy_%dx%d_%d: %d us\n", width, height,
  517. UUT_->use_highbd_ ? UUT_->use_highbd_ : 8, elapsed_time);
  518. }
  519. TEST_P(ConvolveTest, DISABLED_Avg_Speed) {
  520. const uint8_t *const in = input();
  521. uint8_t *const out = output();
  522. const int kNumTests = 5000000;
  523. const int width = Width();
  524. const int height = Height();
  525. vpx_usec_timer timer;
  526. vpx_usec_timer_start(&timer);
  527. for (int n = 0; n < kNumTests; ++n) {
  528. UUT_->copy_[1](in, kInputStride, out, kOutputStride, NULL, 0, 0, 0, 0,
  529. width, height);
  530. }
  531. vpx_usec_timer_mark(&timer);
  532. const int elapsed_time = static_cast<int>(vpx_usec_timer_elapsed(&timer));
  533. printf("convolve_avg_%dx%d_%d: %d us\n", width, height,
  534. UUT_->use_highbd_ ? UUT_->use_highbd_ : 8, elapsed_time);
  535. }
  536. TEST_P(ConvolveTest, DISABLED_Scale_Speed) {
  537. const uint8_t *const in = input();
  538. uint8_t *const out = output();
  539. const InterpKernel *const eighttap = vp9_filter_kernels[EIGHTTAP];
  540. const int kNumTests = 5000000;
  541. const int width = Width();
  542. const int height = Height();
  543. vpx_usec_timer timer;
  544. SetConstantInput(127);
  545. vpx_usec_timer_start(&timer);
  546. for (int n = 0; n < kNumTests; ++n) {
  547. UUT_->shv8_[0](in, kInputStride, out, kOutputStride, eighttap, 8, 16, 8, 16,
  548. width, height);
  549. }
  550. vpx_usec_timer_mark(&timer);
  551. const int elapsed_time = static_cast<int>(vpx_usec_timer_elapsed(&timer));
  552. printf("convolve_scale_%dx%d_%d: %d us\n", width, height,
  553. UUT_->use_highbd_ ? UUT_->use_highbd_ : 8, elapsed_time);
  554. }
  555. TEST_P(ConvolveTest, DISABLED_8Tap_Speed) {
  556. const uint8_t *const in = input();
  557. uint8_t *const out = output();
  558. const InterpKernel *const eighttap = vp9_filter_kernels[EIGHTTAP_SHARP];
  559. const int kNumTests = 5000000;
  560. const int width = Width();
  561. const int height = Height();
  562. vpx_usec_timer timer;
  563. SetConstantInput(127);
  564. vpx_usec_timer_start(&timer);
  565. for (int n = 0; n < kNumTests; ++n) {
  566. UUT_->hv8_[0](in, kInputStride, out, kOutputStride, eighttap, 8, 16, 8, 16,
  567. width, height);
  568. }
  569. vpx_usec_timer_mark(&timer);
  570. const int elapsed_time = static_cast<int>(vpx_usec_timer_elapsed(&timer));
  571. printf("convolve8_%dx%d_%d: %d us\n", width, height,
  572. UUT_->use_highbd_ ? UUT_->use_highbd_ : 8, elapsed_time);
  573. }
  574. TEST_P(ConvolveTest, DISABLED_8Tap_Horiz_Speed) {
  575. const uint8_t *const in = input();
  576. uint8_t *const out = output();
  577. const InterpKernel *const eighttap = vp9_filter_kernels[EIGHTTAP_SHARP];
  578. const int kNumTests = 5000000;
  579. const int width = Width();
  580. const int height = Height();
  581. vpx_usec_timer timer;
  582. SetConstantInput(127);
  583. vpx_usec_timer_start(&timer);
  584. for (int n = 0; n < kNumTests; ++n) {
  585. UUT_->h8_[0](in, kInputStride, out, kOutputStride, eighttap, 8, 16, 8, 16,
  586. width, height);
  587. }
  588. vpx_usec_timer_mark(&timer);
  589. const int elapsed_time = static_cast<int>(vpx_usec_timer_elapsed(&timer));
  590. printf("convolve8_horiz_%dx%d_%d: %d us\n", width, height,
  591. UUT_->use_highbd_ ? UUT_->use_highbd_ : 8, elapsed_time);
  592. }
  593. TEST_P(ConvolveTest, DISABLED_8Tap_Vert_Speed) {
  594. const uint8_t *const in = input();
  595. uint8_t *const out = output();
  596. const InterpKernel *const eighttap = vp9_filter_kernels[EIGHTTAP_SHARP];
  597. const int kNumTests = 5000000;
  598. const int width = Width();
  599. const int height = Height();
  600. vpx_usec_timer timer;
  601. SetConstantInput(127);
  602. vpx_usec_timer_start(&timer);
  603. for (int n = 0; n < kNumTests; ++n) {
  604. UUT_->v8_[0](in, kInputStride, out, kOutputStride, eighttap, 8, 16, 8, 16,
  605. width, height);
  606. }
  607. vpx_usec_timer_mark(&timer);
  608. const int elapsed_time = static_cast<int>(vpx_usec_timer_elapsed(&timer));
  609. printf("convolve8_vert_%dx%d_%d: %d us\n", width, height,
  610. UUT_->use_highbd_ ? UUT_->use_highbd_ : 8, elapsed_time);
  611. }
  612. TEST_P(ConvolveTest, DISABLED_4Tap_Speed) {
  613. const uint8_t *const in = input();
  614. uint8_t *const out = output();
  615. const InterpKernel *const fourtap = vp9_filter_kernels[FOURTAP];
  616. const int kNumTests = 5000000;
  617. const int width = Width();
  618. const int height = Height();
  619. vpx_usec_timer timer;
  620. SetConstantInput(127);
  621. vpx_usec_timer_start(&timer);
  622. for (int n = 0; n < kNumTests; ++n) {
  623. UUT_->hv8_[0](in, kInputStride, out, kOutputStride, fourtap, 8, 16, 8, 16,
  624. width, height);
  625. }
  626. vpx_usec_timer_mark(&timer);
  627. const int elapsed_time = static_cast<int>(vpx_usec_timer_elapsed(&timer));
  628. printf("convolve4_%dx%d_%d: %d us\n", width, height,
  629. UUT_->use_highbd_ ? UUT_->use_highbd_ : 8, elapsed_time);
  630. }
  631. TEST_P(ConvolveTest, DISABLED_4Tap_Horiz_Speed) {
  632. const uint8_t *const in = input();
  633. uint8_t *const out = output();
  634. const InterpKernel *const fourtap = vp9_filter_kernels[FOURTAP];
  635. const int kNumTests = 5000000;
  636. const int width = Width();
  637. const int height = Height();
  638. vpx_usec_timer timer;
  639. SetConstantInput(127);
  640. vpx_usec_timer_start(&timer);
  641. for (int n = 0; n < kNumTests; ++n) {
  642. UUT_->h8_[0](in, kInputStride, out, kOutputStride, fourtap, 8, 16, 8, 16,
  643. width, height);
  644. }
  645. vpx_usec_timer_mark(&timer);
  646. const int elapsed_time = static_cast<int>(vpx_usec_timer_elapsed(&timer));
  647. printf("convolve4_horiz_%dx%d_%d: %d us\n", width, height,
  648. UUT_->use_highbd_ ? UUT_->use_highbd_ : 8, elapsed_time);
  649. }
  650. TEST_P(ConvolveTest, DISABLED_4Tap_Vert_Speed) {
  651. const uint8_t *const in = input();
  652. uint8_t *const out = output();
  653. const InterpKernel *const fourtap = vp9_filter_kernels[FOURTAP];
  654. const int kNumTests = 5000000;
  655. const int width = Width();
  656. const int height = Height();
  657. vpx_usec_timer timer;
  658. SetConstantInput(127);
  659. vpx_usec_timer_start(&timer);
  660. for (int n = 0; n < kNumTests; ++n) {
  661. UUT_->v8_[0](in, kInputStride, out, kOutputStride, fourtap, 8, 16, 8, 16,
  662. width, height);
  663. }
  664. vpx_usec_timer_mark(&timer);
  665. const int elapsed_time = static_cast<int>(vpx_usec_timer_elapsed(&timer));
  666. printf("convolve4_vert_%dx%d_%d: %d us\n", width, height,
  667. UUT_->use_highbd_ ? UUT_->use_highbd_ : 8, elapsed_time);
  668. }
  669. TEST_P(ConvolveTest, DISABLED_8Tap_Avg_Speed) {
  670. const uint8_t *const in = input();
  671. uint8_t *const out = output();
  672. const InterpKernel *const eighttap = vp9_filter_kernels[EIGHTTAP_SHARP];
  673. const int kNumTests = 5000000;
  674. const int width = Width();
  675. const int height = Height();
  676. vpx_usec_timer timer;
  677. SetConstantInput(127);
  678. vpx_usec_timer_start(&timer);
  679. for (int n = 0; n < kNumTests; ++n) {
  680. UUT_->hv8_[1](in, kInputStride, out, kOutputStride, eighttap, 8, 16, 8, 16,
  681. width, height);
  682. }
  683. vpx_usec_timer_mark(&timer);
  684. const int elapsed_time = static_cast<int>(vpx_usec_timer_elapsed(&timer));
  685. printf("convolve8_avg_%dx%d_%d: %d us\n", width, height,
  686. UUT_->use_highbd_ ? UUT_->use_highbd_ : 8, elapsed_time);
  687. }
  688. TEST_P(ConvolveTest, Copy) {
  689. uint8_t *const in = input();
  690. uint8_t *const out = output();
  691. ASM_REGISTER_STATE_CHECK(UUT_->copy_[0](in, kInputStride, out, kOutputStride,
  692. NULL, 0, 0, 0, 0, Width(), Height()));
  693. CheckGuardBlocks();
  694. for (int y = 0; y < Height(); ++y) {
  695. for (int x = 0; x < Width(); ++x)
  696. ASSERT_EQ(lookup(out, y * kOutputStride + x),
  697. lookup(in, y * kInputStride + x))
  698. << "(" << x << "," << y << ")";
  699. }
  700. }
  701. TEST_P(ConvolveTest, Avg) {
  702. uint8_t *const in = input();
  703. uint8_t *const out = output();
  704. uint8_t *const out_ref = output_ref();
  705. CopyOutputToRef();
  706. ASM_REGISTER_STATE_CHECK(UUT_->copy_[1](in, kInputStride, out, kOutputStride,
  707. NULL, 0, 0, 0, 0, Width(), Height()));
  708. CheckGuardBlocks();
  709. for (int y = 0; y < Height(); ++y) {
  710. for (int x = 0; x < Width(); ++x)
  711. ASSERT_EQ(lookup(out, y * kOutputStride + x),
  712. ROUND_POWER_OF_TWO(lookup(in, y * kInputStride + x) +
  713. lookup(out_ref, y * kOutputStride + x),
  714. 1))
  715. << "(" << x << "," << y << ")";
  716. }
  717. }
  718. TEST_P(ConvolveTest, CopyHoriz) {
  719. uint8_t *const in = input();
  720. uint8_t *const out = output();
  721. ASM_REGISTER_STATE_CHECK(UUT_->sh8_[0](in, kInputStride, out, kOutputStride,
  722. vp9_filter_kernels[0], 0, 16, 0, 16,
  723. Width(), Height()));
  724. CheckGuardBlocks();
  725. for (int y = 0; y < Height(); ++y) {
  726. for (int x = 0; x < Width(); ++x)
  727. ASSERT_EQ(lookup(out, y * kOutputStride + x),
  728. lookup(in, y * kInputStride + x))
  729. << "(" << x << "," << y << ")";
  730. }
  731. }
  732. TEST_P(ConvolveTest, CopyVert) {
  733. uint8_t *const in = input();
  734. uint8_t *const out = output();
  735. ASM_REGISTER_STATE_CHECK(UUT_->sv8_[0](in, kInputStride, out, kOutputStride,
  736. vp9_filter_kernels[0], 0, 16, 0, 16,
  737. Width(), Height()));
  738. CheckGuardBlocks();
  739. for (int y = 0; y < Height(); ++y) {
  740. for (int x = 0; x < Width(); ++x)
  741. ASSERT_EQ(lookup(out, y * kOutputStride + x),
  742. lookup(in, y * kInputStride + x))
  743. << "(" << x << "," << y << ")";
  744. }
  745. }
  746. TEST_P(ConvolveTest, Copy2D) {
  747. uint8_t *const in = input();
  748. uint8_t *const out = output();
  749. ASM_REGISTER_STATE_CHECK(UUT_->shv8_[0](in, kInputStride, out, kOutputStride,
  750. vp9_filter_kernels[0], 0, 16, 0, 16,
  751. Width(), Height()));
  752. CheckGuardBlocks();
  753. for (int y = 0; y < Height(); ++y) {
  754. for (int x = 0; x < Width(); ++x)
  755. ASSERT_EQ(lookup(out, y * kOutputStride + x),
  756. lookup(in, y * kInputStride + x))
  757. << "(" << x << "," << y << ")";
  758. }
  759. }
// Number of filter banks in vp9_filter_kernels, and number of sub-pixel
// kernels (phases) per bank. NOTE(review): kNumFilterBanks must track the
// size of vp9_filter_kernels — confirm against the library if banks change.
const int kNumFilterBanks = 5;
const int kNumFilters = 16;
  762. TEST(ConvolveTest, FiltersWontSaturateWhenAddedPairwise) {
  763. for (int filter_bank = 0; filter_bank < kNumFilterBanks; ++filter_bank) {
  764. const InterpKernel *filters =
  765. vp9_filter_kernels[static_cast<INTERP_FILTER>(filter_bank)];
  766. for (int i = 0; i < kNumFilters; i++) {
  767. const int p0 = filters[i][0] + filters[i][1];
  768. const int p1 = filters[i][2] + filters[i][3];
  769. const int p2 = filters[i][4] + filters[i][5];
  770. const int p3 = filters[i][6] + filters[i][7];
  771. EXPECT_LE(p0, 128);
  772. EXPECT_LE(p1, 128);
  773. EXPECT_LE(p2, 128);
  774. EXPECT_LE(p3, 128);
  775. EXPECT_LE(p0 + p3, 128);
  776. EXPECT_LE(p0 + p3 + p1, 128);
  777. EXPECT_LE(p0 + p3 + p1 + p2, 128);
  778. EXPECT_EQ(p0 + p1 + p2 + p3, 128);
  779. }
  780. }
  781. }
// C reference implementations: index 0 is the plain filter, index 1 the
// filter-and-average variant, matching the [i] indexing convention of the
// function tables under test.
const WrapperFilterBlock2d8Func wrapper_filter_block2d_8[2] = {
  wrapper_filter_block2d_8_c, wrapper_filter_average_block2d_8_c
};
TEST_P(ConvolveTest, MatchesReferenceSubpixelFilter) {
  // Compare each optimized sub-pixel filter (i == 0) and its averaging
  // variant (i == 1) against the C reference wrapper for every filter bank
  // and every combination of x/y fractional phases.
  for (int i = 0; i < 2; ++i) {
    uint8_t *const in = input();
    uint8_t *const out = output();
#if CONFIG_VP9_HIGHBITDEPTH
    // In high-bit-depth mode the reference buffer holds 16-bit samples but
    // is passed through the 8-bit pointer convention used by the wrappers.
    uint8_t ref8[kOutputStride * kMaxDimension];
    uint16_t ref16[kOutputStride * kMaxDimension];
    uint8_t *ref;
    if (UUT_->use_highbd_ == 0) {
      ref = ref8;
    } else {
      ref = CAST_TO_BYTEPTR(ref16);
    }
#else
    uint8_t ref[kOutputStride * kMaxDimension];
#endif
    // Populate ref and out with some random data so the averaging variant
    // starts from identical destination contents in both paths.
    ::libvpx_test::ACMRandom prng;
    for (int y = 0; y < Height(); ++y) {
      for (int x = 0; x < Width(); ++x) {
        uint16_t r;
#if CONFIG_VP9_HIGHBITDEPTH
        if (UUT_->use_highbd_ == 0 || UUT_->use_highbd_ == 8) {
          r = prng.Rand8Extremes();
        } else {
          r = prng.Rand16() & mask_;
        }
#else
        r = prng.Rand8Extremes();
#endif
        assign_val(out, y * kOutputStride + x, r);
        assign_val(ref, y * kOutputStride + x, r);
      }
    }
    for (int filter_bank = 0; filter_bank < kNumFilterBanks; ++filter_bank) {
      const InterpKernel *filters =
          vp9_filter_kernels[static_cast<INTERP_FILTER>(filter_bank)];
      for (int filter_x = 0; filter_x < kNumFilters; ++filter_x) {
        for (int filter_y = 0; filter_y < kNumFilters; ++filter_y) {
          // Compute the reference result.
          wrapper_filter_block2d_8[i](in, kInputStride, filters[filter_x],
                                      filters[filter_y], ref, kOutputStride,
                                      Width(), Height(), UUT_->use_highbd_);
          // Dispatch to the 2-D, vertical-only, horizontal-only or copy
          // function depending on which phases are non-zero.
          if (filter_x && filter_y)
            ASM_REGISTER_STATE_CHECK(
                UUT_->hv8_[i](in, kInputStride, out, kOutputStride, filters,
                              filter_x, 16, filter_y, 16, Width(), Height()));
          else if (filter_y)
            ASM_REGISTER_STATE_CHECK(
                UUT_->v8_[i](in, kInputStride, out, kOutputStride, filters, 0,
                             16, filter_y, 16, Width(), Height()));
          else if (filter_x)
            ASM_REGISTER_STATE_CHECK(
                UUT_->h8_[i](in, kInputStride, out, kOutputStride, filters,
                             filter_x, 16, 0, 16, Width(), Height()));
          else
            ASM_REGISTER_STATE_CHECK(UUT_->copy_[i](in, kInputStride, out,
                                                    kOutputStride, NULL, 0, 0,
                                                    0, 0, Width(), Height()));
          CheckGuardBlocks();
          // Optimized output must match the reference sample-for-sample.
          for (int y = 0; y < Height(); ++y) {
            for (int x = 0; x < Width(); ++x)
              ASSERT_EQ(lookup(ref, y * kOutputStride + x),
                        lookup(out, y * kOutputStride + x))
                  << "mismatch at (" << x << "," << y << "), "
                  << "filters (" << filter_bank << "," << filter_x << ","
                  << filter_y << ")";
          }
        }
      }
    }
  }
}
TEST_P(ConvolveTest, FilterExtremes) {
  // Drive each filter with worst-case inputs: 8x8 blocks whose samples
  // alternate between 0 and the maximum value in every bit pattern derived
  // from seed_val (256 patterns per axis), then check the optimized output
  // against the C reference for all banks and phases.
  uint8_t *const in = input();
  uint8_t *const out = output();
#if CONFIG_VP9_HIGHBITDEPTH
  // 16-bit reference storage passed through the 8-bit pointer convention
  // when running in high-bit-depth mode.
  uint8_t ref8[kOutputStride * kMaxDimension];
  uint16_t ref16[kOutputStride * kMaxDimension];
  uint8_t *ref;
  if (UUT_->use_highbd_ == 0) {
    ref = ref8;
  } else {
    ref = CAST_TO_BYTEPTR(ref16);
  }
#else
  uint8_t ref[kOutputStride * kMaxDimension];
#endif
  // Populate ref and out with some random data
  ::libvpx_test::ACMRandom prng;
  for (int y = 0; y < Height(); ++y) {
    for (int x = 0; x < Width(); ++x) {
      uint16_t r;
#if CONFIG_VP9_HIGHBITDEPTH
      if (UUT_->use_highbd_ == 0 || UUT_->use_highbd_ == 8) {
        r = prng.Rand8Extremes();
      } else {
        r = prng.Rand16() & mask_;
      }
#else
      r = prng.Rand8Extremes();
#endif
      assign_val(out, y * kOutputStride + x, r);
      assign_val(ref, y * kOutputStride + x, r);
    }
  }
  // axis == 0 varies the pattern along x (seed_val advances per row);
  // axis == 1 varies it along y (seed_val advances per column).
  for (int axis = 0; axis < 2; axis++) {
    int seed_val = 0;
    while (seed_val < 256) {
      // Write an 8x8 block of extreme values (0 or max) whose on/off pattern
      // comes from the bits of seed_val along the selected axis; the block is
      // offset by SUBPEL_TAPS / 2 - 1 so it covers the filter's tap span.
      // NOTE(review): the input is indexed with kOutputStride here —
      // presumably kInputStride == kOutputStride; confirm.
      for (int y = 0; y < 8; ++y) {
        for (int x = 0; x < 8; ++x) {
#if CONFIG_VP9_HIGHBITDEPTH
          assign_val(in, y * kOutputStride + x - SUBPEL_TAPS / 2 + 1,
                     ((seed_val >> (axis ? y : x)) & 1) * mask_);
#else
          assign_val(in, y * kOutputStride + x - SUBPEL_TAPS / 2 + 1,
                     ((seed_val >> (axis ? y : x)) & 1) * 255);
#endif
          if (axis) seed_val++;
        }
        if (axis) {
          seed_val -= 8;
        } else {
          seed_val++;
        }
      }
      if (axis) seed_val += 8;
      for (int filter_bank = 0; filter_bank < kNumFilterBanks; ++filter_bank) {
        const InterpKernel *filters =
            vp9_filter_kernels[static_cast<INTERP_FILTER>(filter_bank)];
        for (int filter_x = 0; filter_x < kNumFilters; ++filter_x) {
          for (int filter_y = 0; filter_y < kNumFilters; ++filter_y) {
            // Compute the C reference result.
            wrapper_filter_block2d_8_c(in, kInputStride, filters[filter_x],
                                       filters[filter_y], ref, kOutputStride,
                                       Width(), Height(), UUT_->use_highbd_);
            // Dispatch on which phases are non-zero, as the library does.
            if (filter_x && filter_y)
              ASM_REGISTER_STATE_CHECK(
                  UUT_->hv8_[0](in, kInputStride, out, kOutputStride, filters,
                                filter_x, 16, filter_y, 16, Width(), Height()));
            else if (filter_y)
              ASM_REGISTER_STATE_CHECK(
                  UUT_->v8_[0](in, kInputStride, out, kOutputStride, filters, 0,
                               16, filter_y, 16, Width(), Height()));
            else if (filter_x)
              ASM_REGISTER_STATE_CHECK(
                  UUT_->h8_[0](in, kInputStride, out, kOutputStride, filters,
                               filter_x, 16, 0, 16, Width(), Height()));
            else
              ASM_REGISTER_STATE_CHECK(UUT_->copy_[0](in, kInputStride, out,
                                                      kOutputStride, NULL, 0, 0,
                                                      0, 0, Width(), Height()));
            // Optimized output must match the reference sample-for-sample.
            for (int y = 0; y < Height(); ++y) {
              for (int x = 0; x < Width(); ++x)
                ASSERT_EQ(lookup(ref, y * kOutputStride + x),
                          lookup(out, y * kOutputStride + x))
                    << "mismatch at (" << x << "," << y << "), "
                    << "filters (" << filter_bank << "," << filter_x << ","
                    << filter_y << ")";
            }
          }
        }
      }
    }
  }
}
  949. /* This test exercises that enough rows and columns are filtered with every
  950. possible initial fractional positions and scaling steps. */
  951. #if !CONFIG_VP9_HIGHBITDEPTH
// C references for the scaled 2-D predictor: index 0 is the plain variant,
// index 1 the averaging variant, matching the shv8_[i] convention.
static const ConvolveFunc scaled_2d_c_funcs[2] = { vpx_scaled_2d_c,
                                                   vpx_scaled_avg_2d_c };
  954. TEST_P(ConvolveTest, CheckScalingFiltering) {
  955. uint8_t *const in = input();
  956. uint8_t *const out = output();
  957. uint8_t ref[kOutputStride * kMaxDimension];
  958. ::libvpx_test::ACMRandom prng;
  959. for (int y = 0; y < Height(); ++y) {
  960. for (int x = 0; x < Width(); ++x) {
  961. const uint16_t r = prng.Rand8Extremes();
  962. assign_val(in, y * kInputStride + x, r);
  963. }
  964. }
  965. for (int i = 0; i < 2; ++i) {
  966. for (INTERP_FILTER filter_type = 0; filter_type < 4; ++filter_type) {
  967. const InterpKernel *const eighttap = vp9_filter_kernels[filter_type];
  968. for (int frac = 0; frac < 16; ++frac) {
  969. for (int step = 1; step <= 32; ++step) {
  970. /* Test the horizontal and vertical filters in combination. */
  971. scaled_2d_c_funcs[i](in, kInputStride, ref, kOutputStride, eighttap,
  972. frac, step, frac, step, Width(), Height());
  973. ASM_REGISTER_STATE_CHECK(
  974. UUT_->shv8_[i](in, kInputStride, out, kOutputStride, eighttap,
  975. frac, step, frac, step, Width(), Height()));
  976. CheckGuardBlocks();
  977. for (int y = 0; y < Height(); ++y) {
  978. for (int x = 0; x < Width(); ++x) {
  979. ASSERT_EQ(lookup(ref, y * kOutputStride + x),
  980. lookup(out, y * kOutputStride + x))
  981. << "x == " << x << ", y == " << y << ", frac == " << frac
  982. << ", step == " << step;
  983. }
  984. }
  985. }
  986. }
  987. }
  988. }
  989. }
  990. #endif
  991. using std::make_tuple;
  992. #if CONFIG_VP9_HIGHBITDEPTH
// Generates a wrapper with the standard ConvolveFunc signature that forwards
// to the high-bit-depth variant vpx_highbd_<func>, reinterpreting the 8-bit
// sample pointers as 16-bit and appending the bit depth `bd`.
#define WRAP(func, bd)                                                       \
  void wrap_##func##_##bd(                                                   \
      const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,                \
      ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4,           \
      int x_step_q4, int y0_q4, int y_step_q4, int w, int h) {               \
    vpx_highbd_##func(reinterpret_cast<const uint16_t *>(src), src_stride,   \
                      reinterpret_cast<uint16_t *>(dst), dst_stride, filter, \
                      x0_q4, x_step_q4, y0_q4, y_step_q4, w, h, bd);         \
  }
// Instantiate high-bit-depth wrappers for each available ISA at 8, 10 and
// 12 bits. Each WRAP(...) expands to a wrap_<func>_<bd> function.
#if HAVE_SSE2 && ARCH_X86_64
WRAP(convolve_copy_sse2, 8)
WRAP(convolve_avg_sse2, 8)
WRAP(convolve_copy_sse2, 10)
WRAP(convolve_avg_sse2, 10)
WRAP(convolve_copy_sse2, 12)
WRAP(convolve_avg_sse2, 12)
WRAP(convolve8_horiz_sse2, 8)
WRAP(convolve8_avg_horiz_sse2, 8)
WRAP(convolve8_vert_sse2, 8)
WRAP(convolve8_avg_vert_sse2, 8)
WRAP(convolve8_sse2, 8)
WRAP(convolve8_avg_sse2, 8)
WRAP(convolve8_horiz_sse2, 10)
WRAP(convolve8_avg_horiz_sse2, 10)
WRAP(convolve8_vert_sse2, 10)
WRAP(convolve8_avg_vert_sse2, 10)
WRAP(convolve8_sse2, 10)
WRAP(convolve8_avg_sse2, 10)
WRAP(convolve8_horiz_sse2, 12)
WRAP(convolve8_avg_horiz_sse2, 12)
WRAP(convolve8_vert_sse2, 12)
WRAP(convolve8_avg_vert_sse2, 12)
WRAP(convolve8_sse2, 12)
WRAP(convolve8_avg_sse2, 12)
#endif  // HAVE_SSE2 && ARCH_X86_64
#if HAVE_AVX2
WRAP(convolve_copy_avx2, 8)
WRAP(convolve_avg_avx2, 8)
WRAP(convolve8_horiz_avx2, 8)
WRAP(convolve8_avg_horiz_avx2, 8)
WRAP(convolve8_vert_avx2, 8)
WRAP(convolve8_avg_vert_avx2, 8)
WRAP(convolve8_avx2, 8)
WRAP(convolve8_avg_avx2, 8)
WRAP(convolve_copy_avx2, 10)
WRAP(convolve_avg_avx2, 10)
WRAP(convolve8_avx2, 10)
WRAP(convolve8_horiz_avx2, 10)
WRAP(convolve8_vert_avx2, 10)
WRAP(convolve8_avg_avx2, 10)
WRAP(convolve8_avg_horiz_avx2, 10)
WRAP(convolve8_avg_vert_avx2, 10)
WRAP(convolve_copy_avx2, 12)
WRAP(convolve_avg_avx2, 12)
WRAP(convolve8_avx2, 12)
WRAP(convolve8_horiz_avx2, 12)
WRAP(convolve8_vert_avx2, 12)
WRAP(convolve8_avg_avx2, 12)
WRAP(convolve8_avg_horiz_avx2, 12)
WRAP(convolve8_avg_vert_avx2, 12)
#endif  // HAVE_AVX2
#if HAVE_NEON
WRAP(convolve_copy_neon, 8)
WRAP(convolve_avg_neon, 8)
WRAP(convolve_copy_neon, 10)
WRAP(convolve_avg_neon, 10)
WRAP(convolve_copy_neon, 12)
WRAP(convolve_avg_neon, 12)
WRAP(convolve8_horiz_neon, 8)
WRAP(convolve8_avg_horiz_neon, 8)
WRAP(convolve8_vert_neon, 8)
WRAP(convolve8_avg_vert_neon, 8)
WRAP(convolve8_neon, 8)
WRAP(convolve8_avg_neon, 8)
WRAP(convolve8_horiz_neon, 10)
WRAP(convolve8_avg_horiz_neon, 10)
WRAP(convolve8_vert_neon, 10)
WRAP(convolve8_avg_vert_neon, 10)
WRAP(convolve8_neon, 10)
WRAP(convolve8_avg_neon, 10)
WRAP(convolve8_horiz_neon, 12)
WRAP(convolve8_avg_horiz_neon, 12)
WRAP(convolve8_vert_neon, 12)
WRAP(convolve8_avg_vert_neon, 12)
WRAP(convolve8_neon, 12)
WRAP(convolve8_avg_neon, 12)
#endif  // HAVE_NEON
// C reference wrappers are always available.
WRAP(convolve_copy_c, 8)
WRAP(convolve_avg_c, 8)
WRAP(convolve8_horiz_c, 8)
WRAP(convolve8_avg_horiz_c, 8)
WRAP(convolve8_vert_c, 8)
WRAP(convolve8_avg_vert_c, 8)
WRAP(convolve8_c, 8)
WRAP(convolve8_avg_c, 8)
WRAP(convolve_copy_c, 10)
WRAP(convolve_avg_c, 10)
WRAP(convolve8_horiz_c, 10)
WRAP(convolve8_avg_horiz_c, 10)
WRAP(convolve8_vert_c, 10)
WRAP(convolve8_avg_vert_c, 10)
WRAP(convolve8_c, 10)
WRAP(convolve8_avg_c, 10)
WRAP(convolve_copy_c, 12)
WRAP(convolve_avg_c, 12)
WRAP(convolve8_horiz_c, 12)
WRAP(convolve8_avg_horiz_c, 12)
WRAP(convolve8_vert_c, 12)
WRAP(convolve8_avg_vert_c, 12)
WRAP(convolve8_c, 12)
WRAP(convolve8_avg_c, 12)
#undef WRAP
// C reference function sets. In high-bit-depth builds one set is built per
// bit depth (8/10/12); the scaled-convolve slots reuse the non-scaled
// wrappers since no high-bit-depth scaled variants are wrapped here.
const ConvolveFunctions convolve8_c(
    wrap_convolve_copy_c_8, wrap_convolve_avg_c_8, wrap_convolve8_horiz_c_8,
    wrap_convolve8_avg_horiz_c_8, wrap_convolve8_vert_c_8,
    wrap_convolve8_avg_vert_c_8, wrap_convolve8_c_8, wrap_convolve8_avg_c_8,
    wrap_convolve8_horiz_c_8, wrap_convolve8_avg_horiz_c_8,
    wrap_convolve8_vert_c_8, wrap_convolve8_avg_vert_c_8, wrap_convolve8_c_8,
    wrap_convolve8_avg_c_8, 8);
const ConvolveFunctions convolve10_c(
    wrap_convolve_copy_c_10, wrap_convolve_avg_c_10, wrap_convolve8_horiz_c_10,
    wrap_convolve8_avg_horiz_c_10, wrap_convolve8_vert_c_10,
    wrap_convolve8_avg_vert_c_10, wrap_convolve8_c_10, wrap_convolve8_avg_c_10,
    wrap_convolve8_horiz_c_10, wrap_convolve8_avg_horiz_c_10,
    wrap_convolve8_vert_c_10, wrap_convolve8_avg_vert_c_10, wrap_convolve8_c_10,
    wrap_convolve8_avg_c_10, 10);
const ConvolveFunctions convolve12_c(
    wrap_convolve_copy_c_12, wrap_convolve_avg_c_12, wrap_convolve8_horiz_c_12,
    wrap_convolve8_avg_horiz_c_12, wrap_convolve8_vert_c_12,
    wrap_convolve8_avg_vert_c_12, wrap_convolve8_c_12, wrap_convolve8_avg_c_12,
    wrap_convolve8_horiz_c_12, wrap_convolve8_avg_horiz_c_12,
    wrap_convolve8_vert_c_12, wrap_convolve8_avg_vert_c_12, wrap_convolve8_c_12,
    wrap_convolve8_avg_c_12, 12);
const ConvolveParam kArrayConvolve_c[] = { ALL_SIZES(convolve8_c),
                                           ALL_SIZES(convolve10_c),
                                           ALL_SIZES(convolve12_c) };
#else
// Low-bit-depth build: plain C functions, with the real scaled C variants in
// the scaled slots.
const ConvolveFunctions convolve8_c(
    vpx_convolve_copy_c, vpx_convolve_avg_c, vpx_convolve8_horiz_c,
    vpx_convolve8_avg_horiz_c, vpx_convolve8_vert_c, vpx_convolve8_avg_vert_c,
    vpx_convolve8_c, vpx_convolve8_avg_c, vpx_scaled_horiz_c,
    vpx_scaled_avg_horiz_c, vpx_scaled_vert_c, vpx_scaled_avg_vert_c,
    vpx_scaled_2d_c, vpx_scaled_avg_2d_c, 0);
const ConvolveParam kArrayConvolve_c[] = { ALL_SIZES(convolve8_c) };
#endif
INSTANTIATE_TEST_CASE_P(C, ConvolveTest, ::testing::ValuesIn(kArrayConvolve_c));
// SSE2 function sets (x86-64 only).
#if HAVE_SSE2 && ARCH_X86_64
#if CONFIG_VP9_HIGHBITDEPTH
const ConvolveFunctions convolve8_sse2(
    wrap_convolve_copy_sse2_8, wrap_convolve_avg_sse2_8,
    wrap_convolve8_horiz_sse2_8, wrap_convolve8_avg_horiz_sse2_8,
    wrap_convolve8_vert_sse2_8, wrap_convolve8_avg_vert_sse2_8,
    wrap_convolve8_sse2_8, wrap_convolve8_avg_sse2_8,
    wrap_convolve8_horiz_sse2_8, wrap_convolve8_avg_horiz_sse2_8,
    wrap_convolve8_vert_sse2_8, wrap_convolve8_avg_vert_sse2_8,
    wrap_convolve8_sse2_8, wrap_convolve8_avg_sse2_8, 8);
const ConvolveFunctions convolve10_sse2(
    wrap_convolve_copy_sse2_10, wrap_convolve_avg_sse2_10,
    wrap_convolve8_horiz_sse2_10, wrap_convolve8_avg_horiz_sse2_10,
    wrap_convolve8_vert_sse2_10, wrap_convolve8_avg_vert_sse2_10,
    wrap_convolve8_sse2_10, wrap_convolve8_avg_sse2_10,
    wrap_convolve8_horiz_sse2_10, wrap_convolve8_avg_horiz_sse2_10,
    wrap_convolve8_vert_sse2_10, wrap_convolve8_avg_vert_sse2_10,
    wrap_convolve8_sse2_10, wrap_convolve8_avg_sse2_10, 10);
const ConvolveFunctions convolve12_sse2(
    wrap_convolve_copy_sse2_12, wrap_convolve_avg_sse2_12,
    wrap_convolve8_horiz_sse2_12, wrap_convolve8_avg_horiz_sse2_12,
    wrap_convolve8_vert_sse2_12, wrap_convolve8_avg_vert_sse2_12,
    wrap_convolve8_sse2_12, wrap_convolve8_avg_sse2_12,
    wrap_convolve8_horiz_sse2_12, wrap_convolve8_avg_horiz_sse2_12,
    wrap_convolve8_vert_sse2_12, wrap_convolve8_avg_vert_sse2_12,
    wrap_convolve8_sse2_12, wrap_convolve8_avg_sse2_12, 12);
const ConvolveParam kArrayConvolve_sse2[] = { ALL_SIZES(convolve8_sse2),
                                              ALL_SIZES(convolve10_sse2),
                                              ALL_SIZES(convolve12_sse2) };
#else
const ConvolveFunctions convolve8_sse2(
    vpx_convolve_copy_sse2, vpx_convolve_avg_sse2, vpx_convolve8_horiz_sse2,
    vpx_convolve8_avg_horiz_sse2, vpx_convolve8_vert_sse2,
    vpx_convolve8_avg_vert_sse2, vpx_convolve8_sse2, vpx_convolve8_avg_sse2,
    vpx_scaled_horiz_c, vpx_scaled_avg_horiz_c, vpx_scaled_vert_c,
    vpx_scaled_avg_vert_c, vpx_scaled_2d_c, vpx_scaled_avg_2d_c, 0);
const ConvolveParam kArrayConvolve_sse2[] = { ALL_SIZES(convolve8_sse2) };
#endif  // CONFIG_VP9_HIGHBITDEPTH
INSTANTIATE_TEST_CASE_P(SSE2, ConvolveTest,
                        ::testing::ValuesIn(kArrayConvolve_sse2));
#endif
// SSSE3 function set. No SSSE3 copy/avg exists, so those slots fall back to
// C; only vpx_scaled_2d has an SSSE3 implementation among the scaled slots.
#if HAVE_SSSE3
const ConvolveFunctions convolve8_ssse3(
    vpx_convolve_copy_c, vpx_convolve_avg_c, vpx_convolve8_horiz_ssse3,
    vpx_convolve8_avg_horiz_ssse3, vpx_convolve8_vert_ssse3,
    vpx_convolve8_avg_vert_ssse3, vpx_convolve8_ssse3, vpx_convolve8_avg_ssse3,
    vpx_scaled_horiz_c, vpx_scaled_avg_horiz_c, vpx_scaled_vert_c,
    vpx_scaled_avg_vert_c, vpx_scaled_2d_ssse3, vpx_scaled_avg_2d_c, 0);
const ConvolveParam kArrayConvolve8_ssse3[] = { ALL_SIZES(convolve8_ssse3) };
INSTANTIATE_TEST_CASE_P(SSSE3, ConvolveTest,
                        ::testing::ValuesIn(kArrayConvolve8_ssse3));
#endif
// AVX2 function sets. The high-bit-depth scaled slots use the C wrappers.
#if HAVE_AVX2
#if CONFIG_VP9_HIGHBITDEPTH
const ConvolveFunctions convolve8_avx2(
    wrap_convolve_copy_avx2_8, wrap_convolve_avg_avx2_8,
    wrap_convolve8_horiz_avx2_8, wrap_convolve8_avg_horiz_avx2_8,
    wrap_convolve8_vert_avx2_8, wrap_convolve8_avg_vert_avx2_8,
    wrap_convolve8_avx2_8, wrap_convolve8_avg_avx2_8, wrap_convolve8_horiz_c_8,
    wrap_convolve8_avg_horiz_c_8, wrap_convolve8_vert_c_8,
    wrap_convolve8_avg_vert_c_8, wrap_convolve8_c_8, wrap_convolve8_avg_c_8, 8);
const ConvolveFunctions convolve10_avx2(
    wrap_convolve_copy_avx2_10, wrap_convolve_avg_avx2_10,
    wrap_convolve8_horiz_avx2_10, wrap_convolve8_avg_horiz_avx2_10,
    wrap_convolve8_vert_avx2_10, wrap_convolve8_avg_vert_avx2_10,
    wrap_convolve8_avx2_10, wrap_convolve8_avg_avx2_10,
    wrap_convolve8_horiz_c_10, wrap_convolve8_avg_horiz_c_10,
    wrap_convolve8_vert_c_10, wrap_convolve8_avg_vert_c_10, wrap_convolve8_c_10,
    wrap_convolve8_avg_c_10, 10);
const ConvolveFunctions convolve12_avx2(
    wrap_convolve_copy_avx2_12, wrap_convolve_avg_avx2_12,
    wrap_convolve8_horiz_avx2_12, wrap_convolve8_avg_horiz_avx2_12,
    wrap_convolve8_vert_avx2_12, wrap_convolve8_avg_vert_avx2_12,
    wrap_convolve8_avx2_12, wrap_convolve8_avg_avx2_12,
    wrap_convolve8_horiz_c_12, wrap_convolve8_avg_horiz_c_12,
    wrap_convolve8_vert_c_12, wrap_convolve8_avg_vert_c_12, wrap_convolve8_c_12,
    wrap_convolve8_avg_c_12, 12);
const ConvolveParam kArrayConvolve8_avx2[] = { ALL_SIZES(convolve8_avx2),
                                               ALL_SIZES(convolve10_avx2),
                                               ALL_SIZES(convolve12_avx2) };
INSTANTIATE_TEST_CASE_P(AVX2, ConvolveTest,
                        ::testing::ValuesIn(kArrayConvolve8_avx2));
#else   // !CONFIG_VP9_HIGHBITDEPTH
const ConvolveFunctions convolve8_avx2(
    vpx_convolve_copy_c, vpx_convolve_avg_c, vpx_convolve8_horiz_avx2,
    vpx_convolve8_avg_horiz_avx2, vpx_convolve8_vert_avx2,
    vpx_convolve8_avg_vert_avx2, vpx_convolve8_avx2, vpx_convolve8_avg_avx2,
    vpx_scaled_horiz_c, vpx_scaled_avg_horiz_c, vpx_scaled_vert_c,
    vpx_scaled_avg_vert_c, vpx_scaled_2d_c, vpx_scaled_avg_2d_c, 0);
const ConvolveParam kArrayConvolve8_avx2[] = { ALL_SIZES(convolve8_avx2) };
INSTANTIATE_TEST_CASE_P(AVX2, ConvolveTest,
                        ::testing::ValuesIn(kArrayConvolve8_avx2));
#endif  // CONFIG_VP9_HIGHBITDEPTH
#endif  // HAVE_AVX2
// NEON function sets.
#if HAVE_NEON
#if CONFIG_VP9_HIGHBITDEPTH
const ConvolveFunctions convolve8_neon(
    wrap_convolve_copy_neon_8, wrap_convolve_avg_neon_8,
    wrap_convolve8_horiz_neon_8, wrap_convolve8_avg_horiz_neon_8,
    wrap_convolve8_vert_neon_8, wrap_convolve8_avg_vert_neon_8,
    wrap_convolve8_neon_8, wrap_convolve8_avg_neon_8,
    wrap_convolve8_horiz_neon_8, wrap_convolve8_avg_horiz_neon_8,
    wrap_convolve8_vert_neon_8, wrap_convolve8_avg_vert_neon_8,
    wrap_convolve8_neon_8, wrap_convolve8_avg_neon_8, 8);
const ConvolveFunctions convolve10_neon(
    wrap_convolve_copy_neon_10, wrap_convolve_avg_neon_10,
    wrap_convolve8_horiz_neon_10, wrap_convolve8_avg_horiz_neon_10,
    wrap_convolve8_vert_neon_10, wrap_convolve8_avg_vert_neon_10,
    wrap_convolve8_neon_10, wrap_convolve8_avg_neon_10,
    wrap_convolve8_horiz_neon_10, wrap_convolve8_avg_horiz_neon_10,
    wrap_convolve8_vert_neon_10, wrap_convolve8_avg_vert_neon_10,
    wrap_convolve8_neon_10, wrap_convolve8_avg_neon_10, 10);
const ConvolveFunctions convolve12_neon(
    wrap_convolve_copy_neon_12, wrap_convolve_avg_neon_12,
    wrap_convolve8_horiz_neon_12, wrap_convolve8_avg_horiz_neon_12,
    wrap_convolve8_vert_neon_12, wrap_convolve8_avg_vert_neon_12,
    wrap_convolve8_neon_12, wrap_convolve8_avg_neon_12,
    wrap_convolve8_horiz_neon_12, wrap_convolve8_avg_horiz_neon_12,
    wrap_convolve8_vert_neon_12, wrap_convolve8_avg_vert_neon_12,
    wrap_convolve8_neon_12, wrap_convolve8_avg_neon_12, 12);
const ConvolveParam kArrayConvolve_neon[] = { ALL_SIZES(convolve8_neon),
                                              ALL_SIZES(convolve10_neon),
                                              ALL_SIZES(convolve12_neon) };
#else
const ConvolveFunctions convolve8_neon(
    vpx_convolve_copy_neon, vpx_convolve_avg_neon, vpx_convolve8_horiz_neon,
    vpx_convolve8_avg_horiz_neon, vpx_convolve8_vert_neon,
    vpx_convolve8_avg_vert_neon, vpx_convolve8_neon, vpx_convolve8_avg_neon,
    vpx_scaled_horiz_c, vpx_scaled_avg_horiz_c, vpx_scaled_vert_c,
    vpx_scaled_avg_vert_c, vpx_scaled_2d_neon, vpx_scaled_avg_2d_c, 0);
const ConvolveParam kArrayConvolve_neon[] = { ALL_SIZES(convolve8_neon) };
#endif  // CONFIG_VP9_HIGHBITDEPTH
INSTANTIATE_TEST_CASE_P(NEON, ConvolveTest,
                        ::testing::ValuesIn(kArrayConvolve_neon));
#endif  // HAVE_NEON
// MIPS DSPr2 function set (low-bit-depth only); scaled slots fall back to C.
#if HAVE_DSPR2
const ConvolveFunctions convolve8_dspr2(
    vpx_convolve_copy_dspr2, vpx_convolve_avg_dspr2, vpx_convolve8_horiz_dspr2,
    vpx_convolve8_avg_horiz_dspr2, vpx_convolve8_vert_dspr2,
    vpx_convolve8_avg_vert_dspr2, vpx_convolve8_dspr2, vpx_convolve8_avg_dspr2,
    vpx_scaled_horiz_c, vpx_scaled_avg_horiz_c, vpx_scaled_vert_c,
    vpx_scaled_avg_vert_c, vpx_scaled_2d_c, vpx_scaled_avg_2d_c, 0);
const ConvolveParam kArrayConvolve8_dspr2[] = { ALL_SIZES(convolve8_dspr2) };
INSTANTIATE_TEST_CASE_P(DSPR2, ConvolveTest,
                        ::testing::ValuesIn(kArrayConvolve8_dspr2));
#endif  // HAVE_DSPR2
// MIPS MSA function set (low-bit-depth only).
#if HAVE_MSA
const ConvolveFunctions convolve8_msa(
    vpx_convolve_copy_msa, vpx_convolve_avg_msa, vpx_convolve8_horiz_msa,
    vpx_convolve8_avg_horiz_msa, vpx_convolve8_vert_msa,
    vpx_convolve8_avg_vert_msa, vpx_convolve8_msa, vpx_convolve8_avg_msa,
    vpx_scaled_horiz_c, vpx_scaled_avg_horiz_c, vpx_scaled_vert_c,
    vpx_scaled_avg_vert_c, vpx_scaled_2d_msa, vpx_scaled_avg_2d_c, 0);
const ConvolveParam kArrayConvolve8_msa[] = { ALL_SIZES(convolve8_msa) };
INSTANTIATE_TEST_CASE_P(MSA, ConvolveTest,
                        ::testing::ValuesIn(kArrayConvolve8_msa));
#endif  // HAVE_MSA
// PowerPC VSX function set (low-bit-depth only); scaled slots fall back to C.
#if HAVE_VSX
const ConvolveFunctions convolve8_vsx(
    vpx_convolve_copy_vsx, vpx_convolve_avg_vsx, vpx_convolve8_horiz_vsx,
    vpx_convolve8_avg_horiz_vsx, vpx_convolve8_vert_vsx,
    vpx_convolve8_avg_vert_vsx, vpx_convolve8_vsx, vpx_convolve8_avg_vsx,
    vpx_scaled_horiz_c, vpx_scaled_avg_horiz_c, vpx_scaled_vert_c,
    vpx_scaled_avg_vert_c, vpx_scaled_2d_c, vpx_scaled_avg_2d_c, 0);
const ConvolveParam kArrayConvolve_vsx[] = { ALL_SIZES(convolve8_vsx) };
INSTANTIATE_TEST_CASE_P(VSX, ConvolveTest,
                        ::testing::ValuesIn(kArrayConvolve_vsx));
#endif  // HAVE_VSX
// Loongson MMI function set; no MMI copy exists, so that slot falls back to C.
#if HAVE_MMI
const ConvolveFunctions convolve8_mmi(
    vpx_convolve_copy_c, vpx_convolve_avg_mmi, vpx_convolve8_horiz_mmi,
    vpx_convolve8_avg_horiz_mmi, vpx_convolve8_vert_mmi,
    vpx_convolve8_avg_vert_mmi, vpx_convolve8_mmi, vpx_convolve8_avg_mmi,
    vpx_scaled_horiz_c, vpx_scaled_avg_horiz_c, vpx_scaled_vert_c,
    vpx_scaled_avg_vert_c, vpx_scaled_2d_c, vpx_scaled_avg_2d_c, 0);
const ConvolveParam kArrayConvolve_mmi[] = { ALL_SIZES(convolve8_mmi) };
INSTANTIATE_TEST_CASE_P(MMI, ConvolveTest,
                        ::testing::ValuesIn(kArrayConvolve_mmi));
#endif  // HAVE_MMI
  1318. } // namespace