dct_test.cc 27 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756
  1. /*
  2. * Copyright (c) 2017 The WebM project authors. All Rights Reserved.
  3. *
  4. * Use of this source code is governed by a BSD-style license
  5. * that can be found in the LICENSE file in the root of the source
  6. * tree. An additional intellectual property rights grant can be found
  7. * in the file PATENTS. All contributing project authors may
  8. * be found in the AUTHORS file in the root of the source tree.
  9. */
  10. #include <math.h>
  11. #include <stdlib.h>
  12. #include <string.h>
  13. #include <tuple>
  14. #include "third_party/googletest/src/include/gtest/gtest.h"
  15. #include "./vp9_rtcd.h"
  16. #include "./vpx_dsp_rtcd.h"
  17. #include "test/acm_random.h"
  18. #include "test/buffer.h"
  19. #include "test/clear_system_state.h"
  20. #include "test/register_state_check.h"
  21. #include "test/util.h"
  22. #include "vp9/common/vp9_entropy.h"
  23. #include "vpx/vpx_codec.h"
  24. #include "vpx/vpx_integer.h"
  25. #include "vpx_ports/mem.h"
  26. using libvpx_test::ACMRandom;
  27. using libvpx_test::Buffer;
  28. using std::make_tuple;
  29. using std::tuple;
  30. namespace {
  31. typedef void (*FdctFunc)(const int16_t *in, tran_low_t *out, int stride);
  32. typedef void (*IdctFunc)(const tran_low_t *in, uint8_t *out, int stride);
  33. typedef void (*FhtFunc)(const int16_t *in, tran_low_t *out, int stride,
  34. int tx_type);
  35. typedef void (*FhtFuncRef)(const Buffer<int16_t> &in, Buffer<tran_low_t> *out,
  36. int size, int tx_type);
  37. typedef void (*IhtFunc)(const tran_low_t *in, uint8_t *out, int stride,
  38. int tx_type);
  39. typedef void (*IhtWithBdFunc)(const tran_low_t *in, uint8_t *out, int stride,
  40. int tx_type, int bd);
  41. template <FdctFunc fn>
  42. void fdct_wrapper(const int16_t *in, tran_low_t *out, int stride, int tx_type) {
  43. (void)tx_type;
  44. fn(in, out, stride);
  45. }
  46. template <IdctFunc fn>
  47. void idct_wrapper(const tran_low_t *in, uint8_t *out, int stride, int tx_type,
  48. int bd) {
  49. (void)tx_type;
  50. (void)bd;
  51. fn(in, out, stride);
  52. }
  53. template <IhtFunc fn>
  54. void iht_wrapper(const tran_low_t *in, uint8_t *out, int stride, int tx_type,
  55. int bd) {
  56. (void)bd;
  57. fn(in, out, stride, tx_type);
  58. }
  59. #if CONFIG_VP9_HIGHBITDEPTH
  60. typedef void (*HighbdIdctFunc)(const tran_low_t *in, uint16_t *out, int stride,
  61. int bd);
  62. typedef void (*HighbdIhtFunc)(const tran_low_t *in, uint16_t *out, int stride,
  63. int tx_type, int bd);
  64. template <HighbdIdctFunc fn>
  65. void highbd_idct_wrapper(const tran_low_t *in, uint8_t *out, int stride,
  66. int tx_type, int bd) {
  67. (void)tx_type;
  68. fn(in, CAST_TO_SHORTPTR(out), stride, bd);
  69. }
  70. template <HighbdIhtFunc fn>
  71. void highbd_iht_wrapper(const tran_low_t *in, uint8_t *out, int stride,
  72. int tx_type, int bd) {
  73. fn(in, CAST_TO_SHORTPTR(out), stride, tx_type, bd);
  74. }
  75. #endif // CONFIG_VP9_HIGHBITDEPTH
  // One forward/inverse transform pair under test. Instances are written as
  // brace-initialized table rows, so the field order below must not change.
  76. struct FuncInfo {
  // Forward transform, already adapted to the FhtFunc signature.
  77. FhtFunc ft_func;
  // Inverse transform, already adapted to the IhtWithBdFunc signature.
  78. IhtWithBdFunc it_func;
  // Transform dimension; the tables in this file use 4, 8, 16 and 32.
  79. int size;
  // Bytes per pixel in the fixture's src/dst buffers: 1 for uint8_t pixels,
  // 2 when the buffers are accessed as uint16_t.
  80. int pixel_size;
  81. };
  82. /* index into the FuncInfo array, the FuncInfo array, transform type, bit depth */
  83. typedef tuple<int, const FuncInfo *, int, vpx_bit_depth_t> DctParam;
  84. void fdct_ref(const Buffer<int16_t> &in, Buffer<tran_low_t> *out, int size,
  85. int /*tx_type*/) {
  86. const int16_t *i = in.TopLeftPixel();
  87. const int i_stride = in.stride();
  88. tran_low_t *o = out->TopLeftPixel();
  89. if (size == 4) {
  90. vpx_fdct4x4_c(i, o, i_stride);
  91. } else if (size == 8) {
  92. vpx_fdct8x8_c(i, o, i_stride);
  93. } else if (size == 16) {
  94. vpx_fdct16x16_c(i, o, i_stride);
  95. } else if (size == 32) {
  96. vpx_fdct32x32_c(i, o, i_stride);
  97. }
  98. }
  99. void fht_ref(const Buffer<int16_t> &in, Buffer<tran_low_t> *out, int size,
  100. int tx_type) {
  101. const int16_t *i = in.TopLeftPixel();
  102. const int i_stride = in.stride();
  103. tran_low_t *o = out->TopLeftPixel();
  104. if (size == 4) {
  105. vp9_fht4x4_c(i, o, i_stride, tx_type);
  106. } else if (size == 8) {
  107. vp9_fht8x8_c(i, o, i_stride, tx_type);
  108. } else if (size == 16) {
  109. vp9_fht16x16_c(i, o, i_stride, tx_type);
  110. }
  111. }
  112. void fwht_ref(const Buffer<int16_t> &in, Buffer<tran_low_t> *out, int size,
  113. int /*tx_type*/) {
  114. ASSERT_EQ(size, 4);
  115. vp9_fwht4x4_c(in.TopLeftPixel(), out->TopLeftPixel(), in.stride());
  116. }
  117. class TransTestBase : public ::testing::TestWithParam<DctParam> {
  118. public:
  119. virtual void SetUp() {
  120. rnd_.Reset(ACMRandom::DeterministicSeed());
  121. const int idx = GET_PARAM(0);
  122. const FuncInfo *func_info = &(GET_PARAM(1)[idx]);
  123. tx_type_ = GET_PARAM(2);
  124. bit_depth_ = GET_PARAM(3);
  125. fwd_txfm_ = func_info->ft_func;
  126. inv_txfm_ = func_info->it_func;
  127. size_ = func_info->size;
  128. pixel_size_ = func_info->pixel_size;
  129. max_pixel_value_ = (1 << bit_depth_) - 1;
  130. // Randomize stride_ to a value less than or equal to 1024
  131. stride_ = rnd_(1024) + 1;
  132. if (stride_ < size_) {
  133. stride_ = size_;
  134. }
  135. // Align stride_ to 16 if it's bigger than 16.
  136. if (stride_ > 16) {
  137. stride_ &= ~15;
  138. }
  139. block_size_ = size_ * stride_;
  140. src_ = reinterpret_cast<uint8_t *>(
  141. vpx_memalign(16, pixel_size_ * block_size_));
  142. ASSERT_TRUE(src_ != NULL);
  143. dst_ = reinterpret_cast<uint8_t *>(
  144. vpx_memalign(16, pixel_size_ * block_size_));
  145. ASSERT_TRUE(dst_ != NULL);
  146. }
  147. virtual void TearDown() {
  148. vpx_free(src_);
  149. src_ = NULL;
  150. vpx_free(dst_);
  151. dst_ = NULL;
  152. libvpx_test::ClearSystemState();
  153. }
  154. void InitMem() {
  155. if (pixel_size_ == 1 && bit_depth_ > VPX_BITS_8) return;
  156. if (pixel_size_ == 1) {
  157. for (int j = 0; j < block_size_; ++j) {
  158. src_[j] = rnd_.Rand16() & max_pixel_value_;
  159. }
  160. for (int j = 0; j < block_size_; ++j) {
  161. dst_[j] = rnd_.Rand16() & max_pixel_value_;
  162. }
  163. } else {
  164. ASSERT_EQ(pixel_size_, 2);
  165. uint16_t *const src = reinterpret_cast<uint16_t *>(src_);
  166. uint16_t *const dst = reinterpret_cast<uint16_t *>(dst_);
  167. for (int j = 0; j < block_size_; ++j) {
  168. src[j] = rnd_.Rand16() & max_pixel_value_;
  169. }
  170. for (int j = 0; j < block_size_; ++j) {
  171. dst[j] = rnd_.Rand16() & max_pixel_value_;
  172. }
  173. }
  174. }
  175. void RunFwdTxfm(const Buffer<int16_t> &in, Buffer<tran_low_t> *out) {
  176. fwd_txfm_(in.TopLeftPixel(), out->TopLeftPixel(), in.stride(), tx_type_);
  177. }
  178. void RunInvTxfm(const Buffer<tran_low_t> &in, uint8_t *out) {
  179. inv_txfm_(in.TopLeftPixel(), out, stride_, tx_type_, bit_depth_);
  180. }
  181. protected:
  182. void RunAccuracyCheck(int limit) {
  183. if (pixel_size_ == 1 && bit_depth_ > VPX_BITS_8) return;
  184. ACMRandom rnd(ACMRandom::DeterministicSeed());
  185. Buffer<int16_t> test_input_block =
  186. Buffer<int16_t>(size_, size_, 8, size_ == 4 ? 0 : 16);
  187. ASSERT_TRUE(test_input_block.Init());
  188. ASSERT_TRUE(test_input_block.TopLeftPixel() != NULL);
  189. Buffer<tran_low_t> test_temp_block =
  190. Buffer<tran_low_t>(size_, size_, 0, 16);
  191. ASSERT_TRUE(test_temp_block.Init());
  192. uint32_t max_error = 0;
  193. int64_t total_error = 0;
  194. const int count_test_block = 10000;
  195. for (int i = 0; i < count_test_block; ++i) {
  196. InitMem();
  197. for (int h = 0; h < size_; ++h) {
  198. for (int w = 0; w < size_; ++w) {
  199. if (pixel_size_ == 1) {
  200. test_input_block.TopLeftPixel()[h * test_input_block.stride() + w] =
  201. src_[h * stride_ + w] - dst_[h * stride_ + w];
  202. } else {
  203. ASSERT_EQ(pixel_size_, 2);
  204. const uint16_t *const src = reinterpret_cast<uint16_t *>(src_);
  205. const uint16_t *const dst = reinterpret_cast<uint16_t *>(dst_);
  206. test_input_block.TopLeftPixel()[h * test_input_block.stride() + w] =
  207. src[h * stride_ + w] - dst[h * stride_ + w];
  208. }
  209. }
  210. }
  211. ASM_REGISTER_STATE_CHECK(RunFwdTxfm(test_input_block, &test_temp_block));
  212. ASM_REGISTER_STATE_CHECK(RunInvTxfm(test_temp_block, dst_));
  213. for (int h = 0; h < size_; ++h) {
  214. for (int w = 0; w < size_; ++w) {
  215. int diff;
  216. if (pixel_size_ == 1) {
  217. diff = dst_[h * stride_ + w] - src_[h * stride_ + w];
  218. } else {
  219. ASSERT_EQ(pixel_size_, 2);
  220. const uint16_t *const src = reinterpret_cast<uint16_t *>(src_);
  221. const uint16_t *const dst = reinterpret_cast<uint16_t *>(dst_);
  222. diff = dst[h * stride_ + w] - src[h * stride_ + w];
  223. }
  224. const uint32_t error = diff * diff;
  225. if (max_error < error) max_error = error;
  226. total_error += error;
  227. }
  228. }
  229. }
  230. EXPECT_GE(static_cast<uint32_t>(limit), max_error)
  231. << "Error: " << size_ << "x" << size_
  232. << " transform/inverse transform has an individual round trip error > "
  233. << limit;
  234. EXPECT_GE(count_test_block * limit, total_error)
  235. << "Error: " << size_ << "x" << size_
  236. << " transform/inverse transform has average round trip error > "
  237. << limit << " per block";
  238. }
  239. void RunCoeffCheck() {
  240. if (pixel_size_ == 1 && bit_depth_ > VPX_BITS_8) return;
  241. ACMRandom rnd(ACMRandom::DeterministicSeed());
  242. const int count_test_block = 5000;
  243. Buffer<int16_t> input_block =
  244. Buffer<int16_t>(size_, size_, 8, size_ == 4 ? 0 : 16);
  245. ASSERT_TRUE(input_block.Init());
  246. Buffer<tran_low_t> output_ref_block = Buffer<tran_low_t>(size_, size_, 0);
  247. ASSERT_TRUE(output_ref_block.Init());
  248. Buffer<tran_low_t> output_block = Buffer<tran_low_t>(size_, size_, 0, 16);
  249. ASSERT_TRUE(output_block.Init());
  250. for (int i = 0; i < count_test_block; ++i) {
  251. // Initialize a test block with input range [-max_pixel_value_,
  252. // max_pixel_value_].
  253. input_block.Set(&rnd, -max_pixel_value_, max_pixel_value_);
  254. fwd_txfm_ref(input_block, &output_ref_block, size_, tx_type_);
  255. ASM_REGISTER_STATE_CHECK(RunFwdTxfm(input_block, &output_block));
  256. // The minimum quant value is 4.
  257. EXPECT_TRUE(output_block.CheckValues(output_ref_block));
  258. if (::testing::Test::HasFailure()) {
  259. printf("Size: %d Transform type: %d\n", size_, tx_type_);
  260. output_block.PrintDifference(output_ref_block);
  261. return;
  262. }
  263. }
  264. }
  265. void RunMemCheck() {
  266. if (pixel_size_ == 1 && bit_depth_ > VPX_BITS_8) return;
  267. ACMRandom rnd(ACMRandom::DeterministicSeed());
  268. const int count_test_block = 5000;
  269. Buffer<int16_t> input_extreme_block =
  270. Buffer<int16_t>(size_, size_, 8, size_ == 4 ? 0 : 16);
  271. ASSERT_TRUE(input_extreme_block.Init());
  272. Buffer<tran_low_t> output_ref_block = Buffer<tran_low_t>(size_, size_, 0);
  273. ASSERT_TRUE(output_ref_block.Init());
  274. Buffer<tran_low_t> output_block = Buffer<tran_low_t>(size_, size_, 0, 16);
  275. ASSERT_TRUE(output_block.Init());
  276. for (int i = 0; i < count_test_block; ++i) {
  277. // Initialize a test block with -max_pixel_value_ or max_pixel_value_.
  278. if (i == 0) {
  279. input_extreme_block.Set(max_pixel_value_);
  280. } else if (i == 1) {
  281. input_extreme_block.Set(-max_pixel_value_);
  282. } else {
  283. ASSERT_TRUE(input_extreme_block.TopLeftPixel() != NULL);
  284. for (int h = 0; h < size_; ++h) {
  285. for (int w = 0; w < size_; ++w) {
  286. input_extreme_block
  287. .TopLeftPixel()[h * input_extreme_block.stride() + w] =
  288. rnd.Rand8() % 2 ? max_pixel_value_ : -max_pixel_value_;
  289. }
  290. }
  291. }
  292. fwd_txfm_ref(input_extreme_block, &output_ref_block, size_, tx_type_);
  293. ASM_REGISTER_STATE_CHECK(RunFwdTxfm(input_extreme_block, &output_block));
  294. // The minimum quant value is 4.
  295. EXPECT_TRUE(output_block.CheckValues(output_ref_block));
  296. ASSERT_TRUE(output_block.TopLeftPixel() != NULL);
  297. for (int h = 0; h < size_; ++h) {
  298. for (int w = 0; w < size_; ++w) {
  299. EXPECT_GE(
  300. 4 * DCT_MAX_VALUE << (bit_depth_ - 8),
  301. abs(output_block.TopLeftPixel()[h * output_block.stride() + w]))
  302. << "Error: " << size_ << "x" << size_
  303. << " transform has coefficient larger than 4*DCT_MAX_VALUE"
  304. << " at " << w << "," << h;
  305. if (::testing::Test::HasFailure()) {
  306. printf("Size: %d Transform type: %d\n", size_, tx_type_);
  307. output_block.DumpBuffer();
  308. return;
  309. }
  310. }
  311. }
  312. }
  313. }
  314. void RunInvAccuracyCheck(int limit) {
  315. if (pixel_size_ == 1 && bit_depth_ > VPX_BITS_8) return;
  316. ACMRandom rnd(ACMRandom::DeterministicSeed());
  317. const int count_test_block = 1000;
  318. Buffer<int16_t> in = Buffer<int16_t>(size_, size_, 4);
  319. ASSERT_TRUE(in.Init());
  320. Buffer<tran_low_t> coeff = Buffer<tran_low_t>(size_, size_, 0, 16);
  321. ASSERT_TRUE(coeff.Init());
  322. Buffer<uint8_t> dst = Buffer<uint8_t>(size_, size_, 0, 16);
  323. ASSERT_TRUE(dst.Init());
  324. Buffer<uint8_t> src = Buffer<uint8_t>(size_, size_, 0);
  325. ASSERT_TRUE(src.Init());
  326. Buffer<uint16_t> dst16 = Buffer<uint16_t>(size_, size_, 0, 16);
  327. ASSERT_TRUE(dst16.Init());
  328. Buffer<uint16_t> src16 = Buffer<uint16_t>(size_, size_, 0);
  329. ASSERT_TRUE(src16.Init());
  330. for (int i = 0; i < count_test_block; ++i) {
  331. InitMem();
  332. ASSERT_TRUE(in.TopLeftPixel() != NULL);
  333. // Initialize a test block with input range [-max_pixel_value_,
  334. // max_pixel_value_].
  335. for (int h = 0; h < size_; ++h) {
  336. for (int w = 0; w < size_; ++w) {
  337. if (pixel_size_ == 1) {
  338. in.TopLeftPixel()[h * in.stride() + w] =
  339. src_[h * stride_ + w] - dst_[h * stride_ + w];
  340. } else {
  341. ASSERT_EQ(pixel_size_, 2);
  342. const uint16_t *const src = reinterpret_cast<uint16_t *>(src_);
  343. const uint16_t *const dst = reinterpret_cast<uint16_t *>(dst_);
  344. in.TopLeftPixel()[h * in.stride() + w] =
  345. src[h * stride_ + w] - dst[h * stride_ + w];
  346. }
  347. }
  348. }
  349. fwd_txfm_ref(in, &coeff, size_, tx_type_);
  350. ASM_REGISTER_STATE_CHECK(RunInvTxfm(coeff, dst_));
  351. for (int h = 0; h < size_; ++h) {
  352. for (int w = 0; w < size_; ++w) {
  353. int diff;
  354. if (pixel_size_ == 1) {
  355. diff = dst_[h * stride_ + w] - src_[h * stride_ + w];
  356. } else {
  357. ASSERT_EQ(pixel_size_, 2);
  358. const uint16_t *const src = reinterpret_cast<uint16_t *>(src_);
  359. const uint16_t *const dst = reinterpret_cast<uint16_t *>(dst_);
  360. diff = dst[h * stride_ + w] - src[h * stride_ + w];
  361. }
  362. const uint32_t error = diff * diff;
  363. EXPECT_GE(static_cast<uint32_t>(limit), error)
  364. << "Error: " << size_ << "x" << size_
  365. << " inverse transform has error " << error << " at " << w << ","
  366. << h;
  367. if (::testing::Test::HasFailure()) {
  368. printf("Size: %d Transform type: %d\n", size_, tx_type_);
  369. return;
  370. }
  371. }
  372. }
  373. }
  374. }
  375. FhtFunc fwd_txfm_;
  376. FhtFuncRef fwd_txfm_ref;
  377. IhtWithBdFunc inv_txfm_;
  378. ACMRandom rnd_;
  379. uint8_t *src_;
  380. uint8_t *dst_;
  381. vpx_bit_depth_t bit_depth_;
  382. int tx_type_;
  383. int max_pixel_value_;
  384. int size_;
  385. int stride_;
  386. int pixel_size_;
  387. int block_size_;
  388. };
  389. /* -------------------------------------------------------------------------- */
  390. class TransDCT : public TransTestBase {
  391. public:
  392. TransDCT() { fwd_txfm_ref = fdct_ref; }
  393. };
  394. TEST_P(TransDCT, AccuracyCheck) {
  395. int t = 1;
  396. if (size_ == 16 && bit_depth_ > 10 && pixel_size_ == 2) {
  397. t = 2;
  398. } else if (size_ == 32 && bit_depth_ > 10 && pixel_size_ == 2) {
  399. t = 7;
  400. }
  401. RunAccuracyCheck(t);
  402. }
  403. TEST_P(TransDCT, CoeffCheck) { RunCoeffCheck(); }
  404. TEST_P(TransDCT, MemCheck) { RunMemCheck(); }
  405. TEST_P(TransDCT, InvAccuracyCheck) { RunInvAccuracyCheck(1); }
  // C implementations of the DCT: { forward, inverse, size, pixel_size }.
  // High-bitdepth rows (pixel_size 2) exist only when CONFIG_VP9_HIGHBITDEPTH
  // is set, so the row count is derived with sizeof below.
  406. static const FuncInfo dct_c_func_info[] = {
  407. #if CONFIG_VP9_HIGHBITDEPTH
  408. { &fdct_wrapper<vpx_highbd_fdct4x4_c>,
  409. &highbd_idct_wrapper<vpx_highbd_idct4x4_16_add_c>, 4, 2 },
  410. { &fdct_wrapper<vpx_highbd_fdct8x8_c>,
  411. &highbd_idct_wrapper<vpx_highbd_idct8x8_64_add_c>, 8, 2 },
  412. { &fdct_wrapper<vpx_highbd_fdct16x16_c>,
  413. &highbd_idct_wrapper<vpx_highbd_idct16x16_256_add_c>, 16, 2 },
  414. { &fdct_wrapper<vpx_highbd_fdct32x32_c>,
  415. &highbd_idct_wrapper<vpx_highbd_idct32x32_1024_add_c>, 32, 2 },
  416. #endif
  417. { &fdct_wrapper<vpx_fdct4x4_c>, &idct_wrapper<vpx_idct4x4_16_add_c>, 4, 1 },
  418. { &fdct_wrapper<vpx_fdct8x8_c>, &idct_wrapper<vpx_idct8x8_64_add_c>, 8, 1 },
  419. { &fdct_wrapper<vpx_fdct16x16_c>, &idct_wrapper<vpx_idct16x16_256_add_c>, 16,
  420. 1 },
  421. { &fdct_wrapper<vpx_fdct32x32_c>, &idct_wrapper<vpx_idct32x32_1024_add_c>, 32,
  422. 1 }
  423. };
  // Every table row x transform type 0 x bit depths 8/10/12. The fixture
  // skips the 1-byte-pixel rows at bit depths above 8.
  424. INSTANTIATE_TEST_CASE_P(
  425. C, TransDCT,
  426. ::testing::Combine(
  427. ::testing::Range(0, static_cast<int>(sizeof(dct_c_func_info) /
  428. sizeof(dct_c_func_info[0]))),
  429. ::testing::Values(dct_c_func_info), ::testing::Values(0),
  430. ::testing::Values(VPX_BITS_8, VPX_BITS_10, VPX_BITS_12)));
  431. #if !CONFIG_EMULATE_HARDWARE
  432. #if HAVE_SSE2
  // SSE2 implementations of the DCT: { forward, inverse, size, pixel_size }.
  // High-bitdepth rows exist only when CONFIG_VP9_HIGHBITDEPTH is set.
  433. static const FuncInfo dct_sse2_func_info[] = {
  434. #if CONFIG_VP9_HIGHBITDEPTH
  435. { &fdct_wrapper<vpx_highbd_fdct4x4_sse2>,
  436. &highbd_idct_wrapper<vpx_highbd_idct4x4_16_add_sse2>, 4, 2 },
  437. { &fdct_wrapper<vpx_highbd_fdct8x8_sse2>,
  438. &highbd_idct_wrapper<vpx_highbd_idct8x8_64_add_sse2>, 8, 2 },
  439. { &fdct_wrapper<vpx_highbd_fdct16x16_sse2>,
  440. &highbd_idct_wrapper<vpx_highbd_idct16x16_256_add_sse2>, 16, 2 },
  441. { &fdct_wrapper<vpx_highbd_fdct32x32_sse2>,
  442. &highbd_idct_wrapper<vpx_highbd_idct32x32_1024_add_sse2>, 32, 2 },
  443. #endif
  444. { &fdct_wrapper<vpx_fdct4x4_sse2>, &idct_wrapper<vpx_idct4x4_16_add_sse2>, 4,
  445. 1 },
  446. { &fdct_wrapper<vpx_fdct8x8_sse2>, &idct_wrapper<vpx_idct8x8_64_add_sse2>, 8,
  447. 1 },
  448. { &fdct_wrapper<vpx_fdct16x16_sse2>,
  449. &idct_wrapper<vpx_idct16x16_256_add_sse2>, 16, 1 },
  450. { &fdct_wrapper<vpx_fdct32x32_sse2>,
  451. &idct_wrapper<vpx_idct32x32_1024_add_sse2>, 32, 1 }
  452. };
  // Every table row x transform type 0 x bit depths 8/10/12.
  453. INSTANTIATE_TEST_CASE_P(
  454. SSE2, TransDCT,
  455. ::testing::Combine(
  456. ::testing::Range(0, static_cast<int>(sizeof(dct_sse2_func_info) /
  457. sizeof(dct_sse2_func_info[0]))),
  458. ::testing::Values(dct_sse2_func_info), ::testing::Values(0),
  459. ::testing::Values(VPX_BITS_8, VPX_BITS_10, VPX_BITS_12)));
  460. #endif // HAVE_SSE2
  461. #if HAVE_SSSE3 && !CONFIG_VP9_HIGHBITDEPTH && ARCH_X86_64
  462. // vpx_fdct8x8_ssse3 is only available in 64 bit builds.
  // Single entry: the SSSE3 8x8 forward DCT is paired with the SSE2 8x8
  // inverse DCT.
  463. static const FuncInfo dct_ssse3_func_info = {
  464. &fdct_wrapper<vpx_fdct8x8_ssse3>, &idct_wrapper<vpx_idct8x8_64_add_sse2>, 8, 1
  465. };
  466. // TODO(johannkoenig): high bit depth fdct8x8.
  // One parameter tuple: index 0 into the one-element "table" above,
  // transform type 0, 8-bit only.
  467. INSTANTIATE_TEST_CASE_P(SSSE3, TransDCT,
  468. ::testing::Values(make_tuple(0, &dct_ssse3_func_info, 0,
  469. VPX_BITS_8)));
  470. #endif // HAVE_SSSE3 && !CONFIG_VP9_HIGHBITDEPTH && ARCH_X86_64
  471. #if HAVE_AVX2 && !CONFIG_VP9_HIGHBITDEPTH
  // Single entry: the AVX2 32x32 forward DCT is paired with the SSE2 32x32
  // inverse DCT.
  472. static const FuncInfo dct_avx2_func_info = {
  473. &fdct_wrapper<vpx_fdct32x32_avx2>, &idct_wrapper<vpx_idct32x32_1024_add_sse2>,
  474. 32, 1
  475. };
  476. // TODO(johannkoenig): high bit depth fdct32x32.
  // One parameter tuple: index 0 into the one-element "table" above,
  // transform type 0, 8-bit only.
  477. INSTANTIATE_TEST_CASE_P(AVX2, TransDCT,
  478. ::testing::Values(make_tuple(0, &dct_avx2_func_info, 0,
  479. VPX_BITS_8)));
  480. #endif // HAVE_AVX2 && !CONFIG_VP9_HIGHBITDEPTH
  481. #if HAVE_NEON
  482. static const FuncInfo dct_neon_func_info[4] = {
  483. { &fdct_wrapper<vpx_fdct4x4_neon>, &idct_wrapper<vpx_idct4x4_16_add_neon>, 4,
  484. 1 },
  485. { &fdct_wrapper<vpx_fdct8x8_neon>, &idct_wrapper<vpx_idct8x8_64_add_neon>, 8,
  486. 1 },
  487. { &fdct_wrapper<vpx_fdct16x16_neon>,
  488. &idct_wrapper<vpx_idct16x16_256_add_neon>, 16, 1 },
  489. { &fdct_wrapper<vpx_fdct32x32_neon>,
  490. &idct_wrapper<vpx_idct32x32_1024_add_neon>, 32, 1 }
  491. };
  492. INSTANTIATE_TEST_CASE_P(
  493. NEON, TransDCT,
  494. ::testing::Combine(::testing::Range(0, 4),
  495. ::testing::Values(dct_neon_func_info),
  496. ::testing::Values(0), ::testing::Values(VPX_BITS_8)));
  497. #endif // HAVE_NEON
  498. #if HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH
  499. static const FuncInfo dct_msa_func_info[4] = {
  500. { &fdct_wrapper<vpx_fdct4x4_msa>, &idct_wrapper<vpx_idct4x4_16_add_msa>, 4,
  501. 1 },
  502. { &fdct_wrapper<vpx_fdct8x8_msa>, &idct_wrapper<vpx_idct8x8_64_add_msa>, 8,
  503. 1 },
  504. { &fdct_wrapper<vpx_fdct16x16_msa>, &idct_wrapper<vpx_idct16x16_256_add_msa>,
  505. 16, 1 },
  506. { &fdct_wrapper<vpx_fdct32x32_msa>, &idct_wrapper<vpx_idct32x32_1024_add_msa>,
  507. 32, 1 }
  508. };
  509. INSTANTIATE_TEST_CASE_P(MSA, TransDCT,
  510. ::testing::Combine(::testing::Range(0, 4),
  511. ::testing::Values(dct_msa_func_info),
  512. ::testing::Values(0),
  513. ::testing::Values(VPX_BITS_8)));
  514. #endif // HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH
  515. #if HAVE_VSX && !CONFIG_VP9_HIGHBITDEPTH
  // Single entry: only the 4x4 inverse DCT is a VSX function; the forward
  // transform paired with it is the C version.
  516. static const FuncInfo dct_vsx_func_info = {
  517. &fdct_wrapper<vpx_fdct4x4_c>, &idct_wrapper<vpx_idct4x4_16_add_vsx>, 4, 1
  518. };
  // One parameter tuple: index 0 into the one-element "table" above,
  // transform type 0, 8-bit only.
  519. INSTANTIATE_TEST_CASE_P(VSX, TransDCT,
  520. ::testing::Values(make_tuple(0, &dct_vsx_func_info, 0,
  521. VPX_BITS_8)));
  522. #endif // HAVE_VSX && !CONFIG_VP9_HIGHBITDEPTH
  523. #endif // !CONFIG_EMULATE_HARDWARE
  524. /* -------------------------------------------------------------------------- */
  525. class TransHT : public TransTestBase {
  526. public:
  527. TransHT() { fwd_txfm_ref = fht_ref; }
  528. };
  529. TEST_P(TransHT, AccuracyCheck) {
  530. RunAccuracyCheck(size_ == 16 && bit_depth_ > 10 && pixel_size_ == 2 ? 2 : 1);
  531. }
  532. TEST_P(TransHT, CoeffCheck) { RunCoeffCheck(); }
  533. TEST_P(TransHT, MemCheck) { RunMemCheck(); }
  534. TEST_P(TransHT, InvAccuracyCheck) { RunInvAccuracyCheck(1); }
  // C implementations of the hybrid transform:
  // { forward, inverse, size, pixel_size }. The forward functions already
  // have the FhtFunc signature, so they are listed without a wrapper.
  // High-bitdepth rows exist only when CONFIG_VP9_HIGHBITDEPTH is set.
  535. static const FuncInfo ht_c_func_info[] = {
  536. #if CONFIG_VP9_HIGHBITDEPTH
  537. { &vp9_highbd_fht4x4_c, &highbd_iht_wrapper<vp9_highbd_iht4x4_16_add_c>, 4,
  538. 2 },
  539. { &vp9_highbd_fht8x8_c, &highbd_iht_wrapper<vp9_highbd_iht8x8_64_add_c>, 8,
  540. 2 },
  541. { &vp9_highbd_fht16x16_c, &highbd_iht_wrapper<vp9_highbd_iht16x16_256_add_c>,
  542. 16, 2 },
  543. #endif
  544. { &vp9_fht4x4_c, &iht_wrapper<vp9_iht4x4_16_add_c>, 4, 1 },
  545. { &vp9_fht8x8_c, &iht_wrapper<vp9_iht8x8_64_add_c>, 8, 1 },
  546. { &vp9_fht16x16_c, &iht_wrapper<vp9_iht16x16_256_add_c>, 16, 1 }
  547. };
  // Every table row x transform types 0..3 x bit depths 8/10/12.
  548. INSTANTIATE_TEST_CASE_P(
  549. C, TransHT,
  550. ::testing::Combine(
  551. ::testing::Range(0, static_cast<int>(sizeof(ht_c_func_info) /
  552. sizeof(ht_c_func_info[0]))),
  553. ::testing::Values(ht_c_func_info), ::testing::Range(0, 4),
  554. ::testing::Values(VPX_BITS_8, VPX_BITS_10, VPX_BITS_12)));
  555. #if !CONFIG_EMULATE_HARDWARE
  556. #if HAVE_NEON
  // NEON inverse hybrid transforms. Every row pairs a C forward transform
  // with a NEON inverse transform, so only the inverse is NEON code here.
  // High-bitdepth rows exist only when CONFIG_VP9_HIGHBITDEPTH is set.
  557. static const FuncInfo ht_neon_func_info[] = {
  558. #if CONFIG_VP9_HIGHBITDEPTH
  559. { &vp9_highbd_fht4x4_c, &highbd_iht_wrapper<vp9_highbd_iht4x4_16_add_neon>, 4,
  560. 2 },
  561. { &vp9_highbd_fht8x8_c, &highbd_iht_wrapper<vp9_highbd_iht8x8_64_add_neon>, 8,
  562. 2 },
  563. { &vp9_highbd_fht16x16_c,
  564. &highbd_iht_wrapper<vp9_highbd_iht16x16_256_add_neon>, 16, 2 },
  565. #endif
  566. { &vp9_fht4x4_c, &iht_wrapper<vp9_iht4x4_16_add_neon>, 4, 1 },
  567. { &vp9_fht8x8_c, &iht_wrapper<vp9_iht8x8_64_add_neon>, 8, 1 },
  568. { &vp9_fht16x16_c, &iht_wrapper<vp9_iht16x16_256_add_neon>, 16, 1 }
  569. };
  // Every table row x transform types 0..3 x bit depths 8/10/12.
  570. INSTANTIATE_TEST_CASE_P(
  571. NEON, TransHT,
  572. ::testing::Combine(
  573. ::testing::Range(0, static_cast<int>(sizeof(ht_neon_func_info) /
  574. sizeof(ht_neon_func_info[0]))),
  575. ::testing::Values(ht_neon_func_info), ::testing::Range(0, 4),
  576. ::testing::Values(VPX_BITS_8, VPX_BITS_10, VPX_BITS_12)));
  577. #endif // HAVE_NEON
  578. #if HAVE_SSE2
  579. static const FuncInfo ht_sse2_func_info[3] = {
  580. { &vp9_fht4x4_sse2, &iht_wrapper<vp9_iht4x4_16_add_sse2>, 4, 1 },
  581. { &vp9_fht8x8_sse2, &iht_wrapper<vp9_iht8x8_64_add_sse2>, 8, 1 },
  582. { &vp9_fht16x16_sse2, &iht_wrapper<vp9_iht16x16_256_add_sse2>, 16, 1 }
  583. };
  584. INSTANTIATE_TEST_CASE_P(SSE2, TransHT,
  585. ::testing::Combine(::testing::Range(0, 3),
  586. ::testing::Values(ht_sse2_func_info),
  587. ::testing::Range(0, 4),
  588. ::testing::Values(VPX_BITS_8)));
  589. #endif // HAVE_SSE2
  590. #if HAVE_SSE4_1 && CONFIG_VP9_HIGHBITDEPTH
  591. static const FuncInfo ht_sse4_1_func_info[3] = {
  592. { &vp9_highbd_fht4x4_c, &highbd_iht_wrapper<vp9_highbd_iht4x4_16_add_sse4_1>,
  593. 4, 2 },
  594. { vp9_highbd_fht8x8_c, &highbd_iht_wrapper<vp9_highbd_iht8x8_64_add_sse4_1>,
  595. 8, 2 },
  596. { &vp9_highbd_fht16x16_c,
  597. &highbd_iht_wrapper<vp9_highbd_iht16x16_256_add_sse4_1>, 16, 2 }
  598. };
  599. INSTANTIATE_TEST_CASE_P(
  600. SSE4_1, TransHT,
  601. ::testing::Combine(::testing::Range(0, 3),
  602. ::testing::Values(ht_sse4_1_func_info),
  603. ::testing::Range(0, 4),
  604. ::testing::Values(VPX_BITS_8, VPX_BITS_10,
  605. VPX_BITS_12)));
  606. #endif // HAVE_SSE4_1 && CONFIG_VP9_HIGHBITDEPTH
  #if HAVE_VSX && !CONFIG_EMULATE_HARDWARE && !CONFIG_VP9_HIGHBITDEPTH
  // VSX inverse hybrid transforms. Every row pairs a C forward transform with
  // a VSX inverse transform.
  static const FuncInfo ht_vsx_func_info[] = {
    { &vp9_fht4x4_c, &iht_wrapper<vp9_iht4x4_16_add_vsx>, 4, 1 },
    { &vp9_fht8x8_c, &iht_wrapper<vp9_iht8x8_64_add_vsx>, 8, 1 },
    { &vp9_fht16x16_c, &iht_wrapper<vp9_iht16x16_256_add_vsx>, 16, 1 }
  };

  // Every table row x transform types 0..3, 8-bit only. The row count comes
  // from sizeof (as for the C table) so it cannot drift from the table.
  INSTANTIATE_TEST_CASE_P(
      VSX, TransHT,
      ::testing::Combine(
          ::testing::Range(0, static_cast<int>(sizeof(ht_vsx_func_info) /
                                               sizeof(ht_vsx_func_info[0]))),
          ::testing::Values(ht_vsx_func_info), ::testing::Range(0, 4),
          ::testing::Values(VPX_BITS_8)));
  #endif  // HAVE_VSX && !CONFIG_EMULATE_HARDWARE && !CONFIG_VP9_HIGHBITDEPTH
  619. #endif // !CONFIG_EMULATE_HARDWARE
  620. /* -------------------------------------------------------------------------- */
  621. class TransWHT : public TransTestBase {
  622. public:
  623. TransWHT() { fwd_txfm_ref = fwht_ref; }
  624. };
  625. TEST_P(TransWHT, AccuracyCheck) { RunAccuracyCheck(0); }
  626. TEST_P(TransWHT, CoeffCheck) { RunCoeffCheck(); }
  627. TEST_P(TransWHT, MemCheck) { RunMemCheck(); }
  628. TEST_P(TransWHT, InvAccuracyCheck) { RunInvAccuracyCheck(0); }
  // C implementations of the 4x4 WHT: { forward, inverse, size, pixel_size }.
  // The high-bitdepth row exists only when CONFIG_VP9_HIGHBITDEPTH is set.
  629. static const FuncInfo wht_c_func_info[] = {
  630. #if CONFIG_VP9_HIGHBITDEPTH
  631. { &fdct_wrapper<vp9_highbd_fwht4x4_c>,
  632. &highbd_idct_wrapper<vpx_highbd_iwht4x4_16_add_c>, 4, 2 },
  633. #endif
  634. { &fdct_wrapper<vp9_fwht4x4_c>, &idct_wrapper<vpx_iwht4x4_16_add_c>, 4, 1 }
  635. };
  // Every table row x transform type 0 x bit depths 8/10/12.
  636. INSTANTIATE_TEST_CASE_P(
  637. C, TransWHT,
  638. ::testing::Combine(
  639. ::testing::Range(0, static_cast<int>(sizeof(wht_c_func_info) /
  640. sizeof(wht_c_func_info[0]))),
  641. ::testing::Values(wht_c_func_info), ::testing::Values(0),
  642. ::testing::Values(VPX_BITS_8, VPX_BITS_10, VPX_BITS_12)));
  643. #if HAVE_SSE2 && !CONFIG_EMULATE_HARDWARE
  // Single entry: SSE2 forward and inverse 4x4 WHT.
  644. static const FuncInfo wht_sse2_func_info = {
  645. &fdct_wrapper<vp9_fwht4x4_sse2>, &idct_wrapper<vpx_iwht4x4_16_add_sse2>, 4, 1
  646. };
  // One parameter tuple: index 0 into the one-element "table" above,
  // transform type 0, 8-bit only.
  647. INSTANTIATE_TEST_CASE_P(SSE2, TransWHT,
  648. ::testing::Values(make_tuple(0, &wht_sse2_func_info, 0,
  649. VPX_BITS_8)));
  650. #endif // HAVE_SSE2 && !CONFIG_EMULATE_HARDWARE
  651. #if HAVE_VSX && !CONFIG_EMULATE_HARDWARE && !CONFIG_VP9_HIGHBITDEPTH
  // Single entry: only the inverse 4x4 WHT is a VSX function; the forward
  // transform paired with it is the C version.
  652. static const FuncInfo wht_vsx_func_info = {
  653. &fdct_wrapper<vp9_fwht4x4_c>, &idct_wrapper<vpx_iwht4x4_16_add_vsx>, 4, 1
  654. };
  // One parameter tuple: index 0 into the one-element "table" above,
  // transform type 0, 8-bit only.
  655. INSTANTIATE_TEST_CASE_P(VSX, TransWHT,
  656. ::testing::Values(make_tuple(0, &wht_vsx_func_info, 0,
  657. VPX_BITS_8)));
  658. #endif // HAVE_VSX && !CONFIG_EMULATE_HARDWARE && !CONFIG_VP9_HIGHBITDEPTH
  659. } // namespace