// fdct8x8_test.cc — libvpx 8x8 forward/inverse transform tests.
/*
 *  Copyright (c) 2012 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
#include <math.h>
#include <stdlib.h>
#include <string.h>

#include <tuple>

#include "third_party/googletest/src/include/gtest/gtest.h"

#include "./vp9_rtcd.h"
#include "./vpx_dsp_rtcd.h"
#include "test/acm_random.h"
#include "test/clear_system_state.h"
#include "test/register_state_check.h"
#include "test/util.h"
#include "vp9/common/vp9_entropy.h"
#include "vp9/common/vp9_scan.h"
#include "vpx/vpx_codec.h"
#include "vpx/vpx_integer.h"
#include "vpx_ports/mem.h"
using libvpx_test::ACMRandom;

namespace {

// Number of coefficients in an 8x8 transform block.
const int kNumCoeffs = 64;
const double kPi = 3.141592653589793238462643383279502884;

// Maximum tolerated |count(+) - count(-)| per coefficient over the sign-bias
// runs below (8-bit values; scaled by bit depth where used).
const int kSignBiasMaxDiff255 = 1500;
const int kSignBiasMaxDiff15 = 10000;

// Signatures of the forward/inverse DCT and hybrid-transform functions
// under test.
typedef void (*FdctFunc)(const int16_t *in, tran_low_t *out, int stride);
typedef void (*IdctFunc)(const tran_low_t *in, uint8_t *out, int stride);
typedef void (*FhtFunc)(const int16_t *in, tran_low_t *out, int stride,
                        int tx_type);
typedef void (*IhtFunc)(const tran_low_t *in, uint8_t *out, int stride,
                        int tx_type);

// Parameter tuples: <forward fn, inverse fn, tx_type (or, for Idct8x8Param,
// a coefficient threshold), bit depth>.
typedef std::tuple<FdctFunc, IdctFunc, int, vpx_bit_depth_t> Dct8x8Param;
typedef std::tuple<FhtFunc, IhtFunc, int, vpx_bit_depth_t> Ht8x8Param;
typedef std::tuple<IdctFunc, IdctFunc, int, vpx_bit_depth_t> Idct8x8Param;
  41. void reference_8x8_dct_1d(const double in[8], double out[8]) {
  42. const double kInvSqrt2 = 0.707106781186547524400844362104;
  43. for (int k = 0; k < 8; k++) {
  44. out[k] = 0.0;
  45. for (int n = 0; n < 8; n++) {
  46. out[k] += in[n] * cos(kPi * (2 * n + 1) * k / 16.0);
  47. }
  48. if (k == 0) out[k] = out[k] * kInvSqrt2;
  49. }
  50. }
  51. void reference_8x8_dct_2d(const int16_t input[kNumCoeffs],
  52. double output[kNumCoeffs]) {
  53. // First transform columns
  54. for (int i = 0; i < 8; ++i) {
  55. double temp_in[8], temp_out[8];
  56. for (int j = 0; j < 8; ++j) temp_in[j] = input[j * 8 + i];
  57. reference_8x8_dct_1d(temp_in, temp_out);
  58. for (int j = 0; j < 8; ++j) output[j * 8 + i] = temp_out[j];
  59. }
  60. // Then transform rows
  61. for (int i = 0; i < 8; ++i) {
  62. double temp_in[8], temp_out[8];
  63. for (int j = 0; j < 8; ++j) temp_in[j] = output[j + i * 8];
  64. reference_8x8_dct_1d(temp_in, temp_out);
  65. // Scale by some magic number
  66. for (int j = 0; j < 8; ++j) output[j + i * 8] = temp_out[j] * 2;
  67. }
  68. }
// Reference forward 8x8 DCT: thin wrapper over the C implementation that
// matches the FhtFunc signature (tx_type is ignored for the plain DCT).
void fdct8x8_ref(const int16_t *in, tran_low_t *out, int stride,
                 int /*tx_type*/) {
  vpx_fdct8x8_c(in, out, stride);
}

// Reference forward 8x8 hybrid transform (tx_type selects the DCT/ADST
// combination), using the C implementation.
void fht8x8_ref(const int16_t *in, tran_low_t *out, int stride, int tx_type) {
  vp9_fht8x8_c(in, out, stride, tx_type);
}
#if CONFIG_VP9_HIGHBITDEPTH
// Adapters binding the high-bit-depth inverse transforms (which take a
// uint16_t destination and a bit-depth argument) to the 8-bit
// IdctFunc/IhtFunc signatures; the uint8_t pointer actually carries a
// uint16_t buffer (see CAST_TO_SHORTPTR).
void idct8x8_10(const tran_low_t *in, uint8_t *out, int stride) {
  vpx_highbd_idct8x8_64_add_c(in, CAST_TO_SHORTPTR(out), stride, 10);
}

void idct8x8_12(const tran_low_t *in, uint8_t *out, int stride) {
  vpx_highbd_idct8x8_64_add_c(in, CAST_TO_SHORTPTR(out), stride, 12);
}

void iht8x8_10(const tran_low_t *in, uint8_t *out, int stride, int tx_type) {
  vp9_highbd_iht8x8_64_add_c(in, CAST_TO_SHORTPTR(out), stride, tx_type, 10);
}

void iht8x8_12(const tran_low_t *in, uint8_t *out, int stride, int tx_type) {
  vp9_highbd_iht8x8_64_add_c(in, CAST_TO_SHORTPTR(out), stride, tx_type, 12);
}

#if HAVE_SSE2
// C and SSE2 variants of the partial (12-coefficient) and full (64-
// coefficient) high-bit-depth inverse transforms, at 10- and 12-bit depth.
void idct8x8_12_add_10_c(const tran_low_t *in, uint8_t *out, int stride) {
  vpx_highbd_idct8x8_12_add_c(in, CAST_TO_SHORTPTR(out), stride, 10);
}

void idct8x8_12_add_12_c(const tran_low_t *in, uint8_t *out, int stride) {
  vpx_highbd_idct8x8_12_add_c(in, CAST_TO_SHORTPTR(out), stride, 12);
}

void idct8x8_12_add_10_sse2(const tran_low_t *in, uint8_t *out, int stride) {
  vpx_highbd_idct8x8_12_add_sse2(in, CAST_TO_SHORTPTR(out), stride, 10);
}

void idct8x8_12_add_12_sse2(const tran_low_t *in, uint8_t *out, int stride) {
  vpx_highbd_idct8x8_12_add_sse2(in, CAST_TO_SHORTPTR(out), stride, 12);
}

void idct8x8_64_add_10_sse2(const tran_low_t *in, uint8_t *out, int stride) {
  vpx_highbd_idct8x8_64_add_sse2(in, CAST_TO_SHORTPTR(out), stride, 10);
}

void idct8x8_64_add_12_sse2(const tran_low_t *in, uint8_t *out, int stride) {
  vpx_highbd_idct8x8_64_add_sse2(in, CAST_TO_SHORTPTR(out), stride, 12);
}
#endif  // HAVE_SSE2
#endif  // CONFIG_VP9_HIGHBITDEPTH
// Shared test logic for the 8x8 transform fixtures below. Derived fixtures
// bind the transforms under test via RunFwdTxfm/RunInvTxfm and initialize
// pitch_, tx_type_, fwd_txfm_ref, bit_depth_ and mask_ in SetUp.
class FwdTrans8x8TestBase {
 public:
  virtual ~FwdTrans8x8TestBase() {}

 protected:
  virtual void RunFwdTxfm(int16_t *in, tran_low_t *out, int stride) = 0;
  virtual void RunInvTxfm(tran_low_t *out, uint8_t *dst, int stride) = 0;

  // Verifies that, over many random residual blocks, no output coefficient
  // is systematically biased toward positive or negative values. Runs once
  // with full-range inputs and once with inputs shrunk by a factor of 16.
  void RunSignBiasCheck() {
    ACMRandom rnd(ACMRandom::DeterministicSeed());
    DECLARE_ALIGNED(16, int16_t, test_input_block[64]);
    DECLARE_ALIGNED(16, tran_low_t, test_output_block[64]);
    // Per-coefficient counters: [j][0] = negative outputs, [j][1] = positive.
    int count_sign_block[64][2];
    const int count_test_block = 100000;

    memset(count_sign_block, 0, sizeof(count_sign_block));
    for (int i = 0; i < count_test_block; ++i) {
      // Initialize a test block with input range [-255, 255].
      for (int j = 0; j < 64; ++j) {
        test_input_block[j] = ((rnd.Rand16() >> (16 - bit_depth_)) & mask_) -
                              ((rnd.Rand16() >> (16 - bit_depth_)) & mask_);
      }
      ASM_REGISTER_STATE_CHECK(
          RunFwdTxfm(test_input_block, test_output_block, pitch_));

      for (int j = 0; j < 64; ++j) {
        if (test_output_block[j] < 0) {
          ++count_sign_block[j][0];
        } else if (test_output_block[j] > 0) {
          ++count_sign_block[j][1];
        }
      }
    }

    for (int j = 0; j < 64; ++j) {
      const int diff = abs(count_sign_block[j][0] - count_sign_block[j][1]);
      const int max_diff = kSignBiasMaxDiff255;
      EXPECT_LT(diff, max_diff << (bit_depth_ - 8))
          << "Error: 8x8 FDCT/FHT has a sign bias > "
          << 1. * max_diff / count_test_block * 100 << "%"
          << " for input range [-255, 255] at index " << j
          << " count0: " << count_sign_block[j][0]
          << " count1: " << count_sign_block[j][1] << " diff: " << diff;
    }

    memset(count_sign_block, 0, sizeof(count_sign_block));
    for (int i = 0; i < count_test_block; ++i) {
      // Initialize a test block with input range [-mask_ / 16, mask_ / 16].
      for (int j = 0; j < 64; ++j) {
        test_input_block[j] =
            ((rnd.Rand16() & mask_) >> 4) - ((rnd.Rand16() & mask_) >> 4);
      }
      ASM_REGISTER_STATE_CHECK(
          RunFwdTxfm(test_input_block, test_output_block, pitch_));

      for (int j = 0; j < 64; ++j) {
        if (test_output_block[j] < 0) {
          ++count_sign_block[j][0];
        } else if (test_output_block[j] > 0) {
          ++count_sign_block[j][1];
        }
      }
    }

    for (int j = 0; j < 64; ++j) {
      const int diff = abs(count_sign_block[j][0] - count_sign_block[j][1]);
      const int max_diff = kSignBiasMaxDiff15;
      EXPECT_LT(diff, max_diff << (bit_depth_ - 8))
          << "Error: 8x8 FDCT/FHT has a sign bias > "
          << 1. * max_diff / count_test_block * 100 << "%"
          << " for input range [-15, 15] at index " << j
          << " count0: " << count_sign_block[j][0]
          << " count1: " << count_sign_block[j][1] << " diff: " << diff;
    }
  }

  // Round-trips random residual blocks through forward transform, a coarse
  // quantization to multiples of 4, and the inverse transform; the
  // reconstruction error must stay within the codec's tolerance.
  void RunRoundTripErrorCheck() {
    ACMRandom rnd(ACMRandom::DeterministicSeed());
    int max_error = 0;
    int total_error = 0;
    const int count_test_block = 100000;
    DECLARE_ALIGNED(16, int16_t, test_input_block[64]);
    DECLARE_ALIGNED(16, tran_low_t, test_temp_block[64]);
    DECLARE_ALIGNED(16, uint8_t, dst[64]);
    DECLARE_ALIGNED(16, uint8_t, src[64]);
#if CONFIG_VP9_HIGHBITDEPTH
    DECLARE_ALIGNED(16, uint16_t, dst16[64]);
    DECLARE_ALIGNED(16, uint16_t, src16[64]);
#endif

    for (int i = 0; i < count_test_block; ++i) {
      // Initialize a test block with input range [-mask_, mask_].
      for (int j = 0; j < 64; ++j) {
        if (bit_depth_ == VPX_BITS_8) {
          src[j] = rnd.Rand8();
          dst[j] = rnd.Rand8();
          test_input_block[j] = src[j] - dst[j];
#if CONFIG_VP9_HIGHBITDEPTH
        } else {
          src16[j] = rnd.Rand16() & mask_;
          dst16[j] = rnd.Rand16() & mask_;
          test_input_block[j] = src16[j] - dst16[j];
#endif
        }
      }

      ASM_REGISTER_STATE_CHECK(
          RunFwdTxfm(test_input_block, test_temp_block, pitch_));
      // Quantize coefficients to multiples of 4, rounding away from zero.
      for (int j = 0; j < 64; ++j) {
        if (test_temp_block[j] > 0) {
          test_temp_block[j] += 2;
          test_temp_block[j] /= 4;
          test_temp_block[j] *= 4;
        } else {
          test_temp_block[j] -= 2;
          test_temp_block[j] /= 4;
          test_temp_block[j] *= 4;
        }
      }
      if (bit_depth_ == VPX_BITS_8) {
        ASM_REGISTER_STATE_CHECK(RunInvTxfm(test_temp_block, dst, pitch_));
#if CONFIG_VP9_HIGHBITDEPTH
      } else {
        ASM_REGISTER_STATE_CHECK(
            RunInvTxfm(test_temp_block, CAST_TO_BYTEPTR(dst16), pitch_));
#endif
      }

      for (int j = 0; j < 64; ++j) {
#if CONFIG_VP9_HIGHBITDEPTH
        const int diff =
            bit_depth_ == VPX_BITS_8 ? dst[j] - src[j] : dst16[j] - src16[j];
#else
        const int diff = dst[j] - src[j];
#endif
        const int error = diff * diff;
        if (max_error < error) max_error = error;
        total_error += error;
      }
    }

    EXPECT_GE(1 << 2 * (bit_depth_ - 8), max_error)
        << "Error: 8x8 FDCT/IDCT or FHT/IHT has an individual"
        << " roundtrip error > 1";

    EXPECT_GE((count_test_block << 2 * (bit_depth_ - 8)) / 5, total_error)
        << "Error: 8x8 FDCT/IDCT or FHT/IHT has average roundtrip "
        << "error > 1/5 per block";
  }

  // Feeds extreme residuals (+/-mask_ patterns) through the transform pair,
  // and also compares the forward output against fwd_txfm_ref to catch
  // intermediate overflow in optimized implementations.
  void RunExtremalCheck() {
    ACMRandom rnd(ACMRandom::DeterministicSeed());
    int max_error = 0;
    int total_error = 0;
    int total_coeff_error = 0;
    const int count_test_block = 100000;
    DECLARE_ALIGNED(16, int16_t, test_input_block[64]);
    DECLARE_ALIGNED(16, tran_low_t, test_temp_block[64]);
    DECLARE_ALIGNED(16, tran_low_t, ref_temp_block[64]);
    DECLARE_ALIGNED(16, uint8_t, dst[64]);
    DECLARE_ALIGNED(16, uint8_t, src[64]);
#if CONFIG_VP9_HIGHBITDEPTH
    DECLARE_ALIGNED(16, uint16_t, dst16[64]);
    DECLARE_ALIGNED(16, uint16_t, src16[64]);
#endif

    for (int i = 0; i < count_test_block; ++i) {
      // Initialize a test block with input range [-mask_, mask_].
      // The first two iterations use constant all-max / all-min blocks;
      // the remainder mix the two extremes at random.
      for (int j = 0; j < 64; ++j) {
        if (bit_depth_ == VPX_BITS_8) {
          if (i == 0) {
            src[j] = 255;
            dst[j] = 0;
          } else if (i == 1) {
            src[j] = 0;
            dst[j] = 255;
          } else {
            src[j] = rnd.Rand8() % 2 ? 255 : 0;
            dst[j] = rnd.Rand8() % 2 ? 255 : 0;
          }
          test_input_block[j] = src[j] - dst[j];
#if CONFIG_VP9_HIGHBITDEPTH
        } else {
          if (i == 0) {
            src16[j] = mask_;
            dst16[j] = 0;
          } else if (i == 1) {
            src16[j] = 0;
            dst16[j] = mask_;
          } else {
            src16[j] = rnd.Rand8() % 2 ? mask_ : 0;
            dst16[j] = rnd.Rand8() % 2 ? mask_ : 0;
          }
          test_input_block[j] = src16[j] - dst16[j];
#endif
        }
      }

      ASM_REGISTER_STATE_CHECK(
          RunFwdTxfm(test_input_block, test_temp_block, pitch_));
      ASM_REGISTER_STATE_CHECK(
          fwd_txfm_ref(test_input_block, ref_temp_block, pitch_, tx_type_));

      if (bit_depth_ == VPX_BITS_8) {
        ASM_REGISTER_STATE_CHECK(RunInvTxfm(test_temp_block, dst, pitch_));
#if CONFIG_VP9_HIGHBITDEPTH
      } else {
        ASM_REGISTER_STATE_CHECK(
            RunInvTxfm(test_temp_block, CAST_TO_BYTEPTR(dst16), pitch_));
#endif
      }

      for (int j = 0; j < 64; ++j) {
#if CONFIG_VP9_HIGHBITDEPTH
        const int diff =
            bit_depth_ == VPX_BITS_8 ? dst[j] - src[j] : dst16[j] - src16[j];
#else
        const int diff = dst[j] - src[j];
#endif
        const int error = diff * diff;
        if (max_error < error) max_error = error;
        total_error += error;

        const int coeff_diff = test_temp_block[j] - ref_temp_block[j];
        total_coeff_error += abs(coeff_diff);
      }

      // NOTE: these expectations run inside the per-block loop (matching the
      // upstream test), so a failure is reported on the first bad block.
      EXPECT_GE(1 << 2 * (bit_depth_ - 8), max_error)
          << "Error: Extremal 8x8 FDCT/IDCT or FHT/IHT has"
          << "an individual roundtrip error > 1";

      EXPECT_GE((count_test_block << 2 * (bit_depth_ - 8)) / 5, total_error)
          << "Error: Extremal 8x8 FDCT/IDCT or FHT/IHT has average"
          << " roundtrip error > 1/5 per block";

      EXPECT_EQ(0, total_coeff_error)
          << "Error: Extremal 8x8 FDCT/FHT has"
          << "overflow issues in the intermediate steps > 1";
    }
  }

  // Checks the inverse transform against coefficients produced by the
  // double-precision reference forward DCT; the per-pixel reconstruction
  // error must stay within 1 (scaled for higher bit depths).
  void RunInvAccuracyCheck() {
    ACMRandom rnd(ACMRandom::DeterministicSeed());
    const int count_test_block = 1000;
    DECLARE_ALIGNED(16, int16_t, in[kNumCoeffs]);
    DECLARE_ALIGNED(16, tran_low_t, coeff[kNumCoeffs]);
    DECLARE_ALIGNED(16, uint8_t, dst[kNumCoeffs]);
    DECLARE_ALIGNED(16, uint8_t, src[kNumCoeffs]);
#if CONFIG_VP9_HIGHBITDEPTH
    DECLARE_ALIGNED(16, uint16_t, src16[kNumCoeffs]);
    DECLARE_ALIGNED(16, uint16_t, dst16[kNumCoeffs]);
#endif

    for (int i = 0; i < count_test_block; ++i) {
      double out_r[kNumCoeffs];

      // Initialize a test block with input range [-255, 255].
      for (int j = 0; j < kNumCoeffs; ++j) {
        if (bit_depth_ == VPX_BITS_8) {
          src[j] = rnd.Rand8() % 2 ? 255 : 0;
          dst[j] = src[j] > 0 ? 0 : 255;
          in[j] = src[j] - dst[j];
#if CONFIG_VP9_HIGHBITDEPTH
        } else {
          src16[j] = rnd.Rand8() % 2 ? mask_ : 0;
          dst16[j] = src16[j] > 0 ? 0 : mask_;
          in[j] = src16[j] - dst16[j];
#endif
        }
      }

      reference_8x8_dct_2d(in, out_r);
      for (int j = 0; j < kNumCoeffs; ++j) {
        coeff[j] = static_cast<tran_low_t>(round(out_r[j]));
      }

      if (bit_depth_ == VPX_BITS_8) {
        ASM_REGISTER_STATE_CHECK(RunInvTxfm(coeff, dst, pitch_));
#if CONFIG_VP9_HIGHBITDEPTH
      } else {
        ASM_REGISTER_STATE_CHECK(
            RunInvTxfm(coeff, CAST_TO_BYTEPTR(dst16), pitch_));
#endif
      }

      for (int j = 0; j < kNumCoeffs; ++j) {
#if CONFIG_VP9_HIGHBITDEPTH
        const int diff =
            bit_depth_ == VPX_BITS_8 ? dst[j] - src[j] : dst16[j] - src16[j];
#else
        const int diff = dst[j] - src[j];
#endif
        const uint32_t error = diff * diff;
        EXPECT_GE(1u << 2 * (bit_depth_ - 8), error)
            << "Error: 8x8 IDCT has error " << error << " at index " << j;
      }
    }
  }

  // Checks the forward transform against the double-precision reference on
  // saturated +/-mask_ inputs; the squared coefficient error may not exceed
  // 9 (scaled for higher bit depths).
  void RunFwdAccuracyCheck() {
    ACMRandom rnd(ACMRandom::DeterministicSeed());
    const int count_test_block = 1000;
    DECLARE_ALIGNED(16, int16_t, in[kNumCoeffs]);
    DECLARE_ALIGNED(16, tran_low_t, coeff_r[kNumCoeffs]);
    DECLARE_ALIGNED(16, tran_low_t, coeff[kNumCoeffs]);

    for (int i = 0; i < count_test_block; ++i) {
      double out_r[kNumCoeffs];

      // Initialize a test block with input range [-mask_, mask_].
      for (int j = 0; j < kNumCoeffs; ++j) {
        in[j] = rnd.Rand8() % 2 == 0 ? mask_ : -mask_;
      }

      RunFwdTxfm(in, coeff, pitch_);
      reference_8x8_dct_2d(in, out_r);
      for (int j = 0; j < kNumCoeffs; ++j) {
        coeff_r[j] = static_cast<tran_low_t>(round(out_r[j]));
      }

      for (int j = 0; j < kNumCoeffs; ++j) {
        const int32_t diff = coeff[j] - coeff_r[j];
        const uint32_t error = diff * diff;
        EXPECT_GE(9u << 2 * (bit_depth_ - 8), error)
            << "Error: 8x8 DCT has error " << error << " at index " << j;
      }
    }
  }

  // Compares the inverse transform under test against |ref_txfm| on
  // partially-populated blocks (eob = 12) whose coefficient magnitudes are
  // below |thresh|; the two implementations must agree exactly.
  void CompareInvReference(IdctFunc ref_txfm, int thresh) {
    ACMRandom rnd(ACMRandom::DeterministicSeed());
    const int count_test_block = 10000;
    const int eob = 12;
    DECLARE_ALIGNED(16, tran_low_t, coeff[kNumCoeffs]);
    DECLARE_ALIGNED(16, uint8_t, dst[kNumCoeffs]);
    DECLARE_ALIGNED(16, uint8_t, ref[kNumCoeffs]);
#if CONFIG_VP9_HIGHBITDEPTH
    DECLARE_ALIGNED(16, uint16_t, dst16[kNumCoeffs]);
    DECLARE_ALIGNED(16, uint16_t, ref16[kNumCoeffs]);
#endif
    // Place the nonzero coefficients in scan order, as a decoder would.
    const int16_t *scan = vp9_default_scan_orders[TX_8X8].scan;

    for (int i = 0; i < count_test_block; ++i) {
      for (int j = 0; j < kNumCoeffs; ++j) {
        if (j < eob) {
          // Random values less than the threshold, either positive or negative
          coeff[scan[j]] = rnd(thresh) * (1 - 2 * (i % 2));
        } else {
          coeff[scan[j]] = 0;
        }
        if (bit_depth_ == VPX_BITS_8) {
          dst[j] = 0;
          ref[j] = 0;
#if CONFIG_VP9_HIGHBITDEPTH
        } else {
          dst16[j] = 0;
          ref16[j] = 0;
#endif
        }
      }
      if (bit_depth_ == VPX_BITS_8) {
        ref_txfm(coeff, ref, pitch_);
        ASM_REGISTER_STATE_CHECK(RunInvTxfm(coeff, dst, pitch_));
#if CONFIG_VP9_HIGHBITDEPTH
      } else {
        ref_txfm(coeff, CAST_TO_BYTEPTR(ref16), pitch_);
        ASM_REGISTER_STATE_CHECK(
            RunInvTxfm(coeff, CAST_TO_BYTEPTR(dst16), pitch_));
#endif
      }

      for (int j = 0; j < kNumCoeffs; ++j) {
#if CONFIG_VP9_HIGHBITDEPTH
        const int diff =
            bit_depth_ == VPX_BITS_8 ? dst[j] - ref[j] : dst16[j] - ref16[j];
#else
        const int diff = dst[j] - ref[j];
#endif
        const uint32_t error = diff * diff;
        EXPECT_EQ(0u, error)
            << "Error: 8x8 IDCT has error " << error << " at index " << j;
      }
    }
  }

  int pitch_;
  int tx_type_;  // Hybrid-transform type passed to fwd_txfm_ref.
  FhtFunc fwd_txfm_ref;
  vpx_bit_depth_t bit_depth_;
  int mask_;  // (1 << bit_depth_) - 1: maximum pixel value.
};
  463. class FwdTrans8x8DCT : public FwdTrans8x8TestBase,
  464. public ::testing::TestWithParam<Dct8x8Param> {
  465. public:
  466. virtual ~FwdTrans8x8DCT() {}
  467. virtual void SetUp() {
  468. fwd_txfm_ = GET_PARAM(0);
  469. inv_txfm_ = GET_PARAM(1);
  470. tx_type_ = GET_PARAM(2);
  471. pitch_ = 8;
  472. fwd_txfm_ref = fdct8x8_ref;
  473. bit_depth_ = GET_PARAM(3);
  474. mask_ = (1 << bit_depth_) - 1;
  475. }
  476. virtual void TearDown() { libvpx_test::ClearSystemState(); }
  477. protected:
  478. void RunFwdTxfm(int16_t *in, tran_low_t *out, int stride) {
  479. fwd_txfm_(in, out, stride);
  480. }
  481. void RunInvTxfm(tran_low_t *out, uint8_t *dst, int stride) {
  482. inv_txfm_(out, dst, stride);
  483. }
  484. FdctFunc fwd_txfm_;
  485. IdctFunc inv_txfm_;
  486. };
// Run the shared checks from FwdTrans8x8TestBase against each DCT pair.
TEST_P(FwdTrans8x8DCT, SignBiasCheck) { RunSignBiasCheck(); }

TEST_P(FwdTrans8x8DCT, RoundTripErrorCheck) { RunRoundTripErrorCheck(); }

TEST_P(FwdTrans8x8DCT, ExtremalCheck) { RunExtremalCheck(); }

TEST_P(FwdTrans8x8DCT, FwdAccuracyCheck) { RunFwdAccuracyCheck(); }

TEST_P(FwdTrans8x8DCT, InvAccuracyCheck) { RunInvAccuracyCheck(); }
  492. class FwdTrans8x8HT : public FwdTrans8x8TestBase,
  493. public ::testing::TestWithParam<Ht8x8Param> {
  494. public:
  495. virtual ~FwdTrans8x8HT() {}
  496. virtual void SetUp() {
  497. fwd_txfm_ = GET_PARAM(0);
  498. inv_txfm_ = GET_PARAM(1);
  499. tx_type_ = GET_PARAM(2);
  500. pitch_ = 8;
  501. fwd_txfm_ref = fht8x8_ref;
  502. bit_depth_ = GET_PARAM(3);
  503. mask_ = (1 << bit_depth_) - 1;
  504. }
  505. virtual void TearDown() { libvpx_test::ClearSystemState(); }
  506. protected:
  507. void RunFwdTxfm(int16_t *in, tran_low_t *out, int stride) {
  508. fwd_txfm_(in, out, stride, tx_type_);
  509. }
  510. void RunInvTxfm(tran_low_t *out, uint8_t *dst, int stride) {
  511. inv_txfm_(out, dst, stride, tx_type_);
  512. }
  513. FhtFunc fwd_txfm_;
  514. IhtFunc inv_txfm_;
  515. };
// Run the shared checks from FwdTrans8x8TestBase against each hybrid pair.
TEST_P(FwdTrans8x8HT, SignBiasCheck) { RunSignBiasCheck(); }

TEST_P(FwdTrans8x8HT, RoundTripErrorCheck) { RunRoundTripErrorCheck(); }

TEST_P(FwdTrans8x8HT, ExtremalCheck) { RunExtremalCheck(); }
  519. class InvTrans8x8DCT : public FwdTrans8x8TestBase,
  520. public ::testing::TestWithParam<Idct8x8Param> {
  521. public:
  522. virtual ~InvTrans8x8DCT() {}
  523. virtual void SetUp() {
  524. ref_txfm_ = GET_PARAM(0);
  525. inv_txfm_ = GET_PARAM(1);
  526. thresh_ = GET_PARAM(2);
  527. pitch_ = 8;
  528. bit_depth_ = GET_PARAM(3);
  529. mask_ = (1 << bit_depth_) - 1;
  530. }
  531. virtual void TearDown() { libvpx_test::ClearSystemState(); }
  532. protected:
  533. void RunInvTxfm(tran_low_t *out, uint8_t *dst, int stride) {
  534. inv_txfm_(out, dst, stride);
  535. }
  536. void RunFwdTxfm(int16_t * /*out*/, tran_low_t * /*dst*/, int /*stride*/) {}
  537. IdctFunc ref_txfm_;
  538. IdctFunc inv_txfm_;
  539. int thresh_;
  540. };
// Exact-match comparison of the optimized and reference inverse transforms.
TEST_P(InvTrans8x8DCT, CompareReference) {
  CompareInvReference(ref_txfm_, thresh_);
}
using std::make_tuple;

// C-reference instantiations; the high-bit-depth build adds 10/12-bit cases.
#if CONFIG_VP9_HIGHBITDEPTH
INSTANTIATE_TEST_CASE_P(
    C, FwdTrans8x8DCT,
    ::testing::Values(
        make_tuple(&vpx_fdct8x8_c, &vpx_idct8x8_64_add_c, 0, VPX_BITS_8),
        make_tuple(&vpx_highbd_fdct8x8_c, &idct8x8_10, 0, VPX_BITS_10),
        make_tuple(&vpx_highbd_fdct8x8_c, &idct8x8_12, 0, VPX_BITS_12)));
#else
INSTANTIATE_TEST_CASE_P(C, FwdTrans8x8DCT,
                        ::testing::Values(make_tuple(&vpx_fdct8x8_c,
                                                     &vpx_idct8x8_64_add_c, 0,
                                                     VPX_BITS_8)));
#endif  // CONFIG_VP9_HIGHBITDEPTH

// Hybrid-transform cases cover all four tx_type values per bit depth.
#if CONFIG_VP9_HIGHBITDEPTH
INSTANTIATE_TEST_CASE_P(
    C, FwdTrans8x8HT,
    ::testing::Values(
        make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 0, VPX_BITS_8),
        make_tuple(&vp9_highbd_fht8x8_c, &iht8x8_10, 0, VPX_BITS_10),
        make_tuple(&vp9_highbd_fht8x8_c, &iht8x8_10, 1, VPX_BITS_10),
        make_tuple(&vp9_highbd_fht8x8_c, &iht8x8_10, 2, VPX_BITS_10),
        make_tuple(&vp9_highbd_fht8x8_c, &iht8x8_10, 3, VPX_BITS_10),
        make_tuple(&vp9_highbd_fht8x8_c, &iht8x8_12, 0, VPX_BITS_12),
        make_tuple(&vp9_highbd_fht8x8_c, &iht8x8_12, 1, VPX_BITS_12),
        make_tuple(&vp9_highbd_fht8x8_c, &iht8x8_12, 2, VPX_BITS_12),
        make_tuple(&vp9_highbd_fht8x8_c, &iht8x8_12, 3, VPX_BITS_12),
        make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 1, VPX_BITS_8),
        make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 2, VPX_BITS_8),
        make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 3, VPX_BITS_8)));
#else
INSTANTIATE_TEST_CASE_P(
    C, FwdTrans8x8HT,
    ::testing::Values(
        make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 0, VPX_BITS_8),
        make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 1, VPX_BITS_8),
        make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 2, VPX_BITS_8),
        make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 3, VPX_BITS_8)));
#endif  // CONFIG_VP9_HIGHBITDEPTH

// SIMD instantiations below pair an optimized function with a reference;
// each is guarded by the relevant config/arch flags.
#if HAVE_NEON && !CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_CASE_P(NEON, FwdTrans8x8DCT,
                        ::testing::Values(make_tuple(&vpx_fdct8x8_neon,
                                                     &vpx_idct8x8_64_add_neon,
                                                     0, VPX_BITS_8)));
#if !CONFIG_VP9_HIGHBITDEPTH
INSTANTIATE_TEST_CASE_P(
    NEON, FwdTrans8x8HT,
    ::testing::Values(
        make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_neon, 0, VPX_BITS_8),
        make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_neon, 1, VPX_BITS_8),
        make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_neon, 2, VPX_BITS_8),
        make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_neon, 3, VPX_BITS_8)));
#endif  // !CONFIG_VP9_HIGHBITDEPTH
#endif  // HAVE_NEON && !CONFIG_EMULATE_HARDWARE

#if HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_CASE_P(SSE2, FwdTrans8x8DCT,
                        ::testing::Values(make_tuple(&vpx_fdct8x8_sse2,
                                                     &vpx_idct8x8_64_add_sse2,
                                                     0, VPX_BITS_8)));
INSTANTIATE_TEST_CASE_P(
    SSE2, FwdTrans8x8HT,
    ::testing::Values(
        make_tuple(&vp9_fht8x8_sse2, &vp9_iht8x8_64_add_sse2, 0, VPX_BITS_8),
        make_tuple(&vp9_fht8x8_sse2, &vp9_iht8x8_64_add_sse2, 1, VPX_BITS_8),
        make_tuple(&vp9_fht8x8_sse2, &vp9_iht8x8_64_add_sse2, 2, VPX_BITS_8),
        make_tuple(&vp9_fht8x8_sse2, &vp9_iht8x8_64_add_sse2, 3, VPX_BITS_8)));
#endif  // HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE

#if HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_CASE_P(
    SSE2, FwdTrans8x8DCT,
    ::testing::Values(make_tuple(&vpx_fdct8x8_sse2, &vpx_idct8x8_64_add_c, 0,
                                 VPX_BITS_8),
                      make_tuple(&vpx_highbd_fdct8x8_c, &idct8x8_64_add_10_sse2,
                                 12, VPX_BITS_10),
                      make_tuple(&vpx_highbd_fdct8x8_sse2,
                                 &idct8x8_64_add_10_sse2, 12, VPX_BITS_10),
                      make_tuple(&vpx_highbd_fdct8x8_c, &idct8x8_64_add_12_sse2,
                                 12, VPX_BITS_12),
                      make_tuple(&vpx_highbd_fdct8x8_sse2,
                                 &idct8x8_64_add_12_sse2, 12, VPX_BITS_12)));

INSTANTIATE_TEST_CASE_P(
    SSE2, FwdTrans8x8HT,
    ::testing::Values(
        make_tuple(&vp9_fht8x8_sse2, &vp9_iht8x8_64_add_c, 0, VPX_BITS_8),
        make_tuple(&vp9_fht8x8_sse2, &vp9_iht8x8_64_add_c, 1, VPX_BITS_8),
        make_tuple(&vp9_fht8x8_sse2, &vp9_iht8x8_64_add_c, 2, VPX_BITS_8),
        make_tuple(&vp9_fht8x8_sse2, &vp9_iht8x8_64_add_c, 3, VPX_BITS_8)));

// Optimizations take effect at a threshold of 6201, so we use a value close to
// that to test both branches.
INSTANTIATE_TEST_CASE_P(
    SSE2, InvTrans8x8DCT,
    ::testing::Values(
        make_tuple(&idct8x8_12_add_10_c, &idct8x8_12_add_10_sse2, 6225,
                   VPX_BITS_10),
        make_tuple(&idct8x8_10, &idct8x8_64_add_10_sse2, 6225, VPX_BITS_10),
        make_tuple(&idct8x8_12_add_12_c, &idct8x8_12_add_12_sse2, 6225,
                   VPX_BITS_12),
        make_tuple(&idct8x8_12, &idct8x8_64_add_12_sse2, 6225, VPX_BITS_12)));
#endif  // HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE

#if HAVE_SSSE3 && ARCH_X86_64 && !CONFIG_VP9_HIGHBITDEPTH && \
    !CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_CASE_P(SSSE3, FwdTrans8x8DCT,
                        ::testing::Values(make_tuple(&vpx_fdct8x8_ssse3,
                                                     &vpx_idct8x8_64_add_sse2,
                                                     0, VPX_BITS_8)));
#endif

#if HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_CASE_P(MSA, FwdTrans8x8DCT,
                        ::testing::Values(make_tuple(&vpx_fdct8x8_msa,
                                                     &vpx_idct8x8_64_add_msa, 0,
                                                     VPX_BITS_8)));
INSTANTIATE_TEST_CASE_P(
    MSA, FwdTrans8x8HT,
    ::testing::Values(
        make_tuple(&vp9_fht8x8_msa, &vp9_iht8x8_64_add_msa, 0, VPX_BITS_8),
        make_tuple(&vp9_fht8x8_msa, &vp9_iht8x8_64_add_msa, 1, VPX_BITS_8),
        make_tuple(&vp9_fht8x8_msa, &vp9_iht8x8_64_add_msa, 2, VPX_BITS_8),
        make_tuple(&vp9_fht8x8_msa, &vp9_iht8x8_64_add_msa, 3, VPX_BITS_8)));
#endif  // HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE

#if HAVE_VSX && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_CASE_P(VSX, FwdTrans8x8DCT,
                        ::testing::Values(make_tuple(&vpx_fdct8x8_c,
                                                     &vpx_idct8x8_64_add_vsx, 0,
                                                     VPX_BITS_8)));
#endif  // HAVE_VSX && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
}  // namespace