dct16x16_test.cc 29 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868
  1. /*
  2. * Copyright (c) 2012 The WebM project authors. All Rights Reserved.
  3. *
  4. * Use of this source code is governed by a BSD-style license
  5. * that can be found in the LICENSE file in the root of the source
  6. * tree. An additional intellectual property rights grant can be found
  7. * in the file PATENTS. All contributing project authors may
  8. * be found in the AUTHORS file in the root of the source tree.
  9. */
  10. #include <math.h>
  11. #include <stdlib.h>
  12. #include <string.h>
  13. #include <tuple>
  14. #include "third_party/googletest/src/include/gtest/gtest.h"
  15. #include "./vp9_rtcd.h"
  16. #include "./vpx_dsp_rtcd.h"
  17. #include "test/acm_random.h"
  18. #include "test/clear_system_state.h"
  19. #include "test/register_state_check.h"
  20. #include "test/util.h"
  21. #include "vp9/common/vp9_entropy.h"
  22. #include "vp9/common/vp9_scan.h"
  23. #include "vpx/vpx_codec.h"
  24. #include "vpx/vpx_integer.h"
  25. #include "vpx_ports/mem.h"
  26. #include "vpx_ports/msvc.h" // for round()
  27. using libvpx_test::ACMRandom;
  28. namespace {
  // Number of coefficients in one 16x16 block.
  const int kNumCoeffs = 256;

  // Cosine table for the floating-point reference butterfly:
  // Ck == cos(k * pi / 32) for k = 1..15.
  const double C1 = 0.995184726672197;
  const double C2 = 0.98078528040323;
  const double C3 = 0.956940335732209;
  const double C4 = 0.923879532511287;
  const double C5 = 0.881921264348355;
  const double C6 = 0.831469612302545;
  const double C7 = 0.773010453362737;
  const double C8 = 0.707106781186548;
  const double C9 = 0.634393284163646;
  const double C10 = 0.555570233019602;
  const double C11 = 0.471396736825998;
  const double C12 = 0.38268343236509;
  const double C13 = 0.290284677254462;
  const double C14 = 0.195090322016128;
  const double C15 = 0.098017140329561;

  // Floating-point 16-point butterfly used as the 1-D stage of the reference
  // 2-D DCT.
  //
  // input:  16 samples; read only during the first stage.
  // output: 16 transform values. The result is not normalized: a constant
  //         input of 1.0 produces output[0] == 16 and zeros elsewhere.
  //
  // Products are formed in separate statements before they are added or
  // subtracted, exactly as in the original formulation, so the sequence of
  // floating-point operations is unchanged.
  void butterfly_16x16_dct_1d(const double input[16], double output[16]) {
    // a * ca + b * cb
    const auto rot_add = [](double a, double ca, double b, double cb) {
      const double lhs = a * ca;
      const double rhs = b * cb;
      return lhs + rhs;
    };
    // a * ca - b * cb
    const auto rot_sub = [](double a, double ca, double b, double cb) {
      const double lhs = a * ca;
      const double rhs = b * cb;
      return lhs - rhs;
    };
    // 2 * cos(pi / 4) * x
    const auto times_2c8 = [](double x) { return 2 * (x * C8); };

    double step[16];

    // step 1: fold the 16 inputs about the midpoint (sums low, diffs high).
    for (int k = 0; k < 8; ++k) {
      step[k] = input[k] + input[15 - k];
      step[15 - k] = input[k] - input[15 - k];
    }

    // step 2: fold the even half again, rotate the odd half.
    for (int k = 0; k < 4; ++k) {
      output[k] = step[k] + step[7 - k];
      output[7 - k] = step[k] - step[7 - k];
    }
    output[8] = rot_add(step[8], C7, step[15], C9);
    output[9] = rot_sub(step[9], C11, step[14], C5);
    output[10] = rot_add(step[10], C3, step[13], C13);
    output[11] = rot_sub(step[11], C15, step[12], C1);
    output[12] = rot_add(step[12], C15, step[11], C1);
    output[13] = rot_sub(step[13], C3, step[10], C13);
    output[14] = rot_add(step[14], C11, step[9], C5);
    output[15] = rot_sub(step[15], C7, step[8], C9);

    // step 3
    step[0] = output[0] + output[3];
    step[1] = output[1] + output[2];
    step[2] = output[1] - output[2];
    step[3] = output[0] - output[3];
    step[4] = rot_add(output[4], C14, output[7], C2);
    step[5] = rot_add(output[5], C10, output[6], C6);
    step[6] = rot_sub(output[6], C10, output[5], C6);
    step[7] = rot_sub(output[7], C14, output[4], C2);
    step[8] = output[8] + output[11];
    step[9] = output[9] + output[10];
    step[10] = output[9] - output[10];
    step[11] = output[8] - output[11];
    step[12] = output[12] + output[15];
    step[13] = output[13] + output[14];
    step[14] = output[13] - output[14];
    step[15] = output[12] - output[15];

    // step 4: every output slot is written exactly once from |step|.
    output[0] = (step[0] + step[1]);
    output[8] = (step[0] - step[1]);
    output[4] = times_2c8(rot_add(step[2], C12, step[3], C4));
    output[12] = times_2c8(rot_sub(step[3], C12, step[2], C4));
    output[2] = times_2c8(step[4] + step[5]);
    output[14] = times_2c8(step[7] - step[6]);

    const double diff_4_5 = step[4] - step[5];
    const double sum_6_7 = step[6] + step[7];
    output[6] = (diff_4_5 + sum_6_7);
    output[10] = (diff_4_5 - sum_6_7);

    const double sum_8_14 = step[8] + step[14];
    const double sum_9_15 = step[9] + step[15];
    output[3] = times_2c8(rot_sub(sum_8_14, C12, sum_9_15, C4));
    output[13] = times_2c8(rot_add(sum_9_15, C12, sum_8_14, C4));
    output[9] = times_2c8(step[10] + step[11]);

    const double diff_10_11 = step[10] - step[11];
    const double sum_12_13 = step[12] + step[13];
    const double diff_12_13 = step[12] - step[13];
    const double diff_8_14 = step[8] - step[14];
    const double diff_9_15 = step[9] - step[15];
    output[15] = (diff_10_11 + sum_12_13);
    output[1] = -(diff_10_11 - sum_12_13);
    output[7] = times_2c8(diff_12_13);
    // Negating 2 * x is exact, so this equals the original -2 * (x * C8).
    output[11] = -times_2c8(rot_sub(diff_8_14, C12, diff_9_15, C4));
    output[5] = times_2c8(rot_add(diff_9_15, C12, diff_8_14, C4));
  }
  169. void reference_16x16_dct_2d(int16_t input[256], double output[256]) {
  170. // First transform columns
  171. for (int i = 0; i < 16; ++i) {
  172. double temp_in[16], temp_out[16];
  173. for (int j = 0; j < 16; ++j) temp_in[j] = input[j * 16 + i];
  174. butterfly_16x16_dct_1d(temp_in, temp_out);
  175. for (int j = 0; j < 16; ++j) output[j * 16 + i] = temp_out[j];
  176. }
  177. // Then transform rows
  178. for (int i = 0; i < 16; ++i) {
  179. double temp_in[16], temp_out[16];
  180. for (int j = 0; j < 16; ++j) temp_in[j] = output[j + i * 16];
  181. butterfly_16x16_dct_1d(temp_in, temp_out);
  182. // Scale by some magic number
  183. for (int j = 0; j < 16; ++j) output[j + i * 16] = temp_out[j] / 2;
  184. }
  185. }
  186. typedef void (*FdctFunc)(const int16_t *in, tran_low_t *out, int stride);
  187. typedef void (*IdctFunc)(const tran_low_t *in, uint8_t *out, int stride);
  188. typedef void (*FhtFunc)(const int16_t *in, tran_low_t *out, int stride,
  189. int tx_type);
  190. typedef void (*IhtFunc)(const tran_low_t *in, uint8_t *out, int stride,
  191. int tx_type);
  192. typedef std::tuple<FdctFunc, IdctFunc, int, vpx_bit_depth_t> Dct16x16Param;
  193. typedef std::tuple<FhtFunc, IhtFunc, int, vpx_bit_depth_t> Ht16x16Param;
  194. typedef std::tuple<IdctFunc, IdctFunc, int, vpx_bit_depth_t> Idct16x16Param;
  195. void fdct16x16_ref(const int16_t *in, tran_low_t *out, int stride,
  196. int /*tx_type*/) {
  197. vpx_fdct16x16_c(in, out, stride);
  198. }
  199. void idct16x16_ref(const tran_low_t *in, uint8_t *dest, int stride,
  200. int /*tx_type*/) {
  201. vpx_idct16x16_256_add_c(in, dest, stride);
  202. }
  203. void fht16x16_ref(const int16_t *in, tran_low_t *out, int stride, int tx_type) {
  204. vp9_fht16x16_c(in, out, stride, tx_type);
  205. }
  206. void iht16x16_ref(const tran_low_t *in, uint8_t *dest, int stride,
  207. int tx_type) {
  208. vp9_iht16x16_256_add_c(in, dest, stride, tx_type);
  209. }
  #if CONFIG_VP9_HIGHBITDEPTH
  // High-bitdepth adapters. Each one fixes the bit depth (10 or 12) and
  // passes the uint8_t* destination through CAST_TO_SHORTPTR, so the
  // high-bitdepth routines fit the IdctFunc / IhtFunc signatures.

  // C inverse DCT, all 256 coefficients.
  void idct16x16_10(const tran_low_t *in, uint8_t *out, int stride) {
    vpx_highbd_idct16x16_256_add_c(in, CAST_TO_SHORTPTR(out), stride, 10);
  }

  void idct16x16_12(const tran_low_t *in, uint8_t *out, int stride) {
    vpx_highbd_idct16x16_256_add_c(in, CAST_TO_SHORTPTR(out), stride, 12);
  }

  // Same as above with the IhtFunc signature; the transform type is ignored.
  void idct16x16_10_ref(const tran_low_t *in, uint8_t *out, int stride,
                        int /*tx_type*/) {
    idct16x16_10(in, out, stride);
  }

  void idct16x16_12_ref(const tran_low_t *in, uint8_t *out, int stride,
                        int /*tx_type*/) {
    idct16x16_12(in, out, stride);
  }

  // C inverse hybrid transform.
  void iht16x16_10(const tran_low_t *in, uint8_t *out, int stride,
                   int tx_type) {
    vp9_highbd_iht16x16_256_add_c(in, CAST_TO_SHORTPTR(out), stride, tx_type,
                                  10);
  }

  void iht16x16_12(const tran_low_t *in, uint8_t *out, int stride,
                   int tx_type) {
    vp9_highbd_iht16x16_256_add_c(in, CAST_TO_SHORTPTR(out), stride, tx_type,
                                  12);
  }

  #if HAVE_SSE2
  // C "_10_add" inverse DCT, used as the reference for its SSE2 counterpart.
  void idct16x16_10_add_10_c(const tran_low_t *in, uint8_t *out, int stride) {
    vpx_highbd_idct16x16_10_add_c(in, CAST_TO_SHORTPTR(out), stride, 10);
  }

  void idct16x16_10_add_12_c(const tran_low_t *in, uint8_t *out, int stride) {
    vpx_highbd_idct16x16_10_add_c(in, CAST_TO_SHORTPTR(out), stride, 12);
  }

  // SSE2 "_256_add" inverse DCT.
  void idct16x16_256_add_10_sse2(const tran_low_t *in, uint8_t *out,
                                 int stride) {
    vpx_highbd_idct16x16_256_add_sse2(in, CAST_TO_SHORTPTR(out), stride, 10);
  }

  void idct16x16_256_add_12_sse2(const tran_low_t *in, uint8_t *out,
                                 int stride) {
    vpx_highbd_idct16x16_256_add_sse2(in, CAST_TO_SHORTPTR(out), stride, 12);
  }

  // SSE2 "_10_add" inverse DCT.
  void idct16x16_10_add_10_sse2(const tran_low_t *in, uint8_t *out,
                                int stride) {
    vpx_highbd_idct16x16_10_add_sse2(in, CAST_TO_SHORTPTR(out), stride, 10);
  }

  void idct16x16_10_add_12_sse2(const tran_low_t *in, uint8_t *out,
                                int stride) {
    vpx_highbd_idct16x16_10_add_sse2(in, CAST_TO_SHORTPTR(out), stride, 12);
  }
  #endif  // HAVE_SSE2
  #endif  // CONFIG_VP9_HIGHBITDEPTH
  252. class Trans16x16TestBase {
  253. public:
  254. virtual ~Trans16x16TestBase() {}
  255. protected:
  256. virtual void RunFwdTxfm(int16_t *in, tran_low_t *out, int stride) = 0;
  257. virtual void RunInvTxfm(tran_low_t *out, uint8_t *dst, int stride) = 0;
  258. void RunAccuracyCheck() {
  259. ACMRandom rnd(ACMRandom::DeterministicSeed());
  260. uint32_t max_error = 0;
  261. int64_t total_error = 0;
  262. const int count_test_block = 10000;
  263. for (int i = 0; i < count_test_block; ++i) {
  264. DECLARE_ALIGNED(16, int16_t, test_input_block[kNumCoeffs]);
  265. DECLARE_ALIGNED(16, tran_low_t, test_temp_block[kNumCoeffs]);
  266. DECLARE_ALIGNED(16, uint8_t, dst[kNumCoeffs]);
  267. DECLARE_ALIGNED(16, uint8_t, src[kNumCoeffs]);
  268. #if CONFIG_VP9_HIGHBITDEPTH
  269. DECLARE_ALIGNED(16, uint16_t, dst16[kNumCoeffs]);
  270. DECLARE_ALIGNED(16, uint16_t, src16[kNumCoeffs]);
  271. #endif
  272. // Initialize a test block with input range [-mask_, mask_].
  273. for (int j = 0; j < kNumCoeffs; ++j) {
  274. if (bit_depth_ == VPX_BITS_8) {
  275. src[j] = rnd.Rand8();
  276. dst[j] = rnd.Rand8();
  277. test_input_block[j] = src[j] - dst[j];
  278. #if CONFIG_VP9_HIGHBITDEPTH
  279. } else {
  280. src16[j] = rnd.Rand16() & mask_;
  281. dst16[j] = rnd.Rand16() & mask_;
  282. test_input_block[j] = src16[j] - dst16[j];
  283. #endif
  284. }
  285. }
  286. ASM_REGISTER_STATE_CHECK(
  287. RunFwdTxfm(test_input_block, test_temp_block, pitch_));
  288. if (bit_depth_ == VPX_BITS_8) {
  289. ASM_REGISTER_STATE_CHECK(RunInvTxfm(test_temp_block, dst, pitch_));
  290. #if CONFIG_VP9_HIGHBITDEPTH
  291. } else {
  292. ASM_REGISTER_STATE_CHECK(
  293. RunInvTxfm(test_temp_block, CAST_TO_BYTEPTR(dst16), pitch_));
  294. #endif
  295. }
  296. for (int j = 0; j < kNumCoeffs; ++j) {
  297. #if CONFIG_VP9_HIGHBITDEPTH
  298. const int32_t diff =
  299. bit_depth_ == VPX_BITS_8 ? dst[j] - src[j] : dst16[j] - src16[j];
  300. #else
  301. const int32_t diff = dst[j] - src[j];
  302. #endif
  303. const uint32_t error = diff * diff;
  304. if (max_error < error) max_error = error;
  305. total_error += error;
  306. }
  307. }
  308. EXPECT_GE(1u << 2 * (bit_depth_ - 8), max_error)
  309. << "Error: 16x16 FHT/IHT has an individual round trip error > 1";
  310. EXPECT_GE(count_test_block << 2 * (bit_depth_ - 8), total_error)
  311. << "Error: 16x16 FHT/IHT has average round trip error > 1 per block";
  312. }
  313. void RunCoeffCheck() {
  314. ACMRandom rnd(ACMRandom::DeterministicSeed());
  315. const int count_test_block = 1000;
  316. DECLARE_ALIGNED(16, int16_t, input_block[kNumCoeffs]);
  317. DECLARE_ALIGNED(16, tran_low_t, output_ref_block[kNumCoeffs]);
  318. DECLARE_ALIGNED(16, tran_low_t, output_block[kNumCoeffs]);
  319. for (int i = 0; i < count_test_block; ++i) {
  320. // Initialize a test block with input range [-mask_, mask_].
  321. for (int j = 0; j < kNumCoeffs; ++j) {
  322. input_block[j] = (rnd.Rand16() & mask_) - (rnd.Rand16() & mask_);
  323. }
  324. fwd_txfm_ref(input_block, output_ref_block, pitch_, tx_type_);
  325. ASM_REGISTER_STATE_CHECK(RunFwdTxfm(input_block, output_block, pitch_));
  326. // The minimum quant value is 4.
  327. for (int j = 0; j < kNumCoeffs; ++j)
  328. EXPECT_EQ(output_block[j], output_ref_block[j]);
  329. }
  330. }
  331. void RunMemCheck() {
  332. ACMRandom rnd(ACMRandom::DeterministicSeed());
  333. const int count_test_block = 1000;
  334. DECLARE_ALIGNED(16, int16_t, input_extreme_block[kNumCoeffs]);
  335. DECLARE_ALIGNED(16, tran_low_t, output_ref_block[kNumCoeffs]);
  336. DECLARE_ALIGNED(16, tran_low_t, output_block[kNumCoeffs]);
  337. for (int i = 0; i < count_test_block; ++i) {
  338. // Initialize a test block with input range [-mask_, mask_].
  339. for (int j = 0; j < kNumCoeffs; ++j) {
  340. input_extreme_block[j] = rnd.Rand8() % 2 ? mask_ : -mask_;
  341. }
  342. if (i == 0) {
  343. for (int j = 0; j < kNumCoeffs; ++j) input_extreme_block[j] = mask_;
  344. } else if (i == 1) {
  345. for (int j = 0; j < kNumCoeffs; ++j) input_extreme_block[j] = -mask_;
  346. }
  347. fwd_txfm_ref(input_extreme_block, output_ref_block, pitch_, tx_type_);
  348. ASM_REGISTER_STATE_CHECK(
  349. RunFwdTxfm(input_extreme_block, output_block, pitch_));
  350. // The minimum quant value is 4.
  351. for (int j = 0; j < kNumCoeffs; ++j) {
  352. EXPECT_EQ(output_block[j], output_ref_block[j]);
  353. EXPECT_GE(4 * DCT_MAX_VALUE << (bit_depth_ - 8), abs(output_block[j]))
  354. << "Error: 16x16 FDCT has coefficient larger than 4*DCT_MAX_VALUE";
  355. }
  356. }
  357. }
  358. void RunQuantCheck(int dc_thred, int ac_thred) {
  359. ACMRandom rnd(ACMRandom::DeterministicSeed());
  360. const int count_test_block = 100000;
  361. DECLARE_ALIGNED(16, int16_t, input_extreme_block[kNumCoeffs]);
  362. DECLARE_ALIGNED(16, tran_low_t, output_ref_block[kNumCoeffs]);
  363. DECLARE_ALIGNED(16, uint8_t, dst[kNumCoeffs]);
  364. DECLARE_ALIGNED(16, uint8_t, ref[kNumCoeffs]);
  365. #if CONFIG_VP9_HIGHBITDEPTH
  366. DECLARE_ALIGNED(16, uint16_t, dst16[kNumCoeffs]);
  367. DECLARE_ALIGNED(16, uint16_t, ref16[kNumCoeffs]);
  368. #endif
  369. for (int i = 0; i < count_test_block; ++i) {
  370. // Initialize a test block with input range [-mask_, mask_].
  371. for (int j = 0; j < kNumCoeffs; ++j) {
  372. input_extreme_block[j] = rnd.Rand8() % 2 ? mask_ : -mask_;
  373. }
  374. if (i == 0) {
  375. for (int j = 0; j < kNumCoeffs; ++j) input_extreme_block[j] = mask_;
  376. }
  377. if (i == 1) {
  378. for (int j = 0; j < kNumCoeffs; ++j) input_extreme_block[j] = -mask_;
  379. }
  380. fwd_txfm_ref(input_extreme_block, output_ref_block, pitch_, tx_type_);
  381. // clear reconstructed pixel buffers
  382. memset(dst, 0, kNumCoeffs * sizeof(uint8_t));
  383. memset(ref, 0, kNumCoeffs * sizeof(uint8_t));
  384. #if CONFIG_VP9_HIGHBITDEPTH
  385. memset(dst16, 0, kNumCoeffs * sizeof(uint16_t));
  386. memset(ref16, 0, kNumCoeffs * sizeof(uint16_t));
  387. #endif
  388. // quantization with maximum allowed step sizes
  389. output_ref_block[0] = (output_ref_block[0] / dc_thred) * dc_thred;
  390. for (int j = 1; j < kNumCoeffs; ++j) {
  391. output_ref_block[j] = (output_ref_block[j] / ac_thred) * ac_thred;
  392. }
  393. if (bit_depth_ == VPX_BITS_8) {
  394. inv_txfm_ref(output_ref_block, ref, pitch_, tx_type_);
  395. ASM_REGISTER_STATE_CHECK(RunInvTxfm(output_ref_block, dst, pitch_));
  396. #if CONFIG_VP9_HIGHBITDEPTH
  397. } else {
  398. inv_txfm_ref(output_ref_block, CAST_TO_BYTEPTR(ref16), pitch_,
  399. tx_type_);
  400. ASM_REGISTER_STATE_CHECK(
  401. RunInvTxfm(output_ref_block, CAST_TO_BYTEPTR(dst16), pitch_));
  402. #endif
  403. }
  404. if (bit_depth_ == VPX_BITS_8) {
  405. for (int j = 0; j < kNumCoeffs; ++j) EXPECT_EQ(ref[j], dst[j]);
  406. #if CONFIG_VP9_HIGHBITDEPTH
  407. } else {
  408. for (int j = 0; j < kNumCoeffs; ++j) EXPECT_EQ(ref16[j], dst16[j]);
  409. #endif
  410. }
  411. }
  412. }
  413. void RunInvAccuracyCheck() {
  414. ACMRandom rnd(ACMRandom::DeterministicSeed());
  415. const int count_test_block = 1000;
  416. DECLARE_ALIGNED(16, int16_t, in[kNumCoeffs]);
  417. DECLARE_ALIGNED(16, tran_low_t, coeff[kNumCoeffs]);
  418. DECLARE_ALIGNED(16, uint8_t, dst[kNumCoeffs]);
  419. DECLARE_ALIGNED(16, uint8_t, src[kNumCoeffs]);
  420. #if CONFIG_VP9_HIGHBITDEPTH
  421. DECLARE_ALIGNED(16, uint16_t, dst16[kNumCoeffs]);
  422. DECLARE_ALIGNED(16, uint16_t, src16[kNumCoeffs]);
  423. #endif // CONFIG_VP9_HIGHBITDEPTH
  424. for (int i = 0; i < count_test_block; ++i) {
  425. double out_r[kNumCoeffs];
  426. // Initialize a test block with input range [-255, 255].
  427. for (int j = 0; j < kNumCoeffs; ++j) {
  428. if (bit_depth_ == VPX_BITS_8) {
  429. src[j] = rnd.Rand8();
  430. dst[j] = rnd.Rand8();
  431. in[j] = src[j] - dst[j];
  432. #if CONFIG_VP9_HIGHBITDEPTH
  433. } else {
  434. src16[j] = rnd.Rand16() & mask_;
  435. dst16[j] = rnd.Rand16() & mask_;
  436. in[j] = src16[j] - dst16[j];
  437. #endif // CONFIG_VP9_HIGHBITDEPTH
  438. }
  439. }
  440. reference_16x16_dct_2d(in, out_r);
  441. for (int j = 0; j < kNumCoeffs; ++j) {
  442. coeff[j] = static_cast<tran_low_t>(round(out_r[j]));
  443. }
  444. if (bit_depth_ == VPX_BITS_8) {
  445. ASM_REGISTER_STATE_CHECK(RunInvTxfm(coeff, dst, 16));
  446. #if CONFIG_VP9_HIGHBITDEPTH
  447. } else {
  448. ASM_REGISTER_STATE_CHECK(RunInvTxfm(coeff, CAST_TO_BYTEPTR(dst16), 16));
  449. #endif // CONFIG_VP9_HIGHBITDEPTH
  450. }
  451. for (int j = 0; j < kNumCoeffs; ++j) {
  452. #if CONFIG_VP9_HIGHBITDEPTH
  453. const uint32_t diff =
  454. bit_depth_ == VPX_BITS_8 ? dst[j] - src[j] : dst16[j] - src16[j];
  455. #else
  456. const uint32_t diff = dst[j] - src[j];
  457. #endif // CONFIG_VP9_HIGHBITDEPTH
  458. const uint32_t error = diff * diff;
  459. EXPECT_GE(1u, error)
  460. << "Error: 16x16 IDCT has error " << error << " at index " << j;
  461. }
  462. }
  463. }
  464. void CompareInvReference(IdctFunc ref_txfm, int thresh) {
  465. ACMRandom rnd(ACMRandom::DeterministicSeed());
  466. const int count_test_block = 10000;
  467. const int eob = 10;
  468. const int16_t *scan = vp9_default_scan_orders[TX_16X16].scan;
  469. DECLARE_ALIGNED(16, tran_low_t, coeff[kNumCoeffs]);
  470. DECLARE_ALIGNED(16, uint8_t, dst[kNumCoeffs]);
  471. DECLARE_ALIGNED(16, uint8_t, ref[kNumCoeffs]);
  472. #if CONFIG_VP9_HIGHBITDEPTH
  473. DECLARE_ALIGNED(16, uint16_t, dst16[kNumCoeffs]);
  474. DECLARE_ALIGNED(16, uint16_t, ref16[kNumCoeffs]);
  475. #endif // CONFIG_VP9_HIGHBITDEPTH
  476. for (int i = 0; i < count_test_block; ++i) {
  477. for (int j = 0; j < kNumCoeffs; ++j) {
  478. if (j < eob) {
  479. // Random values less than the threshold, either positive or negative
  480. coeff[scan[j]] = rnd(thresh) * (1 - 2 * (i % 2));
  481. } else {
  482. coeff[scan[j]] = 0;
  483. }
  484. if (bit_depth_ == VPX_BITS_8) {
  485. dst[j] = 0;
  486. ref[j] = 0;
  487. #if CONFIG_VP9_HIGHBITDEPTH
  488. } else {
  489. dst16[j] = 0;
  490. ref16[j] = 0;
  491. #endif // CONFIG_VP9_HIGHBITDEPTH
  492. }
  493. }
  494. if (bit_depth_ == VPX_BITS_8) {
  495. ref_txfm(coeff, ref, pitch_);
  496. ASM_REGISTER_STATE_CHECK(RunInvTxfm(coeff, dst, pitch_));
  497. } else {
  498. #if CONFIG_VP9_HIGHBITDEPTH
  499. ref_txfm(coeff, CAST_TO_BYTEPTR(ref16), pitch_);
  500. ASM_REGISTER_STATE_CHECK(
  501. RunInvTxfm(coeff, CAST_TO_BYTEPTR(dst16), pitch_));
  502. #endif // CONFIG_VP9_HIGHBITDEPTH
  503. }
  504. for (int j = 0; j < kNumCoeffs; ++j) {
  505. #if CONFIG_VP9_HIGHBITDEPTH
  506. const uint32_t diff =
  507. bit_depth_ == VPX_BITS_8 ? dst[j] - ref[j] : dst16[j] - ref16[j];
  508. #else
  509. const uint32_t diff = dst[j] - ref[j];
  510. #endif // CONFIG_VP9_HIGHBITDEPTH
  511. const uint32_t error = diff * diff;
  512. EXPECT_EQ(0u, error) << "Error: 16x16 IDCT Comparison has error "
  513. << error << " at index " << j;
  514. }
  515. }
  516. }
  517. int pitch_;
  518. int tx_type_;
  519. vpx_bit_depth_t bit_depth_;
  520. int mask_;
  521. FhtFunc fwd_txfm_ref;
  522. IhtFunc inv_txfm_ref;
  523. };
  524. class Trans16x16DCT : public Trans16x16TestBase,
  525. public ::testing::TestWithParam<Dct16x16Param> {
  526. public:
  527. virtual ~Trans16x16DCT() {}
  528. virtual void SetUp() {
  529. fwd_txfm_ = GET_PARAM(0);
  530. inv_txfm_ = GET_PARAM(1);
  531. tx_type_ = GET_PARAM(2);
  532. bit_depth_ = GET_PARAM(3);
  533. pitch_ = 16;
  534. fwd_txfm_ref = fdct16x16_ref;
  535. inv_txfm_ref = idct16x16_ref;
  536. mask_ = (1 << bit_depth_) - 1;
  537. #if CONFIG_VP9_HIGHBITDEPTH
  538. switch (bit_depth_) {
  539. case VPX_BITS_10: inv_txfm_ref = idct16x16_10_ref; break;
  540. case VPX_BITS_12: inv_txfm_ref = idct16x16_12_ref; break;
  541. default: inv_txfm_ref = idct16x16_ref; break;
  542. }
  543. #else
  544. inv_txfm_ref = idct16x16_ref;
  545. #endif
  546. }
  547. virtual void TearDown() { libvpx_test::ClearSystemState(); }
  548. protected:
  549. void RunFwdTxfm(int16_t *in, tran_low_t *out, int stride) {
  550. fwd_txfm_(in, out, stride);
  551. }
  552. void RunInvTxfm(tran_low_t *out, uint8_t *dst, int stride) {
  553. inv_txfm_(out, dst, stride);
  554. }
  555. FdctFunc fwd_txfm_;
  556. IdctFunc inv_txfm_;
  557. };
  558. TEST_P(Trans16x16DCT, AccuracyCheck) { RunAccuracyCheck(); }
  559. TEST_P(Trans16x16DCT, CoeffCheck) { RunCoeffCheck(); }
  560. TEST_P(Trans16x16DCT, MemCheck) { RunMemCheck(); }
  561. TEST_P(Trans16x16DCT, QuantCheck) {
  562. // Use maximally allowed quantization step sizes for DC and AC
  563. // coefficients respectively.
  564. RunQuantCheck(1336, 1828);
  565. }
  566. TEST_P(Trans16x16DCT, InvAccuracyCheck) { RunInvAccuracyCheck(); }
  567. class Trans16x16HT : public Trans16x16TestBase,
  568. public ::testing::TestWithParam<Ht16x16Param> {
  569. public:
  570. virtual ~Trans16x16HT() {}
  571. virtual void SetUp() {
  572. fwd_txfm_ = GET_PARAM(0);
  573. inv_txfm_ = GET_PARAM(1);
  574. tx_type_ = GET_PARAM(2);
  575. bit_depth_ = GET_PARAM(3);
  576. pitch_ = 16;
  577. fwd_txfm_ref = fht16x16_ref;
  578. inv_txfm_ref = iht16x16_ref;
  579. mask_ = (1 << bit_depth_) - 1;
  580. #if CONFIG_VP9_HIGHBITDEPTH
  581. switch (bit_depth_) {
  582. case VPX_BITS_10: inv_txfm_ref = iht16x16_10; break;
  583. case VPX_BITS_12: inv_txfm_ref = iht16x16_12; break;
  584. default: inv_txfm_ref = iht16x16_ref; break;
  585. }
  586. #else
  587. inv_txfm_ref = iht16x16_ref;
  588. #endif
  589. }
  590. virtual void TearDown() { libvpx_test::ClearSystemState(); }
  591. protected:
  592. void RunFwdTxfm(int16_t *in, tran_low_t *out, int stride) {
  593. fwd_txfm_(in, out, stride, tx_type_);
  594. }
  595. void RunInvTxfm(tran_low_t *out, uint8_t *dst, int stride) {
  596. inv_txfm_(out, dst, stride, tx_type_);
  597. }
  598. FhtFunc fwd_txfm_;
  599. IhtFunc inv_txfm_;
  600. };
  601. TEST_P(Trans16x16HT, AccuracyCheck) { RunAccuracyCheck(); }
  602. TEST_P(Trans16x16HT, CoeffCheck) { RunCoeffCheck(); }
  603. TEST_P(Trans16x16HT, MemCheck) { RunMemCheck(); }
  604. TEST_P(Trans16x16HT, QuantCheck) {
  605. // The encoder skips any non-DC intra prediction modes,
  606. // when the quantization step size goes beyond 988.
  607. RunQuantCheck(429, 729);
  608. }
  609. class InvTrans16x16DCT : public Trans16x16TestBase,
  610. public ::testing::TestWithParam<Idct16x16Param> {
  611. public:
  612. virtual ~InvTrans16x16DCT() {}
  613. virtual void SetUp() {
  614. ref_txfm_ = GET_PARAM(0);
  615. inv_txfm_ = GET_PARAM(1);
  616. thresh_ = GET_PARAM(2);
  617. bit_depth_ = GET_PARAM(3);
  618. pitch_ = 16;
  619. mask_ = (1 << bit_depth_) - 1;
  620. }
  621. virtual void TearDown() { libvpx_test::ClearSystemState(); }
  622. protected:
  623. void RunFwdTxfm(int16_t * /*in*/, tran_low_t * /*out*/, int /*stride*/) {}
  624. void RunInvTxfm(tran_low_t *out, uint8_t *dst, int stride) {
  625. inv_txfm_(out, dst, stride);
  626. }
  627. IdctFunc ref_txfm_;
  628. IdctFunc inv_txfm_;
  629. int thresh_;
  630. };
  631. TEST_P(InvTrans16x16DCT, CompareReference) {
  632. CompareInvReference(ref_txfm_, thresh_);
  633. }
  634. using std::make_tuple;
  635. #if CONFIG_VP9_HIGHBITDEPTH
  636. INSTANTIATE_TEST_CASE_P(
  637. C, Trans16x16DCT,
  638. ::testing::Values(
  639. make_tuple(&vpx_highbd_fdct16x16_c, &idct16x16_10, 0, VPX_BITS_10),
  640. make_tuple(&vpx_highbd_fdct16x16_c, &idct16x16_12, 0, VPX_BITS_12),
  641. make_tuple(&vpx_fdct16x16_c, &vpx_idct16x16_256_add_c, 0, VPX_BITS_8)));
  642. #else
  643. INSTANTIATE_TEST_CASE_P(C, Trans16x16DCT,
  644. ::testing::Values(make_tuple(&vpx_fdct16x16_c,
  645. &vpx_idct16x16_256_add_c,
  646. 0, VPX_BITS_8)));
  647. #endif // CONFIG_VP9_HIGHBITDEPTH
  648. #if CONFIG_VP9_HIGHBITDEPTH
  649. INSTANTIATE_TEST_CASE_P(
  650. C, Trans16x16HT,
  651. ::testing::Values(
  652. make_tuple(&vp9_highbd_fht16x16_c, &iht16x16_10, 0, VPX_BITS_10),
  653. make_tuple(&vp9_highbd_fht16x16_c, &iht16x16_10, 1, VPX_BITS_10),
  654. make_tuple(&vp9_highbd_fht16x16_c, &iht16x16_10, 2, VPX_BITS_10),
  655. make_tuple(&vp9_highbd_fht16x16_c, &iht16x16_10, 3, VPX_BITS_10),
  656. make_tuple(&vp9_highbd_fht16x16_c, &iht16x16_12, 0, VPX_BITS_12),
  657. make_tuple(&vp9_highbd_fht16x16_c, &iht16x16_12, 1, VPX_BITS_12),
  658. make_tuple(&vp9_highbd_fht16x16_c, &iht16x16_12, 2, VPX_BITS_12),
  659. make_tuple(&vp9_highbd_fht16x16_c, &iht16x16_12, 3, VPX_BITS_12),
  660. make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 0, VPX_BITS_8),
  661. make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 1, VPX_BITS_8),
  662. make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 2, VPX_BITS_8),
  663. make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 3, VPX_BITS_8)));
  664. #else
  665. INSTANTIATE_TEST_CASE_P(
  666. C, Trans16x16HT,
  667. ::testing::Values(
  668. make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 0, VPX_BITS_8),
  669. make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 1, VPX_BITS_8),
  670. make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 2, VPX_BITS_8),
  671. make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 3, VPX_BITS_8)));
  672. #endif // CONFIG_VP9_HIGHBITDEPTH
  #if HAVE_NEON && !CONFIG_EMULATE_HARDWARE
  // NEON forward and inverse DCT, 8-bit only.
  INSTANTIATE_TEST_CASE_P(
      NEON, Trans16x16DCT,
      ::testing::Values(
          make_tuple(&vpx_fdct16x16_neon, &vpx_idct16x16_256_add_neon, 0,
                     VPX_BITS_8)));
  #endif  // HAVE_NEON && !CONFIG_EMULATE_HARDWARE
  #if HAVE_SSE2 && !CONFIG_EMULATE_HARDWARE
  #if !CONFIG_VP9_HIGHBITDEPTH
  // Low-bitdepth build: SSE2 forward and inverse transforms, 8-bit only.
  INSTANTIATE_TEST_CASE_P(
      SSE2, Trans16x16DCT,
      ::testing::Values(
          make_tuple(&vpx_fdct16x16_sse2, &vpx_idct16x16_256_add_sse2, 0,
                     VPX_BITS_8)));

  INSTANTIATE_TEST_CASE_P(
      SSE2, Trans16x16HT,
      ::testing::Values(
          make_tuple(&vp9_fht16x16_sse2, &vp9_iht16x16_256_add_sse2, 0,
                     VPX_BITS_8),
          make_tuple(&vp9_fht16x16_sse2, &vp9_iht16x16_256_add_sse2, 1,
                     VPX_BITS_8),
          make_tuple(&vp9_fht16x16_sse2, &vp9_iht16x16_256_add_sse2, 2,
                     VPX_BITS_8),
          make_tuple(&vp9_fht16x16_sse2, &vp9_iht16x16_256_add_sse2, 3,
                     VPX_BITS_8)));
  #else
  // High-bitdepth build: each tuple pairs one SSE2 routine with a C
  // counterpart.
  INSTANTIATE_TEST_CASE_P(
      SSE2, Trans16x16DCT,
      ::testing::Values(
          make_tuple(&vpx_highbd_fdct16x16_sse2, &idct16x16_10, 0,
                     VPX_BITS_10),
          make_tuple(&vpx_highbd_fdct16x16_c, &idct16x16_256_add_10_sse2, 0,
                     VPX_BITS_10),
          make_tuple(&vpx_highbd_fdct16x16_sse2, &idct16x16_12, 0,
                     VPX_BITS_12),
          make_tuple(&vpx_highbd_fdct16x16_c, &idct16x16_256_add_12_sse2, 0,
                     VPX_BITS_12),
          make_tuple(&vpx_fdct16x16_sse2, &vpx_idct16x16_256_add_c, 0,
                     VPX_BITS_8)));

  INSTANTIATE_TEST_CASE_P(
      SSE2, Trans16x16HT,
      ::testing::Values(
          make_tuple(&vp9_fht16x16_sse2, &vp9_iht16x16_256_add_c, 0,
                     VPX_BITS_8),
          make_tuple(&vp9_fht16x16_sse2, &vp9_iht16x16_256_add_c, 1,
                     VPX_BITS_8),
          make_tuple(&vp9_fht16x16_sse2, &vp9_iht16x16_256_add_c, 2,
                     VPX_BITS_8),
          make_tuple(&vp9_fht16x16_sse2, &vp9_iht16x16_256_add_c, 3,
                     VPX_BITS_8)));

  // Tuple layout here: (reference inverse, inverse under test, coefficient
  // threshold, bit depth).
  // Optimizations take effect at a threshold of 3155, so we use a value close
  // to that to test both branches.
  INSTANTIATE_TEST_CASE_P(
      SSE2, InvTrans16x16DCT,
      ::testing::Values(
          make_tuple(&idct16x16_10_add_10_c, &idct16x16_10_add_10_sse2, 3167,
                     VPX_BITS_10),
          make_tuple(&idct16x16_10, &idct16x16_256_add_10_sse2, 3167,
                     VPX_BITS_10),
          make_tuple(&idct16x16_10_add_12_c, &idct16x16_10_add_12_sse2, 3167,
                     VPX_BITS_12),
          make_tuple(&idct16x16_12, &idct16x16_256_add_12_sse2, 3167,
                     VPX_BITS_12)));
  #endif  // !CONFIG_VP9_HIGHBITDEPTH
  #endif  // HAVE_SSE2 && !CONFIG_EMULATE_HARDWARE
  #if HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
  // MSA forward and inverse transforms, 8-bit only.
  INSTANTIATE_TEST_CASE_P(
      MSA, Trans16x16DCT,
      ::testing::Values(
          make_tuple(&vpx_fdct16x16_msa, &vpx_idct16x16_256_add_msa, 0,
                     VPX_BITS_8)));

  INSTANTIATE_TEST_CASE_P(
      MSA, Trans16x16HT,
      ::testing::Values(
          make_tuple(&vp9_fht16x16_msa, &vp9_iht16x16_256_add_msa, 0,
                     VPX_BITS_8),
          make_tuple(&vp9_fht16x16_msa, &vp9_iht16x16_256_add_msa, 1,
                     VPX_BITS_8),
          make_tuple(&vp9_fht16x16_msa, &vp9_iht16x16_256_add_msa, 2,
                     VPX_BITS_8),
          make_tuple(&vp9_fht16x16_msa, &vp9_iht16x16_256_add_msa, 3,
                     VPX_BITS_8)));
  #endif  // HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
  #if HAVE_VSX && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
  // VSX inverse DCT paired with the C forward DCT, 8-bit only.
  INSTANTIATE_TEST_CASE_P(
      VSX, Trans16x16DCT,
      ::testing::Values(
          make_tuple(&vpx_fdct16x16_c, &vpx_idct16x16_256_add_vsx, 0,
                     VPX_BITS_8)));
  #endif  // HAVE_VSX && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
  748. } // namespace