dct32x32_test.cc 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399
  1. /*
  2. * Copyright (c) 2012 The WebM project authors. All Rights Reserved.
  3. *
  4. * Use of this source code is governed by a BSD-style license
  5. * that can be found in the LICENSE file in the root of the source
  6. * tree. An additional intellectual property rights grant can be found
  7. * in the file PATENTS. All contributing project authors may
  8. * be found in the AUTHORS file in the root of the source tree.
  9. */
  10. #include <math.h>
  11. #include <stdlib.h>
  12. #include <string.h>
  13. #include <tuple>
  14. #include "third_party/googletest/src/include/gtest/gtest.h"
  15. #include "./vp9_rtcd.h"
  16. #include "./vpx_config.h"
  17. #include "./vpx_dsp_rtcd.h"
  18. #include "test/acm_random.h"
  19. #include "test/bench.h"
  20. #include "test/clear_system_state.h"
  21. #include "test/register_state_check.h"
  22. #include "test/util.h"
  23. #include "vp9/common/vp9_entropy.h"
  24. #include "vpx/vpx_codec.h"
  25. #include "vpx/vpx_integer.h"
  26. #include "vpx_ports/mem.h"
  27. #include "vpx_ports/msvc.h" // for round()
  28. using libvpx_test::ACMRandom;
  29. namespace {
  30. const int kNumCoeffs = 1024;
  31. const double kPi = 3.141592653589793238462643383279502884;
  32. void reference_32x32_dct_1d(const double in[32], double out[32]) {
  33. const double kInvSqrt2 = 0.707106781186547524400844362104;
  34. for (int k = 0; k < 32; k++) {
  35. out[k] = 0.0;
  36. for (int n = 0; n < 32; n++) {
  37. out[k] += in[n] * cos(kPi * (2 * n + 1) * k / 64.0);
  38. }
  39. if (k == 0) out[k] = out[k] * kInvSqrt2;
  40. }
  41. }
  42. void reference_32x32_dct_2d(const int16_t input[kNumCoeffs],
  43. double output[kNumCoeffs]) {
  44. // First transform columns
  45. for (int i = 0; i < 32; ++i) {
  46. double temp_in[32], temp_out[32];
  47. for (int j = 0; j < 32; ++j) temp_in[j] = input[j * 32 + i];
  48. reference_32x32_dct_1d(temp_in, temp_out);
  49. for (int j = 0; j < 32; ++j) output[j * 32 + i] = temp_out[j];
  50. }
  51. // Then transform rows
  52. for (int i = 0; i < 32; ++i) {
  53. double temp_in[32], temp_out[32];
  54. for (int j = 0; j < 32; ++j) temp_in[j] = output[j + i * 32];
  55. reference_32x32_dct_1d(temp_in, temp_out);
  56. // Scale by some magic number
  57. for (int j = 0; j < 32; ++j) output[j + i * 32] = temp_out[j] / 4;
  58. }
  59. }
  60. typedef void (*FwdTxfmFunc)(const int16_t *in, tran_low_t *out, int stride);
  61. typedef void (*InvTxfmFunc)(const tran_low_t *in, uint8_t *out, int stride);
  62. typedef std::tuple<FwdTxfmFunc, InvTxfmFunc, int, vpx_bit_depth_t>
  63. Trans32x32Param;
  64. #if CONFIG_VP9_HIGHBITDEPTH
  65. void idct32x32_10(const tran_low_t *in, uint8_t *out, int stride) {
  66. vpx_highbd_idct32x32_1024_add_c(in, CAST_TO_SHORTPTR(out), stride, 10);
  67. }
  68. void idct32x32_12(const tran_low_t *in, uint8_t *out, int stride) {
  69. vpx_highbd_idct32x32_1024_add_c(in, CAST_TO_SHORTPTR(out), stride, 12);
  70. }
  71. #endif // CONFIG_VP9_HIGHBITDEPTH
  72. class Trans32x32Test : public AbstractBench,
  73. public ::testing::TestWithParam<Trans32x32Param> {
  74. public:
  75. virtual ~Trans32x32Test() {}
  76. virtual void SetUp() {
  77. fwd_txfm_ = GET_PARAM(0);
  78. inv_txfm_ = GET_PARAM(1);
  79. version_ = GET_PARAM(2); // 0: high precision forward transform
  80. // 1: low precision version for rd loop
  81. bit_depth_ = GET_PARAM(3);
  82. mask_ = (1 << bit_depth_) - 1;
  83. }
  84. virtual void TearDown() { libvpx_test::ClearSystemState(); }
  85. protected:
  86. int version_;
  87. vpx_bit_depth_t bit_depth_;
  88. int mask_;
  89. FwdTxfmFunc fwd_txfm_;
  90. InvTxfmFunc inv_txfm_;
  91. int16_t *bench_in_;
  92. tran_low_t *bench_out_;
  93. virtual void Run();
  94. };
  95. void Trans32x32Test::Run() { fwd_txfm_(bench_in_, bench_out_, 32); }
  96. TEST_P(Trans32x32Test, AccuracyCheck) {
  97. ACMRandom rnd(ACMRandom::DeterministicSeed());
  98. uint32_t max_error = 0;
  99. int64_t total_error = 0;
  100. const int count_test_block = 10000;
  101. DECLARE_ALIGNED(16, int16_t, test_input_block[kNumCoeffs]);
  102. DECLARE_ALIGNED(16, tran_low_t, test_temp_block[kNumCoeffs]);
  103. DECLARE_ALIGNED(16, uint8_t, dst[kNumCoeffs]);
  104. DECLARE_ALIGNED(16, uint8_t, src[kNumCoeffs]);
  105. #if CONFIG_VP9_HIGHBITDEPTH
  106. DECLARE_ALIGNED(16, uint16_t, dst16[kNumCoeffs]);
  107. DECLARE_ALIGNED(16, uint16_t, src16[kNumCoeffs]);
  108. #endif
  109. for (int i = 0; i < count_test_block; ++i) {
  110. // Initialize a test block with input range [-mask_, mask_].
  111. for (int j = 0; j < kNumCoeffs; ++j) {
  112. if (bit_depth_ == VPX_BITS_8) {
  113. src[j] = rnd.Rand8();
  114. dst[j] = rnd.Rand8();
  115. test_input_block[j] = src[j] - dst[j];
  116. #if CONFIG_VP9_HIGHBITDEPTH
  117. } else {
  118. src16[j] = rnd.Rand16() & mask_;
  119. dst16[j] = rnd.Rand16() & mask_;
  120. test_input_block[j] = src16[j] - dst16[j];
  121. #endif
  122. }
  123. }
  124. ASM_REGISTER_STATE_CHECK(fwd_txfm_(test_input_block, test_temp_block, 32));
  125. if (bit_depth_ == VPX_BITS_8) {
  126. ASM_REGISTER_STATE_CHECK(inv_txfm_(test_temp_block, dst, 32));
  127. #if CONFIG_VP9_HIGHBITDEPTH
  128. } else {
  129. ASM_REGISTER_STATE_CHECK(
  130. inv_txfm_(test_temp_block, CAST_TO_BYTEPTR(dst16), 32));
  131. #endif
  132. }
  133. for (int j = 0; j < kNumCoeffs; ++j) {
  134. #if CONFIG_VP9_HIGHBITDEPTH
  135. const int32_t diff =
  136. bit_depth_ == VPX_BITS_8 ? dst[j] - src[j] : dst16[j] - src16[j];
  137. #else
  138. const int32_t diff = dst[j] - src[j];
  139. #endif
  140. const uint32_t error = diff * diff;
  141. if (max_error < error) max_error = error;
  142. total_error += error;
  143. }
  144. }
  145. if (version_ == 1) {
  146. max_error /= 2;
  147. total_error /= 45;
  148. }
  149. EXPECT_GE(1u << 2 * (bit_depth_ - 8), max_error)
  150. << "Error: 32x32 FDCT/IDCT has an individual round-trip error > 1";
  151. EXPECT_GE(count_test_block << 2 * (bit_depth_ - 8), total_error)
  152. << "Error: 32x32 FDCT/IDCT has average round-trip error > 1 per block";
  153. }
  154. TEST_P(Trans32x32Test, CoeffCheck) {
  155. ACMRandom rnd(ACMRandom::DeterministicSeed());
  156. const int count_test_block = 1000;
  157. DECLARE_ALIGNED(16, int16_t, input_block[kNumCoeffs]);
  158. DECLARE_ALIGNED(16, tran_low_t, output_ref_block[kNumCoeffs]);
  159. DECLARE_ALIGNED(16, tran_low_t, output_block[kNumCoeffs]);
  160. for (int i = 0; i < count_test_block; ++i) {
  161. for (int j = 0; j < kNumCoeffs; ++j) {
  162. input_block[j] = (rnd.Rand16() & mask_) - (rnd.Rand16() & mask_);
  163. }
  164. const int stride = 32;
  165. vpx_fdct32x32_c(input_block, output_ref_block, stride);
  166. ASM_REGISTER_STATE_CHECK(fwd_txfm_(input_block, output_block, stride));
  167. if (version_ == 0) {
  168. for (int j = 0; j < kNumCoeffs; ++j)
  169. EXPECT_EQ(output_block[j], output_ref_block[j])
  170. << "Error: 32x32 FDCT versions have mismatched coefficients";
  171. } else {
  172. for (int j = 0; j < kNumCoeffs; ++j)
  173. EXPECT_GE(6, abs(output_block[j] - output_ref_block[j]))
  174. << "Error: 32x32 FDCT rd has mismatched coefficients";
  175. }
  176. }
  177. }
  178. TEST_P(Trans32x32Test, MemCheck) {
  179. ACMRandom rnd(ACMRandom::DeterministicSeed());
  180. const int count_test_block = 2000;
  181. DECLARE_ALIGNED(16, int16_t, input_extreme_block[kNumCoeffs]);
  182. DECLARE_ALIGNED(16, tran_low_t, output_ref_block[kNumCoeffs]);
  183. DECLARE_ALIGNED(16, tran_low_t, output_block[kNumCoeffs]);
  184. for (int i = 0; i < count_test_block; ++i) {
  185. // Initialize a test block with input range [-mask_, mask_].
  186. for (int j = 0; j < kNumCoeffs; ++j) {
  187. input_extreme_block[j] = rnd.Rand8() & 1 ? mask_ : -mask_;
  188. }
  189. if (i == 0) {
  190. for (int j = 0; j < kNumCoeffs; ++j) input_extreme_block[j] = mask_;
  191. } else if (i == 1) {
  192. for (int j = 0; j < kNumCoeffs; ++j) input_extreme_block[j] = -mask_;
  193. }
  194. const int stride = 32;
  195. vpx_fdct32x32_c(input_extreme_block, output_ref_block, stride);
  196. ASM_REGISTER_STATE_CHECK(
  197. fwd_txfm_(input_extreme_block, output_block, stride));
  198. // The minimum quant value is 4.
  199. for (int j = 0; j < kNumCoeffs; ++j) {
  200. if (version_ == 0) {
  201. EXPECT_EQ(output_block[j], output_ref_block[j])
  202. << "Error: 32x32 FDCT versions have mismatched coefficients";
  203. } else {
  204. EXPECT_GE(6, abs(output_block[j] - output_ref_block[j]))
  205. << "Error: 32x32 FDCT rd has mismatched coefficients";
  206. }
  207. EXPECT_GE(4 * DCT_MAX_VALUE << (bit_depth_ - 8), abs(output_ref_block[j]))
  208. << "Error: 32x32 FDCT C has coefficient larger than 4*DCT_MAX_VALUE";
  209. EXPECT_GE(4 * DCT_MAX_VALUE << (bit_depth_ - 8), abs(output_block[j]))
  210. << "Error: 32x32 FDCT has coefficient larger than "
  211. << "4*DCT_MAX_VALUE";
  212. }
  213. }
  214. }
  215. TEST_P(Trans32x32Test, DISABLED_Speed) {
  216. ACMRandom rnd(ACMRandom::DeterministicSeed());
  217. DECLARE_ALIGNED(16, int16_t, input_extreme_block[kNumCoeffs]);
  218. DECLARE_ALIGNED(16, tran_low_t, output_block[kNumCoeffs]);
  219. bench_in_ = input_extreme_block;
  220. bench_out_ = output_block;
  221. RunNTimes(INT16_MAX);
  222. PrintMedian("32x32");
  223. }
  224. TEST_P(Trans32x32Test, InverseAccuracy) {
  225. ACMRandom rnd(ACMRandom::DeterministicSeed());
  226. const int count_test_block = 1000;
  227. DECLARE_ALIGNED(16, int16_t, in[kNumCoeffs]);
  228. DECLARE_ALIGNED(16, tran_low_t, coeff[kNumCoeffs]);
  229. DECLARE_ALIGNED(16, uint8_t, dst[kNumCoeffs]);
  230. DECLARE_ALIGNED(16, uint8_t, src[kNumCoeffs]);
  231. #if CONFIG_VP9_HIGHBITDEPTH
  232. DECLARE_ALIGNED(16, uint16_t, dst16[kNumCoeffs]);
  233. DECLARE_ALIGNED(16, uint16_t, src16[kNumCoeffs]);
  234. #endif
  235. for (int i = 0; i < count_test_block; ++i) {
  236. double out_r[kNumCoeffs];
  237. // Initialize a test block with input range [-255, 255]
  238. for (int j = 0; j < kNumCoeffs; ++j) {
  239. if (bit_depth_ == VPX_BITS_8) {
  240. src[j] = rnd.Rand8();
  241. dst[j] = rnd.Rand8();
  242. in[j] = src[j] - dst[j];
  243. #if CONFIG_VP9_HIGHBITDEPTH
  244. } else {
  245. src16[j] = rnd.Rand16() & mask_;
  246. dst16[j] = rnd.Rand16() & mask_;
  247. in[j] = src16[j] - dst16[j];
  248. #endif
  249. }
  250. }
  251. reference_32x32_dct_2d(in, out_r);
  252. for (int j = 0; j < kNumCoeffs; ++j) {
  253. coeff[j] = static_cast<tran_low_t>(round(out_r[j]));
  254. }
  255. if (bit_depth_ == VPX_BITS_8) {
  256. ASM_REGISTER_STATE_CHECK(inv_txfm_(coeff, dst, 32));
  257. #if CONFIG_VP9_HIGHBITDEPTH
  258. } else {
  259. ASM_REGISTER_STATE_CHECK(inv_txfm_(coeff, CAST_TO_BYTEPTR(dst16), 32));
  260. #endif
  261. }
  262. for (int j = 0; j < kNumCoeffs; ++j) {
  263. #if CONFIG_VP9_HIGHBITDEPTH
  264. const int diff =
  265. bit_depth_ == VPX_BITS_8 ? dst[j] - src[j] : dst16[j] - src16[j];
  266. #else
  267. const int diff = dst[j] - src[j];
  268. #endif
  269. const int error = diff * diff;
  270. EXPECT_GE(1, error) << "Error: 32x32 IDCT has error " << error
  271. << " at index " << j;
  272. }
  273. }
  274. }
  275. using std::make_tuple;
  276. #if CONFIG_VP9_HIGHBITDEPTH
  277. INSTANTIATE_TEST_CASE_P(
  278. C, Trans32x32Test,
  279. ::testing::Values(
  280. make_tuple(&vpx_highbd_fdct32x32_c, &idct32x32_10, 0, VPX_BITS_10),
  281. make_tuple(&vpx_highbd_fdct32x32_rd_c, &idct32x32_10, 1, VPX_BITS_10),
  282. make_tuple(&vpx_highbd_fdct32x32_c, &idct32x32_12, 0, VPX_BITS_12),
  283. make_tuple(&vpx_highbd_fdct32x32_rd_c, &idct32x32_12, 1, VPX_BITS_12),
  284. make_tuple(&vpx_fdct32x32_c, &vpx_idct32x32_1024_add_c, 0, VPX_BITS_8),
  285. make_tuple(&vpx_fdct32x32_rd_c, &vpx_idct32x32_1024_add_c, 1,
  286. VPX_BITS_8)));
  287. #else
  288. INSTANTIATE_TEST_CASE_P(
  289. C, Trans32x32Test,
  290. ::testing::Values(make_tuple(&vpx_fdct32x32_c, &vpx_idct32x32_1024_add_c, 0,
  291. VPX_BITS_8),
  292. make_tuple(&vpx_fdct32x32_rd_c, &vpx_idct32x32_1024_add_c,
  293. 1, VPX_BITS_8)));
  294. #endif // CONFIG_VP9_HIGHBITDEPTH
  295. #if HAVE_NEON && !CONFIG_EMULATE_HARDWARE
  296. INSTANTIATE_TEST_CASE_P(
  297. NEON, Trans32x32Test,
  298. ::testing::Values(make_tuple(&vpx_fdct32x32_neon,
  299. &vpx_idct32x32_1024_add_neon, 0, VPX_BITS_8),
  300. make_tuple(&vpx_fdct32x32_rd_neon,
  301. &vpx_idct32x32_1024_add_neon, 1, VPX_BITS_8)));
  302. #endif // HAVE_NEON && !CONFIG_EMULATE_HARDWARE
  303. #if HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
  304. INSTANTIATE_TEST_CASE_P(
  305. SSE2, Trans32x32Test,
  306. ::testing::Values(make_tuple(&vpx_fdct32x32_sse2,
  307. &vpx_idct32x32_1024_add_sse2, 0, VPX_BITS_8),
  308. make_tuple(&vpx_fdct32x32_rd_sse2,
  309. &vpx_idct32x32_1024_add_sse2, 1, VPX_BITS_8)));
  310. #endif // HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
  311. #if HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
  312. INSTANTIATE_TEST_CASE_P(
  313. SSE2, Trans32x32Test,
  314. ::testing::Values(
  315. make_tuple(&vpx_highbd_fdct32x32_sse2, &idct32x32_10, 0, VPX_BITS_10),
  316. make_tuple(&vpx_highbd_fdct32x32_rd_sse2, &idct32x32_10, 1,
  317. VPX_BITS_10),
  318. make_tuple(&vpx_highbd_fdct32x32_sse2, &idct32x32_12, 0, VPX_BITS_12),
  319. make_tuple(&vpx_highbd_fdct32x32_rd_sse2, &idct32x32_12, 1,
  320. VPX_BITS_12),
  321. make_tuple(&vpx_fdct32x32_sse2, &vpx_idct32x32_1024_add_c, 0,
  322. VPX_BITS_8),
  323. make_tuple(&vpx_fdct32x32_rd_sse2, &vpx_idct32x32_1024_add_c, 1,
  324. VPX_BITS_8)));
  325. #endif // HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
  326. #if HAVE_AVX2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
  327. INSTANTIATE_TEST_CASE_P(
  328. AVX2, Trans32x32Test,
  329. ::testing::Values(make_tuple(&vpx_fdct32x32_avx2,
  330. &vpx_idct32x32_1024_add_sse2, 0, VPX_BITS_8),
  331. make_tuple(&vpx_fdct32x32_rd_avx2,
  332. &vpx_idct32x32_1024_add_sse2, 1, VPX_BITS_8)));
  333. #endif // HAVE_AVX2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
  334. #if HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
  335. INSTANTIATE_TEST_CASE_P(
  336. MSA, Trans32x32Test,
  337. ::testing::Values(make_tuple(&vpx_fdct32x32_msa,
  338. &vpx_idct32x32_1024_add_msa, 0, VPX_BITS_8),
  339. make_tuple(&vpx_fdct32x32_rd_msa,
  340. &vpx_idct32x32_1024_add_msa, 1, VPX_BITS_8)));
  341. #endif // HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
  342. #if HAVE_VSX && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
  343. INSTANTIATE_TEST_CASE_P(
  344. VSX, Trans32x32Test,
  345. ::testing::Values(make_tuple(&vpx_fdct32x32_c, &vpx_idct32x32_1024_add_vsx,
  346. 0, VPX_BITS_8),
  347. make_tuple(&vpx_fdct32x32_rd_vsx,
  348. &vpx_idct32x32_1024_add_vsx, 1, VPX_BITS_8)));
  349. #endif // HAVE_VSX && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
  350. } // namespace