hadamard_test.cc 9.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320
  1. /*
  2. * Copyright (c) 2016 The WebM project authors. All Rights Reserved.
  3. *
  4. * Use of this source code is governed by a BSD-style license
  5. * that can be found in the LICENSE file in the root of the source
  6. * tree. An additional intellectual property rights grant can be found
  7. * in the file PATENTS. All contributing project authors may
  8. * be found in the AUTHORS file in the root of the source tree.
  9. */
  10. #include <algorithm>
  11. #include "third_party/googletest/src/include/gtest/gtest.h"
  12. #include "./vpx_dsp_rtcd.h"
  13. #include "vpx_ports/vpx_timer.h"
  14. #include "test/acm_random.h"
  15. #include "test/register_state_check.h"
  16. namespace {
  17. using ::libvpx_test::ACMRandom;
  18. typedef void (*HadamardFunc)(const int16_t *a, ptrdiff_t a_stride,
  19. tran_low_t *b);
  20. void hadamard_loop(const tran_low_t *a, tran_low_t *out) {
  21. tran_low_t b[8];
  22. for (int i = 0; i < 8; i += 2) {
  23. b[i + 0] = a[i * 8] + a[(i + 1) * 8];
  24. b[i + 1] = a[i * 8] - a[(i + 1) * 8];
  25. }
  26. tran_low_t c[8];
  27. for (int i = 0; i < 8; i += 4) {
  28. c[i + 0] = b[i + 0] + b[i + 2];
  29. c[i + 1] = b[i + 1] + b[i + 3];
  30. c[i + 2] = b[i + 0] - b[i + 2];
  31. c[i + 3] = b[i + 1] - b[i + 3];
  32. }
  33. out[0] = c[0] + c[4];
  34. out[7] = c[1] + c[5];
  35. out[3] = c[2] + c[6];
  36. out[4] = c[3] + c[7];
  37. out[2] = c[0] - c[4];
  38. out[6] = c[1] - c[5];
  39. out[1] = c[2] - c[6];
  40. out[5] = c[3] - c[7];
  41. }
  42. void reference_hadamard8x8(const int16_t *a, int a_stride, tran_low_t *b) {
  43. tran_low_t input[64];
  44. tran_low_t buf[64];
  45. for (int i = 0; i < 8; ++i) {
  46. for (int j = 0; j < 8; ++j) {
  47. input[i * 8 + j] = static_cast<tran_low_t>(a[i * a_stride + j]);
  48. }
  49. }
  50. for (int i = 0; i < 8; ++i) hadamard_loop(input + i, buf + i * 8);
  51. for (int i = 0; i < 8; ++i) hadamard_loop(buf + i, b + i * 8);
  52. }
  53. void reference_hadamard16x16(const int16_t *a, int a_stride, tran_low_t *b) {
  54. /* The source is a 16x16 block. The destination is rearranged to 8x32.
  55. * Input is 9 bit. */
  56. reference_hadamard8x8(a + 0 + 0 * a_stride, a_stride, b + 0);
  57. reference_hadamard8x8(a + 8 + 0 * a_stride, a_stride, b + 64);
  58. reference_hadamard8x8(a + 0 + 8 * a_stride, a_stride, b + 128);
  59. reference_hadamard8x8(a + 8 + 8 * a_stride, a_stride, b + 192);
  60. /* Overlay the 8x8 blocks and combine. */
  61. for (int i = 0; i < 64; ++i) {
  62. /* 8x8 steps the range up to 15 bits. */
  63. const tran_low_t a0 = b[0];
  64. const tran_low_t a1 = b[64];
  65. const tran_low_t a2 = b[128];
  66. const tran_low_t a3 = b[192];
  67. /* Prevent the result from escaping int16_t. */
  68. const tran_low_t b0 = (a0 + a1) >> 1;
  69. const tran_low_t b1 = (a0 - a1) >> 1;
  70. const tran_low_t b2 = (a2 + a3) >> 1;
  71. const tran_low_t b3 = (a2 - a3) >> 1;
  72. /* Store a 16 bit value. */
  73. b[0] = b0 + b2;
  74. b[64] = b1 + b3;
  75. b[128] = b0 - b2;
  76. b[192] = b1 - b3;
  77. ++b;
  78. }
  79. }
  80. void reference_hadamard32x32(const int16_t *a, int a_stride, tran_low_t *b) {
  81. reference_hadamard16x16(a + 0 + 0 * a_stride, a_stride, b + 0);
  82. reference_hadamard16x16(a + 16 + 0 * a_stride, a_stride, b + 256);
  83. reference_hadamard16x16(a + 0 + 16 * a_stride, a_stride, b + 512);
  84. reference_hadamard16x16(a + 16 + 16 * a_stride, a_stride, b + 768);
  85. for (int i = 0; i < 256; ++i) {
  86. const tran_low_t a0 = b[0];
  87. const tran_low_t a1 = b[256];
  88. const tran_low_t a2 = b[512];
  89. const tran_low_t a3 = b[768];
  90. const tran_low_t b0 = (a0 + a1) >> 2;
  91. const tran_low_t b1 = (a0 - a1) >> 2;
  92. const tran_low_t b2 = (a2 + a3) >> 2;
  93. const tran_low_t b3 = (a2 - a3) >> 2;
  94. b[0] = b0 + b2;
  95. b[256] = b1 + b3;
  96. b[512] = b0 - b2;
  97. b[768] = b1 - b3;
  98. ++b;
  99. }
  100. }
  101. struct HadamardFuncWithSize {
  102. HadamardFuncWithSize(HadamardFunc f, int s) : func(f), block_size(s) {}
  103. HadamardFunc func;
  104. int block_size;
  105. };
  106. std::ostream &operator<<(std::ostream &os, const HadamardFuncWithSize &hfs) {
  107. return os << "block size: " << hfs.block_size;
  108. }
  109. class HadamardTestBase : public ::testing::TestWithParam<HadamardFuncWithSize> {
  110. public:
  111. virtual void SetUp() {
  112. h_func_ = GetParam().func;
  113. bwh_ = GetParam().block_size;
  114. block_size_ = bwh_ * bwh_;
  115. rnd_.Reset(ACMRandom::DeterministicSeed());
  116. }
  117. virtual int16_t Rand() = 0;
  118. void ReferenceHadamard(const int16_t *a, int a_stride, tran_low_t *b,
  119. int bwh) {
  120. if (bwh == 32)
  121. reference_hadamard32x32(a, a_stride, b);
  122. else if (bwh == 16)
  123. reference_hadamard16x16(a, a_stride, b);
  124. else
  125. reference_hadamard8x8(a, a_stride, b);
  126. }
  127. void CompareReferenceRandom() {
  128. const int kMaxBlockSize = 32 * 32;
  129. DECLARE_ALIGNED(16, int16_t, a[kMaxBlockSize]);
  130. DECLARE_ALIGNED(16, tran_low_t, b[kMaxBlockSize]);
  131. memset(a, 0, sizeof(a));
  132. memset(b, 0, sizeof(b));
  133. tran_low_t b_ref[kMaxBlockSize];
  134. memset(b_ref, 0, sizeof(b_ref));
  135. for (int i = 0; i < block_size_; ++i) a[i] = Rand();
  136. ReferenceHadamard(a, bwh_, b_ref, bwh_);
  137. ASM_REGISTER_STATE_CHECK(h_func_(a, bwh_, b));
  138. // The order of the output is not important. Sort before checking.
  139. std::sort(b, b + block_size_);
  140. std::sort(b_ref, b_ref + block_size_);
  141. EXPECT_EQ(0, memcmp(b, b_ref, sizeof(b)));
  142. }
  143. void VaryStride() {
  144. const int kMaxBlockSize = 32 * 32;
  145. DECLARE_ALIGNED(16, int16_t, a[kMaxBlockSize * 8]);
  146. DECLARE_ALIGNED(16, tran_low_t, b[kMaxBlockSize]);
  147. memset(a, 0, sizeof(a));
  148. for (int i = 0; i < block_size_ * 8; ++i) a[i] = Rand();
  149. tran_low_t b_ref[kMaxBlockSize];
  150. for (int i = 8; i < 64; i += 8) {
  151. memset(b, 0, sizeof(b));
  152. memset(b_ref, 0, sizeof(b_ref));
  153. ReferenceHadamard(a, i, b_ref, bwh_);
  154. ASM_REGISTER_STATE_CHECK(h_func_(a, i, b));
  155. // The order of the output is not important. Sort before checking.
  156. std::sort(b, b + block_size_);
  157. std::sort(b_ref, b_ref + block_size_);
  158. EXPECT_EQ(0, memcmp(b, b_ref, sizeof(b)));
  159. }
  160. }
  161. void SpeedTest(int times) {
  162. const int kMaxBlockSize = 32 * 32;
  163. DECLARE_ALIGNED(16, int16_t, input[kMaxBlockSize]);
  164. DECLARE_ALIGNED(16, tran_low_t, output[kMaxBlockSize]);
  165. memset(input, 1, sizeof(input));
  166. memset(output, 0, sizeof(output));
  167. vpx_usec_timer timer;
  168. vpx_usec_timer_start(&timer);
  169. for (int i = 0; i < times; ++i) {
  170. h_func_(input, bwh_, output);
  171. }
  172. vpx_usec_timer_mark(&timer);
  173. const int elapsed_time = static_cast<int>(vpx_usec_timer_elapsed(&timer));
  174. printf("Hadamard%dx%d[%12d runs]: %d us\n", bwh_, bwh_, times,
  175. elapsed_time);
  176. }
  177. protected:
  178. int bwh_;
  179. int block_size_;
  180. HadamardFunc h_func_;
  181. ACMRandom rnd_;
  182. };
  183. class HadamardLowbdTest : public HadamardTestBase {
  184. protected:
  185. virtual int16_t Rand() { return rnd_.Rand9Signed(); }
  186. };
  187. TEST_P(HadamardLowbdTest, CompareReferenceRandom) { CompareReferenceRandom(); }
  188. TEST_P(HadamardLowbdTest, VaryStride) { VaryStride(); }
  189. TEST_P(HadamardLowbdTest, DISABLED_Speed) {
  190. SpeedTest(10);
  191. SpeedTest(10000);
  192. SpeedTest(10000000);
  193. }
// Instantiations of the low-bitdepth suite. Each entry pairs one
// implementation with the block width/height passed to the fixture. Every
// group other than the C one is compiled only when its HAVE_* flag is set.

// C implementations: 8x8, 16x16 and 32x32.
INSTANTIATE_TEST_CASE_P(
    C, HadamardLowbdTest,
    ::testing::Values(HadamardFuncWithSize(&vpx_hadamard_8x8_c, 8),
                      HadamardFuncWithSize(&vpx_hadamard_16x16_c, 16),
                      HadamardFuncWithSize(&vpx_hadamard_32x32_c, 32)));

// SSE2: 8x8, 16x16 and 32x32.
#if HAVE_SSE2
INSTANTIATE_TEST_CASE_P(
    SSE2, HadamardLowbdTest,
    ::testing::Values(HadamardFuncWithSize(&vpx_hadamard_8x8_sse2, 8),
                      HadamardFuncWithSize(&vpx_hadamard_16x16_sse2, 16),
                      HadamardFuncWithSize(&vpx_hadamard_32x32_sse2, 32)));
#endif // HAVE_SSE2

// AVX2: only 16x16 and 32x32 are instantiated here.
#if HAVE_AVX2
INSTANTIATE_TEST_CASE_P(
    AVX2, HadamardLowbdTest,
    ::testing::Values(HadamardFuncWithSize(&vpx_hadamard_16x16_avx2, 16),
                      HadamardFuncWithSize(&vpx_hadamard_32x32_avx2, 32)));
#endif // HAVE_AVX2

// SSSE3: 8x8 only, and only when ARCH_X86_64 is also set.
#if HAVE_SSSE3 && ARCH_X86_64
INSTANTIATE_TEST_CASE_P(
    SSSE3, HadamardLowbdTest,
    ::testing::Values(HadamardFuncWithSize(&vpx_hadamard_8x8_ssse3, 8)));
#endif // HAVE_SSSE3 && ARCH_X86_64

// NEON: 8x8 and 16x16.
#if HAVE_NEON
INSTANTIATE_TEST_CASE_P(
    NEON, HadamardLowbdTest,
    ::testing::Values(HadamardFuncWithSize(&vpx_hadamard_8x8_neon, 8),
                      HadamardFuncWithSize(&vpx_hadamard_16x16_neon, 16)));
#endif // HAVE_NEON

// MSA: 8x8 and 16x16, instantiated only when CONFIG_VP9_HIGHBITDEPTH is off.
// TODO(jingning): Remove highbitdepth flag when the SIMD functions are
// in place and turn on the unit test.
#if !CONFIG_VP9_HIGHBITDEPTH
#if HAVE_MSA
INSTANTIATE_TEST_CASE_P(
    MSA, HadamardLowbdTest,
    ::testing::Values(HadamardFuncWithSize(&vpx_hadamard_8x8_msa, 8),
                      HadamardFuncWithSize(&vpx_hadamard_16x16_msa, 16)));
#endif // HAVE_MSA
#endif // !CONFIG_VP9_HIGHBITDEPTH

// VSX: 8x8 and 16x16.
#if HAVE_VSX
INSTANTIATE_TEST_CASE_P(
    VSX, HadamardLowbdTest,
    ::testing::Values(HadamardFuncWithSize(&vpx_hadamard_8x8_vsx, 8),
                      HadamardFuncWithSize(&vpx_hadamard_16x16_vsx, 16)));
#endif // HAVE_VSX
  239. #if CONFIG_VP9_HIGHBITDEPTH
  240. class HadamardHighbdTest : public HadamardTestBase {
  241. protected:
  242. virtual int16_t Rand() { return rnd_.Rand13Signed(); }
  243. };
  244. TEST_P(HadamardHighbdTest, CompareReferenceRandom) { CompareReferenceRandom(); }
  245. TEST_P(HadamardHighbdTest, VaryStride) { VaryStride(); }
  246. TEST_P(HadamardHighbdTest, DISABLED_Speed) {
  247. SpeedTest(10);
  248. SpeedTest(10000);
  249. SpeedTest(10000000);
  250. }
// Instantiations of the high-bitdepth suite. Each entry pairs one
// implementation with the block width/height passed to the fixture.

// C implementations: 8x8, 16x16 and 32x32.
INSTANTIATE_TEST_CASE_P(
    C, HadamardHighbdTest,
    ::testing::Values(HadamardFuncWithSize(&vpx_highbd_hadamard_8x8_c, 8),
                      HadamardFuncWithSize(&vpx_highbd_hadamard_16x16_c, 16),
                      HadamardFuncWithSize(&vpx_highbd_hadamard_32x32_c, 32)));

// AVX2: 8x8, 16x16 and 32x32, compiled only when HAVE_AVX2 is set.
#if HAVE_AVX2
INSTANTIATE_TEST_CASE_P(
    AVX2, HadamardHighbdTest,
    ::testing::Values(HadamardFuncWithSize(&vpx_highbd_hadamard_8x8_avx2, 8),
                      HadamardFuncWithSize(&vpx_highbd_hadamard_16x16_avx2, 16),
                      HadamardFuncWithSize(&vpx_highbd_hadamard_32x32_avx2,
                                           32)));
#endif // HAVE_AVX2
  264. #endif // CONFIG_VP9_HIGHBITDEPTH
  265. } // namespace