123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320 |
- /*
- * Copyright (c) 2016 The WebM project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
- #include <algorithm>
- #include "third_party/googletest/src/include/gtest/gtest.h"
- #include "./vpx_dsp_rtcd.h"
- #include "vpx_ports/vpx_timer.h"
- #include "test/acm_random.h"
- #include "test/register_state_check.h"
- namespace {
- using ::libvpx_test::ACMRandom;
- typedef void (*HadamardFunc)(const int16_t *a, ptrdiff_t a_stride,
- tran_low_t *b);
- void hadamard_loop(const tran_low_t *a, tran_low_t *out) {
- tran_low_t b[8];
- for (int i = 0; i < 8; i += 2) {
- b[i + 0] = a[i * 8] + a[(i + 1) * 8];
- b[i + 1] = a[i * 8] - a[(i + 1) * 8];
- }
- tran_low_t c[8];
- for (int i = 0; i < 8; i += 4) {
- c[i + 0] = b[i + 0] + b[i + 2];
- c[i + 1] = b[i + 1] + b[i + 3];
- c[i + 2] = b[i + 0] - b[i + 2];
- c[i + 3] = b[i + 1] - b[i + 3];
- }
- out[0] = c[0] + c[4];
- out[7] = c[1] + c[5];
- out[3] = c[2] + c[6];
- out[4] = c[3] + c[7];
- out[2] = c[0] - c[4];
- out[6] = c[1] - c[5];
- out[1] = c[2] - c[6];
- out[5] = c[3] - c[7];
- }
- void reference_hadamard8x8(const int16_t *a, int a_stride, tran_low_t *b) {
- tran_low_t input[64];
- tran_low_t buf[64];
- for (int i = 0; i < 8; ++i) {
- for (int j = 0; j < 8; ++j) {
- input[i * 8 + j] = static_cast<tran_low_t>(a[i * a_stride + j]);
- }
- }
- for (int i = 0; i < 8; ++i) hadamard_loop(input + i, buf + i * 8);
- for (int i = 0; i < 8; ++i) hadamard_loop(buf + i, b + i * 8);
- }
- void reference_hadamard16x16(const int16_t *a, int a_stride, tran_low_t *b) {
- /* The source is a 16x16 block. The destination is rearranged to 8x32.
- * Input is 9 bit. */
- reference_hadamard8x8(a + 0 + 0 * a_stride, a_stride, b + 0);
- reference_hadamard8x8(a + 8 + 0 * a_stride, a_stride, b + 64);
- reference_hadamard8x8(a + 0 + 8 * a_stride, a_stride, b + 128);
- reference_hadamard8x8(a + 8 + 8 * a_stride, a_stride, b + 192);
- /* Overlay the 8x8 blocks and combine. */
- for (int i = 0; i < 64; ++i) {
- /* 8x8 steps the range up to 15 bits. */
- const tran_low_t a0 = b[0];
- const tran_low_t a1 = b[64];
- const tran_low_t a2 = b[128];
- const tran_low_t a3 = b[192];
- /* Prevent the result from escaping int16_t. */
- const tran_low_t b0 = (a0 + a1) >> 1;
- const tran_low_t b1 = (a0 - a1) >> 1;
- const tran_low_t b2 = (a2 + a3) >> 1;
- const tran_low_t b3 = (a2 - a3) >> 1;
- /* Store a 16 bit value. */
- b[0] = b0 + b2;
- b[64] = b1 + b3;
- b[128] = b0 - b2;
- b[192] = b1 - b3;
- ++b;
- }
- }
- void reference_hadamard32x32(const int16_t *a, int a_stride, tran_low_t *b) {
- reference_hadamard16x16(a + 0 + 0 * a_stride, a_stride, b + 0);
- reference_hadamard16x16(a + 16 + 0 * a_stride, a_stride, b + 256);
- reference_hadamard16x16(a + 0 + 16 * a_stride, a_stride, b + 512);
- reference_hadamard16x16(a + 16 + 16 * a_stride, a_stride, b + 768);
- for (int i = 0; i < 256; ++i) {
- const tran_low_t a0 = b[0];
- const tran_low_t a1 = b[256];
- const tran_low_t a2 = b[512];
- const tran_low_t a3 = b[768];
- const tran_low_t b0 = (a0 + a1) >> 2;
- const tran_low_t b1 = (a0 - a1) >> 2;
- const tran_low_t b2 = (a2 + a3) >> 2;
- const tran_low_t b3 = (a2 - a3) >> 2;
- b[0] = b0 + b2;
- b[256] = b1 + b3;
- b[512] = b0 - b2;
- b[768] = b1 - b3;
- ++b;
- }
- }
- struct HadamardFuncWithSize {
- HadamardFuncWithSize(HadamardFunc f, int s) : func(f), block_size(s) {}
- HadamardFunc func;
- int block_size;
- };
- std::ostream &operator<<(std::ostream &os, const HadamardFuncWithSize &hfs) {
- return os << "block size: " << hfs.block_size;
- }
- class HadamardTestBase : public ::testing::TestWithParam<HadamardFuncWithSize> {
- public:
- virtual void SetUp() {
- h_func_ = GetParam().func;
- bwh_ = GetParam().block_size;
- block_size_ = bwh_ * bwh_;
- rnd_.Reset(ACMRandom::DeterministicSeed());
- }
- virtual int16_t Rand() = 0;
- void ReferenceHadamard(const int16_t *a, int a_stride, tran_low_t *b,
- int bwh) {
- if (bwh == 32)
- reference_hadamard32x32(a, a_stride, b);
- else if (bwh == 16)
- reference_hadamard16x16(a, a_stride, b);
- else
- reference_hadamard8x8(a, a_stride, b);
- }
- void CompareReferenceRandom() {
- const int kMaxBlockSize = 32 * 32;
- DECLARE_ALIGNED(16, int16_t, a[kMaxBlockSize]);
- DECLARE_ALIGNED(16, tran_low_t, b[kMaxBlockSize]);
- memset(a, 0, sizeof(a));
- memset(b, 0, sizeof(b));
- tran_low_t b_ref[kMaxBlockSize];
- memset(b_ref, 0, sizeof(b_ref));
- for (int i = 0; i < block_size_; ++i) a[i] = Rand();
- ReferenceHadamard(a, bwh_, b_ref, bwh_);
- ASM_REGISTER_STATE_CHECK(h_func_(a, bwh_, b));
- // The order of the output is not important. Sort before checking.
- std::sort(b, b + block_size_);
- std::sort(b_ref, b_ref + block_size_);
- EXPECT_EQ(0, memcmp(b, b_ref, sizeof(b)));
- }
- void VaryStride() {
- const int kMaxBlockSize = 32 * 32;
- DECLARE_ALIGNED(16, int16_t, a[kMaxBlockSize * 8]);
- DECLARE_ALIGNED(16, tran_low_t, b[kMaxBlockSize]);
- memset(a, 0, sizeof(a));
- for (int i = 0; i < block_size_ * 8; ++i) a[i] = Rand();
- tran_low_t b_ref[kMaxBlockSize];
- for (int i = 8; i < 64; i += 8) {
- memset(b, 0, sizeof(b));
- memset(b_ref, 0, sizeof(b_ref));
- ReferenceHadamard(a, i, b_ref, bwh_);
- ASM_REGISTER_STATE_CHECK(h_func_(a, i, b));
- // The order of the output is not important. Sort before checking.
- std::sort(b, b + block_size_);
- std::sort(b_ref, b_ref + block_size_);
- EXPECT_EQ(0, memcmp(b, b_ref, sizeof(b)));
- }
- }
- void SpeedTest(int times) {
- const int kMaxBlockSize = 32 * 32;
- DECLARE_ALIGNED(16, int16_t, input[kMaxBlockSize]);
- DECLARE_ALIGNED(16, tran_low_t, output[kMaxBlockSize]);
- memset(input, 1, sizeof(input));
- memset(output, 0, sizeof(output));
- vpx_usec_timer timer;
- vpx_usec_timer_start(&timer);
- for (int i = 0; i < times; ++i) {
- h_func_(input, bwh_, output);
- }
- vpx_usec_timer_mark(&timer);
- const int elapsed_time = static_cast<int>(vpx_usec_timer_elapsed(&timer));
- printf("Hadamard%dx%d[%12d runs]: %d us\n", bwh_, bwh_, times,
- elapsed_time);
- }
- protected:
- int bwh_;
- int block_size_;
- HadamardFunc h_func_;
- ACMRandom rnd_;
- };
- class HadamardLowbdTest : public HadamardTestBase {
- protected:
- virtual int16_t Rand() { return rnd_.Rand9Signed(); }
- };
- TEST_P(HadamardLowbdTest, CompareReferenceRandom) { CompareReferenceRandom(); }
- TEST_P(HadamardLowbdTest, VaryStride) { VaryStride(); }
- TEST_P(HadamardLowbdTest, DISABLED_Speed) {
- SpeedTest(10);
- SpeedTest(10000);
- SpeedTest(10000000);
- }
- INSTANTIATE_TEST_CASE_P(
- C, HadamardLowbdTest,
- ::testing::Values(HadamardFuncWithSize(&vpx_hadamard_8x8_c, 8),
- HadamardFuncWithSize(&vpx_hadamard_16x16_c, 16),
- HadamardFuncWithSize(&vpx_hadamard_32x32_c, 32)));
- #if HAVE_SSE2
- INSTANTIATE_TEST_CASE_P(
- SSE2, HadamardLowbdTest,
- ::testing::Values(HadamardFuncWithSize(&vpx_hadamard_8x8_sse2, 8),
- HadamardFuncWithSize(&vpx_hadamard_16x16_sse2, 16),
- HadamardFuncWithSize(&vpx_hadamard_32x32_sse2, 32)));
- #endif // HAVE_SSE2
- #if HAVE_AVX2
- INSTANTIATE_TEST_CASE_P(
- AVX2, HadamardLowbdTest,
- ::testing::Values(HadamardFuncWithSize(&vpx_hadamard_16x16_avx2, 16),
- HadamardFuncWithSize(&vpx_hadamard_32x32_avx2, 32)));
- #endif // HAVE_AVX2
- #if HAVE_SSSE3 && ARCH_X86_64
- INSTANTIATE_TEST_CASE_P(
- SSSE3, HadamardLowbdTest,
- ::testing::Values(HadamardFuncWithSize(&vpx_hadamard_8x8_ssse3, 8)));
- #endif // HAVE_SSSE3 && ARCH_X86_64
- #if HAVE_NEON
- INSTANTIATE_TEST_CASE_P(
- NEON, HadamardLowbdTest,
- ::testing::Values(HadamardFuncWithSize(&vpx_hadamard_8x8_neon, 8),
- HadamardFuncWithSize(&vpx_hadamard_16x16_neon, 16)));
- #endif // HAVE_NEON
- // TODO(jingning): Remove highbitdepth flag when the SIMD functions are
- // in place and turn on the unit test.
- #if !CONFIG_VP9_HIGHBITDEPTH
- #if HAVE_MSA
- INSTANTIATE_TEST_CASE_P(
- MSA, HadamardLowbdTest,
- ::testing::Values(HadamardFuncWithSize(&vpx_hadamard_8x8_msa, 8),
- HadamardFuncWithSize(&vpx_hadamard_16x16_msa, 16)));
- #endif // HAVE_MSA
- #endif // !CONFIG_VP9_HIGHBITDEPTH
- #if HAVE_VSX
- INSTANTIATE_TEST_CASE_P(
- VSX, HadamardLowbdTest,
- ::testing::Values(HadamardFuncWithSize(&vpx_hadamard_8x8_vsx, 8),
- HadamardFuncWithSize(&vpx_hadamard_16x16_vsx, 16)));
- #endif // HAVE_VSX
- #if CONFIG_VP9_HIGHBITDEPTH
- class HadamardHighbdTest : public HadamardTestBase {
- protected:
- virtual int16_t Rand() { return rnd_.Rand13Signed(); }
- };
- TEST_P(HadamardHighbdTest, CompareReferenceRandom) { CompareReferenceRandom(); }
- TEST_P(HadamardHighbdTest, VaryStride) { VaryStride(); }
- TEST_P(HadamardHighbdTest, DISABLED_Speed) {
- SpeedTest(10);
- SpeedTest(10000);
- SpeedTest(10000000);
- }
- INSTANTIATE_TEST_CASE_P(
- C, HadamardHighbdTest,
- ::testing::Values(HadamardFuncWithSize(&vpx_highbd_hadamard_8x8_c, 8),
- HadamardFuncWithSize(&vpx_highbd_hadamard_16x16_c, 16),
- HadamardFuncWithSize(&vpx_highbd_hadamard_32x32_c, 32)));
- #if HAVE_AVX2
- INSTANTIATE_TEST_CASE_P(
- AVX2, HadamardHighbdTest,
- ::testing::Values(HadamardFuncWithSize(&vpx_highbd_hadamard_8x8_avx2, 8),
- HadamardFuncWithSize(&vpx_highbd_hadamard_16x16_avx2, 16),
- HadamardFuncWithSize(&vpx_highbd_hadamard_32x32_avx2,
- 32)));
- #endif // HAVE_AVX2
- #endif // CONFIG_VP9_HIGHBITDEPTH
- } // namespace
|