vp9_error_msa.c 6.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105
  /*
   *  Copyright (c) 2015 The WebM project authors. All Rights Reserved.
   *
   *  Use of this source code is governed by a BSD-style license
   *  that can be found in the LICENSE file in the root of the source
   *  tree. An additional intellectual property rights grant can be found
   *  in the file PATENTS.  All contributing project authors may
   *  be found in the AUTHORS file in the root of the source tree.
   */
  10. #include "./vp9_rtcd.h"
  11. #include "vpx_dsp/mips/macros_msa.h"
  /*
   * BLOCK_ERROR_BLOCKSIZE_MSA(BSize) defines
   *
   *   static int64_t block_error_<BSize>size_msa(const int16_t *coeff_ptr,
   *                                              const int16_t *dq_coeff_ptr,
   *                                              int64_t *ssz);
   *
   * The generated function consumes the two inputs 16 values at a time (two
   * vector loads of 8 values each per step). The first 16 values are handled
   * unconditionally, then (BSize >> 4) - 1 further steps run in a loop, so
   * BSize is expected to be a multiple of 16 and at least 16. BSize must be a
   * plain literal because it is pasted into the function name.
   *
   * Two running totals are kept: one built from coeff alone, which is stored
   * through ssz, and one built from the (coeff, dq_coeff) pairs, which is
   * returned.
   *
   * NOTE(review): LD_SH, UNPCK_SH_SW, ILVRL_H2_SH, HSUB_UH2_SW, DOTP_SW2_SD and
   * DPADD_SD2_SD are defined in vpx_dsp/mips/macros_msa.h, which is not visible
   * here. From their names and usage the totals look like the sum of squared
   * coefficients and the sum of squared coeff/dq_coeff differences -- confirm
   * against the header.
   */
  #define BLOCK_ERROR_BLOCKSIZE_MSA(BSize) \
    static int64_t block_error_##BSize##size_msa( \
        const int16_t *coeff_ptr, const int16_t *dq_coeff_ptr, int64_t *ssz) { \
      uint32_t remaining; \
      v8i16 src, deq, pair_r, pair_l; \
      v4i32 delta_r, delta_l, src_r, src_l; \
      v2i64 energy_r, energy_l; \
      v2i64 sse_r, sse_l; \
      \
      /* Values 0..7. The four accumulators get their first value from */ \
      /* DOTP_SW2_SD here; every later step uses DPADD_SD2_SD on them. */ \
      src = LD_SH(coeff_ptr); \
      deq = LD_SH(dq_coeff_ptr); \
      UNPCK_SH_SW(src, src_r, src_l); \
      ILVRL_H2_SH(src, deq, pair_r, pair_l); \
      HSUB_UH2_SW(pair_r, pair_l, delta_r, delta_l); \
      DOTP_SW2_SD(src_r, src_l, src_r, src_l, energy_r, energy_l); \
      DOTP_SW2_SD(delta_r, delta_l, delta_r, delta_l, sse_r, sse_l); \
      \
      /* Values 8..15. */ \
      src = LD_SH(coeff_ptr + 8); \
      deq = LD_SH(dq_coeff_ptr + 8); \
      UNPCK_SH_SW(src, src_r, src_l); \
      ILVRL_H2_SH(src, deq, pair_r, pair_l); \
      HSUB_UH2_SW(pair_r, pair_l, delta_r, delta_l); \
      DPADD_SD2_SD(src_r, src_l, energy_r, energy_l); \
      DPADD_SD2_SD(delta_r, delta_l, sse_r, sse_l); \
      \
      coeff_ptr += 16; \
      dq_coeff_ptr += 16; \
      \
      /* One pass per remaining group of 16 values. */ \
      remaining = ((BSize) >> 4) - 1; \
      while (remaining-- > 0) { \
        src = LD_SH(coeff_ptr); \
        deq = LD_SH(dq_coeff_ptr); \
        UNPCK_SH_SW(src, src_r, src_l); \
        ILVRL_H2_SH(src, deq, pair_r, pair_l); \
        HSUB_UH2_SW(pair_r, pair_l, delta_r, delta_l); \
        DPADD_SD2_SD(src_r, src_l, energy_r, energy_l); \
        DPADD_SD2_SD(delta_r, delta_l, sse_r, sse_l); \
        \
        src = LD_SH(coeff_ptr + 8); \
        deq = LD_SH(dq_coeff_ptr + 8); \
        UNPCK_SH_SW(src, src_r, src_l); \
        ILVRL_H2_SH(src, deq, pair_r, pair_l); \
        HSUB_UH2_SW(pair_r, pair_l, delta_r, delta_l); \
        DPADD_SD2_SD(src_r, src_l, energy_r, energy_l); \
        DPADD_SD2_SD(delta_r, delta_l, sse_r, sse_l); \
        \
        coeff_ptr += 16; \
        dq_coeff_ptr += 16; \
      } \
      \
      /* Fold element 1 of each 2 x 64-bit accumulator onto element 0, */ \
      /* then add the two element-0 values to get each scalar total. */ \
      energy_r += __msa_splati_d(energy_r, 1); \
      energy_l += __msa_splati_d(energy_l, 1); \
      *ssz = __msa_copy_s_d(energy_r, 0) + __msa_copy_s_d(energy_l, 0); \
      \
      sse_r += __msa_splati_d(sse_r, 1); \
      sse_l += __msa_splati_d(sse_l, 1); \
      return __msa_copy_s_d(sse_r, 0) + __msa_copy_s_d(sse_l, 0); \
    }
  79. BLOCK_ERROR_BLOCKSIZE_MSA(16);
  80. BLOCK_ERROR_BLOCKSIZE_MSA(64);
  81. BLOCK_ERROR_BLOCKSIZE_MSA(256);
  82. BLOCK_ERROR_BLOCKSIZE_MSA(1024);
  83. int64_t vp9_block_error_msa(const tran_low_t *coeff_ptr,
  84. const tran_low_t *dq_coeff_ptr, intptr_t blk_size,
  85. int64_t *ssz) {
  86. int64_t err;
  87. const int16_t *coeff = (const int16_t *)coeff_ptr;
  88. const int16_t *dq_coeff = (const int16_t *)dq_coeff_ptr;
  89. switch (blk_size) {
  90. case 16: err = block_error_16size_msa(coeff, dq_coeff, ssz); break;
  91. case 64: err = block_error_64size_msa(coeff, dq_coeff, ssz); break;
  92. case 256: err = block_error_256size_msa(coeff, dq_coeff, ssz); break;
  93. case 1024: err = block_error_1024size_msa(coeff, dq_coeff, ssz); break;
  94. default:
  95. err = vp9_block_error_c(coeff_ptr, dq_coeff_ptr, blk_size, ssz);
  96. break;
  97. }
  98. return err;
  99. }