/* encodeopt_msa.c — VP8 block-error functions optimized with MIPS MSA intrinsics. */
/*
 *  Copyright (c) 2015 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
  10. #include "./vp8_rtcd.h"
  11. #include "vp8/common/mips/msa/vp8_macros_msa.h"
  12. #include "vp8/encoder/block.h"
  13. int32_t vp8_block_error_msa(int16_t *coeff_ptr, int16_t *dq_coeff_ptr) {
  14. int32_t err = 0;
  15. uint32_t loop_cnt;
  16. v8i16 coeff, dq_coeff, coeff0, coeff1;
  17. v4i32 diff0, diff1;
  18. v2i64 err0 = { 0 };
  19. v2i64 err1 = { 0 };
  20. for (loop_cnt = 2; loop_cnt--;) {
  21. coeff = LD_SH(coeff_ptr);
  22. dq_coeff = LD_SH(dq_coeff_ptr);
  23. ILVRL_H2_SH(coeff, dq_coeff, coeff0, coeff1);
  24. HSUB_UH2_SW(coeff0, coeff1, diff0, diff1);
  25. DPADD_SD2_SD(diff0, diff1, err0, err1);
  26. coeff_ptr += 8;
  27. dq_coeff_ptr += 8;
  28. }
  29. err0 += __msa_splati_d(err0, 1);
  30. err1 += __msa_splati_d(err1, 1);
  31. err = __msa_copy_s_d(err0, 0);
  32. err += __msa_copy_s_d(err1, 0);
  33. return err;
  34. }
/*
 * Sums the squared quantization error over the 16 luma (Y) sub-blocks of a
 * macroblock: for each sub-block, (coeff - dqcoeff)^2 across its 16
 * coefficients.
 *
 * mb : macroblock providing source coefficients (mb->block[i].coeff) and
 *      dequantized coefficients (mb->e_mbd.block[i].dqcoeff).
 * dc : when 1, the DC (first) coefficient of every sub-block is excluded
 *      from the error via a vector mask.
 *
 * Returns the accumulated error; the 64-bit lane sums are added into a
 * 32-bit total, matching the scalar reference implementation's return type.
 */
int32_t vp8_mbblock_error_msa(MACROBLOCK *mb, int32_t dc) {
  BLOCK *be;
  BLOCKD *bd;
  int16_t *coeff_ptr, *dq_coeff_ptr;
  int32_t err = 0;
  uint32_t loop_cnt;
  v8i16 coeff, coeff0, coeff1, coeff2, coeff3, coeff4;
  v8i16 dq_coeff, dq_coeff2, dq_coeff3, dq_coeff4;
  v4i32 diff0, diff1;
  v2i64 err0, err1;
  v16u8 zero = { 0 };
  /* mask0 starts as all-ones (every byte 0xFF): keep every difference. */
  v16u8 mask0 = (v16u8)__msa_ldi_b(255);

  if (1 == dc) {
    /* Zero word 0 of the mask so the first 32-bit difference — the DC
       coefficient's — is dropped by the bmnz selection below. */
    mask0 = (v16u8)__msa_insve_w((v4i32)mask0, 0, (v4i32)zero);
  }

  /* Process two sub-blocks (2*loop_cnt and 2*loop_cnt+1) per iteration,
     covering all 16 Y sub-blocks. */
  for (loop_cnt = 0; loop_cnt < 8; ++loop_cnt) {
    /* First sub-block of the pair: load its 16 coeffs as two v8i16. */
    be = &mb->block[2 * loop_cnt];
    bd = &mb->e_mbd.block[2 * loop_cnt];
    coeff_ptr = be->coeff;
    dq_coeff_ptr = bd->dqcoeff;
    coeff = LD_SH(coeff_ptr);
    dq_coeff = LD_SH(dq_coeff_ptr);
    coeff_ptr += 8;
    dq_coeff_ptr += 8;
    coeff2 = LD_SH(coeff_ptr);
    dq_coeff2 = LD_SH(dq_coeff_ptr);

    /* Second sub-block of the pair. */
    be = &mb->block[2 * loop_cnt + 1];
    bd = &mb->e_mbd.block[2 * loop_cnt + 1];
    coeff_ptr = be->coeff;
    dq_coeff_ptr = bd->dqcoeff;
    coeff3 = LD_SH(coeff_ptr);
    dq_coeff3 = LD_SH(dq_coeff_ptr);
    coeff_ptr += 8;
    dq_coeff_ptr += 8;
    coeff4 = LD_SH(coeff_ptr);
    dq_coeff4 = LD_SH(dq_coeff_ptr);

    /* Sub-block 1: interleave, subtract, mask the DC diff if requested,
       then square-and-sum into 64-bit accumulators (DOTP starts the
       accumulation, DPADD adds the second half's contribution). */
    ILVRL_H2_SH(coeff, dq_coeff, coeff0, coeff1);
    HSUB_UH2_SW(coeff0, coeff1, diff0, diff1);
    diff0 = (v4i32)__msa_bmnz_v(zero, (v16u8)diff0, mask0);
    DOTP_SW2_SD(diff0, diff1, diff0, diff1, err0, err1);
    ILVRL_H2_SH(coeff2, dq_coeff2, coeff0, coeff1);
    HSUB_UH2_SW(coeff0, coeff1, diff0, diff1);
    DPADD_SD2_SD(diff0, diff1, err0, err1);
    /* Horizontal reduction of the two accumulator lanes into the total. */
    err0 += __msa_splati_d(err0, 1);
    err1 += __msa_splati_d(err1, 1);
    err += __msa_copy_s_d(err0, 0);
    err += __msa_copy_s_d(err1, 0);

    /* Sub-block 2: same sequence, DC masked on its first vector too. */
    ILVRL_H2_SH(coeff3, dq_coeff3, coeff0, coeff1);
    HSUB_UH2_SW(coeff0, coeff1, diff0, diff1);
    diff0 = (v4i32)__msa_bmnz_v(zero, (v16u8)diff0, mask0);
    DOTP_SW2_SD(diff0, diff1, diff0, diff1, err0, err1);
    ILVRL_H2_SH(coeff4, dq_coeff4, coeff0, coeff1);
    HSUB_UH2_SW(coeff0, coeff1, diff0, diff1);
    DPADD_SD2_SD(diff0, diff1, err0, err1);
    err0 += __msa_splati_d(err0, 1);
    err1 += __msa_splati_d(err1, 1);
    err += __msa_copy_s_d(err0, 0);
    err += __msa_copy_s_d(err1, 0);
  }
  return err;
}
/*
 * Sums the squared quantization error over the 8 chroma (U and V)
 * sub-blocks of a macroblock — blocks 16..23 in the VP8 block layout —
 * as sum of (coeff - dqcoeff)^2 over each sub-block's 16 coefficients.
 *
 * mb : macroblock providing source coefficients (mb->block[i].coeff) and
 *      dequantized coefficients (mb->e_mbd.block[i].dqcoeff).
 *
 * Returns the accumulated error; 64-bit lane sums are added into a
 * 32-bit total, matching the scalar reference implementation.
 */
int32_t vp8_mbuverror_msa(MACROBLOCK *mb) {
  BLOCK *be;
  BLOCKD *bd;
  int16_t *coeff_ptr, *dq_coeff_ptr;
  int32_t err = 0;
  uint32_t loop_cnt;
  v8i16 coeff, coeff0, coeff1, coeff2, coeff3, coeff4;
  v8i16 dq_coeff, dq_coeff2, dq_coeff3, dq_coeff4;
  v4i32 diff0, diff1;
  v2i64 err0, err1, err_dup0, err_dup1;

  /* Walk the chroma blocks (indices 16..23) two at a time. */
  for (loop_cnt = 16; loop_cnt < 24; loop_cnt += 2) {
    /* First sub-block of the pair: load its 16 coeffs as two v8i16. */
    be = &mb->block[loop_cnt];
    bd = &mb->e_mbd.block[loop_cnt];
    coeff_ptr = be->coeff;
    dq_coeff_ptr = bd->dqcoeff;
    coeff = LD_SH(coeff_ptr);
    dq_coeff = LD_SH(dq_coeff_ptr);
    coeff_ptr += 8;
    dq_coeff_ptr += 8;
    coeff2 = LD_SH(coeff_ptr);
    dq_coeff2 = LD_SH(dq_coeff_ptr);

    /* Second sub-block of the pair. */
    be = &mb->block[loop_cnt + 1];
    bd = &mb->e_mbd.block[loop_cnt + 1];
    coeff_ptr = be->coeff;
    dq_coeff_ptr = bd->dqcoeff;
    coeff3 = LD_SH(coeff_ptr);
    dq_coeff3 = LD_SH(dq_coeff_ptr);
    coeff_ptr += 8;
    dq_coeff_ptr += 8;
    coeff4 = LD_SH(coeff_ptr);
    dq_coeff4 = LD_SH(dq_coeff_ptr);

    /* Sub-block 1: interleave, subtract, then square-and-sum into 64-bit
       accumulators (DOTP starts the accumulation, DPADD adds the second
       half's contribution). No DC masking for chroma. */
    ILVRL_H2_SH(coeff, dq_coeff, coeff0, coeff1);
    HSUB_UH2_SW(coeff0, coeff1, diff0, diff1);
    DOTP_SW2_SD(diff0, diff1, diff0, diff1, err0, err1);
    ILVRL_H2_SH(coeff2, dq_coeff2, coeff0, coeff1);
    HSUB_UH2_SW(coeff0, coeff1, diff0, diff1);
    DPADD_SD2_SD(diff0, diff1, err0, err1);
    /* Fold each accumulator's upper lane into the lower, then extract. */
    err_dup0 = __msa_splati_d(err0, 1);
    err_dup1 = __msa_splati_d(err1, 1);
    ADD2(err0, err_dup0, err1, err_dup1, err0, err1);
    err += __msa_copy_s_d(err0, 0);
    err += __msa_copy_s_d(err1, 0);

    /* Sub-block 2: same sequence. */
    ILVRL_H2_SH(coeff3, dq_coeff3, coeff0, coeff1);
    HSUB_UH2_SW(coeff0, coeff1, diff0, diff1);
    DOTP_SW2_SD(diff0, diff1, diff0, diff1, err0, err1);
    ILVRL_H2_SH(coeff4, dq_coeff4, coeff0, coeff1);
    HSUB_UH2_SW(coeff0, coeff1, diff0, diff1);
    DPADD_SD2_SD(diff0, diff1, err0, err1);
    err_dup0 = __msa_splati_d(err0, 1);
    err_dup1 = __msa_splati_d(err1, 1);
    ADD2(err0, err_dup0, err1, err_dup1, err0, err1);
    err += __msa_copy_s_d(err0, 0);
    err += __msa_copy_s_d(err1, 0);
  }
  return err;
}