vp8_quantize_mmi.c 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262
  1. /*
  2. * Copyright (c) 2017 The WebM project authors. All Rights Reserved.
  3. *
  4. * Use of this source code is governed by a BSD-style license
  5. * that can be found in the LICENSE file in the root of the source
  6. * tree. An additional intellectual property rights grant can be found
  7. * in the file PATENTS. All contributing project authors may
  8. * be found in the AUTHORS file in the root of the source tree.
  9. */
  10. #include "vpx_mem/vpx_mem.h"
  11. #include "vpx_ports/asmdefs_mmi.h"
  12. #include "vp8/encoder/onyx_int.h"
  13. #include "vp8/encoder/quantize.h"
  14. #include "vp8/common/quant_common.h"
  /*
   * Quantize the single coefficient at raster position `rc` and, if it
   * survives quantization, record `i` as the new end-of-block value.
   *
   * The macro deliberately works on the caller's locals. It expects these
   * names to be in scope:
   *   int x, y, z, sz, zbin, eob;
   *   coeff_ptr, zbin_ptr, round_ptr, quant_ptr, quant_shift_ptr, dequant_ptr
   *     (readable arrays indexed by rc),
   *   qcoeff_ptr, dqcoeff_ptr (writable arrays indexed by rc),
   *   zbin_boost_ptr (cursor, advanced once per invocation),
   *   zbin_oq_value, and b (with member zrun_zbin_boost).
   *
   * Steps:
   *   - x is |z|, computed branch-free from the sign mask sz (0 or -1).
   *   - The coefficient is dropped unless x reaches the zero-bin threshold.
   *   - When the quantized magnitude y is non-zero, the sign is restored,
   *     both output arrays are written, eob is set to i, and the boost
   *     cursor is rewound to b->zrun_zbin_boost.
   *
   * The body is wrapped in do { ... } while (0) so that an invocation followed
   * by ';' is exactly one statement (safe inside unbraced if/else).
   */
  #define REGULAR_SELECT_EOB(i, rc) \
    do { \
      z = coeff_ptr[(rc)]; \
      sz = (z >> 31); \
      x = (z ^ sz) - sz; \
      zbin = zbin_ptr[(rc)] + *(zbin_boost_ptr++) + zbin_oq_value; \
      if (x >= zbin) { \
        x += round_ptr[(rc)]; \
        y = ((((x * quant_ptr[(rc)]) >> 16) + x) * quant_shift_ptr[(rc)]) >> 16; \
        if (y) { \
          x = (y ^ sz) - sz; \
          qcoeff_ptr[(rc)] = x; \
          dqcoeff_ptr[(rc)] = x * dequant_ptr[(rc)]; \
          eob = (i); \
          zbin_boost_ptr = b->zrun_zbin_boost; \
        } \
      } \
    } while (0)
  31. void vp8_fast_quantize_b_mmi(BLOCK *b, BLOCKD *d) {
  32. const int16_t *coeff_ptr = b->coeff;
  33. const int16_t *round_ptr = b->round;
  34. const int16_t *quant_ptr = b->quant_fast;
  35. int16_t *qcoeff_ptr = d->qcoeff;
  36. int16_t *dqcoeff_ptr = d->dqcoeff;
  37. const int16_t *dequant_ptr = d->dequant;
  38. const int16_t *inv_zig_zag = vp8_default_inv_zig_zag;
  39. double ftmp[13];
  40. uint64_t tmp[1];
  41. DECLARE_ALIGNED(8, const uint64_t, ones) = { 0xffffffffffffffffULL };
  42. int eob = 0;
  43. __asm__ volatile(
  44. // loop 0 ~ 7
  45. "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
  46. "gsldlc1 %[ftmp1], 0x07(%[coeff_ptr]) \n\t"
  47. "gsldrc1 %[ftmp1], 0x00(%[coeff_ptr]) \n\t"
  48. "li %[tmp0], 0x0f \n\t"
  49. "mtc1 %[tmp0], %[ftmp9] \n\t"
  50. "gsldlc1 %[ftmp2], 0x0f(%[coeff_ptr]) \n\t"
  51. "gsldrc1 %[ftmp2], 0x08(%[coeff_ptr]) \n\t"
  52. "psrah %[ftmp3], %[ftmp1], %[ftmp9] \n\t"
  53. "xor %[ftmp1], %[ftmp3], %[ftmp1] \n\t"
  54. "psubh %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
  55. "psrah %[ftmp4], %[ftmp2], %[ftmp9] \n\t"
  56. "xor %[ftmp2], %[ftmp4], %[ftmp2] \n\t"
  57. "psubh %[ftmp2], %[ftmp2], %[ftmp4] \n\t"
  58. "gsldlc1 %[ftmp5], 0x07(%[round_ptr]) \n\t"
  59. "gsldrc1 %[ftmp5], 0x00(%[round_ptr]) \n\t"
  60. "gsldlc1 %[ftmp6], 0x0f(%[round_ptr]) \n\t"
  61. "gsldrc1 %[ftmp6], 0x08(%[round_ptr]) \n\t"
  62. "paddh %[ftmp5], %[ftmp5], %[ftmp1] \n\t"
  63. "paddh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
  64. "gsldlc1 %[ftmp7], 0x07(%[quant_ptr]) \n\t"
  65. "gsldrc1 %[ftmp7], 0x00(%[quant_ptr]) \n\t"
  66. "gsldlc1 %[ftmp8], 0x0f(%[quant_ptr]) \n\t"
  67. "gsldrc1 %[ftmp8], 0x08(%[quant_ptr]) \n\t"
  68. "pmulhuh %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
  69. "pmulhuh %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
  70. "xor %[ftmp7], %[ftmp5], %[ftmp3] \n\t"
  71. "xor %[ftmp8], %[ftmp6], %[ftmp4] \n\t"
  72. "psubh %[ftmp7], %[ftmp7], %[ftmp3] \n\t"
  73. "psubh %[ftmp8], %[ftmp8], %[ftmp4] \n\t"
  74. "gssdlc1 %[ftmp7], 0x07(%[qcoeff_ptr]) \n\t"
  75. "gssdrc1 %[ftmp7], 0x00(%[qcoeff_ptr]) \n\t"
  76. "gssdlc1 %[ftmp8], 0x0f(%[qcoeff_ptr]) \n\t"
  77. "gssdrc1 %[ftmp8], 0x08(%[qcoeff_ptr]) \n\t"
  78. "gsldlc1 %[ftmp1], 0x07(%[inv_zig_zag]) \n\t"
  79. "gsldrc1 %[ftmp1], 0x00(%[inv_zig_zag]) \n\t"
  80. "gsldlc1 %[ftmp2], 0x0f(%[inv_zig_zag]) \n\t"
  81. "gsldrc1 %[ftmp2], 0x08(%[inv_zig_zag]) \n\t"
  82. "pcmpeqh %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
  83. "pcmpeqh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
  84. "xor %[ftmp5], %[ftmp5], %[ones] \n\t"
  85. "xor %[ftmp6], %[ftmp6], %[ones] \n\t"
  86. "and %[ftmp5], %[ftmp5], %[ftmp1] \n\t"
  87. "and %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
  88. "pmaxsh %[ftmp10], %[ftmp5], %[ftmp6] \n\t"
  89. "gsldlc1 %[ftmp5], 0x07(%[dequant_ptr]) \n\t"
  90. "gsldrc1 %[ftmp5], 0x00(%[dequant_ptr]) \n\t"
  91. "gsldlc1 %[ftmp6], 0x0f(%[dequant_ptr]) \n\t"
  92. "gsldrc1 %[ftmp6], 0x08(%[dequant_ptr]) \n\t"
  93. "pmullh %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
  94. "pmullh %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
  95. "gssdlc1 %[ftmp5], 0x07(%[dqcoeff_ptr]) \n\t"
  96. "gssdrc1 %[ftmp5], 0x00(%[dqcoeff_ptr]) \n\t"
  97. "gssdlc1 %[ftmp6], 0x0f(%[dqcoeff_ptr]) \n\t"
  98. "gssdrc1 %[ftmp6], 0x08(%[dqcoeff_ptr]) \n\t"
  99. // loop 8 ~ 15
  100. "gsldlc1 %[ftmp1], 0x17(%[coeff_ptr]) \n\t"
  101. "gsldrc1 %[ftmp1], 0x10(%[coeff_ptr]) \n\t"
  102. "gsldlc1 %[ftmp2], 0x1f(%[coeff_ptr]) \n\t"
  103. "gsldrc1 %[ftmp2], 0x18(%[coeff_ptr]) \n\t"
  104. "psrah %[ftmp3], %[ftmp1], %[ftmp9] \n\t"
  105. "xor %[ftmp1], %[ftmp3], %[ftmp1] \n\t"
  106. "psubh %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
  107. "psrah %[ftmp4], %[ftmp2], %[ftmp9] \n\t"
  108. "xor %[ftmp2], %[ftmp4], %[ftmp2] \n\t"
  109. "psubh %[ftmp2], %[ftmp2], %[ftmp4] \n\t"
  110. "gsldlc1 %[ftmp5], 0x17(%[round_ptr]) \n\t"
  111. "gsldrc1 %[ftmp5], 0x10(%[round_ptr]) \n\t"
  112. "gsldlc1 %[ftmp6], 0x1f(%[round_ptr]) \n\t"
  113. "gsldrc1 %[ftmp6], 0x18(%[round_ptr]) \n\t"
  114. "paddh %[ftmp5], %[ftmp5], %[ftmp1] \n\t"
  115. "paddh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
  116. "gsldlc1 %[ftmp7], 0x17(%[quant_ptr]) \n\t"
  117. "gsldrc1 %[ftmp7], 0x10(%[quant_ptr]) \n\t"
  118. "gsldlc1 %[ftmp8], 0x1f(%[quant_ptr]) \n\t"
  119. "gsldrc1 %[ftmp8], 0x18(%[quant_ptr]) \n\t"
  120. "pmulhuh %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
  121. "pmulhuh %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
  122. "xor %[ftmp7], %[ftmp5], %[ftmp3] \n\t"
  123. "xor %[ftmp8], %[ftmp6], %[ftmp4] \n\t"
  124. "psubh %[ftmp7], %[ftmp7], %[ftmp3] \n\t"
  125. "psubh %[ftmp8], %[ftmp8], %[ftmp4] \n\t"
  126. "gssdlc1 %[ftmp7], 0x17(%[qcoeff_ptr]) \n\t"
  127. "gssdrc1 %[ftmp7], 0x10(%[qcoeff_ptr]) \n\t"
  128. "gssdlc1 %[ftmp8], 0x1f(%[qcoeff_ptr]) \n\t"
  129. "gssdrc1 %[ftmp8], 0x18(%[qcoeff_ptr]) \n\t"
  130. "gsldlc1 %[ftmp1], 0x17(%[inv_zig_zag]) \n\t"
  131. "gsldrc1 %[ftmp1], 0x10(%[inv_zig_zag]) \n\t"
  132. "gsldlc1 %[ftmp2], 0x1f(%[inv_zig_zag]) \n\t"
  133. "gsldrc1 %[ftmp2], 0x18(%[inv_zig_zag]) \n\t"
  134. "pcmpeqh %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
  135. "pcmpeqh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
  136. "xor %[ftmp5], %[ftmp5], %[ones] \n\t"
  137. "xor %[ftmp6], %[ftmp6], %[ones] \n\t"
  138. "and %[ftmp5], %[ftmp5], %[ftmp1] \n\t"
  139. "and %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
  140. "pmaxsh %[ftmp11], %[ftmp5], %[ftmp6] \n\t"
  141. "gsldlc1 %[ftmp5], 0x17(%[dequant_ptr]) \n\t"
  142. "gsldrc1 %[ftmp5], 0x10(%[dequant_ptr]) \n\t"
  143. "gsldlc1 %[ftmp6], 0x1f(%[dequant_ptr]) \n\t"
  144. "gsldrc1 %[ftmp6], 0x18(%[dequant_ptr]) \n\t"
  145. "pmullh %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
  146. "pmullh %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
  147. "gssdlc1 %[ftmp5], 0x17(%[dqcoeff_ptr]) \n\t"
  148. "gssdrc1 %[ftmp5], 0x10(%[dqcoeff_ptr]) \n\t"
  149. "gssdlc1 %[ftmp6], 0x1f(%[dqcoeff_ptr]) \n\t"
  150. "gssdrc1 %[ftmp6], 0x18(%[dqcoeff_ptr]) \n\t"
  151. "li %[tmp0], 0x10 \n\t"
  152. "mtc1 %[tmp0], %[ftmp9] \n\t"
  153. "pmaxsh %[ftmp10], %[ftmp10], %[ftmp11] \n\t"
  154. "psrlw %[ftmp11], %[ftmp10], %[ftmp9] \n\t"
  155. "pmaxsh %[ftmp10], %[ftmp10], %[ftmp11] \n\t"
  156. "li %[tmp0], 0xaa \n\t"
  157. "mtc1 %[tmp0], %[ftmp9] \n\t"
  158. "pshufh %[ftmp11], %[ftmp10], %[ftmp9] \n\t"
  159. "pmaxsh %[ftmp10], %[ftmp10], %[ftmp11] \n\t"
  160. "li %[tmp0], 0xffff \n\t"
  161. "mtc1 %[tmp0], %[ftmp9] \n\t"
  162. "and %[ftmp10], %[ftmp10], %[ftmp9] \n\t"
  163. "gssdlc1 %[ftmp10], 0x07(%[eob]) \n\t"
  164. "gssdrc1 %[ftmp10], 0x00(%[eob]) \n\t"
  165. : [ftmp0] "=&f"(ftmp[0]), [ftmp1] "=&f"(ftmp[1]), [ftmp2] "=&f"(ftmp[2]),
  166. [ftmp3] "=&f"(ftmp[3]), [ftmp4] "=&f"(ftmp[4]), [ftmp5] "=&f"(ftmp[5]),
  167. [ftmp6] "=&f"(ftmp[6]), [ftmp7] "=&f"(ftmp[7]), [ftmp8] "=&f"(ftmp[8]),
  168. [ftmp9] "=&f"(ftmp[9]), [ftmp10] "=&f"(ftmp[10]),
  169. [ftmp11] "=&f"(ftmp[11]), [ftmp12] "=&f"(ftmp[12]), [tmp0] "=&r"(tmp[0])
  170. : [coeff_ptr] "r"((mips_reg)coeff_ptr),
  171. [qcoeff_ptr] "r"((mips_reg)qcoeff_ptr),
  172. [dequant_ptr] "r"((mips_reg)dequant_ptr),
  173. [round_ptr] "r"((mips_reg)round_ptr),
  174. [quant_ptr] "r"((mips_reg)quant_ptr),
  175. [dqcoeff_ptr] "r"((mips_reg)dqcoeff_ptr),
  176. [inv_zig_zag] "r"((mips_reg)inv_zig_zag), [eob] "r"((mips_reg)&eob),
  177. [ones] "f"(ones)
  178. : "memory");
  179. *d->eob = eob;
  180. }
  181. void vp8_regular_quantize_b_mmi(BLOCK *b, BLOCKD *d) {
  182. int eob = 0;
  183. int x, y, z, sz, zbin;
  184. const int16_t *zbin_boost_ptr = b->zrun_zbin_boost;
  185. const int16_t *coeff_ptr = b->coeff;
  186. const int16_t *zbin_ptr = b->zbin;
  187. const int16_t *round_ptr = b->round;
  188. const int16_t *quant_ptr = b->quant;
  189. const int16_t *quant_shift_ptr = b->quant_shift;
  190. int16_t *qcoeff_ptr = d->qcoeff;
  191. int16_t *dqcoeff_ptr = d->dqcoeff;
  192. const int16_t *dequant_ptr = d->dequant;
  193. const int16_t zbin_oq_value = b->zbin_extra;
  194. register double ftmp0 asm("$f0");
  195. // memset(qcoeff_ptr, 0, 32);
  196. // memset(dqcoeff_ptr, 0, 32);
  197. /* clang-format off */
  198. __asm__ volatile (
  199. "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
  200. "gssdlc1 %[ftmp0], 0x07(%[qcoeff_ptr]) \n\t"
  201. "gssdrc1 %[ftmp0], 0x00(%[qcoeff_ptr]) \n\t"
  202. "gssdlc1 %[ftmp0], 0x0f(%[qcoeff_ptr]) \n\t"
  203. "gssdrc1 %[ftmp0], 0x08(%[qcoeff_ptr]) \n\t"
  204. "gssdlc1 %[ftmp0], 0x17(%[qcoeff_ptr]) \n\t"
  205. "gssdrc1 %[ftmp0], 0x10(%[qcoeff_ptr]) \n\t"
  206. "gssdlc1 %[ftmp0], 0x1f(%[qcoeff_ptr]) \n\t"
  207. "gssdrc1 %[ftmp0], 0x18(%[qcoeff_ptr]) \n\t"
  208. "gssdlc1 %[ftmp0], 0x07(%[dqcoeff_ptr]) \n\t"
  209. "gssdrc1 %[ftmp0], 0x00(%[dqcoeff_ptr]) \n\t"
  210. "gssdlc1 %[ftmp0], 0x0f(%[dqcoeff_ptr]) \n\t"
  211. "gssdrc1 %[ftmp0], 0x08(%[dqcoeff_ptr]) \n\t"
  212. "gssdlc1 %[ftmp0], 0x17(%[dqcoeff_ptr]) \n\t"
  213. "gssdrc1 %[ftmp0], 0x10(%[dqcoeff_ptr]) \n\t"
  214. "gssdlc1 %[ftmp0], 0x1f(%[dqcoeff_ptr]) \n\t"
  215. "gssdrc1 %[ftmp0], 0x18(%[dqcoeff_ptr]) \n\t"
  216. : [ftmp0]"=&f"(ftmp0)
  217. : [qcoeff_ptr]"r"(qcoeff_ptr), [dqcoeff_ptr]"r"(dqcoeff_ptr)
  218. : "memory"
  219. );
  220. /* clang-format on */
  221. REGULAR_SELECT_EOB(1, 0);
  222. REGULAR_SELECT_EOB(2, 1);
  223. REGULAR_SELECT_EOB(3, 4);
  224. REGULAR_SELECT_EOB(4, 8);
  225. REGULAR_SELECT_EOB(5, 5);
  226. REGULAR_SELECT_EOB(6, 2);
  227. REGULAR_SELECT_EOB(7, 3);
  228. REGULAR_SELECT_EOB(8, 6);
  229. REGULAR_SELECT_EOB(9, 9);
  230. REGULAR_SELECT_EOB(10, 12);
  231. REGULAR_SELECT_EOB(11, 13);
  232. REGULAR_SELECT_EOB(12, 10);
  233. REGULAR_SELECT_EOB(13, 7);
  234. REGULAR_SELECT_EOB(14, 11);
  235. REGULAR_SELECT_EOB(15, 14);
  236. REGULAR_SELECT_EOB(16, 15);
  237. *d->eob = (char)eob;
  238. }