/* vp9_fdct_msa.h */
/*
 * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */
  10. #ifndef VP9_ENCODER_MIPS_MSA_VP9_FDCT_MSA_H_
  11. #define VP9_ENCODER_MIPS_MSA_VP9_FDCT_MSA_H_
  12. #include "vpx_dsp/mips/fwd_txfm_msa.h"
  13. #include "vpx_dsp/mips/txfm_macros_msa.h"
  14. #include "vpx_ports/mem.h"
  15. #define VP9_ADST8(in0, in1, in2, in3, in4, in5, in6, in7, out0, out1, out2, \
  16. out3, out4, out5, out6, out7) \
  17. { \
  18. v8i16 cnst0_m, cnst1_m, cnst2_m, cnst3_m, cnst4_m; \
  19. v8i16 vec0_m, vec1_m, vec2_m, vec3_m, s0_m, s1_m; \
  20. v8i16 coeff0_m = { cospi_2_64, cospi_6_64, cospi_10_64, cospi_14_64, \
  21. cospi_18_64, cospi_22_64, cospi_26_64, cospi_30_64 }; \
  22. v8i16 coeff1_m = { cospi_8_64, -cospi_8_64, cospi_16_64, -cospi_16_64, \
  23. cospi_24_64, -cospi_24_64, 0, 0 }; \
  24. \
  25. SPLATI_H2_SH(coeff0_m, 0, 7, cnst0_m, cnst1_m); \
  26. cnst2_m = -cnst0_m; \
  27. ILVEV_H2_SH(cnst0_m, cnst1_m, cnst1_m, cnst2_m, cnst0_m, cnst1_m); \
  28. SPLATI_H2_SH(coeff0_m, 4, 3, cnst2_m, cnst3_m); \
  29. cnst4_m = -cnst2_m; \
  30. ILVEV_H2_SH(cnst2_m, cnst3_m, cnst3_m, cnst4_m, cnst2_m, cnst3_m); \
  31. \
  32. ILVRL_H2_SH(in0, in7, vec1_m, vec0_m); \
  33. ILVRL_H2_SH(in4, in3, vec3_m, vec2_m); \
  34. DOT_ADD_SUB_SRARI_PCK(vec0_m, vec1_m, vec2_m, vec3_m, cnst0_m, cnst1_m, \
  35. cnst2_m, cnst3_m, in7, in0, in4, in3); \
  36. \
  37. SPLATI_H2_SH(coeff0_m, 2, 5, cnst0_m, cnst1_m); \
  38. cnst2_m = -cnst0_m; \
  39. ILVEV_H2_SH(cnst0_m, cnst1_m, cnst1_m, cnst2_m, cnst0_m, cnst1_m); \
  40. SPLATI_H2_SH(coeff0_m, 6, 1, cnst2_m, cnst3_m); \
  41. cnst4_m = -cnst2_m; \
  42. ILVEV_H2_SH(cnst2_m, cnst3_m, cnst3_m, cnst4_m, cnst2_m, cnst3_m); \
  43. \
  44. ILVRL_H2_SH(in2, in5, vec1_m, vec0_m); \
  45. ILVRL_H2_SH(in6, in1, vec3_m, vec2_m); \
  46. \
  47. DOT_ADD_SUB_SRARI_PCK(vec0_m, vec1_m, vec2_m, vec3_m, cnst0_m, cnst1_m, \
  48. cnst2_m, cnst3_m, in5, in2, in6, in1); \
  49. BUTTERFLY_4(in7, in0, in2, in5, s1_m, s0_m, in2, in5); \
  50. out7 = -s0_m; \
  51. out0 = s1_m; \
  52. \
  53. SPLATI_H4_SH(coeff1_m, 0, 4, 1, 5, cnst0_m, cnst1_m, cnst2_m, cnst3_m); \
  54. \
  55. ILVEV_H2_SH(cnst3_m, cnst0_m, cnst1_m, cnst2_m, cnst3_m, cnst2_m); \
  56. cnst0_m = __msa_ilvev_h(cnst1_m, cnst0_m); \
  57. cnst1_m = cnst0_m; \
  58. \
  59. ILVRL_H2_SH(in4, in3, vec1_m, vec0_m); \
  60. ILVRL_H2_SH(in6, in1, vec3_m, vec2_m); \
  61. DOT_ADD_SUB_SRARI_PCK(vec0_m, vec1_m, vec2_m, vec3_m, cnst0_m, cnst2_m, \
  62. cnst3_m, cnst1_m, out1, out6, s0_m, s1_m); \
  63. \
  64. SPLATI_H2_SH(coeff1_m, 2, 3, cnst0_m, cnst1_m); \
  65. cnst1_m = __msa_ilvev_h(cnst1_m, cnst0_m); \
  66. \
  67. ILVRL_H2_SH(in2, in5, vec1_m, vec0_m); \
  68. ILVRL_H2_SH(s0_m, s1_m, vec3_m, vec2_m); \
  69. out3 = DOT_SHIFT_RIGHT_PCK_H(vec0_m, vec1_m, cnst0_m); \
  70. out4 = DOT_SHIFT_RIGHT_PCK_H(vec0_m, vec1_m, cnst1_m); \
  71. out2 = DOT_SHIFT_RIGHT_PCK_H(vec2_m, vec3_m, cnst0_m); \
  72. out5 = DOT_SHIFT_RIGHT_PCK_H(vec2_m, vec3_m, cnst1_m); \
  73. \
  74. out1 = -out1; \
  75. out3 = -out3; \
  76. out5 = -out5; \
  77. }
  78. #define VP9_FADST4(in0, in1, in2, in3, out0, out1, out2, out3) \
  79. { \
  80. v4i32 s0_m, s1_m, s2_m, s3_m, constant_m; \
  81. v4i32 in0_r_m, in1_r_m, in2_r_m, in3_r_m; \
  82. \
  83. UNPCK_R_SH_SW(in0, in0_r_m); \
  84. UNPCK_R_SH_SW(in1, in1_r_m); \
  85. UNPCK_R_SH_SW(in2, in2_r_m); \
  86. UNPCK_R_SH_SW(in3, in3_r_m); \
  87. \
  88. constant_m = __msa_fill_w(sinpi_4_9); \
  89. MUL2(in0_r_m, constant_m, in3_r_m, constant_m, s1_m, s0_m); \
  90. \
  91. constant_m = __msa_fill_w(sinpi_1_9); \
  92. s0_m += in0_r_m * constant_m; \
  93. s1_m -= in1_r_m * constant_m; \
  94. \
  95. constant_m = __msa_fill_w(sinpi_2_9); \
  96. s0_m += in1_r_m * constant_m; \
  97. s1_m += in3_r_m * constant_m; \
  98. \
  99. s2_m = in0_r_m + in1_r_m - in3_r_m; \
  100. \
  101. constant_m = __msa_fill_w(sinpi_3_9); \
  102. MUL2(in2_r_m, constant_m, s2_m, constant_m, s3_m, in1_r_m); \
  103. \
  104. in0_r_m = s0_m + s3_m; \
  105. s2_m = s1_m - s3_m; \
  106. s3_m = s1_m - s0_m + s3_m; \
  107. \
  108. SRARI_W4_SW(in0_r_m, in1_r_m, s2_m, s3_m, DCT_CONST_BITS); \
  109. PCKEV_H4_SH(in0_r_m, in0_r_m, in1_r_m, in1_r_m, s2_m, s2_m, s3_m, s3_m, \
  110. out0, out1, out2, out3); \
  111. }
  112. #endif /* VP9_ENCODER_MIPS_MSA_VP9_FDCT_MSA_H_ */