subtract_mmi.c 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306
  1. /*
  2. * Copyright (c) 2017 The WebM project authors. All Rights Reserved.
  3. *
  4. * Use of this source code is governed by a BSD-style license
  5. * that can be found in the LICENSE file in the root of the source
  6. * tree. An additional intellectual property rights grant can be found
  7. * in the file PATENTS. All contributing project authors may
  8. * be found in the AUTHORS file in the root of the source tree.
  9. */
  10. #include "./vpx_dsp_rtcd.h"
  11. #include "vpx/vpx_integer.h"
  12. #include "vpx_ports/mem.h"
  13. #include "vpx_ports/asmdefs_mmi.h"
  14. void vpx_subtract_block_mmi(int rows, int cols, int16_t *diff,
  15. ptrdiff_t diff_stride, const uint8_t *src,
  16. ptrdiff_t src_stride, const uint8_t *pred,
  17. ptrdiff_t pred_stride) {
  18. double ftmp[13];
  19. uint32_t tmp[1];
  20. if (rows == cols) {
  21. switch (rows) {
  22. case 4:
  23. __asm__ volatile(
  24. "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
  25. #if _MIPS_SIM == _ABIO32
  26. "ulw %[tmp0], 0x00(%[src]) \n\t"
  27. "mtc1 %[tmp0], %[ftmp1] \n\t"
  28. "ulw %[tmp0], 0x00(%[pred]) \n\t"
  29. "mtc1 %[tmp0], %[ftmp2] \n\t"
  30. #else
  31. "gslwlc1 %[ftmp1], 0x03(%[src]) \n\t"
  32. "gslwrc1 %[ftmp1], 0x00(%[src]) \n\t"
  33. "gslwlc1 %[ftmp2], 0x03(%[pred]) \n\t"
  34. "gslwrc1 %[ftmp2], 0x00(%[pred]) \n\t"
  35. #endif
  36. MMI_ADDU(%[src], %[src], %[src_stride])
  37. MMI_ADDU(%[pred], %[pred], %[pred_stride])
  38. #if _MIPS_SIM == _ABIO32
  39. "ulw %[tmp0], 0x00(%[src]) \n\t"
  40. "mtc1 %[tmp0], %[ftmp3] \n\t"
  41. "ulw %[tmp0], 0x00(%[pred]) \n\t"
  42. "mtc1 %[tmp0], %[ftmp4] \n\t"
  43. #else
  44. "gslwlc1 %[ftmp3], 0x03(%[src]) \n\t"
  45. "gslwrc1 %[ftmp3], 0x00(%[src]) \n\t"
  46. "gslwlc1 %[ftmp4], 0x03(%[pred]) \n\t"
  47. "gslwrc1 %[ftmp4], 0x00(%[pred]) \n\t"
  48. #endif
  49. MMI_ADDU(%[src], %[src], %[src_stride])
  50. MMI_ADDU(%[pred], %[pred], %[pred_stride])
  51. #if _MIPS_SIM == _ABIO32
  52. "ulw %[tmp0], 0x00(%[src]) \n\t"
  53. "mtc1 %[tmp0], %[ftmp5] \n\t"
  54. "ulw %[tmp0], 0x00(%[pred]) \n\t"
  55. "mtc1 %[tmp0], %[ftmp6] \n\t"
  56. #else
  57. "gslwlc1 %[ftmp5], 0x03(%[src]) \n\t"
  58. "gslwrc1 %[ftmp5], 0x00(%[src]) \n\t"
  59. "gslwlc1 %[ftmp6], 0x03(%[pred]) \n\t"
  60. "gslwrc1 %[ftmp6], 0x00(%[pred]) \n\t"
  61. #endif
  62. MMI_ADDU(%[src], %[src], %[src_stride])
  63. MMI_ADDU(%[pred], %[pred], %[pred_stride])
  64. #if _MIPS_SIM == _ABIO32
  65. "ulw %[tmp0], 0x00(%[src]) \n\t"
  66. "mtc1 %[tmp0], %[ftmp7] \n\t"
  67. "ulw %[tmp0], 0x00(%[pred]) \n\t"
  68. "mtc1 %[tmp0], %[ftmp8] \n\t"
  69. #else
  70. "gslwlc1 %[ftmp7], 0x03(%[src]) \n\t"
  71. "gslwrc1 %[ftmp7], 0x00(%[src]) \n\t"
  72. "gslwlc1 %[ftmp8], 0x03(%[pred]) \n\t"
  73. "gslwrc1 %[ftmp8], 0x00(%[pred]) \n\t"
  74. #endif
  75. "punpcklbh %[ftmp9], %[ftmp1], %[ftmp0] \n\t"
  76. "punpcklbh %[ftmp10], %[ftmp2], %[ftmp0] \n\t"
  77. "psubh %[ftmp11], %[ftmp9], %[ftmp10] \n\t"
  78. "gssdlc1 %[ftmp11], 0x07(%[diff]) \n\t"
  79. "gssdrc1 %[ftmp11], 0x00(%[diff]) \n\t"
  80. MMI_ADDU(%[diff], %[diff], %[diff_stride])
  81. "punpcklbh %[ftmp9], %[ftmp3], %[ftmp0] \n\t"
  82. "punpcklbh %[ftmp10], %[ftmp4], %[ftmp0] \n\t"
  83. "psubh %[ftmp11], %[ftmp9], %[ftmp10] \n\t"
  84. "gssdlc1 %[ftmp11], 0x07(%[diff]) \n\t"
  85. "gssdrc1 %[ftmp11], 0x00(%[diff]) \n\t"
  86. MMI_ADDU(%[diff], %[diff], %[diff_stride])
  87. "punpcklbh %[ftmp9], %[ftmp5], %[ftmp0] \n\t"
  88. "punpcklbh %[ftmp10], %[ftmp6], %[ftmp0] \n\t"
  89. "psubh %[ftmp11], %[ftmp9], %[ftmp10] \n\t"
  90. "gssdlc1 %[ftmp11], 0x07(%[diff]) \n\t"
  91. "gssdrc1 %[ftmp11], 0x00(%[diff]) \n\t"
  92. MMI_ADDU(%[diff], %[diff], %[diff_stride])
  93. "punpcklbh %[ftmp9], %[ftmp7], %[ftmp0] \n\t"
  94. "punpcklbh %[ftmp10], %[ftmp8], %[ftmp0] \n\t"
  95. "psubh %[ftmp11], %[ftmp9], %[ftmp10] \n\t"
  96. "gssdlc1 %[ftmp11], 0x07(%[diff]) \n\t"
  97. "gssdrc1 %[ftmp11], 0x00(%[diff]) \n\t"
  98. : [ftmp0] "=&f"(ftmp[0]), [ftmp1] "=&f"(ftmp[1]),
  99. [ftmp2] "=&f"(ftmp[2]), [ftmp3] "=&f"(ftmp[3]),
  100. [ftmp4] "=&f"(ftmp[4]), [ftmp5] "=&f"(ftmp[5]),
  101. [ftmp6] "=&f"(ftmp[6]), [ftmp7] "=&f"(ftmp[7]),
  102. [ftmp8] "=&f"(ftmp[8]), [ftmp9] "=&f"(ftmp[9]),
  103. [ftmp10] "=&f"(ftmp[10]), [ftmp11] "=&f"(ftmp[11]),
  104. #if _MIPS_SIM == _ABIO32
  105. [tmp0] "=&r"(tmp[0]),
  106. #endif
  107. [src] "+&r"(src), [pred] "+&r"(pred), [diff] "+&r"(diff)
  108. : [src_stride] "r"((mips_reg)src_stride),
  109. [pred_stride] "r"((mips_reg)pred_stride),
  110. [diff_stride] "r"((mips_reg)(diff_stride * 2))
  111. : "memory");
  112. break;
  113. case 8:
  114. __asm__ volatile(
  115. "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
  116. "li %[tmp0], 0x02 \n\t"
  117. "1: \n\t"
  118. "gsldlc1 %[ftmp1], 0x07(%[src]) \n\t"
  119. "gsldrc1 %[ftmp1], 0x00(%[src]) \n\t"
  120. "gsldlc1 %[ftmp2], 0x07(%[pred]) \n\t"
  121. "gsldrc1 %[ftmp2], 0x00(%[pred]) \n\t"
  122. MMI_ADDU(%[src], %[src], %[src_stride])
  123. MMI_ADDU(%[pred], %[pred], %[pred_stride])
  124. "gsldlc1 %[ftmp3], 0x07(%[src]) \n\t"
  125. "gsldrc1 %[ftmp3], 0x00(%[src]) \n\t"
  126. "gsldlc1 %[ftmp4], 0x07(%[pred]) \n\t"
  127. "gsldrc1 %[ftmp4], 0x00(%[pred]) \n\t"
  128. MMI_ADDU(%[src], %[src], %[src_stride])
  129. MMI_ADDU(%[pred], %[pred], %[pred_stride])
  130. "gsldlc1 %[ftmp5], 0x07(%[src]) \n\t"
  131. "gsldrc1 %[ftmp5], 0x00(%[src]) \n\t"
  132. "gsldlc1 %[ftmp6], 0x07(%[pred]) \n\t"
  133. "gsldrc1 %[ftmp6], 0x00(%[pred]) \n\t"
  134. MMI_ADDU(%[src], %[src], %[src_stride])
  135. MMI_ADDU(%[pred], %[pred], %[pred_stride])
  136. "gsldlc1 %[ftmp7], 0x07(%[src]) \n\t"
  137. "gsldrc1 %[ftmp7], 0x00(%[src]) \n\t"
  138. "gsldlc1 %[ftmp8], 0x07(%[pred]) \n\t"
  139. "gsldrc1 %[ftmp8], 0x00(%[pred]) \n\t"
  140. MMI_ADDU(%[src], %[src], %[src_stride])
  141. MMI_ADDU(%[pred], %[pred], %[pred_stride])
  142. "punpcklbh %[ftmp9], %[ftmp1], %[ftmp0] \n\t"
  143. "punpckhbh %[ftmp10], %[ftmp1], %[ftmp0] \n\t"
  144. "punpcklbh %[ftmp11], %[ftmp2], %[ftmp0] \n\t"
  145. "punpckhbh %[ftmp12], %[ftmp2], %[ftmp0] \n\t"
  146. "psubsh %[ftmp9], %[ftmp9], %[ftmp11] \n\t"
  147. "psubsh %[ftmp10], %[ftmp10], %[ftmp12] \n\t"
  148. "gssdlc1 %[ftmp9], 0x07(%[diff]) \n\t"
  149. "gssdrc1 %[ftmp9], 0x00(%[diff]) \n\t"
  150. "gssdlc1 %[ftmp10], 0x0f(%[diff]) \n\t"
  151. "gssdrc1 %[ftmp10], 0x08(%[diff]) \n\t"
  152. MMI_ADDU(%[diff], %[diff], %[diff_stride])
  153. "punpcklbh %[ftmp9], %[ftmp3], %[ftmp0] \n\t"
  154. "punpckhbh %[ftmp10], %[ftmp3], %[ftmp0] \n\t"
  155. "punpcklbh %[ftmp11], %[ftmp4], %[ftmp0] \n\t"
  156. "punpckhbh %[ftmp12], %[ftmp4], %[ftmp0] \n\t"
  157. "psubsh %[ftmp9], %[ftmp9], %[ftmp11] \n\t"
  158. "psubsh %[ftmp10], %[ftmp10], %[ftmp12] \n\t"
  159. "gssdlc1 %[ftmp9], 0x07(%[diff]) \n\t"
  160. "gssdrc1 %[ftmp9], 0x00(%[diff]) \n\t"
  161. "gssdlc1 %[ftmp10], 0x0f(%[diff]) \n\t"
  162. "gssdrc1 %[ftmp10], 0x08(%[diff]) \n\t"
  163. MMI_ADDU(%[diff], %[diff], %[diff_stride])
  164. "punpcklbh %[ftmp9], %[ftmp5], %[ftmp0] \n\t"
  165. "punpckhbh %[ftmp10], %[ftmp5], %[ftmp0] \n\t"
  166. "punpcklbh %[ftmp11], %[ftmp6], %[ftmp0] \n\t"
  167. "punpckhbh %[ftmp12], %[ftmp6], %[ftmp0] \n\t"
  168. "psubsh %[ftmp9], %[ftmp9], %[ftmp11] \n\t"
  169. "psubsh %[ftmp10], %[ftmp10], %[ftmp12] \n\t"
  170. "gssdlc1 %[ftmp9], 0x07(%[diff]) \n\t"
  171. "gssdrc1 %[ftmp9], 0x00(%[diff]) \n\t"
  172. "gssdlc1 %[ftmp10], 0x0f(%[diff]) \n\t"
  173. "gssdrc1 %[ftmp10], 0x08(%[diff]) \n\t"
  174. MMI_ADDU(%[diff], %[diff], %[diff_stride])
  175. "punpcklbh %[ftmp9], %[ftmp7], %[ftmp0] \n\t"
  176. "punpckhbh %[ftmp10], %[ftmp7], %[ftmp0] \n\t"
  177. "punpcklbh %[ftmp11], %[ftmp8], %[ftmp0] \n\t"
  178. "punpckhbh %[ftmp12], %[ftmp8], %[ftmp0] \n\t"
  179. "psubsh %[ftmp9], %[ftmp9], %[ftmp11] \n\t"
  180. "psubsh %[ftmp10], %[ftmp10], %[ftmp12] \n\t"
  181. "gssdlc1 %[ftmp9], 0x07(%[diff]) \n\t"
  182. "gssdrc1 %[ftmp9], 0x00(%[diff]) \n\t"
  183. "gssdlc1 %[ftmp10], 0x0f(%[diff]) \n\t"
  184. "gssdrc1 %[ftmp10], 0x08(%[diff]) \n\t"
  185. MMI_ADDU(%[diff], %[diff], %[diff_stride])
  186. "addiu %[tmp0], %[tmp0], -0x01 \n\t"
  187. "bnez %[tmp0], 1b \n\t"
  188. : [ftmp0] "=&f"(ftmp[0]), [ftmp1] "=&f"(ftmp[1]),
  189. [ftmp2] "=&f"(ftmp[2]), [ftmp3] "=&f"(ftmp[3]),
  190. [ftmp4] "=&f"(ftmp[4]), [ftmp5] "=&f"(ftmp[5]),
  191. [ftmp6] "=&f"(ftmp[6]), [ftmp7] "=&f"(ftmp[7]),
  192. [ftmp8] "=&f"(ftmp[8]), [ftmp9] "=&f"(ftmp[9]),
  193. [ftmp10] "=&f"(ftmp[10]), [ftmp11] "=&f"(ftmp[11]),
  194. [ftmp12] "=&f"(ftmp[12]), [tmp0] "=&r"(tmp[0]), [src] "+&r"(src),
  195. [pred] "+&r"(pred), [diff] "+&r"(diff)
  196. : [pred_stride] "r"((mips_reg)pred_stride),
  197. [src_stride] "r"((mips_reg)src_stride),
  198. [diff_stride] "r"((mips_reg)(diff_stride * 2))
  199. : "memory");
  200. break;
  201. case 16:
  202. __asm__ volatile(
  203. "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
  204. "li %[tmp0], 0x08 \n\t"
  205. "1: \n\t"
  206. "gsldlc1 %[ftmp1], 0x07(%[src]) \n\t"
  207. "gsldrc1 %[ftmp1], 0x00(%[src]) \n\t"
  208. "gsldlc1 %[ftmp2], 0x07(%[pred]) \n\t"
  209. "gsldrc1 %[ftmp2], 0x00(%[pred]) \n\t"
  210. "gsldlc1 %[ftmp3], 0x0f(%[src]) \n\t"
  211. "gsldrc1 %[ftmp3], 0x08(%[src]) \n\t"
  212. "gsldlc1 %[ftmp4], 0x0f(%[pred]) \n\t"
  213. "gsldrc1 %[ftmp4], 0x08(%[pred]) \n\t"
  214. MMI_ADDU(%[src], %[src], %[src_stride])
  215. MMI_ADDU(%[pred], %[pred], %[pred_stride])
  216. "gsldlc1 %[ftmp5], 0x07(%[src]) \n\t"
  217. "gsldrc1 %[ftmp5], 0x00(%[src]) \n\t"
  218. "gsldlc1 %[ftmp6], 0x07(%[pred]) \n\t"
  219. "gsldrc1 %[ftmp6], 0x00(%[pred]) \n\t"
  220. "gsldlc1 %[ftmp7], 0x0f(%[src]) \n\t"
  221. "gsldrc1 %[ftmp7], 0x08(%[src]) \n\t"
  222. "gsldlc1 %[ftmp8], 0x0f(%[pred]) \n\t"
  223. "gsldrc1 %[ftmp8], 0x08(%[pred]) \n\t"
  224. MMI_ADDU(%[src], %[src], %[src_stride])
  225. MMI_ADDU(%[pred], %[pred], %[pred_stride])
  226. "punpcklbh %[ftmp9], %[ftmp1], %[ftmp0] \n\t"
  227. "punpckhbh %[ftmp10], %[ftmp1], %[ftmp0] \n\t"
  228. "punpcklbh %[ftmp11], %[ftmp2], %[ftmp0] \n\t"
  229. "punpckhbh %[ftmp12], %[ftmp2], %[ftmp0] \n\t"
  230. "psubsh %[ftmp9], %[ftmp9], %[ftmp11] \n\t"
  231. "psubsh %[ftmp10], %[ftmp10], %[ftmp12] \n\t"
  232. "gssdlc1 %[ftmp9], 0x07(%[diff]) \n\t"
  233. "gssdrc1 %[ftmp9], 0x00(%[diff]) \n\t"
  234. "gssdlc1 %[ftmp10], 0x0f(%[diff]) \n\t"
  235. "gssdrc1 %[ftmp10], 0x08(%[diff]) \n\t"
  236. "punpcklbh %[ftmp9], %[ftmp3], %[ftmp0] \n\t"
  237. "punpckhbh %[ftmp10], %[ftmp3], %[ftmp0] \n\t"
  238. "punpcklbh %[ftmp11], %[ftmp4], %[ftmp0] \n\t"
  239. "punpckhbh %[ftmp12], %[ftmp4], %[ftmp0] \n\t"
  240. "psubsh %[ftmp9], %[ftmp9], %[ftmp11] \n\t"
  241. "psubsh %[ftmp10], %[ftmp10], %[ftmp12] \n\t"
  242. "gssdlc1 %[ftmp9], 0x17(%[diff]) \n\t"
  243. "gssdrc1 %[ftmp9], 0x10(%[diff]) \n\t"
  244. "gssdlc1 %[ftmp10], 0x1f(%[diff]) \n\t"
  245. "gssdrc1 %[ftmp10], 0x18(%[diff]) \n\t"
  246. MMI_ADDU(%[diff], %[diff], %[diff_stride])
  247. "punpcklbh %[ftmp9], %[ftmp5], %[ftmp0] \n\t"
  248. "punpckhbh %[ftmp10], %[ftmp5], %[ftmp0] \n\t"
  249. "punpcklbh %[ftmp11], %[ftmp6], %[ftmp0] \n\t"
  250. "punpckhbh %[ftmp12], %[ftmp6], %[ftmp0] \n\t"
  251. "psubsh %[ftmp9], %[ftmp9], %[ftmp11] \n\t"
  252. "psubsh %[ftmp10], %[ftmp10], %[ftmp12] \n\t"
  253. "gssdlc1 %[ftmp9], 0x07(%[diff]) \n\t"
  254. "gssdrc1 %[ftmp9], 0x00(%[diff]) \n\t"
  255. "gssdlc1 %[ftmp10], 0x0f(%[diff]) \n\t"
  256. "gssdrc1 %[ftmp10], 0x08(%[diff]) \n\t"
  257. "punpcklbh %[ftmp9], %[ftmp7], %[ftmp0] \n\t"
  258. "punpckhbh %[ftmp10], %[ftmp7], %[ftmp0] \n\t"
  259. "punpcklbh %[ftmp11], %[ftmp8], %[ftmp0] \n\t"
  260. "punpckhbh %[ftmp12], %[ftmp8], %[ftmp0] \n\t"
  261. "psubsh %[ftmp9], %[ftmp9], %[ftmp11] \n\t"
  262. "psubsh %[ftmp10], %[ftmp10], %[ftmp12] \n\t"
  263. "gssdlc1 %[ftmp9], 0x17(%[diff]) \n\t"
  264. "gssdrc1 %[ftmp9], 0x10(%[diff]) \n\t"
  265. "gssdlc1 %[ftmp10], 0x1f(%[diff]) \n\t"
  266. "gssdrc1 %[ftmp10], 0x18(%[diff]) \n\t"
  267. MMI_ADDU(%[diff], %[diff], %[diff_stride])
  268. "addiu %[tmp0], %[tmp0], -0x01 \n\t"
  269. "bnez %[tmp0], 1b \n\t"
  270. : [ftmp0] "=&f"(ftmp[0]), [ftmp1] "=&f"(ftmp[1]),
  271. [ftmp2] "=&f"(ftmp[2]), [ftmp3] "=&f"(ftmp[3]),
  272. [ftmp4] "=&f"(ftmp[4]), [ftmp5] "=&f"(ftmp[5]),
  273. [ftmp6] "=&f"(ftmp[6]), [ftmp7] "=&f"(ftmp[7]),
  274. [ftmp8] "=&f"(ftmp[8]), [ftmp9] "=&f"(ftmp[9]),
  275. [ftmp10] "=&f"(ftmp[10]), [ftmp11] "=&f"(ftmp[11]),
  276. [ftmp12] "=&f"(ftmp[12]), [tmp0] "=&r"(tmp[0]), [src] "+&r"(src),
  277. [pred] "+&r"(pred), [diff] "+&r"(diff)
  278. : [pred_stride] "r"((mips_reg)pred_stride),
  279. [src_stride] "r"((mips_reg)src_stride),
  280. [diff_stride] "r"((mips_reg)(diff_stride * 2))
  281. : "memory");
  282. break;
  283. case 32:
  284. vpx_subtract_block_c(rows, cols, diff, diff_stride, src, src_stride,
  285. pred, pred_stride);
  286. break;
  287. case 64:
  288. vpx_subtract_block_c(rows, cols, diff, diff_stride, src, src_stride,
  289. pred, pred_stride);
  290. break;
  291. default:
  292. vpx_subtract_block_c(rows, cols, diff, diff_stride, src, src_stride,
  293. pred, pred_stride);
  294. break;
  295. }
  296. } else {
  297. vpx_subtract_block_c(rows, cols, diff, diff_stride, src, src_stride, pred,
  298. pred_stride);
  299. }
  300. }