sad_mmi.c
/*
 *  Copyright (c) 2017 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "./vpx_dsp_rtcd.h"
#include "vpx_ports/asmdefs_mmi.h"
#include "vpx/vpx_integer.h"
#include "vpx_ports/mem.h"

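/* SAD_SRC_REF_ABS_SUB_{64,32,16,8,4}: accumulate one row's sum of absolute
 * differences between src and ref for the given block width. Each 8-byte
 * chunk is fetched with the unaligned loads gsldlc1/gsldrc1, pasubub forms
 * the per-byte absolute differences, biadd reduces them to a single sum, and
 * the result is added to the running total (%[ftmp5], or %[ftmp3] for the
 * 8- and 4-byte-wide variants). */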
#define SAD_SRC_REF_ABS_SUB_64 \
  "gsldlc1 %[ftmp1], 0x07(%[src]) \n\t" \
  "gsldrc1 %[ftmp1], 0x00(%[src]) \n\t" \
  "gsldlc1 %[ftmp2], 0x0f(%[src]) \n\t" \
  "gsldrc1 %[ftmp2], 0x08(%[src]) \n\t" \
  "gsldlc1 %[ftmp3], 0x07(%[ref]) \n\t" \
  "gsldrc1 %[ftmp3], 0x00(%[ref]) \n\t" \
  "gsldlc1 %[ftmp4], 0x0f(%[ref]) \n\t" \
  "gsldrc1 %[ftmp4], 0x08(%[ref]) \n\t" \
  "pasubub %[ftmp1], %[ftmp1], %[ftmp3] \n\t" \
  "pasubub %[ftmp2], %[ftmp2], %[ftmp4] \n\t" \
  "biadd %[ftmp1], %[ftmp1] \n\t" \
  "biadd %[ftmp2], %[ftmp2] \n\t" \
  "paddw %[ftmp5], %[ftmp5], %[ftmp1] \n\t" \
  "paddw %[ftmp5], %[ftmp5], %[ftmp2] \n\t" \
  "gsldlc1 %[ftmp1], 0x17(%[src]) \n\t" \
  "gsldrc1 %[ftmp1], 0x10(%[src]) \n\t" \
  "gsldlc1 %[ftmp2], 0x1f(%[src]) \n\t" \
  "gsldrc1 %[ftmp2], 0x18(%[src]) \n\t" \
  "gsldlc1 %[ftmp3], 0x17(%[ref]) \n\t" \
  "gsldrc1 %[ftmp3], 0x10(%[ref]) \n\t" \
  "gsldlc1 %[ftmp4], 0x1f(%[ref]) \n\t" \
  "gsldrc1 %[ftmp4], 0x18(%[ref]) \n\t" \
  "pasubub %[ftmp1], %[ftmp1], %[ftmp3] \n\t" \
  "pasubub %[ftmp2], %[ftmp2], %[ftmp4] \n\t" \
  "biadd %[ftmp1], %[ftmp1] \n\t" \
  "biadd %[ftmp2], %[ftmp2] \n\t" \
  "paddw %[ftmp5], %[ftmp5], %[ftmp1] \n\t" \
  "paddw %[ftmp5], %[ftmp5], %[ftmp2] \n\t" \
  "gsldlc1 %[ftmp1], 0x27(%[src]) \n\t" \
  "gsldrc1 %[ftmp1], 0x20(%[src]) \n\t" \
  "gsldlc1 %[ftmp2], 0x2f(%[src]) \n\t" \
  "gsldrc1 %[ftmp2], 0x28(%[src]) \n\t" \
  "gsldlc1 %[ftmp3], 0x27(%[ref]) \n\t" \
  "gsldrc1 %[ftmp3], 0x20(%[ref]) \n\t" \
  "gsldlc1 %[ftmp4], 0x2f(%[ref]) \n\t" \
  "gsldrc1 %[ftmp4], 0x28(%[ref]) \n\t" \
  "pasubub %[ftmp1], %[ftmp1], %[ftmp3] \n\t" \
  "pasubub %[ftmp2], %[ftmp2], %[ftmp4] \n\t" \
  "biadd %[ftmp1], %[ftmp1] \n\t" \
  "biadd %[ftmp2], %[ftmp2] \n\t" \
  "paddw %[ftmp5], %[ftmp5], %[ftmp1] \n\t" \
  "paddw %[ftmp5], %[ftmp5], %[ftmp2] \n\t" \
  "gsldlc1 %[ftmp1], 0x37(%[src]) \n\t" \
  "gsldrc1 %[ftmp1], 0x30(%[src]) \n\t" \
  "gsldlc1 %[ftmp2], 0x3f(%[src]) \n\t" \
  "gsldrc1 %[ftmp2], 0x38(%[src]) \n\t" \
  "gsldlc1 %[ftmp3], 0x37(%[ref]) \n\t" \
  "gsldrc1 %[ftmp3], 0x30(%[ref]) \n\t" \
  "gsldlc1 %[ftmp4], 0x3f(%[ref]) \n\t" \
  "gsldrc1 %[ftmp4], 0x38(%[ref]) \n\t" \
  "pasubub %[ftmp1], %[ftmp1], %[ftmp3] \n\t" \
  "pasubub %[ftmp2], %[ftmp2], %[ftmp4] \n\t" \
  "biadd %[ftmp1], %[ftmp1] \n\t" \
  "biadd %[ftmp2], %[ftmp2] \n\t" \
  "paddw %[ftmp5], %[ftmp5], %[ftmp1] \n\t" \
  "paddw %[ftmp5], %[ftmp5], %[ftmp2] \n\t"

#define SAD_SRC_REF_ABS_SUB_32 \
  "gsldlc1 %[ftmp1], 0x07(%[src]) \n\t" \
  "gsldrc1 %[ftmp1], 0x00(%[src]) \n\t" \
  "gsldlc1 %[ftmp2], 0x0f(%[src]) \n\t" \
  "gsldrc1 %[ftmp2], 0x08(%[src]) \n\t" \
  "gsldlc1 %[ftmp3], 0x07(%[ref]) \n\t" \
  "gsldrc1 %[ftmp3], 0x00(%[ref]) \n\t" \
  "gsldlc1 %[ftmp4], 0x0f(%[ref]) \n\t" \
  "gsldrc1 %[ftmp4], 0x08(%[ref]) \n\t" \
  "pasubub %[ftmp1], %[ftmp1], %[ftmp3] \n\t" \
  "pasubub %[ftmp2], %[ftmp2], %[ftmp4] \n\t" \
  "biadd %[ftmp1], %[ftmp1] \n\t" \
  "biadd %[ftmp2], %[ftmp2] \n\t" \
  "paddw %[ftmp5], %[ftmp5], %[ftmp1] \n\t" \
  "paddw %[ftmp5], %[ftmp5], %[ftmp2] \n\t" \
  "gsldlc1 %[ftmp1], 0x17(%[src]) \n\t" \
  "gsldrc1 %[ftmp1], 0x10(%[src]) \n\t" \
  "gsldlc1 %[ftmp2], 0x1f(%[src]) \n\t" \
  "gsldrc1 %[ftmp2], 0x18(%[src]) \n\t" \
  "gsldlc1 %[ftmp3], 0x17(%[ref]) \n\t" \
  "gsldrc1 %[ftmp3], 0x10(%[ref]) \n\t" \
  "gsldlc1 %[ftmp4], 0x1f(%[ref]) \n\t" \
  "gsldrc1 %[ftmp4], 0x18(%[ref]) \n\t" \
  "pasubub %[ftmp1], %[ftmp1], %[ftmp3] \n\t" \
  "pasubub %[ftmp2], %[ftmp2], %[ftmp4] \n\t" \
  "biadd %[ftmp1], %[ftmp1] \n\t" \
  "biadd %[ftmp2], %[ftmp2] \n\t" \
  "paddw %[ftmp5], %[ftmp5], %[ftmp1] \n\t" \
  "paddw %[ftmp5], %[ftmp5], %[ftmp2] \n\t"

#define SAD_SRC_REF_ABS_SUB_16 \
  "gsldlc1 %[ftmp1], 0x07(%[src]) \n\t" \
  "gsldrc1 %[ftmp1], 0x00(%[src]) \n\t" \
  "gsldlc1 %[ftmp2], 0x0f(%[src]) \n\t" \
  "gsldrc1 %[ftmp2], 0x08(%[src]) \n\t" \
  "gsldlc1 %[ftmp3], 0x07(%[ref]) \n\t" \
  "gsldrc1 %[ftmp3], 0x00(%[ref]) \n\t" \
  "gsldlc1 %[ftmp4], 0x0f(%[ref]) \n\t" \
  "gsldrc1 %[ftmp4], 0x08(%[ref]) \n\t" \
  "pasubub %[ftmp1], %[ftmp1], %[ftmp3] \n\t" \
  "pasubub %[ftmp2], %[ftmp2], %[ftmp4] \n\t" \
  "biadd %[ftmp1], %[ftmp1] \n\t" \
  "biadd %[ftmp2], %[ftmp2] \n\t" \
  "paddw %[ftmp5], %[ftmp5], %[ftmp1] \n\t" \
  "paddw %[ftmp5], %[ftmp5], %[ftmp2] \n\t"

#define SAD_SRC_REF_ABS_SUB_8 \
  "gsldlc1 %[ftmp1], 0x07(%[src]) \n\t" \
  "gsldrc1 %[ftmp1], 0x00(%[src]) \n\t" \
  "gsldlc1 %[ftmp2], 0x07(%[ref]) \n\t" \
  "gsldrc1 %[ftmp2], 0x00(%[ref]) \n\t" \
  "pasubub %[ftmp1], %[ftmp1], %[ftmp2] \n\t" \
  "biadd %[ftmp1], %[ftmp1] \n\t" \
  "paddw %[ftmp3], %[ftmp3], %[ftmp1] \n\t"

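/* The 4-byte-wide variant loads a single word per row. Under the O32 ABI the
 * word goes through a GPR (ulw + mtc1); under N32/N64 the unaligned word
 * loads gslwlc1/gslwrc1 are used directly. In both cases mthc1 zeroes the
 * upper half of the register so that biadd only sums the four valid byte
 * differences. */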
#if _MIPS_SIM == _ABIO32
#define SAD_SRC_REF_ABS_SUB_4 \
  "ulw %[tmp0], 0x00(%[src]) \n\t" \
  "mtc1 %[tmp0], %[ftmp1] \n\t" \
  "ulw %[tmp0], 0x00(%[ref]) \n\t" \
  "mtc1 %[tmp0], %[ftmp2] \n\t" \
  "pasubub %[ftmp1], %[ftmp1], %[ftmp2] \n\t" \
  "mthc1 $0, %[ftmp1] \n\t" \
  "biadd %[ftmp1], %[ftmp1] \n\t" \
  "paddw %[ftmp3], %[ftmp3], %[ftmp1] \n\t"
#else /* _MIPS_SIM == _ABI64 || _MIPS_SIM == _ABIN32 */
#define SAD_SRC_REF_ABS_SUB_4 \
  "gslwlc1 %[ftmp1], 0x03(%[src]) \n\t" \
  "gslwrc1 %[ftmp1], 0x00(%[src]) \n\t" \
  "gslwlc1 %[ftmp2], 0x03(%[ref]) \n\t" \
  "gslwrc1 %[ftmp2], 0x00(%[ref]) \n\t" \
  "pasubub %[ftmp1], %[ftmp1], %[ftmp2] \n\t" \
  "mthc1 $0, %[ftmp1] \n\t" \
  "biadd %[ftmp1], %[ftmp1] \n\t" \
  "paddw %[ftmp3], %[ftmp3], %[ftmp1] \n\t"
#endif /* _MIPS_SIM == _ABIO32 */

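/* SAD_SRC_AVGREF_ABS_SUB_{64,32,16,8,4}: same as above, but the reference row
 * is first averaged with second_pred (pavgb, rounding average of unsigned
 * bytes) before the absolute differences against src are accumulated. */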
#define SAD_SRC_AVGREF_ABS_SUB_64 \
  "gsldlc1 %[ftmp1], 0x07(%[second_pred]) \n\t" \
  "gsldrc1 %[ftmp1], 0x00(%[second_pred]) \n\t" \
  "gsldlc1 %[ftmp2], 0x0f(%[second_pred]) \n\t" \
  "gsldrc1 %[ftmp2], 0x08(%[second_pred]) \n\t" \
  "gsldlc1 %[ftmp3], 0x07(%[ref]) \n\t" \
  "gsldrc1 %[ftmp3], 0x00(%[ref]) \n\t" \
  "gsldlc1 %[ftmp4], 0x0f(%[ref]) \n\t" \
  "gsldrc1 %[ftmp4], 0x08(%[ref]) \n\t" \
  "pavgb %[ftmp3], %[ftmp1], %[ftmp3] \n\t" \
  "pavgb %[ftmp4], %[ftmp2], %[ftmp4] \n\t" \
  "gsldlc1 %[ftmp1], 0x07(%[src]) \n\t" \
  "gsldrc1 %[ftmp1], 0x00(%[src]) \n\t" \
  "gsldlc1 %[ftmp2], 0x0f(%[src]) \n\t" \
  "gsldrc1 %[ftmp2], 0x08(%[src]) \n\t" \
  "pasubub %[ftmp1], %[ftmp1], %[ftmp3] \n\t" \
  "pasubub %[ftmp2], %[ftmp2], %[ftmp4] \n\t" \
  "biadd %[ftmp1], %[ftmp1] \n\t" \
  "biadd %[ftmp2], %[ftmp2] \n\t" \
  "paddw %[ftmp5], %[ftmp5], %[ftmp1] \n\t" \
  "paddw %[ftmp5], %[ftmp5], %[ftmp2] \n\t" \
  "gsldlc1 %[ftmp1], 0x17(%[second_pred]) \n\t" \
  "gsldrc1 %[ftmp1], 0x10(%[second_pred]) \n\t" \
  "gsldlc1 %[ftmp2], 0x1f(%[second_pred]) \n\t" \
  "gsldrc1 %[ftmp2], 0x18(%[second_pred]) \n\t" \
  "gsldlc1 %[ftmp3], 0x17(%[ref]) \n\t" \
  "gsldrc1 %[ftmp3], 0x10(%[ref]) \n\t" \
  "gsldlc1 %[ftmp4], 0x1f(%[ref]) \n\t" \
  "gsldrc1 %[ftmp4], 0x18(%[ref]) \n\t" \
  "pavgb %[ftmp3], %[ftmp1], %[ftmp3] \n\t" \
  "pavgb %[ftmp4], %[ftmp2], %[ftmp4] \n\t" \
  "gsldlc1 %[ftmp1], 0x17(%[src]) \n\t" \
  "gsldrc1 %[ftmp1], 0x10(%[src]) \n\t" \
  "gsldlc1 %[ftmp2], 0x1f(%[src]) \n\t" \
  "gsldrc1 %[ftmp2], 0x18(%[src]) \n\t" \
  "pasubub %[ftmp1], %[ftmp1], %[ftmp3] \n\t" \
  "pasubub %[ftmp2], %[ftmp2], %[ftmp4] \n\t" \
  "biadd %[ftmp1], %[ftmp1] \n\t" \
  "biadd %[ftmp2], %[ftmp2] \n\t" \
  "paddw %[ftmp5], %[ftmp5], %[ftmp1] \n\t" \
  "paddw %[ftmp5], %[ftmp5], %[ftmp2] \n\t" \
  "gsldlc1 %[ftmp1], 0x27(%[second_pred]) \n\t" \
  "gsldrc1 %[ftmp1], 0x20(%[second_pred]) \n\t" \
  "gsldlc1 %[ftmp2], 0x2f(%[second_pred]) \n\t" \
  "gsldrc1 %[ftmp2], 0x28(%[second_pred]) \n\t" \
  "gsldlc1 %[ftmp3], 0x27(%[ref]) \n\t" \
  "gsldrc1 %[ftmp3], 0x20(%[ref]) \n\t" \
  "gsldlc1 %[ftmp4], 0x2f(%[ref]) \n\t" \
  "gsldrc1 %[ftmp4], 0x28(%[ref]) \n\t" \
  "pavgb %[ftmp3], %[ftmp1], %[ftmp3] \n\t" \
  "pavgb %[ftmp4], %[ftmp2], %[ftmp4] \n\t" \
  "gsldlc1 %[ftmp1], 0x27(%[src]) \n\t" \
  "gsldrc1 %[ftmp1], 0x20(%[src]) \n\t" \
  "gsldlc1 %[ftmp2], 0x2f(%[src]) \n\t" \
  "gsldrc1 %[ftmp2], 0x28(%[src]) \n\t" \
  "pasubub %[ftmp1], %[ftmp1], %[ftmp3] \n\t" \
  "pasubub %[ftmp2], %[ftmp2], %[ftmp4] \n\t" \
  "biadd %[ftmp1], %[ftmp1] \n\t" \
  "biadd %[ftmp2], %[ftmp2] \n\t" \
  "paddw %[ftmp5], %[ftmp5], %[ftmp1] \n\t" \
  "paddw %[ftmp5], %[ftmp5], %[ftmp2] \n\t" \
  "gsldlc1 %[ftmp1], 0x37(%[second_pred]) \n\t" \
  "gsldrc1 %[ftmp1], 0x30(%[second_pred]) \n\t" \
  "gsldlc1 %[ftmp2], 0x3f(%[second_pred]) \n\t" \
  "gsldrc1 %[ftmp2], 0x38(%[second_pred]) \n\t" \
  "gsldlc1 %[ftmp3], 0x37(%[ref]) \n\t" \
  "gsldrc1 %[ftmp3], 0x30(%[ref]) \n\t" \
  "gsldlc1 %[ftmp4], 0x3f(%[ref]) \n\t" \
  "gsldrc1 %[ftmp4], 0x38(%[ref]) \n\t" \
  "pavgb %[ftmp3], %[ftmp1], %[ftmp3] \n\t" \
  "pavgb %[ftmp4], %[ftmp2], %[ftmp4] \n\t" \
  "gsldlc1 %[ftmp1], 0x37(%[src]) \n\t" \
  "gsldrc1 %[ftmp1], 0x30(%[src]) \n\t" \
  "gsldlc1 %[ftmp2], 0x3f(%[src]) \n\t" \
  "gsldrc1 %[ftmp2], 0x38(%[src]) \n\t" \
  "pasubub %[ftmp1], %[ftmp1], %[ftmp3] \n\t" \
  "pasubub %[ftmp2], %[ftmp2], %[ftmp4] \n\t" \
  "biadd %[ftmp1], %[ftmp1] \n\t" \
  "biadd %[ftmp2], %[ftmp2] \n\t" \
  "paddw %[ftmp5], %[ftmp5], %[ftmp1] \n\t" \
  "paddw %[ftmp5], %[ftmp5], %[ftmp2] \n\t"

#define SAD_SRC_AVGREF_ABS_SUB_32 \
  "gsldlc1 %[ftmp1], 0x07(%[second_pred]) \n\t" \
  "gsldrc1 %[ftmp1], 0x00(%[second_pred]) \n\t" \
  "gsldlc1 %[ftmp2], 0x0f(%[second_pred]) \n\t" \
  "gsldrc1 %[ftmp2], 0x08(%[second_pred]) \n\t" \
  "gsldlc1 %[ftmp3], 0x07(%[ref]) \n\t" \
  "gsldrc1 %[ftmp3], 0x00(%[ref]) \n\t" \
  "gsldlc1 %[ftmp4], 0x0f(%[ref]) \n\t" \
  "gsldrc1 %[ftmp4], 0x08(%[ref]) \n\t" \
  "pavgb %[ftmp3], %[ftmp1], %[ftmp3] \n\t" \
  "pavgb %[ftmp4], %[ftmp2], %[ftmp4] \n\t" \
  "gsldlc1 %[ftmp1], 0x07(%[src]) \n\t" \
  "gsldrc1 %[ftmp1], 0x00(%[src]) \n\t" \
  "gsldlc1 %[ftmp2], 0x0f(%[src]) \n\t" \
  "gsldrc1 %[ftmp2], 0x08(%[src]) \n\t" \
  "pasubub %[ftmp1], %[ftmp1], %[ftmp3] \n\t" \
  "pasubub %[ftmp2], %[ftmp2], %[ftmp4] \n\t" \
  "biadd %[ftmp1], %[ftmp1] \n\t" \
  "biadd %[ftmp2], %[ftmp2] \n\t" \
  "paddw %[ftmp5], %[ftmp5], %[ftmp1] \n\t" \
  "paddw %[ftmp5], %[ftmp5], %[ftmp2] \n\t" \
  "gsldlc1 %[ftmp1], 0x17(%[second_pred]) \n\t" \
  "gsldrc1 %[ftmp1], 0x10(%[second_pred]) \n\t" \
  "gsldlc1 %[ftmp2], 0x1f(%[second_pred]) \n\t" \
  "gsldrc1 %[ftmp2], 0x18(%[second_pred]) \n\t" \
  "gsldlc1 %[ftmp3], 0x17(%[ref]) \n\t" \
  "gsldrc1 %[ftmp3], 0x10(%[ref]) \n\t" \
  "gsldlc1 %[ftmp4], 0x1f(%[ref]) \n\t" \
  "gsldrc1 %[ftmp4], 0x18(%[ref]) \n\t" \
  "pavgb %[ftmp3], %[ftmp1], %[ftmp3] \n\t" \
  "pavgb %[ftmp4], %[ftmp2], %[ftmp4] \n\t" \
  "gsldlc1 %[ftmp1], 0x17(%[src]) \n\t" \
  "gsldrc1 %[ftmp1], 0x10(%[src]) \n\t" \
  "gsldlc1 %[ftmp2], 0x1f(%[src]) \n\t" \
  "gsldrc1 %[ftmp2], 0x18(%[src]) \n\t" \
  "pasubub %[ftmp1], %[ftmp1], %[ftmp3] \n\t" \
  "pasubub %[ftmp2], %[ftmp2], %[ftmp4] \n\t" \
  "biadd %[ftmp1], %[ftmp1] \n\t" \
  "biadd %[ftmp2], %[ftmp2] \n\t" \
  "paddw %[ftmp5], %[ftmp5], %[ftmp1] \n\t" \
  "paddw %[ftmp5], %[ftmp5], %[ftmp2] \n\t"

#define SAD_SRC_AVGREF_ABS_SUB_16 \
  "gsldlc1 %[ftmp1], 0x07(%[second_pred]) \n\t" \
  "gsldrc1 %[ftmp1], 0x00(%[second_pred]) \n\t" \
  "gsldlc1 %[ftmp2], 0x0f(%[second_pred]) \n\t" \
  "gsldrc1 %[ftmp2], 0x08(%[second_pred]) \n\t" \
  "gsldlc1 %[ftmp3], 0x07(%[ref]) \n\t" \
  "gsldrc1 %[ftmp3], 0x00(%[ref]) \n\t" \
  "gsldlc1 %[ftmp4], 0x0f(%[ref]) \n\t" \
  "gsldrc1 %[ftmp4], 0x08(%[ref]) \n\t" \
  "pavgb %[ftmp3], %[ftmp1], %[ftmp3] \n\t" \
  "pavgb %[ftmp4], %[ftmp2], %[ftmp4] \n\t" \
  "gsldlc1 %[ftmp1], 0x07(%[src]) \n\t" \
  "gsldrc1 %[ftmp1], 0x00(%[src]) \n\t" \
  "gsldlc1 %[ftmp2], 0x0f(%[src]) \n\t" \
  "gsldrc1 %[ftmp2], 0x08(%[src]) \n\t" \
  "pasubub %[ftmp1], %[ftmp1], %[ftmp3] \n\t" \
  "pasubub %[ftmp2], %[ftmp2], %[ftmp4] \n\t" \
  "biadd %[ftmp1], %[ftmp1] \n\t" \
  "biadd %[ftmp2], %[ftmp2] \n\t" \
  "paddw %[ftmp5], %[ftmp5], %[ftmp1] \n\t" \
  "paddw %[ftmp5], %[ftmp5], %[ftmp2] \n\t"

#define SAD_SRC_AVGREF_ABS_SUB_8 \
  "gsldlc1 %[ftmp1], 0x07(%[second_pred]) \n\t" \
  "gsldrc1 %[ftmp1], 0x00(%[second_pred]) \n\t" \
  "gsldlc1 %[ftmp2], 0x07(%[ref]) \n\t" \
  "gsldrc1 %[ftmp2], 0x00(%[ref]) \n\t" \
  "pavgb %[ftmp2], %[ftmp1], %[ftmp2] \n\t" \
  "gsldlc1 %[ftmp1], 0x07(%[src]) \n\t" \
  "gsldrc1 %[ftmp1], 0x00(%[src]) \n\t" \
  "pasubub %[ftmp1], %[ftmp1], %[ftmp2] \n\t" \
  "biadd %[ftmp1], %[ftmp1] \n\t" \
  "paddw %[ftmp3], %[ftmp3], %[ftmp1] \n\t"

#if _MIPS_SIM == _ABIO32
#define SAD_SRC_AVGREF_ABS_SUB_4 \
  "ulw %[tmp0], 0x00(%[second_pred]) \n\t" \
  "mtc1 %[tmp0], %[ftmp1] \n\t" \
  "ulw %[tmp0], 0x00(%[ref]) \n\t" \
  "mtc1 %[tmp0], %[ftmp2] \n\t" \
  "pavgb %[ftmp2], %[ftmp1], %[ftmp2] \n\t" \
  "gsldlc1 %[ftmp1], 0x07(%[src]) \n\t" \
  "gsldrc1 %[ftmp1], 0x00(%[src]) \n\t" \
  "pasubub %[ftmp1], %[ftmp1], %[ftmp2] \n\t" \
  "mthc1 $0, %[ftmp1] \n\t" \
  "biadd %[ftmp1], %[ftmp1] \n\t" \
  "paddw %[ftmp3], %[ftmp3], %[ftmp1] \n\t"
#else /* _MIPS_SIM == _ABI64 || _MIPS_SIM == _ABIN32 */
#define SAD_SRC_AVGREF_ABS_SUB_4 \
  "gslwlc1 %[ftmp1], 0x03(%[second_pred]) \n\t" \
  "gslwrc1 %[ftmp1], 0x00(%[second_pred]) \n\t" \
  "gslwlc1 %[ftmp2], 0x03(%[ref]) \n\t" \
  "gslwrc1 %[ftmp2], 0x00(%[ref]) \n\t" \
  "pavgb %[ftmp2], %[ftmp1], %[ftmp2] \n\t" \
  "gsldlc1 %[ftmp1], 0x07(%[src]) \n\t" \
  "gsldrc1 %[ftmp1], 0x00(%[src]) \n\t" \
  "pasubub %[ftmp1], %[ftmp1], %[ftmp2] \n\t" \
  "mthc1 $0, %[ftmp1] \n\t" \
  "biadd %[ftmp1], %[ftmp1] \n\t" \
  "paddw %[ftmp3], %[ftmp3], %[ftmp1] \n\t"
#endif /* _MIPS_SIM == _ABIO32 */

// depending on call sites, pass **ref_array to avoid & in subsequent call and
// de-dup with 4D below.
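// sadMxNxK_mmi emits vpx_sadMxNxK_mmi(), which computes k SADs of the same
// MxN source block against references starting at k consecutive byte offsets
// within ref_array.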
#define sadMxNxK_mmi(m, n, k) \
  void vpx_sad##m##x##n##x##k##_mmi(const uint8_t *src, int src_stride, \
                                    const uint8_t *ref_array, int ref_stride, \
                                    uint32_t *sad_array) { \
    int i; \
    for (i = 0; i < (k); ++i) \
      sad_array[i] = \
          vpx_sad##m##x##n##_mmi(src, src_stride, &ref_array[i], ref_stride); \
  }

// This appears to be equivalent to the above when k == 4 and refs is const
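// sadMxNx4D_mmi emits vpx_sadMxNx4d_mmi(), which computes one SAD per
// independent reference pointer in ref_array[0..3].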
#define sadMxNx4D_mmi(m, n) \
  void vpx_sad##m##x##n##x4d_mmi(const uint8_t *src, int src_stride, \
                                 const uint8_t *const ref_array[], \
                                 int ref_stride, uint32_t *sad_array) { \
    int i; \
    for (i = 0; i < 4; ++i) \
      sad_array[i] = \
          vpx_sad##m##x##n##_mmi(src, src_stride, ref_array[i], ref_stride); \
  }

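// Sums the SAD over a 64-byte-wide block of `counter` rows; the loop handles
// two rows per iteration and keeps the running total in ftmp5.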
static inline unsigned int vpx_sad64x(const uint8_t *src, int src_stride,
                                      const uint8_t *ref, int ref_stride,
                                      int counter) {
  unsigned int sad;
  double ftmp1, ftmp2, ftmp3, ftmp4, ftmp5;
  mips_reg l_counter = counter;

  __asm__ volatile (
    "xor %[ftmp5], %[ftmp5], %[ftmp5] \n\t"
    "1: \n\t"
    // Two loop bodies are included per iteration to reduce loop overhead.
    SAD_SRC_REF_ABS_SUB_64
    MMI_ADDU(%[src], %[src], %[src_stride])
    MMI_ADDU(%[ref], %[ref], %[ref_stride])
    SAD_SRC_REF_ABS_SUB_64
    MMI_ADDU(%[src], %[src], %[src_stride])
    MMI_ADDU(%[ref], %[ref], %[ref_stride])
    MMI_ADDIU(%[counter], %[counter], -0x02)
    "bnez %[counter], 1b \n\t"
    "mfc1 %[sad], %[ftmp5] \n\t"
    : [ftmp1]"=&f"(ftmp1), [ftmp2]"=&f"(ftmp2), [ftmp3]"=&f"(ftmp3),
      [ftmp4]"=&f"(ftmp4), [ftmp5]"=&f"(ftmp5), [counter]"+&r"(l_counter),
      [src]"+&r"(src), [ref]"+&r"(ref), [sad]"=&r"(sad)
    : [src_stride]"r"((mips_reg)src_stride),
      [ref_stride]"r"((mips_reg)ref_stride)
  );
  return sad;
}

#define vpx_sad64xN(H) \
  unsigned int vpx_sad64x##H##_mmi(const uint8_t *src, int src_stride, \
                                   const uint8_t *ref, int ref_stride) { \
    return vpx_sad64x(src, src_stride, ref, ref_stride, H); \
  }

vpx_sad64xN(64);
vpx_sad64xN(32);
sadMxNx4D_mmi(64, 64);
sadMxNx4D_mmi(64, 32);

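// Same as vpx_sad64x, but each reference row is first averaged with the
// corresponding 64-byte row of second_pred before the SAD is taken.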
static inline unsigned int vpx_sad_avg64x(const uint8_t *src, int src_stride,
                                          const uint8_t *ref, int ref_stride,
                                          const uint8_t *second_pred,
                                          int counter) {
  unsigned int sad;
  double ftmp1, ftmp2, ftmp3, ftmp4, ftmp5;
  mips_reg l_counter = counter;

  __asm__ volatile (
    "xor %[ftmp5], %[ftmp5], %[ftmp5] \n\t"
    "1: \n\t"
    // Two loop bodies are included per iteration to reduce loop overhead.
    SAD_SRC_AVGREF_ABS_SUB_64
    MMI_ADDIU(%[second_pred], %[second_pred], 0x40)
    MMI_ADDU(%[src], %[src], %[src_stride])
    MMI_ADDU(%[ref], %[ref], %[ref_stride])
    SAD_SRC_AVGREF_ABS_SUB_64
    MMI_ADDIU(%[second_pred], %[second_pred], 0x40)
    MMI_ADDU(%[src], %[src], %[src_stride])
    MMI_ADDU(%[ref], %[ref], %[ref_stride])
    MMI_ADDIU(%[counter], %[counter], -0x02)
    "bnez %[counter], 1b \n\t"
    "mfc1 %[sad], %[ftmp5] \n\t"
    : [ftmp1]"=&f"(ftmp1), [ftmp2]"=&f"(ftmp2), [ftmp3]"=&f"(ftmp3),
      [ftmp4]"=&f"(ftmp4), [ftmp5]"=&f"(ftmp5), [counter]"+&r"(l_counter),
      [src]"+&r"(src), [ref]"+&r"(ref),
      [second_pred]"+&r"((mips_reg)second_pred),
      [sad]"=&r"(sad)
    : [src_stride]"r"((mips_reg)src_stride),
      [ref_stride]"r"((mips_reg)ref_stride)
  );
  return sad;
}

#define vpx_sad_avg64xN(H) \
  unsigned int vpx_sad64x##H##_avg_mmi(const uint8_t *src, int src_stride, \
                                       const uint8_t *ref, int ref_stride, \
                                       const uint8_t *second_pred) { \
    return vpx_sad_avg64x(src, src_stride, ref, ref_stride, second_pred, H); \
  }

vpx_sad_avg64xN(64);
vpx_sad_avg64xN(32);

static inline unsigned int vpx_sad32x(const uint8_t *src, int src_stride,
                                      const uint8_t *ref, int ref_stride,
                                      int counter) {
  unsigned int sad;
  double ftmp1, ftmp2, ftmp3, ftmp4, ftmp5;
  mips_reg l_counter = counter;

  __asm__ volatile (
    "xor %[ftmp5], %[ftmp5], %[ftmp5] \n\t"
    "1: \n\t"
    // Two loop bodies are included per iteration to reduce loop overhead.
    SAD_SRC_REF_ABS_SUB_32
    MMI_ADDU(%[src], %[src], %[src_stride])
    MMI_ADDU(%[ref], %[ref], %[ref_stride])
    SAD_SRC_REF_ABS_SUB_32
    MMI_ADDU(%[src], %[src], %[src_stride])
    MMI_ADDU(%[ref], %[ref], %[ref_stride])
    MMI_ADDIU(%[counter], %[counter], -0x02)
    "bnez %[counter], 1b \n\t"
    "mfc1 %[sad], %[ftmp5] \n\t"
    : [ftmp1]"=&f"(ftmp1), [ftmp2]"=&f"(ftmp2), [ftmp3]"=&f"(ftmp3),
      [ftmp4]"=&f"(ftmp4), [ftmp5]"=&f"(ftmp5), [counter]"+&r"(l_counter),
      [src]"+&r"(src), [ref]"+&r"(ref), [sad]"=&r"(sad)
    : [src_stride]"r"((mips_reg)src_stride),
      [ref_stride]"r"((mips_reg)ref_stride)
  );
  return sad;
}

#define vpx_sad32xN(H) \
  unsigned int vpx_sad32x##H##_mmi(const uint8_t *src, int src_stride, \
                                   const uint8_t *ref, int ref_stride) { \
    return vpx_sad32x(src, src_stride, ref, ref_stride, H); \
  }

vpx_sad32xN(64);
vpx_sad32xN(32);
vpx_sad32xN(16);
sadMxNx4D_mmi(32, 64);
sadMxNx4D_mmi(32, 32);
sadMxNx4D_mmi(32, 16);

static inline unsigned int vpx_sad_avg32x(const uint8_t *src, int src_stride,
                                          const uint8_t *ref, int ref_stride,
                                          const uint8_t *second_pred,
                                          int counter) {
  unsigned int sad;
  double ftmp1, ftmp2, ftmp3, ftmp4, ftmp5;
  mips_reg l_counter = counter;

  __asm__ volatile (
    "xor %[ftmp5], %[ftmp5], %[ftmp5] \n\t"
    "1: \n\t"
    // Two loop bodies are included per iteration to reduce loop overhead.
    SAD_SRC_AVGREF_ABS_SUB_32
    MMI_ADDIU(%[second_pred], %[second_pred], 0x20)
    MMI_ADDU(%[src], %[src], %[src_stride])
    MMI_ADDU(%[ref], %[ref], %[ref_stride])
    SAD_SRC_AVGREF_ABS_SUB_32
    MMI_ADDIU(%[second_pred], %[second_pred], 0x20)
    MMI_ADDU(%[src], %[src], %[src_stride])
    MMI_ADDU(%[ref], %[ref], %[ref_stride])
    MMI_ADDIU(%[counter], %[counter], -0x02)
    "bnez %[counter], 1b \n\t"
    "mfc1 %[sad], %[ftmp5] \n\t"
    : [ftmp1]"=&f"(ftmp1), [ftmp2]"=&f"(ftmp2), [ftmp3]"=&f"(ftmp3),
      [ftmp4]"=&f"(ftmp4), [ftmp5]"=&f"(ftmp5), [counter]"+&r"(l_counter),
      [src]"+&r"(src), [ref]"+&r"(ref),
      [second_pred]"+&r"((mips_reg)second_pred),
      [sad]"=&r"(sad)
    : [src_stride]"r"((mips_reg)src_stride),
      [ref_stride]"r"((mips_reg)ref_stride)
  );
  return sad;
}

#define vpx_sad_avg32xN(H) \
  unsigned int vpx_sad32x##H##_avg_mmi(const uint8_t *src, int src_stride, \
                                       const uint8_t *ref, int ref_stride, \
                                       const uint8_t *second_pred) { \
    return vpx_sad_avg32x(src, src_stride, ref, ref_stride, second_pred, H); \
  }

vpx_sad_avg32xN(64);
vpx_sad_avg32xN(32);
vpx_sad_avg32xN(16);

static inline unsigned int vpx_sad16x(const uint8_t *src, int src_stride,
                                      const uint8_t *ref, int ref_stride,
                                      int counter) {
  unsigned int sad;
  double ftmp1, ftmp2, ftmp3, ftmp4, ftmp5;
  mips_reg l_counter = counter;

  __asm__ volatile (
    "xor %[ftmp5], %[ftmp5], %[ftmp5] \n\t"
    "1: \n\t"
    // Two loop bodies are included per iteration to reduce loop overhead.
    SAD_SRC_REF_ABS_SUB_16
    MMI_ADDU(%[src], %[src], %[src_stride])
    MMI_ADDU(%[ref], %[ref], %[ref_stride])
    SAD_SRC_REF_ABS_SUB_16
    MMI_ADDU(%[src], %[src], %[src_stride])
    MMI_ADDU(%[ref], %[ref], %[ref_stride])
    MMI_ADDIU(%[counter], %[counter], -0x02)
    "bnez %[counter], 1b \n\t"
    "mfc1 %[sad], %[ftmp5] \n\t"
    : [ftmp1]"=&f"(ftmp1), [ftmp2]"=&f"(ftmp2), [ftmp3]"=&f"(ftmp3),
      [ftmp4]"=&f"(ftmp4), [ftmp5]"=&f"(ftmp5), [counter]"+&r"(l_counter),
      [src]"+&r"(src), [ref]"+&r"(ref), [sad]"=&r"(sad)
    : [src_stride]"r"((mips_reg)src_stride),
      [ref_stride]"r"((mips_reg)ref_stride)
  );
  return sad;
}

#define vpx_sad16xN(H) \
  unsigned int vpx_sad16x##H##_mmi(const uint8_t *src, int src_stride, \
                                   const uint8_t *ref, int ref_stride) { \
    return vpx_sad16x(src, src_stride, ref, ref_stride, H); \
  }

vpx_sad16xN(32);
vpx_sad16xN(16);
vpx_sad16xN(8);
sadMxNxK_mmi(16, 16, 3);
sadMxNxK_mmi(16, 16, 8);
sadMxNxK_mmi(16, 8, 3);
sadMxNxK_mmi(16, 8, 8);
sadMxNx4D_mmi(16, 32);
sadMxNx4D_mmi(16, 16);
sadMxNx4D_mmi(16, 8);

static inline unsigned int vpx_sad_avg16x(const uint8_t *src, int src_stride,
                                          const uint8_t *ref, int ref_stride,
                                          const uint8_t *second_pred,
                                          int counter) {
  unsigned int sad;
  double ftmp1, ftmp2, ftmp3, ftmp4, ftmp5;
  mips_reg l_counter = counter;

  __asm__ volatile (
    "xor %[ftmp5], %[ftmp5], %[ftmp5] \n\t"
    "1: \n\t"
    // Two loop bodies are included per iteration to reduce loop overhead.
    SAD_SRC_AVGREF_ABS_SUB_16
    MMI_ADDIU(%[second_pred], %[second_pred], 0x10)
    MMI_ADDU(%[src], %[src], %[src_stride])
    MMI_ADDU(%[ref], %[ref], %[ref_stride])
    SAD_SRC_AVGREF_ABS_SUB_16
    MMI_ADDIU(%[second_pred], %[second_pred], 0x10)
    MMI_ADDU(%[src], %[src], %[src_stride])
    MMI_ADDU(%[ref], %[ref], %[ref_stride])
    MMI_ADDIU(%[counter], %[counter], -0x02)
    "bnez %[counter], 1b \n\t"
    "mfc1 %[sad], %[ftmp5] \n\t"
    : [ftmp1]"=&f"(ftmp1), [ftmp2]"=&f"(ftmp2), [ftmp3]"=&f"(ftmp3),
      [ftmp4]"=&f"(ftmp4), [ftmp5]"=&f"(ftmp5), [counter]"+&r"(l_counter),
      [src]"+&r"(src), [ref]"+&r"(ref),
      [second_pred]"+&r"((mips_reg)second_pred),
      [sad]"=&r"(sad)
    : [src_stride]"r"((mips_reg)src_stride),
      [ref_stride]"r"((mips_reg)ref_stride)
  );
  return sad;
}

#define vpx_sad_avg16xN(H) \
  unsigned int vpx_sad16x##H##_avg_mmi(const uint8_t *src, int src_stride, \
                                       const uint8_t *ref, int ref_stride, \
                                       const uint8_t *second_pred) { \
    return vpx_sad_avg16x(src, src_stride, ref, ref_stride, second_pred, H); \
  }

vpx_sad_avg16xN(32);
vpx_sad_avg16xN(16);
vpx_sad_avg16xN(8);

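// The 8- and 4-pixel-wide kernels below need only three float temporaries and
// accumulate the SAD in ftmp3 instead of ftmp5.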
static inline unsigned int vpx_sad8x(const uint8_t *src, int src_stride,
                                     const uint8_t *ref, int ref_stride,
                                     int counter) {
  unsigned int sad;
  double ftmp1, ftmp2, ftmp3;
  mips_reg l_counter = counter;

  __asm__ volatile (
    "xor %[ftmp3], %[ftmp3], %[ftmp3] \n\t"
    "1: \n\t"
    // Two loop bodies are included per iteration to reduce loop overhead.
    SAD_SRC_REF_ABS_SUB_8
    MMI_ADDU(%[src], %[src], %[src_stride])
    MMI_ADDU(%[ref], %[ref], %[ref_stride])
    SAD_SRC_REF_ABS_SUB_8
    MMI_ADDU(%[src], %[src], %[src_stride])
    MMI_ADDU(%[ref], %[ref], %[ref_stride])
    MMI_ADDIU(%[counter], %[counter], -0x02)
    "bnez %[counter], 1b \n\t"
    "mfc1 %[sad], %[ftmp3] \n\t"
    : [ftmp1]"=&f"(ftmp1), [ftmp2]"=&f"(ftmp2), [ftmp3]"=&f"(ftmp3),
      [counter]"+&r"(l_counter), [src]"+&r"(src), [ref]"+&r"(ref),
      [sad]"=&r"(sad)
    : [src_stride]"r"((mips_reg)src_stride),
      [ref_stride]"r"((mips_reg)ref_stride)
  );
  return sad;
}

#define vpx_sad8xN(H) \
  unsigned int vpx_sad8x##H##_mmi(const uint8_t *src, int src_stride, \
                                  const uint8_t *ref, int ref_stride) { \
    return vpx_sad8x(src, src_stride, ref, ref_stride, H); \
  }

vpx_sad8xN(16);
vpx_sad8xN(8);
vpx_sad8xN(4);
sadMxNxK_mmi(8, 16, 3);
sadMxNxK_mmi(8, 16, 8);
sadMxNxK_mmi(8, 8, 3);
sadMxNxK_mmi(8, 8, 8);
sadMxNx4D_mmi(8, 16);
sadMxNx4D_mmi(8, 8);
sadMxNx4D_mmi(8, 4);

static inline unsigned int vpx_sad_avg8x(const uint8_t *src, int src_stride,
                                         const uint8_t *ref, int ref_stride,
                                         const uint8_t *second_pred,
                                         int counter) {
  unsigned int sad;
  double ftmp1, ftmp2, ftmp3;
  mips_reg l_counter = counter;

  __asm__ volatile (
    "xor %[ftmp3], %[ftmp3], %[ftmp3] \n\t"
    "1: \n\t"
    // Two loop bodies are included per iteration to reduce loop overhead.
    SAD_SRC_AVGREF_ABS_SUB_8
    MMI_ADDIU(%[second_pred], %[second_pred], 0x08)
    MMI_ADDU(%[src], %[src], %[src_stride])
    MMI_ADDU(%[ref], %[ref], %[ref_stride])
    SAD_SRC_AVGREF_ABS_SUB_8
    MMI_ADDIU(%[second_pred], %[second_pred], 0x08)
    MMI_ADDU(%[src], %[src], %[src_stride])
    MMI_ADDU(%[ref], %[ref], %[ref_stride])
    MMI_ADDIU(%[counter], %[counter], -0x02)
    "bnez %[counter], 1b \n\t"
    "mfc1 %[sad], %[ftmp3] \n\t"
    : [ftmp1]"=&f"(ftmp1), [ftmp2]"=&f"(ftmp2), [ftmp3]"=&f"(ftmp3),
      [counter]"+&r"(l_counter), [src]"+&r"(src), [ref]"+&r"(ref),
      [second_pred]"+&r"((mips_reg)second_pred),
      [sad]"=&r"(sad)
    : [src_stride]"r"((mips_reg)src_stride),
      [ref_stride]"r"((mips_reg)ref_stride)
  );
  return sad;
}

#define vpx_sad_avg8xN(H) \
  unsigned int vpx_sad8x##H##_avg_mmi(const uint8_t *src, int src_stride, \
                                      const uint8_t *ref, int ref_stride, \
                                      const uint8_t *second_pred) { \
    return vpx_sad_avg8x(src, src_stride, ref, ref_stride, second_pred, H); \
  }

vpx_sad_avg8xN(16);
vpx_sad_avg8xN(8);
vpx_sad_avg8xN(4);

static inline unsigned int vpx_sad4x(const uint8_t *src, int src_stride,
                                     const uint8_t *ref, int ref_stride,
                                     int counter) {
  unsigned int sad;
  double ftmp1, ftmp2, ftmp3;
  mips_reg l_counter = counter;
#if _MIPS_SIM == _ABIO32
  // Scratch GPR required by the ulw/mtc1 sequence in SAD_SRC_REF_ABS_SUB_4.
  mips_reg tmp0;
#endif

  __asm__ volatile (
    "xor %[ftmp3], %[ftmp3], %[ftmp3] \n\t"
    "1: \n\t"
    // Two loop bodies are included per iteration to reduce loop overhead.
    SAD_SRC_REF_ABS_SUB_4
    MMI_ADDU(%[src], %[src], %[src_stride])
    MMI_ADDU(%[ref], %[ref], %[ref_stride])
    SAD_SRC_REF_ABS_SUB_4
    MMI_ADDU(%[src], %[src], %[src_stride])
    MMI_ADDU(%[ref], %[ref], %[ref_stride])
    MMI_ADDIU(%[counter], %[counter], -0x02)
    "bnez %[counter], 1b \n\t"
    "mfc1 %[sad], %[ftmp3] \n\t"
    : [ftmp1]"=&f"(ftmp1), [ftmp2]"=&f"(ftmp2), [ftmp3]"=&f"(ftmp3),
#if _MIPS_SIM == _ABIO32
      [tmp0]"=&r"(tmp0),
#endif
      [counter]"+&r"(l_counter), [src]"+&r"(src), [ref]"+&r"(ref),
      [sad]"=&r"(sad)
    : [src_stride]"r"((mips_reg)src_stride),
      [ref_stride]"r"((mips_reg)ref_stride)
  );
  return sad;
}

#define vpx_sad4xN(H) \
  unsigned int vpx_sad4x##H##_mmi(const uint8_t *src, int src_stride, \
                                  const uint8_t *ref, int ref_stride) { \
    return vpx_sad4x(src, src_stride, ref, ref_stride, H); \
  }

vpx_sad4xN(8);
vpx_sad4xN(4);
sadMxNxK_mmi(4, 4, 3);
sadMxNxK_mmi(4, 4, 8);
sadMxNx4D_mmi(4, 8);
sadMxNx4D_mmi(4, 4);

static inline unsigned int vpx_sad_avg4x(const uint8_t *src, int src_stride,
                                         const uint8_t *ref, int ref_stride,
                                         const uint8_t *second_pred,
                                         int counter) {
  unsigned int sad;
  double ftmp1, ftmp2, ftmp3;
  mips_reg l_counter = counter;
#if _MIPS_SIM == _ABIO32
  // Scratch GPR required by the ulw/mtc1 sequence in SAD_SRC_AVGREF_ABS_SUB_4.
  mips_reg tmp0;
#endif

  __asm__ volatile (
    "xor %[ftmp3], %[ftmp3], %[ftmp3] \n\t"
    "1: \n\t"
    // Two loop bodies are included per iteration to reduce loop overhead.
    SAD_SRC_AVGREF_ABS_SUB_4
    MMI_ADDIU(%[second_pred], %[second_pred], 0x04)
    MMI_ADDU(%[src], %[src], %[src_stride])
    MMI_ADDU(%[ref], %[ref], %[ref_stride])
    SAD_SRC_AVGREF_ABS_SUB_4
    MMI_ADDIU(%[second_pred], %[second_pred], 0x04)
    MMI_ADDU(%[src], %[src], %[src_stride])
    MMI_ADDU(%[ref], %[ref], %[ref_stride])
    MMI_ADDIU(%[counter], %[counter], -0x02)
    "bnez %[counter], 1b \n\t"
    "mfc1 %[sad], %[ftmp3] \n\t"
    : [ftmp1]"=&f"(ftmp1), [ftmp2]"=&f"(ftmp2), [ftmp3]"=&f"(ftmp3),
#if _MIPS_SIM == _ABIO32
      [tmp0]"=&r"(tmp0),
#endif
      [counter]"+&r"(l_counter), [src]"+&r"(src), [ref]"+&r"(ref),
      [second_pred]"+&r"((mips_reg)second_pred),
      [sad]"=&r"(sad)
    : [src_stride]"r"((mips_reg)src_stride),
      [ref_stride]"r"((mips_reg)ref_stride)
  );
  return sad;
}

#define vpx_sad_avg4xN(H) \
  unsigned int vpx_sad4x##H##_avg_mmi(const uint8_t *src, int src_stride, \
                                      const uint8_t *ref, int ref_stride, \
                                      const uint8_t *second_pred) { \
    return vpx_sad_avg4x(src, src_stride, ref, ref_stride, second_pred, H); \
  }

vpx_sad_avg4xN(8);
vpx_sad_avg4xN(4);