intrapred8_dspr2.c 34 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603
  1. /*
  2. * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
  3. *
  4. * Use of this source code is governed by a BSD-style license
  5. * that can be found in the LICENSE file in the root of the source
  6. * tree. An additional intellectual property rights grant can be found
  7. * in the file PATENTS. All contributing project authors may
  8. * be found in the AUTHORS file in the root of the source tree.
  9. */
  10. #include "vpx_dsp/mips/common_dspr2.h"
  11. #if HAVE_DSPR2
  12. void vpx_h_predictor_8x8_dspr2(uint8_t *dst, ptrdiff_t stride,
  13. const uint8_t *above, const uint8_t *left) {
  14. int32_t tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp8;
  15. (void)above;
  16. __asm__ __volatile__(
  17. "lb %[tmp1], (%[left]) \n\t"
  18. "lb %[tmp2], 1(%[left]) \n\t"
  19. "lb %[tmp3], 2(%[left]) \n\t"
  20. "lb %[tmp4], 3(%[left]) \n\t"
  21. "lb %[tmp5], 4(%[left]) \n\t"
  22. "lb %[tmp6], 5(%[left]) \n\t"
  23. "lb %[tmp7], 6(%[left]) \n\t"
  24. "lb %[tmp8], 7(%[left]) \n\t"
  25. "replv.qb %[tmp1], %[tmp1] \n\t"
  26. "replv.qb %[tmp2], %[tmp2] \n\t"
  27. "replv.qb %[tmp3], %[tmp3] \n\t"
  28. "replv.qb %[tmp4], %[tmp4] \n\t"
  29. "replv.qb %[tmp5], %[tmp5] \n\t"
  30. "replv.qb %[tmp6], %[tmp6] \n\t"
  31. "replv.qb %[tmp7], %[tmp7] \n\t"
  32. "replv.qb %[tmp8], %[tmp8] \n\t"
  33. "sw %[tmp1], (%[dst]) \n\t"
  34. "sw %[tmp1], 4(%[dst]) \n\t"
  35. "add %[dst], %[dst], %[stride] \n\t"
  36. "sw %[tmp2], (%[dst]) \n\t"
  37. "sw %[tmp2], 4(%[dst]) \n\t"
  38. "add %[dst], %[dst], %[stride] \n\t"
  39. "sw %[tmp3], (%[dst]) \n\t"
  40. "sw %[tmp3], 4(%[dst]) \n\t"
  41. "add %[dst], %[dst], %[stride] \n\t"
  42. "sw %[tmp4], (%[dst]) \n\t"
  43. "sw %[tmp4], 4(%[dst]) \n\t"
  44. "add %[dst], %[dst], %[stride] \n\t"
  45. "sw %[tmp5], (%[dst]) \n\t"
  46. "sw %[tmp5], 4(%[dst]) \n\t"
  47. "add %[dst], %[dst], %[stride] \n\t"
  48. "sw %[tmp6], (%[dst]) \n\t"
  49. "sw %[tmp6], 4(%[dst]) \n\t"
  50. "add %[dst], %[dst], %[stride] \n\t"
  51. "sw %[tmp7], (%[dst]) \n\t"
  52. "sw %[tmp7], 4(%[dst]) \n\t"
  53. "add %[dst], %[dst], %[stride] \n\t"
  54. "sw %[tmp8], (%[dst]) \n\t"
  55. "sw %[tmp8], 4(%[dst]) \n\t"
  56. : [tmp1] "=&r"(tmp1), [tmp2] "=&r"(tmp2), [tmp3] "=&r"(tmp3),
  57. [tmp4] "=&r"(tmp4), [tmp5] "=&r"(tmp5), [tmp7] "=&r"(tmp7),
  58. [tmp6] "=&r"(tmp6), [tmp8] "=&r"(tmp8)
  59. : [left] "r"(left), [dst] "r"(dst), [stride] "r"(stride));
  60. }
  61. void vpx_dc_predictor_8x8_dspr2(uint8_t *dst, ptrdiff_t stride,
  62. const uint8_t *above, const uint8_t *left) {
  63. int32_t expected_dc;
  64. int32_t average;
  65. int32_t tmp, above1, above_l1, above_r1, left1, left_r1, left_l1;
  66. int32_t above2, above_l2, above_r2, left2, left_r2, left_l2;
  67. __asm__ __volatile__(
  68. "lw %[above1], (%[above]) \n\t"
  69. "lw %[above2], 4(%[above]) \n\t"
  70. "lw %[left1], (%[left]) \n\t"
  71. "lw %[left2], 4(%[left]) \n\t"
  72. "preceu.ph.qbl %[above_l1], %[above1] \n\t"
  73. "preceu.ph.qbr %[above_r1], %[above1] \n\t"
  74. "preceu.ph.qbl %[left_l1], %[left1] \n\t"
  75. "preceu.ph.qbr %[left_r1], %[left1] \n\t"
  76. "preceu.ph.qbl %[above_l2], %[above2] \n\t"
  77. "preceu.ph.qbr %[above_r2], %[above2] \n\t"
  78. "preceu.ph.qbl %[left_l2], %[left2] \n\t"
  79. "preceu.ph.qbr %[left_r2], %[left2] \n\t"
  80. "addu.ph %[average], %[above_r1], %[above_l1] \n\t"
  81. "addu.ph %[average], %[average], %[left_l1] \n\t"
  82. "addu.ph %[average], %[average], %[left_r1] \n\t"
  83. "addu.ph %[average], %[average], %[above_l2] \n\t"
  84. "addu.ph %[average], %[average], %[above_r2] \n\t"
  85. "addu.ph %[average], %[average], %[left_l2] \n\t"
  86. "addu.ph %[average], %[average], %[left_r2] \n\t"
  87. "addiu %[average], %[average], 8 \n\t"
  88. "srl %[tmp], %[average], 16 \n\t"
  89. "addu.ph %[average], %[tmp], %[average] \n\t"
  90. "srl %[expected_dc], %[average], 4 \n\t"
  91. "replv.qb %[expected_dc], %[expected_dc] \n\t"
  92. "sw %[expected_dc], (%[dst]) \n\t"
  93. "sw %[expected_dc], 4(%[dst]) \n\t"
  94. "add %[dst], %[dst], %[stride] \n\t"
  95. "sw %[expected_dc], (%[dst]) \n\t"
  96. "sw %[expected_dc], 4(%[dst]) \n\t"
  97. "add %[dst], %[dst], %[stride] \n\t"
  98. "sw %[expected_dc], (%[dst]) \n\t"
  99. "sw %[expected_dc], 4(%[dst]) \n\t"
  100. "add %[dst], %[dst], %[stride] \n\t"
  101. "sw %[expected_dc], (%[dst]) \n\t"
  102. "sw %[expected_dc], 4(%[dst]) \n\t"
  103. "add %[dst], %[dst], %[stride] \n\t"
  104. "sw %[expected_dc], (%[dst]) \n\t"
  105. "sw %[expected_dc], 4(%[dst]) \n\t"
  106. "add %[dst], %[dst], %[stride] \n\t"
  107. "sw %[expected_dc], (%[dst]) \n\t"
  108. "sw %[expected_dc], 4(%[dst]) \n\t"
  109. "add %[dst], %[dst], %[stride] \n\t"
  110. "sw %[expected_dc], (%[dst]) \n\t"
  111. "sw %[expected_dc], 4(%[dst]) \n\t"
  112. "add %[dst], %[dst], %[stride] \n\t"
  113. "sw %[expected_dc], (%[dst]) \n\t"
  114. "sw %[expected_dc], 4(%[dst]) \n\t"
  115. : [above1] "=&r"(above1), [above_l1] "=&r"(above_l1),
  116. [above_r1] "=&r"(above_r1), [left1] "=&r"(left1),
  117. [left_l1] "=&r"(left_l1), [left_r1] "=&r"(left_r1),
  118. [above2] "=&r"(above2), [above_l2] "=&r"(above_l2),
  119. [above_r2] "=&r"(above_r2), [left2] "=&r"(left2),
  120. [left_l2] "=&r"(left_l2), [left_r2] "=&r"(left_r2),
  121. [average] "=&r"(average), [tmp] "=&r"(tmp),
  122. [expected_dc] "=&r"(expected_dc)
  123. : [above] "r"(above), [left] "r"(left), [dst] "r"(dst),
  124. [stride] "r"(stride));
  125. }
  126. void vpx_tm_predictor_8x8_dspr2(uint8_t *dst, ptrdiff_t stride,
  127. const uint8_t *above, const uint8_t *left) {
  128. int32_t abovel, abover;
  129. int32_t abovel_1, abover_1;
  130. int32_t left0;
  131. int32_t res0, res1, res2, res3;
  132. int32_t reshw;
  133. int32_t top_left;
  134. uint8_t *cm = vpx_ff_cropTbl;
  135. __asm__ __volatile__(
  136. "ulw %[reshw], (%[above]) \n\t"
  137. "ulw %[top_left], 4(%[above]) \n\t"
  138. "lbu %[left0], (%[left]) \n\t"
  139. "preceu.ph.qbl %[abovel], %[reshw] \n\t"
  140. "preceu.ph.qbr %[abover], %[reshw] \n\t"
  141. "preceu.ph.qbl %[abovel_1], %[top_left] \n\t"
  142. "preceu.ph.qbr %[abover_1], %[top_left] \n\t"
  143. "lbu %[top_left], -1(%[above]) \n\t"
  144. "replv.ph %[left0], %[left0] \n\t"
  145. "replv.ph %[top_left], %[top_left] \n\t"
  146. "addu.ph %[reshw], %[abovel], %[left0] \n\t"
  147. "subu.ph %[reshw], %[reshw], %[top_left] \n\t"
  148. "sll %[res2], %[reshw], 16 \n\t"
  149. "sra %[res2], %[res2], 16 \n\t"
  150. "sra %[res3], %[reshw], 16 \n\t"
  151. "addu.ph %[reshw], %[abover], %[left0] \n\t"
  152. "subu.ph %[reshw], %[reshw], %[top_left] \n\t"
  153. "sll %[res0], %[reshw], 16 \n\t"
  154. "sra %[res0], %[res0], 16 \n\t"
  155. "sra %[res1], %[reshw], 16 \n\t"
  156. "lbux %[res0], %[res0](%[cm]) \n\t"
  157. "lbux %[res1], %[res1](%[cm]) \n\t"
  158. "lbux %[res2], %[res2](%[cm]) \n\t"
  159. "lbux %[res3], %[res3](%[cm]) \n\t"
  160. "sb %[res0], (%[dst]) \n\t"
  161. "sb %[res1], 1(%[dst]) \n\t"
  162. "sb %[res2], 2(%[dst]) \n\t"
  163. "sb %[res3], 3(%[dst]) \n\t"
  164. "addu.ph %[reshw], %[abovel_1], %[left0] \n\t"
  165. "subu.ph %[reshw], %[reshw], %[top_left] \n\t"
  166. "sll %[res2], %[reshw], 16 \n\t"
  167. "sra %[res2], %[res2], 16 \n\t"
  168. "sra %[res3], %[reshw], 16 \n\t"
  169. "addu.ph %[reshw], %[abover_1], %[left0] \n\t"
  170. "subu.ph %[reshw], %[reshw], %[top_left] \n\t"
  171. "sll %[res0], %[reshw], 16 \n\t"
  172. "sra %[res0], %[res0], 16 \n\t"
  173. "sra %[res1], %[reshw], 16 \n\t"
  174. "lbu %[left0], 1(%[left]) \n\t"
  175. "lbux %[res0], %[res0](%[cm]) \n\t"
  176. "lbux %[res1], %[res1](%[cm]) \n\t"
  177. "lbux %[res2], %[res2](%[cm]) \n\t"
  178. "lbux %[res3], %[res3](%[cm]) \n\t"
  179. "sb %[res0], 4(%[dst]) \n\t"
  180. "sb %[res1], 5(%[dst]) \n\t"
  181. "sb %[res2], 6(%[dst]) \n\t"
  182. "sb %[res3], 7(%[dst]) \n\t"
  183. "replv.ph %[left0], %[left0] \n\t"
  184. "add %[dst], %[dst], %[stride] \n\t"
  185. "addu.ph %[reshw], %[abovel], %[left0] \n\t"
  186. "subu.ph %[reshw], %[reshw], %[top_left] \n\t"
  187. "sll %[res2], %[reshw], 16 \n\t"
  188. "sra %[res2], %[res2], 16 \n\t"
  189. "sra %[res3], %[reshw], 16 \n\t"
  190. "addu.ph %[reshw], %[abover], %[left0] \n\t"
  191. "subu.ph %[reshw], %[reshw], %[top_left] \n\t"
  192. "sll %[res0], %[reshw], 16 \n\t"
  193. "sra %[res0], %[res0], 16 \n\t"
  194. "sra %[res1], %[reshw], 16 \n\t"
  195. "lbux %[res0], %[res0](%[cm]) \n\t"
  196. "lbux %[res1], %[res1](%[cm]) \n\t"
  197. "lbux %[res2], %[res2](%[cm]) \n\t"
  198. "lbux %[res3], %[res3](%[cm]) \n\t"
  199. "sb %[res0], (%[dst]) \n\t"
  200. "sb %[res1], 1(%[dst]) \n\t"
  201. "sb %[res2], 2(%[dst]) \n\t"
  202. "sb %[res3], 3(%[dst]) \n\t"
  203. "addu.ph %[reshw], %[abovel_1], %[left0] \n\t"
  204. "subu.ph %[reshw], %[reshw], %[top_left] \n\t"
  205. "sll %[res2], %[reshw], 16 \n\t"
  206. "sra %[res2], %[res2], 16 \n\t"
  207. "sra %[res3], %[reshw], 16 \n\t"
  208. "addu.ph %[reshw], %[abover_1], %[left0] \n\t"
  209. "subu.ph %[reshw], %[reshw], %[top_left] \n\t"
  210. "sll %[res0], %[reshw], 16 \n\t"
  211. "sra %[res0], %[res0], 16 \n\t"
  212. "sra %[res1], %[reshw], 16 \n\t"
  213. "lbu %[left0], 2(%[left]) \n\t"
  214. "lbux %[res0], %[res0](%[cm]) \n\t"
  215. "lbux %[res1], %[res1](%[cm]) \n\t"
  216. "lbux %[res2], %[res2](%[cm]) \n\t"
  217. "lbux %[res3], %[res3](%[cm]) \n\t"
  218. "sb %[res0], 4(%[dst]) \n\t"
  219. "sb %[res1], 5(%[dst]) \n\t"
  220. "sb %[res2], 6(%[dst]) \n\t"
  221. "sb %[res3], 7(%[dst]) \n\t"
  222. "replv.ph %[left0], %[left0] \n\t"
  223. "add %[dst], %[dst], %[stride] \n\t"
  224. "addu.ph %[reshw], %[abovel], %[left0] \n\t"
  225. "subu.ph %[reshw], %[reshw], %[top_left] \n\t"
  226. "sll %[res2], %[reshw], 16 \n\t"
  227. "sra %[res2], %[res2], 16 \n\t"
  228. "sra %[res3], %[reshw], 16 \n\t"
  229. "addu.ph %[reshw], %[abover], %[left0] \n\t"
  230. "subu.ph %[reshw], %[reshw], %[top_left] \n\t"
  231. "sll %[res0], %[reshw], 16 \n\t"
  232. "sra %[res0], %[res0], 16 \n\t"
  233. "sra %[res1], %[reshw], 16 \n\t"
  234. "lbux %[res0], %[res0](%[cm]) \n\t"
  235. "lbux %[res1], %[res1](%[cm]) \n\t"
  236. "lbux %[res2], %[res2](%[cm]) \n\t"
  237. "lbux %[res3], %[res3](%[cm]) \n\t"
  238. "sb %[res0], (%[dst]) \n\t"
  239. "sb %[res1], 1(%[dst]) \n\t"
  240. "sb %[res2], 2(%[dst]) \n\t"
  241. "sb %[res3], 3(%[dst]) \n\t"
  242. "addu.ph %[reshw], %[abovel_1], %[left0] \n\t"
  243. "subu.ph %[reshw], %[reshw], %[top_left] \n\t"
  244. "sll %[res2], %[reshw], 16 \n\t"
  245. "sra %[res2], %[res2], 16 \n\t"
  246. "sra %[res3], %[reshw], 16 \n\t"
  247. "addu.ph %[reshw], %[abover_1], %[left0] \n\t"
  248. "subu.ph %[reshw], %[reshw], %[top_left] \n\t"
  249. "sll %[res0], %[reshw], 16 \n\t"
  250. "sra %[res0], %[res0], 16 \n\t"
  251. "sra %[res1], %[reshw], 16 \n\t"
  252. "lbu %[left0], 3(%[left]) \n\t"
  253. "lbux %[res0], %[res0](%[cm]) \n\t"
  254. "lbux %[res1], %[res1](%[cm]) \n\t"
  255. "lbux %[res2], %[res2](%[cm]) \n\t"
  256. "lbux %[res3], %[res3](%[cm]) \n\t"
  257. "sb %[res0], 4(%[dst]) \n\t"
  258. "sb %[res1], 5(%[dst]) \n\t"
  259. "sb %[res2], 6(%[dst]) \n\t"
  260. "sb %[res3], 7(%[dst]) \n\t"
  261. "replv.ph %[left0], %[left0] \n\t"
  262. "add %[dst], %[dst], %[stride] \n\t"
  263. "addu.ph %[reshw], %[abovel], %[left0] \n\t"
  264. "subu.ph %[reshw], %[reshw], %[top_left] \n\t"
  265. "sll %[res2], %[reshw], 16 \n\t"
  266. "sra %[res2], %[res2], 16 \n\t"
  267. "sra %[res3], %[reshw], 16 \n\t"
  268. "addu.ph %[reshw], %[abover], %[left0] \n\t"
  269. "subu.ph %[reshw], %[reshw], %[top_left] \n\t"
  270. "sll %[res0], %[reshw], 16 \n\t"
  271. "sra %[res0], %[res0], 16 \n\t"
  272. "sra %[res1], %[reshw], 16 \n\t"
  273. "lbux %[res0], %[res0](%[cm]) \n\t"
  274. "lbux %[res1], %[res1](%[cm]) \n\t"
  275. "lbux %[res2], %[res2](%[cm]) \n\t"
  276. "lbux %[res3], %[res3](%[cm]) \n\t"
  277. "sb %[res0], (%[dst]) \n\t"
  278. "sb %[res1], 1(%[dst]) \n\t"
  279. "sb %[res2], 2(%[dst]) \n\t"
  280. "sb %[res3], 3(%[dst]) \n\t"
  281. "addu.ph %[reshw], %[abovel_1], %[left0] \n\t"
  282. "subu.ph %[reshw], %[reshw], %[top_left] \n\t"
  283. "sll %[res2], %[reshw], 16 \n\t"
  284. "sra %[res2], %[res2], 16 \n\t"
  285. "sra %[res3], %[reshw], 16 \n\t"
  286. "addu.ph %[reshw], %[abover_1], %[left0] \n\t"
  287. "subu.ph %[reshw], %[reshw], %[top_left] \n\t"
  288. "sll %[res0], %[reshw], 16 \n\t"
  289. "sra %[res0], %[res0], 16 \n\t"
  290. "sra %[res1], %[reshw], 16 \n\t"
  291. "lbu %[left0], 4(%[left]) \n\t"
  292. "lbux %[res0], %[res0](%[cm]) \n\t"
  293. "lbux %[res1], %[res1](%[cm]) \n\t"
  294. "lbux %[res2], %[res2](%[cm]) \n\t"
  295. "lbux %[res3], %[res3](%[cm]) \n\t"
  296. "sb %[res0], 4(%[dst]) \n\t"
  297. "sb %[res1], 5(%[dst]) \n\t"
  298. "sb %[res2], 6(%[dst]) \n\t"
  299. "sb %[res3], 7(%[dst]) \n\t"
  300. "replv.ph %[left0], %[left0] \n\t"
  301. "add %[dst], %[dst], %[stride] \n\t"
  302. "addu.ph %[reshw], %[abovel], %[left0] \n\t"
  303. "subu.ph %[reshw], %[reshw], %[top_left] \n\t"
  304. "sll %[res2], %[reshw], 16 \n\t"
  305. "sra %[res2], %[res2], 16 \n\t"
  306. "sra %[res3], %[reshw], 16 \n\t"
  307. "addu.ph %[reshw], %[abover], %[left0] \n\t"
  308. "subu.ph %[reshw], %[reshw], %[top_left] \n\t"
  309. "sll %[res0], %[reshw], 16 \n\t"
  310. "sra %[res0], %[res0], 16 \n\t"
  311. "sra %[res1], %[reshw], 16 \n\t"
  312. "lbux %[res0], %[res0](%[cm]) \n\t"
  313. "lbux %[res1], %[res1](%[cm]) \n\t"
  314. "lbux %[res2], %[res2](%[cm]) \n\t"
  315. "lbux %[res3], %[res3](%[cm]) \n\t"
  316. "sb %[res0], (%[dst]) \n\t"
  317. "sb %[res1], 1(%[dst]) \n\t"
  318. "sb %[res2], 2(%[dst]) \n\t"
  319. "sb %[res3], 3(%[dst]) \n\t"
  320. "addu.ph %[reshw], %[abovel_1], %[left0] \n\t"
  321. "subu.ph %[reshw], %[reshw], %[top_left] \n\t"
  322. "sll %[res2], %[reshw], 16 \n\t"
  323. "sra %[res2], %[res2], 16 \n\t"
  324. "sra %[res3], %[reshw], 16 \n\t"
  325. "addu.ph %[reshw], %[abover_1], %[left0] \n\t"
  326. "subu.ph %[reshw], %[reshw], %[top_left] \n\t"
  327. "sll %[res0], %[reshw], 16 \n\t"
  328. "sra %[res0], %[res0], 16 \n\t"
  329. "sra %[res1], %[reshw], 16 \n\t"
  330. "lbu %[left0], 5(%[left]) \n\t"
  331. "lbux %[res0], %[res0](%[cm]) \n\t"
  332. "lbux %[res1], %[res1](%[cm]) \n\t"
  333. "lbux %[res2], %[res2](%[cm]) \n\t"
  334. "lbux %[res3], %[res3](%[cm]) \n\t"
  335. "sb %[res0], 4(%[dst]) \n\t"
  336. "sb %[res1], 5(%[dst]) \n\t"
  337. "sb %[res2], 6(%[dst]) \n\t"
  338. "sb %[res3], 7(%[dst]) \n\t"
  339. "replv.ph %[left0], %[left0] \n\t"
  340. "add %[dst], %[dst], %[stride] \n\t"
  341. "addu.ph %[reshw], %[abovel], %[left0] \n\t"
  342. "subu.ph %[reshw], %[reshw], %[top_left] \n\t"
  343. "sll %[res2], %[reshw], 16 \n\t"
  344. "sra %[res2], %[res2], 16 \n\t"
  345. "sra %[res3], %[reshw], 16 \n\t"
  346. "addu.ph %[reshw], %[abover], %[left0] \n\t"
  347. "subu.ph %[reshw], %[reshw], %[top_left] \n\t"
  348. "sll %[res0], %[reshw], 16 \n\t"
  349. "sra %[res0], %[res0], 16 \n\t"
  350. "sra %[res1], %[reshw], 16 \n\t"
  351. "lbux %[res0], %[res0](%[cm]) \n\t"
  352. "lbux %[res1], %[res1](%[cm]) \n\t"
  353. "lbux %[res2], %[res2](%[cm]) \n\t"
  354. "lbux %[res3], %[res3](%[cm]) \n\t"
  355. "sb %[res0], (%[dst]) \n\t"
  356. "sb %[res1], 1(%[dst]) \n\t"
  357. "sb %[res2], 2(%[dst]) \n\t"
  358. "sb %[res3], 3(%[dst]) \n\t"
  359. "addu.ph %[reshw], %[abovel_1], %[left0] \n\t"
  360. "subu.ph %[reshw], %[reshw], %[top_left] \n\t"
  361. "sll %[res2], %[reshw], 16 \n\t"
  362. "sra %[res2], %[res2], 16 \n\t"
  363. "sra %[res3], %[reshw], 16 \n\t"
  364. "addu.ph %[reshw], %[abover_1], %[left0] \n\t"
  365. "subu.ph %[reshw], %[reshw], %[top_left] \n\t"
  366. "sll %[res0], %[reshw], 16 \n\t"
  367. "sra %[res0], %[res0], 16 \n\t"
  368. "sra %[res1], %[reshw], 16 \n\t"
  369. "lbu %[left0], 6(%[left]) \n\t"
  370. "lbux %[res0], %[res0](%[cm]) \n\t"
  371. "lbux %[res1], %[res1](%[cm]) \n\t"
  372. "lbux %[res2], %[res2](%[cm]) \n\t"
  373. "lbux %[res3], %[res3](%[cm]) \n\t"
  374. "sb %[res0], 4(%[dst]) \n\t"
  375. "sb %[res1], 5(%[dst]) \n\t"
  376. "sb %[res2], 6(%[dst]) \n\t"
  377. "sb %[res3], 7(%[dst]) \n\t"
  378. "replv.ph %[left0], %[left0] \n\t"
  379. "add %[dst], %[dst], %[stride] \n\t"
  380. "addu.ph %[reshw], %[abovel], %[left0] \n\t"
  381. "subu.ph %[reshw], %[reshw], %[top_left] \n\t"
  382. "sll %[res2], %[reshw], 16 \n\t"
  383. "sra %[res2], %[res2], 16 \n\t"
  384. "sra %[res3], %[reshw], 16 \n\t"
  385. "addu.ph %[reshw], %[abover], %[left0] \n\t"
  386. "subu.ph %[reshw], %[reshw], %[top_left] \n\t"
  387. "sll %[res0], %[reshw], 16 \n\t"
  388. "sra %[res0], %[res0], 16 \n\t"
  389. "sra %[res1], %[reshw], 16 \n\t"
  390. "lbux %[res0], %[res0](%[cm]) \n\t"
  391. "lbux %[res1], %[res1](%[cm]) \n\t"
  392. "lbux %[res2], %[res2](%[cm]) \n\t"
  393. "lbux %[res3], %[res3](%[cm]) \n\t"
  394. "sb %[res0], (%[dst]) \n\t"
  395. "sb %[res1], 1(%[dst]) \n\t"
  396. "sb %[res2], 2(%[dst]) \n\t"
  397. "sb %[res3], 3(%[dst]) \n\t"
  398. "addu.ph %[reshw], %[abovel_1], %[left0] \n\t"
  399. "subu.ph %[reshw], %[reshw], %[top_left] \n\t"
  400. "sll %[res2], %[reshw], 16 \n\t"
  401. "sra %[res2], %[res2], 16 \n\t"
  402. "sra %[res3], %[reshw], 16 \n\t"
  403. "addu.ph %[reshw], %[abover_1], %[left0] \n\t"
  404. "subu.ph %[reshw], %[reshw], %[top_left] \n\t"
  405. "sll %[res0], %[reshw], 16 \n\t"
  406. "sra %[res0], %[res0], 16 \n\t"
  407. "sra %[res1], %[reshw], 16 \n\t"
  408. "lbu %[left0], 7(%[left]) \n\t"
  409. "lbux %[res0], %[res0](%[cm]) \n\t"
  410. "lbux %[res1], %[res1](%[cm]) \n\t"
  411. "lbux %[res2], %[res2](%[cm]) \n\t"
  412. "lbux %[res3], %[res3](%[cm]) \n\t"
  413. "sb %[res0], 4(%[dst]) \n\t"
  414. "sb %[res1], 5(%[dst]) \n\t"
  415. "sb %[res2], 6(%[dst]) \n\t"
  416. "sb %[res3], 7(%[dst]) \n\t"
  417. "replv.ph %[left0], %[left0] \n\t"
  418. "add %[dst], %[dst], %[stride] \n\t"
  419. "addu.ph %[reshw], %[abovel], %[left0] \n\t"
  420. "subu.ph %[reshw], %[reshw], %[top_left] \n\t"
  421. "sll %[res2], %[reshw], 16 \n\t"
  422. "sra %[res2], %[res2], 16 \n\t"
  423. "sra %[res3], %[reshw], 16 \n\t"
  424. "addu.ph %[reshw], %[abover], %[left0] \n\t"
  425. "subu.ph %[reshw], %[reshw], %[top_left] \n\t"
  426. "sll %[res0], %[reshw], 16 \n\t"
  427. "sra %[res0], %[res0], 16 \n\t"
  428. "sra %[res1], %[reshw], 16 \n\t"
  429. "lbux %[res0], %[res0](%[cm]) \n\t"
  430. "lbux %[res1], %[res1](%[cm]) \n\t"
  431. "lbux %[res2], %[res2](%[cm]) \n\t"
  432. "lbux %[res3], %[res3](%[cm]) \n\t"
  433. "sb %[res0], (%[dst]) \n\t"
  434. "sb %[res1], 1(%[dst]) \n\t"
  435. "sb %[res2], 2(%[dst]) \n\t"
  436. "sb %[res3], 3(%[dst]) \n\t"
  437. "addu.ph %[reshw], %[abovel_1], %[left0] \n\t"
  438. "subu.ph %[reshw], %[reshw], %[top_left] \n\t"
  439. "sll %[res2], %[reshw], 16 \n\t"
  440. "sra %[res2], %[res2], 16 \n\t"
  441. "sra %[res3], %[reshw], 16 \n\t"
  442. "addu.ph %[reshw], %[abover_1], %[left0] \n\t"
  443. "subu.ph %[reshw], %[reshw], %[top_left] \n\t"
  444. "sll %[res0], %[reshw], 16 \n\t"
  445. "sra %[res0], %[res0], 16 \n\t"
  446. "sra %[res1], %[reshw], 16 \n\t"
  447. "lbux %[res0], %[res0](%[cm]) \n\t"
  448. "lbux %[res1], %[res1](%[cm]) \n\t"
  449. "lbux %[res2], %[res2](%[cm]) \n\t"
  450. "lbux %[res3], %[res3](%[cm]) \n\t"
  451. "sb %[res0], 4(%[dst]) \n\t"
  452. "sb %[res1], 5(%[dst]) \n\t"
  453. "sb %[res2], 6(%[dst]) \n\t"
  454. "sb %[res3], 7(%[dst]) \n\t"
  455. : [abovel] "=&r"(abovel), [abover] "=&r"(abover),
  456. [abovel_1] "=&r"(abovel_1), [abover_1] "=&r"(abover_1),
  457. [left0] "=&r"(left0), [res2] "=&r"(res2), [res3] "=&r"(res3),
  458. [res0] "=&r"(res0), [res1] "=&r"(res1), [reshw] "=&r"(reshw),
  459. [top_left] "=&r"(top_left)
  460. : [above] "r"(above), [left] "r"(left), [dst] "r"(dst),
  461. [stride] "r"(stride), [cm] "r"(cm));
  462. }
  463. #endif // #if HAVE_DSPR2