vpx_convolve_avg_neon_asm.asm 3.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116
;
; Copyright (c) 2013 The WebM project authors. All Rights Reserved.
;
; Use of this source code is governed by a BSD-style license
; that can be found in the LICENSE file in the root of the source
; tree. An additional intellectual property rights grant can be found
; in the file PATENTS. All contributing project authors may
; be found in the AUTHORS file in the root of the source tree.
;
  10. EXPORT |vpx_convolve_avg_neon|
  11. ARM
  12. REQUIRE8
  13. PRESERVE8
  14. AREA ||.text||, CODE, READONLY, ALIGN=2
  15. |vpx_convolve_avg_neon| PROC
  16. push {r4-r6, lr}
  17. ldrd r4, r5, [sp, #36]
  18. mov r6, r2
  19. cmp r4, #32
  20. bgt avg64
  21. beq avg32
  22. cmp r4, #8
  23. bgt avg16
  24. beq avg8
  25. b avg4
  26. avg64
  27. sub lr, r1, #32
  28. sub r4, r3, #32
  29. avg64_h
  30. pld [r0, r1, lsl #1]
  31. vld1.8 {q0-q1}, [r0]!
  32. vld1.8 {q2-q3}, [r0], lr
  33. pld [r2, r3]
  34. vld1.8 {q8-q9}, [r6@128]!
  35. vld1.8 {q10-q11}, [r6@128], r4
  36. vrhadd.u8 q0, q0, q8
  37. vrhadd.u8 q1, q1, q9
  38. vrhadd.u8 q2, q2, q10
  39. vrhadd.u8 q3, q3, q11
  40. vst1.8 {q0-q1}, [r2@128]!
  41. vst1.8 {q2-q3}, [r2@128], r4
  42. subs r5, r5, #1
  43. bgt avg64_h
  44. pop {r4-r6, pc}
  45. avg32
  46. vld1.8 {q0-q1}, [r0], r1
  47. vld1.8 {q2-q3}, [r0], r1
  48. vld1.8 {q8-q9}, [r6@128], r3
  49. vld1.8 {q10-q11}, [r6@128], r3
  50. pld [r0]
  51. vrhadd.u8 q0, q0, q8
  52. pld [r0, r1]
  53. vrhadd.u8 q1, q1, q9
  54. pld [r6]
  55. vrhadd.u8 q2, q2, q10
  56. pld [r6, r3]
  57. vrhadd.u8 q3, q3, q11
  58. vst1.8 {q0-q1}, [r2@128], r3
  59. vst1.8 {q2-q3}, [r2@128], r3
  60. subs r5, r5, #2
  61. bgt avg32
  62. pop {r4-r6, pc}
  63. avg16
  64. vld1.8 {q0}, [r0], r1
  65. vld1.8 {q1}, [r0], r1
  66. vld1.8 {q2}, [r6@128], r3
  67. vld1.8 {q3}, [r6@128], r3
  68. pld [r0]
  69. pld [r0, r1]
  70. vrhadd.u8 q0, q0, q2
  71. pld [r6]
  72. pld [r6, r3]
  73. vrhadd.u8 q1, q1, q3
  74. vst1.8 {q0}, [r2@128], r3
  75. vst1.8 {q1}, [r2@128], r3
  76. subs r5, r5, #2
  77. bgt avg16
  78. pop {r4-r6, pc}
  79. avg8
  80. vld1.8 {d0}, [r0], r1
  81. vld1.8 {d1}, [r0], r1
  82. vld1.8 {d2}, [r6@64], r3
  83. vld1.8 {d3}, [r6@64], r3
  84. pld [r0]
  85. pld [r0, r1]
  86. vrhadd.u8 q0, q0, q1
  87. pld [r6]
  88. pld [r6, r3]
  89. vst1.8 {d0}, [r2@64], r3
  90. vst1.8 {d1}, [r2@64], r3
  91. subs r5, r5, #2
  92. bgt avg8
  93. pop {r4-r6, pc}
  94. avg4
  95. vld1.32 {d0[0]}, [r0], r1
  96. vld1.32 {d0[1]}, [r0], r1
  97. vld1.32 {d2[0]}, [r6@32], r3
  98. vld1.32 {d2[1]}, [r6@32], r3
  99. vrhadd.u8 d0, d0, d2
  100. vst1.32 {d0[0]}, [r2@32], r3
  101. vst1.32 {d0[1]}, [r2@32], r3
  102. subs r5, r5, #2
  103. bgt avg4
  104. pop {r4-r6, pc}
  105. ENDP
  106. END