; vpx_convolve_copy_neon_asm.asm
;
; Copyright (c) 2013 The WebM project authors. All Rights Reserved.
;
; Use of this source code is governed by a BSD-style license
; that can be found in the LICENSE file in the root of the source
; tree. An additional intellectual property rights grant can be found
; in the file PATENTS. All contributing project authors may
; be found in the AUTHORS file in the root of the source tree.
;
  EXPORT  |vpx_convolve_copy_neon|
  ARM
  REQUIRE8
  PRESERVE8

  AREA    ||.text||, CODE, READONLY, ALIGN=2

;-----------------------------------------------------------------------
; vpx_convolve_copy_neon
;
; Row-by-row block copy. ARM (A32) state, armasm syntax. NEON is used
; for every width except the narrowest, which uses a plain word copy.
;
; In:    r0         = source pointer
;        r1         = source stride in bytes
;        r2         = destination pointer
;        r3         = destination stride in bytes
;        [sp, #20]  = width selector  (loaded into r4)
;        [sp, #24]  = row count       (loaded into r5)
;        Stack offsets above are relative to sp at entry; the code reads
;        them at #32/#36 because it has pushed 12 bytes by then.
;
; Width dispatch (signed compares on r4):
;        r4 <  8      ->  4 bytes per row
;        r4 == 8      ->  8 bytes per row
;        8 < r4 < 32  -> 16 bytes per row
;        r4 == 32     -> 32 bytes per row
;        r4 >  32     -> 64 bytes per row
;
; Notes: * Every loop tests the row counter after its body, so the body
;          always runs at least once.
;        * The 8/16/32-byte loops move two rows per pass and subtract 2
;          from the row counter; the 4- and 64-byte loops move one row.
;        * NEON stores carry an alignment qualifier on r2 (@64 for the
;          8-byte path, @128 for the 16/32/64-byte paths), so each
;          destination row address is expected to meet that alignment.
;
; Clobb: r0, r2, r3, r12, lr, q0-q3, flags.  r4/r5 are saved on entry
;        and restored on exit.
;-----------------------------------------------------------------------
|vpx_convolve_copy_neon| PROC
  push    {r4-r5, lr}
  ldrd    r4, r5, [sp, #32]             ; r4 = width selector, r5 = rows

  ; Select the copy loop, narrowest width first.
  cmp     r4, #8
  blt     rows_w4                       ; r4 < 8
  beq     rows_w8                       ; r4 == 8
  cmp     r4, #32
  blt     rows_w16                      ; 8 < r4 < 32
  beq     rows_w32                      ; r4 == 32

  ; r4 > 32: fall through into the 64-byte path.
  ; A row is moved as two 32-byte halves. The first half advances the
  ; pointer by 32 through writeback, so the second half only has to add
  ; (stride - 32) to land on the start of the next row.
  sub     lr, r1, #32                   ; lr = src stride - 32
  sub     r3, r3, #32                   ; r3 = dst stride - 32

rows_w64
  pld     [r0, r1, lsl #1]              ; prefetch at src + 2 * stride
  vld1.8  {q0-q1}, [r0]!                ; first 32 bytes, src += 32
  vld1.8  {q2-q3}, [r0], lr             ; last 32 bytes, src -> next row
  vst1.8  {q0-q1}, [r2@128]!            ; first 32 bytes, dst += 32
  vst1.8  {q2-q3}, [r2@128], r3         ; last 32 bytes, dst -> next row
  subs    r5, r5, #1                    ; one row done
  bgt     rows_w64
  pop     {r4-r5, pc}

  ; 32 bytes per row, two rows per pass. Both rows are loaded before
  ; either one is stored.
rows_w32
  pld     [r0, r1, lsl #1]
  vld1.8  {q0-q1}, [r0], r1             ; row n
  pld     [r0, r1, lsl #1]
  vld1.8  {q2-q3}, [r0], r1             ; row n + 1
  vst1.8  {q0-q1}, [r2@128], r3
  vst1.8  {q2-q3}, [r2@128], r3
  subs    r5, r5, #2                    ; two rows done
  bgt     rows_w32
  pop     {r4-r5, pc}

  ; 16 bytes per row, two rows per pass.
rows_w16
  pld     [r0, r1, lsl #1]
  vld1.8  {q0}, [r0], r1                ; row n
  pld     [r0, r1, lsl #1]
  vld1.8  {q1}, [r0], r1                ; row n + 1
  vst1.8  {q0}, [r2@128], r3
  vst1.8  {q1}, [r2@128], r3
  subs    r5, r5, #2                    ; two rows done
  bgt     rows_w16
  pop     {r4-r5, pc}

  ; 8 bytes per row, two rows per pass.
rows_w8
  pld     [r0, r1, lsl #1]
  vld1.8  {d0}, [r0], r1                ; row n
  pld     [r0, r1, lsl #1]
  vld1.8  {d2}, [r0], r1                ; row n + 1
  vst1.8  {d0}, [r2@64], r3
  vst1.8  {d2}, [r2@64], r3
  subs    r5, r5, #2                    ; two rows done
  bgt     rows_w8
  pop     {r4-r5, pc}

  ; 4 bytes per row: one core-register word per row, no NEON.
rows_w4
  ldr     r12, [r0], r1                 ; r12 = 4 source bytes, src -> next row
  str     r12, [r2], r3                 ; write them, dst -> next row
  subs    r5, r5, #1                    ; one row done
  bgt     rows_w4
  pop     {r4-r5, pc}

  ENDP

  END