recon_sse2.asm 3.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118
  1. ;
  2. ; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  3. ;
  4. ; Use of this source code is governed by a BSD-style license
  5. ; that can be found in the LICENSE file in the root of the source
  6. ; tree. An additional intellectual property rights grant can be found
  7. ; in the file PATENTS. All contributing project authors may
  8. ; be found in the AUTHORS file in the root of the source tree.
  9. ;
  10. %include "vpx_ports/x86_abi_support.asm"
  11. SECTION .text
  ;-----------------------------------------------------------------------
  ; void vp8_copy_mem16x16_sse2(
  ;     unsigned char *src,
  ;     int            src_stride,
  ;     unsigned char *dst,
  ;     int            dst_stride
  ; )
  ;
  ; Copies 16 rows of 16 bytes each from src to dst. Consecutive rows are
  ; src_stride bytes apart in the source and dst_stride bytes apart in
  ; the destination; both strides are read as signed 32-bit values.
  ;
  ; Alignment: source rows are read with movdqu (no alignment needed);
  ;            destination rows are written with movdqa, so every
  ;            destination row address must be 16-byte aligned.
  ; Saved:     rbp, rsi, rdi (pushed in the prolog, popped in the epilog)
  ; Clobbers:  rax, rcx, xmm0-xmm5, flags
  ;
  ; sym(), arg(), PRIVATE, SHADOW_ARGS_TO_STACK and UNSHADOW_ARGS are not
  ; defined in this file; presumably they come from the included
  ; vpx_ports/x86_abi_support.asm.
  ;-----------------------------------------------------------------------
  global sym(vp8_copy_mem16x16_sse2) PRIVATE
  sym(vp8_copy_mem16x16_sse2):
      ; --- prolog ---
      push        rbp
      mov         rbp,            rsp
      SHADOW_ARGS_TO_STACK 4
      push        rsi
      push        rdi

      ; Pick up all four arguments before touching any pixel data.
      mov         rsi,            arg(0)              ; rsi = src
      movsxd      rax,            dword ptr arg(1)    ; rax = src_stride
      mov         rdi,            arg(2)              ; rdi = dst
      movsxd      rcx,            dword ptr arg(3)    ; rcx = dst_stride

      ; The block is moved three rows at a time: load three rows, store
      ; three rows, then step both pointers forward by three strides.
      ; Register banks xmm0-2 and xmm3-5 alternate between groups.

      ; --- rows 0-2 ---
      movdqu      xmm0,           [rsi]
      movdqu      xmm1,           [rsi+rax]
      movdqu      xmm2,           [rsi+rax*2]
      movdqa      [rdi],          xmm0
      movdqa      [rdi+rcx],      xmm1
      movdqa      [rdi+rcx*2],    xmm2
      lea         rsi,            [rsi+rax*2]
      add         rsi,            rax                 ; src += 3 * src_stride
      lea         rdi,            [rdi+rcx*2]
      add         rdi,            rcx                 ; dst += 3 * dst_stride

      ; --- rows 3-5 ---
      movdqu      xmm3,           [rsi]
      movdqu      xmm4,           [rsi+rax]
      movdqu      xmm5,           [rsi+rax*2]
      movdqa      [rdi],          xmm3
      movdqa      [rdi+rcx],      xmm4
      movdqa      [rdi+rcx*2],    xmm5
      lea         rsi,            [rsi+rax*2]
      add         rsi,            rax
      lea         rdi,            [rdi+rcx*2]
      add         rdi,            rcx

      ; --- rows 6-8 ---
      movdqu      xmm0,           [rsi]
      movdqu      xmm1,           [rsi+rax]
      movdqu      xmm2,           [rsi+rax*2]
      movdqa      [rdi],          xmm0
      movdqa      [rdi+rcx],      xmm1
      movdqa      [rdi+rcx*2],    xmm2
      lea         rsi,            [rsi+rax*2]
      add         rsi,            rax
      lea         rdi,            [rdi+rcx*2]
      add         rdi,            rcx

      ; --- rows 9-11 ---
      movdqu      xmm3,           [rsi]
      movdqu      xmm4,           [rsi+rax]
      movdqu      xmm5,           [rsi+rax*2]
      movdqa      [rdi],          xmm3
      movdqa      [rdi+rcx],      xmm4
      movdqa      [rdi+rcx*2],    xmm5
      lea         rsi,            [rsi+rax*2]
      add         rsi,            rax
      lea         rdi,            [rdi+rcx*2]
      add         rdi,            rcx

      ; --- rows 12-14 ---
      movdqu      xmm0,           [rsi]
      movdqu      xmm1,           [rsi+rax]
      movdqu      xmm2,           [rsi+rax*2]
      movdqa      [rdi],          xmm0
      movdqa      [rdi+rcx],      xmm1
      movdqa      [rdi+rcx*2],    xmm2

      ; --- row 15 ---
      ; Only one row is left, so the pointers are moved to row 14 and the
      ; final row is addressed one stride beyond them.
      lea         rsi,            [rsi+rax*2]
      movdqu      xmm3,           [rsi+rax]
      lea         rdi,            [rdi+rcx*2]
      movdqa      [rdi+rcx],      xmm3

      ; --- epilog ---
      pop         rdi
      pop         rsi
      UNSHADOW_ARGS
      pop         rbp
      ret