recon_sse2.asm 3.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116
  1. ;
  2. ; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  3. ;
  4. ; Use of this source code is governed by a BSD-style license
  5. ; that can be found in the LICENSE file in the root of the source
  6. ; tree. An additional intellectual property rights grant can be found
  7. ; in the file PATENTS. All contributing project authors may
  8. ; be found in the AUTHORS file in the root of the source tree.
  9. ;
  10. %include "vpx_ports/x86_abi_support.asm"
  ;-----------------------------------------------------------------------
  ; void vp8_copy_mem16x16_sse2(
  ;     unsigned char *src,
  ;     int            src_stride,
  ;     unsigned char *dst,
  ;     int            dst_stride
  ; )
  ;
  ; Copies 16 rows of 16 bytes each from src to dst. Rows are moved three
  ; at a time (five groups) followed by one final row.
  ;
  ; Source rows are read with unaligned loads (movdqu). Destination rows
  ; are written with aligned stores (movdqa), so every destination row
  ; address must be 16-byte aligned.
  ;
  ; Register roles:
  ;   rsi       = source cursor
  ;   rdi       = destination cursor
  ;   rax       = src_stride (sign-extended from 32 bits)
  ;   rcx       = dst_stride (sign-extended from 32 bits)
  ;   xmm0-xmm5 = row data in flight
  ;-----------------------------------------------------------------------
  global sym(vp8_copy_mem16x16_sse2) PRIVATE
  sym(vp8_copy_mem16x16_sse2):
          push        rbp
          mov         rbp,            rsp
          SHADOW_ARGS_TO_STACK 4
          push        rsi
          push        rdi
          ; end prolog

          ; Fetch all four arguments before any pixel data is stored.
          mov         rsi,            arg(0)              ; src
          movsxd      rax,            dword ptr arg(1)    ; src_stride
          mov         rdi,            arg(2)              ; dst
          movsxd      rcx,            dword ptr arg(3)    ; dst_stride

          ; ---- rows 0-2 ----
          movdqu      xmm0,           [rsi]
          movdqu      xmm1,           [rsi+rax]
          movdqu      xmm2,           [rsi+rax*2]
          movdqa      [rdi],          xmm0
          movdqa      [rdi+rcx],      xmm1
          movdqa      [rdi+rcx*2],    xmm2
          lea         rsi,            [rsi+rax*2]
          add         rsi,            rax                 ; src += 3 * src_stride
          lea         rdi,            [rdi+rcx*2]
          add         rdi,            rcx                 ; dst += 3 * dst_stride

          ; ---- rows 3-5 ----
          movdqu      xmm3,           [rsi]
          movdqu      xmm4,           [rsi+rax]
          movdqu      xmm5,           [rsi+rax*2]
          movdqa      [rdi],          xmm3
          movdqa      [rdi+rcx],      xmm4
          movdqa      [rdi+rcx*2],    xmm5
          lea         rsi,            [rsi+rax*2]
          add         rsi,            rax                 ; src += 3 * src_stride
          lea         rdi,            [rdi+rcx*2]
          add         rdi,            rcx                 ; dst += 3 * dst_stride

          ; ---- rows 6-8 ----
          movdqu      xmm0,           [rsi]
          movdqu      xmm1,           [rsi+rax]
          movdqu      xmm2,           [rsi+rax*2]
          movdqa      [rdi],          xmm0
          movdqa      [rdi+rcx],      xmm1
          movdqa      [rdi+rcx*2],    xmm2
          lea         rsi,            [rsi+rax*2]
          add         rsi,            rax                 ; src += 3 * src_stride
          lea         rdi,            [rdi+rcx*2]
          add         rdi,            rcx                 ; dst += 3 * dst_stride

          ; ---- rows 9-11 ----
          movdqu      xmm3,           [rsi]
          movdqu      xmm4,           [rsi+rax]
          movdqu      xmm5,           [rsi+rax*2]
          movdqa      [rdi],          xmm3
          movdqa      [rdi+rcx],      xmm4
          movdqa      [rdi+rcx*2],    xmm5
          lea         rsi,            [rsi+rax*2]
          add         rsi,            rax                 ; src += 3 * src_stride
          lea         rdi,            [rdi+rcx*2]
          add         rdi,            rcx                 ; dst += 3 * dst_stride

          ; ---- rows 12-14 ----
          movdqu      xmm0,           [rsi]
          movdqu      xmm1,           [rsi+rax]
          movdqu      xmm2,           [rsi+rax*2]
          movdqa      [rdi],          xmm0
          movdqa      [rdi+rcx],      xmm1
          movdqa      [rdi+rcx*2],    xmm2

          ; ---- row 15 ----
          ; Both cursors advance by only two rows here (to row 14); the
          ; last row is then addressed at one further stride from each.
          lea         rsi,            [rsi+rax*2]
          lea         rdi,            [rdi+rcx*2]
          movdqu      xmm3,           [rsi+rax]
          movdqa      [rdi+rcx],      xmm3

          ; begin epilog
          pop         rdi
          pop         rsi
          UNSHADOW_ARGS
          pop         rbp
          ret