recon_mmx.asm 2.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120
  1. ;
  2. ; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  3. ;
  4. ; Use of this source code is governed by a BSD-style license
  5. ; that can be found in the LICENSE file in the root of the source
  6. ; tree. An additional intellectual property rights grant can be found
  7. ; in the file PATENTS. All contributing project authors may
  8. ; be found in the AUTHORS file in the root of the source tree.
  9. ;
  10. %include "vpx_ports/x86_abi_support.asm"
  11. SECTION .text
  ;-----------------------------------------------------------------------
  ; void vp8_copy_mem8x8_mmx(
  ;     unsigned char *src,
  ;     int            src_stride,
  ;     unsigned char *dst,
  ;     int            dst_stride
  ; )
  ;
  ; Copies 8 rows of 8 bytes each from src to dst. Consecutive source rows
  ; are src_stride bytes apart; consecutive destination rows are dst_stride
  ; bytes apart. Every row is moved with one 64-bit MMX load and one
  ; 64-bit MMX store.
  ;
  ; Arguments are read through the arg(n) macro after
  ; SHADOW_ARGS_TO_STACK 4; these macros are presumably supplied by the
  ; included vpx_ports/x86_abi_support.asm.
  ;
  ; Register use:
  ;   rsi = source row cursor         (pushed on entry, popped on exit)
  ;   rdi = destination row cursor    (pushed on entry, popped on exit)
  ;   rax = src_stride, taken from a dword with movsxd
  ;   rcx = dst_stride, taken from a dword with movsxd
  ;   mm0-mm5 = row data
  ; Overwritten on return: rax, rcx, mm0-mm5, flags.
  ;
  ; NOTE(review): no emms is executed in this routine; presumably the
  ; caller is responsible for clearing MMX state -- confirm.
  ;-----------------------------------------------------------------------
  global sym(vp8_copy_mem8x8_mmx) PRIVATE
  sym(vp8_copy_mem8x8_mmx):
      push        rbp
      mov         rbp, rsp
      SHADOW_ARGS_TO_STACK 4
      push        rsi
      push        rdi
      ; end prolog

      ; ---- fetch the four arguments ----
      mov         rsi, arg(0)                 ; rsi = src
      mov         rdi, arg(2)                 ; rdi = dst
      movsxd      rax, dword ptr arg(1)       ; rax = src_stride
      movsxd      rcx, dword ptr arg(3)       ; rcx = dst_stride

      ; ---- rows 0..2: all three loads are issued before the stores ----
      movq        mm0, [rsi]
      movq        mm1, [rsi+rax]
      movq        mm2, [rsi+rax*2]
      movq        [rdi], mm0
      movq        [rdi+rcx], mm1
      movq        [rdi+rcx*2], mm2

      ; ---- step both cursors forward by three rows ----
      lea         rsi, [rsi+rax*2]
      add         rsi, rax                    ; rsi = src + 3*src_stride
      lea         rdi, [rdi+rcx*2]
      add         rdi, rcx                    ; rdi = dst + 3*dst_stride

      ; ---- rows 3..5: all three loads are issued before the stores ----
      movq        mm3, [rsi]
      movq        mm4, [rsi+rax]
      movq        mm5, [rsi+rax*2]
      movq        [rdi], mm3
      movq        [rdi+rcx], mm4
      movq        [rdi+rcx*2], mm5

      ; ---- step both cursors forward by two rows (now on row 5) ----
      lea         rsi, [rsi+rax*2]
      lea         rdi, [rdi+rcx*2]

      ; ---- rows 6..7, addressed as cursor + 1 row and cursor + 2 rows ----
      movq        mm0, [rsi+rax]
      movq        mm1, [rsi+rax*2]
      movq        [rdi+rcx], mm0
      movq        [rdi+rcx*2], mm1

      ; begin epilog
      pop         rdi
      pop         rsi
      UNSHADOW_ARGS
      pop         rbp
      ret
  ;-----------------------------------------------------------------------
  ; void vp8_copy_mem8x4_mmx(
  ;     unsigned char *src,
  ;     int            src_stride,
  ;     unsigned char *dst,
  ;     int            dst_stride
  ; )
  ;
  ; Copies 4 rows of 8 bytes each from src to dst. Consecutive source rows
  ; are src_stride bytes apart; consecutive destination rows are dst_stride
  ; bytes apart. Every row is moved with one 64-bit MMX load and one
  ; 64-bit MMX store.
  ;
  ; Arguments are read through the arg(n) macro after
  ; SHADOW_ARGS_TO_STACK 4; these macros are presumably supplied by the
  ; included vpx_ports/x86_abi_support.asm.
  ;
  ; Register use:
  ;   rsi = source row cursor         (pushed on entry, popped on exit)
  ;   rdi = destination row cursor    (pushed on entry, popped on exit)
  ;   rax = src_stride, taken from a dword with movsxd
  ;   rcx = dst_stride, taken from a dword with movsxd
  ;   mm0-mm3 = row data
  ; Overwritten on return: rax, rcx, mm0-mm3.
  ;
  ; NOTE(review): no emms is executed in this routine; presumably the
  ; caller is responsible for clearing MMX state -- confirm.
  ;-----------------------------------------------------------------------
  global sym(vp8_copy_mem8x4_mmx) PRIVATE
  sym(vp8_copy_mem8x4_mmx):
      push        rbp
      mov         rbp, rsp
      SHADOW_ARGS_TO_STACK 4
      push        rsi
      push        rdi
      ; end prolog

      ; ---- fetch the four arguments ----
      mov         rsi, arg(0)                 ; rsi = src
      mov         rdi, arg(2)                 ; rdi = dst
      movsxd      rax, dword ptr arg(1)       ; rax = src_stride
      movsxd      rcx, dword ptr arg(3)       ; rcx = dst_stride

      ; ---- rows 0..2: all three loads are issued before the stores ----
      movq        mm0, [rsi]
      movq        mm1, [rsi+rax]
      movq        mm2, [rsi+rax*2]
      movq        [rdi], mm0
      movq        [rdi+rcx], mm1
      movq        [rdi+rcx*2], mm2

      ; ---- step both cursors forward by two rows (now on row 2) ----
      lea         rsi, [rsi+rax*2]
      lea         rdi, [rdi+rcx*2]

      ; ---- row 3, addressed as cursor + 1 row ----
      movq        mm3, [rsi+rax]
      movq        [rdi+rcx], mm3

      ; begin epilog
      pop         rdi
      pop         rsi
      UNSHADOW_ARGS
      pop         rbp
      ret