add_noise_sse2.asm 2.4 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788
  1. ;
  2. ; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  3. ;
  4. ; Use of this source code is governed by a BSD-style license
  5. ; that can be found in the LICENSE file in the root of the source
  6. ; tree. An additional intellectual property rights grant can be found
  7. ; in the file PATENTS. All contributing project authors may
  8. ; be found in the AUTHORS file in the root of the source tree.
  9. ;
  10. %include "vpx_ports/x86_abi_support.asm"
  SECTION .text

  ;-----------------------------------------------------------------------
  ; void vpx_plane_add_noise_sse2(uint8_t *start, const int8_t *noise,
  ;                               int blackclamp, int whiteclamp,
  ;                               int width, int height, int pitch)
  ;
  ; For each of `height` rows, 16 bytes at a time:
  ;   1. p = sat_sub(p, blackclamp)
  ;      p = sat_add(p, sat_add(blackclamp, whiteclamp))
  ;      p = sat_sub(p, whiteclamp)
  ;      (when blackclamp + whiteclamp <= 255 this limits p to the range
  ;      [blackclamp, 255 - whiteclamp])
  ;   2. p += noise byte (wrapping byte add)
  ; The noise for a row is read starting at noise + (LIBVPX_RAND() & 0xff);
  ; a new offset is drawn for every row.
  ;
  ; Arguments are accessed through arg(n) after SHADOW_ARGS_TO_STACK; both
  ; macros come from vpx_ports/x86_abi_support.asm.
  ;
  ; Register roles inside the row loop:
  ;   rsi  = start of the current row
  ;   rdi  = noise + random offset for this row
  ;   rcx  = width (sign-extended)
  ;   rax  = byte offset within the row
  ;   xmm3 = blackclamp byte replicated 16 times
  ;   xmm4 = whiteclamp byte replicated 16 times
  ;   xmm5 = saturating byte sum of xmm3 and xmm4
  ;   xmm1, xmm2 = scratch (pixels / noise)
  ;
  ; Caveats:
  ;   * Rows are processed in whole 16-byte chunks, so when width is not a
  ;     multiple of 16 up to 15 bytes past `width` are read and written in
  ;     the row, and the same amount is read from the noise buffer.
  ;   * The arg(0) and arg(5) stack slots are used as the running row
  ;     pointer and the remaining-row counter and are modified in place.
  ;-----------------------------------------------------------------------
  global sym(vpx_plane_add_noise_sse2) PRIVATE
  sym(vpx_plane_add_noise_sse2):
      push        rbp
      mov         rbp, rsp
      SHADOW_ARGS_TO_STACK 7
      GET_GOT     rbx
      push        rsi
      push        rdi

      ; Both loops below test their condition at the bottom, so without
      ; this guard an empty plane would still have one 16-byte chunk
      ; modified. Leave early when there is nothing to process.
      cmp         dword arg(5), 0             ; height <= 0 ?
      jle         .addnoise_done
      cmp         dword arg(4), 0             ; width <= 0 ?
      jle         .addnoise_done

  .addnoise_loop:
      call        sym(LIBVPX_RAND) WRT_PLT    ; random value returned in rax

      mov         rdi, arg(1)                 ; noise
      and         rax, 0xff                   ; offset = rand & 0xff
      add         rdi, rax                    ; rdi = noise + offset

      ; The clamp vectors are (re)built here, after the call, instead of
      ; once before the loop: xmm3-xmm5 are caller-saved registers, so the
      ; callee is allowed to overwrite them.
      mov         rdx, 0x01010101
      mov         rax, arg(2)                 ; blackclamp
      mul         rdx                         ; low dword = 4 copies of the byte
      movq        xmm3, rax
      pshufd      xmm3, xmm3, 0               ; xmm3 is 16 copies of char in blackclamp

      mov         rdx, 0x01010101
      mov         rax, arg(3)                 ; whiteclamp
      mul         rdx
      movq        xmm4, rax
      pshufd      xmm4, xmm4, 0               ; xmm4 is 16 copies of char in whiteclamp

      movdqu      xmm5, xmm3                  ; both clamp = black clamp + white clamp
      paddusb     xmm5, xmm4

      movsxd      rcx, dword arg(4)           ; width
      mov         rsi, arg(0)                 ; current row
      xor         rax, rax                    ; offset within the row = 0

  .addnoise_nextset:
      movdqu      xmm1, [rsi+rax]             ; get the source pixels

      psubusb     xmm1, xmm3                  ; subtract black clamp
      paddusb     xmm1, xmm5                  ; add both clamp
      psubusb     xmm1, xmm4                  ; subtract white clamp

      movdqu      xmm2, [rdi+rax]             ; get the noise for these pixels
      paddb       xmm1, xmm2                  ; add it in (wrapping)
      movdqu      [rsi+rax], xmm1             ; store the result

      add         rax, 16                     ; advance to the next 16 pixels
      cmp         rax, rcx
      jl          .addnoise_nextset

      movsxd      rax, dword arg(6)           ; pitch
      add         arg(0), rax                 ; start += pitch
      sub         dword arg(5), 1             ; height -= 1
      jg          .addnoise_loop

  .addnoise_done:
      ; begin epilog
      pop         rdi
      pop         rsi
      RESTORE_GOT
      UNSHADOW_ARGS
      pop         rbp
      ret
  SECTION_RODATA
  align 16

  ; Read-only constant tables, each 16 bytes long.
  ; NOTE(review): neither label appears to be referenced by the code visible
  ; in this file -- confirm before removing.

  ; eight 16-bit words, each holding 4
  rd42:
      dw 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04

  ; four 32-bit dwords, each holding 8
  four8s:
      dd 8, 8, 8, 8