bitdepth_conversion_sse2.asm 2.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990
  1. ;
  2. ; Copyright (c) 2017 The WebM project authors. All Rights Reserved.
  3. ;
  4. ; Use of this source code is governed by a BSD-style license
  5. ; that can be found in the LICENSE file in the root of the source
  6. ; tree. An additional intellectual property rights grant can be found
  7. ; in the file PATENTS. All contributing project authors may
  8. ; be found in the AUTHORS file in the root of the source tree.
  9. ;
  10. ; TODO(johannkoenig): Add the necessary include guards to vpx_config.asm.
  11. ; vpx_config.asm is not guarded, so it cannot be included twice. Because this will
  12. ; be used in conjunction with x86_abi_support.asm or x86inc.asm, it must be
  13. ; included after those files.
  ; INCREMENT_TRAN_LOW reg
  ;
  ; Advance the pointer register %1 past eight tran_low_t elements.
  ; The step is 8 * 4 = 32 bytes when CONFIG_VP9_HIGHBITDEPTH is set (32-bit
  ; tran_low_t) and 8 * 2 = 16 bytes otherwise (16-bit tran_low_t).
  ; Expands to a single add, so the arithmetic flags are overwritten.
  %macro INCREMENT_TRAN_LOW 1
    %if CONFIG_VP9_HIGHBITDEPTH
      add             %1, 4 * 8
    %else
      add             %1, 2 * 8
    %endif
  %endmacro
  ; INCREMENT_ELEMENTS_TRAN_LOW reg, count
  ;
  ; Advance the pointer register %1 by %2 tran_low_t elements, i.e. by
  ; %2 * 4 bytes when CONFIG_VP9_HIGHBITDEPTH is set and by %2 * 2 bytes
  ; otherwise.
  ; The update is done with lea, so the flags are left untouched.
  ; %2 is pasted textually in front of the scale factor; pass a single
  ; register or a parenthesized expression.
  %macro INCREMENT_ELEMENTS_TRAN_LOW 2
    %if CONFIG_VP9_HIGHBITDEPTH
      lea             %1, [%1 + %2 * 4]
    %else
      lea             %1, [%1 + %2 * 2]
    %endif
  %endmacro
  ; LOAD_TRAN_LOW dst, base, offset
  ;
  ; Read tran_low_t coefficients from address %2, starting %3 elements in,
  ; and leave them in m%1 as 16-bit words.
  ;   %1 - index of the destination SIMD register (used as m%1)
  ;   %2 - base address
  ;   %3 - offset counted in elements, not in bytes
  ;
  ; Low bit depth: tran_low_t is already 16 bits wide, so one register load
  ; is enough.
  ; High bit depth: tran_low_t is 32 bits wide; two consecutive 16-byte
  ; groups are read and narrowed to words by packssdw (signed saturation).
  ; NOTE(review): mova is presumably the aligned move from x86inc.asm, in
  ; which case the source address must be 16-byte aligned - confirm.
  %macro LOAD_TRAN_LOW 3
    %if CONFIG_VP9_HIGHBITDEPTH
      mova            m%1, [%2 + (%3) * 4]
      packssdw        m%1, [%2 + (%3) * 4 + 16]
    %else
      mova            m%1, [%2 + (%3) * 2]
    %endif
  %endmacro
  ; STORE_TRAN_LOW src, base, offset, tmp0, tmp1 [, tmp2]
  ;
  ; Write the 16-bit words held in m%1 out as tran_low_t values at address
  ; %2, starting %3 elements in.
  ;   %1 - index of the source SIMD register (m%1)
  ;   %2 - base address
  ;   %3 - offset counted in elements, not in bytes
  ;   %4 - scratch register index; receives the per-word sign mask
  ;   %5 - scratch register index; receives the widened low half
  ;   %6 - optional scratch register index; receives the widened high half
  ;
  ; Low bit depth: tran_low_t is 16 bits wide, m%1 is stored as is and no
  ; scratch register is touched.
  ; High bit depth: tran_low_t is 32 bits wide, so every word is sign
  ; extended to a dword and two 16-byte groups are stored.
  ;   * 5 arguments: the high half is widened in place, so m%1 is destroyed.
  ;   * 6 arguments: the high half is widened in m%6, so m%1 is preserved.
  ; NOTE(review): mova is presumably the aligned move from x86inc.asm, in
  ; which case the destination address must be 16-byte aligned - confirm.
  %macro STORE_TRAN_LOW 5-6
    %if CONFIG_VP9_HIGHBITDEPTH
      pxor            m%4, m%4
      mova            m%5, m%1
      %if %0 == 6
      mova            m%6, m%1
      %endif
      ; m%4 = (0 > word) ? 0xffff : 0, i.e. the upper 16 bits of each
      ; sign-extended dword.
      pcmpgtw         m%4, m%1
      punpcklwd       m%5, m%4
      %if %0 == 5
      ; No spare register: widen the high half inside the source itself.
      punpckhwd       m%1, m%4
      mova            [%2 + (%3) * 4 + 0], m%5
      mova            [%2 + (%3) * 4 + 16], m%1
      %else
      ; Spare register supplied: the source register stays intact.
      punpckhwd       m%6, m%4
      mova            [%2 + (%3) * 4 + 0], m%5
      mova            [%2 + (%3) * 4 + 16], m%6
      %endif
    %else
      mova            [%2 + (%3) * 2], m%1
    %endif
  %endmacro
  ; STORE_ZERO_TRAN_LOW zero, base, offset
  ;
  ; Clear tran_low_t coefficients at address %2, starting %3 elements in.
  ;   %1 - index of a SIMD register the caller has already zeroed (m%1);
  ;        this macro only stores it and never clears it
  ;   %2 - base address
  ;   %3 - offset counted in elements, not in bytes
  ;
  ; High bit depth writes two consecutive 16-byte groups (32-bit elements);
  ; low bit depth writes a single register's worth (16-bit elements).
  ; NOTE(review): mova is presumably the aligned move from x86inc.asm, in
  ; which case the destination address must be 16-byte aligned - confirm.
  %macro STORE_ZERO_TRAN_LOW 3
    %if CONFIG_VP9_HIGHBITDEPTH
      mova            [%2 + (%3) * 4 + 0], m%1
      mova            [%2 + (%3) * 4 + 16], m%1
    %else
      mova            [%2 + (%3) * 2], m%1
    %endif
  %endmacro