2
0

aacencdsp.asm 2.7 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586
  1. ;******************************************************************************
  2. ;* SIMD optimized AAC encoder DSP functions
  3. ;*
  4. ;* Copyright (C) 2016 Rostislav Pehlivanov <atomnuker@gmail.com>
  5. ;*
  6. ;* This file is part of FFmpeg.
  7. ;*
  8. ;* FFmpeg is free software; you can redistribute it and/or
  9. ;* modify it under the terms of the GNU Lesser General Public
  10. ;* License as published by the Free Software Foundation; either
  11. ;* version 2.1 of the License, or (at your option) any later version.
  12. ;*
  13. ;* FFmpeg is distributed in the hope that it will be useful,
  14. ;* but WITHOUT ANY WARRANTY; without even the implied warranty of
  15. ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  16. ;* Lesser General Public License for more details.
  17. ;*
  18. ;* You should have received a copy of the GNU Lesser General Public
  19. ;* License along with FFmpeg; if not, write to the Free Software
  20. ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  21. ;******************************************************************************
  22. %include "libavutil/x86/x86util.asm"
  SECTION_RODATA

  ; Four identical dwords with every bit set except bit 31.  abs_pow34 ANDs
  ; this against packed floats to clear the sign bit of each lane.  It is
  ; fetched with an aligned 16-byte load (mova), so it must stay 16-byte
  ; aligned -- presumably guaranteed by SECTION_RODATA's default alignment;
  ; confirm in x86inc before placing other data in front of it.
  float_abs_mask: dd 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff

  SECTION .text
  ;*******************************************************************
  ;void ff_abs_pow34(float *out, const float *in, const int size);
  ;
  ; Stores out[i] = sqrt(|in[i]| * sqrt(|in[i]|)), i.e. |in[i]|^(3/4),
  ; processing one 16-byte vector (4 floats) per iteration.
  ;
  ; Both buffers are touched with aligned 16-byte accesses and the element
  ; count is consumed in whole-vector steps, so callers are presumably
  ; required to pass 16-byte aligned pointers and a size that is a multiple
  ; of 4 -- confirm against the call sites.
  ; NOTE: the loop body runs before the counter is tested, so one full
  ; vector is read and written even when size is 0.
  ;*******************************************************************
  INIT_XMM sse
  cglobal abs_pow34, 3, 3, 3, out, in, size
      mova    m2, [float_abs_mask]        ; m2 = per-lane mask clearing the sign bit

      ; size is a 32-bit int, so the upper half of its 64-bit register is
      ; not guaranteed to be zero on x86-64.  Shifting the dword view
      ; zero-extends the result into sizeq, which is then safe to use for
      ; addressing (same pattern as aac_quantize_bands).
      shl     sized, 2                    ; element count -> byte count

      ; Point both buffers at their end and walk a negative byte offset up
      ; towards zero, so the "add" below doubles as the loop test.
      add     inq, sizeq
      add     outq, sizeq
      neg     sizeq
  .loop:
      andps   m0, m2, [inq+sizeq]         ; m0 = |x|
      sqrtps  m1, m0                      ; m1 = |x|^(1/2)
      mulps   m0, m1                      ; m0 = |x|^(3/2)
      sqrtps  m0, m0                      ; m0 = |x|^(3/4)
      mova    [outq+sizeq], m0
      add     sizeq, mmsize
      jl      .loop                       ; continue while the offset is still negative
      RET
  ;*******************************************************************
  ;void ff_aac_quantize_bands(int *out, const float *in, const float *scaled,
  ;                           int size, int is_signed, int maxval, const float Q34,
  ;                           const float rounding)
  ;
  ; For every element:
  ;     q      = min(scaled[i] * Q34 + rounding, (float)maxval)
  ;     q     |= sign bit of in[i]      (only when bit 0 of is_signed is set)
  ;     out[i] = q converted to int with truncation
  ; One 16-byte vector (4 elements) is handled per iteration, using aligned
  ; accesses; the loop body runs before the counter is tested.  Callers are
  ; presumably expected to supply 16-byte aligned buffers and a size that is
  ; a positive multiple of 4 -- confirm against the call sites.
  ;*******************************************************************
  INIT_XMM sse2
  cglobal aac_quantize_bands, 5, 5, 6, out, in, scaled, size, is_signed, maxval, Q34, rounding
      ; Convert the element count to bytes (the dword shift also clears the
      ; upper half of sizeq), aim all three pointers at the end of their
      ; buffers and index them with a negative offset that counts up to zero.
      shl       sized, 2
      add       scaledq, sizeq
      add       inq, sizeq
      add       outq, sizeq
      neg       sizeq

      ; m4 = 0x80000000 in every lane if bit 0 of is_signed is set, else 0.
      shl       is_signedd, 31
      movd      m4, is_signedd
      shufps    m4, m4, 0

      ; Fetch the scalar parameters.  On UNIX64 only maxval needs converting
      ; here; m0/m1 are used as they arrive (Q34 and rounding are the first
      ; two float arguments, passed in xmm0/xmm1 under that ABI).  Everywhere
      ; else all three are read through their argument memory slots.
  %if UNIX64
      cvtsi2ss  m3, maxvald
  %else
      movss     m0, Q34m
      movss     m1, roundingm
      cvtsi2ss  m3, dword maxvalm
  %endif
      ; Broadcast lane 0 of each constant to all four lanes.
      shufps    m3, m3, 0                   ; m3 = (float)maxval
      shufps    m1, m1, 0                   ; m1 = rounding
      shufps    m0, m0, 0                   ; m0 = Q34
  .next_vec:
      andps     m5, m4, [inq+sizeq]         ; m5 = sign bits of in[] (all zero when unsigned)
      mulps     m2, m0, [scaledq+sizeq]     ; m2 = scaled * Q34
      addps     m2, m1                      ;    + rounding
      minps     m2, m3                      ; clamp to maxval
      orps      m2, m5                      ; re-apply the input sign
      cvttps2dq m2, m2                      ; float -> int32, truncating
      mova      [outq+sizeq], m2
      add       sizeq, mmsize
      jl        .next_vec                   ; continue while the offset is still negative
      RET
  80. RET