hpeldsp_init_aarch64.c 6.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123
  1. /*
  2. * ARM NEON optimised DSP functions
  3. * Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
  4. *
  5. * This file is part of FFmpeg.
  6. *
  7. * FFmpeg is free software; you can redistribute it and/or
  8. * modify it under the terms of the GNU Lesser General Public
  9. * License as published by the Free Software Foundation; either
  10. * version 2.1 of the License, or (at your option) any later version.
  11. *
  12. * FFmpeg is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  15. * Lesser General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU Lesser General Public
  18. * License along with FFmpeg; if not, write to the Free Software
  19. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20. */
  21. #include <stddef.h>
  22. #include <stdint.h>
  23. #include "config.h"
  24. #include "libavutil/attributes.h"
  25. #include "libavutil/cpu.h"
  26. #include "libavutil/aarch64/cpu.h"
  27. #include "libavcodec/hpeldsp.h"
  /*
   * Prototypes of the NEON pixel routines that ff_hpeldsp_init_aarch64()
   * stores into the HpelDSPContext tables. Only declarations are given in
   * this section; no definitions appear here. Every routine has the same
   * signature: (uint8_t *block, const uint8_t *pixels,
   *             ptrdiff_t line_size, int h).
   */

  /* put_pixels16: plain, _x2, _y2, _xy2 */
  void ff_put_pixels16_neon(uint8_t *block, const uint8_t *pixels,
                            ptrdiff_t line_size, int h);
  void ff_put_pixels16_x2_neon(uint8_t *block, const uint8_t *pixels,
                               ptrdiff_t line_size, int h);
  void ff_put_pixels16_y2_neon(uint8_t *block, const uint8_t *pixels,
                               ptrdiff_t line_size, int h);
  void ff_put_pixels16_xy2_neon(uint8_t *block, const uint8_t *pixels,
                                ptrdiff_t line_size, int h);

  /* put_pixels8: plain, _x2, _y2, _xy2 */
  void ff_put_pixels8_neon(uint8_t *block, const uint8_t *pixels,
                           ptrdiff_t line_size, int h);
  void ff_put_pixels8_x2_neon(uint8_t *block, const uint8_t *pixels,
                              ptrdiff_t line_size, int h);
  void ff_put_pixels8_y2_neon(uint8_t *block, const uint8_t *pixels,
                              ptrdiff_t line_size, int h);
  void ff_put_pixels8_xy2_neon(uint8_t *block, const uint8_t *pixels,
                               ptrdiff_t line_size, int h);

  /* put_pixels16 _no_rnd: _x2, _y2, _xy2 (no plain _no_rnd routine is declared) */
  void ff_put_pixels16_x2_no_rnd_neon(uint8_t *block, const uint8_t *pixels,
                                      ptrdiff_t line_size, int h);
  void ff_put_pixels16_y2_no_rnd_neon(uint8_t *block, const uint8_t *pixels,
                                      ptrdiff_t line_size, int h);
  void ff_put_pixels16_xy2_no_rnd_neon(uint8_t *block, const uint8_t *pixels,
                                       ptrdiff_t line_size, int h);

  /* put_pixels8 _no_rnd: _x2, _y2, _xy2 */
  void ff_put_pixels8_x2_no_rnd_neon(uint8_t *block, const uint8_t *pixels,
                                     ptrdiff_t line_size, int h);
  void ff_put_pixels8_y2_no_rnd_neon(uint8_t *block, const uint8_t *pixels,
                                     ptrdiff_t line_size, int h);
  void ff_put_pixels8_xy2_no_rnd_neon(uint8_t *block, const uint8_t *pixels,
                                      ptrdiff_t line_size, int h);

  /* avg_pixels16: plain, _x2, _y2, _xy2 */
  void ff_avg_pixels16_neon(uint8_t *block, const uint8_t *pixels,
                            ptrdiff_t line_size, int h);
  void ff_avg_pixels16_x2_neon(uint8_t *block, const uint8_t *pixels,
                               ptrdiff_t line_size, int h);
  void ff_avg_pixels16_y2_neon(uint8_t *block, const uint8_t *pixels,
                               ptrdiff_t line_size, int h);
  void ff_avg_pixels16_xy2_neon(uint8_t *block, const uint8_t *pixels,
                                ptrdiff_t line_size, int h);

  /* avg_pixels8: plain, _x2, _y2, _xy2 */
  void ff_avg_pixels8_neon(uint8_t *block, const uint8_t *pixels,
                           ptrdiff_t line_size, int h);
  void ff_avg_pixels8_x2_neon(uint8_t *block, const uint8_t *pixels,
                              ptrdiff_t line_size, int h);
  void ff_avg_pixels8_y2_neon(uint8_t *block, const uint8_t *pixels,
                              ptrdiff_t line_size, int h);
  void ff_avg_pixels8_xy2_neon(uint8_t *block, const uint8_t *pixels,
                               ptrdiff_t line_size, int h);

  /* avg_pixels16 _no_rnd: _x2, _y2, _xy2 (no 8-wide avg _no_rnd routines are declared) */
  void ff_avg_pixels16_x2_no_rnd_neon(uint8_t *block, const uint8_t *pixels,
                                      ptrdiff_t line_size, int h);
  void ff_avg_pixels16_y2_no_rnd_neon(uint8_t *block, const uint8_t *pixels,
                                      ptrdiff_t line_size, int h);
  void ff_avg_pixels16_xy2_no_rnd_neon(uint8_t *block, const uint8_t *pixels,
                                       ptrdiff_t line_size, int h);
  78. av_cold void ff_hpeldsp_init_aarch64(HpelDSPContext *c, int flags)
  79. {
  80. int cpu_flags = av_get_cpu_flags();
  81. if (have_neon(cpu_flags)) {
  82. c->put_pixels_tab[0][0] = ff_put_pixels16_neon;
  83. c->put_pixels_tab[0][1] = ff_put_pixels16_x2_neon;
  84. c->put_pixels_tab[0][2] = ff_put_pixels16_y2_neon;
  85. c->put_pixels_tab[0][3] = ff_put_pixels16_xy2_neon;
  86. c->put_pixels_tab[1][0] = ff_put_pixels8_neon;
  87. c->put_pixels_tab[1][1] = ff_put_pixels8_x2_neon;
  88. c->put_pixels_tab[1][2] = ff_put_pixels8_y2_neon;
  89. c->put_pixels_tab[1][3] = ff_put_pixels8_xy2_neon;
  90. c->put_no_rnd_pixels_tab[0][0] = ff_put_pixels16_neon;
  91. c->put_no_rnd_pixels_tab[0][1] = ff_put_pixels16_x2_no_rnd_neon;
  92. c->put_no_rnd_pixels_tab[0][2] = ff_put_pixels16_y2_no_rnd_neon;
  93. c->put_no_rnd_pixels_tab[0][3] = ff_put_pixels16_xy2_no_rnd_neon;
  94. c->put_no_rnd_pixels_tab[1][0] = ff_put_pixels8_neon;
  95. c->put_no_rnd_pixels_tab[1][1] = ff_put_pixels8_x2_no_rnd_neon;
  96. c->put_no_rnd_pixels_tab[1][2] = ff_put_pixels8_y2_no_rnd_neon;
  97. c->put_no_rnd_pixels_tab[1][3] = ff_put_pixels8_xy2_no_rnd_neon;
  98. c->avg_pixels_tab[0][0] = ff_avg_pixels16_neon;
  99. c->avg_pixels_tab[0][1] = ff_avg_pixels16_x2_neon;
  100. c->avg_pixels_tab[0][2] = ff_avg_pixels16_y2_neon;
  101. c->avg_pixels_tab[0][3] = ff_avg_pixels16_xy2_neon;
  102. c->avg_pixels_tab[1][0] = ff_avg_pixels8_neon;
  103. c->avg_pixels_tab[1][1] = ff_avg_pixels8_x2_neon;
  104. c->avg_pixels_tab[1][2] = ff_avg_pixels8_y2_neon;
  105. c->avg_pixels_tab[1][3] = ff_avg_pixels8_xy2_neon;
  106. c->avg_no_rnd_pixels_tab[0] = ff_avg_pixels16_neon;
  107. c->avg_no_rnd_pixels_tab[1] = ff_avg_pixels16_x2_no_rnd_neon;
  108. c->avg_no_rnd_pixels_tab[2] = ff_avg_pixels16_y2_no_rnd_neon;
  109. c->avg_no_rnd_pixels_tab[3] = ff_avg_pixels16_xy2_no_rnd_neon;
  110. }
  111. }