reconinter_dspr2.c

/*
 *  Copyright (c) 2012 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "vpx_config.h"
#include "vp8_rtcd.h"
#include "vpx/vpx_integer.h"

#if HAVE_DSPR2

inline void prefetch_load_int(unsigned char *src) {
  __asm__ __volatile__("pref   0,  0(%[src])   \n\t" : : [src] "r"(src));
}
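
/*
 * Block-copy helpers used for inter prediction. Each routine copies a
 * fixed-size block of 8-bit pixels one row at a time: ulw reads four bytes
 * from a possibly unaligned source address and sw writes them to the
 * destination, which must therefore be word-aligned. prefetch_load_int()
 * pulls upcoming source data into the cache before it is read.
 */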
__inline void vp8_copy_mem16x16_dspr2(unsigned char *RESTRICT src,
                                      int src_stride,
                                      unsigned char *RESTRICT dst,
                                      int dst_stride) {
  int r;
  unsigned int a0, a1, a2, a3;

  for (r = 16; r--;) {
    /* load src data in cache memory */
    prefetch_load_int(src + src_stride);

    /* use unaligned memory load and store */
    __asm__ __volatile__(
        "ulw    %[a0],  0(%[src])    \n\t"
        "ulw    %[a1],  4(%[src])    \n\t"
        "ulw    %[a2],  8(%[src])    \n\t"
        "ulw    %[a3], 12(%[src])    \n\t"
        "sw     %[a0],  0(%[dst])    \n\t"
        "sw     %[a1],  4(%[dst])    \n\t"
        "sw     %[a2],  8(%[dst])    \n\t"
        "sw     %[a3], 12(%[dst])    \n\t"
        : [a0] "=&r"(a0), [a1] "=&r"(a1), [a2] "=&r"(a2), [a3] "=&r"(a3)
        : [src] "r"(src), [dst] "r"(dst));

    src += src_stride;
    dst += dst_stride;
  }
}
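
/*
 * The 8-pixel-wide variants below follow the same pattern but copy two
 * words per row and, unlike the 16x16 copy, issue a single prefetch before
 * the row loop rather than one per row.
 */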
__inline void vp8_copy_mem8x8_dspr2(unsigned char *RESTRICT src, int src_stride,
                                    unsigned char *RESTRICT dst,
                                    int dst_stride) {
  int r;
  unsigned int a0, a1;

  /* load src data in cache memory */
  prefetch_load_int(src + src_stride);

  for (r = 8; r--;) {
    /* use unaligned memory load and store */
    __asm__ __volatile__(
        "ulw    %[a0], 0(%[src])    \n\t"
        "ulw    %[a1], 4(%[src])    \n\t"
        "sw     %[a0], 0(%[dst])    \n\t"
        "sw     %[a1], 4(%[dst])    \n\t"
        : [a0] "=&r"(a0), [a1] "=&r"(a1)
        : [src] "r"(src), [dst] "r"(dst));

    src += src_stride;
    dst += dst_stride;
  }
}

__inline void vp8_copy_mem8x4_dspr2(unsigned char *RESTRICT src, int src_stride,
                                    unsigned char *RESTRICT dst,
                                    int dst_stride) {
  int r;
  unsigned int a0, a1;

  /* load src data in cache memory */
  prefetch_load_int(src + src_stride);

  for (r = 4; r--;) {
    /* use unaligned memory load and store */
    __asm__ __volatile__(
        "ulw    %[a0], 0(%[src])    \n\t"
        "ulw    %[a1], 4(%[src])    \n\t"
        "sw     %[a0], 0(%[dst])    \n\t"
        "sw     %[a1], 4(%[dst])    \n\t"
        : [a0] "=&r"(a0), [a1] "=&r"(a1)
        : [src] "r"(src), [dst] "r"(dst));

    src += src_stride;
    dst += dst_stride;
  }
}

#endif
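
/*
 * Illustrative sketch, not part of the original file: a portable C
 * equivalent of the copy routines above. In libvpx the _dspr2 functions are
 * hooked up through the vp8_rtcd.h dispatch tables when the build enables
 * DSPR2; the hypothetical helper below (copy_mem_c is an assumed name)
 * shows the behaviour they implement, i.e. copying a width x height block
 * of 8-bit pixels one row at a time.
 */
#include <string.h>

static void copy_mem_c(const unsigned char *src, int src_stride,
                       unsigned char *dst, int dst_stride, int width,
                       int height) {
  int r;
  for (r = 0; r < height; ++r) {
    memcpy(dst, src, (size_t)width); /* copy one row of pixels */
    src += src_stride;
    dst += dst_stride;
  }
}

/* Example: copy_mem_c(src, src_stride, dst, dst_stride, 16, 16) mirrors
 * vp8_copy_mem16x16_dspr2(src, src_stride, dst, dst_stride). */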