compare_mmi.cc 5.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123
  1. /*
  2. * Copyright 2012 The LibYuv Project Authors. All rights reserved.
  3. *
  4. * Use of this source code is governed by a BSD-style license
  5. * that can be found in the LICENSE file in the root of the source
  6. * tree. An additional intellectual property rights grant can be found
  7. * in the file PATENTS. All contributing project authors may
  8. * be found in the AUTHORS file in the root of the source tree.
  9. */
  10. #include "libyuv/basic_types.h"
  11. #include "libyuv/compare_row.h"
  12. #ifdef __cplusplus
  13. namespace libyuv {
  14. extern "C" {
  15. #endif
  16. // This module is for MIPS MMI.
  17. #if !defined(LIBYUV_DISABLE_MMI) && defined(_MIPS_ARCH_LOONGSON3A)
  18. // Hakmem method for hamming distance.
  19. uint32_t HammingDistance_MMI(const uint8_t* src_a,
  20. const uint8_t* src_b,
  21. int count) {
  22. uint32_t diff = 0u;
  23. uint64_t temp = 0, temp1 = 0, ta = 0, tb = 0;
  24. uint64_t c1 = 0x5555555555555555;
  25. uint64_t c2 = 0x3333333333333333;
  26. uint64_t c3 = 0x0f0f0f0f0f0f0f0f;
  27. uint32_t c4 = 0x01010101;
  28. uint64_t s1 = 1, s2 = 2, s3 = 4;
  29. __asm__ volatile(
  30. "1: \n\t"
  31. "ldc1 %[ta], 0(%[src_a]) \n\t"
  32. "ldc1 %[tb], 0(%[src_b]) \n\t"
  33. "xor %[temp], %[ta], %[tb] \n\t"
  34. "psrlw %[temp1], %[temp], %[s1] \n\t" // temp1=x>>1
  35. "and %[temp1], %[temp1], %[c1] \n\t" // temp1&=c1
  36. "psubw %[temp1], %[temp], %[temp1] \n\t" // x-temp1
  37. "and %[temp], %[temp1], %[c2] \n\t" // t = (u&c2)
  38. "psrlw %[temp1], %[temp1], %[s2] \n\t" // u>>2
  39. "and %[temp1], %[temp1], %[c2] \n\t" // u>>2 & c2
  40. "paddw %[temp1], %[temp1], %[temp] \n\t" // t1 = t1+t
  41. "psrlw %[temp], %[temp1], %[s3] \n\t" // u>>4
  42. "paddw %[temp1], %[temp1], %[temp] \n\t" // u+(u>>4)
  43. "and %[temp1], %[temp1], %[c3] \n\t" //&c3
  44. "dmfc1 $t0, %[temp1] \n\t"
  45. "dsrl32 $t0, $t0, 0 \n\t "
  46. "mul $t0, $t0, %[c4] \n\t"
  47. "dsrl $t0, $t0, 24 \n\t"
  48. "dadd %[diff], %[diff], $t0 \n\t"
  49. "dmfc1 $t0, %[temp1] \n\t"
  50. "mul $t0, $t0, %[c4] \n\t"
  51. "dsrl $t0, $t0, 24 \n\t"
  52. "dadd %[diff], %[diff], $t0 \n\t"
  53. "daddiu %[src_a], %[src_a], 8 \n\t"
  54. "daddiu %[src_b], %[src_b], 8 \n\t"
  55. "addiu %[count], %[count], -8 \n\t"
  56. "bgtz %[count], 1b \n\t"
  57. "nop \n\t"
  58. : [diff] "+r"(diff), [src_a] "+r"(src_a), [src_b] "+r"(src_b),
  59. [count] "+r"(count), [ta] "+f"(ta), [tb] "+f"(tb), [temp] "+f"(temp),
  60. [temp1] "+f"(temp1)
  61. : [c1] "f"(c1), [c2] "f"(c2), [c3] "f"(c3), [c4] "r"(c4), [s1] "f"(s1),
  62. [s2] "f"(s2), [s3] "f"(s3)
  63. : "memory");
  64. return diff;
  65. }
  66. uint32_t SumSquareError_MMI(const uint8_t* src_a,
  67. const uint8_t* src_b,
  68. int count) {
  69. uint32_t sse = 0u;
  70. uint32_t sse_hi = 0u, sse_lo = 0u;
  71. uint64_t src1, src2;
  72. uint64_t diff, diff_hi, diff_lo;
  73. uint64_t sse_sum, sse_tmp;
  74. const uint64_t mask = 0x0ULL;
  75. __asm__ volatile(
  76. "xor %[sse_sum], %[sse_sum], %[sse_sum] \n\t"
  77. "1: \n\t"
  78. "ldc1 %[src1], 0x00(%[src_a]) \n\t"
  79. "ldc1 %[src2], 0x00(%[src_b]) \n\t"
  80. "pasubub %[diff], %[src1], %[src2] \n\t"
  81. "punpcklbh %[diff_lo], %[diff], %[mask] \n\t"
  82. "punpckhbh %[diff_hi], %[diff], %[mask] \n\t"
  83. "pmaddhw %[sse_tmp], %[diff_lo], %[diff_lo] \n\t"
  84. "paddw %[sse_sum], %[sse_sum], %[sse_tmp] \n\t"
  85. "pmaddhw %[sse_tmp], %[diff_hi], %[diff_hi] \n\t"
  86. "paddw %[sse_sum], %[sse_sum], %[sse_tmp] \n\t"
  87. "daddiu %[src_a], %[src_a], 0x08 \n\t"
  88. "daddiu %[src_b], %[src_b], 0x08 \n\t"
  89. "daddiu %[count], %[count], -0x08 \n\t"
  90. "bnez %[count], 1b \n\t"
  91. "mfc1 %[sse_lo], %[sse_sum] \n\t"
  92. "mfhc1 %[sse_hi], %[sse_sum] \n\t"
  93. "daddu %[sse], %[sse_hi], %[sse_lo] \n\t"
  94. : [sse] "+&r"(sse), [diff] "=&f"(diff), [src1] "=&f"(src1),
  95. [src2] "=&f"(src2), [diff_lo] "=&f"(diff_lo), [diff_hi] "=&f"(diff_hi),
  96. [sse_sum] "=&f"(sse_sum), [sse_tmp] "=&f"(sse_tmp),
  97. [sse_hi] "+&r"(sse_hi), [sse_lo] "+&r"(sse_lo)
  98. : [src_a] "r"(src_a), [src_b] "r"(src_b), [count] "r"(count),
  99. [mask] "f"(mask)
  100. : "memory");
  101. return sse;
  102. }
  103. #endif // !defined(LIBYUV_DISABLE_MMI) && defined(_MIPS_ARCH_LOONGSON3A)
  104. #ifdef __cplusplus
  105. } // extern "C"
  106. } // namespace libyuv
  107. #endif