2
0

scale_test.cc 36 KB


  1. /*
  2. * Copyright 2011 The LibYuv Project Authors. All rights reserved.
  3. *
  4. * Use of this source code is governed by a BSD-style license
  5. * that can be found in the LICENSE file in the root of the source
  6. * tree. An additional intellectual property rights grant can be found
  7. * in the file PATENTS. All contributing project authors may
  8. * be found in the AUTHORS file in the root of the source tree.
  9. */
  10. #include <stdlib.h>
  11. #include <time.h>
  12. #include "../unit_test/unit_test.h"
  13. #include "libyuv/cpu_id.h"
  14. #include "libyuv/scale.h"
  15. #ifdef ENABLE_ROW_TESTS
  16. #include "libyuv/scale_row.h" // For ScaleRowDown2Box_Odd_C
  17. #endif
  // STRINGIZE turns its argument into a string literal. FILELINESTR joins a
  // file name and a line into one "<file>:<line>" literal; because the line is
  // handed to STRINGIZE as an argument, a macro such as __LINE__ is expanded
  // before it is stringized.
  #define STRINGIZE(text) #text
  #define FILELINESTR(path, lineno) path ":" STRINGIZE(lineno)
  20. namespace libyuv {
  21. // Test scaling with C vs Opt and return maximum pixel difference. 0 = exact.
  22. static int I420TestFilter(int src_width,
  23. int src_height,
  24. int dst_width,
  25. int dst_height,
  26. FilterMode f,
  27. int benchmark_iterations,
  28. int disable_cpu_flags,
  29. int benchmark_cpu_info) {
  30. if (!SizeValid(src_width, src_height, dst_width, dst_height)) {
  31. return 0;
  32. }
  33. int i, j;
  34. int src_width_uv = (Abs(src_width) + 1) >> 1;
  35. int src_height_uv = (Abs(src_height) + 1) >> 1;
  36. int64_t src_y_plane_size = (Abs(src_width)) * (Abs(src_height));
  37. int64_t src_uv_plane_size = (src_width_uv) * (src_height_uv);
  38. int src_stride_y = Abs(src_width);
  39. int src_stride_uv = src_width_uv;
  40. align_buffer_page_end(src_y, src_y_plane_size);
  41. align_buffer_page_end(src_u, src_uv_plane_size);
  42. align_buffer_page_end(src_v, src_uv_plane_size);
  43. if (!src_y || !src_u || !src_v) {
  44. printf("Skipped. Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n");
  45. return 0;
  46. }
  47. MemRandomize(src_y, src_y_plane_size);
  48. MemRandomize(src_u, src_uv_plane_size);
  49. MemRandomize(src_v, src_uv_plane_size);
  50. int dst_width_uv = (dst_width + 1) >> 1;
  51. int dst_height_uv = (dst_height + 1) >> 1;
  52. int64_t dst_y_plane_size = (dst_width) * (dst_height);
  53. int64_t dst_uv_plane_size = (dst_width_uv) * (dst_height_uv);
  54. int dst_stride_y = dst_width;
  55. int dst_stride_uv = dst_width_uv;
  56. align_buffer_page_end(dst_y_c, dst_y_plane_size);
  57. align_buffer_page_end(dst_u_c, dst_uv_plane_size);
  58. align_buffer_page_end(dst_v_c, dst_uv_plane_size);
  59. align_buffer_page_end(dst_y_opt, dst_y_plane_size);
  60. align_buffer_page_end(dst_u_opt, dst_uv_plane_size);
  61. align_buffer_page_end(dst_v_opt, dst_uv_plane_size);
  62. if (!dst_y_c || !dst_u_c || !dst_v_c || !dst_y_opt || !dst_u_opt ||
  63. !dst_v_opt) {
  64. printf("Skipped. Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n");
  65. return 0;
  66. }
  67. MaskCpuFlags(disable_cpu_flags); // Disable all CPU optimization.
  68. double c_time = get_time();
  69. I420Scale(src_y, src_stride_y, src_u, src_stride_uv, src_v, src_stride_uv,
  70. src_width, src_height, dst_y_c, dst_stride_y, dst_u_c,
  71. dst_stride_uv, dst_v_c, dst_stride_uv, dst_width, dst_height, f);
  72. c_time = (get_time() - c_time);
  73. MaskCpuFlags(benchmark_cpu_info); // Enable all CPU optimization.
  74. double opt_time = get_time();
  75. for (i = 0; i < benchmark_iterations; ++i) {
  76. I420Scale(src_y, src_stride_y, src_u, src_stride_uv, src_v, src_stride_uv,
  77. src_width, src_height, dst_y_opt, dst_stride_y, dst_u_opt,
  78. dst_stride_uv, dst_v_opt, dst_stride_uv, dst_width, dst_height,
  79. f);
  80. }
  81. opt_time = (get_time() - opt_time) / benchmark_iterations;
  82. // Report performance of C vs OPT.
  83. printf("filter %d - %8d us C - %8d us OPT\n", f,
  84. static_cast<int>(c_time * 1e6), static_cast<int>(opt_time * 1e6));
  85. // C version may be a little off from the optimized. Order of
  86. // operations may introduce rounding somewhere. So do a difference
  87. // of the buffers and look to see that the max difference is not
  88. // over 3.
  89. int max_diff = 0;
  90. for (i = 0; i < (dst_height); ++i) {
  91. for (j = 0; j < (dst_width); ++j) {
  92. int abs_diff = Abs(dst_y_c[(i * dst_stride_y) + j] -
  93. dst_y_opt[(i * dst_stride_y) + j]);
  94. if (abs_diff > max_diff) {
  95. max_diff = abs_diff;
  96. }
  97. }
  98. }
  99. for (i = 0; i < (dst_height_uv); ++i) {
  100. for (j = 0; j < (dst_width_uv); ++j) {
  101. int abs_diff = Abs(dst_u_c[(i * dst_stride_uv) + j] -
  102. dst_u_opt[(i * dst_stride_uv) + j]);
  103. if (abs_diff > max_diff) {
  104. max_diff = abs_diff;
  105. }
  106. abs_diff = Abs(dst_v_c[(i * dst_stride_uv) + j] -
  107. dst_v_opt[(i * dst_stride_uv) + j]);
  108. if (abs_diff > max_diff) {
  109. max_diff = abs_diff;
  110. }
  111. }
  112. }
  113. free_aligned_buffer_page_end(dst_y_c);
  114. free_aligned_buffer_page_end(dst_u_c);
  115. free_aligned_buffer_page_end(dst_v_c);
  116. free_aligned_buffer_page_end(dst_y_opt);
  117. free_aligned_buffer_page_end(dst_u_opt);
  118. free_aligned_buffer_page_end(dst_v_opt);
  119. free_aligned_buffer_page_end(src_y);
  120. free_aligned_buffer_page_end(src_u);
  121. free_aligned_buffer_page_end(src_v);
  122. return max_diff;
  123. }
  124. // Test scaling with 8 bit C vs 16 bit C and return maximum pixel difference.
  125. // 0 = exact.
  126. static int I420TestFilter_16(int src_width,
  127. int src_height,
  128. int dst_width,
  129. int dst_height,
  130. FilterMode f,
  131. int benchmark_iterations,
  132. int disable_cpu_flags,
  133. int benchmark_cpu_info) {
  134. if (!SizeValid(src_width, src_height, dst_width, dst_height)) {
  135. return 0;
  136. }
  137. int i;
  138. int src_width_uv = (Abs(src_width) + 1) >> 1;
  139. int src_height_uv = (Abs(src_height) + 1) >> 1;
  140. int64_t src_y_plane_size = (Abs(src_width)) * (Abs(src_height));
  141. int64_t src_uv_plane_size = (src_width_uv) * (src_height_uv);
  142. int src_stride_y = Abs(src_width);
  143. int src_stride_uv = src_width_uv;
  144. align_buffer_page_end(src_y, src_y_plane_size);
  145. align_buffer_page_end(src_u, src_uv_plane_size);
  146. align_buffer_page_end(src_v, src_uv_plane_size);
  147. align_buffer_page_end(src_y_16, src_y_plane_size * 2);
  148. align_buffer_page_end(src_u_16, src_uv_plane_size * 2);
  149. align_buffer_page_end(src_v_16, src_uv_plane_size * 2);
  150. if (!src_y || !src_u || !src_v || !src_y_16 || !src_u_16 || !src_v_16) {
  151. printf("Skipped. Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n");
  152. return 0;
  153. }
  154. uint16_t* p_src_y_16 = reinterpret_cast<uint16_t*>(src_y_16);
  155. uint16_t* p_src_u_16 = reinterpret_cast<uint16_t*>(src_u_16);
  156. uint16_t* p_src_v_16 = reinterpret_cast<uint16_t*>(src_v_16);
  157. MemRandomize(src_y, src_y_plane_size);
  158. MemRandomize(src_u, src_uv_plane_size);
  159. MemRandomize(src_v, src_uv_plane_size);
  160. for (i = 0; i < src_y_plane_size; ++i) {
  161. p_src_y_16[i] = src_y[i];
  162. }
  163. for (i = 0; i < src_uv_plane_size; ++i) {
  164. p_src_u_16[i] = src_u[i];
  165. p_src_v_16[i] = src_v[i];
  166. }
  167. int dst_width_uv = (dst_width + 1) >> 1;
  168. int dst_height_uv = (dst_height + 1) >> 1;
  169. int dst_y_plane_size = (dst_width) * (dst_height);
  170. int dst_uv_plane_size = (dst_width_uv) * (dst_height_uv);
  171. int dst_stride_y = dst_width;
  172. int dst_stride_uv = dst_width_uv;
  173. align_buffer_page_end(dst_y_8, dst_y_plane_size);
  174. align_buffer_page_end(dst_u_8, dst_uv_plane_size);
  175. align_buffer_page_end(dst_v_8, dst_uv_plane_size);
  176. align_buffer_page_end(dst_y_16, dst_y_plane_size * 2);
  177. align_buffer_page_end(dst_u_16, dst_uv_plane_size * 2);
  178. align_buffer_page_end(dst_v_16, dst_uv_plane_size * 2);
  179. uint16_t* p_dst_y_16 = reinterpret_cast<uint16_t*>(dst_y_16);
  180. uint16_t* p_dst_u_16 = reinterpret_cast<uint16_t*>(dst_u_16);
  181. uint16_t* p_dst_v_16 = reinterpret_cast<uint16_t*>(dst_v_16);
  182. MaskCpuFlags(disable_cpu_flags); // Disable all CPU optimization.
  183. I420Scale(src_y, src_stride_y, src_u, src_stride_uv, src_v, src_stride_uv,
  184. src_width, src_height, dst_y_8, dst_stride_y, dst_u_8,
  185. dst_stride_uv, dst_v_8, dst_stride_uv, dst_width, dst_height, f);
  186. MaskCpuFlags(benchmark_cpu_info); // Enable all CPU optimization.
  187. for (i = 0; i < benchmark_iterations; ++i) {
  188. I420Scale_16(p_src_y_16, src_stride_y, p_src_u_16, src_stride_uv,
  189. p_src_v_16, src_stride_uv, src_width, src_height, p_dst_y_16,
  190. dst_stride_y, p_dst_u_16, dst_stride_uv, p_dst_v_16,
  191. dst_stride_uv, dst_width, dst_height, f);
  192. }
  193. // Expect an exact match.
  194. int max_diff = 0;
  195. for (i = 0; i < dst_y_plane_size; ++i) {
  196. int abs_diff = Abs(dst_y_8[i] - p_dst_y_16[i]);
  197. if (abs_diff > max_diff) {
  198. max_diff = abs_diff;
  199. }
  200. }
  201. for (i = 0; i < dst_uv_plane_size; ++i) {
  202. int abs_diff = Abs(dst_u_8[i] - p_dst_u_16[i]);
  203. if (abs_diff > max_diff) {
  204. max_diff = abs_diff;
  205. }
  206. abs_diff = Abs(dst_v_8[i] - p_dst_v_16[i]);
  207. if (abs_diff > max_diff) {
  208. max_diff = abs_diff;
  209. }
  210. }
  211. free_aligned_buffer_page_end(dst_y_8);
  212. free_aligned_buffer_page_end(dst_u_8);
  213. free_aligned_buffer_page_end(dst_v_8);
  214. free_aligned_buffer_page_end(dst_y_16);
  215. free_aligned_buffer_page_end(dst_u_16);
  216. free_aligned_buffer_page_end(dst_v_16);
  217. free_aligned_buffer_page_end(src_y);
  218. free_aligned_buffer_page_end(src_u);
  219. free_aligned_buffer_page_end(src_v);
  220. free_aligned_buffer_page_end(src_y_16);
  221. free_aligned_buffer_page_end(src_u_16);
  222. free_aligned_buffer_page_end(src_v_16);
  223. return max_diff;
  224. }
  225. // Test scaling with C vs Opt and return maximum pixel difference. 0 = exact.
  226. static int I444TestFilter(int src_width,
  227. int src_height,
  228. int dst_width,
  229. int dst_height,
  230. FilterMode f,
  231. int benchmark_iterations,
  232. int disable_cpu_flags,
  233. int benchmark_cpu_info) {
  234. if (!SizeValid(src_width, src_height, dst_width, dst_height)) {
  235. return 0;
  236. }
  237. int i, j;
  238. int src_width_uv = Abs(src_width);
  239. int src_height_uv = Abs(src_height);
  240. int64_t src_y_plane_size = (Abs(src_width)) * (Abs(src_height));
  241. int64_t src_uv_plane_size = (src_width_uv) * (src_height_uv);
  242. int src_stride_y = Abs(src_width);
  243. int src_stride_uv = src_width_uv;
  244. align_buffer_page_end(src_y, src_y_plane_size);
  245. align_buffer_page_end(src_u, src_uv_plane_size);
  246. align_buffer_page_end(src_v, src_uv_plane_size);
  247. if (!src_y || !src_u || !src_v) {
  248. printf("Skipped. Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n");
  249. return 0;
  250. }
  251. MemRandomize(src_y, src_y_plane_size);
  252. MemRandomize(src_u, src_uv_plane_size);
  253. MemRandomize(src_v, src_uv_plane_size);
  254. int dst_width_uv = dst_width;
  255. int dst_height_uv = dst_height;
  256. int64_t dst_y_plane_size = (dst_width) * (dst_height);
  257. int64_t dst_uv_plane_size = (dst_width_uv) * (dst_height_uv);
  258. int dst_stride_y = dst_width;
  259. int dst_stride_uv = dst_width_uv;
  260. align_buffer_page_end(dst_y_c, dst_y_plane_size);
  261. align_buffer_page_end(dst_u_c, dst_uv_plane_size);
  262. align_buffer_page_end(dst_v_c, dst_uv_plane_size);
  263. align_buffer_page_end(dst_y_opt, dst_y_plane_size);
  264. align_buffer_page_end(dst_u_opt, dst_uv_plane_size);
  265. align_buffer_page_end(dst_v_opt, dst_uv_plane_size);
  266. if (!dst_y_c || !dst_u_c || !dst_v_c || !dst_y_opt || !dst_u_opt ||
  267. !dst_v_opt) {
  268. printf("Skipped. Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n");
  269. return 0;
  270. }
  271. MaskCpuFlags(disable_cpu_flags); // Disable all CPU optimization.
  272. double c_time = get_time();
  273. I444Scale(src_y, src_stride_y, src_u, src_stride_uv, src_v, src_stride_uv,
  274. src_width, src_height, dst_y_c, dst_stride_y, dst_u_c,
  275. dst_stride_uv, dst_v_c, dst_stride_uv, dst_width, dst_height, f);
  276. c_time = (get_time() - c_time);
  277. MaskCpuFlags(benchmark_cpu_info); // Enable all CPU optimization.
  278. double opt_time = get_time();
  279. for (i = 0; i < benchmark_iterations; ++i) {
  280. I444Scale(src_y, src_stride_y, src_u, src_stride_uv, src_v, src_stride_uv,
  281. src_width, src_height, dst_y_opt, dst_stride_y, dst_u_opt,
  282. dst_stride_uv, dst_v_opt, dst_stride_uv, dst_width, dst_height,
  283. f);
  284. }
  285. opt_time = (get_time() - opt_time) / benchmark_iterations;
  286. // Report performance of C vs OPT.
  287. printf("filter %d - %8d us C - %8d us OPT\n", f,
  288. static_cast<int>(c_time * 1e6), static_cast<int>(opt_time * 1e6));
  289. // C version may be a little off from the optimized. Order of
  290. // operations may introduce rounding somewhere. So do a difference
  291. // of the buffers and look to see that the max difference is not
  292. // over 3.
  293. int max_diff = 0;
  294. for (i = 0; i < (dst_height); ++i) {
  295. for (j = 0; j < (dst_width); ++j) {
  296. int abs_diff = Abs(dst_y_c[(i * dst_stride_y) + j] -
  297. dst_y_opt[(i * dst_stride_y) + j]);
  298. if (abs_diff > max_diff) {
  299. max_diff = abs_diff;
  300. }
  301. }
  302. }
  303. for (i = 0; i < (dst_height_uv); ++i) {
  304. for (j = 0; j < (dst_width_uv); ++j) {
  305. int abs_diff = Abs(dst_u_c[(i * dst_stride_uv) + j] -
  306. dst_u_opt[(i * dst_stride_uv) + j]);
  307. if (abs_diff > max_diff) {
  308. max_diff = abs_diff;
  309. }
  310. abs_diff = Abs(dst_v_c[(i * dst_stride_uv) + j] -
  311. dst_v_opt[(i * dst_stride_uv) + j]);
  312. if (abs_diff > max_diff) {
  313. max_diff = abs_diff;
  314. }
  315. }
  316. }
  317. free_aligned_buffer_page_end(dst_y_c);
  318. free_aligned_buffer_page_end(dst_u_c);
  319. free_aligned_buffer_page_end(dst_v_c);
  320. free_aligned_buffer_page_end(dst_y_opt);
  321. free_aligned_buffer_page_end(dst_u_opt);
  322. free_aligned_buffer_page_end(dst_v_opt);
  323. free_aligned_buffer_page_end(src_y);
  324. free_aligned_buffer_page_end(src_u);
  325. free_aligned_buffer_page_end(src_v);
  326. return max_diff;
  327. }
  328. // Test scaling with 8 bit C vs 16 bit C and return maximum pixel difference.
  329. // 0 = exact.
  330. static int I444TestFilter_16(int src_width,
  331. int src_height,
  332. int dst_width,
  333. int dst_height,
  334. FilterMode f,
  335. int benchmark_iterations,
  336. int disable_cpu_flags,
  337. int benchmark_cpu_info) {
  338. if (!SizeValid(src_width, src_height, dst_width, dst_height)) {
  339. return 0;
  340. }
  341. int i;
  342. int src_width_uv = Abs(src_width);
  343. int src_height_uv = Abs(src_height);
  344. int64_t src_y_plane_size = (Abs(src_width)) * (Abs(src_height));
  345. int64_t src_uv_plane_size = (src_width_uv) * (src_height_uv);
  346. int src_stride_y = Abs(src_width);
  347. int src_stride_uv = src_width_uv;
  348. align_buffer_page_end(src_y, src_y_plane_size);
  349. align_buffer_page_end(src_u, src_uv_plane_size);
  350. align_buffer_page_end(src_v, src_uv_plane_size);
  351. align_buffer_page_end(src_y_16, src_y_plane_size * 2);
  352. align_buffer_page_end(src_u_16, src_uv_plane_size * 2);
  353. align_buffer_page_end(src_v_16, src_uv_plane_size * 2);
  354. if (!src_y || !src_u || !src_v || !src_y_16 || !src_u_16 || !src_v_16) {
  355. printf("Skipped. Alloc failed " FILELINESTR(__FILE__, __LINE__) "\n");
  356. return 0;
  357. }
  358. uint16_t* p_src_y_16 = reinterpret_cast<uint16_t*>(src_y_16);
  359. uint16_t* p_src_u_16 = reinterpret_cast<uint16_t*>(src_u_16);
  360. uint16_t* p_src_v_16 = reinterpret_cast<uint16_t*>(src_v_16);
  361. MemRandomize(src_y, src_y_plane_size);
  362. MemRandomize(src_u, src_uv_plane_size);
  363. MemRandomize(src_v, src_uv_plane_size);
  364. for (i = 0; i < src_y_plane_size; ++i) {
  365. p_src_y_16[i] = src_y[i];
  366. }
  367. for (i = 0; i < src_uv_plane_size; ++i) {
  368. p_src_u_16[i] = src_u[i];
  369. p_src_v_16[i] = src_v[i];
  370. }
  371. int dst_width_uv = dst_width;
  372. int dst_height_uv = dst_height;
  373. int dst_y_plane_size = (dst_width) * (dst_height);
  374. int dst_uv_plane_size = (dst_width_uv) * (dst_height_uv);
  375. int dst_stride_y = dst_width;
  376. int dst_stride_uv = dst_width_uv;
  377. align_buffer_page_end(dst_y_8, dst_y_plane_size);
  378. align_buffer_page_end(dst_u_8, dst_uv_plane_size);
  379. align_buffer_page_end(dst_v_8, dst_uv_plane_size);
  380. align_buffer_page_end(dst_y_16, dst_y_plane_size * 2);
  381. align_buffer_page_end(dst_u_16, dst_uv_plane_size * 2);
  382. align_buffer_page_end(dst_v_16, dst_uv_plane_size * 2);
  383. uint16_t* p_dst_y_16 = reinterpret_cast<uint16_t*>(dst_y_16);
  384. uint16_t* p_dst_u_16 = reinterpret_cast<uint16_t*>(dst_u_16);
  385. uint16_t* p_dst_v_16 = reinterpret_cast<uint16_t*>(dst_v_16);
  386. MaskCpuFlags(disable_cpu_flags); // Disable all CPU optimization.
  387. I444Scale(src_y, src_stride_y, src_u, src_stride_uv, src_v, src_stride_uv,
  388. src_width, src_height, dst_y_8, dst_stride_y, dst_u_8,
  389. dst_stride_uv, dst_v_8, dst_stride_uv, dst_width, dst_height, f);
  390. MaskCpuFlags(benchmark_cpu_info); // Enable all CPU optimization.
  391. for (i = 0; i < benchmark_iterations; ++i) {
  392. I444Scale_16(p_src_y_16, src_stride_y, p_src_u_16, src_stride_uv,
  393. p_src_v_16, src_stride_uv, src_width, src_height, p_dst_y_16,
  394. dst_stride_y, p_dst_u_16, dst_stride_uv, p_dst_v_16,
  395. dst_stride_uv, dst_width, dst_height, f);
  396. }
  397. // Expect an exact match.
  398. int max_diff = 0;
  399. for (i = 0; i < dst_y_plane_size; ++i) {
  400. int abs_diff = Abs(dst_y_8[i] - p_dst_y_16[i]);
  401. if (abs_diff > max_diff) {
  402. max_diff = abs_diff;
  403. }
  404. }
  405. for (i = 0; i < dst_uv_plane_size; ++i) {
  406. int abs_diff = Abs(dst_u_8[i] - p_dst_u_16[i]);
  407. if (abs_diff > max_diff) {
  408. max_diff = abs_diff;
  409. }
  410. abs_diff = Abs(dst_v_8[i] - p_dst_v_16[i]);
  411. if (abs_diff > max_diff) {
  412. max_diff = abs_diff;
  413. }
  414. }
  415. free_aligned_buffer_page_end(dst_y_8);
  416. free_aligned_buffer_page_end(dst_u_8);
  417. free_aligned_buffer_page_end(dst_v_8);
  418. free_aligned_buffer_page_end(dst_y_16);
  419. free_aligned_buffer_page_end(dst_u_16);
  420. free_aligned_buffer_page_end(dst_v_16);
  421. free_aligned_buffer_page_end(src_y);
  422. free_aligned_buffer_page_end(src_u);
  423. free_aligned_buffer_page_end(src_v);
  424. free_aligned_buffer_page_end(src_y_16);
  425. free_aligned_buffer_page_end(src_u_16);
  426. free_aligned_buffer_page_end(src_v_16);
  427. return max_diff;
  428. }
  // Size helpers for the scale factor tests. Both round a dimension so that
  // the nom/denom scale factor is achieved exactly; the trailing "* 2" is the
  // chroma subsample.
  //   DX - destination size: a multiple of 2 * nom, computed from Abs(v).
  //        The denominator argument is accepted but not used.
  //   SX - source size: a multiple of 2 * denom. Unlike DX, v is not passed
  //        through Abs().
  #define DX(v, num, den) static_cast<int>(((Abs(v) / num + 1) / 2) * num * 2)
  #define SX(v, num, den) static_cast<int>(((v / num + 1) / 2) * den * 2)
  // Defines four gtest cases for one scale factor and one filter: I420 and
  // I444, each in 8 bit and 16 bit (_16) form. The source size comes from SX()
  // and the destination size from DX(), both applied to the benchmark
  // dimensions. Each case expects the difference reported by its helper to be
  // at most |limit|.
  #define TEST_FACTOR1(name, filter, nom, denom, limit) \
    TEST_F(LibYUVScaleTest, I420ScaleDownBy##name##_##filter) { \
      int diff = I420TestFilter(SX(benchmark_width_, nom, denom), \
                                SX(benchmark_height_, nom, denom), \
                                DX(benchmark_width_, nom, denom), \
                                DX(benchmark_height_, nom, denom), \
                                kFilter##filter, benchmark_iterations_, \
                                disable_cpu_flags_, benchmark_cpu_info_); \
      EXPECT_LE(diff, limit); \
    } \
    TEST_F(LibYUVScaleTest, I444ScaleDownBy##name##_##filter) { \
      int diff = I444TestFilter(SX(benchmark_width_, nom, denom), \
                                SX(benchmark_height_, nom, denom), \
                                DX(benchmark_width_, nom, denom), \
                                DX(benchmark_height_, nom, denom), \
                                kFilter##filter, benchmark_iterations_, \
                                disable_cpu_flags_, benchmark_cpu_info_); \
      EXPECT_LE(diff, limit); \
    } \
    TEST_F(LibYUVScaleTest, I420ScaleDownBy##name##_##filter##_16) { \
      int diff = I420TestFilter_16(SX(benchmark_width_, nom, denom), \
                                   SX(benchmark_height_, nom, denom), \
                                   DX(benchmark_width_, nom, denom), \
                                   DX(benchmark_height_, nom, denom), \
                                   kFilter##filter, benchmark_iterations_, \
                                   disable_cpu_flags_, benchmark_cpu_info_); \
      EXPECT_LE(diff, limit); \
    } \
    TEST_F(LibYUVScaleTest, I444ScaleDownBy##name##_##filter##_16) { \
      int diff = I444TestFilter_16(SX(benchmark_width_, nom, denom), \
                                   SX(benchmark_height_, nom, denom), \
                                   DX(benchmark_width_, nom, denom), \
                                   DX(benchmark_height_, nom, denom), \
                                   kFilter##filter, benchmark_iterations_, \
                                   disable_cpu_flags_, benchmark_cpu_info_); \
      EXPECT_LE(diff, limit); \
    }
  467. // Test a scale factor with all 4 filters. Expect unfiltered to be exact, but
  468. // filtering is different fixed point implementations for SSSE3, Neon and C.
  469. #define TEST_FACTOR(name, nom, denom, boxdiff) \
  470. TEST_FACTOR1(name, None, nom, denom, 0) \
  471. TEST_FACTOR1(name, Linear, nom, denom, 3) \
  472. TEST_FACTOR1(name, Bilinear, nom, denom, 3) \
  473. TEST_FACTOR1(name, Box, nom, denom, boxdiff)
  474. TEST_FACTOR(2, 1, 2, 0)
  475. TEST_FACTOR(4, 1, 4, 0)
  476. // TEST_FACTOR(8, 1, 8, 0) Disable for benchmark performance. Takes 90 seconds.
  477. TEST_FACTOR(3by4, 3, 4, 1)
  478. TEST_FACTOR(3by8, 3, 8, 1)
  479. TEST_FACTOR(3, 1, 3, 0)
  480. #undef TEST_FACTOR1
  481. #undef TEST_FACTOR
  482. #undef SX
  483. #undef DX
  // Defines eight gtest cases for one fixed size and one filter.
  // "To" cases scale from the benchmark size to width x height; "From" cases
  // scale from width x height to Abs() of the benchmark size. Each direction
  // is covered for I420 and I444, in 8 bit and 16 bit (_16) form, and expects
  // the difference reported by its helper to be at most |limit|.
  #define TEST_SCALETO1(name, width, height, filter, limit) \
    TEST_F(LibYUVScaleTest, I420##name##To##width##x##height##_##filter) { \
      int diff = I420TestFilter( \
          benchmark_width_, benchmark_height_, width, height, kFilter##filter, \
          benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_); \
      EXPECT_LE(diff, limit); \
    } \
    TEST_F(LibYUVScaleTest, I444##name##To##width##x##height##_##filter) { \
      int diff = I444TestFilter( \
          benchmark_width_, benchmark_height_, width, height, kFilter##filter, \
          benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_); \
      EXPECT_LE(diff, limit); \
    } \
    TEST_F(LibYUVScaleTest, I420##name##To##width##x##height##_##filter##_16) { \
      int diff = I420TestFilter_16( \
          benchmark_width_, benchmark_height_, width, height, kFilter##filter, \
          benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_); \
      EXPECT_LE(diff, limit); \
    } \
    TEST_F(LibYUVScaleTest, I444##name##To##width##x##height##_##filter##_16) { \
      int diff = I444TestFilter_16( \
          benchmark_width_, benchmark_height_, width, height, kFilter##filter, \
          benchmark_iterations_, disable_cpu_flags_, benchmark_cpu_info_); \
      EXPECT_LE(diff, limit); \
    } \
    TEST_F(LibYUVScaleTest, I420##name##From##width##x##height##_##filter) { \
      int diff = I420TestFilter( \
          width, height, Abs(benchmark_width_), Abs(benchmark_height_), \
          kFilter##filter, benchmark_iterations_, disable_cpu_flags_, \
          benchmark_cpu_info_); \
      EXPECT_LE(diff, limit); \
    } \
    TEST_F(LibYUVScaleTest, I444##name##From##width##x##height##_##filter) { \
      int diff = I444TestFilter( \
          width, height, Abs(benchmark_width_), Abs(benchmark_height_), \
          kFilter##filter, benchmark_iterations_, disable_cpu_flags_, \
          benchmark_cpu_info_); \
      EXPECT_LE(diff, limit); \
    } \
    TEST_F(LibYUVScaleTest, \
           I420##name##From##width##x##height##_##filter##_16) { \
      int diff = I420TestFilter_16( \
          width, height, Abs(benchmark_width_), Abs(benchmark_height_), \
          kFilter##filter, benchmark_iterations_, disable_cpu_flags_, \
          benchmark_cpu_info_); \
      EXPECT_LE(diff, limit); \
    } \
    TEST_F(LibYUVScaleTest, \
           I444##name##From##width##x##height##_##filter##_16) { \
      int diff = I444TestFilter_16( \
          width, height, Abs(benchmark_width_), Abs(benchmark_height_), \
          kFilter##filter, benchmark_iterations_, disable_cpu_flags_, \
          benchmark_cpu_info_); \
      EXPECT_LE(diff, limit); \
    }
  539. // Test scale to a specified size with all 4 filters.
  540. #define TEST_SCALETO(name, width, height) \
  541. TEST_SCALETO1(name, width, height, None, 0) \
  542. TEST_SCALETO1(name, width, height, Linear, 3) \
  543. TEST_SCALETO1(name, width, height, Bilinear, 3) \
  544. TEST_SCALETO1(name, width, height, Box, 3)
  545. TEST_SCALETO(Scale, 1, 1)
  546. TEST_SCALETO(Scale, 320, 240)
  547. TEST_SCALETO(Scale, 569, 480)
  548. TEST_SCALETO(Scale, 640, 360)
  549. TEST_SCALETO(Scale, 1280, 720)
  550. TEST_SCALETO(Scale, 1920, 1080)
  551. #undef TEST_SCALETO1
  552. #undef TEST_SCALETO
  553. #ifdef ENABLE_ROW_TESTS
  554. #ifdef HAS_SCALEROWDOWN2_SSSE3
  555. TEST_F(LibYUVScaleTest, TestScaleRowDown2Box_Odd_SSSE3) {
  556. SIMD_ALIGNED(uint8_t orig_pixels[128 * 2]);
  557. SIMD_ALIGNED(uint8_t dst_pixels_opt[64]);
  558. SIMD_ALIGNED(uint8_t dst_pixels_c[64]);
  559. memset(orig_pixels, 0, sizeof(orig_pixels));
  560. memset(dst_pixels_opt, 0, sizeof(dst_pixels_opt));
  561. memset(dst_pixels_c, 0, sizeof(dst_pixels_c));
  562. int has_ssse3 = TestCpuFlag(kCpuHasSSSE3);
  563. if (!has_ssse3) {
  564. printf("Warning SSSE3 not detected; Skipping test.\n");
  565. } else {
  566. // TL.
  567. orig_pixels[0] = 255u;
  568. orig_pixels[1] = 0u;
  569. orig_pixels[128 + 0] = 0u;
  570. orig_pixels[128 + 1] = 0u;
  571. // TR.
  572. orig_pixels[2] = 0u;
  573. orig_pixels[3] = 100u;
  574. orig_pixels[128 + 2] = 0u;
  575. orig_pixels[128 + 3] = 0u;
  576. // BL.
  577. orig_pixels[4] = 0u;
  578. orig_pixels[5] = 0u;
  579. orig_pixels[128 + 4] = 50u;
  580. orig_pixels[128 + 5] = 0u;
  581. // BR.
  582. orig_pixels[6] = 0u;
  583. orig_pixels[7] = 0u;
  584. orig_pixels[128 + 6] = 0u;
  585. orig_pixels[128 + 7] = 20u;
  586. // Odd.
  587. orig_pixels[126] = 4u;
  588. orig_pixels[127] = 255u;
  589. orig_pixels[128 + 126] = 16u;
  590. orig_pixels[128 + 127] = 255u;
  591. // Test regular half size.
  592. ScaleRowDown2Box_C(orig_pixels, 128, dst_pixels_c, 64);
  593. EXPECT_EQ(64u, dst_pixels_c[0]);
  594. EXPECT_EQ(25u, dst_pixels_c[1]);
  595. EXPECT_EQ(13u, dst_pixels_c[2]);
  596. EXPECT_EQ(5u, dst_pixels_c[3]);
  597. EXPECT_EQ(0u, dst_pixels_c[4]);
  598. EXPECT_EQ(133u, dst_pixels_c[63]);
  599. // Test Odd width version - Last pixel is just 1 horizontal pixel.
  600. ScaleRowDown2Box_Odd_C(orig_pixels, 128, dst_pixels_c, 64);
  601. EXPECT_EQ(64u, dst_pixels_c[0]);
  602. EXPECT_EQ(25u, dst_pixels_c[1]);
  603. EXPECT_EQ(13u, dst_pixels_c[2]);
  604. EXPECT_EQ(5u, dst_pixels_c[3]);
  605. EXPECT_EQ(0u, dst_pixels_c[4]);
  606. EXPECT_EQ(10u, dst_pixels_c[63]);
  607. // Test one pixel less, should skip the last pixel.
  608. memset(dst_pixels_c, 0, sizeof(dst_pixels_c));
  609. ScaleRowDown2Box_Odd_C(orig_pixels, 128, dst_pixels_c, 63);
  610. EXPECT_EQ(64u, dst_pixels_c[0]);
  611. EXPECT_EQ(25u, dst_pixels_c[1]);
  612. EXPECT_EQ(13u, dst_pixels_c[2]);
  613. EXPECT_EQ(5u, dst_pixels_c[3]);
  614. EXPECT_EQ(0u, dst_pixels_c[4]);
  615. EXPECT_EQ(0u, dst_pixels_c[63]);
  616. // Test regular half size SSSE3.
  617. ScaleRowDown2Box_SSSE3(orig_pixels, 128, dst_pixels_opt, 64);
  618. EXPECT_EQ(64u, dst_pixels_opt[0]);
  619. EXPECT_EQ(25u, dst_pixels_opt[1]);
  620. EXPECT_EQ(13u, dst_pixels_opt[2]);
  621. EXPECT_EQ(5u, dst_pixels_opt[3]);
  622. EXPECT_EQ(0u, dst_pixels_opt[4]);
  623. EXPECT_EQ(133u, dst_pixels_opt[63]);
  624. // Compare C and SSSE3 match.
  625. ScaleRowDown2Box_Odd_C(orig_pixels, 128, dst_pixels_c, 64);
  626. ScaleRowDown2Box_Odd_SSSE3(orig_pixels, 128, dst_pixels_opt, 64);
  627. for (int i = 0; i < 64; ++i) {
  628. EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
  629. }
  630. }
  631. }
  632. #endif // HAS_SCALEROWDOWN2_SSSE3
  // Row functions exercised by TestScaleRowUp2_16. They are declared here
  // with C linkage; the NEON and MMI versions are only called when the
  // matching architecture guard in the test is active.
  extern "C" {
  void ScaleRowUp2_16_NEON(const uint16_t* src_ptr,
                           ptrdiff_t src_stride,
                           uint16_t* dst,
                           int dst_width);
  void ScaleRowUp2_16_MMI(const uint16_t* src_ptr,
                          ptrdiff_t src_stride,
                          uint16_t* dst,
                          int dst_width);
  void ScaleRowUp2_16_C(const uint16_t* src_ptr,
                        ptrdiff_t src_stride,
                        uint16_t* dst,
                        int dst_width);
  }  // extern "C"
  645. TEST_F(LibYUVScaleTest, TestScaleRowUp2_16) {
  646. SIMD_ALIGNED(uint16_t orig_pixels[640 * 2 + 1]); // 2 rows + 1 pixel overrun.
  647. SIMD_ALIGNED(uint16_t dst_pixels_opt[1280]);
  648. SIMD_ALIGNED(uint16_t dst_pixels_c[1280]);
  649. memset(orig_pixels, 0, sizeof(orig_pixels));
  650. memset(dst_pixels_opt, 1, sizeof(dst_pixels_opt));
  651. memset(dst_pixels_c, 2, sizeof(dst_pixels_c));
  652. for (int i = 0; i < 640 * 2 + 1; ++i) {
  653. orig_pixels[i] = i;
  654. }
  655. ScaleRowUp2_16_C(&orig_pixels[0], 640, &dst_pixels_c[0], 1280);
  656. for (int i = 0; i < benchmark_pixels_div1280_; ++i) {
  657. #if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
  658. int has_neon = TestCpuFlag(kCpuHasNEON);
  659. if (has_neon) {
  660. ScaleRowUp2_16_NEON(&orig_pixels[0], 640, &dst_pixels_opt[0], 1280);
  661. } else {
  662. ScaleRowUp2_16_C(&orig_pixels[0], 640, &dst_pixels_opt[0], 1280);
  663. }
  664. #elif !defined(LIBYUV_DISABLE_MMI) && defined(_MIPS_ARCH_LOONGSON3A)
  665. int has_mmi = TestCpuFlag(kCpuHasMMI);
  666. if (has_mmi) {
  667. ScaleRowUp2_16_MMI(&orig_pixels[0], 640, &dst_pixels_opt[0], 1280);
  668. } else {
  669. ScaleRowUp2_16_C(&orig_pixels[0], 640, &dst_pixels_opt[0], 1280);
  670. }
  671. #else
  672. ScaleRowUp2_16_C(&orig_pixels[0], 640, &dst_pixels_opt[0], 1280);
  673. #endif
  674. }
  675. for (int i = 0; i < 1280; ++i) {
  676. EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
  677. }
  678. EXPECT_EQ(dst_pixels_c[0], (0 * 9 + 1 * 3 + 640 * 3 + 641 * 1 + 8) / 16);
  679. EXPECT_EQ(dst_pixels_c[1279], 800);
  680. }
  // NEON row function exercised by TestScaleRowDown2Box_16; declared here with
  // C linkage.
  extern "C" {
  void ScaleRowDown2Box_16_NEON(const uint16_t* src_ptr,
                                ptrdiff_t src_stride,
                                uint16_t* dst,
                                int dst_width);
  }  // extern "C"
  685. TEST_F(LibYUVScaleTest, TestScaleRowDown2Box_16) {
  686. SIMD_ALIGNED(uint16_t orig_pixels[2560 * 2]);
  687. SIMD_ALIGNED(uint16_t dst_pixels_c[1280]);
  688. SIMD_ALIGNED(uint16_t dst_pixels_opt[1280]);
  689. memset(orig_pixels, 0, sizeof(orig_pixels));
  690. memset(dst_pixels_c, 1, sizeof(dst_pixels_c));
  691. memset(dst_pixels_opt, 2, sizeof(dst_pixels_opt));
  692. for (int i = 0; i < 2560 * 2; ++i) {
  693. orig_pixels[i] = i;
  694. }
  695. ScaleRowDown2Box_16_C(&orig_pixels[0], 2560, &dst_pixels_c[0], 1280);
  696. for (int i = 0; i < benchmark_pixels_div1280_; ++i) {
  697. #if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
  698. int has_neon = TestCpuFlag(kCpuHasNEON);
  699. if (has_neon) {
  700. ScaleRowDown2Box_16_NEON(&orig_pixels[0], 2560, &dst_pixels_opt[0], 1280);
  701. } else {
  702. ScaleRowDown2Box_16_C(&orig_pixels[0], 2560, &dst_pixels_opt[0], 1280);
  703. }
  704. #else
  705. ScaleRowDown2Box_16_C(&orig_pixels[0], 2560, &dst_pixels_opt[0], 1280);
  706. #endif
  707. }
  708. for (int i = 0; i < 1280; ++i) {
  709. EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
  710. }
  711. EXPECT_EQ(dst_pixels_c[0], (0 + 1 + 2560 + 2561 + 2) / 4);
  712. EXPECT_EQ(dst_pixels_c[1279], 3839);
  713. }
  714. #endif // ENABLE_ROW_TESTS
  715. // Test scaling plane with 8 bit C vs 16 bit C and return maximum pixel
  716. // difference.
  717. // 0 = exact.
  718. static int TestPlaneFilter_16(int src_width,
  719. int src_height,
  720. int dst_width,
  721. int dst_height,
  722. FilterMode f,
  723. int benchmark_iterations,
  724. int disable_cpu_flags,
  725. int benchmark_cpu_info) {
  726. if (!SizeValid(src_width, src_height, dst_width, dst_height)) {
  727. return 0;
  728. }
  729. int i;
  730. int64_t src_y_plane_size = (Abs(src_width)) * (Abs(src_height));
  731. int src_stride_y = Abs(src_width);
  732. int dst_y_plane_size = dst_width * dst_height;
  733. int dst_stride_y = dst_width;
  734. align_buffer_page_end(src_y, src_y_plane_size);
  735. align_buffer_page_end(src_y_16, src_y_plane_size * 2);
  736. align_buffer_page_end(dst_y_8, dst_y_plane_size);
  737. align_buffer_page_end(dst_y_16, dst_y_plane_size * 2);
  738. uint16_t* p_src_y_16 = reinterpret_cast<uint16_t*>(src_y_16);
  739. uint16_t* p_dst_y_16 = reinterpret_cast<uint16_t*>(dst_y_16);
  740. MemRandomize(src_y, src_y_plane_size);
  741. memset(dst_y_8, 0, dst_y_plane_size);
  742. memset(dst_y_16, 1, dst_y_plane_size * 2);
  743. for (i = 0; i < src_y_plane_size; ++i) {
  744. p_src_y_16[i] = src_y[i] & 255;
  745. }
  746. MaskCpuFlags(disable_cpu_flags); // Disable all CPU optimization.
  747. ScalePlane(src_y, src_stride_y, src_width, src_height, dst_y_8, dst_stride_y,
  748. dst_width, dst_height, f);
  749. MaskCpuFlags(benchmark_cpu_info); // Enable all CPU optimization.
  750. for (i = 0; i < benchmark_iterations; ++i) {
  751. ScalePlane_16(p_src_y_16, src_stride_y, src_width, src_height, p_dst_y_16,
  752. dst_stride_y, dst_width, dst_height, f);
  753. }
  754. // Expect an exact match.
  755. int max_diff = 0;
  756. for (i = 0; i < dst_y_plane_size; ++i) {
  757. int abs_diff = Abs(dst_y_8[i] - p_dst_y_16[i]);
  758. if (abs_diff > max_diff) {
  759. max_diff = abs_diff;
  760. }
  761. }
  762. free_aligned_buffer_page_end(dst_y_8);
  763. free_aligned_buffer_page_end(dst_y_16);
  764. free_aligned_buffer_page_end(src_y);
  765. free_aligned_buffer_page_end(src_y_16);
  766. return max_diff;
  767. }
  // Size helpers for the plane scale factor tests. Both round a dimension so
  // that the nom/denom scale factor is achieved exactly; the trailing "* 2" is
  // the chroma subsample.
  //   DX - destination size: a multiple of 2 * nom, computed from Abs(v).
  //        The denominator argument is accepted but not used.
  //   SX - source size: a multiple of 2 * denom. Unlike DX, v is not passed
  //        through Abs().
  #define DX(v, num, den) static_cast<int>(((Abs(v) / num + 1) / 2) * num * 2)
  #define SX(v, num, den) static_cast<int>(((v / num + 1) / 2) * den * 2)
  // Defines one gtest case that scales a single plane by one scale factor
  // with one filter, comparing the 8 bit and 16 bit plane scalers. The source
  // size comes from SX() and the destination size from DX(), both applied to
  // the benchmark dimensions. The case expects the reported difference to be
  // at most |limit|.
  #define TEST_FACTOR1(name, filter, nom, denom, limit) \
    TEST_F(LibYUVScaleTest, ScalePlaneDownBy##name##_##filter##_16) { \
      int diff = TestPlaneFilter_16(SX(benchmark_width_, nom, denom), \
                                    SX(benchmark_height_, nom, denom), \
                                    DX(benchmark_width_, nom, denom), \
                                    DX(benchmark_height_, nom, denom), \
                                    kFilter##filter, benchmark_iterations_, \
                                    disable_cpu_flags_, benchmark_cpu_info_); \
      EXPECT_LE(diff, limit); \
    }
  782. // Test a scale factor with all 4 filters. Expect unfiltered to be exact, but
  783. // filtering is different fixed point implementations for SSSE3, Neon and C.
  784. #define TEST_FACTOR(name, nom, denom, boxdiff) \
  785. TEST_FACTOR1(name, None, nom, denom, 0) \
  786. TEST_FACTOR1(name, Linear, nom, denom, boxdiff) \
  787. TEST_FACTOR1(name, Bilinear, nom, denom, boxdiff) \
  788. TEST_FACTOR1(name, Box, nom, denom, boxdiff)
  789. TEST_FACTOR(2, 1, 2, 0)
  790. TEST_FACTOR(4, 1, 4, 0)
  791. // TEST_FACTOR(8, 1, 8, 0) Disable for benchmark performance. Takes 90 seconds.
  792. TEST_FACTOR(3by4, 3, 4, 1)
  793. TEST_FACTOR(3by8, 3, 8, 1)
  794. TEST_FACTOR(3, 1, 3, 0)
  795. #undef TEST_FACTOR1
  796. #undef TEST_FACTOR
  797. #undef SX
  798. #undef DX
  799. } // namespace libyuv