vf_vmafmotion.c 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368
  1. /*
  2. * Copyright (c) 2017 Ronald S. Bultje <rsbultje@gmail.com>
  3. * Copyright (c) 2017 Ashish Pratap Singh <ashk43712@gmail.com>
  4. *
  5. * This file is part of FFmpeg.
  6. *
  7. * FFmpeg is free software; you can redistribute it and/or
  8. * modify it under the terms of the GNU Lesser General Public
  9. * License as published by the Free Software Foundation; either
  10. * version 2.1 of the License, or (at your option) any later version.
  11. *
  12. * FFmpeg is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  15. * Lesser General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU Lesser General Public
  18. * License along with FFmpeg; if not, write to the Free Software
  19. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20. */
  21. /**
  22. * @file
  23. * Calculate VMAF Motion score.
  24. */
  25. #include "libavutil/opt.h"
  26. #include "libavutil/pixdesc.h"
  27. #include "avfilter.h"
  28. #include "formats.h"
  29. #include "internal.h"
  30. #include "vmaf_motion.h"
  /* Number of fractional bits of the fixed-point filter: ff_vmafmotion_init()
   * scales the float taps by (1 << BIT_SHIFT) and convolution_x() shifts its
   * accumulated sums right by the same amount. */
  #define BIT_SHIFT 15

  /* Symmetric 5-tap blur kernel in floating point; converted to integer
   * coefficients in ff_vmafmotion_init(). */
  static const float FILTER_5[5] = {
      0.054488685,
      0.244201342,
      0.402619947,
      0.244201342,
      0.054488685
  };
  /* Private context of the vmafmotion filter (allocated with priv_size bytes). */
  typedef struct VMAFMotionContext {
      const AVClass *class;   /* filter class, see vmafmotion_class */
      VMAFMotionData data;    /* state handed to ff_vmafmotion_init/process/uninit */
      FILE *stats_file;       /* set in init(): stdout for "-", otherwise an fopen()ed file */
      char *stats_file_str;   /* value of the "stats_file" option, NULL when unset */
  } VMAFMotionContext;
  /* Byte offset of field x inside VMAFMotionContext, used by the option table. */
  #define OFFSET(x) offsetof(VMAFMotionContext, x)
  /* Flag set applied to every option declared below. */
  #define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM

  /* Options of the filter; the table ends with the { NULL } entry. */
  static const AVOption vmafmotion_options[] = {
      /* Path of the per-frame statistics file; init() treats "-" as stdout. */
      {"stats_file", "Set file where to store per-frame difference information", OFFSET(stats_file_str), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, FLAGS },
      { NULL }
  };

  /* Defines vmafmotion_class, referenced by ff_vf_vmafmotion.priv_class. */
  AVFILTER_DEFINE_CLASS(vmafmotion);
  /**
   * Sum of absolute differences between two 16-bit sample planes.
   *
   * @param img1         first plane
   * @param img2         second plane
   * @param w            number of samples compared per row
   * @param h            number of rows
   * @param _img1_stride distance between rows of img1, in bytes
   * @param _img2_stride distance between rows of img2, in bytes
   * @return sum over all w*h positions of |img1 - img2|
   */
  static uint64_t image_sad(const uint16_t *img1, const uint16_t *img2, int w,
                            int h, ptrdiff_t _img1_stride, ptrdiff_t _img2_stride)
  {
      /* Strides arrive in bytes; convert them to element counts once. */
      const ptrdiff_t step1 = _img1_stride / sizeof(*img1);
      const ptrdiff_t step2 = _img2_stride / sizeof(*img2);
      const uint16_t *row1 = img1;
      const uint16_t *row2 = img2;
      uint64_t total = 0;
      int y;

      for (y = 0; y < h; y++, row1 += step1, row2 += step2) {
          int x = 0;

          while (x < w) {
              total += abs(row1[x] - row2[x]);
              x++;
          }
      }

      return total;
  }
  68. static void convolution_x(const uint16_t *filter, int filt_w, const uint16_t *src,
  69. uint16_t *dst, int w, int h, ptrdiff_t _src_stride,
  70. ptrdiff_t _dst_stride)
  71. {
  72. ptrdiff_t src_stride = _src_stride / sizeof(*src);
  73. ptrdiff_t dst_stride = _dst_stride / sizeof(*dst);
  74. int radius = filt_w / 2;
  75. int borders_left = radius;
  76. int borders_right = w - (filt_w - radius);
  77. int i, j, k;
  78. int sum = 0;
  79. for (i = 0; i < h; i++) {
  80. for (j = 0; j < borders_left; j++) {
  81. sum = 0;
  82. for (k = 0; k < filt_w; k++) {
  83. int j_tap = FFABS(j - radius + k);
  84. if (j_tap >= w) {
  85. j_tap = w - (j_tap - w + 1);
  86. }
  87. sum += filter[k] * src[i * src_stride + j_tap];
  88. }
  89. dst[i * dst_stride + j] = sum >> BIT_SHIFT;
  90. }
  91. for (j = borders_left; j < borders_right; j++) {
  92. int sum = 0;
  93. for (k = 0; k < filt_w; k++) {
  94. sum += filter[k] * src[i * src_stride + j - radius + k];
  95. }
  96. dst[i * dst_stride + j] = sum >> BIT_SHIFT;
  97. }
  98. for (j = borders_right; j < w; j++) {
  99. sum = 0;
  100. for (k = 0; k < filt_w; k++) {
  101. int j_tap = FFABS(j - radius + k);
  102. if (j_tap >= w) {
  103. j_tap = w - (j_tap - w + 1);
  104. }
  105. sum += filter[k] * src[i * src_stride + j_tap];
  106. }
  107. dst[i * dst_stride + j] = sum >> BIT_SHIFT;
  108. }
  109. }
  110. }
  /*
   * Generates convolution_y_<bits>bit(): the vertical filtering pass.
   *
   * The input plane is read as samples of `type`; every output sample is the
   * weighted sum of filt_w vertically neighbouring input samples, shifted
   * right by `bits`, and stored as uint16_t. Rows closer than the filter
   * radius to the top or bottom take their missing neighbours from mirrored
   * rows inside the plane. Strides are given in bytes.
   */
  #define conv_y_fn(type, bits) \
  static void convolution_y_##bits##bit(const uint16_t *filter, int filt_w, \
                                        const uint8_t *_src, uint16_t *dst, \
                                        int w, int h, ptrdiff_t _src_stride, \
                                        ptrdiff_t _dst_stride) \
  { \
      const type *src = (const type *) _src; \
      const ptrdiff_t src_stride = _src_stride / sizeof(*src); \
      const ptrdiff_t dst_stride = _dst_stride / sizeof(*dst); \
      const int radius      = filt_w / 2; \
      const int inner_begin = radius; \
      const int inner_end   = h - (filt_w - radius); \
      int row, col, tap; \
      \
      /* top edge: rows above the plane are mirrored */ \
      for (row = 0; row < inner_begin; row++) { \
          for (col = 0; col < w; col++) { \
              int acc = 0; \
              for (tap = 0; tap < filt_w; tap++) { \
                  int pos = FFABS(row - radius + tap); \
                  if (pos >= h) \
                      pos = h - (pos - h + 1); \
                  acc += filter[tap] * src[pos * src_stride + col]; \
              } \
              dst[row * dst_stride + col] = acc >> bits; \
          } \
      } \
      /* interior: the whole filter window lies inside the plane */ \
      for (row = inner_begin; row < inner_end; row++) { \
          for (col = 0; col < w; col++) { \
              int acc = 0; \
              for (tap = 0; tap < filt_w; tap++) \
                  acc += filter[tap] * src[(row - radius + tap) * src_stride + col]; \
              dst[row * dst_stride + col] = acc >> bits; \
          } \
      } \
      /* bottom edge: rows below the plane are mirrored */ \
      for (row = inner_end; row < h; row++) { \
          for (col = 0; col < w; col++) { \
              int acc = 0; \
              for (tap = 0; tap < filt_w; tap++) { \
                  int pos = FFABS(row - radius + tap); \
                  if (pos >= h) \
                      pos = h - (pos - h + 1); \
                  acc += filter[tap] * src[pos * src_stride + col]; \
              } \
              dst[row * dst_stride + col] = acc >> bits; \
          } \
      } \
  }

  conv_y_fn(uint8_t, 8)
  conv_y_fn(uint16_t, 10)
  164. static void vmafmotiondsp_init(VMAFMotionDSPContext *dsp, int bpp) {
  165. dsp->convolution_x = convolution_x;
  166. dsp->convolution_y = bpp == 10 ? convolution_y_10bit : convolution_y_8bit;
  167. dsp->sad = image_sad;
  168. }
  169. double ff_vmafmotion_process(VMAFMotionData *s, AVFrame *ref)
  170. {
  171. double score;
  172. s->vmafdsp.convolution_y(s->filter, 5, ref->data[0], s->temp_data,
  173. s->width, s->height, ref->linesize[0], s->stride);
  174. s->vmafdsp.convolution_x(s->filter, 5, s->temp_data, s->blur_data[0],
  175. s->width, s->height, s->stride, s->stride);
  176. if (!s->nb_frames) {
  177. score = 0.0;
  178. } else {
  179. uint64_t sad = s->vmafdsp.sad(s->blur_data[1], s->blur_data[0],
  180. s->width, s->height, s->stride, s->stride);
  181. // the output score is always normalized to 8 bits
  182. score = (double) (sad * 1.0 / (s->width * s->height << (BIT_SHIFT - 8)));
  183. }
  184. FFSWAP(uint16_t *, s->blur_data[0], s->blur_data[1]);
  185. s->nb_frames++;
  186. s->motion_sum += score;
  187. return score;
  188. }
  189. static void set_meta(AVDictionary **metadata, const char *key, float d)
  190. {
  191. char value[128];
  192. snprintf(value, sizeof(value), "%0.2f", d);
  193. av_dict_set(metadata, key, value, 0);
  194. }
  195. static void do_vmafmotion(AVFilterContext *ctx, AVFrame *ref)
  196. {
  197. VMAFMotionContext *s = ctx->priv;
  198. double score;
  199. score = ff_vmafmotion_process(&s->data, ref);
  200. set_meta(&ref->metadata, "lavfi.vmafmotion.score", score);
  201. if (s->stats_file) {
  202. fprintf(s->stats_file,
  203. "n:%"PRId64" motion:%0.2lf\n", s->data.nb_frames, score);
  204. }
  205. }
  206. int ff_vmafmotion_init(VMAFMotionData *s,
  207. int w, int h, enum AVPixelFormat fmt)
  208. {
  209. size_t data_sz;
  210. int i;
  211. const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(fmt);
  212. s->width = w;
  213. s->height = h;
  214. s->stride = FFALIGN(w * sizeof(uint16_t), 32);
  215. data_sz = (size_t) s->stride * h;
  216. if (!(s->blur_data[0] = av_malloc(data_sz)) ||
  217. !(s->blur_data[1] = av_malloc(data_sz)) ||
  218. !(s->temp_data = av_malloc(data_sz))) {
  219. return AVERROR(ENOMEM);
  220. }
  221. for (i = 0; i < 5; i++) {
  222. s->filter[i] = lrint(FILTER_5[i] * (1 << BIT_SHIFT));
  223. }
  224. vmafmotiondsp_init(&s->vmafdsp, desc->comp[0].depth);
  225. return 0;
  226. }
  227. static int query_formats(AVFilterContext *ctx)
  228. {
  229. AVFilterFormats *fmts_list = NULL;
  230. int format, ret;
  231. for (format = 0; av_pix_fmt_desc_get(format); format++) {
  232. const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(format);
  233. if (!(desc->flags & (AV_PIX_FMT_FLAG_RGB | AV_PIX_FMT_FLAG_HWACCEL | AV_PIX_FMT_FLAG_BITSTREAM | AV_PIX_FMT_FLAG_PAL)) &&
  234. (desc->flags & AV_PIX_FMT_FLAG_PLANAR || desc->nb_components == 1) &&
  235. (!(desc->flags & AV_PIX_FMT_FLAG_BE) == !HAVE_BIGENDIAN || desc->comp[0].depth == 8) &&
  236. (desc->comp[0].depth == 8 || desc->comp[0].depth == 10) &&
  237. (ret = ff_add_format(&fmts_list, format)) < 0)
  238. return ret;
  239. }
  240. return ff_set_common_formats(ctx, fmts_list);
  241. }
  242. static int config_input_ref(AVFilterLink *inlink)
  243. {
  244. AVFilterContext *ctx = inlink->dst;
  245. VMAFMotionContext *s = ctx->priv;
  246. return ff_vmafmotion_init(&s->data, ctx->inputs[0]->w,
  247. ctx->inputs[0]->h, ctx->inputs[0]->format);
  248. }
  249. double ff_vmafmotion_uninit(VMAFMotionData *s)
  250. {
  251. av_free(s->blur_data[0]);
  252. av_free(s->blur_data[1]);
  253. av_free(s->temp_data);
  254. return s->nb_frames > 0 ? s->motion_sum / s->nb_frames : 0.0;
  255. }
  256. static int filter_frame(AVFilterLink *inlink, AVFrame *ref)
  257. {
  258. AVFilterContext *ctx = inlink->dst;
  259. do_vmafmotion(ctx, ref);
  260. return ff_filter_frame(ctx->outputs[0], ref);
  261. }
  262. static av_cold int init(AVFilterContext *ctx)
  263. {
  264. VMAFMotionContext *s = ctx->priv;
  265. if (s->stats_file_str) {
  266. if (!strcmp(s->stats_file_str, "-")) {
  267. s->stats_file = stdout;
  268. } else {
  269. s->stats_file = fopen(s->stats_file_str, "w");
  270. if (!s->stats_file) {
  271. int err = AVERROR(errno);
  272. char buf[128];
  273. av_strerror(err, buf, sizeof(buf));
  274. av_log(ctx, AV_LOG_ERROR, "Could not open stats file %s: %s\n",
  275. s->stats_file_str, buf);
  276. return err;
  277. }
  278. }
  279. }
  280. return 0;
  281. }
  282. static av_cold void uninit(AVFilterContext *ctx)
  283. {
  284. VMAFMotionContext *s = ctx->priv;
  285. double avg_motion = ff_vmafmotion_uninit(&s->data);
  286. if (s->data.nb_frames > 0) {
  287. av_log(ctx, AV_LOG_INFO, "VMAF Motion avg: %.3f\n", avg_motion);
  288. }
  289. if (s->stats_file && s->stats_file != stdout)
  290. fclose(s->stats_file);
  291. }
  /* Input pads: a single video pad named "reference". filter_frame() and
   * config_input_ref() are registered as its filter_frame and config_props
   * callbacks. The list ends with the { NULL } entry. */
  static const AVFilterPad vmafmotion_inputs[] = {
      {
          .name = "reference",
          .type = AVMEDIA_TYPE_VIDEO,
          .filter_frame = filter_frame,
          .config_props = config_input_ref,
      },
      { NULL }
  };
  /* Output pads: a single video pad with no callbacks; filter_frame() sends
   * every input frame to it. The list ends with the { NULL } entry. */
  static const AVFilterPad vmafmotion_outputs[] = {
      {
          .name = "default",
          .type = AVMEDIA_TYPE_VIDEO,
      },
      { NULL }
  };
  /* Filter definition: ties the "vmafmotion" name to the callbacks, private
   * context, option class and pad lists declared in this file. */
  AVFilter ff_vf_vmafmotion = {
      .name = "vmafmotion",
      .description = NULL_IF_CONFIG_SMALL("Calculate the VMAF Motion score."),
      .init = init,
      .uninit = uninit,
      .query_formats = query_formats,
      .priv_size = sizeof(VMAFMotionContext),
      .priv_class = &vmafmotion_class,
      .inputs = vmafmotion_inputs,
      .outputs = vmafmotion_outputs,
  };