avf_ahistogram.c 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432
  1. /*
  2. * Copyright (c) 2015 Paul B Mahol
  3. *
  4. * This file is part of FFmpeg.
  5. *
  6. * FFmpeg is free software; you can redistribute it and/or
  7. * modify it under the terms of the GNU Lesser General Public
  8. * License as published by the Free Software Foundation; either
  9. * version 2.1 of the License, or (at your option) any later version.
  10. *
  11. * FFmpeg is distributed in the hope that it will be useful,
  12. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  14. * Lesser General Public License for more details.
  15. *
  16. * You should have received a copy of the GNU Lesser General Public
  17. * License along with FFmpeg; if not, write to the Free Software
  18. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  19. */
  20. #include "libavutil/avassert.h"
  21. #include "libavutil/opt.h"
  22. #include "libavutil/parseutils.h"
  23. #include "avfilter.h"
  24. #include "filters.h"
  25. #include "formats.h"
  26. #include "audio.h"
  27. #include "video.h"
  28. #include "internal.h"
  29. enum DisplayScale { LINEAR, SQRT, CBRT, LOG, RLOG, NB_SCALES };
  30. enum AmplitudeScale { ALINEAR, ALOG, NB_ASCALES };
  31. enum SlideMode { REPLACE, SCROLL, NB_SLIDES };
  32. enum DisplayMode { SINGLE, SEPARATE, NB_DMODES };
  33. enum HistogramMode { ACCUMULATE, CURRENT, NB_HMODES };
  34. typedef struct AudioHistogramContext {
  35. const AVClass *class;
  36. AVFrame *out;
  37. int w, h;
  38. AVRational frame_rate;
  39. uint64_t *achistogram;
  40. uint64_t *shistogram;
  41. int ascale;
  42. int scale;
  43. float phisto;
  44. int histogram_h;
  45. int apos;
  46. int ypos;
  47. int slide;
  48. int dmode;
  49. int dchannels;
  50. int count;
  51. int frame_count;
  52. float *combine_buffer;
  53. AVFrame *in[101];
  54. int first;
  55. int nb_samples;
  56. } AudioHistogramContext;
  57. #define OFFSET(x) offsetof(AudioHistogramContext, x)
  58. #define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM
  59. static const AVOption ahistogram_options[] = {
  60. { "dmode", "set method to display channels", OFFSET(dmode), AV_OPT_TYPE_INT, {.i64=SINGLE}, 0, NB_DMODES-1, FLAGS, "dmode" },
  61. { "single", "all channels use single histogram", 0, AV_OPT_TYPE_CONST, {.i64=SINGLE}, 0, 0, FLAGS, "dmode" },
  62. { "separate", "each channel have own histogram", 0, AV_OPT_TYPE_CONST, {.i64=SEPARATE}, 0, 0, FLAGS, "dmode" },
  63. { "rate", "set video rate", OFFSET(frame_rate), AV_OPT_TYPE_VIDEO_RATE, {.str="25"}, 0, INT_MAX, FLAGS },
  64. { "r", "set video rate", OFFSET(frame_rate), AV_OPT_TYPE_VIDEO_RATE, {.str="25"}, 0, INT_MAX, FLAGS },
  65. { "size", "set video size", OFFSET(w), AV_OPT_TYPE_IMAGE_SIZE, {.str="hd720"}, 0, 0, FLAGS },
  66. { "s", "set video size", OFFSET(w), AV_OPT_TYPE_IMAGE_SIZE, {.str="hd720"}, 0, 0, FLAGS },
  67. { "scale", "set display scale", OFFSET(scale), AV_OPT_TYPE_INT, {.i64=LOG}, LINEAR, NB_SCALES-1, FLAGS, "scale" },
  68. { "log", "logarithmic", 0, AV_OPT_TYPE_CONST, {.i64=LOG}, 0, 0, FLAGS, "scale" },
  69. { "sqrt", "square root", 0, AV_OPT_TYPE_CONST, {.i64=SQRT}, 0, 0, FLAGS, "scale" },
  70. { "cbrt", "cubic root", 0, AV_OPT_TYPE_CONST, {.i64=CBRT}, 0, 0, FLAGS, "scale" },
  71. { "lin", "linear", 0, AV_OPT_TYPE_CONST, {.i64=LINEAR}, 0, 0, FLAGS, "scale" },
  72. { "rlog", "reverse logarithmic", 0, AV_OPT_TYPE_CONST, {.i64=RLOG}, 0, 0, FLAGS, "scale" },
  73. { "ascale", "set amplitude scale", OFFSET(ascale), AV_OPT_TYPE_INT, {.i64=ALOG}, LINEAR, NB_ASCALES-1, FLAGS, "ascale" },
  74. { "log", "logarithmic", 0, AV_OPT_TYPE_CONST, {.i64=ALOG}, 0, 0, FLAGS, "ascale" },
  75. { "lin", "linear", 0, AV_OPT_TYPE_CONST, {.i64=ALINEAR}, 0, 0, FLAGS, "ascale" },
  76. { "acount", "how much frames to accumulate", OFFSET(count), AV_OPT_TYPE_INT, {.i64=1}, -1, 100, FLAGS },
  77. { "rheight", "set histogram ratio of window height", OFFSET(phisto), AV_OPT_TYPE_FLOAT, {.dbl=0.10}, 0, 1, FLAGS },
  78. { "slide", "set sonogram sliding", OFFSET(slide), AV_OPT_TYPE_INT, {.i64=REPLACE}, 0, NB_SLIDES-1, FLAGS, "slide" },
  79. { "replace", "replace old rows with new", 0, AV_OPT_TYPE_CONST, {.i64=REPLACE}, 0, 0, FLAGS, "slide" },
  80. { "scroll", "scroll from top to bottom", 0, AV_OPT_TYPE_CONST, {.i64=SCROLL}, 0, 0, FLAGS, "slide" },
  81. { NULL }
  82. };
  83. AVFILTER_DEFINE_CLASS(ahistogram);
  84. static int query_formats(AVFilterContext *ctx)
  85. {
  86. AVFilterFormats *formats = NULL;
  87. AVFilterChannelLayouts *layouts = NULL;
  88. AVFilterLink *inlink = ctx->inputs[0];
  89. AVFilterLink *outlink = ctx->outputs[0];
  90. static const enum AVSampleFormat sample_fmts[] = { AV_SAMPLE_FMT_FLTP, AV_SAMPLE_FMT_NONE };
  91. static const enum AVPixelFormat pix_fmts[] = { AV_PIX_FMT_YUVA444P, AV_PIX_FMT_NONE };
  92. int ret = AVERROR(EINVAL);
  93. formats = ff_make_format_list(sample_fmts);
  94. if ((ret = ff_formats_ref (formats, &inlink->out_formats )) < 0 ||
  95. (layouts = ff_all_channel_counts()) == NULL ||
  96. (ret = ff_channel_layouts_ref (layouts, &inlink->out_channel_layouts)) < 0)
  97. return ret;
  98. formats = ff_all_samplerates();
  99. if ((ret = ff_formats_ref(formats, &inlink->out_samplerates)) < 0)
  100. return ret;
  101. formats = ff_make_format_list(pix_fmts);
  102. if ((ret = ff_formats_ref(formats, &outlink->in_formats)) < 0)
  103. return ret;
  104. return 0;
  105. }
  106. static int config_input(AVFilterLink *inlink)
  107. {
  108. AVFilterContext *ctx = inlink->dst;
  109. AudioHistogramContext *s = ctx->priv;
  110. s->nb_samples = FFMAX(1, av_rescale(inlink->sample_rate, s->frame_rate.den, s->frame_rate.num));
  111. s->dchannels = s->dmode == SINGLE ? 1 : inlink->channels;
  112. s->shistogram = av_calloc(s->w, s->dchannels * sizeof(*s->shistogram));
  113. if (!s->shistogram)
  114. return AVERROR(ENOMEM);
  115. s->achistogram = av_calloc(s->w, s->dchannels * sizeof(*s->achistogram));
  116. if (!s->achistogram)
  117. return AVERROR(ENOMEM);
  118. return 0;
  119. }
  120. static int config_output(AVFilterLink *outlink)
  121. {
  122. AudioHistogramContext *s = outlink->src->priv;
  123. outlink->w = s->w;
  124. outlink->h = s->h;
  125. outlink->sample_aspect_ratio = (AVRational){1,1};
  126. outlink->frame_rate = s->frame_rate;
  127. s->histogram_h = s->h * s->phisto;
  128. s->ypos = s->h * s->phisto;
  129. if (s->dmode == SEPARATE) {
  130. s->combine_buffer = av_malloc_array(outlink->w * 3, sizeof(*s->combine_buffer));
  131. if (!s->combine_buffer)
  132. return AVERROR(ENOMEM);
  133. }
  134. return 0;
  135. }
  136. static int filter_frame(AVFilterLink *inlink, AVFrame *in)
  137. {
  138. AVFilterContext *ctx = inlink->dst;
  139. AVFilterLink *outlink = ctx->outputs[0];
  140. AudioHistogramContext *s = ctx->priv;
  141. const int H = s->histogram_h;
  142. const int w = s->w;
  143. int c, y, n, p, bin;
  144. uint64_t acmax = 1;
  145. if (!s->out || s->out->width != outlink->w ||
  146. s->out->height != outlink->h) {
  147. av_frame_free(&s->out);
  148. s->out = ff_get_video_buffer(outlink, outlink->w, outlink->h);
  149. if (!s->out) {
  150. av_frame_free(&in);
  151. return AVERROR(ENOMEM);
  152. }
  153. for (n = H; n < s->h; n++) {
  154. memset(s->out->data[0] + n * s->out->linesize[0], 0, w);
  155. memset(s->out->data[1] + n * s->out->linesize[0], 127, w);
  156. memset(s->out->data[2] + n * s->out->linesize[0], 127, w);
  157. memset(s->out->data[3] + n * s->out->linesize[0], 0, w);
  158. }
  159. }
  160. if (s->dmode == SEPARATE) {
  161. for (y = 0; y < w; y++) {
  162. s->combine_buffer[3 * y ] = 0;
  163. s->combine_buffer[3 * y + 1] = 127.5;
  164. s->combine_buffer[3 * y + 2] = 127.5;
  165. }
  166. }
  167. for (n = 0; n < H; n++) {
  168. memset(s->out->data[0] + n * s->out->linesize[0], 0, w);
  169. memset(s->out->data[1] + n * s->out->linesize[0], 127, w);
  170. memset(s->out->data[2] + n * s->out->linesize[0], 127, w);
  171. memset(s->out->data[3] + n * s->out->linesize[0], 0, w);
  172. }
  173. s->out->pts = in->pts;
  174. s->first = s->frame_count;
  175. switch (s->ascale) {
  176. case ALINEAR:
  177. for (c = 0; c < inlink->channels; c++) {
  178. const float *src = (const float *)in->extended_data[c];
  179. uint64_t *achistogram = &s->achistogram[(s->dmode == SINGLE ? 0: c) * w];
  180. for (n = 0; n < in->nb_samples; n++) {
  181. bin = lrint(av_clipf(fabsf(src[n]), 0, 1) * (w - 1));
  182. achistogram[bin]++;
  183. }
  184. if (s->in[s->first] && s->count >= 0) {
  185. uint64_t *shistogram = &s->shistogram[(s->dmode == SINGLE ? 0: c) * w];
  186. const float *src2 = (const float *)s->in[s->first]->extended_data[c];
  187. for (n = 0; n < in->nb_samples; n++) {
  188. bin = lrint(av_clipf(fabsf(src2[n]), 0, 1) * (w - 1));
  189. shistogram[bin]++;
  190. }
  191. }
  192. }
  193. break;
  194. case ALOG:
  195. for (c = 0; c < inlink->channels; c++) {
  196. const float *src = (const float *)in->extended_data[c];
  197. uint64_t *achistogram = &s->achistogram[(s->dmode == SINGLE ? 0: c) * w];
  198. for (n = 0; n < in->nb_samples; n++) {
  199. bin = lrint(av_clipf(1 + log10(fabsf(src[n])) / 6, 0, 1) * (w - 1));
  200. achistogram[bin]++;
  201. }
  202. if (s->in[s->first] && s->count >= 0) {
  203. uint64_t *shistogram = &s->shistogram[(s->dmode == SINGLE ? 0: c) * w];
  204. const float *src2 = (const float *)s->in[s->first]->extended_data[c];
  205. for (n = 0; n < in->nb_samples; n++) {
  206. bin = lrint(av_clipf(1 + log10(fabsf(src2[n])) / 6, 0, 1) * (w - 1));
  207. shistogram[bin]++;
  208. }
  209. }
  210. }
  211. break;
  212. }
  213. av_frame_free(&s->in[s->frame_count]);
  214. s->in[s->frame_count] = in;
  215. s->frame_count++;
  216. if (s->frame_count > s->count)
  217. s->frame_count = 0;
  218. for (n = 0; n < w * s->dchannels; n++) {
  219. acmax = FFMAX(s->achistogram[n] - s->shistogram[n], acmax);
  220. }
  221. for (c = 0; c < s->dchannels; c++) {
  222. uint64_t *shistogram = &s->shistogram[c * w];
  223. uint64_t *achistogram = &s->achistogram[c * w];
  224. float yf, uf, vf;
  225. if (s->dmode == SEPARATE) {
  226. yf = 256.0f / s->dchannels;
  227. uf = yf * M_PI;
  228. vf = yf * M_PI;
  229. uf *= 0.5 * sin((2 * M_PI * c) / s->dchannels);
  230. vf *= 0.5 * cos((2 * M_PI * c) / s->dchannels);
  231. }
  232. for (n = 0; n < w; n++) {
  233. double a, aa;
  234. int h;
  235. a = achistogram[n] - shistogram[n];
  236. switch (s->scale) {
  237. case LINEAR:
  238. aa = a / (double)acmax;
  239. break;
  240. case SQRT:
  241. aa = sqrt(a) / sqrt(acmax);
  242. break;
  243. case CBRT:
  244. aa = cbrt(a) / cbrt(acmax);
  245. break;
  246. case LOG:
  247. aa = log2(a + 1) / log2(acmax + 1);
  248. break;
  249. case RLOG:
  250. aa = 1. - log2(a + 1) / log2(acmax + 1);
  251. if (aa == 1.)
  252. aa = 0;
  253. break;
  254. default:
  255. av_assert0(0);
  256. }
  257. h = aa * (H - 1);
  258. if (s->dmode == SINGLE) {
  259. for (y = H - h; y < H; y++) {
  260. s->out->data[0][y * s->out->linesize[0] + n] = 255;
  261. s->out->data[3][y * s->out->linesize[0] + n] = 255;
  262. }
  263. if (s->h - H > 0) {
  264. h = aa * 255;
  265. s->out->data[0][s->ypos * s->out->linesize[0] + n] = h;
  266. s->out->data[1][s->ypos * s->out->linesize[1] + n] = 127;
  267. s->out->data[2][s->ypos * s->out->linesize[2] + n] = 127;
  268. s->out->data[3][s->ypos * s->out->linesize[3] + n] = 255;
  269. }
  270. } else if (s->dmode == SEPARATE) {
  271. float *out = &s->combine_buffer[3 * n];
  272. int old;
  273. old = s->out->data[0][(H - h) * s->out->linesize[0] + n];
  274. for (y = H - h; y < H; y++) {
  275. if (s->out->data[0][y * s->out->linesize[0] + n] != old)
  276. break;
  277. old = s->out->data[0][y * s->out->linesize[0] + n];
  278. s->out->data[0][y * s->out->linesize[0] + n] = yf;
  279. s->out->data[1][y * s->out->linesize[1] + n] = 128+uf;
  280. s->out->data[2][y * s->out->linesize[2] + n] = 128+vf;
  281. s->out->data[3][y * s->out->linesize[3] + n] = 255;
  282. }
  283. out[0] += aa * yf;
  284. out[1] += aa * uf;
  285. out[2] += aa * vf;
  286. }
  287. }
  288. }
  289. if (s->h - H > 0) {
  290. if (s->dmode == SEPARATE) {
  291. for (n = 0; n < w; n++) {
  292. float *cb = &s->combine_buffer[3 * n];
  293. s->out->data[0][s->ypos * s->out->linesize[0] + n] = cb[0];
  294. s->out->data[1][s->ypos * s->out->linesize[1] + n] = cb[1];
  295. s->out->data[2][s->ypos * s->out->linesize[2] + n] = cb[2];
  296. s->out->data[3][s->ypos * s->out->linesize[3] + n] = 255;
  297. }
  298. }
  299. if (s->slide == SCROLL) {
  300. for (p = 0; p < 4; p++) {
  301. for (y = s->h; y >= H + 1; y--) {
  302. memmove(s->out->data[p] + (y ) * s->out->linesize[p],
  303. s->out->data[p] + (y-1) * s->out->linesize[p], w);
  304. }
  305. }
  306. }
  307. s->ypos++;
  308. if (s->slide == SCROLL || s->ypos >= s->h)
  309. s->ypos = H;
  310. }
  311. return ff_filter_frame(outlink, av_frame_clone(s->out));
  312. }
  313. static int activate(AVFilterContext *ctx)
  314. {
  315. AVFilterLink *inlink = ctx->inputs[0];
  316. AVFilterLink *outlink = ctx->outputs[0];
  317. AudioHistogramContext *s = ctx->priv;
  318. AVFrame *in;
  319. int ret;
  320. FF_FILTER_FORWARD_STATUS_BACK(outlink, inlink);
  321. ret = ff_inlink_consume_samples(inlink, s->nb_samples, s->nb_samples, &in);
  322. if (ret < 0)
  323. return ret;
  324. if (ret > 0)
  325. return filter_frame(inlink, in);
  326. FF_FILTER_FORWARD_STATUS(inlink, outlink);
  327. FF_FILTER_FORWARD_WANTED(outlink, inlink);
  328. return FFERROR_NOT_READY;
  329. }
  330. static av_cold void uninit(AVFilterContext *ctx)
  331. {
  332. AudioHistogramContext *s = ctx->priv;
  333. int i;
  334. av_frame_free(&s->out);
  335. av_freep(&s->shistogram);
  336. av_freep(&s->achistogram);
  337. av_freep(&s->combine_buffer);
  338. for (i = 0; i < 101; i++)
  339. av_frame_free(&s->in[i]);
  340. }
  341. static const AVFilterPad ahistogram_inputs[] = {
  342. {
  343. .name = "default",
  344. .type = AVMEDIA_TYPE_AUDIO,
  345. .config_props = config_input,
  346. },
  347. { NULL }
  348. };
  349. static const AVFilterPad ahistogram_outputs[] = {
  350. {
  351. .name = "default",
  352. .type = AVMEDIA_TYPE_VIDEO,
  353. .config_props = config_output,
  354. },
  355. { NULL }
  356. };
  357. AVFilter ff_avf_ahistogram = {
  358. .name = "ahistogram",
  359. .description = NULL_IF_CONFIG_SMALL("Convert input audio to histogram video output."),
  360. .uninit = uninit,
  361. .query_formats = query_formats,
  362. .priv_size = sizeof(AudioHistogramContext),
  363. .activate = activate,
  364. .inputs = ahistogram_inputs,
  365. .outputs = ahistogram_outputs,
  366. .priv_class = &ahistogram_class,
  367. };