af_loudnorm.c

/*
 * Copyright (c) 2016 Kyle Swanson <k@ylo.ph>.
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/* http://k.ylo.ph/2016/04/04/loudnorm.html */

#include "libavutil/opt.h"
#include "avfilter.h"
#include "internal.h"
#include "audio.h"
#include "ebur128.h"

enum FrameType {
    FIRST_FRAME,
    INNER_FRAME,
    FINAL_FRAME,
    LINEAR_MODE,
    FRAME_NB
};

enum LimiterState {
    OUT,
    ATTACK,
    SUSTAIN,
    RELEASE,
    STATE_NB
};

enum PrintFormat {
    NONE,
    JSON,
    SUMMARY,
    PF_NB
};

typedef struct LoudNormContext {
    const AVClass *class;
    double target_i;
    double target_lra;
    double target_tp;
    double measured_i;
    double measured_lra;
    double measured_tp;
    double measured_thresh;
    double offset;
    int linear;
    int dual_mono;
    enum PrintFormat print_format;

    double *buf;
    int buf_size;
    int buf_index;
    int prev_buf_index;

    double delta[30];
    double weights[21];
    double prev_delta;
    int index;

    double gain_reduction[2];
    double *limiter_buf;
    double *prev_smp;
    int limiter_buf_index;
    int limiter_buf_size;
    enum LimiterState limiter_state;
    int peak_index;
    int env_index;
    int env_cnt;
    int attack_length;
    int release_length;

    int64_t pts;
    enum FrameType frame_type;
    int above_threshold;
    int prev_nb_samples;
    int channels;

    FFEBUR128State *r128_in;
    FFEBUR128State *r128_out;
} LoudNormContext;

#define OFFSET(x) offsetof(LoudNormContext, x)
#define FLAGS AV_OPT_FLAG_AUDIO_PARAM|AV_OPT_FLAG_FILTERING_PARAM

static const AVOption loudnorm_options[] = {
    { "I", "set integrated loudness target", OFFSET(target_i), AV_OPT_TYPE_DOUBLE, {.dbl = -24.}, -70., -5., FLAGS },
    { "i", "set integrated loudness target", OFFSET(target_i), AV_OPT_TYPE_DOUBLE, {.dbl = -24.}, -70., -5., FLAGS },
    { "LRA", "set loudness range target", OFFSET(target_lra), AV_OPT_TYPE_DOUBLE, {.dbl = 7.}, 1., 20., FLAGS },
    { "lra", "set loudness range target", OFFSET(target_lra), AV_OPT_TYPE_DOUBLE, {.dbl = 7.}, 1., 20., FLAGS },
    { "TP", "set maximum true peak", OFFSET(target_tp), AV_OPT_TYPE_DOUBLE, {.dbl = -2.}, -9., 0., FLAGS },
    { "tp", "set maximum true peak", OFFSET(target_tp), AV_OPT_TYPE_DOUBLE, {.dbl = -2.}, -9., 0., FLAGS },
    { "measured_I", "measured IL of input file", OFFSET(measured_i), AV_OPT_TYPE_DOUBLE, {.dbl = 0.}, -99., 0., FLAGS },
    { "measured_i", "measured IL of input file", OFFSET(measured_i), AV_OPT_TYPE_DOUBLE, {.dbl = 0.}, -99., 0., FLAGS },
    { "measured_LRA", "measured LRA of input file", OFFSET(measured_lra), AV_OPT_TYPE_DOUBLE, {.dbl = 0.}, 0., 99., FLAGS },
    { "measured_lra", "measured LRA of input file", OFFSET(measured_lra), AV_OPT_TYPE_DOUBLE, {.dbl = 0.}, 0., 99., FLAGS },
    { "measured_TP", "measured true peak of input file", OFFSET(measured_tp), AV_OPT_TYPE_DOUBLE, {.dbl = 99.}, -99., 99., FLAGS },
    { "measured_tp", "measured true peak of input file", OFFSET(measured_tp), AV_OPT_TYPE_DOUBLE, {.dbl = 99.}, -99., 99., FLAGS },
    { "measured_thresh", "measured threshold of input file", OFFSET(measured_thresh), AV_OPT_TYPE_DOUBLE, {.dbl = -70.}, -99., 0., FLAGS },
    { "offset", "set offset gain", OFFSET(offset), AV_OPT_TYPE_DOUBLE, {.dbl = 0.}, -99., 99., FLAGS },
    { "linear", "normalize linearly if possible", OFFSET(linear), AV_OPT_TYPE_BOOL, {.i64 = 1}, 0, 1, FLAGS },
    { "dual_mono", "treat mono input as dual-mono", OFFSET(dual_mono), AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, FLAGS },
    { "print_format", "set print format for stats", OFFSET(print_format), AV_OPT_TYPE_INT, {.i64 = NONE}, NONE, PF_NB -1, FLAGS, "print_format" },
    { "none", 0, 0, AV_OPT_TYPE_CONST, {.i64 = NONE}, 0, 0, FLAGS, "print_format" },
    { "json", 0, 0, AV_OPT_TYPE_CONST, {.i64 = JSON}, 0, 0, FLAGS, "print_format" },
    { "summary", 0, 0, AV_OPT_TYPE_CONST, {.i64 = SUMMARY}, 0, 0, FLAGS, "print_format" },
    { NULL }
};

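/*
 * Illustrative two-pass usage (not part of this file; option names as defined
 * above, measured_* values taken from the first pass's printed stats):
 *
 *   pass 1: ffmpeg -i in.wav -af loudnorm=I=-16:TP=-1.5:LRA=11:print_format=json -f null -
 *   pass 2: ffmpeg -i in.wav -af loudnorm=I=-16:TP=-1.5:LRA=11:measured_I=...:measured_TP=...:measured_LRA=...:measured_thresh=...:offset=...:linear=true out.wav
 */
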
AVFILTER_DEFINE_CLASS(loudnorm);

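/* Number of samples in a frame of frame_len_msec milliseconds at the given
 * sample rate, rounded up to an even count. */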
static inline int frame_size(int sample_rate, int frame_len_msec)
{
    const int frame_size = round((double)sample_rate * (frame_len_msec / 1000.0));
    return frame_size + (frame_size % 2);
}

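/* Precompute the 21-tap Gaussian window (sigma = 3.5) used to smooth the
 * per-frame gain factors: weights[i] = c1 * exp(-((i - 10)^2) / c2), with
 * c1 = 1 / (sigma * sqrt(2 * pi)) and c2 = 2 * sigma^2, normalized so the
 * taps sum to 1. */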
static void init_gaussian_filter(LoudNormContext *s)
{
    double total_weight = 0.0;
    const double sigma = 3.5;
    double adjust;
    int i;

    const int offset = 21 / 2;
    const double c1 = 1.0 / (sigma * sqrt(2.0 * M_PI));
    const double c2 = 2.0 * pow(sigma, 2.0);

    for (i = 0; i < 21; i++) {
        const int x = i - offset;
        s->weights[i] = c1 * exp(-(pow(x, 2.0) / c2));
        total_weight += s->weights[i];
    }

    adjust = 1.0 / total_weight;
    for (i = 0; i < 21; i++)
        s->weights[i] *= adjust;
}

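/* Return the Gaussian-weighted average of the 21 entries of the 30-entry
 * circular delta[] gain history centred on the given index. */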
static double gaussian_filter(LoudNormContext *s, int index)
{
    double result = 0.;
    int i;

    index = index - 10 > 0 ? index - 10 : index + 20;
    for (i = 0; i < 21; i++)
        result += s->delta[((index + i) < 30) ? (index + i) : (index + i - 30)] * s->weights[i];

    return result;
}

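/* Scan the circular limiter buffer, starting `offset` samples past the current
 * read position plus the fixed 1920-sample lookahead, for a local maximum that
 * exceeds the true-peak ceiling (s->target_tp). On success, *peak_delta holds
 * the sample offset of the peak, *peak_value its largest absolute value across
 * channels and s->peak_index its buffer position; otherwise *peak_delta is
 * left at -1. */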
static void detect_peak(LoudNormContext *s, int offset, int nb_samples, int channels, int *peak_delta, double *peak_value)
{
    int n, c, i, index;
    double ceiling;
    double *buf;

    *peak_delta = -1;
    buf = s->limiter_buf;
    ceiling = s->target_tp;

    index = s->limiter_buf_index + (offset * channels) + (1920 * channels);
    if (index >= s->limiter_buf_size)
        index -= s->limiter_buf_size;

    if (s->frame_type == FIRST_FRAME) {
        for (c = 0; c < channels; c++)
            s->prev_smp[c] = fabs(buf[index + c - channels]);
    }

    for (n = 0; n < nb_samples; n++) {
        for (c = 0; c < channels; c++) {
            double this, next, max_peak;
            this = fabs(buf[(index + c) < s->limiter_buf_size ? (index + c) : (index + c - s->limiter_buf_size)]);
            next = fabs(buf[(index + c + channels) < s->limiter_buf_size ? (index + c + channels) : (index + c + channels - s->limiter_buf_size)]);

            if ((s->prev_smp[c] <= this) && (next <= this) && (this > ceiling) && (n > 0)) {
                int detected;

                detected = 1;
                for (i = 2; i < 12; i++) {
                    next = fabs(buf[(index + c + (i * channels)) < s->limiter_buf_size ? (index + c + (i * channels)) : (index + c + (i * channels) - s->limiter_buf_size)]);
                    if (next > this) {
                        detected = 0;
                        break;
                    }
                }

                if (!detected)
                    continue;

                for (c = 0; c < channels; c++) {
                    if (c == 0 || fabs(buf[index + c]) > max_peak)
                        max_peak = fabs(buf[index + c]);

                    s->prev_smp[c] = fabs(buf[(index + c) < s->limiter_buf_size ? (index + c) : (index + c - s->limiter_buf_size)]);
                }

                *peak_delta = n;
                s->peak_index = index;
                *peak_value = max_peak;
                return;
            }

            s->prev_smp[c] = this;
        }

        index += channels;
        if (index >= s->limiter_buf_size)
            index -= s->limiter_buf_size;
    }
}

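/* Lookahead true-peak limiter over the circular limiter buffer. A small state
 * machine (OUT -> ATTACK -> SUSTAIN -> RELEASE) shapes a gain envelope that
 * brings detected peaks down to the ceiling; for the first frame the initial
 * 1920-sample block is pre-scanned and scaled if it already exceeds the
 * ceiling. Finally nb_samples are copied from the buffer to `out`, with a
 * hard clip to the ceiling as a safety net. */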
static void true_peak_limiter(LoudNormContext *s, double *out, int nb_samples, int channels)
{
    int n, c, index, peak_delta, smp_cnt;
    double ceiling, peak_value;
    double *buf;

    buf = s->limiter_buf;
    ceiling = s->target_tp;
    index = s->limiter_buf_index;
    smp_cnt = 0;

    if (s->frame_type == FIRST_FRAME) {
        double max;

        max = 0.;
        for (n = 0; n < 1920; n++) {
            for (c = 0; c < channels; c++) {
                max = fabs(buf[c]) > max ? fabs(buf[c]) : max;
            }
            buf += channels;
        }

        if (max > ceiling) {
            s->gain_reduction[1] = ceiling / max;
            s->limiter_state = SUSTAIN;
            buf = s->limiter_buf;

            for (n = 0; n < 1920; n++) {
                for (c = 0; c < channels; c++) {
                    double env;
                    env = s->gain_reduction[1];
                    buf[c] *= env;
                }
                buf += channels;
            }
        }

        buf = s->limiter_buf;
    }

    do {
        switch(s->limiter_state) {
        case OUT:
            detect_peak(s, smp_cnt, nb_samples - smp_cnt, channels, &peak_delta, &peak_value);
            if (peak_delta != -1) {
                s->env_cnt = 0;
                smp_cnt += (peak_delta - s->attack_length);
                s->gain_reduction[0] = 1.;
                s->gain_reduction[1] = ceiling / peak_value;
                s->limiter_state = ATTACK;

                s->env_index = s->peak_index - (s->attack_length * channels);
                if (s->env_index < 0)
                    s->env_index += s->limiter_buf_size;

                s->env_index += (s->env_cnt * channels);
                if (s->env_index > s->limiter_buf_size)
                    s->env_index -= s->limiter_buf_size;

            } else {
                smp_cnt = nb_samples;
            }
            break;

        case ATTACK:
            for (; s->env_cnt < s->attack_length; s->env_cnt++) {
                for (c = 0; c < channels; c++) {
                    double env;
                    env = s->gain_reduction[0] - ((double) s->env_cnt / (s->attack_length - 1) * (s->gain_reduction[0] - s->gain_reduction[1]));
                    buf[s->env_index + c] *= env;
                }

                s->env_index += channels;
                if (s->env_index >= s->limiter_buf_size)
                    s->env_index -= s->limiter_buf_size;

                smp_cnt++;
                if (smp_cnt >= nb_samples) {
                    s->env_cnt++;
                    break;
                }
            }

            if (smp_cnt < nb_samples) {
                s->env_cnt = 0;
                s->attack_length = 1920;
                s->limiter_state = SUSTAIN;
            }
            break;

        case SUSTAIN:
            detect_peak(s, smp_cnt, nb_samples, channels, &peak_delta, &peak_value);
            if (peak_delta == -1) {
                s->limiter_state = RELEASE;
                s->gain_reduction[0] = s->gain_reduction[1];
                s->gain_reduction[1] = 1.;
                s->env_cnt = 0;
                break;
            } else {
                double gain_reduction;
                gain_reduction = ceiling / peak_value;

                if (gain_reduction < s->gain_reduction[1]) {
                    s->limiter_state = ATTACK;

                    s->attack_length = peak_delta;
                    if (s->attack_length <= 1)
                        s->attack_length = 2;

                    s->gain_reduction[0] = s->gain_reduction[1];
                    s->gain_reduction[1] = gain_reduction;
                    s->env_cnt = 0;
                    break;
                }

                for (s->env_cnt = 0; s->env_cnt < peak_delta; s->env_cnt++) {
                    for (c = 0; c < channels; c++) {
                        double env;
                        env = s->gain_reduction[1];
                        buf[s->env_index + c] *= env;
                    }

                    s->env_index += channels;
                    if (s->env_index >= s->limiter_buf_size)
                        s->env_index -= s->limiter_buf_size;

                    smp_cnt++;
                    if (smp_cnt >= nb_samples) {
                        s->env_cnt++;
                        break;
                    }
                }
            }
            break;

        case RELEASE:
            for (; s->env_cnt < s->release_length; s->env_cnt++) {
                for (c = 0; c < channels; c++) {
                    double env;
                    env = s->gain_reduction[0] + (((double) s->env_cnt / (s->release_length - 1)) * (s->gain_reduction[1] - s->gain_reduction[0]));
                    buf[s->env_index + c] *= env;
                }

                s->env_index += channels;
                if (s->env_index >= s->limiter_buf_size)
                    s->env_index -= s->limiter_buf_size;

                smp_cnt++;
                if (smp_cnt >= nb_samples) {
                    s->env_cnt++;
                    break;
                }
            }

            if (smp_cnt < nb_samples) {
                s->env_cnt = 0;
                s->limiter_state = OUT;
            }
            break;
        }
    } while (smp_cnt < nb_samples);

    for (n = 0; n < nb_samples; n++) {
        for (c = 0; c < channels; c++) {
            out[c] = buf[index + c];
            if (fabs(out[c]) > ceiling) {
                out[c] = ceiling * (out[c] < 0 ? -1 : 1);
            }
        }
        out += channels;
        index += channels;
        if (index >= s->limiter_buf_size)
            index -= s->limiter_buf_size;
    }
}

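/* Main per-frame processing. FIRST_FRAME buffers the initial 3 s of audio,
 * seeds the gain history and the limiter buffer, and emits the first 100 ms
 * subframe; INNER_FRAME applies the Gaussian-smoothed, linearly interpolated
 * gain to the delayed samples and runs the limiter; FINAL_FRAME flushes the
 * remaining buffered audio; LINEAR_MODE applies the constant offset gain.
 * Inputs shorter than 3 s are detected up front and handled linearly. */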
static int filter_frame(AVFilterLink *inlink, AVFrame *in)
{
    AVFilterContext *ctx = inlink->dst;
    LoudNormContext *s = ctx->priv;
    AVFilterLink *outlink = ctx->outputs[0];
    AVFrame *out;
    const double *src;
    double *dst;
    double *buf;
    double *limiter_buf;
    int i, n, c, subframe_length, src_index;
    double gain, gain_next, env_global, env_shortterm,
           global, shortterm, lra, relative_threshold;

    if (av_frame_is_writable(in)) {
        out = in;
    } else {
        out = ff_get_audio_buffer(outlink, in->nb_samples);
        if (!out) {
            av_frame_free(&in);
            return AVERROR(ENOMEM);
        }
        av_frame_copy_props(out, in);
    }

    if (s->pts == AV_NOPTS_VALUE)
        s->pts = in->pts;

    out->pts = s->pts;
    src = (const double *)in->data[0];
    dst = (double *)out->data[0];
    buf = s->buf;
    limiter_buf = s->limiter_buf;

    ff_ebur128_add_frames_double(s->r128_in, src, in->nb_samples);

    if (s->frame_type == FIRST_FRAME && in->nb_samples < frame_size(inlink->sample_rate, 3000)) {
        double offset, offset_tp, true_peak;

        ff_ebur128_loudness_global(s->r128_in, &global);
        for (c = 0; c < inlink->channels; c++) {
            double tmp;
            ff_ebur128_sample_peak(s->r128_in, c, &tmp);
            if (c == 0 || tmp > true_peak)
                true_peak = tmp;
        }

        offset    = s->target_i - global;
        offset_tp = true_peak + offset;
        s->offset = offset_tp < s->target_tp ? offset : s->target_tp - true_peak;
        s->offset = pow(10., s->offset / 20.);
        s->frame_type = LINEAR_MODE;
    }

    switch (s->frame_type) {
    case FIRST_FRAME:
        for (n = 0; n < in->nb_samples; n++) {
            for (c = 0; c < inlink->channels; c++) {
                buf[s->buf_index + c] = src[c];
            }
            src += inlink->channels;
            s->buf_index += inlink->channels;
        }

        ff_ebur128_loudness_shortterm(s->r128_in, &shortterm);

        if (shortterm < s->measured_thresh) {
            s->above_threshold = 0;
            env_shortterm = shortterm <= -70. ? 0. : s->target_i - s->measured_i;
        } else {
            s->above_threshold = 1;
            env_shortterm = shortterm <= -70. ? 0. : s->target_i - shortterm;
        }

        for (n = 0; n < 30; n++)
            s->delta[n] = pow(10., env_shortterm / 20.);
        s->prev_delta = s->delta[s->index];

        s->buf_index =
        s->limiter_buf_index = 0;

        for (n = 0; n < (s->limiter_buf_size / inlink->channels); n++) {
            for (c = 0; c < inlink->channels; c++) {
                limiter_buf[s->limiter_buf_index + c] = buf[s->buf_index + c] * s->delta[s->index] * s->offset;
            }
            s->limiter_buf_index += inlink->channels;
            if (s->limiter_buf_index >= s->limiter_buf_size)
                s->limiter_buf_index -= s->limiter_buf_size;

            s->buf_index += inlink->channels;
        }

        subframe_length = frame_size(inlink->sample_rate, 100);
        true_peak_limiter(s, dst, subframe_length, inlink->channels);
        ff_ebur128_add_frames_double(s->r128_out, dst, subframe_length);

        s->pts +=
        out->nb_samples =
        inlink->min_samples =
        inlink->max_samples =
        inlink->partial_buf_size = subframe_length;

        s->frame_type = INNER_FRAME;
        break;

    case INNER_FRAME:
        gain      = gaussian_filter(s, s->index + 10 < 30 ? s->index + 10 : s->index + 10 - 30);
        gain_next = gaussian_filter(s, s->index + 11 < 30 ? s->index + 11 : s->index + 11 - 30);

        for (n = 0; n < in->nb_samples; n++) {
            for (c = 0; c < inlink->channels; c++) {
                buf[s->prev_buf_index + c] = src[c];
                limiter_buf[s->limiter_buf_index + c] = buf[s->buf_index + c] * (gain + (((double) n / in->nb_samples) * (gain_next - gain))) * s->offset;
            }
            src += inlink->channels;

            s->limiter_buf_index += inlink->channels;
            if (s->limiter_buf_index >= s->limiter_buf_size)
                s->limiter_buf_index -= s->limiter_buf_size;

            s->prev_buf_index += inlink->channels;
            if (s->prev_buf_index >= s->buf_size)
                s->prev_buf_index -= s->buf_size;

            s->buf_index += inlink->channels;
            if (s->buf_index >= s->buf_size)
                s->buf_index -= s->buf_size;
        }

        subframe_length = (frame_size(inlink->sample_rate, 100) - in->nb_samples) * inlink->channels;
        s->limiter_buf_index = s->limiter_buf_index + subframe_length < s->limiter_buf_size ? s->limiter_buf_index + subframe_length : s->limiter_buf_index + subframe_length - s->limiter_buf_size;

        true_peak_limiter(s, dst, in->nb_samples, inlink->channels);
        ff_ebur128_add_frames_double(s->r128_out, dst, in->nb_samples);

        ff_ebur128_loudness_range(s->r128_in, &lra);
        ff_ebur128_loudness_global(s->r128_in, &global);
        ff_ebur128_loudness_shortterm(s->r128_in, &shortterm);
        ff_ebur128_relative_threshold(s->r128_in, &relative_threshold);

        if (s->above_threshold == 0) {
            double shortterm_out;

            if (shortterm > s->measured_thresh)
                s->prev_delta *= 1.0058;

            ff_ebur128_loudness_shortterm(s->r128_out, &shortterm_out);
            if (shortterm_out >= s->target_i)
                s->above_threshold = 1;
        }

        if (shortterm < relative_threshold || shortterm <= -70. || s->above_threshold == 0) {
            s->delta[s->index] = s->prev_delta;
        } else {
            env_global = fabs(shortterm - global) < (s->target_lra / 2.) ? shortterm - global : (s->target_lra / 2.) * ((shortterm - global) < 0 ? -1 : 1);
            env_shortterm = s->target_i - shortterm;
            s->delta[s->index] = pow(10., (env_global + env_shortterm) / 20.);
        }

        s->prev_delta = s->delta[s->index];
        s->index++;
        if (s->index >= 30)
            s->index -= 30;
        s->prev_nb_samples = in->nb_samples;
        s->pts += in->nb_samples;
        break;

    case FINAL_FRAME:
        gain = gaussian_filter(s, s->index + 10 < 30 ? s->index + 10 : s->index + 10 - 30);
        s->limiter_buf_index = 0;
        src_index = 0;

        for (n = 0; n < s->limiter_buf_size / inlink->channels; n++) {
            for (c = 0; c < inlink->channels; c++) {
                s->limiter_buf[s->limiter_buf_index + c] = src[src_index + c] * gain * s->offset;
            }
            src_index += inlink->channels;

            s->limiter_buf_index += inlink->channels;
            if (s->limiter_buf_index >= s->limiter_buf_size)
                s->limiter_buf_index -= s->limiter_buf_size;
        }

        subframe_length = frame_size(inlink->sample_rate, 100);
        for (i = 0; i < in->nb_samples / subframe_length; i++) {
            true_peak_limiter(s, dst, subframe_length, inlink->channels);

            for (n = 0; n < subframe_length; n++) {
                for (c = 0; c < inlink->channels; c++) {
                    if (src_index < (in->nb_samples * inlink->channels)) {
                        limiter_buf[s->limiter_buf_index + c] = src[src_index + c] * gain * s->offset;
                    } else {
                        limiter_buf[s->limiter_buf_index + c] = 0.;
                    }
                }

                if (src_index < (in->nb_samples * inlink->channels))
                    src_index += inlink->channels;

                s->limiter_buf_index += inlink->channels;
                if (s->limiter_buf_index >= s->limiter_buf_size)
                    s->limiter_buf_index -= s->limiter_buf_size;
            }

            dst += (subframe_length * inlink->channels);
        }

        dst = (double *)out->data[0];
        ff_ebur128_add_frames_double(s->r128_out, dst, in->nb_samples);
        break;

    case LINEAR_MODE:
        for (n = 0; n < in->nb_samples; n++) {
            for (c = 0; c < inlink->channels; c++) {
                dst[c] = src[c] * s->offset;
            }
            src += inlink->channels;
            dst += inlink->channels;
        }

        dst = (double *)out->data[0];
        ff_ebur128_add_frames_double(s->r128_out, dst, in->nb_samples);
        s->pts += in->nb_samples;
        break;
    }

    if (in != out)
        av_frame_free(&in);

    return ff_filter_frame(outlink, out);
}

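/* On EOF in dynamic (INNER_FRAME) mode, assemble one last frame from the
 * samples still sitting in the 3 s delay buffer and push it through
 * filter_frame() with frame_type set to FINAL_FRAME. */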
static int request_frame(AVFilterLink *outlink)
{
    int ret;
    AVFilterContext *ctx = outlink->src;
    AVFilterLink *inlink = ctx->inputs[0];
    LoudNormContext *s = ctx->priv;

    ret = ff_request_frame(inlink);
    if (ret == AVERROR_EOF && s->frame_type == INNER_FRAME) {
        double *src;
        double *buf;
        int nb_samples, n, c, offset;
        AVFrame *frame;

        nb_samples  = (s->buf_size / inlink->channels) - s->prev_nb_samples;
        nb_samples -= (frame_size(inlink->sample_rate, 100) - s->prev_nb_samples);

        frame = ff_get_audio_buffer(outlink, nb_samples);
        if (!frame)
            return AVERROR(ENOMEM);
        frame->nb_samples = nb_samples;

        buf = s->buf;
        src = (double *)frame->data[0];

        offset  = ((s->limiter_buf_size / inlink->channels) - s->prev_nb_samples) * inlink->channels;
        offset -= (frame_size(inlink->sample_rate, 100) - s->prev_nb_samples) * inlink->channels;
        s->buf_index = s->buf_index - offset < 0 ? s->buf_index - offset + s->buf_size : s->buf_index - offset;

        for (n = 0; n < nb_samples; n++) {
            for (c = 0; c < inlink->channels; c++) {
                src[c] = buf[s->buf_index + c];
            }
            src += inlink->channels;
            s->buf_index += inlink->channels;
            if (s->buf_index >= s->buf_size)
                s->buf_index -= s->buf_size;
        }

        s->frame_type = FINAL_FRAME;
        ret = filter_frame(inlink, frame);
    }

    return ret;
}

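/* Restrict the filter to interleaved doubles and any channel count. In
 * dynamic mode only 192 kHz input/output is accepted (at that rate the
 * limiter's fixed 1920-sample lookahead spans 10 ms). */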
static int query_formats(AVFilterContext *ctx)
{
    LoudNormContext *s = ctx->priv;
    AVFilterFormats *formats;
    AVFilterChannelLayouts *layouts;
    AVFilterLink *inlink = ctx->inputs[0];
    AVFilterLink *outlink = ctx->outputs[0];
    static const int input_srate[] = {192000, -1};
    static const enum AVSampleFormat sample_fmts[] = {
        AV_SAMPLE_FMT_DBL,
        AV_SAMPLE_FMT_NONE
    };
    int ret;

    layouts = ff_all_channel_counts();
    if (!layouts)
        return AVERROR(ENOMEM);
    ret = ff_set_common_channel_layouts(ctx, layouts);
    if (ret < 0)
        return ret;

    formats = ff_make_format_list(sample_fmts);
    if (!formats)
        return AVERROR(ENOMEM);
    ret = ff_set_common_formats(ctx, formats);
    if (ret < 0)
        return ret;

    if (s->frame_type != LINEAR_MODE) {
        formats = ff_make_format_list(input_srate);
        if (!formats)
            return AVERROR(ENOMEM);
        ret = ff_formats_ref(formats, &inlink->out_samplerates);
        if (ret < 0)
            return ret;
        ret = ff_formats_ref(formats, &outlink->in_samplerates);
        if (ret < 0)
            return ret;
    }

    return 0;
}

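/* Per-link setup: create the input/output EBU R128 measurement states,
 * allocate the 3000 ms delay buffer, the 210 ms limiter buffer and the
 * per-channel previous-sample array, set the 3 s input frame size in dynamic
 * mode, convert the offset and true-peak target from dB to linear gain, and
 * set the 10 ms attack / 100 ms release lengths. */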
static int config_input(AVFilterLink *inlink)
{
    AVFilterContext *ctx = inlink->dst;
    LoudNormContext *s = ctx->priv;

    s->r128_in = ff_ebur128_init(inlink->channels, inlink->sample_rate, 0, FF_EBUR128_MODE_I | FF_EBUR128_MODE_S | FF_EBUR128_MODE_LRA | FF_EBUR128_MODE_SAMPLE_PEAK);
    if (!s->r128_in)
        return AVERROR(ENOMEM);

    s->r128_out = ff_ebur128_init(inlink->channels, inlink->sample_rate, 0, FF_EBUR128_MODE_I | FF_EBUR128_MODE_S | FF_EBUR128_MODE_LRA | FF_EBUR128_MODE_SAMPLE_PEAK);
    if (!s->r128_out)
        return AVERROR(ENOMEM);

    if (inlink->channels == 1 && s->dual_mono) {
        ff_ebur128_set_channel(s->r128_in,  0, FF_EBUR128_DUAL_MONO);
        ff_ebur128_set_channel(s->r128_out, 0, FF_EBUR128_DUAL_MONO);
    }

    s->buf_size = frame_size(inlink->sample_rate, 3000) * inlink->channels;
    s->buf = av_malloc_array(s->buf_size, sizeof(*s->buf));
    if (!s->buf)
        return AVERROR(ENOMEM);

    s->limiter_buf_size = frame_size(inlink->sample_rate, 210) * inlink->channels;
    s->limiter_buf = av_malloc_array(s->buf_size, sizeof(*s->limiter_buf));
    if (!s->limiter_buf)
        return AVERROR(ENOMEM);

    s->prev_smp = av_malloc_array(inlink->channels, sizeof(*s->prev_smp));
    if (!s->prev_smp)
        return AVERROR(ENOMEM);

    init_gaussian_filter(s);

    if (s->frame_type != LINEAR_MODE) {
        inlink->min_samples =
        inlink->max_samples =
        inlink->partial_buf_size = frame_size(inlink->sample_rate, 3000);
    }

    s->pts = AV_NOPTS_VALUE;
    s->buf_index =
    s->prev_buf_index =
    s->limiter_buf_index = 0;
    s->channels = inlink->channels;
    s->index = 1;
    s->limiter_state = OUT;
    s->offset = pow(10., s->offset / 20.);
    s->target_tp = pow(10., s->target_tp / 20.);
    s->attack_length = frame_size(inlink->sample_rate, 10);
    s->release_length = frame_size(inlink->sample_rate, 100);

    return 0;
}

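/* If all measured_* options were set away from their defaults and both the
 * resulting true peak and the measured LRA fit within the targets, start
 * directly in LINEAR_MODE with a single constant gain (cf. the link at the
 * top of this file); otherwise begin in dynamic mode with FIRST_FRAME. */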
static av_cold int init(AVFilterContext *ctx)
{
    LoudNormContext *s = ctx->priv;
    s->frame_type = FIRST_FRAME;

    if (s->linear) {
        double offset, offset_tp;
        offset    = s->target_i - s->measured_i;
        offset_tp = s->measured_tp + offset;

        if (s->measured_tp != 99 && s->measured_thresh != -70 && s->measured_lra != 0 && s->measured_i != 0) {
            if ((offset_tp <= s->target_tp) && (s->measured_lra <= s->target_lra)) {
                s->frame_type = LINEAR_MODE;
                s->offset = offset;
            }
        }
    }

    return 0;
}

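/* Print the requested statistics (integrated loudness, true peak, LRA and
 * threshold for input and output, plus normalization type and remaining
 * target offset) in JSON or summary form, then free the R128 states and
 * buffers. */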
static av_cold void uninit(AVFilterContext *ctx)
{
    LoudNormContext *s = ctx->priv;
    double i_in, i_out, lra_in, lra_out, thresh_in, thresh_out, tp_in, tp_out;
    int c;

    if (!s->r128_in || !s->r128_out)
        goto end;

    ff_ebur128_loudness_range(s->r128_in, &lra_in);
    ff_ebur128_loudness_global(s->r128_in, &i_in);
    ff_ebur128_relative_threshold(s->r128_in, &thresh_in);
    for (c = 0; c < s->channels; c++) {
        double tmp;
        ff_ebur128_sample_peak(s->r128_in, c, &tmp);
        if ((c == 0) || (tmp > tp_in))
            tp_in = tmp;
    }

    ff_ebur128_loudness_range(s->r128_out, &lra_out);
    ff_ebur128_loudness_global(s->r128_out, &i_out);
    ff_ebur128_relative_threshold(s->r128_out, &thresh_out);
    for (c = 0; c < s->channels; c++) {
        double tmp;
        ff_ebur128_sample_peak(s->r128_out, c, &tmp);
        if ((c == 0) || (tmp > tp_out))
            tp_out = tmp;
    }

    switch(s->print_format) {
    case NONE:
        break;

    case JSON:
        av_log(ctx, AV_LOG_INFO,
               "\n{\n"
               "\t\"input_i\" : \"%.2f\",\n"
               "\t\"input_tp\" : \"%.2f\",\n"
               "\t\"input_lra\" : \"%.2f\",\n"
               "\t\"input_thresh\" : \"%.2f\",\n"
               "\t\"output_i\" : \"%.2f\",\n"
               "\t\"output_tp\" : \"%+.2f\",\n"
               "\t\"output_lra\" : \"%.2f\",\n"
               "\t\"output_thresh\" : \"%.2f\",\n"
               "\t\"normalization_type\" : \"%s\",\n"
               "\t\"target_offset\" : \"%.2f\"\n"
               "}\n",
               i_in,
               20. * log10(tp_in),
               lra_in,
               thresh_in,
               i_out,
               20. * log10(tp_out),
               lra_out,
               thresh_out,
               s->frame_type == LINEAR_MODE ? "linear" : "dynamic",
               s->target_i - i_out
        );
        break;

    case SUMMARY:
        av_log(ctx, AV_LOG_INFO,
               "\n"
               "Input Integrated:   %+6.1f LUFS\n"
               "Input True Peak:    %+6.1f dBTP\n"
               "Input LRA:          %6.1f LU\n"
               "Input Threshold:    %+6.1f LUFS\n"
               "\n"
               "Output Integrated:  %+6.1f LUFS\n"
               "Output True Peak:   %+6.1f dBTP\n"
               "Output LRA:         %6.1f LU\n"
               "Output Threshold:   %+6.1f LUFS\n"
               "\n"
               "Normalization Type:   %s\n"
               "Target Offset:        %+6.1f LU\n",
               i_in,
               20. * log10(tp_in),
               lra_in,
               thresh_in,
               i_out,
               20. * log10(tp_out),
               lra_out,
               thresh_out,
               s->frame_type == LINEAR_MODE ? "Linear" : "Dynamic",
               s->target_i - i_out
        );
        break;
    }

end:
    if (s->r128_in)
        ff_ebur128_destroy(&s->r128_in);
    if (s->r128_out)
        ff_ebur128_destroy(&s->r128_out);
    av_freep(&s->limiter_buf);
    av_freep(&s->prev_smp);
    av_freep(&s->buf);
}

static const AVFilterPad avfilter_af_loudnorm_inputs[] = {
    {
        .name         = "default",
        .type         = AVMEDIA_TYPE_AUDIO,
        .config_props = config_input,
        .filter_frame = filter_frame,
    },
    { NULL }
};

static const AVFilterPad avfilter_af_loudnorm_outputs[] = {
    {
        .name          = "default",
        .request_frame = request_frame,
        .type          = AVMEDIA_TYPE_AUDIO,
    },
    { NULL }
};

AVFilter ff_af_loudnorm = {
    .name          = "loudnorm",
    .description   = NULL_IF_CONFIG_SMALL("EBU R128 loudness normalization"),
    .priv_size     = sizeof(LoudNormContext),
    .priv_class    = &loudnorm_class,
    .query_formats = query_formats,
    .init          = init,
    .uninit        = uninit,
    .inputs        = avfilter_af_loudnorm_inputs,
    .outputs       = avfilter_af_loudnorm_outputs,
};