/* af_dynaudnorm.c */
  1. /*
  2. * Dynamic Audio Normalizer
  3. * Copyright (c) 2015 LoRd_MuldeR <mulder2@gmx.de>. Some rights reserved.
  4. *
  5. * This file is part of FFmpeg.
  6. *
  7. * FFmpeg is free software; you can redistribute it and/or
  8. * modify it under the terms of the GNU Lesser General Public
  9. * License as published by the Free Software Foundation; either
  10. * version 2.1 of the License, or (at your option) any later version.
  11. *
  12. * FFmpeg is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  15. * Lesser General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU Lesser General Public
  18. * License along with FFmpeg; if not, write to the Free Software
  19. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20. */
  21. /**
  22. * @file
  23. * Dynamic Audio Normalizer
  24. */
  25. #include <float.h>
  26. #include "libavutil/avassert.h"
  27. #include "libavutil/opt.h"
  28. #define FF_BUFQUEUE_SIZE 302
  29. #include "libavfilter/bufferqueue.h"
  30. #include "audio.h"
  31. #include "avfilter.h"
  32. #include "filters.h"
  33. #include "internal.h"
/* Fixed-capacity circular queue of doubles, used for the gain histories. */
typedef struct cqueue {
    double *elements;   // ring-buffer storage (capacity `size`)
    int size;           // allocated capacity
    int nb_elements;    // number of entries currently stored
    int first;          // index of the oldest entry
} cqueue;
typedef struct DynamicAudioNormalizerContext {
    const AVClass *class;

    struct FFBufQueue queue;            // delayed input frames awaiting their smoothed gain

    int frame_len;                      // frame length in samples (forced even by frame_size())
    int frame_len_msec;                 // "f" option: frame length in milliseconds
    int filter_size;                    // "g" option: Gaussian window size (must be odd)
    int dc_correction;                  // "c" option
    int channels_coupled;               // "n" option
    int alt_boundary_mode;              // "b" option

    double peak_value;                  // "p" option
    double max_amplification;           // "m" option
    double target_rms;                  // "r" option (0 disables RMS targeting)
    double compress_factor;             // "s" option (0 disables compression)

    double *prev_amplification_factor;  // per channel, for sample-wise cross-fading
    double *dc_correction_value;        // per channel, smoothed DC offset
    double *compress_threshold;         // per channel, smoothed compression threshold
    double *fade_factors[2];            // [0] fades out, [1] fades in (complementary ramps)
    double *weights;                    // normalized Gaussian filter weights

    int channels;                       // cached channel count from config_input()
    int delay;                          // frames still owed at EOF before the queue is drained
    int eof;                            // input reached EOF
    int64_t pts;                        // last pts seen while flushing, used for EOF status

    cqueue **gain_history_original;     // per channel: raw per-frame gains
    cqueue **gain_history_minimum;      // per channel: minimum-filtered gains
    cqueue **gain_history_smoothed;     // per channel: Gaussian-smoothed gains
    cqueue *is_enabled;                 // delayed timeline-enable flags, kept in sync with queue
} DynamicAudioNormalizerContext;
#define OFFSET(x) offsetof(DynamicAudioNormalizerContext, x)
#define FLAGS AV_OPT_FLAG_AUDIO_PARAM|AV_OPT_FLAG_FILTERING_PARAM

// Single-letter option names mirror the original Dynamic Audio Normalizer tool.
static const AVOption dynaudnorm_options[] = {
    { "f", "set the frame length in msec", OFFSET(frame_len_msec), AV_OPT_TYPE_INT, {.i64 = 500}, 10, 8000, FLAGS },
    { "g", "set the filter size", OFFSET(filter_size), AV_OPT_TYPE_INT, {.i64 = 31}, 3, 301, FLAGS },
    { "p", "set the peak value", OFFSET(peak_value), AV_OPT_TYPE_DOUBLE, {.dbl = 0.95}, 0.0, 1.0, FLAGS },
    { "m", "set the max amplification", OFFSET(max_amplification), AV_OPT_TYPE_DOUBLE, {.dbl = 10.0}, 1.0, 100.0, FLAGS },
    { "r", "set the target RMS", OFFSET(target_rms), AV_OPT_TYPE_DOUBLE, {.dbl = 0.0}, 0.0, 1.0, FLAGS },
    { "n", "set channel coupling", OFFSET(channels_coupled), AV_OPT_TYPE_BOOL, {.i64 = 1}, 0, 1, FLAGS },
    { "c", "set DC correction", OFFSET(dc_correction), AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, FLAGS },
    { "b", "set alternative boundary mode", OFFSET(alt_boundary_mode), AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, FLAGS },
    { "s", "set the compress factor", OFFSET(compress_factor), AV_OPT_TYPE_DOUBLE, {.dbl = 0.0}, 0.0, 30.0, FLAGS },
    { NULL }
};

AVFILTER_DEFINE_CLASS(dynaudnorm);
  82. static av_cold int init(AVFilterContext *ctx)
  83. {
  84. DynamicAudioNormalizerContext *s = ctx->priv;
  85. if (!(s->filter_size & 1)) {
  86. av_log(ctx, AV_LOG_ERROR, "filter size %d is invalid. Must be an odd value.\n", s->filter_size);
  87. return AVERROR(EINVAL);
  88. }
  89. return 0;
  90. }
  91. static int query_formats(AVFilterContext *ctx)
  92. {
  93. AVFilterFormats *formats;
  94. AVFilterChannelLayouts *layouts;
  95. static const enum AVSampleFormat sample_fmts[] = {
  96. AV_SAMPLE_FMT_DBLP,
  97. AV_SAMPLE_FMT_NONE
  98. };
  99. int ret;
  100. layouts = ff_all_channel_counts();
  101. if (!layouts)
  102. return AVERROR(ENOMEM);
  103. ret = ff_set_common_channel_layouts(ctx, layouts);
  104. if (ret < 0)
  105. return ret;
  106. formats = ff_make_format_list(sample_fmts);
  107. if (!formats)
  108. return AVERROR(ENOMEM);
  109. ret = ff_set_common_formats(ctx, formats);
  110. if (ret < 0)
  111. return ret;
  112. formats = ff_all_samplerates();
  113. if (!formats)
  114. return AVERROR(ENOMEM);
  115. return ff_set_common_samplerates(ctx, formats);
  116. }
  117. static inline int frame_size(int sample_rate, int frame_len_msec)
  118. {
  119. const int frame_size = lrint((double)sample_rate * (frame_len_msec / 1000.0));
  120. return frame_size + (frame_size % 2);
  121. }
  122. static void precalculate_fade_factors(double *fade_factors[2], int frame_len)
  123. {
  124. const double step_size = 1.0 / frame_len;
  125. int pos;
  126. for (pos = 0; pos < frame_len; pos++) {
  127. fade_factors[0][pos] = 1.0 - (step_size * (pos + 1.0));
  128. fade_factors[1][pos] = 1.0 - fade_factors[0][pos];
  129. }
  130. }
  131. static cqueue *cqueue_create(int size)
  132. {
  133. cqueue *q;
  134. q = av_malloc(sizeof(cqueue));
  135. if (!q)
  136. return NULL;
  137. q->size = size;
  138. q->nb_elements = 0;
  139. q->first = 0;
  140. q->elements = av_malloc_array(size, sizeof(double));
  141. if (!q->elements) {
  142. av_free(q);
  143. return NULL;
  144. }
  145. return q;
  146. }
  147. static void cqueue_free(cqueue *q)
  148. {
  149. if (q)
  150. av_free(q->elements);
  151. av_free(q);
  152. }
  153. static int cqueue_size(cqueue *q)
  154. {
  155. return q->nb_elements;
  156. }
  157. static int cqueue_empty(cqueue *q)
  158. {
  159. return !q->nb_elements;
  160. }
  161. static int cqueue_enqueue(cqueue *q, double element)
  162. {
  163. int i;
  164. av_assert2(q->nb_elements != q->size);
  165. i = (q->first + q->nb_elements) % q->size;
  166. q->elements[i] = element;
  167. q->nb_elements++;
  168. return 0;
  169. }
  170. static double cqueue_peek(cqueue *q, int index)
  171. {
  172. av_assert2(index < q->nb_elements);
  173. return q->elements[(q->first + index) % q->size];
  174. }
  175. static int cqueue_dequeue(cqueue *q, double *element)
  176. {
  177. av_assert2(!cqueue_empty(q));
  178. *element = q->elements[q->first];
  179. q->first = (q->first + 1) % q->size;
  180. q->nb_elements--;
  181. return 0;
  182. }
  183. static int cqueue_pop(cqueue *q)
  184. {
  185. av_assert2(!cqueue_empty(q));
  186. q->first = (q->first + 1) % q->size;
  187. q->nb_elements--;
  188. return 0;
  189. }
  190. static void init_gaussian_filter(DynamicAudioNormalizerContext *s)
  191. {
  192. double total_weight = 0.0;
  193. const double sigma = (((s->filter_size / 2.0) - 1.0) / 3.0) + (1.0 / 3.0);
  194. double adjust;
  195. int i;
  196. // Pre-compute constants
  197. const int offset = s->filter_size / 2;
  198. const double c1 = 1.0 / (sigma * sqrt(2.0 * M_PI));
  199. const double c2 = 2.0 * sigma * sigma;
  200. // Compute weights
  201. for (i = 0; i < s->filter_size; i++) {
  202. const int x = i - offset;
  203. s->weights[i] = c1 * exp(-x * x / c2);
  204. total_weight += s->weights[i];
  205. }
  206. // Adjust weights
  207. adjust = 1.0 / total_weight;
  208. for (i = 0; i < s->filter_size; i++) {
  209. s->weights[i] *= adjust;
  210. }
  211. }
/**
 * Release all per-link state. Also called from config_input() before
 * (re)allocation, so every free must be safe on NULL or partially
 * initialized state.
 */
static av_cold void uninit(AVFilterContext *ctx)
{
    DynamicAudioNormalizerContext *s = ctx->priv;
    int c;

    av_freep(&s->prev_amplification_factor);
    av_freep(&s->dc_correction_value);
    av_freep(&s->compress_threshold);
    av_freep(&s->fade_factors[0]);
    av_freep(&s->fade_factors[1]);

    // Per-channel queues exist only if their containing array was allocated;
    // cqueue_free() itself tolerates NULL entries.
    for (c = 0; c < s->channels; c++) {
        if (s->gain_history_original)
            cqueue_free(s->gain_history_original[c]);
        if (s->gain_history_minimum)
            cqueue_free(s->gain_history_minimum[c]);
        if (s->gain_history_smoothed)
            cqueue_free(s->gain_history_smoothed[c]);
    }

    av_freep(&s->gain_history_original);
    av_freep(&s->gain_history_minimum);
    av_freep(&s->gain_history_smoothed);

    cqueue_free(s->is_enabled);
    s->is_enabled = NULL;  // guard against double free on re-configuration

    av_freep(&s->weights);

    ff_bufqueue_discard_all(&s->queue);
}
  237. static int config_input(AVFilterLink *inlink)
  238. {
  239. AVFilterContext *ctx = inlink->dst;
  240. DynamicAudioNormalizerContext *s = ctx->priv;
  241. int c;
  242. uninit(ctx);
  243. s->frame_len = frame_size(inlink->sample_rate, s->frame_len_msec);
  244. av_log(ctx, AV_LOG_DEBUG, "frame len %d\n", s->frame_len);
  245. s->fade_factors[0] = av_malloc_array(s->frame_len, sizeof(*s->fade_factors[0]));
  246. s->fade_factors[1] = av_malloc_array(s->frame_len, sizeof(*s->fade_factors[1]));
  247. s->prev_amplification_factor = av_malloc_array(inlink->channels, sizeof(*s->prev_amplification_factor));
  248. s->dc_correction_value = av_calloc(inlink->channels, sizeof(*s->dc_correction_value));
  249. s->compress_threshold = av_calloc(inlink->channels, sizeof(*s->compress_threshold));
  250. s->gain_history_original = av_calloc(inlink->channels, sizeof(*s->gain_history_original));
  251. s->gain_history_minimum = av_calloc(inlink->channels, sizeof(*s->gain_history_minimum));
  252. s->gain_history_smoothed = av_calloc(inlink->channels, sizeof(*s->gain_history_smoothed));
  253. s->weights = av_malloc_array(s->filter_size, sizeof(*s->weights));
  254. s->is_enabled = cqueue_create(s->filter_size);
  255. if (!s->prev_amplification_factor || !s->dc_correction_value ||
  256. !s->compress_threshold || !s->fade_factors[0] || !s->fade_factors[1] ||
  257. !s->gain_history_original || !s->gain_history_minimum ||
  258. !s->gain_history_smoothed || !s->is_enabled || !s->weights)
  259. return AVERROR(ENOMEM);
  260. for (c = 0; c < inlink->channels; c++) {
  261. s->prev_amplification_factor[c] = 1.0;
  262. s->gain_history_original[c] = cqueue_create(s->filter_size);
  263. s->gain_history_minimum[c] = cqueue_create(s->filter_size);
  264. s->gain_history_smoothed[c] = cqueue_create(s->filter_size);
  265. if (!s->gain_history_original[c] || !s->gain_history_minimum[c] ||
  266. !s->gain_history_smoothed[c])
  267. return AVERROR(ENOMEM);
  268. }
  269. precalculate_fade_factors(s->fade_factors, s->frame_len);
  270. init_gaussian_filter(s);
  271. s->channels = inlink->channels;
  272. s->delay = s->filter_size;
  273. return 0;
  274. }
  275. static inline double fade(double prev, double next, int pos,
  276. double *fade_factors[2])
  277. {
  278. return fade_factors[0][pos] * prev + fade_factors[1][pos] * next;
  279. }
  280. static inline double pow_2(const double value)
  281. {
  282. return value * value;
  283. }
  284. static inline double bound(const double threshold, const double val)
  285. {
  286. const double CONST = 0.8862269254527580136490837416705725913987747280611935; //sqrt(PI) / 2.0
  287. return erf(CONST * (val / threshold)) * threshold;
  288. }
  289. static double find_peak_magnitude(AVFrame *frame, int channel)
  290. {
  291. double max = DBL_EPSILON;
  292. int c, i;
  293. if (channel == -1) {
  294. for (c = 0; c < frame->channels; c++) {
  295. double *data_ptr = (double *)frame->extended_data[c];
  296. for (i = 0; i < frame->nb_samples; i++)
  297. max = FFMAX(max, fabs(data_ptr[i]));
  298. }
  299. } else {
  300. double *data_ptr = (double *)frame->extended_data[channel];
  301. for (i = 0; i < frame->nb_samples; i++)
  302. max = FFMAX(max, fabs(data_ptr[i]));
  303. }
  304. return max;
  305. }
  306. static double compute_frame_rms(AVFrame *frame, int channel)
  307. {
  308. double rms_value = 0.0;
  309. int c, i;
  310. if (channel == -1) {
  311. for (c = 0; c < frame->channels; c++) {
  312. const double *data_ptr = (double *)frame->extended_data[c];
  313. for (i = 0; i < frame->nb_samples; i++) {
  314. rms_value += pow_2(data_ptr[i]);
  315. }
  316. }
  317. rms_value /= frame->nb_samples * frame->channels;
  318. } else {
  319. const double *data_ptr = (double *)frame->extended_data[channel];
  320. for (i = 0; i < frame->nb_samples; i++) {
  321. rms_value += pow_2(data_ptr[i]);
  322. }
  323. rms_value /= frame->nb_samples;
  324. }
  325. return FFMAX(sqrt(rms_value), DBL_EPSILON);
  326. }
  327. static double get_max_local_gain(DynamicAudioNormalizerContext *s, AVFrame *frame,
  328. int channel)
  329. {
  330. const double maximum_gain = s->peak_value / find_peak_magnitude(frame, channel);
  331. const double rms_gain = s->target_rms > DBL_EPSILON ? (s->target_rms / compute_frame_rms(frame, channel)) : DBL_MAX;
  332. return bound(s->max_amplification, FFMIN(maximum_gain, rms_gain));
  333. }
  334. static double minimum_filter(cqueue *q)
  335. {
  336. double min = DBL_MAX;
  337. int i;
  338. for (i = 0; i < cqueue_size(q); i++) {
  339. min = FFMIN(min, cqueue_peek(q, i));
  340. }
  341. return min;
  342. }
  343. static double gaussian_filter(DynamicAudioNormalizerContext *s, cqueue *q)
  344. {
  345. double result = 0.0;
  346. int i;
  347. for (i = 0; i < cqueue_size(q); i++) {
  348. result += cqueue_peek(q, i) * s->weights[i];
  349. }
  350. return result;
  351. }
/**
 * Push one per-frame gain factor through the three-stage history:
 * original -> minimum-filtered -> Gaussian-smoothed. Each stage is a sliding
 * window of filter_size entries; the first call pre-fills half a window so
 * output can begin after filter_size/2 frames of delay.
 */
static void update_gain_history(DynamicAudioNormalizerContext *s, int channel,
                                double current_gain_factor)
{
    if (cqueue_empty(s->gain_history_original[channel]) ||
        cqueue_empty(s->gain_history_minimum[channel])) {
        const int pre_fill_size = s->filter_size / 2;
        // In alt boundary mode, seed with the first real gain instead of
        // unity to avoid a loudness ramp at the start of the stream.
        const double initial_value = s->alt_boundary_mode ? current_gain_factor : 1.0;

        s->prev_amplification_factor[channel] = initial_value;

        while (cqueue_size(s->gain_history_original[channel]) < pre_fill_size) {
            cqueue_enqueue(s->gain_history_original[channel], initial_value);
        }
    }

    cqueue_enqueue(s->gain_history_original[channel], current_gain_factor);

    while (cqueue_size(s->gain_history_original[channel]) >= s->filter_size) {
        double minimum;
        av_assert0(cqueue_size(s->gain_history_original[channel]) == s->filter_size);

        if (cqueue_empty(s->gain_history_minimum[channel])) {
            // Seed the minimum stage with running minima taken over the
            // second half of the original window.
            const int pre_fill_size = s->filter_size / 2;
            double initial_value = s->alt_boundary_mode ? cqueue_peek(s->gain_history_original[channel], 0) : 1.0;
            int input = pre_fill_size;

            while (cqueue_size(s->gain_history_minimum[channel]) < pre_fill_size) {
                input++;
                initial_value = FFMIN(initial_value, cqueue_peek(s->gain_history_original[channel], input));
                cqueue_enqueue(s->gain_history_minimum[channel], initial_value);
            }
        }

        minimum = minimum_filter(s->gain_history_original[channel]);

        cqueue_enqueue(s->gain_history_minimum[channel], minimum);

        cqueue_pop(s->gain_history_original[channel]);
    }

    while (cqueue_size(s->gain_history_minimum[channel]) >= s->filter_size) {
        double smoothed;
        av_assert0(cqueue_size(s->gain_history_minimum[channel]) == s->filter_size);
        smoothed = gaussian_filter(s, s->gain_history_minimum[channel]);

        cqueue_enqueue(s->gain_history_smoothed[channel], smoothed);

        cqueue_pop(s->gain_history_minimum[channel]);
    }
}
  390. static inline double update_value(double new, double old, double aggressiveness)
  391. {
  392. av_assert0((aggressiveness >= 0.0) && (aggressiveness <= 1.0));
  393. return aggressiveness * new + (1.0 - aggressiveness) * old;
  394. }
/**
 * Remove the (slowly varying) DC offset from each channel in place. The
 * per-frame mean is exponentially smoothed (aggressiveness 0.1) and the
 * correction is cross-faded from the previous frame's value to avoid steps.
 */
static void perform_dc_correction(DynamicAudioNormalizerContext *s, AVFrame *frame)
{
    const double diff = 1.0 / frame->nb_samples;
    // Before the first gain enters the history there is no previous DC
    // estimate to fade from.
    int is_first_frame = cqueue_empty(s->gain_history_original[0]);
    int c, i;

    for (c = 0; c < s->channels; c++) {
        double *dst_ptr = (double *)frame->extended_data[c];
        double current_average_value = 0.0;
        double prev_value;

        for (i = 0; i < frame->nb_samples; i++)
            current_average_value += dst_ptr[i] * diff;

        prev_value = is_first_frame ? current_average_value : s->dc_correction_value[c];
        s->dc_correction_value[c] = is_first_frame ? current_average_value : update_value(current_average_value, s->dc_correction_value[c], 0.1);

        for (i = 0; i < frame->nb_samples; i++) {
            dst_ptr[i] -= fade(prev_value, s->dc_correction_value[c], i, s->fade_factors);
        }
    }
}
/**
 * Given a desired threshold in (0, 1), find the largest value whose bound()
 * at full scale still stays at or below the requested threshold. This is a
 * progressive-refinement search: push the candidate upward in steps and
 * halve step_size each round until it drops below DBL_EPSILON.
 *
 * NOTE(review): the llrint(x * (UINT64_C(1) << 63)) comparisons quantize the
 * doubles to detect whether adding step_size still changes the value at all.
 * Inputs here are confined to (0, 1) by the guard above, so the scaled
 * values stay within range — verify before reusing this construct with
 * values at or above 1.0.
 */
static double setup_compress_thresh(double threshold)
{
    if ((threshold > DBL_EPSILON) && (threshold < (1.0 - DBL_EPSILON))) {
        double current_threshold = threshold;
        double step_size = 1.0;

        while (step_size > DBL_EPSILON) {
            while ((llrint((current_threshold + step_size) * (UINT64_C(1) << 63)) >
                    llrint(current_threshold * (UINT64_C(1) << 63))) &&
                   (bound(current_threshold + step_size, 1.0) <= threshold)) {
                current_threshold += step_size;
            }

            step_size /= 2.0;
        }

        return current_threshold;
    } else {
        return threshold;
    }
}
  431. static double compute_frame_std_dev(DynamicAudioNormalizerContext *s,
  432. AVFrame *frame, int channel)
  433. {
  434. double variance = 0.0;
  435. int i, c;
  436. if (channel == -1) {
  437. for (c = 0; c < s->channels; c++) {
  438. const double *data_ptr = (double *)frame->extended_data[c];
  439. for (i = 0; i < frame->nb_samples; i++) {
  440. variance += pow_2(data_ptr[i]); // Assume that MEAN is *zero*
  441. }
  442. }
  443. variance /= (s->channels * frame->nb_samples) - 1;
  444. } else {
  445. const double *data_ptr = (double *)frame->extended_data[channel];
  446. for (i = 0; i < frame->nb_samples; i++) {
  447. variance += pow_2(data_ptr[i]); // Assume that MEAN is *zero*
  448. }
  449. variance /= frame->nb_samples - 1;
  450. }
  451. return FFMAX(sqrt(variance), DBL_EPSILON);
  452. }
/**
 * Optional soft-knee compressor ("s" option), applied in place. A threshold
 * proportional to the frame's standard deviation is exponentially smoothed
 * over time (aggressiveness 1/3), cross-faded per sample between previous
 * and current values, and samples are soft-limited with bound(). With
 * channel coupling a single threshold derived from all channels is shared;
 * otherwise each channel is compressed independently.
 */
static void perform_compression(DynamicAudioNormalizerContext *s, AVFrame *frame)
{
    int is_first_frame = cqueue_empty(s->gain_history_original[0]);
    int c, i;

    if (s->channels_coupled) {
        const double standard_deviation = compute_frame_std_dev(s, frame, -1);
        const double current_threshold = FFMIN(1.0, s->compress_factor * standard_deviation);

        // Slot [0] doubles as the shared threshold in coupled mode.
        const double prev_value = is_first_frame ? current_threshold : s->compress_threshold[0];
        double prev_actual_thresh, curr_actual_thresh;

        s->compress_threshold[0] = is_first_frame ? current_threshold : update_value(current_threshold, s->compress_threshold[0], (1.0/3.0));

        prev_actual_thresh = setup_compress_thresh(prev_value);
        curr_actual_thresh = setup_compress_thresh(s->compress_threshold[0]);

        for (c = 0; c < s->channels; c++) {
            double *const dst_ptr = (double *)frame->extended_data[c];
            for (i = 0; i < frame->nb_samples; i++) {
                const double localThresh = fade(prev_actual_thresh, curr_actual_thresh, i, s->fade_factors);
                dst_ptr[i] = copysign(bound(localThresh, fabs(dst_ptr[i])), dst_ptr[i]);
            }
        }
    } else {
        for (c = 0; c < s->channels; c++) {
            const double standard_deviation = compute_frame_std_dev(s, frame, c);
            const double current_threshold = setup_compress_thresh(FFMIN(1.0, s->compress_factor * standard_deviation));

            const double prev_value = is_first_frame ? current_threshold : s->compress_threshold[c];
            double prev_actual_thresh, curr_actual_thresh;
            double *dst_ptr;

            s->compress_threshold[c] = is_first_frame ? current_threshold : update_value(current_threshold, s->compress_threshold[c], 1.0/3.0);

            prev_actual_thresh = setup_compress_thresh(prev_value);
            curr_actual_thresh = setup_compress_thresh(s->compress_threshold[c]);

            dst_ptr = (double *)frame->extended_data[c];
            for (i = 0; i < frame->nb_samples; i++) {
                const double localThresh = fade(prev_actual_thresh, curr_actual_thresh, i, s->fade_factors);
                dst_ptr[i] = copysign(bound(localThresh, fabs(dst_ptr[i])), dst_ptr[i]);
            }
        }
    }
}
  490. static void analyze_frame(DynamicAudioNormalizerContext *s, AVFrame *frame)
  491. {
  492. if (s->dc_correction) {
  493. perform_dc_correction(s, frame);
  494. }
  495. if (s->compress_factor > DBL_EPSILON) {
  496. perform_compression(s, frame);
  497. }
  498. if (s->channels_coupled) {
  499. const double current_gain_factor = get_max_local_gain(s, frame, -1);
  500. int c;
  501. for (c = 0; c < s->channels; c++)
  502. update_gain_history(s, c, current_gain_factor);
  503. } else {
  504. int c;
  505. for (c = 0; c < s->channels; c++)
  506. update_gain_history(s, c, get_max_local_gain(s, frame, c));
  507. }
  508. }
/**
 * Apply the next smoothed gain factor to one delayed frame. The factor is
 * cross-faded per sample from the previous frame's factor, and samples are
 * hard-clipped to +/- peak_value afterwards. When `enabled` is 0 (timeline
 * disable) the sample loop body never runs, but the gain is still dequeued
 * and recorded so the history stays aligned with the frame queue.
 */
static void amplify_frame(DynamicAudioNormalizerContext *s, AVFrame *frame, int enabled)
{
    int c, i;

    for (c = 0; c < s->channels; c++) {
        double *dst_ptr = (double *)frame->extended_data[c];
        double current_amplification_factor;

        cqueue_dequeue(s->gain_history_smoothed[c], &current_amplification_factor);

        for (i = 0; i < frame->nb_samples && enabled; i++) {
            const double amplification_factor = fade(s->prev_amplification_factor[c],
                                                     current_amplification_factor, i,
                                                     s->fade_factors);

            dst_ptr[i] *= amplification_factor;

            // Safety clip: fading between gains can momentarily overshoot.
            if (fabs(dst_ptr[i]) > s->peak_value)
                dst_ptr[i] = copysign(s->peak_value, dst_ptr[i]);
        }

        s->prev_amplification_factor[c] = current_amplification_factor;
    }
}
  527. static int filter_frame(AVFilterLink *inlink, AVFrame *in)
  528. {
  529. AVFilterContext *ctx = inlink->dst;
  530. DynamicAudioNormalizerContext *s = ctx->priv;
  531. AVFilterLink *outlink = inlink->dst->outputs[0];
  532. int ret = 1;
  533. if (!cqueue_empty(s->gain_history_smoothed[0])) {
  534. double is_enabled;
  535. AVFrame *out = ff_bufqueue_get(&s->queue);
  536. cqueue_dequeue(s->is_enabled, &is_enabled);
  537. amplify_frame(s, out, is_enabled > 0.);
  538. ret = ff_filter_frame(outlink, out);
  539. }
  540. av_frame_make_writable(in);
  541. cqueue_enqueue(s->is_enabled, !ctx->is_disabled);
  542. analyze_frame(s, in);
  543. ff_bufqueue_add(ctx, &s->queue, in);
  544. return ret;
  545. }
/**
 * Generate one frame of synthetic padding and push it through filter_frame()
 * so the delayed frames still buffered at EOF get normalized and emitted.
 * Padding is near-silence (DBL_EPSILON) in alt boundary mode, otherwise a
 * constant at the target level; with DC correction it alternates sign and
 * carries the last measured DC offset so the correction stays stable.
 */
static int flush_buffer(DynamicAudioNormalizerContext *s, AVFilterLink *inlink,
                        AVFilterLink *outlink)
{
    AVFrame *out = ff_get_audio_buffer(outlink, s->frame_len);
    int c, i;

    if (!out)
        return AVERROR(ENOMEM);

    for (c = 0; c < s->channels; c++) {
        double *dst_ptr = (double *)out->extended_data[c];

        for (i = 0; i < out->nb_samples; i++) {
            dst_ptr[i] = s->alt_boundary_mode ? DBL_EPSILON : ((s->target_rms > DBL_EPSILON) ? FFMIN(s->peak_value, s->target_rms) : s->peak_value);
            if (s->dc_correction) {
                dst_ptr[i] *= ((i % 2) == 1) ? -1 : 1;  // alternate sign -> zero mean
                dst_ptr[i] += s->dc_correction_value[c];
            }
        }
    }

    s->delay--;  // one fewer flush frame owed before EOF can be signalled
    return filter_frame(inlink, out);
}
  566. static int flush(AVFilterLink *outlink)
  567. {
  568. AVFilterContext *ctx = outlink->src;
  569. DynamicAudioNormalizerContext *s = ctx->priv;
  570. int ret = 0;
  571. if (!cqueue_empty(s->gain_history_smoothed[0])) {
  572. ret = flush_buffer(s, ctx->inputs[0], outlink);
  573. } else if (s->queue.available) {
  574. AVFrame *out = ff_bufqueue_get(&s->queue);
  575. s->pts = out->pts;
  576. ret = ff_filter_frame(outlink, out);
  577. s->delay = s->queue.available;
  578. }
  579. return ret;
  580. }
/**
 * Scheduling callback: consume input in exact frame_len chunks, then after
 * EOF keep emitting flush frames until the internal delay (s->delay) is
 * drained, and finally forward EOF downstream with the last known pts.
 */
static int activate(AVFilterContext *ctx)
{
    AVFilterLink *inlink = ctx->inputs[0];
    AVFilterLink *outlink = ctx->outputs[0];
    DynamicAudioNormalizerContext *s = ctx->priv;
    AVFrame *in = NULL;
    int ret = 0, status;
    int64_t pts;

    FF_FILTER_FORWARD_STATUS_BACK(outlink, inlink);

    if (!s->eof) {
        // min == max == frame_len: only whole frames are consumed; a short
        // tail stays queued until EOF handling below.
        ret = ff_inlink_consume_samples(inlink, s->frame_len, s->frame_len, &in);
        if (ret < 0)
            return ret;
        if (ret > 0) {
            ret = filter_frame(inlink, in);
            if (ret <= 0)
                return ret;
        }

        if (ff_inlink_queued_samples(inlink) >= s->frame_len) {
            ff_filter_set_ready(ctx, 10);  // more full frames ready: reschedule soon
            return 0;
        }
    }

    if (!s->eof && ff_inlink_acknowledge_status(inlink, &status, &pts)) {
        if (status == AVERROR_EOF)
            s->eof = 1;
    }

    if (s->eof && s->delay > 0)
        return flush(outlink);

    if (s->eof && s->delay <= 0) {
        ff_outlink_set_status(outlink, AVERROR_EOF, s->pts);
        return 0;
    }

    if (!s->eof)
        FF_FILTER_FORWARD_WANTED(outlink, inlink);

    return FFERROR_NOT_READY;
}
static const AVFilterPad avfilter_af_dynaudnorm_inputs[] = {
    {
        .name = "default",
        .type = AVMEDIA_TYPE_AUDIO,
        .config_props = config_input,  // (re)allocates all per-link state
    },
    { NULL }
};
static const AVFilterPad avfilter_af_dynaudnorm_outputs[] = {
    {
        .name = "default",
        .type = AVMEDIA_TYPE_AUDIO,
    },
    { NULL }
};
AVFilter ff_af_dynaudnorm = {
    .name = "dynaudnorm",
    .description = NULL_IF_CONFIG_SMALL("Dynamic Audio Normalizer."),
    .query_formats = query_formats,
    .priv_size = sizeof(DynamicAudioNormalizerContext),
    .init = init,
    .uninit = uninit,
    .activate = activate,
    .inputs = avfilter_af_dynaudnorm_inputs,
    .outputs = avfilter_af_dynaudnorm_outputs,
    .priv_class = &dynaudnorm_class,
    // Timeline enable is handled internally: disabled frames still travel
    // through the delay line (see the is_enabled queue in filter_frame()).
    .flags = AVFILTER_FLAG_SUPPORT_TIMELINE_INTERNAL,
};