af_volumedetect.c 5.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166
  1. /*
  2. * Copyright (c) 2012 Nicolas George
  3. *
  4. * This file is part of FFmpeg.
  5. *
  6. * FFmpeg is free software; you can redistribute it and/or
  7. * modify it under the terms of the GNU Lesser General Public License
  8. * as published by the Free Software Foundation; either
  9. * version 2.1 of the License, or (at your option) any later version.
  10. *
  11. * FFmpeg is distributed in the hope that it will be useful,
  12. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  14. * GNU Lesser General Public License for more details.
  15. *
  16. * You should have received a copy of the GNU Lesser General Public License
  17. * along with FFmpeg; if not, write to the Free Software Foundation, Inc.,
  18. * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  19. */
  20. #include "libavutil/channel_layout.h"
  21. #include "libavutil/avassert.h"
  22. #include "audio.h"
  23. #include "avfilter.h"
  24. #include "internal.h"
  25. typedef struct VolDetectContext {
  26. /**
  27. * Number of samples at each PCM value.
  28. * histogram[0x8000 + i] is the number of samples at value i.
  29. * The extra element is there for symmetry.
  30. */
  31. uint64_t histogram[0x10001];
  32. } VolDetectContext;
  33. static int query_formats(AVFilterContext *ctx)
  34. {
  35. static const enum AVSampleFormat sample_fmts[] = {
  36. AV_SAMPLE_FMT_S16,
  37. AV_SAMPLE_FMT_S16P,
  38. AV_SAMPLE_FMT_NONE
  39. };
  40. AVFilterFormats *formats;
  41. AVFilterChannelLayouts *layouts;
  42. int ret;
  43. if (!(formats = ff_make_format_list(sample_fmts)))
  44. return AVERROR(ENOMEM);
  45. layouts = ff_all_channel_counts();
  46. if (!layouts)
  47. return AVERROR(ENOMEM);
  48. ret = ff_set_common_channel_layouts(ctx, layouts);
  49. if (ret < 0)
  50. return ret;
  51. return ff_set_common_formats(ctx, formats);
  52. }
  53. static int filter_frame(AVFilterLink *inlink, AVFrame *samples)
  54. {
  55. AVFilterContext *ctx = inlink->dst;
  56. VolDetectContext *vd = ctx->priv;
  57. int nb_samples = samples->nb_samples;
  58. int nb_channels = samples->channels;
  59. int nb_planes = nb_channels;
  60. int plane, i;
  61. int16_t *pcm;
  62. if (!av_sample_fmt_is_planar(samples->format)) {
  63. nb_samples *= nb_channels;
  64. nb_planes = 1;
  65. }
  66. for (plane = 0; plane < nb_planes; plane++) {
  67. pcm = (int16_t *)samples->extended_data[plane];
  68. for (i = 0; i < nb_samples; i++)
  69. vd->histogram[pcm[i] + 0x8000]++;
  70. }
  71. return ff_filter_frame(inlink->dst->outputs[0], samples);
  72. }
  73. #define MAX_DB 91
  74. static inline double logdb(uint64_t v)
  75. {
  76. double d = v / (double)(0x8000 * 0x8000);
  77. if (!v)
  78. return MAX_DB;
  79. return -log10(d) * 10;
  80. }
  81. static void print_stats(AVFilterContext *ctx)
  82. {
  83. VolDetectContext *vd = ctx->priv;
  84. int i, max_volume, shift;
  85. uint64_t nb_samples = 0, power = 0, nb_samples_shift = 0, sum = 0;
  86. uint64_t histdb[MAX_DB + 1] = { 0 };
  87. for (i = 0; i < 0x10000; i++)
  88. nb_samples += vd->histogram[i];
  89. av_log(ctx, AV_LOG_INFO, "n_samples: %"PRId64"\n", nb_samples);
  90. if (!nb_samples)
  91. return;
  92. /* If nb_samples > 1<<34, there is a risk of overflow in the
  93. multiplication or the sum: shift all histogram values to avoid that.
  94. The total number of samples must be recomputed to avoid rounding
  95. errors. */
  96. shift = av_log2(nb_samples >> 33);
  97. for (i = 0; i < 0x10000; i++) {
  98. nb_samples_shift += vd->histogram[i] >> shift;
  99. power += (i - 0x8000) * (i - 0x8000) * (vd->histogram[i] >> shift);
  100. }
  101. if (!nb_samples_shift)
  102. return;
  103. power = (power + nb_samples_shift / 2) / nb_samples_shift;
  104. av_assert0(power <= 0x8000 * 0x8000);
  105. av_log(ctx, AV_LOG_INFO, "mean_volume: %.1f dB\n", -logdb(power));
  106. max_volume = 0x8000;
  107. while (max_volume > 0 && !vd->histogram[0x8000 + max_volume] &&
  108. !vd->histogram[0x8000 - max_volume])
  109. max_volume--;
  110. av_log(ctx, AV_LOG_INFO, "max_volume: %.1f dB\n", -logdb(max_volume * max_volume));
  111. for (i = 0; i < 0x10000; i++)
  112. histdb[(int)logdb((i - 0x8000) * (i - 0x8000))] += vd->histogram[i];
  113. for (i = 0; i <= MAX_DB && !histdb[i]; i++);
  114. for (; i <= MAX_DB && sum < nb_samples / 1000; i++) {
  115. av_log(ctx, AV_LOG_INFO, "histogram_%ddb: %"PRId64"\n", i, histdb[i]);
  116. sum += histdb[i];
  117. }
  118. }
  119. static av_cold void uninit(AVFilterContext *ctx)
  120. {
  121. print_stats(ctx);
  122. }
  123. static const AVFilterPad volumedetect_inputs[] = {
  124. {
  125. .name = "default",
  126. .type = AVMEDIA_TYPE_AUDIO,
  127. .filter_frame = filter_frame,
  128. },
  129. { NULL }
  130. };
  131. static const AVFilterPad volumedetect_outputs[] = {
  132. {
  133. .name = "default",
  134. .type = AVMEDIA_TYPE_AUDIO,
  135. },
  136. { NULL }
  137. };
  138. AVFilter ff_af_volumedetect = {
  139. .name = "volumedetect",
  140. .description = NULL_IF_CONFIG_SMALL("Detect audio volume."),
  141. .priv_size = sizeof(VolDetectContext),
  142. .query_formats = query_formats,
  143. .uninit = uninit,
  144. .inputs = volumedetect_inputs,
  145. .outputs = volumedetect_outputs,
  146. };