vf_normalize.c

/*
 * Copyright (c) 2017 Richard Ling
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/*
 * Normalize RGB video (aka histogram stretching, contrast stretching).
 * See: https://en.wikipedia.org/wiki/Normalization_(image_processing)
 *
 * For each channel of each frame, the filter computes the input range and maps
 * it linearly to the user-specified output range. The output range defaults
 * to the full dynamic range from pure black to pure white.
 *
 * Naively maximising the dynamic range of each frame of video in isolation
 * may cause flickering (rapid changes in brightness of static objects in the
 * scene) when small dark or bright objects enter or leave the scene. This
 * filter can apply temporal smoothing to the input range to reduce flickering.
 * Temporal smoothing is similar to the auto-exposure (automatic gain control)
 * on a video camera, which performs the same function; and, like a video
 * camera, it may cause a period of over- or under-exposure of the video.
 *
 * The filter can normalize the R,G,B channels independently, which may cause
 * color shifting, or link them together as a single channel, which prevents
 * color shifting. More precisely, linked normalization preserves hue (as it's
 * defined in HSV/HSL color spaces) while independent normalization does not.
 * Independent normalization can be used to remove color casts, such as the
 * blue cast from underwater video, restoring more natural colors. The filter
 * can also combine independent and linked normalization in any ratio.
 *
 * Finally the overall strength of the filter can be adjusted, from no effect
 * to full normalization.
 *
 * The 5 AVOptions are:
 *   blackpt,   Colors which define the output range. The minimum input value
 *   whitept    is mapped to the blackpt. The maximum input value is mapped to
 *              the whitept. The defaults are black and white respectively.
 *              Specifying white for blackpt and black for whitept will give
 *              color-inverted, normalized video. Shades of grey can be used
 *              to reduce the dynamic range (contrast). Specifying saturated
 *              colors here can create some interesting effects.
 *
 *   smoothing  The amount of temporal smoothing, expressed in frames (>=0).
 *              The minimum and maximum input values of each channel are
 *              smoothed using a rolling average over the current frame and
 *              that many previous frames of video. Defaults to 0 (no temporal
 *              smoothing).
 *
 *   independence
 *              Controls the ratio of independent (color shifting) channel
 *              normalization to linked (color preserving) normalization. 0.0
 *              is fully linked, 1.0 is fully independent. Defaults to fully
 *              independent.
 *
 *   strength   Overall strength of the filter. 1.0 is full strength. 0.0 is
 *              a rather expensive no-op. Values in between can give a gentle
 *              boost to low-contrast video without creating an artificial
 *              over-processed look. The default is full strength.
 */
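
/*
 * Illustrative command lines (not part of the original file; they assume the
 * standard ffmpeg -vf option syntax and the options defined below). Apply
 * moderate normalization, smoothing the input range over the current and 30
 * previous frames:
 *
 *     ffmpeg -i input.mkv -vf normalize=smoothing=30:strength=0.8 output.mkv
 *
 * Or map the input range to non-default output colors, here inverting the
 * video while normalizing:
 *
 *     ffmpeg -i input.mkv -vf normalize=blackpt=white:whitept=black output.mkv
 */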

#include "libavutil/imgutils.h"
#include "libavutil/opt.h"
#include "libavutil/pixdesc.h"
#include "avfilter.h"
#include "drawutils.h"
#include "formats.h"
#include "internal.h"
#include "video.h"

typedef struct NormalizeContext {
    const AVClass *class;

    // Storage for the corresponding AVOptions
    uint8_t blackpt[4];
    uint8_t whitept[4];
    int smoothing;
    float independence;
    float strength;

    uint8_t co[4];      // Offsets to R,G,B,A bytes respectively in each pixel
    int num_components; // Number of components in the pixel format
    int step;
    int history_len;    // Number of frames to average; based on smoothing factor
    int frame_num;      // Increments on each frame, starting from 0.

    // Per-extremum, per-channel history, for temporal smoothing.
    struct {
        uint8_t *history;     // History entries.
        uint32_t history_sum; // Sum of history entries.
    } min[3], max[3];         // Min and max for each channel in {R,G,B}.
    uint8_t *history_mem;     // Single allocation for above history entries
} NormalizeContext;

#define OFFSET(x) offsetof(NormalizeContext, x)
#define FLAGS AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_FILTERING_PARAM

static const AVOption normalize_options[] = {
    { "blackpt",      "output color to which darkest input color is mapped",                OFFSET(blackpt),      AV_OPT_TYPE_COLOR, { .str = "black" }, CHAR_MIN, CHAR_MAX, FLAGS },
    { "whitept",      "output color to which brightest input color is mapped",              OFFSET(whitept),      AV_OPT_TYPE_COLOR, { .str = "white" }, CHAR_MIN, CHAR_MAX, FLAGS },
    { "smoothing",    "amount of temporal smoothing of the input range, to reduce flicker", OFFSET(smoothing),    AV_OPT_TYPE_INT,   {.i64=0}, 0, INT_MAX/8, FLAGS },
    { "independence", "proportion of independent to linked channel normalization",          OFFSET(independence), AV_OPT_TYPE_FLOAT, {.dbl=1.0}, 0.0, 1.0, FLAGS },
    { "strength",     "strength of filter, from no effect to full normalization",           OFFSET(strength),     AV_OPT_TYPE_FLOAT, {.dbl=1.0}, 0.0, 1.0, FLAGS },
    { NULL }
};

AVFILTER_DEFINE_CLASS(normalize);

// This function is the main guts of the filter. Normalizes the input frame
// into the output frame. The frames are known to have the same dimensions
// and pixel format.
static void normalize(NormalizeContext *s, AVFrame *in, AVFrame *out)
{
    // Per-extremum, per-channel local variables.
    struct {
        uint8_t in;     // Original input byte value for this frame.
        float smoothed; // Smoothed input value [0,255].
        float out;      // Output value [0,255].
    } min[3], max[3];   // Min and max for each channel in {R,G,B}.

    float rgb_min_smoothed; // Min input range for linked normalization
    float rgb_max_smoothed; // Max input range for linked normalization
    uint8_t lut[3][256];    // Lookup table
    int x, y, c;

    // First, scan the input frame to find, for each channel, the minimum
    // (min.in) and maximum (max.in) values present in the channel.
    for (c = 0; c < 3; c++)
        min[c].in = max[c].in = in->data[0][s->co[c]];
    for (y = 0; y < in->height; y++) {
        uint8_t *inp = in->data[0] + y * in->linesize[0];
        uint8_t *outp = out->data[0] + y * out->linesize[0];
        for (x = 0; x < in->width; x++) {
            for (c = 0; c < 3; c++) {
                min[c].in = FFMIN(min[c].in, inp[s->co[c]]);
                max[c].in = FFMAX(max[c].in, inp[s->co[c]]);
            }
            inp += s->step;
            outp += s->step;
        }
    }

    // Next, for each channel, push min.in and max.in into their respective
    // histories, to determine the min.smoothed and max.smoothed for this frame.
    {
        int history_idx = s->frame_num % s->history_len;
        // Assume the history is not yet full; num_history_vals is the number
        // of frames received so far including the current frame.
        int num_history_vals = s->frame_num + 1;
        if (s->frame_num >= s->history_len) {
            // The history is full; drop oldest value and cap num_history_vals.
            for (c = 0; c < 3; c++) {
                s->min[c].history_sum -= s->min[c].history[history_idx];
                s->max[c].history_sum -= s->max[c].history[history_idx];
            }
            num_history_vals = s->history_len;
        }
        // For each extremum, update history_sum and calculate smoothed value
        // as the rolling average of the history entries.
        for (c = 0; c < 3; c++) {
            s->min[c].history_sum += (s->min[c].history[history_idx] = min[c].in);
            min[c].smoothed = s->min[c].history_sum / (float)num_history_vals;
            s->max[c].history_sum += (s->max[c].history[history_idx] = max[c].in);
            max[c].smoothed = s->max[c].history_sum / (float)num_history_vals;
        }
    }
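
    // An illustrative walk-through of the rolling average above (the numbers
    // are invented for this comment): with smoothing=2 (history_len == 3) and
    // per-frame minima of 10, 20 and 30 on frames 0..2, min.smoothed evolves
    // as 10/1 = 10.0, (10+20)/2 = 15.0 and (10+20+30)/3 = 20.0. From frame 3
    // onwards the oldest entry is subtracted from history_sum before the new
    // extremum is added, so the average always covers exactly history_len
    // frames.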

    // Determine the input range for linked normalization. This is simply the
    // minimum of the per-channel minimums, and the maximum of the per-channel
    // maximums.
    rgb_min_smoothed = FFMIN3(min[0].smoothed, min[1].smoothed, min[2].smoothed);
    rgb_max_smoothed = FFMAX3(max[0].smoothed, max[1].smoothed, max[2].smoothed);

    // Now, process each channel to determine the input and output range and
    // build the lookup tables.
    for (c = 0; c < 3; c++) {
        int in_val;
        // Adjust the input range for this channel [min.smoothed,max.smoothed]
        // by mixing in the correct proportion of the linked normalization
        // input range [rgb_min_smoothed,rgb_max_smoothed].
        min[c].smoothed = (min[c].smoothed * s->independence)
                        + (rgb_min_smoothed * (1.0f - s->independence));
        max[c].smoothed = (max[c].smoothed * s->independence)
                        + (rgb_max_smoothed * (1.0f - s->independence));

        // Calculate the output range [min.out,max.out] as a ratio of the full-
        // strength output range [blackpt,whitept] and the original input range
        // [min.in,max.in], based on the user-specified filter strength.
        min[c].out = (s->blackpt[c] * s->strength)
                   + (min[c].in * (1.0f - s->strength));
        max[c].out = (s->whitept[c] * s->strength)
                   + (max[c].in * (1.0f - s->strength));

        // Now, build a lookup table which linearly maps the adjusted input range
        // [min.smoothed,max.smoothed] to the output range [min.out,max.out].
        // Perform the linear interpolation for each x:
        //     lut[x] = (int)((x - min.smoothed) * scale + min.out + 0.5)
        // where scale = (max.out - min.out) / (max.smoothed - min.smoothed)
        if (min[c].smoothed == max[c].smoothed) {
            // There is no dynamic range to expand. No mapping for this channel.
            for (in_val = min[c].in; in_val <= max[c].in; in_val++)
                lut[c][in_val] = min[c].out;
        } else {
            // We must set lookup values for all values in the original input
            // range [min.in,max.in]. Since the original input range may be
            // larger than [min.smoothed,max.smoothed], some output values may
            // fall outside the [0,255] dynamic range. We need to clamp them.
            float scale = (max[c].out - min[c].out) / (max[c].smoothed - min[c].smoothed);
            for (in_val = min[c].in; in_val <= max[c].in; in_val++) {
                int out_val = (in_val - min[c].smoothed) * scale + min[c].out + 0.5f;
                out_val = FFMAX(out_val, 0);
                out_val = FFMIN(out_val, 255);
                lut[c][in_val] = out_val;
            }
        }
    }
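
    // Worked example for the mapping above (values invented for this comment):
    // at full strength and full independence, with min.smoothed = 50,
    // max.smoothed = 200, blackpt = 0 and whitept = 255, the scale is
    // (255 - 0) / (200 - 50) = 1.7, so lut[50] = 0, lut[200] = 255, and a
    // mid-range value maps to lut[125] = (int)((125 - 50) * 1.7 + 0 + 0.5)
    // = 128. With strength = 0.5 the output range would instead be pulled
    // halfway back toward the original input range [min.in,max.in].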

    // Finally, process the pixels of the input frame using the lookup tables.
    for (y = 0; y < in->height; y++) {
        uint8_t *inp = in->data[0] + y * in->linesize[0];
        uint8_t *outp = out->data[0] + y * out->linesize[0];
        for (x = 0; x < in->width; x++) {
            for (c = 0; c < 3; c++)
                outp[s->co[c]] = lut[c][inp[s->co[c]]];
            if (s->num_components == 4)
                // Copy alpha as-is.
                outp[s->co[3]] = inp[s->co[3]];
            inp += s->step;
            outp += s->step;
        }
    }

    s->frame_num++;
}

// Now we define all the functions accessible from the ff_vf_normalize class,
// which is ffmpeg's interface to our filter. See doc/filter_design.txt and
// doc/writing_filters.txt for descriptions of what these interface functions
// are expected to do.

// Set the pixel formats that our filter supports. We should be able to process
// any 8-bit RGB formats. 16-bit support might be useful one day.
static int query_formats(AVFilterContext *ctx)
{
    static const enum AVPixelFormat pixel_fmts[] = {
        AV_PIX_FMT_RGB24,
        AV_PIX_FMT_BGR24,
        AV_PIX_FMT_ARGB,
        AV_PIX_FMT_RGBA,
        AV_PIX_FMT_ABGR,
        AV_PIX_FMT_BGRA,
        AV_PIX_FMT_0RGB,
        AV_PIX_FMT_RGB0,
        AV_PIX_FMT_0BGR,
        AV_PIX_FMT_BGR0,
        AV_PIX_FMT_NONE
    };
    // According to filter_design.txt, using ff_set_common_formats() this way
    // ensures the pixel formats of the input and output will be the same,
    // which saves us from having to handle format conversions.
    AVFilterFormats *formats = ff_make_format_list(pixel_fmts);
    if (!formats)
        return AVERROR(ENOMEM);
    return ff_set_common_formats(ctx, formats);
}

// At this point we know the pixel format used for both input and output. We
// can also access the frame rate of the input video and allocate some memory
// appropriately.
static int config_input(AVFilterLink *inlink)
{
    NormalizeContext *s = inlink->dst->priv;
    // Store offsets to R,G,B,A bytes respectively in each pixel
    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(inlink->format);
    int c;

    ff_fill_rgba_map(s->co, inlink->format);
    s->num_components = desc->nb_components;
    s->step = av_get_padded_bits_per_pixel(desc) >> 3;
    // Convert smoothing value to history_len (a count of frames to average,
    // must be at least 1). Currently this is a direct assignment, but the
    // smoothing value was originally envisaged as a number of seconds. In
    // future it would be nice to set history_len using a number of seconds,
    // but VFR video is currently an obstacle to doing so.
    s->history_len = s->smoothing + 1;
    // Allocate the history buffers -- there are six, one for each extremum
    // of each channel. s->smoothing is limited to INT_MAX/8, so that
    // (s->history_len * 6) can't overflow on 32-bit, causing a too-small
    // allocation.
    s->history_mem = av_malloc(s->history_len * 6);
    if (s->history_mem == NULL)
        return AVERROR(ENOMEM);
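
    // Carve the six histories out of the single allocation. The resulting
    // layout is [min0][max0][min1][max1][min2][max2], each region
    // history_len bytes long.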
    for (c = 0; c < 3; c++) {
        s->min[c].history = s->history_mem + (c*2)   * s->history_len;
        s->max[c].history = s->history_mem + (c*2+1) * s->history_len;
    }
    return 0;
}

// Free any memory allocations here
static av_cold void uninit(AVFilterContext *ctx)
{
    NormalizeContext *s = ctx->priv;

    av_freep(&s->history_mem);
}

// This function is pretty much standard from doc/writing_filters.txt. It
// tries to do in-place filtering where possible, only allocating a new output
// frame when absolutely necessary.
static int filter_frame(AVFilterLink *inlink, AVFrame *in)
{
    AVFilterContext *ctx = inlink->dst;
    AVFilterLink *outlink = ctx->outputs[0];
    NormalizeContext *s = ctx->priv;
    AVFrame *out;
    // Set 'direct' if we can modify the input frame in-place. Otherwise we
    // need to retrieve a new frame from the output link.
    int direct = av_frame_is_writable(in) && !ctx->is_disabled;

    if (direct) {
        out = in;
    } else {
        out = ff_get_video_buffer(outlink, outlink->w, outlink->h);
        if (!out) {
            av_frame_free(&in);
            return AVERROR(ENOMEM);
        }
        av_frame_copy_props(out, in);
    }

    // Now we've got the input and output frames (which may be the same frame),
    // perform the filtering with our custom function.
    normalize(s, in, out);

    // When the filter is disabled on the timeline, normalize() has still run
    // (keeping the temporal smoothing history up to date), but we discard its
    // output and pass the input frame through unmodified.
    if (ctx->is_disabled) {
        av_frame_free(&out);
        return ff_filter_frame(outlink, in);
    }

    if (!direct)
        av_frame_free(&in);

    return ff_filter_frame(outlink, out);
}

static const AVFilterPad inputs[] = {
    {
        .name         = "default",
        .type         = AVMEDIA_TYPE_VIDEO,
        .filter_frame = filter_frame,
        .config_props = config_input,
    },
    { NULL }
};

static const AVFilterPad outputs[] = {
    {
        .name = "default",
        .type = AVMEDIA_TYPE_VIDEO,
    },
    { NULL }
};

AVFilter ff_vf_normalize = {
    .name          = "normalize",
    .description   = NULL_IF_CONFIG_SMALL("Normalize RGB video."),
    .priv_size     = sizeof(NormalizeContext),
    .priv_class    = &normalize_class,
    .uninit        = uninit,
    .query_formats = query_formats,
    .inputs        = inputs,
    .outputs       = outputs,
    .flags         = AVFILTER_FLAG_SUPPORT_TIMELINE_INTERNAL,
};