vf_thumbnail_cuda.c 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454
  1. /*
  2. * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
  3. *
  4. * Permission is hereby granted, free of charge, to any person obtaining a
  5. * copy of this software and associated documentation files (the "Software"),
  6. * to deal in the Software without restriction, including without limitation
  7. * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  8. * and/or sell copies of the Software, and to permit persons to whom the
  9. * Software is furnished to do so, subject to the following conditions:
  10. *
  11. * The above copyright notice and this permission notice shall be included in
  12. * all copies or substantial portions of the Software.
  13. *
  14. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  15. * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  16. * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  17. * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  18. * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  19. * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  20. * DEALINGS IN THE SOFTWARE.
  21. */
  22. #include "libavutil/hwcontext.h"
  23. #include "libavutil/hwcontext_cuda_internal.h"
  24. #include "libavutil/cuda_check.h"
  25. #include "libavutil/opt.h"
  26. #include "libavutil/pixdesc.h"
  27. #include "avfilter.h"
  28. #include "internal.h"
  /* Wraps a CUDA driver call in FF_CUDA_CHECK_DL; the locals `ctx` and `s`
   * must be in scope wherever this macro is used. */
  #define CHECK_CU(x) FF_CUDA_CHECK_DL(ctx, s->hwctx->internal->cuda_dl, x)

  /* Total number of histogram bins: three sections of 256 bins each. */
  #define HIST_SIZE (256 * 3)

  /* Integer division of a by b, rounded up (intended for positive operands). */
  #define DIV_UP(a, b) ( ((a) + (b) - 1) / (b) )

  /* Thread-block dimensions used when launching the histogram kernels. */
  #define BLOCKY 16
  #define BLOCKX 32
  34. static const enum AVPixelFormat supported_formats[] = {
  35. AV_PIX_FMT_NV12,
  36. AV_PIX_FMT_YUV420P,
  37. AV_PIX_FMT_YUV444P,
  38. AV_PIX_FMT_P010,
  39. AV_PIX_FMT_P016,
  40. AV_PIX_FMT_YUV444P16,
  41. };
  42. struct thumb_frame {
  43. AVFrame *buf; ///< cached frame
  44. int histogram[HIST_SIZE]; ///< RGB color distribution histogram of the frame
  45. };
  46. typedef struct ThumbnailCudaContext {
  47. const AVClass *class;
  48. int n; ///< current frame
  49. int n_frames; ///< number of frames for analysis
  50. struct thumb_frame *frames; ///< the n_frames frames
  51. AVRational tb; ///< copy of the input timebase to ease access
  52. AVBufferRef *hw_frames_ctx;
  53. AVCUDADeviceContext *hwctx;
  54. CUmodule cu_module;
  55. CUfunction cu_func_uchar;
  56. CUfunction cu_func_uchar2;
  57. CUfunction cu_func_ushort;
  58. CUfunction cu_func_ushort2;
  59. CUstream cu_stream;
  60. CUdeviceptr data;
  61. } ThumbnailCudaContext;
  62. #define OFFSET(x) offsetof(ThumbnailCudaContext, x)
  63. #define FLAGS AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_FILTERING_PARAM
  64. static const AVOption thumbnail_cuda_options[] = {
  65. { "n", "set the frames batch size", OFFSET(n_frames), AV_OPT_TYPE_INT, {.i64=100}, 2, INT_MAX, FLAGS },
  66. { NULL }
  67. };
  68. AVFILTER_DEFINE_CLASS(thumbnail_cuda);
  69. static av_cold int init(AVFilterContext *ctx)
  70. {
  71. ThumbnailCudaContext *s = ctx->priv;
  72. s->frames = av_calloc(s->n_frames, sizeof(*s->frames));
  73. if (!s->frames) {
  74. av_log(ctx, AV_LOG_ERROR,
  75. "Allocation failure, try to lower the number of frames\n");
  76. return AVERROR(ENOMEM);
  77. }
  78. av_log(ctx, AV_LOG_VERBOSE, "batch size: %d frames\n", s->n_frames);
  79. return 0;
  80. }
  81. /**
  82. * @brief Compute Sum-square deviation to estimate "closeness".
  83. * @param hist color distribution histogram
  84. * @param median average color distribution histogram
  85. * @return sum of squared errors
  86. */
  87. static double frame_sum_square_err(const int *hist, const double *median)
  88. {
  89. int i;
  90. double err, sum_sq_err = 0;
  91. for (i = 0; i < HIST_SIZE; i++) {
  92. err = median[i] - (double)hist[i];
  93. sum_sq_err += err*err;
  94. }
  95. return sum_sq_err;
  96. }
  97. static AVFrame *get_best_frame(AVFilterContext *ctx)
  98. {
  99. AVFrame *picref;
  100. ThumbnailCudaContext *s = ctx->priv;
  101. int i, j, best_frame_idx = 0;
  102. int nb_frames = s->n;
  103. double avg_hist[HIST_SIZE] = {0}, sq_err, min_sq_err = -1;
  104. // average histogram of the N frames
  105. for (j = 0; j < FF_ARRAY_ELEMS(avg_hist); j++) {
  106. for (i = 0; i < nb_frames; i++)
  107. avg_hist[j] += (double)s->frames[i].histogram[j];
  108. avg_hist[j] /= nb_frames;
  109. }
  110. // find the frame closer to the average using the sum of squared errors
  111. for (i = 0; i < nb_frames; i++) {
  112. sq_err = frame_sum_square_err(s->frames[i].histogram, avg_hist);
  113. if (i == 0 || sq_err < min_sq_err)
  114. best_frame_idx = i, min_sq_err = sq_err;
  115. }
  116. // free and reset everything (except the best frame buffer)
  117. for (i = 0; i < nb_frames; i++) {
  118. memset(s->frames[i].histogram, 0, sizeof(s->frames[i].histogram));
  119. if (i != best_frame_idx)
  120. av_frame_free(&s->frames[i].buf);
  121. }
  122. s->n = 0;
  123. // raise the chosen one
  124. picref = s->frames[best_frame_idx].buf;
  125. av_log(ctx, AV_LOG_INFO, "frame id #%d (pts_time=%f) selected "
  126. "from a set of %d images\n", best_frame_idx,
  127. picref->pts * av_q2d(s->tb), nb_frames);
  128. s->frames[best_frame_idx].buf = NULL;
  129. return picref;
  130. }
  131. static int thumbnail_kernel(AVFilterContext *ctx, CUfunction func, int channels,
  132. int *histogram, uint8_t *src_dptr, int src_width, int src_height, int src_pitch, int pixel_size)
  133. {
  134. int ret;
  135. ThumbnailCudaContext *s = ctx->priv;
  136. CudaFunctions *cu = s->hwctx->internal->cuda_dl;
  137. CUtexObject tex = 0;
  138. void *args[] = { &tex, &histogram, &src_width, &src_height };
  139. CUDA_TEXTURE_DESC tex_desc = {
  140. .filterMode = CU_TR_FILTER_MODE_LINEAR,
  141. .flags = CU_TRSF_READ_AS_INTEGER,
  142. };
  143. CUDA_RESOURCE_DESC res_desc = {
  144. .resType = CU_RESOURCE_TYPE_PITCH2D,
  145. .res.pitch2D.format = pixel_size == 1 ?
  146. CU_AD_FORMAT_UNSIGNED_INT8 :
  147. CU_AD_FORMAT_UNSIGNED_INT16,
  148. .res.pitch2D.numChannels = channels,
  149. .res.pitch2D.width = src_width,
  150. .res.pitch2D.height = src_height,
  151. .res.pitch2D.pitchInBytes = src_pitch,
  152. .res.pitch2D.devPtr = (CUdeviceptr)src_dptr,
  153. };
  154. ret = CHECK_CU(cu->cuTexObjectCreate(&tex, &res_desc, &tex_desc, NULL));
  155. if (ret < 0)
  156. goto exit;
  157. ret = CHECK_CU(cu->cuLaunchKernel(func,
  158. DIV_UP(src_width, BLOCKX), DIV_UP(src_height, BLOCKY), 1,
  159. BLOCKX, BLOCKY, 1, 0, s->cu_stream, args, NULL));
  160. exit:
  161. if (tex)
  162. CHECK_CU(cu->cuTexObjectDestroy(tex));
  163. return ret;
  164. }
  165. static int thumbnail(AVFilterContext *ctx, int *histogram, AVFrame *in)
  166. {
  167. AVHWFramesContext *in_frames_ctx = (AVHWFramesContext*)in->hw_frames_ctx->data;
  168. ThumbnailCudaContext *s = ctx->priv;
  169. switch (in_frames_ctx->sw_format) {
  170. case AV_PIX_FMT_NV12:
  171. thumbnail_kernel(ctx, s->cu_func_uchar, 1,
  172. histogram, in->data[0], in->width, in->height, in->linesize[0], 1);
  173. thumbnail_kernel(ctx, s->cu_func_uchar2, 2,
  174. histogram + 256, in->data[1], in->width / 2, in->height / 2, in->linesize[1], 1);
  175. break;
  176. case AV_PIX_FMT_YUV420P:
  177. thumbnail_kernel(ctx, s->cu_func_uchar, 1,
  178. histogram, in->data[0], in->width, in->height, in->linesize[0], 1);
  179. thumbnail_kernel(ctx, s->cu_func_uchar, 1,
  180. histogram + 256, in->data[1], in->width / 2, in->height / 2, in->linesize[1], 1);
  181. thumbnail_kernel(ctx, s->cu_func_uchar, 1,
  182. histogram + 512, in->data[2], in->width / 2, in->height / 2, in->linesize[2], 1);
  183. break;
  184. case AV_PIX_FMT_YUV444P:
  185. thumbnail_kernel(ctx, s->cu_func_uchar, 1,
  186. histogram, in->data[0], in->width, in->height, in->linesize[0], 1);
  187. thumbnail_kernel(ctx, s->cu_func_uchar, 1,
  188. histogram + 256, in->data[1], in->width, in->height, in->linesize[1], 1);
  189. thumbnail_kernel(ctx, s->cu_func_uchar, 1,
  190. histogram + 512, in->data[2], in->width, in->height, in->linesize[2], 1);
  191. break;
  192. case AV_PIX_FMT_P010LE:
  193. case AV_PIX_FMT_P016LE:
  194. thumbnail_kernel(ctx, s->cu_func_ushort, 1,
  195. histogram, in->data[0], in->width, in->height, in->linesize[0], 2);
  196. thumbnail_kernel(ctx, s->cu_func_ushort2, 2,
  197. histogram + 256, in->data[1], in->width / 2, in->height / 2, in->linesize[1], 2);
  198. break;
  199. case AV_PIX_FMT_YUV444P16:
  200. thumbnail_kernel(ctx, s->cu_func_ushort2, 1,
  201. histogram, in->data[0], in->width, in->height, in->linesize[0], 2);
  202. thumbnail_kernel(ctx, s->cu_func_ushort2, 1,
  203. histogram + 256, in->data[1], in->width, in->height, in->linesize[1], 2);
  204. thumbnail_kernel(ctx, s->cu_func_ushort2, 1,
  205. histogram + 512, in->data[2], in->width, in->height, in->linesize[2], 2);
  206. break;
  207. default:
  208. return AVERROR_BUG;
  209. }
  210. return 0;
  211. }
  212. static int filter_frame(AVFilterLink *inlink, AVFrame *frame)
  213. {
  214. AVFilterContext *ctx = inlink->dst;
  215. ThumbnailCudaContext *s = ctx->priv;
  216. CudaFunctions *cu = s->hwctx->internal->cuda_dl;
  217. AVFilterLink *outlink = ctx->outputs[0];
  218. int *hist = s->frames[s->n].histogram;
  219. AVHWFramesContext *hw_frames_ctx = (AVHWFramesContext*)s->hw_frames_ctx->data;
  220. CUcontext dummy;
  221. CUDA_MEMCPY2D cpy = { 0 };
  222. int ret = 0;
  223. // keep a reference of each frame
  224. s->frames[s->n].buf = frame;
  225. ret = CHECK_CU(cu->cuCtxPushCurrent(s->hwctx->cuda_ctx));
  226. if (ret < 0)
  227. return ret;
  228. CHECK_CU(cu->cuMemsetD8Async(s->data, 0, HIST_SIZE * sizeof(int), s->cu_stream));
  229. thumbnail(ctx, (int*)s->data, frame);
  230. cpy.srcMemoryType = CU_MEMORYTYPE_DEVICE;
  231. cpy.dstMemoryType = CU_MEMORYTYPE_HOST;
  232. cpy.srcDevice = s->data;
  233. cpy.dstHost = hist;
  234. cpy.srcPitch = HIST_SIZE * sizeof(int);
  235. cpy.dstPitch = HIST_SIZE * sizeof(int);
  236. cpy.WidthInBytes = HIST_SIZE * sizeof(int);
  237. cpy.Height = 1;
  238. ret = CHECK_CU(cu->cuMemcpy2DAsync(&cpy, s->cu_stream));
  239. if (ret < 0)
  240. return ret;
  241. if (hw_frames_ctx->sw_format == AV_PIX_FMT_NV12 || hw_frames_ctx->sw_format == AV_PIX_FMT_YUV420P ||
  242. hw_frames_ctx->sw_format == AV_PIX_FMT_P010LE || hw_frames_ctx->sw_format == AV_PIX_FMT_P016LE)
  243. {
  244. int i;
  245. for (i = 256; i < HIST_SIZE; i++)
  246. hist[i] = 4 * hist[i];
  247. }
  248. CHECK_CU(cu->cuCtxPopCurrent(&dummy));
  249. if (ret < 0)
  250. return ret;
  251. // no selection until the buffer of N frames is filled up
  252. s->n++;
  253. if (s->n < s->n_frames)
  254. return 0;
  255. return ff_filter_frame(outlink, get_best_frame(ctx));
  256. }
  257. static av_cold void uninit(AVFilterContext *ctx)
  258. {
  259. int i;
  260. ThumbnailCudaContext *s = ctx->priv;
  261. CudaFunctions *cu = s->hwctx->internal->cuda_dl;
  262. if (s->data) {
  263. CHECK_CU(cu->cuMemFree(s->data));
  264. s->data = 0;
  265. }
  266. if (s->cu_module) {
  267. CHECK_CU(cu->cuModuleUnload(s->cu_module));
  268. s->cu_module = NULL;
  269. }
  270. for (i = 0; i < s->n_frames && s->frames[i].buf; i++)
  271. av_frame_free(&s->frames[i].buf);
  272. av_freep(&s->frames);
  273. }
  274. static int request_frame(AVFilterLink *link)
  275. {
  276. AVFilterContext *ctx = link->src;
  277. ThumbnailCudaContext *s = ctx->priv;
  278. int ret = ff_request_frame(ctx->inputs[0]);
  279. if (ret == AVERROR_EOF && s->n) {
  280. ret = ff_filter_frame(link, get_best_frame(ctx));
  281. if (ret < 0)
  282. return ret;
  283. ret = AVERROR_EOF;
  284. }
  285. if (ret < 0)
  286. return ret;
  287. return 0;
  288. }
  289. static int format_is_supported(enum AVPixelFormat fmt)
  290. {
  291. int i;
  292. for (i = 0; i < FF_ARRAY_ELEMS(supported_formats); i++)
  293. if (supported_formats[i] == fmt)
  294. return 1;
  295. return 0;
  296. }
  297. static int config_props(AVFilterLink *inlink)
  298. {
  299. AVFilterContext *ctx = inlink->dst;
  300. ThumbnailCudaContext *s = ctx->priv;
  301. AVHWFramesContext *hw_frames_ctx = (AVHWFramesContext*)inlink->hw_frames_ctx->data;
  302. AVCUDADeviceContext *device_hwctx = hw_frames_ctx->device_ctx->hwctx;
  303. CUcontext dummy, cuda_ctx = device_hwctx->cuda_ctx;
  304. CudaFunctions *cu = device_hwctx->internal->cuda_dl;
  305. int ret;
  306. extern char vf_thumbnail_cuda_ptx[];
  307. s->hwctx = device_hwctx;
  308. s->cu_stream = s->hwctx->stream;
  309. ret = CHECK_CU(cu->cuCtxPushCurrent(cuda_ctx));
  310. if (ret < 0)
  311. return ret;
  312. ret = CHECK_CU(cu->cuModuleLoadData(&s->cu_module, vf_thumbnail_cuda_ptx));
  313. if (ret < 0)
  314. return ret;
  315. ret = CHECK_CU(cu->cuModuleGetFunction(&s->cu_func_uchar, s->cu_module, "Thumbnail_uchar"));
  316. if (ret < 0)
  317. return ret;
  318. ret = CHECK_CU(cu->cuModuleGetFunction(&s->cu_func_uchar2, s->cu_module, "Thumbnail_uchar2"));
  319. if (ret < 0)
  320. return ret;
  321. ret = CHECK_CU(cu->cuModuleGetFunction(&s->cu_func_ushort, s->cu_module, "Thumbnail_ushort"));
  322. if (ret < 0)
  323. return ret;
  324. ret = CHECK_CU(cu->cuModuleGetFunction(&s->cu_func_ushort2, s->cu_module, "Thumbnail_ushort2"));
  325. if (ret < 0)
  326. return ret;
  327. ret = CHECK_CU(cu->cuMemAlloc(&s->data, HIST_SIZE * sizeof(int)));
  328. if (ret < 0)
  329. return ret;
  330. CHECK_CU(cu->cuCtxPopCurrent(&dummy));
  331. s->hw_frames_ctx = ctx->inputs[0]->hw_frames_ctx;
  332. ctx->outputs[0]->hw_frames_ctx = av_buffer_ref(s->hw_frames_ctx);
  333. if (!ctx->outputs[0]->hw_frames_ctx)
  334. return AVERROR(ENOMEM);
  335. s->tb = inlink->time_base;
  336. if (!format_is_supported(hw_frames_ctx->sw_format)) {
  337. av_log(ctx, AV_LOG_ERROR, "Unsupported input format: %s\n", av_get_pix_fmt_name(hw_frames_ctx->sw_format));
  338. return AVERROR(ENOSYS);
  339. }
  340. return 0;
  341. }
  342. static int query_formats(AVFilterContext *ctx)
  343. {
  344. static const enum AVPixelFormat pix_fmts[] = {
  345. AV_PIX_FMT_CUDA,
  346. AV_PIX_FMT_NONE
  347. };
  348. AVFilterFormats *fmts_list = ff_make_format_list(pix_fmts);
  349. if (!fmts_list)
  350. return AVERROR(ENOMEM);
  351. return ff_set_common_formats(ctx, fmts_list);
  352. }
  353. static const AVFilterPad thumbnail_cuda_inputs[] = {
  354. {
  355. .name = "default",
  356. .type = AVMEDIA_TYPE_VIDEO,
  357. .config_props = config_props,
  358. .filter_frame = filter_frame,
  359. },
  360. { NULL }
  361. };
  362. static const AVFilterPad thumbnail_cuda_outputs[] = {
  363. {
  364. .name = "default",
  365. .type = AVMEDIA_TYPE_VIDEO,
  366. .request_frame = request_frame,
  367. },
  368. { NULL }
  369. };
  370. AVFilter ff_vf_thumbnail_cuda = {
  371. .name = "thumbnail_cuda",
  372. .description = NULL_IF_CONFIG_SMALL("Select the most representative frame in a given sequence of consecutive frames."),
  373. .priv_size = sizeof(ThumbnailCudaContext),
  374. .init = init,
  375. .uninit = uninit,
  376. .query_formats = query_formats,
  377. .inputs = thumbnail_cuda_inputs,
  378. .outputs = thumbnail_cuda_outputs,
  379. .priv_class = &thumbnail_cuda_class,
  380. .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE,
  381. };