vf_transpose_npp.c 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487
  1. /*
  2. * This file is part of FFmpeg.
  3. *
  4. * FFmpeg is free software; you can redistribute it and/or
  5. * modify it under the terms of the GNU Lesser General Public
  6. * License as published by the Free Software Foundation; either
  7. * version 2.1 of the License, or (at your option) any later version.
  8. *
  9. * FFmpeg is distributed in the hope that it will be useful,
  10. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  12. * Lesser General Public License for more details.
  13. *
  14. * You should have received a copy of the GNU Lesser General Public
  15. * License along with FFmpeg; if not, write to the Free Software
  16. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  17. */
  18. #include <nppi.h>
  19. #include <stdio.h>
  20. #include <string.h>
  21. #include "libavutil/common.h"
  22. #include "libavutil/hwcontext.h"
  23. #include "libavutil/hwcontext_cuda_internal.h"
  24. #include "libavutil/cuda_check.h"
  25. #include "libavutil/internal.h"
  26. #include "libavutil/opt.h"
  27. #include "libavutil/pixdesc.h"
  28. #include "avfilter.h"
  29. #include "formats.h"
  30. #include "internal.h"
  31. #include "video.h"
  32. #define CHECK_CU(x) FF_CUDA_CHECK_DL(ctx, device_hwctx->internal->cuda_dl, x)
  33. static const enum AVPixelFormat supported_formats[] = {
  34. AV_PIX_FMT_YUV420P,
  35. AV_PIX_FMT_YUV444P
  36. };
  37. enum TransposeStage {
  38. STAGE_ROTATE,
  39. STAGE_TRANSPOSE,
  40. STAGE_NB
  41. };
  42. enum Transpose {
  43. NPP_TRANSPOSE_CCLOCK_FLIP = 0,
  44. NPP_TRANSPOSE_CLOCK = 1,
  45. NPP_TRANSPOSE_CCLOCK = 2,
  46. NPP_TRANSPOSE_CLOCK_FLIP = 3
  47. };
  48. enum Passthrough {
  49. NPP_TRANSPOSE_PT_TYPE_NONE = 0,
  50. NPP_TRANSPOSE_PT_TYPE_LANDSCAPE,
  51. NPP_TRANSPOSE_PT_TYPE_PORTRAIT
  52. };
  53. typedef struct NPPTransposeStageContext {
  54. int stage_needed;
  55. enum AVPixelFormat in_fmt;
  56. enum AVPixelFormat out_fmt;
  57. struct {
  58. int width;
  59. int height;
  60. } planes_in[3], planes_out[3];
  61. AVBufferRef *frames_ctx;
  62. AVFrame *frame;
  63. } NPPTransposeStageContext;
  64. typedef struct NPPTransposeContext {
  65. const AVClass *class;
  66. NPPTransposeStageContext stages[STAGE_NB];
  67. AVFrame *tmp_frame;
  68. int passthrough; ///< PassthroughType, landscape passthrough mode enabled
  69. int dir; ///< TransposeDir
  70. } NPPTransposeContext;
  71. static int npptranspose_init(AVFilterContext *ctx)
  72. {
  73. NPPTransposeContext *s = ctx->priv;
  74. int i;
  75. for (i = 0; i < FF_ARRAY_ELEMS(s->stages); i++) {
  76. s->stages[i].frame = av_frame_alloc();
  77. if (!s->stages[i].frame)
  78. return AVERROR(ENOMEM);
  79. }
  80. s->tmp_frame = av_frame_alloc();
  81. if (!s->tmp_frame)
  82. return AVERROR(ENOMEM);
  83. return 0;
  84. }
  85. static void npptranspose_uninit(AVFilterContext *ctx)
  86. {
  87. NPPTransposeContext *s = ctx->priv;
  88. int i;
  89. for (i = 0; i < FF_ARRAY_ELEMS(s->stages); i++) {
  90. av_frame_free(&s->stages[i].frame);
  91. av_buffer_unref(&s->stages[i].frames_ctx);
  92. }
  93. av_frame_free(&s->tmp_frame);
  94. }
  95. static int npptranspose_query_formats(AVFilterContext *ctx)
  96. {
  97. static const enum AVPixelFormat pixel_formats[] = {
  98. AV_PIX_FMT_CUDA, AV_PIX_FMT_NONE,
  99. };
  100. AVFilterFormats *pix_fmts = ff_make_format_list(pixel_formats);
  101. return ff_set_common_formats(ctx, pix_fmts);
  102. }
  103. static int init_stage(NPPTransposeStageContext *stage, AVBufferRef *device_ctx)
  104. {
  105. AVBufferRef *out_ref = NULL;
  106. AVHWFramesContext *out_ctx;
  107. int in_sw, in_sh, out_sw, out_sh;
  108. int ret, i;
  109. av_pix_fmt_get_chroma_sub_sample(stage->in_fmt, &in_sw, &in_sh);
  110. av_pix_fmt_get_chroma_sub_sample(stage->out_fmt, &out_sw, &out_sh);
  111. if (!stage->planes_out[0].width) {
  112. stage->planes_out[0].width = stage->planes_in[0].width;
  113. stage->planes_out[0].height = stage->planes_in[0].height;
  114. }
  115. for (i = 1; i < FF_ARRAY_ELEMS(stage->planes_in); i++) {
  116. stage->planes_in[i].width = stage->planes_in[0].width >> in_sw;
  117. stage->planes_in[i].height = stage->planes_in[0].height >> in_sh;
  118. stage->planes_out[i].width = stage->planes_out[0].width >> out_sw;
  119. stage->planes_out[i].height = stage->planes_out[0].height >> out_sh;
  120. }
  121. out_ref = av_hwframe_ctx_alloc(device_ctx);
  122. if (!out_ref)
  123. return AVERROR(ENOMEM);
  124. out_ctx = (AVHWFramesContext*)out_ref->data;
  125. out_ctx->format = AV_PIX_FMT_CUDA;
  126. out_ctx->sw_format = stage->out_fmt;
  127. out_ctx->width = FFALIGN(stage->planes_out[0].width, 32);
  128. out_ctx->height = FFALIGN(stage->planes_out[0].height, 32);
  129. ret = av_hwframe_ctx_init(out_ref);
  130. if (ret < 0)
  131. goto fail;
  132. av_frame_unref(stage->frame);
  133. ret = av_hwframe_get_buffer(out_ref, stage->frame, 0);
  134. if (ret < 0)
  135. goto fail;
  136. stage->frame->width = stage->planes_out[0].width;
  137. stage->frame->height = stage->planes_out[0].height;
  138. av_buffer_unref(&stage->frames_ctx);
  139. stage->frames_ctx = out_ref;
  140. return 0;
  141. fail:
  142. av_buffer_unref(&out_ref);
  143. return ret;
  144. }
  145. static int format_is_supported(enum AVPixelFormat fmt)
  146. {
  147. int i;
  148. for (i = 0; i < FF_ARRAY_ELEMS(supported_formats); i++)
  149. if (supported_formats[i] == fmt)
  150. return 1;
  151. return 0;
  152. }
  153. static int init_processing_chain(AVFilterContext *ctx, int in_width, int in_height,
  154. int out_width, int out_height)
  155. {
  156. NPPTransposeContext *s = ctx->priv;
  157. AVHWFramesContext *in_frames_ctx;
  158. enum AVPixelFormat format;
  159. int i, ret, last_stage = -1;
  160. int rot_width = out_width, rot_height = out_height;
  161. /* check that we have a hw context */
  162. if (!ctx->inputs[0]->hw_frames_ctx) {
  163. av_log(ctx, AV_LOG_ERROR, "No hw context provided on input\n");
  164. return AVERROR(EINVAL);
  165. }
  166. in_frames_ctx = (AVHWFramesContext*)ctx->inputs[0]->hw_frames_ctx->data;
  167. format = in_frames_ctx->sw_format;
  168. if (!format_is_supported(format)) {
  169. av_log(ctx, AV_LOG_ERROR, "Unsupported input format: %s\n",
  170. av_get_pix_fmt_name(format));
  171. return AVERROR(ENOSYS);
  172. }
  173. if (s->dir != NPP_TRANSPOSE_CCLOCK_FLIP) {
  174. s->stages[STAGE_ROTATE].stage_needed = 1;
  175. }
  176. if (s->dir == NPP_TRANSPOSE_CCLOCK_FLIP || s->dir == NPP_TRANSPOSE_CLOCK_FLIP) {
  177. s->stages[STAGE_TRANSPOSE].stage_needed = 1;
  178. /* Rotating by 180° in case of clock_flip, or not at all for cclock_flip, so width/height unchanged by rotation */
  179. rot_width = in_width;
  180. rot_height = in_height;
  181. }
  182. s->stages[STAGE_ROTATE].in_fmt = format;
  183. s->stages[STAGE_ROTATE].out_fmt = format;
  184. s->stages[STAGE_ROTATE].planes_in[0].width = in_width;
  185. s->stages[STAGE_ROTATE].planes_in[0].height = in_height;
  186. s->stages[STAGE_ROTATE].planes_out[0].width = rot_width;
  187. s->stages[STAGE_ROTATE].planes_out[0].height = rot_height;
  188. s->stages[STAGE_TRANSPOSE].in_fmt = format;
  189. s->stages[STAGE_TRANSPOSE].out_fmt = format;
  190. s->stages[STAGE_TRANSPOSE].planes_in[0].width = rot_width;
  191. s->stages[STAGE_TRANSPOSE].planes_in[0].height = rot_height;
  192. s->stages[STAGE_TRANSPOSE].planes_out[0].width = out_width;
  193. s->stages[STAGE_TRANSPOSE].planes_out[0].height = out_height;
  194. /* init the hardware contexts */
  195. for (i = 0; i < FF_ARRAY_ELEMS(s->stages); i++) {
  196. if (!s->stages[i].stage_needed)
  197. continue;
  198. ret = init_stage(&s->stages[i], in_frames_ctx->device_ref);
  199. if (ret < 0)
  200. return ret;
  201. last_stage = i;
  202. }
  203. if (last_stage >= 0) {
  204. ctx->outputs[0]->hw_frames_ctx = av_buffer_ref(s->stages[last_stage].frames_ctx);
  205. } else {
  206. ctx->outputs[0]->hw_frames_ctx = av_buffer_ref(ctx->inputs[0]->hw_frames_ctx);
  207. s->passthrough = 1;
  208. }
  209. if (!ctx->outputs[0]->hw_frames_ctx)
  210. return AVERROR(ENOMEM);
  211. return 0;
  212. }
  213. static int npptranspose_config_props(AVFilterLink *outlink)
  214. {
  215. AVFilterContext *ctx = outlink->src;
  216. AVFilterLink *inlink = ctx->inputs[0];
  217. NPPTransposeContext *s = ctx->priv;
  218. int ret;
  219. if ((inlink->w >= inlink->h && s->passthrough == NPP_TRANSPOSE_PT_TYPE_LANDSCAPE) ||
  220. (inlink->w <= inlink->h && s->passthrough == NPP_TRANSPOSE_PT_TYPE_PORTRAIT))
  221. {
  222. if (inlink->hw_frames_ctx) {
  223. outlink->hw_frames_ctx = av_buffer_ref(inlink->hw_frames_ctx);
  224. if (!outlink->hw_frames_ctx)
  225. return AVERROR(ENOMEM);
  226. }
  227. av_log(ctx, AV_LOG_VERBOSE,
  228. "w:%d h:%d -> w:%d h:%d (passthrough mode)\n",
  229. inlink->w, inlink->h, inlink->w, inlink->h);
  230. return 0;
  231. } else {
  232. s->passthrough = NPP_TRANSPOSE_PT_TYPE_NONE;
  233. }
  234. outlink->w = inlink->h;
  235. outlink->h = inlink->w;
  236. outlink->sample_aspect_ratio = (AVRational){inlink->sample_aspect_ratio.den, inlink->sample_aspect_ratio.num};
  237. ret = init_processing_chain(ctx, inlink->w, inlink->h, outlink->w, outlink->h);
  238. if (ret < 0)
  239. return ret;
  240. av_log(ctx, AV_LOG_VERBOSE, "w:%d h:%d -transpose-> w:%d h:%d\n",
  241. inlink->w, inlink->h, outlink->w, outlink->h);
  242. return 0;
  243. }
  244. static int npptranspose_rotate(AVFilterContext *ctx, NPPTransposeStageContext *stage,
  245. AVFrame *out, AVFrame *in)
  246. {
  247. NPPTransposeContext *s = ctx->priv;
  248. NppStatus err;
  249. int i;
  250. for (i = 0; i < FF_ARRAY_ELEMS(stage->planes_in) && i < FF_ARRAY_ELEMS(in->data) && in->data[i]; i++) {
  251. int iw = stage->planes_in[i].width;
  252. int ih = stage->planes_in[i].height;
  253. int ow = stage->planes_out[i].width;
  254. int oh = stage->planes_out[i].height;
  255. // nppRotate uses 0,0 as the rotation point
  256. // need to shift the image accordingly after rotation
  257. // need to substract 1 to get the correct coordinates
  258. double angle = s->dir == NPP_TRANSPOSE_CLOCK ? -90.0 : s->dir == NPP_TRANSPOSE_CCLOCK ? 90.0 : 180.0;
  259. int shiftw = (s->dir == NPP_TRANSPOSE_CLOCK || s->dir == NPP_TRANSPOSE_CLOCK_FLIP) ? ow - 1 : 0;
  260. int shifth = (s->dir == NPP_TRANSPOSE_CCLOCK || s->dir == NPP_TRANSPOSE_CLOCK_FLIP) ? oh - 1 : 0;
  261. err = nppiRotate_8u_C1R(in->data[i], (NppiSize){ iw, ih },
  262. in->linesize[i], (NppiRect){ 0, 0, iw, ih },
  263. out->data[i], out->linesize[i],
  264. (NppiRect){ 0, 0, ow, oh },
  265. angle, shiftw, shifth, NPPI_INTER_NN);
  266. if (err != NPP_SUCCESS) {
  267. av_log(ctx, AV_LOG_ERROR, "NPP rotate error: %d\n", err);
  268. return AVERROR_UNKNOWN;
  269. }
  270. }
  271. return 0;
  272. }
  273. static int npptranspose_transpose(AVFilterContext *ctx, NPPTransposeStageContext *stage,
  274. AVFrame *out, AVFrame *in)
  275. {
  276. NppStatus err;
  277. int i;
  278. for (i = 0; i < FF_ARRAY_ELEMS(stage->planes_in) && i < FF_ARRAY_ELEMS(in->data) && in->data[i]; i++) {
  279. int iw = stage->planes_in[i].width;
  280. int ih = stage->planes_in[i].height;
  281. err = nppiTranspose_8u_C1R(in->data[i], in->linesize[i],
  282. out->data[i], out->linesize[i],
  283. (NppiSize){ iw, ih });
  284. if (err != NPP_SUCCESS) {
  285. av_log(ctx, AV_LOG_ERROR, "NPP transpose error: %d\n", err);
  286. return AVERROR_UNKNOWN;
  287. }
  288. }
  289. return 0;
  290. }
  291. static int (*const npptranspose_process[])(AVFilterContext *ctx, NPPTransposeStageContext *stage,
  292. AVFrame *out, AVFrame *in) = {
  293. [STAGE_ROTATE] = npptranspose_rotate,
  294. [STAGE_TRANSPOSE] = npptranspose_transpose
  295. };
  296. static int npptranspose_filter(AVFilterContext *ctx, AVFrame *out, AVFrame *in)
  297. {
  298. NPPTransposeContext *s = ctx->priv;
  299. AVFrame *src = in;
  300. int i, ret, last_stage = -1;
  301. for (i = 0; i < FF_ARRAY_ELEMS(s->stages); i++) {
  302. if (!s->stages[i].stage_needed)
  303. continue;
  304. ret = npptranspose_process[i](ctx, &s->stages[i], s->stages[i].frame, src);
  305. if (ret < 0)
  306. return ret;
  307. src = s->stages[i].frame;
  308. last_stage = i;
  309. }
  310. if (last_stage < 0)
  311. return AVERROR_BUG;
  312. ret = av_hwframe_get_buffer(src->hw_frames_ctx, s->tmp_frame, 0);
  313. if (ret < 0)
  314. return ret;
  315. av_frame_move_ref(out, src);
  316. av_frame_move_ref(src, s->tmp_frame);
  317. ret = av_frame_copy_props(out, in);
  318. if (ret < 0)
  319. return ret;
  320. return 0;
  321. }
  322. static int npptranspose_filter_frame(AVFilterLink *link, AVFrame *in)
  323. {
  324. AVFilterContext *ctx = link->dst;
  325. NPPTransposeContext *s = ctx->priv;
  326. AVFilterLink *outlink = ctx->outputs[0];
  327. AVHWFramesContext *frames_ctx = (AVHWFramesContext*)outlink->hw_frames_ctx->data;
  328. AVCUDADeviceContext *device_hwctx = frames_ctx->device_ctx->hwctx;
  329. AVFrame *out = NULL;
  330. CUcontext dummy;
  331. int ret = 0;
  332. if (s->passthrough)
  333. return ff_filter_frame(outlink, in);
  334. out = av_frame_alloc();
  335. if (!out) {
  336. ret = AVERROR(ENOMEM);
  337. goto fail;
  338. }
  339. ret = CHECK_CU(device_hwctx->internal->cuda_dl->cuCtxPushCurrent(device_hwctx->cuda_ctx));
  340. if (ret < 0)
  341. goto fail;
  342. ret = npptranspose_filter(ctx, out, in);
  343. CHECK_CU(device_hwctx->internal->cuda_dl->cuCtxPopCurrent(&dummy));
  344. if (ret < 0)
  345. goto fail;
  346. av_frame_free(&in);
  347. return ff_filter_frame(outlink, out);
  348. fail:
  349. av_frame_free(&in);
  350. av_frame_free(&out);
  351. return ret;
  352. }
  353. #define OFFSET(x) offsetof(NPPTransposeContext, x)
  354. #define FLAGS (AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM)
  355. static const AVOption options[] = {
  356. { "dir", "set transpose direction", OFFSET(dir), AV_OPT_TYPE_INT, { .i64 = NPP_TRANSPOSE_CCLOCK_FLIP }, 0, 3, FLAGS, "dir" },
  357. { "cclock_flip", "rotate counter-clockwise with vertical flip", 0, AV_OPT_TYPE_CONST, { .i64 = NPP_TRANSPOSE_CCLOCK_FLIP }, 0, 0, FLAGS, "dir" },
  358. { "clock", "rotate clockwise", 0, AV_OPT_TYPE_CONST, { .i64 = NPP_TRANSPOSE_CLOCK }, 0, 0, FLAGS, "dir" },
  359. { "cclock", "rotate counter-clockwise", 0, AV_OPT_TYPE_CONST, { .i64 = NPP_TRANSPOSE_CCLOCK }, 0, 0, FLAGS, "dir" },
  360. { "clock_flip", "rotate clockwise with vertical flip", 0, AV_OPT_TYPE_CONST, { .i64 = NPP_TRANSPOSE_CLOCK_FLIP }, 0, 0, FLAGS, "dir" },
  361. { "passthrough", "do not apply transposition if the input matches the specified geometry", OFFSET(passthrough), AV_OPT_TYPE_INT, { .i64 = NPP_TRANSPOSE_PT_TYPE_NONE }, 0, 2, FLAGS, "passthrough" },
  362. { "none", "always apply transposition", 0, AV_OPT_TYPE_CONST, { .i64 = NPP_TRANSPOSE_PT_TYPE_NONE }, 0, 0, FLAGS, "passthrough" },
  363. { "landscape", "preserve landscape geometry", 0, AV_OPT_TYPE_CONST, { .i64 = NPP_TRANSPOSE_PT_TYPE_LANDSCAPE }, 0, 0, FLAGS, "passthrough" },
  364. { "portrait", "preserve portrait geometry", 0, AV_OPT_TYPE_CONST, { .i64 = NPP_TRANSPOSE_PT_TYPE_PORTRAIT }, 0, 0, FLAGS, "passthrough" },
  365. { NULL },
  366. };
  367. static const AVClass npptranspose_class = {
  368. .class_name = "npptranspose",
  369. .item_name = av_default_item_name,
  370. .option = options,
  371. .version = LIBAVUTIL_VERSION_INT,
  372. };
  373. static const AVFilterPad npptranspose_inputs[] = {
  374. {
  375. .name = "default",
  376. .type = AVMEDIA_TYPE_VIDEO,
  377. .filter_frame = npptranspose_filter_frame,
  378. },
  379. { NULL }
  380. };
  381. static const AVFilterPad npptranspose_outputs[] = {
  382. {
  383. .name = "default",
  384. .type = AVMEDIA_TYPE_VIDEO,
  385. .config_props = npptranspose_config_props,
  386. },
  387. { NULL }
  388. };
  389. AVFilter ff_vf_transpose_npp = {
  390. .name = "transpose_npp",
  391. .description = NULL_IF_CONFIG_SMALL("NVIDIA Performance Primitives video transpose"),
  392. .init = npptranspose_init,
  393. .uninit = npptranspose_uninit,
  394. .query_formats = npptranspose_query_formats,
  395. .priv_size = sizeof(NPPTransposeContext),
  396. .priv_class = &npptranspose_class,
  397. .inputs = npptranspose_inputs,
  398. .outputs = npptranspose_outputs,
  399. .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE,
  400. };