* commit '8a02a8031ef4f98faf5647f0e01a8922247bf748':
lavfi: add an NVIDIA NPP-based scaling filter
Merged-by: Derek Buitenhuis <derek.buitenhuis@gmail.com>
... | ... |
@@ -234,6 +234,7 @@ External library support: |
234 | 234 |
--enable-libmp3lame enable MP3 encoding via libmp3lame [no] |
235 | 235 |
--enable-libnut enable NUT (de)muxing via libnut, |
236 | 236 |
native (de)muxer exists [no] |
237 |
+ --enable-libnpp enable NVIDIA Performance Primitives-based code [no] |
|
237 | 238 |
--enable-libopencore-amrnb enable AMR-NB de/encoding via libopencore-amrnb [no] |
238 | 239 |
--enable-libopencore-amrwb enable AMR-WB decoding via libopencore-amrwb [no] |
239 | 240 |
--enable-libopencv enable video filtering via libopencv [no] |
... | ... |
@@ -1482,6 +1483,7 @@ EXTERNAL_LIBRARY_LIST=" |
1482 | 1482 |
libmodplug |
1483 | 1483 |
libmp3lame |
1484 | 1484 |
libnut |
1485 |
+ libnpp |
|
1485 | 1486 |
libopencore_amrnb |
1486 | 1487 |
libopencore_amrwb |
1487 | 1488 |
libopencv |
... | ... |
@@ -3002,6 +3004,7 @@ vidstabtransform_filter_deps="libvidstab" |
3002 | 3002 |
zmq_filter_deps="libzmq" |
3003 | 3003 |
zoompan_filter_deps="swscale" |
3004 | 3004 |
zscale_filter_deps="libzimg" |
3005 |
+scale_npp_filter_deps="cuda libnpp" |
|
3005 | 3006 |
scale_vaapi_filter_deps="vaapi VAProcPipelineParameterBuffer" |
3006 | 3007 |
|
3007 | 3008 |
# examples |
... | ... |
@@ -4959,6 +4962,7 @@ die_license_disabled gpl x11grab |
4959 | 4959 |
|
4960 | 4960 |
die_license_disabled nonfree cuda |
4961 | 4961 |
die_license_disabled nonfree libfaac |
4962 |
+die_license_disabled nonfree libnpp |
|
4962 | 4963 |
die_license_disabled nonfree nvenc |
4963 | 4964 |
enabled gpl && die_license_disabled_gpl nonfree libfdk_aac |
4964 | 4965 |
enabled gpl && die_license_disabled_gpl nonfree openssl |
... | ... |
@@ -5567,6 +5571,7 @@ enabled libmfx && require_pkg_config libmfx "mfx/mfxvideo.h" MFXInit |
5567 | 5567 |
enabled libmodplug && require_pkg_config libmodplug libmodplug/modplug.h ModPlug_Load |
5568 | 5568 |
enabled libmp3lame && require "libmp3lame >= 3.98.3" lame/lame.h lame_set_VBR_quality -lmp3lame |
5569 | 5569 |
enabled libnut && require libnut libnut.h nut_demuxer_init -lnut |
5570 |
+enabled libnpp && require libnpp npp.h nppGetLibVersion -lnppi -lnppc |
|
5570 | 5571 |
enabled libopencore_amrnb && require libopencore_amrnb opencore-amrnb/interf_dec.h Decoder_Interface_init -lopencore-amrnb |
5571 | 5572 |
enabled libopencore_amrwb && require libopencore_amrwb opencore-amrwb/dec_if.h D_IF_init -lopencore-amrwb |
5572 | 5573 |
enabled libopencv && { check_header opencv2/core/core_c.h && |
... | ... |
@@ -3395,6 +3395,47 @@ channels. Default is 0.3. |
3395 | 3395 |
Set level of input signal of original channel. Default is 0.8. |
3396 | 3396 |
@end table |
3397 | 3397 |
|
3398 |
+@section scale_npp |
|
3399 |
+ |
|
3400 |
+Use the NVIDIA Performance Primitives (libnpp) to perform scaling and/or pixel |
|
3401 |
+format conversion on CUDA video frames. Setting the output width and height |
|
3402 |
+works in the same way as for the @var{scale} filter. |
|
3403 |
+ |
|
3404 |
+The following additional options are accepted: |
|
3405 |
+@table @option |
|
3406 |
+@item format |
|
3407 |
+The pixel format of the output CUDA frames. If set to the string "same" (the |
|
3408 |
+default), the input format will be kept. Note that automatic format negotiation |
|
3409 |
+and conversion is not yet supported for hardware frames. |
|
3410 |
+ |
|
3411 |
+@item interp_algo |
|
3412 |
+The interpolation algorithm used for resizing. One of the following: |
|
3413 |
+@table @option |
|
3414 |
+@item nn |
|
3415 |
+Nearest neighbour. |
|
3416 |
+ |
|
3417 |
+@item linear |
|
3418 |
+@item cubic |
|
3419 |
+@item cubic2p_bspline |
|
3420 |
+2-parameter cubic (B=1, C=0) |
|
3421 |
+ |
|
3422 |
+@item cubic2p_catmullrom |
|
3423 |
+2-parameter cubic (B=0, C=1/2) |
|
3424 |
+ |
|
3425 |
+@item cubic2p_b05c03 |
|
3426 |
+2-parameter cubic (B=1/2, C=3/10) |
|
3427 |
+ |
|
3428 |
+@item super |
|
3429 |
+Supersampling |
|
3430 |
+ |
|
3431 |
+@item lanczos |
|
3432 |
+@end table |
|
3433 |
+ |
|
3434 |
+@end table |
|
3435 |
+ |
|
3436 |
+@section select |
|
3437 |
+Select frames to pass in output. |
|
3438 |
+ |
|
3398 | 3439 |
@section treble |
3399 | 3440 |
|
3400 | 3441 |
Boost or cut treble (upper) frequencies of the audio using a two-pole |
... | ... |
@@ -242,6 +242,7 @@ OBJS-$(CONFIG_SCALE2REF_FILTER) += vf_scale.o |
242 | 242 |
OBJS-$(CONFIG_SELECT_FILTER) += f_select.o |
243 | 243 |
OBJS-$(CONFIG_SELECTIVECOLOR_FILTER) += vf_selectivecolor.o |
244 | 244 |
OBJS-$(CONFIG_SENDCMD_FILTER) += f_sendcmd.o |
245 |
+OBJS-$(CONFIG_SCALE_NPP_FILTER) += vf_scale_npp.o |
|
245 | 246 |
OBJS-$(CONFIG_SCALE_VAAPI_FILTER) += vf_scale_vaapi.o |
246 | 247 |
OBJS-$(CONFIG_SETDAR_FILTER) += vf_aspect.o |
247 | 248 |
OBJS-$(CONFIG_SETFIELD_FILTER) += vf_setfield.o |
... | ... |
@@ -258,6 +258,7 @@ void avfilter_register_all(void) |
258 | 258 |
REGISTER_FILTER(SAB, sab, vf); |
259 | 259 |
REGISTER_FILTER(SCALE, scale, vf); |
260 | 260 |
REGISTER_FILTER(SCALE2REF, scale2ref, vf); |
261 |
+ REGISTER_FILTER(SCALE_NPP, scale_npp, vf); |
|
261 | 262 |
REGISTER_FILTER(SCALE_VAAPI, scale_vaapi, vf); |
262 | 263 |
REGISTER_FILTER(SELECT, select, vf); |
263 | 264 |
REGISTER_FILTER(SELECTIVECOLOR, selectivecolor, vf); |
... | ... |
@@ -30,8 +30,8 @@ |
30 | 30 |
#include "libavutil/version.h" |
31 | 31 |
|
32 | 32 |
#define LIBAVFILTER_VERSION_MAJOR 6 |
33 |
-#define LIBAVFILTER_VERSION_MINOR 43 |
|
34 |
-#define LIBAVFILTER_VERSION_MICRO 101 |
|
33 |
+#define LIBAVFILTER_VERSION_MINOR 44 |
|
34 |
+#define LIBAVFILTER_VERSION_MICRO 100 |
|
35 | 35 |
|
36 | 36 |
#define LIBAVFILTER_VERSION_INT AV_VERSION_INT(LIBAVFILTER_VERSION_MAJOR, \ |
37 | 37 |
LIBAVFILTER_VERSION_MINOR, \ |
38 | 38 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,660 @@ |
0 |
+/* |
|
1 |
+ * This file is part of FFmpeg. |
|
2 |
+ * |
|
3 |
+ * FFmpeg is free software; you can redistribute it and/or |
|
4 |
+ * modify it under the terms of the GNU Lesser General Public |
|
5 |
+ * License as published by the Free Software Foundation; either |
|
6 |
+ * version 2.1 of the License, or (at your option) any later version. |
|
7 |
+ * |
|
8 |
+ * FFmpeg is distributed in the hope that it will be useful, |
|
9 |
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
10 |
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|
11 |
+ * Lesser General Public License for more details. |
|
12 |
+ * |
|
13 |
+ * You should have received a copy of the GNU Lesser General Public |
|
14 |
+ * License along with FFmpeg; if not, write to the Free Software |
|
15 |
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
|
16 |
+ */ |
|
17 |
+ |
|
18 |
+/** |
|
19 |
+ * @file |
|
20 |
+ * scale video filter |
|
21 |
+ */ |
|
22 |
+ |
|
23 |
+#include <nppi.h> |
|
24 |
+#include <stdio.h> |
|
25 |
+#include <string.h> |
|
26 |
+ |
|
27 |
+#include "libavutil/avstring.h" |
|
28 |
+#include "libavutil/common.h" |
|
29 |
+#include "libavutil/eval.h" |
|
30 |
+#include "libavutil/hwcontext.h" |
|
31 |
+#include "libavutil/hwcontext_cuda.h" |
|
32 |
+#include "libavutil/internal.h" |
|
33 |
+#include "libavutil/mathematics.h" |
|
34 |
+#include "libavutil/opt.h" |
|
35 |
+#include "libavutil/pixdesc.h" |
|
36 |
+ |
|
37 |
+#include "avfilter.h" |
|
38 |
+#include "formats.h" |
|
39 |
+#include "internal.h" |
|
40 |
+#include "video.h" |
|
41 |
+ |
|
42 |
+static const enum AVPixelFormat supported_formats[] = { |
|
43 |
+ AV_PIX_FMT_YUV420P, |
|
44 |
+ AV_PIX_FMT_NV12, |
|
45 |
+ AV_PIX_FMT_YUV444P, |
|
46 |
+}; |
|
47 |
+ |
|
48 |
+static const enum AVPixelFormat deinterleaved_formats[][2] = { |
|
49 |
+ { AV_PIX_FMT_NV12, AV_PIX_FMT_YUV420P }, |
|
50 |
+}; |
|
51 |
+ |
|
52 |
+static const char *const var_names[] = { |
|
53 |
+ "PI", |
|
54 |
+ "PHI", |
|
55 |
+ "E", |
|
56 |
+ "in_w", "iw", |
|
57 |
+ "in_h", "ih", |
|
58 |
+ "out_w", "ow", |
|
59 |
+ "out_h", "oh", |
|
60 |
+ "a", "dar", |
|
61 |
+ "sar", |
|
62 |
+ NULL |
|
63 |
+}; |
|
64 |
+ |
|
65 |
+enum var_name { |
|
66 |
+ VAR_PI, |
|
67 |
+ VAR_PHI, |
|
68 |
+ VAR_E, |
|
69 |
+ VAR_IN_W, VAR_IW, |
|
70 |
+ VAR_IN_H, VAR_IH, |
|
71 |
+ VAR_OUT_W, VAR_OW, |
|
72 |
+ VAR_OUT_H, VAR_OH, |
|
73 |
+ VAR_A, VAR_DAR, |
|
74 |
+ VAR_SAR, |
|
75 |
+ VARS_NB |
|
76 |
+}; |
|
77 |
+ |
|
78 |
+enum ScaleStage { |
|
79 |
+ STAGE_DEINTERLEAVE, |
|
80 |
+ STAGE_RESIZE, |
|
81 |
+ STAGE_INTERLEAVE, |
|
82 |
+ STAGE_NB, |
|
83 |
+}; |
|
84 |
+ |
|
85 |
+typedef struct NPPScaleStageContext { |
|
86 |
+ int stage_needed; |
|
87 |
+ enum AVPixelFormat in_fmt; |
|
88 |
+ enum AVPixelFormat out_fmt; |
|
89 |
+ |
|
90 |
+ struct { |
|
91 |
+ int width; |
|
92 |
+ int height; |
|
93 |
+ } planes_in[3], planes_out[3]; |
|
94 |
+ |
|
95 |
+ AVBufferRef *frames_ctx; |
|
96 |
+ AVFrame *frame; |
|
97 |
+} NPPScaleStageContext; |
|
98 |
+ |
|
99 |
+typedef struct NPPScaleContext { |
|
100 |
+ const AVClass *class; |
|
101 |
+ |
|
102 |
+ NPPScaleStageContext stages[STAGE_NB]; |
|
103 |
+ AVFrame *tmp_frame; |
|
104 |
+ int passthrough; |
|
105 |
+ |
|
106 |
+ int shift_width, shift_height; |
|
107 |
+ |
|
108 |
+ /** |
|
109 |
+ * New dimensions. Special values are: |
|
110 |
+ * 0 = original width/height |
|
111 |
+ * -1 = keep original aspect |
|
112 |
+ */ |
|
113 |
+ int w, h; |
|
114 |
+ |
|
115 |
+ /** |
|
116 |
+ * Output sw format. AV_PIX_FMT_NONE for no conversion. |
|
117 |
+ */ |
|
118 |
+ enum AVPixelFormat format; |
|
119 |
+ |
|
120 |
+ char *w_expr; ///< width expression string |
|
121 |
+ char *h_expr; ///< height expression string |
|
122 |
+ char *format_str; |
|
123 |
+ |
|
124 |
+ int interp_algo; |
|
125 |
+} NPPScaleContext; |
|
126 |
+ |
|
127 |
+static int nppscale_init(AVFilterContext *ctx) |
|
128 |
+{ |
|
129 |
+ NPPScaleContext *s = ctx->priv; |
|
130 |
+ int i; |
|
131 |
+ |
|
132 |
+ if (!strcmp(s->format_str, "same")) { |
|
133 |
+ s->format = AV_PIX_FMT_NONE; |
|
134 |
+ } else { |
|
135 |
+ s->format = av_get_pix_fmt(s->format_str); |
|
136 |
+ if (s->format == AV_PIX_FMT_NONE) { |
|
137 |
+ av_log(ctx, AV_LOG_ERROR, "Unrecognized pixel format: %s\n", s->format_str); |
|
138 |
+ return AVERROR(EINVAL); |
|
139 |
+ } |
|
140 |
+ } |
|
141 |
+ |
|
142 |
+ for (i = 0; i < FF_ARRAY_ELEMS(s->stages); i++) { |
|
143 |
+ s->stages[i].frame = av_frame_alloc(); |
|
144 |
+ if (!s->stages[i].frame) |
|
145 |
+ return AVERROR(ENOMEM); |
|
146 |
+ } |
|
147 |
+ s->tmp_frame = av_frame_alloc(); |
|
148 |
+ if (!s->tmp_frame) |
|
149 |
+ return AVERROR(ENOMEM); |
|
150 |
+ |
|
151 |
+ return 0; |
|
152 |
+} |
|
153 |
+ |
|
154 |
+static void nppscale_uninit(AVFilterContext *ctx) |
|
155 |
+{ |
|
156 |
+ NPPScaleContext *s = ctx->priv; |
|
157 |
+ int i; |
|
158 |
+ |
|
159 |
+ for (i = 0; i < FF_ARRAY_ELEMS(s->stages); i++) { |
|
160 |
+ av_frame_free(&s->stages[i].frame); |
|
161 |
+ av_buffer_unref(&s->stages[i].frames_ctx); |
|
162 |
+ } |
|
163 |
+ av_frame_free(&s->tmp_frame); |
|
164 |
+} |
|
165 |
+ |
|
166 |
+static int nppscale_query_formats(AVFilterContext *ctx) |
|
167 |
+{ |
|
168 |
+ static const enum AVPixelFormat pixel_formats[] = { |
|
169 |
+ AV_PIX_FMT_CUDA, AV_PIX_FMT_NONE, |
|
170 |
+ }; |
|
171 |
+ AVFilterFormats *pix_fmts = ff_make_format_list(pixel_formats); |
|
172 |
+ |
|
173 |
+ ff_set_common_formats(ctx, pix_fmts); |
|
174 |
+ |
|
175 |
+ return 0; |
|
176 |
+} |
|
177 |
+ |
|
178 |
+static int init_stage(NPPScaleStageContext *stage, AVBufferRef *device_ctx) |
|
179 |
+{ |
|
180 |
+ AVBufferRef *out_ref = NULL; |
|
181 |
+ AVHWFramesContext *out_ctx; |
|
182 |
+ int in_sw, in_sh, out_sw, out_sh; |
|
183 |
+ int ret, i; |
|
184 |
+ |
|
185 |
+ av_pix_fmt_get_chroma_sub_sample(stage->in_fmt, &in_sw, &in_sh); |
|
186 |
+ av_pix_fmt_get_chroma_sub_sample(stage->out_fmt, &out_sw, &out_sh); |
|
187 |
+ if (!stage->planes_out[0].width) { |
|
188 |
+ stage->planes_out[0].width = stage->planes_in[0].width; |
|
189 |
+ stage->planes_out[0].height = stage->planes_in[0].height; |
|
190 |
+ } |
|
191 |
+ |
|
192 |
+ for (i = 1; i < FF_ARRAY_ELEMS(stage->planes_in); i++) { |
|
193 |
+ stage->planes_in[i].width = stage->planes_in[0].width >> in_sw; |
|
194 |
+ stage->planes_in[i].height = stage->planes_in[0].height >> in_sh; |
|
195 |
+ stage->planes_out[i].width = stage->planes_out[0].width >> out_sw; |
|
196 |
+ stage->planes_out[i].height = stage->planes_out[0].height >> out_sh; |
|
197 |
+ } |
|
198 |
+ |
|
199 |
+ out_ref = av_hwframe_ctx_alloc(device_ctx); |
|
200 |
+ if (!out_ref) |
|
201 |
+ return AVERROR(ENOMEM); |
|
202 |
+ out_ctx = (AVHWFramesContext*)out_ref->data; |
|
203 |
+ |
|
204 |
+ out_ctx->format = AV_PIX_FMT_CUDA; |
|
205 |
+ out_ctx->sw_format = stage->out_fmt; |
|
206 |
+ out_ctx->width = FFALIGN(stage->planes_out[0].width, 32); |
|
207 |
+ out_ctx->height = FFALIGN(stage->planes_out[0].height, 32); |
|
208 |
+ |
|
209 |
+ ret = av_hwframe_ctx_init(out_ref); |
|
210 |
+ if (ret < 0) |
|
211 |
+ goto fail; |
|
212 |
+ |
|
213 |
+ av_frame_unref(stage->frame); |
|
214 |
+ ret = av_hwframe_get_buffer(out_ref, stage->frame, 0); |
|
215 |
+ if (ret < 0) |
|
216 |
+ goto fail; |
|
217 |
+ |
|
218 |
+ stage->frame->width = stage->planes_out[0].width; |
|
219 |
+ stage->frame->height = stage->planes_out[0].height; |
|
220 |
+ |
|
221 |
+ av_buffer_unref(&stage->frames_ctx); |
|
222 |
+ stage->frames_ctx = out_ref; |
|
223 |
+ |
|
224 |
+ return 0; |
|
225 |
+fail: |
|
226 |
+ av_buffer_unref(&out_ref); |
|
227 |
+ return ret; |
|
228 |
+} |
|
229 |
+ |
|
230 |
+static int format_is_supported(enum AVPixelFormat fmt) |
|
231 |
+{ |
|
232 |
+ int i; |
|
233 |
+ |
|
234 |
+ for (i = 0; i < FF_ARRAY_ELEMS(supported_formats); i++) |
|
235 |
+ if (supported_formats[i] == fmt) |
|
236 |
+ return 1; |
|
237 |
+ return 0; |
|
238 |
+} |
|
239 |
+ |
|
240 |
+static enum AVPixelFormat get_deinterleaved_format(enum AVPixelFormat fmt) |
|
241 |
+{ |
|
242 |
+ const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(fmt); |
|
243 |
+ int i, planes; |
|
244 |
+ |
|
245 |
+ planes = av_pix_fmt_count_planes(fmt); |
|
246 |
+ if (planes == desc->nb_components) |
|
247 |
+ return fmt; |
|
248 |
+ for (i = 0; i < FF_ARRAY_ELEMS(deinterleaved_formats); i++) |
|
249 |
+ if (deinterleaved_formats[i][0] == fmt) |
|
250 |
+ return deinterleaved_formats[i][1]; |
|
251 |
+ return AV_PIX_FMT_NONE; |
|
252 |
+} |
|
253 |
+ |
|
254 |
+static int init_processing_chain(AVFilterContext *ctx, int in_width, int in_height, |
|
255 |
+ int out_width, int out_height) |
|
256 |
+{ |
|
257 |
+ NPPScaleContext *s = ctx->priv; |
|
258 |
+ |
|
259 |
+ AVHWFramesContext *in_frames_ctx; |
|
260 |
+ |
|
261 |
+ enum AVPixelFormat in_format; |
|
262 |
+ enum AVPixelFormat out_format; |
|
263 |
+ enum AVPixelFormat in_deinterleaved_format; |
|
264 |
+ enum AVPixelFormat out_deinterleaved_format; |
|
265 |
+ |
|
266 |
+ int i, ret, last_stage = -1; |
|
267 |
+ |
|
268 |
+ /* check that we have a hw context */ |
|
269 |
+ if (!ctx->inputs[0]->hw_frames_ctx) { |
|
270 |
+ av_log(ctx, AV_LOG_ERROR, "No hw context provided on input\n"); |
|
271 |
+ return AVERROR(EINVAL); |
|
272 |
+ } |
|
273 |
+ in_frames_ctx = (AVHWFramesContext*)ctx->inputs[0]->hw_frames_ctx->data; |
|
274 |
+ in_format = in_frames_ctx->sw_format; |
|
275 |
+ out_format = (s->format == AV_PIX_FMT_NONE) ? in_format : s->format; |
|
276 |
+ |
|
277 |
+ if (!format_is_supported(in_format)) { |
|
278 |
+ av_log(ctx, AV_LOG_ERROR, "Unsupported input format: %s\n", |
|
279 |
+ av_get_pix_fmt_name(in_format)); |
|
280 |
+ return AVERROR(ENOSYS); |
|
281 |
+ } |
|
282 |
+ if (!format_is_supported(out_format)) { |
|
283 |
+ av_log(ctx, AV_LOG_ERROR, "Unsupported output format: %s\n", |
|
284 |
+ av_get_pix_fmt_name(out_format)); |
|
285 |
+ return AVERROR(ENOSYS); |
|
286 |
+ } |
|
287 |
+ |
|
288 |
+ in_deinterleaved_format = get_deinterleaved_format(in_format); |
|
289 |
+ out_deinterleaved_format = get_deinterleaved_format(out_format); |
|
290 |
+ if (in_deinterleaved_format == AV_PIX_FMT_NONE || |
|
291 |
+ out_deinterleaved_format == AV_PIX_FMT_NONE) |
|
292 |
+ return AVERROR_BUG; |
|
293 |
+ |
|
294 |
+ /* figure out which stages need to be done */ |
|
295 |
+ if (in_width != out_width || in_height != out_height || |
|
296 |
+ in_deinterleaved_format != out_deinterleaved_format) |
|
297 |
+ s->stages[STAGE_RESIZE].stage_needed = 1; |
|
298 |
+ |
|
299 |
+ if (!s->stages[STAGE_RESIZE].stage_needed && in_format == out_format) |
|
300 |
+ s->passthrough = 1; |
|
301 |
+ |
|
302 |
+ if (!s->passthrough) { |
|
303 |
+ if (in_format != in_deinterleaved_format) |
|
304 |
+ s->stages[STAGE_DEINTERLEAVE].stage_needed = 1; |
|
305 |
+ if (out_format != out_deinterleaved_format) |
|
306 |
+ s->stages[STAGE_INTERLEAVE].stage_needed = 1; |
|
307 |
+ } |
|
308 |
+ |
|
309 |
+ s->stages[STAGE_DEINTERLEAVE].in_fmt = in_format; |
|
310 |
+ s->stages[STAGE_DEINTERLEAVE].out_fmt = in_deinterleaved_format; |
|
311 |
+ s->stages[STAGE_DEINTERLEAVE].planes_in[0].width = in_width; |
|
312 |
+ s->stages[STAGE_DEINTERLEAVE].planes_in[0].height = in_height; |
|
313 |
+ |
|
314 |
+ s->stages[STAGE_RESIZE].in_fmt = in_deinterleaved_format; |
|
315 |
+ s->stages[STAGE_RESIZE].out_fmt = out_deinterleaved_format; |
|
316 |
+ s->stages[STAGE_RESIZE].planes_in[0].width = in_width; |
|
317 |
+ s->stages[STAGE_RESIZE].planes_in[0].height = in_height; |
|
318 |
+ s->stages[STAGE_RESIZE].planes_out[0].width = out_width; |
|
319 |
+ s->stages[STAGE_RESIZE].planes_out[0].height = out_height; |
|
320 |
+ |
|
321 |
+ s->stages[STAGE_INTERLEAVE].in_fmt = out_deinterleaved_format; |
|
322 |
+ s->stages[STAGE_INTERLEAVE].out_fmt = out_format; |
|
323 |
+ s->stages[STAGE_INTERLEAVE].planes_in[0].width = out_width; |
|
324 |
+ s->stages[STAGE_INTERLEAVE].planes_in[0].height = out_height; |
|
325 |
+ |
|
326 |
+ /* init the hardware contexts */ |
|
327 |
+ for (i = 0; i < FF_ARRAY_ELEMS(s->stages); i++) { |
|
328 |
+ if (!s->stages[i].stage_needed) |
|
329 |
+ continue; |
|
330 |
+ |
|
331 |
+ ret = init_stage(&s->stages[i], in_frames_ctx->device_ref); |
|
332 |
+ if (ret < 0) |
|
333 |
+ return ret; |
|
334 |
+ |
|
335 |
+ last_stage = i; |
|
336 |
+ } |
|
337 |
+ |
|
338 |
+ if (last_stage < 0) |
|
339 |
+ return AVERROR_BUG; |
|
340 |
+ ctx->outputs[0]->hw_frames_ctx = av_buffer_ref(s->stages[last_stage].frames_ctx); |
|
341 |
+ if (!ctx->outputs[0]->hw_frames_ctx) |
|
342 |
+ return AVERROR(ENOMEM); |
|
343 |
+ |
|
344 |
+ return 0; |
|
345 |
+} |
|
346 |
+ |
|
347 |
+static int nppscale_config_props(AVFilterLink *outlink) |
|
348 |
+{ |
|
349 |
+ AVFilterContext *ctx = outlink->src; |
|
350 |
+ AVFilterLink *inlink = outlink->src->inputs[0]; |
|
351 |
+ NPPScaleContext *s = ctx->priv; |
|
352 |
+ int64_t w, h; |
|
353 |
+ double var_values[VARS_NB], res; |
|
354 |
+ char *expr; |
|
355 |
+ int ret; |
|
356 |
+ |
|
357 |
+ var_values[VAR_PI] = M_PI; |
|
358 |
+ var_values[VAR_PHI] = M_PHI; |
|
359 |
+ var_values[VAR_E] = M_E; |
|
360 |
+ var_values[VAR_IN_W] = var_values[VAR_IW] = inlink->w; |
|
361 |
+ var_values[VAR_IN_H] = var_values[VAR_IH] = inlink->h; |
|
362 |
+ var_values[VAR_OUT_W] = var_values[VAR_OW] = NAN; |
|
363 |
+ var_values[VAR_OUT_H] = var_values[VAR_OH] = NAN; |
|
364 |
+ var_values[VAR_A] = (double) inlink->w / inlink->h; |
|
365 |
+ var_values[VAR_SAR] = inlink->sample_aspect_ratio.num ? |
|
366 |
+ (double) inlink->sample_aspect_ratio.num / inlink->sample_aspect_ratio.den : 1; |
|
367 |
+ var_values[VAR_DAR] = var_values[VAR_A] * var_values[VAR_SAR]; |
|
368 |
+ |
|
369 |
+ /* evaluate width and height */ |
|
370 |
+ av_expr_parse_and_eval(&res, (expr = s->w_expr), |
|
371 |
+ var_names, var_values, |
|
372 |
+ NULL, NULL, NULL, NULL, NULL, 0, ctx); |
|
373 |
+ s->w = var_values[VAR_OUT_W] = var_values[VAR_OW] = res; |
|
374 |
+ if ((ret = av_expr_parse_and_eval(&res, (expr = s->h_expr), |
|
375 |
+ var_names, var_values, |
|
376 |
+ NULL, NULL, NULL, NULL, NULL, 0, ctx)) < 0) |
|
377 |
+ goto fail; |
|
378 |
+ s->h = var_values[VAR_OUT_H] = var_values[VAR_OH] = res; |
|
379 |
+ /* evaluate again the width, as it may depend on the output height */ |
|
380 |
+ if ((ret = av_expr_parse_and_eval(&res, (expr = s->w_expr), |
|
381 |
+ var_names, var_values, |
|
382 |
+ NULL, NULL, NULL, NULL, NULL, 0, ctx)) < 0) |
|
383 |
+ goto fail; |
|
384 |
+ s->w = res; |
|
385 |
+ |
|
386 |
+ w = s->w; |
|
387 |
+ h = s->h; |
|
388 |
+ |
|
389 |
+ /* sanity check params */ |
|
390 |
+ if (w < -1 || h < -1) { |
|
391 |
+ av_log(ctx, AV_LOG_ERROR, "Size values less than -1 are not acceptable.\n"); |
|
392 |
+ return AVERROR(EINVAL); |
|
393 |
+ } |
|
394 |
+ if (w == -1 && h == -1) |
|
395 |
+ s->w = s->h = 0; |
|
396 |
+ |
|
397 |
+ if (!(w = s->w)) |
|
398 |
+ w = inlink->w; |
|
399 |
+ if (!(h = s->h)) |
|
400 |
+ h = inlink->h; |
|
401 |
+ if (w == -1) |
|
402 |
+ w = av_rescale(h, inlink->w, inlink->h); |
|
403 |
+ if (h == -1) |
|
404 |
+ h = av_rescale(w, inlink->h, inlink->w); |
|
405 |
+ |
|
406 |
+ if (w > INT_MAX || h > INT_MAX || |
|
407 |
+ (h * inlink->w) > INT_MAX || |
|
408 |
+ (w * inlink->h) > INT_MAX) |
|
409 |
+ av_log(ctx, AV_LOG_ERROR, "Rescaled value for width or height is too big.\n"); |
|
410 |
+ |
|
411 |
+ outlink->w = w; |
|
412 |
+ outlink->h = h; |
|
413 |
+ |
|
414 |
+ ret = init_processing_chain(ctx, inlink->w, inlink->h, w, h); |
|
415 |
+ if (ret < 0) |
|
416 |
+ return ret; |
|
417 |
+ |
|
418 |
+ av_log(ctx, AV_LOG_VERBOSE, "w:%d h:%d -> w:%d h:%d\n", |
|
419 |
+ inlink->w, inlink->h, outlink->w, outlink->h); |
|
420 |
+ |
|
421 |
+ if (inlink->sample_aspect_ratio.num) |
|
422 |
+ outlink->sample_aspect_ratio = av_mul_q((AVRational){outlink->h*inlink->w, |
|
423 |
+ outlink->w*inlink->h}, |
|
424 |
+ inlink->sample_aspect_ratio); |
|
425 |
+ else |
|
426 |
+ outlink->sample_aspect_ratio = inlink->sample_aspect_ratio; |
|
427 |
+ |
|
428 |
+ return 0; |
|
429 |
+ |
|
430 |
+fail: |
|
431 |
+ av_log(NULL, AV_LOG_ERROR, |
|
432 |
+ "Error when evaluating the expression '%s'\n", expr); |
|
433 |
+ return ret; |
|
434 |
+} |
|
435 |
+ |
|
436 |
+static int nppscale_deinterleave(AVFilterContext *ctx, NPPScaleStageContext *stage, |
|
437 |
+ AVFrame *out, AVFrame *in) |
|
438 |
+{ |
|
439 |
+ AVHWFramesContext *in_frames_ctx = (AVHWFramesContext*)in->hw_frames_ctx->data; |
|
440 |
+ NppStatus err; |
|
441 |
+ |
|
442 |
+ switch (in_frames_ctx->sw_format) { |
|
443 |
+ case AV_PIX_FMT_NV12: |
|
444 |
+ err = nppiYCbCr420_8u_P2P3R(in->data[0], in->linesize[0], |
|
445 |
+ in->data[1], in->linesize[1], |
|
446 |
+ out->data, out->linesize, |
|
447 |
+ (NppiSize){ in->width, in->height }); |
|
448 |
+ break; |
|
449 |
+ default: |
|
450 |
+ return AVERROR_BUG; |
|
451 |
+ } |
|
452 |
+ if (err != NPP_SUCCESS) { |
|
453 |
+ av_log(ctx, AV_LOG_ERROR, "NPP deinterleave error: %d\n", err); |
|
454 |
+ return AVERROR_UNKNOWN; |
|
455 |
+ } |
|
456 |
+ |
|
457 |
+ return 0; |
|
458 |
+} |
|
459 |
+ |
|
460 |
+static int nppscale_resize(AVFilterContext *ctx, NPPScaleStageContext *stage, |
|
461 |
+ AVFrame *out, AVFrame *in) |
|
462 |
+{ |
|
463 |
+ NPPScaleContext *s = ctx->priv; |
|
464 |
+ NppStatus err; |
|
465 |
+ int i; |
|
466 |
+ |
|
467 |
+ for (i = 0; i < FF_ARRAY_ELEMS(in->data) && in->data[i]; i++) { |
|
468 |
+ int iw = stage->planes_in[i].width; |
|
469 |
+ int ih = stage->planes_in[i].height; |
|
470 |
+ int ow = stage->planes_out[i].width; |
|
471 |
+ int oh = stage->planes_out[i].height; |
|
472 |
+ |
|
473 |
+ err = nppiResizeSqrPixel_8u_C1R(in->data[i], (NppiSize){ iw, ih }, |
|
474 |
+ in->linesize[i], (NppiRect){ 0, 0, iw, ih }, |
|
475 |
+ out->data[i], out->linesize[i], |
|
476 |
+ (NppiRect){ 0, 0, ow, oh }, |
|
477 |
+ (double)ow / iw, (double)oh / ih, |
|
478 |
+ 0.0, 0.0, s->interp_algo); |
|
479 |
+ if (err != NPP_SUCCESS) { |
|
480 |
+ av_log(ctx, AV_LOG_ERROR, "NPP resize error: %d\n", err); |
|
481 |
+ return AVERROR_UNKNOWN; |
|
482 |
+ } |
|
483 |
+ } |
|
484 |
+ |
|
485 |
+ return 0; |
|
486 |
+} |
|
487 |
+ |
|
488 |
+static int nppscale_interleave(AVFilterContext *ctx, NPPScaleStageContext *stage, |
|
489 |
+ AVFrame *out, AVFrame *in) |
|
490 |
+{ |
|
491 |
+ AVHWFramesContext *out_frames_ctx = (AVHWFramesContext*)out->hw_frames_ctx->data; |
|
492 |
+ NppStatus err; |
|
493 |
+ |
|
494 |
+ switch (out_frames_ctx->sw_format) { |
|
495 |
+ case AV_PIX_FMT_NV12: |
|
496 |
+ err = nppiYCbCr420_8u_P3P2R((const uint8_t**)in->data, |
|
497 |
+ in->linesize, |
|
498 |
+ out->data[0], out->linesize[0], |
|
499 |
+ out->data[1], out->linesize[1], |
|
500 |
+ (NppiSize){ in->width, in->height }); |
|
501 |
+ break; |
|
502 |
+ default: |
|
503 |
+ return AVERROR_BUG; |
|
504 |
+ } |
|
505 |
+ if (err != NPP_SUCCESS) { |
|
506 |
+ av_log(ctx, AV_LOG_ERROR, "NPP deinterleave error: %d\n", err); |
|
507 |
+ return AVERROR_UNKNOWN; |
|
508 |
+ } |
|
509 |
+ |
|
510 |
+ return 0; |
|
511 |
+} |
|
512 |
+ |
|
513 |
+static int (*const nppscale_process[])(AVFilterContext *ctx, NPPScaleStageContext *stage, |
|
514 |
+ AVFrame *out, AVFrame *in) = { |
|
515 |
+ [STAGE_DEINTERLEAVE] = nppscale_deinterleave, |
|
516 |
+ [STAGE_RESIZE] = nppscale_resize, |
|
517 |
+ [STAGE_INTERLEAVE] = nppscale_interleave, |
|
518 |
+}; |
|
519 |
+ |
|
520 |
+static int nppscale_scale(AVFilterContext *ctx, AVFrame *out, AVFrame *in) |
|
521 |
+{ |
|
522 |
+ NPPScaleContext *s = ctx->priv; |
|
523 |
+ AVFrame *src = in; |
|
524 |
+ int i, ret, last_stage = -1; |
|
525 |
+ |
|
526 |
+ for (i = 0; i < FF_ARRAY_ELEMS(s->stages); i++) { |
|
527 |
+ if (!s->stages[i].stage_needed) |
|
528 |
+ continue; |
|
529 |
+ |
|
530 |
+ ret = nppscale_process[i](ctx, &s->stages[i], s->stages[i].frame, src); |
|
531 |
+ if (ret < 0) |
|
532 |
+ return ret; |
|
533 |
+ |
|
534 |
+ src = s->stages[i].frame; |
|
535 |
+ last_stage = i; |
|
536 |
+ } |
|
537 |
+ |
|
538 |
+ if (last_stage < 0) |
|
539 |
+ return AVERROR_BUG; |
|
540 |
+ ret = av_hwframe_get_buffer(src->hw_frames_ctx, s->tmp_frame, 0); |
|
541 |
+ if (ret < 0) |
|
542 |
+ return ret; |
|
543 |
+ |
|
544 |
+ av_frame_move_ref(out, src); |
|
545 |
+ av_frame_move_ref(src, s->tmp_frame); |
|
546 |
+ |
|
547 |
+ ret = av_frame_copy_props(out, in); |
|
548 |
+ if (ret < 0) |
|
549 |
+ return ret; |
|
550 |
+ |
|
551 |
+ return 0; |
|
552 |
+} |
|
553 |
+ |
|
554 |
+static int nppscale_filter_frame(AVFilterLink *link, AVFrame *in) |
|
555 |
+{ |
|
556 |
+ AVFilterContext *ctx = link->dst; |
|
557 |
+ NPPScaleContext *s = ctx->priv; |
|
558 |
+ AVFilterLink *outlink = ctx->outputs[0]; |
|
559 |
+ AVHWFramesContext *frames_ctx = (AVHWFramesContext*)outlink->hw_frames_ctx->data; |
|
560 |
+ AVCUDADeviceContext *device_hwctx = frames_ctx->device_ctx->hwctx; |
|
561 |
+ |
|
562 |
+ AVFrame *out = NULL; |
|
563 |
+ CUresult err; |
|
564 |
+ CUcontext dummy; |
|
565 |
+ int ret = 0; |
|
566 |
+ |
|
567 |
+ if (s->passthrough) |
|
568 |
+ return ff_filter_frame(outlink, in); |
|
569 |
+ |
|
570 |
+ out = av_frame_alloc(); |
|
571 |
+ if (!out) { |
|
572 |
+ ret = AVERROR(ENOMEM); |
|
573 |
+ goto fail; |
|
574 |
+ } |
|
575 |
+ |
|
576 |
+ av_reduce(&out->sample_aspect_ratio.num, &out->sample_aspect_ratio.den, |
|
577 |
+ (int64_t)in->sample_aspect_ratio.num * outlink->h * link->w, |
|
578 |
+ (int64_t)in->sample_aspect_ratio.den * outlink->w * link->h, |
|
579 |
+ INT_MAX); |
|
580 |
+ |
|
581 |
+ err = cuCtxPushCurrent(device_hwctx->cuda_ctx); |
|
582 |
+ if (err != CUDA_SUCCESS) { |
|
583 |
+ ret = AVERROR_UNKNOWN; |
|
584 |
+ goto fail; |
|
585 |
+ } |
|
586 |
+ |
|
587 |
+ ret = nppscale_scale(ctx, out, in); |
|
588 |
+ |
|
589 |
+ cuCtxPopCurrent(&dummy); |
|
590 |
+ if (ret < 0) |
|
591 |
+ goto fail; |
|
592 |
+ |
|
593 |
+ av_frame_free(&in); |
|
594 |
+ return ff_filter_frame(outlink, out); |
|
595 |
+fail: |
|
596 |
+ av_frame_free(&in); |
|
597 |
+ av_frame_free(&out); |
|
598 |
+ return ret; |
|
599 |
+} |
|
600 |
+ |
|
601 |
+#define OFFSET(x) offsetof(NPPScaleContext, x) |
|
602 |
+#define FLAGS AV_OPT_FLAG_VIDEO_PARAM |
|
603 |
+static const AVOption options[] = { |
|
604 |
+ { "w", "Output video width", OFFSET(w_expr), AV_OPT_TYPE_STRING, { .str = "iw" }, .flags = FLAGS }, |
|
605 |
+ { "h", "Output video height", OFFSET(h_expr), AV_OPT_TYPE_STRING, { .str = "ih" }, .flags = FLAGS }, |
|
606 |
+ { "format", "Output pixel format", OFFSET(format_str), AV_OPT_TYPE_STRING, { .str = "same" }, .flags = FLAGS }, |
|
607 |
+ |
|
608 |
+ { "interp_algo", "Interpolation algorithm used for resizing", OFFSET(interp_algo), AV_OPT_TYPE_INT, { .i64 = NPPI_INTER_CUBIC }, 0, INT_MAX, FLAGS, "interp_algo" }, |
|
609 |
+ { "nn", "nearest neighbour", 0, AV_OPT_TYPE_CONST, { .i64 = NPPI_INTER_NN }, 0, 0, FLAGS, "interp_algo" }, |
|
610 |
+ { "linear", "linear", 0, AV_OPT_TYPE_CONST, { .i64 = NPPI_INTER_LINEAR }, 0, 0, FLAGS, "interp_algo" }, |
|
611 |
+ { "cubic", "cubic", 0, AV_OPT_TYPE_CONST, { .i64 = NPPI_INTER_CUBIC }, 0, 0, FLAGS, "interp_algo" }, |
|
612 |
+ { "cubic2p_bspline", "2-parameter cubic (B=1, C=0)", 0, AV_OPT_TYPE_CONST, { .i64 = NPPI_INTER_CUBIC2P_BSPLINE }, 0, 0, FLAGS, "interp_algo" }, |
|
613 |
+ { "cubic2p_catmullrom", "2-parameter cubic (B=0, C=1/2)", 0, AV_OPT_TYPE_CONST, { .i64 = NPPI_INTER_CUBIC2P_CATMULLROM }, 0, 0, FLAGS, "interp_algo" }, |
|
614 |
+ { "cubic2p_b05c03", "2-parameter cubic (B=1/2, C=3/10)", 0, AV_OPT_TYPE_CONST, { .i64 = NPPI_INTER_CUBIC2P_B05C03 }, 0, 0, FLAGS, "interp_algo" }, |
|
615 |
+ { "super", "supersampling", 0, AV_OPT_TYPE_CONST, { .i64 = NPPI_INTER_SUPER }, 0, 0, FLAGS, "interp_algo" }, |
|
616 |
+ { "lanczos", "Lanczos", 0, AV_OPT_TYPE_CONST, { .i64 = NPPI_INTER_LANCZOS }, 0, 0, FLAGS, "interp_algo" }, |
|
617 |
+ { NULL }, |
|
618 |
+}; |
|
619 |
+ |
|
620 |
+static const AVClass nppscale_class = { |
|
621 |
+ .class_name = "nppscale", |
|
622 |
+ .item_name = av_default_item_name, |
|
623 |
+ .option = options, |
|
624 |
+ .version = LIBAVUTIL_VERSION_INT, |
|
625 |
+}; |
|
626 |
+ |
|
627 |
+static const AVFilterPad nppscale_inputs[] = { |
|
628 |
+ { |
|
629 |
+ .name = "default", |
|
630 |
+ .type = AVMEDIA_TYPE_VIDEO, |
|
631 |
+ .filter_frame = nppscale_filter_frame, |
|
632 |
+ }, |
|
633 |
+ { NULL } |
|
634 |
+}; |
|
635 |
+ |
|
636 |
+static const AVFilterPad nppscale_outputs[] = { |
|
637 |
+ { |
|
638 |
+ .name = "default", |
|
639 |
+ .type = AVMEDIA_TYPE_VIDEO, |
|
640 |
+ .config_props = nppscale_config_props, |
|
641 |
+ }, |
|
642 |
+ { NULL } |
|
643 |
+}; |
|
644 |
+ |
|
645 |
+AVFilter ff_vf_scale_npp = { |
|
646 |
+ .name = "scale_npp", |
|
647 |
+ .description = NULL_IF_CONFIG_SMALL("NVIDIA Performance Primitives video " |
|
648 |
+ "scaling and format conversion"), |
|
649 |
+ |
|
650 |
+ .init = nppscale_init, |
|
651 |
+ .uninit = nppscale_uninit, |
|
652 |
+ .query_formats = nppscale_query_formats, |
|
653 |
+ |
|
654 |
+ .priv_size = sizeof(NPPScaleContext), |
|
655 |
+ .priv_class = &nppscale_class, |
|
656 |
+ |
|
657 |
+ .inputs = nppscale_inputs, |
|
658 |
+ .outputs = nppscale_outputs, |
|
659 |
+}; |