... | ... |
@@ -2382,6 +2382,7 @@ frei0r_filter_extralibs='$ldl' |
2382 | 2382 |
frei0r_src_filter_deps="frei0r dlopen" |
2383 | 2383 |
frei0r_src_filter_extralibs='$ldl' |
2384 | 2384 |
hqdn3d_filter_deps="gpl" |
2385 |
+hwupload_cuda_filter_deps="cuda" |
|
2385 | 2386 |
interlace_filter_deps="gpl" |
2386 | 2387 |
ocv_filter_deps="libopencv" |
2387 | 2388 |
resample_filter_deps="avresample" |
... | ... |
@@ -1610,6 +1610,17 @@ A floating point number which specifies chroma temporal strength. It defaults to |
1610 | 1610 |
@var{luma_tmp}*@var{chroma_spatial}/@var{luma_spatial}. |
1611 | 1611 |
@end table |
1612 | 1612 |
|
1613 |
+@section hwupload_cuda |
|
1614 |
+ |
|
1615 |
+Upload system memory frames to a CUDA device. |
|
1616 |
+ |
|
1617 |
+It accepts the following optional parameters: |
|
1618 |
+ |
|
1619 |
+@table @option |
|
1620 |
+@item device |
|
1621 |
+The number of the CUDA device to use. The default value is 0. |
|
1622 |
+@end table |
|
1623 |
+ |
|
1613 | 1624 |
@section interlace |
1614 | 1625 |
|
1615 | 1626 |
Simple interlacing filter from progressive contents. This interleaves upper (or |
... | ... |
@@ -56,6 +56,7 @@ OBJS-$(CONFIG_FREI0R_FILTER) += vf_frei0r.o |
56 | 56 |
OBJS-$(CONFIG_GRADFUN_FILTER) += vf_gradfun.o |
57 | 57 |
OBJS-$(CONFIG_HFLIP_FILTER) += vf_hflip.o |
58 | 58 |
OBJS-$(CONFIG_HQDN3D_FILTER) += vf_hqdn3d.o |
59 |
+OBJS-$(CONFIG_HWUPLOAD_CUDA_FILTER) += vf_hwupload_cuda.o |
|
59 | 60 |
OBJS-$(CONFIG_INTERLACE_FILTER) += vf_interlace.o |
60 | 61 |
OBJS-$(CONFIG_LUT_FILTER) += vf_lut.o |
61 | 62 |
OBJS-$(CONFIG_LUTRGB_FILTER) += vf_lut.o |
... | ... |
@@ -82,6 +82,7 @@ void avfilter_register_all(void) |
82 | 82 |
REGISTER_FILTER(GRADFUN, gradfun, vf); |
83 | 83 |
REGISTER_FILTER(HFLIP, hflip, vf); |
84 | 84 |
REGISTER_FILTER(HQDN3D, hqdn3d, vf); |
85 |
+ REGISTER_FILTER(HWUPLOAD_CUDA, hwupload_cuda, vf); |
|
85 | 86 |
REGISTER_FILTER(INTERLACE, interlace, vf); |
86 | 87 |
REGISTER_FILTER(LUT, lut, vf); |
87 | 88 |
REGISTER_FILTER(LUTRGB, lutrgb, vf); |
88 | 89 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,237 @@ |
0 |
+/* |
|
1 |
+ * This file is part of Libav. |
|
2 |
+ * |
|
3 |
+ * Libav is free software; you can redistribute it and/or |
|
4 |
+ * modify it under the terms of the GNU Lesser General Public |
|
5 |
+ * License as published by the Free Software Foundation; either |
|
6 |
+ * version 2.1 of the License, or (at your option) any later version. |
|
7 |
+ * |
|
8 |
+ * Libav is distributed in the hope that it will be useful, |
|
9 |
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
10 |
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|
11 |
+ * Lesser General Public License for more details. |
|
12 |
+ * |
|
13 |
+ * You should have received a copy of the GNU Lesser General Public |
|
14 |
+ * License along with Libav; if not, write to the Free Software |
|
15 |
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
|
16 |
+ */ |
|
17 |
+ |
|
18 |
#include <limits.h>

#include "libavutil/buffer.h"
#include "libavutil/hwcontext.h"
#include "libavutil/hwcontext_cuda.h"
#include "libavutil/log.h"
#include "libavutil/opt.h"

#include "avfilter.h"
#include "formats.h"
#include "internal.h"
#include "video.h"
|
28 |
+ |
|
29 |
+typedef struct CudaUploadContext { |
|
30 |
+ const AVClass *class; |
|
31 |
+ int device_idx; |
|
32 |
+ |
|
33 |
+ AVBufferRef *hwdevice; |
|
34 |
+ AVBufferRef *hwframe; |
|
35 |
+} CudaUploadContext; |
|
36 |
+ |
|
37 |
+static void cudaupload_ctx_free(AVHWDeviceContext *ctx) |
|
38 |
+{ |
|
39 |
+ AVCUDADeviceContext *hwctx = ctx->hwctx; |
|
40 |
+ cuCtxDestroy(hwctx->cuda_ctx); |
|
41 |
+} |
|
42 |
+ |
|
43 |
+static av_cold int cudaupload_init(AVFilterContext *ctx) |
|
44 |
+{ |
|
45 |
+ CudaUploadContext *s = ctx->priv; |
|
46 |
+ |
|
47 |
+ AVHWDeviceContext *device_ctx; |
|
48 |
+ AVCUDADeviceContext *device_hwctx; |
|
49 |
+ CUdevice device; |
|
50 |
+ CUcontext cuda_ctx = NULL, dummy; |
|
51 |
+ CUresult err; |
|
52 |
+ int ret; |
|
53 |
+ |
|
54 |
+ err = cuInit(0); |
|
55 |
+ if (err != CUDA_SUCCESS) { |
|
56 |
+ av_log(ctx, AV_LOG_ERROR, "Could not initialize the CUDA driver API\n"); |
|
57 |
+ return AVERROR_UNKNOWN; |
|
58 |
+ } |
|
59 |
+ |
|
60 |
+ err = cuDeviceGet(&device, s->device_idx); |
|
61 |
+ if (err != CUDA_SUCCESS) { |
|
62 |
+ av_log(ctx, AV_LOG_ERROR, "Could not get the device number %d\n", s->device_idx); |
|
63 |
+ return AVERROR_UNKNOWN; |
|
64 |
+ } |
|
65 |
+ |
|
66 |
+ err = cuCtxCreate(&cuda_ctx, 0, device); |
|
67 |
+ if (err != CUDA_SUCCESS) { |
|
68 |
+ av_log(ctx, AV_LOG_ERROR, "Error creating a CUDA context\n"); |
|
69 |
+ return AVERROR_UNKNOWN; |
|
70 |
+ } |
|
71 |
+ |
|
72 |
+ cuCtxPopCurrent(&dummy); |
|
73 |
+ |
|
74 |
+ s->hwdevice = av_hwdevice_ctx_alloc(AV_HWDEVICE_TYPE_CUDA); |
|
75 |
+ if (!s->hwdevice) { |
|
76 |
+ cuCtxDestroy(cuda_ctx); |
|
77 |
+ return AVERROR(ENOMEM); |
|
78 |
+ } |
|
79 |
+ |
|
80 |
+ device_ctx = (AVHWDeviceContext*)s->hwdevice->data; |
|
81 |
+ device_ctx->free = cudaupload_ctx_free; |
|
82 |
+ |
|
83 |
+ device_hwctx = device_ctx->hwctx; |
|
84 |
+ device_hwctx->cuda_ctx = cuda_ctx; |
|
85 |
+ |
|
86 |
+ ret = av_hwdevice_ctx_init(s->hwdevice); |
|
87 |
+ if (ret < 0) |
|
88 |
+ return ret; |
|
89 |
+ |
|
90 |
+ return 0; |
|
91 |
+} |
|
92 |
+ |
|
93 |
+static av_cold void cudaupload_uninit(AVFilterContext *ctx) |
|
94 |
+{ |
|
95 |
+ CudaUploadContext *s = ctx->priv; |
|
96 |
+ |
|
97 |
+ av_buffer_unref(&s->hwframe); |
|
98 |
+ av_buffer_unref(&s->hwdevice); |
|
99 |
+} |
|
100 |
+ |
|
101 |
+static int cudaupload_query_formats(AVFilterContext *ctx) |
|
102 |
+{ |
|
103 |
+ static const enum AVPixelFormat input_pix_fmts[] = { |
|
104 |
+ AV_PIX_FMT_NV12, AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV444P, |
|
105 |
+ AV_PIX_FMT_NONE, |
|
106 |
+ }; |
|
107 |
+ static const enum AVPixelFormat output_pix_fmts[] = { |
|
108 |
+ AV_PIX_FMT_CUDA, AV_PIX_FMT_NONE, |
|
109 |
+ }; |
|
110 |
+ AVFilterFormats *in_fmts = ff_make_format_list(input_pix_fmts); |
|
111 |
+ AVFilterFormats *out_fmts = ff_make_format_list(output_pix_fmts); |
|
112 |
+ |
|
113 |
+ ff_formats_ref(in_fmts, &ctx->inputs[0]->out_formats); |
|
114 |
+ ff_formats_ref(out_fmts, &ctx->outputs[0]->in_formats); |
|
115 |
+ |
|
116 |
+ return 0; |
|
117 |
+} |
|
118 |
+ |
|
119 |
+static int cudaupload_config_output(AVFilterLink *outlink) |
|
120 |
+{ |
|
121 |
+ AVFilterContext *ctx = outlink->src; |
|
122 |
+ AVFilterLink *inlink = ctx->inputs[0]; |
|
123 |
+ CudaUploadContext *s = ctx->priv; |
|
124 |
+ |
|
125 |
+ AVHWFramesContext *hwframe_ctx; |
|
126 |
+ int ret; |
|
127 |
+ |
|
128 |
+ av_buffer_unref(&s->hwframe); |
|
129 |
+ s->hwframe = av_hwframe_ctx_alloc(s->hwdevice); |
|
130 |
+ if (!s->hwframe) |
|
131 |
+ return AVERROR(ENOMEM); |
|
132 |
+ |
|
133 |
+ hwframe_ctx = (AVHWFramesContext*)s->hwframe->data; |
|
134 |
+ hwframe_ctx->format = AV_PIX_FMT_CUDA; |
|
135 |
+ hwframe_ctx->sw_format = inlink->format; |
|
136 |
+ hwframe_ctx->width = FFALIGN(inlink->w, 16); |
|
137 |
+ hwframe_ctx->height = FFALIGN(inlink->h, 16); |
|
138 |
+ |
|
139 |
+ ret = av_hwframe_ctx_init(s->hwframe); |
|
140 |
+ if (ret < 0) |
|
141 |
+ return ret; |
|
142 |
+ |
|
143 |
+ outlink->hw_frames_ctx = av_buffer_ref(s->hwframe); |
|
144 |
+ if (!outlink->hw_frames_ctx) |
|
145 |
+ return AVERROR(ENOMEM); |
|
146 |
+ |
|
147 |
+ return 0; |
|
148 |
+} |
|
149 |
+ |
|
150 |
+static int cudaupload_filter_frame(AVFilterLink *link, AVFrame *in) |
|
151 |
+{ |
|
152 |
+ AVFilterContext *ctx = link->dst; |
|
153 |
+ CudaUploadContext *s = ctx->priv; |
|
154 |
+ |
|
155 |
+ AVFrame *out = NULL; |
|
156 |
+ int ret; |
|
157 |
+ |
|
158 |
+ out = av_frame_alloc(); |
|
159 |
+ if (!out) { |
|
160 |
+ ret = AVERROR(ENOMEM); |
|
161 |
+ goto fail; |
|
162 |
+ } |
|
163 |
+ |
|
164 |
+ ret = av_hwframe_get_buffer(s->hwframe, out, 0); |
|
165 |
+ if (ret < 0) |
|
166 |
+ goto fail; |
|
167 |
+ |
|
168 |
+ out->width = in->width; |
|
169 |
+ out->height = in->height; |
|
170 |
+ |
|
171 |
+ ret = av_hwframe_transfer_data(out, in, 0); |
|
172 |
+ if (ret < 0) { |
|
173 |
+ av_log(ctx, AV_LOG_ERROR, "Error transferring data to the GPU\n"); |
|
174 |
+ goto fail; |
|
175 |
+ } |
|
176 |
+ |
|
177 |
+ ret = av_frame_copy_props(out, in); |
|
178 |
+ if (ret < 0) |
|
179 |
+ goto fail; |
|
180 |
+ |
|
181 |
+ av_frame_free(&in); |
|
182 |
+ |
|
183 |
+ return ff_filter_frame(ctx->outputs[0], out); |
|
184 |
+fail: |
|
185 |
+ av_frame_free(&in); |
|
186 |
+ av_frame_free(&out); |
|
187 |
+ return ret; |
|
188 |
+} |
|
189 |
+ |
|
190 |
+#define OFFSET(x) offsetof(CudaUploadContext, x) |
|
191 |
+#define FLAGS AV_OPT_FLAG_VIDEO_PARAM |
|
192 |
+static const AVOption options[] = { |
|
193 |
+ { "device", "Number of the device to use", OFFSET(device_idx), AV_OPT_TYPE_INT, { .i64 = 0 }, .flags = FLAGS }, |
|
194 |
+ { NULL }, |
|
195 |
+}; |
|
196 |
+ |
|
197 |
+static const AVClass cudaupload_class = { |
|
198 |
+ .class_name = "cudaupload", |
|
199 |
+ .item_name = av_default_item_name, |
|
200 |
+ .option = options, |
|
201 |
+ .version = LIBAVUTIL_VERSION_INT, |
|
202 |
+}; |
|
203 |
+ |
|
204 |
+static const AVFilterPad cudaupload_inputs[] = { |
|
205 |
+ { |
|
206 |
+ .name = "default", |
|
207 |
+ .type = AVMEDIA_TYPE_VIDEO, |
|
208 |
+ .filter_frame = cudaupload_filter_frame, |
|
209 |
+ }, |
|
210 |
+ { NULL } |
|
211 |
+}; |
|
212 |
+ |
|
213 |
+static const AVFilterPad cudaupload_outputs[] = { |
|
214 |
+ { |
|
215 |
+ .name = "default", |
|
216 |
+ .type = AVMEDIA_TYPE_VIDEO, |
|
217 |
+ .config_props = cudaupload_config_output, |
|
218 |
+ }, |
|
219 |
+ { NULL } |
|
220 |
+}; |
|
221 |
+ |
|
222 |
+AVFilter ff_vf_hwupload_cuda = { |
|
223 |
+ .name = "hwupload_cuda", |
|
224 |
+ .description = NULL_IF_CONFIG_SMALL("Upload a system memory frame to a CUDA device"), |
|
225 |
+ |
|
226 |
+ .init = cudaupload_init, |
|
227 |
+ .uninit = cudaupload_uninit, |
|
228 |
+ |
|
229 |
+ .query_formats = cudaupload_query_formats, |
|
230 |
+ |
|
231 |
+ .priv_size = sizeof(CudaUploadContext), |
|
232 |
+ .priv_class = &cudaupload_class, |
|
233 |
+ |
|
234 |
+ .inputs = cudaupload_inputs, |
|
235 |
+ .outputs = cudaupload_outputs, |
|
236 |
+}; |