* commit '21f7cd4acd8dc4b4796b55966dd015cb037164d8':
lavfi: add a filter for uploading normal frames to CUDA
Merged-by: Derek Buitenhuis <derek.buitenhuis@gmail.com>
... | ... |
@@ -2872,6 +2872,7 @@ fspp_filter_deps="gpl" |
2872 | 2872 |
geq_filter_deps="gpl" |
2873 | 2873 |
histeq_filter_deps="gpl" |
2874 | 2874 |
hqdn3d_filter_deps="gpl" |
2875 |
+hwupload_cuda_filter_deps="cuda" |
|
2875 | 2876 |
interlace_filter_deps="gpl" |
2876 | 2877 |
kerndeint_filter_deps="gpl" |
2877 | 2878 |
ladspa_filter_deps="ladspa dlopen" |
... | ... |
@@ -7940,6 +7940,18 @@ A floating point number which specifies chroma temporal strength. It defaults to |
7940 | 7940 |
@var{luma_tmp}*@var{chroma_spatial}/@var{luma_spatial}. |
7941 | 7941 |
@end table |
7942 | 7942 |
|
7943 |
+@anchor{hwupload_cuda} |
|
7944 |
+@section hwupload_cuda |
|
7945 |
+ |
|
7946 |
+Upload system memory frames to a CUDA device. |
|
7947 |
+ |
|
7948 |
+It accepts the following optional parameters: |
|
7949 |
+ |
|
7950 |
+@table @option |
|
7951 |
+@item device |
|
7952 |
+The number of the CUDA device to use. Defaults to 0. |
|
7953 |
+@end table |
|
7954 |
+ |
|
7943 | 7955 |
@section hqx |
7944 | 7956 |
|
7945 | 7957 |
Apply a high-quality magnification filter designed for pixel art. This filter |
... | ... |
@@ -175,6 +175,7 @@ OBJS-$(CONFIG_HISTEQ_FILTER) += vf_histeq.o |
175 | 175 |
OBJS-$(CONFIG_HISTOGRAM_FILTER) += vf_histogram.o |
176 | 176 |
OBJS-$(CONFIG_HQDN3D_FILTER) += vf_hqdn3d.o |
177 | 177 |
OBJS-$(CONFIG_HQX_FILTER) += vf_hqx.o |
178 |
+OBJS-$(CONFIG_HWUPLOAD_CUDA_FILTER) += vf_hwupload_cuda.o |
|
178 | 179 |
OBJS-$(CONFIG_HSTACK_FILTER) += vf_stack.o framesync.o |
179 | 180 |
OBJS-$(CONFIG_HUE_FILTER) += vf_hue.o |
180 | 181 |
OBJS-$(CONFIG_IDET_FILTER) += vf_idet.o |
... | ... |
@@ -196,6 +196,7 @@ void avfilter_register_all(void) |
196 | 196 |
REGISTER_FILTER(HISTOGRAM, histogram, vf); |
197 | 197 |
REGISTER_FILTER(HQDN3D, hqdn3d, vf); |
198 | 198 |
REGISTER_FILTER(HQX, hqx, vf); |
199 |
+ REGISTER_FILTER(HWUPLOAD_CUDA, hwupload_cuda, vf); |
|
199 | 200 |
REGISTER_FILTER(HSTACK, hstack, vf); |
200 | 201 |
REGISTER_FILTER(HUE, hue, vf); |
201 | 202 |
REGISTER_FILTER(IDET, idet, vf); |
202 | 203 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,232 @@ |
0 |
+/* |
|
1 |
+ * This file is part of FFmpeg. |
|
2 |
+ * |
|
3 |
+ * FFmpeg is free software; you can redistribute it and/or |
|
4 |
+ * modify it under the terms of the GNU Lesser General Public |
|
5 |
+ * License as published by the Free Software Foundation; either |
|
6 |
+ * version 2.1 of the License, or (at your option) any later version. |
|
7 |
+ * |
|
8 |
+ * FFmpeg is distributed in the hope that it will be useful, |
|
9 |
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
10 |
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|
11 |
+ * Lesser General Public License for more details. |
|
12 |
+ * |
|
13 |
+ * You should have received a copy of the GNU Lesser General Public |
|
14 |
+ * License along with FFmpeg; if not, write to the Free Software |
|
15 |
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
|
16 |
+ */ |
|
17 |
+ |
|
18 |
#include <limits.h>

#include "libavutil/buffer.h"
#include "libavutil/hwcontext.h"
#include "libavutil/hwcontext_cuda.h"
#include "libavutil/log.h"
#include "libavutil/opt.h"

#include "avfilter.h"
#include "formats.h"
#include "internal.h"
#include "video.h"
|
28 |
+ |
|
29 |
+typedef struct CudaUploadContext { |
|
30 |
+ const AVClass *class; |
|
31 |
+ int device_idx; |
|
32 |
+ |
|
33 |
+ AVBufferRef *hwdevice; |
|
34 |
+ AVBufferRef *hwframe; |
|
35 |
+} CudaUploadContext; |
|
36 |
+ |
|
37 |
+static void cudaupload_ctx_free(AVHWDeviceContext *ctx) |
|
38 |
+{ |
|
39 |
+ AVCUDADeviceContext *hwctx = ctx->hwctx; |
|
40 |
+ cuCtxDestroy(hwctx->cuda_ctx); |
|
41 |
+} |
|
42 |
+ |
|
43 |
+static av_cold int cudaupload_init(AVFilterContext *ctx) |
|
44 |
+{ |
|
45 |
+ CudaUploadContext *s = ctx->priv; |
|
46 |
+ |
|
47 |
+ AVHWDeviceContext *device_ctx; |
|
48 |
+ AVCUDADeviceContext *device_hwctx; |
|
49 |
+ CUdevice device; |
|
50 |
+ CUcontext cuda_ctx = NULL, dummy; |
|
51 |
+ CUresult err; |
|
52 |
+ int ret; |
|
53 |
+ |
|
54 |
+ err = cuInit(0); |
|
55 |
+ if (err != CUDA_SUCCESS) { |
|
56 |
+ av_log(ctx, AV_LOG_ERROR, "Could not initialize the CUDA driver API\n"); |
|
57 |
+ return AVERROR_UNKNOWN; |
|
58 |
+ } |
|
59 |
+ |
|
60 |
+ err = cuDeviceGet(&device, s->device_idx); |
|
61 |
+ if (err != CUDA_SUCCESS) { |
|
62 |
+ av_log(ctx, AV_LOG_ERROR, "Could not get the device number %d\n", s->device_idx); |
|
63 |
+ return AVERROR_UNKNOWN; |
|
64 |
+ } |
|
65 |
+ |
|
66 |
+ err = cuCtxCreate(&cuda_ctx, 0, device); |
|
67 |
+ if (err != CUDA_SUCCESS) { |
|
68 |
+ av_log(ctx, AV_LOG_ERROR, "Error creating a CUDA context\n"); |
|
69 |
+ return AVERROR_UNKNOWN; |
|
70 |
+ } |
|
71 |
+ |
|
72 |
+ cuCtxPopCurrent(&dummy); |
|
73 |
+ |
|
74 |
+ s->hwdevice = av_hwdevice_ctx_alloc(AV_HWDEVICE_TYPE_CUDA); |
|
75 |
+ if (!s->hwdevice) { |
|
76 |
+ cuCtxDestroy(cuda_ctx); |
|
77 |
+ return AVERROR(ENOMEM); |
|
78 |
+ } |
|
79 |
+ |
|
80 |
+ device_ctx = (AVHWDeviceContext*)s->hwdevice->data; |
|
81 |
+ device_ctx->free = cudaupload_ctx_free; |
|
82 |
+ |
|
83 |
+ device_hwctx = device_ctx->hwctx; |
|
84 |
+ device_hwctx->cuda_ctx = cuda_ctx; |
|
85 |
+ |
|
86 |
+ ret = av_hwdevice_ctx_init(s->hwdevice); |
|
87 |
+ if (ret < 0) |
|
88 |
+ return ret; |
|
89 |
+ |
|
90 |
+ return 0; |
|
91 |
+} |
|
92 |
+ |
|
93 |
+static av_cold void cudaupload_uninit(AVFilterContext *ctx) |
|
94 |
+{ |
|
95 |
+ CudaUploadContext *s = ctx->priv; |
|
96 |
+ |
|
97 |
+ av_buffer_unref(&s->hwframe); |
|
98 |
+ av_buffer_unref(&s->hwdevice); |
|
99 |
+} |
|
100 |
+ |
|
101 |
+static int cudaupload_query_formats(AVFilterContext *ctx) |
|
102 |
+{ |
|
103 |
+ static const enum AVPixelFormat input_pix_fmts[] = { |
|
104 |
+ AV_PIX_FMT_NV12, AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV444P, |
|
105 |
+ AV_PIX_FMT_NONE, |
|
106 |
+ }; |
|
107 |
+ static const enum AVPixelFormat output_pix_fmts[] = { |
|
108 |
+ AV_PIX_FMT_CUDA, AV_PIX_FMT_NONE, |
|
109 |
+ }; |
|
110 |
+ AVFilterFormats *in_fmts = ff_make_format_list(input_pix_fmts); |
|
111 |
+ AVFilterFormats *out_fmts = ff_make_format_list(output_pix_fmts); |
|
112 |
+ |
|
113 |
+ ff_formats_ref(in_fmts, &ctx->inputs[0]->out_formats); |
|
114 |
+ ff_formats_ref(out_fmts, &ctx->outputs[0]->in_formats); |
|
115 |
+ |
|
116 |
+ return 0; |
|
117 |
+} |
|
118 |
+ |
|
119 |
+static int cudaupload_config_output(AVFilterLink *outlink) |
|
120 |
+{ |
|
121 |
+ AVFilterContext *ctx = outlink->src; |
|
122 |
+ AVFilterLink *inlink = ctx->inputs[0]; |
|
123 |
+ CudaUploadContext *s = ctx->priv; |
|
124 |
+ |
|
125 |
+ AVHWFramesContext *hwframe_ctx; |
|
126 |
+ int ret; |
|
127 |
+ |
|
128 |
+ av_buffer_unref(&s->hwframe); |
|
129 |
+ s->hwframe = av_hwframe_ctx_alloc(s->hwdevice); |
|
130 |
+ if (!s->hwframe) |
|
131 |
+ return AVERROR(ENOMEM); |
|
132 |
+ |
|
133 |
+ hwframe_ctx = (AVHWFramesContext*)s->hwframe->data; |
|
134 |
+ hwframe_ctx->format = AV_PIX_FMT_CUDA; |
|
135 |
+ hwframe_ctx->sw_format = inlink->format; |
|
136 |
+ hwframe_ctx->width = FFALIGN(inlink->w, 16); |
|
137 |
+ hwframe_ctx->height = FFALIGN(inlink->h, 16); |
|
138 |
+ |
|
139 |
+ ret = av_hwframe_ctx_init(s->hwframe); |
|
140 |
+ if (ret < 0) |
|
141 |
+ return ret; |
|
142 |
+ |
|
143 |
+ outlink->hw_frames_ctx = av_buffer_ref(s->hwframe); |
|
144 |
+ if (!outlink->hw_frames_ctx) |
|
145 |
+ return AVERROR(ENOMEM); |
|
146 |
+ |
|
147 |
+ return 0; |
|
148 |
+} |
|
149 |
+ |
|
150 |
+static int cudaupload_filter_frame(AVFilterLink *link, AVFrame *in) |
|
151 |
+{ |
|
152 |
+ AVFilterContext *ctx = link->dst; |
|
153 |
+ CudaUploadContext *s = ctx->priv; |
|
154 |
+ |
|
155 |
+ AVFrame *out = NULL; |
|
156 |
+ int ret; |
|
157 |
+ |
|
158 |
+ out = av_frame_alloc(); |
|
159 |
+ if (!out) { |
|
160 |
+ ret = AVERROR(ENOMEM); |
|
161 |
+ goto fail; |
|
162 |
+ } |
|
163 |
+ |
|
164 |
+ ret = av_hwframe_get_buffer(s->hwframe, out, 0); |
|
165 |
+ if (ret < 0) |
|
166 |
+ goto fail; |
|
167 |
+ |
|
168 |
+ out->width = in->width; |
|
169 |
+ out->height = in->height; |
|
170 |
+ |
|
171 |
+ ret = av_hwframe_transfer_data(out, in, 0); |
|
172 |
+ if (ret < 0) { |
|
173 |
+ av_log(ctx, AV_LOG_ERROR, "Error transferring data to the GPU\n"); |
|
174 |
+ goto fail; |
|
175 |
+ } |
|
176 |
+ |
|
177 |
+ ret = av_frame_copy_props(out, in); |
|
178 |
+ if (ret < 0) |
|
179 |
+ goto fail; |
|
180 |
+ |
|
181 |
+ av_frame_free(&in); |
|
182 |
+ |
|
183 |
+ return ff_filter_frame(ctx->outputs[0], out); |
|
184 |
+fail: |
|
185 |
+ av_frame_free(&in); |
|
186 |
+ av_frame_free(&out); |
|
187 |
+ return ret; |
|
188 |
+} |
|
189 |
+ |
|
190 |
+#define OFFSET(x) offsetof(CudaUploadContext, x) |
|
191 |
+#define FLAGS (AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM) |
|
192 |
+static const AVOption cudaupload_options[] = { |
|
193 |
+ { "device", "Number of the device to use", OFFSET(device_idx), AV_OPT_TYPE_INT, { .i64 = 0 }, .flags = FLAGS }, |
|
194 |
+ { NULL }, |
|
195 |
+}; |
|
196 |
+ |
|
197 |
+AVFILTER_DEFINE_CLASS(cudaupload) |
|
198 |
+ |
|
199 |
+static const AVFilterPad cudaupload_inputs[] = { |
|
200 |
+ { |
|
201 |
+ .name = "default", |
|
202 |
+ .type = AVMEDIA_TYPE_VIDEO, |
|
203 |
+ .filter_frame = cudaupload_filter_frame, |
|
204 |
+ }, |
|
205 |
+ { NULL } |
|
206 |
+}; |
|
207 |
+ |
|
208 |
+static const AVFilterPad cudaupload_outputs[] = { |
|
209 |
+ { |
|
210 |
+ .name = "default", |
|
211 |
+ .type = AVMEDIA_TYPE_VIDEO, |
|
212 |
+ .config_props = cudaupload_config_output, |
|
213 |
+ }, |
|
214 |
+ { NULL } |
|
215 |
+}; |
|
216 |
+ |
|
217 |
+AVFilter ff_vf_hwupload_cuda = { |
|
218 |
+ .name = "hwupload_cuda", |
|
219 |
+ .description = NULL_IF_CONFIG_SMALL("Upload a system memory frame to a CUDA device."), |
|
220 |
+ |
|
221 |
+ .init = cudaupload_init, |
|
222 |
+ .uninit = cudaupload_uninit, |
|
223 |
+ |
|
224 |
+ .query_formats = cudaupload_query_formats, |
|
225 |
+ |
|
226 |
+ .priv_size = sizeof(CudaUploadContext), |
|
227 |
+ .priv_class = &cudaupload_class, |
|
228 |
+ |
|
229 |
+ .inputs = cudaupload_inputs, |
|
230 |
+ .outputs = cudaupload_outputs, |
|
231 |
+}; |