Browse code

avcodec: Implement vc1 nvdec hwaccel

This hwaccel is interesting because it also works for WMV3 (WMV9) content,
which is not supported by the NVIDIA parser used by cuviddec.

Philip Langdale authored on 2017/11/14 14:09:27
Showing 8 changed files
... ...
@@ -13,7 +13,7 @@ version <next>:
13 13
 - PCE support for extended channel layouts in the AAC encoder
14 14
 - native aptX encoder and decoder
15 15
 - Raw aptX muxer and demuxer
16
-- NVIDIA NVDEC-accelerated H.264, HEVC and VP9 hwaccel decoding
16
+- NVIDIA NVDEC-accelerated H.264, HEVC, VC1 and VP9 hwaccel decoding
17 17
 - Intel QSV-accelerated overlay filter
18 18
 
19 19
 
... ...
@@ -2740,6 +2740,8 @@ vc1_d3d11va2_hwaccel_select="vc1_decoder"
2740 2740
 vc1_dxva2_hwaccel_deps="dxva2"
2741 2741
 vc1_dxva2_hwaccel_select="vc1_decoder"
2742 2742
 vc1_mmal_hwaccel_deps="mmal"
2743
+vc1_nvdec_hwaccel_deps="nvdec"
2744
+vc1_nvdec_hwaccel_select="vc1_decoder"
2743 2745
 vc1_qsv_hwaccel_deps="libmfx"
2744 2746
 vc1_vaapi_hwaccel_deps="vaapi"
2745 2747
 vc1_vaapi_hwaccel_select="vc1_decoder"
... ...
@@ -2763,6 +2765,7 @@ vp9_vaapi_hwaccel_select="vp9_decoder"
2763 2763
 wmv3_d3d11va_hwaccel_select="vc1_d3d11va_hwaccel"
2764 2764
 wmv3_d3d11va2_hwaccel_select="vc1_d3d11va2_hwaccel"
2765 2765
 wmv3_dxva2_hwaccel_select="vc1_dxva2_hwaccel"
2766
+wmv3_nvdec_hwaccel_select="vc1_nvdec_hwaccel"
2766 2767
 wmv3_vaapi_hwaccel_select="vc1_vaapi_hwaccel"
2767 2768
 wmv3_vdpau_hwaccel_select="vc1_vdpau_hwaccel"
2768 2769
 
... ...
@@ -864,6 +864,7 @@ OBJS-$(CONFIG_MPEG4_VDPAU_HWACCEL)        += vdpau_mpeg4.o
864 864
 OBJS-$(CONFIG_MPEG4_VIDEOTOOLBOX_HWACCEL) += videotoolbox.o
865 865
 OBJS-$(CONFIG_VC1_D3D11VA_HWACCEL)        += dxva2_vc1.o
866 866
 OBJS-$(CONFIG_VC1_DXVA2_HWACCEL)          += dxva2_vc1.o
867
+OBJS-$(CONFIG_VC1_NVDEC_HWACCEL)          += nvdec_vc1.o
867 868
 OBJS-$(CONFIG_VC1_QSV_HWACCEL)            += qsvdec_other.o
868 869
 OBJS-$(CONFIG_VC1_VAAPI_HWACCEL)          += vaapi_vc1.o
869 870
 OBJS-$(CONFIG_VC1_VDPAU_HWACCEL)          += vdpau_vc1.o
... ...
@@ -111,6 +111,7 @@ static void register_all(void)
111 111
     REGISTER_HWACCEL(VC1_D3D11VA,       vc1_d3d11va);
112 112
     REGISTER_HWACCEL(VC1_D3D11VA2,      vc1_d3d11va2);
113 113
     REGISTER_HWACCEL(VC1_DXVA2,         vc1_dxva2);
114
+    REGISTER_HWACCEL(VC1_NVDEC,         vc1_nvdec);
114 115
     REGISTER_HWACCEL(VC1_VAAPI,         vc1_vaapi);
115 116
     REGISTER_HWACCEL(VC1_VDPAU,         vc1_vdpau);
116 117
     REGISTER_HWACCEL(VC1_MMAL,          vc1_mmal);
... ...
@@ -128,6 +129,7 @@ static void register_all(void)
128 128
     REGISTER_HWACCEL(WMV3_D3D11VA,      wmv3_d3d11va);
129 129
     REGISTER_HWACCEL(WMV3_D3D11VA2,     wmv3_d3d11va2);
130 130
     REGISTER_HWACCEL(WMV3_DXVA2,        wmv3_dxva2);
131
+    REGISTER_HWACCEL(WMV3_NVDEC,        wmv3_nvdec);
131 132
     REGISTER_HWACCEL(WMV3_VAAPI,        wmv3_vaapi);
132 133
     REGISTER_HWACCEL(WMV3_VDPAU,        wmv3_vdpau);
133 134
 
... ...
@@ -54,7 +54,9 @@ static int map_avcodec_id(enum AVCodecID id)
54 54
     switch (id) {
55 55
     case AV_CODEC_ID_H264: return cudaVideoCodec_H264;
56 56
     case AV_CODEC_ID_HEVC: return cudaVideoCodec_HEVC;
57
+    case AV_CODEC_ID_VC1:  return cudaVideoCodec_VC1;
57 58
     case AV_CODEC_ID_VP9:  return cudaVideoCodec_VP9;
59
+    case AV_CODEC_ID_WMV3:  return cudaVideoCodec_VC1;
58 60
     }
59 61
     return -1;
60 62
 }
61 63
new file mode 100644
... ...
@@ -0,0 +1,184 @@
0
+/*
1
+ * VC1 HW decode acceleration through NVDEC
2
+ *
3
+ * Copyright (c) 2017 Philip Langdale
4
+ *
5
+ * This file is part of FFmpeg.
6
+ *
7
+ * FFmpeg is free software; you can redistribute it and/or
8
+ * modify it under the terms of the GNU Lesser General Public
9
+ * License as published by the Free Software Foundation; either
10
+ * version 2.1 of the License, or (at your option) any later version.
11
+ *
12
+ * FFmpeg is distributed in the hope that it will be useful,
13
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15
+ * Lesser General Public License for more details.
16
+ *
17
+ * You should have received a copy of the GNU Lesser General Public
18
+ * License along with FFmpeg; if not, write to the Free Software
19
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20
+ */
21
+
22
+#include "avcodec.h"
23
+#include "nvdec.h"
24
+#include "decode.h"
25
+#include "vc1.h"
26
+
27
+static unsigned char get_ref_idx(AVFrame *frame) /* surface index stored for a reference frame, or 255 when none is available */
28
+{
29
+    FrameDecodeData *fdd;
30
+    NVDECFrame *cf;
31
+
32
+    if (!frame || !frame->private_ref) /* no frame, or no decode data attached to it */
33
+        return 255;
34
+
35
+    fdd = (FrameDecodeData*)frame->private_ref->data;
36
+    cf  = (NVDECFrame*)fdd->hwaccel_priv;
37
+    /* NOTE(review): hwaccel_priv is presumably only set for frames that went through the NVDEC start-frame path; treat a NULL one like a missing reference instead of dereferencing it. */
38
+    return cf ? cf->idx : 255;
39
+}
40
+
41
+static int nvdec_vc1_start_frame(AVCodecContext *avctx, const uint8_t *buffer, uint32_t size)
42
+{ /* Fills ctx->pic_params for the current picture; buffer and size are not read here. Returns 0, or the negative code from ff_nvdec_start_frame(). */
43
+    VC1Context *v = avctx->priv_data;
44
+    MpegEncContext *s = &v->s;
45
+
46
+    NVDECContext      *ctx = avctx->internal->hwaccel_priv_data;
47
+    CUVIDPICPARAMS     *pp = &ctx->pic_params;
48
+    FrameDecodeData *fdd;
49
+    NVDECFrame *cf;
50
+    AVFrame *cur_frame = s->current_picture.f;
51
+
52
+    int ret;
53
+    /* Run the shared NVDEC per-frame setup first and hand its error back unchanged. */
54
+    ret = ff_nvdec_start_frame(avctx, cur_frame);
55
+    if (ret < 0)
56
+        return ret;
57
+    /* Decode data attached to the current frame; its hwaccel_priv is read as this frame's NVDECFrame. */
58
+    fdd = (FrameDecodeData*)cur_frame->private_ref->data;
59
+    cf  = (NVDECFrame*)fdd->hwaccel_priv;
60
+    /* The whole struct is overwritten; members not named below are zero-initialized by the compound literal. */
61
+    *pp = (CUVIDPICPARAMS) {
62
+        .PicWidthInMbs     = (cur_frame->width  + 15) / 16, /* width in 16-pixel units, rounded up */
63
+        .FrameHeightInMbs  = (cur_frame->height + 15) / 16, /* height in 16-pixel units, rounded up */
64
+        .CurrPicIdx        = cf->idx,
65
+        .field_pic_flag    = v->field_mode,
66
+        .bottom_field_flag = v->cur_field_type,
67
+        .second_field      = v->second_field,
68
+
69
+        .intra_pic_flag    = s->pict_type == AV_PICTURE_TYPE_I ||
70
+                             s->pict_type == AV_PICTURE_TYPE_BI,
71
+        .ref_pic_flag      = s->pict_type == AV_PICTURE_TYPE_I || /* only I and P pictures are flagged as references */
72
+                             s->pict_type == AV_PICTURE_TYPE_P,
73
+
74
+        .CodecSpecific.vc1 = {
75
+            .ForwardRefIdx     = get_ref_idx(s->last_picture.f), /* get_ref_idx() yields 255 when the frame is absent */
76
+            .BackwardRefIdx    = get_ref_idx(s->next_picture.f),
77
+            .FrameWidth        = cur_frame->width,
78
+            .FrameHeight       = cur_frame->height,
79
+            /* Same two picture-type tests as in the outer struct above. */
80
+            .intra_pic_flag    = s->pict_type == AV_PICTURE_TYPE_I ||
81
+                                 s->pict_type == AV_PICTURE_TYPE_BI,
82
+            .ref_pic_flag      = s->pict_type == AV_PICTURE_TYPE_I ||
83
+                                 s->pict_type == AV_PICTURE_TYPE_P,
84
+            .progressive_fcm   = v->fcm == 0, /* presumably fcm 0 is the progressive frame coding mode - TODO confirm */
85
+            /* The remaining members are copied one-to-one from the decoder context. */
86
+            .profile           = v->profile,
87
+            .postprocflag      = v->postprocflag,
88
+            .pulldown          = v->broadcast, /* NOTE(review): fed from the 'broadcast' field - confirm it carries the pulldown flag */
89
+            .interlace         = v->interlace,
90
+            .tfcntrflag        = v->tfcntrflag,
91
+            .finterpflag       = v->finterpflag,
92
+            .psf               = v->psf,
93
+            .multires          = v->multires,
94
+            .syncmarker        = v->resync_marker,
95
+            .rangered          = v->rangered,
96
+            .maxbframes        = s->max_b_frames,
97
+
98
+            .panscan_flag      = v->panscanflag,
99
+            .refdist_flag      = v->refdist_flag,
100
+            .extended_mv       = v->extended_mv,
101
+            .dquant            = v->dquant,
102
+            .vstransform       = v->vstransform,
103
+            .loopfilter        = v->s.loop_filter, /* v->s is the same object that s points to */
104
+            .fastuvmc          = v->fastuvmc,
105
+            .overlap           = v->overlap,
106
+            .quantizer         = v->quantizer_mode,
107
+            .extended_dmv      = v->extended_dmv,
108
+            .range_mapy_flag   = v->range_mapy_flag,
109
+            .range_mapy        = v->range_mapy,
110
+            .range_mapuv_flag  = v->range_mapuv_flag,
111
+            .range_mapuv       = v->range_mapuv,
112
+            .rangeredfrm       = v->rangeredfrm,
113
+        }
114
+    };
115
+
116
+    return 0;
117
+}
118
+
119
+static int nvdec_vc1_end_frame(AVCodecContext *avctx)
120
+{ /* Finishes the frame through ff_nvdec_end_frame() and returns that call's result. */
121
+    NVDECContext *ctx = avctx->internal->hwaccel_priv_data;
122
+    int ret = ff_nvdec_end_frame(avctx);
123
+    ctx->bitstream = NULL; /* cleared whatever ret is, so the next frame's first slice sets the buffer base again */
124
+    return ret;
125
+}
126
+
127
+static int nvdec_vc1_decode_slice(AVCodecContext *avctx, const uint8_t *buffer, uint32_t size)
128
+{ /* Records one slice: stores its offset, adds size to bitstream_len, bumps nb_slices. Returns 0 or AVERROR(ENOMEM). */
129
+    NVDECContext *ctx = avctx->internal->hwaccel_priv_data;
130
+    void *tmp;
131
+    /* Grow the offsets array through a temporary so ctx->slice_offsets is left untouched if the allocation fails. */
132
+    tmp = av_fast_realloc(ctx->slice_offsets, &ctx->slice_offsets_allocated,
133
+                          (ctx->nb_slices + 1) * sizeof(*ctx->slice_offsets));
134
+    if (!tmp)
135
+        return AVERROR(ENOMEM);
136
+    ctx->slice_offsets = tmp;
137
+    /* The first slice seen while bitstream is NULL becomes the base pointer for this frame. */
138
+    if (!ctx->bitstream)
139
+        ctx->bitstream = (uint8_t*)buffer;
140
+    /* Offset is relative to that base; assumes later slices lie in the same buffer as the first - TODO confirm. */
141
+    ctx->slice_offsets[ctx->nb_slices] = buffer - ctx->bitstream;
142
+    ctx->bitstream_len += size;
143
+    ctx->nb_slices++;
144
+
145
+    return 0;
146
+}
147
+
148
+static int nvdec_vc1_frame_params(AVCodecContext *avctx,
149
+                                  AVBufferRef *hw_frames_ctx)
150
+{ /* Forwards to ff_nvdec_frame_params() with a fixed count of 2 and returns its result. */
151
+    // A picture uses at most two references: one forward and one backward
152
+    return ff_nvdec_frame_params(avctx, hw_frames_ctx, 2);
153
+}
154
+
155
+AVHWAccel ff_vc1_nvdec_hwaccel = { /* hwaccel descriptor for AV_CODEC_ID_VC1 with AV_PIX_FMT_CUDA output */
156
+    .name                 = "vc1_nvdec",
157
+    .type                 = AVMEDIA_TYPE_VIDEO,
158
+    .id                   = AV_CODEC_ID_VC1,
159
+    .pix_fmt              = AV_PIX_FMT_CUDA,
160
+    .start_frame          = nvdec_vc1_start_frame, /* per-frame callbacks defined in this file */
161
+    .end_frame            = nvdec_vc1_end_frame,
162
+    .decode_slice         = nvdec_vc1_decode_slice,
163
+    .frame_params         = nvdec_vc1_frame_params,
164
+    .init                 = ff_nvdec_decode_init, /* init/uninit are the ff_nvdec_* functions, not defined in this file */
165
+    .uninit               = ff_nvdec_decode_uninit,
166
+    .priv_data_size       = sizeof(NVDECContext),
167
+};
168
+
169
+#if CONFIG_WMV3_NVDEC_HWACCEL /* the WMV3 descriptor is only compiled when this config macro is non-zero */
170
+AVHWAccel ff_wmv3_nvdec_hwaccel = { /* same callbacks as ff_vc1_nvdec_hwaccel; only .name and .id differ */
171
+    .name                 = "wmv3_nvdec",
172
+    .type                 = AVMEDIA_TYPE_VIDEO,
173
+    .id                   = AV_CODEC_ID_WMV3,
174
+    .pix_fmt              = AV_PIX_FMT_CUDA,
175
+    .start_frame          = nvdec_vc1_start_frame,
176
+    .end_frame            = nvdec_vc1_end_frame,
177
+    .decode_slice         = nvdec_vc1_decode_slice,
178
+    .frame_params         = nvdec_vc1_frame_params,
179
+    .init                 = ff_nvdec_decode_init,
180
+    .uninit               = ff_nvdec_decode_uninit,
181
+    .priv_data_size       = sizeof(NVDECContext),
182
+};
183
+#endif
... ...
@@ -1119,6 +1119,9 @@ static const enum AVPixelFormat vc1_hwaccel_pixfmt_list_420[] = {
1119 1119
     AV_PIX_FMT_D3D11VA_VLD,
1120 1120
     AV_PIX_FMT_D3D11,
1121 1121
 #endif
1122
+#if CONFIG_VC1_NVDEC_HWACCEL
1123
+    AV_PIX_FMT_CUDA,
1124
+#endif
1122 1125
 #if CONFIG_VC1_VAAPI_HWACCEL
1123 1126
     AV_PIX_FMT_VAAPI,
1124 1127
 #endif
... ...
@@ -29,7 +29,7 @@
29 29
 
30 30
 #define LIBAVCODEC_VERSION_MAJOR  58
31 31
 #define LIBAVCODEC_VERSION_MINOR   3
32
-#define LIBAVCODEC_VERSION_MICRO 101
32
+#define LIBAVCODEC_VERSION_MICRO 102
33 33
 
34 34
 #define LIBAVCODEC_VERSION_INT  AV_VERSION_INT(LIBAVCODEC_VERSION_MAJOR, \
35 35
                                                LIBAVCODEC_VERSION_MINOR, \