Browse code

avcodec: implement vp9 nvdec hwaccel

Timo Rothenpieler authored on 2017/11/12 05:19:07
Showing 8 changed files
... ...
@@ -13,7 +13,7 @@ version <next>:
13 13
 - PCE support for extended channel layouts in the AAC encoder
14 14
 - native aptX encoder and decoder
15 15
 - Raw aptX muxer and demuxer
16
-- NVIDIA NVDEC-accelerated H.264 and HEVC hwaccel decoding
16
+- NVIDIA NVDEC-accelerated H.264, HEVC and VP9 hwaccel decoding
17 17
 - Intel QSV-accelerated overlay filter
18 18
 
19 19
 
... ...
@@ -2764,6 +2764,8 @@ vp9_d3d11va2_hwaccel_select="vp9_decoder"
2764 2764
 vp9_dxva2_hwaccel_deps="dxva2 DXVA_PicParams_VP9"
2765 2765
 vp9_dxva2_hwaccel_select="vp9_decoder"
2766 2766
 vp9_mediacodec_hwaccel_deps="mediacodec"
2767
+vp9_nvdec_hwaccel_deps="cuda nvdec"
2768
+vp9_nvdec_hwaccel_select="vp9_decoder"
2767 2769
 vp9_vaapi_hwaccel_deps="vaapi VADecPictureParameterBufferVP9_bit_depth"
2768 2770
 vp9_vaapi_hwaccel_select="vp9_decoder"
2769 2771
 wmv3_d3d11va_hwaccel_select="vc1_d3d11va_hwaccel"
... ...
@@ -869,6 +869,7 @@ OBJS-$(CONFIG_VC1_VAAPI_HWACCEL)          += vaapi_vc1.o
869 869
 OBJS-$(CONFIG_VC1_VDPAU_HWACCEL)          += vdpau_vc1.o
870 870
 OBJS-$(CONFIG_VP9_D3D11VA_HWACCEL)        += dxva2_vp9.o
871 871
 OBJS-$(CONFIG_VP9_DXVA2_HWACCEL)          += dxva2_vp9.o
872
+OBJS-$(CONFIG_VP9_NVDEC_HWACCEL)          += nvdec_vp9.o
872 873
 OBJS-$(CONFIG_VP9_VAAPI_HWACCEL)          += vaapi_vp9.o
873 874
 OBJS-$(CONFIG_VP8_QSV_HWACCEL)            += qsvdec_other.o
874 875
 
... ...
@@ -123,6 +123,7 @@ static void register_all(void)
123 123
     REGISTER_HWACCEL(VP9_D3D11VA2,      vp9_d3d11va2);
124 124
     REGISTER_HWACCEL(VP9_DXVA2,         vp9_dxva2);
125 125
     REGISTER_HWACCEL(VP9_MEDIACODEC,    vp9_mediacodec);
126
+    REGISTER_HWACCEL(VP9_NVDEC,         vp9_nvdec);
126 127
     REGISTER_HWACCEL(VP9_VAAPI,         vp9_vaapi);
127 128
     REGISTER_HWACCEL(WMV3_D3D11VA,      wmv3_d3d11va);
128 129
     REGISTER_HWACCEL(WMV3_D3D11VA2,     wmv3_d3d11va2);
... ...
@@ -54,6 +54,7 @@ static int map_avcodec_id(enum AVCodecID id)
54 54
     switch (id) {
55 55
     case AV_CODEC_ID_H264: return cudaVideoCodec_H264;
56 56
     case AV_CODEC_ID_HEVC: return cudaVideoCodec_HEVC;
57
+    case AV_CODEC_ID_VP9:  return cudaVideoCodec_VP9;
57 58
     }
58 59
     return -1;
59 60
 }
60 61
new file mode 100644
... ...
@@ -0,0 +1,227 @@
0
+/*
1
+ * VP9 HW decode acceleration through NVDEC
2
+ *
3
+ * Copyright (c) 2016 Timo Rothenpieler
4
+ *
5
+ * This file is part of FFmpeg.
6
+ *
7
+ * FFmpeg is free software; you can redistribute it and/or
8
+ * modify it under the terms of the GNU Lesser General Public
9
+ * License as published by the Free Software Foundation; either
10
+ * version 2.1 of the License, or (at your option) any later version.
11
+ *
12
+ * FFmpeg is distributed in the hope that it will be useful,
13
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15
+ * Lesser General Public License for more details.
16
+ *
17
+ * You should have received a copy of the GNU Lesser General Public
18
+ * License along with FFmpeg; if not, write to the Free Software
19
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20
+ */
21
+
22
+#include "libavutil/pixdesc.h"
23
+
24
+#include "avcodec.h"
25
+#include "nvdec.h"
26
+#include "decode.h"
27
+#include "internal.h"
28
+#include "vp9shared.h"
29
+
30
+static unsigned char get_ref_idx(AVFrame *frame)
31
+{
32
+    FrameDecodeData *fdd;
33
+    NVDECFrame *cf;
34
+    /* Returns the idx of the NVDECFrame reachable through frame->private_ref, or 255 when frame or its private_ref is NULL. */
35
+    if (!frame || !frame->private_ref)
36
+        return 255; /* presumably the "no reference" marker on the NVDEC side -- TODO confirm */
37
+    /* NOTE(review): fdd->hwaccel_priv is dereferenced without a NULL check; assumes it is always set when private_ref exists -- confirm. */
38
+    fdd = (FrameDecodeData*)frame->private_ref->data;
39
+    cf  = (NVDECFrame*)fdd->hwaccel_priv;
40
+
41
+    return cf->idx; /* implicitly converted to unsigned char by the return type */
42
+}
43
+
44
+static int nvdec_vp9_start_frame(AVCodecContext *avctx, const uint8_t *buffer, uint32_t size)
45
+{
46
+    VP9SharedContext *h = avctx->priv_data;
47
+    const AVPixFmtDescriptor *pixdesc = av_pix_fmt_desc_get(avctx->sw_pix_fmt);
48
+    /* Fills ctx->pic_params from the VP9 frame header in h->h for the current frame. Returns 0, or the negative value from ff_nvdec_start_frame(). buffer and size are not used here. */
49
+    NVDECContext      *ctx = avctx->internal->hwaccel_priv_data;
50
+    CUVIDPICPARAMS     *pp = &ctx->pic_params;
51
+    CUVIDVP9PICPARAMS *ppc = &pp->CodecSpecific.vp9;
52
+    FrameDecodeData *fdd;
53
+    NVDECFrame *cf;
54
+    AVFrame *cur_frame = h->frames[CUR_FRAME].tf.f;
55
+    /* NOTE(review): pixdesc is dereferenced below without a NULL check; assumes sw_pix_fmt is always a known format at this point -- confirm. */
56
+    int ret, i;
57
+
58
+    ret = ff_nvdec_start_frame(avctx, cur_frame);
59
+    if (ret < 0)
60
+        return ret;
61
+    /* cur_frame->private_ref is read right after ff_nvdec_start_frame(); presumably that call attaches the NVDECFrame -- verify in nvdec.c. */
62
+    fdd = (FrameDecodeData*)cur_frame->private_ref->data;
63
+    cf  = (NVDECFrame*)fdd->hwaccel_priv;
64
+    /* The compound literal overwrites the whole structure, so every member not named below becomes zero. */
65
+    *pp = (CUVIDPICPARAMS) {
66
+        .PicWidthInMbs     = (cur_frame->width  + 15) / 16, /* frame size in 16-pixel units, rounded up */
67
+        .FrameHeightInMbs  = (cur_frame->height + 15) / 16,
68
+        .CurrPicIdx        = cf->idx,
69
+
70
+        .CodecSpecific.vp9 = {
71
+            .width                    = cur_frame->width,
72
+            .height                   = cur_frame->height,
73
+            /* Reference indices come from the frames in h->refs[] selected by the header's refidx[0..2]; get_ref_idx() yields 255 for a missing frame. */
74
+            .LastRefIdx               = get_ref_idx(h->refs[h->h.refidx[0]].f),
75
+            .GoldenRefIdx             = get_ref_idx(h->refs[h->h.refidx[1]].f),
76
+            .AltRefIdx                = get_ref_idx(h->refs[h->h.refidx[2]].f),
77
+
78
+            .profile                  = h->h.profile,
79
+            .frameContextIdx          = h->h.framectxid,
80
+            .frameType                = !h->h.keyframe, /* 0 for a keyframe, 1 otherwise */
81
+            .showFrame                = !h->h.invisible,
82
+            .errorResilient           = h->h.errorres,
83
+            .frameParallelDecoding    = h->h.parallelmode,
84
+            .subSamplingX             = pixdesc->log2_chroma_w,
85
+            .subSamplingY             = pixdesc->log2_chroma_h,
86
+            .intraOnly                = h->h.intraonly,
87
+            .allow_high_precision_mv  = h->h.keyframe ? 0 : h->h.highprecisionmvs, /* forced to 0 on keyframes */
88
+            .refreshEntropyProbs      = h->h.refreshctx,
89
+            /* Bit depths are taken from the software pixel format descriptor: component 0 for luma, component 1 for chroma. */
90
+            .bitDepthMinus8Luma       = pixdesc->comp[0].depth - 8,
91
+            .bitDepthMinus8Chroma     = pixdesc->comp[1].depth - 8,
92
+
93
+            .loopFilterLevel          = h->h.filter.level,
94
+            .loopFilterSharpness      = h->h.filter.sharpness,
95
+            .modeRefLfEnabled         = h->h.lf_delta.enabled,
96
+
97
+            .log2_tile_columns        = h->h.tiling.log2_tile_cols,
98
+            .log2_tile_rows           = h->h.tiling.log2_tile_rows,
99
+
100
+            .segmentEnabled           = h->h.segmentation.enabled,
101
+            .segmentMapUpdate         = h->h.segmentation.update_map,
102
+            .segmentMapTemporalUpdate = h->h.segmentation.temporal,
103
+            .segmentFeatureMode       = h->h.segmentation.absolute_vals,
104
+
105
+            .qpYAc                    = h->h.yac_qi,
106
+            .qpYDc                    = h->h.ydc_qdelta,
107
+            .qpChDc                   = h->h.uvdc_qdelta,
108
+            .qpChAc                   = h->h.uvac_qdelta,
109
+
110
+            .resetFrameContext        = h->h.resetctx,
111
+            .mcomp_filter_type        = h->h.filtermode ^ (h->h.filtermode <= 1), /* swaps the values 0 and 1, larger values pass through; presumably bridges differing filter enum orders -- confirm */
112
+
113
+            .frameTagSize             = h->h.uncompressed_header_size,
114
+            .offsetToDctParts         = h->h.compressed_header_size,
115
+
116
+            .refFrameSignBias[0]      = 0, /* entries 1..3 are filled from h->h.signbias[] in a loop further down */
117
+        }
118
+    };
119
+    /* Array members are copied element by element from the header. */
120
+    for (i = 0; i < 2; i++)
121
+        ppc->mbModeLfDelta[i] = h->h.lf_delta.mode[i];
122
+
123
+    for (i = 0; i < 4; i++)
124
+        ppc->mbRefLfDelta[i] = h->h.lf_delta.ref[i];
125
+
126
+    for (i = 0; i < 7; i++)
127
+        ppc->mb_segment_tree_probs[i] = h->h.segmentation.prob[i];
128
+
129
+    for (i = 0; i < 3; i++) {
130
+        ppc->activeRefIdx[i] = h->h.refidx[i];
131
+        ppc->segment_pred_probs[i] = h->h.segmentation.pred_prob[i];
132
+        ppc->refFrameSignBias[i + 1] = h->h.signbias[i]; /* shifted by one: slot 0 was set to 0 in the initializer */
133
+    }
134
+    /* Per-segment features for all 8 segments: column 0 = q, 1 = lf, 2 = ref, 3 = skip. */
135
+    for (i = 0; i < 8; i++) {
136
+        ppc->segmentFeatureEnable[i][0] = h->h.segmentation.feat[i].q_enabled;
137
+        ppc->segmentFeatureEnable[i][1] = h->h.segmentation.feat[i].lf_enabled;
138
+        ppc->segmentFeatureEnable[i][2] = h->h.segmentation.feat[i].ref_enabled;
139
+        ppc->segmentFeatureEnable[i][3] = h->h.segmentation.feat[i].skip_enabled;
140
+
141
+        ppc->segmentFeatureData[i][0] = h->h.segmentation.feat[i].q_val;
142
+        ppc->segmentFeatureData[i][1] = h->h.segmentation.feat[i].lf_val;
143
+        ppc->segmentFeatureData[i][2] = h->h.segmentation.feat[i].ref_val;
144
+        ppc->segmentFeatureData[i][3] = 0; /* the skip feature only has an enable flag here, so its data slot is always 0 */
145
+    }
146
+    /* Translate avctx->colorspace into the numeric code stored in ppc->colorSpace; unspecified and unlisted values map to 0. Codes presumably follow the VP9 bitstream color_space numbering -- confirm. */
147
+    switch (avctx->colorspace) {
148
+    default:
149
+    case AVCOL_SPC_UNSPECIFIED:
150
+        ppc->colorSpace = 0;
151
+        break;
152
+    case AVCOL_SPC_BT470BG:
153
+        ppc->colorSpace = 1;
154
+        break;
155
+    case AVCOL_SPC_BT709:
156
+        ppc->colorSpace = 2;
157
+        break;
158
+    case AVCOL_SPC_SMPTE170M:
159
+        ppc->colorSpace = 3;
160
+        break;
161
+    case AVCOL_SPC_SMPTE240M:
162
+        ppc->colorSpace = 4;
163
+        break;
164
+    case AVCOL_SPC_BT2020_NCL:
165
+        ppc->colorSpace = 5;
166
+        break;
167
+    case AVCOL_SPC_RESERVED:
168
+        ppc->colorSpace = 6;
169
+        break;
170
+    case AVCOL_SPC_RGB:
171
+        ppc->colorSpace = 7;
172
+        break;
173
+    }
174
+
175
+    return 0;
176
+}
177
+
178
+static int nvdec_vp9_end_frame(AVCodecContext *avctx)
179
+{ /* Finishes the frame via ff_nvdec_end_frame() and returns that call's result. */
180
+    NVDECContext *ctx = avctx->internal->hwaccel_priv_data;
181
+    int ret = ff_nvdec_end_frame(avctx);
182
+    ctx->bitstream = NULL; /* cleared regardless of ret, so nvdec_vp9_decode_slice() records a fresh buffer start on its next call */
183
+    return ret;
184
+}
185
+
186
+static int nvdec_vp9_decode_slice(AVCodecContext *avctx, const uint8_t *buffer, uint32_t size)
187
+{
188
+    NVDECContext *ctx = avctx->internal->hwaccel_priv_data;
189
+    void *tmp;
190
+    /* Records one more slice in ctx without copying the data: returns 0, or AVERROR(ENOMEM) if the offsets array cannot grow. */
191
+    tmp = av_fast_realloc(ctx->slice_offsets, &ctx->slice_offsets_allocated,
192
+                          (ctx->nb_slices + 1) * sizeof(*ctx->slice_offsets));
193
+    if (!tmp)
194
+        return AVERROR(ENOMEM); /* ctx->slice_offsets is left untouched on failure */
195
+    ctx->slice_offsets = tmp;
196
+    /* The first buffer seen while ctx->bitstream is NULL becomes the base that later offsets are measured from. */
197
+    if (!ctx->bitstream)
198
+        ctx->bitstream = (uint8_t*)buffer;
199
+    /* Assumes every buffer passed for one frame lies at or after that base in the same allocation -- verify against the caller. */
200
+    ctx->slice_offsets[ctx->nb_slices] = buffer - ctx->bitstream;
201
+    ctx->bitstream_len += size;
202
+    ctx->nb_slices++;
203
+
204
+    return 0;
205
+}
206
+
207
+static int nvdec_vp9_frame_params(AVCodecContext *avctx,
208
+                                  AVBufferRef *hw_frames_ctx)
209
+{ /* Forwards to ff_nvdec_frame_params() with a fixed count of 8 and returns its result unchanged. */
210
+    // VP9 uses a fixed size pool of 8 possible reference frames
211
+    return ff_nvdec_frame_params(avctx, hw_frames_ctx, 8);
212
+}
213
+
214
+AVHWAccel ff_vp9_nvdec_hwaccel = { /* hwaccel descriptor tying AV_CODEC_ID_VP9 and AV_PIX_FMT_CUDA to the nvdec_vp9_* callbacks in this file */
215
+    .name                 = "vp9_nvdec",
216
+    .type                 = AVMEDIA_TYPE_VIDEO,
217
+    .id                   = AV_CODEC_ID_VP9,
218
+    .pix_fmt              = AV_PIX_FMT_CUDA,
219
+    .start_frame          = nvdec_vp9_start_frame,
220
+    .end_frame            = nvdec_vp9_end_frame,
221
+    .decode_slice         = nvdec_vp9_decode_slice,
222
+    .frame_params         = nvdec_vp9_frame_params,
223
+    .init                 = ff_nvdec_decode_init, /* init and uninit are ff_nvdec_* helpers defined outside this file */
224
+    .uninit               = ff_nvdec_decode_uninit,
225
+    .priv_data_size       = sizeof(NVDECContext),
226
+};
... ...
@@ -29,7 +29,7 @@
29 29
 
30 30
 #define LIBAVCODEC_VERSION_MAJOR  58
31 31
 #define LIBAVCODEC_VERSION_MINOR   3
32
-#define LIBAVCODEC_VERSION_MICRO 100
32
+#define LIBAVCODEC_VERSION_MICRO 101
33 33
 
34 34
 #define LIBAVCODEC_VERSION_INT  AV_VERSION_INT(LIBAVCODEC_VERSION_MAJOR, \
35 35
                                                LIBAVCODEC_VERSION_MINOR, \
... ...
@@ -169,7 +169,10 @@ fail:
169 169
 
170 170
 static int update_size(AVCodecContext *avctx, int w, int h)
171 171
 {
172
-#define HWACCEL_MAX (CONFIG_VP9_DXVA2_HWACCEL + CONFIG_VP9_D3D11VA_HWACCEL * 2 + CONFIG_VP9_VAAPI_HWACCEL)
172
+#define HWACCEL_MAX (CONFIG_VP9_DXVA2_HWACCEL + \
173
+                     CONFIG_VP9_D3D11VA_HWACCEL * 2 + \
174
+                     CONFIG_VP9_NVDEC_HWACCEL + \
175
+                     CONFIG_VP9_VAAPI_HWACCEL)
173 176
     enum AVPixelFormat pix_fmts[HWACCEL_MAX + 2], *fmtp = pix_fmts;
174 177
     VP9Context *s = avctx->priv_data;
175 178
     uint8_t *p;
... ...
@@ -191,12 +194,18 @@ static int update_size(AVCodecContext *avctx, int w, int h)
191 191
             *fmtp++ = AV_PIX_FMT_D3D11VA_VLD;
192 192
             *fmtp++ = AV_PIX_FMT_D3D11;
193 193
 #endif
194
+#if CONFIG_VP9_NVDEC_HWACCEL
195
+            *fmtp++ = AV_PIX_FMT_CUDA;
196
+#endif
194 197
 #if CONFIG_VP9_VAAPI_HWACCEL
195 198
             *fmtp++ = AV_PIX_FMT_VAAPI;
196 199
 #endif
197 200
             break;
198 201
         case AV_PIX_FMT_YUV420P10:
199 202
         case AV_PIX_FMT_YUV420P12:
203
+#if CONFIG_VP9_NVDEC_HWACCEL
204
+            *fmtp++ = AV_PIX_FMT_CUDA;
205
+#endif
200 206
 #if CONFIG_VP9_VAAPI_HWACCEL
201 207
             *fmtp++ = AV_PIX_FMT_VAAPI;
202 208
 #endif