Browse code

avcodec: implement vp9 dxva2 hwaccel

Hendrik Leppkes authored on 2015/12/03 19:09:51
Showing 7 changed files
... ...
@@ -42,6 +42,7 @@ version <next>:
42 42
 - mipsdspr1 option has been renamed to mipsdsp
43 43
 - aemphasis filter
44 44
 - mips32r5 option has been removed
45
+- DXVA2-accelerated VP9 decoding
45 46
 
46 47
 
47 48
 version 2.8:
... ...
@@ -2567,6 +2567,10 @@ vc1_mmal_hwaccel_deps="mmal"
2567 2567
 vc1_mmal_decoder_select="vc1_decoder"
2568 2568
 vc1_qsv_hwaccel_deps="libmfx"
2569 2569
 vc1_qsv_hwaccel_select="qsvdec_vc1"
2570
+vp9_d3d11va_hwaccel_deps="dxva2 DXVA_PicParams_VP9"
2571
+vp9_d3d11va_hwaccel_select="vp9_decoder"
2572
+vp9_dxva2_hwaccel_deps="dxva2 DXVA_PicParams_VP9"
2573
+vp9_dxva2_hwaccel_select="vp9_decoder"
2570 2574
 wmv3_crystalhd_decoder_select="crystalhd"
2571 2575
 wmv3_d3d11va_hwaccel_select="vc1_d3d11va_hwaccel"
2572 2576
 wmv3_dxva2_hwaccel_select="vc1_dxva2_hwaccel"
... ...
@@ -5297,6 +5301,7 @@ check_lib "CoreServices/CoreServices.h" UTGetOSTypeFromString "-framework CoreSe
5297 5297
 check_struct "sys/time.h sys/resource.h" "struct rusage" ru_maxrss
5298 5298
 
5299 5299
 check_type "windows.h dxva.h" "DXVA_PicParams_HEVC" -DWINAPI_FAMILY=WINAPI_FAMILY_DESKTOP_APP -D_CRT_BUILD_DESKTOP_APP=0
5300
+check_type "windows.h dxva.h" "DXVA_PicParams_VP9" -DWINAPI_FAMILY=WINAPI_FAMILY_DESKTOP_APP -D_CRT_BUILD_DESKTOP_APP=0
5300 5301
 check_type "windows.h d3d11.h" "ID3D11VideoDecoder"
5301 5302
 check_type "windows.h d3d11.h" "ID3D11VideoContext"
5302 5303
 check_type "d3d9.h dxva2api.h" DXVA2_ConfigPictureDecode -D_WIN32_WINNT=0x0602
... ...
@@ -745,6 +745,8 @@ OBJS-$(CONFIG_VC1_D3D11VA_HWACCEL)        += dxva2_vc1.o
745 745
 OBJS-$(CONFIG_VC1_DXVA2_HWACCEL)          += dxva2_vc1.o
746 746
 OBJS-$(CONFIG_VC1_VAAPI_HWACCEL)          += vaapi_vc1.o
747 747
 OBJS-$(CONFIG_VC1_VDPAU_HWACCEL)          += vdpau_vc1.o
748
+OBJS-$(CONFIG_VP9_D3D11VA_HWACCEL)        += dxva2_vp9.o
749
+OBJS-$(CONFIG_VP9_DXVA2_HWACCEL)          += dxva2_vp9.o
748 750
 
749 751
 # libavformat dependencies
750 752
 OBJS-$(CONFIG_ADTS_MUXER)              += mpeg4audio.o
... ...
@@ -110,6 +110,8 @@ void avcodec_register_all(void)
110 110
     REGISTER_HWACCEL(VC1_VDPAU,         vc1_vdpau);
111 111
     REGISTER_HWACCEL(VC1_MMAL,          vc1_mmal);
112 112
     REGISTER_HWACCEL(VC1_QSV,           vc1_qsv);
113
+    REGISTER_HWACCEL(VP9_D3D11VA,       vp9_d3d11va);
114
+    REGISTER_HWACCEL(VP9_DXVA2,         vp9_dxva2);
113 115
     REGISTER_HWACCEL(WMV3_D3D11VA,      wmv3_d3d11va);
114 116
     REGISTER_HWACCEL(WMV3_DXVA2,        wmv3_dxva2);
115 117
     REGISTER_HWACCEL(WMV3_VAAPI,        wmv3_vaapi);
116 118
new file mode 100644
... ...
@@ -0,0 +1,337 @@
0
+/*
1
+ * DXVA2 VP9 HW acceleration.
2
+ *
3
+ * copyright (c) 2015 Hendrik Leppkes
4
+ *
5
+ * This file is part of FFmpeg.
6
+ *
7
+ * FFmpeg is free software; you can redistribute it and/or
8
+ * modify it under the terms of the GNU Lesser General Public
9
+ * License as published by the Free Software Foundation; either
10
+ * version 2.1 of the License, or (at your option) any later version.
11
+ *
12
+ * FFmpeg is distributed in the hope that it will be useful,
13
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15
+ * Lesser General Public License for more details.
16
+ *
17
+ * You should have received a copy of the GNU Lesser General Public
18
+ * License along with FFmpeg; if not, write to the Free Software
19
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20
+ */
21
+
22
+#include "libavutil/avassert.h"
23
+#include "libavutil/pixdesc.h"
24
+
25
+#include "vp9.h"
26
+
27
+// The headers above may include w32threads.h, which uses the original
28
+// _WIN32_WINNT define, while dxva2_internal.h redefines it to target a
29
+// potentially newer version.
30
+#include "dxva2_internal.h"
31
+/* Per-frame hwaccel private data; its size is advertised through frame_priv_data_size. */
32
+struct vp9_dxva2_picture_context {
33
+    DXVA_PicParams_VP9    pp; /* filled by fill_picture_parameters() in start_frame */
34
+    DXVA_Slice_VPx_Short  slice; /* filled by fill_slice_short() in decode_slice */
35
+    const uint8_t         *bitstream; /* first buffer passed to decode_slice; NULL until then */
36
+    unsigned              bitstream_size; /* sum of the sizes passed to decode_slice */
37
+};
38
+/* Pack a surface index (bits 0-6) and a flag (bit 7) into pic->bPicEntry. */
39
+static void fill_picture_entry(DXVA_PicEntry_VPx *pic,
40
+                               unsigned index, unsigned flag)
41
+{
42
+    av_assert0((index & 0x7f) == index && (flag & 0x01) == flag); /* both values must fit their bit fields */
43
+    pic->bPicEntry = index | (flag << 7);
44
+}
45
+/* Populate *pp from the parsed VP9 frame header in h. Returns 0, or -1 when avctx->sw_pix_fmt has no pixel format descriptor. */
46
+static int fill_picture_parameters(const AVCodecContext *avctx, AVDXVAContext *ctx, const VP9SharedContext *h,
47
+                                    DXVA_PicParams_VP9 *pp)
48
+{
49
+    int i;
50
+    const AVPixFmtDescriptor * pixdesc = av_pix_fmt_desc_get(avctx->sw_pix_fmt); /* supplies chroma subsampling and bit depth below */
51
+
52
+    if (!pixdesc)
53
+        return -1;
54
+
55
+    memset(pp, 0, sizeof(*pp));
56
+
57
+    fill_picture_entry(&pp->CurrPic, ff_dxva2_get_surface_index(avctx, ctx, h->frames[CUR_FRAME].tf.f), 0);
58
+
59
+    pp->profile = h->h.profile;
60
+    pp->wFormatAndPictureInfoFlags = ((h->h.keyframe == 0)   <<  0) | /* bit set for non-keyframes */
61
+                                     ((h->h.invisible == 0)  <<  1) | /* bit set unless the frame is invisible */
62
+                                     (h->h.errorres          <<  2) |
63
+                                     (pixdesc->log2_chroma_w <<  3) | /* subsampling_x */
64
+                                     (pixdesc->log2_chroma_h <<  4) | /* subsampling_y */
65
+                                     (0                      <<  5) | /* extra_plane */
66
+                                     (h->h.refreshctx        <<  6) |
67
+                                     (h->h.parallelmode      <<  7) |
68
+                                     (h->h.intraonly         <<  8) |
69
+                                     (h->h.framectxid        <<  9) |
70
+                                     (h->h.resetctx          << 11) |
71
+                                     ((h->h.keyframe ? 0 : h->h.highprecisionmvs) << 13) | /* forced to 0 on keyframes */
72
+                                     (0                      << 14);  /* ReservedFormatInfo2Bits */
73
+
74
+    pp->width  = avctx->width;
75
+    pp->height = avctx->height;
76
+    pp->BitDepthMinus8Luma   = pixdesc->comp[0].depth - 8;
77
+    pp->BitDepthMinus8Chroma = pixdesc->comp[1].depth - 8;
78
+    /* swap 0/1 to match the reference */
79
+    pp->interp_filter = h->h.filtermode ^ (h->h.filtermode <= 1);
80
+    pp->Reserved8Bits = 0;
81
+    /* ref_frame_map: surface index and frame size for each of the 8 reference slots */
82
+    for (i = 0; i < 8; i++) {
83
+        if (h->refs[i].f->buf[0]) {
84
+            fill_picture_entry(&pp->ref_frame_map[i], ff_dxva2_get_surface_index(avctx, ctx, h->refs[i].f), 0);
85
+            pp->ref_frame_coded_width[i]  = h->refs[i].f->width;
86
+            pp->ref_frame_coded_height[i] = h->refs[i].f->height;
87
+        } else
88
+            pp->ref_frame_map[i].bPicEntry = 0xFF; /* slot has no buffer */
89
+    }
90
+    /* frame_refs: the 3 references selected through h->h.refidx */
91
+    for (i = 0; i < 3; i++) {
92
+        uint8_t refidx = h->h.refidx[i];
93
+        if (h->refs[refidx].f->buf[0])
94
+            fill_picture_entry(&pp->frame_refs[i], ff_dxva2_get_surface_index(avctx, ctx, h->refs[refidx].f), 0);
95
+        else
96
+            pp->frame_refs[i].bPicEntry = 0xFF; /* selected slot has no buffer */
97
+
98
+        pp->ref_frame_sign_bias[i + 1] = h->h.signbias[i]; /* entry 0 stays 0 from the memset above */
99
+    }
100
+
101
+    pp->filter_level    = h->h.filter.level;
102
+    pp->sharpness_level = h->h.filter.sharpness;
103
+
104
+    pp->wControlInfoFlags = (h->h.lf_delta.enabled   << 0) |
105
+                            (h->h.lf_delta.updated   << 1) |
106
+                            (h->h.use_last_frame_mvs << 2) |
107
+                            (0                       << 3);  /* ReservedControlInfo5Bits */
108
+
109
+    for (i = 0; i < 4; i++)
110
+        pp->ref_deltas[i]  = h->h.lf_delta.ref[i];
111
+
112
+    for (i = 0; i < 2; i++)
113
+        pp->mode_deltas[i]  = h->h.lf_delta.mode[i];
114
+
115
+    pp->base_qindex   = h->h.yac_qi;
116
+    pp->y_dc_delta_q  = h->h.ydc_qdelta;
117
+    pp->uv_dc_delta_q = h->h.uvdc_qdelta;
118
+    pp->uv_ac_delta_q = h->h.uvac_qdelta;
119
+
120
+    /* segmentation data */
121
+    pp->stVP9Segments.wSegmentInfoFlags = (h->h.segmentation.enabled       << 0) |
122
+                                          (h->h.segmentation.update_map    << 1) |
123
+                                          (h->h.segmentation.temporal      << 2) |
124
+                                          (h->h.segmentation.absolute_vals << 3) |
125
+                                          (0                               << 4);  /* ReservedSegmentFlags4Bits */
126
+
127
+    for (i = 0; i < 7; i++)
128
+        pp->stVP9Segments.tree_probs[i] = h->h.segmentation.prob[i];
129
+
130
+    if (h->h.segmentation.temporal)
131
+        for (i = 0; i < 3; i++)
132
+            pp->stVP9Segments.pred_probs[i] = h->h.segmentation.pred_prob[i];
133
+    else
134
+        memset(pp->stVP9Segments.pred_probs, 255, sizeof(pp->stVP9Segments.pred_probs)); /* no temporal prediction: every pred_prob is 255 */
135
+
136
+    for (i = 0; i < 8; i++) {
137
+        pp->stVP9Segments.feature_mask[i] = (h->h.segmentation.feat[i].q_enabled    << 0) |
138
+                                            (h->h.segmentation.feat[i].lf_enabled   << 1) |
139
+                                            (h->h.segmentation.feat[i].ref_enabled  << 2) |
140
+                                            (h->h.segmentation.feat[i].skip_enabled << 3);
141
+
142
+        pp->stVP9Segments.feature_data[i][0] = h->h.segmentation.feat[i].q_val;
143
+        pp->stVP9Segments.feature_data[i][1] = h->h.segmentation.feat[i].lf_val;
144
+        pp->stVP9Segments.feature_data[i][2] = h->h.segmentation.feat[i].ref_val;
145
+        pp->stVP9Segments.feature_data[i][3] = 0; /* no data for skip */
146
+    }
147
+
148
+    pp->log2_tile_cols = h->h.tiling.log2_tile_cols;
149
+    pp->log2_tile_rows = h->h.tiling.log2_tile_rows;
150
+
151
+    pp->uncompressed_header_size_byte_aligned = h->h.uncompressed_header_size;
152
+    pp->first_partition_size = h->h.compressed_header_size;
153
+
154
+    pp->StatusReportFeedbackNumber = 1 + DXVA_CONTEXT_REPORT_ID(avctx, ctx)++; /* the context's report id is post-incremented */
155
+    return 0;
156
+}
157
+/* Reset *slice and record the data offset (position) and byte count (size) of the frame data. */
158
+static void fill_slice_short(DXVA_Slice_VPx_Short *slice,
159
+                             unsigned position, unsigned size)
160
+{
161
+    memset(slice, 0, sizeof(*slice));
162
+    slice->BSNALunitDataLocation = position;
163
+    slice->SliceBytesInBuffer    = size;
164
+    slice->wBadSliceChopping     = 0;
165
+}
166
+/* Copy the frame into the decoder's bitstream buffer (zero-padded), describe it in *bs, then commit the slice control buffer into *sc. Returns -1 on failure, otherwise the result of ff_dxva2_commit_buffer(). */
167
+static int commit_bitstream_and_slice_buffer(AVCodecContext *avctx,
168
+                                             DECODER_BUFFER_DESC *bs,
169
+                                             DECODER_BUFFER_DESC *sc)
170
+{
171
+    const VP9SharedContext *h = avctx->priv_data;
172
+    AVDXVAContext *ctx = avctx->hwaccel_context;
173
+    struct vp9_dxva2_picture_context *ctx_pic = h->frames[CUR_FRAME].hwaccel_picture_private;
174
+    void     *dxva_data_ptr;
175
+    uint8_t  *dxva_data;
176
+    unsigned dxva_size;
177
+    unsigned padding;
178
+    unsigned type;
179
+    /* Acquire the bitstream buffer through whichever API matches avctx->pix_fmt. */
180
+#if CONFIG_D3D11VA
181
+    if (avctx->pix_fmt == AV_PIX_FMT_D3D11VA_VLD) {
182
+        type = D3D11_VIDEO_DECODER_BUFFER_BITSTREAM;
183
+        if (FAILED(ID3D11VideoContext_GetDecoderBuffer(D3D11VA_CONTEXT(ctx)->video_context,
184
+                                                       D3D11VA_CONTEXT(ctx)->decoder,
185
+                                                       type,
186
+                                                       &dxva_size, &dxva_data_ptr)))
187
+            return -1;
188
+    }
189
+#endif
190
+#if CONFIG_DXVA2
191
+    if (avctx->pix_fmt == AV_PIX_FMT_DXVA2_VLD) {
192
+        type = DXVA2_BitStreamDateBufferType; /* (sic) this is how dxva2api.h spells the constant */
193
+        if (FAILED(IDirectXVideoDecoder_GetBuffer(DXVA2_CONTEXT(ctx)->decoder,
194
+                                                  type,
195
+                                                  &dxva_data_ptr, &dxva_size)))
196
+            return -1;
197
+    }
198
+#endif
199
+    /* Copy and pad only when the frame fits; an oversized frame is reported after the buffer is released. */
200
+    dxva_data = dxva_data_ptr;
201
+    if (ctx_pic->slice.SliceBytesInBuffer <= dxva_size) {
202
+        memcpy(dxva_data, ctx_pic->bitstream, ctx_pic->slice.SliceBytesInBuffer);
203
+        padding = FFMIN(128 - ((ctx_pic->slice.SliceBytesInBuffer) & 127), dxva_size - ctx_pic->slice.SliceBytesInBuffer); /* zero padding up to the next multiple of 128, capped by the space left */
204
+        if (padding > 0) {
205
+            memset(dxva_data + ctx_pic->slice.SliceBytesInBuffer, 0, padding);
206
+            ctx_pic->slice.SliceBytesInBuffer += padding;
207
+        }
208
+    }
209
+    /* Always hand the buffer back, including on the oversized-frame path. */
210
+#if CONFIG_D3D11VA
211
+    if (avctx->pix_fmt == AV_PIX_FMT_D3D11VA_VLD)
212
+        if (FAILED(ID3D11VideoContext_ReleaseDecoderBuffer(D3D11VA_CONTEXT(ctx)->video_context, D3D11VA_CONTEXT(ctx)->decoder, type)))
213
+            return -1;
214
+#endif
215
+#if CONFIG_DXVA2
216
+    if (avctx->pix_fmt == AV_PIX_FMT_DXVA2_VLD)
217
+        if (FAILED(IDirectXVideoDecoder_ReleaseBuffer(DXVA2_CONTEXT(ctx)->decoder, type)))
218
+            return -1;
219
+#endif
220
+
221
+    if (ctx_pic->slice.SliceBytesInBuffer > dxva_size) { /* only true when nothing was copied above */
222
+        av_log(avctx, AV_LOG_ERROR, "Failed to build bitstream\n");
223
+        return -1;
224
+    }
225
+
226
+#if CONFIG_D3D11VA
227
+    if (avctx->pix_fmt == AV_PIX_FMT_D3D11VA_VLD) {
228
+        D3D11_VIDEO_DECODER_BUFFER_DESC *dsc11 = bs;
229
+        memset(dsc11, 0, sizeof(*dsc11));
230
+        dsc11->BufferType           = type;
231
+        dsc11->DataSize             = ctx_pic->slice.SliceBytesInBuffer;
232
+        dsc11->NumMBsInBuffer       = 0;
233
+
234
+        type = D3D11_VIDEO_DECODER_BUFFER_SLICE_CONTROL; /* buffer type for the commit below */
235
+    }
236
+#endif
237
+#if CONFIG_DXVA2
238
+    if (avctx->pix_fmt == AV_PIX_FMT_DXVA2_VLD) {
239
+        DXVA2_DecodeBufferDesc *dsc2 = bs;
240
+        memset(dsc2, 0, sizeof(*dsc2));
241
+        dsc2->CompressedBufferType = type;
242
+        dsc2->DataSize             = ctx_pic->slice.SliceBytesInBuffer;
243
+        dsc2->NumMBsInBuffer       = 0;
244
+
245
+        type = DXVA2_SliceControlBufferType; /* buffer type for the commit below */
246
+    }
247
+#endif
248
+
249
+    return ff_dxva2_commit_buffer(avctx, ctx, sc,
250
+                                  type,
251
+                                  &ctx_pic->slice, sizeof(ctx_pic->slice), 0);
252
+}
253
+
254
+/* Validate the decoder context and build the picture parameters for the current frame; buffer and size are unused. Returns 0, or -1 on failure. */
255
+static int dxva2_vp9_start_frame(AVCodecContext *avctx,
256
+                                 av_unused const uint8_t *buffer,
257
+                                 av_unused uint32_t size)
258
+{
259
+    const VP9SharedContext *h = avctx->priv_data;
260
+    AVDXVAContext *ctx = avctx->hwaccel_context;
261
+    struct vp9_dxva2_picture_context *ctx_pic = h->frames[CUR_FRAME].hwaccel_picture_private;
262
+
263
+    if (DXVA_CONTEXT_DECODER(avctx, ctx) == NULL ||
264
+        DXVA_CONTEXT_CFG(avctx, ctx) == NULL ||
265
+        DXVA_CONTEXT_COUNT(avctx, ctx) <= 0)
266
+        return -1;
267
+    av_assert0(ctx_pic);
268
+
269
+    /* Fill up DXVA_PicParams_VP9 */
270
+    if (fill_picture_parameters(avctx, ctx, h, &ctx_pic->pp) < 0)
271
+        return -1;
272
+    /* No frame data recorded yet; decode_slice sets these. */
273
+    ctx_pic->bitstream_size = 0;
274
+    ctx_pic->bitstream      = NULL;
275
+    return 0;
276
+}
277
+/* Record the frame data for end_frame: remember the first buffer, accumulate the size, and rewrite the slice control entry. Always returns 0. */
278
+static int dxva2_vp9_decode_slice(AVCodecContext *avctx,
279
+                                  const uint8_t *buffer,
280
+                                  uint32_t size)
281
+{
282
+    const VP9SharedContext *h = avctx->priv_data;
283
+    struct vp9_dxva2_picture_context *ctx_pic = h->frames[CUR_FRAME].hwaccel_picture_private;
284
+    unsigned position;
285
+
286
+    if (!ctx_pic->bitstream)
287
+        ctx_pic->bitstream = buffer; /* the first call defines the start of the bitstream */
288
+    ctx_pic->bitstream_size += size;
289
+
290
+    position = buffer - ctx_pic->bitstream; /* offset of this buffer from the first one (0 on the first call) */
291
+    fill_slice_short(&ctx_pic->slice, position, size); /* overwrites any previously recorded slice */
292
+
293
+    return 0;
294
+}
295
+/* Submit the current frame through ff_dxva2_common_end_frame(); returns its result, or -1 when no frame data was recorded. */
296
+static int dxva2_vp9_end_frame(AVCodecContext *avctx)
297
+{
298
+    VP9SharedContext *h = avctx->priv_data;
299
+    struct vp9_dxva2_picture_context *ctx_pic = h->frames[CUR_FRAME].hwaccel_picture_private;
300
+    int ret;
301
+
302
+    if (ctx_pic->bitstream_size <= 0) /* bitstream_size is unsigned, so this tests for zero */
303
+        return -1;
304
+
305
+    ret = ff_dxva2_common_end_frame(avctx, h->frames[CUR_FRAME].tf.f,
306
+                                    &ctx_pic->pp, sizeof(ctx_pic->pp),
307
+                                    NULL, 0,
308
+                                    commit_bitstream_and_slice_buffer);
309
+    return ret;
310
+}
311
+/* VP9 hwaccel descriptor for AV_PIX_FMT_DXVA2_VLD; only built when CONFIG_VP9_DXVA2_HWACCEL is non-zero. */
312
+#if CONFIG_VP9_DXVA2_HWACCEL
313
+AVHWAccel ff_vp9_dxva2_hwaccel = {
314
+    .name           = "vp9_dxva2",
315
+    .type           = AVMEDIA_TYPE_VIDEO,
316
+    .id             = AV_CODEC_ID_VP9,
317
+    .pix_fmt        = AV_PIX_FMT_DXVA2_VLD,
318
+    .start_frame    = dxva2_vp9_start_frame,
319
+    .decode_slice   = dxva2_vp9_decode_slice,
320
+    .end_frame      = dxva2_vp9_end_frame,
321
+    .frame_priv_data_size = sizeof(struct vp9_dxva2_picture_context), /* size of the per-frame private context */
322
+};
323
+#endif
324
+/* VP9 hwaccel descriptor for AV_PIX_FMT_D3D11VA_VLD; same callbacks as the DXVA2 one, only built when CONFIG_VP9_D3D11VA_HWACCEL is non-zero. */
325
+#if CONFIG_VP9_D3D11VA_HWACCEL
326
+AVHWAccel ff_vp9_d3d11va_hwaccel = {
327
+    .name           = "vp9_d3d11va",
328
+    .type           = AVMEDIA_TYPE_VIDEO,
329
+    .id             = AV_CODEC_ID_VP9,
330
+    .pix_fmt        = AV_PIX_FMT_D3D11VA_VLD,
331
+    .start_frame    = dxva2_vp9_start_frame,
332
+    .decode_slice   = dxva2_vp9_decode_slice,
333
+    .end_frame      = dxva2_vp9_end_frame,
334
+    .frame_priv_data_size = sizeof(struct vp9_dxva2_picture_context), /* size of the per-frame private context */
335
+};
336
+#endif
... ...
@@ -29,8 +29,8 @@
29 29
 #include "libavutil/version.h"
30 30
 
31 31
 #define LIBAVCODEC_VERSION_MAJOR  57
32
-#define LIBAVCODEC_VERSION_MINOR  16
33
-#define LIBAVCODEC_VERSION_MICRO 101
32
+#define LIBAVCODEC_VERSION_MINOR  17
33
+#define LIBAVCODEC_VERSION_MICRO 100
34 34
 
35 35
 #define LIBAVCODEC_VERSION_INT  AV_VERSION_INT(LIBAVCODEC_VERSION_MAJOR, \
36 36
                                                LIBAVCODEC_VERSION_MINOR, \
... ...
@@ -240,7 +240,7 @@ fail:
240 240
 
241 241
 static int update_size(AVCodecContext *ctx, int w, int h)
242 242
 {
243
-#define HWACCEL_MAX (0)
243
+#define HWACCEL_MAX (CONFIG_VP9_DXVA2_HWACCEL + CONFIG_VP9_D3D11VA_HWACCEL)
244 244
     enum AVPixelFormat pix_fmts[HWACCEL_MAX + 2], *fmtp = pix_fmts;
245 245
     VP9Context *s = ctx->priv_data;
246 246
     uint8_t *p;
... ...
@@ -254,6 +254,15 @@ static int update_size(AVCodecContext *ctx, int w, int h)
254 254
     if ((res = ff_set_dimensions(ctx, w, h)) < 0)
255 255
         return res;
256 256
 
257
+    if (s->pix_fmt == AV_PIX_FMT_YUV420P) {
258
+#if CONFIG_VP9_DXVA2_HWACCEL
259
+        *fmtp++ = AV_PIX_FMT_DXVA2_VLD;
260
+#endif
261
+#if CONFIG_VP9_D3D11VA_HWACCEL
262
+        *fmtp++ = AV_PIX_FMT_D3D11VA_VLD;
263
+#endif
264
+    }
265
+
257 266
     *fmtp++ = s->pix_fmt;
258 267
     *fmtp = AV_PIX_FMT_NONE;
259 268