Browse code

avcodec: Implement mpeg4 nvdec hwaccel

This was predictably nightmarish, given how ridiculous mpeg4 is.
I had to stare at the cuvid parser output for a long time to work
out what each field was supposed to be, and even then, I still don't
fully understand some of them. Particularly:

vop_coded: If I'm reading the decoder correctly, this flag will always
be 1 as the decoder will not pass the hwaccel any frame
where it is not 1.
divx_flags: There's obviously no documentation on what the possible
flags are. I simply observed that this is '0' for a
normal bitstream and '5' for packed b-frames.
gmc_enabled: I had a number of guesses as to what this mapped to.
I picked the condition I did based on when the cuvid
parser was setting the flag.

Also note that, as with the vdpau hwaccel, the hardware decoder needs to
be passed the entire frame buffer and not just an individual slice.

Philip Langdale authored on 2017/11/16 13:59:29
Showing 8 changed files
... ...
@@ -13,7 +13,7 @@ version <next>:
13 13
 - PCE support for extended channel layouts in the AAC encoder
14 14
 - native aptX encoder and decoder
15 15
 - Raw aptX muxer and demuxer
16
-- NVIDIA NVDEC-accelerated H.264, HEVC, MPEG-1/2, VC1 and VP9 hwaccel decoding
16
+- NVIDIA NVDEC-accelerated H.264, HEVC, MPEG-1/2/4, VC1 and VP9 hwaccel decoding
17 17
 - Intel QSV-accelerated overlay filter
18 18
 - mcompand audio filter
19 19
 - acontrast audio filter
... ...
@@ -2735,6 +2735,8 @@ mpeg2_xvmc_hwaccel_select="mpeg2video_decoder"
2735 2735
 mpeg4_cuvid_hwaccel_select="mpeg4_cuvid_decoder"
2736 2736
 mpeg4_mediacodec_hwaccel_deps="mediacodec"
2737 2737
 mpeg4_mmal_hwaccel_deps="mmal"
2738
+mpeg4_nvdec_hwaccel_deps="nvdec"
2739
+mpeg4_nvdec_hwaccel_select="mpeg4_decoder"
2738 2740
 mpeg4_vaapi_hwaccel_deps="vaapi"
2739 2741
 mpeg4_vaapi_hwaccel_select="mpeg4_decoder"
2740 2742
 mpeg4_vdpau_hwaccel_deps="vdpau"
... ...
@@ -861,6 +861,7 @@ OBJS-$(CONFIG_MPEG2_VAAPI_HWACCEL)        += vaapi_mpeg2.o
861 861
 OBJS-$(CONFIG_MPEG2_VDPAU_HWACCEL)        += vdpau_mpeg12.o
862 862
 OBJS-$(CONFIG_MPEG2_VIDEOTOOLBOX_HWACCEL) += videotoolbox.o
863 863
 OBJS-$(CONFIG_MPEG2_XVMC_HWACCEL)         += mpegvideo_xvmc.o
864
+OBJS-$(CONFIG_MPEG4_NVDEC_HWACCEL)        += nvdec_mpeg4.o
864 865
 OBJS-$(CONFIG_MPEG4_VAAPI_HWACCEL)        += vaapi_mpeg4.o
865 866
 OBJS-$(CONFIG_MPEG4_VDPAU_HWACCEL)        += vdpau_mpeg4.o
866 867
 OBJS-$(CONFIG_MPEG4_VIDEOTOOLBOX_HWACCEL) += videotoolbox.o
... ...
@@ -106,6 +106,7 @@ static void register_all(void)
106 106
     REGISTER_HWACCEL(MPEG4_CUVID,       mpeg4_cuvid);
107 107
     REGISTER_HWACCEL(MPEG4_MEDIACODEC,  mpeg4_mediacodec);
108 108
     REGISTER_HWACCEL(MPEG4_MMAL,        mpeg4_mmal);
109
+    REGISTER_HWACCEL(MPEG4_NVDEC,       mpeg4_nvdec);
109 110
     REGISTER_HWACCEL(MPEG4_VAAPI,       mpeg4_vaapi);
110 111
     REGISTER_HWACCEL(MPEG4_VDPAU,       mpeg4_vdpau);
111 112
     REGISTER_HWACCEL(MPEG4_VIDEOTOOLBOX, mpeg4_videotoolbox);
... ...
@@ -714,6 +714,9 @@ const enum AVPixelFormat ff_h263_hwaccel_pixfmt_list_420[] = {
714 714
 #if CONFIG_H263_VAAPI_HWACCEL || CONFIG_MPEG4_VAAPI_HWACCEL
715 715
     AV_PIX_FMT_VAAPI,
716 716
 #endif
717
+#if CONFIG_MPEG4_NVDEC_HWACCEL
718
+    AV_PIX_FMT_CUDA,
719
+#endif
717 720
 #if CONFIG_MPEG4_VDPAU_HWACCEL
718 721
     AV_PIX_FMT_VDPAU,
719 722
 #endif
... ...
@@ -56,6 +56,7 @@ static int map_avcodec_id(enum AVCodecID id)
56 56
     case AV_CODEC_ID_HEVC:       return cudaVideoCodec_HEVC;
57 57
     case AV_CODEC_ID_MPEG1VIDEO: return cudaVideoCodec_MPEG1;
58 58
     case AV_CODEC_ID_MPEG2VIDEO: return cudaVideoCodec_MPEG2;
59
+    case AV_CODEC_ID_MPEG4:      return cudaVideoCodec_MPEG4;
59 60
     case AV_CODEC_ID_VC1:        return cudaVideoCodec_VC1;
60 61
     case AV_CODEC_ID_VP9:        return cudaVideoCodec_VP9;
61 62
     case AV_CODEC_ID_WMV3:       return cudaVideoCodec_VC1;
62 63
new file mode 100644
... ...
@@ -0,0 +1,121 @@
0
+/*
1
+ * MPEG-4 Part 2 HW decode acceleration through NVDEC
2
+ *
3
+ * Copyright (c) 2017 Philip Langdale
4
+ *
5
+ * This file is part of FFmpeg.
6
+ *
7
+ * FFmpeg is free software; you can redistribute it and/or
8
+ * modify it under the terms of the GNU Lesser General Public
9
+ * License as published by the Free Software Foundation; either
10
+ * version 2.1 of the License, or (at your option) any later version.
11
+ *
12
+ * FFmpeg is distributed in the hope that it will be useful,
13
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15
+ * Lesser General Public License for more details.
16
+ *
17
+ * You should have received a copy of the GNU Lesser General Public
18
+ * License along with FFmpeg; if not, write to the Free Software
19
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20
+ */
21
+
22
+#include "avcodec.h"
23
+#include "mpeg4video.h"
24
+#include "nvdec.h"
25
+#include "decode.h"
26
+
27
+static int nvdec_mpeg4_start_frame(AVCodecContext *avctx, const uint8_t *buffer, uint32_t size)
28
+{
29
+    Mpeg4DecContext *m = avctx->priv_data;
30
+    MpegEncContext *s = &m->m;
31
+
32
+    NVDECContext      *ctx = avctx->internal->hwaccel_priv_data;
33
+    CUVIDPICPARAMS     *pp = &ctx->pic_params;
34
+    CUVIDMPEG4PICPARAMS *ppc = &pp->CodecSpecific.mpeg4;
35
+    FrameDecodeData *fdd;
36
+    NVDECFrame *cf;
37
+    AVFrame *cur_frame = s->current_picture.f;
38
+
39
+    int ret, i;
40
+
41
+    ret = ff_nvdec_start_frame(avctx, cur_frame);
42
+    if (ret < 0)
43
+        return ret;
44
+
45
+    fdd = (FrameDecodeData*)cur_frame->private_ref->data;
46
+    cf  = (NVDECFrame*)fdd->hwaccel_priv;
47
+
48
+    *pp = (CUVIDPICPARAMS) {
49
+        .PicWidthInMbs     = (cur_frame->width  + 15) / 16,
50
+        .FrameHeightInMbs  = (cur_frame->height + 15) / 16,
51
+        .CurrPicIdx        = cf->idx,
52
+
53
+        .intra_pic_flag    = s->pict_type == AV_PICTURE_TYPE_I,
54
+        .ref_pic_flag      = s->pict_type == AV_PICTURE_TYPE_I ||
55
+                             s->pict_type == AV_PICTURE_TYPE_P ||
56
+                             s->pict_type == AV_PICTURE_TYPE_S,
57
+
58
+        .CodecSpecific.mpeg4 = {
59
+            .ForwardRefIdx                = ff_nvdec_get_ref_idx(s->last_picture.f),
60
+            .BackwardRefIdx               = ff_nvdec_get_ref_idx(s->next_picture.f),
61
+
62
+            .video_object_layer_width     = s->width,
63
+            .video_object_layer_height    = s->height,
64
+            .vop_time_increment_bitcount  = m->time_increment_bits,
65
+            .top_field_first              = s->top_field_first,
66
+            .resync_marker_disable        = !m->resync_marker,
67
+            .quant_type                   = s->mpeg_quant,
68
+            .quarter_sample               = s->quarter_sample,
69
+            .short_video_header           = avctx->codec->id == AV_CODEC_ID_H263,
70
+            .divx_flags                   = s->divx_packed ? 5 : 0,
71
+
72
+            .vop_coding_type              = s->pict_type - AV_PICTURE_TYPE_I,
73
+            .vop_coded                    = 1,
74
+            .vop_rounding_type            = s->no_rounding,
75
+            .alternate_vertical_scan_flag = s->alternate_scan,
76
+            .interlaced                   = !s->progressive_sequence,
77
+            .vop_fcode_forward            = s->f_code,
78
+            .vop_fcode_backward           = s->b_code,
79
+            .trd                          = { s->pp_time, s->pp_field_time >> 1 },
80
+            .trb                          = { s->pb_time, s->pb_field_time >> 1 },
81
+
82
+            .gmc_enabled                  = s->pict_type == AV_PICTURE_TYPE_S &&
83
+                                            m->vol_sprite_usage == GMC_SPRITE,
84
+        }
85
+    };
86
+
87
+    for (i = 0; i < 64; ++i) {
88
+        ppc->QuantMatrixIntra[i] = s->intra_matrix[i];
89
+        ppc->QuantMatrixInter[i] = s->inter_matrix[i];
90
+    }
91
+
92
+    // We need to pass the full frame buffer and not just the slice
93
+    return ff_nvdec_simple_decode_slice(avctx, buffer, size);
94
+}
95
+
96
+static int nvdec_mpeg4_decode_slice(AVCodecContext *avctx, const uint8_t *buffer, uint32_t size)
97
+{
98
+    return 0;
99
+}
100
+
101
+static int nvdec_mpeg4_frame_params(AVCodecContext *avctx,
102
+                                  AVBufferRef *hw_frames_ctx)
103
+{
104
+    // Each frame can at most have one P and one B reference
105
+    return ff_nvdec_frame_params(avctx, hw_frames_ctx, 2);
106
+}
107
+
108
+AVHWAccel ff_mpeg4_nvdec_hwaccel = {
109
+    .name                 = "mpeg4_nvdec",
110
+    .type                 = AVMEDIA_TYPE_VIDEO,
111
+    .id                   = AV_CODEC_ID_MPEG4,
112
+    .pix_fmt              = AV_PIX_FMT_CUDA,
113
+    .start_frame          = nvdec_mpeg4_start_frame,
114
+    .end_frame            = ff_nvdec_simple_end_frame,
115
+    .decode_slice         = nvdec_mpeg4_decode_slice,
116
+    .frame_params         = nvdec_mpeg4_frame_params,
117
+    .init                 = ff_nvdec_decode_init,
118
+    .uninit               = ff_nvdec_decode_uninit,
119
+    .priv_data_size       = sizeof(NVDECContext),
120
+};
... ...
@@ -29,7 +29,7 @@
29 29
 
30 30
 #define LIBAVCODEC_VERSION_MAJOR  58
31 31
 #define LIBAVCODEC_VERSION_MINOR   3
32
-#define LIBAVCODEC_VERSION_MICRO 104
32
+#define LIBAVCODEC_VERSION_MICRO 105
33 33
 
34 34
 #define LIBAVCODEC_VERSION_INT  AV_VERSION_INT(LIBAVCODEC_VERSION_MAJOR, \
35 35
                                                LIBAVCODEC_VERSION_MINOR, \