GitList

Browse code

avcodec: implement vp9 nvdec hwaccel

Timo Rothenpieler authored on 2017/11/12 05:19:07
Showing 8 changed files

Changelog index 869ecde..68829f2 100644
configure index 46b4815..da6d644 100755
libavcodec/Makefile index 8c5dfd6..2476aec 100644
libavcodec/allcodecs.c index c817003..8ac9d96 100644
libavcodec/nvdec.c index 274f69f..7d768b2 100644
libavcodec/nvdec_vp9.c index 0000000..06c13e6
libavcodec/version.h index 9c17e57..9d1b0ee 100644
libavcodec/vp9.c index 6b5de19..6ce9be6 100644

@@ -13,7 +13,7 @@ version <next>:
                      - PCE support for extended channel layouts in the AAC encoder
                      - native aptX encoder and decoder
                      - Raw aptX muxer and demuxer
                     -- NVIDIA NVDEC-accelerated H.264 and HEVC hwaccel decoding
                     +- NVIDIA NVDEC-accelerated H.264, HEVC and VP9 hwaccel decoding
                      - Intel QSV-accelerated overlay filter

configure

History View file @ f3f73f0

@@ -2764,6 +2764,8 @@ vp9_d3d11va2_hwaccel_select="vp9_decoder"
                      vp9_dxva2_hwaccel_deps="dxva2 DXVA_PicParams_VP9"
                      vp9_dxva2_hwaccel_select="vp9_decoder"
                      vp9_mediacodec_hwaccel_deps="mediacodec"
                     +vp9_nvdec_hwaccel_deps="cuda nvdec"
                     +vp9_nvdec_hwaccel_select="vp9_decoder"
                      vp9_vaapi_hwaccel_deps="vaapi VADecPictureParameterBufferVP9_bit_depth"
                      vp9_vaapi_hwaccel_select="vp9_decoder"
                      wmv3_d3d11va_hwaccel_select="vc1_d3d11va_hwaccel"

libavcodec/Makefile

History View file @ f3f73f0

@@ -869,6 +869,7 @@ OBJS-$(CONFIG_VC1_VAAPI_HWACCEL)          += vaapi_vc1.o
                      OBJS-$(CONFIG_VC1_VDPAU_HWACCEL)          += vdpau_vc1.o
                      OBJS-$(CONFIG_VP9_D3D11VA_HWACCEL)        += dxva2_vp9.o
                      OBJS-$(CONFIG_VP9_DXVA2_HWACCEL)          += dxva2_vp9.o
                     +OBJS-$(CONFIG_VP9_NVDEC_HWACCEL)          += nvdec_vp9.o
                      OBJS-$(CONFIG_VP9_VAAPI_HWACCEL)          += vaapi_vp9.o
                      OBJS-$(CONFIG_VP8_QSV_HWACCEL)            += qsvdec_other.o

libavcodec/allcodecs.c

History View file @ f3f73f0

@@ -123,6 +123,7 @@ static void register_all(void)
                          REGISTER_HWACCEL(VP9_D3D11VA2,      vp9_d3d11va2);
                          REGISTER_HWACCEL(VP9_DXVA2,         vp9_dxva2);
                          REGISTER_HWACCEL(VP9_MEDIACODEC,    vp9_mediacodec);
                     +    REGISTER_HWACCEL(VP9_NVDEC,         vp9_nvdec);
                          REGISTER_HWACCEL(VP9_VAAPI,         vp9_vaapi);
                          REGISTER_HWACCEL(WMV3_D3D11VA,      wmv3_d3d11va);
                          REGISTER_HWACCEL(WMV3_D3D11VA2,     wmv3_d3d11va2);

libavcodec/nvdec.c

History View file @ f3f73f0

@@ -54,6 +54,7 @@ static int map_avcodec_id(enum AVCodecID id)
     switch (id) {
     case AV_CODEC_ID_H264: return cudaVideoCodec_H264;
     case AV_CODEC_ID_HEVC: return cudaVideoCodec_HEVC;
+    case AV_CODEC_ID_VP9:  return cudaVideoCodec_VP9;
     }
     return -1;
 }

libavcodec/nvdec_vp9.c

History View file @ f3f73f0

                     new file mode 100644
@@ -0,0 +1,227 @@
                     +/*
                     + * VP9 HW decode acceleration through NVDEC
                     + *
                     + * Copyright (c) 2016 Timo Rothenpieler
                     + *
                     + * This file is part of FFmpeg.
                     + *
                     + * FFmpeg is free software; you can redistribute it and/or
                     + * modify it under the terms of the GNU Lesser General Public
                     + * License as published by the Free Software Foundation; either
                     + * version 2.1 of the License, or (at your option) any later version.
                     + *
                     + * FFmpeg is distributed in the hope that it will be useful,
                     + * but WITHOUT ANY WARRANTY; without even the implied warranty of
                     + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
                     + * Lesser General Public License for more details.
                     + *
                     + * You should have received a copy of the GNU Lesser General Public
                     + * License along with FFmpeg; if not, write to the Free Software
                     + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
                     + */
+                    +
                     +#include "libavutil/pixdesc.h"
+                    +
                     +#include "avcodec.h"
                     +#include "nvdec.h"
                     +#include "decode.h"
                     +#include "internal.h"
                     +#include "vp9shared.h"
+                    +
                     +static unsigned char get_ref_idx(AVFrame *frame) /* Return the NVDECFrame idx reachable through frame->private_ref, or 255 when there is none. */
                     +{
                     +    FrameDecodeData *fdd;
                     +    NVDECFrame *cf;
+                    +
                     +    if (!frame || !frame->private_ref)
                     +        return 255; /* sentinel: NULL frame, or frame without a private_ref */
+                    +
                     +    fdd = (FrameDecodeData*)frame->private_ref->data;
                     +    cf  = (NVDECFrame*)fdd->hwaccel_priv; /* dereferenced unchecked; presumably set up by ff_nvdec_start_frame() -- confirm in nvdec.c */
+                    +
                     +    return cf->idx;
                     +}
+                    +
                     +static int nvdec_vp9_start_frame(AVCodecContext *avctx, const uint8_t *buffer, uint32_t size) /* Fill ctx->pic_params for the current frame from the VP9 header in avctx->priv_data; returns 0, or the negative value from ff_nvdec_start_frame(). buffer and size are not read here. */
                     +{
                     +    VP9SharedContext *h = avctx->priv_data;
                     +    const AVPixFmtDescriptor *pixdesc = av_pix_fmt_desc_get(avctx->sw_pix_fmt); /* supplies chroma subsampling and component depth below; used without a NULL check */
+                    +
                     +    NVDECContext      *ctx = avctx->internal->hwaccel_priv_data;
                     +    CUVIDPICPARAMS     *pp = &ctx->pic_params;
                     +    CUVIDVP9PICPARAMS *ppc = &pp->CodecSpecific.vp9; /* alias of the VP9 member, used for the array fields filled after the initializer */
                     +    FrameDecodeData *fdd;
                     +    NVDECFrame *cf;
                     +    AVFrame *cur_frame = h->frames[CUR_FRAME].tf.f;
+                    +
                     +    int ret, i;
+                    +
                     +    ret = ff_nvdec_start_frame(avctx, cur_frame);
                     +    if (ret < 0)
                     +        return ret;
+                    +
                     +    fdd = (FrameDecodeData*)cur_frame->private_ref->data; /* private_ref is dereferenced unchecked; presumably guaranteed by ff_nvdec_start_frame() succeeding -- confirm */
                     +    cf  = (NVDECFrame*)fdd->hwaccel_priv;
+                    +
                     +    *pp = (CUVIDPICPARAMS) { /* compound literal: every member not named below is zero-initialized */
                     +        .PicWidthInMbs     = (cur_frame->width  + 15) / 16, /* dimensions rounded up to whole 16-pixel units */
                     +        .FrameHeightInMbs  = (cur_frame->height + 15) / 16,
                     +        .CurrPicIdx        = cf->idx,
+                    +
                     +        .CodecSpecific.vp9 = {
                     +            .width                    = cur_frame->width,
                     +            .height                   = cur_frame->height,
+                    +
                     +            .LastRefIdx               = get_ref_idx(h->refs[h->h.refidx[0]].f), /* 255 if that reference has no NVDEC frame attached (see get_ref_idx) */
                     +            .GoldenRefIdx             = get_ref_idx(h->refs[h->h.refidx[1]].f),
                     +            .AltRefIdx                = get_ref_idx(h->refs[h->h.refidx[2]].f),
+                    +
                     +            .profile                  = h->h.profile,
                     +            .frameContextIdx          = h->h.framectxid,
                     +            .frameType                = !h->h.keyframe, /* 0 for keyframes, 1 otherwise */
                     +            .showFrame                = !h->h.invisible,
                     +            .errorResilient           = h->h.errorres,
                     +            .frameParallelDecoding    = h->h.parallelmode,
                     +            .subSamplingX             = pixdesc->log2_chroma_w,
                     +            .subSamplingY             = pixdesc->log2_chroma_h,
                     +            .intraOnly                = h->h.intraonly,
                     +            .allow_high_precision_mv  = h->h.keyframe ? 0 : h->h.highprecisionmvs, /* forced to 0 on keyframes */
                     +            .refreshEntropyProbs      = h->h.refreshctx,
+                    +
                     +            .bitDepthMinus8Luma       = pixdesc->comp[0].depth - 8,
                     +            .bitDepthMinus8Chroma     = pixdesc->comp[1].depth - 8,
+                    +
                     +            .loopFilterLevel          = h->h.filter.level,
                     +            .loopFilterSharpness      = h->h.filter.sharpness,
                     +            .modeRefLfEnabled         = h->h.lf_delta.enabled,
+                    +
                     +            .log2_tile_columns        = h->h.tiling.log2_tile_cols,
                     +            .log2_tile_rows           = h->h.tiling.log2_tile_rows,
+                    +
                     +            .segmentEnabled           = h->h.segmentation.enabled,
                     +            .segmentMapUpdate         = h->h.segmentation.update_map,
                     +            .segmentMapTemporalUpdate = h->h.segmentation.temporal,
                     +            .segmentFeatureMode       = h->h.segmentation.absolute_vals,
+                    +
                     +            .qpYAc                    = h->h.yac_qi,
                     +            .qpYDc                    = h->h.ydc_qdelta,
                     +            .qpChDc                   = h->h.uvdc_qdelta,
                     +            .qpChAc                   = h->h.uvac_qdelta,
+                    +
                     +            .resetFrameContext        = h->h.resetctx,
                     +            .mcomp_filter_type        = h->h.filtermode ^ (h->h.filtermode <= 1), /* the XOR swaps values 0 and 1; values above 1 pass through unchanged */
+                    +
                     +            .frameTagSize             = h->h.uncompressed_header_size,
                     +            .offsetToDctParts         = h->h.compressed_header_size,
+                    +
                     +            .refFrameSignBias[0]      = 0, /* slot 0 stays 0; slots 1..3 are copied from signbias[] in the 3-iteration loop below */
                     +        }
                     +    };
+                    +
                     +    for (i = 0; i < 2; i++)
                     +        ppc->mbModeLfDelta[i] = h->h.lf_delta.mode[i];
+                    +
                     +    for (i = 0; i < 4; i++)
                     +        ppc->mbRefLfDelta[i] = h->h.lf_delta.ref[i];
+                    +
                     +    for (i = 0; i < 7; i++)
                     +        ppc->mb_segment_tree_probs[i] = h->h.segmentation.prob[i];
+                    +
                     +    for (i = 0; i < 3; i++) { /* one pass per refidx[] entry, same order as Last/Golden/AltRefIdx above */
                     +        ppc->activeRefIdx[i] = h->h.refidx[i];
                     +        ppc->segment_pred_probs[i] = h->h.segmentation.pred_prob[i];
                     +        ppc->refFrameSignBias[i + 1] = h->h.signbias[i]; /* shifted by one: index 0 was set to 0 in the initializer */
                     +    }
+                    +
                     +    for (i = 0; i < 8; i++) { /* per-segment features; columns are 0 = q, 1 = lf, 2 = ref, 3 = skip */
                     +        ppc->segmentFeatureEnable[i][0] = h->h.segmentation.feat[i].q_enabled;
                     +        ppc->segmentFeatureEnable[i][1] = h->h.segmentation.feat[i].lf_enabled;
                     +        ppc->segmentFeatureEnable[i][2] = h->h.segmentation.feat[i].ref_enabled;
                     +        ppc->segmentFeatureEnable[i][3] = h->h.segmentation.feat[i].skip_enabled;
+                    +
                     +        ppc->segmentFeatureData[i][0] = h->h.segmentation.feat[i].q_val;
                     +        ppc->segmentFeatureData[i][1] = h->h.segmentation.feat[i].lf_val;
                     +        ppc->segmentFeatureData[i][2] = h->h.segmentation.feat[i].ref_val;
                     +        ppc->segmentFeatureData[i][3] = 0; /* no value is copied for the skip feature, only its enable flag above */
                     +    }
+                    +
                     +    switch (avctx->colorspace) { /* map avctx->colorspace onto ppc->colorSpace codes 0..7 */
                     +    default: /* any colorspace not listed below shares the UNSPECIFIED branch (code 0) */
                     +    case AVCOL_SPC_UNSPECIFIED:
                     +        ppc->colorSpace = 0;
                     +        break;
                     +    case AVCOL_SPC_BT470BG:
                     +        ppc->colorSpace = 1;
                     +        break;
                     +    case AVCOL_SPC_BT709:
                     +        ppc->colorSpace = 2;
                     +        break;
                     +    case AVCOL_SPC_SMPTE170M:
                     +        ppc->colorSpace = 3;
                     +        break;
                     +    case AVCOL_SPC_SMPTE240M:
                     +        ppc->colorSpace = 4;
                     +        break;
                     +    case AVCOL_SPC_BT2020_NCL:
                     +        ppc->colorSpace = 5;
                     +        break;
                     +    case AVCOL_SPC_RESERVED:
                     +        ppc->colorSpace = 6;
                     +        break;
                     +    case AVCOL_SPC_RGB:
                     +        ppc->colorSpace = 7;
                     +        break;
                     +    }
+                    +
                     +    return 0;
                     +}
+                    +
                     +static int nvdec_vp9_end_frame(AVCodecContext *avctx) /* Finish the frame via ff_nvdec_end_frame() and clear ctx->bitstream; returns that call's result unchanged. */
                     +{
                     +    NVDECContext *ctx = avctx->internal->hwaccel_priv_data;
                     +    int ret = ff_nvdec_end_frame(avctx);
                     +    ctx->bitstream = NULL; /* cleared even when ret signals an error, so nvdec_vp9_decode_slice() records a fresh base pointer for the next frame */
                     +    return ret;
                     +}
+                    +
                     +static int nvdec_vp9_decode_slice(AVCodecContext *avctx, const uint8_t *buffer, uint32_t size) /* Record one slice: append its offset to ctx->slice_offsets and add size to ctx->bitstream_len. Returns 0, or AVERROR(ENOMEM) if the offsets array cannot grow. */
                     +{
                     +    NVDECContext *ctx = avctx->internal->hwaccel_priv_data;
                     +    void *tmp;
+                    +
                     +    tmp = av_fast_realloc(ctx->slice_offsets, &ctx->slice_offsets_allocated, /* make room for one more offset entry */
                     +                          (ctx->nb_slices + 1) * sizeof(*ctx->slice_offsets));
                     +    if (!tmp)
                     +        return AVERROR(ENOMEM); /* ctx->slice_offsets is left untouched on failure */
                     +    ctx->slice_offsets = tmp;
+                    +
                     +    if (!ctx->bitstream)
                     +        ctx->bitstream = (uint8_t*)buffer; /* first slice since the pointer was cleared: keep the caller's buffer as base (pointer stored, data not copied) */
+                    +
                     +    ctx->slice_offsets[ctx->nb_slices] = buffer - ctx->bitstream; /* offset relative to the first slice; assumes later slices lie in the same buffer -- TODO confirm against the caller */
                     +    ctx->bitstream_len += size;
                     +    ctx->nb_slices++;
+                    +
                     +    return 0;
                     +}
+                    +
                     +static int nvdec_vp9_frame_params(AVCodecContext *avctx, /* Forward to ff_nvdec_frame_params() with a fixed count of 8 and return its result. */
                     +                                  AVBufferRef *hw_frames_ctx)
                     +{
                     +    // VP9 uses a fixed size pool of 8 possible reference frames
                     +    return ff_nvdec_frame_params(avctx, hw_frames_ctx, 8); /* NOTE(review): exact meaning of the third argument is defined in nvdec.c -- confirm there */
                     +}
+                    +
                     +AVHWAccel ff_vp9_nvdec_hwaccel = { /* hwaccel descriptor: the VP9 callbacks defined in this file plus the shared ff_nvdec_* init/uninit */
                     +    .name                 = "vp9_nvdec",
                     +    .type                 = AVMEDIA_TYPE_VIDEO,
                     +    .id                   = AV_CODEC_ID_VP9,
                     +    .pix_fmt              = AV_PIX_FMT_CUDA, /* same format that the vp9.c hunk of this commit adds to its pix_fmts list */
                     +    .start_frame          = nvdec_vp9_start_frame,
                     +    .end_frame            = nvdec_vp9_end_frame,
                     +    .decode_slice         = nvdec_vp9_decode_slice,
                     +    .frame_params         = nvdec_vp9_frame_params,
                     +    .init                 = ff_nvdec_decode_init,
                     +    .uninit               = ff_nvdec_decode_uninit,
                     +    .priv_data_size       = sizeof(NVDECContext), /* matches the NVDECContext type the callbacks read from avctx->internal->hwaccel_priv_data */
                     +};

libavcodec/version.h

History View file @ f3f73f0

@@ -29,7 +29,7 @@
                      #define LIBAVCODEC_VERSION_MAJOR  58
                      #define LIBAVCODEC_VERSION_MINOR   3
                     -#define LIBAVCODEC_VERSION_MICRO 100
                     +#define LIBAVCODEC_VERSION_MICRO 101
                      #define LIBAVCODEC_VERSION_INT  AV_VERSION_INT(LIBAVCODEC_VERSION_MAJOR, \
                                                                     LIBAVCODEC_VERSION_MINOR, \

libavcodec/vp9.c

History View file @ f3f73f0

@@ -169,7 +169,10 @@ fail:
                      static int update_size(AVCodecContext *avctx, int w, int h)
                      {
                     -#define HWACCEL_MAX (CONFIG_VP9_DXVA2_HWACCEL + CONFIG_VP9_D3D11VA_HWACCEL * 2 + CONFIG_VP9_VAAPI_HWACCEL)
                     +#define HWACCEL_MAX (CONFIG_VP9_DXVA2_HWACCEL + \
                     +                     CONFIG_VP9_D3D11VA_HWACCEL * 2 + \
                     +                     CONFIG_VP9_NVDEC_HWACCEL + \
                     +                     CONFIG_VP9_VAAPI_HWACCEL)
                          enum AVPixelFormat pix_fmts[HWACCEL_MAX + 2], *fmtp = pix_fmts;
                          VP9Context *s = avctx->priv_data;
                          uint8_t *p;
@@ -191,12 +194,18 @@ static int update_size(AVCodecContext *avctx, int w, int h)
                                  *fmtp++ = AV_PIX_FMT_D3D11VA_VLD;
                                  *fmtp++ = AV_PIX_FMT_D3D11;
                      #endif
                     +#if CONFIG_VP9_NVDEC_HWACCEL
                     +            *fmtp++ = AV_PIX_FMT_CUDA;
                     +#endif
                      #if CONFIG_VP9_VAAPI_HWACCEL
                                  *fmtp++ = AV_PIX_FMT_VAAPI;
                      #endif
                                  break;
                              case AV_PIX_FMT_YUV420P10:
                              case AV_PIX_FMT_YUV420P12:
                     +#if CONFIG_VP9_NVDEC_HWACCEL
                     +            *fmtp++ = AV_PIX_FMT_CUDA;
                     +#endif
                      #if CONFIG_VP9_VAAPI_HWACCEL
                                  *fmtp++ = AV_PIX_FMT_VAAPI;
                      #endif

...	...	@@ -54,6 +54,7 @@ static int map_avcodec_id(enum AVCodecID id)
54	54	switch (id) {
55	55	case AV_CODEC_ID_H264: return cudaVideoCodec_H264;
56	56	case AV_CODEC_ID_HEVC: return cudaVideoCodec_HEVC;
	57	+ case AV_CODEC_ID_VP9: return cudaVideoCodec_VP9;
57	58	}
58	59	return -1;
59	60	}