GitList

Browse code

Merge remote-tracking branch 'qatar/master'

* qatar/master: (25 commits)
rv40dsp x86: MMX/MMX2/3DNow/SSE2/SSSE3 implementations of MC
ape: Use unsigned integer maths
arm: dsputil: fix overreads in put/avg_pixels functions
h264: K&R formatting cosmetics for header files (part II/II)
h264: K&R formatting cosmetics for header files (part I/II)
rtmp: Implement check bandwidth notification.
rtmp: Support 'rtmp_swfurl', an option which specifies the URL of the SWF player.
rtmp: Support 'rtmp_flashver', an option which overrides the version of the Flash plugin.
rtmp: Support 'rtmp_tcurl', an option which overrides the URL of the target stream.
cmdutils: Add fallback case to switch in check_stream_specifier().
sctp: be consistent with socket option level
configure: Add _XOPEN_SOURCE=600 to Solaris preprocessor flags.
vcr1enc: drop pointless empty encode_init() wrapper function
vcr1: drop pointless write-only AVCodecContext member from VCR1Context
vcr1: group encoder code together to save #ifdefs
vcr1: cosmetics: K&R prettyprinting, typos, parentheses, dead code, comments
mov: make one comment slightly more specific
lavr: replace the SSE version of ff_conv_fltp_to_flt_6ch() with SSE4 and AVX
lavfi: move audio-related functions to a separate file.
lavfi: remove some audio-related function from public API.
...

Conflicts:
cmdutils.c
libavcodec/h264.h
libavcodec/h264_mvpred.h
libavcodec/vcr1.c
libavfilter/avfilter.c
libavfilter/avfilter.h
libavfilter/defaults.c
libavfilter/internal.h

Merged-by: Michael Niedermayer <michaelni@gmx.at>

Michael Niedermayer authored on 2012/05/11 05:41:29
Showing 44 changed files

cmdutils.c index 3d5c5f2..2879a42 100644
configure index 4313bdd..270de65 100755
libavcodec/apedec.c index 05639f0..c006420 100644
libavcodec/arm/dsputil_neon.S index c660cb0..7b30170 100644
libavcodec/h264.h index f5542bf..0e11e30 100644
libavcodec/h264_mvpred.h index 85405c1..12064c8 100644
libavcodec/h264data.h index a5ed069..60df532 100644
libavcodec/h264dsp.h index d7c1920..45f81a0 100644
libavcodec/h264pred.h index 599cdb2..d68f39b 100644
libavcodec/vcr1.c index 2a25982..13aded9 100644
libavcodec/x86/dsputil_mmx.c index 68df954..e6cd161 100644
libavcodec/x86/dsputil_mmx.h index 6ba5ea8..91940f6 100644
libavcodec/x86/rv40dsp.asm index e8acfb2..9b50940 100644
libavcodec/x86/rv40dsp_init.c index df468aa..3f42363 100644
libavfilter/Makefile index 0647772..962dbf6 100644
libavfilter/af_aconvert.c index 8c1b5dc..e0f3474 100644
libavfilter/af_aformat.c index e7ef7f0..ab64840 100644
libavfilter/af_amerge.c index a160391..6d141fe 100644
libavfilter/af_anull.c index 8419f57..140ecb3 100644
libavfilter/af_aresample.c index 786fb85..23980d1 100644
libavfilter/af_ashowinfo.c index 12d0315..c336c52 100644
libavfilter/af_asplit.c index ec5032b..3bdbeac 100644
libavfilter/af_astreamsync.c index ccf55c0..2e8a02c 100644
libavfilter/af_earwax.c index 7caeb2f..da5c06e 100644
libavfilter/af_pan.c index 30d5d1b..dba930e 100644
libavfilter/af_silencedetect.c index 4f95b54..94197d7 100644
libavfilter/af_volume.c index 99ae8b8..881a451 100644
libavfilter/asrc_aevalsrc.c index 7bd6a89..11750bb 100644
libavfilter/asrc_anullsrc.c index 288e2bf..86497b9 100644
libavfilter/audio.c index 0000000..31f6796
libavfilter/audio.h index 0000000..051efb2
libavfilter/avfilter.c index 89afee0..073b259 100644
libavfilter/avfilter.h index d7d4c16..9fa6ab4 100644
libavfilter/defaults.c index e57a129..d2e0149 100644
libavfilter/internal.h index e8516c3..09d6055 100644
libavfilter/src_buffer.c index 19c49a3..f6c67f4 100644
libavfilter/src_movie.c index 1d65ade..15b82ab 100644
libavfilter/version.h index 1aee037..82817a7 100644
libavformat/mov.c index 240bccf..9a2a59f 100644
libavformat/rtmpproto.c index 2feb240..c7db91d 100644
libavformat/sctp.c index 3823e03..817b004 100644
libavresample/x86/audio_convert.asm index 809c5d1..ba59f33 100644
libavresample/x86/audio_convert_init.c index 6883f10..206aede 100644
libavutil/x86/x86util.asm index 8c8d485..258626a 100644

@@ -38,6 +38,7 @@
                      #if CONFIG_POSTPROC
                      #include "libpostproc/postprocess.h"
                      #endif
                     +#include "libavutil/avassert.h"
                      #include "libavutil/avstring.h"
                      #include "libavutil/mathematics.h"
                      #include "libavutil/parseutils.h"
@@ -1075,7 +1076,7 @@ int check_stream_specifier(AVFormatContext *s, AVStream *st, const char *spec)
                              case 's': type = AVMEDIA_TYPE_SUBTITLE;   break;
                              case 'd': type = AVMEDIA_TYPE_DATA;       break;
                              case 't': type = AVMEDIA_TYPE_ATTACHMENT; break;
                     -        default: abort(); // never reached, silence warning
                     +        default:  av_assert0(0);
                              }
                              if (type != st->codec->codec_type)
                                  return 0;

@@ -2662,7 +2662,7 @@ case $target_os in
                              SHFLAGS='-shared -Wl,-h,$$(@F)'
                              enabled x86 && SHFLAGS="-mimpure-text $SHFLAGS"
                              network_extralibs="-lsocket -lnsl"
                     -        add_cppflags -D__EXTENSIONS__
                     +        add_cppflags -D__EXTENSIONS__ -D_XOPEN_SOURCE=600
                              # When using suncc to build, the Solaris linker will mark
                              # an executable with each instruction set encountered by
                              # the Solaris assembler.  As our libraries contain their own

@@ -393,7 +393,7 @@ static inline int range_get_symbol(APEContext *ctx,
                      }
                      /** @} */ // group rangecoder
                     -static inline void update_rice(APERice *rice, int x)
                     +static inline void update_rice(APERice *rice, unsigned int x)
                      {
                          int lim = rice->k ? (1 << (rice->k + 4)) : 0;
                          rice->ksum += ((x + 1) / 2) - ((rice->ksum + 16) >> 5);
@@ -406,7 +406,7 @@ static inline void update_rice(APERice *rice, int x)
                      static inline int ape_decode_value(APEContext *ctx, APERice *rice)
                      {
                     -    int x, overflow;
                     +    unsigned int x, overflow;
                          if (ctx->fileversion < 3990) {
                              int tmpk;

@@ -95,6 +95,7 @@ endfunc
                      .endm
                      .macro  pixels16_y2     rnd=1, avg=0
                     +        sub             r3,  r3,  #2
                              vld1.64         {q0},     [r1], r2
                              vld1.64         {q1},     [r1], r2
                      1:      subs            r3,  r3,  #2
@@ -114,10 +115,25 @@ endfunc
                              vst1.64         {q2},     [r0,:128], r2
                              vst1.64         {q3},     [r0,:128], r2
                              bne             1b
+                    +
                     +        avg             q2,  q0,  q1
                     +        vld1.64         {q0},     [r1], r2
                     +        avg             q3,  q0,  q1
                     +  .if \avg
                     +        vld1.8          {q8},     [r0,:128], r2
                     +        vld1.8          {q9},     [r0,:128]
                     +        vrhadd.u8       q2,  q2,  q8
                     +        vrhadd.u8       q3,  q3,  q9
                     +        sub             r0,  r0,  r2
                     +  .endif
                     +        vst1.64         {q2},     [r0,:128], r2
                     +        vst1.64         {q3},     [r0,:128], r2
+                    +
                              bx              lr
                      .endm
                      .macro  pixels16_xy2    rnd=1, avg=0
                     +        sub             r3,  r3,  #2
                              vld1.64         {d0-d2},  [r1], r2
                              vld1.64         {d4-d6},  [r1], r2
                        .ifeq \rnd
@@ -173,6 +189,42 @@ endfunc
                              vaddl.u8        q11, d3,  d5
                              vst1.64         {q15},    [r0,:128], r2
                              bgt             1b
+                    +
                     +        vld1.64         {d0-d2},  [r1], r2
                     +        vadd.u16        q12, q8,  q9
                     +  .ifeq \rnd
                     +        vadd.u16        q12, q12, q13
                     +  .endif
                     +        vext.8          q15, q0,  q1,  #1
                     +        vadd.u16        q1 , q10, q11
                     +        shrn            d28, q12, #2
                     +  .ifeq \rnd
                     +        vadd.u16        q1,  q1,  q13
                     +  .endif
                     +        shrn            d29, q1,  #2
                     +  .if \avg
                     +        vld1.8          {q8},     [r0,:128]
                     +        vrhadd.u8       q14, q14, q8
                     +  .endif
                     +        vaddl.u8        q8,  d0,  d30
                     +        vaddl.u8        q10, d1,  d31
                     +        vst1.64         {q14},    [r0,:128], r2
                     +        vadd.u16        q12, q8,  q9
                     +  .ifeq \rnd
                     +        vadd.u16        q12, q12, q13
                     +  .endif
                     +        vadd.u16        q0,  q10, q11
                     +        shrn            d30, q12, #2
                     +  .ifeq \rnd
                     +        vadd.u16        q0,  q0,  q13
                     +  .endif
                     +        shrn            d31, q0,  #2
                     +  .if \avg
                     +        vld1.8          {q9},     [r0,:128]
                     +        vrhadd.u8       q15, q15, q9
                     +  .endif
                     +        vst1.64         {q15},    [r0,:128], r2
+                    +
                              bx              lr
                      .endm
@@ -228,6 +280,7 @@ endfunc
                      .endm
                      .macro  pixels8_y2      rnd=1, avg=0
                     +        sub             r3,  r3,  #2
                              vld1.64         {d0},     [r1], r2
                              vld1.64         {d1},     [r1], r2
                      1:      subs            r3,  r3,  #2
@@ -246,10 +299,24 @@ endfunc
                              vst1.64         {d4},     [r0,:64], r2
                              vst1.64         {d5},     [r0,:64], r2
                              bne             1b
+                    +
                     +        avg             d4,  d0,  d1
                     +        vld1.64         {d0},     [r1], r2
                     +        avg             d5,  d0,  d1
                     +  .if \avg
                     +        vld1.8          {d2},     [r0,:64], r2
                     +        vld1.8          {d3},     [r0,:64]
                     +        vrhadd.u8       q2,  q2,  q1
                     +        sub             r0,  r0,  r2
                     +  .endif
                     +        vst1.64         {d4},     [r0,:64], r2
                     +        vst1.64         {d5},     [r0,:64], r2
+                    +
                              bx              lr
                      .endm
                      .macro  pixels8_xy2     rnd=1, avg=0
                     +        sub             r3,  r3,  #2
                              vld1.64         {q0},     [r1], r2
                              vld1.64         {q1},     [r1], r2
                        .ifeq \rnd
@@ -291,6 +358,31 @@ endfunc
                              vaddl.u8        q9,  d2,  d6
                              vst1.64         {d7},     [r0,:64], r2
                              bgt             1b
+                    +
                     +        vld1.64         {q0},     [r1], r2
                     +        vadd.u16        q10, q8,  q9
                     +        vext.8          d4,  d0,  d1,  #1
                     +  .ifeq \rnd
                     +        vadd.u16        q10, q10, q11
                     +  .endif
                     +        vaddl.u8        q8,  d0,  d4
                     +        shrn            d5,  q10, #2
                     +        vadd.u16        q10, q8,  q9
                     +  .if \avg
                     +        vld1.8          {d7},     [r0,:64]
                     +        vrhadd.u8       d5,  d5,  d7
                     +  .endif
                     +  .ifeq \rnd
                     +        vadd.u16        q10, q10, q11
                     +  .endif
                     +        vst1.64         {d5},     [r0,:64], r2
                     +        shrn            d7,  q10, #2
                     +  .if \avg
                     +        vld1.8          {d5},     [r0,:64]
                     +        vrhadd.u8       d7,  d7,  d5
                     +  .endif
                     +        vst1.64         {d7},     [r0,:64], r2
+                    +
                              bx              lr
                      .endm

@@ -37,14 +37,14 @@
                      #include "rectangle.h"
                      #define interlaced_dct interlaced_dct_is_a_bad_name
                     -#define mb_intra mb_intra_is_not_initialized_see_mb_type
                     +#define mb_intra       mb_intra_is_not_initialized_see_mb_type
                     -#define MAX_SPS_COUNT 32
                     -#define MAX_PPS_COUNT 256
                     +#define MAX_SPS_COUNT          32
                     +#define MAX_PPS_COUNT         256
                     -#define MAX_MMCO_COUNT 66
                     +#define MAX_MMCO_COUNT         66
                     -#define MAX_DELAYED_PIC_COUNT 16
                     +#define MAX_DELAYED_PIC_COUNT  16
                      #define MAX_MBPAIR_SIZE (256*1024) // a tighter bound could be calculated if someone cares about a few bytes
@@ -61,25 +61,25 @@
                      #define MAX_SLICES 16
                      #ifdef ALLOW_INTERLACE
                     -#define MB_MBAFF h->mb_mbaff
                     -#define MB_FIELD h->mb_field_decoding_flag
                     +#define MB_MBAFF    h->mb_mbaff
                     +#define MB_FIELD    h->mb_field_decoding_flag
                      #define FRAME_MBAFF h->mb_aff_frame
                      #define FIELD_PICTURE (s->picture_structure != PICT_FRAME)
                      #define LEFT_MBS 2
                     -#define LTOP 0
                     -#define LBOT 1
                     -#define LEFT(i) (i)
                     +#define LTOP     0
                     +#define LBOT     1
                     +#define LEFT(i)  (i)
                      #else
                     -#define MB_MBAFF 0
                     -#define MB_FIELD 0
                     -#define FRAME_MBAFF 0
                     +#define MB_MBAFF      0
                     +#define MB_FIELD      0
                     +#define FRAME_MBAFF   0
                      #define FIELD_PICTURE 0
                      #undef  IS_INTERLACED
                      #define IS_INTERLACED(mb_type) 0
                      #define LEFT_MBS 1
                     -#define LTOP 0
                     -#define LBOT 0
                     -#define LEFT(i) 0
                     +#define LTOP     0
                     +#define LBOT     0
                     +#define LEFT(i)  0
                      #endif
                      #define FIELD_OR_MBAFF_PICTURE (FRAME_MBAFF || FIELD_PICTURE)
@@ -91,9 +91,9 @@
                      #define CHROMA422 (h->sps.chroma_format_idc == 2)
                      #define CHROMA444 (h->sps.chroma_format_idc == 3)
                     -#define EXTENDED_SAR          255
                     +#define EXTENDED_SAR       255
                     -#define MB_TYPE_REF0       MB_TYPE_ACPRED //dirty but it fits in 16 bit
                     +#define MB_TYPE_REF0       MB_TYPE_ACPRED // dirty but it fits in 16 bit
                      #define MB_TYPE_8x8DCT     0x01000000
                      #define IS_REF0(a)         ((a) & MB_TYPE_REF0)
                      #define IS_8x8DCT(a)       ((a) & MB_TYPE_8x8DCT)
@@ -108,7 +108,7 @@
                      /* NAL unit types */
                      enum {
                     -    NAL_SLICE=1,
                     +    NAL_SLICE = 1,
                          NAL_DPA,
                          NAL_DPB,
                          NAL_DPC,
@@ -121,17 +121,17 @@ enum {
                          NAL_END_STREAM,
                          NAL_FILLER_DATA,
                          NAL_SPS_EXT,
                     -    NAL_AUXILIARY_SLICE=19
                     +    NAL_AUXILIARY_SLICE = 19
                      };
                      /**
                       * SEI message types
                       */
                      typedef enum {
                     -    SEI_BUFFERING_PERIOD             =  0, ///< buffering period (H.264, D.1.1)
                     -    SEI_TYPE_PIC_TIMING              =  1, ///< picture timing
                     -    SEI_TYPE_USER_DATA_UNREGISTERED  =  5, ///< unregistered user data
                     -    SEI_TYPE_RECOVERY_POINT          =  6  ///< recovery point (frame # to decoder sync)
                     +    SEI_BUFFERING_PERIOD            = 0,   ///< buffering period (H.264, D.1.1)
                     +    SEI_TYPE_PIC_TIMING             = 1,   ///< picture timing
                     +    SEI_TYPE_USER_DATA_UNREGISTERED = 5,   ///< unregistered user data
                     +    SEI_TYPE_RECOVERY_POINT         = 6    ///< recovery point (frame # to decoder sync)
                      } SEI_Type;
                      /**
@@ -152,8 +152,7 @@ typedef enum {
                      /**
                       * Sequence parameter set
                       */
                     -typedef struct SPS{
                     -
                     +typedef struct SPS {
                          int profile_idc;
                          int level_idc;
                          int chroma_format_idc;
@@ -170,9 +169,9 @@ typedef struct SPS{
                          int mb_width;                      ///< pic_width_in_mbs_minus1 + 1
                          int mb_height;                     ///< pic_height_in_map_units_minus1 + 1
                          int frame_mbs_only_flag;
                     -    int mb_aff;                        ///<mb_adaptive_frame_field_flag
                     +    int mb_aff;                        ///< mb_adaptive_frame_field_flag
                          int direct_8x8_inference_flag;
                     -    int crop;                   ///< frame_cropping_flag
                     +    int crop;                          ///< frame_cropping_flag
                          unsigned int crop_left;            ///< frame_cropping_rect_left_offset
                          unsigned int crop_right;           ///< frame_cropping_rect_right_offset
                          unsigned int crop_top;             ///< frame_cropping_rect_top_offset
@@ -189,7 +188,7 @@ typedef struct SPS{
                          uint32_t num_units_in_tick;
                          uint32_t time_scale;
                          int fixed_frame_rate_flag;
                     -    short offset_for_ref_frame[256]; //FIXME dyn aloc?
                     +    short offset_for_ref_frame[256]; // FIXME dyn aloc?
                          int bitstream_restriction_flag;
                          int num_reorder_frames;
                          int scaling_matrix_present;
@@ -199,20 +198,20 @@ typedef struct SPS{
                          int vcl_hrd_parameters_present_flag;
                          int pic_struct_present_flag;
                          int time_offset_length;
                     -    int cpb_cnt;                       ///< See H.264 E.1.2
                     -    int initial_cpb_removal_delay_length; ///< initial_cpb_removal_delay_length_minus1 +1
                     -    int cpb_removal_delay_length;      ///< cpb_removal_delay_length_minus1 + 1
                     -    int dpb_output_delay_length;       ///< dpb_output_delay_length_minus1 + 1
                     -    int bit_depth_luma;                ///< bit_depth_luma_minus8 + 8
                     -    int bit_depth_chroma;              ///< bit_depth_chroma_minus8 + 8
                     -    int residual_color_transform_flag; ///< residual_colour_transform_flag
                     -    int constraint_set_flags;          ///< constraint_set[0-3]_flag
                     -}SPS;
                     +    int cpb_cnt;                          ///< See H.264 E.1.2
                     +    int initial_cpb_removal_delay_length; ///< initial_cpb_removal_delay_length_minus1 + 1
                     +    int cpb_removal_delay_length;         ///< cpb_removal_delay_length_minus1 + 1
                     +    int dpb_output_delay_length;          ///< dpb_output_delay_length_minus1 + 1
                     +    int bit_depth_luma;                   ///< bit_depth_luma_minus8 + 8
                     +    int bit_depth_chroma;                 ///< bit_depth_chroma_minus8 + 8
                     +    int residual_color_transform_flag;    ///< residual_colour_transform_flag
                     +    int constraint_set_flags;             ///< constraint_set[0-3]_flag
                     +} SPS;
                      /**
                       * Picture parameter set
                       */
                     -typedef struct PPS{
                     +typedef struct PPS {
                          unsigned int sps_id;
                          int cabac;                  ///< entropy_coding_mode_flag
                          int pic_order_present;      ///< pic_order_present_flag
@@ -225,20 +224,20 @@ typedef struct PPS{
                          int init_qs;                ///< pic_init_qs_minus26 + 26
                          int chroma_qp_index_offset[2];
                          int deblocking_filter_parameters_present; ///< deblocking_filter_parameters_present_flag
                     -    int constrained_intra_pred; ///< constrained_intra_pred_flag
                     -    int redundant_pic_cnt_present; ///< redundant_pic_cnt_present_flag
                     -    int transform_8x8_mode;     ///< transform_8x8_mode_flag
                     +    int constrained_intra_pred;     ///< constrained_intra_pred_flag
                     +    int redundant_pic_cnt_present;  ///< redundant_pic_cnt_present_flag
                     +    int transform_8x8_mode;         ///< transform_8x8_mode_flag
                          uint8_t scaling_matrix4[6][16];
                          uint8_t scaling_matrix8[6][64];
                          uint8_t chroma_qp_table[2][QP_MAX_NUM+1];  ///< pre-scaled (with chroma_qp_index_offset) version of qp_table
                          int chroma_qp_diff;
                     -}PPS;
                     +} PPS;
                      /**
                       * Memory management control operation opcode.
                       */
                     -typedef enum MMCOOpcode{
                     -    MMCO_END=0,
                     +typedef enum MMCOOpcode {
                     +    MMCO_END = 0,
                          MMCO_SHORT2UNUSED,
                          MMCO_LONG2UNUSED,
                          MMCO_SHORT2LONG,
@@ -250,7 +249,7 @@ typedef enum MMCOOpcode{
                      /**
                       * Memory management control operation.
                       */
                     -typedef struct MMCO{
                     +typedef struct MMCO {
                          MMCOOpcode opcode;
                          int short_pic_num;  ///< pic_num without wrapping (pic_num & max_pic_num)
                          int long_arg;       ///< index, pic_num, or num long refs depending on opcode
@@ -259,18 +258,18 @@ typedef struct MMCO{
                      /**
                       * H264Context
                       */
                     -typedef struct H264Context{
                     +typedef struct H264Context {
                          MpegEncContext s;
                          H264DSPContext h264dsp;
                          int pixel_shift;    ///< 0 for 8-bit H264, 1 for high-bit-depth H264
                     -    int chroma_qp[2]; //QPc
                     +    int chroma_qp[2];   // QPc
                          int qp_thresh;      ///< QP threshold to skip loopfilter
                          int prev_mb_skipped;
                          int next_mb_skipped;
                     -    //prediction stuff
                     +    // prediction stuff
                          int chroma_pred_mode;
                          int intra16x16_pred_mode;
@@ -284,32 +283,32 @@ typedef struct H264Context{
                          int topright_type;
                          int left_type[LEFT_MBS];
                     -    const uint8_t * left_block;
                     +    const uint8_t *left_block;
                          int topleft_partition;
                     -    int8_t intra4x4_pred_mode_cache[5*8];
                     -    int8_t (*intra4x4_pred_mode);
                     +    int8_t intra4x4_pred_mode_cache[5 * 8];
                     +    int8_t(*intra4x4_pred_mode);
                          H264PredContext hpc;
                          unsigned int topleft_samples_available;
                          unsigned int top_samples_available;
                          unsigned int topright_samples_available;
                          unsigned int left_samples_available;
                     -    uint8_t (*top_borders[2])[(16*3)*2];
                     +    uint8_t (*top_borders[2])[(16 * 3) * 2];
                          /**
                           * non zero coeff count cache.
                           * is 64 if not available.
                           */
                     -    DECLARE_ALIGNED(8, uint8_t, non_zero_count_cache)[15*8];
                     +    DECLARE_ALIGNED(8, uint8_t, non_zero_count_cache)[15 * 8];
                          uint8_t (*non_zero_count)[48];
                          /**
                           * Motion vector cache.
                           */
                     -    DECLARE_ALIGNED(16, int16_t, mv_cache)[2][5*8][2];
                     -    DECLARE_ALIGNED(8, int8_t, ref_cache)[2][5*8];
                     -#define LIST_NOT_USED -1 //FIXME rename?
                     +    DECLARE_ALIGNED(16, int16_t, mv_cache)[2][5 * 8][2];
                     +    DECLARE_ALIGNED(8, int8_t, ref_cache)[2][5 * 8];
                     +#define LIST_NOT_USED -1 // FIXME rename?
                      #define PART_NOT_AVAILABLE -2
                          /**
@@ -321,13 +320,13 @@ typedef struct H264Context{
                           * block_offset[ 0..23] for frame macroblocks
                           * block_offset[24..47] for field macroblocks
                           */
                     -    int block_offset[2*(16*3)];
                     +    int block_offset[2 * (16 * 3)];
                     -    uint32_t *mb2b_xy; //FIXME are these 4 a good idea?
                     +    uint32_t *mb2b_xy;  // FIXME are these 4 a good idea?
                          uint32_t *mb2br_xy;
                     -    int b_stride; //FIXME use s->b4_stride
                     +    int b_stride;       // FIXME use s->b4_stride
                     -    int mb_linesize;   ///< may be equal to s->linesize or s->linesize*2, for mbaff
                     +    int mb_linesize;    ///< may be equal to s->linesize or s->linesize * 2, for mbaff
                          int mb_uvlinesize;
                          int emu_edge_width;
@@ -338,32 +337,32 @@ typedef struct H264Context{
                          /**
                           * current pps
                           */
                     -    PPS pps; //FIXME move to Picture perhaps? (->no) do we need that?
                     +    PPS pps; // FIXME move to Picture perhaps? (->no) do we need that?
                     -    uint32_t dequant4_buffer[6][QP_MAX_NUM+1][16]; //FIXME should these be moved down?
                     -    uint32_t dequant8_buffer[6][QP_MAX_NUM+1][64];
                     -    uint32_t (*dequant4_coeff[6])[16];
                     -    uint32_t (*dequant8_coeff[6])[64];
                     +    uint32_t dequant4_buffer[6][QP_MAX_NUM + 1][16]; // FIXME should these be moved down?
                     +    uint32_t dequant8_buffer[6][QP_MAX_NUM + 1][64];
                     +    uint32_t(*dequant4_coeff[6])[16];
                     +    uint32_t(*dequant8_coeff[6])[64];
                          int slice_num;
                     -    uint16_t *slice_table;     ///< slice_table_base + 2*mb_stride + 1
                     +    uint16_t *slice_table;      ///< slice_table_base + 2*mb_stride + 1
                          int slice_type;
                     -    int slice_type_nos;        ///< S free slice type (SI/SP are remapped to I/P)
                     +    int slice_type_nos;         ///< S free slice type (SI/SP are remapped to I/P)
                          int slice_type_fixed;
                     -    //interlacing specific flags
                     +    // interlacing specific flags
                          int mb_aff_frame;
                          int mb_field_decoding_flag;
                     -    int mb_mbaff;              ///< mb_aff_frame && mb_field_decoding_flag
                     +    int mb_mbaff;               ///< mb_aff_frame && mb_field_decoding_flag
                          DECLARE_ALIGNED(8, uint16_t, sub_mb_type)[4];
                     -    //Weighted pred stuff
                     +    // Weighted pred stuff
                          int use_weight;
                          int use_weight_chroma;
                          int luma_log2_weight_denom;
                          int chroma_log2_weight_denom;
                     -    //The following 2 can be changed to int8_t but that causes 10cpu cycles speedloss
                     +    // The following 2 can be changed to int8_t but that causes 10cpu cycles speedloss
                          int luma_weight[48][2][2];
                          int chroma_weight[48][2][2][2];
                          int implicit_weight[48][48][2];
@@ -373,48 +372,48 @@ typedef struct H264Context{
                          int col_fieldoff;
                          int dist_scale_factor[16];
                          int dist_scale_factor_field[2][32];
                     -    int map_col_to_list0[2][16+32];
                     -    int map_col_to_list0_field[2][2][16+32];
                     +    int map_col_to_list0[2][16 + 32];
                     +    int map_col_to_list0_field[2][2][16 + 32];
                          /**
                           * num_ref_idx_l0/1_active_minus1 + 1
                           */
                     -    uint8_t *list_counts;            ///< Array of list_count per MB specifying the slice type
                     -    unsigned int ref_count[2];   ///< counts frames or fields, depending on current mb mode
                     +    unsigned int ref_count[2];          ///< counts frames or fields, depending on current mb mode
                          unsigned int list_count;
                     -    Picture ref_list[2][48];         /**< 0..15: frame refs, 16..47: mbaff field refs.
                     -                                          Reordered version of default_ref_list
                     -                                          according to picture reordering in slice header */
                     -    int ref2frm[MAX_SLICES][2][64];  ///< reference to frame number lists, used in the loop filter, the first 2 are for -2,-1
                     +    uint8_t *list_counts;               ///< Array of list_count per MB specifying the slice type
                     +    Picture ref_list[2][48];            /**< 0..15: frame refs, 16..47: mbaff field refs.
                     +                                         *   Reordered version of default_ref_list
                     +                                         *   according to picture reordering in slice header */
                     +    int ref2frm[MAX_SLICES][2][64];     ///< reference to frame number lists, used in the loop filter, the first 2 are for -2,-1
                     -    //data partitioning
                     +    // data partitioning
                          GetBitContext intra_gb;
                          GetBitContext inter_gb;
                          GetBitContext *intra_gb_ptr;
                          GetBitContext *inter_gb_ptr;
                     -    DECLARE_ALIGNED(16, DCTELEM, mb)[16*48*2]; ///< as a dct coeffecient is int32_t in high depth, we need to reserve twice the space.
                     -    DECLARE_ALIGNED(16, DCTELEM, mb_luma_dc)[3][16*2];
                     -    DCTELEM mb_padding[256*2];        ///< as mb is addressed by scantable[i] and scantable is uint8_t we can either check that i is not too large or ensure that there is some unused stuff after mb
                     +    DECLARE_ALIGNED(16, DCTELEM, mb)[16 * 48 * 2]; ///< as a dct coeffecient is int32_t in high depth, we need to reserve twice the space.
                     +    DECLARE_ALIGNED(16, DCTELEM, mb_luma_dc)[3][16 * 2];
                     +    DCTELEM mb_padding[256 * 2];        ///< as mb is addressed by scantable[i] and scantable is uint8_t we can either check that i is not too large or ensure that there is some unused stuff after mb
                          /**
                           * Cabac
                           */
                          CABACContext cabac;
                     -    uint8_t      cabac_state[1024];
                     +    uint8_t cabac_state[1024];
                     -    /* 0x100 -> non null luma_dc, 0x80/0x40 -> non null chroma_dc (cb/cr), 0x?0 -> chroma_cbp(0,1,2), 0x0? luma_cbp */
                     -    uint16_t     *cbp_table;
                     +    /* 0x100 -> non null luma_dc, 0x80/0x40 -> non null chroma_dc (cb/cr), 0x?0 -> chroma_cbp(0, 1, 2), 0x0? luma_cbp */
                     +    uint16_t *cbp_table;
                          int cbp;
                          int top_cbp;
                          int left_cbp;
                          /* chroma_pred_mode for i4x4 or i16x16, else 0 */
                     -    uint8_t     *chroma_pred_mode_table;
                     -    int         last_qscale_diff;
                     -    uint8_t     (*mvd_table[2])[2];
                     -    DECLARE_ALIGNED(16, uint8_t, mvd_cache)[2][5*8][2];
                     -    uint8_t     *direct_table;
                     -    uint8_t     direct_cache[5*8];
                     +    uint8_t *chroma_pred_mode_table;
                     +    int last_qscale_diff;
                     +    uint8_t (*mvd_table[2])[2];
                     +    DECLARE_ALIGNED(16, uint8_t, mvd_cache)[2][5 * 8][2];
                     +    uint8_t *direct_table;
                     +    uint8_t direct_cache[5 * 8];
                          uint8_t zigzag_scan[16];
                          uint8_t zigzag_scan8x8[64];
@@ -435,13 +434,13 @@ typedef struct H264Context{
                          int is_complex;
                     -    //deblock
                     -    int deblocking_filter;         ///< disable_deblocking_filter_idc with 1<->0
                     +    // deblock
                     +    int deblocking_filter;          ///< disable_deblocking_filter_idc with 1 <-> 0
                          int slice_alpha_c0_offset;
                          int slice_beta_offset;
                     -//=============================================================
                     -    //Things below are not used in the MB or more inner code
                     +    // =============================================================
                     +    // Things below are not used in the MB or more inner code
                          int nal_ref_idc;
                          int nal_unit_type;
@@ -451,37 +450,36 @@ typedef struct H264Context{
                          /**
                           * Used to parse AVC variant of h264
                           */
                     -    int is_avc; ///< this flag is != 0 if codec is avc1
                     -    int nal_length_size; ///< Number of bytes used for nal length (1, 2 or 4)
                     -    int got_first; ///< this flag is != 0 if we've parsed a frame
                     +    int is_avc;           ///< this flag is != 0 if codec is avc1
                     +    int nal_length_size;  ///< Number of bytes used for nal length (1, 2 or 4)
                     +    int got_first;        ///< this flag is != 0 if we've parsed a frame
                          SPS *sps_buffers[MAX_SPS_COUNT];
                          PPS *pps_buffers[MAX_PPS_COUNT];
                     -    int dequant_coeff_pps;     ///< reinit tables when pps changes
                     +    int dequant_coeff_pps;      ///< reinit tables when pps changes
                          uint16_t *slice_table_base;
+                    -
                     -    //POC stuff
                     +    // POC stuff
                          int poc_lsb;
                          int poc_msb;
                          int delta_poc_bottom;
                          int delta_poc[2];
                          int frame_num;
                     -    int prev_poc_msb;             ///< poc_msb of the last reference pic for POC type 0
                     -    int prev_poc_lsb;             ///< poc_lsb of the last reference pic for POC type 0
                     -    int frame_num_offset;         ///< for POC type 2
                     -    int prev_frame_num_offset;    ///< for POC type 2
                     -    int prev_frame_num;           ///< frame_num of the last pic for POC type 1/2
                     +    int prev_poc_msb;           ///< poc_msb of the last reference pic for POC type 0
                     +    int prev_poc_lsb;           ///< poc_lsb of the last reference pic for POC type 0
                     +    int frame_num_offset;       ///< for POC type 2
                     +    int prev_frame_num_offset;  ///< for POC type 2
                     +    int prev_frame_num;         ///< frame_num of the last pic for POC type 1/2
                          /**
                     -     * frame_num for frames or 2*frame_num+1 for field pics.
                     +     * frame_num for frames or 2 * frame_num + 1 for field pics.
                           */
                          int curr_pic_num;
                          /**
                     -     * max_frame_num or 2*max_frame_num for field pics.
                     +     * max_frame_num or 2 * max_frame_num for field pics.
                           */
                          int max_pic_num;
@@ -490,7 +488,7 @@ typedef struct H264Context{
                          Picture *short_ref[32];
                          Picture *long_ref[32];
                          Picture default_ref_list[2][32]; ///< base reference list for all slices of a coded picture
                     -    Picture *delayed_pic[MAX_DELAYED_PIC_COUNT+2]; //FIXME size?
                     +    Picture *delayed_pic[MAX_DELAYED_PIC_COUNT + 2]; // FIXME size?
                          int last_pocs[MAX_DELAYED_PIC_COUNT];
                          Picture *next_output_pic;
                          int outputed_poc;
@@ -503,10 +501,10 @@ typedef struct H264Context{
                          int mmco_index;
                          int mmco_reset;
                     -    int long_ref_count;  ///< number of actual long term references
                     -    int short_ref_count; ///< number of actual short term references
                     +    int long_ref_count;     ///< number of actual long term references
                     +    int short_ref_count;    ///< number of actual short term references
                     -    int          cabac_init_idc;
                     +    int cabac_init_idc;
                          /**
                           * @name Members for slice based multithreading
@@ -582,12 +580,12 @@ typedef struct H264Context{
                           */
                          int recovery_frame;
                     -    int luma_weight_flag[2];   ///< 7.4.3.2 luma_weight_lX_flag
                     -    int chroma_weight_flag[2]; ///< 7.4.3.2 chroma_weight_lX_flag
                     +    int luma_weight_flag[2];    ///< 7.4.3.2 luma_weight_lX_flag
                     +    int chroma_weight_flag[2];  ///< 7.4.3.2 chroma_weight_lX_flag
                          // Timestamp stuff
                     -    int sei_buffering_period_present;  ///< Buffering period SEI flag
                     -    int initial_cpb_removal_delay[32]; ///< Initial timestamps for CPBs
                     +    int sei_buffering_period_present;   ///< Buffering period SEI flag
                     +    int initial_cpb_removal_delay[32];  ///< Initial timestamps for CPBs
                          int cur_chroma_format_idc;
@@ -598,10 +596,9 @@ typedef struct H264Context{
                          uint8_t parse_history[4];
                          int parse_history_count;
                          int parse_last_mb;
                     -}H264Context;
+                    -
                     +} H264Context;
                     -extern const uint8_t ff_h264_chroma_qp[5][QP_MAX_NUM+1]; ///< One chroma qp table for each possible bit depth (8-12).
                     +extern const uint8_t ff_h264_chroma_qp[5][QP_MAX_NUM + 1]; ///< One chroma qp table for each possible bit depth (8-12).
                      extern const uint16_t ff_h264_mb_sizes[4];
                      /**
@@ -628,13 +625,16 @@ int ff_h264_decode_picture_parameter_set(H264Context *h, int bit_length);
                       * Decode a network abstraction layer unit.
                       * @param consumed is the number of bytes used as input
                       * @param length is the length of the array
                     - * @param dst_length is the number of decoded bytes FIXME here or a decode rbsp tailing?
                     + * @param dst_length is the number of decoded bytes FIXME here
                     + *                   or a decode rbsp tailing?
                       * @return decoded bytes, might be src+1 if no escapes
                       */
                     -const uint8_t *ff_h264_decode_nal(H264Context *h, const uint8_t *src, int *dst_length, int *consumed, int length);
                     +const uint8_t *ff_h264_decode_nal(H264Context *h, const uint8_t *src,
                     +                                  int *dst_length, int *consumed, int length);
                      /**
                     - * Free any data that may have been allocated in the H264 context like SPS, PPS etc.
                     + * Free any data that may have been allocated in the H264 context
                     + * like SPS, PPS etc.
                       */
                      av_cold void ff_h264_free_context(H264Context *h);
@@ -667,12 +667,16 @@ int ff_h264_decode_ref_pic_marking(H264Context *h, GetBitContext *gb);
                      void ff_generate_sliding_window_mmcos(H264Context *h);
+                    -
                      /**
                     - * Check if the top & left blocks are available if needed & change the dc mode so it only uses the available blocks.
                     + * Check if the top & left blocks are available if needed & change the
                     + * dc mode so it only uses the available blocks.
                       */
                      int ff_h264_check_intra4x4_pred_mode(H264Context *h);
                     +/**
                     + * Check if the top & left blocks are available if needed & change the
                     + * dc mode so it only uses the available blocks.
                     + */
                      int ff_h264_check_intra_pred_mode(H264Context *h, int mode, int is_chroma);
                      void ff_h264_hl_decode_mb(H264Context *h);
@@ -683,24 +687,28 @@ av_cold void ff_h264_decode_init_vlc(void);
                      /**
                       * Decode a macroblock
                     - * @return 0 if OK, ER_AC_ERROR / ER_DC_ERROR / ER_MV_ERROR if an error is noticed
                     + * @return 0 if OK, ER_AC_ERROR / ER_DC_ERROR / ER_MV_ERROR on error
                       */
                      int ff_h264_decode_mb_cavlc(H264Context *h);
                      /**
                       * Decode a CABAC coded macroblock
                     - * @return 0 if OK, ER_AC_ERROR / ER_DC_ERROR / ER_MV_ERROR if an error is noticed
                     + * @return 0 if OK, ER_AC_ERROR / ER_DC_ERROR / ER_MV_ERROR on error
                       */
                      int ff_h264_decode_mb_cabac(H264Context *h);
                      void ff_h264_init_cabac_states(H264Context *h);
                     -void ff_h264_direct_dist_scale_factor(H264Context * const h);
                     -void ff_h264_direct_ref_list_init(H264Context * const h);
                     -void ff_h264_pred_direct_motion(H264Context * const h, int *mb_type);
                     +void ff_h264_direct_dist_scale_factor(H264Context *const h);
                     +void ff_h264_direct_ref_list_init(H264Context *const h);
                     +void ff_h264_pred_direct_motion(H264Context *const h, int *mb_type);
                     -void ff_h264_filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
                     -void ff_h264_filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
                     +void ff_h264_filter_mb_fast(H264Context *h, int mb_x, int mb_y,
                     +                            uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr,
                     +                            unsigned int linesize, unsigned int uvlinesize);
                     +void ff_h264_filter_mb(H264Context *h, int mb_x, int mb_y,
                     +                       uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr,
                     +                       unsigned int linesize, unsigned int uvlinesize);
                      /**
                       * Reset SEI values at the beginning of the frame.
@@ -709,16 +717,15 @@ void ff_h264_filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint
                       */
                      void ff_h264_reset_sei(H264Context *h);
+                    -
                      /*
                     -o-o o-o
                     - / / /
                     -o-o o-o
                     - ,---'
                     -o-o o-o
                     - / / /
                     -o-o o-o
                     -*/
                     + * o-o o-o
                     + *  / / /
                     + * o-o o-o
                     + *  ,---'
                     + * o-o o-o
                     + *  / / /
                     + * o-o o-o
                     + */
                      /* Scan8 organization:
                       *    0 1 2 3 4 5 6 7
@@ -743,156 +750,173 @@ o-o o-o
                      #define LUMA_DC_BLOCK_INDEX   48
                      #define CHROMA_DC_BLOCK_INDEX 49
                     -//This table must be here because scan8[constant] must be known at compiletime
                     -static const uint8_t scan8[16*3 + 3]={
                     - 4+ 1*8, 5+ 1*8, 4+ 2*8, 5+ 2*8,
                     - 6+ 1*8, 7+ 1*8, 6+ 2*8, 7+ 2*8,
                     - 4+ 3*8, 5+ 3*8, 4+ 4*8, 5+ 4*8,
                     - 6+ 3*8, 7+ 3*8, 6+ 4*8, 7+ 4*8,
                     - 4+ 6*8, 5+ 6*8, 4+ 7*8, 5+ 7*8,
                     - 6+ 6*8, 7+ 6*8, 6+ 7*8, 7+ 7*8,
                     - 4+ 8*8, 5+ 8*8, 4+ 9*8, 5+ 9*8,
                     - 6+ 8*8, 7+ 8*8, 6+ 9*8, 7+ 9*8,
                     - 4+11*8, 5+11*8, 4+12*8, 5+12*8,
                     - 6+11*8, 7+11*8, 6+12*8, 7+12*8,
                     - 4+13*8, 5+13*8, 4+14*8, 5+14*8,
                     - 6+13*8, 7+13*8, 6+14*8, 7+14*8,
                     - 0+ 0*8, 0+ 5*8, 0+10*8
                     +// This table must be here because scan8[constant] must be known at compiletime
                     +static const uint8_t scan8[16 * 3 + 3] = {
                     +    4 +  1 * 8, 5 +  1 * 8, 4 +  2 * 8, 5 +  2 * 8,
                     +    6 +  1 * 8, 7 +  1 * 8, 6 +  2 * 8, 7 +  2 * 8,
                     +    4 +  3 * 8, 5 +  3 * 8, 4 +  4 * 8, 5 +  4 * 8,
                     +    6 +  3 * 8, 7 +  3 * 8, 6 +  4 * 8, 7 +  4 * 8,
                     +    4 +  6 * 8, 5 +  6 * 8, 4 +  7 * 8, 5 +  7 * 8,
                     +    6 +  6 * 8, 7 +  6 * 8, 6 +  7 * 8, 7 +  7 * 8,
                     +    4 +  8 * 8, 5 +  8 * 8, 4 +  9 * 8, 5 +  9 * 8,
                     +    6 +  8 * 8, 7 +  8 * 8, 6 +  9 * 8, 7 +  9 * 8,
                     +    4 + 11 * 8, 5 + 11 * 8, 4 + 12 * 8, 5 + 12 * 8,
                     +    6 + 11 * 8, 7 + 11 * 8, 6 + 12 * 8, 7 + 12 * 8,
                     +    4 + 13 * 8, 5 + 13 * 8, 4 + 14 * 8, 5 + 14 * 8,
                     +    6 + 13 * 8, 7 + 13 * 8, 6 + 14 * 8, 7 + 14 * 8,
                     +    0 +  0 * 8, 0 +  5 * 8, 0 + 10 * 8
                      };
                     -static av_always_inline uint32_t pack16to32(int a, int b){
                     +static av_always_inline uint32_t pack16to32(int a, int b)
                     +{
                      #if HAVE_BIGENDIAN
                     -   return (b&0xFFFF) + (a<<16);
                     +    return (b & 0xFFFF) + (a << 16);
                      #else
                     -   return (a&0xFFFF) + (b<<16);
                     +    return (a & 0xFFFF) + (b << 16);
                      #endif
+                     }
                     -static av_always_inline uint16_t pack8to16(int a, int b){
                     +static av_always_inline uint16_t pack8to16(int a, int b)
                     +{
                      #if HAVE_BIGENDIAN
                     -   return (b&0xFF) + (a<<8);
                     +    return (b & 0xFF) + (a << 8);
                      #else
                     -   return (a&0xFF) + (b<<8);
                     +    return (a & 0xFF) + (b << 8);
                      #endif
+                     }
                      /**
                       * Get the chroma qp.
                       */
                     -static av_always_inline int get_chroma_qp(H264Context *h, int t, int qscale){
                     +static av_always_inline int get_chroma_qp(H264Context *h, int t, int qscale)
                     +{
                          return h->pps.chroma_qp_table[t][qscale];
+                     }
                      /**
                       * Get the predicted intra4x4 prediction mode.
                       */
                     -static av_always_inline int pred_intra_mode(H264Context *h, int n){
                     -    const int index8= scan8[n];
                     -    const int left= h->intra4x4_pred_mode_cache[index8 - 1];
                     -    const int top = h->intra4x4_pred_mode_cache[index8 - 8];
                     -    const int min= FFMIN(left, top);
                     +static av_always_inline int pred_intra_mode(H264Context *h, int n)
                     +{
                     +    const int index8 = scan8[n];
                     +    const int left   = h->intra4x4_pred_mode_cache[index8 - 1];
                     +    const int top    = h->intra4x4_pred_mode_cache[index8 - 8];
                     +    const int min    = FFMIN(left, top);
                     -    tprintf(h->s.avctx, "mode:%d %d min:%d\n", left ,top, min);
                     +    tprintf(h->s.avctx, "mode:%d %d min:%d\n", left, top, min);
                     -    if(min<0) return DC_PRED;
                     -    else      return min;
                     +    if (min < 0)
                     +        return DC_PRED;
                     +    else
                     +        return min;
+                     }
                     -static av_always_inline void write_back_intra_pred_mode(H264Context *h){
                     -    int8_t *i4x4= h->intra4x4_pred_mode + h->mb2br_xy[h->mb_xy];
                     -    int8_t *i4x4_cache= h->intra4x4_pred_mode_cache;
                     +static av_always_inline void write_back_intra_pred_mode(H264Context *h)
                     +{
                     +    int8_t *i4x4       = h->intra4x4_pred_mode + h->mb2br_xy[h->mb_xy];
                     +    int8_t *i4x4_cache = h->intra4x4_pred_mode_cache;
                     -    AV_COPY32(i4x4, i4x4_cache + 4 + 8*4);
                     -    i4x4[4]= i4x4_cache[7+8*3];
                     -    i4x4[5]= i4x4_cache[7+8*2];
                     -    i4x4[6]= i4x4_cache[7+8*1];
                     +    AV_COPY32(i4x4, i4x4_cache + 4 + 8 * 4);
                     +    i4x4[4] = i4x4_cache[7 + 8 * 3];
                     +    i4x4[5] = i4x4_cache[7 + 8 * 2];
                     +    i4x4[6] = i4x4_cache[7 + 8 * 1];
+                     }
                     -static av_always_inline void write_back_non_zero_count(H264Context *h){
                     -    const int mb_xy= h->mb_xy;
                     -    uint8_t *nnz = h->non_zero_count[mb_xy];
                     +static av_always_inline void write_back_non_zero_count(H264Context *h)
                     +{
                     +    const int mb_xy    = h->mb_xy;
                     +    uint8_t *nnz       = h->non_zero_count[mb_xy];
                          uint8_t *nnz_cache = h->non_zero_count_cache;
                     -    AV_COPY32(&nnz[ 0], &nnz_cache[4+8* 1]);
                     -    AV_COPY32(&nnz[ 4], &nnz_cache[4+8* 2]);
                     -    AV_COPY32(&nnz[ 8], &nnz_cache[4+8* 3]);
                     -    AV_COPY32(&nnz[12], &nnz_cache[4+8* 4]);
                     -    AV_COPY32(&nnz[16], &nnz_cache[4+8* 6]);
                     -    AV_COPY32(&nnz[20], &nnz_cache[4+8* 7]);
                     -    AV_COPY32(&nnz[32], &nnz_cache[4+8*11]);
                     -    AV_COPY32(&nnz[36], &nnz_cache[4+8*12]);
+                    -
                     -    if(!h->s.chroma_y_shift){
                     -        AV_COPY32(&nnz[24], &nnz_cache[4+8* 8]);
                     -        AV_COPY32(&nnz[28], &nnz_cache[4+8* 9]);
                     -        AV_COPY32(&nnz[40], &nnz_cache[4+8*13]);
                     -        AV_COPY32(&nnz[44], &nnz_cache[4+8*14]);
                     +    AV_COPY32(&nnz[ 0], &nnz_cache[4 + 8 * 1]);
                     +    AV_COPY32(&nnz[ 4], &nnz_cache[4 + 8 * 2]);
                     +    AV_COPY32(&nnz[ 8], &nnz_cache[4 + 8 * 3]);
                     +    AV_COPY32(&nnz[12], &nnz_cache[4 + 8 * 4]);
                     +    AV_COPY32(&nnz[16], &nnz_cache[4 + 8 * 6]);
                     +    AV_COPY32(&nnz[20], &nnz_cache[4 + 8 * 7]);
                     +    AV_COPY32(&nnz[32], &nnz_cache[4 + 8 * 11]);
                     +    AV_COPY32(&nnz[36], &nnz_cache[4 + 8 * 12]);
+                    +
                     +    if (!h->s.chroma_y_shift) {
                     +        AV_COPY32(&nnz[24], &nnz_cache[4 + 8 * 8]);
                     +        AV_COPY32(&nnz[28], &nnz_cache[4 + 8 * 9]);
                     +        AV_COPY32(&nnz[40], &nnz_cache[4 + 8 * 13]);
                     +        AV_COPY32(&nnz[44], &nnz_cache[4 + 8 * 14]);
+                         }
+                     }
                     -static av_always_inline void write_back_motion_list(H264Context *h, MpegEncContext * const s, int b_stride,
                     -                                                    int b_xy, int b8_xy, int mb_type, int list )
                     +static av_always_inline void write_back_motion_list(H264Context *h,
                     +                                                    MpegEncContext *const s,
                     +                                                    int b_stride,
                     +                                                    int b_xy, int b8_xy,
                     +                                                    int mb_type, int list)
+                     {
                     -    int16_t (*mv_dst)[2] = &s->current_picture.f.motion_val[list][b_xy];
                     -    int16_t (*mv_src)[2] = &h->mv_cache[list][scan8[0]];
                     -    AV_COPY128(mv_dst + 0*b_stride, mv_src + 8*0);
                     -    AV_COPY128(mv_dst + 1*b_stride, mv_src + 8*1);
                     -    AV_COPY128(mv_dst + 2*b_stride, mv_src + 8*2);
                     -    AV_COPY128(mv_dst + 3*b_stride, mv_src + 8*3);
                     -    if( CABAC ) {
                     -        uint8_t (*mvd_dst)[2] = &h->mvd_table[list][FMO ? 8*h->mb_xy : h->mb2br_xy[h->mb_xy]];
                     -        uint8_t (*mvd_src)[2] = &h->mvd_cache[list][scan8[0]];
                     -        if(IS_SKIP(mb_type))
                     +    int16_t(*mv_dst)[2] = &s->current_picture.f.motion_val[list][b_xy];
                     +    int16_t(*mv_src)[2] = &h->mv_cache[list][scan8[0]];
                     +    AV_COPY128(mv_dst + 0 * b_stride, mv_src + 8 * 0);
                     +    AV_COPY128(mv_dst + 1 * b_stride, mv_src + 8 * 1);
                     +    AV_COPY128(mv_dst + 2 * b_stride, mv_src + 8 * 2);
                     +    AV_COPY128(mv_dst + 3 * b_stride, mv_src + 8 * 3);
                     +    if (CABAC) {
                     +        uint8_t (*mvd_dst)[2] = &h->mvd_table[list][FMO ? 8 * h->mb_xy
                     +                                                        : h->mb2br_xy[h->mb_xy]];
                     +        uint8_t(*mvd_src)[2]  = &h->mvd_cache[list][scan8[0]];
                     +        if (IS_SKIP(mb_type)) {
                                  AV_ZERO128(mvd_dst);
                     -        else{
                     -            AV_COPY64(mvd_dst, mvd_src + 8*3);
                     -            AV_COPY16(mvd_dst + 3 + 3, mvd_src + 3 + 8*0);
                     -            AV_COPY16(mvd_dst + 3 + 2, mvd_src + 3 + 8*1);
                     -            AV_COPY16(mvd_dst + 3 + 1, mvd_src + 3 + 8*2);
                     +        } else {
                     +            AV_COPY64(mvd_dst, mvd_src + 8 * 3);
                     +            AV_COPY16(mvd_dst + 3 + 3, mvd_src + 3 + 8 * 0);
                     +            AV_COPY16(mvd_dst + 3 + 2, mvd_src + 3 + 8 * 1);
                     +            AV_COPY16(mvd_dst + 3 + 1, mvd_src + 3 + 8 * 2);
+                             }
+                         }
+                         {
                              int8_t *ref_index = &s->current_picture.f.ref_index[list][b8_xy];
                              int8_t *ref_cache = h->ref_cache[list];
                     -        ref_index[0+0*2]= ref_cache[scan8[0]];
                     -        ref_index[1+0*2]= ref_cache[scan8[4]];
                     -        ref_index[0+1*2]= ref_cache[scan8[8]];
                     -        ref_index[1+1*2]= ref_cache[scan8[12]];
                     +        ref_index[0 + 0 * 2] = ref_cache[scan8[0]];
                     +        ref_index[1 + 0 * 2] = ref_cache[scan8[4]];
                     +        ref_index[0 + 1 * 2] = ref_cache[scan8[8]];
                     +        ref_index[1 + 1 * 2] = ref_cache[scan8[12]];
+                         }
+                     }
                     -static av_always_inline void write_back_motion(H264Context *h, int mb_type){
                     -    MpegEncContext * const s = &h->s;
                     -    const int b_stride = h->b_stride;
                     -    const int b_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride; //try mb2b(8)_xy
                     -    const int b8_xy= 4*h->mb_xy;
                     +static av_always_inline void write_back_motion(H264Context *h, int mb_type)
                     +{
                     +    MpegEncContext *const s = &h->s;
                     +    const int b_stride      = h->b_stride;
                     +    const int b_xy  = 4 * s->mb_x + 4 * s->mb_y * h->b_stride; // try mb2b(8)_xy
                     +    const int b8_xy = 4 * h->mb_xy;
                     -    if(USES_LIST(mb_type, 0)){
                     +    if (USES_LIST(mb_type, 0)) {
                              write_back_motion_list(h, s, b_stride, b_xy, b8_xy, mb_type, 0);
                     -    }else{
                     +    } else {
                              fill_rectangle(&s->current_picture.f.ref_index[0][b8_xy],
                                             2, 2, 2, (uint8_t)LIST_NOT_USED, 1);
+                         }
                     -    if(USES_LIST(mb_type, 1)){
                     +    if (USES_LIST(mb_type, 1))
                              write_back_motion_list(h, s, b_stride, b_xy, b8_xy, mb_type, 1);
                     -    }
                     -    if(h->slice_type_nos == AV_PICTURE_TYPE_B && CABAC){
                     -        if(IS_8X8(mb_type)){
                     -            uint8_t *direct_table = &h->direct_table[4*h->mb_xy];
                     -            direct_table[1] = h->sub_mb_type[1]>>1;
                     -            direct_table[2] = h->sub_mb_type[2]>>1;
                     -            direct_table[3] = h->sub_mb_type[3]>>1;
                     +    if (h->slice_type_nos == AV_PICTURE_TYPE_B && CABAC) {
                     +        if (IS_8X8(mb_type)) {
                     +            uint8_t *direct_table = &h->direct_table[4 * h->mb_xy];
                     +            direct_table[1] = h->sub_mb_type[1] >> 1;
                     +            direct_table[2] = h->sub_mb_type[2] >> 1;
                     +            direct_table[3] = h->sub_mb_type[3] >> 1;
+                             }
+                         }
+                     }
                     -static av_always_inline int get_dct8x8_allowed(H264Context *h){
                     -    if(h->sps.direct_8x8_inference_flag)
                     -        return !(AV_RN64A(h->sub_mb_type) & ((MB_TYPE_16x8|MB_TYPE_8x16|MB_TYPE_8x8                )*0x0001000100010001ULL));
                     +static av_always_inline int get_dct8x8_allowed(H264Context *h)
                     +{
                     +    if (h->sps.direct_8x8_inference_flag)
                     +        return !(AV_RN64A(h->sub_mb_type) &
                     +                 ((MB_TYPE_16x8 | MB_TYPE_8x16 | MB_TYPE_8x8) *
                     +                  0x0001000100010001ULL));
                          else
                     -        return !(AV_RN64A(h->sub_mb_type) & ((MB_TYPE_16x8|MB_TYPE_8x16|MB_TYPE_8x8|MB_TYPE_DIRECT2)*0x0001000100010001ULL));
                     +        return !(AV_RN64A(h->sub_mb_type) &
                     +                 ((MB_TYPE_16x8 | MB_TYPE_8x16 | MB_TYPE_8x8 | MB_TYPE_DIRECT2) *
                     +                  0x0001000100010001ULL));
+                     }
                      #endif /* AVCODEC_H264_H */

@@ -35,53 +35,53 @@
                      //#undef NDEBUG
                      #include <assert.h>
                     -static av_always_inline int fetch_diagonal_mv(H264Context *h, const int16_t **C, int i, int list, int part_width){
                     -    const int topright_ref= h->ref_cache[list][ i - 8 + part_width ];
                     -    MpegEncContext *s = &h->s;
                     +static av_always_inline int fetch_diagonal_mv(H264Context *h, const int16_t **C,
                     +                                              int i, int list, int part_width)
                     +{
                     +    const int topright_ref = h->ref_cache[list][i - 8 + part_width];
                     +    MpegEncContext *s      = &h->s;
                          /* there is no consistent mapping of mvs to neighboring locations that will
                           * make mbaff happy, so we can't move all this logic to fill_caches */
                     -    if(FRAME_MBAFF){
+                    -
                     -#define SET_DIAG_MV(MV_OP, REF_OP, XY, Y4)\
                     -                const int xy = XY, y4 = Y4;\
                     -                const int mb_type = mb_types[xy+(y4>>2)*s->mb_stride];\
                     -                if(!USES_LIST(mb_type,list))\
                     -                    return LIST_NOT_USED;\
                     -                mv = s->current_picture_ptr->f.motion_val[list][h->mb2b_xy[xy] + 3 + y4*h->b_stride];\
                     -                h->mv_cache[list][scan8[0]-2][0] = mv[0];\
                     -                h->mv_cache[list][scan8[0]-2][1] = mv[1] MV_OP;\
                     -                return s->current_picture_ptr->f.ref_index[list][4*xy + 1 + (y4 & ~1)] REF_OP;
+                    -
                     -        if(topright_ref == PART_NOT_AVAILABLE
                     -           && i >= scan8[0]+8 && (i&7)==4
                     -           && h->ref_cache[list][scan8[0]-1] != PART_NOT_AVAILABLE){
                     +    if (FRAME_MBAFF) {
                     +#define SET_DIAG_MV(MV_OP, REF_OP, XY, Y4)                              \
                     +        const int xy = XY, y4 = Y4;                                     \
                     +        const int mb_type = mb_types[xy + (y4 >> 2) * s->mb_stride];    \
                     +        if (!USES_LIST(mb_type, list))                                  \
                     +            return LIST_NOT_USED;                                       \
                     +        mv = s->current_picture_ptr->f.motion_val[list][h->mb2b_xy[xy] + 3 + y4 * h->b_stride]; \
                     +        h->mv_cache[list][scan8[0] - 2][0] = mv[0];                     \
                     +        h->mv_cache[list][scan8[0] - 2][1] = mv[1] MV_OP;               \
                     +        return s->current_picture_ptr->f.ref_index[list][4 * xy + 1 + (y4 & ~1)] REF_OP;
+                    +
                     +        if (topright_ref == PART_NOT_AVAILABLE
                     +            && i >= scan8[0] + 8 && (i & 7) == 4
                     +            && h->ref_cache[list][scan8[0] - 1] != PART_NOT_AVAILABLE) {
                                  const uint32_t *mb_types = s->current_picture_ptr->f.mb_type;
                                  const int16_t *mv;
                     -            AV_ZERO32(h->mv_cache[list][scan8[0]-2]);
                     -            *C = h->mv_cache[list][scan8[0]-2];
                     +            AV_ZERO32(h->mv_cache[list][scan8[0] - 2]);
                     +            *C = h->mv_cache[list][scan8[0] - 2];
                     -            if(!MB_FIELD
                     -               && IS_INTERLACED(h->left_type[0])){
                     -                SET_DIAG_MV(*2, >>1, h->left_mb_xy[0]+s->mb_stride, (s->mb_y&1)*2+(i>>5));
                     +            if (!MB_FIELD && IS_INTERLACED(h->left_type[0])) {
                     +                SET_DIAG_MV(* 2, >> 1, h->left_mb_xy[0] + s->mb_stride,
                     +                            (s->mb_y & 1) * 2 + (i >> 5));
+                                 }
                     -            if(MB_FIELD
                     -               && !IS_INTERLACED(h->left_type[0])){
                     +            if (MB_FIELD && !IS_INTERLACED(h->left_type[0])) {
                                      // left shift will turn LIST_NOT_USED into PART_NOT_AVAILABLE, but that's OK.
                     -                SET_DIAG_MV(/2, <<1, h->left_mb_xy[i>=36], ((i>>2))&3);
                     +                SET_DIAG_MV(/ 2, << 1, h->left_mb_xy[i >= 36], ((i >> 2)) & 3);
+                                 }
+                             }
                      #undef SET_DIAG_MV
+                         }
                     -    if(topright_ref != PART_NOT_AVAILABLE){
                     -        *C= h->mv_cache[list][ i - 8 + part_width ];
                     +    if (topright_ref != PART_NOT_AVAILABLE) {
                     +        *C = h->mv_cache[list][i - 8 + part_width];
                              return topright_ref;
                     -    }else{
                     +    } else {
                              tprintf(s->avctx, "topright MV not available\n");
                     -        *C= h->mv_cache[list][ i - 8 - 1 ];
                     -        return h->ref_cache[list][ i - 8 - 1 ];
                     +        *C = h->mv_cache[list][i - 8 - 1];
                     +        return h->ref_cache[list][i - 8 - 1];
+                         }
+                     }
@@ -92,53 +92,61 @@ static av_always_inline int fetch_diagonal_mv(H264Context *h, const int16_t **C,
                       * @param mx the x component of the predicted motion vector
                       * @param my the y component of the predicted motion vector
                       */
                     -static av_always_inline void pred_motion(H264Context * const h, int n, int part_width, int list, int ref, int * const mx, int * const my){
                     -    const int index8= scan8[n];
                     -    const int top_ref=      h->ref_cache[list][ index8 - 8 ];
                     -    const int left_ref=     h->ref_cache[list][ index8 - 1 ];
                     -    const int16_t * const A= h->mv_cache[list][ index8 - 1 ];
                     -    const int16_t * const B= h->mv_cache[list][ index8 - 8 ];
                     -    const int16_t * C;
                     +static av_always_inline void pred_motion(H264Context *const h, int n,
                     +                                         int part_width, int list, int ref,
                     +                                         int *const mx, int *const my)
                     +{
                     +    const int index8       = scan8[n];
                     +    const int top_ref      = h->ref_cache[list][index8 - 8];
                     +    const int left_ref     = h->ref_cache[list][index8 - 1];
                     +    const int16_t *const A = h->mv_cache[list][index8 - 1];
                     +    const int16_t *const B = h->mv_cache[list][index8 - 8];
                     +    const int16_t *C;
                          int diagonal_ref, match_count;
                     -    assert(part_width==1 || part_width==2 || part_width==4);
                     +    assert(part_width == 1 || part_width == 2 || part_width == 4);
                      /* mv_cache
                     -  B . . A T T T T
                     -  U . . L . . , .
                     -  U . . L . . . .
                     -  U . . L . . , .
                     -  . . . L . . . .
                     -*/
+                    -
                     -    diagonal_ref= fetch_diagonal_mv(h, &C, index8, list, part_width);
                     -    match_count= (diagonal_ref==ref) + (top_ref==ref) + (left_ref==ref);
                     + * B . . A T T T T
                     + * U . . L . . , .
                     + * U . . L . . . .
                     + * U . . L . . , .
                     + * . . . L . . . .
                     + */
+                    +
                     +    diagonal_ref = fetch_diagonal_mv(h, &C, index8, list, part_width);
                     +    match_count  = (diagonal_ref == ref) + (top_ref == ref) + (left_ref == ref);
                          tprintf(h->s.avctx, "pred_motion match_count=%d\n", match_count);
                     -    if(match_count > 1){ //most common
                     -        *mx= mid_pred(A[0], B[0], C[0]);
                     -        *my= mid_pred(A[1], B[1], C[1]);
                     -    }else if(match_count==1){
                     -        if(left_ref==ref){
                     -            *mx= A[0];
                     -            *my= A[1];
                     -        }else if(top_ref==ref){
                     -            *mx= B[0];
                     -            *my= B[1];
                     -        }else{
                     -            *mx= C[0];
                     -            *my= C[1];
                     +    if (match_count > 1) { //most common
                     +        *mx = mid_pred(A[0], B[0], C[0]);
                     +        *my = mid_pred(A[1], B[1], C[1]);
                     +    } else if (match_count == 1) {
                     +        if (left_ref == ref) {
                     +            *mx = A[0];
                     +            *my = A[1];
                     +        } else if (top_ref == ref) {
                     +            *mx = B[0];
                     +            *my = B[1];
                     +        } else {
                     +            *mx = C[0];
                     +            *my = C[1];
+                             }
                     -    }else{
                     -        if(top_ref == PART_NOT_AVAILABLE && diagonal_ref == PART_NOT_AVAILABLE && left_ref != PART_NOT_AVAILABLE){
                     -            *mx= A[0];
                     -            *my= A[1];
                     -        }else{
                     -            *mx= mid_pred(A[0], B[0], C[0]);
                     -            *my= mid_pred(A[1], B[1], C[1]);
                     +    } else {
                     +        if (top_ref      == PART_NOT_AVAILABLE &&
                     +            diagonal_ref == PART_NOT_AVAILABLE &&
                     +            left_ref     != PART_NOT_AVAILABLE) {
                     +            *mx = A[0];
                     +            *my = A[1];
                     +        } else {
                     +            *mx = mid_pred(A[0], B[0], C[0]);
                     +            *my = mid_pred(A[1], B[1], C[1]);
+                             }
+                         }
                     -    tprintf(h->s.avctx, "pred_motion (%2d %2d %2d) (%2d %2d %2d) (%2d %2d %2d) -> (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1],                    diagonal_ref, C[0], C[1], left_ref, A[0], A[1], ref, *mx, *my, h->s.mb_x, h->s.mb_y, n, list);
                     +    tprintf(h->s.avctx,
                     +            "pred_motion (%2d %2d %2d) (%2d %2d %2d) (%2d %2d %2d) -> (%2d %2d %2d) at %2d %2d %d list %d\n",
                     +            top_ref, B[0], B[1], diagonal_ref, C[0], C[1], left_ref,
                     +            A[0], A[1], ref, *mx, *my, h->s.mb_x, h->s.mb_y, n, list);
+                     }
                      /**
@@ -147,27 +155,32 @@ static av_always_inline void pred_motion(H264Context * const h, int n, int part_
                       * @param mx the x component of the predicted motion vector
                       * @param my the y component of the predicted motion vector
                       */
                     -static av_always_inline void pred_16x8_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
                     -    if(n==0){
                     -        const int top_ref=      h->ref_cache[list][ scan8[0] - 8 ];
                     -        const int16_t * const B= h->mv_cache[list][ scan8[0] - 8 ];
+                    -
                     -        tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], h->s.mb_x, h->s.mb_y, n, list);
+                    -
                     -        if(top_ref == ref){
                     -            *mx= B[0];
                     -            *my= B[1];
                     +static av_always_inline void pred_16x8_motion(H264Context *const h,
                     +                                              int n, int list, int ref,
                     +                                              int *const mx, int *const my)
                     +{
                     +    if (n == 0) {
                     +        const int top_ref      = h->ref_cache[list][scan8[0] - 8];
                     +        const int16_t *const B = h->mv_cache[list][scan8[0] - 8];
+                    +
                     +        tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n",
                     +                top_ref, B[0], B[1], h->s.mb_x, h->s.mb_y, n, list);
+                    +
                     +        if (top_ref == ref) {
                     +            *mx = B[0];
                     +            *my = B[1];
                                  return;
+                             }
                     -    }else{
                     -        const int left_ref=     h->ref_cache[list][ scan8[8] - 1 ];
                     -        const int16_t * const A= h->mv_cache[list][ scan8[8] - 1 ];
                     +    } else {
                     +        const int left_ref     = h->ref_cache[list][scan8[8] - 1];
                     +        const int16_t *const A = h->mv_cache[list][scan8[8] - 1];
                     -        tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
                     +        tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n",
                     +                left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
                     -        if(left_ref == ref){
                     -            *mx= A[0];
                     -            *my= A[1];
                     +        if (left_ref == ref) {
                     +            *mx = A[0];
                     +            *my = A[1];
                                  return;
+                             }
+                         }
@@ -182,29 +195,34 @@ static av_always_inline void pred_16x8_motion(H264Context * const h, int n, int
                       * @param mx the x component of the predicted motion vector
                       * @param my the y component of the predicted motion vector
                       */
                     -static av_always_inline void pred_8x16_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
                     -    if(n==0){
                     -        const int left_ref=      h->ref_cache[list][ scan8[0] - 1 ];
                     -        const int16_t * const A=  h->mv_cache[list][ scan8[0] - 1 ];
+                    -
                     -        tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
+                    -
                     -        if(left_ref == ref){
                     -            *mx= A[0];
                     -            *my= A[1];
                     +static av_always_inline void pred_8x16_motion(H264Context *const h,
                     +                                              int n, int list, int ref,
                     +                                              int *const mx, int *const my)
                     +{
                     +    if (n == 0) {
                     +        const int left_ref     = h->ref_cache[list][scan8[0] - 1];
                     +        const int16_t *const A = h->mv_cache[list][scan8[0] - 1];
+                    +
                     +        tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n",
                     +                left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
+                    +
                     +        if (left_ref == ref) {
                     +            *mx = A[0];
                     +            *my = A[1];
                                  return;
+                             }
                     -    }else{
                     -        const int16_t * C;
                     +    } else {
                     +        const int16_t *C;
                              int diagonal_ref;
                     -        diagonal_ref= fetch_diagonal_mv(h, &C, scan8[4], list, 2);
                     +        diagonal_ref = fetch_diagonal_mv(h, &C, scan8[4], list, 2);
                     -        tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", diagonal_ref, C[0], C[1], h->s.mb_x, h->s.mb_y, n, list);
                     +        tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n",
                     +                diagonal_ref, C[0], C[1], h->s.mb_x, h->s.mb_y, n, list);
                     -        if(diagonal_ref == ref){
                     -            *mx= C[0];
                     -            *my= C[1];
                     +        if (diagonal_ref == ref) {
                     +            *mx = C[0];
                     +            *my = C[1];
                                  return;
+                             }
+                         }
@@ -213,168 +231,174 @@ static av_always_inline void pred_8x16_motion(H264Context * const h, int n, int
                          pred_motion(h, n, 2, list, ref, mx, my);
+                     }
                     -#define FIX_MV_MBAFF(type, refn, mvn, idx)\
                     -    if(FRAME_MBAFF){\
                     -        if(MB_FIELD){\
                     -            if(!IS_INTERLACED(type)){\
                     -                refn <<= 1;\
                     -                AV_COPY32(mvbuf[idx], mvn);\
                     -                mvbuf[idx][1] /= 2;\
                     -                mvn = mvbuf[idx];\
                     -            }\
                     -        }else{\
                     -            if(IS_INTERLACED(type)){\
                     -                refn >>= 1;\
                     -                AV_COPY32(mvbuf[idx], mvn);\
                     -                mvbuf[idx][1] <<= 1;\
                     -                mvn = mvbuf[idx];\
                     -            }\
                     -        }\
                     +#define FIX_MV_MBAFF(type, refn, mvn, idx)      \
                     +    if (FRAME_MBAFF) {                          \
                     +        if (MB_FIELD) {                         \
                     +            if (!IS_INTERLACED(type)) {         \
                     +                refn <<= 1;                     \
                     +                AV_COPY32(mvbuf[idx], mvn);     \
                     +                mvbuf[idx][1] /= 2;             \
                     +                mvn = mvbuf[idx];               \
                     +            }                                   \
                     +        } else {                                \
                     +            if (IS_INTERLACED(type)) {          \
                     +                refn >>= 1;                     \
                     +                AV_COPY32(mvbuf[idx], mvn);     \
                     +                mvbuf[idx][1] <<= 1;            \
                     +                mvn = mvbuf[idx];               \
                     +            }                                   \
                     +        }                                       \
+                         }
                     -static av_always_inline void pred_pskip_motion(H264Context * const h){
                     -    DECLARE_ALIGNED(4, static const int16_t, zeromv)[2] = {0};
                     +static av_always_inline void pred_pskip_motion(H264Context *const h)
                     +{
                     +    DECLARE_ALIGNED(4, static const int16_t, zeromv)[2] = { 0 };
                          DECLARE_ALIGNED(4, int16_t, mvbuf)[3][2];
                     -    MpegEncContext * const s = &h->s;
                     -    int8_t *ref      = s->current_picture.f.ref_index[0];
                     -    int16_t (*mv)[2] = s->current_picture.f.motion_val[0];
                     +    MpegEncContext *const s = &h->s;
                     +    int8_t *ref     = s->current_picture.f.ref_index[0];
                     +    int16_t(*mv)[2] = s->current_picture.f.motion_val[0];
                          int top_ref, left_ref, diagonal_ref, match_count, mx, my;
                          const int16_t *A, *B, *C;
                          int b_stride = h->b_stride;
                          fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1);
                     -    /* To avoid doing an entire fill_decode_caches, we inline the relevant parts here.
                     -     * FIXME: this is a partial duplicate of the logic in fill_decode_caches, but it's
                     -     * faster this way.  Is there a way to avoid this duplication?
                     +    /* To avoid doing an entire fill_decode_caches, we inline the relevant
                     +     * parts here.
                     +     * FIXME: this is a partial duplicate of the logic in fill_decode_caches,
                     +     * but it's faster this way.  Is there a way to avoid this duplication?
                           */
                     -    if(USES_LIST(h->left_type[LTOP], 0)){
                     -        left_ref = ref[4*h->left_mb_xy[LTOP] + 1 + (h->left_block[0]&~1)];
                     -        A = mv[h->mb2b_xy[h->left_mb_xy[LTOP]] + 3 + b_stride*h->left_block[0]];
                     +    if (USES_LIST(h->left_type[LTOP], 0)) {
                     +        left_ref = ref[4 * h->left_mb_xy[LTOP] + 1 + (h->left_block[0] & ~1)];
                     +        A = mv[h->mb2b_xy[h->left_mb_xy[LTOP]] + 3 + b_stride * h->left_block[0]];
                              FIX_MV_MBAFF(h->left_type[LTOP], left_ref, A, 0);
                     -        if(!(left_ref | AV_RN32A(A))){
                     +        if (!(left_ref | AV_RN32A(A)))
                                  goto zeromv;
                     -        }
                     -    }else if(h->left_type[LTOP]){
                     +    } else if (h->left_type[LTOP]) {
                              left_ref = LIST_NOT_USED;
                     -        A = zeromv;
                     -    }else{
                     +        A        = zeromv;
                     +    } else {
                              goto zeromv;
+                         }
                     -    if(USES_LIST(h->top_type, 0)){
                     -        top_ref = ref[4*h->top_mb_xy + 2];
                     -        B = mv[h->mb2b_xy[h->top_mb_xy] + 3*b_stride];
                     +    if (USES_LIST(h->top_type, 0)) {
                     +        top_ref = ref[4 * h->top_mb_xy + 2];
                     +        B       = mv[h->mb2b_xy[h->top_mb_xy] + 3 * b_stride];
                              FIX_MV_MBAFF(h->top_type, top_ref, B, 1);
                     -        if(!(top_ref | AV_RN32A(B))){
                     +        if (!(top_ref | AV_RN32A(B)))
                                  goto zeromv;
                     -        }
                     -    }else if(h->top_type){
                     +    } else if (h->top_type) {
                              top_ref = LIST_NOT_USED;
                     -        B = zeromv;
                     -    }else{
                     +        B       = zeromv;
                     +    } else {
                              goto zeromv;
+                         }
                     -    tprintf(h->s.avctx, "pred_pskip: (%d) (%d) at %2d %2d\n", top_ref, left_ref, h->s.mb_x, h->s.mb_y);
                     +    tprintf(h->s.avctx, "pred_pskip: (%d) (%d) at %2d %2d\n",
                     +            top_ref, left_ref, h->s.mb_x, h->s.mb_y);
                     -    if(USES_LIST(h->topright_type, 0)){
                     -        diagonal_ref = ref[4*h->topright_mb_xy + 2];
                     -        C = mv[h->mb2b_xy[h->topright_mb_xy] + 3*b_stride];
                     +    if (USES_LIST(h->topright_type, 0)) {
                     +        diagonal_ref = ref[4 * h->topright_mb_xy + 2];
                     +        C = mv[h->mb2b_xy[h->topright_mb_xy] + 3 * b_stride];
                              FIX_MV_MBAFF(h->topright_type, diagonal_ref, C, 2);
                     -    }else if(h->topright_type){
                     +    } else if (h->topright_type) {
                              diagonal_ref = LIST_NOT_USED;
                              C = zeromv;
                     -    }else{
                     -        if(USES_LIST(h->topleft_type, 0)){
                     -            diagonal_ref = ref[4*h->topleft_mb_xy + 1 + (h->topleft_partition & 2)];
                     -            C = mv[h->mb2b_xy[h->topleft_mb_xy] + 3 + b_stride + (h->topleft_partition & 2*b_stride)];
                     +    } else {
                     +        if (USES_LIST(h->topleft_type, 0)) {
                     +            diagonal_ref = ref[4 * h->topleft_mb_xy + 1 +
                     +                               (h->topleft_partition & 2)];
                     +            C = mv[h->mb2b_xy[h->topleft_mb_xy] + 3 + b_stride +
                     +                   (h->topleft_partition & 2 * b_stride)];
                                  FIX_MV_MBAFF(h->topleft_type, diagonal_ref, C, 2);
                     -        }else if(h->topleft_type){
                     +        } else if (h->topleft_type) {
                                  diagonal_ref = LIST_NOT_USED;
                     -            C = zeromv;
                     -        }else{
                     +            C            = zeromv;
                     +        } else {
                                  diagonal_ref = PART_NOT_AVAILABLE;
                     -            C = zeromv;
                     +            C            = zeromv;
+                             }
+                         }
                     -    match_count= !diagonal_ref + !top_ref + !left_ref;
                     +    match_count = !diagonal_ref + !top_ref + !left_ref;
                          tprintf(h->s.avctx, "pred_pskip_motion match_count=%d\n", match_count);
                     -    if(match_count > 1){
                     +    if (match_count > 1) {
                              mx = mid_pred(A[0], B[0], C[0]);
                              my = mid_pred(A[1], B[1], C[1]);
                     -    }else if(match_count==1){
                     -        if(!left_ref){
                     +    } else if (match_count == 1) {
                     +        if (!left_ref) {
                                  mx = A[0];
                                  my = A[1];
                     -        }else if(!top_ref){
                     +        } else if (!top_ref) {
                                  mx = B[0];
                                  my = B[1];
                     -        }else{
                     +        } else {
                                  mx = C[0];
                                  my = C[1];
+                             }
                     -    }else{
                     +    } else {
                              mx = mid_pred(A[0], B[0], C[0]);
                              my = mid_pred(A[1], B[1], C[1]);
+                         }
                     -    fill_rectangle( h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mx,my), 4);
                     +    fill_rectangle(h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mx, my), 4);
                          return;
+                    +
                      zeromv:
                     -    fill_rectangle( h->mv_cache[0][scan8[0]], 4, 4, 8, 0, 4);
                     +    fill_rectangle(h->mv_cache[0][scan8[0]], 4, 4, 8, 0, 4);
                          return;
+                     }
                     -static void fill_decode_neighbors(H264Context *h, int mb_type){
                     -    MpegEncContext * const s = &h->s;
                     -    const int mb_xy= h->mb_xy;
                     +static void fill_decode_neighbors(H264Context *h, int mb_type)
                     +{
                     +    MpegEncContext *const s = &h->s;
                     +    const int mb_xy = h->mb_xy;
                          int topleft_xy, top_xy, topright_xy, left_xy[LEFT_MBS];
                     -    static const uint8_t left_block_options[4][32]={
                     -        {0,1,2,3,7,10,8,11,3+0*4, 3+1*4, 3+2*4, 3+3*4, 1+4*4, 1+8*4, 1+5*4, 1+9*4},
                     -        {2,2,3,3,8,11,8,11,3+2*4, 3+2*4, 3+3*4, 3+3*4, 1+5*4, 1+9*4, 1+5*4, 1+9*4},
                     -        {0,0,1,1,7,10,7,10,3+0*4, 3+0*4, 3+1*4, 3+1*4, 1+4*4, 1+8*4, 1+4*4, 1+8*4},
                     -        {0,2,0,2,7,10,7,10,3+0*4, 3+2*4, 3+0*4, 3+2*4, 1+4*4, 1+8*4, 1+4*4, 1+8*4}
                     +    static const uint8_t left_block_options[4][32] = {
                     +        { 0, 1, 2, 3, 7, 10, 8, 11, 3 + 0 * 4, 3 + 1 * 4, 3 + 2 * 4, 3 + 3 * 4, 1 + 4 * 4, 1 + 8 * 4, 1 + 5 * 4, 1 + 9 * 4 },
                     +        { 2, 2, 3, 3, 8, 11, 8, 11, 3 + 2 * 4, 3 + 2 * 4, 3 + 3 * 4, 3 + 3 * 4, 1 + 5 * 4, 1 + 9 * 4, 1 + 5 * 4, 1 + 9 * 4 },
                     +        { 0, 0, 1, 1, 7, 10, 7, 10, 3 + 0 * 4, 3 + 0 * 4, 3 + 1 * 4, 3 + 1 * 4, 1 + 4 * 4, 1 + 8 * 4, 1 + 4 * 4, 1 + 8 * 4 },
                     +        { 0, 2, 0, 2, 7, 10, 7, 10, 3 + 0 * 4, 3 + 2 * 4, 3 + 0 * 4, 3 + 2 * 4, 1 + 4 * 4, 1 + 8 * 4, 1 + 4 * 4, 1 + 8 * 4 }
                          };
                     -    h->topleft_partition= -1;
                     +    h->topleft_partition = -1;
                     -    top_xy     = mb_xy  - (s->mb_stride << MB_FIELD);
                     +    top_xy = mb_xy - (s->mb_stride << MB_FIELD);
                          /* Wow, what a mess, why didn't they simplify the interlacing & intra
                           * stuff, I can't imagine that these complex rules are worth it. */
                     -    topleft_xy = top_xy - 1;
                     -    topright_xy= top_xy + 1;
                     -    left_xy[LBOT] = left_xy[LTOP] = mb_xy-1;
                     +    topleft_xy    = top_xy - 1;
                     +    topright_xy   = top_xy + 1;
                     +    left_xy[LBOT] = left_xy[LTOP] = mb_xy - 1;
                          h->left_block = left_block_options[0];
                     -    if(FRAME_MBAFF){
                     +    if (FRAME_MBAFF) {
                              const int left_mb_field_flag = IS_INTERLACED(s->current_picture.f.mb_type[mb_xy - 1]);
                              const int curr_mb_field_flag = IS_INTERLACED(mb_type);
                     -        if(s->mb_y&1){
                     +        if (s->mb_y & 1) {
                                  if (left_mb_field_flag != curr_mb_field_flag) {
                                      left_xy[LBOT] = left_xy[LTOP] = mb_xy - s->mb_stride - 1;
                                      if (curr_mb_field_flag) {
                                          left_xy[LBOT] += s->mb_stride;
                     -                    h->left_block = left_block_options[3];
                     +                    h->left_block  = left_block_options[3];
                                      } else {
                                          topleft_xy += s->mb_stride;
                     -                    // take top left mv from the middle of the mb, as opposed to all other modes which use the bottom right partition
                     +                    /* take top left mv from the middle of the mb, as opposed
                     +                     * to all other modes which use the bottom right partition */
                                          h->topleft_partition = 0;
                     -                    h->left_block = left_block_options[1];
                     +                    h->left_block        = left_block_options[1];
+                                     }
+                                 }
                     -        }else{
                     -            if(curr_mb_field_flag){
                     +        } else {
                     +            if (curr_mb_field_flag) {
                                      topleft_xy  += s->mb_stride & (((s->current_picture.f.mb_type[top_xy - 1] >> 7) & 1) - 1);
                                      topright_xy += s->mb_stride & (((s->current_picture.f.mb_type[top_xy + 1] >> 7) & 1) - 1);
                     -                top_xy      += s->mb_stride & (((s->current_picture.f.mb_type[top_xy    ] >> 7) & 1) - 1);
                     +                top_xy      += s->mb_stride & (((s->current_picture.f.mb_type[top_xy]     >> 7) & 1) - 1);
+                                 }
                                  if (left_mb_field_flag != curr_mb_field_flag) {
                                      if (curr_mb_field_flag) {
                                          left_xy[LBOT] += s->mb_stride;
                     -                    h->left_block = left_block_options[3];
                     +                    h->left_block  = left_block_options[3];
                                      } else {
                                          h->left_block = left_block_options[2];
+                                     }
@@ -382,9 +406,9 @@ static void fill_decode_neighbors(H264Context *h, int mb_type){
+                             }
+                         }
                     -    h->topleft_mb_xy = topleft_xy;
                     -    h->top_mb_xy     = top_xy;
                     -    h->topright_mb_xy= topright_xy;
                     +    h->topleft_mb_xy    = topleft_xy;
                     +    h->top_mb_xy        = top_xy;
                     +    h->topright_mb_xy   = topright_xy;
                          h->left_mb_xy[LTOP] = left_xy[LTOP];
                          h->left_mb_xy[LBOT] = left_xy[LBOT];
                          //FIXME do we need all in the context?
@@ -395,351 +419,372 @@ static void fill_decode_neighbors(H264Context *h, int mb_type){
                          h->left_type[LTOP] = s->current_picture.f.mb_type[left_xy[LTOP]];
                          h->left_type[LBOT] = s->current_picture.f.mb_type[left_xy[LBOT]];
                     -    if(FMO){
                     -    if(h->slice_table[topleft_xy    ] != h->slice_num) h->topleft_type = 0;
                     -    if(h->slice_table[top_xy        ] != h->slice_num) h->top_type     = 0;
                     -    if(h->slice_table[left_xy[LTOP] ] != h->slice_num) h->left_type[LTOP] = h->left_type[LBOT] = 0;
                     -    }else{
                     -        if(h->slice_table[topleft_xy ] != h->slice_num){
                     +    if (FMO) {
                     +        if (h->slice_table[topleft_xy] != h->slice_num)
                     +            h->topleft_type = 0;
                     +        if (h->slice_table[top_xy] != h->slice_num)
                     +            h->top_type = 0;
                     +        if (h->slice_table[left_xy[LTOP]] != h->slice_num)
                     +            h->left_type[LTOP] = h->left_type[LBOT] = 0;
                     +    } else {
                     +        if (h->slice_table[topleft_xy] != h->slice_num) {
                                  h->topleft_type = 0;
                     -            if(h->slice_table[top_xy        ] != h->slice_num) h->top_type     = 0;
                     -            if(h->slice_table[left_xy[LTOP] ] != h->slice_num) h->left_type[LTOP] = h->left_type[LBOT] = 0;
                     +            if (h->slice_table[top_xy] != h->slice_num)
                     +                h->top_type = 0;
                     +            if (h->slice_table[left_xy[LTOP]] != h->slice_num)
                     +                h->left_type[LTOP] = h->left_type[LBOT] = 0;
+                             }
+                         }
                     -    if(h->slice_table[topright_xy] != h->slice_num) h->topright_type= 0;
                     +    if (h->slice_table[topright_xy] != h->slice_num)
                     +        h->topright_type = 0;
+                     }
                     -static void fill_decode_caches(H264Context *h, int mb_type){
                     -    MpegEncContext * const s = &h->s;
                     +static void fill_decode_caches(H264Context *h, int mb_type)
                     +{
                     +    MpegEncContext *const s = &h->s;
                          int topleft_xy, top_xy, topright_xy, left_xy[LEFT_MBS];
                          int topleft_type, top_type, topright_type, left_type[LEFT_MBS];
                     -    const uint8_t * left_block= h->left_block;
                     +    const uint8_t *left_block = h->left_block;
                          int i;
                          uint8_t *nnz;
                          uint8_t *nnz_cache;
                     -    topleft_xy     = h->topleft_mb_xy;
                     -    top_xy         = h->top_mb_xy;
                     -    topright_xy    = h->topright_mb_xy;
                     -    left_xy[LTOP]  = h->left_mb_xy[LTOP];
                     -    left_xy[LBOT]  = h->left_mb_xy[LBOT];
                     -    topleft_type   = h->topleft_type;
                     -    top_type       = h->top_type;
                     -    topright_type  = h->topright_type;
                     -    left_type[LTOP]= h->left_type[LTOP];
                     -    left_type[LBOT]= h->left_type[LBOT];
+                    -
                     -    if(!IS_SKIP(mb_type)){
                     -        if(IS_INTRA(mb_type)){
                     -            int type_mask= h->pps.constrained_intra_pred ? IS_INTRA(-1) : -1;
                     -            h->topleft_samples_available=
                     -            h->top_samples_available=
                     -            h->left_samples_available= 0xFFFF;
                     -            h->topright_samples_available= 0xEEEA;
+                    -
                     -            if(!(top_type & type_mask)){
                     -                h->topleft_samples_available= 0xB3FF;
                     -                h->top_samples_available= 0x33FF;
                     -                h->topright_samples_available= 0x26EA;
                     +    topleft_xy      = h->topleft_mb_xy;
                     +    top_xy          = h->top_mb_xy;
                     +    topright_xy     = h->topright_mb_xy;
                     +    left_xy[LTOP]   = h->left_mb_xy[LTOP];
                     +    left_xy[LBOT]   = h->left_mb_xy[LBOT];
                     +    topleft_type    = h->topleft_type;
                     +    top_type        = h->top_type;
                     +    topright_type   = h->topright_type;
                     +    left_type[LTOP] = h->left_type[LTOP];
                     +    left_type[LBOT] = h->left_type[LBOT];
+                    +
                     +    if (!IS_SKIP(mb_type)) {
                     +        if (IS_INTRA(mb_type)) {
                     +            int type_mask = h->pps.constrained_intra_pred ? IS_INTRA(-1) : -1;
                     +            h->topleft_samples_available      =
                     +                h->top_samples_available      =
                     +                    h->left_samples_available = 0xFFFF;
                     +            h->topright_samples_available     = 0xEEEA;
+                    +
                     +            if (!(top_type & type_mask)) {
                     +                h->topleft_samples_available  = 0xB3FF;
                     +                h->top_samples_available      = 0x33FF;
                     +                h->topright_samples_available = 0x26EA;
+                                 }
                     -            if(IS_INTERLACED(mb_type) != IS_INTERLACED(left_type[LTOP])){
                     -                if(IS_INTERLACED(mb_type)){
                     -                    if(!(left_type[LTOP] & type_mask)){
                     -                        h->topleft_samples_available&= 0xDFFF;
                     -                        h->left_samples_available&= 0x5FFF;
                     +            if (IS_INTERLACED(mb_type) != IS_INTERLACED(left_type[LTOP])) {
                     +                if (IS_INTERLACED(mb_type)) {
                     +                    if (!(left_type[LTOP] & type_mask)) {
                     +                        h->topleft_samples_available &= 0xDFFF;
                     +                        h->left_samples_available    &= 0x5FFF;
+                                         }
                     -                    if(!(left_type[LBOT] & type_mask)){
                     -                        h->topleft_samples_available&= 0xFF5F;
                     -                        h->left_samples_available&= 0xFF5F;
                     +                    if (!(left_type[LBOT] & type_mask)) {
                     +                        h->topleft_samples_available &= 0xFF5F;
                     +                        h->left_samples_available    &= 0xFF5F;
+                                         }
                     -                }else{
                     +                } else {
                                          int left_typei = s->current_picture.f.mb_type[left_xy[LTOP] + s->mb_stride];
                                          assert(left_xy[LTOP] == left_xy[LBOT]);
                     -                    if(!((left_typei & type_mask) && (left_type[LTOP] & type_mask))){
                     -                        h->topleft_samples_available&= 0xDF5F;
                     -                        h->left_samples_available&= 0x5F5F;
                     +                    if (!((left_typei & type_mask) && (left_type[LTOP] & type_mask))) {
                     +                        h->topleft_samples_available &= 0xDF5F;
                     +                        h->left_samples_available    &= 0x5F5F;
+                                         }
+                                     }
                     -            }else{
                     -                if(!(left_type[LTOP] & type_mask)){
                     -                    h->topleft_samples_available&= 0xDF5F;
                     -                    h->left_samples_available&= 0x5F5F;
                     +            } else {
                     +                if (!(left_type[LTOP] & type_mask)) {
                     +                    h->topleft_samples_available &= 0xDF5F;
                     +                    h->left_samples_available    &= 0x5F5F;
+                                     }
+                                 }
                     -            if(!(topleft_type & type_mask))
                     -                h->topleft_samples_available&= 0x7FFF;
                     +            if (!(topleft_type & type_mask))
                     +                h->topleft_samples_available &= 0x7FFF;
                     -            if(!(topright_type & type_mask))
                     -                h->topright_samples_available&= 0xFBFF;
                     +            if (!(topright_type & type_mask))
                     +                h->topright_samples_available &= 0xFBFF;
                     -            if(IS_INTRA4x4(mb_type)){
                     -                if(IS_INTRA4x4(top_type)){
                     -                    AV_COPY32(h->intra4x4_pred_mode_cache+4+8*0, h->intra4x4_pred_mode + h->mb2br_xy[top_xy]);
                     -                }else{
                     -                    h->intra4x4_pred_mode_cache[4+8*0]=
                     -                    h->intra4x4_pred_mode_cache[5+8*0]=
                     -                    h->intra4x4_pred_mode_cache[6+8*0]=
                     -                    h->intra4x4_pred_mode_cache[7+8*0]= 2 - 3*!(top_type & type_mask);
                     +            if (IS_INTRA4x4(mb_type)) {
                     +                if (IS_INTRA4x4(top_type)) {
                     +                    AV_COPY32(h->intra4x4_pred_mode_cache + 4 + 8 * 0, h->intra4x4_pred_mode + h->mb2br_xy[top_xy]);
                     +                } else {
                     +                    h->intra4x4_pred_mode_cache[4 + 8 * 0] =
                     +                    h->intra4x4_pred_mode_cache[5 + 8 * 0] =
                     +                    h->intra4x4_pred_mode_cache[6 + 8 * 0] =
                     +                    h->intra4x4_pred_mode_cache[7 + 8 * 0] = 2 - 3 * !(top_type & type_mask);
+                                     }
                     -                for(i=0; i<2; i++){
                     -                    if(IS_INTRA4x4(left_type[LEFT(i)])){
                     -                        int8_t *mode= h->intra4x4_pred_mode + h->mb2br_xy[left_xy[LEFT(i)]];
                     -                        h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= mode[6-left_block[0+2*i]];
                     -                        h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= mode[6-left_block[1+2*i]];
                     -                    }else{
                     -                        h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]=
                     -                        h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= 2 - 3*!(left_type[LEFT(i)] & type_mask);
                     +                for (i = 0; i < 2; i++) {
                     +                    if (IS_INTRA4x4(left_type[LEFT(i)])) {
                     +                        int8_t *mode = h->intra4x4_pred_mode + h->mb2br_xy[left_xy[LEFT(i)]];
                     +                        h->intra4x4_pred_mode_cache[3 + 8 * 1 + 2 * 8 * i] = mode[6 - left_block[0 + 2 * i]];
                     +                        h->intra4x4_pred_mode_cache[3 + 8 * 2 + 2 * 8 * i] = mode[6 - left_block[1 + 2 * i]];
                     +                    } else {
                     +                        h->intra4x4_pred_mode_cache[3 + 8 * 1 + 2 * 8 * i] =
                     +                        h->intra4x4_pred_mode_cache[3 + 8 * 2 + 2 * 8 * i] = 2 - 3 * !(left_type[LEFT(i)] & type_mask);
+                                         }
+                                     }
+                                 }
+                             }
+                    -
                     -/*
                     -0 . T T. T T T T
                     -1 L . .L . . . .
                     -2 L . .L . . . .
                     -3 . T TL . . . .
                     -4 L . .L . . . .
                     -5 L . .. . . . .
                     -*/
                     -//FIXME constraint_intra_pred & partitioning & nnz (let us hope this is just a typo in the spec)
                     -    nnz_cache = h->non_zero_count_cache;
                     -    if(top_type){
                     -        nnz = h->non_zero_count[top_xy];
                     -        AV_COPY32(&nnz_cache[4+8* 0], &nnz[4*3]);
                     -        if(!s->chroma_y_shift){
                     -            AV_COPY32(&nnz_cache[4+8* 5], &nnz[4* 7]);
                     -            AV_COPY32(&nnz_cache[4+8*10], &nnz[4*11]);
                     -        }else{
                     -            AV_COPY32(&nnz_cache[4+8* 5], &nnz[4* 5]);
                     -            AV_COPY32(&nnz_cache[4+8*10], &nnz[4* 9]);
                     +        /*
                     +         * 0 . T T. T T T T
                     +         * 1 L . .L . . . .
                     +         * 2 L . .L . . . .
                     +         * 3 . T TL . . . .
                     +         * 4 L . .L . . . .
                     +         * 5 L . .. . . . .
                     +         */
                     +        /* FIXME: constraint_intra_pred & partitioning & nnz
                     +         * (let us hope this is just a typo in the spec) */
                     +        nnz_cache = h->non_zero_count_cache;
                     +        if (top_type) {
                     +            nnz = h->non_zero_count[top_xy];
                     +            AV_COPY32(&nnz_cache[4 + 8 * 0], &nnz[4 * 3]);
                     +            if (!s->chroma_y_shift) {
                     +                AV_COPY32(&nnz_cache[4 + 8 *  5], &nnz[4 *  7]);
                     +                AV_COPY32(&nnz_cache[4 + 8 * 10], &nnz[4 * 11]);
                     +            } else {
                     +                AV_COPY32(&nnz_cache[4 + 8 *  5], &nnz[4 * 5]);
                     +                AV_COPY32(&nnz_cache[4 + 8 * 10], &nnz[4 * 9]);
                     +            }
                     +        } else {
                     +            uint32_t top_empty = CABAC && !IS_INTRA(mb_type) ? 0 : 0x40404040;
                     +            AV_WN32A(&nnz_cache[4 + 8 *  0], top_empty);
                     +            AV_WN32A(&nnz_cache[4 + 8 *  5], top_empty);
                     +            AV_WN32A(&nnz_cache[4 + 8 * 10], top_empty);
+                             }
                     -    }else{
                     -        uint32_t top_empty = CABAC && !IS_INTRA(mb_type) ? 0 : 0x40404040;
                     -        AV_WN32A(&nnz_cache[4+8* 0], top_empty);
                     -        AV_WN32A(&nnz_cache[4+8* 5], top_empty);
                     -        AV_WN32A(&nnz_cache[4+8*10], top_empty);
                     -    }
                     -    for (i=0; i<2; i++) {
                     -        if(left_type[LEFT(i)]){
                     -            nnz = h->non_zero_count[left_xy[LEFT(i)]];
                     -            nnz_cache[3+8* 1 + 2*8*i]= nnz[left_block[8+0+2*i]];
                     -            nnz_cache[3+8* 2 + 2*8*i]= nnz[left_block[8+1+2*i]];
                     -            if(CHROMA444){
                     -                nnz_cache[3+8* 6 + 2*8*i]= nnz[left_block[8+0+2*i]+4*4];
                     -                nnz_cache[3+8* 7 + 2*8*i]= nnz[left_block[8+1+2*i]+4*4];
                     -                nnz_cache[3+8*11 + 2*8*i]= nnz[left_block[8+0+2*i]+8*4];
                     -                nnz_cache[3+8*12 + 2*8*i]= nnz[left_block[8+1+2*i]+8*4];
                     -            }else if(CHROMA422) {
                     -                nnz_cache[3+8* 6 + 2*8*i]= nnz[left_block[8+0+2*i]-2+4*4];
                     -                nnz_cache[3+8* 7 + 2*8*i]= nnz[left_block[8+1+2*i]-2+4*4];
                     -                nnz_cache[3+8*11 + 2*8*i]= nnz[left_block[8+0+2*i]-2+8*4];
                     -                nnz_cache[3+8*12 + 2*8*i]= nnz[left_block[8+1+2*i]-2+8*4];
                     -            }else{
                     -                nnz_cache[3+8* 6 +   8*i]= nnz[left_block[8+4+2*i]];
                     -                nnz_cache[3+8*11 +   8*i]= nnz[left_block[8+5+2*i]];
                     +        for (i = 0; i < 2; i++) {
                     +            if (left_type[LEFT(i)]) {
                     +                nnz = h->non_zero_count[left_xy[LEFT(i)]];
                     +                nnz_cache[3 + 8 * 1 + 2 * 8 * i] = nnz[left_block[8 + 0 + 2 * i]];
                     +                nnz_cache[3 + 8 * 2 + 2 * 8 * i] = nnz[left_block[8 + 1 + 2 * i]];
                     +                if (CHROMA444) {
                     +                    nnz_cache[3 + 8 *  6 + 2 * 8 * i] = nnz[left_block[8 + 0 + 2 * i] + 4 * 4];
                     +                    nnz_cache[3 + 8 *  7 + 2 * 8 * i] = nnz[left_block[8 + 1 + 2 * i] + 4 * 4];
                     +                    nnz_cache[3 + 8 * 11 + 2 * 8 * i] = nnz[left_block[8 + 0 + 2 * i] + 8 * 4];
                     +                    nnz_cache[3 + 8 * 12 + 2 * 8 * i] = nnz[left_block[8 + 1 + 2 * i] + 8 * 4];
                     +                } else if (CHROMA422) {
                     +                    nnz_cache[3 + 8 *  6 + 2 * 8 * i] = nnz[left_block[8 + 0 + 2 * i] - 2 + 4 * 4];
                     +                    nnz_cache[3 + 8 *  7 + 2 * 8 * i] = nnz[left_block[8 + 1 + 2 * i] - 2 + 4 * 4];
                     +                    nnz_cache[3 + 8 * 11 + 2 * 8 * i] = nnz[left_block[8 + 0 + 2 * i] - 2 + 8 * 4];
                     +                    nnz_cache[3 + 8 * 12 + 2 * 8 * i] = nnz[left_block[8 + 1 + 2 * i] - 2 + 8 * 4];
                     +                } else {
                     +                    nnz_cache[3 + 8 *  6 + 8 * i] = nnz[left_block[8 + 4 + 2 * i]];
                     +                    nnz_cache[3 + 8 * 11 + 8 * i] = nnz[left_block[8 + 5 + 2 * i]];
                     +                }
                     +            } else {
                     +                nnz_cache[3 + 8 *  1 + 2 * 8 * i] =
                     +                nnz_cache[3 + 8 *  2 + 2 * 8 * i] =
                     +                nnz_cache[3 + 8 *  6 + 2 * 8 * i] =
                     +                nnz_cache[3 + 8 *  7 + 2 * 8 * i] =
                     +                nnz_cache[3 + 8 * 11 + 2 * 8 * i] =
                     +                nnz_cache[3 + 8 * 12 + 2 * 8 * i] = CABAC && !IS_INTRA(mb_type) ? 0 : 64;
+                                 }
                     -        }else{
                     -            nnz_cache[3+8* 1 + 2*8*i]=
                     -            nnz_cache[3+8* 2 + 2*8*i]=
                     -            nnz_cache[3+8* 6 + 2*8*i]=
                     -            nnz_cache[3+8* 7 + 2*8*i]=
                     -            nnz_cache[3+8*11 + 2*8*i]=
                     -            nnz_cache[3+8*12 + 2*8*i]= CABAC && !IS_INTRA(mb_type) ? 0 : 64;
+                             }
                     -    }
                     -    if( CABAC ) {
                     -        // top_cbp
                     -        if(top_type) {
                     -            h->top_cbp = h->cbp_table[top_xy];
                     -        } else {
                     -            h->top_cbp = IS_INTRA(mb_type) ? 0x7CF : 0x00F;
                     -        }
                     -        // left_cbp
                     -        if (left_type[LTOP]) {
                     -            h->left_cbp =   (h->cbp_table[left_xy[LTOP]] & 0x7F0)
                     -                        |  ((h->cbp_table[left_xy[LTOP]]>>(left_block[0]&(~1)))&2)
                     -                        | (((h->cbp_table[left_xy[LBOT]]>>(left_block[2]&(~1)))&2) << 2);
                     -        } else {
                     -            h->left_cbp = IS_INTRA(mb_type) ? 0x7CF : 0x00F;
                     +        if (CABAC) {
                     +            // top_cbp
                     +            if (top_type)
                     +                h->top_cbp = h->cbp_table[top_xy];
                     +            else
                     +                h->top_cbp = IS_INTRA(mb_type) ? 0x7CF : 0x00F;
                     +            // left_cbp
                     +            if (left_type[LTOP]) {
                     +                h->left_cbp =   (h->cbp_table[left_xy[LTOP]] & 0x7F0) |
                     +                               ((h->cbp_table[left_xy[LTOP]] >> (left_block[0] & (~1))) & 2) |
                     +                              (((h->cbp_table[left_xy[LBOT]] >> (left_block[2] & (~1))) & 2) << 2);
                     +            } else {
                     +                h->left_cbp = IS_INTRA(mb_type) ? 0x7CF : 0x00F;
                     +            }
+                             }
+                         }
                     -    }
                     -    if(IS_INTER(mb_type) || (IS_DIRECT(mb_type) && h->direct_spatial_mv_pred)){
                     +    if (IS_INTER(mb_type) || (IS_DIRECT(mb_type) && h->direct_spatial_mv_pred)) {
                              int list;
                              int b_stride = h->b_stride;
                     -        for(list=0; list<h->list_count; list++){
                     +        for (list = 0; list < h->list_count; list++) {
                                  int8_t *ref_cache = &h->ref_cache[list][scan8[0]];
                                  int8_t *ref       = s->current_picture.f.ref_index[list];
                     -            int16_t (*mv_cache)[2] = &h->mv_cache[list][scan8[0]];
                     -            int16_t (*mv)[2]       = s->current_picture.f.motion_val[list];
                     -            if(!USES_LIST(mb_type, list)){
                     +            int16_t(*mv_cache)[2] = &h->mv_cache[list][scan8[0]];
                     +            int16_t(*mv)[2]       = s->current_picture.f.motion_val[list];
                     +            if (!USES_LIST(mb_type, list))
                                      continue;
                     -            }
                                  assert(!(IS_DIRECT(mb_type) && !h->direct_spatial_mv_pred));
                     -            if(USES_LIST(top_type, list)){
                     -                const int b_xy= h->mb2b_xy[top_xy] + 3*b_stride;
                     -                AV_COPY128(mv_cache[0 - 1*8], mv[b_xy + 0]);
                     -                ref_cache[0 - 1*8]=
                     -                ref_cache[1 - 1*8]= ref[4*top_xy + 2];
                     -                ref_cache[2 - 1*8]=
                     -                ref_cache[3 - 1*8]= ref[4*top_xy + 3];
                     -            }else{
                     -                AV_ZERO128(mv_cache[0 - 1*8]);
                     -                AV_WN32A(&ref_cache[0 - 1*8], ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101u);
                     +            if (USES_LIST(top_type, list)) {
                     +                const int b_xy = h->mb2b_xy[top_xy] + 3 * b_stride;
                     +                AV_COPY128(mv_cache[0 - 1 * 8], mv[b_xy + 0]);
                     +                ref_cache[0 - 1 * 8] =
                     +                ref_cache[1 - 1 * 8] = ref[4 * top_xy + 2];
                     +                ref_cache[2 - 1 * 8] =
                     +                ref_cache[3 - 1 * 8] = ref[4 * top_xy + 3];
                     +            } else {
                     +                AV_ZERO128(mv_cache[0 - 1 * 8]);
                     +                AV_WN32A(&ref_cache[0 - 1 * 8],
                     +                         ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE) & 0xFF) * 0x01010101u);
+                                 }
                     -            if(mb_type & (MB_TYPE_16x8|MB_TYPE_8x8)){
                     -            for(i=0; i<2; i++){
                     -                int cache_idx = -1 + i*2*8;
                     -                if(USES_LIST(left_type[LEFT(i)], list)){
                     -                    const int b_xy= h->mb2b_xy[left_xy[LEFT(i)]] + 3;
                     -                    const int b8_xy= 4*left_xy[LEFT(i)] + 1;
                     -                    AV_COPY32(mv_cache[cache_idx  ], mv[b_xy + b_stride*left_block[0+i*2]]);
                     -                    AV_COPY32(mv_cache[cache_idx+8], mv[b_xy + b_stride*left_block[1+i*2]]);
                     -                    ref_cache[cache_idx  ]= ref[b8_xy + (left_block[0+i*2]&~1)];
                     -                    ref_cache[cache_idx+8]= ref[b8_xy + (left_block[1+i*2]&~1)];
                     -                }else{
                     -                    AV_ZERO32(mv_cache[cache_idx  ]);
                     -                    AV_ZERO32(mv_cache[cache_idx+8]);
                     -                    ref_cache[cache_idx  ]=
                     -                    ref_cache[cache_idx+8]= (left_type[LEFT(i)]) ? LIST_NOT_USED : PART_NOT_AVAILABLE;
                     +            if (mb_type & (MB_TYPE_16x8 | MB_TYPE_8x8)) {
                     +                for (i = 0; i < 2; i++) {
                     +                    int cache_idx = -1 + i * 2 * 8;
                     +                    if (USES_LIST(left_type[LEFT(i)], list)) {
                     +                        const int b_xy  = h->mb2b_xy[left_xy[LEFT(i)]] + 3;
                     +                        const int b8_xy = 4 * left_xy[LEFT(i)] + 1;
                     +                        AV_COPY32(mv_cache[cache_idx],
                     +                                  mv[b_xy + b_stride * left_block[0 + i * 2]]);
                     +                        AV_COPY32(mv_cache[cache_idx + 8],
                     +                                  mv[b_xy + b_stride * left_block[1 + i * 2]]);
                     +                        ref_cache[cache_idx]     = ref[b8_xy + (left_block[0 + i * 2] & ~1)];
                     +                        ref_cache[cache_idx + 8] = ref[b8_xy + (left_block[1 + i * 2] & ~1)];
                     +                    } else {
                     +                        AV_ZERO32(mv_cache[cache_idx]);
                     +                        AV_ZERO32(mv_cache[cache_idx + 8]);
                     +                        ref_cache[cache_idx]     =
                     +                        ref_cache[cache_idx + 8] = (left_type[LEFT(i)]) ? LIST_NOT_USED
                     +                                                                        : PART_NOT_AVAILABLE;
                     +                    }
+                                     }
                     -            }
                     -            }else{
                     -                if(USES_LIST(left_type[LTOP], list)){
                     -                    const int b_xy= h->mb2b_xy[left_xy[LTOP]] + 3;
                     -                    const int b8_xy= 4*left_xy[LTOP] + 1;
                     -                    AV_COPY32(mv_cache[-1], mv[b_xy + b_stride*left_block[0]]);
                     -                    ref_cache[-1]= ref[b8_xy + (left_block[0]&~1)];
                     -                }else{
                     +            } else {
                     +                if (USES_LIST(left_type[LTOP], list)) {
                     +                    const int b_xy  = h->mb2b_xy[left_xy[LTOP]] + 3;
                     +                    const int b8_xy = 4 * left_xy[LTOP] + 1;
                     +                    AV_COPY32(mv_cache[-1], mv[b_xy + b_stride * left_block[0]]);
                     +                    ref_cache[-1] = ref[b8_xy + (left_block[0] & ~1)];
                     +                } else {
                                          AV_ZERO32(mv_cache[-1]);
                     -                    ref_cache[-1]= left_type[LTOP] ? LIST_NOT_USED : PART_NOT_AVAILABLE;
                     +                    ref_cache[-1] = left_type[LTOP] ? LIST_NOT_USED
                     +                                                    : PART_NOT_AVAILABLE;
+                                     }
+                                 }
                     -            if(USES_LIST(topright_type, list)){
                     -                const int b_xy= h->mb2b_xy[topright_xy] + 3*b_stride;
                     -                AV_COPY32(mv_cache[4 - 1*8], mv[b_xy]);
                     -                ref_cache[4 - 1*8]= ref[4*topright_xy + 2];
                     -            }else{
                     -                AV_ZERO32(mv_cache[4 - 1*8]);
                     -                ref_cache[4 - 1*8]= topright_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
                     +            if (USES_LIST(topright_type, list)) {
                     +                const int b_xy = h->mb2b_xy[topright_xy] + 3 * b_stride;
                     +                AV_COPY32(mv_cache[4 - 1 * 8], mv[b_xy]);
                     +                ref_cache[4 - 1 * 8] = ref[4 * topright_xy + 2];
                     +            } else {
                     +                AV_ZERO32(mv_cache[4 - 1 * 8]);
                     +                ref_cache[4 - 1 * 8] = topright_type ? LIST_NOT_USED
                     +                                                     : PART_NOT_AVAILABLE;
+                                 }
                                  if(ref_cache[2 - 1*8] < 0 || ref_cache[4 - 1*8] < 0){
                     -                if(USES_LIST(topleft_type, list)){
                     -                    const int b_xy = h->mb2b_xy[topleft_xy] + 3 + b_stride + (h->topleft_partition & 2*b_stride);
                     -                    const int b8_xy= 4*topleft_xy + 1 + (h->topleft_partition & 2);
                     -                    AV_COPY32(mv_cache[-1 - 1*8], mv[b_xy]);
                     -                    ref_cache[-1 - 1*8]= ref[b8_xy];
                     -                }else{
                     -                    AV_ZERO32(mv_cache[-1 - 1*8]);
                     -                    ref_cache[-1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
                     +                if (USES_LIST(topleft_type, list)) {
                     +                    const int b_xy  = h->mb2b_xy[topleft_xy] + 3 + b_stride +
                     +                                      (h->topleft_partition & 2 * b_stride);
                     +                    const int b8_xy = 4 * topleft_xy + 1 + (h->topleft_partition & 2);
                     +                    AV_COPY32(mv_cache[-1 - 1 * 8], mv[b_xy]);
                     +                    ref_cache[-1 - 1 * 8] = ref[b8_xy];
                     +                } else {
                     +                    AV_ZERO32(mv_cache[-1 - 1 * 8]);
                     +                    ref_cache[-1 - 1 * 8] = topleft_type ? LIST_NOT_USED
                     +                                                         : PART_NOT_AVAILABLE;
+                                     }
+                                 }
                     -            if((mb_type&(MB_TYPE_SKIP|MB_TYPE_DIRECT2)) && !FRAME_MBAFF)
                     +            if ((mb_type & (MB_TYPE_SKIP | MB_TYPE_DIRECT2)) && !FRAME_MBAFF)
                                      continue;
                     -            if(!(mb_type&(MB_TYPE_SKIP|MB_TYPE_DIRECT2))){
                     -                uint8_t (*mvd_cache)[2] = &h->mvd_cache[list][scan8[0]];
                     -                uint8_t (*mvd)[2] = h->mvd_table[list];
                     -                ref_cache[2+8*0] =
                     -                ref_cache[2+8*2] = PART_NOT_AVAILABLE;
                     -                AV_ZERO32(mv_cache[2+8*0]);
                     -                AV_ZERO32(mv_cache[2+8*2]);
+                    -
                     -                if( CABAC ) {
                     -                    if(USES_LIST(top_type, list)){
                     -                        const int b_xy= h->mb2br_xy[top_xy];
                     -                        AV_COPY64(mvd_cache[0 - 1*8], mvd[b_xy + 0]);
                     -                    }else{
                     -                        AV_ZERO64(mvd_cache[0 - 1*8]);
                     +            if (!(mb_type & (MB_TYPE_SKIP | MB_TYPE_DIRECT2))) {
                     +                uint8_t(*mvd_cache)[2]   = &h->mvd_cache[list][scan8[0]];
                     +                uint8_t(*mvd)[2]         = h->mvd_table[list];
                     +                ref_cache[2 + 8 * 0] =
                     +                ref_cache[2 + 8 * 2] = PART_NOT_AVAILABLE;
                     +                AV_ZERO32(mv_cache[2 + 8 * 0]);
                     +                AV_ZERO32(mv_cache[2 + 8 * 2]);
+                    +
                     +                if (CABAC) {
                     +                    if (USES_LIST(top_type, list)) {
                     +                        const int b_xy = h->mb2br_xy[top_xy];
                     +                        AV_COPY64(mvd_cache[0 - 1 * 8], mvd[b_xy + 0]);
                     +                    } else {
                     +                        AV_ZERO64(mvd_cache[0 - 1 * 8]);
+                                         }
                     -                    if(USES_LIST(left_type[LTOP], list)){
                     -                        const int b_xy= h->mb2br_xy[left_xy[LTOP]] + 6;
                     -                        AV_COPY16(mvd_cache[-1 + 0*8], mvd[b_xy - left_block[0]]);
                     -                        AV_COPY16(mvd_cache[-1 + 1*8], mvd[b_xy - left_block[1]]);
                     -                    }else{
                     -                        AV_ZERO16(mvd_cache[-1 + 0*8]);
                     -                        AV_ZERO16(mvd_cache[-1 + 1*8]);
                     +                    if (USES_LIST(left_type[LTOP], list)) {
                     +                        const int b_xy = h->mb2br_xy[left_xy[LTOP]] + 6;
                     +                        AV_COPY16(mvd_cache[-1 + 0 * 8], mvd[b_xy - left_block[0]]);
                     +                        AV_COPY16(mvd_cache[-1 + 1 * 8], mvd[b_xy - left_block[1]]);
                     +                    } else {
                     +                        AV_ZERO16(mvd_cache[-1 + 0 * 8]);
                     +                        AV_ZERO16(mvd_cache[-1 + 1 * 8]);
+                                         }
                     -                    if(USES_LIST(left_type[LBOT], list)){
                     -                        const int b_xy= h->mb2br_xy[left_xy[LBOT]] + 6;
                     -                        AV_COPY16(mvd_cache[-1 + 2*8], mvd[b_xy - left_block[2]]);
                     -                        AV_COPY16(mvd_cache[-1 + 3*8], mvd[b_xy - left_block[3]]);
                     -                    }else{
                     -                        AV_ZERO16(mvd_cache[-1 + 2*8]);
                     -                        AV_ZERO16(mvd_cache[-1 + 3*8]);
                     +                    if (USES_LIST(left_type[LBOT], list)) {
                     +                        const int b_xy = h->mb2br_xy[left_xy[LBOT]] + 6;
                     +                        AV_COPY16(mvd_cache[-1 + 2 * 8], mvd[b_xy - left_block[2]]);
                     +                        AV_COPY16(mvd_cache[-1 + 3 * 8], mvd[b_xy - left_block[3]]);
                     +                    } else {
                     +                        AV_ZERO16(mvd_cache[-1 + 2 * 8]);
                     +                        AV_ZERO16(mvd_cache[-1 + 3 * 8]);
+                                         }
                     -                    AV_ZERO16(mvd_cache[2+8*0]);
                     -                    AV_ZERO16(mvd_cache[2+8*2]);
                     -                    if(h->slice_type_nos == AV_PICTURE_TYPE_B){
                     +                    AV_ZERO16(mvd_cache[2 + 8 * 0]);
                     +                    AV_ZERO16(mvd_cache[2 + 8 * 2]);
                     +                    if (h->slice_type_nos == AV_PICTURE_TYPE_B) {
                                              uint8_t *direct_cache = &h->direct_cache[scan8[0]];
                                              uint8_t *direct_table = h->direct_table;
                     -                        fill_rectangle(direct_cache, 4, 4, 8, MB_TYPE_16x16>>1, 1);
+                    -
                     -                        if(IS_DIRECT(top_type)){
                     -                            AV_WN32A(&direct_cache[-1*8], 0x01010101u*(MB_TYPE_DIRECT2>>1));
                     -                        }else if(IS_8X8(top_type)){
                     -                            int b8_xy = 4*top_xy;
                     -                            direct_cache[0 - 1*8]= direct_table[b8_xy + 2];
                     -                            direct_cache[2 - 1*8]= direct_table[b8_xy + 3];
                     -                        }else{
                     -                            AV_WN32A(&direct_cache[-1*8], 0x01010101*(MB_TYPE_16x16>>1));
                     +                        fill_rectangle(direct_cache, 4, 4, 8, MB_TYPE_16x16 >> 1, 1);
+                    +
                     +                        if (IS_DIRECT(top_type)) {
                     +                            AV_WN32A(&direct_cache[-1 * 8],
                     +                                     0x01010101u * (MB_TYPE_DIRECT2 >> 1));
                     +                        } else if (IS_8X8(top_type)) {
                     +                            int b8_xy = 4 * top_xy;
                     +                            direct_cache[0 - 1 * 8] = direct_table[b8_xy + 2];
                     +                            direct_cache[2 - 1 * 8] = direct_table[b8_xy + 3];
                     +                        } else {
                     +                            AV_WN32A(&direct_cache[-1 * 8],
                     +                                     0x01010101 * (MB_TYPE_16x16 >> 1));
+                                             }
                     -                        if(IS_DIRECT(left_type[LTOP]))
                     -                            direct_cache[-1 + 0*8]= MB_TYPE_DIRECT2>>1;
                     -                        else if(IS_8X8(left_type[LTOP]))
                     -                            direct_cache[-1 + 0*8]= direct_table[4*left_xy[LTOP] + 1 + (left_block[0]&~1)];
                     +                        if (IS_DIRECT(left_type[LTOP]))
                     +                            direct_cache[-1 + 0 * 8] = MB_TYPE_DIRECT2 >> 1;
                     +                        else if (IS_8X8(left_type[LTOP]))
                     +                            direct_cache[-1 + 0 * 8] = direct_table[4 * left_xy[LTOP] + 1 + (left_block[0] & ~1)];
                                              else
                     -                            direct_cache[-1 + 0*8]= MB_TYPE_16x16>>1;
                     +                            direct_cache[-1 + 0 * 8] = MB_TYPE_16x16 >> 1;
                     -                        if(IS_DIRECT(left_type[LBOT]))
                     -                            direct_cache[-1 + 2*8]= MB_TYPE_DIRECT2>>1;
                     -                        else if(IS_8X8(left_type[LBOT]))
                     -                            direct_cache[-1 + 2*8]= direct_table[4*left_xy[LBOT] + 1 + (left_block[2]&~1)];
                     +                        if (IS_DIRECT(left_type[LBOT]))
                     +                            direct_cache[-1 + 2 * 8] = MB_TYPE_DIRECT2 >> 1;
                     +                        else if (IS_8X8(left_type[LBOT]))
                     +                            direct_cache[-1 + 2 * 8] = direct_table[4 * left_xy[LBOT] + 1 + (left_block[2] & ~1)];
                                              else
                     -                            direct_cache[-1 + 2*8]= MB_TYPE_16x16>>1;
                     +                            direct_cache[-1 + 2 * 8] = MB_TYPE_16x16 >> 1;
+                                         }
+                                     }
+                                 }
                     -            if(FRAME_MBAFF){
                     -#define MAP_MVS\
                     -                    MAP_F2F(scan8[0] - 1 - 1*8, topleft_type)\
                     -                    MAP_F2F(scan8[0] + 0 - 1*8, top_type)\
                     -                    MAP_F2F(scan8[0] + 1 - 1*8, top_type)\
                     -                    MAP_F2F(scan8[0] + 2 - 1*8, top_type)\
                     -                    MAP_F2F(scan8[0] + 3 - 1*8, top_type)\
                     -                    MAP_F2F(scan8[0] + 4 - 1*8, topright_type)\
                     -                    MAP_F2F(scan8[0] - 1 + 0*8, left_type[LTOP])\
                     -                    MAP_F2F(scan8[0] - 1 + 1*8, left_type[LTOP])\
                     -                    MAP_F2F(scan8[0] - 1 + 2*8, left_type[LBOT])\
                     -                    MAP_F2F(scan8[0] - 1 + 3*8, left_type[LBOT])
                     -                if(MB_FIELD){
                     -#define MAP_F2F(idx, mb_type)\
                     -                    if(!IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
                     -                        h->ref_cache[list][idx] <<= 1;\
                     -                        h->mv_cache[list][idx][1] /= 2;\
                     -                        h->mvd_cache[list][idx][1] >>=1;\
                     -                    }
+                    +
                     +#define MAP_MVS                                                         \
                     +    MAP_F2F(scan8[0] - 1 - 1 * 8, topleft_type)                         \
                     +    MAP_F2F(scan8[0] + 0 - 1 * 8, top_type)                             \
                     +    MAP_F2F(scan8[0] + 1 - 1 * 8, top_type)                             \
                     +    MAP_F2F(scan8[0] + 2 - 1 * 8, top_type)                             \
                     +    MAP_F2F(scan8[0] + 3 - 1 * 8, top_type)                             \
                     +    MAP_F2F(scan8[0] + 4 - 1 * 8, topright_type)                        \
                     +    MAP_F2F(scan8[0] - 1 + 0 * 8, left_type[LTOP])                      \
                     +    MAP_F2F(scan8[0] - 1 + 1 * 8, left_type[LTOP])                      \
                     +    MAP_F2F(scan8[0] - 1 + 2 * 8, left_type[LBOT])                      \
                     +    MAP_F2F(scan8[0] - 1 + 3 * 8, left_type[LBOT])
+                    +
                     +            if (FRAME_MBAFF) {
                     +                if (MB_FIELD) {
+                    +
                     +#define MAP_F2F(idx, mb_type)                                           \
                     +    if (!IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0) {      \
                     +        h->ref_cache[list][idx]    <<= 1;                               \
                     +        h->mv_cache[list][idx][1]   /= 2;                               \
                     +        h->mvd_cache[list][idx][1] >>= 1;                               \
                     +    }
+                    +
                                          MAP_MVS
                     +                } else {
+                    +
                      #undef MAP_F2F
                     -                }else{
                     -#define MAP_F2F(idx, mb_type)\
                     -                    if(IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
                     -                        h->ref_cache[list][idx] >>= 1;\
                     -                        h->mv_cache[list][idx][1] <<= 1;\
                     -                        h->mvd_cache[list][idx][1] <<= 1;\
                     -                    }
                     +#define MAP_F2F(idx, mb_type)                                           \
                     +    if (IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0) {       \
                     +        h->ref_cache[list][idx]    >>= 1;                               \
                     +        h->mv_cache[list][idx][1]  <<= 1;                               \
                     +        h->mvd_cache[list][idx][1] <<= 1;                               \
                     +    }
+                    +
                                          MAP_MVS
                      #undef MAP_F2F
+                                     }
@@ -747,36 +792,34 @@ static void fill_decode_caches(H264Context *h, int mb_type){
+                             }
+                         }
                     -        h->neighbor_transform_size= !!IS_8x8DCT(top_type) + !!IS_8x8DCT(left_type[LTOP]);
                     +    h->neighbor_transform_size = !!IS_8x8DCT(top_type) + !!IS_8x8DCT(left_type[LTOP]);
+                     }
                      /**
                       * decodes a P_SKIP or B_SKIP macroblock
                       */
                     -static void av_unused decode_mb_skip(H264Context *h){
                     -    MpegEncContext * const s = &h->s;
                     -    const int mb_xy= h->mb_xy;
                     -    int mb_type=0;
                     +static void av_unused decode_mb_skip(H264Context *h)
                     +{
                     +    MpegEncContext *const s = &h->s;
                     +    const int mb_xy = h->mb_xy;
                     +    int mb_type     = 0;
                          memset(h->non_zero_count[mb_xy], 0, 48);
                     -    if(MB_FIELD)
                     -        mb_type|= MB_TYPE_INTERLACED;
                     +    if (MB_FIELD)
                     +        mb_type |= MB_TYPE_INTERLACED;
                     -    if( h->slice_type_nos == AV_PICTURE_TYPE_B )
                     -    {
                     +    if (h->slice_type_nos == AV_PICTURE_TYPE_B) {
                              // just for fill_caches. pred_direct_motion will set the real mb_type
                     -        mb_type|= MB_TYPE_L0L1|MB_TYPE_DIRECT2|MB_TYPE_SKIP;
                     -        if(h->direct_spatial_mv_pred){
                     +        mb_type |= MB_TYPE_L0L1 | MB_TYPE_DIRECT2 | MB_TYPE_SKIP;
                     +        if (h->direct_spatial_mv_pred) {
                                  fill_decode_neighbors(h, mb_type);
                     -        fill_decode_caches(h, mb_type); //FIXME check what is needed and what not ...
                     +            fill_decode_caches(h, mb_type); //FIXME check what is needed and what not ...
+                             }
                              ff_h264_pred_direct_motion(h, &mb_type);
                     -        mb_type|= MB_TYPE_SKIP;
                     -    }
                     -    else
                     -    {
                     -        mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P1L0|MB_TYPE_SKIP;
                     +        mb_type |= MB_TYPE_SKIP;
                     +    } else {
                     +        mb_type |= MB_TYPE_16x16 | MB_TYPE_P0L0 | MB_TYPE_P1L0 | MB_TYPE_SKIP;
                              fill_decode_neighbors(h, mb_type);
                              pred_pskip_motion(h);
@@ -785,8 +828,8 @@ static void av_unused decode_mb_skip(H264Context *h){
                          write_back_motion(h, mb_type);
                          s->current_picture.f.mb_type[mb_xy]      = mb_type;
                          s->current_picture.f.qscale_table[mb_xy] = s->qscale;
                     -    h->slice_table[ mb_xy ]= h->slice_num;
                     -    h->prev_mb_skipped= 1;
                     +    h->slice_table[mb_xy]                    = h->slice_num;
                     +    h->prev_mb_skipped                       = 1;
+                     }
                      #endif /* AVCODEC_H264_MVPRED_H */

@@ -30,240 +30,243 @@
                      #define AVCODEC_H264DATA_H
                      #include <stdint.h>
+                    +
                      #include "libavutil/rational.h"
                      #include "mpegvideo.h"
                      #include "h264.h"
                     +static const uint8_t golomb_to_pict_type[5] = {
                     +    AV_PICTURE_TYPE_P, AV_PICTURE_TYPE_B, AV_PICTURE_TYPE_I,
                     +    AV_PICTURE_TYPE_SP, AV_PICTURE_TYPE_SI
                     +};
                     -static const uint8_t golomb_to_pict_type[5]=
                     -{AV_PICTURE_TYPE_P, AV_PICTURE_TYPE_B, AV_PICTURE_TYPE_I, AV_PICTURE_TYPE_SP, AV_PICTURE_TYPE_SI};
+                    -
                     -static const uint8_t golomb_to_intra4x4_cbp[48]={
                     - 47, 31, 15,  0, 23, 27, 29, 30,  7, 11, 13, 14, 39, 43, 45, 46,
                     - 16,  3,  5, 10, 12, 19, 21, 26, 28, 35, 37, 42, 44,  1,  2,  4,
                     -  8, 17, 18, 20, 24,  6,  9, 22, 25, 32, 33, 34, 36, 40, 38, 41
                     +static const uint8_t golomb_to_intra4x4_cbp[48] = {
                     +    47, 31, 15, 0,  23, 27, 29, 30, 7,  11, 13, 14, 39, 43, 45, 46,
                     +    16, 3,  5,  10, 12, 19, 21, 26, 28, 35, 37, 42, 44, 1,  2,  4,
                     +    8,  17, 18, 20, 24, 6,  9,  22, 25, 32, 33, 34, 36, 40, 38, 41
                      };
                     -static const uint8_t golomb_to_inter_cbp[48]={
                     -  0, 16,  1,  2,  4,  8, 32,  3,  5, 10, 12, 15, 47,  7, 11, 13,
                     - 14,  6,  9, 31, 35, 37, 42, 44, 33, 34, 36, 40, 39, 43, 45, 46,
                     - 17, 18, 20, 24, 19, 21, 26, 28, 23, 27, 29, 30, 22, 25, 38, 41
                     +static const uint8_t golomb_to_inter_cbp[48] = {
                     +    0,  16, 1,  2,  4,  8,  32, 3,  5,  10, 12, 15, 47, 7,  11, 13,
                     +    14, 6,  9,  31, 35, 37, 42, 44, 33, 34, 36, 40, 39, 43, 45, 46,
                     +    17, 18, 20, 24, 19, 21, 26, 28, 23, 27, 29, 30, 22, 25, 38, 41
                      };
                     -static const uint8_t zigzag_scan[16]={
                     - 0+0*4, 1+0*4, 0+1*4, 0+2*4,
                     - 1+1*4, 2+0*4, 3+0*4, 2+1*4,
                     - 1+2*4, 0+3*4, 1+3*4, 2+2*4,
                     - 3+1*4, 3+2*4, 2+3*4, 3+3*4,
                     +static const uint8_t zigzag_scan[16] = {
                     +    0 + 0 * 4, 1 + 0 * 4, 0 + 1 * 4, 0 + 2 * 4,
                     +    1 + 1 * 4, 2 + 0 * 4, 3 + 0 * 4, 2 + 1 * 4,
                     +    1 + 2 * 4, 0 + 3 * 4, 1 + 3 * 4, 2 + 2 * 4,
                     +    3 + 1 * 4, 3 + 2 * 4, 2 + 3 * 4, 3 + 3 * 4,
                      };
                     -static const uint8_t field_scan[16]={
                     - 0+0*4, 0+1*4, 1+0*4, 0+2*4,
                     - 0+3*4, 1+1*4, 1+2*4, 1+3*4,
                     - 2+0*4, 2+1*4, 2+2*4, 2+3*4,
                     - 3+0*4, 3+1*4, 3+2*4, 3+3*4,
                     +static const uint8_t field_scan[16] = {
                     +    0 + 0 * 4, 0 + 1 * 4, 1 + 0 * 4, 0 + 2 * 4,
                     +    0 + 3 * 4, 1 + 1 * 4, 1 + 2 * 4, 1 + 3 * 4,
                     +    2 + 0 * 4, 2 + 1 * 4, 2 + 2 * 4, 2 + 3 * 4,
                     +    3 + 0 * 4, 3 + 1 * 4, 3 + 2 * 4, 3 + 3 * 4,
                      };
                     -static const uint8_t luma_dc_zigzag_scan[16]={
                     - 0*16 + 0*64, 1*16 + 0*64, 2*16 + 0*64, 0*16 + 2*64,
                     - 3*16 + 0*64, 0*16 + 1*64, 1*16 + 1*64, 2*16 + 1*64,
                     - 1*16 + 2*64, 2*16 + 2*64, 3*16 + 2*64, 0*16 + 3*64,
                     - 3*16 + 1*64, 1*16 + 3*64, 2*16 + 3*64, 3*16 + 3*64,
                     +static const uint8_t luma_dc_zigzag_scan[16] = {
                     +    0 * 16 + 0 * 64, 1 * 16 + 0 * 64, 2 * 16 + 0 * 64, 0 * 16 + 2 * 64,
                     +    3 * 16 + 0 * 64, 0 * 16 + 1 * 64, 1 * 16 + 1 * 64, 2 * 16 + 1 * 64,
                     +    1 * 16 + 2 * 64, 2 * 16 + 2 * 64, 3 * 16 + 2 * 64, 0 * 16 + 3 * 64,
                     +    3 * 16 + 1 * 64, 1 * 16 + 3 * 64, 2 * 16 + 3 * 64, 3 * 16 + 3 * 64,
                      };
                     -static const uint8_t luma_dc_field_scan[16]={
                     - 0*16 + 0*64, 2*16 + 0*64, 1*16 + 0*64, 0*16 + 2*64,
                     - 2*16 + 2*64, 3*16 + 0*64, 1*16 + 2*64, 3*16 + 2*64,
                     - 0*16 + 1*64, 2*16 + 1*64, 0*16 + 3*64, 2*16 + 3*64,
                     - 1*16 + 1*64, 3*16 + 1*64, 1*16 + 3*64, 3*16 + 3*64,
                     +static const uint8_t luma_dc_field_scan[16] = {
                     +    0 * 16 + 0 * 64, 2 * 16 + 0 * 64, 1 * 16 + 0 * 64, 0 * 16 + 2 * 64,
                     +    2 * 16 + 2 * 64, 3 * 16 + 0 * 64, 1 * 16 + 2 * 64, 3 * 16 + 2 * 64,
                     +    0 * 16 + 1 * 64, 2 * 16 + 1 * 64, 0 * 16 + 3 * 64, 2 * 16 + 3 * 64,
                     +    1 * 16 + 1 * 64, 3 * 16 + 1 * 64, 1 * 16 + 3 * 64, 3 * 16 + 3 * 64,
                      };
                     -static const uint8_t chroma_dc_scan[4]={
                     - (0+0*2)*16, (1+0*2)*16,
                     - (0+1*2)*16, (1+1*2)*16,
                     +static const uint8_t chroma_dc_scan[4] = {
                     +    (0 + 0 * 2) * 16, (1 + 0 * 2) * 16,
                     +    (0 + 1 * 2) * 16, (1 + 1 * 2) * 16,
                      };
                     -static const uint8_t chroma422_dc_scan[8]={
                     - (0+0*2)*16, (0+1*2)*16,
                     - (1+0*2)*16, (0+2*2)*16,
                     - (0+3*2)*16, (1+1*2)*16,
                     - (1+2*2)*16, (1+3*2)*16,
                     +static const uint8_t chroma422_dc_scan[8] = {
                     +    (0 + 0 * 2) * 16, (0 + 1 * 2) * 16,
                     +    (1 + 0 * 2) * 16, (0 + 2 * 2) * 16,
                     +    (0 + 3 * 2) * 16, (1 + 1 * 2) * 16,
                     +    (1 + 2 * 2) * 16, (1 + 3 * 2) * 16,
                      };
                      // zigzag_scan8x8_cavlc[i] = zigzag_scan8x8[(i/4) + 16*(i%4)]
                     -static const uint8_t zigzag_scan8x8_cavlc[64]={
                     - 0+0*8, 1+1*8, 1+2*8, 2+2*8,
                     - 4+1*8, 0+5*8, 3+3*8, 7+0*8,
                     - 3+4*8, 1+7*8, 5+3*8, 6+3*8,
                     - 2+7*8, 6+4*8, 5+6*8, 7+5*8,
                     - 1+0*8, 2+0*8, 0+3*8, 3+1*8,
                     - 3+2*8, 0+6*8, 4+2*8, 6+1*8,
                     - 2+5*8, 2+6*8, 6+2*8, 5+4*8,
                     - 3+7*8, 7+3*8, 4+7*8, 7+6*8,
                     - 0+1*8, 3+0*8, 0+4*8, 4+0*8,
                     - 2+3*8, 1+5*8, 5+1*8, 5+2*8,
                     - 1+6*8, 3+5*8, 7+1*8, 4+5*8,
                     - 4+6*8, 7+4*8, 5+7*8, 6+7*8,
                     - 0+2*8, 2+1*8, 1+3*8, 5+0*8,
                     - 1+4*8, 2+4*8, 6+0*8, 4+3*8,
                     - 0+7*8, 4+4*8, 7+2*8, 3+6*8,
                     - 5+5*8, 6+5*8, 6+6*8, 7+7*8,
                     +static const uint8_t zigzag_scan8x8_cavlc[64] = {
                     +    0 + 0 * 8, 1 + 1 * 8, 1 + 2 * 8, 2 + 2 * 8,
                     +    4 + 1 * 8, 0 + 5 * 8, 3 + 3 * 8, 7 + 0 * 8,
                     +    3 + 4 * 8, 1 + 7 * 8, 5 + 3 * 8, 6 + 3 * 8,
                     +    2 + 7 * 8, 6 + 4 * 8, 5 + 6 * 8, 7 + 5 * 8,
                     +    1 + 0 * 8, 2 + 0 * 8, 0 + 3 * 8, 3 + 1 * 8,
                     +    3 + 2 * 8, 0 + 6 * 8, 4 + 2 * 8, 6 + 1 * 8,
                     +    2 + 5 * 8, 2 + 6 * 8, 6 + 2 * 8, 5 + 4 * 8,
                     +    3 + 7 * 8, 7 + 3 * 8, 4 + 7 * 8, 7 + 6 * 8,
                     +    0 + 1 * 8, 3 + 0 * 8, 0 + 4 * 8, 4 + 0 * 8,
                     +    2 + 3 * 8, 1 + 5 * 8, 5 + 1 * 8, 5 + 2 * 8,
                     +    1 + 6 * 8, 3 + 5 * 8, 7 + 1 * 8, 4 + 5 * 8,
                     +    4 + 6 * 8, 7 + 4 * 8, 5 + 7 * 8, 6 + 7 * 8,
                     +    0 + 2 * 8, 2 + 1 * 8, 1 + 3 * 8, 5 + 0 * 8,
                     +    1 + 4 * 8, 2 + 4 * 8, 6 + 0 * 8, 4 + 3 * 8,
                     +    0 + 7 * 8, 4 + 4 * 8, 7 + 2 * 8, 3 + 6 * 8,
                     +    5 + 5 * 8, 6 + 5 * 8, 6 + 6 * 8, 7 + 7 * 8,
                      };
                     -static const uint8_t field_scan8x8[64]={
                     - 0+0*8, 0+1*8, 0+2*8, 1+0*8,
                     - 1+1*8, 0+3*8, 0+4*8, 1+2*8,
                     - 2+0*8, 1+3*8, 0+5*8, 0+6*8,
                     - 0+7*8, 1+4*8, 2+1*8, 3+0*8,
                     - 2+2*8, 1+5*8, 1+6*8, 1+7*8,
                     - 2+3*8, 3+1*8, 4+0*8, 3+2*8,
                     - 2+4*8, 2+5*8, 2+6*8, 2+7*8,
                     - 3+3*8, 4+1*8, 5+0*8, 4+2*8,
                     - 3+4*8, 3+5*8, 3+6*8, 3+7*8,
                     - 4+3*8, 5+1*8, 6+0*8, 5+2*8,
                     - 4+4*8, 4+5*8, 4+6*8, 4+7*8,
                     - 5+3*8, 6+1*8, 6+2*8, 5+4*8,
                     - 5+5*8, 5+6*8, 5+7*8, 6+3*8,
                     - 7+0*8, 7+1*8, 6+4*8, 6+5*8,
                     - 6+6*8, 6+7*8, 7+2*8, 7+3*8,
                     - 7+4*8, 7+5*8, 7+6*8, 7+7*8,
                     +static const uint8_t field_scan8x8[64] = {
                     +    0 + 0 * 8, 0 + 1 * 8, 0 + 2 * 8, 1 + 0 * 8,
                     +    1 + 1 * 8, 0 + 3 * 8, 0 + 4 * 8, 1 + 2 * 8,
                     +    2 + 0 * 8, 1 + 3 * 8, 0 + 5 * 8, 0 + 6 * 8,
                     +    0 + 7 * 8, 1 + 4 * 8, 2 + 1 * 8, 3 + 0 * 8,
                     +    2 + 2 * 8, 1 + 5 * 8, 1 + 6 * 8, 1 + 7 * 8,
                     +    2 + 3 * 8, 3 + 1 * 8, 4 + 0 * 8, 3 + 2 * 8,
                     +    2 + 4 * 8, 2 + 5 * 8, 2 + 6 * 8, 2 + 7 * 8,
                     +    3 + 3 * 8, 4 + 1 * 8, 5 + 0 * 8, 4 + 2 * 8,
                     +    3 + 4 * 8, 3 + 5 * 8, 3 + 6 * 8, 3 + 7 * 8,
                     +    4 + 3 * 8, 5 + 1 * 8, 6 + 0 * 8, 5 + 2 * 8,
                     +    4 + 4 * 8, 4 + 5 * 8, 4 + 6 * 8, 4 + 7 * 8,
                     +    5 + 3 * 8, 6 + 1 * 8, 6 + 2 * 8, 5 + 4 * 8,
                     +    5 + 5 * 8, 5 + 6 * 8, 5 + 7 * 8, 6 + 3 * 8,
                     +    7 + 0 * 8, 7 + 1 * 8, 6 + 4 * 8, 6 + 5 * 8,
                     +    6 + 6 * 8, 6 + 7 * 8, 7 + 2 * 8, 7 + 3 * 8,
                     +    7 + 4 * 8, 7 + 5 * 8, 7 + 6 * 8, 7 + 7 * 8,
                      };
                     -static const uint8_t field_scan8x8_cavlc[64]={
                     - 0+0*8, 1+1*8, 2+0*8, 0+7*8,
                     - 2+2*8, 2+3*8, 2+4*8, 3+3*8,
                     - 3+4*8, 4+3*8, 4+4*8, 5+3*8,
                     - 5+5*8, 7+0*8, 6+6*8, 7+4*8,
                     - 0+1*8, 0+3*8, 1+3*8, 1+4*8,
                     - 1+5*8, 3+1*8, 2+5*8, 4+1*8,
                     - 3+5*8, 5+1*8, 4+5*8, 6+1*8,
                     - 5+6*8, 7+1*8, 6+7*8, 7+5*8,
                     - 0+2*8, 0+4*8, 0+5*8, 2+1*8,
                     - 1+6*8, 4+0*8, 2+6*8, 5+0*8,
                     - 3+6*8, 6+0*8, 4+6*8, 6+2*8,
                     - 5+7*8, 6+4*8, 7+2*8, 7+6*8,
                     - 1+0*8, 1+2*8, 0+6*8, 3+0*8,
                     - 1+7*8, 3+2*8, 2+7*8, 4+2*8,
                     - 3+7*8, 5+2*8, 4+7*8, 5+4*8,
                     - 6+3*8, 6+5*8, 7+3*8, 7+7*8,
                     +static const uint8_t field_scan8x8_cavlc[64] = {
                     +    0 + 0 * 8, 1 + 1 * 8, 2 + 0 * 8, 0 + 7 * 8,
                     +    2 + 2 * 8, 2 + 3 * 8, 2 + 4 * 8, 3 + 3 * 8,
                     +    3 + 4 * 8, 4 + 3 * 8, 4 + 4 * 8, 5 + 3 * 8,
                     +    5 + 5 * 8, 7 + 0 * 8, 6 + 6 * 8, 7 + 4 * 8,
                     +    0 + 1 * 8, 0 + 3 * 8, 1 + 3 * 8, 1 + 4 * 8,
                     +    1 + 5 * 8, 3 + 1 * 8, 2 + 5 * 8, 4 + 1 * 8,
                     +    3 + 5 * 8, 5 + 1 * 8, 4 + 5 * 8, 6 + 1 * 8,
                     +    5 + 6 * 8, 7 + 1 * 8, 6 + 7 * 8, 7 + 5 * 8,
                     +    0 + 2 * 8, 0 + 4 * 8, 0 + 5 * 8, 2 + 1 * 8,
                     +    1 + 6 * 8, 4 + 0 * 8, 2 + 6 * 8, 5 + 0 * 8,
                     +    3 + 6 * 8, 6 + 0 * 8, 4 + 6 * 8, 6 + 2 * 8,
                     +    5 + 7 * 8, 6 + 4 * 8, 7 + 2 * 8, 7 + 6 * 8,
                     +    1 + 0 * 8, 1 + 2 * 8, 0 + 6 * 8, 3 + 0 * 8,
                     +    1 + 7 * 8, 3 + 2 * 8, 2 + 7 * 8, 4 + 2 * 8,
                     +    3 + 7 * 8, 5 + 2 * 8, 4 + 7 * 8, 5 + 4 * 8,
                     +    6 + 3 * 8, 6 + 5 * 8, 7 + 3 * 8, 7 + 7 * 8,
                      };
                     -typedef struct IMbInfo{
                     +typedef struct IMbInfo {
                          uint16_t type;
                          uint8_t pred_mode;
                          uint8_t cbp;
                      } IMbInfo;
                     -static const IMbInfo i_mb_type_info[26]={
                     -{MB_TYPE_INTRA4x4  , -1, -1},
                     -{MB_TYPE_INTRA16x16,  2,  0},
                     -{MB_TYPE_INTRA16x16,  1,  0},
                     -{MB_TYPE_INTRA16x16,  0,  0},
                     -{MB_TYPE_INTRA16x16,  3,  0},
                     -{MB_TYPE_INTRA16x16,  2,  16},
                     -{MB_TYPE_INTRA16x16,  1,  16},
                     -{MB_TYPE_INTRA16x16,  0,  16},
                     -{MB_TYPE_INTRA16x16,  3,  16},
                     -{MB_TYPE_INTRA16x16,  2,  32},
                     -{MB_TYPE_INTRA16x16,  1,  32},
                     -{MB_TYPE_INTRA16x16,  0,  32},
                     -{MB_TYPE_INTRA16x16,  3,  32},
                     -{MB_TYPE_INTRA16x16,  2,  15+0},
                     -{MB_TYPE_INTRA16x16,  1,  15+0},
                     -{MB_TYPE_INTRA16x16,  0,  15+0},
                     -{MB_TYPE_INTRA16x16,  3,  15+0},
                     -{MB_TYPE_INTRA16x16,  2,  15+16},
                     -{MB_TYPE_INTRA16x16,  1,  15+16},
                     -{MB_TYPE_INTRA16x16,  0,  15+16},
                     -{MB_TYPE_INTRA16x16,  3,  15+16},
                     -{MB_TYPE_INTRA16x16,  2,  15+32},
                     -{MB_TYPE_INTRA16x16,  1,  15+32},
                     -{MB_TYPE_INTRA16x16,  0,  15+32},
                     -{MB_TYPE_INTRA16x16,  3,  15+32},
                     -{MB_TYPE_INTRA_PCM , -1, -1},
                     +static const IMbInfo i_mb_type_info[26] = {
                     +    { MB_TYPE_INTRA4x4,  -1,  -1 },
                     +    { MB_TYPE_INTRA16x16, 2,   0 },
                     +    { MB_TYPE_INTRA16x16, 1,   0 },
                     +    { MB_TYPE_INTRA16x16, 0,   0 },
                     +    { MB_TYPE_INTRA16x16, 3,   0 },
                     +    { MB_TYPE_INTRA16x16, 2,  16 },
                     +    { MB_TYPE_INTRA16x16, 1,  16 },
                     +    { MB_TYPE_INTRA16x16, 0,  16 },
                     +    { MB_TYPE_INTRA16x16, 3,  16 },
                     +    { MB_TYPE_INTRA16x16, 2,  32 },
                     +    { MB_TYPE_INTRA16x16, 1,  32 },
                     +    { MB_TYPE_INTRA16x16, 0,  32 },
                     +    { MB_TYPE_INTRA16x16, 3,  32 },
                     +    { MB_TYPE_INTRA16x16, 2,  15 +  0 },
                     +    { MB_TYPE_INTRA16x16, 1,  15 +  0 },
                     +    { MB_TYPE_INTRA16x16, 0,  15 +  0 },
                     +    { MB_TYPE_INTRA16x16, 3,  15 +  0 },
                     +    { MB_TYPE_INTRA16x16, 2,  15 + 16 },
                     +    { MB_TYPE_INTRA16x16, 1,  15 + 16 },
                     +    { MB_TYPE_INTRA16x16, 0,  15 + 16 },
                     +    { MB_TYPE_INTRA16x16, 3,  15 + 16 },
                     +    { MB_TYPE_INTRA16x16, 2,  15 + 32 },
                     +    { MB_TYPE_INTRA16x16, 1,  15 + 32 },
                     +    { MB_TYPE_INTRA16x16, 0,  15 + 32 },
                     +    { MB_TYPE_INTRA16x16, 3,  15 + 32 },
                     +    { MB_TYPE_INTRA_PCM,  -1, -1 },
                      };
                     -typedef struct PMbInfo{
                     +typedef struct PMbInfo {
                          uint16_t type;
                          uint8_t partition_count;
                      } PMbInfo;
                     -static const PMbInfo p_mb_type_info[5]={
                     -{MB_TYPE_16x16|MB_TYPE_P0L0             , 1},
                     -{MB_TYPE_16x8 |MB_TYPE_P0L0|MB_TYPE_P1L0, 2},
                     -{MB_TYPE_8x16 |MB_TYPE_P0L0|MB_TYPE_P1L0, 2},
                     -{MB_TYPE_8x8  |MB_TYPE_P0L0|MB_TYPE_P1L0, 4},
                     -{MB_TYPE_8x8  |MB_TYPE_P0L0|MB_TYPE_P1L0|MB_TYPE_REF0, 4},
                     +static const PMbInfo p_mb_type_info[5] = {
                     +    { MB_TYPE_16x16 | MB_TYPE_P0L0,                               1 },
                     +    { MB_TYPE_16x8  | MB_TYPE_P0L0 | MB_TYPE_P1L0,                2 },
                     +    { MB_TYPE_8x16  | MB_TYPE_P0L0 | MB_TYPE_P1L0,                2 },
                     +    { MB_TYPE_8x8   | MB_TYPE_P0L0 | MB_TYPE_P1L0,                4 },
                     +    { MB_TYPE_8x8   | MB_TYPE_P0L0 | MB_TYPE_P1L0 | MB_TYPE_REF0, 4 },
                      };
                     -static const PMbInfo p_sub_mb_type_info[4]={
                     -{MB_TYPE_16x16|MB_TYPE_P0L0             , 1},
                     -{MB_TYPE_16x8 |MB_TYPE_P0L0             , 2},
                     -{MB_TYPE_8x16 |MB_TYPE_P0L0             , 2},
                     -{MB_TYPE_8x8  |MB_TYPE_P0L0             , 4},
                     +static const PMbInfo p_sub_mb_type_info[4] = {
                     +    { MB_TYPE_16x16 | MB_TYPE_P0L0, 1 },
                     +    { MB_TYPE_16x8  | MB_TYPE_P0L0, 2 },
                     +    { MB_TYPE_8x16  | MB_TYPE_P0L0, 2 },
                     +    { MB_TYPE_8x8   | MB_TYPE_P0L0, 4 },
                      };
                     -static const PMbInfo b_mb_type_info[23]={
                     -{MB_TYPE_DIRECT2|MB_TYPE_L0L1                                      , 1, },
                     -{MB_TYPE_16x16|MB_TYPE_P0L0                                       , 1, },
                     -{MB_TYPE_16x16             |MB_TYPE_P0L1                          , 1, },
                     -{MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1                          , 1, },
                     -{MB_TYPE_16x8 |MB_TYPE_P0L0             |MB_TYPE_P1L0             , 2, },
                     -{MB_TYPE_8x16 |MB_TYPE_P0L0             |MB_TYPE_P1L0             , 2, },
                     -{MB_TYPE_16x8              |MB_TYPE_P0L1             |MB_TYPE_P1L1, 2, },
                     -{MB_TYPE_8x16              |MB_TYPE_P0L1             |MB_TYPE_P1L1, 2, },
                     -{MB_TYPE_16x8 |MB_TYPE_P0L0                          |MB_TYPE_P1L1, 2, },
                     -{MB_TYPE_8x16 |MB_TYPE_P0L0                          |MB_TYPE_P1L1, 2, },
                     -{MB_TYPE_16x8              |MB_TYPE_P0L1|MB_TYPE_P1L0             , 2, },
                     -{MB_TYPE_8x16              |MB_TYPE_P0L1|MB_TYPE_P1L0             , 2, },
                     -{MB_TYPE_16x8 |MB_TYPE_P0L0             |MB_TYPE_P1L0|MB_TYPE_P1L1, 2, },
                     -{MB_TYPE_8x16 |MB_TYPE_P0L0             |MB_TYPE_P1L0|MB_TYPE_P1L1, 2, },
                     -{MB_TYPE_16x8              |MB_TYPE_P0L1|MB_TYPE_P1L0|MB_TYPE_P1L1, 2, },
                     -{MB_TYPE_8x16              |MB_TYPE_P0L1|MB_TYPE_P1L0|MB_TYPE_P1L1, 2, },
                     -{MB_TYPE_16x8 |MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_P1L0             , 2, },
                     -{MB_TYPE_8x16 |MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_P1L0             , 2, },
                     -{MB_TYPE_16x8 |MB_TYPE_P0L0|MB_TYPE_P0L1             |MB_TYPE_P1L1, 2, },
                     -{MB_TYPE_8x16 |MB_TYPE_P0L0|MB_TYPE_P0L1             |MB_TYPE_P1L1, 2, },
                     -{MB_TYPE_16x8 |MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_P1L0|MB_TYPE_P1L1, 2, },
                     -{MB_TYPE_8x16 |MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_P1L0|MB_TYPE_P1L1, 2, },
                     -{MB_TYPE_8x8  |MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_P1L0|MB_TYPE_P1L1, 4, },
                     +static const PMbInfo b_mb_type_info[23] = {
                     +    { MB_TYPE_DIRECT2 | MB_TYPE_L0L1,                                              1, },
                     +    { MB_TYPE_16x16   | MB_TYPE_P0L0,                                              1, },
                     +    { MB_TYPE_16x16   | MB_TYPE_P0L1,                                              1, },
                     +    { MB_TYPE_16x16   | MB_TYPE_P0L0 | MB_TYPE_P0L1,                               1, },
                     +    { MB_TYPE_16x8    | MB_TYPE_P0L0 | MB_TYPE_P1L0,                               2, },
                     +    { MB_TYPE_8x16    | MB_TYPE_P0L0 | MB_TYPE_P1L0,                               2, },
                     +    { MB_TYPE_16x8    | MB_TYPE_P0L1 | MB_TYPE_P1L1,                               2, },
                     +    { MB_TYPE_8x16    | MB_TYPE_P0L1 | MB_TYPE_P1L1,                               2, },
                     +    { MB_TYPE_16x8    | MB_TYPE_P0L0 | MB_TYPE_P1L1,                               2, },
                     +    { MB_TYPE_8x16    | MB_TYPE_P0L0 | MB_TYPE_P1L1,                               2, },
                     +    { MB_TYPE_16x8    | MB_TYPE_P0L1 | MB_TYPE_P1L0,                               2, },
                     +    { MB_TYPE_8x16    | MB_TYPE_P0L1 | MB_TYPE_P1L0,                               2, },
                     +    { MB_TYPE_16x8    | MB_TYPE_P0L0 | MB_TYPE_P1L0 | MB_TYPE_P1L1,                2, },
                     +    { MB_TYPE_8x16    | MB_TYPE_P0L0 | MB_TYPE_P1L0 | MB_TYPE_P1L1,                2, },
                     +    { MB_TYPE_16x8    | MB_TYPE_P0L1 | MB_TYPE_P1L0 | MB_TYPE_P1L1,                2, },
                     +    { MB_TYPE_8x16    | MB_TYPE_P0L1 | MB_TYPE_P1L0 | MB_TYPE_P1L1,                2, },
                     +    { MB_TYPE_16x8    | MB_TYPE_P0L0 | MB_TYPE_P0L1 | MB_TYPE_P1L0,                2, },
                     +    { MB_TYPE_8x16    | MB_TYPE_P0L0 | MB_TYPE_P0L1 | MB_TYPE_P1L0,                2, },
                     +    { MB_TYPE_16x8    | MB_TYPE_P0L0 | MB_TYPE_P0L1 | MB_TYPE_P1L1,                2, },
                     +    { MB_TYPE_8x16    | MB_TYPE_P0L0 | MB_TYPE_P0L1 | MB_TYPE_P1L1,                2, },
                     +    { MB_TYPE_16x8    | MB_TYPE_P0L0 | MB_TYPE_P0L1 | MB_TYPE_P1L0 | MB_TYPE_P1L1, 2, },
                     +    { MB_TYPE_8x16    | MB_TYPE_P0L0 | MB_TYPE_P0L1 | MB_TYPE_P1L0 | MB_TYPE_P1L1, 2, },
                     +    { MB_TYPE_8x8     | MB_TYPE_P0L0 | MB_TYPE_P0L1 | MB_TYPE_P1L0 | MB_TYPE_P1L1, 4, },
                      };
                     -static const PMbInfo b_sub_mb_type_info[13]={
                     -{MB_TYPE_DIRECT2                                                   , 1, },
                     -{MB_TYPE_16x16|MB_TYPE_P0L0                                       , 1, },
                     -{MB_TYPE_16x16             |MB_TYPE_P0L1                          , 1, },
                     -{MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1                          , 1, },
                     -{MB_TYPE_16x8 |MB_TYPE_P0L0             |MB_TYPE_P1L0             , 2, },
                     -{MB_TYPE_8x16 |MB_TYPE_P0L0             |MB_TYPE_P1L0             , 2, },
                     -{MB_TYPE_16x8              |MB_TYPE_P0L1             |MB_TYPE_P1L1, 2, },
                     -{MB_TYPE_8x16              |MB_TYPE_P0L1             |MB_TYPE_P1L1, 2, },
                     -{MB_TYPE_16x8 |MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_P1L0|MB_TYPE_P1L1, 2, },
                     -{MB_TYPE_8x16 |MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_P1L0|MB_TYPE_P1L1, 2, },
                     -{MB_TYPE_8x8  |MB_TYPE_P0L0             |MB_TYPE_P1L0             , 4, },
                     -{MB_TYPE_8x8               |MB_TYPE_P0L1             |MB_TYPE_P1L1, 4, },
                     -{MB_TYPE_8x8  |MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_P1L0|MB_TYPE_P1L1, 4, },
                     +static const PMbInfo b_sub_mb_type_info[13] = {
                     +    { MB_TYPE_DIRECT2,                                                           1, },
                     +    { MB_TYPE_16x16 | MB_TYPE_P0L0,                                              1, },
                     +    { MB_TYPE_16x16 | MB_TYPE_P0L1,                                              1, },
                     +    { MB_TYPE_16x16 | MB_TYPE_P0L0 | MB_TYPE_P0L1,                               1, },
                     +    { MB_TYPE_16x8  | MB_TYPE_P0L0 | MB_TYPE_P1L0,                               2, },
                     +    { MB_TYPE_8x16  | MB_TYPE_P0L0 | MB_TYPE_P1L0,                               2, },
                     +    { MB_TYPE_16x8  | MB_TYPE_P0L1 | MB_TYPE_P1L1,                               2, },
                     +    { MB_TYPE_8x16  | MB_TYPE_P0L1 | MB_TYPE_P1L1,                               2, },
                     +    { MB_TYPE_16x8  | MB_TYPE_P0L0 | MB_TYPE_P0L1 | MB_TYPE_P1L0 | MB_TYPE_P1L1, 2, },
                     +    { MB_TYPE_8x16  | MB_TYPE_P0L0 | MB_TYPE_P0L1 | MB_TYPE_P1L0 | MB_TYPE_P1L1, 2, },
                     +    { MB_TYPE_8x8   | MB_TYPE_P0L0 | MB_TYPE_P1L0,                               4, },
                     +    { MB_TYPE_8x8   | MB_TYPE_P0L1 | MB_TYPE_P1L1,                               4, },
                     +    { MB_TYPE_8x8   | MB_TYPE_P0L0 | MB_TYPE_P0L1 | MB_TYPE_P1L0 | MB_TYPE_P1L1, 4, },
                      };
                     -static const uint8_t dequant4_coeff_init[6][3]={
                     -  {10,13,16},
                     -  {11,14,18},
                     -  {13,16,20},
                     -  {14,18,23},
                     -  {16,20,25},
                     -  {18,23,29},
                     +static const uint8_t dequant4_coeff_init[6][3] = {
                     +    { 10, 13, 16 },
                     +    { 11, 14, 18 },
                     +    { 13, 16, 20 },
                     +    { 14, 18, 23 },
                     +    { 16, 20, 25 },
                     +    { 18, 23, 29 },
                      };
                      static const uint8_t dequant8_coeff_init_scan[16] = {
                     -  0,3,4,3, 3,1,5,1, 4,5,2,5, 3,1,5,1
                     +    0, 3, 4, 3, 3, 1, 5, 1, 4, 5, 2, 5, 3, 1, 5, 1
                      };
                     -static const uint8_t dequant8_coeff_init[6][6]={
                     -  {20,18,32,19,25,24},
                     -  {22,19,35,21,28,26},
                     -  {26,23,42,24,33,31},
                     -  {28,25,45,26,35,33},
                     -  {32,28,51,30,40,38},
                     -  {36,32,58,34,46,43},
+                    +
                     +static const uint8_t dequant8_coeff_init[6][6] = {
                     +    { 20, 18, 32, 19, 25, 24 },
                     +    { 22, 19, 35, 21, 28, 26 },
                     +    { 26, 23, 42, 24, 33, 31 },
                     +    { 28, 25, 45, 26, 35, 33 },
                     +    { 32, 28, 51, 30, 40, 38 },
                     +    { 36, 32, 58, 34, 46, 43 },
                      };
                      #endif /* AVCODEC_H264DATA_H */

@@ -28,56 +28,90 @@
                      #define AVCODEC_H264DSP_H
                      #include <stdint.h>
+                    +
                      #include "dsputil.h"
                      typedef void (*h264_weight_func)(uint8_t *block, int stride, int height,
                                                       int log2_denom, int weight, int offset);
                     -typedef void (*h264_biweight_func)(uint8_t *dst, uint8_t *src, int stride, int height,
                     -                                   int log2_denom, int weightd, int weights, int offset);
                     +typedef void (*h264_biweight_func)(uint8_t *dst, uint8_t *src,
                     +                                   int stride, int height, int log2_denom,
                     +                                   int weightd, int weights, int offset);
                      /**
                       * Context for storing H.264 DSP functions
                       */
                     -typedef struct H264DSPContext{
                     +typedef struct H264DSPContext {
                          /* weighted MC */
                          h264_weight_func weight_h264_pixels_tab[4];
                          h264_biweight_func biweight_h264_pixels_tab[4];
                          /* loop filter */
                     -    void (*h264_v_loop_filter_luma)(uint8_t *pix/*align 16*/, int stride, int alpha, int beta, int8_t *tc0);
                     -    void (*h264_h_loop_filter_luma)(uint8_t *pix/*align 4 */, int stride, int alpha, int beta, int8_t *tc0);
                     -    void (*h264_h_loop_filter_luma_mbaff)(uint8_t *pix/*align 16*/, int stride, int alpha, int beta, int8_t *tc0);
                     +    void (*h264_v_loop_filter_luma)(uint8_t *pix /*align 16*/, int stride,
                     +                                    int alpha, int beta, int8_t *tc0);
                     +    void (*h264_h_loop_filter_luma)(uint8_t *pix /*align 4 */, int stride,
                     +                                    int alpha, int beta, int8_t *tc0);
                     +    void (*h264_h_loop_filter_luma_mbaff)(uint8_t *pix /*align 16*/, int stride,
                     +                                          int alpha, int beta, int8_t *tc0);
                          /* v/h_loop_filter_luma_intra: align 16 */
                     -    void (*h264_v_loop_filter_luma_intra)(uint8_t *pix, int stride, int alpha, int beta);
                     -    void (*h264_h_loop_filter_luma_intra)(uint8_t *pix, int stride, int alpha, int beta);
                     -    void (*h264_h_loop_filter_luma_mbaff_intra)(uint8_t *pix/*align 16*/, int stride, int alpha, int beta);
                     -    void (*h264_v_loop_filter_chroma)(uint8_t *pix/*align 8*/, int stride, int alpha, int beta, int8_t *tc0);
                     -    void (*h264_h_loop_filter_chroma)(uint8_t *pix/*align 4*/, int stride, int alpha, int beta, int8_t *tc0);
                     -    void (*h264_h_loop_filter_chroma_mbaff)(uint8_t *pix/*align 8*/, int stride, int alpha, int beta, int8_t *tc0);
                     -    void (*h264_v_loop_filter_chroma_intra)(uint8_t *pix/*align 8*/, int stride, int alpha, int beta);
                     -    void (*h264_h_loop_filter_chroma_intra)(uint8_t *pix/*align 8*/, int stride, int alpha, int beta);
                     -    void (*h264_h_loop_filter_chroma_mbaff_intra)(uint8_t *pix/*align 8*/, int stride, int alpha, int beta);
                     +    void (*h264_v_loop_filter_luma_intra)(uint8_t *pix, int stride,
                     +                                          int alpha, int beta);
                     +    void (*h264_h_loop_filter_luma_intra)(uint8_t *pix, int stride,
                     +                                          int alpha, int beta);
                     +    void (*h264_h_loop_filter_luma_mbaff_intra)(uint8_t *pix /*align 16*/,
                     +                                                int stride, int alpha, int beta);
                     +    void (*h264_v_loop_filter_chroma)(uint8_t *pix /*align 8*/, int stride,
                     +                                      int alpha, int beta, int8_t *tc0);
                     +    void (*h264_h_loop_filter_chroma)(uint8_t *pix /*align 4*/, int stride,
                     +                                      int alpha, int beta, int8_t *tc0);
                     +    void (*h264_h_loop_filter_chroma_mbaff)(uint8_t *pix /*align 8*/,
                     +                                            int stride, int alpha, int beta,
                     +                                            int8_t *tc0);
                     +    void (*h264_v_loop_filter_chroma_intra)(uint8_t *pix /*align 8*/,
                     +                                            int stride, int alpha, int beta);
                     +    void (*h264_h_loop_filter_chroma_intra)(uint8_t *pix /*align 8*/,
                     +                                            int stride, int alpha, int beta);
                     +    void (*h264_h_loop_filter_chroma_mbaff_intra)(uint8_t *pix /*align 8*/,
                     +                                                  int stride, int alpha, int beta);
                          // h264_loop_filter_strength: simd only. the C version is inlined in h264.c
                     -    void (*h264_loop_filter_strength)(int16_t bS[2][4][4], uint8_t nnz[40], int8_t ref[2][40], int16_t mv[2][40][2],
                     -                                      int bidir, int edges, int step, int mask_mv0, int mask_mv1, int field);
                     +    void (*h264_loop_filter_strength)(int16_t bS[2][4][4], uint8_t nnz[40],
                     +                                      int8_t ref[2][40], int16_t mv[2][40][2],
                     +                                      int bidir, int edges, int step,
                     +                                      int mask_mv0, int mask_mv1, int field);
                          /* IDCT */
                     -    void (*h264_idct_add)(uint8_t *dst/*align 4*/, DCTELEM *block/*align 16*/, int stride);
                     -    void (*h264_idct8_add)(uint8_t *dst/*align 8*/, DCTELEM *block/*align 16*/, int stride);
                     -    void (*h264_idct_dc_add)(uint8_t *dst/*align 4*/, DCTELEM *block/*align 16*/, int stride);
                     -    void (*h264_idct8_dc_add)(uint8_t *dst/*align 8*/, DCTELEM *block/*align 16*/, int stride);
                     +    void (*h264_idct_add)(uint8_t *dst /*align 4*/,
                     +                          DCTELEM *block /*align 16*/, int stride);
                     +    void (*h264_idct8_add)(uint8_t *dst /*align 8*/,
                     +                           DCTELEM *block /*align 16*/, int stride);
                     +    void (*h264_idct_dc_add)(uint8_t *dst /*align 4*/,
                     +                             DCTELEM *block /*align 16*/, int stride);
                     +    void (*h264_idct8_dc_add)(uint8_t *dst /*align 8*/,
                     +                              DCTELEM *block /*align 16*/, int stride);
                     -    void (*h264_idct_add16)(uint8_t *dst/*align 16*/, const int *blockoffset, DCTELEM *block/*align 16*/, int stride, const uint8_t nnzc[15*8]);
                     -    void (*h264_idct8_add4)(uint8_t *dst/*align 16*/, const int *blockoffset, DCTELEM *block/*align 16*/, int stride, const uint8_t nnzc[15*8]);
                     -    void (*h264_idct_add8)(uint8_t **dst/*align 16*/, const int *blockoffset, DCTELEM *block/*align 16*/, int stride, const uint8_t nnzc[15*8]);
                     -    void (*h264_idct_add16intra)(uint8_t *dst/*align 16*/, const int *blockoffset, DCTELEM *block/*align 16*/, int stride, const uint8_t nnzc[15*8]);
                     -    void (*h264_luma_dc_dequant_idct)(DCTELEM *output, DCTELEM *input/*align 16*/, int qmul);
                     +    void (*h264_idct_add16)(uint8_t *dst /*align 16*/, const int *blockoffset,
                     +                            DCTELEM *block /*align 16*/, int stride,
                     +                            const uint8_t nnzc[15 * 8]);
                     +    void (*h264_idct8_add4)(uint8_t *dst /*align 16*/, const int *blockoffset,
                     +                            DCTELEM *block /*align 16*/, int stride,
                     +                            const uint8_t nnzc[15 * 8]);
                     +    void (*h264_idct_add8)(uint8_t **dst /*align 16*/, const int *blockoffset,
                     +                           DCTELEM *block /*align 16*/, int stride,
                     +                           const uint8_t nnzc[15 * 8]);
                     +    void (*h264_idct_add16intra)(uint8_t *dst /*align 16*/, const int *blockoffset,
                     +                                 DCTELEM *block /*align 16*/,
                     +                                 int stride, const uint8_t nnzc[15 * 8]);
                     +    void (*h264_luma_dc_dequant_idct)(DCTELEM *output,
                     +                                      DCTELEM *input /*align 16*/, int qmul);
                          void (*h264_chroma_dc_dequant_idct)(DCTELEM *block, int qmul);
                     -}H264DSPContext;
                     +} H264DSPContext;
                     -void ff_h264dsp_init(H264DSPContext *c, const int bit_depth, const int chroma_format_idc);
                     -void ff_h264dsp_init_arm(H264DSPContext *c, const int bit_depth, const int chroma_format_idc);
                     -void ff_h264dsp_init_ppc(H264DSPContext *c, const int bit_depth, const int chroma_format_idc);
                     -void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth, const int chroma_format_idc);
                     +void ff_h264dsp_init(H264DSPContext *c, const int bit_depth,
                     +                     const int chroma_format_idc);
                     +void ff_h264dsp_init_arm(H264DSPContext *c, const int bit_depth,
                     +                         const int chroma_format_idc);
                     +void ff_h264dsp_init_ppc(H264DSPContext *c, const int bit_depth,
                     +                         const int chroma_format_idc);
                     +void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth,
                     +                         const int chroma_format_idc);
                      #endif /* AVCODEC_H264DSP_H */

@@ -35,18 +35,18 @@
                       * Prediction types
                       */
                      //@{
                     -#define VERT_PRED             0
                     -#define HOR_PRED              1
                     -#define DC_PRED               2
                     -#define DIAG_DOWN_LEFT_PRED   3
                     -#define DIAG_DOWN_RIGHT_PRED  4
                     -#define VERT_RIGHT_PRED       5
                     -#define HOR_DOWN_PRED         6
                     -#define VERT_LEFT_PRED        7
                     -#define HOR_UP_PRED           8
                     +#define VERT_PRED              0
                     +#define HOR_PRED               1
                     +#define DC_PRED                2
                     +#define DIAG_DOWN_LEFT_PRED    3
                     +#define DIAG_DOWN_RIGHT_PRED   4
                     +#define VERT_RIGHT_PRED        5
                     +#define HOR_DOWN_PRED          6
                     +#define VERT_LEFT_PRED         7
                     +#define HOR_UP_PRED            8
                      // DC edge (not for VP8)
                     -#define LEFT_DC_PRED          9
                     +#define LEFT_DC_PRED           9
                      #define TOP_DC_PRED           10
                      #define DC_128_PRED           11
@@ -56,7 +56,7 @@
                      #define VERT_LEFT_PRED_RV40_NODOWN        14
                      // VP8 specific
                     -#define TM_VP8_PRED           9     ///< "True Motion", used instead of plane
                     +#define TM_VP8_PRED            9    ///< "True Motion", used instead of plane
                      #define VERT_VP8_PRED         10    ///< for VP8, #VERT_PRED is the average of
                                                          ///< (left col+cur col x2+right col) / 4;
                                                          ///< this is the "unaveraged" one
@@ -65,44 +65,53 @@
                      #define DC_127_PRED           12
                      #define DC_129_PRED           13
                     -#define DC_PRED8x8            0
                     -#define HOR_PRED8x8           1
                     -#define VERT_PRED8x8          2
                     -#define PLANE_PRED8x8         3
                     +#define DC_PRED8x8             0
                     +#define HOR_PRED8x8            1
                     +#define VERT_PRED8x8           2
                     +#define PLANE_PRED8x8          3
                      // DC edge
                     -#define LEFT_DC_PRED8x8       4
                     -#define TOP_DC_PRED8x8        5
                     -#define DC_128_PRED8x8        6
                     +#define LEFT_DC_PRED8x8        4
                     +#define TOP_DC_PRED8x8         5
                     +#define DC_128_PRED8x8         6
                      // H264/SVQ3 (8x8) specific
                     -#define ALZHEIMER_DC_L0T_PRED8x8 7
                     -#define ALZHEIMER_DC_0LT_PRED8x8 8
                     -#define ALZHEIMER_DC_L00_PRED8x8 9
                     +#define ALZHEIMER_DC_L0T_PRED8x8  7
                     +#define ALZHEIMER_DC_0LT_PRED8x8  8
                     +#define ALZHEIMER_DC_L00_PRED8x8  9
                      #define ALZHEIMER_DC_0L0_PRED8x8 10
                      // VP8 specific
                     -#define DC_127_PRED8x8        7
                     -#define DC_129_PRED8x8        8
                     +#define DC_127_PRED8x8         7
                     +#define DC_129_PRED8x8         8
                      //@}
                      /**
                       * Context for storing H.264 prediction functions
                       */
                     -typedef struct H264PredContext{
                     -    void (*pred4x4  [9+3+3])(uint8_t *src, const uint8_t *topright, int stride);//FIXME move to dsp?
                     -    void (*pred8x8l [9+3])(uint8_t *src, int topleft, int topright, int stride);
                     -    void (*pred8x8  [4+3+4])(uint8_t *src, int stride);
                     -    void (*pred16x16[4+3+2])(uint8_t *src, int stride);
                     +typedef struct H264PredContext {
                     +    void(*pred4x4[9 + 3 + 3])(uint8_t *src, const uint8_t *topright, int stride); //FIXME move to dsp?
                     +    void(*pred8x8l[9 + 3])(uint8_t *src, int topleft, int topright, int stride);
                     +    void(*pred8x8[4 + 3 + 4])(uint8_t *src, int stride);
                     +    void(*pred16x16[4 + 3 + 2])(uint8_t *src, int stride);
                     -    void (*pred4x4_add  [2])(uint8_t *pix/*align  4*/, const DCTELEM *block/*align 16*/, int stride);
                     -    void (*pred8x8l_add [2])(uint8_t *pix/*align  8*/, const DCTELEM *block/*align 16*/, int stride);
                     -    void (*pred8x8_add  [3])(uint8_t *pix/*align  8*/, const int *block_offset, const DCTELEM *block/*align 16*/, int stride);
                     -    void (*pred16x16_add[3])(uint8_t *pix/*align 16*/, const int *block_offset, const DCTELEM *block/*align 16*/, int stride);
                     -}H264PredContext;
                     +    void(*pred4x4_add[2])(uint8_t *pix /*align  4*/,
                     +                          const DCTELEM *block /*align 16*/, int stride);
                     +    void(*pred8x8l_add[2])(uint8_t *pix /*align  8*/,
                     +                           const DCTELEM *block /*align 16*/, int stride);
                     +    void(*pred8x8_add[3])(uint8_t *pix /*align  8*/,
                     +                          const int *block_offset,
                     +                          const DCTELEM *block /*align 16*/, int stride);
                     +    void(*pred16x16_add[3])(uint8_t *pix /*align 16*/,
                     +                            const int *block_offset,
                     +                            const DCTELEM *block /*align 16*/, int stride);
                     +} H264PredContext;
                     -void ff_h264_pred_init(H264PredContext *h, int codec_id, const int bit_depth, const int chroma_format_idc);
                     -void ff_h264_pred_init_arm(H264PredContext *h, int codec_id, const int bit_depth, const int chroma_format_idc);
                     -void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth, const int chroma_format_idc);
                     +void ff_h264_pred_init(H264PredContext *h, int codec_id,
                     +                       const int bit_depth, const int chroma_format_idc);
                     +void ff_h264_pred_init_arm(H264PredContext *h, int codec_id,
                     +                           const int bit_depth, const int chroma_format_idc);
                     +void ff_h264_pred_init_x86(H264PredContext *h, int codec_id,
                     +                           const int bit_depth, const int chroma_format_idc);
                      #endif /* AVCODEC_H264PRED_H */

@@ -21,39 +21,57 @@
                      /**
                       * @file
                     - * ati vcr1 codec.
                     + * ATI VCR1 codec
                       */
                      #include "avcodec.h"
                      #include "dsputil.h"
                     -//#undef NDEBUG
                     -//#include <assert.h>
+                    -
                     -/* Disable the encoder. */
                     -#undef CONFIG_VCR1_ENCODER
                     -#define CONFIG_VCR1_ENCODER 0
+                    -
                     -typedef struct VCR1Context{
                     -    AVCodecContext *avctx;
                     +typedef struct VCR1Context {
                          AVFrame picture;
                          int delta[16];
                          int offset[4];
                      } VCR1Context;
                     -static int decode_frame(AVCodecContext *avctx,
                     -                        void *data, int *data_size,
                     -                        AVPacket *avpkt)
                     +static av_cold void common_init(AVCodecContext *avctx)
                     +{
                     +    VCR1Context *const a = avctx->priv_data;
+                    +
                     +    avctx->coded_frame = &a->picture;
                     +    avcodec_get_frame_defaults(&a->picture);
                     +}
+                    +
                     +static av_cold int decode_init(AVCodecContext *avctx)
                     +{
                     +    common_init(avctx);
+                    +
                     +    avctx->pix_fmt = PIX_FMT_YUV410P;
+                    +
                     +    return 0;
                     +}
+                    +
                     +static av_cold int decode_end(AVCodecContext *avctx)
                     +{
                     +    VCR1Context *s = avctx->priv_data;
+                    +
                     +    if (s->picture.data[0])
                     +        avctx->release_buffer(avctx, &s->picture);
+                    +
                     +    return 0;
                     +}
+                    +
                     +static int decode_frame(AVCodecContext *avctx, void *data,
                     +                        int *data_size, AVPacket *avpkt)
+                     {
                     -    const uint8_t *buf = avpkt->data;
                     -    int buf_size = avpkt->size;
                     -    VCR1Context * const a = avctx->priv_data;
                     -    AVFrame *picture = data;
                     -    AVFrame * const p = &a->picture;
                     -    const uint8_t *bytestream= buf;
                     +    const uint8_t *buf        = avpkt->data;
                     +    int buf_size              = avpkt->size;
                     +    VCR1Context *const a      = avctx->priv_data;
                     +    AVFrame *picture          = data;
                     +    AVFrame *const p          = &a->picture;
                     +    const uint8_t *bytestream = buf;
                          int i, x, y;
                     -    if(p->data[0])
                     +    if (p->data[0])
                              avctx->release_buffer(avctx, p);
                          if(buf_size < 16 + avctx->height + avctx->width*avctx->height*5/8){
@@ -61,57 +79,57 @@ static int decode_frame(AVCodecContext *avctx,
                              return AVERROR(EINVAL);
+                         }
                     -    p->reference= 0;
                     -    if(avctx->get_buffer(avctx, p) < 0){
                     +    p->reference = 0;
                     +    if (avctx->get_buffer(avctx, p) < 0) {
                              av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
                              return -1;
+                         }
                     -    p->pict_type= AV_PICTURE_TYPE_I;
                     -    p->key_frame= 1;
                     +    p->pict_type = AV_PICTURE_TYPE_I;
                     +    p->key_frame = 1;
                     -    for(i=0; i<16; i++){
                     -        a->delta[i]= *(bytestream++);
                     +    for (i = 0; i < 16; i++) {
                     +        a->delta[i] = *bytestream++;
                              bytestream++;
+                         }
                     -    for(y=0; y<avctx->height; y++){
                     +    for (y = 0; y < avctx->height; y++) {
                              int offset;
                     -        uint8_t *luma= &a->picture.data[0][ y*a->picture.linesize[0] ];
                     +        uint8_t *luma = &a->picture.data[0][y * a->picture.linesize[0]];
                     -        if((y&3) == 0){
                     -            uint8_t *cb= &a->picture.data[1][ (y>>2)*a->picture.linesize[1] ];
                     -            uint8_t *cr= &a->picture.data[2][ (y>>2)*a->picture.linesize[2] ];
                     +        if ((y & 3) == 0) {
                     +            uint8_t *cb = &a->picture.data[1][(y >> 2) * a->picture.linesize[1]];
                     +            uint8_t *cr = &a->picture.data[2][(y >> 2) * a->picture.linesize[2]];
                     -            for(i=0; i<4; i++)
                     -                a->offset[i]= *(bytestream++);
                     +            for (i = 0; i < 4; i++)
                     +                a->offset[i] = *bytestream++;
                     -            offset= a->offset[0] - a->delta[ bytestream[2]&0xF ];
                     -            for(x=0; x<avctx->width; x+=4){
                     -                luma[0]=( offset += a->delta[ bytestream[2]&0xF ]);
                     -                luma[1]=( offset += a->delta[ bytestream[2]>>4  ]);
                     -                luma[2]=( offset += a->delta[ bytestream[0]&0xF ]);
                     -                luma[3]=( offset += a->delta[ bytestream[0]>>4  ]);
                     -                luma += 4;
                     +            offset = a->offset[0] - a->delta[bytestream[2] & 0xF];
                     +            for (x = 0; x < avctx->width; x += 4) {
                     +                luma[0]     = offset += a->delta[bytestream[2] & 0xF];
                     +                luma[1]     = offset += a->delta[bytestream[2] >>  4];
                     +                luma[2]     = offset += a->delta[bytestream[0] & 0xF];
                     +                luma[3]     = offset += a->delta[bytestream[0] >>  4];
                     +                luma       += 4;
                     -                *(cb++) = bytestream[3];
                     -                *(cr++) = bytestream[1];
                     +                *cb++       = bytestream[3];
                     +                *cr++       = bytestream[1];
                     -                bytestream+= 4;
                     +                bytestream += 4;
+                                 }
                     -        }else{
                     -            offset= a->offset[y&3] - a->delta[ bytestream[2]&0xF ];
+                    -
                     -            for(x=0; x<avctx->width; x+=8){
                     -                luma[0]=( offset += a->delta[ bytestream[2]&0xF ]);
                     -                luma[1]=( offset += a->delta[ bytestream[2]>>4  ]);
                     -                luma[2]=( offset += a->delta[ bytestream[3]&0xF ]);
                     -                luma[3]=( offset += a->delta[ bytestream[3]>>4  ]);
                     -                luma[4]=( offset += a->delta[ bytestream[0]&0xF ]);
                     -                luma[5]=( offset += a->delta[ bytestream[0]>>4  ]);
                     -                luma[6]=( offset += a->delta[ bytestream[1]&0xF ]);
                     -                luma[7]=( offset += a->delta[ bytestream[1]>>4  ]);
                     -                luma += 8;
                     -                bytestream+= 4;
                     +        } else {
                     +            offset = a->offset[y & 3] - a->delta[bytestream[2] & 0xF];
+                    +
                     +            for (x = 0; x < avctx->width; x += 8) {
                     +                luma[0]     = offset += a->delta[bytestream[2] & 0xF];
                     +                luma[1]     = offset += a->delta[bytestream[2] >>  4];
                     +                luma[2]     = offset += a->delta[bytestream[3] & 0xF];
                     +                luma[3]     = offset += a->delta[bytestream[3] >>  4];
                     +                luma[4]     = offset += a->delta[bytestream[0] & 0xF];
                     +                luma[5]     = offset += a->delta[bytestream[0] >>  4];
                     +                luma[6]     = offset += a->delta[bytestream[1] & 0xF];
                     +                luma[7]     = offset += a->delta[bytestream[1] >>  4];
                     +                luma       += 8;
                     +                bytestream += 4;
+                                 }
+                             }
+                         }
@@ -122,62 +140,6 @@ static int decode_frame(AVCodecContext *avctx,
                          return buf_size;
+                     }
                     -#if CONFIG_VCR1_ENCODER
                     -static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size, void *data){
                     -    VCR1Context * const a = avctx->priv_data;
                     -    AVFrame *pict = data;
                     -    AVFrame * const p = &a->picture;
                     -    int size;
+                    -
                     -    *p = *pict;
                     -    p->pict_type= AV_PICTURE_TYPE_I;
                     -    p->key_frame= 1;
+                    -
                     -    avpriv_align_put_bits(&a->pb);
                     -    while(get_bit_count(&a->pb)&31)
                     -        put_bits(&a->pb, 8, 0);
+                    -
                     -    size= get_bit_count(&a->pb)/32;
+                    -
                     -    return size*4;
                     -}
                     -#endif
+                    -
                     -static av_cold void common_init(AVCodecContext *avctx){
                     -    VCR1Context * const a = avctx->priv_data;
+                    -
                     -    avctx->coded_frame = &a->picture;
                     -    avcodec_get_frame_defaults(&a->picture);
                     -    a->avctx= avctx;
                     -}
+                    -
                     -static av_cold int decode_init(AVCodecContext *avctx){
+                    -
                     -    common_init(avctx);
+                    -
                     -    avctx->pix_fmt= PIX_FMT_YUV410P;
+                    -
                     -    return 0;
                     -}
+                    -
                     -static av_cold int decode_end(AVCodecContext *avctx){
                     -    VCR1Context *s = avctx->priv_data;
+                    -
                     -    if (s->picture.data[0])
                     -        avctx->release_buffer(avctx, &s->picture);
+                    -
                     -    return 0;
                     -}
+                    -
                     -#if CONFIG_VCR1_ENCODER
                     -static av_cold int encode_init(AVCodecContext *avctx){
+                    -
                     -    common_init(avctx);
+                    -
                     -    return 0;
                     -}
                     -#endif
+                    -
                      AVCodec ff_vcr1_decoder = {
                          .name           = "vcr1",
                          .type           = AVMEDIA_TYPE_VIDEO,
@@ -190,14 +152,39 @@ AVCodec ff_vcr1_decoder = {
                          .long_name      = NULL_IF_CONFIG_SMALL("ATI VCR1"),
                      };
                     +/* Disable the encoder. */
                     +#undef CONFIG_VCR1_ENCODER
                     +#define CONFIG_VCR1_ENCODER 0
+                    +
                      #if CONFIG_VCR1_ENCODER
                     +static int encode_frame(AVCodecContext *avctx, unsigned char *buf,
                     +                        int buf_size, void *data)
                     +{
                     +    VCR1Context *const a = avctx->priv_data;
                     +    AVFrame *pict        = data;
                     +    AVFrame *const p     = &a->picture;
                     +    int size;
+                    +
                     +    *p           = *pict;
                     +    p->pict_type = AV_PICTURE_TYPE_I;
                     +    p->key_frame = 1;
+                    +
                     +    avpriv_align_put_bits(&a->pb);
                     +    while (get_bit_count(&a->pb) & 31)
                     +        put_bits(&a->pb, 8, 0);
+                    +
                     +    size = get_bit_count(&a->pb) / 32;
+                    +
                     +    return size * 4;
                     +}
+                    +
                      AVCodec ff_vcr1_encoder = {
                          .name           = "vcr1",
                          .type           = AVMEDIA_TYPE_VIDEO,
                          .id             = CODEC_ID_VCR1,
                          .priv_data_size = sizeof(VCR1Context),
                     -    .init           = encode_init,
                     +    .init           = common_init,
                          .encode         = encode_frame,
                          .long_name      = NULL_IF_CONFIG_SMALL("ATI VCR1"),
                      };
                     -#endif
                     +#endif /* CONFIG_VCR1_ENCODER */

...	...	@@ -89,7 +89,7 @@ static int query_formats(AVFilterContext *ctx)
89	89
90	90	static void filter_samples(AVFilterLink *inlink, AVFilterBufferRef *insamplesref)
91	91	{
92		- avfilter_filter_samples(inlink->dst->outputs[0], insamplesref);
	92	+ ff_filter_samples(inlink->dst->outputs[0], insamplesref);
93	93	}
94	94
95	95	AVFilter avfilter_af_aformat = {

...	...	@@ -167,7 +167,7 @@ static void filter_samples(AVFilterLink *inlink, AVFilterBufferRef *insamples)
167	167	}
168	168	}
169	169	}
170		- avfilter_filter_samples(outlink, insamples);
	170	+ ff_filter_samples(outlink, insamples);
171	171	}
172	172
173	173	AVFilter avfilter_af_volume = {

@@ -1792,6 +1792,22 @@ QPEL_2TAP(avg_, 16, 3dnow)
                      QPEL_2TAP(put_,  8, 3dnow)
                      QPEL_2TAP(avg_,  8, 3dnow)
                     +void ff_put_rv40_qpel8_mc33_mmx(uint8_t *dst, uint8_t *src, int stride)
                     +{
                     +  put_pixels8_xy2_mmx(dst, src, stride, 8);
                     +}
                     +void ff_put_rv40_qpel16_mc33_mmx(uint8_t *dst, uint8_t *src, int stride)
                     +{
                     +  put_pixels16_xy2_mmx(dst, src, stride, 16);
                     +}
                     +void ff_avg_rv40_qpel8_mc33_mmx(uint8_t *dst, uint8_t *src, int stride)
                     +{
                     +  avg_pixels8_xy2_mmx(dst, src, stride, 8);
                     +}
                     +void ff_avg_rv40_qpel16_mc33_mmx(uint8_t *dst, uint8_t *src, int stride)
                     +{
                     +  avg_pixels16_xy2_mmx(dst, src, stride, 16);
                     +}
                      #if HAVE_YASM
                      typedef void emu_edge_core_func(uint8_t *buf, const uint8_t *src,

@@ -199,6 +199,11 @@ void ff_avg_cavs_qpel16_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride);
                      void ff_put_vc1_mspel_mc00_mmx(uint8_t *dst, const uint8_t *src, int stride, int rnd);
                      void ff_avg_vc1_mspel_mc00_mmx2(uint8_t *dst, const uint8_t *src, int stride, int rnd);
                     +void ff_put_rv40_qpel8_mc33_mmx(uint8_t *block, uint8_t *pixels, int line_size);
                     +void ff_put_rv40_qpel16_mc33_mmx(uint8_t *block, uint8_t *pixels, int line_size);
                     +void ff_avg_rv40_qpel8_mc33_mmx(uint8_t *block, uint8_t *pixels, int line_size);
                     +void ff_avg_rv40_qpel16_mc33_mmx(uint8_t *block, uint8_t *pixels, int line_size);
+                    +
                      void ff_mmx_idct(DCTELEM *block);
                      void ff_mmxext_idct(DCTELEM *block);

@@ -1,5 +1,7 @@
                      ;******************************************************************************
                      ;* MMX/SSE2-optimized functions for the RV40 decoder
                     +;* Copyright (c) 2010 Ronald S. Bultje <rsbultje@gmail.com>
                     +;* Copyright (c) 2010 Jason Garrett-Glaser <darkshikari@gmail.com>
                      ;* Copyright (C) 2012 Christophe Gisquet <christophe.gisquet@gmail.com>
                      ;*
                      ;* This file is part of Libav.
@@ -25,11 +27,319 @@
                      SECTION_RODATA
                      align 16
                     -shift_round:   times 8 dw 1 << (16 - 6)
                     -cextern pw_16
                     +pw_1024:   times 8 dw 1 << (16 - 6) ; pw_1024
+                    +
                     +sixtap_filter_hb_m:  times 8 db   1, -5
                     +                     times 8 db  52, 20
                     +                     ; multiplied by 2 to have the same shift
                     +                     times 8 db   2, -10
                     +                     times 8 db  40,  40
                     +                     ; back to normal
                     +                     times 8 db   1, -5
                     +                     times 8 db  20, 52
+                    +
                     +sixtap_filter_v_m:   times 8 dw   1
                     +                     times 8 dw  -5
                     +                     times 8 dw  52
                     +                     times 8 dw  20
                     +                     ; multiplied by 2 to have the same shift
                     +                     times 8 dw   2
                     +                     times 8 dw -10
                     +                     times 8 dw  40
                     +                     times 8 dw  40
                     +                     ; back to normal
                     +                     times 8 dw   1
                     +                     times 8 dw  -5
                     +                     times 8 dw  20
                     +                     times 8 dw  52
+                    +
                     +%ifdef PIC
                     +%define sixtap_filter_hw   picregq
                     +%define sixtap_filter_hb   picregq
                     +%define sixtap_filter_v    picregq
                     +%define npicregs 1
                     +%else
                     +%define sixtap_filter_hw   sixtap_filter_hw_m
                     +%define sixtap_filter_hb   sixtap_filter_hb_m
                     +%define sixtap_filter_v    sixtap_filter_v_m
                     +%define npicregs 0
                     +%endif
+                    +
                     +filter_h6_shuf1: db 0, 1, 1, 2, 2, 3, 3, 4, 4, 5,  5, 6,  6,  7,  7,  8
                     +filter_h6_shuf2: db 2, 3, 3, 4, 4, 5, 5, 6, 6, 7,  7, 8,  8,  9,  9, 10
                     +filter_h6_shuf3: db 5, 4, 6, 5, 7, 6, 8, 7, 9, 8, 10, 9, 11, 10, 12, 11
+                    +
                     +cextern  pw_32
                     +cextern  pw_16
                     +cextern  pw_512
                      SECTION .text
                     +;-----------------------------------------------------------------------------
                     +; subpel MC functions:
                     +;
                     +; void [put|avg]_rv40_qpel_[h|v]_<opt>(uint8_t *dst, int deststride,
                     +;                                       uint8_t *src, int srcstride,
                     +;                                       int len, int m);
                     +;----------------------------------------------------------------------
                     +%macro LOAD  2
                     +%if WIN64
                     +   movsxd   %1q, %1d
                     +%endif
                     +%ifdef PIC
                     +   add      %1q, picregq
                     +%else
                     +   add      %1q, %2
                     +%endif
                     +%endmacro
+                    +
                     +%macro STORE 3
                     +%ifidn %3, avg
                     +    movh      %2, [dstq]
                     +%endif
                     +    packuswb  %1, %1
                     +%ifidn %3, avg
                     +%if cpuflag(3dnow)
                     +    pavgusb   %1, %2
                     +%else
                     +    pavgb     %1, %2
                     +%endif
                     +%endif
                     +    movh  [dstq], %1
                     +%endmacro
+                    +
                     +%macro FILTER_V 1
                     +cglobal %1_rv40_qpel_v, 6,6+npicregs,12, dst, dststride, src, srcstride, height, my, picreg
                     +%ifdef PIC
                     +    lea  picregq, [sixtap_filter_v_m]
                     +%endif
                     +    pxor      m7, m7
                     +    LOAD      my, sixtap_filter_v
+                    +
                     +    ; read 5 lines
                     +    sub     srcq, srcstrideq
                     +    sub     srcq, srcstrideq
                     +    movh      m0, [srcq]
                     +    movh      m1, [srcq+srcstrideq]
                     +    movh      m2, [srcq+srcstrideq*2]
                     +    lea     srcq, [srcq+srcstrideq*2]
                     +    add     srcq, srcstrideq
                     +    movh      m3, [srcq]
                     +    movh      m4, [srcq+srcstrideq]
                     +    punpcklbw m0, m7
                     +    punpcklbw m1, m7
                     +    punpcklbw m2, m7
                     +    punpcklbw m3, m7
                     +    punpcklbw m4, m7
+                    +
                     +%ifdef m8
                     +    mova      m8, [myq+ 0]
                     +    mova      m9, [myq+16]
                     +    mova     m10, [myq+32]
                     +    mova     m11, [myq+48]
                     +%define COEFF05  m8
                     +%define COEFF14  m9
                     +%define COEFF2   m10
                     +%define COEFF3   m11
                     +%else
                     +%define COEFF05  [myq+ 0]
                     +%define COEFF14  [myq+16]
                     +%define COEFF2   [myq+32]
                     +%define COEFF3   [myq+48]
                     +%endif
                     +.nextrow:
                     +    mova      m6, m1
                     +    movh      m5, [srcq+2*srcstrideq]      ; read new row
                     +    paddw     m6, m4
                     +    punpcklbw m5, m7
                     +    pmullw    m6, COEFF14
                     +    paddw     m0, m5
                     +    pmullw    m0, COEFF05
                     +    paddw     m6, m0
                     +    mova      m0, m1
                     +    paddw     m6, [pw_32]
                     +    mova      m1, m2
                     +    pmullw    m2, COEFF2
                     +    paddw     m6, m2
                     +    mova      m2, m3
                     +    pmullw    m3, COEFF3
                     +    paddw     m6, m3
+                    +
                     +    ; round/clip/store
                     +    mova      m3, m4
                     +    psraw     m6, 6
                     +    mova      m4, m5
                     +    STORE     m6, m5, %1
+                    +
                     +    ; go to next line
                     +    add     dstq, dststrideq
                     +    add     srcq, srcstrideq
                     +    dec  heightd                           ; next row
                     +    jg .nextrow
                     +    REP_RET
                     +%endmacro
+                    +
                     +%macro FILTER_H  1
                     +cglobal %1_rv40_qpel_h, 6, 6+npicregs, 12, dst, dststride, src, srcstride, height, mx, picreg
                     +%ifdef PIC
                     +    lea  picregq, [sixtap_filter_v_m]
                     +%endif
                     +    pxor      m7, m7
                     +    LOAD      mx, sixtap_filter_v
                     +    mova      m6, [pw_32]
                     +%ifdef m8
                     +    mova      m8, [mxq+ 0]
                     +    mova      m9, [mxq+16]
                     +    mova     m10, [mxq+32]
                     +    mova     m11, [mxq+48]
                     +%define COEFF05  m8
                     +%define COEFF14  m9
                     +%define COEFF2   m10
                     +%define COEFF3   m11
                     +%else
                     +%define COEFF05  [mxq+ 0]
                     +%define COEFF14  [mxq+16]
                     +%define COEFF2   [mxq+32]
                     +%define COEFF3   [mxq+48]
                     +%endif
                     +.nextrow:
                     +    movq      m0, [srcq-2]
                     +    movq      m5, [srcq+3]
                     +    movq      m1, [srcq-1]
                     +    movq      m4, [srcq+2]
                     +    punpcklbw m0, m7
                     +    punpcklbw m5, m7
                     +    punpcklbw m1, m7
                     +    punpcklbw m4, m7
                     +    movq      m2, [srcq-0]
                     +    movq      m3, [srcq+1]
                     +    paddw     m0, m5
                     +    paddw     m1, m4
                     +    punpcklbw m2, m7
                     +    punpcklbw m3, m7
                     +    pmullw    m0, COEFF05
                     +    pmullw    m1, COEFF14
                     +    pmullw    m2, COEFF2
                     +    pmullw    m3, COEFF3
                     +    paddw     m0, m6
                     +    paddw     m1, m2
                     +    paddw     m0, m3
                     +    paddw     m0, m1
                     +    psraw     m0, 6
                     +    STORE     m0, m1, %1
+                    +
                     +    ; go to next line
                     +    add     dstq, dststrideq
                     +    add     srcq, srcstrideq
                     +    dec  heightd            ; next row
                     +    jg .nextrow
                     +    REP_RET
                     +%endmacro
+                    +
                     +%if ARCH_X86_32
                     +INIT_MMX  mmx
                     +FILTER_V  put
                     +FILTER_H  put
+                    +
                     +INIT_MMX  mmx2
                     +FILTER_V  avg
                     +FILTER_H  avg
+                    +
                     +INIT_MMX  3dnow
                     +FILTER_V  avg
                     +FILTER_H  avg
                     +%endif
+                    +
                     +INIT_XMM  sse2
                     +FILTER_H  put
                     +FILTER_H  avg
                     +FILTER_V  put
                     +FILTER_V  avg
+                    +
                     +%macro FILTER_SSSE3 1
                     +cglobal %1_rv40_qpel_v, 6,6+npicregs,8, dst, dststride, src, srcstride, height, my, picreg
                     +%ifdef PIC
                     +    lea  picregq, [sixtap_filter_hb_m]
                     +%endif
+                    +
                     +    ; read 5 lines
                     +    sub     srcq, srcstrideq
                     +    LOAD      my, sixtap_filter_hb
                     +    sub     srcq, srcstrideq
                     +    movh      m0, [srcq]
                     +    movh      m1, [srcq+srcstrideq]
                     +    movh      m2, [srcq+srcstrideq*2]
                     +    lea     srcq, [srcq+srcstrideq*2]
                     +    add     srcq, srcstrideq
                     +    mova      m5, [myq]
                     +    movh      m3, [srcq]
                     +    movh      m4, [srcq+srcstrideq]
                     +    lea     srcq, [srcq+2*srcstrideq]
+                    +
                     +.nextrow:
                     +    mova      m6, m2
                     +    punpcklbw m0, m1
                     +    punpcklbw m6, m3
                     +    pmaddubsw m0, m5
                     +    pmaddubsw m6, [myq+16]
                     +    movh      m7, [srcq]      ; read new row
                     +    paddw     m6, m0
                     +    mova      m0, m1
                     +    mova      m1, m2
                     +    mova      m2, m3
                     +    mova      m3, m4
                     +    mova      m4, m7
                     +    punpcklbw m7, m3
                     +    pmaddubsw m7, m5
                     +    paddw     m6, m7
                     +    pmulhrsw  m6, [pw_512]
                     +    STORE     m6, m7, %1
+                    +
                     +    ; go to next line
                     +    add     dstq, dststrideq
                     +    add     srcq, srcstrideq
                     +    dec       heightd                          ; next row
                     +    jg       .nextrow
                     +    REP_RET
+                    +
                     +cglobal %1_rv40_qpel_h, 6,6+npicregs,8, dst, dststride, src, srcstride, height, mx, picreg
                     +%ifdef PIC
                     +    lea  picregq, [sixtap_filter_hb_m]
                     +%endif
                     +    mova      m3, [filter_h6_shuf2]
                     +    mova      m4, [filter_h6_shuf3]
                     +    LOAD      mx, sixtap_filter_hb
                     +    mova      m5, [mxq] ; set up 6tap filter in bytes
                     +    mova      m6, [mxq+16]
                     +    mova      m7, [filter_h6_shuf1]
+                    +
                     +.nextrow:
                     +    movu      m0, [srcq-2]
                     +    mova      m1, m0
                     +    mova      m2, m0
                     +    pshufb    m0, m7
                     +    pshufb    m1, m3
                     +    pshufb    m2, m4
                     +    pmaddubsw m0, m5
                     +    pmaddubsw m1, m6
                     +    pmaddubsw m2, m5
                     +    paddw     m0, m1
                     +    paddw     m0, m2
                     +    pmulhrsw  m0, [pw_512]
                     +    STORE     m0, m1, %1
+                    +
                     +    ; go to next line
                     +    add     dstq, dststrideq
                     +    add     srcq, srcstrideq
                     +    dec  heightd            ; next row
                     +    jg .nextrow
                     +    REP_RET
                     +%endmacro
+                    +
                     +INIT_XMM ssse3
                     +FILTER_SSSE3  put
                     +FILTER_SSSE3  avg
+                    +
                      ; %1=5bits weights?, %2=dst %3=src1 %4=src3 %5=stride if sse2
                      %macro RV40_WCORE  4-5
                          movh       m4, [%3 + r6 + 0]
@@ -143,7 +453,7 @@ SECTION .text
                      %macro RV40_WEIGHT  3
                      cglobal rv40_weight_func_%1_%2, 6, 7, 8
                      %if cpuflag(ssse3)
                     -    mova       m1, [shift_round]
                     +    mova       m1, [pw_1024]
                      %else
                          mova       m1, [pw_16]
                      %endif

@@ -22,8 +22,11 @@
                      /**
                       * @file
                       * RV40 decoder motion compensation functions x86-optimised
                     + * 2,0 and 0,2 have h264 equivalents.
                     + * 3,3 is bugged in the rv40 format and maps to _xy2 version
                       */
                     +#include "libavcodec/x86/dsputil_mmx.h"
                      #include "libavcodec/rv34dsp.h"
                      void ff_put_rv40_chroma_mc8_mmx  (uint8_t *dst, uint8_t *src,
@@ -53,6 +56,132 @@ DECLARE_WEIGHT(mmx)
                      DECLARE_WEIGHT(sse2)
                      DECLARE_WEIGHT(ssse3)
                     +/** @{ */
                     +/**
                     + * Define one qpel function.
                     + * The generated static function is named by pasting OP, rv40_qpel, SIZE,
                     + * _mc, PH, PV and OPT together. Its body picks one of three paths from
                     + * the qpel position:
                     + *  - PH and PV both non-zero: SIZE + 5 rows, starting two rows above src,
                     + *    are filtered horizontally into an aligned temporary buffer of
                     + *    stride SIZE, which is then filtered vertically into dst starting at
                     + *    the buffer's third row. The first pass always calls the put_
                     + *    horizontal routine; OP is only applied by the second pass.
                     + *  - only PV non-zero: the vertical routine is called on src directly.
                     + *  - otherwise: the horizontal routine is called on src directly.
                     + * LOOPSIZE must be already set to the number of pixels processed per
                     + * iteration in the inner loop of the called functions.
                     + * HCOFF(x) and VCOFF(x) must be already defined so as to provide the
                     + * offset into any array of coeffs used by the called horizontal and
                     + * vertical functions for the qpel position x.
                     + */
                     +#define QPEL_FUNC_DECL(OP, SIZE, PH, PV, OPT)                           \
                     +static void OP ## rv40_qpel ##SIZE ##_mc ##PH ##PV ##OPT(uint8_t *dst,  \
                     +                                                         uint8_t *src,  \
                     +                                                         int stride)    \
                     +{                                                                       \
                     +    int i;                                                              \
                     +    if (PH && PV) {                                                     \
                     +        DECLARE_ALIGNED(16, uint8_t, tmp)[SIZE * (SIZE + 5)];           \
                     +        uint8_t *tmpptr = tmp + SIZE * 2;                               \
                     +        src -= stride * 2;                                              \
                     +                                                                        \
                     +        for (i = 0; i < SIZE; i += LOOPSIZE)                            \
                     +            ff_put_rv40_qpel_h ##OPT(tmp + i, SIZE, src + i, stride,    \
                     +                                     SIZE + 5, HCOFF(PH));              \
                     +        for (i = 0; i < SIZE; i += LOOPSIZE)                            \
                     +            ff_ ##OP ##rv40_qpel_v ##OPT(dst + i, stride, tmpptr + i,   \
                     +                                         SIZE, SIZE, VCOFF(PV));        \
                     +    } else if (PV) {                                                    \
                     +        for (i = 0; i < SIZE; i += LOOPSIZE)                            \
                     +            ff_ ##OP ##rv40_qpel_v ## OPT(dst + i, stride, src + i,     \
                     +                                          stride, SIZE, VCOFF(PV));     \
                     +    } else {                                                            \
                     +        for (i = 0; i < SIZE; i += LOOPSIZE)                            \
                     +            ff_ ##OP ##rv40_qpel_h ## OPT(dst + i, stride, src + i,     \
                     +                                          stride, SIZE, HCOFF(PH));     \
                     +    }                                                                   \
                     +};
+                    +
                     +/** Define the qpel functions for block sizes 8 and 16 for one operation,
                     + *  qpel position and optimisation suffix, by expanding QPEL_FUNC_DECL
                     + *  once per size. */
                     +#define QPEL_FUNCS_DECL(OP, PH, PV, OPT) \
                     +    QPEL_FUNC_DECL(OP,  8, PH, PV, OPT)  \
                     +    QPEL_FUNC_DECL(OP, 16, PH, PV, OPT)
+                    +
                     +/**
                     + * Declare the prototypes of the horizontal (_h) and vertical (_v) filter
                     + * routines for the given operation and optimisation suffix, then define
                     + * the wrappers for sizes 8 and 16 at the twelve (PH, PV) positions listed
                     + * below. Positions (0,0), (2,0), (0,2) and (3,3) are not generated by
                     + * this macro.
                     + */
                     +#define QPEL_MC_DECL(OP, OPT)                                           \
                     +void ff_ ##OP ##rv40_qpel_h ##OPT(uint8_t *dst, ptrdiff_t dstStride,    \
                     +                                  const uint8_t *src,                   \
                     +                                  ptrdiff_t srcStride,                  \
                     +                                  int len, int m);                      \
                     +void ff_ ##OP ##rv40_qpel_v ##OPT(uint8_t *dst, ptrdiff_t dstStride,    \
                     +                                  const uint8_t *src,                   \
                     +                                  ptrdiff_t srcStride,                  \
                     +                                  int len, int m);                      \
                     +QPEL_FUNCS_DECL(OP, 0, 1, OPT)                                          \
                     +QPEL_FUNCS_DECL(OP, 0, 3, OPT)                                          \
                     +QPEL_FUNCS_DECL(OP, 1, 0, OPT)                                          \
                     +QPEL_FUNCS_DECL(OP, 1, 1, OPT)                                          \
                     +QPEL_FUNCS_DECL(OP, 1, 2, OPT)                                          \
                     +QPEL_FUNCS_DECL(OP, 1, 3, OPT)                                          \
                     +QPEL_FUNCS_DECL(OP, 2, 1, OPT)                                          \
                     +QPEL_FUNCS_DECL(OP, 2, 2, OPT)                                          \
                     +QPEL_FUNCS_DECL(OP, 2, 3, OPT)                                          \
                     +QPEL_FUNCS_DECL(OP, 3, 0, OPT)                                          \
                     +QPEL_FUNCS_DECL(OP, 3, 1, OPT)                                          \
                     +QPEL_FUNCS_DECL(OP, 3, 2, OPT)
                     +/** @} */
+                    +/* SSSE3 instantiation: the wrappers advance 8 columns per call and the
                     + * coefficient offset grows by 32 per qpel position (offset 0 for
                     + * position 1). */
                     +#define LOOPSIZE  8
                     +#define HCOFF(x)  (32 * (x - 1))
                     +#define VCOFF(x)  (32 * (x - 1))
                     +QPEL_MC_DECL(put_, _ssse3)
                     +QPEL_MC_DECL(avg_, _ssse3)
+                    +/* SSE2 instantiation: same 8-column step, but the coefficient offset
                     + * grows by 64 per qpel position. */
                     +#undef LOOPSIZE
                     +#undef HCOFF
                     +#undef VCOFF
                     +#define LOOPSIZE  8
                     +#define HCOFF(x)  (64 * (x - 1))
                     +#define VCOFF(x)  (64 * (x - 1))
                     +QPEL_MC_DECL(put_, _sse2)
                     +QPEL_MC_DECL(avg_, _sse2)
+                    +/* MMX-family instantiations, compiled only when ARCH_X86_32 is set:
                     + * 4-column step, coefficient offset growing by 64 per qpel position. */
                     +#if ARCH_X86_32
                     +#undef LOOPSIZE
                     +#undef HCOFF
                     +#undef VCOFF
                     +#define LOOPSIZE  4
                     +#define HCOFF(x)  (64 * (x - 1))
                     +#define VCOFF(x)  (64 * (x - 1))
+                    +
                     +QPEL_MC_DECL(put_, _mmx)
+                    +/* QPEL_FUNC_DECL always calls ff_put_rv40_qpel_h ##OPT for the first
                     + * pass of the two-dimensional case, while QPEL_MC_DECL(avg_, ...) only
                     + * declares the avg_ prototypes. The put_ names for the _mmx2 suffix (and
                     + * for _3dnow further down) are therefore aliased to the MMX routines
                     + * declared by QPEL_MC_DECL(put_, _mmx). Only the _h alias is referenced
                     + * by the macros visible here. */
                     +#define ff_put_rv40_qpel_h_mmx2  ff_put_rv40_qpel_h_mmx
                     +#define ff_put_rv40_qpel_v_mmx2  ff_put_rv40_qpel_v_mmx
                     +QPEL_MC_DECL(avg_, _mmx2)
+                    +
                     +#define ff_put_rv40_qpel_h_3dnow  ff_put_rv40_qpel_h_mmx
                     +#define ff_put_rv40_qpel_v_3dnow  ff_put_rv40_qpel_v_mmx
                     +QPEL_MC_DECL(avg_, _3dnow)
                     +#endif
+                    +
                     +/** @{ */
                     +/** Set one function: store the wrapper generated by QPEL_FUNC_DECL for
                     + *  (SIZE, PH, PV, OPT) into the OP pixels_tab of the context named c.
                     + *  The first index is 2 - SIZE / 8 (0 for SIZE 16, 1 for SIZE 8) and the
                     + *  second index is 4 * PV + PH. The expansion already ends with a
                     + *  semicolon. */
                     +#define QPEL_FUNC_SET(OP, SIZE, PH, PV, OPT)                            \
                     +    c-> OP ## pixels_tab[2 - SIZE / 8][4 * PV + PH] = OP ## rv40_qpel ##SIZE ## _mc ##PH ##PV ##OPT;
+                    +
                     +/** Set the functions of one operation (OP) for sizes 8 and 16 and a given qpel position */
                     +#define QPEL_FUNCS_SET(OP, PH, PV, OPT)         \
                     +    QPEL_FUNC_SET(OP,  8, PH, PV, OPT)          \
                     +    QPEL_FUNC_SET(OP, 16, PH, PV, OPT)
+                    +
                     +/** Set all functions of one operation (OP) and suffix (OPT), for sizes 8
                     + *  and 16, at the same twelve (PH, PV) positions that QPEL_MC_DECL
                     + *  defines wrappers for. */
                     +#define QPEL_MC_SET(OP, OPT)   \
                     +QPEL_FUNCS_SET (OP, 0, 1, OPT) \
                     +QPEL_FUNCS_SET (OP, 0, 3, OPT) \
                     +QPEL_FUNCS_SET (OP, 1, 0, OPT) \
                     +QPEL_FUNCS_SET (OP, 1, 1, OPT) \
                     +QPEL_FUNCS_SET (OP, 1, 2, OPT) \
                     +QPEL_FUNCS_SET (OP, 1, 3, OPT) \
                     +QPEL_FUNCS_SET (OP, 2, 1, OPT) \
                     +QPEL_FUNCS_SET (OP, 2, 2, OPT) \
                     +QPEL_FUNCS_SET (OP, 2, 3, OPT) \
                     +QPEL_FUNCS_SET (OP, 3, 0, OPT) \
                     +QPEL_FUNCS_SET (OP, 3, 1, OPT) \
                     +QPEL_FUNCS_SET (OP, 3, 2, OPT)
                     +/** @} */
+                    +
                      void ff_rv40dsp_init_x86(RV34DSPContext *c, DSPContext *dsp)
+                     {
                      #if HAVE_YASM
@@ -65,25 +194,42 @@ void ff_rv40dsp_init_x86(RV34DSPContext *c, DSPContext *dsp)
                              c->rv40_weight_pixels_tab[0][1] = ff_rv40_weight_func_rnd_8_mmx;
                              c->rv40_weight_pixels_tab[1][0] = ff_rv40_weight_func_nornd_16_mmx;
                              c->rv40_weight_pixels_tab[1][1] = ff_rv40_weight_func_nornd_8_mmx;
                     +        c->put_pixels_tab[0][15] = ff_put_rv40_qpel16_mc33_mmx;
                     +        c->put_pixels_tab[1][15] = ff_put_rv40_qpel8_mc33_mmx;
                     +        c->avg_pixels_tab[0][15] = ff_avg_rv40_qpel16_mc33_mmx;
                     +        c->avg_pixels_tab[1][15] = ff_avg_rv40_qpel8_mc33_mmx;
                     +#if ARCH_X86_32
                     +        QPEL_MC_SET(put_, _mmx)
                     +#endif
+                         }
                          if (mm_flags & AV_CPU_FLAG_MMX2) {
                              c->avg_chroma_pixels_tab[0] = ff_avg_rv40_chroma_mc8_mmx2;
                              c->avg_chroma_pixels_tab[1] = ff_avg_rv40_chroma_mc4_mmx2;
                     +#if ARCH_X86_32
                     +        QPEL_MC_SET(avg_, _mmx2)
                     +#endif
                          } else if (mm_flags & AV_CPU_FLAG_3DNOW) {
                              c->avg_chroma_pixels_tab[0] = ff_avg_rv40_chroma_mc8_3dnow;
                              c->avg_chroma_pixels_tab[1] = ff_avg_rv40_chroma_mc4_3dnow;
                     +#if ARCH_X86_32
                     +        QPEL_MC_SET(avg_, _3dnow)
                     +#endif
+                         }
                          if (mm_flags & AV_CPU_FLAG_SSE2) {
                              c->rv40_weight_pixels_tab[0][0] = ff_rv40_weight_func_rnd_16_sse2;
                              c->rv40_weight_pixels_tab[0][1] = ff_rv40_weight_func_rnd_8_sse2;
                              c->rv40_weight_pixels_tab[1][0] = ff_rv40_weight_func_nornd_16_sse2;
                              c->rv40_weight_pixels_tab[1][1] = ff_rv40_weight_func_nornd_8_sse2;
                     +        QPEL_MC_SET(put_, _sse2)
                     +        QPEL_MC_SET(avg_, _sse2)
+                         }
                          if (mm_flags & AV_CPU_FLAG_SSSE3) {
                              c->rv40_weight_pixels_tab[0][0] = ff_rv40_weight_func_rnd_16_ssse3;
                              c->rv40_weight_pixels_tab[0][1] = ff_rv40_weight_func_rnd_8_ssse3;
                              c->rv40_weight_pixels_tab[1][0] = ff_rv40_weight_func_nornd_16_ssse3;
                              c->rv40_weight_pixels_tab[1][1] = ff_rv40_weight_func_nornd_8_ssse3;
                     +        QPEL_MC_SET(put_, _ssse3)
                     +        QPEL_MC_SET(avg_, _ssse3)
+                         }
                      #endif
+                     }

@@ -21,6 +21,7 @@ HEADERS = asrc_abuffer.h                                                \
                                vsrc_buffer.h                                                 \
                      OBJS = allfilters.o                                                     \
                     +       audio.o                                                          \
                             avfilter.o                                                       \
                             avfiltergraph.o                                                  \
                             buffersink.o                                                     \

@@ -144,7 +144,7 @@ static void filter_samples(AVFilterLink *inlink, AVFilterBufferRef *insamplesref
                          AConvertContext *aconvert = inlink->dst->priv;
                          const int n = insamplesref->audio->nb_samples;
                          AVFilterLink *const outlink = inlink->dst->outputs[0];
                     -    AVFilterBufferRef *outsamplesref = avfilter_get_audio_buffer(outlink, AV_PERM_WRITE, n);
                     +    AVFilterBufferRef *outsamplesref = ff_get_audio_buffer(outlink, AV_PERM_WRITE, n);
                          swr_convert(aconvert->swr, outsamplesref->data, n,
                                              (void *)insamplesref->data, n);
@@ -153,7 +153,7 @@ static void filter_samples(AVFilterLink *inlink, AVFilterBufferRef *insamplesref
                          outsamplesref->audio->channel_layout = outlink->channel_layout;
                          outsamplesref->audio->planar         = outlink->planar;
                     -    avfilter_filter_samples(outlink, outsamplesref);
                     +    ff_filter_samples(outlink, outsamplesref);
                          avfilter_unref_buffer(insamplesref);
+                     }

@@ -208,7 +208,7 @@ static void filter_samples(AVFilterLink *inlink, AVFilterBufferRef *insamples)
                          nb_samples = FFMIN(am->queue[0].nb_samples,
                                             am->queue[1].nb_samples);
                     -    outbuf = avfilter_get_audio_buffer(ctx->outputs[0], AV_PERM_WRITE,
                     +    outbuf = ff_get_audio_buffer(ctx->outputs[0], AV_PERM_WRITE,
                                                             nb_samples);
                          outs = outbuf->data[0];
                          for (i = 0; i < 2; i++) {
@@ -264,7 +264,7 @@ static void filter_samples(AVFilterLink *inlink, AVFilterBufferRef *insamples)
                                          am->queue[i].nb_buf * sizeof(**inbuf));
+                             }
+                         }
                     -    avfilter_filter_samples(ctx->outputs[0], outbuf);
                     +    ff_filter_samples(ctx->outputs[0], outbuf);
+                     }
                      AVFilter avfilter_af_amerge = {

@@ -21,6 +21,7 @@
                       * null audio filter
                       */
                     +#include "audio.h"
                      #include "avfilter.h"
                      AVFilter avfilter_af_anull = {
@@ -31,8 +32,8 @@ AVFilter avfilter_af_anull = {
                          .inputs    = (const AVFilterPad[]) {{ .name       = "default",
                                                          .type             = AVMEDIA_TYPE_AUDIO,
                     -                                    .get_audio_buffer = avfilter_null_get_audio_buffer,
                     -                                    .filter_samples   = avfilter_null_filter_samples },
                     +                                    .get_audio_buffer = ff_null_get_audio_buffer,
                     +                                    .filter_samples   = ff_null_filter_samples },
                                                        { .name = NULL}},
                          .outputs   = (const AVFilterPad[]) {{ .name       = "default",

@@ -92,7 +92,7 @@ static void filter_samples(AVFilterLink *inlink, AVFilterBufferRef *insamplesref
                          const int n_in  = insamplesref->audio->nb_samples;
                          int n_out       = n_in * aresample->ratio;
                          AVFilterLink *const outlink = inlink->dst->outputs[0];
                     -    AVFilterBufferRef *outsamplesref = avfilter_get_audio_buffer(outlink, AV_PERM_WRITE, n_out);
                     +    AVFilterBufferRef *outsamplesref = ff_get_audio_buffer(outlink, AV_PERM_WRITE, n_out);
                          n_out = swr_convert(aresample->swr, outsamplesref->data, n_out,
                                                       (void *)insamplesref->data, n_in);
@@ -103,7 +103,7 @@ static void filter_samples(AVFilterLink *inlink, AVFilterBufferRef *insamplesref
                          outsamplesref->pts = insamplesref->pts == AV_NOPTS_VALUE ? AV_NOPTS_VALUE :
                              av_rescale(outlink->sample_rate, insamplesref->pts, inlink ->sample_rate);
                     -    avfilter_filter_samples(outlink, outsamplesref);
                     +    ff_filter_samples(outlink, outsamplesref);
                          avfilter_unref_buffer(insamplesref);
+                     }

@@ -83,7 +83,7 @@ static void filter_samples(AVFilterLink *inlink, AVFilterBufferRef *samplesref)
                          av_log(ctx, AV_LOG_INFO, "]\n");
                          showinfo->frame++;
                     -    avfilter_filter_samples(inlink->dst->outputs[0], samplesref);
                     +    ff_filter_samples(inlink->dst->outputs[0], samplesref);
+                     }
                      AVFilter avfilter_af_ashowinfo = {
@@ -95,7 +95,7 @@ AVFilter avfilter_af_ashowinfo = {
                          .inputs    = (const AVFilterPad[]) {{ .name       = "default",
                                                          .type             = AVMEDIA_TYPE_AUDIO,
                     -                                    .get_audio_buffer = avfilter_null_get_audio_buffer,
                     +                                    .get_audio_buffer = ff_null_get_audio_buffer,
                                                          .filter_samples   = filter_samples,
                                                          .min_perms        = AV_PERM_READ, },
                                                        { .name = NULL}},

@@ -119,7 +119,7 @@ static void send_out(AVFilterContext *ctx, int out_id)
                                  av_q2d(ctx->outputs[out_id]->time_base) * buf->pts;
                          as->var_values[VAR_T1 + out_id] += buf->audio->nb_samples /
                                                         (double)ctx->inputs[out_id]->sample_rate;
                     -    avfilter_filter_samples(ctx->outputs[out_id], buf);
                     +    ff_filter_samples(ctx->outputs[out_id], buf);
                          queue->nb--;
                          queue->tail = (queue->tail + 1) % QUEUE_SIZE;
                          if (as->req[out_id])

@@ -122,7 +122,7 @@ static void filter_samples(AVFilterLink *inlink, AVFilterBufferRef *insamples)
                          AVFilterLink *outlink = inlink->dst->outputs[0];
                          int16_t *taps, *endin, *in, *out;
                          AVFilterBufferRef *outsamples =
                     -        avfilter_get_audio_buffer(inlink, AV_PERM_WRITE,
                     +        ff_get_audio_buffer(inlink, AV_PERM_WRITE,
                                                        insamples->audio->nb_samples);
                          avfilter_copy_buffer_ref_props(outsamples, insamples);
@@ -141,7 +141,7 @@ static void filter_samples(AVFilterLink *inlink, AVFilterBufferRef *insamples)
                          // save part of input for next round
                          memcpy(taps, endin, NUMTAPS * sizeof(*taps));
                     -    avfilter_filter_samples(outlink, outsamples);
                     +    ff_filter_samples(outlink, outsamples);
                          avfilter_unref_buffer(insamples);
+                     }

@@ -340,7 +340,7 @@ static void filter_samples(AVFilterLink *inlink, AVFilterBufferRef *insamples)
+                     {
                          int n = insamples->audio->nb_samples;
                          AVFilterLink *const outlink = inlink->dst->outputs[0];
                     -    AVFilterBufferRef *outsamples = avfilter_get_audio_buffer(outlink, AV_PERM_WRITE, n);
                     +    AVFilterBufferRef *outsamples = ff_get_audio_buffer(outlink, AV_PERM_WRITE, n);
                          PanContext *pan = inlink->dst->priv;
                          swr_convert(pan->swr, outsamples->data, n, (void *)insamples->data, n);
@@ -348,7 +348,7 @@ static void filter_samples(AVFilterLink *inlink, AVFilterBufferRef *insamples)
                          outsamples->audio->channel_layout = outlink->channel_layout;
                          outsamples->audio->planar         = outlink->planar;
                     -    avfilter_filter_samples(outlink, outsamples);
                     +    ff_filter_samples(outlink, outsamples);
                          avfilter_unref_buffer(insamples);
+                     }

@@ -123,7 +123,7 @@ static void filter_samples(AVFilterLink *inlink, AVFilterBufferRef *insamples)
+                             }
+                         }
                     -    avfilter_filter_samples(inlink->dst->outputs[0], insamples);
                     +    ff_filter_samples(inlink->dst->outputs[0], insamples);
+                     }
                      static int query_formats(AVFilterContext *ctx)
@@ -163,7 +163,7 @@ AVFilter avfilter_af_silencedetect = {
                          .inputs = (const AVFilterPad[]) {
                              { .name             = "default",
                                .type             = AVMEDIA_TYPE_AUDIO,
                     -          .get_audio_buffer = avfilter_null_get_audio_buffer,
                     +          .get_audio_buffer = ff_null_get_audio_buffer,
                                .filter_samples   = filter_samples, },
                              { .name = NULL }
                          },

@@ -205,7 +205,7 @@ static int request_frame(AVFilterLink *outlink)
                          if (eval->duration >= 0 && t > eval->duration)
                              return AVERROR_EOF;
                     -    samplesref = avfilter_get_audio_buffer(outlink, AV_PERM_WRITE, eval->nb_samples);
                     +    samplesref = ff_get_audio_buffer(outlink, AV_PERM_WRITE, eval->nb_samples);
                          /* evaluate expression for each single sample and for each channel */
                          for (i = 0; i < eval->nb_samples; i++, eval->n++) {
@@ -223,7 +223,7 @@ static int request_frame(AVFilterLink *outlink)
                          samplesref->audio->sample_rate = eval->sample_rate;
                          eval->pts += eval->nb_samples;
                     -    avfilter_filter_samples(outlink, samplesref);
                     +    ff_filter_samples(outlink, samplesref);
                          return 0;
+                     }

@@ -108,13 +108,13 @@ static int request_frame(AVFilterLink *outlink)
                          AVFilterBufferRef *samplesref;
                          samplesref =
                     -        avfilter_get_audio_buffer(outlink, AV_PERM_WRITE, null->nb_samples);
                     +        ff_get_audio_buffer(outlink, AV_PERM_WRITE, null->nb_samples);
                          samplesref->pts = null->pts;
                          samplesref->pos = -1;
                          samplesref->audio->channel_layout = null->channel_layout;
                          samplesref->audio->sample_rate = outlink->sample_rate;
                     -    avfilter_filter_samples(outlink, avfilter_ref_buffer(samplesref, ~0));
                     +    ff_filter_samples(outlink, avfilter_ref_buffer(samplesref, ~0));
                          avfilter_unref_buffer(samplesref);
                          null->pts += null->nb_samples;

                     new file mode 100644
@@ -0,0 +1,291 @@
                     +/*
                     + * Copyright (c) Stefano Sabatini | stefasab at gmail.com
                     + * Copyright (c) S.N. Hemanth Meenakshisundaram | smeenaks at ucsd.edu
                     + *
                     + * This file is part of FFmpeg.
                     + *
                     + * FFmpeg is free software; you can redistribute it and/or
                     + * modify it under the terms of the GNU Lesser General Public
                     + * License as published by the Free Software Foundation; either
                     + * version 2.1 of the License, or (at your option) any later version.
                     + *
                     + * FFmpeg is distributed in the hope that it will be useful,
                     + * but WITHOUT ANY WARRANTY; without even the implied warranty of
                     + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
                     + * Lesser General Public License for more details.
                     + *
                     + * You should have received a copy of the GNU Lesser General Public
                     + * License along with FFmpeg; if not, write to the Free Software
                     + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
                     + */
+                    +
                     +#include "libavutil/avassert.h"
                     +#include "libavutil/audioconvert.h"
+                    +
                     +#include "audio.h"
                     +#include "avfilter.h"
                     +#include "internal.h"
+                    +/**
                     + * Pass-through get_audio_buffer implementation: the request is not
                     + * served on the given link but forwarded, with perms and nb_samples
                     + * unchanged, to ff_get_audio_buffer() on the first output link of the
                     + * link's destination filter.
                     + *
                     + * @return whatever ff_get_audio_buffer() returns for that output link
                     + */
                     +AVFilterBufferRef *ff_null_get_audio_buffer(AVFilterLink *link, int perms,
                     +                                            int nb_samples)
                     +{
                     +    return ff_get_audio_buffer(link->dst->outputs[0], perms, nb_samples);
                     +}
+                    +/**
                     + * Default audio buffer allocator for a link.
                     + *
                     + * Allocates sample storage with av_samples_alloc() for the channel count
                     + * derived from link->channel_layout and the sample format returned by
                     + * av_get_alt_sample_fmt(link->format, link->planar), then wraps it with
                     + * avfilter_get_audio_buffer_ref_from_arrays().
                     + *
                     + * @param link       link whose format, layout and planar flag are used
                     + * @param perms      permissions passed on to the wrapping call
                     + * @param nb_samples number of samples to allocate
                     + * @return the new buffer reference, or NULL if the sample allocation or
                     + *         the wrapping fails (data[0] is freed in the latter case)
                     + */
                     +AVFilterBufferRef *ff_default_get_audio_buffer(AVFilterLink *link, int perms,
                     +                                                     int nb_samples)
                     +{
                     +    AVFilterBufferRef *samplesref = NULL;
                     +    int linesize[8] = {0};
                     +    uint8_t *data[8] = {0};
                     +    int ch, nb_channels = av_get_channel_layout_nb_channels(link->channel_layout);
+                    +
                     +    /* right now we don't support more than 8 channels: data[] and
                     +     * linesize[] above have exactly 8 entries */
                     +    av_assert0(nb_channels <= 8);
+                    +
                     +    /* Allocate the sample data; 16 is passed as the last argument of
                     +     * av_samples_alloc() to be SIMD friendly */
                     +    if (av_samples_alloc(data, linesize,
                     +                         nb_channels, nb_samples,
                     +                         av_get_alt_sample_fmt(link->format, link->planar),
                     +                         16) < 0)
                     +        return NULL;
+                    +
                     +    for (ch = 1; link->planar && ch < nb_channels; ch++)
                     +        linesize[ch] = linesize[0];
                     +    samplesref =
                     +        avfilter_get_audio_buffer_ref_from_arrays(data, linesize, perms,
                     +                                                  nb_samples, link->format,
                     +                                                  link->channel_layout, link->planar);
                     +    if (!samplesref) {
                     +        av_free(data[0]);
                     +        return NULL;
                     +    }
+                    +
                     +    return samplesref;
                     +}
+                    +/**
                     + * Alternative default allocator that uses a heap-allocated array of
                     + * plane pointers instead of fixed 8-entry arrays.
                     + *
                     + * The array has one pointer per channel when
                     + * av_sample_fmt_is_planar(link->format) is true and a single pointer
                     + * otherwise. Samples are allocated with av_samples_alloc() and wrapped
                     + * with avfilter_get_audio_buffer_ref_from_arrays_alt().
                     + *
                     + * On success only the pointer array is released before falling through
                     + * the fail label; av_freep() leaves data NULL, so the cleanup below it
                     + * then does nothing. On failure both data[0] and the pointer array are
                     + * released.
                     + * NOTE(review): releasing the pointer array on success presumes the
                     + * wrapping call keeps its own copy of the pointers - confirm against
                     + * its definition.
                     + *
                     + * @return the new buffer reference, or NULL on any failure
                     + */
                     +static AVFilterBufferRef *ff_default_get_audio_buffer_alt(AVFilterLink *link, int perms,
                     +                                               int nb_samples)
                     +{
                     +    AVFilterBufferRef *samplesref = NULL;
                     +    uint8_t **data;
                     +    int planar      = av_sample_fmt_is_planar(link->format);
                     +    int nb_channels = av_get_channel_layout_nb_channels(link->channel_layout);
                     +    int planes      = planar ? nb_channels : 1;
                     +    int linesize;
+                    +
                     +    if (!(data = av_mallocz(sizeof(*data) * planes)))
                     +        goto fail;
+                    +
                     +    if (av_samples_alloc(data, &linesize, nb_channels, nb_samples, link->format, 0) < 0)
                     +        goto fail;
+                    +
                     +    samplesref = avfilter_get_audio_buffer_ref_from_arrays_alt(data, linesize, perms,
                     +                                                           nb_samples, link->format,
                     +                                                           link->channel_layout);
                     +    if (!samplesref)
                     +        goto fail;
+                    +
                     +    av_freep(&data); /* success: drop only the pointer array */
+                    +
                     +fail:
                     +    if (data) /* data is NULL here on success or if av_mallocz() failed */
                     +        av_freep(&data[0]);
                     +    av_freep(&data);
                     +    return samplesref;
                     +}
+                    +/**
                     + * Request an audio buffer for a link.
                     + *
                     + * The destination pad's get_audio_buffer callback is tried first when it
                     + * is set. If there is no callback, or it returns NULL,
                     + * ff_default_get_audio_buffer() is used instead. A non-NULL result has
                     + * its type set to AVMEDIA_TYPE_AUDIO before being returned.
                     + *
                     + * @return the buffer reference, or NULL if both attempts fail
                     + */
                     +AVFilterBufferRef *ff_get_audio_buffer(AVFilterLink *link, int perms,
                     +                                       int nb_samples)
                     +{
                     +    AVFilterBufferRef *ret = NULL;
+                    +
                     +    if (link->dstpad->get_audio_buffer)
                     +        ret = link->dstpad->get_audio_buffer(link, perms, nb_samples);
+                    +
                     +    if (!ret)
                     +        ret = ff_default_get_audio_buffer(link, perms, nb_samples);
+                    +
                     +    if (ret)
                     +        ret->type = AVMEDIA_TYPE_AUDIO;
+                    +
                     +    return ret;
                     +}
+                    +/**
                     + * Wrap already allocated sample data in a new AVFilterBuffer and a
                     + * reference to it.
                     + *
                     + * Allocates the buffer, the reference and the reference's audio
                     + * properties, fills in nb_samples, channel_layout, planar, the sample
                     + * format and the permissions, sets the buffer's refcount to 1 and its
                     + * free callback to ff_avfilter_default_free_buffer, and copies the data
                     + * and linesize arrays into both the buffer and the reference.
                     + *
                     + * The copies are sized by the destination arrays (sizeof(samples->data)
                     + * and friends), so the caller's arrays must be at least that large.
                     + * NOTE(review): presumably those are 8-entry arrays matching the
                     + * parameter declarations - confirm against the struct definitions.
                     + *
                     + * @return the new reference, or NULL if an allocation fails. On failure
                     + *         only the structures allocated here are freed; the sample data
                     + *         passed in is left to the caller.
                     + */
                     +AVFilterBufferRef *
                     +avfilter_get_audio_buffer_ref_from_arrays(uint8_t *data[8], int linesize[8], int perms,
                     +                                          int nb_samples, enum AVSampleFormat sample_fmt,
                     +                                          uint64_t channel_layout, int planar)
                     +{
                     +    AVFilterBuffer *samples = av_mallocz(sizeof(AVFilterBuffer));
                     +    AVFilterBufferRef *samplesref = av_mallocz(sizeof(AVFilterBufferRef));
+                    +
                     +    if (!samples || !samplesref)
                     +        goto fail;
+                    +
                     +    samplesref->buf = samples;
                     +    samplesref->buf->free = ff_avfilter_default_free_buffer;
                     +    if (!(samplesref->audio = av_mallocz(sizeof(AVFilterBufferRefAudioProps))))
                     +        goto fail;
+                    +
                     +    samplesref->audio->nb_samples     = nb_samples;
                     +    samplesref->audio->channel_layout = channel_layout;
                     +    samplesref->audio->planar         = planar;
+                    +
                     +    /* make sure the buffer gets read permission or it's useless for output */
                     +    samplesref->perms = perms | AV_PERM_READ;
+                    +
                     +    samples->refcount = 1;
                     +    samplesref->type = AVMEDIA_TYPE_AUDIO;
                     +    samplesref->format = sample_fmt;
+                    +
                     +    memcpy(samples->data,        data,     sizeof(samples->data));
                     +    memcpy(samples->linesize,    linesize, sizeof(samples->linesize));
                     +    memcpy(samplesref->data,     data,     sizeof(samplesref->data));
                     +    memcpy(samplesref->linesize, linesize, sizeof(samplesref->linesize));
+                    +
                     +    return samplesref;
+                    +
                     +fail:
                     +    if (samplesref && samplesref->audio)
                     +        av_freep(&samplesref->audio);
                     +    av_freep(&samplesref);
                     +    av_freep(&samples);
                     +    return NULL;
                     +}
+                    +
                     +/**
                     + * Create an audio buffer reference around caller-supplied plane pointers,
                     + * deriving the plane count from the sample format and channel layout.
                     + *
                     + * Planar formats use one plane per channel of channel_layout, other formats
                     + * a single plane. When the planes do not fit into the fixed data array, a
                     + * separate extended_data pointer array is allocated for both the buffer and
                     + * the reference; otherwise extended_data aliases data. The sample data
                     + * itself is not copied.
                     + *
                     + * @param data           plane pointers
                     + * @param linesize       stored as linesize[0] of the buffer and the reference
                     + * @param perms          permissions of the reference; AV_PERM_READ is always
                     + *                       added
                     + * @param nb_samples     stored in the audio properties of the reference
                     + * @param sample_fmt     stored as the format of the reference
                     + * @param channel_layout stored in the audio properties of the reference
                     + * @return the new reference, or NULL if an allocation failed
                     + */
                     +AVFilterBufferRef* avfilter_get_audio_buffer_ref_from_arrays_alt(uint8_t **data,
                     +                                                             int linesize,int perms,
                     +                                                             int nb_samples,
                     +                                                             enum AVSampleFormat sample_fmt,
                     +                                                             uint64_t channel_layout)
                     +{
                     +    AVFilterBuffer    *buf = av_mallocz(sizeof(*buf));
                     +    AVFilterBufferRef *ref = av_mallocz(sizeof(*ref));
                     +    int nb_planes;
+                    +
                     +    if (!buf || !ref)
                     +        goto fail;
+                    +
                     +    buf->free = ff_avfilter_default_free_buffer;
                     +    ref->buf  = buf;
+                    +
                     +    ref->audio = av_mallocz(sizeof(*ref->audio));
                     +    if (!ref->audio)
                     +        goto fail;
+                    +
                     +    ref->audio->nb_samples     = nb_samples;
                     +    ref->audio->channel_layout = channel_layout;
                     +    ref->audio->planar         = av_sample_fmt_is_planar(sample_fmt);
+                    +
                     +    if (ref->audio->planar)
                     +        nb_planes = av_get_channel_layout_nb_channels(channel_layout);
                     +    else
                     +        nb_planes = 1;
+                    +
                     +    /* a buffer without read permission would be useless for output */
                     +    ref->perms = perms | AV_PERM_READ;
+                    +
                     +    buf->refcount = 1;
                     +    ref->type     = AVMEDIA_TYPE_AUDIO;
                     +    ref->format   = sample_fmt;
+                    +
                     +    /* fill the fixed-size pointer array with as many planes as fit */
                     +    memcpy(buf->data, data,
                     +           FFMIN(FF_ARRAY_ELEMS(buf->data), nb_planes) * sizeof(buf->data[0]));
                     +    memcpy(ref->data, buf->data, sizeof(buf->data));
+                    +
                     +    buf->linesize[0] = linesize;
                     +    ref->linesize[0] = linesize;
+                    +
                     +    if (nb_planes <= FF_ARRAY_ELEMS(buf->data)) {
                     +        buf->extended_data = buf->data;
                     +        ref->extended_data = ref->data;
                     +    } else {
                     +        /* too many planes for data[]: keep the full list separately */
                     +        buf->extended_data = av_mallocz(sizeof(*buf->extended_data) * nb_planes);
                     +        ref->extended_data = av_mallocz(sizeof(*ref->extended_data) * nb_planes);
+                    +
                     +        if (!buf->extended_data || !ref->extended_data)
                     +            goto fail;
+                    +
                     +        memcpy(buf->extended_data, data, sizeof(*data) * nb_planes);
                     +        memcpy(ref->extended_data, data, sizeof(*data) * nb_planes);
                     +    }
+                    +
                     +    return ref;
+                    +
                     +fail:
                     +    /* extended_data is only owned when it does not alias data */
                     +    if (buf && buf->extended_data != buf->data)
                     +        av_freep(&buf->extended_data);
                     +    if (ref) {
                     +        av_freep(&ref->audio);
                     +        if (ref->extended_data != ref->data)
                     +            av_freep(&ref->extended_data);
                     +    }
                     +    av_freep(&ref);
                     +    av_freep(&buf);
                     +    return NULL;
                     +}
+                    +
                     +/**
                     + * filter_samples() handler that hands samplesref unchanged to the first
                     + * output link of the filter this link feeds.
                     + */
                     +void ff_null_filter_samples(AVFilterLink *link, AVFilterBufferRef *samplesref)
                     +{
                     +    AVFilterLink *next = link->dst->outputs[0];
+                    +
                     +    ff_filter_samples(next, samplesref);
                     +}
+                    +
                     +/**
                     + * Default filter_samples() handler.
                     + *
                     + * If the destination filter has at least one output, a writable buffer with
                     + * the same number of samples is requested, given the pts and sample rate of
                     + * samplesref, and a new reference to it is sent over the first output link.
                     + * No sample data is copied into that buffer here. samplesref is always
                     + * unreferenced and inlink->cur_buf is always cleared.
                     + *
                     + * If the output buffer or the reference to it cannot be allocated, an error
                     + * is logged and nothing is sent downstream.
                     + *
                     + * FIXME: samplesref is same as link->cur_buf. Need to consider removing the redundant parameter.
                     + *
                     + * @param inlink     the input link the samples arrived on
                     + * @param samplesref the incoming samples; released by this function
                     + */
                     +void ff_default_filter_samples(AVFilterLink *inlink, AVFilterBufferRef *samplesref)
                     +{
                     +    AVFilterLink *outlink = NULL;
+                    +
                     +    if (inlink->dst->output_count)
                     +        outlink = inlink->dst->outputs[0];
+                    +
                     +    if (outlink) {
                     +        outlink->out_buf = ff_default_get_audio_buffer(inlink, AV_PERM_WRITE,
                     +                                                       samplesref->audio->nb_samples);
                     +        if (!outlink->out_buf) {
                     +            av_log(inlink->dst, AV_LOG_ERROR,
                     +                   "Could not allocate an output audio buffer, dropping samples\n");
                     +        } else {
                     +            AVFilterBufferRef *outref;
+                    +
                     +            outlink->out_buf->pts                = samplesref->pts;
                     +            outlink->out_buf->audio->sample_rate = samplesref->audio->sample_rate;
+                    +
                     +            /* the next filter consumes its own reference; ours is dropped below */
                     +            outref = avfilter_ref_buffer(outlink->out_buf, ~0);
                     +            if (outref)
                     +                ff_filter_samples(outlink, outref);
                     +            else
                     +                av_log(inlink->dst, AV_LOG_ERROR,
                     +                       "Could not reference the output audio buffer, dropping samples\n");
+                    +
                     +            avfilter_unref_buffer(outlink->out_buf);
                     +            outlink->out_buf = NULL;
                     +        }
                     +    }
                     +    avfilter_unref_buffer(samplesref);
                     +    inlink->cur_buf = NULL;
                     +}
+                    +
                     +/**
                     + * Send a buffer of audio samples over a link.
                     + *
                     + * The samples go to the filter_samples() callback of the link's destination
                     + * pad, or to ff_default_filter_samples() when the pad has none.
                     + *
                     + * If samplesref lacks a permission from the pad's min_perms or carries one
                     + * from its rej_perms, the samples are copied into a buffer obtained from
                     + * ff_default_get_audio_buffer() and samplesref is unreferenced; otherwise
                     + * samplesref itself becomes link->cur_buf. If the copy buffer cannot be
                     + * allocated, an error is logged, samplesref is unreferenced and the
                     + * callback is not invoked.
                     + *
                     + * @param link       the link over which the samples are sent
                     + * @param samplesref reference to the samples; ownership passes to this call
                     + */
                     +void ff_filter_samples(AVFilterLink *link, AVFilterBufferRef *samplesref)
                     +{
                     +    void (*filter_samples)(AVFilterLink *, AVFilterBufferRef *);
                     +    AVFilterPad *dst = link->dstpad;
                     +    int64_t pts;
+                    +
                     +    FF_DPRINTF_START(NULL, filter_samples); ff_dlog_link(NULL, link, 1);
+                    +
                     +    if (!(filter_samples = dst->filter_samples))
                     +        filter_samples = ff_default_filter_samples;
+                    +
                     +    /* prepare to copy the samples if the buffer has insufficient permissions */
                     +    if ((dst->min_perms & samplesref->perms) != dst->min_perms ||
                     +        dst->rej_perms & samplesref->perms) {
                     +        int  i, planar = av_sample_fmt_is_planar(samplesref->format);
                     +        int planes = !planar ? 1:
                     +                     av_get_channel_layout_nb_channels(samplesref->audio->channel_layout);
+                    +
                     +        av_log(link->dst, AV_LOG_DEBUG,
                     +               "Copying audio data in avfilter (have perms %x, need %x, reject %x)\n",
                     +               samplesref->perms, link->dstpad->min_perms, link->dstpad->rej_perms);
+                    +
                     +        link->cur_buf = ff_default_get_audio_buffer(link, dst->min_perms,
                     +                                                    samplesref->audio->nb_samples);
                     +        if (!link->cur_buf) {
                     +            /* nothing to copy into: release the input instead of crashing */
                     +            av_log(link->dst, AV_LOG_ERROR,
                     +                   "Could not allocate a buffer to copy audio data into, dropping samples\n");
                     +            avfilter_unref_buffer(samplesref);
                     +            return;
                     +        }
                     +        link->cur_buf->pts                = samplesref->pts;
                     +        link->cur_buf->audio->sample_rate = samplesref->audio->sample_rate;
+                    +
                     +        /* Copy actual data into new samples buffer; the index is bounds-checked
                     +         * before data[i] is read so a fully populated array is not overrun */
                     +        for (i = 0; i < FF_ARRAY_ELEMS(samplesref->data) && samplesref->data[i]; i++)
                     +            memcpy(link->cur_buf->data[i], samplesref->data[i], samplesref->linesize[0]);
                     +        for (i = 0; i < planes; i++)
                     +            memcpy(link->cur_buf->extended_data[i], samplesref->extended_data[i], samplesref->linesize[0]);
+                    +
                     +        avfilter_unref_buffer(samplesref);
                     +    } else
                     +        link->cur_buf = samplesref;
+                    +
                     +    /* read pts first: the callback may release link->cur_buf */
                     +    pts = link->cur_buf->pts;
                     +    filter_samples(link, link->cur_buf);
                     +    ff_update_link_current_pts(link, pts);
                     +}

                     new file mode 100644
@@ -0,0 +1,65 @@
                     +/*
                     + * Copyright (c) Stefano Sabatini | stefasab at gmail.com
                     + * Copyright (c) S.N. Hemanth Meenakshisundaram | smeenaks at ucsd.edu
                     + *
                     + * This file is part of FFmpeg.
                     + *
                     + * FFmpeg is free software; you can redistribute it and/or
                     + * modify it under the terms of the GNU Lesser General Public
                     + * License as published by the Free Software Foundation; either
                     + * version 2.1 of the License, or (at your option) any later version.
                     + *
                     + * FFmpeg is distributed in the hope that it will be useful,
                     + * but WITHOUT ANY WARRANTY; without even the implied warranty of
                     + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
                     + * Lesser General Public License for more details.
                     + *
                     + * You should have received a copy of the GNU Lesser General Public
                     + * License along with FFmpeg; if not, write to the Free Software
                     + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
                     + */
+                    +
                     +#ifndef AVFILTER_AUDIO_H
                     +#define AVFILTER_AUDIO_H
+                    +
                     +#include "avfilter.h"
+                    +
+                    +
                     +/** Default handler for the get_audio_buffer() callback of audio input pads. */
                     +AVFilterBufferRef *ff_default_get_audio_buffer(AVFilterLink *link, int perms,
                     +                                                     int nb_samples);
+                    +
                     +/** get_audio_buffer() handler for filters which simply pass audio along. */
                     +AVFilterBufferRef *ff_null_get_audio_buffer(AVFilterLink *link, int perms,
                     +                                                  int nb_samples);
+                    +
                     +/**
                     + * Request an audio samples buffer with a specific set of permissions.
                     + *
                     + * @param link           the output link to the filter from which the buffer will
                     + *                       be requested
                     + * @param perms          the required access permissions
                     + * @param nb_samples     the number of samples per channel
                     + * @return               A reference to the samples, or NULL if no buffer could be
                     + *                       obtained. Unreference it with avfilter_unref_buffer().
                     + */
                     +AVFilterBufferRef *ff_get_audio_buffer(AVFilterLink *link, int perms,
                     +                                             int nb_samples);
+                    +
                     +/** Default filter_samples() handler for audio inputs; always unreferences samplesref. */
                     +void ff_default_filter_samples(AVFilterLink *link, AVFilterBufferRef *samplesref);
+                    +
                     +/** filter_samples() handler for filters which simply pass audio along to their first output. */
                     +void ff_null_filter_samples(AVFilterLink *link, AVFilterBufferRef *samplesref);
+                    +
                     +/**
                     + * Send a buffer of audio samples to the next filter. The samples are copied
                     + * first if samplesref's permissions do not satisfy the destination pad.
                     + * @param link       the output link over which the audio samples are being sent
                     + * @param samplesref a reference to the buffer of audio samples being sent. The
                     + *                   receiving filter will free this reference when it no longer
                     + *                   needs it or pass it on to the next filter.
                     + */
                     +void ff_filter_samples(AVFilterLink *link, AVFilterBufferRef *samplesref);
+                    +
                     +#endif /* AVFILTER_AUDIO_H */

@@ -430,7 +430,8 @@ struct AVFilterPad {
+                          *
                           * Input audio pads only.
                           */
                     -    AVFilterBufferRef *(*get_audio_buffer)(AVFilterLink *link, int perms, int nb_samples);
                     +    AVFilterBufferRef *(*get_audio_buffer)(AVFilterLink *link, int perms,
                     +                                           int nb_samples);
                          /**
                           * Callback called after the slices of a frame are completely sent. If
@@ -508,16 +509,10 @@ void avfilter_default_draw_slice(AVFilterLink *link, int y, int h, int slice_dir
                      /** default handler for end_frame() for video inputs */
                      void avfilter_default_end_frame(AVFilterLink *link);
                     -/** default handler for filter_samples() for audio inputs */
                     -void avfilter_default_filter_samples(AVFilterLink *link, AVFilterBufferRef *samplesref);
+                    -
                      /** default handler for get_video_buffer() for video inputs */
                      AVFilterBufferRef *avfilter_default_get_video_buffer(AVFilterLink *link,
                                                                           int perms, int w, int h);
                     -/** default handler for get_audio_buffer() for audio inputs */
                     -AVFilterBufferRef *avfilter_default_get_audio_buffer(AVFilterLink *link,
                     -                                                     int perms, int nb_samples);
                      /**
                       * Helpers for query_formats() which set all links to the same list of
@@ -541,17 +536,10 @@ void avfilter_null_draw_slice(AVFilterLink *link, int y, int h, int slice_dir);
                      /** end_frame() handler for filters which simply pass video along */
                      void avfilter_null_end_frame(AVFilterLink *link);
                     -/** filter_samples() handler for filters which simply pass audio along */
                     -void avfilter_null_filter_samples(AVFilterLink *link, AVFilterBufferRef *samplesref);
+                    -
                      /** get_video_buffer() handler for filters which simply pass video along */
                      AVFilterBufferRef *avfilter_null_get_video_buffer(AVFilterLink *link,
                                                                        int perms, int w, int h);
                     -/** get_audio_buffer() handler for filters which simply pass audio along */
                     -AVFilterBufferRef *avfilter_null_get_audio_buffer(AVFilterLink *link,
                     -                                                  int perms, int nb_samples);
+                    -
                      /**
                       * Filter definition. This defines the pads a filter contains, and all the
                       * callback functions used to interact with the filter.
@@ -665,7 +653,7 @@ struct AVFilterLink {
                          AVRational sample_aspect_ratio; ///< agreed upon sample aspect ratio
                          /* These parameters apply only to audio */
                          uint64_t channel_layout;    ///< channel layout of current buffer (see libavutil/audioconvert.h)
                     -#if LIBAVFILTER_VERSION_MAJOR < 3
                     +#if FF_API_SAMPLERATE64
                          int64_t sample_rate;        ///< samples per second
                      #else
                          int sample_rate;            ///< samples per second
@@ -791,19 +779,6 @@ avfilter_get_video_buffer_ref_from_arrays(uint8_t * const data[4], const int lin
                                                                int w, int h, enum PixelFormat format);
                      /**
                     - * Request an audio samples buffer with a specific set of permissions.
                     - *
                     - * @param link           the output link to the filter from which the buffer will
                     - *                       be requested
                     - * @param perms          the required access permissions
                     - * @param nb_samples     the number of samples per channel
                     - * @return               A reference to the samples. This must be unreferenced with
                     - *                       avfilter_unref_buffer when you are finished with it.
                     - */
                     -AVFilterBufferRef *avfilter_get_audio_buffer(AVFilterLink *link, int perms,
                     -                                             int nb_samples);
+                    -
                     -/**
                       * Create an audio buffer reference wrapped around an already
                       * allocated samples buffer.
+                      *
@@ -904,17 +879,7 @@ void avfilter_draw_slice(AVFilterLink *link, int y, int h, int slice_dir);
                       */
                      int avfilter_process_command(AVFilterContext *filter, const char *cmd, const char *arg, char *res, int res_len, int flags);
                     -/**
                     - * Send a buffer of audio samples to the next filter.
                     - *
                     - * @param link       the output link over which the audio samples are being sent
                     - * @param samplesref a reference to the buffer of audio samples being sent. The
                     - *                   receiving filter will free this reference when it no longer
                     - *                   needs it or pass it on to the next filter.
                     - */
                     -void avfilter_filter_samples(AVFilterLink *link, AVFilterBufferRef *samplesref);
+                    -
                     -/** Initialize the filter system. Register all built-in filters. */
                     +/** Initialize the filter system. Register all builtin filters. */
                      void avfilter_register_all(void);
                      /** Uninitialize the filter system. Unregister all filters. */
@@ -1024,4 +989,6 @@ static inline void avfilter_insert_outpad(AVFilterContext *f, unsigned index,
                                              &f->output_pads, &f->outputs, p);
+                     }
                     +#include "audio.h"
+                    +
                      #endif /* AVFILTER_AVFILTER_H */

@@ -149,4 +149,10 @@ static inline void ff_null_start_frame_keep_ref(AVFilterLink *inlink,
                          avfilter_start_frame(inlink->dst->outputs[0], avfilter_ref_buffer(picref, ~0));
+                     }
                     +void ff_update_link_current_pts(AVFilterLink *link, int64_t pts);
+                    +
                     +#define FF_DPRINTF_START(ctx, func) av_dlog(NULL, "%-16s: ", #func) /* logs the stringified func name; ctx is unused, NULL is always passed to av_dlog() */
+                    +
                     +void ff_dlog_link(void *ctx, AVFilterLink *link, int end);
+                    +
                      #endif /* AVFILTER_INTERNAL_H */