Signed-off-by: Diego Biurrun <diego@biurrun.de>
Signed-off-by: Ronald S. Bultje <rsbultje@gmail.com>
| ... | ... |
@@ -92,7 +92,7 @@ void ff_h264_idct8_add4_neon(uint8_t *dst, const int *block_offset, |
| 92 | 92 |
DCTELEM *block, int stride, |
| 93 | 93 |
const uint8_t nnzc[6*8]); |
| 94 | 94 |
|
| 95 |
-static void ff_h264dsp_init_neon(H264DSPContext *c, const int bit_depth) |
|
| 95 |
+static void ff_h264dsp_init_neon(H264DSPContext *c, const int bit_depth, const int chroma_format_idc) |
|
| 96 | 96 |
{
|
| 97 | 97 |
if (bit_depth == 8) {
|
| 98 | 98 |
c->h264_v_loop_filter_luma = ff_h264_v_loop_filter_luma_neon; |
| ... | ... |
@@ -122,14 +122,15 @@ static void ff_h264dsp_init_neon(H264DSPContext *c, const int bit_depth) |
| 122 | 122 |
c->h264_idct_dc_add = ff_h264_idct_dc_add_neon; |
| 123 | 123 |
c->h264_idct_add16 = ff_h264_idct_add16_neon; |
| 124 | 124 |
c->h264_idct_add16intra = ff_h264_idct_add16intra_neon; |
| 125 |
- c->h264_idct_add8 = ff_h264_idct_add8_neon; |
|
| 125 |
+ if (chroma_format_idc == 1) |
|
| 126 |
+ c->h264_idct_add8 = ff_h264_idct_add8_neon; |
|
| 126 | 127 |
c->h264_idct8_add = ff_h264_idct8_add_neon; |
| 127 | 128 |
c->h264_idct8_dc_add = ff_h264_idct8_dc_add_neon; |
| 128 | 129 |
c->h264_idct8_add4 = ff_h264_idct8_add4_neon; |
| 129 | 130 |
} |
| 130 | 131 |
} |
| 131 | 132 |
|
| 132 |
-void ff_h264dsp_init_arm(H264DSPContext *c, const int bit_depth) |
|
| 133 |
+void ff_h264dsp_init_arm(H264DSPContext *c, const int bit_depth, const int chroma_format_idc) |
|
| 133 | 134 |
{
|
| 134 |
- if (HAVE_NEON) ff_h264dsp_init_neon(c, bit_depth); |
|
| 135 |
+ if (HAVE_NEON) ff_h264dsp_init_neon(c, bit_depth, chroma_format_idc); |
|
| 135 | 136 |
} |
| ... | ... |
@@ -42,7 +42,7 @@ void ff_pred8x8_0lt_dc_neon(uint8_t *src, int stride); |
| 42 | 42 |
void ff_pred8x8_l00_dc_neon(uint8_t *src, int stride); |
| 43 | 43 |
void ff_pred8x8_0l0_dc_neon(uint8_t *src, int stride); |
| 44 | 44 |
|
| 45 |
-static void ff_h264_pred_init_neon(H264PredContext *h, int codec_id, const int bit_depth) |
|
| 45 |
+static void ff_h264_pred_init_neon(H264PredContext *h, int codec_id, const int bit_depth, const int chroma_format_idc) |
|
| 46 | 46 |
{
|
| 47 | 47 |
const int high_depth = bit_depth > 8; |
| 48 | 48 |
|
| ... | ... |
@@ -74,7 +74,7 @@ static void ff_h264_pred_init_neon(H264PredContext *h, int codec_id, const int b |
| 74 | 74 |
h->pred16x16[PLANE_PRED8x8 ] = ff_pred16x16_plane_neon; |
| 75 | 75 |
} |
| 76 | 76 |
|
| 77 |
-void ff_h264_pred_init_arm(H264PredContext *h, int codec_id, int bit_depth) |
|
| 77 |
+void ff_h264_pred_init_arm(H264PredContext *h, int codec_id, int bit_depth, const int chroma_format_idc) |
|
| 78 | 78 |
{
|
| 79 |
- if (HAVE_NEON) ff_h264_pred_init_neon(h, codec_id, bit_depth); |
|
| 79 |
+ if (HAVE_NEON) ff_h264_pred_init_neon(h, codec_id, bit_depth, chroma_format_idc); |
|
| 80 | 80 |
} |
| ... | ... |
@@ -63,8 +63,10 @@ void ff_h264_idct_dc_add_ ## depth ## _c(uint8_t *dst, DCTELEM *block, int strid |
| 63 | 63 |
void ff_h264_idct_add16_ ## depth ## _c(uint8_t *dst, const int *blockoffset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]);\ |
| 64 | 64 |
void ff_h264_idct_add16intra_ ## depth ## _c(uint8_t *dst, const int *blockoffset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]);\ |
| 65 | 65 |
void ff_h264_idct8_add4_ ## depth ## _c(uint8_t *dst, const int *blockoffset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]);\ |
| 66 |
+void ff_h264_idct_add8_422_ ## depth ## _c(uint8_t **dest, const int *blockoffset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]);\ |
|
| 66 | 67 |
void ff_h264_idct_add8_ ## depth ## _c(uint8_t **dest, const int *blockoffset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]);\ |
| 67 | 68 |
void ff_h264_luma_dc_dequant_idct_ ## depth ## _c(DCTELEM *output, DCTELEM *input, int qmul);\ |
| 69 |
+void ff_h264_chroma422_dc_dequant_idct_ ## depth ## _c(DCTELEM *block, int qmul);\ |
|
| 68 | 70 |
void ff_h264_chroma_dc_dequant_idct_ ## depth ## _c(DCTELEM *block, int qmul); |
| 69 | 71 |
|
| 70 | 72 |
H264_IDCT( 8) |
| ... | ... |
@@ -942,7 +942,7 @@ static void clone_tables(H264Context *dst, H264Context *src, int i){
|
| 942 | 942 |
dst->list_counts = src->list_counts; |
| 943 | 943 |
|
| 944 | 944 |
dst->s.obmc_scratchpad = NULL; |
| 945 |
- ff_h264_pred_init(&dst->hpc, src->s.codec_id, src->sps.bit_depth_luma); |
|
| 945 |
+ ff_h264_pred_init(&dst->hpc, src->s.codec_id, src->sps.bit_depth_luma, src->sps.chroma_format_idc); |
|
| 946 | 946 |
} |
| 947 | 947 |
|
| 948 | 948 |
/** |
| ... | ... |
@@ -970,8 +970,8 @@ static av_cold void common_init(H264Context *h){
|
| 970 | 970 |
s->height = s->avctx->height; |
| 971 | 971 |
s->codec_id= s->avctx->codec->id; |
| 972 | 972 |
|
| 973 |
- ff_h264dsp_init(&h->h264dsp, 8); |
|
| 974 |
- ff_h264_pred_init(&h->hpc, s->codec_id, 8); |
|
| 973 |
+ ff_h264dsp_init(&h->h264dsp, 8, 1); |
|
| 974 |
+ ff_h264_pred_init(&h->hpc, s->codec_id, 8, 1); |
|
| 975 | 975 |
|
| 976 | 976 |
h->dequant_coeff_pps= -1; |
| 977 | 977 |
s->unrestricted_mv=1; |
| ... | ... |
@@ -1432,11 +1432,16 @@ static void decode_postinit(H264Context *h, int setup_finished){
|
| 1432 | 1432 |
ff_thread_finish_setup(s->avctx); |
| 1433 | 1433 |
} |
| 1434 | 1434 |
|
| 1435 |
-static av_always_inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int chroma444, int simple){
|
|
| 1435 |
+static av_always_inline void backup_mb_border(H264Context *h, uint8_t *src_y, |
|
| 1436 |
+ uint8_t *src_cb, uint8_t *src_cr, |
|
| 1437 |
+ int linesize, int uvlinesize, int simple) |
|
| 1438 |
+{
|
|
| 1436 | 1439 |
MpegEncContext * const s = &h->s; |
| 1437 | 1440 |
uint8_t *top_border; |
| 1438 | 1441 |
int top_idx = 1; |
| 1439 | 1442 |
const int pixel_shift = h->pixel_shift; |
| 1443 |
+ int chroma444 = CHROMA444; |
|
| 1444 |
+ int chroma422 = CHROMA422; |
|
| 1440 | 1445 |
|
| 1441 | 1446 |
src_y -= linesize; |
| 1442 | 1447 |
src_cb -= uvlinesize; |
| ... | ... |
@@ -1460,6 +1465,14 @@ static av_always_inline void backup_mb_border(H264Context *h, uint8_t *src_y, ui |
| 1460 | 1460 |
AV_COPY128(top_border+16, src_cb + 15*uvlinesize); |
| 1461 | 1461 |
AV_COPY128(top_border+32, src_cr + 15*uvlinesize); |
| 1462 | 1462 |
} |
| 1463 |
+ } else if(chroma422) {
|
|
| 1464 |
+ if (pixel_shift) {
|
|
| 1465 |
+ AV_COPY128(top_border+32, src_cb + 15*uvlinesize); |
|
| 1466 |
+ AV_COPY128(top_border+48, src_cr + 15*uvlinesize); |
|
| 1467 |
+ } else {
|
|
| 1468 |
+ AV_COPY64(top_border+16, src_cb + 15*uvlinesize); |
|
| 1469 |
+ AV_COPY64(top_border+24, src_cr + 15*uvlinesize); |
|
| 1470 |
+ } |
|
| 1463 | 1471 |
} else {
|
| 1464 | 1472 |
if (pixel_shift) {
|
| 1465 | 1473 |
AV_COPY128(top_border+32, src_cb+7*uvlinesize); |
| ... | ... |
@@ -1495,6 +1508,14 @@ static av_always_inline void backup_mb_border(H264Context *h, uint8_t *src_y, ui |
| 1495 | 1495 |
AV_COPY128(top_border+16, src_cb + 16*linesize); |
| 1496 | 1496 |
AV_COPY128(top_border+32, src_cr + 16*linesize); |
| 1497 | 1497 |
} |
| 1498 |
+ } else if(chroma422) {
|
|
| 1499 |
+ if (pixel_shift) {
|
|
| 1500 |
+ AV_COPY128(top_border+32, src_cb+16*uvlinesize); |
|
| 1501 |
+ AV_COPY128(top_border+48, src_cr+16*uvlinesize); |
|
| 1502 |
+ } else {
|
|
| 1503 |
+ AV_COPY64(top_border+16, src_cb+16*uvlinesize); |
|
| 1504 |
+ AV_COPY64(top_border+24, src_cr+16*uvlinesize); |
|
| 1505 |
+ } |
|
| 1498 | 1506 |
} else {
|
| 1499 | 1507 |
if (pixel_shift) {
|
| 1500 | 1508 |
AV_COPY128(top_border+32, src_cb+8*uvlinesize); |
| ... | ... |
@@ -1773,10 +1794,11 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple, i |
| 1773 | 1773 |
/* is_h264 should always be true if SVQ3 is disabled. */ |
| 1774 | 1774 |
const int is_h264 = !CONFIG_SVQ3_DECODER || simple || s->codec_id == CODEC_ID_H264; |
| 1775 | 1775 |
void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride); |
| 1776 |
+ const int block_h = 16 >> s->chroma_y_shift; |
|
| 1776 | 1777 |
|
| 1777 | 1778 |
dest_y = s->current_picture.f.data[0] + ((mb_x << pixel_shift) + mb_y * s->linesize ) * 16; |
| 1778 |
- dest_cb = s->current_picture.f.data[1] + ((mb_x << pixel_shift) + mb_y * s->uvlinesize) * 8; |
|
| 1779 |
- dest_cr = s->current_picture.f.data[2] + ((mb_x << pixel_shift) + mb_y * s->uvlinesize) * 8; |
|
| 1779 |
+ dest_cb = s->current_picture.f.data[1] + (mb_x << pixel_shift)*8 + mb_y * s->uvlinesize * block_h; |
|
| 1780 |
+ dest_cr = s->current_picture.f.data[2] + (mb_x << pixel_shift)*8 + mb_y * s->uvlinesize * block_h; |
|
| 1780 | 1781 |
|
| 1781 | 1782 |
s->dsp.prefetch(dest_y + (s->mb_x&3)*4*s->linesize + (64 << pixel_shift), s->linesize, 4); |
| 1782 | 1783 |
s->dsp.prefetch(dest_cb + (s->mb_x&7)*s->uvlinesize + (64 << pixel_shift), dest_cr - dest_cb, 2); |
| ... | ... |
@@ -1789,8 +1811,8 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple, i |
| 1789 | 1789 |
block_offset = &h->block_offset[48]; |
| 1790 | 1790 |
if(mb_y&1){ //FIXME move out of this function?
|
| 1791 | 1791 |
dest_y -= s->linesize*15; |
| 1792 |
- dest_cb-= s->uvlinesize*7; |
|
| 1793 |
- dest_cr-= s->uvlinesize*7; |
|
| 1792 |
+ dest_cb-= s->uvlinesize * (block_h - 1); |
|
| 1793 |
+ dest_cr-= s->uvlinesize * (block_h - 1); |
|
| 1794 | 1794 |
} |
| 1795 | 1795 |
if(FRAME_MBAFF) {
|
| 1796 | 1796 |
int list; |
| ... | ... |
@@ -1842,12 +1864,12 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple, i |
| 1842 | 1842 |
} |
| 1843 | 1843 |
} |
| 1844 | 1844 |
} else {
|
| 1845 |
- for (i = 0; i < 8; i++) {
|
|
| 1845 |
+ for (i = 0; i < block_h; i++) {
|
|
| 1846 | 1846 |
uint16_t *tmp_cb = (uint16_t*)(dest_cb + i*uvlinesize); |
| 1847 | 1847 |
for (j = 0; j < 8; j++) |
| 1848 | 1848 |
tmp_cb[j] = get_bits(&gb, bit_depth); |
| 1849 | 1849 |
} |
| 1850 |
- for (i = 0; i < 8; i++) {
|
|
| 1850 |
+ for (i = 0; i < block_h; i++) {
|
|
| 1851 | 1851 |
uint16_t *tmp_cr = (uint16_t*)(dest_cr + i*uvlinesize); |
| 1852 | 1852 |
for (j = 0; j < 8; j++) |
| 1853 | 1853 |
tmp_cr[j] = get_bits(&gb, bit_depth); |
| ... | ... |
@@ -1865,7 +1887,7 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple, i |
| 1865 | 1865 |
memset(dest_cr + i*uvlinesize, 128, 8); |
| 1866 | 1866 |
} |
| 1867 | 1867 |
} else {
|
| 1868 |
- for (i = 0; i < 8; i++) {
|
|
| 1868 |
+ for (i = 0; i < block_h; i++) {
|
|
| 1869 | 1869 |
memcpy(dest_cb + i*uvlinesize, h->mb + 128 + i*4, 8); |
| 1870 | 1870 |
memcpy(dest_cr + i*uvlinesize, h->mb + 160 + i*4, 8); |
| 1871 | 1871 |
} |
| ... | ... |
@@ -1913,10 +1935,18 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple, i |
| 1913 | 1913 |
} |
| 1914 | 1914 |
}else{
|
| 1915 | 1915 |
if(is_h264){
|
| 1916 |
+ int qp[2]; |
|
| 1917 |
+ if (CHROMA422) {
|
|
| 1918 |
+ qp[0] = h->chroma_qp[0] + 3; |
|
| 1919 |
+ qp[1] = h->chroma_qp[1] + 3; |
|
| 1920 |
+ } else {
|
|
| 1921 |
+ qp[0] = h->chroma_qp[0]; |
|
| 1922 |
+ qp[1] = h->chroma_qp[1]; |
|
| 1923 |
+ } |
|
| 1916 | 1924 |
if(h->non_zero_count_cache[ scan8[CHROMA_DC_BLOCK_INDEX+0] ]) |
| 1917 |
- h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + (16*16*1 << pixel_shift), h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]); |
|
| 1925 |
+ h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + (16*16*1 << pixel_shift), h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][qp[0]][0]); |
|
| 1918 | 1926 |
if(h->non_zero_count_cache[ scan8[CHROMA_DC_BLOCK_INDEX+1] ]) |
| 1919 |
- h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + (16*16*2 << pixel_shift), h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]); |
|
| 1927 |
+ h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + (16*16*2 << pixel_shift), h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][qp[1]][0]); |
|
| 1920 | 1928 |
h->h264dsp.h264_idct_add8(dest, block_offset, |
| 1921 | 1929 |
h->mb, uvlinesize, |
| 1922 | 1930 |
h->non_zero_count_cache); |
| ... | ... |
@@ -2555,11 +2585,13 @@ static int decode_slice_header(H264Context *h, H264Context *h0){
|
| 2555 | 2555 |
|
| 2556 | 2556 |
h->b_stride= s->mb_width*4; |
| 2557 | 2557 |
|
| 2558 |
+ s->chroma_y_shift = h->sps.chroma_format_idc <= 1; // 400 uses yuv420p |
|
| 2559 |
+ |
|
| 2558 | 2560 |
s->width = 16*s->mb_width - (2>>CHROMA444)*FFMIN(h->sps.crop_right, (8<<CHROMA444)-1); |
| 2559 | 2561 |
if(h->sps.frame_mbs_only_flag) |
| 2560 |
- s->height= 16*s->mb_height - (2>>CHROMA444)*FFMIN(h->sps.crop_bottom, (8<<CHROMA444)-1); |
|
| 2562 |
+ s->height= 16*s->mb_height - (1<<s->chroma_y_shift)*FFMIN(h->sps.crop_bottom, (16>>s->chroma_y_shift)-1); |
|
| 2561 | 2563 |
else |
| 2562 |
- s->height= 16*s->mb_height - (4>>CHROMA444)*FFMIN(h->sps.crop_bottom, (8<<CHROMA444)-1); |
|
| 2564 |
+ s->height= 16*s->mb_height - (2<<s->chroma_y_shift)*FFMIN(h->sps.crop_bottom, (16>>s->chroma_y_shift)-1); |
|
| 2563 | 2565 |
|
| 2564 | 2566 |
if (s->context_initialized |
| 2565 | 2567 |
&& ( s->width != s->avctx->width || s->height != s->avctx->height |
| ... | ... |
@@ -2601,14 +2633,26 @@ static int decode_slice_header(H264Context *h, H264Context *h0){
|
| 2601 | 2601 |
|
| 2602 | 2602 |
switch (h->sps.bit_depth_luma) {
|
| 2603 | 2603 |
case 9 : |
| 2604 |
- s->avctx->pix_fmt = CHROMA444 ? PIX_FMT_YUV444P9 : PIX_FMT_YUV420P9; |
|
| 2604 |
+ if (CHROMA444) |
|
| 2605 |
+ s->avctx->pix_fmt = PIX_FMT_YUV444P9; |
|
| 2606 |
+ else if (CHROMA422) |
|
| 2607 |
+ s->avctx->pix_fmt = PIX_FMT_YUV422P9; |
|
| 2608 |
+ else |
|
| 2609 |
+ s->avctx->pix_fmt = PIX_FMT_YUV420P9; |
|
| 2605 | 2610 |
break; |
| 2606 | 2611 |
case 10 : |
| 2607 |
- s->avctx->pix_fmt = CHROMA444 ? PIX_FMT_YUV444P10 : PIX_FMT_YUV420P10; |
|
| 2612 |
+ if (CHROMA444) |
|
| 2613 |
+ s->avctx->pix_fmt = PIX_FMT_YUV444P10; |
|
| 2614 |
+ else if (CHROMA422) |
|
| 2615 |
+ s->avctx->pix_fmt = PIX_FMT_YUV422P10; |
|
| 2616 |
+ else |
|
| 2617 |
+ s->avctx->pix_fmt = PIX_FMT_YUV420P10; |
|
| 2608 | 2618 |
break; |
| 2609 | 2619 |
default: |
| 2610 | 2620 |
if (CHROMA444){
|
| 2611 | 2621 |
s->avctx->pix_fmt = s->avctx->color_range == AVCOL_RANGE_JPEG ? PIX_FMT_YUVJ444P : PIX_FMT_YUV444P; |
| 2622 |
+ } else if (CHROMA422) {
|
|
| 2623 |
+ s->avctx->pix_fmt = s->avctx->color_range == AVCOL_RANGE_JPEG ? PIX_FMT_YUVJ422P : PIX_FMT_YUV422P; |
|
| 2612 | 2624 |
}else{
|
| 2613 | 2625 |
s->avctx->pix_fmt = s->avctx->get_format(s->avctx, |
| 2614 | 2626 |
s->avctx->codec->pix_fmts ? |
| ... | ... |
@@ -3272,6 +3316,7 @@ static void loop_filter(H264Context *h, int start_x, int end_x){
|
| 3272 | 3272 |
const int end_mb_y= s->mb_y + FRAME_MBAFF; |
| 3273 | 3273 |
const int old_slice_type= h->slice_type; |
| 3274 | 3274 |
const int pixel_shift = h->pixel_shift; |
| 3275 |
+ const int block_h = 16 >> s->chroma_y_shift; |
|
| 3275 | 3276 |
|
| 3276 | 3277 |
if(h->deblocking_filter) {
|
| 3277 | 3278 |
for(mb_x= start_x; mb_x<end_x; mb_x++){
|
| ... | ... |
@@ -3288,8 +3333,8 @@ static void loop_filter(H264Context *h, int start_x, int end_x){
|
| 3288 | 3288 |
s->mb_x= mb_x; |
| 3289 | 3289 |
s->mb_y= mb_y; |
| 3290 | 3290 |
dest_y = s->current_picture.f.data[0] + ((mb_x << pixel_shift) + mb_y * s->linesize ) * 16; |
| 3291 |
- dest_cb = s->current_picture.f.data[1] + ((mb_x << pixel_shift) + mb_y * s->uvlinesize) * (8 << CHROMA444); |
|
| 3292 |
- dest_cr = s->current_picture.f.data[2] + ((mb_x << pixel_shift) + mb_y * s->uvlinesize) * (8 << CHROMA444); |
|
| 3291 |
+ dest_cb = s->current_picture.f.data[1] + (mb_x << pixel_shift) * (8 << CHROMA444) + mb_y * s->uvlinesize * block_h; |
|
| 3292 |
+ dest_cr = s->current_picture.f.data[2] + (mb_x << pixel_shift) * (8 << CHROMA444) + mb_y * s->uvlinesize * block_h; |
|
| 3293 | 3293 |
//FIXME simplify above |
| 3294 | 3294 |
|
| 3295 | 3295 |
if (MB_FIELD) {
|
| ... | ... |
@@ -3297,14 +3342,14 @@ static void loop_filter(H264Context *h, int start_x, int end_x){
|
| 3297 | 3297 |
uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2; |
| 3298 | 3298 |
if(mb_y&1){ //FIXME move out of this function?
|
| 3299 | 3299 |
dest_y -= s->linesize*15; |
| 3300 |
- dest_cb-= s->uvlinesize*((8 << CHROMA444)-1); |
|
| 3301 |
- dest_cr-= s->uvlinesize*((8 << CHROMA444)-1); |
|
| 3300 |
+ dest_cb-= s->uvlinesize * (block_h - 1); |
|
| 3301 |
+ dest_cr-= s->uvlinesize * (block_h - 1); |
|
| 3302 | 3302 |
} |
| 3303 | 3303 |
} else {
|
| 3304 | 3304 |
linesize = h->mb_linesize = s->linesize; |
| 3305 | 3305 |
uvlinesize = h->mb_uvlinesize = s->uvlinesize; |
| 3306 | 3306 |
} |
| 3307 |
- backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, CHROMA444, 0); |
|
| 3307 |
+ backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0); |
|
| 3308 | 3308 |
if(fill_filter_caches(h, mb_type)) |
| 3309 | 3309 |
continue; |
| 3310 | 3310 |
h->chroma_qp[0] = get_chroma_qp(h, 0, s->current_picture.f.qscale_table[mb_xy]); |
| ... | ... |
@@ -3742,13 +3787,15 @@ static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size){
|
| 3742 | 3742 |
if(avctx->has_b_frames < 2) |
| 3743 | 3743 |
avctx->has_b_frames= !s->low_delay; |
| 3744 | 3744 |
|
| 3745 |
- if (avctx->bits_per_raw_sample != h->sps.bit_depth_luma) {
|
|
| 3745 |
+ if (avctx->bits_per_raw_sample != h->sps.bit_depth_luma || |
|
| 3746 |
+ h->cur_chroma_format_idc != h->sps.chroma_format_idc) {
|
|
| 3746 | 3747 |
if (h->sps.bit_depth_luma >= 8 && h->sps.bit_depth_luma <= 10) {
|
| 3747 | 3748 |
avctx->bits_per_raw_sample = h->sps.bit_depth_luma; |
| 3749 |
+ h->cur_chroma_format_idc = h->sps.chroma_format_idc; |
|
| 3748 | 3750 |
h->pixel_shift = h->sps.bit_depth_luma > 8; |
| 3749 | 3751 |
|
| 3750 |
- ff_h264dsp_init(&h->h264dsp, h->sps.bit_depth_luma); |
|
| 3751 |
- ff_h264_pred_init(&h->hpc, s->codec_id, h->sps.bit_depth_luma); |
|
| 3752 |
+ ff_h264dsp_init(&h->h264dsp, h->sps.bit_depth_luma, h->sps.chroma_format_idc); |
|
| 3753 |
+ ff_h264_pred_init(&h->hpc, s->codec_id, h->sps.bit_depth_luma, h->sps.chroma_format_idc); |
|
| 3752 | 3754 |
s->dsp.dct_bits = h->sps.bit_depth_luma > 8 ? 32 : 16; |
| 3753 | 3755 |
dsputil_init(&s->dsp, s->avctx); |
| 3754 | 3756 |
} else {
|
| ... | ... |
@@ -39,13 +39,6 @@ |
| 39 | 39 |
#define interlaced_dct interlaced_dct_is_a_bad_name |
| 40 | 40 |
#define mb_intra mb_intra_is_not_initialized_see_mb_type |
| 41 | 41 |
|
| 42 |
-#define CHROMA_DC_COEFF_TOKEN_VLC_BITS 8 |
|
| 43 |
-#define COEFF_TOKEN_VLC_BITS 8 |
|
| 44 |
-#define TOTAL_ZEROS_VLC_BITS 9 |
|
| 45 |
-#define CHROMA_DC_TOTAL_ZEROS_VLC_BITS 3 |
|
| 46 |
-#define RUN_VLC_BITS 3 |
|
| 47 |
-#define RUN7_VLC_BITS 6 |
|
| 48 |
- |
|
| 49 | 42 |
#define MAX_SPS_COUNT 32 |
| 50 | 43 |
#define MAX_PPS_COUNT 256 |
| 51 | 44 |
|
| ... | ... |
@@ -92,6 +85,7 @@ |
| 92 | 92 |
#define CABAC h->pps.cabac |
| 93 | 93 |
#endif |
| 94 | 94 |
|
| 95 |
+#define CHROMA422 (h->sps.chroma_format_idc == 2) |
|
| 95 | 96 |
#define CHROMA444 (h->sps.chroma_format_idc == 3) |
| 96 | 97 |
|
| 97 | 98 |
#define EXTENDED_SAR 255 |
| ... | ... |
@@ -582,6 +576,8 @@ typedef struct H264Context{
|
| 582 | 582 |
// Timestamp stuff |
| 583 | 583 |
int sei_buffering_period_present; ///< Buffering period SEI flag |
| 584 | 584 |
int initial_cpb_removal_delay[32]; ///< Initial timestamps for CPBs |
| 585 |
+ |
|
| 586 |
+ int cur_chroma_format_idc; |
|
| 585 | 587 |
}H264Context; |
| 586 | 588 |
|
| 587 | 589 |
|
| ... | ... |
@@ -809,7 +805,7 @@ static av_always_inline void write_back_non_zero_count(H264Context *h){
|
| 809 | 809 |
AV_COPY32(&nnz[32], &nnz_cache[4+8*11]); |
| 810 | 810 |
AV_COPY32(&nnz[36], &nnz_cache[4+8*12]); |
| 811 | 811 |
|
| 812 |
- if(CHROMA444){
|
|
| 812 |
+ if(!h->s.chroma_y_shift){
|
|
| 813 | 813 |
AV_COPY32(&nnz[24], &nnz_cache[4+8* 8]); |
| 814 | 814 |
AV_COPY32(&nnz[28], &nnz_cache[4+8* 9]); |
| 815 | 815 |
AV_COPY32(&nnz[40], &nnz_cache[4+8*13]); |
| ... | ... |
@@ -1565,7 +1565,12 @@ DECLARE_ASM_CONST(1, uint8_t, last_coeff_flag_offset_8x8)[63] = {
|
| 1565 | 1565 |
5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8 |
| 1566 | 1566 |
}; |
| 1567 | 1567 |
|
| 1568 |
-static av_always_inline void decode_cabac_residual_internal( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff, int is_dc ) {
|
|
| 1568 |
+static av_always_inline void |
|
| 1569 |
+decode_cabac_residual_internal(H264Context *h, DCTELEM *block, |
|
| 1570 |
+ int cat, int n, const uint8_t *scantable, |
|
| 1571 |
+ const uint32_t *qmul, int max_coeff, |
|
| 1572 |
+ int is_dc, int chroma422) |
|
| 1573 |
+{
|
|
| 1569 | 1574 |
static const int significant_coeff_flag_offset[2][14] = {
|
| 1570 | 1575 |
{ 105+0, 105+15, 105+29, 105+44, 105+47, 402, 484+0, 484+15, 484+29, 660, 528+0, 528+15, 528+29, 718 },
|
| 1571 | 1576 |
{ 277+0, 277+15, 277+29, 277+44, 277+47, 436, 776+0, 776+15, 776+29, 675, 820+0, 820+15, 820+29, 733 }
|
| ... | ... |
@@ -1587,12 +1592,16 @@ static av_always_inline void decode_cabac_residual_internal( H264Context *h, DCT |
| 1587 | 1587 |
9, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,13,13, 9, |
| 1588 | 1588 |
9,10,10, 8,13,13, 9, 9,10,10,14,14,14,14,14 } |
| 1589 | 1589 |
}; |
| 1590 |
+ static const uint8_t sig_coeff_offset_dc[7] = { 0, 0, 1, 1, 2, 2, 2 };
|
|
| 1590 | 1591 |
/* node ctx: 0..3: abslevel1 (with abslevelgt1 == 0). |
| 1591 | 1592 |
* 4..7: abslevelgt1 + 3 (and abslevel1 doesn't matter). |
| 1592 | 1593 |
* map node ctx => cabac ctx for level=1 */ |
| 1593 | 1594 |
static const uint8_t coeff_abs_level1_ctx[8] = { 1, 2, 3, 4, 0, 0, 0, 0 };
|
| 1594 | 1595 |
/* map node ctx => cabac ctx for level>1 */ |
| 1595 |
- static const uint8_t coeff_abs_levelgt1_ctx[8] = { 5, 5, 5, 5, 6, 7, 8, 9 };
|
|
| 1596 |
+ static const uint8_t coeff_abs_levelgt1_ctx[2][8] = {
|
|
| 1597 |
+ { 5, 5, 5, 5, 6, 7, 8, 9 },
|
|
| 1598 |
+ { 5, 5, 5, 5, 6, 7, 8, 8 }, // 422/dc case
|
|
| 1599 |
+ }; |
|
| 1596 | 1600 |
static const uint8_t coeff_abs_level_transition[2][8] = {
|
| 1597 | 1601 |
/* update node ctx after decoding a level=1 */ |
| 1598 | 1602 |
{ 1, 2, 3, 3, 4, 5, 6, 7 },
|
| ... | ... |
@@ -1651,12 +1660,20 @@ static av_always_inline void decode_cabac_residual_internal( H264Context *h, DCT |
| 1651 | 1651 |
coeff_count= decode_significance_8x8_x86(CC, significant_coeff_ctx_base, index, |
| 1652 | 1652 |
last_coeff_ctx_base, sig_off); |
| 1653 | 1653 |
} else {
|
| 1654 |
- coeff_count= decode_significance_x86(CC, max_coeff, significant_coeff_ctx_base, index, |
|
| 1655 |
- last_coeff_ctx_base-significant_coeff_ctx_base); |
|
| 1654 |
+ if (is_dc && chroma422) { // dc 422
|
|
| 1655 |
+ DECODE_SIGNIFICANCE(7, sig_coeff_offset_dc[last], sig_coeff_offset_dc[last]); |
|
| 1656 |
+ } else {
|
|
| 1657 |
+ coeff_count= decode_significance_x86(CC, max_coeff, significant_coeff_ctx_base, index, |
|
| 1658 |
+ last_coeff_ctx_base-significant_coeff_ctx_base); |
|
| 1659 |
+ } |
|
| 1656 | 1660 |
#else |
| 1657 | 1661 |
DECODE_SIGNIFICANCE( 63, sig_off[last], last_coeff_flag_offset_8x8[last] ); |
| 1658 | 1662 |
} else {
|
| 1659 |
- DECODE_SIGNIFICANCE( max_coeff - 1, last, last ); |
|
| 1663 |
+ if (is_dc && chroma422) { // dc 422
|
|
| 1664 |
+ DECODE_SIGNIFICANCE(7, sig_coeff_offset_dc[last], sig_coeff_offset_dc[last]); |
|
| 1665 |
+ } else {
|
|
| 1666 |
+ DECODE_SIGNIFICANCE(max_coeff - 1, last, last); |
|
| 1667 |
+ } |
|
| 1660 | 1668 |
#endif |
| 1661 | 1669 |
} |
| 1662 | 1670 |
assert(coeff_count > 0); |
| ... | ... |
@@ -1691,7 +1708,7 @@ static av_always_inline void decode_cabac_residual_internal( H264Context *h, DCT |
| 1691 | 1691 |
} \ |
| 1692 | 1692 |
} else { \
|
| 1693 | 1693 |
int coeff_abs = 2; \ |
| 1694 |
- ctx = coeff_abs_levelgt1_ctx[node_ctx] + abs_level_m1_ctx_base; \ |
|
| 1694 |
+ ctx = coeff_abs_levelgt1_ctx[is_dc && chroma422][node_ctx] + abs_level_m1_ctx_base; \ |
|
| 1695 | 1695 |
node_ctx = coeff_abs_level_transition[1][node_ctx]; \ |
| 1696 | 1696 |
\ |
| 1697 | 1697 |
while( coeff_abs < 15 && get_cabac( CC, ctx ) ) { \
|
| ... | ... |
@@ -1733,11 +1750,18 @@ static av_always_inline void decode_cabac_residual_internal( H264Context *h, DCT |
| 1733 | 1733 |
} |
| 1734 | 1734 |
|
| 1735 | 1735 |
static void decode_cabac_residual_dc_internal( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, int max_coeff ) {
|
| 1736 |
- decode_cabac_residual_internal(h, block, cat, n, scantable, NULL, max_coeff, 1); |
|
| 1736 |
+ decode_cabac_residual_internal(h, block, cat, n, scantable, NULL, max_coeff, 1, 0); |
|
| 1737 |
+} |
|
| 1738 |
+ |
|
| 1739 |
+static void decode_cabac_residual_dc_internal_422(H264Context *h, DCTELEM *block, |
|
| 1740 |
+ int cat, int n, const uint8_t *scantable, |
|
| 1741 |
+ int max_coeff) |
|
| 1742 |
+{
|
|
| 1743 |
+ decode_cabac_residual_internal(h, block, cat, n, scantable, NULL, max_coeff, 1, 1); |
|
| 1737 | 1744 |
} |
| 1738 | 1745 |
|
| 1739 | 1746 |
static void decode_cabac_residual_nondc_internal( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
|
| 1740 |
- decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, 0); |
|
| 1747 |
+ decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, 0, 0); |
|
| 1741 | 1748 |
} |
| 1742 | 1749 |
|
| 1743 | 1750 |
/* cat: 0-> DC 16x16 n = 0 |
| ... | ... |
@@ -1761,6 +1785,19 @@ static av_always_inline void decode_cabac_residual_dc( H264Context *h, DCTELEM * |
| 1761 | 1761 |
decode_cabac_residual_dc_internal( h, block, cat, n, scantable, max_coeff ); |
| 1762 | 1762 |
} |
| 1763 | 1763 |
|
| 1764 |
+static av_always_inline void |
|
| 1765 |
+decode_cabac_residual_dc_422(H264Context *h, DCTELEM *block, |
|
| 1766 |
+ int cat, int n, const uint8_t *scantable, |
|
| 1767 |
+ int max_coeff) |
|
| 1768 |
+{
|
|
| 1769 |
+ /* read coded block flag */ |
|
| 1770 |
+ if (get_cabac(&h->cabac, &h->cabac_state[get_cabac_cbf_ctx(h, cat, n, max_coeff, 1)]) == 0) {
|
|
| 1771 |
+ h->non_zero_count_cache[scan8[n]] = 0; |
|
| 1772 |
+ return; |
|
| 1773 |
+ } |
|
| 1774 |
+ decode_cabac_residual_dc_internal_422(h, block, cat, n, scantable, max_coeff); |
|
| 1775 |
+} |
|
| 1776 |
+ |
|
| 1764 | 1777 |
static av_always_inline void decode_cabac_residual_nondc( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
|
| 1765 | 1778 |
/* read coded block flag */ |
| 1766 | 1779 |
if( (cat != 5 || CHROMA444) && get_cabac( &h->cabac, &h->cabac_state[get_cabac_cbf_ctx( h, cat, n, max_coeff, 0 ) ] ) == 0 ) {
|
| ... | ... |
@@ -2313,7 +2350,36 @@ decode_intra_mb: |
| 2313 | 2313 |
if(CHROMA444){
|
| 2314 | 2314 |
decode_cabac_luma_residual(h, scan, scan8x8, pixel_shift, mb_type, cbp, 1); |
| 2315 | 2315 |
decode_cabac_luma_residual(h, scan, scan8x8, pixel_shift, mb_type, cbp, 2); |
| 2316 |
- } else {
|
|
| 2316 |
+ } else if (CHROMA422) {
|
|
| 2317 |
+ if( cbp&0x30 ){
|
|
| 2318 |
+ int c; |
|
| 2319 |
+ for( c = 0; c < 2; c++ ) {
|
|
| 2320 |
+ //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-DC\n",c ); |
|
| 2321 |
+ decode_cabac_residual_dc_422(h, h->mb + ((256 + 16*16*c) << pixel_shift), 3, |
|
| 2322 |
+ CHROMA_DC_BLOCK_INDEX + c, |
|
| 2323 |
+ chroma422_dc_scan, 8); |
|
| 2324 |
+ } |
|
| 2325 |
+ } |
|
| 2326 |
+ |
|
| 2327 |
+ if( cbp&0x20 ) {
|
|
| 2328 |
+ int c, i, i8x8; |
|
| 2329 |
+ for( c = 0; c < 2; c++ ) {
|
|
| 2330 |
+ DCTELEM *mb = h->mb + (16*(16 + 16*c) << pixel_shift); |
|
| 2331 |
+ qmul = h->dequant4_coeff[c+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[c]]; |
|
| 2332 |
+ for (i8x8 = 0; i8x8 < 2; i8x8++) {
|
|
| 2333 |
+ for (i = 0; i < 4; i++) {
|
|
| 2334 |
+ const int index = 16 + 16 * c + 8*i8x8 + i; |
|
| 2335 |
+ //av_log(s->avctx, AV_LOG_ERROR, "INTRA C%d-AC %d\n",c, index - 16); |
|
| 2336 |
+ decode_cabac_residual_nondc(h, mb, 4, index, scan + 1, qmul, 15); |
|
| 2337 |
+ mb += 16<<pixel_shift; |
|
| 2338 |
+ } |
|
| 2339 |
+ } |
|
| 2340 |
+ } |
|
| 2341 |
+ } else {
|
|
| 2342 |
+ fill_rectangle(&h->non_zero_count_cache[scan8[16]], 4, 4, 8, 0, 1); |
|
| 2343 |
+ fill_rectangle(&h->non_zero_count_cache[scan8[32]], 4, 4, 8, 0, 1); |
|
| 2344 |
+ } |
|
| 2345 |
+ } else /* yuv420 */ {
|
|
| 2317 | 2346 |
if( cbp&0x30 ){
|
| 2318 | 2347 |
int c; |
| 2319 | 2348 |
for( c = 0; c < 2; c++ ) {
|
| ... | ... |
@@ -62,6 +62,30 @@ static const uint8_t chroma_dc_coeff_token_bits[4*5]={
|
| 62 | 62 |
2, 3, 2, 0, |
| 63 | 63 |
}; |
| 64 | 64 |
|
| 65 |
+static const uint8_t chroma422_dc_coeff_token_len[4*9]={
|
|
| 66 |
+ 1, 0, 0, 0, |
|
| 67 |
+ 7, 2, 0, 0, |
|
| 68 |
+ 7, 7, 3, 0, |
|
| 69 |
+ 9, 7, 7, 5, |
|
| 70 |
+ 9, 9, 7, 6, |
|
| 71 |
+ 10, 10, 9, 7, |
|
| 72 |
+ 11, 11, 10, 7, |
|
| 73 |
+ 12, 12, 11, 10, |
|
| 74 |
+ 13, 12, 12, 11, |
|
| 75 |
+}; |
|
| 76 |
+ |
|
| 77 |
+static const uint8_t chroma422_dc_coeff_token_bits[4*9]={
|
|
| 78 |
+ 1, 0, 0, 0, |
|
| 79 |
+ 15, 1, 0, 0, |
|
| 80 |
+ 14, 13, 1, 0, |
|
| 81 |
+ 7, 12, 11, 1, |
|
| 82 |
+ 6, 5, 10, 1, |
|
| 83 |
+ 7, 6, 4, 9, |
|
| 84 |
+ 7, 6, 5, 8, |
|
| 85 |
+ 7, 6, 5, 4, |
|
| 86 |
+ 7, 5, 4, 4, |
|
| 87 |
+}; |
|
| 88 |
+ |
|
| 65 | 89 |
static const uint8_t coeff_token_len[4][4*17]={
|
| 66 | 90 |
{
|
| 67 | 91 |
1, 0, 0, 0, |
| ... | ... |
@@ -172,6 +196,26 @@ static const uint8_t chroma_dc_total_zeros_bits[3][4]= {
|
| 172 | 172 |
{ 1, 0, 0, 0,},
|
| 173 | 173 |
}; |
| 174 | 174 |
|
| 175 |
+static const uint8_t chroma422_dc_total_zeros_len[7][8]= {
|
|
| 176 |
+ { 1, 3, 3, 4, 4, 4, 5, 5 },
|
|
| 177 |
+ { 3, 2, 3, 3, 3, 3, 3 },
|
|
| 178 |
+ { 3, 3, 2, 2, 3, 3 },
|
|
| 179 |
+ { 3, 2, 2, 2, 3 },
|
|
| 180 |
+ { 2, 2, 2, 2 },
|
|
| 181 |
+ { 2, 2, 1 },
|
|
| 182 |
+ { 1, 1 },
|
|
| 183 |
+}; |
|
| 184 |
+ |
|
| 185 |
+static const uint8_t chroma422_dc_total_zeros_bits[7][8]= {
|
|
| 186 |
+ { 1, 2, 3, 2, 3, 1, 1, 0 },
|
|
| 187 |
+ { 0, 1, 1, 4, 5, 6, 7 },
|
|
| 188 |
+ { 0, 1, 1, 2, 6, 7 },
|
|
| 189 |
+ { 6, 0, 1, 2, 7 },
|
|
| 190 |
+ { 0, 1, 2, 3 },
|
|
| 191 |
+ { 0, 1, 1 },
|
|
| 192 |
+ { 0, 1 },
|
|
| 193 |
+}; |
|
| 194 |
+ |
|
| 175 | 195 |
static const uint8_t run_len[7][16]={
|
| 176 | 196 |
{1,1},
|
| 177 | 197 |
{1,2,2},
|
| ... | ... |
@@ -200,6 +244,10 @@ static VLC chroma_dc_coeff_token_vlc; |
| 200 | 200 |
static VLC_TYPE chroma_dc_coeff_token_vlc_table[256][2]; |
| 201 | 201 |
static const int chroma_dc_coeff_token_vlc_table_size = 256; |
| 202 | 202 |
|
| 203 |
+static VLC chroma422_dc_coeff_token_vlc; |
|
| 204 |
+static VLC_TYPE chroma422_dc_coeff_token_vlc_table[8192][2]; |
|
| 205 |
+static const int chroma422_dc_coeff_token_vlc_table_size = 8192; |
|
| 206 |
+ |
|
| 203 | 207 |
static VLC total_zeros_vlc[15]; |
| 204 | 208 |
static VLC_TYPE total_zeros_vlc_tables[15][512][2]; |
| 205 | 209 |
static const int total_zeros_vlc_tables_size = 512; |
| ... | ... |
@@ -208,6 +256,10 @@ static VLC chroma_dc_total_zeros_vlc[3]; |
| 208 | 208 |
static VLC_TYPE chroma_dc_total_zeros_vlc_tables[3][8][2]; |
| 209 | 209 |
static const int chroma_dc_total_zeros_vlc_tables_size = 8; |
| 210 | 210 |
|
| 211 |
+static VLC chroma422_dc_total_zeros_vlc[7]; |
|
| 212 |
+static VLC_TYPE chroma422_dc_total_zeros_vlc_tables[7][32][2]; |
|
| 213 |
+static const int chroma422_dc_total_zeros_vlc_tables_size = 32; |
|
| 214 |
+ |
|
| 211 | 215 |
static VLC run_vlc[6]; |
| 212 | 216 |
static VLC_TYPE run_vlc_tables[6][8][2]; |
| 213 | 217 |
static const int run_vlc_tables_size = 8; |
| ... | ... |
@@ -219,6 +271,14 @@ static const int run7_vlc_table_size = 96; |
| 219 | 219 |
#define LEVEL_TAB_BITS 8 |
| 220 | 220 |
static int8_t cavlc_level_tab[7][1<<LEVEL_TAB_BITS][2]; |
| 221 | 221 |
|
| 222 |
+#define CHROMA_DC_COEFF_TOKEN_VLC_BITS 8 |
|
| 223 |
+#define CHROMA422_DC_COEFF_TOKEN_VLC_BITS 13 |
|
| 224 |
+#define COEFF_TOKEN_VLC_BITS 8 |
|
| 225 |
+#define TOTAL_ZEROS_VLC_BITS 9 |
|
| 226 |
+#define CHROMA_DC_TOTAL_ZEROS_VLC_BITS 3 |
|
| 227 |
+#define CHROMA422_DC_TOTAL_ZEROS_VLC_BITS 5 |
|
| 228 |
+#define RUN_VLC_BITS 3 |
|
| 229 |
+#define RUN7_VLC_BITS 6 |
|
| 222 | 230 |
|
| 223 | 231 |
/** |
| 224 | 232 |
* gets the predicted number of non-zero coefficients. |
| ... | ... |
@@ -278,6 +338,13 @@ av_cold void ff_h264_decode_init_vlc(void){
|
| 278 | 278 |
&chroma_dc_coeff_token_bits[0], 1, 1, |
| 279 | 279 |
INIT_VLC_USE_NEW_STATIC); |
| 280 | 280 |
|
| 281 |
+ chroma422_dc_coeff_token_vlc.table = chroma422_dc_coeff_token_vlc_table; |
|
| 282 |
+ chroma422_dc_coeff_token_vlc.table_allocated = chroma422_dc_coeff_token_vlc_table_size; |
|
| 283 |
+ init_vlc(&chroma422_dc_coeff_token_vlc, CHROMA422_DC_COEFF_TOKEN_VLC_BITS, 4*9, |
|
| 284 |
+ &chroma422_dc_coeff_token_len [0], 1, 1, |
|
| 285 |
+ &chroma422_dc_coeff_token_bits[0], 1, 1, |
|
| 286 |
+ INIT_VLC_USE_NEW_STATIC); |
|
| 287 |
+ |
|
| 281 | 288 |
offset = 0; |
| 282 | 289 |
for(i=0; i<4; i++){
|
| 283 | 290 |
coeff_token_vlc[i].table = coeff_token_vlc_tables+offset; |
| ... | ... |
@@ -304,6 +371,17 @@ av_cold void ff_h264_decode_init_vlc(void){
|
| 304 | 304 |
&chroma_dc_total_zeros_bits[i][0], 1, 1, |
| 305 | 305 |
INIT_VLC_USE_NEW_STATIC); |
| 306 | 306 |
} |
| 307 |
+ |
|
| 308 |
+ for(i=0; i<7; i++){
|
|
| 309 |
+ chroma422_dc_total_zeros_vlc[i].table = chroma422_dc_total_zeros_vlc_tables[i]; |
|
| 310 |
+ chroma422_dc_total_zeros_vlc[i].table_allocated = chroma422_dc_total_zeros_vlc_tables_size; |
|
| 311 |
+ init_vlc(&chroma422_dc_total_zeros_vlc[i], |
|
| 312 |
+ CHROMA422_DC_TOTAL_ZEROS_VLC_BITS, 8, |
|
| 313 |
+ &chroma422_dc_total_zeros_len [i][0], 1, 1, |
|
| 314 |
+ &chroma422_dc_total_zeros_bits[i][0], 1, 1, |
|
| 315 |
+ INIT_VLC_USE_NEW_STATIC); |
|
| 316 |
+ } |
|
| 317 |
+ |
|
| 307 | 318 |
for(i=0; i<15; i++){
|
| 308 | 319 |
total_zeros_vlc[i].table = total_zeros_vlc_tables[i]; |
| 309 | 320 |
total_zeros_vlc[i].table_allocated = total_zeros_vlc_tables_size; |
| ... | ... |
@@ -373,7 +451,10 @@ static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, in |
| 373 | 373 |
//FIXME put trailing_onex into the context |
| 374 | 374 |
|
| 375 | 375 |
if(max_coeff <= 8){
|
| 376 |
- coeff_token= get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1); |
|
| 376 |
+ if (max_coeff == 4) |
|
| 377 |
+ coeff_token = get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1); |
|
| 378 |
+ else |
|
| 379 |
+ coeff_token = get_vlc2(gb, chroma422_dc_coeff_token_vlc.table, CHROMA422_DC_COEFF_TOKEN_VLC_BITS, 1); |
|
| 377 | 380 |
total_coeff= coeff_token>>2; |
| 378 | 381 |
}else{
|
| 379 | 382 |
if(n >= LUMA_DC_BLOCK_INDEX){
|
| ... | ... |
@@ -483,11 +564,16 @@ static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, in |
| 483 | 483 |
if(total_coeff == max_coeff) |
| 484 | 484 |
zeros_left=0; |
| 485 | 485 |
else{
|
| 486 |
- /* FIXME: we don't actually support 4:2:2 yet. */ |
|
| 487 |
- if(max_coeff <= 8) |
|
| 488 |
- zeros_left= get_vlc2(gb, (chroma_dc_total_zeros_vlc-1)[ total_coeff ].table, CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1); |
|
| 489 |
- else |
|
| 486 |
+ if (max_coeff <= 8) {
|
|
| 487 |
+ if (max_coeff == 4) |
|
| 488 |
+ zeros_left = get_vlc2(gb, (chroma_dc_total_zeros_vlc-1)[total_coeff].table, |
|
| 489 |
+ CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1); |
|
| 490 |
+ else |
|
| 491 |
+ zeros_left = get_vlc2(gb, (chroma422_dc_total_zeros_vlc-1)[total_coeff].table, |
|
| 492 |
+ CHROMA422_DC_TOTAL_ZEROS_VLC_BITS, 1); |
|
| 493 |
+ } else {
|
|
| 490 | 494 |
zeros_left= get_vlc2(gb, (total_zeros_vlc-1)[ total_coeff ].table, TOTAL_ZEROS_VLC_BITS, 1); |
| 495 |
+ } |
|
| 491 | 496 |
} |
| 492 | 497 |
|
| 493 | 498 |
#define STORE_BLOCK(type) \ |
| ... | ... |
@@ -994,7 +1080,7 @@ decode_intra_mb: |
| 994 | 994 |
s->current_picture.f.mb_type[mb_xy] = mb_type; |
| 995 | 995 |
|
| 996 | 996 |
if(cbp || IS_INTRA16x16(mb_type)){
|
| 997 |
- int i4x4, chroma_idx; |
|
| 997 |
+ int i4x4, i8x8, chroma_idx; |
|
| 998 | 998 |
int dquant; |
| 999 | 999 |
int ret; |
| 1000 | 1000 |
GetBitContext *gb= IS_INTRA(mb_type) ? h->intra_gb_ptr : h->inter_gb_ptr; |
| ... | ... |
@@ -1036,7 +1122,34 @@ decode_intra_mb: |
| 1036 | 1036 |
if( decode_luma_residual(h, gb, scan, scan8x8, pixel_shift, mb_type, cbp, 2) < 0 ){
|
| 1037 | 1037 |
return -1; |
| 1038 | 1038 |
} |
| 1039 |
- } else {
|
|
| 1039 |
+ } else if (CHROMA422) {
|
|
| 1040 |
+ if(cbp&0x30){
|
|
| 1041 |
+ for(chroma_idx=0; chroma_idx<2; chroma_idx++) |
|
| 1042 |
+ if (decode_residual(h, gb, h->mb + ((256 + 16*16*chroma_idx) << pixel_shift), |
|
| 1043 |
+ CHROMA_DC_BLOCK_INDEX+chroma_idx, chroma422_dc_scan, |
|
| 1044 |
+ NULL, 8) < 0) {
|
|
| 1045 |
+ return -1; |
|
| 1046 |
+ } |
|
| 1047 |
+ } |
|
| 1048 |
+ |
|
| 1049 |
+ if(cbp&0x20){
|
|
| 1050 |
+ for(chroma_idx=0; chroma_idx<2; chroma_idx++){
|
|
| 1051 |
+ const uint32_t *qmul = h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[chroma_idx]]; |
|
| 1052 |
+ DCTELEM *mb = h->mb + (16*(16 + 16*chroma_idx) << pixel_shift); |
|
| 1053 |
+ for (i8x8 = 0; i8x8 < 2; i8x8++) {
|
|
| 1054 |
+ for (i4x4 = 0; i4x4 < 4; i4x4++) {
|
|
| 1055 |
+ const int index = 16 + 16*chroma_idx + 8*i8x8 + i4x4; |
|
| 1056 |
+ if (decode_residual(h, gb, mb, index, scan + 1, qmul, 15) < 0) |
|
| 1057 |
+ return -1; |
|
| 1058 |
+ mb += 16 << pixel_shift; |
|
| 1059 |
+ } |
|
| 1060 |
+ } |
|
| 1061 |
+ } |
|
| 1062 |
+ }else{
|
|
| 1063 |
+ fill_rectangle(&h->non_zero_count_cache[scan8[16]], 4, 4, 8, 0, 1); |
|
| 1064 |
+ fill_rectangle(&h->non_zero_count_cache[scan8[32]], 4, 4, 8, 0, 1); |
|
| 1065 |
+ } |
|
| 1066 |
+ } else /* yuv420 */ {
|
|
| 1040 | 1067 |
if(cbp&0x30){
|
| 1041 | 1068 |
for(chroma_idx=0; chroma_idx<2; chroma_idx++) |
| 1042 | 1069 |
if( decode_residual(h, gb, h->mb + ((256 + 16*16*chroma_idx) << pixel_shift), CHROMA_DC_BLOCK_INDEX+chroma_idx, chroma_dc_scan, NULL, 4) < 0){
|
| ... | ... |
@@ -212,6 +212,7 @@ static void av_always_inline h264_filter_mb_fast_internal( H264Context *h, int m |
| 212 | 212 |
MpegEncContext * const s = &h->s; |
| 213 | 213 |
int chroma = !(CONFIG_GRAY && (s->flags&CODEC_FLAG_GRAY)); |
| 214 | 214 |
int chroma444 = CHROMA444; |
| 215 |
+ int chroma422 = CHROMA422; |
|
| 215 | 216 |
|
| 216 | 217 |
int mb_xy = h->mb_xy; |
| 217 | 218 |
int left_type= h->left_type[LTOP]; |
| ... | ... |
@@ -289,6 +290,23 @@ static void av_always_inline h264_filter_mb_fast_internal( H264Context *h, int m |
| 289 | 289 |
filter_mb_edgeh( &img_cb[4*3*linesize], linesize, bS3, qpc, a, b, h, 0); |
| 290 | 290 |
filter_mb_edgeh( &img_cr[4*3*linesize], linesize, bS3, qpc, a, b, h, 0); |
| 291 | 291 |
} |
| 292 |
+ }else if(chroma422){
|
|
| 293 |
+ if(left_type){
|
|
| 294 |
+ filter_mb_edgecv(&img_cb[2*0<<pixel_shift], uvlinesize, bS4, qpc0, a, b, h, 1); |
|
| 295 |
+ filter_mb_edgecv(&img_cr[2*0<<pixel_shift], uvlinesize, bS4, qpc0, a, b, h, 1); |
|
| 296 |
+ } |
|
| 297 |
+ filter_mb_edgecv(&img_cb[2*2<<pixel_shift], uvlinesize, bS3, qpc, a, b, h, 0); |
|
| 298 |
+ filter_mb_edgecv(&img_cr[2*2<<pixel_shift], uvlinesize, bS3, qpc, a, b, h, 0); |
|
| 299 |
+ if(top_type){
|
|
| 300 |
+ filter_mb_edgech(&img_cb[4*0*uvlinesize], uvlinesize, bSH, qpc1, a, b, h, 1); |
|
| 301 |
+ filter_mb_edgech(&img_cr[4*0*uvlinesize], uvlinesize, bSH, qpc1, a, b, h, 1); |
|
| 302 |
+ } |
|
| 303 |
+ filter_mb_edgech(&img_cb[4*1*uvlinesize], uvlinesize, bS3, qpc, a, b, h, 0); |
|
| 304 |
+ filter_mb_edgech(&img_cr[4*1*uvlinesize], uvlinesize, bS3, qpc, a, b, h, 0); |
|
| 305 |
+ filter_mb_edgech(&img_cb[4*2*uvlinesize], uvlinesize, bS3, qpc, a, b, h, 0); |
|
| 306 |
+ filter_mb_edgech(&img_cr[4*2*uvlinesize], uvlinesize, bS3, qpc, a, b, h, 0); |
|
| 307 |
+ filter_mb_edgech(&img_cb[4*3*uvlinesize], uvlinesize, bS3, qpc, a, b, h, 0); |
|
| 308 |
+ filter_mb_edgech(&img_cr[4*3*uvlinesize], uvlinesize, bS3, qpc, a, b, h, 0); |
|
| 292 | 309 |
}else{
|
| 293 | 310 |
if(left_type){
|
| 294 | 311 |
filter_mb_edgecv( &img_cb[2*0<<pixel_shift], uvlinesize, bS4, qpc0, a, b, h, 1); |
| ... | ... |
@@ -411,10 +429,12 @@ static int check_mv(H264Context *h, long b_idx, long bn_idx, int mvy_limit){
|
| 411 | 411 |
return v; |
| 412 | 412 |
} |
| 413 | 413 |
|
| 414 |
-static av_always_inline void filter_mb_dir(H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize, int mb_xy, int mb_type, int mvy_limit, int first_vertical_edge_done, int a, int b, int chroma, int chroma444, int dir) {
|
|
| 414 |
+static av_always_inline void filter_mb_dir(H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize, int mb_xy, int mb_type, int mvy_limit, int first_vertical_edge_done, int a, int b, int chroma, int dir) {
|
|
| 415 | 415 |
MpegEncContext * const s = &h->s; |
| 416 | 416 |
int edge; |
| 417 | 417 |
int chroma_qp_avg[2]; |
| 418 |
+ int chroma444 = CHROMA444; |
|
| 419 |
+ int chroma422 = CHROMA422; |
|
| 418 | 420 |
const int mbm_xy = dir == 0 ? mb_xy -1 : h->top_mb_xy; |
| 419 | 421 |
const int mbm_type = dir == 0 ? h->left_type[LTOP] : h->top_type; |
| 420 | 422 |
|
| ... | ... |
@@ -564,8 +584,9 @@ static av_always_inline void filter_mb_dir(H264Context *h, int mb_x, int mb_y, u |
| 564 | 564 |
for( edge = 1; edge < edges; edge++ ) {
|
| 565 | 565 |
DECLARE_ALIGNED(8, int16_t, bS)[4]; |
| 566 | 566 |
int qp; |
| 567 |
+ const int deblock_edge = !IS_8x8DCT(mb_type & (edge<<24)); // (edge&1) && IS_8x8DCT(mb_type) |
|
| 567 | 568 |
|
| 568 |
- if( IS_8x8DCT(mb_type & (edge<<24)) ) // (edge&1) && IS_8x8DCT(mb_type) |
|
| 569 |
+ if (!deblock_edge && (!chroma422 || dir == 0)) |
|
| 569 | 570 |
continue; |
| 570 | 571 |
|
| 571 | 572 |
if( IS_INTRA(mb_type)) {
|
| ... | ... |
@@ -627,14 +648,23 @@ static av_always_inline void filter_mb_dir(H264Context *h, int mb_x, int mb_y, u |
| 627 | 627 |
} |
| 628 | 628 |
} |
| 629 | 629 |
} else {
|
| 630 |
- filter_mb_edgeh( &img_y[4*edge*linesize], linesize, bS, qp, a, b, h, 0 ); |
|
| 631 |
- if (chroma) {
|
|
| 632 |
- if (chroma444) {
|
|
| 633 |
- filter_mb_edgeh ( &img_cb[4*edge*uvlinesize], uvlinesize, bS, h->chroma_qp[0], a, b, h, 0); |
|
| 634 |
- filter_mb_edgeh ( &img_cr[4*edge*uvlinesize], uvlinesize, bS, h->chroma_qp[1], a, b, h, 0); |
|
| 635 |
- } else if( (edge&1) == 0 ) {
|
|
| 636 |
- filter_mb_edgech( &img_cb[2*edge*uvlinesize], uvlinesize, bS, h->chroma_qp[0], a, b, h, 0); |
|
| 637 |
- filter_mb_edgech( &img_cr[2*edge*uvlinesize], uvlinesize, bS, h->chroma_qp[1], a, b, h, 0); |
|
| 630 |
+ if (chroma422) {
|
|
| 631 |
+ if (deblock_edge) |
|
| 632 |
+ filter_mb_edgeh(&img_y[4*edge*linesize], linesize, bS, qp, a, b, h, 0); |
|
| 633 |
+ if (chroma) {
|
|
| 634 |
+ filter_mb_edgech(&img_cb[4*edge*uvlinesize], uvlinesize, bS, h->chroma_qp[0], a, b, h, 0); |
|
| 635 |
+ filter_mb_edgech(&img_cr[4*edge*uvlinesize], uvlinesize, bS, h->chroma_qp[1], a, b, h, 0); |
|
| 636 |
+ } |
|
| 637 |
+ } else {
|
|
| 638 |
+ filter_mb_edgeh(&img_y[4*edge*linesize], linesize, bS, qp, a, b, h, 0); |
|
| 639 |
+ if (chroma) {
|
|
| 640 |
+ if (chroma444) {
|
|
| 641 |
+ filter_mb_edgeh (&img_cb[4*edge*uvlinesize], uvlinesize, bS, h->chroma_qp[0], a, b, h, 0); |
|
| 642 |
+ filter_mb_edgeh (&img_cr[4*edge*uvlinesize], uvlinesize, bS, h->chroma_qp[1], a, b, h, 0); |
|
| 643 |
+ } else if ((edge&1) == 0) {
|
|
| 644 |
+ filter_mb_edgech(&img_cb[2*edge*uvlinesize], uvlinesize, bS, h->chroma_qp[0], a, b, h, 0); |
|
| 645 |
+ filter_mb_edgech(&img_cr[2*edge*uvlinesize], uvlinesize, bS, h->chroma_qp[1], a, b, h, 0); |
|
| 646 |
+ } |
|
| 638 | 647 |
} |
| 639 | 648 |
} |
| 640 | 649 |
} |
| ... | ... |
@@ -726,6 +756,11 @@ void ff_h264_filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint |
| 726 | 726 |
filter_mb_mbaff_edgev ( h, img_cb + 8*uvlinesize, uvlinesize, bS+4, 1, bqp[1], a, b, 1 ); |
| 727 | 727 |
filter_mb_mbaff_edgev ( h, img_cr, uvlinesize, bS , 1, rqp[0], a, b, 1 ); |
| 728 | 728 |
filter_mb_mbaff_edgev ( h, img_cr + 8*uvlinesize, uvlinesize, bS+4, 1, rqp[1], a, b, 1 ); |
| 729 |
+ } else if (CHROMA422) {
|
|
| 730 |
+ filter_mb_mbaff_edgecv(h, img_cb, uvlinesize, bS , 1, bqp[0], a, b, 1); |
|
| 731 |
+ filter_mb_mbaff_edgecv(h, img_cb + 8*uvlinesize, uvlinesize, bS+4, 1, bqp[1], a, b, 1); |
|
| 732 |
+ filter_mb_mbaff_edgecv(h, img_cr, uvlinesize, bS , 1, rqp[0], a, b, 1); |
|
| 733 |
+ filter_mb_mbaff_edgecv(h, img_cr + 8*uvlinesize, uvlinesize, bS+4, 1, rqp[1], a, b, 1); |
|
| 729 | 734 |
}else{
|
| 730 | 735 |
filter_mb_mbaff_edgecv( h, img_cb, uvlinesize, bS , 1, bqp[0], a, b, 1 ); |
| 731 | 736 |
filter_mb_mbaff_edgecv( h, img_cb + 4*uvlinesize, uvlinesize, bS+4, 1, bqp[1], a, b, 1 ); |
| ... | ... |
@@ -754,9 +789,9 @@ void ff_h264_filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint |
| 754 | 754 |
|
| 755 | 755 |
#if CONFIG_SMALL |
| 756 | 756 |
for( dir = 0; dir < 2; dir++ ) |
| 757 |
- filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, dir ? 0 : first_vertical_edge_done, a, b, chroma, CHROMA444, dir); |
|
| 757 |
+ filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, dir ? 0 : first_vertical_edge_done, a, b, chroma, dir); |
|
| 758 | 758 |
#else |
| 759 |
- filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, first_vertical_edge_done, a, b, chroma, CHROMA444, 0); |
|
| 760 |
- filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, 0, a, b, chroma, CHROMA444, 1); |
|
| 759 |
+ filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, first_vertical_edge_done, a, b, chroma, 0); |
|
| 760 |
+ filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, 0, a, b, chroma, 1); |
|
| 761 | 761 |
#endif |
| 762 | 762 |
} |
| ... | ... |
@@ -510,7 +510,7 @@ static void fill_decode_caches(H264Context *h, int mb_type){
|
| 510 | 510 |
if(top_type){
|
| 511 | 511 |
nnz = h->non_zero_count[top_xy]; |
| 512 | 512 |
AV_COPY32(&nnz_cache[4+8* 0], &nnz[4*3]); |
| 513 |
- if(CHROMA444){
|
|
| 513 |
+ if(!s->chroma_y_shift){
|
|
| 514 | 514 |
AV_COPY32(&nnz_cache[4+8* 5], &nnz[4* 7]); |
| 515 | 515 |
AV_COPY32(&nnz_cache[4+8*10], &nnz[4*11]); |
| 516 | 516 |
}else{
|
| ... | ... |
@@ -534,6 +534,11 @@ static void fill_decode_caches(H264Context *h, int mb_type){
|
| 534 | 534 |
nnz_cache[3+8* 7 + 2*8*i]= nnz[left_block[8+1+2*i]+4*4]; |
| 535 | 535 |
nnz_cache[3+8*11 + 2*8*i]= nnz[left_block[8+0+2*i]+8*4]; |
| 536 | 536 |
nnz_cache[3+8*12 + 2*8*i]= nnz[left_block[8+1+2*i]+8*4]; |
| 537 |
+ }else if(CHROMA422) {
|
|
| 538 |
+ nnz_cache[3+8* 6 + 2*8*i]= nnz[left_block[8+0+2*i]-2+4*4]; |
|
| 539 |
+ nnz_cache[3+8* 7 + 2*8*i]= nnz[left_block[8+1+2*i]-2+4*4]; |
|
| 540 |
+ nnz_cache[3+8*11 + 2*8*i]= nnz[left_block[8+0+2*i]-2+8*4]; |
|
| 541 |
+ nnz_cache[3+8*12 + 2*8*i]= nnz[left_block[8+1+2*i]-2+8*4]; |
|
| 537 | 542 |
}else{
|
| 538 | 543 |
nnz_cache[3+8* 6 + 8*i]= nnz[left_block[8+4+2*i]]; |
| 539 | 544 |
nnz_cache[3+8*11 + 8*i]= nnz[left_block[8+5+2*i]]; |
| ... | ... |
@@ -396,7 +396,8 @@ int ff_h264_decode_seq_parameter_set(H264Context *h){
|
| 396 | 396 |
#endif |
| 397 | 397 |
sps->crop= get_bits1(&s->gb); |
| 398 | 398 |
if(sps->crop){
|
| 399 |
- int crop_limit = sps->chroma_format_idc == 3 ? 16 : 8; |
|
| 399 |
+ int crop_vertical_limit = sps->chroma_format_idc & 2 ? 16 : 8; |
|
| 400 |
+ int crop_horizontal_limit = sps->chroma_format_idc == 3 ? 16 : 8; |
|
| 400 | 401 |
sps->crop_left = get_ue_golomb(&s->gb); |
| 401 | 402 |
sps->crop_right = get_ue_golomb(&s->gb); |
| 402 | 403 |
sps->crop_top = get_ue_golomb(&s->gb); |
| ... | ... |
@@ -404,7 +405,7 @@ int ff_h264_decode_seq_parameter_set(H264Context *h){
|
| 404 | 404 |
if(sps->crop_left || sps->crop_top){
|
| 405 | 405 |
av_log(h->s.avctx, AV_LOG_ERROR, "insane cropping not completely supported, this could look slightly wrong ...\n"); |
| 406 | 406 |
} |
| 407 |
- if(sps->crop_right >= crop_limit || sps->crop_bottom >= crop_limit){
|
|
| 407 |
+ if(sps->crop_right >= crop_horizontal_limit || sps->crop_bottom >= crop_vertical_limit){
|
|
| 408 | 408 |
av_log(h->s.avctx, AV_LOG_ERROR, "brainfart cropping not supported, this could look slightly wrong ...\n"); |
| 409 | 409 |
} |
| 410 | 410 |
}else{
|
| ... | ... |
@@ -80,7 +80,14 @@ static const uint8_t luma_dc_field_scan[16]={
|
| 80 | 80 |
|
| 81 | 81 |
static const uint8_t chroma_dc_scan[4]={
|
| 82 | 82 |
(0+0*2)*16, (1+0*2)*16, |
| 83 |
- (0+1*2)*16, (1+1*2)*16, //FIXME |
|
| 83 |
+ (0+1*2)*16, (1+1*2)*16, |
|
| 84 |
+}; |
|
| 85 |
+ |
|
| 86 |
+static const uint8_t chroma422_dc_scan[8]={
|
|
| 87 |
+ (0+0*2)*16, (0+1*2)*16, |
|
| 88 |
+ (1+0*2)*16, (0+2*2)*16, |
|
| 89 |
+ (0+3*2)*16, (1+1*2)*16, |
|
| 90 |
+ (1+2*2)*16, (1+3*2)*16, |
|
| 84 | 91 |
}; |
| 85 | 92 |
|
| 86 | 93 |
// zigzag_scan8x8_cavlc[i] = zigzag_scan8x8[(i/4) + 16*(i%4)] |
| ... | ... |
@@ -41,7 +41,7 @@ |
| 41 | 41 |
#include "h264dsp_template.c" |
| 42 | 42 |
#undef BIT_DEPTH |
| 43 | 43 |
|
| 44 |
-void ff_h264dsp_init(H264DSPContext *c, const int bit_depth) |
|
| 44 |
+void ff_h264dsp_init(H264DSPContext *c, const int bit_depth, const int chroma_format_idc) |
|
| 45 | 45 |
{
|
| 46 | 46 |
#undef FUNC |
| 47 | 47 |
#define FUNC(a, depth) a ## _ ## depth ## _c |
| ... | ... |
@@ -53,10 +53,16 @@ void ff_h264dsp_init(H264DSPContext *c, const int bit_depth) |
| 53 | 53 |
c->h264_idct8_dc_add= FUNC(ff_h264_idct8_dc_add, depth);\ |
| 54 | 54 |
c->h264_idct_add16 = FUNC(ff_h264_idct_add16, depth);\ |
| 55 | 55 |
c->h264_idct8_add4 = FUNC(ff_h264_idct8_add4, depth);\ |
| 56 |
- c->h264_idct_add8 = FUNC(ff_h264_idct_add8, depth);\ |
|
| 56 |
+ if (chroma_format_idc == 1)\ |
|
| 57 |
+ c->h264_idct_add8 = FUNC(ff_h264_idct_add8, depth);\ |
|
| 58 |
+ else\ |
|
| 59 |
+ c->h264_idct_add8 = FUNC(ff_h264_idct_add8_422, depth);\ |
|
| 57 | 60 |
c->h264_idct_add16intra= FUNC(ff_h264_idct_add16intra, depth);\ |
| 58 | 61 |
c->h264_luma_dc_dequant_idct= FUNC(ff_h264_luma_dc_dequant_idct, depth);\ |
| 59 |
- c->h264_chroma_dc_dequant_idct= FUNC(ff_h264_chroma_dc_dequant_idct, depth);\ |
|
| 62 |
+ if (chroma_format_idc == 1)\ |
|
| 63 |
+ c->h264_chroma_dc_dequant_idct= FUNC(ff_h264_chroma_dc_dequant_idct, depth);\ |
|
| 64 |
+ else\ |
|
| 65 |
+ c->h264_chroma_dc_dequant_idct= FUNC(ff_h264_chroma422_dc_dequant_idct, depth);\ |
|
| 60 | 66 |
\ |
| 61 | 67 |
c->weight_h264_pixels_tab[0]= FUNC(weight_h264_pixels16x16, depth);\ |
| 62 | 68 |
c->weight_h264_pixels_tab[1]= FUNC(weight_h264_pixels16x8, depth);\ |
| ... | ... |
@@ -86,11 +92,23 @@ void ff_h264dsp_init(H264DSPContext *c, const int bit_depth) |
| 86 | 86 |
c->h264_h_loop_filter_luma_intra= FUNC(h264_h_loop_filter_luma_intra, depth);\ |
| 87 | 87 |
c->h264_h_loop_filter_luma_mbaff_intra= FUNC(h264_h_loop_filter_luma_mbaff_intra, depth);\ |
| 88 | 88 |
c->h264_v_loop_filter_chroma= FUNC(h264_v_loop_filter_chroma, depth);\ |
| 89 |
- c->h264_h_loop_filter_chroma= FUNC(h264_h_loop_filter_chroma, depth);\ |
|
| 90 |
- c->h264_h_loop_filter_chroma_mbaff= FUNC(h264_h_loop_filter_chroma_mbaff, depth);\ |
|
| 89 |
+ if (chroma_format_idc == 1)\ |
|
| 90 |
+ c->h264_h_loop_filter_chroma= FUNC(h264_h_loop_filter_chroma, depth);\ |
|
| 91 |
+ else\ |
|
| 92 |
+ c->h264_h_loop_filter_chroma= FUNC(h264_h_loop_filter_chroma422, depth);\ |
|
| 93 |
+ if (chroma_format_idc == 1)\ |
|
| 94 |
+ c->h264_h_loop_filter_chroma_mbaff= FUNC(h264_h_loop_filter_chroma_mbaff, depth);\ |
|
| 95 |
+ else\ |
|
| 96 |
+ c->h264_h_loop_filter_chroma_mbaff= FUNC(h264_h_loop_filter_chroma422_mbaff, depth);\ |
|
| 91 | 97 |
c->h264_v_loop_filter_chroma_intra= FUNC(h264_v_loop_filter_chroma_intra, depth);\ |
| 92 |
- c->h264_h_loop_filter_chroma_intra= FUNC(h264_h_loop_filter_chroma_intra, depth);\ |
|
| 93 |
- c->h264_h_loop_filter_chroma_mbaff_intra= FUNC(h264_h_loop_filter_chroma_mbaff_intra, depth);\ |
|
| 98 |
+ if (chroma_format_idc == 1)\ |
|
| 99 |
+ c->h264_h_loop_filter_chroma_intra= FUNC(h264_h_loop_filter_chroma_intra, depth);\ |
|
| 100 |
+ else\ |
|
| 101 |
+ c->h264_h_loop_filter_chroma_intra= FUNC(h264_h_loop_filter_chroma422_intra, depth);\ |
|
| 102 |
+ if (chroma_format_idc == 1)\ |
|
| 103 |
+ c->h264_h_loop_filter_chroma_mbaff_intra= FUNC(h264_h_loop_filter_chroma_mbaff_intra, depth);\ |
|
| 104 |
+ else\ |
|
| 105 |
+ c->h264_h_loop_filter_chroma_mbaff_intra= FUNC(h264_h_loop_filter_chroma422_mbaff_intra, depth);\ |
|
| 94 | 106 |
c->h264_loop_filter_strength= NULL; |
| 95 | 107 |
|
| 96 | 108 |
switch (bit_depth) {
|
| ... | ... |
@@ -105,7 +123,7 @@ void ff_h264dsp_init(H264DSPContext *c, const int bit_depth) |
| 105 | 105 |
break; |
| 106 | 106 |
} |
| 107 | 107 |
|
| 108 |
- if (ARCH_ARM) ff_h264dsp_init_arm(c, bit_depth); |
|
| 109 |
- if (HAVE_ALTIVEC) ff_h264dsp_init_ppc(c, bit_depth); |
|
| 110 |
- if (HAVE_MMX) ff_h264dsp_init_x86(c, bit_depth); |
|
| 108 |
+ if (ARCH_ARM) ff_h264dsp_init_arm(c, bit_depth, chroma_format_idc); |
|
| 109 |
+ if (HAVE_ALTIVEC) ff_h264dsp_init_ppc(c, bit_depth, chroma_format_idc); |
|
| 110 |
+ if (HAVE_MMX) ff_h264dsp_init_x86(c, bit_depth, chroma_format_idc); |
|
| 111 | 111 |
} |
| ... | ... |
@@ -74,9 +74,9 @@ typedef struct H264DSPContext{
|
| 74 | 74 |
void (*h264_chroma_dc_dequant_idct)(DCTELEM *block, int qmul); |
| 75 | 75 |
}H264DSPContext; |
| 76 | 76 |
|
| 77 |
-void ff_h264dsp_init(H264DSPContext *c, const int bit_depth); |
|
| 78 |
-void ff_h264dsp_init_arm(H264DSPContext *c, const int bit_depth); |
|
| 79 |
-void ff_h264dsp_init_ppc(H264DSPContext *c, const int bit_depth); |
|
| 80 |
-void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth); |
|
| 77 |
+void ff_h264dsp_init(H264DSPContext *c, const int bit_depth, const int chroma_format_idc); |
|
| 78 |
+void ff_h264dsp_init_arm(H264DSPContext *c, const int bit_depth, const int chroma_format_idc); |
|
| 79 |
+void ff_h264dsp_init_ppc(H264DSPContext *c, const int bit_depth, const int chroma_format_idc); |
|
| 80 |
+void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth, const int chroma_format_idc); |
|
| 81 | 81 |
|
| 82 | 82 |
#endif /* AVCODEC_H264DSP_H */ |
| ... | ... |
@@ -275,6 +275,14 @@ static void FUNCC(h264_h_loop_filter_chroma_mbaff)(uint8_t *pix, int stride, int |
| 275 | 275 |
{
|
| 276 | 276 |
FUNCC(h264_loop_filter_chroma)(pix, sizeof(pixel), stride, 1, alpha, beta, tc0); |
| 277 | 277 |
} |
| 278 |
+static void FUNCC(h264_h_loop_filter_chroma422)(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0) |
|
| 279 |
+{
|
|
| 280 |
+ FUNCC(h264_loop_filter_chroma)(pix, sizeof(pixel), stride, 4, alpha, beta, tc0); |
|
| 281 |
+} |
|
| 282 |
+static void FUNCC(h264_h_loop_filter_chroma422_mbaff)(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0) |
|
| 283 |
+{
|
|
| 284 |
+ FUNCC(h264_loop_filter_chroma)(pix, sizeof(pixel), stride, 2, alpha, beta, tc0); |
|
| 285 |
+} |
|
| 278 | 286 |
|
| 279 | 287 |
static av_always_inline av_flatten void FUNCC(h264_loop_filter_chroma_intra)(uint8_t *_pix, int xstride, int ystride, int inner_iters, int alpha, int beta) |
| 280 | 288 |
{
|
| ... | ... |
@@ -312,3 +320,11 @@ static void FUNCC(h264_h_loop_filter_chroma_mbaff_intra)(uint8_t *pix, int strid |
| 312 | 312 |
{
|
| 313 | 313 |
FUNCC(h264_loop_filter_chroma_intra)(pix, sizeof(pixel), stride, 1, alpha, beta); |
| 314 | 314 |
} |
| 315 |
+static void FUNCC(h264_h_loop_filter_chroma422_intra)(uint8_t *pix, int stride, int alpha, int beta) |
|
| 316 |
+{
|
|
| 317 |
+ FUNCC(h264_loop_filter_chroma_intra)(pix, sizeof(pixel), stride, 4, alpha, beta); |
|
| 318 |
+} |
|
| 319 |
+static void FUNCC(h264_h_loop_filter_chroma422_mbaff_intra)(uint8_t *pix, int stride, int alpha, int beta) |
|
| 320 |
+{
|
|
| 321 |
+ FUNCC(h264_loop_filter_chroma_intra)(pix, sizeof(pixel), stride, 2, alpha, beta); |
|
| 322 |
+} |
| ... | ... |
@@ -224,6 +224,29 @@ void FUNCC(ff_h264_idct_add8)(uint8_t **dest, const int *block_offset, DCTELEM * |
| 224 | 224 |
} |
| 225 | 225 |
} |
| 226 | 226 |
} |
| 227 |
+ |
|
| 228 |
+void FUNCC(ff_h264_idct_add8_422)(uint8_t **dest, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[15*8]){
|
|
| 229 |
+ int i, j; |
|
| 230 |
+ |
|
| 231 |
+ for(j=1; j<3; j++){
|
|
| 232 |
+ for(i=j*16; i<j*16+4; i++){
|
|
| 233 |
+ if(nnzc[ scan8[i] ]) |
|
| 234 |
+ FUNCC(ff_h264_idct_add )(dest[j-1] + block_offset[i], block + i*16*sizeof(pixel), stride); |
|
| 235 |
+ else if(((dctcoef*)block)[i*16]) |
|
| 236 |
+ FUNCC(ff_h264_idct_dc_add)(dest[j-1] + block_offset[i], block + i*16*sizeof(pixel), stride); |
|
| 237 |
+ } |
|
| 238 |
+ } |
|
| 239 |
+ |
|
| 240 |
+ for(j=1; j<3; j++){
|
|
| 241 |
+ for(i=j*16+4; i<j*16+8; i++){
|
|
| 242 |
+ if(nnzc[ scan8[i+4] ]) |
|
| 243 |
+ FUNCC(ff_h264_idct_add )(dest[j-1] + block_offset[i+4], block + i*16*sizeof(pixel), stride); |
|
| 244 |
+ else if(((dctcoef*)block)[i*16]) |
|
| 245 |
+ FUNCC(ff_h264_idct_dc_add)(dest[j-1] + block_offset[i+4], block + i*16*sizeof(pixel), stride); |
|
| 246 |
+ } |
|
| 247 |
+ } |
|
| 248 |
+} |
|
| 249 |
+ |
|
| 227 | 250 |
/** |
| 228 | 251 |
* IDCT transforms the 16 dc values and dequantizes them. |
| 229 | 252 |
* @param qmul quantization parameter |
| ... | ... |
@@ -263,6 +286,33 @@ void FUNCC(ff_h264_luma_dc_dequant_idct)(DCTELEM *_output, DCTELEM *_input, int |
| 263 | 263 |
#undef stride |
| 264 | 264 |
} |
| 265 | 265 |
|
| 266 |
+void FUNCC(ff_h264_chroma422_dc_dequant_idct)(DCTELEM *_block, int qmul){
|
|
| 267 |
+ const int stride= 16*2; |
|
| 268 |
+ const int xStride= 16; |
|
| 269 |
+ int i; |
|
| 270 |
+ int temp[8]; |
|
| 271 |
+ static const uint8_t x_offset[2]={0, 16};
|
|
| 272 |
+ dctcoef *block = (dctcoef*)_block; |
|
| 273 |
+ |
|
| 274 |
+ for(i=0; i<4; i++){
|
|
| 275 |
+ temp[2*i+0] = block[stride*i + xStride*0] + block[stride*i + xStride*1]; |
|
| 276 |
+ temp[2*i+1] = block[stride*i + xStride*0] - block[stride*i + xStride*1]; |
|
| 277 |
+ } |
|
| 278 |
+ |
|
| 279 |
+ for(i=0; i<2; i++){
|
|
| 280 |
+ const int offset= x_offset[i]; |
|
| 281 |
+ const int z0= temp[2*0+i] + temp[2*2+i]; |
|
| 282 |
+ const int z1= temp[2*0+i] - temp[2*2+i]; |
|
| 283 |
+ const int z2= temp[2*1+i] - temp[2*3+i]; |
|
| 284 |
+ const int z3= temp[2*1+i] + temp[2*3+i]; |
|
| 285 |
+ |
|
| 286 |
+ block[stride*0+offset]= ((z0 + z3)*qmul + 128) >> 8; |
|
| 287 |
+ block[stride*1+offset]= ((z1 + z2)*qmul + 128) >> 8; |
|
| 288 |
+ block[stride*2+offset]= ((z1 - z2)*qmul + 128) >> 8; |
|
| 289 |
+ block[stride*3+offset]= ((z0 - z3)*qmul + 128) >> 8; |
|
| 290 |
+ } |
|
| 291 |
+} |
|
| 292 |
+ |
|
| 266 | 293 |
void FUNCC(ff_h264_chroma_dc_dequant_idct)(DCTELEM *_block, int qmul){
|
| 267 | 294 |
const int stride= 16*2; |
| 268 | 295 |
const int xStride= 16; |
| ... | ... |
@@ -361,7 +361,7 @@ static void pred8x8_tm_vp8_c(uint8_t *src, int stride){
|
| 361 | 361 |
/** |
| 362 | 362 |
* Set the intra prediction function pointers. |
| 363 | 363 |
*/ |
| 364 |
-void ff_h264_pred_init(H264PredContext *h, int codec_id, const int bit_depth){
|
|
| 364 |
+void ff_h264_pred_init(H264PredContext *h, int codec_id, const int bit_depth, const int chroma_format_idc){
|
|
| 365 | 365 |
// MpegEncContext * const s = &h->s; |
| 366 | 366 |
|
| 367 | 367 |
#undef FUNC |
| ... | ... |
@@ -434,20 +434,39 @@ void ff_h264_pred_init(H264PredContext *h, int codec_id, const int bit_depth){
|
| 434 | 434 |
h->pred8x8l[TOP_DC_PRED ]= FUNCC(pred8x8l_top_dc , depth);\ |
| 435 | 435 |
h->pred8x8l[DC_128_PRED ]= FUNCC(pred8x8l_128_dc , depth);\ |
| 436 | 436 |
\ |
| 437 |
- h->pred8x8[VERT_PRED8x8 ]= FUNCC(pred8x8_vertical , depth);\ |
|
| 438 |
- h->pred8x8[HOR_PRED8x8 ]= FUNCC(pred8x8_horizontal , depth);\ |
|
| 437 |
+ if (chroma_format_idc == 1) {\
|
|
| 438 |
+ h->pred8x8[VERT_PRED8x8 ]= FUNCC(pred8x8_vertical , depth);\ |
|
| 439 |
+ h->pred8x8[HOR_PRED8x8 ]= FUNCC(pred8x8_horizontal , depth);\ |
|
| 440 |
+ } else {\
|
|
| 441 |
+ h->pred8x8[VERT_PRED8x8 ]= FUNCC(pred8x16_vertical , depth);\ |
|
| 442 |
+ h->pred8x8[HOR_PRED8x8 ]= FUNCC(pred8x16_horizontal , depth);\ |
|
| 443 |
+ }\ |
|
| 439 | 444 |
if (codec_id != CODEC_ID_VP8) {\
|
| 440 |
- h->pred8x8[PLANE_PRED8x8]= FUNCC(pred8x8_plane , depth);\ |
|
| 445 |
+ if (chroma_format_idc == 1) {\
|
|
| 446 |
+ h->pred8x8[PLANE_PRED8x8]= FUNCC(pred8x8_plane , depth);\ |
|
| 447 |
+ } else {\
|
|
| 448 |
+ h->pred8x8[PLANE_PRED8x8]= FUNCC(pred8x16_plane , depth);\ |
|
| 449 |
+ }\ |
|
| 441 | 450 |
} else\ |
| 442 | 451 |
h->pred8x8[PLANE_PRED8x8]= FUNCD(pred8x8_tm_vp8);\ |
| 443 | 452 |
if(codec_id != CODEC_ID_RV40 && codec_id != CODEC_ID_VP8){\
|
| 444 |
- h->pred8x8[DC_PRED8x8 ]= FUNCC(pred8x8_dc , depth);\ |
|
| 445 |
- h->pred8x8[LEFT_DC_PRED8x8]= FUNCC(pred8x8_left_dc , depth);\ |
|
| 446 |
- h->pred8x8[TOP_DC_PRED8x8 ]= FUNCC(pred8x8_top_dc , depth);\ |
|
| 447 |
- h->pred8x8[ALZHEIMER_DC_L0T_PRED8x8 ]= FUNC(pred8x8_mad_cow_dc_l0t, depth);\ |
|
| 448 |
- h->pred8x8[ALZHEIMER_DC_0LT_PRED8x8 ]= FUNC(pred8x8_mad_cow_dc_0lt, depth);\ |
|
| 449 |
- h->pred8x8[ALZHEIMER_DC_L00_PRED8x8 ]= FUNC(pred8x8_mad_cow_dc_l00, depth);\ |
|
| 450 |
- h->pred8x8[ALZHEIMER_DC_0L0_PRED8x8 ]= FUNC(pred8x8_mad_cow_dc_0l0, depth);\ |
|
| 453 |
+ if (chroma_format_idc == 1) {\
|
|
| 454 |
+ h->pred8x8[DC_PRED8x8 ]= FUNCC(pred8x8_dc , depth);\ |
|
| 455 |
+ h->pred8x8[LEFT_DC_PRED8x8]= FUNCC(pred8x8_left_dc , depth);\ |
|
| 456 |
+ h->pred8x8[TOP_DC_PRED8x8 ]= FUNCC(pred8x8_top_dc , depth);\ |
|
| 457 |
+ h->pred8x8[ALZHEIMER_DC_L0T_PRED8x8 ]= FUNC(pred8x8_mad_cow_dc_l0t, depth);\ |
|
| 458 |
+ h->pred8x8[ALZHEIMER_DC_0LT_PRED8x8 ]= FUNC(pred8x8_mad_cow_dc_0lt, depth);\ |
|
| 459 |
+ h->pred8x8[ALZHEIMER_DC_L00_PRED8x8 ]= FUNC(pred8x8_mad_cow_dc_l00, depth);\ |
|
| 460 |
+ h->pred8x8[ALZHEIMER_DC_0L0_PRED8x8 ]= FUNC(pred8x8_mad_cow_dc_0l0, depth);\ |
|
| 461 |
+ } else {\
|
|
| 462 |
+ h->pred8x8[DC_PRED8x8 ]= FUNCC(pred8x16_dc , depth);\ |
|
| 463 |
+ h->pred8x8[LEFT_DC_PRED8x8]= FUNCC(pred8x16_left_dc , depth);\ |
|
| 464 |
+ h->pred8x8[TOP_DC_PRED8x8 ]= FUNCC(pred8x16_top_dc , depth);\ |
|
| 465 |
+ h->pred8x8[ALZHEIMER_DC_L0T_PRED8x8 ]= FUNC(pred8x8_mad_cow_dc_l0t, depth);\ |
|
| 466 |
+ h->pred8x8[ALZHEIMER_DC_0LT_PRED8x8 ]= FUNC(pred8x8_mad_cow_dc_0lt, depth);\ |
|
| 467 |
+ h->pred8x8[ALZHEIMER_DC_L00_PRED8x8 ]= FUNC(pred8x8_mad_cow_dc_l00, depth);\ |
|
| 468 |
+ h->pred8x8[ALZHEIMER_DC_0L0_PRED8x8 ]= FUNC(pred8x8_mad_cow_dc_0l0, depth);\ |
|
| 469 |
+ }\ |
|
| 451 | 470 |
}else{\
|
| 452 | 471 |
h->pred8x8[DC_PRED8x8 ]= FUNCD(pred8x8_dc_rv40);\ |
| 453 | 472 |
h->pred8x8[LEFT_DC_PRED8x8]= FUNCD(pred8x8_left_dc_rv40);\ |
| ... | ... |
@@ -457,7 +476,11 @@ void ff_h264_pred_init(H264PredContext *h, int codec_id, const int bit_depth){
|
| 457 | 457 |
h->pred8x8[DC_129_PRED8x8]= FUNCC(pred8x8_129_dc , depth);\ |
| 458 | 458 |
}\ |
| 459 | 459 |
}\ |
| 460 |
- h->pred8x8[DC_128_PRED8x8 ]= FUNCC(pred8x8_128_dc , depth);\ |
|
| 460 |
+ if (chroma_format_idc == 1) {\
|
|
| 461 |
+ h->pred8x8[DC_128_PRED8x8 ]= FUNCC(pred8x8_128_dc , depth);\ |
|
| 462 |
+ } else {\
|
|
| 463 |
+ h->pred8x8[DC_128_PRED8x8 ]= FUNCC(pred8x16_128_dc , depth);\ |
|
| 464 |
+ }\ |
|
| 461 | 465 |
\ |
| 462 | 466 |
h->pred16x16[DC_PRED8x8 ]= FUNCC(pred16x16_dc , depth);\ |
| 463 | 467 |
h->pred16x16[VERT_PRED8x8 ]= FUNCC(pred16x16_vertical , depth);\ |
| ... | ... |
@@ -504,6 +527,6 @@ void ff_h264_pred_init(H264PredContext *h, int codec_id, const int bit_depth){
|
| 504 | 504 |
break; |
| 505 | 505 |
} |
| 506 | 506 |
|
| 507 |
- if (ARCH_ARM) ff_h264_pred_init_arm(h, codec_id, bit_depth); |
|
| 508 |
- if (HAVE_MMX) ff_h264_pred_init_x86(h, codec_id, bit_depth); |
|
| 507 |
+ if (ARCH_ARM) ff_h264_pred_init_arm(h, codec_id, bit_depth, chroma_format_idc); |
|
| 508 |
+ if (HAVE_MMX) ff_h264_pred_init_x86(h, codec_id, bit_depth, chroma_format_idc); |
|
| 509 | 509 |
} |
| ... | ... |
@@ -101,8 +101,8 @@ typedef struct H264PredContext{
|
| 101 | 101 |
void (*pred16x16_add[3])(uint8_t *pix/*align 16*/, const int *block_offset, const DCTELEM *block/*align 16*/, int stride); |
| 102 | 102 |
}H264PredContext; |
| 103 | 103 |
|
| 104 |
-void ff_h264_pred_init(H264PredContext *h, int codec_id, const int bit_depth); |
|
| 105 |
-void ff_h264_pred_init_arm(H264PredContext *h, int codec_id, const int bit_depth); |
|
| 106 |
-void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth); |
|
| 104 |
+void ff_h264_pred_init(H264PredContext *h, int codec_id, const int bit_depth, const int chroma_format_idc); |
|
| 105 |
+void ff_h264_pred_init_arm(H264PredContext *h, int codec_id, const int bit_depth, const int chroma_format_idc); |
|
| 106 |
+void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth, const int chroma_format_idc); |
|
| 107 | 107 |
|
| 108 | 108 |
#endif /* AVCODEC_H264PRED_H */ |
| ... | ... |
@@ -454,6 +454,19 @@ static void FUNCC(pred8x8_vertical)(uint8_t *_src, int _stride){
|
| 454 | 454 |
} |
| 455 | 455 |
} |
| 456 | 456 |
|
| 457 |
+static void FUNCC(pred8x16_vertical)(uint8_t *_src, int _stride){
|
|
| 458 |
+ int i; |
|
| 459 |
+ pixel *src = (pixel*)_src; |
|
| 460 |
+ int stride = _stride>>(sizeof(pixel)-1); |
|
| 461 |
+ const pixel4 a= AV_RN4PA(((pixel4*)(src-stride))+0); |
|
| 462 |
+ const pixel4 b= AV_RN4PA(((pixel4*)(src-stride))+1); |
|
| 463 |
+ |
|
| 464 |
+ for(i=0; i<16; i++){
|
|
| 465 |
+ AV_WN4PA(((pixel4*)(src+i*stride))+0, a); |
|
| 466 |
+ AV_WN4PA(((pixel4*)(src+i*stride))+1, b); |
|
| 467 |
+ } |
|
| 468 |
+} |
|
| 469 |
+ |
|
| 457 | 470 |
static void FUNCC(pred8x8_horizontal)(uint8_t *_src, int stride){
|
| 458 | 471 |
int i; |
| 459 | 472 |
pixel *src = (pixel*)_src; |
| ... | ... |
@@ -466,6 +479,17 @@ static void FUNCC(pred8x8_horizontal)(uint8_t *_src, int stride){
|
| 466 | 466 |
} |
| 467 | 467 |
} |
| 468 | 468 |
|
| 469 |
+static void FUNCC(pred8x16_horizontal)(uint8_t *_src, int stride){
|
|
| 470 |
+ int i; |
|
| 471 |
+ pixel *src = (pixel*)_src; |
|
| 472 |
+ stride >>= sizeof(pixel)-1; |
|
| 473 |
+ for(i=0; i<16; i++){
|
|
| 474 |
+ const pixel4 a = PIXEL_SPLAT_X4(src[-1+i*stride]); |
|
| 475 |
+ AV_WN4PA(((pixel4*)(src+i*stride))+0, a); |
|
| 476 |
+ AV_WN4PA(((pixel4*)(src+i*stride))+1, a); |
|
| 477 |
+ } |
|
| 478 |
+} |
|
| 479 |
+ |
|
| 469 | 480 |
#define PRED8x8_X(n, v)\ |
| 470 | 481 |
static void FUNCC(pred8x8_##n##_dc)(uint8_t *_src, int stride){\
|
| 471 | 482 |
int i;\ |
| ... | ... |
@@ -482,6 +506,11 @@ PRED8x8_X(127, (1<<(BIT_DEPTH-1))-1); |
| 482 | 482 |
PRED8x8_X(128, (1<<(BIT_DEPTH-1))+0); |
| 483 | 483 |
PRED8x8_X(129, (1<<(BIT_DEPTH-1))+1); |
| 484 | 484 |
|
| 485 |
+static void FUNCC(pred8x16_128_dc)(uint8_t *_src, int stride){
|
|
| 486 |
+ FUNCC(pred8x8_128_dc)(_src, stride); |
|
| 487 |
+ FUNCC(pred8x8_128_dc)(_src+8*stride, stride); |
|
| 488 |
+} |
|
| 489 |
+ |
|
| 485 | 490 |
static void FUNCC(pred8x8_left_dc)(uint8_t *_src, int stride){
|
| 486 | 491 |
int i; |
| 487 | 492 |
int dc0, dc2; |
| ... | ... |
@@ -507,6 +536,11 @@ static void FUNCC(pred8x8_left_dc)(uint8_t *_src, int stride){
|
| 507 | 507 |
} |
| 508 | 508 |
} |
| 509 | 509 |
|
| 510 |
+static void FUNCC(pred8x16_left_dc)(uint8_t *_src, int stride){
|
|
| 511 |
+ FUNCC(pred8x8_left_dc)(_src, stride); |
|
| 512 |
+ FUNCC(pred8x8_left_dc)(_src+8*stride, stride); |
|
| 513 |
+} |
|
| 514 |
+ |
|
| 510 | 515 |
static void FUNCC(pred8x8_top_dc)(uint8_t *_src, int stride){
|
| 511 | 516 |
int i; |
| 512 | 517 |
int dc0, dc1; |
| ... | ... |
@@ -532,6 +566,27 @@ static void FUNCC(pred8x8_top_dc)(uint8_t *_src, int stride){
|
| 532 | 532 |
} |
| 533 | 533 |
} |
| 534 | 534 |
|
| 535 |
+static void FUNCC(pred8x16_top_dc)(uint8_t *_src, int stride){
|
|
| 536 |
+ int i; |
|
| 537 |
+ int dc0, dc1; |
|
| 538 |
+ pixel4 dc0splat, dc1splat; |
|
| 539 |
+ pixel *src = (pixel*)_src; |
|
| 540 |
+ stride >>= sizeof(pixel)-1; |
|
| 541 |
+ |
|
| 542 |
+ dc0=dc1=0; |
|
| 543 |
+ for(i=0;i<4; i++){
|
|
| 544 |
+ dc0+= src[i-stride]; |
|
| 545 |
+ dc1+= src[4+i-stride]; |
|
| 546 |
+ } |
|
| 547 |
+ dc0splat = PIXEL_SPLAT_X4((dc0 + 2)>>2); |
|
| 548 |
+ dc1splat = PIXEL_SPLAT_X4((dc1 + 2)>>2); |
|
| 549 |
+ |
|
| 550 |
+ for(i=0; i<16; i++){
|
|
| 551 |
+ AV_WN4PA(((pixel4*)(src+i*stride))+0, dc0splat); |
|
| 552 |
+ AV_WN4PA(((pixel4*)(src+i*stride))+1, dc1splat); |
|
| 553 |
+ } |
|
| 554 |
+} |
|
| 555 |
+ |
|
| 535 | 556 |
static void FUNCC(pred8x8_dc)(uint8_t *_src, int stride){
|
| 536 | 557 |
int i; |
| 537 | 558 |
int dc0, dc1, dc2; |
| ... | ... |
@@ -560,6 +615,48 @@ static void FUNCC(pred8x8_dc)(uint8_t *_src, int stride){
|
| 560 | 560 |
} |
| 561 | 561 |
} |
| 562 | 562 |
|
| 563 |
+static void FUNCC(pred8x16_dc)(uint8_t *_src, int stride){
|
|
| 564 |
+ int i; |
|
| 565 |
+ int dc0, dc1, dc2, dc3, dc4; |
|
| 566 |
+ pixel4 dc0splat, dc1splat, dc2splat, dc3splat, dc4splat, dc5splat, dc6splat, dc7splat; |
|
| 567 |
+ pixel *src = (pixel*)_src; |
|
| 568 |
+ stride >>= sizeof(pixel)-1; |
|
| 569 |
+ |
|
| 570 |
+ dc0=dc1=dc2=dc3=dc4=0; |
|
| 571 |
+ for(i=0;i<4; i++){
|
|
| 572 |
+ dc0+= src[-1+i*stride] + src[i-stride]; |
|
| 573 |
+ dc1+= src[4+i-stride]; |
|
| 574 |
+ dc2+= src[-1+(i+4)*stride]; |
|
| 575 |
+ dc3+= src[-1+(i+8)*stride]; |
|
| 576 |
+ dc4+= src[-1+(i+12)*stride]; |
|
| 577 |
+ } |
|
| 578 |
+ dc0splat = PIXEL_SPLAT_X4((dc0 + 4)>>3); |
|
| 579 |
+ dc1splat = PIXEL_SPLAT_X4((dc1 + 2)>>2); |
|
| 580 |
+ dc2splat = PIXEL_SPLAT_X4((dc2 + 2)>>2); |
|
| 581 |
+ dc3splat = PIXEL_SPLAT_X4((dc1 + dc2 + 4)>>3); |
|
| 582 |
+ dc4splat = PIXEL_SPLAT_X4((dc3 + 2)>>2); |
|
| 583 |
+ dc5splat = PIXEL_SPLAT_X4((dc1 + dc3 + 4)>>3); |
|
| 584 |
+ dc6splat = PIXEL_SPLAT_X4((dc4 + 2)>>2); |
|
| 585 |
+ dc7splat = PIXEL_SPLAT_X4((dc1 + dc4 + 4)>>3); |
|
| 586 |
+ |
|
| 587 |
+ for(i=0; i<4; i++){
|
|
| 588 |
+ AV_WN4PA(((pixel4*)(src+i*stride))+0, dc0splat); |
|
| 589 |
+ AV_WN4PA(((pixel4*)(src+i*stride))+1, dc1splat); |
|
| 590 |
+ } |
|
| 591 |
+ for(i=4; i<8; i++){
|
|
| 592 |
+ AV_WN4PA(((pixel4*)(src+i*stride))+0, dc2splat); |
|
| 593 |
+ AV_WN4PA(((pixel4*)(src+i*stride))+1, dc3splat); |
|
| 594 |
+ } |
|
| 595 |
+ for(i=8; i<12; i++){
|
|
| 596 |
+ AV_WN4PA(((pixel4*)(src+i*stride))+0, dc4splat); |
|
| 597 |
+ AV_WN4PA(((pixel4*)(src+i*stride))+1, dc5splat); |
|
| 598 |
+ } |
|
| 599 |
+ for(i=12; i<16; i++){
|
|
| 600 |
+ AV_WN4PA(((pixel4*)(src+i*stride))+0, dc6splat); |
|
| 601 |
+ AV_WN4PA(((pixel4*)(src+i*stride))+1, dc7splat); |
|
| 602 |
+ } |
|
| 603 |
+} |
|
| 604 |
+ |
|
| 563 | 605 |
//the following 4 functions should not be optimized! |
| 564 | 606 |
static void FUNC(pred8x8_mad_cow_dc_l0t)(uint8_t *src, int stride){
|
| 565 | 607 |
FUNCC(pred8x8_top_dc)(src, stride); |
| ... | ... |
@@ -618,6 +715,47 @@ static void FUNCC(pred8x8_plane)(uint8_t *_src, int _stride){
|
| 618 | 618 |
} |
| 619 | 619 |
} |
| 620 | 620 |
|
| 621 |
+static void FUNCC(pred8x16_plane)(uint8_t *_src, int _stride){
|
|
| 622 |
+ int j, k; |
|
| 623 |
+ int a; |
|
| 624 |
+ INIT_CLIP |
|
| 625 |
+ pixel *src = (pixel*)_src; |
|
| 626 |
+ int stride = _stride>>(sizeof(pixel)-1); |
|
| 627 |
+ const pixel * const src0 = src +3-stride; |
|
| 628 |
+ const pixel * src1 = src +8*stride-1; |
|
| 629 |
+ const pixel * src2 = src1-2*stride; // == src+6*stride-1; |
|
| 630 |
+ int H = src0[1] - src0[-1]; |
|
| 631 |
+ int V = src1[0] - src2[ 0]; |
|
| 632 |
+ |
|
| 633 |
+ for (k = 2; k <= 4; ++k) {
|
|
| 634 |
+ src1 += stride; src2 -= stride; |
|
| 635 |
+ H += k*(src0[k] - src0[-k]); |
|
| 636 |
+ V += k*(src1[0] - src2[ 0]); |
|
| 637 |
+ } |
|
| 638 |
+ for (; k <= 8; ++k) {
|
|
| 639 |
+ src1 += stride; src2 -= stride; |
|
| 640 |
+ V += k*(src1[0] - src2[0]); |
|
| 641 |
+ } |
|
| 642 |
+ |
|
| 643 |
+ H = (17*H+16) >> 5; |
|
| 644 |
+ V = (5*V+32) >> 6; |
|
| 645 |
+ |
|
| 646 |
+ a = 16*(src1[0] + src2[8] + 1) - 7*V - 3*H; |
|
| 647 |
+ for(j=16; j>0; --j) {
|
|
| 648 |
+ int b = a; |
|
| 649 |
+ a += V; |
|
| 650 |
+ src[0] = CLIP((b ) >> 5); |
|
| 651 |
+ src[1] = CLIP((b+ H) >> 5); |
|
| 652 |
+ src[2] = CLIP((b+2*H) >> 5); |
|
| 653 |
+ src[3] = CLIP((b+3*H) >> 5); |
|
| 654 |
+ src[4] = CLIP((b+4*H) >> 5); |
|
| 655 |
+ src[5] = CLIP((b+5*H) >> 5); |
|
| 656 |
+ src[6] = CLIP((b+6*H) >> 5); |
|
| 657 |
+ src[7] = CLIP((b+7*H) >> 5); |
|
| 658 |
+ src += stride; |
|
| 659 |
+ } |
|
| 660 |
+} |
|
| 661 |
+ |
|
| 621 | 662 |
#define SRC(x,y) src[(x)+(y)*stride] |
| 622 | 663 |
#define PL(y) \ |
| 623 | 664 |
const int l##y = (SRC(-1,y-1) + 2*SRC(-1,y) + SRC(-1,y+1) + 2) >> 2; |
| ... | ... |
@@ -999,12 +999,13 @@ void dsputil_h264_init_ppc(DSPContext* c, AVCodecContext *avctx) {
|
| 999 | 999 |
} |
| 1000 | 1000 |
} |
| 1001 | 1001 |
|
| 1002 |
-void ff_h264dsp_init_ppc(H264DSPContext *c, const int bit_depth) |
|
| 1002 |
+void ff_h264dsp_init_ppc(H264DSPContext *c, const int bit_depth, const int chroma_format_idc) |
|
| 1003 | 1003 |
{
|
| 1004 | 1004 |
if (av_get_cpu_flags() & AV_CPU_FLAG_ALTIVEC) {
|
| 1005 | 1005 |
if (bit_depth == 8) {
|
| 1006 | 1006 |
c->h264_idct_add = ff_h264_idct_add_altivec; |
| 1007 |
- c->h264_idct_add8 = ff_h264_idct_add8_altivec; |
|
| 1007 |
+ if (chroma_format_idc == 1) |
|
| 1008 |
+ c->h264_idct_add8 = ff_h264_idct_add8_altivec; |
|
| 1008 | 1009 |
c->h264_idct_add16 = ff_h264_idct_add16_altivec; |
| 1009 | 1010 |
c->h264_idct_add16intra = ff_h264_idct_add16intra_altivec; |
| 1010 | 1011 |
c->h264_idct_dc_add= h264_idct_dc_add_altivec; |
| ... | ... |
@@ -1343,7 +1343,7 @@ av_cold int ff_rv34_decode_init(AVCodecContext *avctx) |
| 1343 | 1343 |
if (MPV_common_init(s) < 0) |
| 1344 | 1344 |
return -1; |
| 1345 | 1345 |
|
| 1346 |
- ff_h264_pred_init(&r->h, CODEC_ID_RV40, 8); |
|
| 1346 |
+ ff_h264_pred_init(&r->h, CODEC_ID_RV40, 8, 1); |
|
| 1347 | 1347 |
|
| 1348 | 1348 |
#if CONFIG_RV30_DECODER |
| 1349 | 1349 |
if (avctx->codec_id == CODEC_ID_RV30) |
| ... | ... |
@@ -1769,7 +1769,7 @@ static av_cold int vp8_decode_init(AVCodecContext *avctx) |
| 1769 | 1769 |
avctx->pix_fmt = PIX_FMT_YUV420P; |
| 1770 | 1770 |
|
| 1771 | 1771 |
dsputil_init(&s->dsp, avctx); |
| 1772 |
- ff_h264_pred_init(&s->hpc, CODEC_ID_VP8, 8); |
|
| 1772 |
+ ff_h264_pred_init(&s->hpc, CODEC_ID_VP8, 8, 1); |
|
| 1773 | 1773 |
ff_vp8dsp_init(&s->vp8dsp); |
| 1774 | 1774 |
|
| 1775 | 1775 |
return 0; |
| ... | ... |
@@ -167,7 +167,7 @@ void ff_pred4x4_tm_vp8_mmxext (uint8_t *src, const uint8_t *topright, int s |
| 167 | 167 |
void ff_pred4x4_tm_vp8_ssse3 (uint8_t *src, const uint8_t *topright, int stride); |
| 168 | 168 |
void ff_pred4x4_vertical_vp8_mmxext(uint8_t *src, const uint8_t *topright, int stride); |
| 169 | 169 |
|
| 170 |
-void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth) |
|
| 170 |
+void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth, const int chroma_format_idc) |
|
| 171 | 171 |
{
|
| 172 | 172 |
#if HAVE_YASM |
| 173 | 173 |
int mm_flags = av_get_cpu_flags(); |
| ... | ... |
@@ -176,14 +176,17 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth |
| 176 | 176 |
if (mm_flags & AV_CPU_FLAG_MMX) {
|
| 177 | 177 |
h->pred16x16[VERT_PRED8x8 ] = ff_pred16x16_vertical_mmx; |
| 178 | 178 |
h->pred16x16[HOR_PRED8x8 ] = ff_pred16x16_horizontal_mmx; |
| 179 |
- h->pred8x8 [VERT_PRED8x8 ] = ff_pred8x8_vertical_mmx; |
|
| 180 |
- h->pred8x8 [HOR_PRED8x8 ] = ff_pred8x8_horizontal_mmx; |
|
| 179 |
+ if (chroma_format_idc == 1) {
|
|
| 180 |
+ h->pred8x8 [VERT_PRED8x8 ] = ff_pred8x8_vertical_mmx; |
|
| 181 |
+ h->pred8x8 [HOR_PRED8x8 ] = ff_pred8x8_horizontal_mmx; |
|
| 182 |
+ } |
|
| 181 | 183 |
if (codec_id == CODEC_ID_VP8) {
|
| 182 | 184 |
h->pred16x16[PLANE_PRED8x8 ] = ff_pred16x16_tm_vp8_mmx; |
| 183 | 185 |
h->pred8x8 [PLANE_PRED8x8 ] = ff_pred8x8_tm_vp8_mmx; |
| 184 | 186 |
h->pred4x4 [TM_VP8_PRED ] = ff_pred4x4_tm_vp8_mmx; |
| 185 | 187 |
} else {
|
| 186 |
- h->pred8x8 [PLANE_PRED8x8] = ff_pred8x8_plane_mmx; |
|
| 188 |
+ if (chroma_format_idc == 1) |
|
| 189 |
+ h->pred8x8 [PLANE_PRED8x8] = ff_pred8x8_plane_mmx; |
|
| 187 | 190 |
if (codec_id == CODEC_ID_SVQ3) {
|
| 188 | 191 |
h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_svq3_mmx; |
| 189 | 192 |
} else if (codec_id == CODEC_ID_RV40) {
|
| ... | ... |
@@ -197,7 +200,8 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth |
| 197 | 197 |
if (mm_flags & AV_CPU_FLAG_MMX2) {
|
| 198 | 198 |
h->pred16x16[HOR_PRED8x8 ] = ff_pred16x16_horizontal_mmxext; |
| 199 | 199 |
h->pred16x16[DC_PRED8x8 ] = ff_pred16x16_dc_mmxext; |
| 200 |
- h->pred8x8 [HOR_PRED8x8 ] = ff_pred8x8_horizontal_mmxext; |
|
| 200 |
+ if (chroma_format_idc == 1) |
|
| 201 |
+ h->pred8x8[HOR_PRED8x8 ] = ff_pred8x8_horizontal_mmxext; |
|
| 201 | 202 |
h->pred8x8l [TOP_DC_PRED ] = ff_pred8x8l_top_dc_mmxext; |
| 202 | 203 |
h->pred8x8l [DC_PRED ] = ff_pred8x8l_dc_mmxext; |
| 203 | 204 |
h->pred8x8l [HOR_PRED ] = ff_pred8x8l_horizontal_mmxext; |
| ... | ... |
@@ -221,8 +225,10 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth |
| 221 | 221 |
h->pred4x4 [HOR_UP_PRED ] = ff_pred4x4_horizontal_up_mmxext; |
| 222 | 222 |
} |
| 223 | 223 |
if (codec_id == CODEC_ID_SVQ3 || codec_id == CODEC_ID_H264) {
|
| 224 |
- h->pred8x8 [TOP_DC_PRED8x8 ] = ff_pred8x8_top_dc_mmxext; |
|
| 225 |
- h->pred8x8 [DC_PRED8x8 ] = ff_pred8x8_dc_mmxext; |
|
| 224 |
+ if (chroma_format_idc == 1) {
|
|
| 225 |
+ h->pred8x8[TOP_DC_PRED8x8 ] = ff_pred8x8_top_dc_mmxext; |
|
| 226 |
+ h->pred8x8[DC_PRED8x8 ] = ff_pred8x8_dc_mmxext; |
|
| 227 |
+ } |
|
| 226 | 228 |
} |
| 227 | 229 |
if (codec_id == CODEC_ID_VP8) {
|
| 228 | 230 |
h->pred16x16[PLANE_PRED8x8 ] = ff_pred16x16_tm_vp8_mmxext; |
| ... | ... |
@@ -231,7 +237,8 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth |
| 231 | 231 |
h->pred4x4 [TM_VP8_PRED ] = ff_pred4x4_tm_vp8_mmxext; |
| 232 | 232 |
h->pred4x4 [VERT_PRED ] = ff_pred4x4_vertical_vp8_mmxext; |
| 233 | 233 |
} else {
|
| 234 |
- h->pred8x8 [PLANE_PRED8x8] = ff_pred8x8_plane_mmx2; |
|
| 234 |
+ if (chroma_format_idc == 1) |
|
| 235 |
+ h->pred8x8 [PLANE_PRED8x8] = ff_pred8x8_plane_mmx2; |
|
| 235 | 236 |
if (codec_id == CODEC_ID_SVQ3) {
|
| 236 | 237 |
h->pred16x16[PLANE_PRED8x8 ] = ff_pred16x16_plane_svq3_mmx2; |
| 237 | 238 |
} else if (codec_id == CODEC_ID_RV40) {
|
| ... | ... |
@@ -257,7 +264,8 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth |
| 257 | 257 |
h->pred16x16[PLANE_PRED8x8 ] = ff_pred16x16_tm_vp8_sse2; |
| 258 | 258 |
h->pred8x8 [PLANE_PRED8x8 ] = ff_pred8x8_tm_vp8_sse2; |
| 259 | 259 |
} else {
|
| 260 |
- h->pred8x8 [PLANE_PRED8x8 ] = ff_pred8x8_plane_sse2; |
|
| 260 |
+ if (chroma_format_idc == 1) |
|
| 261 |
+ h->pred8x8 [PLANE_PRED8x8] = ff_pred8x8_plane_sse2; |
|
| 261 | 262 |
if (codec_id == CODEC_ID_SVQ3) {
|
| 262 | 263 |
h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_svq3_sse2; |
| 263 | 264 |
} else if (codec_id == CODEC_ID_RV40) {
|
| ... | ... |
@@ -271,7 +279,8 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth |
| 271 | 271 |
if (mm_flags & AV_CPU_FLAG_SSSE3) {
|
| 272 | 272 |
h->pred16x16[HOR_PRED8x8 ] = ff_pred16x16_horizontal_ssse3; |
| 273 | 273 |
h->pred16x16[DC_PRED8x8 ] = ff_pred16x16_dc_ssse3; |
| 274 |
- h->pred8x8 [HOR_PRED8x8 ] = ff_pred8x8_horizontal_ssse3; |
|
| 274 |
+ if (chroma_format_idc == 1) |
|
| 275 |
+ h->pred8x8 [HOR_PRED8x8 ] = ff_pred8x8_horizontal_ssse3; |
|
| 275 | 276 |
h->pred8x8l [TOP_DC_PRED ] = ff_pred8x8l_top_dc_ssse3; |
| 276 | 277 |
h->pred8x8l [DC_PRED ] = ff_pred8x8l_dc_ssse3; |
| 277 | 278 |
h->pred8x8l [HOR_PRED ] = ff_pred8x8l_horizontal_ssse3; |
| ... | ... |
@@ -286,7 +295,8 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth |
| 286 | 286 |
h->pred8x8 [PLANE_PRED8x8 ] = ff_pred8x8_tm_vp8_ssse3; |
| 287 | 287 |
h->pred4x4 [TM_VP8_PRED ] = ff_pred4x4_tm_vp8_ssse3; |
| 288 | 288 |
} else {
|
| 289 |
- h->pred8x8 [PLANE_PRED8x8] = ff_pred8x8_plane_ssse3; |
|
| 289 |
+ if (chroma_format_idc == 1) |
|
| 290 |
+ h->pred8x8 [PLANE_PRED8x8] = ff_pred8x8_plane_ssse3; |
|
| 290 | 291 |
if (codec_id == CODEC_ID_SVQ3) {
|
| 291 | 292 |
h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_svq3_ssse3; |
| 292 | 293 |
} else if (codec_id == CODEC_ID_RV40) {
|
| ... | ... |
@@ -301,7 +311,8 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth |
| 301 | 301 |
h->pred4x4[DC_PRED ] = ff_pred4x4_dc_10_mmxext; |
| 302 | 302 |
h->pred4x4[HOR_UP_PRED ] = ff_pred4x4_horizontal_up_10_mmxext; |
| 303 | 303 |
|
| 304 |
- h->pred8x8[DC_PRED8x8 ] = ff_pred8x8_dc_10_mmxext; |
|
| 304 |
+ if (chroma_format_idc == 1) |
|
| 305 |
+ h->pred8x8[DC_PRED8x8 ] = ff_pred8x8_dc_10_mmxext; |
|
| 305 | 306 |
|
| 306 | 307 |
h->pred8x8l[DC_128_PRED ] = ff_pred8x8l_128_dc_10_mmxext; |
| 307 | 308 |
|
| ... | ... |
@@ -319,11 +330,13 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth |
| 319 | 319 |
h->pred4x4[VERT_RIGHT_PRED ] = ff_pred4x4_vertical_right_10_sse2; |
| 320 | 320 |
h->pred4x4[HOR_DOWN_PRED ] = ff_pred4x4_horizontal_down_10_sse2; |
| 321 | 321 |
|
| 322 |
- h->pred8x8[DC_PRED8x8 ] = ff_pred8x8_dc_10_sse2; |
|
| 323 |
- h->pred8x8[TOP_DC_PRED8x8 ] = ff_pred8x8_top_dc_10_sse2; |
|
| 324 |
- h->pred8x8[PLANE_PRED8x8 ] = ff_pred8x8_plane_10_sse2; |
|
| 325 |
- h->pred8x8[VERT_PRED8x8 ] = ff_pred8x8_vertical_10_sse2; |
|
| 326 |
- h->pred8x8[HOR_PRED8x8 ] = ff_pred8x8_horizontal_10_sse2; |
|
| 322 |
+ if (chroma_format_idc == 1) {
|
|
| 323 |
+ h->pred8x8[DC_PRED8x8 ] = ff_pred8x8_dc_10_sse2; |
|
| 324 |
+ h->pred8x8[TOP_DC_PRED8x8 ] = ff_pred8x8_top_dc_10_sse2; |
|
| 325 |
+ h->pred8x8[PLANE_PRED8x8 ] = ff_pred8x8_plane_10_sse2; |
|
| 326 |
+ h->pred8x8[VERT_PRED8x8 ] = ff_pred8x8_vertical_10_sse2; |
|
| 327 |
+ h->pred8x8[HOR_PRED8x8 ] = ff_pred8x8_horizontal_10_sse2; |
|
| 328 |
+ } |
|
| 327 | 329 |
|
| 328 | 330 |
h->pred8x8l[VERT_PRED ] = ff_pred8x8l_vertical_10_sse2; |
| 329 | 331 |
h->pred8x8l[HOR_PRED ] = ff_pred8x8l_horizontal_10_sse2; |
| ... | ... |
@@ -350,7 +350,7 @@ H264_BIWEIGHT_10_SSE( 4, 8, 10) |
| 350 | 350 |
H264_BIWEIGHT_10_SSE( 4, 4, 10) |
| 351 | 351 |
H264_BIWEIGHT_10_SSE( 4, 2, 10) |
| 352 | 352 |
|
| 353 |
-void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth) |
|
| 353 |
+void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth, const int chroma_format_idc) |
|
| 354 | 354 |
{
|
| 355 | 355 |
int mm_flags = av_get_cpu_flags(); |
| 356 | 356 |
|
| ... | ... |
@@ -368,7 +368,8 @@ void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth) |
| 368 | 368 |
|
| 369 | 369 |
c->h264_idct_add16 = ff_h264_idct_add16_8_mmx; |
| 370 | 370 |
c->h264_idct8_add4 = ff_h264_idct8_add4_8_mmx; |
| 371 |
- c->h264_idct_add8 = ff_h264_idct_add8_8_mmx; |
|
| 371 |
+ if (chroma_format_idc == 1) |
|
| 372 |
+ c->h264_idct_add8 = ff_h264_idct_add8_8_mmx; |
|
| 372 | 373 |
c->h264_idct_add16intra = ff_h264_idct_add16intra_8_mmx; |
| 373 | 374 |
c->h264_luma_dc_dequant_idct= ff_h264_luma_dc_dequant_idct_mmx; |
| 374 | 375 |
|
| ... | ... |
@@ -377,13 +378,16 @@ void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth) |
| 377 | 377 |
c->h264_idct8_dc_add = ff_h264_idct8_dc_add_8_mmx2; |
| 378 | 378 |
c->h264_idct_add16 = ff_h264_idct_add16_8_mmx2; |
| 379 | 379 |
c->h264_idct8_add4 = ff_h264_idct8_add4_8_mmx2; |
| 380 |
- c->h264_idct_add8 = ff_h264_idct_add8_8_mmx2; |
|
| 380 |
+ if (chroma_format_idc == 1) |
|
| 381 |
+ c->h264_idct_add8 = ff_h264_idct_add8_8_mmx2; |
|
| 381 | 382 |
c->h264_idct_add16intra= ff_h264_idct_add16intra_8_mmx2; |
| 382 | 383 |
|
| 383 | 384 |
c->h264_v_loop_filter_chroma= ff_deblock_v_chroma_8_mmxext; |
| 384 |
- c->h264_h_loop_filter_chroma= ff_deblock_h_chroma_8_mmxext; |
|
| 385 | 385 |
c->h264_v_loop_filter_chroma_intra= ff_deblock_v_chroma_intra_8_mmxext; |
| 386 |
- c->h264_h_loop_filter_chroma_intra= ff_deblock_h_chroma_intra_8_mmxext; |
|
| 386 |
+ if (chroma_format_idc == 1) {
|
|
| 387 |
+ c->h264_h_loop_filter_chroma= ff_deblock_h_chroma_8_mmxext; |
|
| 388 |
+ c->h264_h_loop_filter_chroma_intra= ff_deblock_h_chroma_intra_8_mmxext; |
|
| 389 |
+ } |
|
| 387 | 390 |
#if ARCH_X86_32 |
| 388 | 391 |
c->h264_v_loop_filter_luma= ff_deblock_v_luma_8_mmxext; |
| 389 | 392 |
c->h264_h_loop_filter_luma= ff_deblock_h_luma_8_mmxext; |
| ... | ... |
@@ -413,7 +417,8 @@ void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth) |
| 413 | 413 |
|
| 414 | 414 |
c->h264_idct_add16 = ff_h264_idct_add16_8_sse2; |
| 415 | 415 |
c->h264_idct8_add4 = ff_h264_idct8_add4_8_sse2; |
| 416 |
- c->h264_idct_add8 = ff_h264_idct_add8_8_sse2; |
|
| 416 |
+ if (chroma_format_idc == 1) |
|
| 417 |
+ c->h264_idct_add8 = ff_h264_idct_add8_8_sse2; |
|
| 417 | 418 |
c->h264_idct_add16intra = ff_h264_idct_add16intra_8_sse2; |
| 418 | 419 |
c->h264_luma_dc_dequant_idct= ff_h264_luma_dc_dequant_idct_sse2; |
| 419 | 420 |
|
| ... | ... |
@@ -472,7 +477,8 @@ void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth) |
| 472 | 472 |
c->h264_idct8_dc_add = ff_h264_idct8_dc_add_10_sse2; |
| 473 | 473 |
|
| 474 | 474 |
c->h264_idct_add16 = ff_h264_idct_add16_10_sse2; |
| 475 |
- c->h264_idct_add8 = ff_h264_idct_add8_10_sse2; |
|
| 475 |
+ if (chroma_format_idc == 1) |
|
| 476 |
+ c->h264_idct_add8 = ff_h264_idct_add8_10_sse2; |
|
| 476 | 477 |
c->h264_idct_add16intra= ff_h264_idct_add16intra_10_sse2; |
| 477 | 478 |
#if HAVE_ALIGNED_STACK |
| 478 | 479 |
c->h264_idct8_add = ff_h264_idct8_add_10_sse2; |
| ... | ... |
@@ -532,7 +538,8 @@ void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth) |
| 532 | 532 |
c->h264_idct8_dc_add = ff_h264_idct8_dc_add_10_avx; |
| 533 | 533 |
|
| 534 | 534 |
c->h264_idct_add16 = ff_h264_idct_add16_10_avx; |
| 535 |
- c->h264_idct_add8 = ff_h264_idct_add8_10_avx; |
|
| 535 |
+ if (chroma_format_idc == 1) |
|
| 536 |
+ c->h264_idct_add8 = ff_h264_idct_add8_10_avx; |
|
| 536 | 537 |
c->h264_idct_add16intra= ff_h264_idct_add16intra_10_avx; |
| 537 | 538 |
#if HAVE_ALIGNED_STACK |
| 538 | 539 |
c->h264_idct8_add = ff_h264_idct8_add_10_avx; |