libavcodec/idctdsp.c
e3fcb143
 /*
581b5f0b
  * This file is part of FFmpeg.
e3fcb143
  *
581b5f0b
  * FFmpeg is free software; you can redistribute it and/or
e3fcb143
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
581b5f0b
  * FFmpeg is distributed in the hope that it will be useful,
e3fcb143
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
581b5f0b
  * License along with FFmpeg; if not, write to the Free Software
e3fcb143
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
 #include "config.h"
 #include "libavutil/attributes.h"
 #include "libavutil/common.h"
 #include "avcodec.h"
 #include "dct.h"
 #include "faanidct.h"
 #include "idctdsp.h"
 #include "simple_idct.h"
f54e01c2
 #include "xvididct.h"
e3fcb143
 
 /**
  * Populate a ScanTable from a source scan order and an IDCT permutation.
  *
  * st->scantable keeps a pointer to src_scantable (no copy is made).
  * st->permutated[k] is permutation[src_scantable[k]], and
  * st->raster_end[k] is the largest permutated value seen at positions 0..k.
  *
  * @param permutation   table indexed by the entries of src_scantable
  * @param st            scan table to fill in
  * @param src_scantable scan order with 64 entries
  */
 av_cold void ff_init_scantable(uint8_t *permutation, ScanTable *st,
                                const uint8_t *src_scantable)
 {
     int pos, highest;
 
     st->scantable = src_scantable;
 
     /* first pass: route every scan position through the permutation */
     for (pos = 0; pos < 64; pos++)
         st->permutated[pos] = permutation[src_scantable[pos]];
 
     /* second pass: running maximum of the permuted positions */
     highest = -1;
     for (pos = 0; pos < 64; pos++) {
         const int cur = st->permutated[pos];
 
         highest = cur > highest ? cur : highest;
         st->raster_end[pos] = highest;
     }
 }
 
 av_cold void ff_init_scantable_permutation(uint8_t *idct_permutation,
b4987f72
                                            enum idct_permutation_type perm_type)
e3fcb143
 {
     int i;
 
     if (ARCH_X86)
         if (ff_init_scantable_permutation_x86(idct_permutation,
b4987f72
                                               perm_type))
e3fcb143
             return;
 
b4987f72
     switch (perm_type) {
     case FF_IDCT_PERM_NONE:
e3fcb143
         for (i = 0; i < 64; i++)
             idct_permutation[i] = i;
         break;
b4987f72
     case FF_IDCT_PERM_LIBMPEG2:
e3fcb143
         for (i = 0; i < 64; i++)
             idct_permutation[i] = (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
         break;
b4987f72
     case FF_IDCT_PERM_TRANSPOSE:
e3fcb143
         for (i = 0; i < 64; i++)
             idct_permutation[i] = ((i & 7) << 3) | (i >> 3);
         break;
b4987f72
     case FF_IDCT_PERM_PARTTRANS:
e3fcb143
         for (i = 0; i < 64; i++)
             idct_permutation[i] = (i & 0x24) | ((i & 3) << 3) | ((i >> 3) & 3);
         break;
     default:
         av_log(NULL, AV_LOG_ERROR,
                "Internal error, IDCT permutation not set\n");
     }
 }
 
32baeafe
 /**
  * Store an 8x8 block of 16-bit coefficients as 8-bit pixels.
  *
  * Every coefficient is passed through av_clip_uint8() before being written.
  *
  * @param block     64 source coefficients, 8 per row
  * @param pixels    destination; 8 bytes are written in each of 8 rows
  * @param line_size distance in bytes between consecutive destination rows
  */
 void ff_put_pixels_clamped_c(const int16_t *block, uint8_t *av_restrict pixels,
                              ptrdiff_t line_size)
 {
     int row, col;
 
     /* write the pixels, one destination row per outer iteration */
     for (row = 0; row < 8; row++, block += 8, pixels += line_size)
         for (col = 0; col < 8; col++)
             pixels[col] = av_clip_uint8(block[col]);
 }
 
581b5f0b
 /**
  * Store the top-left 4x4 part of a coefficient block as 8-bit pixels.
  *
  * Source rows are 8 coefficients apart; only the first four coefficients of
  * the first four rows are read. Each value goes through av_clip_uint8().
  *
  * @param block     source coefficients, row stride of 8 entries
  * @param pixels    destination; 4 bytes are written in each of 4 rows
  * @param line_size distance in bytes between consecutive destination rows;
  *                  ptrdiff_t so the stride handed in by the callers is not
  *                  narrowed to int
  */
 static void put_pixels_clamped4_c(const int16_t *block, uint8_t *av_restrict pixels,
                                   ptrdiff_t line_size)
 {
     int i, j;
 
     /* write the pixels */
     for (i = 0; i < 4; i++) {
         for (j = 0; j < 4; j++)
             pixels[j] = av_clip_uint8(block[j]);
 
         pixels += line_size;
         block  += 8;
     }
 }
 
 /**
  * Store the top-left 2x2 part of a coefficient block as 8-bit pixels.
  *
  * Source rows are 8 coefficients apart; only the first two coefficients of
  * the first two rows are read. Each value goes through av_clip_uint8().
  *
  * @param block     source coefficients, row stride of 8 entries
  * @param pixels    destination; 2 bytes are written in each of 2 rows
  * @param line_size distance in bytes between consecutive destination rows;
  *                  ptrdiff_t so the stride handed in by the callers is not
  *                  narrowed to int
  */
 static void put_pixels_clamped2_c(const int16_t *block, uint8_t *av_restrict pixels,
                                   ptrdiff_t line_size)
 {
     int i;
 
     /* write the pixels */
     for (i = 0; i < 2; i++) {
         pixels[0] = av_clip_uint8(block[0]);
         pixels[1] = av_clip_uint8(block[1]);
 
         pixels += line_size;
         block  += 8;
     }
 }
 
e3fcb143
 static void put_signed_pixels_clamped_c(const int16_t *block,
581b5f0b
                                         uint8_t *av_restrict pixels,
c99a8828
                                         ptrdiff_t line_size)
e3fcb143
 {
     int i, j;
 
     for (i = 0; i < 8; i++) {
         for (j = 0; j < 8; j++) {
             if (*block < -128)
                 *pixels = 0;
             else if (*block > 127)
                 *pixels = 255;
             else
                 *pixels = (uint8_t) (*block + 128);
             block++;
             pixels++;
         }
         pixels += (line_size - 8);
     }
 }
 
32baeafe
 /**
  * Add an 8x8 block of 16-bit coefficients onto existing 8-bit pixels.
  *
  * Each destination byte is replaced by av_clip_uint8(pixel + coefficient).
  *
  * @param block     64 source coefficients, 8 per row
  * @param pixels    destination, read and rewritten; 8 bytes in each of 8 rows
  * @param line_size distance in bytes between consecutive destination rows
  */
 void ff_add_pixels_clamped_c(const int16_t *block, uint8_t *av_restrict pixels,
                              ptrdiff_t line_size)
 {
     int row, col;
 
     /* update the pixels, one destination row per outer iteration */
     for (row = 0; row < 8; row++, block += 8, pixels += line_size)
         for (col = 0; col < 8; col++)
             pixels[col] = av_clip_uint8(pixels[col] + block[col]);
 }
 
581b5f0b
 /**
  * Add the top-left 4x4 part of a coefficient block onto existing pixels.
  *
  * Source rows are 8 coefficients apart; only the first four coefficients of
  * the first four rows are read. Each destination byte is replaced by
  * av_clip_uint8(pixel + coefficient).
  *
  * @param block     source coefficients, row stride of 8 entries
  * @param pixels    destination, read and rewritten; 4 bytes in each of 4 rows
  * @param line_size distance in bytes between consecutive destination rows;
  *                  ptrdiff_t so the stride handed in by the callers is not
  *                  narrowed to int
  */
 static void add_pixels_clamped4_c(const int16_t *block, uint8_t *av_restrict pixels,
                                   ptrdiff_t line_size)
 {
     int i, j;
 
     /* update the pixels */
     for (i = 0; i < 4; i++) {
         for (j = 0; j < 4; j++)
             pixels[j] = av_clip_uint8(pixels[j] + block[j]);
 
         pixels += line_size;
         block  += 8;
     }
 }
 
 /**
  * Add the top-left 2x2 part of a coefficient block onto existing pixels.
  *
  * Source rows are 8 coefficients apart; only the first two coefficients of
  * the first two rows are read. Each destination byte is replaced by
  * av_clip_uint8(pixel + coefficient).
  *
  * @param block     source coefficients, row stride of 8 entries
  * @param pixels    destination, read and rewritten; 2 bytes in each of 2 rows
  * @param line_size distance in bytes between consecutive destination rows;
  *                  ptrdiff_t so the stride handed in by the callers is not
  *                  narrowed to int
  */
 static void add_pixels_clamped2_c(const int16_t *block, uint8_t *av_restrict pixels,
                                   ptrdiff_t line_size)
 {
     int i;
 
     /* update the pixels */
     for (i = 0; i < 2; i++) {
         pixels[0] = av_clip_uint8(pixels[0] + block[0]);
         pixels[1] = av_clip_uint8(pixels[1] + block[1]);
 
         pixels += line_size;
         block  += 8;
     }
 }
 
5a49097b
 /**
  * Apply ff_j_rev_dct4() to block, then write the resulting 4x4 pixels to
  * dest via put_pixels_clamped4_c().
  *
  * @param dest      destination pixels
  * @param line_size distance in bytes between destination rows
  * @param block     coefficient block, passed to the transform before being read
  */
 static void ff_jref_idct4_put(uint8_t *dest, ptrdiff_t line_size,
                               int16_t *block)
 {
     ff_j_rev_dct4(block);
     put_pixels_clamped4_c(block, dest, line_size);
 }
5a49097b
 /**
  * Apply ff_j_rev_dct4() to block, then add the resulting 4x4 values onto
  * dest via add_pixels_clamped4_c().
  *
  * @param dest      destination pixels, read and rewritten
  * @param line_size distance in bytes between destination rows
  * @param block     coefficient block, passed to the transform before being read
  */
 static void ff_jref_idct4_add(uint8_t *dest, ptrdiff_t line_size,
                               int16_t *block)
 {
     ff_j_rev_dct4(block);
     add_pixels_clamped4_c(block, dest, line_size);
 }
 
5a49097b
 /**
  * Apply ff_j_rev_dct2() to block, then write the resulting 2x2 pixels to
  * dest via put_pixels_clamped2_c().
  *
  * @param dest      destination pixels
  * @param line_size distance in bytes between destination rows
  * @param block     coefficient block, passed to the transform before being read
  */
 static void ff_jref_idct2_put(uint8_t *dest, ptrdiff_t line_size,
                               int16_t *block)
 {
     ff_j_rev_dct2(block);
     put_pixels_clamped2_c(block, dest, line_size);
 }
5a49097b
 /**
  * Apply ff_j_rev_dct2() to block, then add the resulting 2x2 values onto
  * dest via add_pixels_clamped2_c().
  *
  * @param dest      destination pixels, read and rewritten
  * @param line_size distance in bytes between destination rows
  * @param block     coefficient block, passed to the transform before being read
  */
 static void ff_jref_idct2_add(uint8_t *dest, ptrdiff_t line_size,
                               int16_t *block)
 {
     ff_j_rev_dct2(block);
     add_pixels_clamped2_c(block, dest, line_size);
 }
 
5a49097b
 /**
  * Single-pixel "put": write (block[0] + 4) >> 3, clipped by av_clip_uint8(),
  * to dest[0].
  *
  * @param dest      destination; only dest[0] is written
  * @param line_size unused
  * @param block     only block[0] is read
  */
 static void ff_jref_idct1_put(uint8_t *dest, ptrdiff_t line_size,
                               int16_t *block)
 {
     const int value = (block[0] + 4) >> 3;
 
     dest[0] = av_clip_uint8(value);
 }
5a49097b
 /**
  * Single-pixel "add": add (block[0] + 4) >> 3 to dest[0] and store the sum
  * clipped by av_clip_uint8().
  *
  * @param dest      destination; only dest[0] is read and rewritten
  * @param line_size unused
  * @param block     only block[0] is read
  */
 static void ff_jref_idct1_add(uint8_t *dest, ptrdiff_t line_size,
                               int16_t *block)
 {
     const int delta = (block[0] + 4) >> 3;
 
     dest[0] = av_clip_uint8(dest[0] + delta);
 }
e3fcb143
 
 /**
  * Fill an IDCTDSPContext with IDCT and clamped-pixel function pointers.
  *
  * Selection order:
  *  - avctx->lowres of 1, 2 or 3 picks the 4x4, 2x2 or 1x1 reference IDCT;
  *  - otherwise avctx->bits_per_raw_sample of 9/10 or 12 picks the matching
  *    simple IDCT;
  *  - otherwise avctx->idct_algo picks between the integer reference IDCT,
  *    the FAAN IDCT (when compiled in) and the 8-bit simple IDCT.
  *
  * After that the C clamped-pixel helpers are installed, the Xvid IDCT init
  * and the per-architecture init hooks are called where enabled, and finally
  * c->idct_permutation is built from c->perm_type.
  *
  * @param c     context to initialise; c->mpeg4_studio_profile is read
  * @param avctx codec context supplying lowres, bits_per_raw_sample and
  *              idct_algo
  */
 av_cold void ff_idctdsp_init(IDCTDSPContext *c, AVCodecContext *avctx)
 {
     const int      depth          = avctx->bits_per_raw_sample;
     const unsigned high_bit_depth = depth > 8;
 
     switch (avctx->lowres) {
     case 1:
         c->idct_put  = ff_jref_idct4_put;
         c->idct_add  = ff_jref_idct4_add;
         c->idct      = ff_j_rev_dct4;
         c->perm_type = FF_IDCT_PERM_NONE;
         break;
     case 2:
         c->idct_put  = ff_jref_idct2_put;
         c->idct_add  = ff_jref_idct2_add;
         c->idct      = ff_j_rev_dct2;
         c->perm_type = FF_IDCT_PERM_NONE;
         break;
     case 3:
         c->idct_put  = ff_jref_idct1_put;
         c->idct_add  = ff_jref_idct1_add;
         c->idct      = ff_j_rev_dct1;
         c->perm_type = FF_IDCT_PERM_NONE;
         break;
     default:
         if (depth == 9 || depth == 10) {
             c->perm_type = FF_IDCT_PERM_NONE;
             if (c->mpeg4_studio_profile) {
                 /* 10-bit MPEG-4 Simple Studio Profile requires a higher
                  * precision IDCT; however, it only uses idct_put. */
                 c->idct_put = ff_simple_idct_put_int32_10bit;
                 c->idct_add = NULL;
                 c->idct     = NULL;
             } else {
                 c->idct_put = ff_simple_idct_put_int16_10bit;
                 c->idct_add = ff_simple_idct_add_int16_10bit;
                 c->idct     = ff_simple_idct_int16_10bit;
             }
         } else if (depth == 12) {
             c->idct_put  = ff_simple_idct_put_int16_12bit;
             c->idct_add  = ff_simple_idct_add_int16_12bit;
             c->idct      = ff_simple_idct_int16_12bit;
             c->perm_type = FF_IDCT_PERM_NONE;
         } else if (avctx->idct_algo == FF_IDCT_INT) {
             c->idct_put  = ff_jref_idct_put;
             c->idct_add  = ff_jref_idct_add;
             c->idct      = ff_j_rev_dct;
             c->perm_type = FF_IDCT_PERM_LIBMPEG2;
 #if CONFIG_FAANIDCT
         } else if (avctx->idct_algo == FF_IDCT_FAAN) {
             c->idct_put  = ff_faanidct_put;
             c->idct_add  = ff_faanidct_add;
             c->idct      = ff_faanidct;
             c->perm_type = FF_IDCT_PERM_NONE;
 #endif /* CONFIG_FAANIDCT */
         } else {
             /* accurate/default.
              * Be sure FF_IDCT_NONE will select this one, since it uses
              * FF_IDCT_PERM_NONE. */
             c->idct_put  = ff_simple_idct_put_int16_8bit;
             c->idct_add  = ff_simple_idct_add_int16_8bit;
             c->idct      = ff_simple_idct_int16_8bit;
             c->perm_type = FF_IDCT_PERM_NONE;
         }
         break;
     }
 
     /* portable C clamped-pixel helpers */
     c->put_pixels_clamped        = ff_put_pixels_clamped_c;
     c->put_signed_pixels_clamped = put_signed_pixels_clamped_c;
     c->add_pixels_clamped        = ff_add_pixels_clamped_c;
 
     if (CONFIG_MPEG4_DECODER && avctx->idct_algo == FF_IDCT_XVID)
         ff_xvid_idct_init(c, avctx);
 
     /* per-architecture hooks run after the C defaults are in place;
      * NOTE(review): presumably they replace some of the pointers set above */
     if (ARCH_AARCH64)
         ff_idctdsp_init_aarch64(c, avctx, high_bit_depth);
     if (ARCH_ALPHA)
         ff_idctdsp_init_alpha(c, avctx, high_bit_depth);
     if (ARCH_ARM)
         ff_idctdsp_init_arm(c, avctx, high_bit_depth);
     if (ARCH_PPC)
         ff_idctdsp_init_ppc(c, avctx, high_bit_depth);
     if (ARCH_X86)
         ff_idctdsp_init_x86(c, avctx, high_bit_depth);
     if (ARCH_MIPS)
         ff_idctdsp_init_mips(c, avctx, high_bit_depth);
 
     /* build the permutation table last, from the final perm_type */
     ff_init_scantable_permutation(c->idct_permutation, c->perm_type);
 }