* qatar/master: (22 commits)
configure: enable memalign_hack automatically when needed
swscale: unbreak the build on non-x86 systems.
swscale: remove if(bitexact) branch from functions.
swscale: remove if(canMMX2BeUsed) conditional.
swscale: remove swScale_{c,MMX,MMX2} duplication.
swscale: use emms_c().
Move emms_c() from libavcodec to libavutil.
tiff: set palette in the context when specified in TIFF_PAL tag
rtsp: use strtoul to parse rtptime and seq values.
pgssubdec: fix incorrect colors.
dvdsubdec: fix incorrect colors.
ape: Allow demuxing of files with metadata tags.
swscale: remove dead macro WRITEBGR24OLD.
swscale: remove AMD3DNOW "optimizations".
swscale: remove duplicate code in ppc/ subdirectory.
swscale: remove duplicated x86/ functions.
swscale: force --enable-runtime-cpudetect and remove SWS_CPU_CAPS_*.
vsrc_buffer.h: add file doxy
vsrc_buffer: tweak error message in init()
msmpeg4: reindent.
...
Merged-by: Michael Niedermayer <michaelni@gmx.at>
| ... | ... |
@@ -2859,11 +2859,6 @@ check_header X11/extensions/XvMClib.h |
| 2859 | 2859 |
|
| 2860 | 2860 |
check_struct dxva2api.h DXVA_PictureParameters wDecodedPictureIndex |
| 2861 | 2861 |
|
| 2862 |
-if ! enabled_any memalign memalign_hack posix_memalign malloc_aligned && |
|
| 2863 |
- enabled_any $need_memalign ; then |
|
| 2864 |
- die "Error, no aligned memory allocator but SSE enabled, disable it or use --enable-memalign-hack." |
|
| 2865 |
-fi |
|
| 2866 |
- |
|
| 2867 | 2862 |
disabled zlib || check_lib zlib.h zlibVersion -lz || disable zlib |
| 2868 | 2863 |
disabled bzlib || check_lib2 bzlib.h BZ2_bzlibVersion -lbz2 || disable bzlib |
| 2869 | 2864 |
|
| ... | ... |
@@ -3156,6 +3151,9 @@ check_deps $CONFIG_LIST \ |
| 3156 | 3156 |
|
| 3157 | 3157 |
enabled asm || { arch=c; disable $ARCH_LIST $ARCH_EXT_LIST; }
|
| 3158 | 3158 |
|
| 3159 |
+! enabled_any memalign posix_memalign malloc_aligned && |
|
| 3160 |
+ enabled_any $need_memalign && enable memalign_hack |
|
| 3161 |
+ |
|
| 3159 | 3162 |
echo "install prefix $prefix" |
| 3160 | 3163 |
echo "source path $source_path" |
| 3161 | 3164 |
echo "C compiler $cc" |
| ... | ... |
@@ -433,3 +433,49 @@ For more information about libx264 and the supported options see: |
| 433 | 433 |
@url{http://www.videolan.org/developers/x264.html}
|
| 434 | 434 |
|
| 435 | 435 |
@c man end VIDEO ENCODERS |
| 436 |
+ |
|
| 437 |
+@subheading Floating-Point-Only AC-3 Encoding Options |
|
| 438 |
+ |
|
| 439 |
+These options are only valid for the floating-point encoder and do not exist |
|
| 440 |
+for the fixed-point encoder due to the corresponding features not being |
|
| 441 |
+implemented in fixed-point. |
|
| 442 |
+ |
|
| 443 |
+@table @option |
|
| 444 |
+ |
|
| 445 |
+@item -channel_coupling @var{boolean}
|
|
| 446 |
+Enables/Disables use of channel coupling, which is an optional AC-3 feature |
|
| 447 |
+that increases quality by combining high frequency information from multiple |
|
| 448 |
+channels into a single channel. The per-channel high frequency information is |
|
| 449 |
+sent with less accuracy in both the frequency and time domains. This allows |
|
| 450 |
+more bits to be used for lower frequencies while preserving enough information |
|
| 451 |
+to reconstruct the high frequencies. This option is enabled by default for the |
|
| 452 |
+floating-point encoder and should generally be left as enabled except for |
|
| 453 |
+testing purposes or to increase encoding speed. |
|
| 454 |
+@table @option |
|
| 455 |
+@item -1 |
|
| 456 |
+@itemx auto |
|
| 457 |
+Selected by Encoder (default) |
|
| 458 |
+@item 0 |
|
| 459 |
+@itemx off |
|
| 460 |
+Disable Channel Coupling |
|
| 461 |
+@item 1 |
|
| 462 |
+@itemx on |
|
| 463 |
+Enable Channel Coupling |
|
| 464 |
+@end table |
|
| 465 |
+ |
|
| 466 |
+@item -cpl_start_band @var{number}
|
|
| 467 |
+Coupling Start Band. Sets the channel coupling start band, from 1 to 15. If a |
|
| 468 |
+value higher than the bandwidth is used, it will be reduced to 1 less than the |
|
| 469 |
+coupling end band. If @var{auto} is used, the start band will be determined by
|
|
| 470 |
+the encoder based on the bit rate, sample rate, and channel layout. This option |
|
| 471 |
+has no effect if channel coupling is disabled. |
|
| 472 |
+@table @option |
|
| 473 |
+@item -1 |
|
| 474 |
+@itemx auto |
|
| 475 |
+Selected by Encoder (default) |
|
| 476 |
+@end table |
|
| 477 |
+ |
|
| 478 |
+@end table |
|
| 479 |
+ |
|
| 480 |
+@c man end ENCODERS |
|
| 481 |
+ |
| ... | ... |
@@ -269,8 +269,6 @@ OBJS-$(CONFIG_MPEG2VIDEO_ENCODER) += mpeg12enc.o mpegvideo_enc.o \ |
| 269 | 269 |
mpegvideo.o error_resilience.o |
| 270 | 270 |
OBJS-$(CONFIG_MPEG4_VAAPI_HWACCEL) += vaapi_mpeg4.o |
| 271 | 271 |
OBJS-$(CONFIG_MSMPEG4V1_DECODER) += msmpeg4.o msmpeg4data.o |
| 272 |
-OBJS-$(CONFIG_MSMPEG4V1_ENCODER) += msmpeg4.o msmpeg4data.o h263dec.o \ |
|
| 273 |
- h263.o ituh263dec.o mpeg4videodec.o |
|
| 274 | 272 |
OBJS-$(CONFIG_MSMPEG4V2_DECODER) += msmpeg4.o msmpeg4data.o h263dec.o \ |
| 275 | 273 |
h263.o ituh263dec.o mpeg4videodec.o |
| 276 | 274 |
OBJS-$(CONFIG_MSMPEG4V2_ENCODER) += msmpeg4.o msmpeg4data.o h263dec.o \ |
| ... | ... |
@@ -28,7 +28,8 @@ |
| 28 | 28 |
#define AVCODEC_AC3_H |
| 29 | 29 |
|
| 30 | 30 |
#define AC3_MAX_CODED_FRAME_SIZE 3840 /* in bytes */ |
| 31 |
-#define AC3_MAX_CHANNELS 6 /* including LFE channel */ |
|
| 31 |
+#define AC3_MAX_CHANNELS 7 /**< maximum number of channels, including coupling channel */ |
|
| 32 |
+#define CPL_CH 0 /**< coupling channel index */ |
|
| 32 | 33 |
|
| 33 | 34 |
#define AC3_MAX_COEFS 256 |
| 34 | 35 |
#define AC3_BLOCK_SIZE 256 |
| ... | ... |
@@ -158,7 +159,9 @@ typedef struct AC3EncOptions {
|
| 158 | 158 |
|
| 159 | 159 |
/* other encoding options */ |
| 160 | 160 |
int allow_per_frame_metadata; |
| 161 |
- int stereo_rematrixing; |
|
| 161 |
+ int stereo_rematrixing; |
|
| 162 |
+ int channel_coupling; |
|
| 163 |
+ int cpl_start; |
|
| 162 | 164 |
} AC3EncOptions; |
| 163 | 165 |
|
| 164 | 166 |
|
| ... | ... |
@@ -54,12 +54,6 @@ const uint8_t ff_eac3_hebap_tab[64] = {
|
| 54 | 54 |
}; |
| 55 | 55 |
|
| 56 | 56 |
/** |
| 57 |
- * Table E2.16 Default Coupling Banding Structure |
|
| 58 |
- */ |
|
| 59 |
-const uint8_t ff_eac3_default_cpl_band_struct[18] = |
|
| 60 |
-{ 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1 };
|
|
| 61 |
- |
|
| 62 |
-/** |
|
| 63 | 57 |
* Table E2.15 Default Spectral Extension Banding Structure |
| 64 | 58 |
*/ |
| 65 | 59 |
const uint8_t ff_eac3_default_spx_band_struct[17] = |
| ... | ... |
@@ -27,7 +27,6 @@ |
| 27 | 27 |
extern const uint8_t ff_ac3_ungroup_3_in_5_bits_tab[32][3]; |
| 28 | 28 |
|
| 29 | 29 |
extern const uint8_t ff_eac3_hebap_tab[64]; |
| 30 |
-extern const uint8_t ff_eac3_default_cpl_band_struct[18]; |
|
| 31 | 30 |
extern const uint8_t ff_eac3_default_spx_band_struct[17]; |
| 32 | 31 |
|
| 33 | 32 |
#endif /* AVCODEC_AC3DEC_DATA_H */ |
| ... | ... |
@@ -70,6 +70,7 @@ typedef struct AC3MDCTContext {
|
| 70 | 70 |
FFTContext fft; ///< FFT context for MDCT calculation |
| 71 | 71 |
} AC3MDCTContext; |
| 72 | 72 |
|
| 73 |
+ |
|
| 73 | 74 |
/** |
| 74 | 75 |
* Data for a single audio block. |
| 75 | 76 |
*/ |
| ... | ... |
@@ -83,10 +84,22 @@ typedef struct AC3Block {
|
| 83 | 83 |
int16_t **band_psd; ///< psd per critical band |
| 84 | 84 |
int16_t **mask; ///< masking curve |
| 85 | 85 |
uint16_t **qmant; ///< quantized mantissas |
| 86 |
+ uint8_t **cpl_coord_exp; ///< coupling coord exponents (cplcoexp) |
|
| 87 |
+ uint8_t **cpl_coord_mant; ///< coupling coord mantissas (cplcomant) |
|
| 86 | 88 |
uint8_t coeff_shift[AC3_MAX_CHANNELS]; ///< fixed-point coefficient shift values |
| 87 | 89 |
uint8_t new_rematrixing_strategy; ///< send new rematrixing flags in this block |
| 90 |
+ int num_rematrixing_bands; ///< number of rematrixing bands |
|
| 88 | 91 |
uint8_t rematrixing_flags[4]; ///< rematrixing flags |
| 89 | 92 |
struct AC3Block *exp_ref_block[AC3_MAX_CHANNELS]; ///< reference blocks for EXP_REUSE |
| 93 |
+ int new_cpl_strategy; ///< send new coupling strategy |
|
| 94 |
+ int cpl_in_use; ///< coupling in use for this block (cplinu) |
|
| 95 |
+ uint8_t channel_in_cpl[AC3_MAX_CHANNELS]; ///< channel in coupling (chincpl) |
|
| 96 |
+ int num_cpl_channels; ///< number of channels in coupling |
|
| 97 |
+ uint8_t new_cpl_coords; ///< send new coupling coordinates (cplcoe) |
|
| 98 |
+ uint8_t cpl_master_exp[AC3_MAX_CHANNELS]; ///< coupling coord master exponents (mstrcplco) |
|
| 99 |
+ int new_snr_offsets; ///< send new SNR offsets |
|
| 100 |
+ int new_cpl_leak; ///< send new coupling leak info |
|
| 101 |
+ int end_freq[AC3_MAX_CHANNELS]; ///< end frequency bin (endmant) |
|
| 90 | 102 |
} AC3Block; |
| 91 | 103 |
|
| 92 | 104 |
/** |
| ... | ... |
@@ -133,10 +146,16 @@ typedef struct AC3EncodeContext {
|
| 133 | 133 |
|
| 134 | 134 |
int cutoff; ///< user-specified cutoff frequency, in Hz |
| 135 | 135 |
int bandwidth_code; ///< bandwidth code (0 to 60) (chbwcod) |
| 136 |
- int nb_coefs[AC3_MAX_CHANNELS]; |
|
| 136 |
+ int start_freq[AC3_MAX_CHANNELS]; ///< start frequency bin (strtmant) |
|
| 137 |
+ int cpl_end_freq; ///< coupling channel end frequency bin |
|
| 138 |
+ |
|
| 139 |
+ int cpl_on; ///< coupling turned on for this frame |
|
| 140 |
+ int cpl_enabled; ///< coupling enabled for all frames |
|
| 141 |
+ int num_cpl_subbands; ///< number of coupling subbands (ncplsubnd) |
|
| 142 |
+ int num_cpl_bands; ///< number of coupling bands (ncplbnd) |
|
| 143 |
+ uint8_t cpl_band_sizes[AC3_MAX_CPL_BANDS]; ///< number of coeffs in each coupling band |
|
| 137 | 144 |
|
| 138 | 145 |
int rematrixing_enabled; ///< stereo rematrixing enabled |
| 139 |
- int num_rematrixing_bands; ///< number of rematrixing bands |
|
| 140 | 146 |
|
| 141 | 147 |
/* bitrate allocation control */ |
| 142 | 148 |
int slow_gain_code; ///< slow gain code (sgaincod) |
| ... | ... |
@@ -163,6 +182,8 @@ typedef struct AC3EncodeContext {
|
| 163 | 163 |
int16_t *band_psd_buffer; |
| 164 | 164 |
int16_t *mask_buffer; |
| 165 | 165 |
uint16_t *qmant_buffer; |
| 166 |
+ uint8_t *cpl_coord_exp_buffer; |
|
| 167 |
+ uint8_t *cpl_coord_mant_buffer; |
|
| 166 | 168 |
|
| 167 | 169 |
uint8_t exp_strategy[AC3_MAX_CHANNELS][AC3_MAX_BLOCKS]; ///< exponent strategies |
| 168 | 170 |
|
| ... | ... |
@@ -237,6 +258,12 @@ const AVOption ff_ac3_options[] = {
|
| 237 | 237 |
{"hdcd", "HDCD", 0, FF_OPT_TYPE_CONST, {.dbl = 1 }, INT_MIN, INT_MAX, AC3ENC_PARAM, "ad_conv_type"},
|
| 238 | 238 |
/* Other Encoding Options */ |
| 239 | 239 |
{"stereo_rematrixing", "Stereo Rematrixing", OFFSET(stereo_rematrixing), FF_OPT_TYPE_INT, {.dbl = 1 }, 0, 1, AC3ENC_PARAM},
|
| 240 |
+#if CONFIG_AC3ENC_FLOAT |
|
| 241 |
+{"channel_coupling", "Channel Coupling", OFFSET(channel_coupling), FF_OPT_TYPE_INT, {.dbl = 1 }, 0, 1, AC3ENC_PARAM, "channel_coupling"},
|
|
| 242 |
+ {"auto", "Selected by the Encoder", 0, FF_OPT_TYPE_CONST, {.dbl = -1 }, INT_MIN, INT_MAX, AC3ENC_PARAM, "channel_coupling"},
|
|
| 243 |
+{"cpl_start_band", "Coupling Start Band", OFFSET(cpl_start), FF_OPT_TYPE_INT, {.dbl = -1 }, -1, 15, AC3ENC_PARAM, "cpl_start_band"},
|
|
| 244 |
+ {"auto", "Selected by the Encoder", 0, FF_OPT_TYPE_CONST, {.dbl = -1 }, INT_MIN, INT_MAX, AC3ENC_PARAM, "cpl_start_band"},
|
|
| 245 |
+#endif |
|
| 240 | 246 |
{NULL}
|
| 241 | 247 |
}; |
| 242 | 248 |
#endif |
| ... | ... |
@@ -267,9 +294,9 @@ static void scale_coefficients(AC3EncodeContext *s); |
| 267 | 267 |
|
| 268 | 268 |
/** |
| 269 | 269 |
* LUT for number of exponent groups. |
| 270 |
- * exponent_group_tab[exponent strategy-1][number of coefficients] |
|
| 270 |
+ * exponent_group_tab[coupling][exponent strategy-1][number of coefficients] |
|
| 271 | 271 |
*/ |
| 272 |
-static uint8_t exponent_group_tab[3][256]; |
|
| 272 |
+static uint8_t exponent_group_tab[2][3][256]; |
|
| 273 | 273 |
|
| 274 | 274 |
|
| 275 | 275 |
/** |
| ... | ... |
@@ -331,6 +358,49 @@ static const uint8_t ac3_bandwidth_tab[5][3][19] = {
|
| 331 | 331 |
|
| 332 | 332 |
|
| 333 | 333 |
/** |
| 334 |
+ * LUT to select the coupling start band based on the bit rate, sample rate, and |
|
| 335 |
+ * number of full-bandwidth channels. -1 = coupling off |
|
| 336 |
+ * ac3_coupling_start_tab[channel_mode-2][sample rate code][bit rate code] |
|
| 337 |
+ * |
|
| 338 |
+ * TODO: more testing for optimal parameters. |
|
| 339 |
+ * multi-channel tests at 44.1kHz and 32kHz. |
|
| 340 |
+ */ |
|
| 341 |
+static const int8_t ac3_coupling_start_tab[6][3][19] = {
|
|
| 342 |
+// 32 40 48 56 64 80 96 112 128 160 192 224 256 320 384 448 512 576 640 |
|
| 343 |
+ |
|
| 344 |
+ // 2/0 |
|
| 345 |
+ { { 0, 0, 0, 0, 0, 0, 0, 1, 1, 7, 8, 11, 12, -1, -1, -1, -1, -1, -1 },
|
|
| 346 |
+ { 0, 0, 0, 0, 0, 0, 1, 3, 5, 7, 10, 12, 13, -1, -1, -1, -1, -1, -1 },
|
|
| 347 |
+ { 0, 0, 0, 0, 1, 2, 2, 9, 13, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1 } },
|
|
| 348 |
+ |
|
| 349 |
+ // 3/0 |
|
| 350 |
+ { { 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 6, 9, 11, 12, 13, -1, -1, -1, -1 },
|
|
| 351 |
+ { 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 6, 9, 11, 12, 13, -1, -1, -1, -1 },
|
|
| 352 |
+ { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 } },
|
|
| 353 |
+ |
|
| 354 |
+ // 2/1 - untested |
|
| 355 |
+ { { 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 6, 9, 11, 12, 13, -1, -1, -1, -1 },
|
|
| 356 |
+ { 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 6, 9, 11, 12, 13, -1, -1, -1, -1 },
|
|
| 357 |
+ { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 } },
|
|
| 358 |
+ |
|
| 359 |
+ // 3/1 |
|
| 360 |
+ { { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 2, 10, 11, 11, 12, 12, 14, -1 },
|
|
| 361 |
+ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 2, 10, 11, 11, 12, 12, 14, -1 },
|
|
| 362 |
+ { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 } },
|
|
| 363 |
+ |
|
| 364 |
+ // 2/2 - untested |
|
| 365 |
+ { { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 2, 10, 11, 11, 12, 12, 14, -1 },
|
|
| 366 |
+ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 2, 10, 11, 11, 12, 12, 14, -1 },
|
|
| 367 |
+ { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 } },
|
|
| 368 |
+ |
|
| 369 |
+ // 3/2 |
|
| 370 |
+ { { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 6, 8, 11, 12, 12, -1, -1 },
|
|
| 371 |
+ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 6, 8, 11, 12, 12, -1, -1 },
|
|
| 372 |
+ { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 } },
|
|
| 373 |
+}; |
|
| 374 |
+ |
|
| 375 |
+ |
|
| 376 |
+/** |
|
| 334 | 377 |
* Adjust the frame size to make the average bit rate match the target bit rate. |
| 335 | 378 |
* This is only needed for 11025, 22050, and 44100 sample rates. |
| 336 | 379 |
*/ |
| ... | ... |
@@ -392,15 +462,297 @@ static void apply_mdct(AC3EncodeContext *s) |
| 392 | 392 |
|
| 393 | 393 |
apply_window(&s->dsp, s->windowed_samples, input_samples, s->mdct.window, AC3_WINDOW_SIZE); |
| 394 | 394 |
|
| 395 |
- block->coeff_shift[ch] = normalize_samples(s); |
|
| 395 |
+ block->coeff_shift[ch+1] = normalize_samples(s); |
|
| 396 | 396 |
|
| 397 |
- s->mdct.fft.mdct_calcw(&s->mdct.fft, block->mdct_coef[ch], |
|
| 397 |
+ s->mdct.fft.mdct_calcw(&s->mdct.fft, block->mdct_coef[ch+1], |
|
| 398 | 398 |
s->windowed_samples); |
| 399 | 399 |
} |
| 400 | 400 |
} |
| 401 | 401 |
} |
| 402 | 402 |
|
| 403 | 403 |
|
| 404 |
+static void compute_coupling_strategy(AC3EncodeContext *s) |
|
| 405 |
+{
|
|
| 406 |
+ int blk, ch; |
|
| 407 |
+ int got_cpl_snr; |
|
| 408 |
+ |
|
| 409 |
+ /* set coupling use flags for each block/channel */ |
|
| 410 |
+ /* TODO: turn coupling on/off and adjust start band based on bit usage */ |
|
| 411 |
+ for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
|
|
| 412 |
+ AC3Block *block = &s->blocks[blk]; |
|
| 413 |
+ for (ch = 1; ch <= s->fbw_channels; ch++) |
|
| 414 |
+ block->channel_in_cpl[ch] = s->cpl_on; |
|
| 415 |
+ } |
|
| 416 |
+ |
|
| 417 |
+ /* enable coupling for each block if at least 2 channels have coupling |
|
| 418 |
+ enabled for that block */ |
|
| 419 |
+ got_cpl_snr = 0; |
|
| 420 |
+ for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
|
|
| 421 |
+ AC3Block *block = &s->blocks[blk]; |
|
| 422 |
+ block->num_cpl_channels = 0; |
|
| 423 |
+ for (ch = 1; ch <= s->fbw_channels; ch++) |
|
| 424 |
+ block->num_cpl_channels += block->channel_in_cpl[ch]; |
|
| 425 |
+ block->cpl_in_use = block->num_cpl_channels > 1; |
|
| 426 |
+ if (!block->cpl_in_use) {
|
|
| 427 |
+ block->num_cpl_channels = 0; |
|
| 428 |
+ for (ch = 1; ch <= s->fbw_channels; ch++) |
|
| 429 |
+ block->channel_in_cpl[ch] = 0; |
|
| 430 |
+ } |
|
| 431 |
+ |
|
| 432 |
+ block->new_cpl_strategy = !blk; |
|
| 433 |
+ if (blk) {
|
|
| 434 |
+ for (ch = 1; ch <= s->fbw_channels; ch++) {
|
|
| 435 |
+ if (block->channel_in_cpl[ch] != s->blocks[blk-1].channel_in_cpl[ch]) {
|
|
| 436 |
+ block->new_cpl_strategy = 1; |
|
| 437 |
+ break; |
|
| 438 |
+ } |
|
| 439 |
+ } |
|
| 440 |
+ } |
|
| 441 |
+ block->new_cpl_leak = block->new_cpl_strategy; |
|
| 442 |
+ |
|
| 443 |
+ if (!blk || (block->cpl_in_use && !got_cpl_snr)) {
|
|
| 444 |
+ block->new_snr_offsets = 1; |
|
| 445 |
+ if (block->cpl_in_use) |
|
| 446 |
+ got_cpl_snr = 1; |
|
| 447 |
+ } else {
|
|
| 448 |
+ block->new_snr_offsets = 0; |
|
| 449 |
+ } |
|
| 450 |
+ } |
|
| 451 |
+ |
|
| 452 |
+ /* set bandwidth for each channel */ |
|
| 453 |
+ for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
|
|
| 454 |
+ AC3Block *block = &s->blocks[blk]; |
|
| 455 |
+ for (ch = 1; ch <= s->fbw_channels; ch++) {
|
|
| 456 |
+ if (block->channel_in_cpl[ch]) |
|
| 457 |
+ block->end_freq[ch] = s->start_freq[CPL_CH]; |
|
| 458 |
+ else |
|
| 459 |
+ block->end_freq[ch] = s->bandwidth_code * 3 + 73; |
|
| 460 |
+ } |
|
| 461 |
+ } |
|
| 462 |
+} |
|
| 463 |
+ |
|
| 464 |
+ |
|
| 465 |
+/** |
|
| 466 |
+ * Calculate a single coupling coordinate. |
|
| 467 |
+ */ |
|
| 468 |
+static inline float calc_cpl_coord(float energy_ch, float energy_cpl) |
|
| 469 |
+{
|
|
| 470 |
+ float coord = 0.125; |
|
| 471 |
+ if (energy_cpl > 0) |
|
| 472 |
+ coord *= sqrtf(energy_ch / energy_cpl); |
|
| 473 |
+ return coord; |
|
| 474 |
+} |
|
| 475 |
+ |
|
| 476 |
+ |
|
| 477 |
+/** |
|
| 478 |
+ * Calculate coupling channel and coupling coordinates. |
|
| 479 |
+ * TODO: Currently this is only used for the floating-point encoder. I was |
|
| 480 |
+ * able to make it work for the fixed-point encoder, but quality was |
|
| 481 |
+ * generally lower in most cases than not using coupling. If a more |
|
| 482 |
+ * adaptive coupling strategy were to be implemented it might be useful |
|
| 483 |
+ * at that time to use coupling for the fixed-point encoder as well. |
|
| 484 |
+ */ |
|
| 485 |
+static void apply_channel_coupling(AC3EncodeContext *s) |
|
| 486 |
+{
|
|
| 487 |
+#if CONFIG_AC3ENC_FLOAT |
|
| 488 |
+ DECLARE_ALIGNED(16, float, cpl_coords) [AC3_MAX_BLOCKS][AC3_MAX_CHANNELS][16]; |
|
| 489 |
+ DECLARE_ALIGNED(16, int32_t, fixed_cpl_coords)[AC3_MAX_BLOCKS][AC3_MAX_CHANNELS][16]; |
|
| 490 |
+ int blk, ch, bnd, i, j; |
|
| 491 |
+ CoefSumType energy[AC3_MAX_BLOCKS][AC3_MAX_CHANNELS][16] = {{{0}}};
|
|
| 492 |
+ int num_cpl_coefs = s->num_cpl_subbands * 12; |
|
| 493 |
+ |
|
| 494 |
+ /* calculate coupling channel from fbw channels */ |
|
| 495 |
+ for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
|
|
| 496 |
+ AC3Block *block = &s->blocks[blk]; |
|
| 497 |
+ CoefType *cpl_coef = &block->mdct_coef[CPL_CH][s->start_freq[CPL_CH]]; |
|
| 498 |
+ if (!block->cpl_in_use) |
|
| 499 |
+ continue; |
|
| 500 |
+ memset(cpl_coef-1, 0, (num_cpl_coefs+4) * sizeof(*cpl_coef)); |
|
| 501 |
+ for (ch = 1; ch <= s->fbw_channels; ch++) {
|
|
| 502 |
+ CoefType *ch_coef = &block->mdct_coef[ch][s->start_freq[CPL_CH]]; |
|
| 503 |
+ if (!block->channel_in_cpl[ch]) |
|
| 504 |
+ continue; |
|
| 505 |
+ for (i = 0; i < num_cpl_coefs; i++) |
|
| 506 |
+ cpl_coef[i] += ch_coef[i]; |
|
| 507 |
+ } |
|
| 508 |
+ /* note: coupling start bin % 4 will always be 1 and num_cpl_coefs |
|
| 509 |
+ will always be a multiple of 12, so we need to subtract 1 from |
|
| 510 |
+ the start and add 4 to the length when using optimized |
|
| 511 |
+ functions which require 16-byte alignment. */ |
|
| 512 |
+ |
|
| 513 |
+ /* coefficients must be clipped to +/- 1.0 in order to be encoded */ |
|
| 514 |
+ s->dsp.vector_clipf(cpl_coef-1, cpl_coef-1, -1.0f, 1.0f, num_cpl_coefs+4); |
|
| 515 |
+ |
|
| 516 |
+ /* scale coupling coefficients from float to 24-bit fixed-point */ |
|
| 517 |
+ s->ac3dsp.float_to_fixed24(&block->fixed_coef[CPL_CH][s->start_freq[CPL_CH]-1], |
|
| 518 |
+ cpl_coef-1, num_cpl_coefs+4); |
|
| 519 |
+ } |
|
| 520 |
+ |
|
| 521 |
+ /* calculate energy in each band in coupling channel and each fbw channel */ |
|
| 522 |
+ /* TODO: possibly use SIMD to speed up energy calculation */ |
|
| 523 |
+ bnd = 0; |
|
| 524 |
+ i = s->start_freq[CPL_CH]; |
|
| 525 |
+ while (i < s->cpl_end_freq) {
|
|
| 526 |
+ int band_size = s->cpl_band_sizes[bnd]; |
|
| 527 |
+ for (ch = CPL_CH; ch <= s->fbw_channels; ch++) {
|
|
| 528 |
+ for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
|
|
| 529 |
+ AC3Block *block = &s->blocks[blk]; |
|
| 530 |
+ if (!block->cpl_in_use || (ch > CPL_CH && !block->channel_in_cpl[ch])) |
|
| 531 |
+ continue; |
|
| 532 |
+ for (j = 0; j < band_size; j++) {
|
|
| 533 |
+ CoefType v = block->mdct_coef[ch][i+j]; |
|
| 534 |
+ MAC_COEF(energy[blk][ch][bnd], v, v); |
|
| 535 |
+ } |
|
| 536 |
+ } |
|
| 537 |
+ } |
|
| 538 |
+ i += band_size; |
|
| 539 |
+ bnd++; |
|
| 540 |
+ } |
|
| 541 |
+ |
|
| 542 |
+ /* determine which blocks to send new coupling coordinates for */ |
|
| 543 |
+ for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
|
|
| 544 |
+ AC3Block *block = &s->blocks[blk]; |
|
| 545 |
+ AC3Block *block0 = blk ? &s->blocks[blk-1] : NULL; |
|
| 546 |
+ int new_coords = 0; |
|
| 547 |
+ CoefSumType coord_diff[AC3_MAX_CHANNELS] = {0,};
|
|
| 548 |
+ |
|
| 549 |
+ if (block->cpl_in_use) {
|
|
| 550 |
+ /* calculate coupling coordinates for all blocks and calculate the |
|
| 551 |
+ average difference between coordinates in successive blocks */ |
|
| 552 |
+ for (ch = 1; ch <= s->fbw_channels; ch++) {
|
|
| 553 |
+ if (!block->channel_in_cpl[ch]) |
|
| 554 |
+ continue; |
|
| 555 |
+ |
|
| 556 |
+ for (bnd = 0; bnd < s->num_cpl_bands; bnd++) {
|
|
| 557 |
+ cpl_coords[blk][ch][bnd] = calc_cpl_coord(energy[blk][ch][bnd], |
|
| 558 |
+ energy[blk][CPL_CH][bnd]); |
|
| 559 |
+ if (blk > 0 && block0->cpl_in_use && |
|
| 560 |
+ block0->channel_in_cpl[ch]) {
|
|
| 561 |
+ coord_diff[ch] += fabs(cpl_coords[blk-1][ch][bnd] - |
|
| 562 |
+ cpl_coords[blk ][ch][bnd]); |
|
| 563 |
+ } |
|
| 564 |
+ } |
|
| 565 |
+ coord_diff[ch] /= s->num_cpl_bands; |
|
| 566 |
+ } |
|
| 567 |
+ |
|
| 568 |
+ /* send new coordinates if this is the first block, if previous |
|
| 569 |
+ * block did not use coupling but this block does, the channels |
|
| 570 |
+ * using coupling has changed from the previous block, or the |
|
| 571 |
+ * coordinate difference from the last block for any channel is |
|
| 572 |
+ * greater than a threshold value. */ |
|
| 573 |
+ if (blk == 0) {
|
|
| 574 |
+ new_coords = 1; |
|
| 575 |
+ } else if (!block0->cpl_in_use) {
|
|
| 576 |
+ new_coords = 1; |
|
| 577 |
+ } else {
|
|
| 578 |
+ for (ch = 1; ch <= s->fbw_channels; ch++) {
|
|
| 579 |
+ if (block->channel_in_cpl[ch] && !block0->channel_in_cpl[ch]) {
|
|
| 580 |
+ new_coords = 1; |
|
| 581 |
+ break; |
|
| 582 |
+ } |
|
| 583 |
+ } |
|
| 584 |
+ if (!new_coords) {
|
|
| 585 |
+ for (ch = 1; ch <= s->fbw_channels; ch++) {
|
|
| 586 |
+ if (block->channel_in_cpl[ch] && coord_diff[ch] > 0.04) {
|
|
| 587 |
+ new_coords = 1; |
|
| 588 |
+ break; |
|
| 589 |
+ } |
|
| 590 |
+ } |
|
| 591 |
+ } |
|
| 592 |
+ } |
|
| 593 |
+ } |
|
| 594 |
+ block->new_cpl_coords = new_coords; |
|
| 595 |
+ } |
|
| 596 |
+ |
|
| 597 |
+ /* calculate final coupling coordinates, taking into account reusing of |
|
| 598 |
+ coordinates in successive blocks */ |
|
| 599 |
+ for (bnd = 0; bnd < s->num_cpl_bands; bnd++) {
|
|
| 600 |
+ blk = 0; |
|
| 601 |
+ while (blk < AC3_MAX_BLOCKS) {
|
|
| 602 |
+ int blk1; |
|
| 603 |
+ CoefSumType energy_cpl; |
|
| 604 |
+ AC3Block *block = &s->blocks[blk]; |
|
| 605 |
+ |
|
| 606 |
+ if (!block->cpl_in_use) {
|
|
| 607 |
+ blk++; |
|
| 608 |
+ continue; |
|
| 609 |
+ } |
|
| 610 |
+ |
|
| 611 |
+ energy_cpl = energy[blk][CPL_CH][bnd]; |
|
| 612 |
+ blk1 = blk+1; |
|
| 613 |
+ while (!s->blocks[blk1].new_cpl_coords && blk1 < AC3_MAX_BLOCKS) {
|
|
| 614 |
+ if (s->blocks[blk1].cpl_in_use) |
|
| 615 |
+ energy_cpl += energy[blk1][CPL_CH][bnd]; |
|
| 616 |
+ blk1++; |
|
| 617 |
+ } |
|
| 618 |
+ |
|
| 619 |
+ for (ch = 1; ch <= s->fbw_channels; ch++) {
|
|
| 620 |
+ CoefType energy_ch; |
|
| 621 |
+ if (!block->channel_in_cpl[ch]) |
|
| 622 |
+ continue; |
|
| 623 |
+ energy_ch = energy[blk][ch][bnd]; |
|
| 624 |
+ blk1 = blk+1; |
|
| 625 |
+ while (!s->blocks[blk1].new_cpl_coords && blk1 < AC3_MAX_BLOCKS) {
|
|
| 626 |
+ if (s->blocks[blk1].cpl_in_use) |
|
| 627 |
+ energy_ch += energy[blk1][ch][bnd]; |
|
| 628 |
+ blk1++; |
|
| 629 |
+ } |
|
| 630 |
+ cpl_coords[blk][ch][bnd] = calc_cpl_coord(energy_ch, energy_cpl); |
|
| 631 |
+ } |
|
| 632 |
+ blk = blk1; |
|
| 633 |
+ } |
|
| 634 |
+ } |
|
| 635 |
+ |
|
| 636 |
+ /* calculate exponents/mantissas for coupling coordinates */ |
|
| 637 |
+ for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
|
|
| 638 |
+ AC3Block *block = &s->blocks[blk]; |
|
| 639 |
+ if (!block->cpl_in_use || !block->new_cpl_coords) |
|
| 640 |
+ continue; |
|
| 641 |
+ |
|
| 642 |
+ s->ac3dsp.float_to_fixed24(fixed_cpl_coords[blk][1], |
|
| 643 |
+ cpl_coords[blk][1], |
|
| 644 |
+ s->fbw_channels * 16); |
|
| 645 |
+ s->ac3dsp.extract_exponents(block->cpl_coord_exp[1], |
|
| 646 |
+ fixed_cpl_coords[blk][1], |
|
| 647 |
+ s->fbw_channels * 16); |
|
| 648 |
+ |
|
| 649 |
+ for (ch = 1; ch <= s->fbw_channels; ch++) {
|
|
| 650 |
+ int bnd, min_exp, max_exp, master_exp; |
|
| 651 |
+ |
|
| 652 |
+ /* determine master exponent */ |
|
| 653 |
+ min_exp = max_exp = block->cpl_coord_exp[ch][0]; |
|
| 654 |
+ for (bnd = 1; bnd < s->num_cpl_bands; bnd++) {
|
|
| 655 |
+ int exp = block->cpl_coord_exp[ch][bnd]; |
|
| 656 |
+ min_exp = FFMIN(exp, min_exp); |
|
| 657 |
+ max_exp = FFMAX(exp, max_exp); |
|
| 658 |
+ } |
|
| 659 |
+ master_exp = ((max_exp - 15) + 2) / 3; |
|
| 660 |
+ master_exp = FFMAX(master_exp, 0); |
|
| 661 |
+ while (min_exp < master_exp * 3) |
|
| 662 |
+ master_exp--; |
|
| 663 |
+ for (bnd = 0; bnd < s->num_cpl_bands; bnd++) {
|
|
| 664 |
+ block->cpl_coord_exp[ch][bnd] = av_clip(block->cpl_coord_exp[ch][bnd] - |
|
| 665 |
+ master_exp * 3, 0, 15); |
|
| 666 |
+ } |
|
| 667 |
+ block->cpl_master_exp[ch] = master_exp; |
|
| 668 |
+ |
|
| 669 |
+ /* quantize mantissas */ |
|
| 670 |
+ for (bnd = 0; bnd < s->num_cpl_bands; bnd++) {
|
|
| 671 |
+ int cpl_exp = block->cpl_coord_exp[ch][bnd]; |
|
| 672 |
+ int cpl_mant = (fixed_cpl_coords[blk][ch][bnd] << (5 + cpl_exp + master_exp * 3)) >> 24; |
|
| 673 |
+ if (cpl_exp == 15) |
|
| 674 |
+ cpl_mant >>= 1; |
|
| 675 |
+ else |
|
| 676 |
+ cpl_mant -= 16; |
|
| 677 |
+ |
|
| 678 |
+ block->cpl_coord_mant[ch][bnd] = cpl_mant; |
|
| 679 |
+ } |
|
| 680 |
+ } |
|
| 681 |
+ } |
|
| 682 |
+#endif /* CONFIG_AC3ENC_FLOAT */ |
|
| 683 |
+} |
|
| 684 |
+ |
|
| 685 |
+ |
|
| 404 | 686 |
/** |
| 405 | 687 |
* Determine rematrixing flags for each block and band. |
| 406 | 688 |
*/ |
| ... | ... |
@@ -413,23 +765,32 @@ static void compute_rematrixing_strategy(AC3EncodeContext *s) |
| 413 | 413 |
if (s->channel_mode != AC3_CHMODE_STEREO) |
| 414 | 414 |
return; |
| 415 | 415 |
|
| 416 |
- s->num_rematrixing_bands = 4; |
|
| 417 |
- |
|
| 418 |
- nb_coefs = FFMIN(s->nb_coefs[0], s->nb_coefs[1]); |
|
| 419 |
- |
|
| 420 | 416 |
for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
|
| 421 | 417 |
block = &s->blocks[blk]; |
| 422 | 418 |
block->new_rematrixing_strategy = !blk; |
| 423 |
- if (!s->rematrixing_enabled) |
|
| 419 |
+ |
|
| 420 |
+ if (!s->rematrixing_enabled) {
|
|
| 421 |
+ block0 = block; |
|
| 424 | 422 |
continue; |
| 425 |
- for (bnd = 0; bnd < s->num_rematrixing_bands; bnd++) {
|
|
| 423 |
+ } |
|
| 424 |
+ |
|
| 425 |
+ block->num_rematrixing_bands = 4; |
|
| 426 |
+ if (block->cpl_in_use) {
|
|
| 427 |
+ block->num_rematrixing_bands -= (s->start_freq[CPL_CH] <= 61); |
|
| 428 |
+ block->num_rematrixing_bands -= (s->start_freq[CPL_CH] == 37); |
|
| 429 |
+ if (blk && block->num_rematrixing_bands != block0->num_rematrixing_bands) |
|
| 430 |
+ block->new_rematrixing_strategy = 1; |
|
| 431 |
+ } |
|
| 432 |
+ nb_coefs = FFMIN(block->end_freq[1], block->end_freq[2]); |
|
| 433 |
+ |
|
| 434 |
+ for (bnd = 0; bnd < block->num_rematrixing_bands; bnd++) {
|
|
| 426 | 435 |
/* calculate sum of squared coeffs for one band in one block */ |
| 427 | 436 |
int start = ff_ac3_rematrix_band_tab[bnd]; |
| 428 | 437 |
int end = FFMIN(nb_coefs, ff_ac3_rematrix_band_tab[bnd+1]); |
| 429 | 438 |
CoefSumType sum[4] = {0,};
|
| 430 | 439 |
for (i = start; i < end; i++) {
|
| 431 |
- CoefType lt = block->mdct_coef[0][i]; |
|
| 432 |
- CoefType rt = block->mdct_coef[1][i]; |
|
| 440 |
+ CoefType lt = block->mdct_coef[1][i]; |
|
| 441 |
+ CoefType rt = block->mdct_coef[2][i]; |
|
| 433 | 442 |
CoefType md = lt + rt; |
| 434 | 443 |
CoefType sd = lt - rt; |
| 435 | 444 |
MAC_COEF(sum[0], lt, lt); |
| ... | ... |
@@ -468,21 +829,20 @@ static void apply_rematrixing(AC3EncodeContext *s) |
| 468 | 468 |
if (!s->rematrixing_enabled) |
| 469 | 469 |
return; |
| 470 | 470 |
|
| 471 |
- nb_coefs = FFMIN(s->nb_coefs[0], s->nb_coefs[1]); |
|
| 472 |
- |
|
| 473 | 471 |
for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
|
| 474 | 472 |
AC3Block *block = &s->blocks[blk]; |
| 475 | 473 |
if (block->new_rematrixing_strategy) |
| 476 | 474 |
flags = block->rematrixing_flags; |
| 477 |
- for (bnd = 0; bnd < s->num_rematrixing_bands; bnd++) {
|
|
| 475 |
+ nb_coefs = FFMIN(block->end_freq[1], block->end_freq[2]); |
|
| 476 |
+ for (bnd = 0; bnd < block->num_rematrixing_bands; bnd++) {
|
|
| 478 | 477 |
if (flags[bnd]) {
|
| 479 | 478 |
start = ff_ac3_rematrix_band_tab[bnd]; |
| 480 | 479 |
end = FFMIN(nb_coefs, ff_ac3_rematrix_band_tab[bnd+1]); |
| 481 | 480 |
for (i = start; i < end; i++) {
|
| 482 |
- int32_t lt = block->fixed_coef[0][i]; |
|
| 483 |
- int32_t rt = block->fixed_coef[1][i]; |
|
| 484 |
- block->fixed_coef[0][i] = (lt + rt) >> 1; |
|
| 485 |
- block->fixed_coef[1][i] = (lt - rt) >> 1; |
|
| 481 |
+ int32_t lt = block->fixed_coef[1][i]; |
|
| 482 |
+ int32_t rt = block->fixed_coef[2][i]; |
|
| 483 |
+ block->fixed_coef[1][i] = (lt + rt) >> 1; |
|
| 484 |
+ block->fixed_coef[2][i] = (lt - rt) >> 1; |
|
| 486 | 485 |
} |
| 487 | 486 |
} |
| 488 | 487 |
} |
| ... | ... |
@@ -499,12 +859,13 @@ static av_cold void exponent_init(AC3EncodeContext *s) |
| 499 | 499 |
|
| 500 | 500 |
for (expstr = EXP_D15-1; expstr <= EXP_D45-1; expstr++) {
|
| 501 | 501 |
grpsize = 3 << expstr; |
| 502 |
- for (i = 73; i < 256; i++) {
|
|
| 503 |
- exponent_group_tab[expstr][i] = (i + grpsize - 4) / grpsize; |
|
| 502 |
+ for (i = 12; i < 256; i++) {
|
|
| 503 |
+ exponent_group_tab[0][expstr][i] = (i + grpsize - 4) / grpsize; |
|
| 504 |
+ exponent_group_tab[1][expstr][i] = (i ) / grpsize; |
|
| 504 | 505 |
} |
| 505 | 506 |
} |
| 506 | 507 |
/* LFE */ |
| 507 |
- exponent_group_tab[0][7] = 2; |
|
| 508 |
+ exponent_group_tab[0][0][7] = 2; |
|
| 508 | 509 |
} |
| 509 | 510 |
|
| 510 | 511 |
|
| ... | ... |
@@ -517,7 +878,7 @@ static void extract_exponents(AC3EncodeContext *s) |
| 517 | 517 |
{
|
| 518 | 518 |
int blk, ch; |
| 519 | 519 |
|
| 520 |
- for (ch = 0; ch < s->channels; ch++) {
|
|
| 520 |
+ for (ch = !s->cpl_on; ch <= s->channels; ch++) {
|
|
| 521 | 521 |
for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
|
| 522 | 522 |
AC3Block *block = &s->blocks[blk]; |
| 523 | 523 |
s->ac3dsp.extract_exponents(block->exp[ch], block->fixed_coef[ch], |
| ... | ... |
@@ -542,7 +903,7 @@ static void compute_exp_strategy(AC3EncodeContext *s) |
| 542 | 542 |
{
|
| 543 | 543 |
int ch, blk, blk1; |
| 544 | 544 |
|
| 545 |
- for (ch = 0; ch < s->fbw_channels; ch++) {
|
|
| 545 |
+ for (ch = !s->cpl_on; ch <= s->fbw_channels; ch++) {
|
|
| 546 | 546 |
uint8_t *exp_strategy = s->exp_strategy[ch]; |
| 547 | 547 |
uint8_t *exp = s->blocks[0].exp[ch]; |
| 548 | 548 |
int exp_diff; |
| ... | ... |
@@ -551,13 +912,18 @@ static void compute_exp_strategy(AC3EncodeContext *s) |
| 551 | 551 |
reused in the next frame */ |
| 552 | 552 |
exp_strategy[0] = EXP_NEW; |
| 553 | 553 |
exp += AC3_MAX_COEFS; |
| 554 |
- for (blk = 1; blk < AC3_MAX_BLOCKS; blk++) {
|
|
| 554 |
+ for (blk = 1; blk < AC3_MAX_BLOCKS; blk++, exp += AC3_MAX_COEFS) {
|
|
| 555 |
+ if ((ch == CPL_CH && (!s->blocks[blk].cpl_in_use || !s->blocks[blk-1].cpl_in_use)) || |
|
| 556 |
+ (ch > CPL_CH && (s->blocks[blk].channel_in_cpl[ch] != s->blocks[blk-1].channel_in_cpl[ch]))) {
|
|
| 557 |
+ exp_strategy[blk] = EXP_NEW; |
|
| 558 |
+ continue; |
|
| 559 |
+ } |
|
| 555 | 560 |
exp_diff = s->dsp.sad[0](NULL, exp, exp - AC3_MAX_COEFS, 16, 16); |
| 556 |
- if (exp_diff > EXP_DIFF_THRESHOLD) |
|
| 561 |
+ exp_strategy[blk] = EXP_REUSE; |
|
| 562 |
+ if (ch == CPL_CH && exp_diff > (EXP_DIFF_THRESHOLD * (s->blocks[blk].end_freq[ch] - s->start_freq[ch]) / AC3_MAX_COEFS)) |
|
| 563 |
+ exp_strategy[blk] = EXP_NEW; |
|
| 564 |
+ else if (ch > CPL_CH && exp_diff > EXP_DIFF_THRESHOLD) |
|
| 557 | 565 |
exp_strategy[blk] = EXP_NEW; |
| 558 |
- else |
|
| 559 |
- exp_strategy[blk] = EXP_REUSE; |
|
| 560 |
- exp += AC3_MAX_COEFS; |
|
| 561 | 566 |
} |
| 562 | 567 |
|
| 563 | 568 |
/* now select the encoding strategy type : if exponents are often |
| ... | ... |
@@ -588,25 +954,26 @@ static void compute_exp_strategy(AC3EncodeContext *s) |
| 588 | 588 |
/** |
| 589 | 589 |
* Update the exponents so that they are the ones the decoder will decode. |
| 590 | 590 |
*/ |
| 591 |
-static void encode_exponents_blk_ch(uint8_t *exp, int nb_exps, int exp_strategy) |
|
| 591 |
+static void encode_exponents_blk_ch(uint8_t *exp, int nb_exps, int exp_strategy, |
|
| 592 |
+ int cpl) |
|
| 592 | 593 |
{
|
| 593 | 594 |
int nb_groups, i, k; |
| 594 | 595 |
|
| 595 |
- nb_groups = exponent_group_tab[exp_strategy-1][nb_exps] * 3; |
|
| 596 |
+ nb_groups = exponent_group_tab[cpl][exp_strategy-1][nb_exps] * 3; |
|
| 596 | 597 |
|
| 597 | 598 |
/* for each group, compute the minimum exponent */ |
| 598 | 599 |
switch(exp_strategy) {
|
| 599 | 600 |
case EXP_D25: |
| 600 |
- for (i = 1, k = 1; i <= nb_groups; i++) {
|
|
| 601 |
+ for (i = 1, k = 1-cpl; i <= nb_groups; i++) {
|
|
| 601 | 602 |
uint8_t exp_min = exp[k]; |
| 602 | 603 |
if (exp[k+1] < exp_min) |
| 603 | 604 |
exp_min = exp[k+1]; |
| 604 |
- exp[i] = exp_min; |
|
| 605 |
+ exp[i-cpl] = exp_min; |
|
| 605 | 606 |
k += 2; |
| 606 | 607 |
} |
| 607 | 608 |
break; |
| 608 | 609 |
case EXP_D45: |
| 609 |
- for (i = 1, k = 1; i <= nb_groups; i++) {
|
|
| 610 |
+ for (i = 1, k = 1-cpl; i <= nb_groups; i++) {
|
|
| 610 | 611 |
uint8_t exp_min = exp[k]; |
| 611 | 612 |
if (exp[k+1] < exp_min) |
| 612 | 613 |
exp_min = exp[k+1]; |
| ... | ... |
@@ -614,14 +981,14 @@ static void encode_exponents_blk_ch(uint8_t *exp, int nb_exps, int exp_strategy) |
| 614 | 614 |
exp_min = exp[k+2]; |
| 615 | 615 |
if (exp[k+3] < exp_min) |
| 616 | 616 |
exp_min = exp[k+3]; |
| 617 |
- exp[i] = exp_min; |
|
| 617 |
+ exp[i-cpl] = exp_min; |
|
| 618 | 618 |
k += 4; |
| 619 | 619 |
} |
| 620 | 620 |
break; |
| 621 | 621 |
} |
| 622 | 622 |
|
| 623 | 623 |
/* constraint for DC exponent */ |
| 624 |
- if (exp[0] > 15) |
|
| 624 |
+ if (!cpl && exp[0] > 15) |
|
| 625 | 625 |
exp[0] = 15; |
| 626 | 626 |
|
| 627 | 627 |
/* decrease the delta between groups to within 2 so that they can be |
| ... | ... |
@@ -632,18 +999,21 @@ static void encode_exponents_blk_ch(uint8_t *exp, int nb_exps, int exp_strategy) |
| 632 | 632 |
while (--i >= 0) |
| 633 | 633 |
exp[i] = FFMIN(exp[i], exp[i+1] + 2); |
| 634 | 634 |
|
| 635 |
+ if (cpl) |
|
| 636 |
+ exp[-1] = exp[0] & ~1; |
|
| 637 |
+ |
|
| 635 | 638 |
/* now we have the exponent values the decoder will see */ |
| 636 | 639 |
switch (exp_strategy) {
|
| 637 | 640 |
case EXP_D25: |
| 638 |
- for (i = nb_groups, k = nb_groups * 2; i > 0; i--) {
|
|
| 639 |
- uint8_t exp1 = exp[i]; |
|
| 641 |
+ for (i = nb_groups, k = (nb_groups * 2)-cpl; i > 0; i--) {
|
|
| 642 |
+ uint8_t exp1 = exp[i-cpl]; |
|
| 640 | 643 |
exp[k--] = exp1; |
| 641 | 644 |
exp[k--] = exp1; |
| 642 | 645 |
} |
| 643 | 646 |
break; |
| 644 | 647 |
case EXP_D45: |
| 645 |
- for (i = nb_groups, k = nb_groups * 4; i > 0; i--) {
|
|
| 646 |
- exp[k] = exp[k-1] = exp[k-2] = exp[k-3] = exp[i]; |
|
| 648 |
+ for (i = nb_groups, k = (nb_groups * 4)-cpl; i > 0; i--) {
|
|
| 649 |
+ exp[k] = exp[k-1] = exp[k-2] = exp[k-3] = exp[i-cpl]; |
|
| 647 | 650 |
k -= 4; |
| 648 | 651 |
} |
| 649 | 652 |
break; |
| ... | ... |
@@ -659,32 +1029,40 @@ static void encode_exponents_blk_ch(uint8_t *exp, int nb_exps, int exp_strategy) |
| 659 | 659 |
*/ |
| 660 | 660 |
static void encode_exponents(AC3EncodeContext *s) |
| 661 | 661 |
{
|
| 662 |
- int blk, blk1, ch; |
|
| 662 |
+ int blk, blk1, ch, cpl; |
|
| 663 | 663 |
uint8_t *exp, *exp_strategy; |
| 664 | 664 |
int nb_coefs, num_reuse_blocks; |
| 665 | 665 |
|
| 666 |
- for (ch = 0; ch < s->channels; ch++) {
|
|
| 667 |
- exp = s->blocks[0].exp[ch]; |
|
| 666 |
+ for (ch = !s->cpl_on; ch <= s->channels; ch++) {
|
|
| 667 |
+ exp = s->blocks[0].exp[ch] + s->start_freq[ch]; |
|
| 668 | 668 |
exp_strategy = s->exp_strategy[ch]; |
| 669 |
- nb_coefs = s->nb_coefs[ch]; |
|
| 670 | 669 |
|
| 670 |
+ cpl = (ch == CPL_CH); |
|
| 671 | 671 |
blk = 0; |
| 672 | 672 |
while (blk < AC3_MAX_BLOCKS) {
|
| 673 |
+ AC3Block *block = &s->blocks[blk]; |
|
| 674 |
+ if (cpl && !block->cpl_in_use) {
|
|
| 675 |
+ exp += AC3_MAX_COEFS; |
|
| 676 |
+ blk++; |
|
| 677 |
+ continue; |
|
| 678 |
+ } |
|
| 679 |
+ nb_coefs = block->end_freq[ch] - s->start_freq[ch]; |
|
| 673 | 680 |
blk1 = blk + 1; |
| 674 | 681 |
|
| 675 | 682 |
/* count the number of EXP_REUSE blocks after the current block |
| 676 | 683 |
and set exponent reference block pointers */ |
| 677 |
- s->blocks[blk].exp_ref_block[ch] = &s->blocks[blk]; |
|
| 684 |
+ block->exp_ref_block[ch] = block; |
|
| 678 | 685 |
while (blk1 < AC3_MAX_BLOCKS && exp_strategy[blk1] == EXP_REUSE) {
|
| 679 |
- s->blocks[blk1].exp_ref_block[ch] = &s->blocks[blk]; |
|
| 686 |
+ s->blocks[blk1].exp_ref_block[ch] = block; |
|
| 680 | 687 |
blk1++; |
| 681 | 688 |
} |
| 682 | 689 |
num_reuse_blocks = blk1 - blk - 1; |
| 683 | 690 |
|
| 684 | 691 |
/* for the EXP_REUSE case we select the min of the exponents */ |
| 685 |
- s->ac3dsp.ac3_exponent_min(exp, num_reuse_blocks, nb_coefs); |
|
| 692 |
+ s->ac3dsp.ac3_exponent_min(exp-s->start_freq[ch], num_reuse_blocks, |
|
| 693 |
+ AC3_MAX_COEFS); |
|
| 686 | 694 |
|
| 687 |
- encode_exponents_blk_ch(exp, nb_coefs, exp_strategy[blk]); |
|
| 695 |
+ encode_exponents_blk_ch(exp, nb_coefs, exp_strategy[blk], cpl); |
|
| 688 | 696 |
|
| 689 | 697 |
exp += AC3_MAX_COEFS * (num_reuse_blocks + 1); |
| 690 | 698 |
blk = blk1; |
| ... | ... |
@@ -700,7 +1078,7 @@ static void encode_exponents(AC3EncodeContext *s) |
| 700 | 700 |
*/ |
| 701 | 701 |
static void group_exponents(AC3EncodeContext *s) |
| 702 | 702 |
{
|
| 703 |
- int blk, ch, i; |
|
| 703 |
+ int blk, ch, i, cpl; |
|
| 704 | 704 |
int group_size, nb_groups, bit_count; |
| 705 | 705 |
uint8_t *p; |
| 706 | 706 |
int delta0, delta1, delta2; |
| ... | ... |
@@ -709,14 +1087,15 @@ static void group_exponents(AC3EncodeContext *s) |
| 709 | 709 |
bit_count = 0; |
| 710 | 710 |
for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
|
| 711 | 711 |
AC3Block *block = &s->blocks[blk]; |
| 712 |
- for (ch = 0; ch < s->channels; ch++) {
|
|
| 712 |
+ for (ch = !block->cpl_in_use; ch <= s->channels; ch++) {
|
|
| 713 | 713 |
int exp_strategy = s->exp_strategy[ch][blk]; |
| 714 | 714 |
if (exp_strategy == EXP_REUSE) |
| 715 | 715 |
continue; |
| 716 |
+ cpl = (ch == CPL_CH); |
|
| 716 | 717 |
group_size = exp_strategy + (exp_strategy == EXP_D45); |
| 717 |
- nb_groups = exponent_group_tab[exp_strategy-1][s->nb_coefs[ch]]; |
|
| 718 |
+ nb_groups = exponent_group_tab[cpl][exp_strategy-1][block->end_freq[ch]-s->start_freq[ch]]; |
|
| 718 | 719 |
bit_count += 4 + (nb_groups * 7); |
| 719 |
- p = block->exp[ch]; |
|
| 720 |
+ p = block->exp[ch] + s->start_freq[ch] - cpl; |
|
| 720 | 721 |
|
| 721 | 722 |
/* DC exponent */ |
| 722 | 723 |
exp1 = *p++; |
| ... | ... |
@@ -783,9 +1162,7 @@ static void count_frame_bits_fixed(AC3EncodeContext *s) |
| 783 | 783 |
|
| 784 | 784 |
/* assumptions: |
| 785 | 785 |
* no dynamic range codes |
| 786 |
- * no channel coupling |
|
| 787 | 786 |
* bit allocation parameters do not change between blocks |
| 788 |
- * SNR offsets do not change between blocks |
|
| 789 | 787 |
* no delta bit allocation |
| 790 | 788 |
* no skipped data |
| 791 | 789 |
* no auxiliary data |
| ... | ... |
@@ -806,11 +1183,6 @@ static void count_frame_bits_fixed(AC3EncodeContext *s) |
| 806 | 806 |
/* dynamic range */ |
| 807 | 807 |
frame_bits++; |
| 808 | 808 |
|
| 809 |
- /* coupling strategy */ |
|
| 810 |
- frame_bits++; |
|
| 811 |
- if (!blk) |
|
| 812 |
- frame_bits++; |
|
| 813 |
- |
|
| 814 | 809 |
/* exponent strategy */ |
| 815 | 810 |
frame_bits += 2 * s->fbw_channels; |
| 816 | 811 |
if (s->lfe_on) |
| ... | ... |
@@ -821,11 +1193,6 @@ static void count_frame_bits_fixed(AC3EncodeContext *s) |
| 821 | 821 |
if (!blk) |
| 822 | 822 |
frame_bits += 2 + 2 + 2 + 2 + 3; |
| 823 | 823 |
|
| 824 |
- /* snr offsets and fast gain codes */ |
|
| 825 |
- frame_bits++; |
|
| 826 |
- if (!blk) |
|
| 827 |
- frame_bits += 6 + s->channels * (4 + 3); |
|
| 828 |
- |
|
| 829 | 824 |
/* delta bit allocation */ |
| 830 | 825 |
frame_bits++; |
| 831 | 826 |
|
| ... | ... |
@@ -857,7 +1224,7 @@ static void bit_alloc_init(AC3EncodeContext *s) |
| 857 | 857 |
s->slow_gain_code = 1; |
| 858 | 858 |
s->db_per_bit_code = 3; |
| 859 | 859 |
s->floor_code = 7; |
| 860 |
- for (ch = 0; ch < s->channels; ch++) |
|
| 860 |
+ for (ch = 0; ch <= s->channels; ch++) |
|
| 861 | 861 |
s->fast_gain_code[ch] = 4; |
| 862 | 862 |
|
| 863 | 863 |
/* initial snr offset */ |
| ... | ... |
@@ -871,6 +1238,8 @@ static void bit_alloc_init(AC3EncodeContext *s) |
| 871 | 871 |
s->bit_alloc.slow_gain = ff_ac3_slow_gain_tab[s->slow_gain_code]; |
| 872 | 872 |
s->bit_alloc.db_per_bit = ff_ac3_db_per_bit_tab[s->db_per_bit_code]; |
| 873 | 873 |
s->bit_alloc.floor = ff_ac3_floor_tab[s->floor_code]; |
| 874 |
+ s->bit_alloc.cpl_fast_leak = 0; |
|
| 875 |
+ s->bit_alloc.cpl_slow_leak = 0; |
|
| 874 | 876 |
|
| 875 | 877 |
count_frame_bits_fixed(s); |
| 876 | 878 |
} |
| ... | ... |
@@ -899,17 +1268,64 @@ static void count_frame_bits(AC3EncodeContext *s) |
| 899 | 899 |
|
| 900 | 900 |
/* audio blocks */ |
| 901 | 901 |
for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
|
| 902 |
+ AC3Block *block = &s->blocks[blk]; |
|
| 903 |
+ |
|
| 904 |
+ /* coupling strategy */ |
|
| 905 |
+ frame_bits++; |
|
| 906 |
+ if (block->new_cpl_strategy) {
|
|
| 907 |
+ frame_bits++; |
|
| 908 |
+ if (block->cpl_in_use) {
|
|
| 909 |
+ frame_bits += s->fbw_channels; |
|
| 910 |
+ if (s->channel_mode == AC3_CHMODE_STEREO) |
|
| 911 |
+ frame_bits++; |
|
| 912 |
+ frame_bits += 4 + 4; |
|
| 913 |
+ frame_bits += s->num_cpl_subbands - 1; |
|
| 914 |
+ } |
|
| 915 |
+ } |
|
| 916 |
+ |
|
| 917 |
+ /* coupling coordinates */ |
|
| 918 |
+ if (block->cpl_in_use) {
|
|
| 919 |
+ for (ch = 1; ch <= s->fbw_channels; ch++) {
|
|
| 920 |
+ if (block->channel_in_cpl[ch]) {
|
|
| 921 |
+ frame_bits++; |
|
| 922 |
+ if (block->new_cpl_coords) {
|
|
| 923 |
+ frame_bits += 2; |
|
| 924 |
+ frame_bits += (4 + 4) * s->num_cpl_bands; |
|
| 925 |
+ } |
|
| 926 |
+ } |
|
| 927 |
+ } |
|
| 928 |
+ } |
|
| 929 |
+ |
|
| 902 | 930 |
/* stereo rematrixing */ |
| 903 | 931 |
if (s->channel_mode == AC3_CHMODE_STEREO) {
|
| 904 | 932 |
frame_bits++; |
| 905 | 933 |
if (s->blocks[blk].new_rematrixing_strategy) |
| 906 |
- frame_bits += s->num_rematrixing_bands; |
|
| 934 |
+ frame_bits += block->num_rematrixing_bands; |
|
| 907 | 935 |
} |
| 908 | 936 |
|
| 909 | 937 |
/* bandwidth codes & gain range */ |
| 910 |
- for (ch = 0; ch < s->fbw_channels; ch++) {
|
|
| 911 |
- if (s->exp_strategy[ch][blk] != EXP_REUSE) |
|
| 912 |
- frame_bits += 6 + 2; |
|
| 938 |
+ for (ch = 1; ch <= s->fbw_channels; ch++) {
|
|
| 939 |
+ if (s->exp_strategy[ch][blk] != EXP_REUSE) {
|
|
| 940 |
+ if (!block->channel_in_cpl[ch]) |
|
| 941 |
+ frame_bits += 6; |
|
| 942 |
+ frame_bits += 2; |
|
| 943 |
+ } |
|
| 944 |
+ } |
|
| 945 |
+ |
|
| 946 |
+ /* coupling exponent strategy */ |
|
| 947 |
+ if (block->cpl_in_use) |
|
| 948 |
+ frame_bits += 2; |
|
| 949 |
+ |
|
| 950 |
+ /* snr offsets and fast gain codes */ |
|
| 951 |
+ frame_bits++; |
|
| 952 |
+ if (block->new_snr_offsets) |
|
| 953 |
+ frame_bits += 6 + (s->channels + block->cpl_in_use) * (4 + 3); |
|
| 954 |
+ |
|
| 955 |
+ /* coupling leak info */ |
|
| 956 |
+ if (block->cpl_in_use) {
|
|
| 957 |
+ frame_bits++; |
|
| 958 |
+ if (block->new_cpl_leak) |
|
| 959 |
+ frame_bits += 3 + 3; |
|
| 913 | 960 |
} |
| 914 | 961 |
} |
| 915 | 962 |
|
| ... | ... |
@@ -943,16 +1359,16 @@ static void bit_alloc_masking(AC3EncodeContext *s) |
| 943 | 943 |
|
| 944 | 944 |
for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
|
| 945 | 945 |
AC3Block *block = &s->blocks[blk]; |
| 946 |
- for (ch = 0; ch < s->channels; ch++) {
|
|
| 946 |
+ for (ch = !block->cpl_in_use; ch <= s->channels; ch++) {
|
|
| 947 | 947 |
/* We only need psd and mask for calculating bap. |
| 948 | 948 |
Since we currently do not calculate bap when exponent |
| 949 | 949 |
strategy is EXP_REUSE we do not need to calculate psd or mask. */ |
| 950 | 950 |
if (s->exp_strategy[ch][blk] != EXP_REUSE) {
|
| 951 |
- ff_ac3_bit_alloc_calc_psd(block->exp[ch], 0, |
|
| 952 |
- s->nb_coefs[ch], |
|
| 953 |
- block->psd[ch], block->band_psd[ch]); |
|
| 951 |
+ ff_ac3_bit_alloc_calc_psd(block->exp[ch], s->start_freq[ch], |
|
| 952 |
+ block->end_freq[ch], block->psd[ch], |
|
| 953 |
+ block->band_psd[ch]); |
|
| 954 | 954 |
ff_ac3_bit_alloc_calc_mask(&s->bit_alloc, block->band_psd[ch], |
| 955 |
- 0, s->nb_coefs[ch], |
|
| 955 |
+ s->start_freq[ch], block->end_freq[ch], |
|
| 956 | 956 |
ff_ac3_fast_gain_tab[s->fast_gain_code[ch]], |
| 957 | 957 |
ch == s->lfe_channel, |
| 958 | 958 |
DBA_NONE, 0, NULL, NULL, NULL, |
| ... | ... |
@@ -970,11 +1386,12 @@ static void bit_alloc_masking(AC3EncodeContext *s) |
| 970 | 970 |
static void reset_block_bap(AC3EncodeContext *s) |
| 971 | 971 |
{
|
| 972 | 972 |
int blk, ch; |
| 973 |
+ int channels = s->channels + 1; |
|
| 973 | 974 |
if (s->blocks[0].bap[0] == s->bap_buffer) |
| 974 | 975 |
return; |
| 975 | 976 |
for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
|
| 976 |
- for (ch = 0; ch < s->channels; ch++) {
|
|
| 977 |
- s->blocks[blk].bap[ch] = &s->bap_buffer[AC3_MAX_COEFS * (blk * s->channels + ch)]; |
|
| 977 |
+ for (ch = 0; ch < channels; ch++) {
|
|
| 978 |
+ s->blocks[blk].bap[ch] = &s->bap_buffer[AC3_MAX_COEFS * (blk * channels + ch)]; |
|
| 978 | 979 |
} |
| 979 | 980 |
} |
| 980 | 981 |
} |
| ... | ... |
@@ -1000,28 +1417,37 @@ static int bit_alloc(AC3EncodeContext *s, int snr_offset) |
| 1000 | 1000 |
for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
|
| 1001 | 1001 |
AC3Block *block = &s->blocks[blk]; |
| 1002 | 1002 |
AC3Block *ref_block; |
| 1003 |
+ int av_uninit(ch0); |
|
| 1004 |
+ int got_cpl = !block->cpl_in_use; |
|
| 1003 | 1005 |
// initialize grouped mantissa counts. these are set so that they are |
| 1004 | 1006 |
// padded to the next whole group size when bits are counted in |
| 1005 | 1007 |
// compute_mantissa_size_final |
| 1006 | 1008 |
mant_cnt[0] = mant_cnt[3] = 0; |
| 1007 | 1009 |
mant_cnt[1] = mant_cnt[2] = 2; |
| 1008 | 1010 |
mant_cnt[4] = 1; |
| 1009 |
- for (ch = 0; ch < s->channels; ch++) {
|
|
| 1011 |
+ for (ch = 1; ch <= s->channels; ch++) {
|
|
| 1012 |
+ if (!got_cpl && ch > 1 && block->channel_in_cpl[ch-1]) {
|
|
| 1013 |
+ ch0 = ch - 1; |
|
| 1014 |
+ ch = CPL_CH; |
|
| 1015 |
+ got_cpl = 1; |
|
| 1016 |
+ } |
|
| 1017 |
+ |
|
| 1010 | 1018 |
/* Currently the only bit allocation parameters which vary across |
| 1011 | 1019 |
blocks within a frame are the exponent values. We can take |
| 1012 | 1020 |
advantage of that by reusing the bit allocation pointers |
| 1013 | 1021 |
whenever we reuse exponents. */ |
| 1014 | 1022 |
ref_block = block->exp_ref_block[ch]; |
| 1015 | 1023 |
if (s->exp_strategy[ch][blk] != EXP_REUSE) {
|
| 1016 |
- s->ac3dsp.bit_alloc_calc_bap(ref_block->mask[ch], |
|
| 1017 |
- ref_block->psd[ch], 0, |
|
| 1018 |
- s->nb_coefs[ch], snr_offset, |
|
| 1019 |
- s->bit_alloc.floor, ff_ac3_bap_tab, |
|
| 1020 |
- ref_block->bap[ch]); |
|
| 1024 |
+ s->ac3dsp.bit_alloc_calc_bap(ref_block->mask[ch], ref_block->psd[ch], |
|
| 1025 |
+ s->start_freq[ch], block->end_freq[ch], |
|
| 1026 |
+ snr_offset, s->bit_alloc.floor, |
|
| 1027 |
+ ff_ac3_bap_tab, ref_block->bap[ch]); |
|
| 1021 | 1028 |
} |
| 1022 | 1029 |
mantissa_bits += s->ac3dsp.compute_mantissa_size(mant_cnt, |
| 1023 |
- ref_block->bap[ch], |
|
| 1024 |
- s->nb_coefs[ch]); |
|
| 1030 |
+ ref_block->bap[ch]+s->start_freq[ch], |
|
| 1031 |
+ block->end_freq[ch]-s->start_freq[ch]); |
|
| 1032 |
+ if (ch == CPL_CH) |
|
| 1033 |
+ ch = ch0; |
|
| 1025 | 1034 |
} |
| 1026 | 1035 |
mantissa_bits += compute_mantissa_size_final(mant_cnt); |
| 1027 | 1036 |
} |
| ... | ... |
@@ -1047,7 +1473,7 @@ static int cbr_bit_allocation(AC3EncodeContext *s) |
| 1047 | 1047 |
|
| 1048 | 1048 |
/* if previous frame SNR offset was 1023, check if current frame can also |
| 1049 | 1049 |
use SNR offset of 1023. if so, skip the search. */ |
| 1050 |
- if ((snr_offset | s->fine_snr_offset[0]) == 1023) {
|
|
| 1050 |
+ if ((snr_offset | s->fine_snr_offset[1]) == 1023) {
|
|
| 1051 | 1051 |
if (bit_alloc(s, 1023) <= bits_left) |
| 1052 | 1052 |
return 0; |
| 1053 | 1053 |
} |
| ... | ... |
@@ -1071,7 +1497,7 @@ static int cbr_bit_allocation(AC3EncodeContext *s) |
| 1071 | 1071 |
reset_block_bap(s); |
| 1072 | 1072 |
|
| 1073 | 1073 |
s->coarse_snr_offset = snr_offset >> 4; |
| 1074 |
- for (ch = 0; ch < s->channels; ch++) |
|
| 1074 |
+ for (ch = !s->cpl_on; ch <= s->channels; ch++) |
|
| 1075 | 1075 |
s->fine_snr_offset[ch] = snr_offset & 0xF; |
| 1076 | 1076 |
|
| 1077 | 1077 |
return 0; |
| ... | ... |
@@ -1089,26 +1515,26 @@ static int downgrade_exponents(AC3EncodeContext *s) |
| 1089 | 1089 |
{
|
| 1090 | 1090 |
int ch, blk; |
| 1091 | 1091 |
|
| 1092 |
- for (ch = 0; ch < s->fbw_channels; ch++) {
|
|
| 1093 |
- for (blk = AC3_MAX_BLOCKS-1; blk >= 0; blk--) {
|
|
| 1092 |
+ for (blk = AC3_MAX_BLOCKS-1; blk >= 0; blk--) {
|
|
| 1093 |
+ for (ch = !s->blocks[blk].cpl_in_use; ch <= s->fbw_channels; ch++) {
|
|
| 1094 | 1094 |
if (s->exp_strategy[ch][blk] == EXP_D15) {
|
| 1095 | 1095 |
s->exp_strategy[ch][blk] = EXP_D25; |
| 1096 | 1096 |
return 0; |
| 1097 | 1097 |
} |
| 1098 | 1098 |
} |
| 1099 | 1099 |
} |
| 1100 |
- for (ch = 0; ch < s->fbw_channels; ch++) {
|
|
| 1101 |
- for (blk = AC3_MAX_BLOCKS-1; blk >= 0; blk--) {
|
|
| 1100 |
+ for (blk = AC3_MAX_BLOCKS-1; blk >= 0; blk--) {
|
|
| 1101 |
+ for (ch = !s->blocks[blk].cpl_in_use; ch <= s->fbw_channels; ch++) {
|
|
| 1102 | 1102 |
if (s->exp_strategy[ch][blk] == EXP_D25) {
|
| 1103 | 1103 |
s->exp_strategy[ch][blk] = EXP_D45; |
| 1104 | 1104 |
return 0; |
| 1105 | 1105 |
} |
| 1106 | 1106 |
} |
| 1107 | 1107 |
} |
| 1108 |
- for (ch = 0; ch < s->fbw_channels; ch++) {
|
|
| 1109 |
- /* block 0 cannot reuse exponents, so only downgrade D45 to REUSE if |
|
| 1110 |
- the block number > 0 */ |
|
| 1111 |
- for (blk = AC3_MAX_BLOCKS-1; blk > 0; blk--) {
|
|
| 1108 |
+ /* block 0 cannot reuse exponents, so only downgrade D45 to REUSE if |
|
| 1109 |
+ the block number > 0 */ |
|
| 1110 |
+ for (blk = AC3_MAX_BLOCKS-1; blk > 0; blk--) {
|
|
| 1111 |
+ for (ch = !s->blocks[blk].cpl_in_use; ch <= s->fbw_channels; ch++) {
|
|
| 1112 | 1112 |
if (s->exp_strategy[ch][blk] > EXP_REUSE) {
|
| 1113 | 1113 |
s->exp_strategy[ch][blk] = EXP_REUSE; |
| 1114 | 1114 |
return 0; |
| ... | ... |
@@ -1135,7 +1561,18 @@ static int compute_bit_allocation(AC3EncodeContext *s) |
| 1135 | 1135 |
|
| 1136 | 1136 |
ret = cbr_bit_allocation(s); |
| 1137 | 1137 |
while (ret) {
|
| 1138 |
- /* fallback 1: downgrade exponents */ |
|
| 1138 |
+ /* fallback 1: disable channel coupling */ |
|
| 1139 |
+ if (s->cpl_on) {
|
|
| 1140 |
+ s->cpl_on = 0; |
|
| 1141 |
+ compute_coupling_strategy(s); |
|
| 1142 |
+ compute_rematrixing_strategy(s); |
|
| 1143 |
+ apply_rematrixing(s); |
|
| 1144 |
+ process_exponents(s); |
|
| 1145 |
+ ret = compute_bit_allocation(s); |
|
| 1146 |
+ continue; |
|
| 1147 |
+ } |
|
| 1148 |
+ |
|
| 1149 |
+ /* fallback 2: downgrade exponents */ |
|
| 1139 | 1150 |
if (!downgrade_exponents(s)) {
|
| 1140 | 1151 |
extract_exponents(s); |
| 1141 | 1152 |
encode_exponents(s); |
| ... | ... |
@@ -1189,12 +1626,13 @@ static inline int asym_quant(int c, int e, int qbits) |
| 1189 | 1189 |
* Quantize a set of mantissas for a single channel in a single block. |
| 1190 | 1190 |
*/ |
| 1191 | 1191 |
static void quantize_mantissas_blk_ch(AC3Mant *s, int32_t *fixed_coef, |
| 1192 |
- uint8_t *exp, |
|
| 1193 |
- uint8_t *bap, uint16_t *qmant, int n) |
|
| 1192 |
+ uint8_t *exp, uint8_t *bap, |
|
| 1193 |
+ uint16_t *qmant, int start_freq, |
|
| 1194 |
+ int end_freq) |
|
| 1194 | 1195 |
{
|
| 1195 | 1196 |
int i; |
| 1196 | 1197 |
|
| 1197 |
- for (i = 0; i < n; i++) {
|
|
| 1198 |
+ for (i = start_freq; i < end_freq; i++) {
|
|
| 1198 | 1199 |
int v; |
| 1199 | 1200 |
int c = fixed_coef[i]; |
| 1200 | 1201 |
int e = exp[i]; |
| ... | ... |
@@ -1284,19 +1722,27 @@ static void quantize_mantissas_blk_ch(AC3Mant *s, int32_t *fixed_coef, |
| 1284 | 1284 |
*/ |
| 1285 | 1285 |
static void quantize_mantissas(AC3EncodeContext *s) |
| 1286 | 1286 |
{
|
| 1287 |
- int blk, ch; |
|
| 1288 |
- |
|
| 1287 |
+ int blk, ch, ch0=0, got_cpl; |
|
| 1289 | 1288 |
|
| 1290 | 1289 |
for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
|
| 1291 | 1290 |
AC3Block *block = &s->blocks[blk]; |
| 1292 | 1291 |
AC3Block *ref_block; |
| 1293 | 1292 |
AC3Mant m = { 0 };
|
| 1294 | 1293 |
|
| 1295 |
- for (ch = 0; ch < s->channels; ch++) {
|
|
| 1294 |
+ got_cpl = !block->cpl_in_use; |
|
| 1295 |
+ for (ch = 1; ch <= s->channels; ch++) {
|
|
| 1296 |
+ if (!got_cpl && ch > 1 && block->channel_in_cpl[ch-1]) {
|
|
| 1297 |
+ ch0 = ch - 1; |
|
| 1298 |
+ ch = CPL_CH; |
|
| 1299 |
+ got_cpl = 1; |
|
| 1300 |
+ } |
|
| 1296 | 1301 |
ref_block = block->exp_ref_block[ch]; |
| 1297 | 1302 |
quantize_mantissas_blk_ch(&m, block->fixed_coef[ch], |
| 1298 |
- ref_block->exp[ch], ref_block->bap[ch], |
|
| 1299 |
- block->qmant[ch], s->nb_coefs[ch]); |
|
| 1303 |
+ ref_block->exp[ch], |
|
| 1304 |
+ ref_block->bap[ch], block->qmant[ch], |
|
| 1305 |
+ s->start_freq[ch], block->end_freq[ch]); |
|
| 1306 |
+ if (ch == CPL_CH) |
|
| 1307 |
+ ch = ch0; |
|
| 1300 | 1308 |
} |
| 1301 | 1309 |
} |
| 1302 | 1310 |
} |
| ... | ... |
@@ -1363,7 +1809,8 @@ static void output_frame_header(AC3EncodeContext *s) |
| 1363 | 1363 |
*/ |
| 1364 | 1364 |
static void output_audio_block(AC3EncodeContext *s, int blk) |
| 1365 | 1365 |
{
|
| 1366 |
- int ch, i, baie, rbnd; |
|
| 1366 |
+ int ch, i, baie, bnd, got_cpl; |
|
| 1367 |
+ int av_uninit(ch0); |
|
| 1367 | 1368 |
AC3Block *block = &s->blocks[blk]; |
| 1368 | 1369 |
|
| 1369 | 1370 |
/* block switching */ |
| ... | ... |
@@ -1378,11 +1825,38 @@ static void output_audio_block(AC3EncodeContext *s, int blk) |
| 1378 | 1378 |
put_bits(&s->pb, 1, 0); |
| 1379 | 1379 |
|
| 1380 | 1380 |
/* channel coupling */ |
| 1381 |
- if (!blk) {
|
|
| 1382 |
- put_bits(&s->pb, 1, 1); /* coupling strategy present */ |
|
| 1383 |
- put_bits(&s->pb, 1, 0); /* no coupling strategy */ |
|
| 1384 |
- } else {
|
|
| 1385 |
- put_bits(&s->pb, 1, 0); /* no new coupling strategy */ |
|
| 1381 |
+ put_bits(&s->pb, 1, block->new_cpl_strategy); |
|
| 1382 |
+ if (block->new_cpl_strategy) {
|
|
| 1383 |
+ put_bits(&s->pb, 1, block->cpl_in_use); |
|
| 1384 |
+ if (block->cpl_in_use) {
|
|
| 1385 |
+ int start_sub, end_sub; |
|
| 1386 |
+ for (ch = 1; ch <= s->fbw_channels; ch++) |
|
| 1387 |
+ put_bits(&s->pb, 1, block->channel_in_cpl[ch]); |
|
| 1388 |
+ if (s->channel_mode == AC3_CHMODE_STEREO) |
|
| 1389 |
+ put_bits(&s->pb, 1, 0); /* phase flags in use */ |
|
| 1390 |
+ start_sub = (s->start_freq[CPL_CH] - 37) / 12; |
|
| 1391 |
+ end_sub = (s->cpl_end_freq - 37) / 12; |
|
| 1392 |
+ put_bits(&s->pb, 4, start_sub); |
|
| 1393 |
+ put_bits(&s->pb, 4, end_sub - 3); |
|
| 1394 |
+ for (bnd = start_sub+1; bnd < end_sub; bnd++) |
|
| 1395 |
+ put_bits(&s->pb, 1, ff_eac3_default_cpl_band_struct[bnd]); |
|
| 1396 |
+ } |
|
| 1397 |
+ } |
|
| 1398 |
+ |
|
| 1399 |
+ /* coupling coordinates */ |
|
| 1400 |
+ if (block->cpl_in_use) {
|
|
| 1401 |
+ for (ch = 1; ch <= s->fbw_channels; ch++) {
|
|
| 1402 |
+ if (block->channel_in_cpl[ch]) {
|
|
| 1403 |
+ put_bits(&s->pb, 1, block->new_cpl_coords); |
|
| 1404 |
+ if (block->new_cpl_coords) {
|
|
| 1405 |
+ put_bits(&s->pb, 2, block->cpl_master_exp[ch]); |
|
| 1406 |
+ for (bnd = 0; bnd < s->num_cpl_bands; bnd++) {
|
|
| 1407 |
+ put_bits(&s->pb, 4, block->cpl_coord_exp [ch][bnd]); |
|
| 1408 |
+ put_bits(&s->pb, 4, block->cpl_coord_mant[ch][bnd]); |
|
| 1409 |
+ } |
|
| 1410 |
+ } |
|
| 1411 |
+ } |
|
| 1412 |
+ } |
|
| 1386 | 1413 |
} |
| 1387 | 1414 |
|
| 1388 | 1415 |
/* stereo rematrixing */ |
| ... | ... |
@@ -1390,40 +1864,41 @@ static void output_audio_block(AC3EncodeContext *s, int blk) |
| 1390 | 1390 |
put_bits(&s->pb, 1, block->new_rematrixing_strategy); |
| 1391 | 1391 |
if (block->new_rematrixing_strategy) {
|
| 1392 | 1392 |
/* rematrixing flags */ |
| 1393 |
- for (rbnd = 0; rbnd < s->num_rematrixing_bands; rbnd++) |
|
| 1394 |
- put_bits(&s->pb, 1, block->rematrixing_flags[rbnd]); |
|
| 1393 |
+ for (bnd = 0; bnd < block->num_rematrixing_bands; bnd++) |
|
| 1394 |
+ put_bits(&s->pb, 1, block->rematrixing_flags[bnd]); |
|
| 1395 | 1395 |
} |
| 1396 | 1396 |
} |
| 1397 | 1397 |
|
| 1398 | 1398 |
/* exponent strategy */ |
| 1399 |
- for (ch = 0; ch < s->fbw_channels; ch++) |
|
| 1399 |
+ for (ch = !block->cpl_in_use; ch <= s->fbw_channels; ch++) |
|
| 1400 | 1400 |
put_bits(&s->pb, 2, s->exp_strategy[ch][blk]); |
| 1401 | 1401 |
if (s->lfe_on) |
| 1402 | 1402 |
put_bits(&s->pb, 1, s->exp_strategy[s->lfe_channel][blk]); |
| 1403 | 1403 |
|
| 1404 | 1404 |
/* bandwidth */ |
| 1405 |
- for (ch = 0; ch < s->fbw_channels; ch++) {
|
|
| 1406 |
- if (s->exp_strategy[ch][blk] != EXP_REUSE) |
|
| 1405 |
+ for (ch = 1; ch <= s->fbw_channels; ch++) {
|
|
| 1406 |
+ if (s->exp_strategy[ch][blk] != EXP_REUSE && !block->channel_in_cpl[ch]) |
|
| 1407 | 1407 |
put_bits(&s->pb, 6, s->bandwidth_code); |
| 1408 | 1408 |
} |
| 1409 | 1409 |
|
| 1410 | 1410 |
/* exponents */ |
| 1411 |
- for (ch = 0; ch < s->channels; ch++) {
|
|
| 1411 |
+ for (ch = !block->cpl_in_use; ch <= s->channels; ch++) {
|
|
| 1412 | 1412 |
int nb_groups; |
| 1413 |
+ int cpl = (ch == CPL_CH); |
|
| 1413 | 1414 |
|
| 1414 | 1415 |
if (s->exp_strategy[ch][blk] == EXP_REUSE) |
| 1415 | 1416 |
continue; |
| 1416 | 1417 |
|
| 1417 | 1418 |
/* DC exponent */ |
| 1418 |
- put_bits(&s->pb, 4, block->grouped_exp[ch][0]); |
|
| 1419 |
+ put_bits(&s->pb, 4, block->grouped_exp[ch][0] >> cpl); |
|
| 1419 | 1420 |
|
| 1420 | 1421 |
/* exponent groups */ |
| 1421 |
- nb_groups = exponent_group_tab[s->exp_strategy[ch][blk]-1][s->nb_coefs[ch]]; |
|
| 1422 |
+ nb_groups = exponent_group_tab[cpl][s->exp_strategy[ch][blk]-1][block->end_freq[ch]-s->start_freq[ch]]; |
|
| 1422 | 1423 |
for (i = 1; i <= nb_groups; i++) |
| 1423 | 1424 |
put_bits(&s->pb, 7, block->grouped_exp[ch][i]); |
| 1424 | 1425 |
|
| 1425 | 1426 |
/* gain range info */ |
| 1426 |
- if (ch != s->lfe_channel) |
|
| 1427 |
+ if (ch != s->lfe_channel && !cpl) |
|
| 1427 | 1428 |
put_bits(&s->pb, 2, 0); |
| 1428 | 1429 |
} |
| 1429 | 1430 |
|
| ... | ... |
@@ -1439,23 +1914,40 @@ static void output_audio_block(AC3EncodeContext *s, int blk) |
| 1439 | 1439 |
} |
| 1440 | 1440 |
|
| 1441 | 1441 |
/* snr offset */ |
| 1442 |
- put_bits(&s->pb, 1, baie); |
|
| 1443 |
- if (baie) {
|
|
| 1442 |
+ put_bits(&s->pb, 1, block->new_snr_offsets); |
|
| 1443 |
+ if (block->new_snr_offsets) {
|
|
| 1444 | 1444 |
put_bits(&s->pb, 6, s->coarse_snr_offset); |
| 1445 |
- for (ch = 0; ch < s->channels; ch++) {
|
|
| 1445 |
+ for (ch = !block->cpl_in_use; ch <= s->channels; ch++) {
|
|
| 1446 | 1446 |
put_bits(&s->pb, 4, s->fine_snr_offset[ch]); |
| 1447 | 1447 |
put_bits(&s->pb, 3, s->fast_gain_code[ch]); |
| 1448 | 1448 |
} |
| 1449 | 1449 |
} |
| 1450 | 1450 |
|
| 1451 |
+ /* coupling leak */ |
|
| 1452 |
+ if (block->cpl_in_use) {
|
|
| 1453 |
+ put_bits(&s->pb, 1, block->new_cpl_leak); |
|
| 1454 |
+ if (block->new_cpl_leak) {
|
|
| 1455 |
+ put_bits(&s->pb, 3, s->bit_alloc.cpl_fast_leak); |
|
| 1456 |
+ put_bits(&s->pb, 3, s->bit_alloc.cpl_slow_leak); |
|
| 1457 |
+ } |
|
| 1458 |
+ } |
|
| 1459 |
+ |
|
| 1451 | 1460 |
put_bits(&s->pb, 1, 0); /* no delta bit allocation */ |
| 1452 | 1461 |
put_bits(&s->pb, 1, 0); /* no data to skip */ |
| 1453 | 1462 |
|
| 1454 | 1463 |
/* mantissas */ |
| 1455 |
- for (ch = 0; ch < s->channels; ch++) {
|
|
| 1464 |
+ got_cpl = !block->cpl_in_use; |
|
| 1465 |
+ for (ch = 1; ch <= s->channels; ch++) {
|
|
| 1456 | 1466 |
int b, q; |
| 1457 |
- AC3Block *ref_block = block->exp_ref_block[ch]; |
|
| 1458 |
- for (i = 0; i < s->nb_coefs[ch]; i++) {
|
|
| 1467 |
+ AC3Block *ref_block; |
|
| 1468 |
+ |
|
| 1469 |
+ if (!got_cpl && ch > 1 && block->channel_in_cpl[ch-1]) {
|
|
| 1470 |
+ ch0 = ch - 1; |
|
| 1471 |
+ ch = CPL_CH; |
|
| 1472 |
+ got_cpl = 1; |
|
| 1473 |
+ } |
|
| 1474 |
+ ref_block = block->exp_ref_block[ch]; |
|
| 1475 |
+ for (i = s->start_freq[ch]; i < block->end_freq[ch]; i++) {
|
|
| 1459 | 1476 |
q = block->qmant[ch][i]; |
| 1460 | 1477 |
b = ref_block->bap[ch][i]; |
| 1461 | 1478 |
switch (b) {
|
| ... | ... |
@@ -1469,6 +1961,8 @@ static void output_audio_block(AC3EncodeContext *s, int blk) |
| 1469 | 1469 |
default: put_bits(&s->pb, b-1, q); break; |
| 1470 | 1470 |
} |
| 1471 | 1471 |
} |
| 1472 |
+ if (ch == CPL_CH) |
|
| 1473 |
+ ch = ch0; |
|
| 1472 | 1474 |
} |
| 1473 | 1475 |
} |
| 1474 | 1476 |
|
| ... | ... |
@@ -1854,6 +2348,12 @@ static int ac3_encode_frame(AVCodecContext *avctx, unsigned char *frame, |
| 1854 | 1854 |
|
| 1855 | 1855 |
scale_coefficients(s); |
| 1856 | 1856 |
|
| 1857 |
+ s->cpl_on = s->cpl_enabled; |
|
| 1858 |
+ compute_coupling_strategy(s); |
|
| 1859 |
+ |
|
| 1860 |
+ if (s->cpl_on) |
|
| 1861 |
+ apply_channel_coupling(s); |
|
| 1862 |
+ |
|
| 1857 | 1863 |
compute_rematrixing_strategy(s); |
| 1858 | 1864 |
|
| 1859 | 1865 |
apply_rematrixing(s); |
| ... | ... |
@@ -1934,7 +2434,7 @@ static av_cold int set_channel_info(AC3EncodeContext *s, int channels, |
| 1934 | 1934 |
s->lfe_on = !!(ch_layout & AV_CH_LOW_FREQUENCY); |
| 1935 | 1935 |
s->channels = channels; |
| 1936 | 1936 |
s->fbw_channels = channels - s->lfe_on; |
| 1937 |
- s->lfe_channel = s->lfe_on ? s->fbw_channels : -1; |
|
| 1937 |
+ s->lfe_channel = s->lfe_on ? s->fbw_channels + 1 : -1; |
|
| 1938 | 1938 |
if (s->lfe_on) |
| 1939 | 1939 |
ch_layout -= AV_CH_LOW_FREQUENCY; |
| 1940 | 1940 |
|
| ... | ... |
@@ -2033,6 +2533,10 @@ static av_cold int validate_options(AVCodecContext *avctx, AC3EncodeContext *s) |
| 2033 | 2033 |
s->rematrixing_enabled = s->options.stereo_rematrixing && |
| 2034 | 2034 |
(s->channel_mode == AC3_CHMODE_STEREO); |
| 2035 | 2035 |
|
| 2036 |
+ s->cpl_enabled = s->options.channel_coupling && |
|
| 2037 |
+ s->channel_mode >= AC3_CHMODE_STEREO && |
|
| 2038 |
+ CONFIG_AC3ENC_FLOAT; |
|
| 2039 |
+ |
|
| 2036 | 2040 |
return 0; |
| 2037 | 2041 |
} |
| 2038 | 2042 |
|
| ... | ... |
@@ -2044,7 +2548,8 @@ static av_cold int validate_options(AVCodecContext *avctx, AC3EncodeContext *s) |
| 2044 | 2044 |
*/ |
| 2045 | 2045 |
static av_cold void set_bandwidth(AC3EncodeContext *s) |
| 2046 | 2046 |
{
|
| 2047 |
- int ch; |
|
| 2047 |
+ int blk, ch; |
|
| 2048 |
+ int av_uninit(cpl_start); |
|
| 2048 | 2049 |
|
| 2049 | 2050 |
if (s->cutoff) {
|
| 2050 | 2051 |
/* calculate bandwidth based on user-specified cutoff frequency */ |
| ... | ... |
@@ -2057,11 +2562,54 @@ static av_cold void set_bandwidth(AC3EncodeContext *s) |
| 2057 | 2057 |
} |
| 2058 | 2058 |
|
| 2059 | 2059 |
/* set number of coefficients for each channel */ |
| 2060 |
- for (ch = 0; ch < s->fbw_channels; ch++) {
|
|
| 2061 |
- s->nb_coefs[ch] = s->bandwidth_code * 3 + 73; |
|
| 2060 |
+ for (ch = 1; ch <= s->fbw_channels; ch++) {
|
|
| 2061 |
+ s->start_freq[ch] = 0; |
|
| 2062 |
+ for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) |
|
| 2063 |
+ s->blocks[blk].end_freq[ch] = s->bandwidth_code * 3 + 73; |
|
| 2064 |
+ } |
|
| 2065 |
+ /* LFE channel always has 7 coefs */ |
|
| 2066 |
+ if (s->lfe_on) {
|
|
| 2067 |
+ s->start_freq[s->lfe_channel] = 0; |
|
| 2068 |
+ for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) |
|
| 2069 |
+ s->blocks[blk].end_freq[ch] = 7; |
|
| 2070 |
+ } |
|
| 2071 |
+ |
|
| 2072 |
+ /* initialize coupling strategy */ |
|
| 2073 |
+ if (s->cpl_enabled) {
|
|
| 2074 |
+ if (s->options.cpl_start >= 0) {
|
|
| 2075 |
+ cpl_start = s->options.cpl_start; |
|
| 2076 |
+ } else {
|
|
| 2077 |
+ cpl_start = ac3_coupling_start_tab[s->channel_mode-2][s->bit_alloc.sr_code][s->frame_size_code/2]; |
|
| 2078 |
+ if (cpl_start < 0) |
|
| 2079 |
+ s->cpl_enabled = 0; |
|
| 2080 |
+ } |
|
| 2081 |
+ } |
|
| 2082 |
+ if (s->cpl_enabled) {
|
|
| 2083 |
+ int i, cpl_start_band, cpl_end_band; |
|
| 2084 |
+ uint8_t *cpl_band_sizes = s->cpl_band_sizes; |
|
| 2085 |
+ |
|
| 2086 |
+ cpl_end_band = s->bandwidth_code / 4 + 3; |
|
| 2087 |
+ cpl_start_band = av_clip(cpl_start, 0, FFMIN(cpl_end_band-1, 15)); |
|
| 2088 |
+ |
|
| 2089 |
+ s->num_cpl_subbands = cpl_end_band - cpl_start_band; |
|
| 2090 |
+ |
|
| 2091 |
+ s->num_cpl_bands = 1; |
|
| 2092 |
+ *cpl_band_sizes = 12; |
|
| 2093 |
+ for (i = cpl_start_band + 1; i < cpl_end_band; i++) {
|
|
| 2094 |
+ if (ff_eac3_default_cpl_band_struct[i]) {
|
|
| 2095 |
+ *cpl_band_sizes += 12; |
|
| 2096 |
+ } else {
|
|
| 2097 |
+ s->num_cpl_bands++; |
|
| 2098 |
+ cpl_band_sizes++; |
|
| 2099 |
+ *cpl_band_sizes = 12; |
|
| 2100 |
+ } |
|
| 2101 |
+ } |
|
| 2102 |
+ |
|
| 2103 |
+ s->start_freq[CPL_CH] = cpl_start_band * 12 + 37; |
|
| 2104 |
+ s->cpl_end_freq = cpl_end_band * 12 + 37; |
|
| 2105 |
+ for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) |
|
| 2106 |
+ s->blocks[blk].end_freq[CPL_CH] = s->cpl_end_freq; |
|
| 2062 | 2107 |
} |
| 2063 |
- if (s->lfe_on) |
|
| 2064 |
- s->nb_coefs[s->lfe_channel] = 7; /* LFE channel always has 7 coefs */ |
|
| 2065 | 2108 |
} |
| 2066 | 2109 |
|
| 2067 | 2110 |
|
| ... | ... |
@@ -2069,6 +2617,7 @@ static av_cold int allocate_buffers(AVCodecContext *avctx) |
| 2069 | 2069 |
{
|
| 2070 | 2070 |
int blk, ch; |
| 2071 | 2071 |
AC3EncodeContext *s = avctx->priv_data; |
| 2072 |
+ int channels = s->channels + 1; /* includes coupling channel */ |
|
| 2072 | 2073 |
|
| 2073 | 2074 |
FF_ALLOC_OR_GOTO(avctx, s->planar_samples, s->channels * sizeof(*s->planar_samples), |
| 2074 | 2075 |
alloc_fail); |
| ... | ... |
@@ -2077,74 +2626,90 @@ static av_cold int allocate_buffers(AVCodecContext *avctx) |
| 2077 | 2077 |
(AC3_FRAME_SIZE+AC3_BLOCK_SIZE) * sizeof(**s->planar_samples), |
| 2078 | 2078 |
alloc_fail); |
| 2079 | 2079 |
} |
| 2080 |
- FF_ALLOC_OR_GOTO(avctx, s->bap_buffer, AC3_MAX_BLOCKS * s->channels * |
|
| 2080 |
+ FF_ALLOC_OR_GOTO(avctx, s->bap_buffer, AC3_MAX_BLOCKS * channels * |
|
| 2081 | 2081 |
AC3_MAX_COEFS * sizeof(*s->bap_buffer), alloc_fail); |
| 2082 |
- FF_ALLOC_OR_GOTO(avctx, s->bap1_buffer, AC3_MAX_BLOCKS * s->channels * |
|
| 2082 |
+ FF_ALLOC_OR_GOTO(avctx, s->bap1_buffer, AC3_MAX_BLOCKS * channels * |
|
| 2083 | 2083 |
AC3_MAX_COEFS * sizeof(*s->bap1_buffer), alloc_fail); |
| 2084 |
- FF_ALLOC_OR_GOTO(avctx, s->mdct_coef_buffer, AC3_MAX_BLOCKS * s->channels * |
|
| 2084 |
+ FF_ALLOC_OR_GOTO(avctx, s->mdct_coef_buffer, AC3_MAX_BLOCKS * channels * |
|
| 2085 | 2085 |
AC3_MAX_COEFS * sizeof(*s->mdct_coef_buffer), alloc_fail); |
| 2086 |
- FF_ALLOC_OR_GOTO(avctx, s->exp_buffer, AC3_MAX_BLOCKS * s->channels * |
|
| 2086 |
+ FF_ALLOC_OR_GOTO(avctx, s->exp_buffer, AC3_MAX_BLOCKS * channels * |
|
| 2087 | 2087 |
AC3_MAX_COEFS * sizeof(*s->exp_buffer), alloc_fail); |
| 2088 |
- FF_ALLOC_OR_GOTO(avctx, s->grouped_exp_buffer, AC3_MAX_BLOCKS * s->channels * |
|
| 2088 |
+ FF_ALLOC_OR_GOTO(avctx, s->grouped_exp_buffer, AC3_MAX_BLOCKS * channels * |
|
| 2089 | 2089 |
128 * sizeof(*s->grouped_exp_buffer), alloc_fail); |
| 2090 |
- FF_ALLOC_OR_GOTO(avctx, s->psd_buffer, AC3_MAX_BLOCKS * s->channels * |
|
| 2090 |
+ FF_ALLOC_OR_GOTO(avctx, s->psd_buffer, AC3_MAX_BLOCKS * channels * |
|
| 2091 | 2091 |
AC3_MAX_COEFS * sizeof(*s->psd_buffer), alloc_fail); |
| 2092 |
- FF_ALLOC_OR_GOTO(avctx, s->band_psd_buffer, AC3_MAX_BLOCKS * s->channels * |
|
| 2092 |
+ FF_ALLOC_OR_GOTO(avctx, s->band_psd_buffer, AC3_MAX_BLOCKS * channels * |
|
| 2093 | 2093 |
64 * sizeof(*s->band_psd_buffer), alloc_fail); |
| 2094 |
- FF_ALLOC_OR_GOTO(avctx, s->mask_buffer, AC3_MAX_BLOCKS * s->channels * |
|
| 2094 |
+ FF_ALLOC_OR_GOTO(avctx, s->mask_buffer, AC3_MAX_BLOCKS * channels * |
|
| 2095 | 2095 |
64 * sizeof(*s->mask_buffer), alloc_fail); |
| 2096 |
- FF_ALLOC_OR_GOTO(avctx, s->qmant_buffer, AC3_MAX_BLOCKS * s->channels * |
|
| 2096 |
+ FF_ALLOC_OR_GOTO(avctx, s->qmant_buffer, AC3_MAX_BLOCKS * channels * |
|
| 2097 | 2097 |
AC3_MAX_COEFS * sizeof(*s->qmant_buffer), alloc_fail); |
| 2098 |
+ if (s->cpl_enabled) {
|
|
| 2099 |
+ FF_ALLOC_OR_GOTO(avctx, s->cpl_coord_exp_buffer, AC3_MAX_BLOCKS * channels * |
|
| 2100 |
+ 16 * sizeof(*s->cpl_coord_exp_buffer), alloc_fail); |
|
| 2101 |
+ FF_ALLOC_OR_GOTO(avctx, s->cpl_coord_mant_buffer, AC3_MAX_BLOCKS * channels * |
|
| 2102 |
+ 16 * sizeof(*s->cpl_coord_mant_buffer), alloc_fail); |
|
| 2103 |
+ } |
|
| 2098 | 2104 |
for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
|
| 2099 | 2105 |
AC3Block *block = &s->blocks[blk]; |
| 2100 |
- FF_ALLOC_OR_GOTO(avctx, block->bap, s->channels * sizeof(*block->bap), |
|
| 2106 |
+ FF_ALLOC_OR_GOTO(avctx, block->bap, channels * sizeof(*block->bap), |
|
| 2101 | 2107 |
alloc_fail); |
| 2102 |
- FF_ALLOCZ_OR_GOTO(avctx, block->mdct_coef, s->channels * sizeof(*block->mdct_coef), |
|
| 2108 |
+ FF_ALLOCZ_OR_GOTO(avctx, block->mdct_coef, channels * sizeof(*block->mdct_coef), |
|
| 2103 | 2109 |
alloc_fail); |
| 2104 |
- FF_ALLOCZ_OR_GOTO(avctx, block->exp, s->channels * sizeof(*block->exp), |
|
| 2110 |
+ FF_ALLOCZ_OR_GOTO(avctx, block->exp, channels * sizeof(*block->exp), |
|
| 2105 | 2111 |
alloc_fail); |
| 2106 |
- FF_ALLOCZ_OR_GOTO(avctx, block->grouped_exp, s->channels * sizeof(*block->grouped_exp), |
|
| 2112 |
+ FF_ALLOCZ_OR_GOTO(avctx, block->grouped_exp, channels * sizeof(*block->grouped_exp), |
|
| 2107 | 2113 |
alloc_fail); |
| 2108 |
- FF_ALLOCZ_OR_GOTO(avctx, block->psd, s->channels * sizeof(*block->psd), |
|
| 2114 |
+ FF_ALLOCZ_OR_GOTO(avctx, block->psd, channels * sizeof(*block->psd), |
|
| 2109 | 2115 |
alloc_fail); |
| 2110 |
- FF_ALLOCZ_OR_GOTO(avctx, block->band_psd, s->channels * sizeof(*block->band_psd), |
|
| 2116 |
+ FF_ALLOCZ_OR_GOTO(avctx, block->band_psd, channels * sizeof(*block->band_psd), |
|
| 2111 | 2117 |
alloc_fail); |
| 2112 |
- FF_ALLOCZ_OR_GOTO(avctx, block->mask, s->channels * sizeof(*block->mask), |
|
| 2118 |
+ FF_ALLOCZ_OR_GOTO(avctx, block->mask, channels * sizeof(*block->mask), |
|
| 2113 | 2119 |
alloc_fail); |
| 2114 |
- FF_ALLOCZ_OR_GOTO(avctx, block->qmant, s->channels * sizeof(*block->qmant), |
|
| 2120 |
+ FF_ALLOCZ_OR_GOTO(avctx, block->qmant, channels * sizeof(*block->qmant), |
|
| 2115 | 2121 |
alloc_fail); |
| 2122 |
+ if (s->cpl_enabled) {
|
|
| 2123 |
+ FF_ALLOCZ_OR_GOTO(avctx, block->cpl_coord_exp, channels * sizeof(*block->cpl_coord_exp), |
|
| 2124 |
+ alloc_fail); |
|
| 2125 |
+ FF_ALLOCZ_OR_GOTO(avctx, block->cpl_coord_mant, channels * sizeof(*block->cpl_coord_mant), |
|
| 2126 |
+ alloc_fail); |
|
| 2127 |
+ } |
|
| 2116 | 2128 |
|
| 2117 |
- for (ch = 0; ch < s->channels; ch++) {
|
|
| 2129 |
+ for (ch = 0; ch < channels; ch++) {
|
|
| 2118 | 2130 |
/* arrangement: block, channel, coeff */ |
| 2119 |
- block->bap[ch] = &s->bap_buffer [AC3_MAX_COEFS * (blk * s->channels + ch)]; |
|
| 2120 |
- block->mdct_coef[ch] = &s->mdct_coef_buffer [AC3_MAX_COEFS * (blk * s->channels + ch)]; |
|
| 2121 |
- block->grouped_exp[ch] = &s->grouped_exp_buffer[128 * (blk * s->channels + ch)]; |
|
| 2122 |
- block->psd[ch] = &s->psd_buffer [AC3_MAX_COEFS * (blk * s->channels + ch)]; |
|
| 2123 |
- block->band_psd[ch] = &s->band_psd_buffer [64 * (blk * s->channels + ch)]; |
|
| 2124 |
- block->mask[ch] = &s->mask_buffer [64 * (blk * s->channels + ch)]; |
|
| 2125 |
- block->qmant[ch] = &s->qmant_buffer [AC3_MAX_COEFS * (blk * s->channels + ch)]; |
|
| 2131 |
+ block->bap[ch] = &s->bap_buffer [AC3_MAX_COEFS * (blk * channels + ch)]; |
|
| 2132 |
+ block->grouped_exp[ch] = &s->grouped_exp_buffer[128 * (blk * channels + ch)]; |
|
| 2133 |
+ block->psd[ch] = &s->psd_buffer [AC3_MAX_COEFS * (blk * channels + ch)]; |
|
| 2134 |
+ block->band_psd[ch] = &s->band_psd_buffer [64 * (blk * channels + ch)]; |
|
| 2135 |
+ block->mask[ch] = &s->mask_buffer [64 * (blk * channels + ch)]; |
|
| 2136 |
+ block->qmant[ch] = &s->qmant_buffer [AC3_MAX_COEFS * (blk * channels + ch)]; |
|
| 2137 |
+ if (s->cpl_enabled) {
|
|
| 2138 |
+ block->cpl_coord_exp[ch] = &s->cpl_coord_exp_buffer [16 * (blk * channels + ch)]; |
|
| 2139 |
+ block->cpl_coord_mant[ch] = &s->cpl_coord_mant_buffer[16 * (blk * channels + ch)]; |
|
| 2140 |
+ } |
|
| 2126 | 2141 |
|
| 2127 | 2142 |
/* arrangement: channel, block, coeff */ |
| 2128 | 2143 |
block->exp[ch] = &s->exp_buffer [AC3_MAX_COEFS * (AC3_MAX_BLOCKS * ch + blk)]; |
| 2144 |
+ block->mdct_coef[ch] = &s->mdct_coef_buffer [AC3_MAX_COEFS * (AC3_MAX_BLOCKS * ch + blk)]; |
|
| 2129 | 2145 |
} |
| 2130 | 2146 |
} |
| 2131 | 2147 |
|
| 2132 | 2148 |
if (CONFIG_AC3ENC_FLOAT) {
|
| 2133 |
- FF_ALLOC_OR_GOTO(avctx, s->fixed_coef_buffer, AC3_MAX_BLOCKS * s->channels * |
|
| 2149 |
+ FF_ALLOC_OR_GOTO(avctx, s->fixed_coef_buffer, AC3_MAX_BLOCKS * channels * |
|
| 2134 | 2150 |
AC3_MAX_COEFS * sizeof(*s->fixed_coef_buffer), alloc_fail); |
| 2135 | 2151 |
for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
|
| 2136 | 2152 |
AC3Block *block = &s->blocks[blk]; |
| 2137 |
- FF_ALLOCZ_OR_GOTO(avctx, block->fixed_coef, s->channels * |
|
| 2153 |
+ FF_ALLOCZ_OR_GOTO(avctx, block->fixed_coef, channels * |
|
| 2138 | 2154 |
sizeof(*block->fixed_coef), alloc_fail); |
| 2139 |
- for (ch = 0; ch < s->channels; ch++) |
|
| 2140 |
- block->fixed_coef[ch] = &s->fixed_coef_buffer[AC3_MAX_COEFS * (blk * s->channels + ch)]; |
|
| 2155 |
+ for (ch = 0; ch < channels; ch++) |
|
| 2156 |
+ block->fixed_coef[ch] = &s->fixed_coef_buffer[AC3_MAX_COEFS * (AC3_MAX_BLOCKS * ch + blk)]; |
|
| 2141 | 2157 |
} |
| 2142 | 2158 |
} else {
|
| 2143 | 2159 |
for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
|
| 2144 | 2160 |
AC3Block *block = &s->blocks[blk]; |
| 2145 |
- FF_ALLOCZ_OR_GOTO(avctx, block->fixed_coef, s->channels * |
|
| 2161 |
+ FF_ALLOCZ_OR_GOTO(avctx, block->fixed_coef, channels * |
|
| 2146 | 2162 |
sizeof(*block->fixed_coef), alloc_fail); |
| 2147 |
- for (ch = 0; ch < s->channels; ch++) |
|
| 2163 |
+ for (ch = 0; ch < channels; ch++) |
|
| 2148 | 2164 |
block->fixed_coef[ch] = (int32_t *)block->mdct_coef[ch]; |
| 2149 | 2165 |
} |
| 2150 | 2166 |
} |
| ... | ... |
@@ -101,7 +101,7 @@ static void scale_coefficients(AC3EncodeContext *s) |
| 101 | 101 |
|
| 102 | 102 |
for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
|
| 103 | 103 |
AC3Block *block = &s->blocks[blk]; |
| 104 |
- for (ch = 0; ch < s->channels; ch++) {
|
|
| 104 |
+ for (ch = 1; ch <= s->channels; ch++) {
|
|
| 105 | 105 |
s->ac3dsp.ac3_rshift_int32(block->mdct_coef[ch], AC3_MAX_COEFS, |
| 106 | 106 |
block->coeff_shift[ch]); |
| 107 | 107 |
} |
| ... | ... |
@@ -93,8 +93,10 @@ static int normalize_samples(AC3EncodeContext *s) |
| 93 | 93 |
*/ |
| 94 | 94 |
static void scale_coefficients(AC3EncodeContext *s) |
| 95 | 95 |
{
|
| 96 |
- s->ac3dsp.float_to_fixed24(s->fixed_coef_buffer, s->mdct_coef_buffer, |
|
| 97 |
- AC3_MAX_COEFS * AC3_MAX_BLOCKS * s->channels); |
|
| 96 |
+ int chan_size = AC3_MAX_COEFS * AC3_MAX_BLOCKS; |
|
| 97 |
+ s->ac3dsp.float_to_fixed24(s->fixed_coef_buffer + chan_size, |
|
| 98 |
+ s->mdct_coef_buffer + chan_size, |
|
| 99 |
+ chan_size * s->channels); |
|
| 98 | 100 |
} |
| 99 | 101 |
|
| 100 | 102 |
|
| ... | ... |
@@ -138,6 +138,13 @@ const uint16_t ff_ac3_bitrate_tab[19] = {
|
| 138 | 138 |
*/ |
| 139 | 139 |
const uint8_t ff_ac3_rematrix_band_tab[5] = { 13, 25, 37, 61, 253 };
|
| 140 | 140 |
|
| 141 |
+/** |
|
| 142 |
+ * Table E2.16 Default Coupling Banding Structure |
|
| 143 |
+ */ |
|
| 144 |
+const uint8_t ff_eac3_default_cpl_band_struct[18] = {
|
|
| 145 |
+ 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1 |
|
| 146 |
+}; |
|
| 147 |
+ |
|
| 141 | 148 |
/* AC-3 MDCT window */ |
| 142 | 149 |
|
| 143 | 150 |
/* MDCT window */ |
| ... | ... |
@@ -39,6 +39,7 @@ extern const uint8_t ff_ac3_dec_channel_map[8][2][6]; |
| 39 | 39 |
extern const uint16_t ff_ac3_sample_rate_tab[3]; |
| 40 | 40 |
extern const uint16_t ff_ac3_bitrate_tab[19]; |
| 41 | 41 |
extern const uint8_t ff_ac3_rematrix_band_tab[5]; |
| 42 |
+extern const uint8_t ff_eac3_default_cpl_band_struct[18]; |
|
| 42 | 43 |
extern const int16_t ff_ac3_window[AC3_WINDOW_SIZE/2]; |
| 43 | 44 |
extern const uint8_t ff_ac3_log_add_tab[260]; |
| 44 | 45 |
extern const uint16_t ff_ac3_hearing_threshold_tab[AC3_CRITICAL_BANDS][3]; |
| ... | ... |
@@ -156,7 +156,7 @@ void avcodec_register_all(void) |
| 156 | 156 |
REGISTER_DECODER (MPEG1_VDPAU, mpeg1_vdpau); |
| 157 | 157 |
REGISTER_DECODER (MPEG2_CRYSTALHD, mpeg2_crystalhd); |
| 158 | 158 |
REGISTER_DECODER (MSMPEG4_CRYSTALHD, msmpeg4_crystalhd); |
| 159 |
- REGISTER_ENCDEC (MSMPEG4V1, msmpeg4v1); |
|
| 159 |
+ REGISTER_DECODER (MSMPEG4V1, msmpeg4v1); |
|
| 160 | 160 |
REGISTER_ENCDEC (MSMPEG4V2, msmpeg4v2); |
| 161 | 161 |
REGISTER_ENCDEC (MSMPEG4V3, msmpeg4v3); |
| 162 | 162 |
REGISTER_DECODER (MSRLE, msrle); |
| ... | ... |
@@ -628,13 +628,6 @@ static inline int get_penalty_factor(int lambda, int lambda2, int type){
|
| 628 | 628 |
} |
| 629 | 629 |
} |
| 630 | 630 |
|
| 631 |
-/** |
|
| 632 |
- * Empty mmx state. |
|
| 633 |
- * this must be called between any dsp function and float/double code. |
|
| 634 |
- * for example sin(); dsp->idct_put(); emms_c(); cos() |
|
| 635 |
- */ |
|
| 636 |
-#define emms_c() |
|
| 637 |
- |
|
| 638 | 631 |
void dsputil_init_alpha(DSPContext* c, AVCodecContext *avctx); |
| 639 | 632 |
void dsputil_init_arm(DSPContext* c, AVCodecContext *avctx); |
| 640 | 633 |
void dsputil_init_bfin(DSPContext* c, AVCodecContext *avctx); |
| ... | ... |
@@ -652,22 +645,9 @@ void ff_intrax8dsp_init(DSPContext* c, AVCodecContext *avctx); |
| 652 | 652 |
void ff_mlp_init(DSPContext* c, AVCodecContext *avctx); |
| 653 | 653 |
void ff_mlp_init_x86(DSPContext* c, AVCodecContext *avctx); |
| 654 | 654 |
|
| 655 |
-#if HAVE_MMX |
|
| 656 |
- |
|
| 657 |
-#undef emms_c |
|
| 658 | 655 |
|
| 659 |
-static inline void emms(void) |
|
| 660 |
-{
|
|
| 661 |
- __asm__ volatile ("emms;":::"memory");
|
|
| 662 |
-} |
|
| 663 |
- |
|
| 664 |
-#define emms_c() \ |
|
| 665 |
-{\
|
|
| 666 |
- if(av_get_cpu_flags() & AV_CPU_FLAG_MMX)\ |
|
| 667 |
- emms();\ |
|
| 668 |
-} |
|
| 656 |
+#if ARCH_ARM |
|
| 669 | 657 |
|
| 670 |
-#elif ARCH_ARM |
|
| 671 | 658 |
|
| 672 | 659 |
#if HAVE_NEON |
| 673 | 660 |
# define STRIDE_ALIGN 16 |
| ... | ... |
@@ -638,15 +638,6 @@ av_cold int MPV_encode_init(AVCodecContext *avctx) |
| 638 | 638 |
s->low_delay= s->max_b_frames ? 0 : 1; |
| 639 | 639 |
avctx->delay= s->low_delay ? 0 : (s->max_b_frames + 1); |
| 640 | 640 |
break; |
| 641 |
- case CODEC_ID_MSMPEG4V1: |
|
| 642 |
- s->out_format = FMT_H263; |
|
| 643 |
- s->h263_msmpeg4 = 1; |
|
| 644 |
- s->h263_pred = 1; |
|
| 645 |
- s->unrestricted_mv = 1; |
|
| 646 |
- s->msmpeg4_version= 1; |
|
| 647 |
- avctx->delay=0; |
|
| 648 |
- s->low_delay=1; |
|
| 649 |
- break; |
|
| 650 | 641 |
case CODEC_ID_MSMPEG4V2: |
| 651 | 642 |
s->out_format = FMT_H263; |
| 652 | 643 |
s->h263_msmpeg4 = 1; |
| ... | ... |
@@ -3807,18 +3798,6 @@ AVCodec ff_h263p_encoder = {
|
| 3807 | 3807 |
.long_name= NULL_IF_CONFIG_SMALL("H.263+ / H.263-1998 / H.263 version 2"),
|
| 3808 | 3808 |
}; |
| 3809 | 3809 |
|
| 3810 |
-AVCodec ff_msmpeg4v1_encoder = {
|
|
| 3811 |
- "msmpeg4v1", |
|
| 3812 |
- AVMEDIA_TYPE_VIDEO, |
|
| 3813 |
- CODEC_ID_MSMPEG4V1, |
|
| 3814 |
- sizeof(MpegEncContext), |
|
| 3815 |
- MPV_encode_init, |
|
| 3816 |
- MPV_encode_picture, |
|
| 3817 |
- MPV_encode_end, |
|
| 3818 |
- .pix_fmts= (const enum PixelFormat[]){PIX_FMT_YUV420P, PIX_FMT_NONE},
|
|
| 3819 |
- .long_name= NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 1"),
|
|
| 3820 |
-}; |
|
| 3821 |
- |
|
| 3822 | 3810 |
AVCodec ff_msmpeg4v2_encoder = {
|
| 3823 | 3811 |
"msmpeg4v2", |
| 3824 | 3812 |
AVMEDIA_TYPE_VIDEO, |
| ... | ... |
@@ -846,22 +846,14 @@ static void msmpeg4_encode_dc(MpegEncContext * s, int level, int n, int *dir_ptr |
| 846 | 846 |
int pred, extquant; |
| 847 | 847 |
int extrabits = 0; |
| 848 | 848 |
|
| 849 |
- if(s->msmpeg4_version==1){
|
|
| 850 |
- int32_t *dc_val; |
|
| 851 |
- pred = msmpeg4v1_pred_dc(s, n, &dc_val); |
|
| 852 |
- |
|
| 853 |
- /* update predictor */ |
|
| 854 |
- *dc_val= level; |
|
| 855 |
- }else{
|
|
| 856 |
- int16_t *dc_val; |
|
| 857 |
- pred = msmpeg4_pred_dc(s, n, &dc_val, dir_ptr); |
|
| 849 |
+ int16_t *dc_val; |
|
| 850 |
+ pred = msmpeg4_pred_dc(s, n, &dc_val, dir_ptr); |
|
| 858 | 851 |
|
| 859 |
- /* update predictor */ |
|
| 860 |
- if (n < 4) {
|
|
| 861 |
- *dc_val = level * s->y_dc_scale; |
|
| 862 |
- } else {
|
|
| 863 |
- *dc_val = level * s->c_dc_scale; |
|
| 864 |
- } |
|
| 852 |
+ /* update predictor */ |
|
| 853 |
+ if (n < 4) {
|
|
| 854 |
+ *dc_val = level * s->y_dc_scale; |
|
| 855 |
+ } else {
|
|
| 856 |
+ *dc_val = level * s->c_dc_scale; |
|
| 865 | 857 |
} |
| 866 | 858 |
|
| 867 | 859 |
/* do the prediction */ |
| ... | ... |
@@ -54,8 +54,7 @@ int ff_wmv2_decode_mb(MpegEncContext *s, DCTELEM block[6][64]); |
| 54 | 54 |
CONFIG_MSMPEG4V3_DECODER || \ |
| 55 | 55 |
CONFIG_WMV2_DECODER || \ |
| 56 | 56 |
CONFIG_VC1_DECODER) |
| 57 |
-#define CONFIG_MSMPEG4_ENCODER (CONFIG_MSMPEG4V1_ENCODER || \ |
|
| 58 |
- CONFIG_MSMPEG4V2_ENCODER || \ |
|
| 57 |
+#define CONFIG_MSMPEG4_ENCODER (CONFIG_MSMPEG4V2_ENCODER || \ |
|
| 59 | 58 |
CONFIG_MSMPEG4V3_ENCODER || \ |
| 60 | 59 |
CONFIG_WMV2_ENCODER) |
| 61 | 60 |
|
| ... | ... |
@@ -39,6 +39,8 @@ typedef struct TiffContext {
|
| 39 | 39 |
|
| 40 | 40 |
int width, height; |
| 41 | 41 |
unsigned int bpp, bppcount; |
| 42 |
+ uint32_t palette[256]; |
|
| 43 |
+ int palette_is_set; |
|
| 42 | 44 |
int le; |
| 43 | 45 |
enum TiffCompr compr; |
| 44 | 46 |
int invert; |
| ... | ... |
@@ -255,11 +257,15 @@ static int init_image(TiffContext *s) |
| 255 | 255 |
av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed\n"); |
| 256 | 256 |
return ret; |
| 257 | 257 |
} |
| 258 |
- if (s->bpp == 8 && s->picture.data[1]){
|
|
| 259 |
- /* make default grayscale pal */ |
|
| 260 |
- pal = (uint32_t *) s->picture.data[1]; |
|
| 261 |
- for (i = 0; i < 256; i++) |
|
| 262 |
- pal[i] = i * 0x010101; |
|
| 258 |
+ if (s->avctx->pix_fmt == PIX_FMT_PAL8) {
|
|
| 259 |
+ if (s->palette_is_set) {
|
|
| 260 |
+ memcpy(s->picture.data[1], s->palette, sizeof(s->palette)); |
|
| 261 |
+ } else {
|
|
| 262 |
+ /* make default grayscale pal */ |
|
| 263 |
+ pal = (uint32_t *) s->picture.data[1]; |
|
| 264 |
+ for (i = 0; i < 256; i++) |
|
| 265 |
+ pal[i] = i * 0x010101; |
|
| 266 |
+ } |
|
| 263 | 267 |
} |
| 264 | 268 |
return 0; |
| 265 | 269 |
} |
| ... | ... |
@@ -442,11 +448,7 @@ static int tiff_decode_tag(TiffContext *s, const uint8_t *start, const uint8_t * |
| 442 | 442 |
s->fill_order = value - 1; |
| 443 | 443 |
break; |
| 444 | 444 |
case TIFF_PAL: |
| 445 |
- if(s->avctx->pix_fmt != PIX_FMT_PAL8){
|
|
| 446 |
- av_log(s->avctx, AV_LOG_ERROR, "Palette met but this is not palettized format\n"); |
|
| 447 |
- return -1; |
|
| 448 |
- } |
|
| 449 |
- pal = (uint32_t *) s->picture.data[1]; |
|
| 445 |
+ pal = (uint32_t *) s->palette; |
|
| 450 | 446 |
off = type_sizes[type]; |
| 451 | 447 |
rp = buf; |
| 452 | 448 |
gp = buf + count / 3 * off; |
| ... | ... |
@@ -459,6 +461,7 @@ static int tiff_decode_tag(TiffContext *s, const uint8_t *start, const uint8_t * |
| 459 | 459 |
j |= tget(&bp, type, s->le) >> off; |
| 460 | 460 |
pal[i] = j; |
| 461 | 461 |
} |
| 462 |
+ s->palette_is_set = 1; |
|
| 462 | 463 |
break; |
| 463 | 464 |
case TIFF_PLANAR: |
| 464 | 465 |
if(value == 2){
|
| ... | ... |
@@ -287,13 +287,8 @@ zrmjpeg |
| 287 | 287 |
CpuCaps gCpuCaps; //FIXME initialize this so optims work |
| 288 | 288 |
|
| 289 | 289 |
|
| 290 |
-//exact copy from vf_scale.c |
|
| 291 | 290 |
int get_sws_cpuflags(void){
|
| 292 |
- return |
|
| 293 |
- (gCpuCaps.hasMMX ? SWS_CPU_CAPS_MMX : 0) |
|
| 294 |
- | (gCpuCaps.hasMMX2 ? SWS_CPU_CAPS_MMX2 : 0) |
|
| 295 |
- | (gCpuCaps.has3DNow ? SWS_CPU_CAPS_3DNOW : 0) |
|
| 296 |
- | (gCpuCaps.hasAltiVec ? SWS_CPU_CAPS_ALTIVEC : 0); |
|
| 291 |
+ return 0; |
|
| 297 | 292 |
} |
| 298 | 293 |
|
| 299 | 294 |
static void sws_getFlagsAndFilterFromCmdLine(int *flags, SwsFilter **srcFilterParam, SwsFilter **dstFilterParam) |
| ... | ... |
@@ -348,7 +343,7 @@ struct SwsContext *sws_getContextFromCmdLine(int srcW, int srcH, int srcFormat, |
| 348 | 348 |
if (srcFormat == IMGFMT_RGB8 || srcFormat == IMGFMT_BGR8) sfmt = PIX_FMT_PAL8; |
| 349 | 349 |
sws_getFlagsAndFilterFromCmdLine(&flags, &srcFilterParam, &dstFilterParam); |
| 350 | 350 |
|
| 351 |
- return sws_getContext(srcW, srcH, sfmt, dstW, dstH, dfmt, flags | get_sws_cpuflags(), srcFilterParam, dstFilterParam, NULL); |
|
| 351 |
+ return sws_getContext(srcW, srcH, sfmt, dstW, dstH, dfmt, flags , srcFilterParam, dstFilterParam, NULL); |
|
| 352 | 352 |
} |
| 353 | 353 |
|
| 354 | 354 |
typedef struct {
|
| ... | ... |
@@ -276,7 +276,7 @@ static int ape_read_header(AVFormatContext * s, AVFormatParameters * ap) |
| 276 | 276 |
ape->frames[0].nblocks = ape->blocksperframe; |
| 277 | 277 |
ape->frames[0].skip = 0; |
| 278 | 278 |
for (i = 1; i < ape->totalframes; i++) {
|
| 279 |
- ape->frames[i].pos = ape->seektable[i] + ape->junklength; //ape->frames[i-1].pos + ape->blocksperframe; |
|
| 279 |
+ ape->frames[i].pos = ape->seektable[i] + ape->junklength; |
|
| 280 | 280 |
ape->frames[i].nblocks = ape->blocksperframe; |
| 281 | 281 |
ape->frames[i - 1].size = ape->frames[i].pos - ape->frames[i - 1].pos; |
| 282 | 282 |
ape->frames[i].skip = (ape->frames[i].pos - ape->frames[0].pos) & 3; |
| ... | ... |
@@ -37,6 +37,7 @@ |
| 37 | 37 |
#include "config.h" |
| 38 | 38 |
#include "attributes.h" |
| 39 | 39 |
#include "timer.h" |
| 40 |
+#include "cpu.h" |
|
| 40 | 41 |
|
| 41 | 42 |
#ifndef attribute_align_arg |
| 42 | 43 |
#if ARCH_X86_32 && AV_GCC_VERSION_AT_LEAST(4,2) |
| ... | ... |
@@ -222,4 +223,19 @@ |
| 222 | 222 |
# define ONLY_IF_THREADS_ENABLED(x) NULL |
| 223 | 223 |
#endif |
| 224 | 224 |
|
| 225 |
+#if HAVE_MMX |
|
| 226 |
+/** |
|
| 227 |
+ * Empty mmx state. |
|
| 228 |
+ * this must be called between any dsp function and float/double code. |
|
| 229 |
+ * for example sin(); dsp->idct_put(); emms_c(); cos() |
|
| 230 |
+ */ |
|
| 231 |
+static av_always_inline void emms_c(void) |
|
| 232 |
+{
|
|
| 233 |
+ if(av_get_cpu_flags() & AV_CPU_FLAG_MMX) |
|
| 234 |
+ __asm__ volatile ("emms" ::: "memory");
|
|
| 235 |
+} |
|
| 236 |
+#else /* HAVE_MMX */ |
|
| 237 |
+#define emms_c() |
|
| 238 |
+#endif /* HAVE_MMX */ |
|
| 239 |
+ |
|
| 225 | 240 |
#endif /* AVUTIL_INTERNAL_H */ |
| ... | ... |
@@ -79,15 +79,13 @@ static int yuyvtoyv12_unscaled(SwsContext *c, uint8_t* src[], int srcStride[], i |
| 79 | 79 |
void ff_bfin_get_unscaled_swscale(SwsContext *c) |
| 80 | 80 |
{
|
| 81 | 81 |
SwsFunc swScale = c->swScale; |
| 82 |
- if (c->flags & SWS_CPU_CAPS_BFIN) |
|
| 83 |
- if (c->dstFormat == PIX_FMT_YUV420P) |
|
| 84 |
- if (c->srcFormat == PIX_FMT_UYVY422) {
|
|
| 85 |
- av_log (NULL, AV_LOG_VERBOSE, "selecting Blackfin optimized uyvytoyv12_unscaled\n"); |
|
| 86 |
- c->swScale = uyvytoyv12_unscaled; |
|
| 87 |
- } |
|
| 88 |
- if (c->dstFormat == PIX_FMT_YUV420P) |
|
| 89 |
- if (c->srcFormat == PIX_FMT_YUYV422) {
|
|
| 90 |
- av_log (NULL, AV_LOG_VERBOSE, "selecting Blackfin optimized yuyvtoyv12_unscaled\n"); |
|
| 91 |
- c->swScale = yuyvtoyv12_unscaled; |
|
| 92 |
- } |
|
| 82 |
+ |
|
| 83 |
+ if (c->dstFormat == PIX_FMT_YUV420P && c->srcFormat == PIX_FMT_UYVY422) {
|
|
| 84 |
+ av_log (NULL, AV_LOG_VERBOSE, "selecting Blackfin optimized uyvytoyv12_unscaled\n"); |
|
| 85 |
+ c->swScale = uyvytoyv12_unscaled; |
|
| 86 |
+ } |
|
| 87 |
+ if (c->dstFormat == PIX_FMT_YUV420P && c->srcFormat == PIX_FMT_YUYV422) {
|
|
| 88 |
+ av_log (NULL, AV_LOG_VERBOSE, "selecting Blackfin optimized yuyvtoyv12_unscaled\n"); |
|
| 89 |
+ c->swScale = yuyvtoyv12_unscaled; |
|
| 90 |
+ } |
|
| 93 | 91 |
} |
| ... | ... |
@@ -33,31 +33,6 @@ |
| 33 | 33 |
|
| 34 | 34 |
#define FUNC(s,d,n) {s,d,#n,n}
|
| 35 | 35 |
|
| 36 |
-static int cpu_caps; |
|
| 37 |
- |
|
| 38 |
-static char *args_parse(int argc, char *argv[]) |
|
| 39 |
-{
|
|
| 40 |
- int o; |
|
| 41 |
- |
|
| 42 |
- while ((o = getopt(argc, argv, "m23")) != -1) {
|
|
| 43 |
- switch (o) {
|
|
| 44 |
- case 'm': |
|
| 45 |
- cpu_caps |= SWS_CPU_CAPS_MMX; |
|
| 46 |
- break; |
|
| 47 |
- case '2': |
|
| 48 |
- cpu_caps |= SWS_CPU_CAPS_MMX2; |
|
| 49 |
- break; |
|
| 50 |
- case '3': |
|
| 51 |
- cpu_caps |= SWS_CPU_CAPS_3DNOW; |
|
| 52 |
- break; |
|
| 53 |
- default: |
|
| 54 |
- av_log(NULL, AV_LOG_ERROR, "Unknown option %c\n", o); |
|
| 55 |
- } |
|
| 56 |
- } |
|
| 57 |
- |
|
| 58 |
- return argv[optind]; |
|
| 59 |
-} |
|
| 60 |
- |
|
| 61 | 36 |
int main(int argc, char **argv) |
| 62 | 37 |
{
|
| 63 | 38 |
int i, funcNum; |
| ... | ... |
@@ -70,9 +45,7 @@ int main(int argc, char **argv) |
| 70 | 70 |
return -1; |
| 71 | 71 |
|
| 72 | 72 |
av_log(NULL, AV_LOG_INFO, "memory corruption test ...\n"); |
| 73 |
- args_parse(argc, argv); |
|
| 74 |
- av_log(NULL, AV_LOG_INFO, "CPU capabilities forced to %x\n", cpu_caps); |
|
| 75 |
- sws_rgb2rgb_init(cpu_caps); |
|
| 73 |
+ sws_rgb2rgb_init(); |
|
| 76 | 74 |
|
| 77 | 75 |
for(funcNum=0; ; funcNum++) {
|
| 78 | 76 |
struct func_info_s {
|
| ... | ... |
@@ -48,12 +48,6 @@ static const AVOption options[] = {
|
| 48 | 48 |
{ "spline", "natural bicubic spline", 0, FF_OPT_TYPE_CONST, {.dbl = SWS_SPLINE }, INT_MIN, INT_MAX, VE, "sws_flags" },
|
| 49 | 49 |
{ "print_info", "print info", 0, FF_OPT_TYPE_CONST, {.dbl = SWS_PRINT_INFO }, INT_MIN, INT_MAX, VE, "sws_flags" },
|
| 50 | 50 |
{ "accurate_rnd", "accurate rounding", 0, FF_OPT_TYPE_CONST, {.dbl = SWS_ACCURATE_RND }, INT_MIN, INT_MAX, VE, "sws_flags" },
|
| 51 |
- { "mmx", "MMX SIMD acceleration", 0, FF_OPT_TYPE_CONST, {.dbl = SWS_CPU_CAPS_MMX }, INT_MIN, INT_MAX, VE, "sws_flags" },
|
|
| 52 |
- { "mmx2", "MMX2 SIMD acceleration", 0, FF_OPT_TYPE_CONST, {.dbl = SWS_CPU_CAPS_MMX2 }, INT_MIN, INT_MAX, VE, "sws_flags" },
|
|
| 53 |
- { "sse2", "SSE2 SIMD acceleration", 0, FF_OPT_TYPE_CONST, {.dbl = SWS_CPU_CAPS_SSE2 }, INT_MIN, INT_MAX, VE, "sws_flags" },
|
|
| 54 |
- { "3dnow", "3DNOW SIMD acceleration", 0, FF_OPT_TYPE_CONST, {.dbl = SWS_CPU_CAPS_3DNOW }, INT_MIN, INT_MAX, VE, "sws_flags" },
|
|
| 55 |
- { "altivec", "AltiVec SIMD acceleration", 0, FF_OPT_TYPE_CONST, {.dbl = SWS_CPU_CAPS_ALTIVEC }, INT_MIN, INT_MAX, VE, "sws_flags" },
|
|
| 56 |
- { "bfin", "Blackfin SIMD acceleration", 0, FF_OPT_TYPE_CONST, {.dbl = SWS_CPU_CAPS_BFIN }, INT_MIN, INT_MAX, VE, "sws_flags" },
|
|
| 57 | 51 |
{ "full_chroma_int", "full chroma interpolation", 0 , FF_OPT_TYPE_CONST, {.dbl = SWS_FULL_CHR_H_INT }, INT_MIN, INT_MAX, VE, "sws_flags" },
|
| 58 | 52 |
{ "full_chroma_inp", "full chroma input", 0 , FF_OPT_TYPE_CONST, {.dbl = SWS_FULL_CHR_H_INP }, INT_MIN, INT_MAX, VE, "sws_flags" },
|
| 59 | 53 |
{ "bitexact", "", 0 , FF_OPT_TYPE_CONST, {.dbl = SWS_BITEXACT }, INT_MIN, INT_MAX, VE, "sws_flags" },
|
| ... | ... |
@@ -23,69 +23,16 @@ |
| 23 | 23 |
#include "swscale_altivec_template.c" |
| 24 | 24 |
#endif |
| 25 | 25 |
|
| 26 |
+#if COMPILE_TEMPLATE_ALTIVEC |
|
| 26 | 27 |
static inline void RENAME(yuv2yuvX)(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize, |
| 27 | 28 |
const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize, const int16_t **alpSrc, |
| 28 | 29 |
uint8_t *dest, uint8_t *uDest, uint8_t *vDest, uint8_t *aDest, long dstW, long chrDstW) |
| 29 | 30 |
{
|
| 30 |
-#if COMPILE_TEMPLATE_ALTIVEC |
|
| 31 | 31 |
yuv2yuvX_altivec_real(lumFilter, lumSrc, lumFilterSize, |
| 32 | 32 |
chrFilter, chrSrc, chrFilterSize, |
| 33 | 33 |
dest, uDest, vDest, dstW, chrDstW); |
| 34 |
-#else //COMPILE_TEMPLATE_ALTIVEC |
|
| 35 |
- yuv2yuvXinC(lumFilter, lumSrc, lumFilterSize, |
|
| 36 |
- chrFilter, chrSrc, chrFilterSize, |
|
| 37 |
- alpSrc, dest, uDest, vDest, aDest, dstW, chrDstW); |
|
| 38 |
-#endif //!COMPILE_TEMPLATE_ALTIVEC |
|
| 39 |
-} |
|
| 40 |
- |
|
| 41 |
-static inline void RENAME(yuv2nv12X)(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize, |
|
| 42 |
- const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize, |
|
| 43 |
- uint8_t *dest, uint8_t *uDest, int dstW, int chrDstW, enum PixelFormat dstFormat) |
|
| 44 |
-{
|
|
| 45 |
- yuv2nv12XinC(lumFilter, lumSrc, lumFilterSize, |
|
| 46 |
- chrFilter, chrSrc, chrFilterSize, |
|
| 47 |
- dest, uDest, dstW, chrDstW, dstFormat); |
|
| 48 | 34 |
} |
| 49 | 35 |
|
| 50 |
-static inline void RENAME(yuv2yuv1)(SwsContext *c, const int16_t *lumSrc, const int16_t *chrSrc, const int16_t *alpSrc, |
|
| 51 |
- uint8_t *dest, uint8_t *uDest, uint8_t *vDest, uint8_t *aDest, long dstW, long chrDstW) |
|
| 52 |
-{
|
|
| 53 |
- int i; |
|
| 54 |
- for (i=0; i<dstW; i++) {
|
|
| 55 |
- int val= (lumSrc[i]+64)>>7; |
|
| 56 |
- |
|
| 57 |
- if (val&256) {
|
|
| 58 |
- if (val<0) val=0; |
|
| 59 |
- else val=255; |
|
| 60 |
- } |
|
| 61 |
- |
|
| 62 |
- dest[i]= val; |
|
| 63 |
- } |
|
| 64 |
- |
|
| 65 |
- if (uDest) |
|
| 66 |
- for (i=0; i<chrDstW; i++) {
|
|
| 67 |
- int u=(chrSrc[i ]+64)>>7; |
|
| 68 |
- int v=(chrSrc[i + VOFW]+64)>>7; |
|
| 69 |
- |
|
| 70 |
- if ((u|v)&256) {
|
|
| 71 |
- if (u<0) u=0; |
|
| 72 |
- else if (u>255) u=255; |
|
| 73 |
- if (v<0) v=0; |
|
| 74 |
- else if (v>255) v=255; |
|
| 75 |
- } |
|
| 76 |
- |
|
| 77 |
- uDest[i]= u; |
|
| 78 |
- vDest[i]= v; |
|
| 79 |
- } |
|
| 80 |
- |
|
| 81 |
- if (CONFIG_SWSCALE_ALPHA && aDest) |
|
| 82 |
- for (i=0; i<dstW; i++) {
|
|
| 83 |
- int val= (alpSrc[i]+64)>>7; |
|
| 84 |
- aDest[i]= av_clip_uint8(val); |
|
| 85 |
- } |
|
| 86 |
-} |
|
| 87 |
- |
|
| 88 |
- |
|
| 89 | 36 |
/** |
| 90 | 37 |
* vertical scale YV12 to RGB |
| 91 | 38 |
*/ |
| ... | ... |
@@ -93,7 +40,6 @@ static inline void RENAME(yuv2packedX)(SwsContext *c, const int16_t *lumFilter, |
| 93 | 93 |
const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize, |
| 94 | 94 |
const int16_t **alpSrc, uint8_t *dest, long dstW, long dstY) |
| 95 | 95 |
{
|
| 96 |
-#if COMPILE_TEMPLATE_ALTIVEC |
|
| 97 | 96 |
/* The following list of supported dstFormat values should |
| 98 | 97 |
match what's found in the body of ff_yuv2packedX_altivec() */ |
| 99 | 98 |
if (!(c->flags & SWS_BITEXACT) && !c->alpPixBuf && |
| ... | ... |
@@ -104,815 +50,17 @@ static inline void RENAME(yuv2packedX)(SwsContext *c, const int16_t *lumFilter, |
| 104 | 104 |
chrFilter, chrSrc, chrFilterSize, |
| 105 | 105 |
dest, dstW, dstY); |
| 106 | 106 |
else |
| 107 |
-#endif |
|
| 108 | 107 |
yuv2packedXinC(c, lumFilter, lumSrc, lumFilterSize, |
| 109 | 108 |
chrFilter, chrSrc, chrFilterSize, |
| 110 | 109 |
alpSrc, dest, dstW, dstY); |
| 111 | 110 |
} |
| 111 |
+#endif |
|
| 112 | 112 |
|
| 113 |
-/** |
|
| 114 |
- * vertical bilinear scale YV12 to RGB |
|
| 115 |
- */ |
|
| 116 |
-static inline void RENAME(yuv2packed2)(SwsContext *c, const uint16_t *buf0, const uint16_t *buf1, const uint16_t *uvbuf0, const uint16_t *uvbuf1, |
|
| 117 |
- const uint16_t *abuf0, const uint16_t *abuf1, uint8_t *dest, int dstW, int yalpha, int uvalpha, int y) |
|
| 118 |
-{
|
|
| 119 |
- int yalpha1=4095- yalpha; |
|
| 120 |
- int uvalpha1=4095-uvalpha; |
|
| 121 |
- int i; |
|
| 122 |
- |
|
| 123 |
- YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB2_C, YSCALE_YUV_2_PACKED2_C(void,0), YSCALE_YUV_2_GRAY16_2_C, YSCALE_YUV_2_MONO2_C) |
|
| 124 |
-} |
|
| 125 |
- |
|
| 126 |
-/** |
|
| 127 |
- * YV12 to RGB without scaling or interpolating |
|
| 128 |
- */ |
|
| 129 |
-static inline void RENAME(yuv2packed1)(SwsContext *c, const uint16_t *buf0, const uint16_t *uvbuf0, const uint16_t *uvbuf1, |
|
| 130 |
- const uint16_t *abuf0, uint8_t *dest, int dstW, int uvalpha, enum PixelFormat dstFormat, int flags, int y) |
|
| 131 |
-{
|
|
| 132 |
- const int yalpha1=0; |
|
| 133 |
- int i; |
|
| 134 |
- |
|
| 135 |
- const uint16_t *buf1= buf0; //FIXME needed for RGB1/BGR1 |
|
| 136 |
- const int yalpha= 4096; //FIXME ... |
|
| 137 |
- |
|
| 138 |
- if (flags&SWS_FULL_CHR_H_INT) {
|
|
| 139 |
- c->yuv2packed2(c, buf0, buf0, uvbuf0, uvbuf1, abuf0, abuf0, dest, dstW, 0, uvalpha, y); |
|
| 140 |
- return; |
|
| 141 |
- } |
|
| 142 |
- |
|
| 143 |
- if (uvalpha < 2048) {
|
|
| 144 |
- YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1_C, YSCALE_YUV_2_PACKED1_C(void,0), YSCALE_YUV_2_GRAY16_1_C, YSCALE_YUV_2_MONO2_C) |
|
| 145 |
- } else {
|
|
| 146 |
- YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1B_C, YSCALE_YUV_2_PACKED1B_C(void,0), YSCALE_YUV_2_GRAY16_1_C, YSCALE_YUV_2_MONO2_C) |
|
| 147 |
- } |
|
| 148 |
-} |
|
| 149 |
- |
|
| 150 |
-//FIXME yuy2* can read up to 7 samples too much |
|
| 151 |
- |
|
| 152 |
-static inline void RENAME(yuy2ToY)(uint8_t *dst, const uint8_t *src, long width, uint32_t *unused) |
|
| 153 |
-{
|
|
| 154 |
- int i; |
|
| 155 |
- for (i=0; i<width; i++) |
|
| 156 |
- dst[i]= src[2*i]; |
|
| 157 |
-} |
|
| 158 |
- |
|
| 159 |
-static inline void RENAME(yuy2ToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, const uint8_t *src2, long width, uint32_t *unused) |
|
| 160 |
-{
|
|
| 161 |
- int i; |
|
| 162 |
- for (i=0; i<width; i++) {
|
|
| 163 |
- dstU[i]= src1[4*i + 1]; |
|
| 164 |
- dstV[i]= src1[4*i + 3]; |
|
| 165 |
- } |
|
| 166 |
- assert(src1 == src2); |
|
| 167 |
-} |
|
| 168 |
- |
|
| 169 |
-static inline void RENAME(LEToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, const uint8_t *src2, long width, uint32_t *unused) |
|
| 170 |
-{
|
|
| 171 |
- int i; |
|
| 172 |
- for (i=0; i<width; i++) {
|
|
| 173 |
- dstU[i]= src1[2*i + 1]; |
|
| 174 |
- dstV[i]= src2[2*i + 1]; |
|
| 175 |
- } |
|
| 176 |
-} |
|
| 177 |
- |
|
| 178 |
-/* This is almost identical to the previous, end exists only because |
|
| 179 |
- * yuy2ToY/UV)(dst, src+1, ...) would have 100% unaligned accesses. */ |
|
| 180 |
-static inline void RENAME(uyvyToY)(uint8_t *dst, const uint8_t *src, long width, uint32_t *unused) |
|
| 181 |
-{
|
|
| 182 |
- int i; |
|
| 183 |
- for (i=0; i<width; i++) |
|
| 184 |
- dst[i]= src[2*i+1]; |
|
| 185 |
-} |
|
| 186 |
- |
|
| 187 |
-static inline void RENAME(uyvyToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, const uint8_t *src2, long width, uint32_t *unused) |
|
| 188 |
-{
|
|
| 189 |
- int i; |
|
| 190 |
- for (i=0; i<width; i++) {
|
|
| 191 |
- dstU[i]= src1[4*i + 0]; |
|
| 192 |
- dstV[i]= src1[4*i + 2]; |
|
| 193 |
- } |
|
| 194 |
- assert(src1 == src2); |
|
| 195 |
-} |
|
| 196 |
- |
|
| 197 |
-static inline void RENAME(BEToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, const uint8_t *src2, long width, uint32_t *unused) |
|
| 198 |
-{
|
|
| 199 |
- int i; |
|
| 200 |
- for (i=0; i<width; i++) {
|
|
| 201 |
- dstU[i]= src1[2*i]; |
|
| 202 |
- dstV[i]= src2[2*i]; |
|
| 203 |
- } |
|
| 204 |
-} |
|
| 205 |
- |
|
| 206 |
-static inline void RENAME(nvXXtoUV)(uint8_t *dst1, uint8_t *dst2, |
|
| 207 |
- const uint8_t *src, long width) |
|
| 208 |
-{
|
|
| 209 |
- int i; |
|
| 210 |
- for (i = 0; i < width; i++) {
|
|
| 211 |
- dst1[i] = src[2*i+0]; |
|
| 212 |
- dst2[i] = src[2*i+1]; |
|
| 213 |
- } |
|
| 214 |
-} |
|
| 215 |
- |
|
| 216 |
-static inline void RENAME(nv12ToUV)(uint8_t *dstU, uint8_t *dstV, |
|
| 217 |
- const uint8_t *src1, const uint8_t *src2, |
|
| 218 |
- long width, uint32_t *unused) |
|
| 219 |
-{
|
|
| 220 |
- RENAME(nvXXtoUV)(dstU, dstV, src1, width); |
|
| 221 |
-} |
|
| 222 |
- |
|
| 223 |
-static inline void RENAME(nv21ToUV)(uint8_t *dstU, uint8_t *dstV, |
|
| 224 |
- const uint8_t *src1, const uint8_t *src2, |
|
| 225 |
- long width, uint32_t *unused) |
|
| 226 |
-{
|
|
| 227 |
- RENAME(nvXXtoUV)(dstV, dstU, src1, width); |
|
| 228 |
-} |
|
| 229 |
- |
|
| 230 |
- |
|
| 231 |
-static inline void RENAME(bgr24ToY)(uint8_t *dst, const uint8_t *src, long width, uint32_t *unused) |
|
| 232 |
-{
|
|
| 233 |
- int i; |
|
| 234 |
- for (i=0; i<width; i++) {
|
|
| 235 |
- int b= src[i*3+0]; |
|
| 236 |
- int g= src[i*3+1]; |
|
| 237 |
- int r= src[i*3+2]; |
|
| 238 |
- |
|
| 239 |
- dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT); |
|
| 240 |
- } |
|
| 241 |
-} |
|
| 242 |
- |
|
| 243 |
-static inline void RENAME(bgr24ToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, const uint8_t *src2, long width, uint32_t *unused) |
|
| 244 |
-{
|
|
| 245 |
- int i; |
|
| 246 |
- for (i=0; i<width; i++) {
|
|
| 247 |
- int b= src1[3*i + 0]; |
|
| 248 |
- int g= src1[3*i + 1]; |
|
| 249 |
- int r= src1[3*i + 2]; |
|
| 250 |
- |
|
| 251 |
- dstU[i]= (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT; |
|
| 252 |
- dstV[i]= (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT; |
|
| 253 |
- } |
|
| 254 |
- assert(src1 == src2); |
|
| 255 |
-} |
|
| 256 |
- |
|
| 257 |
-static inline void RENAME(bgr24ToUV_half)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, const uint8_t *src2, long width, uint32_t *unused) |
|
| 258 |
-{
|
|
| 259 |
- int i; |
|
| 260 |
- for (i=0; i<width; i++) {
|
|
| 261 |
- int b= src1[6*i + 0] + src1[6*i + 3]; |
|
| 262 |
- int g= src1[6*i + 1] + src1[6*i + 4]; |
|
| 263 |
- int r= src1[6*i + 2] + src1[6*i + 5]; |
|
| 264 |
- |
|
| 265 |
- dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1); |
|
| 266 |
- dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1); |
|
| 267 |
- } |
|
| 268 |
- assert(src1 == src2); |
|
| 269 |
-} |
|
| 270 |
- |
|
| 271 |
-static inline void RENAME(rgb24ToY)(uint8_t *dst, const uint8_t *src, long width, uint32_t *unused) |
|
| 272 |
-{
|
|
| 273 |
- int i; |
|
| 274 |
- for (i=0; i<width; i++) {
|
|
| 275 |
- int r= src[i*3+0]; |
|
| 276 |
- int g= src[i*3+1]; |
|
| 277 |
- int b= src[i*3+2]; |
|
| 278 |
- |
|
| 279 |
- dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT); |
|
| 280 |
- } |
|
| 281 |
-} |
|
| 282 |
- |
|
| 283 |
-static inline void RENAME(rgb24ToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, const uint8_t *src2, long width, uint32_t *unused) |
|
| 284 |
-{
|
|
| 285 |
- int i; |
|
| 286 |
- assert(src1==src2); |
|
| 287 |
- for (i=0; i<width; i++) {
|
|
| 288 |
- int r= src1[3*i + 0]; |
|
| 289 |
- int g= src1[3*i + 1]; |
|
| 290 |
- int b= src1[3*i + 2]; |
|
| 291 |
- |
|
| 292 |
- dstU[i]= (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT; |
|
| 293 |
- dstV[i]= (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT; |
|
| 294 |
- } |
|
| 295 |
-} |
|
| 296 |
- |
|
| 297 |
-static inline void RENAME(rgb24ToUV_half)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, const uint8_t *src2, long width, uint32_t *unused) |
|
| 298 |
-{
|
|
| 299 |
- int i; |
|
| 300 |
- assert(src1==src2); |
|
| 301 |
- for (i=0; i<width; i++) {
|
|
| 302 |
- int r= src1[6*i + 0] + src1[6*i + 3]; |
|
| 303 |
- int g= src1[6*i + 1] + src1[6*i + 4]; |
|
| 304 |
- int b= src1[6*i + 2] + src1[6*i + 5]; |
|
| 305 |
- |
|
| 306 |
- dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1); |
|
| 307 |
- dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1); |
|
| 308 |
- } |
|
| 309 |
-} |
|
| 310 |
- |
|
| 311 |
- |
|
| 312 |
-// bilinear / bicubic scaling |
|
| 313 |
-static inline void RENAME(hScale)(int16_t *dst, int dstW, const uint8_t *src, int srcW, int xInc, |
|
| 314 |
- const int16_t *filter, const int16_t *filterPos, long filterSize) |
|
| 315 |
-{
|
|
| 316 |
-#if COMPILE_TEMPLATE_ALTIVEC |
|
| 317 |
- hScale_altivec_real(dst, dstW, src, srcW, xInc, filter, filterPos, filterSize); |
|
| 318 |
-#else |
|
| 319 |
- int i; |
|
| 320 |
- for (i=0; i<dstW; i++) {
|
|
| 321 |
- int j; |
|
| 322 |
- int srcPos= filterPos[i]; |
|
| 323 |
- int val=0; |
|
| 324 |
- //printf("filterPos: %d\n", filterPos[i]);
|
|
| 325 |
- for (j=0; j<filterSize; j++) {
|
|
| 326 |
- //printf("filter: %d, src: %d\n", filter[i], src[srcPos + j]);
|
|
| 327 |
- val += ((int)src[srcPos + j])*filter[filterSize*i + j]; |
|
| 328 |
- } |
|
| 329 |
- //filter += hFilterSize; |
|
| 330 |
- dst[i] = FFMIN(val>>7, (1<<15)-1); // the cubic equation does overflow ... |
|
| 331 |
- //dst[i] = val>>7; |
|
| 332 |
- } |
|
| 333 |
-#endif /* COMPILE_TEMPLATE_ALTIVEC */ |
|
| 334 |
-} |
|
| 335 |
- |
|
| 336 |
-//FIXME all pal and rgb srcFormats could do this convertion as well |
|
| 337 |
-//FIXME all scalers more complex than bilinear could do half of this transform |
|
| 338 |
-static void RENAME(chrRangeToJpeg)(uint16_t *dst, int width) |
|
| 339 |
-{
|
|
| 340 |
- int i; |
|
| 341 |
- for (i = 0; i < width; i++) {
|
|
| 342 |
- dst[i ] = (FFMIN(dst[i ],30775)*4663 - 9289992)>>12; //-264 |
|
| 343 |
- dst[i+VOFW] = (FFMIN(dst[i+VOFW],30775)*4663 - 9289992)>>12; //-264 |
|
| 344 |
- } |
|
| 345 |
-} |
|
| 346 |
-static void RENAME(chrRangeFromJpeg)(uint16_t *dst, int width) |
|
| 347 |
-{
|
|
| 348 |
- int i; |
|
| 349 |
- for (i = 0; i < width; i++) {
|
|
| 350 |
- dst[i ] = (dst[i ]*1799 + 4081085)>>11; //1469 |
|
| 351 |
- dst[i+VOFW] = (dst[i+VOFW]*1799 + 4081085)>>11; //1469 |
|
| 352 |
- } |
|
| 353 |
-} |
|
| 354 |
-static void RENAME(lumRangeToJpeg)(uint16_t *dst, int width) |
|
| 355 |
-{
|
|
| 356 |
- int i; |
|
| 357 |
- for (i = 0; i < width; i++) |
|
| 358 |
- dst[i] = (FFMIN(dst[i],30189)*19077 - 39057361)>>14; |
|
| 359 |
-} |
|
| 360 |
-static void RENAME(lumRangeFromJpeg)(uint16_t *dst, int width) |
|
| 361 |
-{
|
|
| 362 |
- int i; |
|
| 363 |
- for (i = 0; i < width; i++) |
|
| 364 |
- dst[i] = (dst[i]*14071 + 33561947)>>14; |
|
| 365 |
-} |
|
| 366 |
- |
|
| 367 |
-static inline void RENAME(hyscale_fast)(SwsContext *c, int16_t *dst, |
|
| 368 |
- long dstWidth, const uint8_t *src, int srcW, |
|
| 369 |
- int xInc) |
|
| 370 |
-{
|
|
| 371 |
- int i; |
|
| 372 |
- unsigned int xpos=0; |
|
| 373 |
- for (i=0;i<dstWidth;i++) {
|
|
| 374 |
- register unsigned int xx=xpos>>16; |
|
| 375 |
- register unsigned int xalpha=(xpos&0xFFFF)>>9; |
|
| 376 |
- dst[i]= (src[xx]<<7) + (src[xx+1] - src[xx])*xalpha; |
|
| 377 |
- xpos+=xInc; |
|
| 378 |
- } |
|
| 379 |
-} |
|
| 380 |
- |
|
| 381 |
- // *** horizontal scale Y line to temp buffer |
|
| 382 |
-static inline void RENAME(hyscale)(SwsContext *c, uint16_t *dst, long dstWidth, const uint8_t *src, int srcW, int xInc, |
|
| 383 |
- const int16_t *hLumFilter, |
|
| 384 |
- const int16_t *hLumFilterPos, int hLumFilterSize, |
|
| 385 |
- uint8_t *formatConvBuffer, |
|
| 386 |
- uint32_t *pal, int isAlpha) |
|
| 387 |
-{
|
|
| 388 |
- void (*toYV12)(uint8_t *, const uint8_t *, long, uint32_t *) = isAlpha ? c->alpToYV12 : c->lumToYV12; |
|
| 389 |
- void (*convertRange)(int16_t *, int) = isAlpha ? NULL : c->lumConvertRange; |
|
| 390 |
- |
|
| 391 |
- src += isAlpha ? c->alpSrcOffset : c->lumSrcOffset; |
|
| 392 |
- |
|
| 393 |
- if (toYV12) {
|
|
| 394 |
- toYV12(formatConvBuffer, src, srcW, pal); |
|
| 395 |
- src= formatConvBuffer; |
|
| 396 |
- } |
|
| 397 |
- |
|
| 398 |
- if (c->hScale16) {
|
|
| 399 |
- c->hScale16(dst, dstWidth, (uint16_t*)src, srcW, xInc, hLumFilter, hLumFilterPos, hLumFilterSize, av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1); |
|
| 400 |
- } else if (!c->hyscale_fast) {
|
|
| 401 |
- c->hScale(dst, dstWidth, src, srcW, xInc, hLumFilter, hLumFilterPos, hLumFilterSize); |
|
| 402 |
- } else { // fast bilinear upscale / crap downscale
|
|
| 403 |
- c->hyscale_fast(c, dst, dstWidth, src, srcW, xInc); |
|
| 404 |
- } |
|
| 405 |
- |
|
| 406 |
- if (convertRange) |
|
| 407 |
- convertRange(dst, dstWidth); |
|
| 408 |
-} |
|
| 409 |
- |
|
| 410 |
-static inline void RENAME(hcscale_fast)(SwsContext *c, int16_t *dst, |
|
| 411 |
- long dstWidth, const uint8_t *src1, |
|
| 412 |
- const uint8_t *src2, int srcW, int xInc) |
|
| 413 |
-{
|
|
| 414 |
- int i; |
|
| 415 |
- unsigned int xpos=0; |
|
| 416 |
- for (i=0;i<dstWidth;i++) {
|
|
| 417 |
- register unsigned int xx=xpos>>16; |
|
| 418 |
- register unsigned int xalpha=(xpos&0xFFFF)>>9; |
|
| 419 |
- dst[i]=(src1[xx]*(xalpha^127)+src1[xx+1]*xalpha); |
|
| 420 |
- dst[i+VOFW]=(src2[xx]*(xalpha^127)+src2[xx+1]*xalpha); |
|
| 421 |
- /* slower |
|
| 422 |
- dst[i]= (src1[xx]<<7) + (src1[xx+1] - src1[xx])*xalpha; |
|
| 423 |
- dst[i+VOFW]=(src2[xx]<<7) + (src2[xx+1] - src2[xx])*xalpha; |
|
| 424 |
- */ |
|
| 425 |
- xpos+=xInc; |
|
| 426 |
- } |
|
| 427 |
-} |
|
| 428 |
- |
|
| 429 |
-inline static void RENAME(hcscale)(SwsContext *c, uint16_t *dst, long dstWidth, const uint8_t *src1, const uint8_t *src2, |
|
| 430 |
- int srcW, int xInc, const int16_t *hChrFilter, |
|
| 431 |
- const int16_t *hChrFilterPos, int hChrFilterSize, |
|
| 432 |
- uint8_t *formatConvBuffer, |
|
| 433 |
- uint32_t *pal) |
|
| 434 |
-{
|
|
| 435 |
- |
|
| 436 |
- src1 += c->chrSrcOffset; |
|
| 437 |
- src2 += c->chrSrcOffset; |
|
| 438 |
- |
|
| 439 |
- if (c->chrToYV12) {
|
|
| 440 |
- c->chrToYV12(formatConvBuffer, formatConvBuffer+VOFW, src1, src2, srcW, pal); |
|
| 441 |
- src1= formatConvBuffer; |
|
| 442 |
- src2= formatConvBuffer+VOFW; |
|
| 443 |
- } |
|
| 444 |
- |
|
| 445 |
- if (c->hScale16) {
|
|
| 446 |
- c->hScale16(dst , dstWidth, (uint16_t*)src1, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize, av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1); |
|
| 447 |
- c->hScale16(dst+VOFW, dstWidth, (uint16_t*)src2, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize, av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1); |
|
| 448 |
- } else if (!c->hcscale_fast) {
|
|
| 449 |
- c->hScale(dst , dstWidth, src1, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize); |
|
| 450 |
- c->hScale(dst+VOFW, dstWidth, src2, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize); |
|
| 451 |
- } else { // fast bilinear upscale / crap downscale
|
|
| 452 |
- c->hcscale_fast(c, dst, dstWidth, src1, src2, srcW, xInc); |
|
| 453 |
- } |
|
| 454 |
- |
|
| 455 |
- if (c->chrConvertRange) |
|
| 456 |
- c->chrConvertRange(dst, dstWidth); |
|
| 457 |
-} |
|
| 458 |
- |
|
| 459 |
-#define DEBUG_SWSCALE_BUFFERS 0 |
|
| 460 |
-#define DEBUG_BUFFERS(...) if (DEBUG_SWSCALE_BUFFERS) av_log(c, AV_LOG_DEBUG, __VA_ARGS__) |
|
| 461 |
- |
|
| 462 |
-static int RENAME(swScale)(SwsContext *c, const uint8_t* src[], int srcStride[], int srcSliceY, |
|
| 463 |
- int srcSliceH, uint8_t* dst[], int dstStride[]) |
|
| 464 |
-{
|
|
| 465 |
- /* load a few things into local vars to make the code more readable? and faster */ |
|
| 466 |
- const int srcW= c->srcW; |
|
| 467 |
- const int dstW= c->dstW; |
|
| 468 |
- const int dstH= c->dstH; |
|
| 469 |
- const int chrDstW= c->chrDstW; |
|
| 470 |
- const int chrSrcW= c->chrSrcW; |
|
| 471 |
- const int lumXInc= c->lumXInc; |
|
| 472 |
- const int chrXInc= c->chrXInc; |
|
| 473 |
- const enum PixelFormat dstFormat= c->dstFormat; |
|
| 474 |
- const int flags= c->flags; |
|
| 475 |
- int16_t *vLumFilterPos= c->vLumFilterPos; |
|
| 476 |
- int16_t *vChrFilterPos= c->vChrFilterPos; |
|
| 477 |
- int16_t *hLumFilterPos= c->hLumFilterPos; |
|
| 478 |
- int16_t *hChrFilterPos= c->hChrFilterPos; |
|
| 479 |
- int16_t *vLumFilter= c->vLumFilter; |
|
| 480 |
- int16_t *vChrFilter= c->vChrFilter; |
|
| 481 |
- int16_t *hLumFilter= c->hLumFilter; |
|
| 482 |
- int16_t *hChrFilter= c->hChrFilter; |
|
| 483 |
- int32_t *lumMmxFilter= c->lumMmxFilter; |
|
| 484 |
- int32_t *chrMmxFilter= c->chrMmxFilter; |
|
| 485 |
- int32_t av_unused *alpMmxFilter= c->alpMmxFilter; |
|
| 486 |
- const int vLumFilterSize= c->vLumFilterSize; |
|
| 487 |
- const int vChrFilterSize= c->vChrFilterSize; |
|
| 488 |
- const int hLumFilterSize= c->hLumFilterSize; |
|
| 489 |
- const int hChrFilterSize= c->hChrFilterSize; |
|
| 490 |
- int16_t **lumPixBuf= c->lumPixBuf; |
|
| 491 |
- int16_t **chrPixBuf= c->chrPixBuf; |
|
| 492 |
- int16_t **alpPixBuf= c->alpPixBuf; |
|
| 493 |
- const int vLumBufSize= c->vLumBufSize; |
|
| 494 |
- const int vChrBufSize= c->vChrBufSize; |
|
| 495 |
- uint8_t *formatConvBuffer= c->formatConvBuffer; |
|
| 496 |
- const int chrSrcSliceY= srcSliceY >> c->chrSrcVSubSample; |
|
| 497 |
- const int chrSrcSliceH= -((-srcSliceH) >> c->chrSrcVSubSample); |
|
| 498 |
- int lastDstY; |
|
| 499 |
- uint32_t *pal=c->pal_yuv; |
|
| 500 |
- |
|
| 501 |
- /* vars which will change and which we need to store back in the context */ |
|
| 502 |
- int dstY= c->dstY; |
|
| 503 |
- int lumBufIndex= c->lumBufIndex; |
|
| 504 |
- int chrBufIndex= c->chrBufIndex; |
|
| 505 |
- int lastInLumBuf= c->lastInLumBuf; |
|
| 506 |
- int lastInChrBuf= c->lastInChrBuf; |
|
| 507 |
- |
|
| 508 |
- if (isPacked(c->srcFormat)) {
|
|
| 509 |
- src[0]= |
|
| 510 |
- src[1]= |
|
| 511 |
- src[2]= |
|
| 512 |
- src[3]= src[0]; |
|
| 513 |
- srcStride[0]= |
|
| 514 |
- srcStride[1]= |
|
| 515 |
- srcStride[2]= |
|
| 516 |
- srcStride[3]= srcStride[0]; |
|
| 517 |
- } |
|
| 518 |
- srcStride[1]<<= c->vChrDrop; |
|
| 519 |
- srcStride[2]<<= c->vChrDrop; |
|
| 520 |
- |
|
| 521 |
- DEBUG_BUFFERS("swScale() %p[%d] %p[%d] %p[%d] %p[%d] -> %p[%d] %p[%d] %p[%d] %p[%d]\n",
|
|
| 522 |
- src[0], srcStride[0], src[1], srcStride[1], src[2], srcStride[2], src[3], srcStride[3], |
|
| 523 |
- dst[0], dstStride[0], dst[1], dstStride[1], dst[2], dstStride[2], dst[3], dstStride[3]); |
|
| 524 |
- DEBUG_BUFFERS("srcSliceY: %d srcSliceH: %d dstY: %d dstH: %d\n",
|
|
| 525 |
- srcSliceY, srcSliceH, dstY, dstH); |
|
| 526 |
- DEBUG_BUFFERS("vLumFilterSize: %d vLumBufSize: %d vChrFilterSize: %d vChrBufSize: %d\n",
|
|
| 527 |
- vLumFilterSize, vLumBufSize, vChrFilterSize, vChrBufSize); |
|
| 528 |
- |
|
| 529 |
- if (dstStride[0]%8 !=0 || dstStride[1]%8 !=0 || dstStride[2]%8 !=0 || dstStride[3]%8 != 0) {
|
|
| 530 |
- static int warnedAlready=0; //FIXME move this into the context perhaps |
|
| 531 |
- if (flags & SWS_PRINT_INFO && !warnedAlready) {
|
|
| 532 |
- av_log(c, AV_LOG_WARNING, "Warning: dstStride is not aligned!\n" |
|
| 533 |
- " ->cannot do aligned memory accesses anymore\n"); |
|
| 534 |
- warnedAlready=1; |
|
| 535 |
- } |
|
| 536 |
- } |
|
| 537 |
- |
|
| 538 |
- /* Note the user might start scaling the picture in the middle so this |
|
| 539 |
- will not get executed. This is not really intended but works |
|
| 540 |
- currently, so people might do it. */ |
|
| 541 |
- if (srcSliceY ==0) {
|
|
| 542 |
- lumBufIndex=-1; |
|
| 543 |
- chrBufIndex=-1; |
|
| 544 |
- dstY=0; |
|
| 545 |
- lastInLumBuf= -1; |
|
| 546 |
- lastInChrBuf= -1; |
|
| 547 |
- } |
|
| 548 |
- |
|
| 549 |
- lastDstY= dstY; |
|
| 550 |
- |
|
| 551 |
- for (;dstY < dstH; dstY++) {
|
|
| 552 |
- unsigned char *dest =dst[0]+dstStride[0]*dstY; |
|
| 553 |
- const int chrDstY= dstY>>c->chrDstVSubSample; |
|
| 554 |
- unsigned char *uDest=dst[1]+dstStride[1]*chrDstY; |
|
| 555 |
- unsigned char *vDest=dst[2]+dstStride[2]*chrDstY; |
|
| 556 |
- unsigned char *aDest=(CONFIG_SWSCALE_ALPHA && alpPixBuf) ? dst[3]+dstStride[3]*dstY : NULL; |
|
| 557 |
- |
|
| 558 |
- const int firstLumSrcY= vLumFilterPos[dstY]; //First line needed as input |
|
| 559 |
- const int firstLumSrcY2= vLumFilterPos[FFMIN(dstY | ((1<<c->chrDstVSubSample) - 1), dstH-1)]; |
|
| 560 |
- const int firstChrSrcY= vChrFilterPos[chrDstY]; //First line needed as input |
|
| 561 |
- int lastLumSrcY= firstLumSrcY + vLumFilterSize -1; // Last line needed as input |
|
| 562 |
- int lastLumSrcY2=firstLumSrcY2+ vLumFilterSize -1; // Last line needed as input |
|
| 563 |
- int lastChrSrcY= firstChrSrcY + vChrFilterSize -1; // Last line needed as input |
|
| 564 |
- int enough_lines; |
|
| 565 |
- |
|
| 566 |
- //handle holes (FAST_BILINEAR & weird filters) |
|
| 567 |
- if (firstLumSrcY > lastInLumBuf) lastInLumBuf= firstLumSrcY-1; |
|
| 568 |
- if (firstChrSrcY > lastInChrBuf) lastInChrBuf= firstChrSrcY-1; |
|
| 569 |
- assert(firstLumSrcY >= lastInLumBuf - vLumBufSize + 1); |
|
| 570 |
- assert(firstChrSrcY >= lastInChrBuf - vChrBufSize + 1); |
|
| 571 |
- |
|
| 572 |
- DEBUG_BUFFERS("dstY: %d\n", dstY);
|
|
| 573 |
- DEBUG_BUFFERS("\tfirstLumSrcY: %d lastLumSrcY: %d lastInLumBuf: %d\n",
|
|
| 574 |
- firstLumSrcY, lastLumSrcY, lastInLumBuf); |
|
| 575 |
- DEBUG_BUFFERS("\tfirstChrSrcY: %d lastChrSrcY: %d lastInChrBuf: %d\n",
|
|
| 576 |
- firstChrSrcY, lastChrSrcY, lastInChrBuf); |
|
| 577 |
- |
|
| 578 |
- // Do we have enough lines in this slice to output the dstY line |
|
| 579 |
- enough_lines = lastLumSrcY2 < srcSliceY + srcSliceH && lastChrSrcY < -((-srcSliceY - srcSliceH)>>c->chrSrcVSubSample); |
|
| 580 |
- |
|
| 581 |
- if (!enough_lines) {
|
|
| 582 |
- lastLumSrcY = srcSliceY + srcSliceH - 1; |
|
| 583 |
- lastChrSrcY = chrSrcSliceY + chrSrcSliceH - 1; |
|
| 584 |
- DEBUG_BUFFERS("buffering slice: lastLumSrcY %d lastChrSrcY %d\n",
|
|
| 585 |
- lastLumSrcY, lastChrSrcY); |
|
| 586 |
- } |
|
| 587 |
- |
|
| 588 |
- //Do horizontal scaling |
|
| 589 |
- while(lastInLumBuf < lastLumSrcY) {
|
|
| 590 |
- const uint8_t *src1= src[0]+(lastInLumBuf + 1 - srcSliceY)*srcStride[0]; |
|
| 591 |
- const uint8_t *src2= src[3]+(lastInLumBuf + 1 - srcSliceY)*srcStride[3]; |
|
| 592 |
- lumBufIndex++; |
|
| 593 |
- assert(lumBufIndex < 2*vLumBufSize); |
|
| 594 |
- assert(lastInLumBuf + 1 - srcSliceY < srcSliceH); |
|
| 595 |
- assert(lastInLumBuf + 1 - srcSliceY >= 0); |
|
| 596 |
- RENAME(hyscale)(c, lumPixBuf[ lumBufIndex ], dstW, src1, srcW, lumXInc, |
|
| 597 |
- hLumFilter, hLumFilterPos, hLumFilterSize, |
|
| 598 |
- formatConvBuffer, |
|
| 599 |
- pal, 0); |
|
| 600 |
- if (CONFIG_SWSCALE_ALPHA && alpPixBuf) |
|
| 601 |
- RENAME(hyscale)(c, alpPixBuf[ lumBufIndex ], dstW, src2, srcW, lumXInc, |
|
| 602 |
- hLumFilter, hLumFilterPos, hLumFilterSize, |
|
| 603 |
- formatConvBuffer, |
|
| 604 |
- pal, 1); |
|
| 605 |
- lastInLumBuf++; |
|
| 606 |
- DEBUG_BUFFERS("\t\tlumBufIndex %d: lastInLumBuf: %d\n",
|
|
| 607 |
- lumBufIndex, lastInLumBuf); |
|
| 608 |
- } |
|
| 609 |
- while(lastInChrBuf < lastChrSrcY) {
|
|
| 610 |
- const uint8_t *src1= src[1]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[1]; |
|
| 611 |
- const uint8_t *src2= src[2]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[2]; |
|
| 612 |
- chrBufIndex++; |
|
| 613 |
- assert(chrBufIndex < 2*vChrBufSize); |
|
| 614 |
- assert(lastInChrBuf + 1 - chrSrcSliceY < (chrSrcSliceH)); |
|
| 615 |
- assert(lastInChrBuf + 1 - chrSrcSliceY >= 0); |
|
| 616 |
- //FIXME replace parameters through context struct (some at least) |
|
| 617 |
- |
|
| 618 |
- if (c->needs_hcscale) |
|
| 619 |
- RENAME(hcscale)(c, chrPixBuf[ chrBufIndex ], chrDstW, src1, src2, chrSrcW, chrXInc, |
|
| 620 |
- hChrFilter, hChrFilterPos, hChrFilterSize, |
|
| 621 |
- formatConvBuffer, |
|
| 622 |
- pal); |
|
| 623 |
- lastInChrBuf++; |
|
| 624 |
- DEBUG_BUFFERS("\t\tchrBufIndex %d: lastInChrBuf: %d\n",
|
|
| 625 |
- chrBufIndex, lastInChrBuf); |
|
| 626 |
- } |
|
| 627 |
- //wrap buf index around to stay inside the ring buffer |
|
| 628 |
- if (lumBufIndex >= vLumBufSize) lumBufIndex-= vLumBufSize; |
|
| 629 |
- if (chrBufIndex >= vChrBufSize) chrBufIndex-= vChrBufSize; |
|
| 630 |
- if (!enough_lines) |
|
| 631 |
- break; //we can't output a dstY line so let's try with the next slice |
|
| 632 |
- |
|
| 633 |
- if (dstY < dstH-2) {
|
|
| 634 |
- const int16_t **lumSrcPtr= (const int16_t **) lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize; |
|
| 635 |
- const int16_t **chrSrcPtr= (const int16_t **) chrPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize; |
|
| 636 |
- const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **) alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL; |
|
| 637 |
- if (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21) {
|
|
| 638 |
- const int chrSkipMask= (1<<c->chrDstVSubSample)-1; |
|
| 639 |
- if (dstY&chrSkipMask) uDest= NULL; //FIXME split functions in lumi / chromi |
|
| 640 |
- c->yuv2nv12X(c, |
|
| 641 |
- vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize, |
|
| 642 |
- vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize, |
|
| 643 |
- dest, uDest, dstW, chrDstW, dstFormat); |
|
| 644 |
- } else if (isPlanarYUV(dstFormat) || dstFormat==PIX_FMT_GRAY8) { //YV12 like
|
|
| 645 |
- const int chrSkipMask= (1<<c->chrDstVSubSample)-1; |
|
| 646 |
- if ((dstY&chrSkipMask) || isGray(dstFormat)) uDest=vDest= NULL; //FIXME split functions in lumi / chromi |
|
| 647 |
- if (is16BPS(dstFormat) || isNBPS(dstFormat)) {
|
|
| 648 |
- yuv2yuvX16inC( |
|
| 649 |
- vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize, |
|
| 650 |
- vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize, |
|
| 651 |
- alpSrcPtr, (uint16_t *) dest, (uint16_t *) uDest, (uint16_t *) vDest, (uint16_t *) aDest, dstW, chrDstW, |
|
| 652 |
- dstFormat); |
|
| 653 |
- } else if (vLumFilterSize == 1 && vChrFilterSize == 1) { // unscaled YV12
|
|
| 654 |
- const int16_t *lumBuf = lumSrcPtr[0]; |
|
| 655 |
- const int16_t *chrBuf= chrSrcPtr[0]; |
|
| 656 |
- const int16_t *alpBuf= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? alpSrcPtr[0] : NULL; |
|
| 657 |
- c->yuv2yuv1(c, lumBuf, chrBuf, alpBuf, dest, uDest, vDest, aDest, dstW, chrDstW); |
|
| 658 |
- } else { //General YV12
|
|
| 659 |
- c->yuv2yuvX(c, |
|
| 660 |
- vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize, |
|
| 661 |
- vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize, |
|
| 662 |
- alpSrcPtr, dest, uDest, vDest, aDest, dstW, chrDstW); |
|
| 663 |
- } |
|
| 664 |
- } else {
|
|
| 665 |
- assert(lumSrcPtr + vLumFilterSize - 1 < lumPixBuf + vLumBufSize*2); |
|
| 666 |
- assert(chrSrcPtr + vChrFilterSize - 1 < chrPixBuf + vChrBufSize*2); |
|
| 667 |
- if (vLumFilterSize == 1 && vChrFilterSize == 2) { //unscaled RGB
|
|
| 668 |
- int chrAlpha= vChrFilter[2*dstY+1]; |
|
| 669 |
- if(flags & SWS_FULL_CHR_H_INT) {
|
|
| 670 |
- yuv2rgbXinC_full(c, //FIXME write a packed1_full function |
|
| 671 |
- vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize, |
|
| 672 |
- vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize, |
|
| 673 |
- alpSrcPtr, dest, dstW, dstY); |
|
| 674 |
- } else {
|
|
| 675 |
- c->yuv2packed1(c, *lumSrcPtr, *chrSrcPtr, *(chrSrcPtr+1), |
|
| 676 |
- alpPixBuf ? *alpSrcPtr : NULL, |
|
| 677 |
- dest, dstW, chrAlpha, dstFormat, flags, dstY); |
|
| 678 |
- } |
|
| 679 |
- } else if (vLumFilterSize == 2 && vChrFilterSize == 2) { //bilinear upscale RGB
|
|
| 680 |
- int lumAlpha= vLumFilter[2*dstY+1]; |
|
| 681 |
- int chrAlpha= vChrFilter[2*dstY+1]; |
|
| 682 |
- lumMmxFilter[2]= |
|
| 683 |
- lumMmxFilter[3]= vLumFilter[2*dstY ]*0x10001; |
|
| 684 |
- chrMmxFilter[2]= |
|
| 685 |
- chrMmxFilter[3]= vChrFilter[2*chrDstY]*0x10001; |
|
| 686 |
- if(flags & SWS_FULL_CHR_H_INT) {
|
|
| 687 |
- yuv2rgbXinC_full(c, //FIXME write a packed2_full function |
|
| 688 |
- vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize, |
|
| 689 |
- vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize, |
|
| 690 |
- alpSrcPtr, dest, dstW, dstY); |
|
| 691 |
- } else {
|
|
| 692 |
- c->yuv2packed2(c, *lumSrcPtr, *(lumSrcPtr+1), *chrSrcPtr, *(chrSrcPtr+1), |
|
| 693 |
- alpPixBuf ? *alpSrcPtr : NULL, alpPixBuf ? *(alpSrcPtr+1) : NULL, |
|
| 694 |
- dest, dstW, lumAlpha, chrAlpha, dstY); |
|
| 695 |
- } |
|
| 696 |
- } else { //general RGB
|
|
| 697 |
- if(flags & SWS_FULL_CHR_H_INT) {
|
|
| 698 |
- yuv2rgbXinC_full(c, |
|
| 699 |
- vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize, |
|
| 700 |
- vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize, |
|
| 701 |
- alpSrcPtr, dest, dstW, dstY); |
|
| 702 |
- } else {
|
|
| 703 |
- c->yuv2packedX(c, |
|
| 704 |
- vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize, |
|
| 705 |
- vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize, |
|
| 706 |
- alpSrcPtr, dest, dstW, dstY); |
|
| 707 |
- } |
|
| 708 |
- } |
|
| 709 |
- } |
|
| 710 |
- } else { // hmm looks like we can't use MMX here without overwriting this array's tail
|
|
| 711 |
- const int16_t **lumSrcPtr= (const int16_t **)lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize; |
|
| 712 |
- const int16_t **chrSrcPtr= (const int16_t **)chrPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize; |
|
| 713 |
- const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **)alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL; |
|
| 714 |
- if (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21) {
|
|
| 715 |
- const int chrSkipMask= (1<<c->chrDstVSubSample)-1; |
|
| 716 |
- if (dstY&chrSkipMask) uDest= NULL; //FIXME split functions in lumi / chromi |
|
| 717 |
- yuv2nv12XinC( |
|
| 718 |
- vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize, |
|
| 719 |
- vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize, |
|
| 720 |
- dest, uDest, dstW, chrDstW, dstFormat); |
|
| 721 |
- } else if (isPlanarYUV(dstFormat) || dstFormat==PIX_FMT_GRAY8) { //YV12
|
|
| 722 |
- const int chrSkipMask= (1<<c->chrDstVSubSample)-1; |
|
| 723 |
- if ((dstY&chrSkipMask) || isGray(dstFormat)) uDest=vDest= NULL; //FIXME split functions in lumi / chromi |
|
| 724 |
- if (is16BPS(dstFormat) || isNBPS(dstFormat)) {
|
|
| 725 |
- yuv2yuvX16inC( |
|
| 726 |
- vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize, |
|
| 727 |
- vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize, |
|
| 728 |
- alpSrcPtr, (uint16_t *) dest, (uint16_t *) uDest, (uint16_t *) vDest, (uint16_t *) aDest, dstW, chrDstW, |
|
| 729 |
- dstFormat); |
|
| 730 |
- } else {
|
|
| 731 |
- yuv2yuvXinC( |
|
| 732 |
- vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize, |
|
| 733 |
- vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize, |
|
| 734 |
- alpSrcPtr, dest, uDest, vDest, aDest, dstW, chrDstW); |
|
| 735 |
- } |
|
| 736 |
- } else {
|
|
| 737 |
- assert(lumSrcPtr + vLumFilterSize - 1 < lumPixBuf + vLumBufSize*2); |
|
| 738 |
- assert(chrSrcPtr + vChrFilterSize - 1 < chrPixBuf + vChrBufSize*2); |
|
| 739 |
- if(flags & SWS_FULL_CHR_H_INT) {
|
|
| 740 |
- yuv2rgbXinC_full(c, |
|
| 741 |
- vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize, |
|
| 742 |
- vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize, |
|
| 743 |
- alpSrcPtr, dest, dstW, dstY); |
|
| 744 |
- } else {
|
|
| 745 |
- yuv2packedXinC(c, |
|
| 746 |
- vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize, |
|
| 747 |
- vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize, |
|
| 748 |
- alpSrcPtr, dest, dstW, dstY); |
|
| 749 |
- } |
|
| 750 |
- } |
|
| 751 |
- } |
|
| 752 |
- } |
|
| 753 |
- |
|
| 754 |
- if ((dstFormat == PIX_FMT_YUVA420P) && !alpPixBuf) |
|
| 755 |
- fillPlane(dst[3], dstStride[3], dstW, dstY-lastDstY, lastDstY, 255); |
|
| 756 |
- |
|
| 757 |
- /* store changed local vars back in the context */ |
|
| 758 |
- c->dstY= dstY; |
|
| 759 |
- c->lumBufIndex= lumBufIndex; |
|
| 760 |
- c->chrBufIndex= chrBufIndex; |
|
| 761 |
- c->lastInLumBuf= lastInLumBuf; |
|
| 762 |
- c->lastInChrBuf= lastInChrBuf; |
|
| 763 |
- |
|
| 764 |
- return dstY - lastDstY; |
|
| 765 |
-} |
|
| 766 | 113 |
|
| 767 | 114 |
static void RENAME(sws_init_swScale)(SwsContext *c) |
| 768 | 115 |
{
|
| 769 | 116 |
enum PixelFormat srcFormat = c->srcFormat; |
| 770 | 117 |
|
| 771 |
- c->yuv2nv12X = RENAME(yuv2nv12X ); |
|
| 772 |
- c->yuv2yuv1 = RENAME(yuv2yuv1 ); |
|
| 773 | 118 |
c->yuv2yuvX = RENAME(yuv2yuvX ); |
| 774 |
- c->yuv2packed1 = RENAME(yuv2packed1 ); |
|
| 775 |
- c->yuv2packed2 = RENAME(yuv2packed2 ); |
|
| 776 | 119 |
c->yuv2packedX = RENAME(yuv2packedX ); |
| 777 |
- |
|
| 778 |
- c->hScale = RENAME(hScale ); |
|
| 779 |
- |
|
| 780 |
- if (c->flags & SWS_FAST_BILINEAR) |
|
| 781 |
- {
|
|
| 782 |
- c->hyscale_fast = RENAME(hyscale_fast); |
|
| 783 |
- c->hcscale_fast = RENAME(hcscale_fast); |
|
| 784 |
- } |
|
| 785 |
- |
|
| 786 |
- c->chrToYV12 = NULL; |
|
| 787 |
- switch(srcFormat) {
|
|
| 788 |
- case PIX_FMT_YUYV422 : c->chrToYV12 = RENAME(yuy2ToUV); break; |
|
| 789 |
- case PIX_FMT_UYVY422 : c->chrToYV12 = RENAME(uyvyToUV); break; |
|
| 790 |
- case PIX_FMT_NV12 : c->chrToYV12 = RENAME(nv12ToUV); break; |
|
| 791 |
- case PIX_FMT_NV21 : c->chrToYV12 = RENAME(nv21ToUV); break; |
|
| 792 |
- case PIX_FMT_RGB8 : |
|
| 793 |
- case PIX_FMT_BGR8 : |
|
| 794 |
- case PIX_FMT_PAL8 : |
|
| 795 |
- case PIX_FMT_BGR4_BYTE: |
|
| 796 |
- case PIX_FMT_RGB4_BYTE: c->chrToYV12 = palToUV; break; |
|
| 797 |
- case PIX_FMT_GRAY16BE : |
|
| 798 |
- case PIX_FMT_YUV420P9BE: |
|
| 799 |
- case PIX_FMT_YUV422P10BE: |
|
| 800 |
- case PIX_FMT_YUV420P10BE: |
|
| 801 |
- case PIX_FMT_YUV420P16BE: |
|
| 802 |
- case PIX_FMT_YUV422P16BE: |
|
| 803 |
- case PIX_FMT_YUV444P16BE: c->hScale16= HAVE_BIGENDIAN ? RENAME(hScale16) : RENAME(hScale16X); break; |
|
| 804 |
- case PIX_FMT_GRAY16LE : |
|
| 805 |
- case PIX_FMT_YUV420P9LE: |
|
| 806 |
- case PIX_FMT_YUV422P10LE: |
|
| 807 |
- case PIX_FMT_YUV420P10LE: |
|
| 808 |
- case PIX_FMT_YUV420P16LE: |
|
| 809 |
- case PIX_FMT_YUV422P16LE: |
|
| 810 |
- case PIX_FMT_YUV444P16LE: c->hScale16= HAVE_BIGENDIAN ? RENAME(hScale16X) : RENAME(hScale16); break; |
|
| 811 |
- } |
|
| 812 |
- if (c->chrSrcHSubSample) {
|
|
| 813 |
- switch(srcFormat) {
|
|
| 814 |
- case PIX_FMT_RGB48BE: |
|
| 815 |
- case PIX_FMT_RGB48LE: c->chrToYV12 = rgb48ToUV_half; break; |
|
| 816 |
- case PIX_FMT_BGR48BE: |
|
| 817 |
- case PIX_FMT_BGR48LE: c->chrToYV12 = bgr48ToUV_half; break; |
|
| 818 |
- case PIX_FMT_RGB32 : c->chrToYV12 = bgr32ToUV_half; break; |
|
| 819 |
- case PIX_FMT_RGB32_1: c->chrToYV12 = bgr321ToUV_half; break; |
|
| 820 |
- case PIX_FMT_BGR24 : c->chrToYV12 = RENAME(bgr24ToUV_half); break; |
|
| 821 |
- case PIX_FMT_BGR565 : c->chrToYV12 = bgr16ToUV_half; break; |
|
| 822 |
- case PIX_FMT_BGR555 : c->chrToYV12 = bgr15ToUV_half; break; |
|
| 823 |
- case PIX_FMT_BGR32 : c->chrToYV12 = rgb32ToUV_half; break; |
|
| 824 |
- case PIX_FMT_BGR32_1: c->chrToYV12 = rgb321ToUV_half; break; |
|
| 825 |
- case PIX_FMT_RGB24 : c->chrToYV12 = RENAME(rgb24ToUV_half); break; |
|
| 826 |
- case PIX_FMT_RGB565 : c->chrToYV12 = rgb16ToUV_half; break; |
|
| 827 |
- case PIX_FMT_RGB555 : c->chrToYV12 = rgb15ToUV_half; break; |
|
| 828 |
- } |
|
| 829 |
- } else {
|
|
| 830 |
- switch(srcFormat) {
|
|
| 831 |
- case PIX_FMT_RGB48BE: |
|
| 832 |
- case PIX_FMT_RGB48LE: c->chrToYV12 = rgb48ToUV; break; |
|
| 833 |
- case PIX_FMT_BGR48BE: |
|
| 834 |
- case PIX_FMT_BGR48LE: c->chrToYV12 = bgr48ToUV; break; |
|
| 835 |
- case PIX_FMT_RGB32 : c->chrToYV12 = bgr32ToUV; break; |
|
| 836 |
- case PIX_FMT_RGB32_1: c->chrToYV12 = bgr321ToUV; break; |
|
| 837 |
- case PIX_FMT_BGR24 : c->chrToYV12 = RENAME(bgr24ToUV); break; |
|
| 838 |
- case PIX_FMT_BGR565 : c->chrToYV12 = bgr16ToUV; break; |
|
| 839 |
- case PIX_FMT_BGR555 : c->chrToYV12 = bgr15ToUV; break; |
|
| 840 |
- case PIX_FMT_BGR32 : c->chrToYV12 = rgb32ToUV; break; |
|
| 841 |
- case PIX_FMT_BGR32_1: c->chrToYV12 = rgb321ToUV; break; |
|
| 842 |
- case PIX_FMT_RGB24 : c->chrToYV12 = RENAME(rgb24ToUV); break; |
|
| 843 |
- case PIX_FMT_RGB565 : c->chrToYV12 = rgb16ToUV; break; |
|
| 844 |
- case PIX_FMT_RGB555 : c->chrToYV12 = rgb15ToUV; break; |
|
| 845 |
- } |
|
| 846 |
- } |
|
| 847 |
- |
|
| 848 |
- c->lumToYV12 = NULL; |
|
| 849 |
- c->alpToYV12 = NULL; |
|
| 850 |
- switch (srcFormat) {
|
|
| 851 |
- case PIX_FMT_YUYV422 : |
|
| 852 |
- case PIX_FMT_GRAY8A : |
|
| 853 |
- c->lumToYV12 = RENAME(yuy2ToY); break; |
|
| 854 |
- case PIX_FMT_UYVY422 : |
|
| 855 |
- c->lumToYV12 = RENAME(uyvyToY); break; |
|
| 856 |
- case PIX_FMT_BGR24 : c->lumToYV12 = RENAME(bgr24ToY); break; |
|
| 857 |
- case PIX_FMT_BGR565 : c->lumToYV12 = bgr16ToY; break; |
|
| 858 |
- case PIX_FMT_BGR555 : c->lumToYV12 = bgr15ToY; break; |
|
| 859 |
- case PIX_FMT_RGB24 : c->lumToYV12 = RENAME(rgb24ToY); break; |
|
| 860 |
- case PIX_FMT_RGB565 : c->lumToYV12 = rgb16ToY; break; |
|
| 861 |
- case PIX_FMT_RGB555 : c->lumToYV12 = rgb15ToY; break; |
|
| 862 |
- case PIX_FMT_RGB8 : |
|
| 863 |
- case PIX_FMT_BGR8 : |
|
| 864 |
- case PIX_FMT_PAL8 : |
|
| 865 |
- case PIX_FMT_BGR4_BYTE: |
|
| 866 |
- case PIX_FMT_RGB4_BYTE: c->lumToYV12 = palToY; break; |
|
| 867 |
- case PIX_FMT_MONOBLACK: c->lumToYV12 = monoblack2Y; break; |
|
| 868 |
- case PIX_FMT_MONOWHITE: c->lumToYV12 = monowhite2Y; break; |
|
| 869 |
- case PIX_FMT_RGB32 : c->lumToYV12 = bgr32ToY; break; |
|
| 870 |
- case PIX_FMT_RGB32_1: c->lumToYV12 = bgr321ToY; break; |
|
| 871 |
- case PIX_FMT_BGR32 : c->lumToYV12 = rgb32ToY; break; |
|
| 872 |
- case PIX_FMT_BGR32_1: c->lumToYV12 = rgb321ToY; break; |
|
| 873 |
- case PIX_FMT_RGB48BE: |
|
| 874 |
- case PIX_FMT_RGB48LE: c->lumToYV12 = rgb48ToY; break; |
|
| 875 |
- case PIX_FMT_BGR48BE: |
|
| 876 |
- case PIX_FMT_BGR48LE: c->lumToYV12 = bgr48ToY; break; |
|
| 877 |
- } |
|
| 878 |
- if (c->alpPixBuf) {
|
|
| 879 |
- switch (srcFormat) {
|
|
| 880 |
- case PIX_FMT_RGB32 : |
|
| 881 |
- case PIX_FMT_RGB32_1: |
|
| 882 |
- case PIX_FMT_BGR32 : |
|
| 883 |
- case PIX_FMT_BGR32_1: c->alpToYV12 = abgrToA; break; |
|
| 884 |
- case PIX_FMT_GRAY8A : c->alpToYV12 = RENAME(yuy2ToY); break; |
|
| 885 |
- case PIX_FMT_PAL8 : c->alpToYV12 = palToA; break; |
|
| 886 |
- } |
|
| 887 |
- } |
|
| 888 |
- |
|
| 889 |
- switch (srcFormat) {
|
|
| 890 |
- case PIX_FMT_GRAY8A : |
|
| 891 |
- c->alpSrcOffset = 1; |
|
| 892 |
- break; |
|
| 893 |
- case PIX_FMT_RGB32 : |
|
| 894 |
- case PIX_FMT_BGR32 : |
|
| 895 |
- c->alpSrcOffset = 3; |
|
| 896 |
- break; |
|
| 897 |
- case PIX_FMT_RGB48LE: |
|
| 898 |
- case PIX_FMT_BGR48LE: |
|
| 899 |
- c->lumSrcOffset = 1; |
|
| 900 |
- c->chrSrcOffset = 1; |
|
| 901 |
- c->alpSrcOffset = 1; |
|
| 902 |
- break; |
|
| 903 |
- } |
|
| 904 |
- |
|
| 905 |
- if (c->srcRange != c->dstRange && !isAnyRGB(c->dstFormat)) {
|
|
| 906 |
- if (c->srcRange) {
|
|
| 907 |
- c->lumConvertRange = RENAME(lumRangeFromJpeg); |
|
| 908 |
- c->chrConvertRange = RENAME(chrRangeFromJpeg); |
|
| 909 |
- } else {
|
|
| 910 |
- c->lumConvertRange = RENAME(lumRangeToJpeg); |
|
| 911 |
- c->chrConvertRange = RENAME(chrRangeToJpeg); |
|
| 912 |
- } |
|
| 913 |
- } |
|
| 914 |
- |
|
| 915 |
- if (!(isGray(srcFormat) || isGray(c->dstFormat) || |
|
| 916 |
- srcFormat == PIX_FMT_MONOBLACK || srcFormat == PIX_FMT_MONOWHITE)) |
|
| 917 |
- c->needs_hcscale = 1; |
|
| 918 | 120 |
} |
| ... | ... |
@@ -94,6 +94,7 @@ adjustment. |
| 94 | 94 |
#include "libswscale/rgb2rgb.h" |
| 95 | 95 |
#include "libswscale/swscale.h" |
| 96 | 96 |
#include "libswscale/swscale_internal.h" |
| 97 |
+#include "libavutil/cpu.h" |
|
| 97 | 98 |
|
| 98 | 99 |
#undef PROFILE_THE_BEAST |
| 99 | 100 |
#undef INC_SCALING |
| ... | ... |
@@ -692,7 +693,7 @@ static int altivec_uyvy_rgb32 (SwsContext *c, |
| 692 | 692 |
*/ |
| 693 | 693 |
SwsFunc ff_yuv2rgb_init_altivec(SwsContext *c) |
| 694 | 694 |
{
|
| 695 |
- if (!(c->flags & SWS_CPU_CAPS_ALTIVEC)) |
|
| 695 |
+ if (!(av_get_cpu_flags() & AV_CPU_FLAG_ALTIVEC)) |
|
| 696 | 696 |
return NULL; |
| 697 | 697 |
|
| 698 | 698 |
/* |
| ... | ... |
@@ -116,12 +116,11 @@ void (*yuyvtoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t |
| 116 | 116 |
32-bit C version, and and&add trick by Michael Niedermayer |
| 117 | 117 |
*/ |
| 118 | 118 |
|
| 119 |
-void sws_rgb2rgb_init(int flags) |
|
| 119 |
+void sws_rgb2rgb_init(void) |
|
| 120 | 120 |
{
|
| 121 | 121 |
rgb2rgb_init_c(); |
| 122 |
-#if HAVE_MMX2 || HAVE_AMD3DNOW || HAVE_MMX |
|
| 123 |
- rgb2rgb_init_x86(flags); |
|
| 124 |
-#endif /* HAVE_MMX2 || HAVE_AMD3DNOW || HAVE_MMX */ |
|
| 122 |
+ if (HAVE_MMX) |
|
| 123 |
+ rgb2rgb_init_x86(); |
|
| 125 | 124 |
} |
| 126 | 125 |
|
| 127 | 126 |
#if LIBSWSCALE_VERSION_MAJOR < 1 |
| ... | ... |
@@ -166,8 +166,8 @@ extern void (*yuyvtoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const u |
| 166 | 166 |
long width, long height, |
| 167 | 167 |
long lumStride, long chromStride, long srcStride); |
| 168 | 168 |
|
| 169 |
-void sws_rgb2rgb_init(int flags); |
|
| 169 |
+void sws_rgb2rgb_init(void); |
|
| 170 | 170 |
|
| 171 |
-void rgb2rgb_init_x86(int flags); |
|
| 171 |
+void rgb2rgb_init_x86(void); |
|
| 172 | 172 |
|
| 173 | 173 |
#endif /* SWSCALE_RGB2RGB_H */ |
| ... | ... |
@@ -278,25 +278,6 @@ static inline void rgb16tobgr24_c(const uint8_t *src, uint8_t *dst, long src_siz |
| 278 | 278 |
} |
| 279 | 279 |
} |
| 280 | 280 |
|
| 281 |
-/* |
|
| 282 |
- * mm0 = 00 B3 00 B2 00 B1 00 B0 |
|
| 283 |
- * mm1 = 00 G3 00 G2 00 G1 00 G0 |
|
| 284 |
- * mm2 = 00 R3 00 R2 00 R1 00 R0 |
|
| 285 |
- * mm6 = FF FF FF FF FF FF FF FF |
|
| 286 |
- * mm7 = 00 00 00 00 00 00 00 00 |
|
| 287 |
- */ |
|
| 288 |
-#define PACK_RGB32 \ |
|
| 289 |
- "packuswb %%mm7, %%mm0 \n\t" /* 00 00 00 00 B3 B2 B1 B0 */ \ |
|
| 290 |
- "packuswb %%mm7, %%mm1 \n\t" /* 00 00 00 00 G3 G2 G1 G0 */ \ |
|
| 291 |
- "packuswb %%mm7, %%mm2 \n\t" /* 00 00 00 00 R3 R2 R1 R0 */ \ |
|
| 292 |
- "punpcklbw %%mm1, %%mm0 \n\t" /* G3 B3 G2 B2 G1 B1 G0 B0 */ \ |
|
| 293 |
- "punpcklbw %%mm6, %%mm2 \n\t" /* FF R3 FF R2 FF R1 FF R0 */ \ |
|
| 294 |
- "movq %%mm0, %%mm3 \n\t" \ |
|
| 295 |
- "punpcklwd %%mm2, %%mm0 \n\t" /* FF R1 G1 B1 FF R0 G0 B0 */ \ |
|
| 296 |
- "punpckhwd %%mm2, %%mm3 \n\t" /* FF R3 G3 B3 FF R2 G2 B2 */ \ |
|
| 297 |
- MOVNTQ" %%mm0, %0 \n\t" \ |
|
| 298 |
- MOVNTQ" %%mm3, 8%0 \n\t" \ |
|
| 299 |
- |
|
| 300 | 281 |
static inline void rgb15to32_c(const uint8_t *src, uint8_t *dst, long src_size) |
| 301 | 282 |
{
|
| 302 | 283 |
const uint16_t *end; |
| ... | ... |
@@ -63,6 +63,7 @@ untested special converters |
| 63 | 63 |
#include "libavutil/avassert.h" |
| 64 | 64 |
#include "libavutil/intreadwrite.h" |
| 65 | 65 |
#include "libavutil/x86_cpu.h" |
| 66 |
+#include "libavutil/cpu.h" |
|
| 66 | 67 |
#include "libavutil/avutil.h" |
| 67 | 68 |
#include "libavutil/mathematics.h" |
| 68 | 69 |
#include "libavutil/bswap.h" |
| ... | ... |
@@ -71,10 +72,6 @@ untested special converters |
| 71 | 71 |
#undef MOVNTQ |
| 72 | 72 |
#undef PAVGB |
| 73 | 73 |
|
| 74 |
-//#undef HAVE_MMX2 |
|
| 75 |
-//#define HAVE_AMD3DNOW |
|
| 76 |
-//#undef HAVE_MMX |
|
| 77 |
-//#undef ARCH_X86 |
|
| 78 | 74 |
#define DITHER1XBPP |
| 79 | 75 |
|
| 80 | 76 |
#define isPacked(x) ( \ |
| ... | ... |
@@ -1262,57 +1259,13 @@ static inline void monoblack2Y(uint8_t *dst, const uint8_t *src, long width, uin |
| 1262 | 1262 |
|
| 1263 | 1263 |
//Note: we have C, MMX, MMX2, 3DNOW versions, there is no 3DNOW+MMX2 one |
| 1264 | 1264 |
//Plain C versions |
| 1265 |
-#if CONFIG_RUNTIME_CPUDETECT |
|
| 1266 |
-# define COMPILE_C 1 |
|
| 1267 |
-# if ARCH_X86 |
|
| 1268 |
-# define COMPILE_MMX 1 |
|
| 1269 |
-# define COMPILE_MMX2 1 |
|
| 1270 |
-# define COMPILE_3DNOW 1 |
|
| 1271 |
-# elif ARCH_PPC |
|
| 1272 |
-# define COMPILE_ALTIVEC HAVE_ALTIVEC |
|
| 1273 |
-# endif |
|
| 1274 |
-#else /* CONFIG_RUNTIME_CPUDETECT */ |
|
| 1275 |
-# if ARCH_X86 |
|
| 1276 |
-# if HAVE_MMX2 |
|
| 1277 |
-# define COMPILE_MMX2 1 |
|
| 1278 |
-# elif HAVE_AMD3DNOW |
|
| 1279 |
-# define COMPILE_3DNOW 1 |
|
| 1280 |
-# elif HAVE_MMX |
|
| 1281 |
-# define COMPILE_MMX 1 |
|
| 1282 |
-# else |
|
| 1283 |
-# define COMPILE_C 1 |
|
| 1284 |
-# endif |
|
| 1285 |
-# elif ARCH_PPC && HAVE_ALTIVEC |
|
| 1286 |
-# define COMPILE_ALTIVEC 1 |
|
| 1287 |
-# else |
|
| 1288 |
-# define COMPILE_C 1 |
|
| 1289 |
-# endif |
|
| 1290 |
-#endif |
|
| 1291 |
- |
|
| 1292 |
-#ifndef COMPILE_C |
|
| 1293 |
-# define COMPILE_C 0 |
|
| 1294 |
-#endif |
|
| 1295 |
-#ifndef COMPILE_MMX |
|
| 1296 |
-# define COMPILE_MMX 0 |
|
| 1297 |
-#endif |
|
| 1298 |
-#ifndef COMPILE_MMX2 |
|
| 1299 |
-# define COMPILE_MMX2 0 |
|
| 1300 |
-#endif |
|
| 1301 |
-#ifndef COMPILE_3DNOW |
|
| 1302 |
-# define COMPILE_3DNOW 0 |
|
| 1303 |
-#endif |
|
| 1304 |
-#ifndef COMPILE_ALTIVEC |
|
| 1305 |
-# define COMPILE_ALTIVEC 0 |
|
| 1306 |
-#endif |
|
| 1307 | 1265 |
|
| 1308 |
-#define COMPILE_TEMPLATE_MMX 0 |
|
| 1309 | 1266 |
#define COMPILE_TEMPLATE_MMX2 0 |
| 1310 |
-#define COMPILE_TEMPLATE_AMD3DNOW 0 |
|
| 1311 | 1267 |
#define COMPILE_TEMPLATE_ALTIVEC 0 |
| 1312 | 1268 |
|
| 1313 | 1269 |
#include "swscale_template.c" |
| 1314 | 1270 |
|
| 1315 |
-#if COMPILE_ALTIVEC |
|
| 1271 |
+#if HAVE_ALTIVEC |
|
| 1316 | 1272 |
#undef RENAME |
| 1317 | 1273 |
#undef COMPILE_TEMPLATE_ALTIVEC |
| 1318 | 1274 |
#define COMPILE_TEMPLATE_ALTIVEC 1 |
| ... | ... |
@@ -1320,90 +1273,42 @@ static inline void monoblack2Y(uint8_t *dst, const uint8_t *src, long width, uin |
| 1320 | 1320 |
#include "ppc/swscale_template.c" |
| 1321 | 1321 |
#endif |
| 1322 | 1322 |
|
| 1323 |
-#if ARCH_X86 |
|
| 1324 |
- |
|
| 1325 | 1323 |
//MMX versions |
| 1326 |
-#if COMPILE_MMX |
|
| 1324 |
+#if HAVE_MMX |
|
| 1327 | 1325 |
#undef RENAME |
| 1328 |
-#undef COMPILE_TEMPLATE_MMX |
|
| 1329 | 1326 |
#undef COMPILE_TEMPLATE_MMX2 |
| 1330 |
-#undef COMPILE_TEMPLATE_AMD3DNOW |
|
| 1331 |
-#define COMPILE_TEMPLATE_MMX 1 |
|
| 1332 | 1327 |
#define COMPILE_TEMPLATE_MMX2 0 |
| 1333 |
-#define COMPILE_TEMPLATE_AMD3DNOW 0 |
|
| 1334 | 1328 |
#define RENAME(a) a ## _MMX |
| 1335 | 1329 |
#include "x86/swscale_template.c" |
| 1336 | 1330 |
#endif |
| 1337 | 1331 |
|
| 1338 | 1332 |
//MMX2 versions |
| 1339 |
-#if COMPILE_MMX2 |
|
| 1333 |
+#if HAVE_MMX2 |
|
| 1340 | 1334 |
#undef RENAME |
| 1341 |
-#undef COMPILE_TEMPLATE_MMX |
|
| 1342 | 1335 |
#undef COMPILE_TEMPLATE_MMX2 |
| 1343 |
-#undef COMPILE_TEMPLATE_AMD3DNOW |
|
| 1344 |
-#define COMPILE_TEMPLATE_MMX 1 |
|
| 1345 | 1336 |
#define COMPILE_TEMPLATE_MMX2 1 |
| 1346 |
-#define COMPILE_TEMPLATE_AMD3DNOW 0 |
|
| 1347 | 1337 |
#define RENAME(a) a ## _MMX2 |
| 1348 | 1338 |
#include "x86/swscale_template.c" |
| 1349 | 1339 |
#endif |
| 1350 | 1340 |
|
| 1351 |
-//3DNOW versions |
|
| 1352 |
-#if COMPILE_3DNOW |
|
| 1353 |
-#undef RENAME |
|
| 1354 |
-#undef COMPILE_TEMPLATE_MMX |
|
| 1355 |
-#undef COMPILE_TEMPLATE_MMX2 |
|
| 1356 |
-#undef COMPILE_TEMPLATE_AMD3DNOW |
|
| 1357 |
-#define COMPILE_TEMPLATE_MMX 1 |
|
| 1358 |
-#define COMPILE_TEMPLATE_MMX2 0 |
|
| 1359 |
-#define COMPILE_TEMPLATE_AMD3DNOW 1 |
|
| 1360 |
-#define RENAME(a) a ## _3DNow |
|
| 1361 |
-#include "x86/swscale_template.c" |
|
| 1362 |
-#endif |
|
| 1363 |
- |
|
| 1364 |
-#endif //ARCH_X86 |
|
| 1365 |
- |
|
| 1366 | 1341 |
SwsFunc ff_getSwsFunc(SwsContext *c) |
| 1367 | 1342 |
{
|
| 1343 |
+ int cpu_flags = av_get_cpu_flags(); |
|
| 1344 |
+ |
|
| 1368 | 1345 |
sws_init_swScale_c(c); |
| 1369 | 1346 |
|
| 1370 |
-#if CONFIG_RUNTIME_CPUDETECT |
|
| 1371 |
-#if ARCH_X86 |
|
| 1372 |
- // ordered per speed fastest first |
|
| 1373 |
- if (c->flags & SWS_CPU_CAPS_MMX2) {
|
|
| 1374 |
- sws_init_swScale_MMX2(c); |
|
| 1375 |
- return swScale_MMX2; |
|
| 1376 |
- } else if (c->flags & SWS_CPU_CAPS_3DNOW) {
|
|
| 1377 |
- sws_init_swScale_3DNow(c); |
|
| 1378 |
- return swScale_3DNow; |
|
| 1379 |
- } else if (c->flags & SWS_CPU_CAPS_MMX) {
|
|
| 1347 |
+#if HAVE_MMX |
|
| 1348 |
+ if (cpu_flags & AV_CPU_FLAG_MMX) |
|
| 1380 | 1349 |
sws_init_swScale_MMX(c); |
| 1381 |
- return swScale_MMX; |
|
| 1382 |
- } |
|
| 1383 |
- |
|
| 1384 |
-#else |
|
| 1385 |
-#if COMPILE_ALTIVEC |
|
| 1386 |
- if (c->flags & SWS_CPU_CAPS_ALTIVEC) {
|
|
| 1387 |
- sws_init_swScale_altivec(c); |
|
| 1388 |
- return swScale_altivec; |
|
| 1389 |
- } |
|
| 1390 | 1350 |
#endif |
| 1391 |
-#endif /* ARCH_X86 */ |
|
| 1392 |
-#else //CONFIG_RUNTIME_CPUDETECT |
|
| 1393 |
-#if COMPILE_TEMPLATE_MMX2 |
|
| 1394 |
- sws_init_swScale_MMX2(c); |
|
| 1395 |
- return swScale_MMX2; |
|
| 1396 |
-#elif COMPILE_TEMPLATE_AMD3DNOW |
|
| 1397 |
- sws_init_swScale_3DNow(c); |
|
| 1398 |
- return swScale_3DNow; |
|
| 1399 |
-#elif COMPILE_TEMPLATE_MMX |
|
| 1400 |
- sws_init_swScale_MMX(c); |
|
| 1401 |
- return swScale_MMX; |
|
| 1402 |
-#elif COMPILE_TEMPLATE_ALTIVEC |
|
| 1403 |
- sws_init_swScale_altivec(c); |
|
| 1404 |
- return swScale_altivec; |
|
| 1351 |
+#if HAVE_MMX2 |
|
| 1352 |
+ if (cpu_flags & AV_CPU_FLAG_MMX2) |
|
| 1353 |
+ sws_init_swScale_MMX2(c); |
|
| 1354 |
+#endif |
|
| 1355 |
+#if HAVE_ALTIVEC |
|
| 1356 |
+ if (cpu_flags & AV_CPU_FLAG_ALTIVEC) |
|
| 1357 |
+ sws_init_swScale_altivec(c); |
|
| 1405 | 1358 |
#endif |
| 1406 |
-#endif //!CONFIG_RUNTIME_CPUDETECT |
|
| 1407 | 1359 |
|
| 1408 | 1360 |
return swScale_c; |
| 1409 | 1361 |
} |
| ... | ... |
@@ -1900,23 +1805,6 @@ static int planarCopyWrapper(SwsContext *c, const uint8_t* src[], int srcStride[ |
| 1900 | 1900 |
return srcSliceH; |
| 1901 | 1901 |
} |
| 1902 | 1902 |
|
| 1903 |
-int ff_hardcodedcpuflags(void) |
|
| 1904 |
-{
|
|
| 1905 |
- int flags = 0; |
|
| 1906 |
-#if COMPILE_TEMPLATE_MMX2 |
|
| 1907 |
- flags |= SWS_CPU_CAPS_MMX|SWS_CPU_CAPS_MMX2; |
|
| 1908 |
-#elif COMPILE_TEMPLATE_AMD3DNOW |
|
| 1909 |
- flags |= SWS_CPU_CAPS_MMX|SWS_CPU_CAPS_3DNOW; |
|
| 1910 |
-#elif COMPILE_TEMPLATE_MMX |
|
| 1911 |
- flags |= SWS_CPU_CAPS_MMX; |
|
| 1912 |
-#elif COMPILE_TEMPLATE_ALTIVEC |
|
| 1913 |
- flags |= SWS_CPU_CAPS_ALTIVEC; |
|
| 1914 |
-#elif ARCH_BFIN |
|
| 1915 |
- flags |= SWS_CPU_CAPS_BFIN; |
|
| 1916 |
-#endif |
|
| 1917 |
- return flags; |
|
| 1918 |
-} |
|
| 1919 |
- |
|
| 1920 | 1903 |
void ff_get_unscaled_swscale(SwsContext *c) |
| 1921 | 1904 |
{
|
| 1922 | 1905 |
const enum PixelFormat srcFormat = c->srcFormat; |
| ... | ... |
@@ -2000,8 +1888,8 @@ void ff_get_unscaled_swscale(SwsContext *c) |
| 2000 | 2000 |
if(srcFormat == PIX_FMT_UYVY422 && dstFormat == PIX_FMT_YUV422P) |
| 2001 | 2001 |
c->swScale= uyvyToYuv422Wrapper; |
| 2002 | 2002 |
|
| 2003 |
-#if COMPILE_ALTIVEC |
|
| 2004 |
- if ((c->flags & SWS_CPU_CAPS_ALTIVEC) && |
|
| 2003 |
+#if HAVE_ALTIVEC |
|
| 2004 |
+ if ((av_get_cpu_flags() & AV_CPU_FLAG_ALTIVEC) && |
|
| 2005 | 2005 |
!(c->flags & SWS_BITEXACT) && |
| 2006 | 2006 |
srcFormat == PIX_FMT_YUV420P) {
|
| 2007 | 2007 |
// unscaled YV12 -> packed YUV, we want speed |
| ... | ... |
@@ -2031,8 +1919,7 @@ void ff_get_unscaled_swscale(SwsContext *c) |
| 2031 | 2031 |
c->swScale= planarCopyWrapper; |
| 2032 | 2032 |
} |
| 2033 | 2033 |
#if ARCH_BFIN |
| 2034 |
- if (flags & SWS_CPU_CAPS_BFIN) |
|
| 2035 |
- ff_bfin_get_unscaled_swscale (c); |
|
| 2034 |
+ ff_bfin_get_unscaled_swscale (c); |
|
| 2036 | 2035 |
#endif |
| 2037 | 2036 |
} |
| 2038 | 2037 |
|
| ... | ... |
@@ -95,13 +95,6 @@ const char *swscale_license(void); |
| 95 | 95 |
#define SWS_ACCURATE_RND 0x40000 |
| 96 | 96 |
#define SWS_BITEXACT 0x80000 |
| 97 | 97 |
|
| 98 |
-#define SWS_CPU_CAPS_MMX 0x80000000 |
|
| 99 |
-#define SWS_CPU_CAPS_MMX2 0x20000000 |
|
| 100 |
-#define SWS_CPU_CAPS_3DNOW 0x40000000 |
|
| 101 |
-#define SWS_CPU_CAPS_ALTIVEC 0x10000000 |
|
| 102 |
-#define SWS_CPU_CAPS_BFIN 0x01000000 |
|
| 103 |
-#define SWS_CPU_CAPS_SSE2 0x02000000 |
|
| 104 |
- |
|
| 105 | 98 |
#define SWS_MAX_REDUCE_CUTOFF 0.002 |
| 106 | 99 |
|
| 107 | 100 |
#define SWS_CS_ITU709 1 |
| ... | ... |
@@ -482,11 +482,6 @@ extern const AVClass sws_context_class; |
| 482 | 482 |
void ff_get_unscaled_swscale(SwsContext *c); |
| 483 | 483 |
|
| 484 | 484 |
/** |
| 485 |
- * Returns the SWS_CPU_CAPS for the optimized code compiled into swscale. |
|
| 486 |
- */ |
|
| 487 |
-int ff_hardcodedcpuflags(void); |
|
| 488 |
- |
|
| 489 |
-/** |
|
| 490 | 485 |
* Returns function pointer to fastest main scaler path function depending |
| 491 | 486 |
* on architecture and available optimizations. |
| 492 | 487 |
*/ |
| ... | ... |
@@ -363,153 +363,11 @@ static inline void hScale_c(int16_t *dst, int dstW, const uint8_t *src, |
| 363 | 363 |
} |
| 364 | 364 |
} |
| 365 | 365 |
|
| 366 |
-static inline void RENAME(hScale16)(int16_t *dst, int dstW, const uint16_t *src, int srcW, int xInc, |
|
| 366 |
+static inline void hScale16_c(int16_t *dst, int dstW, const uint16_t *src, int srcW, int xInc, |
|
| 367 | 367 |
const int16_t *filter, const int16_t *filterPos, long filterSize, int shift) |
| 368 | 368 |
{
|
| 369 | 369 |
int i, j; |
| 370 |
-#if COMPILE_TEMPLATE_MMX |
|
| 371 |
- assert(filterSize % 4 == 0 && filterSize>0); |
|
| 372 |
- if (filterSize==4 && shift<15) { // Always true for upscaling, sometimes for down, too.
|
|
| 373 |
- x86_reg counter= -2*dstW; |
|
| 374 |
- filter-= counter*2; |
|
| 375 |
- filterPos-= counter/2; |
|
| 376 |
- dst-= counter/2; |
|
| 377 |
- __asm__ volatile( |
|
| 378 |
- "movd %5, %%mm7 \n\t" |
|
| 379 |
-#if defined(PIC) |
|
| 380 |
- "push %%"REG_b" \n\t" |
|
| 381 |
-#endif |
|
| 382 |
- "push %%"REG_BP" \n\t" // we use 7 regs here ... |
|
| 383 |
- "mov %%"REG_a", %%"REG_BP" \n\t" |
|
| 384 |
- ".p2align 4 \n\t" |
|
| 385 |
- "1: \n\t" |
|
| 386 |
- "movzwl (%2, %%"REG_BP"), %%eax \n\t" |
|
| 387 |
- "movzwl 2(%2, %%"REG_BP"), %%ebx \n\t" |
|
| 388 |
- "movq (%1, %%"REG_BP", 4), %%mm1 \n\t" |
|
| 389 |
- "movq 8(%1, %%"REG_BP", 4), %%mm3 \n\t" |
|
| 390 |
- "movq (%3, %%"REG_a", 2), %%mm0 \n\t" |
|
| 391 |
- "movq (%3, %%"REG_b", 2), %%mm2 \n\t" |
|
| 392 |
- "pmaddwd %%mm1, %%mm0 \n\t" |
|
| 393 |
- "pmaddwd %%mm2, %%mm3 \n\t" |
|
| 394 |
- "movq %%mm0, %%mm4 \n\t" |
|
| 395 |
- "punpckldq %%mm3, %%mm0 \n\t" |
|
| 396 |
- "punpckhdq %%mm3, %%mm4 \n\t" |
|
| 397 |
- "paddd %%mm4, %%mm0 \n\t" |
|
| 398 |
- "psrad %%mm7, %%mm0 \n\t" |
|
| 399 |
- "packssdw %%mm0, %%mm0 \n\t" |
|
| 400 |
- "movd %%mm0, (%4, %%"REG_BP") \n\t" |
|
| 401 |
- "add $4, %%"REG_BP" \n\t" |
|
| 402 |
- " jnc 1b \n\t" |
|
| 403 |
- |
|
| 404 |
- "pop %%"REG_BP" \n\t" |
|
| 405 |
-#if defined(PIC) |
|
| 406 |
- "pop %%"REG_b" \n\t" |
|
| 407 |
-#endif |
|
| 408 |
- : "+a" (counter) |
|
| 409 |
- : "c" (filter), "d" (filterPos), "S" (src), "D" (dst), "m"(shift) |
|
| 410 |
-#if !defined(PIC) |
|
| 411 |
- : "%"REG_b |
|
| 412 |
-#endif |
|
| 413 |
- ); |
|
| 414 |
- } else if (filterSize==8 && shift<15) {
|
|
| 415 |
- x86_reg counter= -2*dstW; |
|
| 416 |
- filter-= counter*4; |
|
| 417 |
- filterPos-= counter/2; |
|
| 418 |
- dst-= counter/2; |
|
| 419 |
- __asm__ volatile( |
|
| 420 |
- "movd %5, %%mm7 \n\t" |
|
| 421 |
-#if defined(PIC) |
|
| 422 |
- "push %%"REG_b" \n\t" |
|
| 423 |
-#endif |
|
| 424 |
- "push %%"REG_BP" \n\t" // we use 7 regs here ... |
|
| 425 |
- "mov %%"REG_a", %%"REG_BP" \n\t" |
|
| 426 |
- ".p2align 4 \n\t" |
|
| 427 |
- "1: \n\t" |
|
| 428 |
- "movzwl (%2, %%"REG_BP"), %%eax \n\t" |
|
| 429 |
- "movzwl 2(%2, %%"REG_BP"), %%ebx \n\t" |
|
| 430 |
- "movq (%1, %%"REG_BP", 8), %%mm1 \n\t" |
|
| 431 |
- "movq 16(%1, %%"REG_BP", 8), %%mm3 \n\t" |
|
| 432 |
- "movq (%3, %%"REG_a", 2), %%mm0 \n\t" |
|
| 433 |
- "movq (%3, %%"REG_b", 2), %%mm2 \n\t" |
|
| 434 |
- "pmaddwd %%mm1, %%mm0 \n\t" |
|
| 435 |
- "pmaddwd %%mm2, %%mm3 \n\t" |
|
| 436 |
- |
|
| 437 |
- "movq 8(%1, %%"REG_BP", 8), %%mm1 \n\t" |
|
| 438 |
- "movq 24(%1, %%"REG_BP", 8), %%mm5 \n\t" |
|
| 439 |
- "movq 8(%3, %%"REG_a", 2), %%mm4 \n\t" |
|
| 440 |
- "movq 8(%3, %%"REG_b", 2), %%mm2 \n\t" |
|
| 441 |
- "pmaddwd %%mm1, %%mm4 \n\t" |
|
| 442 |
- "pmaddwd %%mm2, %%mm5 \n\t" |
|
| 443 |
- "paddd %%mm4, %%mm0 \n\t" |
|
| 444 |
- "paddd %%mm5, %%mm3 \n\t" |
|
| 445 |
- "movq %%mm0, %%mm4 \n\t" |
|
| 446 |
- "punpckldq %%mm3, %%mm0 \n\t" |
|
| 447 |
- "punpckhdq %%mm3, %%mm4 \n\t" |
|
| 448 |
- "paddd %%mm4, %%mm0 \n\t" |
|
| 449 |
- "psrad %%mm7, %%mm0 \n\t" |
|
| 450 |
- "packssdw %%mm0, %%mm0 \n\t" |
|
| 451 |
- "movd %%mm0, (%4, %%"REG_BP") \n\t" |
|
| 452 |
- "add $4, %%"REG_BP" \n\t" |
|
| 453 |
- " jnc 1b \n\t" |
|
| 454 |
- |
|
| 455 |
- "pop %%"REG_BP" \n\t" |
|
| 456 |
-#if defined(PIC) |
|
| 457 |
- "pop %%"REG_b" \n\t" |
|
| 458 |
-#endif |
|
| 459 |
- : "+a" (counter) |
|
| 460 |
- : "c" (filter), "d" (filterPos), "S" (src), "D" (dst), "m"(shift) |
|
| 461 |
-#if !defined(PIC) |
|
| 462 |
- : "%"REG_b |
|
| 463 |
-#endif |
|
| 464 |
- ); |
|
| 465 |
- } else if (shift<15){
|
|
| 466 |
- const uint16_t *offset = src+filterSize; |
|
| 467 |
- x86_reg counter= -2*dstW; |
|
| 468 |
- //filter-= counter*filterSize/2; |
|
| 469 |
- filterPos-= counter/2; |
|
| 470 |
- dst-= counter/2; |
|
| 471 |
- __asm__ volatile( |
|
| 472 |
- "movd %7, %%mm7 \n\t" |
|
| 473 |
- ".p2align 4 \n\t" |
|
| 474 |
- "1: \n\t" |
|
| 475 |
- "mov %2, %%"REG_c" \n\t" |
|
| 476 |
- "movzwl (%%"REG_c", %0), %%eax \n\t" |
|
| 477 |
- "movzwl 2(%%"REG_c", %0), %%edx \n\t" |
|
| 478 |
- "mov %5, %%"REG_c" \n\t" |
|
| 479 |
- "pxor %%mm4, %%mm4 \n\t" |
|
| 480 |
- "pxor %%mm5, %%mm5 \n\t" |
|
| 481 |
- "2: \n\t" |
|
| 482 |
- "movq (%1), %%mm1 \n\t" |
|
| 483 |
- "movq (%1, %6), %%mm3 \n\t" |
|
| 484 |
- "movq (%%"REG_c", %%"REG_a", 2), %%mm0 \n\t" |
|
| 485 |
- "movq (%%"REG_c", %%"REG_d", 2), %%mm2 \n\t" |
|
| 486 |
- "pmaddwd %%mm1, %%mm0 \n\t" |
|
| 487 |
- "pmaddwd %%mm2, %%mm3 \n\t" |
|
| 488 |
- "paddd %%mm3, %%mm5 \n\t" |
|
| 489 |
- "paddd %%mm0, %%mm4 \n\t" |
|
| 490 |
- "add $8, %1 \n\t" |
|
| 491 |
- "add $8, %%"REG_c" \n\t" |
|
| 492 |
- "cmp %4, %%"REG_c" \n\t" |
|
| 493 |
- " jb 2b \n\t" |
|
| 494 |
- "add %6, %1 \n\t" |
|
| 495 |
- "movq %%mm4, %%mm0 \n\t" |
|
| 496 |
- "punpckldq %%mm5, %%mm4 \n\t" |
|
| 497 |
- "punpckhdq %%mm5, %%mm0 \n\t" |
|
| 498 |
- "paddd %%mm0, %%mm4 \n\t" |
|
| 499 |
- "psrad %%mm7, %%mm4 \n\t" |
|
| 500 |
- "packssdw %%mm4, %%mm4 \n\t" |
|
| 501 |
- "mov %3, %%"REG_a" \n\t" |
|
| 502 |
- "movd %%mm4, (%%"REG_a", %0) \n\t" |
|
| 503 |
- "add $4, %0 \n\t" |
|
| 504 |
- " jnc 1b \n\t" |
|
| 505 |
- |
|
| 506 |
- : "+r" (counter), "+r" (filter) |
|
| 507 |
- : "m" (filterPos), "m" (dst), "m"(offset), |
|
| 508 |
- "m" (src), "r" ((x86_reg)filterSize*2), "m"(shift) |
|
| 509 |
- : "%"REG_a, "%"REG_c, "%"REG_d |
|
| 510 |
- ); |
|
| 511 |
- } else |
|
| 512 |
-#endif |
|
| 370 |
+ |
|
| 513 | 371 |
for (i=0; i<dstW; i++) {
|
| 514 | 372 |
int srcPos= filterPos[i]; |
| 515 | 373 |
int val=0; |
| ... | ... |
@@ -520,7 +378,7 @@ static inline void RENAME(hScale16)(int16_t *dst, int dstW, const uint16_t *src, |
| 520 | 520 |
} |
| 521 | 521 |
} |
| 522 | 522 |
|
| 523 |
-static inline void RENAME(hScale16X)(int16_t *dst, int dstW, const uint16_t *src, int srcW, int xInc, |
|
| 523 |
+static inline void hScale16X_c(int16_t *dst, int dstW, const uint16_t *src, int srcW, int xInc, |
|
| 524 | 524 |
const int16_t *filter, const int16_t *filterPos, long filterSize, int shift) |
| 525 | 525 |
{
|
| 526 | 526 |
int i, j; |
| ... | ... |
@@ -660,6 +518,11 @@ inline static void hcscale_c(SwsContext *c, uint16_t *dst, long dstWidth, |
| 660 | 660 |
#define DEBUG_SWSCALE_BUFFERS 0 |
| 661 | 661 |
#define DEBUG_BUFFERS(...) if (DEBUG_SWSCALE_BUFFERS) av_log(c, AV_LOG_DEBUG, __VA_ARGS__) |
| 662 | 662 |
|
| 663 |
+#if HAVE_MMX |
|
| 664 |
+static void updateMMXDitherTables(SwsContext *c, int dstY, int lumBufIndex, int chrBufIndex, |
|
| 665 |
+ int lastInLumBuf, int lastInChrBuf); |
|
| 666 |
+#endif |
|
| 667 |
+ |
|
| 663 | 668 |
static int swScale_c(SwsContext *c, const uint8_t* src[], int srcStride[], |
| 664 | 669 |
int srcSliceY, int srcSliceH, uint8_t* dst[], int dstStride[]) |
| 665 | 670 |
{
|
| ... | ... |
@@ -831,6 +694,9 @@ static int swScale_c(SwsContext *c, const uint8_t* src[], int srcStride[], |
| 831 | 831 |
if (!enough_lines) |
| 832 | 832 |
break; //we can't output a dstY line so let's try with the next slice |
| 833 | 833 |
|
| 834 |
+#if HAVE_MMX |
|
| 835 |
+ updateMMXDitherTables(c, dstY, lumBufIndex, chrBufIndex, lastInLumBuf, lastInChrBuf); |
|
| 836 |
+#endif |
|
| 834 | 837 |
if (dstY < dstH-2) {
|
| 835 | 838 |
const int16_t **lumSrcPtr= (const int16_t **) lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize; |
| 836 | 839 |
const int16_t **chrSrcPtr= (const int16_t **) chrPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize; |
| ... | ... |
@@ -955,6 +821,12 @@ static int swScale_c(SwsContext *c, const uint8_t* src[], int srcStride[], |
| 955 | 955 |
if ((dstFormat == PIX_FMT_YUVA420P) && !alpPixBuf) |
| 956 | 956 |
fillPlane(dst[3], dstStride[3], dstW, dstY-lastDstY, lastDstY, 255); |
| 957 | 957 |
|
| 958 |
+#if HAVE_MMX2 |
|
| 959 |
+ if (av_get_cpu_flags() & AV_CPU_FLAG_MMX2) |
|
| 960 |
+ __asm__ volatile("sfence":::"memory");
|
|
| 961 |
+#endif |
|
| 962 |
+ emms_c(); |
|
| 963 |
+ |
|
| 958 | 964 |
/* store changed local vars back in the context */ |
| 959 | 965 |
c->dstY= dstY; |
| 960 | 966 |
c->lumBufIndex= lumBufIndex; |
| ... | ... |
@@ -1001,14 +873,14 @@ static void sws_init_swScale_c(SwsContext *c) |
| 1001 | 1001 |
case PIX_FMT_YUV420P10BE: |
| 1002 | 1002 |
case PIX_FMT_YUV420P16BE: |
| 1003 | 1003 |
case PIX_FMT_YUV422P16BE: |
| 1004 |
- case PIX_FMT_YUV444P16BE: c->hScale16= HAVE_BIGENDIAN ? RENAME(hScale16) : RENAME(hScale16X); break; |
|
| 1004 |
+ case PIX_FMT_YUV444P16BE: c->hScale16= HAVE_BIGENDIAN ? hScale16_c : hScale16X_c; break; |
|
| 1005 | 1005 |
case PIX_FMT_GRAY16LE : |
| 1006 | 1006 |
case PIX_FMT_YUV420P9LE: |
| 1007 | 1007 |
case PIX_FMT_YUV422P10LE: |
| 1008 | 1008 |
case PIX_FMT_YUV420P10LE: |
| 1009 | 1009 |
case PIX_FMT_YUV420P16LE: |
| 1010 | 1010 |
case PIX_FMT_YUV422P16LE: |
| 1011 |
- case PIX_FMT_YUV444P16LE: c->hScale16= HAVE_BIGENDIAN ? RENAME(hScale16X) : RENAME(hScale16); break; |
|
| 1011 |
+ case PIX_FMT_YUV444P16LE: c->hScale16= HAVE_BIGENDIAN ? hScale16X_c : hScale16_c; break; |
|
| 1012 | 1012 |
} |
| 1013 | 1013 |
if (c->chrSrcHSubSample) {
|
| 1014 | 1014 |
switch(srcFormat) {
|
| ... | ... |
@@ -185,7 +185,7 @@ static double getSplineCoeff(double a, double b, double c, double d, double dist |
| 185 | 185 |
} |
| 186 | 186 |
|
| 187 | 187 |
static int initFilter(int16_t **outFilter, int16_t **filterPos, int *outFilterSize, int xInc, |
| 188 |
- int srcW, int dstW, int filterAlign, int one, int flags, |
|
| 188 |
+ int srcW, int dstW, int filterAlign, int one, int flags, int cpu_flags, |
|
| 189 | 189 |
SwsVector *srcFilter, SwsVector *dstFilter, double param[2]) |
| 190 | 190 |
{
|
| 191 | 191 |
int i; |
| ... | ... |
@@ -196,10 +196,8 @@ static int initFilter(int16_t **outFilter, int16_t **filterPos, int *outFilterSi |
| 196 | 196 |
int64_t *filter2=NULL; |
| 197 | 197 |
const int64_t fone= 1LL<<54; |
| 198 | 198 |
int ret= -1; |
| 199 |
-#if ARCH_X86 |
|
| 200 |
- if (flags & SWS_CPU_CAPS_MMX) |
|
| 201 |
- __asm__ volatile("emms\n\t"::: "memory"); //FIXME this should not be required but it IS (even for non-MMX versions)
|
|
| 202 |
-#endif |
|
| 199 |
+ |
|
| 200 |
+ emms_c(); //FIXME this should not be required but it IS (even for non-MMX versions) |
|
| 203 | 201 |
|
| 204 | 202 |
// NOTE: the +1 is for the MMX scaler which reads over the end |
| 205 | 203 |
FF_ALLOC_OR_GOTO(NULL, *filterPos, (dstW+1)*sizeof(int16_t), fail); |
| ... | ... |
@@ -416,7 +414,7 @@ static int initFilter(int16_t **outFilter, int16_t **filterPos, int *outFilterSi |
| 416 | 416 |
if (min>minFilterSize) minFilterSize= min; |
| 417 | 417 |
} |
| 418 | 418 |
|
| 419 |
- if (flags & SWS_CPU_CAPS_ALTIVEC) {
|
|
| 419 |
+ if (HAVE_ALTIVEC && cpu_flags & AV_CPU_FLAG_ALTIVEC) {
|
|
| 420 | 420 |
// we can handle the special case 4, |
| 421 | 421 |
// so we don't want to go to the full 8 |
| 422 | 422 |
if (minFilterSize < 5) |
| ... | ... |
@@ -431,7 +429,7 @@ static int initFilter(int16_t **outFilter, int16_t **filterPos, int *outFilterSi |
| 431 | 431 |
filterAlign = 1; |
| 432 | 432 |
} |
| 433 | 433 |
|
| 434 |
- if (flags & SWS_CPU_CAPS_MMX) {
|
|
| 434 |
+ if (HAVE_MMX && cpu_flags & AV_CPU_FLAG_MMX) {
|
|
| 435 | 435 |
// special case for unscaled vertical filtering |
| 436 | 436 |
if (minFilterSize == 1 && filterAlign == 2) |
| 437 | 437 |
filterAlign= 1; |
| ... | ... |
@@ -521,7 +519,7 @@ fail: |
| 521 | 521 |
return ret; |
| 522 | 522 |
} |
| 523 | 523 |
|
| 524 |
-#if ARCH_X86 && (HAVE_MMX2 || CONFIG_RUNTIME_CPUDETECT) |
|
| 524 |
+#if HAVE_MMX2 |
|
| 525 | 525 |
static int initMMX2HScaler(int dstW, int xInc, uint8_t *filterCode, int16_t *filter, int32_t *filterPos, int numSplits) |
| 526 | 526 |
{
|
| 527 | 527 |
uint8_t *fragmentA; |
| ... | ... |
@@ -679,7 +677,7 @@ static int initMMX2HScaler(int dstW, int xInc, uint8_t *filterCode, int16_t *fil |
| 679 | 679 |
|
| 680 | 680 |
return fragmentPos + 1; |
| 681 | 681 |
} |
| 682 |
-#endif /* ARCH_X86 && (HAVE_MMX2 || CONFIG_RUNTIME_CPUDETECT) */ |
|
| 682 |
+#endif /* HAVE_MMX2 */ |
|
| 683 | 683 |
|
| 684 | 684 |
static void getSubSampleFactors(int *h, int *v, enum PixelFormat format) |
| 685 | 685 |
{
|
| ... | ... |
@@ -687,8 +685,6 @@ static void getSubSampleFactors(int *h, int *v, enum PixelFormat format) |
| 687 | 687 |
*v = av_pix_fmt_descriptors[format].log2_chroma_h; |
| 688 | 688 |
} |
| 689 | 689 |
|
| 690 |
-static int update_flags_cpu(int flags); |
|
| 691 |
- |
|
| 692 | 690 |
int sws_setColorspaceDetails(SwsContext *c, const int inv_table[4], int srcRange, const int table[4], int dstRange, int brightness, int contrast, int saturation) |
| 693 | 691 |
{
|
| 694 | 692 |
memcpy(c->srcColorspaceTable, inv_table, sizeof(int)*4); |
| ... | ... |
@@ -703,15 +699,12 @@ int sws_setColorspaceDetails(SwsContext *c, const int inv_table[4], int srcRange |
| 703 | 703 |
|
| 704 | 704 |
c->dstFormatBpp = av_get_bits_per_pixel(&av_pix_fmt_descriptors[c->dstFormat]); |
| 705 | 705 |
c->srcFormatBpp = av_get_bits_per_pixel(&av_pix_fmt_descriptors[c->srcFormat]); |
| 706 |
- c->flags = update_flags_cpu(c->flags); |
|
| 707 | 706 |
|
| 708 | 707 |
ff_yuv2rgb_c_init_tables(c, inv_table, srcRange, brightness, contrast, saturation); |
| 709 | 708 |
//FIXME factorize |
| 710 | 709 |
|
| 711 |
-#if HAVE_ALTIVEC |
|
| 712 |
- if (c->flags & SWS_CPU_CAPS_ALTIVEC) |
|
| 710 |
+ if (HAVE_ALTIVEC && av_get_cpu_flags() & AV_CPU_FLAG_ALTIVEC) |
|
| 713 | 711 |
ff_yuv2rgb_init_tables_altivec(c, inv_table, brightness, contrast, saturation); |
| 714 |
-#endif |
|
| 715 | 712 |
return 0; |
| 716 | 713 |
} |
| 717 | 714 |
|
| ... | ... |
@@ -741,27 +734,6 @@ static int handle_jpeg(enum PixelFormat *format) |
| 741 | 741 |
} |
| 742 | 742 |
} |
| 743 | 743 |
|
| 744 |
-static int update_flags_cpu(int flags) |
|
| 745 |
-{
|
|
| 746 |
-#if !CONFIG_RUNTIME_CPUDETECT //ensure that the flags match the compiled variant if cpudetect is off |
|
| 747 |
- flags &= ~( SWS_CPU_CAPS_MMX |
|
| 748 |
- |SWS_CPU_CAPS_MMX2 |
|
| 749 |
- |SWS_CPU_CAPS_3DNOW |
|
| 750 |
- |SWS_CPU_CAPS_SSE2 |
|
| 751 |
- |SWS_CPU_CAPS_ALTIVEC |
|
| 752 |
- |SWS_CPU_CAPS_BFIN); |
|
| 753 |
- flags |= ff_hardcodedcpuflags(); |
|
| 754 |
-#else /* !CONFIG_RUNTIME_CPUDETECT */ |
|
| 755 |
- int cpuflags = av_get_cpu_flags(); |
|
| 756 |
- |
|
| 757 |
- flags |= (cpuflags & AV_CPU_FLAG_SSE2 ? SWS_CPU_CAPS_SSE2 : 0); |
|
| 758 |
- flags |= (cpuflags & AV_CPU_FLAG_MMX ? SWS_CPU_CAPS_MMX : 0); |
|
| 759 |
- flags |= (cpuflags & AV_CPU_FLAG_MMX2 ? SWS_CPU_CAPS_MMX2 : 0); |
|
| 760 |
- flags |= (cpuflags & AV_CPU_FLAG_3DNOW ? SWS_CPU_CAPS_3DNOW : 0); |
|
| 761 |
-#endif /* CONFIG_RUNTIME_CPUDETECT */ |
|
| 762 |
- return flags; |
|
| 763 |
-} |
|
| 764 |
- |
|
| 765 | 744 |
SwsContext *sws_alloc_context(void) |
| 766 | 745 |
{
|
| 767 | 746 |
SwsContext *c= av_mallocz(sizeof(SwsContext)); |
| ... | ... |
@@ -782,16 +754,14 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter) |
| 782 | 782 |
int srcH= c->srcH; |
| 783 | 783 |
int dstW= c->dstW; |
| 784 | 784 |
int dstH= c->dstH; |
| 785 |
- int flags; |
|
| 785 |
+ int flags, cpu_flags; |
|
| 786 | 786 |
enum PixelFormat srcFormat= c->srcFormat; |
| 787 | 787 |
enum PixelFormat dstFormat= c->dstFormat; |
| 788 | 788 |
|
| 789 |
- flags= c->flags = update_flags_cpu(c->flags); |
|
| 790 |
-#if ARCH_X86 |
|
| 791 |
- if (flags & SWS_CPU_CAPS_MMX) |
|
| 792 |
- __asm__ volatile("emms\n\t"::: "memory");
|
|
| 793 |
-#endif |
|
| 794 |
- if (!rgb15to16) sws_rgb2rgb_init(flags); |
|
| 789 |
+ cpu_flags = av_get_cpu_flags(); |
|
| 790 |
+ flags = c->flags; |
|
| 791 |
+ emms_c(); |
|
| 792 |
+ if (!rgb15to16) sws_rgb2rgb_init(); |
|
| 795 | 793 |
|
| 796 | 794 |
unscaled = (srcW == dstW && srcH == dstH); |
| 797 | 795 |
|
| ... | ... |
@@ -884,7 +854,7 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter) |
| 884 | 884 |
} |
| 885 | 885 |
} |
| 886 | 886 |
|
| 887 |
- if (flags & SWS_CPU_CAPS_MMX2) {
|
|
| 887 |
+ if (HAVE_MMX2 && cpu_flags & AV_CPU_FLAG_MMX2) {
|
|
| 888 | 888 |
c->canMMX2BeUsed= (dstW >=srcW && (dstW&31)==0 && (srcW&15)==0) ? 1 : 0; |
| 889 | 889 |
if (!c->canMMX2BeUsed && dstW >=srcW && (srcW&15)==0 && (flags&SWS_FAST_BILINEAR)) {
|
| 890 | 890 |
if (flags&SWS_PRINT_INFO) |
| ... | ... |
@@ -910,7 +880,7 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter) |
| 910 | 910 |
c->chrXInc+= 20; |
| 911 | 911 |
} |
| 912 | 912 |
//we don't use the x86 asm scaler if MMX is available |
| 913 |
- else if (flags & SWS_CPU_CAPS_MMX) {
|
|
| 913 |
+ else if (HAVE_MMX && cpu_flags & AV_CPU_FLAG_MMX) {
|
|
| 914 | 914 |
c->lumXInc = ((srcW-2)<<16)/(dstW-2) - 20; |
| 915 | 915 |
c->chrXInc = ((c->chrSrcW-2)<<16)/(c->chrDstW-2) - 20; |
| 916 | 916 |
} |
| ... | ... |
@@ -918,7 +888,7 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter) |
| 918 | 918 |
|
| 919 | 919 |
/* precalculate horizontal scaler filter coefficients */ |
| 920 | 920 |
{
|
| 921 |
-#if ARCH_X86 && (HAVE_MMX2 || CONFIG_RUNTIME_CPUDETECT) |
|
| 921 |
+#if HAVE_MMX2 |
|
| 922 | 922 |
// can't downscale !!! |
| 923 | 923 |
if (c->canMMX2BeUsed && (flags & SWS_FAST_BILINEAR)) {
|
| 924 | 924 |
c->lumMmx2FilterCodeSize = initMMX2HScaler( dstW, c->lumXInc, NULL, NULL, NULL, 8); |
| ... | ... |
@@ -954,21 +924,21 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter) |
| 954 | 954 |
mprotect(c->chrMmx2FilterCode, c->chrMmx2FilterCodeSize, PROT_EXEC | PROT_READ); |
| 955 | 955 |
#endif |
| 956 | 956 |
} else |
| 957 |
-#endif /* ARCH_X86 && (HAVE_MMX2 || CONFIG_RUNTIME_CPUDETECT) */ |
|
| 957 |
+#endif /* HAVE_MMX2 */ |
|
| 958 | 958 |
{
|
| 959 | 959 |
const int filterAlign= |
| 960 |
- (flags & SWS_CPU_CAPS_MMX) ? 4 : |
|
| 961 |
- (flags & SWS_CPU_CAPS_ALTIVEC) ? 8 : |
|
| 960 |
+ (HAVE_MMX && cpu_flags & AV_CPU_FLAG_MMX) ? 4 : |
|
| 961 |
+ (HAVE_ALTIVEC && cpu_flags & AV_CPU_FLAG_ALTIVEC) ? 8 : |
|
| 962 | 962 |
1; |
| 963 | 963 |
|
| 964 | 964 |
if (initFilter(&c->hLumFilter, &c->hLumFilterPos, &c->hLumFilterSize, c->lumXInc, |
| 965 | 965 |
srcW , dstW, filterAlign, 1<<14, |
| 966 |
- (flags&SWS_BICUBLIN) ? (flags|SWS_BICUBIC) : flags, |
|
| 966 |
+ (flags&SWS_BICUBLIN) ? (flags|SWS_BICUBIC) : flags, cpu_flags, |
|
| 967 | 967 |
srcFilter->lumH, dstFilter->lumH, c->param) < 0) |
| 968 | 968 |
goto fail; |
| 969 | 969 |
if (initFilter(&c->hChrFilter, &c->hChrFilterPos, &c->hChrFilterSize, c->chrXInc, |
| 970 | 970 |
c->chrSrcW, c->chrDstW, filterAlign, 1<<14, |
| 971 |
- (flags&SWS_BICUBLIN) ? (flags|SWS_BILINEAR) : flags, |
|
| 971 |
+ (flags&SWS_BICUBLIN) ? (flags|SWS_BILINEAR) : flags, cpu_flags, |
|
| 972 | 972 |
srcFilter->chrH, dstFilter->chrH, c->param) < 0) |
| 973 | 973 |
goto fail; |
| 974 | 974 |
} |
| ... | ... |
@@ -977,18 +947,18 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter) |
| 977 | 977 |
/* precalculate vertical scaler filter coefficients */ |
| 978 | 978 |
{
|
| 979 | 979 |
const int filterAlign= |
| 980 |
- (flags & SWS_CPU_CAPS_MMX) && (flags & SWS_ACCURATE_RND) ? 2 : |
|
| 981 |
- (flags & SWS_CPU_CAPS_ALTIVEC) ? 8 : |
|
| 980 |
+ (HAVE_MMX && cpu_flags & AV_CPU_FLAG_MMX) && (flags & SWS_ACCURATE_RND) ? 2 : |
|
| 981 |
+ (HAVE_ALTIVEC && cpu_flags & AV_CPU_FLAG_ALTIVEC) ? 8 : |
|
| 982 | 982 |
1; |
| 983 | 983 |
|
| 984 | 984 |
if (initFilter(&c->vLumFilter, &c->vLumFilterPos, &c->vLumFilterSize, c->lumYInc, |
| 985 | 985 |
srcH , dstH, filterAlign, (1<<12), |
| 986 |
- (flags&SWS_BICUBLIN) ? (flags|SWS_BICUBIC) : flags, |
|
| 986 |
+ (flags&SWS_BICUBLIN) ? (flags|SWS_BICUBIC) : flags, cpu_flags, |
|
| 987 | 987 |
srcFilter->lumV, dstFilter->lumV, c->param) < 0) |
| 988 | 988 |
goto fail; |
| 989 | 989 |
if (initFilter(&c->vChrFilter, &c->vChrFilterPos, &c->vChrFilterSize, c->chrYInc, |
| 990 | 990 |
c->chrSrcH, c->chrDstH, filterAlign, (1<<12), |
| 991 |
- (flags&SWS_BICUBLIN) ? (flags|SWS_BILINEAR) : flags, |
|
| 991 |
+ (flags&SWS_BICUBLIN) ? (flags|SWS_BILINEAR) : flags, cpu_flags, |
|
| 992 | 992 |
srcFilter->chrV, dstFilter->chrV, c->param) < 0) |
| 993 | 993 |
goto fail; |
| 994 | 994 |
|
| ... | ... |
@@ -1082,13 +1052,13 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter) |
| 1082 | 1082 |
#endif |
| 1083 | 1083 |
sws_format_name(dstFormat)); |
| 1084 | 1084 |
|
| 1085 |
- if (flags & SWS_CPU_CAPS_MMX2) av_log(c, AV_LOG_INFO, "using MMX2\n"); |
|
| 1086 |
- else if (flags & SWS_CPU_CAPS_3DNOW) av_log(c, AV_LOG_INFO, "using 3DNOW\n"); |
|
| 1087 |
- else if (flags & SWS_CPU_CAPS_MMX) av_log(c, AV_LOG_INFO, "using MMX\n"); |
|
| 1088 |
- else if (flags & SWS_CPU_CAPS_ALTIVEC) av_log(c, AV_LOG_INFO, "using AltiVec\n"); |
|
| 1085 |
+ if (HAVE_MMX2 && cpu_flags & AV_CPU_FLAG_MMX2) av_log(c, AV_LOG_INFO, "using MMX2\n"); |
|
| 1086 |
+ else if (HAVE_AMD3DNOW && cpu_flags & AV_CPU_FLAG_3DNOW) av_log(c, AV_LOG_INFO, "using 3DNOW\n"); |
|
| 1087 |
+ else if (HAVE_MMX && cpu_flags & AV_CPU_FLAG_MMX) av_log(c, AV_LOG_INFO, "using MMX\n"); |
|
| 1088 |
+ else if (HAVE_ALTIVEC && cpu_flags & AV_CPU_FLAG_ALTIVEC) av_log(c, AV_LOG_INFO, "using AltiVec\n"); |
|
| 1089 | 1089 |
else av_log(c, AV_LOG_INFO, "using C\n"); |
| 1090 | 1090 |
|
| 1091 |
- if (flags & SWS_CPU_CAPS_MMX) {
|
|
| 1091 |
+ if (HAVE_MMX && cpu_flags & AV_CPU_FLAG_MMX) {
|
|
| 1092 | 1092 |
if (c->canMMX2BeUsed && (flags&SWS_FAST_BILINEAR)) |
| 1093 | 1093 |
av_log(c, AV_LOG_VERBOSE, "using FAST_BILINEAR MMX2 scaler for horizontal scaling\n"); |
| 1094 | 1094 |
else {
|
| ... | ... |
@@ -1107,7 +1077,7 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter) |
| 1107 | 1107 |
av_log(c, AV_LOG_VERBOSE, "using n-tap MMX scaler for horizontal chrominance scaling\n"); |
| 1108 | 1108 |
} |
| 1109 | 1109 |
} else {
|
| 1110 |
-#if ARCH_X86 |
|
| 1110 |
+#if HAVE_MMX |
|
| 1111 | 1111 |
av_log(c, AV_LOG_VERBOSE, "using x86 asm scaler for horizontal scaling\n"); |
| 1112 | 1112 |
#else |
| 1113 | 1113 |
if (flags & SWS_FAST_BILINEAR) |
| ... | ... |
@@ -1118,31 +1088,41 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter) |
| 1118 | 1118 |
} |
| 1119 | 1119 |
if (isPlanarYUV(dstFormat)) {
|
| 1120 | 1120 |
if (c->vLumFilterSize==1) |
| 1121 |
- av_log(c, AV_LOG_VERBOSE, "using 1-tap %s \"scaler\" for vertical scaling (YV12 like)\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C"); |
|
| 1121 |
+ av_log(c, AV_LOG_VERBOSE, "using 1-tap %s \"scaler\" for vertical scaling (YV12 like)\n", |
|
| 1122 |
+ (HAVE_MMX && cpu_flags & AV_CPU_FLAG_MMX) ? "MMX" : "C"); |
|
| 1122 | 1123 |
else |
| 1123 |
- av_log(c, AV_LOG_VERBOSE, "using n-tap %s scaler for vertical scaling (YV12 like)\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C"); |
|
| 1124 |
+ av_log(c, AV_LOG_VERBOSE, "using n-tap %s scaler for vertical scaling (YV12 like)\n", |
|
| 1125 |
+ (HAVE_MMX && cpu_flags & AV_CPU_FLAG_MMX) ? "MMX" : "C"); |
|
| 1124 | 1126 |
} else {
|
| 1125 | 1127 |
if (c->vLumFilterSize==1 && c->vChrFilterSize==2) |
| 1126 | 1128 |
av_log(c, AV_LOG_VERBOSE, "using 1-tap %s \"scaler\" for vertical luminance scaling (BGR)\n" |
| 1127 |
- " 2-tap scaler for vertical chrominance scaling (BGR)\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C"); |
|
| 1129 |
+ " 2-tap scaler for vertical chrominance scaling (BGR)\n", |
|
| 1130 |
+ (HAVE_MMX && cpu_flags & AV_CPU_FLAG_MMX) ? "MMX" : "C"); |
|
| 1128 | 1131 |
else if (c->vLumFilterSize==2 && c->vChrFilterSize==2) |
| 1129 |
- av_log(c, AV_LOG_VERBOSE, "using 2-tap linear %s scaler for vertical scaling (BGR)\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C"); |
|
| 1132 |
+ av_log(c, AV_LOG_VERBOSE, "using 2-tap linear %s scaler for vertical scaling (BGR)\n", |
|
| 1133 |
+ (HAVE_MMX && cpu_flags & AV_CPU_FLAG_MMX) ? "MMX" : "C"); |
|
| 1130 | 1134 |
else |
| 1131 |
- av_log(c, AV_LOG_VERBOSE, "using n-tap %s scaler for vertical scaling (BGR)\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C"); |
|
| 1135 |
+ av_log(c, AV_LOG_VERBOSE, "using n-tap %s scaler for vertical scaling (BGR)\n", |
|
| 1136 |
+ (HAVE_MMX && cpu_flags & AV_CPU_FLAG_MMX) ? "MMX" : "C"); |
|
| 1132 | 1137 |
} |
| 1133 | 1138 |
|
| 1134 | 1139 |
if (dstFormat==PIX_FMT_BGR24) |
| 1135 | 1140 |
av_log(c, AV_LOG_VERBOSE, "using %s YV12->BGR24 converter\n", |
| 1136 |
- (flags & SWS_CPU_CAPS_MMX2) ? "MMX2" : ((flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C")); |
|
| 1141 |
+ (HAVE_MMX2 && cpu_flags & AV_CPU_FLAG_MMX2) ? "MMX2" : |
|
| 1142 |
+ ((HAVE_MMX && cpu_flags & AV_CPU_FLAG_MMX) ? "MMX" : "C")); |
|
| 1137 | 1143 |
else if (dstFormat==PIX_FMT_RGB32) |
| 1138 |
- av_log(c, AV_LOG_VERBOSE, "using %s YV12->BGR32 converter\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C"); |
|
| 1144 |
+ av_log(c, AV_LOG_VERBOSE, "using %s YV12->BGR32 converter\n", |
|
| 1145 |
+ (HAVE_MMX && cpu_flags & AV_CPU_FLAG_MMX) ? "MMX" : "C"); |
|
| 1139 | 1146 |
else if (dstFormat==PIX_FMT_BGR565) |
| 1140 |
- av_log(c, AV_LOG_VERBOSE, "using %s YV12->BGR16 converter\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C"); |
|
| 1147 |
+ av_log(c, AV_LOG_VERBOSE, "using %s YV12->BGR16 converter\n", |
|
| 1148 |
+ (HAVE_MMX && cpu_flags & AV_CPU_FLAG_MMX) ? "MMX" : "C"); |
|
| 1141 | 1149 |
else if (dstFormat==PIX_FMT_BGR555) |
| 1142 |
- av_log(c, AV_LOG_VERBOSE, "using %s YV12->BGR15 converter\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C"); |
|
| 1150 |
+ av_log(c, AV_LOG_VERBOSE, "using %s YV12->BGR15 converter\n", |
|
| 1151 |
+ (HAVE_MMX && cpu_flags & AV_CPU_FLAG_MMX) ? "MMX" : "C"); |
|
| 1143 | 1152 |
else if (dstFormat == PIX_FMT_RGB444BE || dstFormat == PIX_FMT_RGB444LE || |
| 1144 | 1153 |
dstFormat == PIX_FMT_BGR444BE || dstFormat == PIX_FMT_BGR444LE) |
| 1145 |
- av_log(c, AV_LOG_VERBOSE, "using %s YV12->BGR12 converter\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C"); |
|
| 1154 |
+ av_log(c, AV_LOG_VERBOSE, "using %s YV12->BGR12 converter\n", |
|
| 1155 |
+ (HAVE_MMX && cpu_flags & AV_CPU_FLAG_MMX) ? "MMX" : "C"); |
|
| 1146 | 1156 |
|
| 1147 | 1157 |
av_log(c, AV_LOG_VERBOSE, "%dx%d -> %dx%d\n", srcW, srcH, dstW, dstH); |
| 1148 | 1158 |
av_log(c, AV_LOG_DEBUG, "lum srcW=%d srcH=%d dstW=%d dstH=%d xInc=%d yInc=%d\n", |
| ... | ... |
@@ -1527,7 +1507,7 @@ void sws_freeContext(SwsContext *c) |
| 1527 | 1527 |
av_freep(&c->hLumFilterPos); |
| 1528 | 1528 |
av_freep(&c->hChrFilterPos); |
| 1529 | 1529 |
|
| 1530 |
-#if ARCH_X86 |
|
| 1530 |
+#if HAVE_MMX |
|
| 1531 | 1531 |
#ifdef MAP_ANONYMOUS |
| 1532 | 1532 |
if (c->lumMmx2FilterCode) munmap(c->lumMmx2FilterCode, c->lumMmx2FilterCodeSize); |
| 1533 | 1533 |
if (c->chrMmx2FilterCode) munmap(c->chrMmx2FilterCode, c->chrMmx2FilterCodeSize); |
| ... | ... |
@@ -1540,7 +1520,7 @@ void sws_freeContext(SwsContext *c) |
| 1540 | 1540 |
#endif |
| 1541 | 1541 |
c->lumMmx2FilterCode=NULL; |
| 1542 | 1542 |
c->chrMmx2FilterCode=NULL; |
| 1543 |
-#endif /* ARCH_X86 */ |
|
| 1543 |
+#endif /* HAVE_MMX */ |
|
| 1544 | 1544 |
|
| 1545 | 1545 |
av_freep(&c->yuvTable); |
| 1546 | 1546 |
|
| ... | ... |
@@ -1557,8 +1537,6 @@ struct SwsContext *sws_getCachedContext(struct SwsContext *context, |
| 1557 | 1557 |
if (!param) |
| 1558 | 1558 |
param = default_param; |
| 1559 | 1559 |
|
| 1560 |
- flags = update_flags_cpu(flags); |
|
| 1561 |
- |
|
| 1562 | 1560 |
if (context && |
| 1563 | 1561 |
(context->srcW != srcW || |
| 1564 | 1562 |
context->srcH != srcH || |
| ... | ... |
@@ -27,6 +27,7 @@ |
| 27 | 27 |
|
| 28 | 28 |
#include "config.h" |
| 29 | 29 |
#include "libavutil/x86_cpu.h" |
| 30 |
+#include "libavutil/cpu.h" |
|
| 30 | 31 |
#include "libavutil/bswap.h" |
| 31 | 32 |
#include "libswscale/rgb2rgb.h" |
| 32 | 33 |
#include "libswscale/swscale.h" |
| ... | ... |
@@ -122,16 +123,16 @@ DECLARE_ASM_CONST(8, uint64_t, blue_15mask) = 0x0000001f0000001fULL; |
| 122 | 122 |
32-bit C version, and and&add trick by Michael Niedermayer |
| 123 | 123 |
*/ |
| 124 | 124 |
|
| 125 |
-void rgb2rgb_init_x86(int flags) |
|
| 125 |
+void rgb2rgb_init_x86(void) |
|
| 126 | 126 |
{
|
| 127 |
-#if HAVE_MMX2 || HAVE_AMD3DNOW || HAVE_MMX |
|
| 128 |
- if (flags & SWS_CPU_CAPS_SSE2) |
|
| 129 |
- rgb2rgb_init_SSE2(); |
|
| 130 |
- else if (flags & SWS_CPU_CAPS_MMX2) |
|
| 131 |
- rgb2rgb_init_MMX2(); |
|
| 132 |
- else if (flags & SWS_CPU_CAPS_3DNOW) |
|
| 133 |
- rgb2rgb_init_3DNOW(); |
|
| 134 |
- else if (flags & SWS_CPU_CAPS_MMX) |
|
| 127 |
+ int cpu_flags = av_get_cpu_flags(); |
|
| 128 |
+ |
|
| 129 |
+ if (HAVE_MMX && cpu_flags & AV_CPU_FLAG_MMX) |
|
| 135 | 130 |
rgb2rgb_init_MMX(); |
| 136 |
-#endif /* HAVE_MMX2 || HAVE_AMD3DNOW || HAVE_MMX */ |
|
| 131 |
+ if (HAVE_AMD3DNOW && cpu_flags & AV_CPU_FLAG_3DNOW) |
|
| 132 |
+ rgb2rgb_init_3DNOW(); |
|
| 133 |
+ if (HAVE_MMX2 && cpu_flags & AV_CPU_FLAG_MMX2) |
|
| 134 |
+ rgb2rgb_init_MMX2(); |
|
| 135 |
+ if (HAVE_SSE && cpu_flags & AV_CPU_FLAG_SSE2) |
|
| 136 |
+ rgb2rgb_init_SSE2(); |
|
| 137 | 137 |
} |
| ... | ... |
@@ -22,24 +22,15 @@ |
| 22 | 22 |
|
| 23 | 23 |
#undef REAL_MOVNTQ |
| 24 | 24 |
#undef MOVNTQ |
| 25 |
-#undef PAVGB |
|
| 26 | 25 |
#undef PREFETCH |
| 27 | 26 |
|
| 28 |
-#if COMPILE_TEMPLATE_AMD3DNOW |
|
| 29 |
-#define PREFETCH "prefetch" |
|
| 30 |
-#elif COMPILE_TEMPLATE_MMX2 |
|
| 27 |
+#if COMPILE_TEMPLATE_MMX2 |
|
| 31 | 28 |
#define PREFETCH "prefetchnta" |
| 32 | 29 |
#else |
| 33 | 30 |
#define PREFETCH " # nop" |
| 34 | 31 |
#endif |
| 35 | 32 |
|
| 36 | 33 |
#if COMPILE_TEMPLATE_MMX2 |
| 37 |
-#define PAVGB(a,b) "pavgb " #a ", " #b " \n\t" |
|
| 38 |
-#elif COMPILE_TEMPLATE_AMD3DNOW |
|
| 39 |
-#define PAVGB(a,b) "pavgusb " #a ", " #b " \n\t" |
|
| 40 |
-#endif |
|
| 41 |
- |
|
| 42 |
-#if COMPILE_TEMPLATE_MMX2 |
|
| 43 | 34 |
#define REAL_MOVNTQ(a,b) "movntq " #a ", " #b " \n\t" |
| 44 | 35 |
#else |
| 45 | 36 |
#define REAL_MOVNTQ(a,b) "movq " #a ", " #b " \n\t" |
| ... | ... |
@@ -709,62 +700,6 @@ |
| 709 | 709 |
" jb 1b \n\t" |
| 710 | 710 |
#define WRITERGB15(dst, dstw, index) REAL_WRITERGB15(dst, dstw, index) |
| 711 | 711 |
|
| 712 |
-#define WRITEBGR24OLD(dst, dstw, index) \ |
|
| 713 |
- /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */\ |
|
| 714 |
- "movq %%mm2, %%mm1 \n\t" /* B */\ |
|
| 715 |
- "movq %%mm5, %%mm6 \n\t" /* R */\ |
|
| 716 |
- "punpcklbw %%mm4, %%mm2 \n\t" /* GBGBGBGB 0 */\ |
|
| 717 |
- "punpcklbw %%mm7, %%mm5 \n\t" /* 0R0R0R0R 0 */\ |
|
| 718 |
- "punpckhbw %%mm4, %%mm1 \n\t" /* GBGBGBGB 2 */\ |
|
| 719 |
- "punpckhbw %%mm7, %%mm6 \n\t" /* 0R0R0R0R 2 */\ |
|
| 720 |
- "movq %%mm2, %%mm0 \n\t" /* GBGBGBGB 0 */\ |
|
| 721 |
- "movq %%mm1, %%mm3 \n\t" /* GBGBGBGB 2 */\ |
|
| 722 |
- "punpcklwd %%mm5, %%mm0 \n\t" /* 0RGB0RGB 0 */\ |
|
| 723 |
- "punpckhwd %%mm5, %%mm2 \n\t" /* 0RGB0RGB 1 */\ |
|
| 724 |
- "punpcklwd %%mm6, %%mm1 \n\t" /* 0RGB0RGB 2 */\ |
|
| 725 |
- "punpckhwd %%mm6, %%mm3 \n\t" /* 0RGB0RGB 3 */\ |
|
| 726 |
-\ |
|
| 727 |
- "movq %%mm0, %%mm4 \n\t" /* 0RGB0RGB 0 */\ |
|
| 728 |
- "psrlq $8, %%mm0 \n\t" /* 00RGB0RG 0 */\ |
|
| 729 |
- "pand "MANGLE(bm00000111)", %%mm4 \n\t" /* 00000RGB 0 */\ |
|
| 730 |
- "pand "MANGLE(bm11111000)", %%mm0 \n\t" /* 00RGB000 0.5 */\ |
|
| 731 |
- "por %%mm4, %%mm0 \n\t" /* 00RGBRGB 0 */\ |
|
| 732 |
- "movq %%mm2, %%mm4 \n\t" /* 0RGB0RGB 1 */\ |
|
| 733 |
- "psllq $48, %%mm2 \n\t" /* GB000000 1 */\ |
|
| 734 |
- "por %%mm2, %%mm0 \n\t" /* GBRGBRGB 0 */\ |
|
| 735 |
-\ |
|
| 736 |
- "movq %%mm4, %%mm2 \n\t" /* 0RGB0RGB 1 */\ |
|
| 737 |
- "psrld $16, %%mm4 \n\t" /* 000R000R 1 */\ |
|
| 738 |
- "psrlq $24, %%mm2 \n\t" /* 0000RGB0 1.5 */\ |
|
| 739 |
- "por %%mm4, %%mm2 \n\t" /* 000RRGBR 1 */\ |
|
| 740 |
- "pand "MANGLE(bm00001111)", %%mm2 \n\t" /* 0000RGBR 1 */\ |
|
| 741 |
- "movq %%mm1, %%mm4 \n\t" /* 0RGB0RGB 2 */\ |
|
| 742 |
- "psrlq $8, %%mm1 \n\t" /* 00RGB0RG 2 */\ |
|
| 743 |
- "pand "MANGLE(bm00000111)", %%mm4 \n\t" /* 00000RGB 2 */\ |
|
| 744 |
- "pand "MANGLE(bm11111000)", %%mm1 \n\t" /* 00RGB000 2.5 */\ |
|
| 745 |
- "por %%mm4, %%mm1 \n\t" /* 00RGBRGB 2 */\ |
|
| 746 |
- "movq %%mm1, %%mm4 \n\t" /* 00RGBRGB 2 */\ |
|
| 747 |
- "psllq $32, %%mm1 \n\t" /* BRGB0000 2 */\ |
|
| 748 |
- "por %%mm1, %%mm2 \n\t" /* BRGBRGBR 1 */\ |
|
| 749 |
-\ |
|
| 750 |
- "psrlq $32, %%mm4 \n\t" /* 000000RG 2.5 */\ |
|
| 751 |
- "movq %%mm3, %%mm5 \n\t" /* 0RGB0RGB 3 */\ |
|
| 752 |
- "psrlq $8, %%mm3 \n\t" /* 00RGB0RG 3 */\ |
|
| 753 |
- "pand "MANGLE(bm00000111)", %%mm5 \n\t" /* 00000RGB 3 */\ |
|
| 754 |
- "pand "MANGLE(bm11111000)", %%mm3 \n\t" /* 00RGB000 3.5 */\ |
|
| 755 |
- "por %%mm5, %%mm3 \n\t" /* 00RGBRGB 3 */\ |
|
| 756 |
- "psllq $16, %%mm3 \n\t" /* RGBRGB00 3 */\ |
|
| 757 |
- "por %%mm4, %%mm3 \n\t" /* RGBRGBRG 2.5 */\ |
|
| 758 |
-\ |
|
| 759 |
- MOVNTQ(%%mm0, (dst))\ |
|
| 760 |
- MOVNTQ(%%mm2, 8(dst))\ |
|
| 761 |
- MOVNTQ(%%mm3, 16(dst))\ |
|
| 762 |
- "add $24, "#dst" \n\t"\ |
|
| 763 |
-\ |
|
| 764 |
- "add $8, "#index" \n\t"\ |
|
| 765 |
- "cmp "#dstw", "#index" \n\t"\ |
|
| 766 |
- " jb 1b \n\t" |
|
| 767 |
- |
|
| 768 | 712 |
#define WRITEBGR24MMX(dst, dstw, index) \ |
| 769 | 713 |
/* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */\ |
| 770 | 714 |
"movq %%mm2, %%mm1 \n\t" /* B */\ |
| ... | ... |
@@ -896,7 +831,6 @@ static inline void RENAME(yuv2yuvX)(SwsContext *c, const int16_t *lumFilter, con |
| 896 | 896 |
const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize, const int16_t **alpSrc, |
| 897 | 897 |
uint8_t *dest, uint8_t *uDest, uint8_t *vDest, uint8_t *aDest, long dstW, long chrDstW) |
| 898 | 898 |
{
|
| 899 |
- if(!(c->flags & SWS_BITEXACT)) {
|
|
| 900 | 899 |
if (c->flags & SWS_ACCURATE_RND) {
|
| 901 | 900 |
if (uDest) {
|
| 902 | 901 |
YSCALEYUV2YV12X_ACCURATE( "0", CHR_MMX_FILTER_OFFSET, uDest, chrDstW) |
| ... | ... |
@@ -918,27 +852,11 @@ static inline void RENAME(yuv2yuvX)(SwsContext *c, const int16_t *lumFilter, con |
| 918 | 918 |
|
| 919 | 919 |
YSCALEYUV2YV12X("0", LUM_MMX_FILTER_OFFSET, dest, dstW)
|
| 920 | 920 |
} |
| 921 |
- return; |
|
| 922 |
- } |
|
| 923 |
- yuv2yuvXinC(lumFilter, lumSrc, lumFilterSize, |
|
| 924 |
- chrFilter, chrSrc, chrFilterSize, |
|
| 925 |
- alpSrc, dest, uDest, vDest, aDest, dstW, chrDstW); |
|
| 926 |
-} |
|
| 927 |
- |
|
| 928 |
-static inline void RENAME(yuv2nv12X)(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize, |
|
| 929 |
- const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize, |
|
| 930 |
- uint8_t *dest, uint8_t *uDest, int dstW, int chrDstW, enum PixelFormat dstFormat) |
|
| 931 |
-{
|
|
| 932 |
- yuv2nv12XinC(lumFilter, lumSrc, lumFilterSize, |
|
| 933 |
- chrFilter, chrSrc, chrFilterSize, |
|
| 934 |
- dest, uDest, dstW, chrDstW, dstFormat); |
|
| 935 | 921 |
} |
| 936 | 922 |
|
| 937 | 923 |
static inline void RENAME(yuv2yuv1)(SwsContext *c, const int16_t *lumSrc, const int16_t *chrSrc, const int16_t *alpSrc, |
| 938 | 924 |
uint8_t *dest, uint8_t *uDest, uint8_t *vDest, uint8_t *aDest, long dstW, long chrDstW) |
| 939 | 925 |
{
|
| 940 |
- int i; |
|
| 941 |
- if(!(c->flags & SWS_BITEXACT)) {
|
|
| 942 | 926 |
long p= 4; |
| 943 | 927 |
const int16_t *src[4]= {alpSrc + dstW, lumSrc + dstW, chrSrc + chrDstW, chrSrc + VOFW + chrDstW};
|
| 944 | 928 |
uint8_t *dst[4]= {aDest, dest, uDest, vDest};
|
| ... | ... |
@@ -967,40 +885,6 @@ static inline void RENAME(yuv2yuv1)(SwsContext *c, const int16_t *lumSrc, const |
| 967 | 967 |
} |
| 968 | 968 |
} |
| 969 | 969 |
} |
| 970 |
- return; |
|
| 971 |
- } |
|
| 972 |
- for (i=0; i<dstW; i++) {
|
|
| 973 |
- int val= (lumSrc[i]+64)>>7; |
|
| 974 |
- |
|
| 975 |
- if (val&256) {
|
|
| 976 |
- if (val<0) val=0; |
|
| 977 |
- else val=255; |
|
| 978 |
- } |
|
| 979 |
- |
|
| 980 |
- dest[i]= val; |
|
| 981 |
- } |
|
| 982 |
- |
|
| 983 |
- if (uDest) |
|
| 984 |
- for (i=0; i<chrDstW; i++) {
|
|
| 985 |
- int u=(chrSrc[i ]+64)>>7; |
|
| 986 |
- int v=(chrSrc[i + VOFW]+64)>>7; |
|
| 987 |
- |
|
| 988 |
- if ((u|v)&256) {
|
|
| 989 |
- if (u<0) u=0; |
|
| 990 |
- else if (u>255) u=255; |
|
| 991 |
- if (v<0) v=0; |
|
| 992 |
- else if (v>255) v=255; |
|
| 993 |
- } |
|
| 994 |
- |
|
| 995 |
- uDest[i]= u; |
|
| 996 |
- vDest[i]= v; |
|
| 997 |
- } |
|
| 998 |
- |
|
| 999 |
- if (CONFIG_SWSCALE_ALPHA && aDest) |
|
| 1000 |
- for (i=0; i<dstW; i++) {
|
|
| 1001 |
- int val= (alpSrc[i]+64)>>7; |
|
| 1002 |
- aDest[i]= av_clip_uint8(val); |
|
| 1003 |
- } |
|
| 1004 | 970 |
} |
| 1005 | 971 |
|
| 1006 | 972 |
|
| ... | ... |
@@ -1013,7 +897,7 @@ static inline void RENAME(yuv2packedX)(SwsContext *c, const int16_t *lumFilter, |
| 1013 | 1013 |
{
|
| 1014 | 1014 |
x86_reg dummy=0; |
| 1015 | 1015 |
x86_reg dstW_reg = dstW; |
| 1016 |
- if(!(c->flags & SWS_BITEXACT)) {
|
|
| 1016 |
+ |
|
| 1017 | 1017 |
if (c->flags & SWS_ACCURATE_RND) {
|
| 1018 | 1018 |
switch(c->dstFormat) {
|
| 1019 | 1019 |
case PIX_FMT_RGB32: |
| ... | ... |
@@ -1170,7 +1054,7 @@ static inline void RENAME(yuv2packedX)(SwsContext *c, const int16_t *lumFilter, |
| 1170 | 1170 |
return; |
| 1171 | 1171 |
} |
| 1172 | 1172 |
} |
| 1173 |
- } |
|
| 1173 |
+ |
|
| 1174 | 1174 |
yuv2packedXinC(c, lumFilter, lumSrc, lumFilterSize, |
| 1175 | 1175 |
chrFilter, chrSrc, chrFilterSize, |
| 1176 | 1176 |
alpSrc, dest, dstW, dstY); |
| ... | ... |
@@ -1182,11 +1066,6 @@ static inline void RENAME(yuv2packedX)(SwsContext *c, const int16_t *lumFilter, |
| 1182 | 1182 |
static inline void RENAME(yuv2packed2)(SwsContext *c, const uint16_t *buf0, const uint16_t *buf1, const uint16_t *uvbuf0, const uint16_t *uvbuf1, |
| 1183 | 1183 |
const uint16_t *abuf0, const uint16_t *abuf1, uint8_t *dest, int dstW, int yalpha, int uvalpha, int y) |
| 1184 | 1184 |
{
|
| 1185 |
- int yalpha1=4095- yalpha; |
|
| 1186 |
- int uvalpha1=4095-uvalpha; |
|
| 1187 |
- int i; |
|
| 1188 |
- |
|
| 1189 |
- if(!(c->flags & SWS_BITEXACT)) {
|
|
| 1190 | 1185 |
switch(c->dstFormat) {
|
| 1191 | 1186 |
//Note 8280 == DSTW_OFFSET but the preprocessor can't handle that there :( |
| 1192 | 1187 |
case PIX_FMT_RGB32: |
| ... | ... |
@@ -1317,10 +1196,10 @@ static inline void RENAME(yuv2packed2)(SwsContext *c, const uint16_t *buf0, cons |
| 1317 | 1317 |
"a" (&c->redDither) |
| 1318 | 1318 |
); |
| 1319 | 1319 |
return; |
| 1320 |
- default: break; |
|
| 1321 | 1320 |
} |
| 1322 |
- } |
|
| 1323 |
- YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB2_C, YSCALE_YUV_2_PACKED2_C(void,0), YSCALE_YUV_2_GRAY16_2_C, YSCALE_YUV_2_MONO2_C) |
|
| 1321 |
+ |
|
| 1322 |
+ yuv2packed2_c(c, buf0, buf1, uvbuf0, uvbuf1, abuf0, abuf1, |
|
| 1323 |
+ dest, dstW, yalpha, uvalpha, y); |
|
| 1324 | 1324 |
} |
| 1325 | 1325 |
|
| 1326 | 1326 |
/** |
| ... | ... |
@@ -1329,18 +1208,13 @@ static inline void RENAME(yuv2packed2)(SwsContext *c, const uint16_t *buf0, cons |
| 1329 | 1329 |
static inline void RENAME(yuv2packed1)(SwsContext *c, const uint16_t *buf0, const uint16_t *uvbuf0, const uint16_t *uvbuf1, |
| 1330 | 1330 |
const uint16_t *abuf0, uint8_t *dest, int dstW, int uvalpha, enum PixelFormat dstFormat, int flags, int y) |
| 1331 | 1331 |
{
|
| 1332 |
- const int yalpha1=0; |
|
| 1333 |
- int i; |
|
| 1334 |
- |
|
| 1335 |
- const uint16_t *buf1= buf0; //FIXME needed for RGB1/BGR1 |
|
| 1336 |
- const int yalpha= 4096; //FIXME ... |
|
| 1332 |
+ const uint16_t *buf1= buf0; //FIXME needed for RGB1/BGR1 |
|
| 1337 | 1333 |
|
| 1338 |
- if (flags&SWS_FULL_CHR_H_INT) {
|
|
| 1339 |
- c->yuv2packed2(c, buf0, buf0, uvbuf0, uvbuf1, abuf0, abuf0, dest, dstW, 0, uvalpha, y); |
|
| 1340 |
- return; |
|
| 1341 |
- } |
|
| 1334 |
+ if (flags&SWS_FULL_CHR_H_INT) {
|
|
| 1335 |
+ c->yuv2packed2(c, buf0, buf0, uvbuf0, uvbuf1, abuf0, abuf0, dest, dstW, 0, uvalpha, y); |
|
| 1336 |
+ return; |
|
| 1337 |
+ } |
|
| 1342 | 1338 |
|
| 1343 |
- if(!(flags & SWS_BITEXACT)) {
|
|
| 1344 | 1339 |
if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
|
| 1345 | 1340 |
switch(dstFormat) {
|
| 1346 | 1341 |
case PIX_FMT_RGB32: |
| ... | ... |
@@ -1554,12 +1428,9 @@ static inline void RENAME(yuv2packed1)(SwsContext *c, const uint16_t *buf0, cons |
| 1554 | 1554 |
return; |
| 1555 | 1555 |
} |
| 1556 | 1556 |
} |
| 1557 |
- } |
|
| 1558 |
- if (uvalpha < 2048) {
|
|
| 1559 |
- YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1_C, YSCALE_YUV_2_PACKED1_C(void,0), YSCALE_YUV_2_GRAY16_1_C, YSCALE_YUV_2_MONO2_C) |
|
| 1560 |
- } else {
|
|
| 1561 |
- YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1B_C, YSCALE_YUV_2_PACKED1B_C(void,0), YSCALE_YUV_2_GRAY16_1_C, YSCALE_YUV_2_MONO2_C) |
|
| 1562 |
- } |
|
| 1557 |
+ |
|
| 1558 |
+ yuv2packed1_c(c, buf0, uvbuf0, uvbuf1, abuf0, dest, |
|
| 1559 |
+ dstW, uvalpha, dstFormat, flags, y); |
|
| 1563 | 1560 |
} |
| 1564 | 1561 |
|
| 1565 | 1562 |
//FIXME yuy2* can read up to 7 samples too much |
| ... | ... |
@@ -1866,20 +1737,6 @@ static inline void RENAME(bgr24ToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t |
| 1866 | 1866 |
assert(src1 == src2); |
| 1867 | 1867 |
} |
| 1868 | 1868 |
|
| 1869 |
-static inline void RENAME(bgr24ToUV_half)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, const uint8_t *src2, long width, uint32_t *unused) |
|
| 1870 |
-{
|
|
| 1871 |
- int i; |
|
| 1872 |
- for (i=0; i<width; i++) {
|
|
| 1873 |
- int b= src1[6*i + 0] + src1[6*i + 3]; |
|
| 1874 |
- int g= src1[6*i + 1] + src1[6*i + 4]; |
|
| 1875 |
- int r= src1[6*i + 2] + src1[6*i + 5]; |
|
| 1876 |
- |
|
| 1877 |
- dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1); |
|
| 1878 |
- dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1); |
|
| 1879 |
- } |
|
| 1880 |
- assert(src1 == src2); |
|
| 1881 |
-} |
|
| 1882 |
- |
|
| 1883 | 1869 |
static inline void RENAME(rgb24ToY)(uint8_t *dst, const uint8_t *src, long width, uint32_t *unused) |
| 1884 | 1870 |
{
|
| 1885 | 1871 |
RENAME(bgr24ToY_mmx)(dst, src, width, PIX_FMT_RGB24); |
| ... | ... |
@@ -1891,20 +1748,6 @@ static inline void RENAME(rgb24ToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t |
| 1891 | 1891 |
RENAME(bgr24ToUV_mmx)(dstU, dstV, src1, width, PIX_FMT_RGB24); |
| 1892 | 1892 |
} |
| 1893 | 1893 |
|
| 1894 |
-static inline void RENAME(rgb24ToUV_half)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, const uint8_t *src2, long width, uint32_t *unused) |
|
| 1895 |
-{
|
|
| 1896 |
- int i; |
|
| 1897 |
- assert(src1==src2); |
|
| 1898 |
- for (i=0; i<width; i++) {
|
|
| 1899 |
- int r= src1[6*i + 0] + src1[6*i + 3]; |
|
| 1900 |
- int g= src1[6*i + 1] + src1[6*i + 4]; |
|
| 1901 |
- int b= src1[6*i + 2] + src1[6*i + 5]; |
|
| 1902 |
- |
|
| 1903 |
- dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1); |
|
| 1904 |
- dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1); |
|
| 1905 |
- } |
|
| 1906 |
-} |
|
| 1907 |
- |
|
| 1908 | 1894 |
|
| 1909 | 1895 |
// bilinear / bicubic scaling |
| 1910 | 1896 |
static inline void RENAME(hScale)(int16_t *dst, int dstW, const uint8_t *src, int srcW, int xInc, |
| ... | ... |
@@ -2061,50 +1904,168 @@ static inline void RENAME(hScale)(int16_t *dst, int dstW, const uint8_t *src, in |
| 2061 | 2061 |
} |
| 2062 | 2062 |
} |
| 2063 | 2063 |
|
| 2064 |
-//FIXME all pal and rgb srcFormats could do this convertion as well |
|
| 2065 |
-//FIXME all scalers more complex than bilinear could do half of this transform |
|
| 2066 |
-static void RENAME(chrRangeToJpeg)(int16_t *dst, int width) |
|
| 2064 |
+static inline void RENAME(hScale16)(int16_t *dst, int dstW, const uint16_t *src, int srcW, int xInc, |
|
| 2065 |
+ const int16_t *filter, const int16_t *filterPos, long filterSize, int shift) |
|
| 2067 | 2066 |
{
|
| 2068 |
- int i; |
|
| 2069 |
- for (i = 0; i < width; i++) {
|
|
| 2070 |
- dst[i ] = (FFMIN(dst[i ],30775)*4663 - 9289992)>>12; //-264 |
|
| 2071 |
- dst[i+VOFW] = (FFMIN(dst[i+VOFW],30775)*4663 - 9289992)>>12; //-264 |
|
| 2072 |
- } |
|
| 2073 |
-} |
|
| 2074 |
-static void RENAME(chrRangeFromJpeg)(int16_t *dst, int width) |
|
| 2075 |
-{
|
|
| 2076 |
- int i; |
|
| 2077 |
- for (i = 0; i < width; i++) {
|
|
| 2078 |
- dst[i ] = (dst[i ]*1799 + 4081085)>>11; //1469 |
|
| 2079 |
- dst[i+VOFW] = (dst[i+VOFW]*1799 + 4081085)>>11; //1469 |
|
| 2067 |
+ int i, j; |
|
| 2068 |
+ |
|
| 2069 |
+ assert(filterSize % 4 == 0 && filterSize>0); |
|
| 2070 |
+ if (filterSize==4 && shift<15) { // Always true for upscaling, sometimes for down, too.
|
|
| 2071 |
+ x86_reg counter= -2*dstW; |
|
| 2072 |
+ filter-= counter*2; |
|
| 2073 |
+ filterPos-= counter/2; |
|
| 2074 |
+ dst-= counter/2; |
|
| 2075 |
+ __asm__ volatile( |
|
| 2076 |
+ "movd %5, %%mm7 \n\t" |
|
| 2077 |
+#if defined(PIC) |
|
| 2078 |
+ "push %%"REG_b" \n\t" |
|
| 2079 |
+#endif |
|
| 2080 |
+ "push %%"REG_BP" \n\t" // we use 7 regs here ... |
|
| 2081 |
+ "mov %%"REG_a", %%"REG_BP" \n\t" |
|
| 2082 |
+ ".p2align 4 \n\t" |
|
| 2083 |
+ "1: \n\t" |
|
| 2084 |
+ "movzwl (%2, %%"REG_BP"), %%eax \n\t" |
|
| 2085 |
+ "movzwl 2(%2, %%"REG_BP"), %%ebx \n\t" |
|
| 2086 |
+ "movq (%1, %%"REG_BP", 4), %%mm1 \n\t" |
|
| 2087 |
+ "movq 8(%1, %%"REG_BP", 4), %%mm3 \n\t" |
|
| 2088 |
+ "movq (%3, %%"REG_a", 2), %%mm0 \n\t" |
|
| 2089 |
+ "movq (%3, %%"REG_b", 2), %%mm2 \n\t" |
|
| 2090 |
+ "pmaddwd %%mm1, %%mm0 \n\t" |
|
| 2091 |
+ "pmaddwd %%mm2, %%mm3 \n\t" |
|
| 2092 |
+ "movq %%mm0, %%mm4 \n\t" |
|
| 2093 |
+ "punpckldq %%mm3, %%mm0 \n\t" |
|
| 2094 |
+ "punpckhdq %%mm3, %%mm4 \n\t" |
|
| 2095 |
+ "paddd %%mm4, %%mm0 \n\t" |
|
| 2096 |
+ "psrad %%mm7, %%mm0 \n\t" |
|
| 2097 |
+ "packssdw %%mm0, %%mm0 \n\t" |
|
| 2098 |
+ "movd %%mm0, (%4, %%"REG_BP") \n\t" |
|
| 2099 |
+ "add $4, %%"REG_BP" \n\t" |
|
| 2100 |
+ " jnc 1b \n\t" |
|
| 2101 |
+ |
|
| 2102 |
+ "pop %%"REG_BP" \n\t" |
|
| 2103 |
+#if defined(PIC) |
|
| 2104 |
+ "pop %%"REG_b" \n\t" |
|
| 2105 |
+#endif |
|
| 2106 |
+ : "+a" (counter) |
|
| 2107 |
+ : "c" (filter), "d" (filterPos), "S" (src), "D" (dst), "m"(shift) |
|
| 2108 |
+#if !defined(PIC) |
|
| 2109 |
+ : "%"REG_b |
|
| 2110 |
+#endif |
|
| 2111 |
+ ); |
|
| 2112 |
+ } else if (filterSize==8 && shift<15) {
|
|
| 2113 |
+ x86_reg counter= -2*dstW; |
|
| 2114 |
+ filter-= counter*4; |
|
| 2115 |
+ filterPos-= counter/2; |
|
| 2116 |
+ dst-= counter/2; |
|
| 2117 |
+ __asm__ volatile( |
|
| 2118 |
+ "movd %5, %%mm7 \n\t" |
|
| 2119 |
+#if defined(PIC) |
|
| 2120 |
+ "push %%"REG_b" \n\t" |
|
| 2121 |
+#endif |
|
| 2122 |
+ "push %%"REG_BP" \n\t" // we use 7 regs here ... |
|
| 2123 |
+ "mov %%"REG_a", %%"REG_BP" \n\t" |
|
| 2124 |
+ ".p2align 4 \n\t" |
|
| 2125 |
+ "1: \n\t" |
|
| 2126 |
+ "movzwl (%2, %%"REG_BP"), %%eax \n\t" |
|
| 2127 |
+ "movzwl 2(%2, %%"REG_BP"), %%ebx \n\t" |
|
| 2128 |
+ "movq (%1, %%"REG_BP", 8), %%mm1 \n\t" |
|
| 2129 |
+ "movq 16(%1, %%"REG_BP", 8), %%mm3 \n\t" |
|
| 2130 |
+ "movq (%3, %%"REG_a", 2), %%mm0 \n\t" |
|
| 2131 |
+ "movq (%3, %%"REG_b", 2), %%mm2 \n\t" |
|
| 2132 |
+ "pmaddwd %%mm1, %%mm0 \n\t" |
|
| 2133 |
+ "pmaddwd %%mm2, %%mm3 \n\t" |
|
| 2134 |
+ |
|
| 2135 |
+ "movq 8(%1, %%"REG_BP", 8), %%mm1 \n\t" |
|
| 2136 |
+ "movq 24(%1, %%"REG_BP", 8), %%mm5 \n\t" |
|
| 2137 |
+ "movq 8(%3, %%"REG_a", 2), %%mm4 \n\t" |
|
| 2138 |
+ "movq 8(%3, %%"REG_b", 2), %%mm2 \n\t" |
|
| 2139 |
+ "pmaddwd %%mm1, %%mm4 \n\t" |
|
| 2140 |
+ "pmaddwd %%mm2, %%mm5 \n\t" |
|
| 2141 |
+ "paddd %%mm4, %%mm0 \n\t" |
|
| 2142 |
+ "paddd %%mm5, %%mm3 \n\t" |
|
| 2143 |
+ "movq %%mm0, %%mm4 \n\t" |
|
| 2144 |
+ "punpckldq %%mm3, %%mm0 \n\t" |
|
| 2145 |
+ "punpckhdq %%mm3, %%mm4 \n\t" |
|
| 2146 |
+ "paddd %%mm4, %%mm0 \n\t" |
|
| 2147 |
+ "psrad %%mm7, %%mm0 \n\t" |
|
| 2148 |
+ "packssdw %%mm0, %%mm0 \n\t" |
|
| 2149 |
+ "movd %%mm0, (%4, %%"REG_BP") \n\t" |
|
| 2150 |
+ "add $4, %%"REG_BP" \n\t" |
|
| 2151 |
+ " jnc 1b \n\t" |
|
| 2152 |
+ |
|
| 2153 |
+ "pop %%"REG_BP" \n\t" |
|
| 2154 |
+#if defined(PIC) |
|
| 2155 |
+ "pop %%"REG_b" \n\t" |
|
| 2156 |
+#endif |
|
| 2157 |
+ : "+a" (counter) |
|
| 2158 |
+ : "c" (filter), "d" (filterPos), "S" (src), "D" (dst), "m"(shift) |
|
| 2159 |
+#if !defined(PIC) |
|
| 2160 |
+ : "%"REG_b |
|
| 2161 |
+#endif |
|
| 2162 |
+ ); |
|
| 2163 |
+ } else if (shift<15){
|
|
| 2164 |
+ const uint16_t *offset = src+filterSize; |
|
| 2165 |
+ x86_reg counter= -2*dstW; |
|
| 2166 |
+ //filter-= counter*filterSize/2; |
|
| 2167 |
+ filterPos-= counter/2; |
|
| 2168 |
+ dst-= counter/2; |
|
| 2169 |
+ __asm__ volatile( |
|
| 2170 |
+ "movd %7, %%mm7 \n\t" |
|
| 2171 |
+ ".p2align 4 \n\t" |
|
| 2172 |
+ "1: \n\t" |
|
| 2173 |
+ "mov %2, %%"REG_c" \n\t" |
|
| 2174 |
+ "movzwl (%%"REG_c", %0), %%eax \n\t" |
|
| 2175 |
+ "movzwl 2(%%"REG_c", %0), %%edx \n\t" |
|
| 2176 |
+ "mov %5, %%"REG_c" \n\t" |
|
| 2177 |
+ "pxor %%mm4, %%mm4 \n\t" |
|
| 2178 |
+ "pxor %%mm5, %%mm5 \n\t" |
|
| 2179 |
+ "2: \n\t" |
|
| 2180 |
+ "movq (%1), %%mm1 \n\t" |
|
| 2181 |
+ "movq (%1, %6), %%mm3 \n\t" |
|
| 2182 |
+ "movq (%%"REG_c", %%"REG_a", 2), %%mm0 \n\t" |
|
| 2183 |
+ "movq (%%"REG_c", %%"REG_d", 2), %%mm2 \n\t" |
|
| 2184 |
+ "pmaddwd %%mm1, %%mm0 \n\t" |
|
| 2185 |
+ "pmaddwd %%mm2, %%mm3 \n\t" |
|
| 2186 |
+ "paddd %%mm3, %%mm5 \n\t" |
|
| 2187 |
+ "paddd %%mm0, %%mm4 \n\t" |
|
| 2188 |
+ "add $8, %1 \n\t" |
|
| 2189 |
+ "add $8, %%"REG_c" \n\t" |
|
| 2190 |
+ "cmp %4, %%"REG_c" \n\t" |
|
| 2191 |
+ " jb 2b \n\t" |
|
| 2192 |
+ "add %6, %1 \n\t" |
|
| 2193 |
+ "movq %%mm4, %%mm0 \n\t" |
|
| 2194 |
+ "punpckldq %%mm5, %%mm4 \n\t" |
|
| 2195 |
+ "punpckhdq %%mm5, %%mm0 \n\t" |
|
| 2196 |
+ "paddd %%mm0, %%mm4 \n\t" |
|
| 2197 |
+ "psrad %%mm7, %%mm4 \n\t" |
|
| 2198 |
+ "packssdw %%mm4, %%mm4 \n\t" |
|
| 2199 |
+ "mov %3, %%"REG_a" \n\t" |
|
| 2200 |
+ "movd %%mm4, (%%"REG_a", %0) \n\t" |
|
| 2201 |
+ "add $4, %0 \n\t" |
|
| 2202 |
+ " jnc 1b \n\t" |
|
| 2203 |
+ |
|
| 2204 |
+ : "+r" (counter), "+r" (filter) |
|
| 2205 |
+ : "m" (filterPos), "m" (dst), "m"(offset), |
|
| 2206 |
+ "m" (src), "r" ((x86_reg)filterSize*2), "m"(shift) |
|
| 2207 |
+ : "%"REG_a, "%"REG_c, "%"REG_d |
|
| 2208 |
+ ); |
|
| 2209 |
+ } else |
|
| 2210 |
+ for (i=0; i<dstW; i++) {
|
|
| 2211 |
+ int srcPos= filterPos[i]; |
|
| 2212 |
+ int val=0; |
|
| 2213 |
+ for (j=0; j<filterSize; j++) {
|
|
| 2214 |
+ val += ((int)src[srcPos + j])*filter[filterSize*i + j]; |
|
| 2215 |
+ } |
|
| 2216 |
+ dst[i] = FFMIN(val>>shift, (1<<15)-1); // the cubic equation does overflow ... |
|
| 2080 | 2217 |
} |
| 2081 | 2218 |
} |
| 2082 |
-static void RENAME(lumRangeToJpeg)(int16_t *dst, int width) |
|
| 2083 |
-{
|
|
| 2084 |
- int i; |
|
| 2085 |
- for (i = 0; i < width; i++) |
|
| 2086 |
- dst[i] = (FFMIN(dst[i],30189)*19077 - 39057361)>>14; |
|
| 2087 |
-} |
|
| 2088 |
-static void RENAME(lumRangeFromJpeg)(int16_t *dst, int width) |
|
| 2089 |
-{
|
|
| 2090 |
- int i; |
|
| 2091 |
- for (i = 0; i < width; i++) |
|
| 2092 |
- dst[i] = (dst[i]*14071 + 33561947)>>14; |
|
| 2093 |
-} |
|
| 2094 | 2219 |
|
| 2095 |
-#define FAST_BILINEAR_X86 \ |
|
| 2096 |
- "subl %%edi, %%esi \n\t" /* src[xx+1] - src[xx] */ \ |
|
| 2097 |
- "imull %%ecx, %%esi \n\t" /* (src[xx+1] - src[xx])*xalpha */ \ |
|
| 2098 |
- "shll $16, %%edi \n\t" \ |
|
| 2099 |
- "addl %%edi, %%esi \n\t" /* src[xx+1]*xalpha + src[xx]*(1-xalpha) */ \ |
|
| 2100 |
- "mov %1, %%"REG_D"\n\t" \ |
|
| 2101 |
- "shrl $9, %%esi \n\t" \ |
|
| 2102 | 2220 |
|
| 2221 |
+#if COMPILE_TEMPLATE_MMX2 |
|
| 2103 | 2222 |
static inline void RENAME(hyscale_fast)(SwsContext *c, int16_t *dst, |
| 2104 | 2223 |
long dstWidth, const uint8_t *src, int srcW, |
| 2105 | 2224 |
int xInc) |
| 2106 | 2225 |
{
|
| 2107 |
-#if COMPILE_TEMPLATE_MMX2 |
|
| 2108 | 2226 |
int32_t *filterPos = c->hLumFilterPos; |
| 2109 | 2227 |
int16_t *filter = c->hLumFilter; |
| 2110 | 2228 |
int canMMX2BeUsed = c->canMMX2BeUsed; |
| ... | ... |
@@ -2113,7 +2074,7 @@ static inline void RENAME(hyscale_fast)(SwsContext *c, int16_t *dst, |
| 2113 | 2113 |
#if defined(PIC) |
| 2114 | 2114 |
DECLARE_ALIGNED(8, uint64_t, ebxsave); |
| 2115 | 2115 |
#endif |
| 2116 |
- if (canMMX2BeUsed) {
|
|
| 2116 |
+ |
|
| 2117 | 2117 |
__asm__ volatile( |
| 2118 | 2118 |
#if defined(PIC) |
| 2119 | 2119 |
"mov %%"REG_b", %5 \n\t" |
| ... | ... |
@@ -2172,80 +2133,12 @@ static inline void RENAME(hyscale_fast)(SwsContext *c, int16_t *dst, |
| 2172 | 2172 |
#endif |
| 2173 | 2173 |
); |
| 2174 | 2174 |
for (i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--) dst[i] = src[srcW-1]*128; |
| 2175 |
- } else {
|
|
| 2176 |
-#endif /* COMPILE_TEMPLATE_MMX2 */ |
|
| 2177 |
- x86_reg xInc_shr16 = xInc >> 16; |
|
| 2178 |
- uint16_t xInc_mask = xInc & 0xffff; |
|
| 2179 |
- x86_reg dstWidth_reg = dstWidth; |
|
| 2180 |
- //NO MMX just normal asm ... |
|
| 2181 |
- __asm__ volatile( |
|
| 2182 |
- "xor %%"REG_a", %%"REG_a" \n\t" // i |
|
| 2183 |
- "xor %%"REG_d", %%"REG_d" \n\t" // xx |
|
| 2184 |
- "xorl %%ecx, %%ecx \n\t" // xalpha |
|
| 2185 |
- ".p2align 4 \n\t" |
|
| 2186 |
- "1: \n\t" |
|
| 2187 |
- "movzbl (%0, %%"REG_d"), %%edi \n\t" //src[xx] |
|
| 2188 |
- "movzbl 1(%0, %%"REG_d"), %%esi \n\t" //src[xx+1] |
|
| 2189 |
- FAST_BILINEAR_X86 |
|
| 2190 |
- "movw %%si, (%%"REG_D", %%"REG_a", 2) \n\t" |
|
| 2191 |
- "addw %4, %%cx \n\t" //xalpha += xInc&0xFFFF |
|
| 2192 |
- "adc %3, %%"REG_d" \n\t" //xx+= xInc>>16 + carry |
|
| 2193 |
- |
|
| 2194 |
- "movzbl (%0, %%"REG_d"), %%edi \n\t" //src[xx] |
|
| 2195 |
- "movzbl 1(%0, %%"REG_d"), %%esi \n\t" //src[xx+1] |
|
| 2196 |
- FAST_BILINEAR_X86 |
|
| 2197 |
- "movw %%si, 2(%%"REG_D", %%"REG_a", 2) \n\t" |
|
| 2198 |
- "addw %4, %%cx \n\t" //xalpha += xInc&0xFFFF |
|
| 2199 |
- "adc %3, %%"REG_d" \n\t" //xx+= xInc>>16 + carry |
|
| 2200 |
- |
|
| 2201 |
- |
|
| 2202 |
- "add $2, %%"REG_a" \n\t" |
|
| 2203 |
- "cmp %2, %%"REG_a" \n\t" |
|
| 2204 |
- " jb 1b \n\t" |
|
| 2205 |
- |
|
| 2206 |
- |
|
| 2207 |
- :: "r" (src), "m" (dst), "m" (dstWidth_reg), "m" (xInc_shr16), "m" (xInc_mask) |
|
| 2208 |
- : "%"REG_a, "%"REG_d, "%ecx", "%"REG_D, "%esi" |
|
| 2209 |
- ); |
|
| 2210 |
-#if COMPILE_TEMPLATE_MMX2 |
|
| 2211 |
- } //if MMX2 can't be used |
|
| 2212 |
-#endif |
|
| 2213 |
-} |
|
| 2214 |
- |
|
| 2215 |
- // *** horizontal scale Y line to temp buffer |
|
| 2216 |
-static inline void RENAME(hyscale)(SwsContext *c, uint16_t *dst, long dstWidth, const uint8_t *src, int srcW, int xInc, |
|
| 2217 |
- const int16_t *hLumFilter, |
|
| 2218 |
- const int16_t *hLumFilterPos, int hLumFilterSize, |
|
| 2219 |
- uint8_t *formatConvBuffer, |
|
| 2220 |
- uint32_t *pal, int isAlpha) |
|
| 2221 |
-{
|
|
| 2222 |
- void (*toYV12)(uint8_t *, const uint8_t *, long, uint32_t *) = isAlpha ? c->alpToYV12 : c->lumToYV12; |
|
| 2223 |
- void (*convertRange)(int16_t *, int) = isAlpha ? NULL : c->lumConvertRange; |
|
| 2224 |
- |
|
| 2225 |
- src += isAlpha ? c->alpSrcOffset : c->lumSrcOffset; |
|
| 2226 |
- |
|
| 2227 |
- if (toYV12) {
|
|
| 2228 |
- toYV12(formatConvBuffer, src, srcW, pal); |
|
| 2229 |
- src= formatConvBuffer; |
|
| 2230 |
- } |
|
| 2231 |
- |
|
| 2232 |
- if (c->hScale16) {
|
|
| 2233 |
- c->hScale16(dst, dstWidth, (uint16_t*)src, srcW, xInc, hLumFilter, hLumFilterPos, hLumFilterSize, av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1); |
|
| 2234 |
- } else if (!c->hyscale_fast) {
|
|
| 2235 |
- c->hScale(dst, dstWidth, src, srcW, xInc, hLumFilter, hLumFilterPos, hLumFilterSize); |
|
| 2236 |
- } else { // fast bilinear upscale / crap downscale
|
|
| 2237 |
- c->hyscale_fast(c, dst, dstWidth, src, srcW, xInc); |
|
| 2238 |
- } |
|
| 2239 |
- |
|
| 2240 |
- if (convertRange) |
|
| 2241 |
- convertRange(dst, dstWidth); |
|
| 2242 | 2175 |
} |
| 2243 | 2176 |
|
| 2244 | 2177 |
static inline void RENAME(hcscale_fast)(SwsContext *c, int16_t *dst, |
| 2245 | 2178 |
long dstWidth, const uint8_t *src1, |
| 2246 | 2179 |
const uint8_t *src2, int srcW, int xInc) |
| 2247 | 2180 |
{
|
| 2248 |
-#if COMPILE_TEMPLATE_MMX2 |
|
| 2249 | 2181 |
int32_t *filterPos = c->hChrFilterPos; |
| 2250 | 2182 |
int16_t *filter = c->hChrFilter; |
| 2251 | 2183 |
int canMMX2BeUsed = c->canMMX2BeUsed; |
| ... | ... |
@@ -2254,7 +2147,7 @@ static inline void RENAME(hcscale_fast)(SwsContext *c, int16_t *dst, |
| 2254 | 2254 |
#if defined(PIC) |
| 2255 | 2255 |
DECLARE_ALIGNED(8, uint64_t, ebxsave); |
| 2256 | 2256 |
#endif |
| 2257 |
- if (canMMX2BeUsed) {
|
|
| 2257 |
+ |
|
| 2258 | 2258 |
__asm__ volatile( |
| 2259 | 2259 |
#if defined(PIC) |
| 2260 | 2260 |
"mov %%"REG_b", %6 \n\t" |
| ... | ... |
@@ -2304,252 +2197,32 @@ static inline void RENAME(hcscale_fast)(SwsContext *c, int16_t *dst, |
| 2304 | 2304 |
dst[i] = src1[srcW-1]*128; |
| 2305 | 2305 |
dst[i+VOFW] = src2[srcW-1]*128; |
| 2306 | 2306 |
} |
| 2307 |
- } else {
|
|
| 2308 |
-#endif /* COMPILE_TEMPLATE_MMX2 */ |
|
| 2309 |
- x86_reg xInc_shr16 = (x86_reg) (xInc >> 16); |
|
| 2310 |
- uint16_t xInc_mask = xInc & 0xffff; |
|
| 2311 |
- x86_reg dstWidth_reg = dstWidth; |
|
| 2312 |
- __asm__ volatile( |
|
| 2313 |
- "xor %%"REG_a", %%"REG_a" \n\t" // i |
|
| 2314 |
- "xor %%"REG_d", %%"REG_d" \n\t" // xx |
|
| 2315 |
- "xorl %%ecx, %%ecx \n\t" // xalpha |
|
| 2316 |
- ".p2align 4 \n\t" |
|
| 2317 |
- "1: \n\t" |
|
| 2318 |
- "mov %0, %%"REG_S" \n\t" |
|
| 2319 |
- "movzbl (%%"REG_S", %%"REG_d"), %%edi \n\t" //src[xx] |
|
| 2320 |
- "movzbl 1(%%"REG_S", %%"REG_d"), %%esi \n\t" //src[xx+1] |
|
| 2321 |
- FAST_BILINEAR_X86 |
|
| 2322 |
- "movw %%si, (%%"REG_D", %%"REG_a", 2) \n\t" |
|
| 2323 |
- |
|
| 2324 |
- "movzbl (%5, %%"REG_d"), %%edi \n\t" //src[xx] |
|
| 2325 |
- "movzbl 1(%5, %%"REG_d"), %%esi \n\t" //src[xx+1] |
|
| 2326 |
- FAST_BILINEAR_X86 |
|
| 2327 |
- "movw %%si, "AV_STRINGIFY(VOF)"(%%"REG_D", %%"REG_a", 2) \n\t" |
|
| 2328 |
- |
|
| 2329 |
- "addw %4, %%cx \n\t" //xalpha += xInc&0xFFFF |
|
| 2330 |
- "adc %3, %%"REG_d" \n\t" //xx+= xInc>>16 + carry |
|
| 2331 |
- "add $1, %%"REG_a" \n\t" |
|
| 2332 |
- "cmp %2, %%"REG_a" \n\t" |
|
| 2333 |
- " jb 1b \n\t" |
|
| 2334 |
- |
|
| 2335 |
-/* GCC 3.3 makes MPlayer crash on IA-32 machines when using "g" operand here, |
|
| 2336 |
-which is needed to support GCC 4.0. */ |
|
| 2337 |
-#if ARCH_X86_64 && AV_GCC_VERSION_AT_LEAST(3,4) |
|
| 2338 |
- :: "m" (src1), "m" (dst), "g" (dstWidth_reg), "m" (xInc_shr16), "m" (xInc_mask), |
|
| 2339 |
-#else |
|
| 2340 |
- :: "m" (src1), "m" (dst), "m" (dstWidth_reg), "m" (xInc_shr16), "m" (xInc_mask), |
|
| 2341 |
-#endif |
|
| 2342 |
- "r" (src2) |
|
| 2343 |
- : "%"REG_a, "%"REG_d, "%ecx", "%"REG_D, "%esi" |
|
| 2344 |
- ); |
|
| 2345 |
-#if COMPILE_TEMPLATE_MMX2 |
|
| 2346 |
- } //if MMX2 can't be used |
|
| 2347 |
-#endif |
|
| 2348 |
-} |
|
| 2349 |
- |
|
| 2350 |
-inline static void RENAME(hcscale)(SwsContext *c, uint16_t *dst, long dstWidth, const uint8_t *src1, const uint8_t *src2, |
|
| 2351 |
- int srcW, int xInc, const int16_t *hChrFilter, |
|
| 2352 |
- const int16_t *hChrFilterPos, int hChrFilterSize, |
|
| 2353 |
- uint8_t *formatConvBuffer, |
|
| 2354 |
- uint32_t *pal) |
|
| 2355 |
-{
|
|
| 2356 |
- |
|
| 2357 |
- src1 += c->chrSrcOffset; |
|
| 2358 |
- src2 += c->chrSrcOffset; |
|
| 2359 |
- |
|
| 2360 |
- if (c->chrToYV12) {
|
|
| 2361 |
- c->chrToYV12(formatConvBuffer, formatConvBuffer+VOFW, src1, src2, srcW, pal); |
|
| 2362 |
- src1= formatConvBuffer; |
|
| 2363 |
- src2= formatConvBuffer+VOFW; |
|
| 2364 |
- } |
|
| 2365 |
- |
|
| 2366 |
- if (c->hScale16) {
|
|
| 2367 |
- c->hScale16(dst , dstWidth, (uint16_t*)src1, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize, av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1); |
|
| 2368 |
- c->hScale16(dst+VOFW, dstWidth, (uint16_t*)src2, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize, av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1); |
|
| 2369 |
- } else if (!c->hcscale_fast) {
|
|
| 2370 |
- c->hScale(dst , dstWidth, src1, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize); |
|
| 2371 |
- c->hScale(dst+VOFW, dstWidth, src2, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize); |
|
| 2372 |
- } else { // fast bilinear upscale / crap downscale
|
|
| 2373 |
- c->hcscale_fast(c, dst, dstWidth, src1, src2, srcW, xInc); |
|
| 2374 |
- } |
|
| 2375 |
- |
|
| 2376 |
- if (c->chrConvertRange) |
|
| 2377 |
- c->chrConvertRange(dst, dstWidth); |
|
| 2378 | 2307 |
} |
| 2308 |
+#endif /* COMPILE_TEMPLATE_MMX2 */ |
|
| 2379 | 2309 |
|
| 2380 |
-#define DEBUG_SWSCALE_BUFFERS 0 |
|
| 2381 |
-#define DEBUG_BUFFERS(...) if (DEBUG_SWSCALE_BUFFERS) av_log(c, AV_LOG_DEBUG, __VA_ARGS__) |
|
| 2382 |
- |
|
| 2383 |
-static int RENAME(swScale)(SwsContext *c, const uint8_t* src[], int srcStride[], int srcSliceY, |
|
| 2384 |
- int srcSliceH, uint8_t* dst[], int dstStride[]) |
|
| 2310 |
+#if !COMPILE_TEMPLATE_MMX2 |
|
| 2311 |
+static void updateMMXDitherTables(SwsContext *c, int dstY, int lumBufIndex, int chrBufIndex, |
|
| 2312 |
+ int lastInLumBuf, int lastInChrBuf) |
|
| 2385 | 2313 |
{
|
| 2386 |
- /* load a few things into local vars to make the code more readable? and faster */ |
|
| 2387 |
- const int srcW= c->srcW; |
|
| 2388 |
- const int dstW= c->dstW; |
|
| 2389 | 2314 |
const int dstH= c->dstH; |
| 2390 |
- const int chrDstW= c->chrDstW; |
|
| 2391 |
- const int chrSrcW= c->chrSrcW; |
|
| 2392 |
- const int lumXInc= c->lumXInc; |
|
| 2393 |
- const int chrXInc= c->chrXInc; |
|
| 2394 |
- const enum PixelFormat dstFormat= c->dstFormat; |
|
| 2395 | 2315 |
const int flags= c->flags; |
| 2316 |
+ int16_t **lumPixBuf= c->lumPixBuf; |
|
| 2317 |
+ int16_t **chrPixBuf= c->chrPixBuf; |
|
| 2318 |
+ int16_t **alpPixBuf= c->alpPixBuf; |
|
| 2319 |
+ const int vLumBufSize= c->vLumBufSize; |
|
| 2320 |
+ const int vChrBufSize= c->vChrBufSize; |
|
| 2396 | 2321 |
int16_t *vLumFilterPos= c->vLumFilterPos; |
| 2397 | 2322 |
int16_t *vChrFilterPos= c->vChrFilterPos; |
| 2398 |
- int16_t *hLumFilterPos= c->hLumFilterPos; |
|
| 2399 |
- int16_t *hChrFilterPos= c->hChrFilterPos; |
|
| 2400 | 2323 |
int16_t *vLumFilter= c->vLumFilter; |
| 2401 | 2324 |
int16_t *vChrFilter= c->vChrFilter; |
| 2402 |
- int16_t *hLumFilter= c->hLumFilter; |
|
| 2403 |
- int16_t *hChrFilter= c->hChrFilter; |
|
| 2404 | 2325 |
int32_t *lumMmxFilter= c->lumMmxFilter; |
| 2405 | 2326 |
int32_t *chrMmxFilter= c->chrMmxFilter; |
| 2406 | 2327 |
int32_t av_unused *alpMmxFilter= c->alpMmxFilter; |
| 2407 | 2328 |
const int vLumFilterSize= c->vLumFilterSize; |
| 2408 | 2329 |
const int vChrFilterSize= c->vChrFilterSize; |
| 2409 |
- const int hLumFilterSize= c->hLumFilterSize; |
|
| 2410 |
- const int hChrFilterSize= c->hChrFilterSize; |
|
| 2411 |
- int16_t **lumPixBuf= c->lumPixBuf; |
|
| 2412 |
- int16_t **chrPixBuf= c->chrPixBuf; |
|
| 2413 |
- int16_t **alpPixBuf= c->alpPixBuf; |
|
| 2414 |
- const int vLumBufSize= c->vLumBufSize; |
|
| 2415 |
- const int vChrBufSize= c->vChrBufSize; |
|
| 2416 |
- uint8_t *formatConvBuffer= c->formatConvBuffer; |
|
| 2417 |
- const int chrSrcSliceY= srcSliceY >> c->chrSrcVSubSample; |
|
| 2418 |
- const int chrSrcSliceH= -((-srcSliceH) >> c->chrSrcVSubSample); |
|
| 2419 |
- int lastDstY; |
|
| 2420 |
- uint32_t *pal=c->pal_yuv; |
|
| 2421 |
- |
|
| 2422 |
- /* vars which will change and which we need to store back in the context */ |
|
| 2423 |
- int dstY= c->dstY; |
|
| 2424 |
- int lumBufIndex= c->lumBufIndex; |
|
| 2425 |
- int chrBufIndex= c->chrBufIndex; |
|
| 2426 |
- int lastInLumBuf= c->lastInLumBuf; |
|
| 2427 |
- int lastInChrBuf= c->lastInChrBuf; |
|
| 2428 |
- |
|
| 2429 |
- if (isPacked(c->srcFormat)) {
|
|
| 2430 |
- src[0]= |
|
| 2431 |
- src[1]= |
|
| 2432 |
- src[2]= |
|
| 2433 |
- src[3]= src[0]; |
|
| 2434 |
- srcStride[0]= |
|
| 2435 |
- srcStride[1]= |
|
| 2436 |
- srcStride[2]= |
|
| 2437 |
- srcStride[3]= srcStride[0]; |
|
| 2438 |
- } |
|
| 2439 |
- srcStride[1]<<= c->vChrDrop; |
|
| 2440 |
- srcStride[2]<<= c->vChrDrop; |
|
| 2441 |
- |
|
| 2442 |
- DEBUG_BUFFERS("swScale() %p[%d] %p[%d] %p[%d] %p[%d] -> %p[%d] %p[%d] %p[%d] %p[%d]\n",
|
|
| 2443 |
- src[0], srcStride[0], src[1], srcStride[1], src[2], srcStride[2], src[3], srcStride[3], |
|
| 2444 |
- dst[0], dstStride[0], dst[1], dstStride[1], dst[2], dstStride[2], dst[3], dstStride[3]); |
|
| 2445 |
- DEBUG_BUFFERS("srcSliceY: %d srcSliceH: %d dstY: %d dstH: %d\n",
|
|
| 2446 |
- srcSliceY, srcSliceH, dstY, dstH); |
|
| 2447 |
- DEBUG_BUFFERS("vLumFilterSize: %d vLumBufSize: %d vChrFilterSize: %d vChrBufSize: %d\n",
|
|
| 2448 |
- vLumFilterSize, vLumBufSize, vChrFilterSize, vChrBufSize); |
|
| 2449 |
- |
|
| 2450 |
- if (dstStride[0]%8 !=0 || dstStride[1]%8 !=0 || dstStride[2]%8 !=0 || dstStride[3]%8 != 0) {
|
|
| 2451 |
- static int warnedAlready=0; //FIXME move this into the context perhaps |
|
| 2452 |
- if (flags & SWS_PRINT_INFO && !warnedAlready) {
|
|
| 2453 |
- av_log(c, AV_LOG_WARNING, "Warning: dstStride is not aligned!\n" |
|
| 2454 |
- " ->cannot do aligned memory accesses anymore\n"); |
|
| 2455 |
- warnedAlready=1; |
|
| 2456 |
- } |
|
| 2457 |
- } |
|
| 2458 |
- |
|
| 2459 |
- /* Note the user might start scaling the picture in the middle so this |
|
| 2460 |
- will not get executed. This is not really intended but works |
|
| 2461 |
- currently, so people might do it. */ |
|
| 2462 |
- if (srcSliceY ==0) {
|
|
| 2463 |
- lumBufIndex=-1; |
|
| 2464 |
- chrBufIndex=-1; |
|
| 2465 |
- dstY=0; |
|
| 2466 |
- lastInLumBuf= -1; |
|
| 2467 |
- lastInChrBuf= -1; |
|
| 2468 |
- } |
|
| 2469 |
- |
|
| 2470 |
- lastDstY= dstY; |
|
| 2471 |
- |
|
| 2472 |
- for (;dstY < dstH; dstY++) {
|
|
| 2473 |
- unsigned char *dest =dst[0]+dstStride[0]*dstY; |
|
| 2474 |
- const int chrDstY= dstY>>c->chrDstVSubSample; |
|
| 2475 |
- unsigned char *uDest=dst[1]+dstStride[1]*chrDstY; |
|
| 2476 |
- unsigned char *vDest=dst[2]+dstStride[2]*chrDstY; |
|
| 2477 |
- unsigned char *aDest=(CONFIG_SWSCALE_ALPHA && alpPixBuf) ? dst[3]+dstStride[3]*dstY : NULL; |
|
| 2478 |
- |
|
| 2479 |
- const int firstLumSrcY= vLumFilterPos[dstY]; //First line needed as input |
|
| 2480 |
- const int firstLumSrcY2= vLumFilterPos[FFMIN(dstY | ((1<<c->chrDstVSubSample) - 1), dstH-1)]; |
|
| 2481 |
- const int firstChrSrcY= vChrFilterPos[chrDstY]; //First line needed as input |
|
| 2482 |
- int lastLumSrcY= firstLumSrcY + vLumFilterSize -1; // Last line needed as input |
|
| 2483 |
- int lastLumSrcY2=firstLumSrcY2+ vLumFilterSize -1; // Last line needed as input |
|
| 2484 |
- int lastChrSrcY= firstChrSrcY + vChrFilterSize -1; // Last line needed as input |
|
| 2485 |
- int enough_lines; |
|
| 2486 |
- |
|
| 2487 |
- //handle holes (FAST_BILINEAR & weird filters) |
|
| 2488 |
- if (firstLumSrcY > lastInLumBuf) lastInLumBuf= firstLumSrcY-1; |
|
| 2489 |
- if (firstChrSrcY > lastInChrBuf) lastInChrBuf= firstChrSrcY-1; |
|
| 2490 |
- assert(firstLumSrcY >= lastInLumBuf - vLumBufSize + 1); |
|
| 2491 |
- assert(firstChrSrcY >= lastInChrBuf - vChrBufSize + 1); |
|
| 2492 |
- |
|
| 2493 |
- DEBUG_BUFFERS("dstY: %d\n", dstY);
|
|
| 2494 |
- DEBUG_BUFFERS("\tfirstLumSrcY: %d lastLumSrcY: %d lastInLumBuf: %d\n",
|
|
| 2495 |
- firstLumSrcY, lastLumSrcY, lastInLumBuf); |
|
| 2496 |
- DEBUG_BUFFERS("\tfirstChrSrcY: %d lastChrSrcY: %d lastInChrBuf: %d\n",
|
|
| 2497 |
- firstChrSrcY, lastChrSrcY, lastInChrBuf); |
|
| 2498 |
- |
|
| 2499 |
- // Do we have enough lines in this slice to output the dstY line |
|
| 2500 |
- enough_lines = lastLumSrcY2 < srcSliceY + srcSliceH && lastChrSrcY < -((-srcSliceY - srcSliceH)>>c->chrSrcVSubSample); |
|
| 2501 |
- |
|
| 2502 |
- if (!enough_lines) {
|
|
| 2503 |
- lastLumSrcY = srcSliceY + srcSliceH - 1; |
|
| 2504 |
- lastChrSrcY = chrSrcSliceY + chrSrcSliceH - 1; |
|
| 2505 |
- DEBUG_BUFFERS("buffering slice: lastLumSrcY %d lastChrSrcY %d\n",
|
|
| 2506 |
- lastLumSrcY, lastChrSrcY); |
|
| 2507 |
- } |
|
| 2508 |
- |
|
| 2509 |
- //Do horizontal scaling |
|
| 2510 |
- while(lastInLumBuf < lastLumSrcY) {
|
|
| 2511 |
- const uint8_t *src1= src[0]+(lastInLumBuf + 1 - srcSliceY)*srcStride[0]; |
|
| 2512 |
- const uint8_t *src2= src[3]+(lastInLumBuf + 1 - srcSliceY)*srcStride[3]; |
|
| 2513 |
- lumBufIndex++; |
|
| 2514 |
- assert(lumBufIndex < 2*vLumBufSize); |
|
| 2515 |
- assert(lastInLumBuf + 1 - srcSliceY < srcSliceH); |
|
| 2516 |
- assert(lastInLumBuf + 1 - srcSliceY >= 0); |
|
| 2517 |
- RENAME(hyscale)(c, lumPixBuf[ lumBufIndex ], dstW, src1, srcW, lumXInc, |
|
| 2518 |
- hLumFilter, hLumFilterPos, hLumFilterSize, |
|
| 2519 |
- formatConvBuffer, |
|
| 2520 |
- pal, 0); |
|
| 2521 |
- if (CONFIG_SWSCALE_ALPHA && alpPixBuf) |
|
| 2522 |
- RENAME(hyscale)(c, alpPixBuf[ lumBufIndex ], dstW, src2, srcW, lumXInc, |
|
| 2523 |
- hLumFilter, hLumFilterPos, hLumFilterSize, |
|
| 2524 |
- formatConvBuffer, |
|
| 2525 |
- pal, 1); |
|
| 2526 |
- lastInLumBuf++; |
|
| 2527 |
- DEBUG_BUFFERS("\t\tlumBufIndex %d: lastInLumBuf: %d\n",
|
|
| 2528 |
- lumBufIndex, lastInLumBuf); |
|
| 2529 |
- } |
|
| 2530 |
- while(lastInChrBuf < lastChrSrcY) {
|
|
| 2531 |
- const uint8_t *src1= src[1]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[1]; |
|
| 2532 |
- const uint8_t *src2= src[2]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[2]; |
|
| 2533 |
- chrBufIndex++; |
|
| 2534 |
- assert(chrBufIndex < 2*vChrBufSize); |
|
| 2535 |
- assert(lastInChrBuf + 1 - chrSrcSliceY < (chrSrcSliceH)); |
|
| 2536 |
- assert(lastInChrBuf + 1 - chrSrcSliceY >= 0); |
|
| 2537 |
- //FIXME replace parameters through context struct (some at least) |
|
| 2538 |
- |
|
| 2539 |
- if (c->needs_hcscale) |
|
| 2540 |
- RENAME(hcscale)(c, chrPixBuf[ chrBufIndex ], chrDstW, src1, src2, chrSrcW, chrXInc, |
|
| 2541 |
- hChrFilter, hChrFilterPos, hChrFilterSize, |
|
| 2542 |
- formatConvBuffer, |
|
| 2543 |
- pal); |
|
| 2544 |
- lastInChrBuf++; |
|
| 2545 |
- DEBUG_BUFFERS("\t\tchrBufIndex %d: lastInChrBuf: %d\n",
|
|
| 2546 |
- chrBufIndex, lastInChrBuf); |
|
| 2547 |
- } |
|
| 2548 |
- //wrap buf index around to stay inside the ring buffer |
|
| 2549 |
- if (lumBufIndex >= vLumBufSize) lumBufIndex-= vLumBufSize; |
|
| 2550 |
- if (chrBufIndex >= vChrBufSize) chrBufIndex-= vChrBufSize; |
|
| 2551 |
- if (!enough_lines) |
|
| 2552 |
- break; //we can't output a dstY line so let's try with the next slice |
|
| 2330 |
+ const int chrDstY= dstY>>c->chrDstVSubSample; |
|
| 2331 |
+ const int firstLumSrcY= vLumFilterPos[dstY]; //First line needed as input |
|
| 2332 |
+ const int firstChrSrcY= vChrFilterPos[chrDstY]; //First line needed as input |
|
| 2553 | 2333 |
|
| 2554 | 2334 |
c->blueDither= ff_dither8[dstY&1]; |
| 2555 | 2335 |
if (c->dstFormat == PIX_FMT_RGB555 || c->dstFormat == PIX_FMT_BGR555) |
| ... | ... |
@@ -2557,7 +2230,7 @@ static int RENAME(swScale)(SwsContext *c, const uint8_t* src[], int srcStride[], |
| 2557 | 2557 |
else |
| 2558 | 2558 |
c->greenDither= ff_dither4[dstY&1]; |
| 2559 | 2559 |
c->redDither= ff_dither8[(dstY+1)&1]; |
| 2560 |
- if (dstY < dstH-2) {
|
|
| 2560 |
+ if (dstY < dstH - 2) {
|
|
| 2561 | 2561 |
const int16_t **lumSrcPtr= (const int16_t **) lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize; |
| 2562 | 2562 |
const int16_t **chrSrcPtr= (const int16_t **) chrPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize; |
| 2563 | 2563 |
const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **) alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL; |
| ... | ... |
@@ -2606,183 +2279,52 @@ static int RENAME(swScale)(SwsContext *c, const uint8_t* src[], int srcStride[], |
| 2606 | 2606 |
((uint16_t)vChrFilter[chrDstY*vChrFilterSize + i])*0x10001; |
| 2607 | 2607 |
} |
| 2608 | 2608 |
} |
| 2609 |
- if (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21) {
|
|
| 2610 |
- const int chrSkipMask= (1<<c->chrDstVSubSample)-1; |
|
| 2611 |
- if (dstY&chrSkipMask) uDest= NULL; //FIXME split functions in lumi / chromi |
|
| 2612 |
- c->yuv2nv12X(c, |
|
| 2613 |
- vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize, |
|
| 2614 |
- vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize, |
|
| 2615 |
- dest, uDest, dstW, chrDstW, dstFormat); |
|
| 2616 |
- } else if (isPlanarYUV(dstFormat) || dstFormat==PIX_FMT_GRAY8) { //YV12 like
|
|
| 2617 |
- const int chrSkipMask= (1<<c->chrDstVSubSample)-1; |
|
| 2618 |
- if ((dstY&chrSkipMask) || isGray(dstFormat)) uDest=vDest= NULL; //FIXME split functions in lumi / chromi |
|
| 2619 |
- if (is16BPS(dstFormat) || isNBPS(dstFormat)) {
|
|
| 2620 |
- yuv2yuvX16inC( |
|
| 2621 |
- vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize, |
|
| 2622 |
- vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize, |
|
| 2623 |
- alpSrcPtr, (uint16_t *) dest, (uint16_t *) uDest, (uint16_t *) vDest, (uint16_t *) aDest, dstW, chrDstW, |
|
| 2624 |
- dstFormat); |
|
| 2625 |
- } else if (vLumFilterSize == 1 && vChrFilterSize == 1) { // unscaled YV12
|
|
| 2626 |
- const int16_t *lumBuf = lumSrcPtr[0]; |
|
| 2627 |
- const int16_t *chrBuf= chrSrcPtr[0]; |
|
| 2628 |
- const int16_t *alpBuf= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? alpSrcPtr[0] : NULL; |
|
| 2629 |
- c->yuv2yuv1(c, lumBuf, chrBuf, alpBuf, dest, uDest, vDest, aDest, dstW, chrDstW); |
|
| 2630 |
- } else { //General YV12
|
|
| 2631 |
- c->yuv2yuvX(c, |
|
| 2632 |
- vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize, |
|
| 2633 |
- vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize, |
|
| 2634 |
- alpSrcPtr, dest, uDest, vDest, aDest, dstW, chrDstW); |
|
| 2635 |
- } |
|
| 2636 |
- } else {
|
|
| 2637 |
- assert(lumSrcPtr + vLumFilterSize - 1 < lumPixBuf + vLumBufSize*2); |
|
| 2638 |
- assert(chrSrcPtr + vChrFilterSize - 1 < chrPixBuf + vChrBufSize*2); |
|
| 2639 |
- if (vLumFilterSize == 1 && vChrFilterSize == 2) { //unscaled RGB
|
|
| 2640 |
- int chrAlpha= vChrFilter[2*dstY+1]; |
|
| 2641 |
- if(flags & SWS_FULL_CHR_H_INT) {
|
|
| 2642 |
- yuv2rgbXinC_full(c, //FIXME write a packed1_full function |
|
| 2643 |
- vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize, |
|
| 2644 |
- vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize, |
|
| 2645 |
- alpSrcPtr, dest, dstW, dstY); |
|
| 2646 |
- } else {
|
|
| 2647 |
- c->yuv2packed1(c, *lumSrcPtr, *chrSrcPtr, *(chrSrcPtr+1), |
|
| 2648 |
- alpPixBuf ? *alpSrcPtr : NULL, |
|
| 2649 |
- dest, dstW, chrAlpha, dstFormat, flags, dstY); |
|
| 2650 |
- } |
|
| 2651 |
- } else if (vLumFilterSize == 2 && vChrFilterSize == 2) { //bilinear upscale RGB
|
|
| 2652 |
- int lumAlpha= vLumFilter[2*dstY+1]; |
|
| 2653 |
- int chrAlpha= vChrFilter[2*dstY+1]; |
|
| 2654 |
- lumMmxFilter[2]= |
|
| 2655 |
- lumMmxFilter[3]= vLumFilter[2*dstY ]*0x10001; |
|
| 2656 |
- chrMmxFilter[2]= |
|
| 2657 |
- chrMmxFilter[3]= vChrFilter[2*chrDstY]*0x10001; |
|
| 2658 |
- if(flags & SWS_FULL_CHR_H_INT) {
|
|
| 2659 |
- yuv2rgbXinC_full(c, //FIXME write a packed2_full function |
|
| 2660 |
- vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize, |
|
| 2661 |
- vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize, |
|
| 2662 |
- alpSrcPtr, dest, dstW, dstY); |
|
| 2663 |
- } else {
|
|
| 2664 |
- c->yuv2packed2(c, *lumSrcPtr, *(lumSrcPtr+1), *chrSrcPtr, *(chrSrcPtr+1), |
|
| 2665 |
- alpPixBuf ? *alpSrcPtr : NULL, alpPixBuf ? *(alpSrcPtr+1) : NULL, |
|
| 2666 |
- dest, dstW, lumAlpha, chrAlpha, dstY); |
|
| 2667 |
- } |
|
| 2668 |
- } else { //general RGB
|
|
| 2669 |
- if(flags & SWS_FULL_CHR_H_INT) {
|
|
| 2670 |
- yuv2rgbXinC_full(c, |
|
| 2671 |
- vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize, |
|
| 2672 |
- vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize, |
|
| 2673 |
- alpSrcPtr, dest, dstW, dstY); |
|
| 2674 |
- } else {
|
|
| 2675 |
- c->yuv2packedX(c, |
|
| 2676 |
- vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize, |
|
| 2677 |
- vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize, |
|
| 2678 |
- alpSrcPtr, dest, dstW, dstY); |
|
| 2679 |
- } |
|
| 2680 |
- } |
|
| 2681 |
- } |
|
| 2682 |
- } else { // hmm looks like we can't use MMX here without overwriting this array's tail
|
|
| 2683 |
- const int16_t **lumSrcPtr= (const int16_t **)lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize; |
|
| 2684 |
- const int16_t **chrSrcPtr= (const int16_t **)chrPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize; |
|
| 2685 |
- const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **)alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL; |
|
| 2686 |
- if (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21) {
|
|
| 2687 |
- const int chrSkipMask= (1<<c->chrDstVSubSample)-1; |
|
| 2688 |
- if (dstY&chrSkipMask) uDest= NULL; //FIXME split functions in lumi / chromi |
|
| 2689 |
- yuv2nv12XinC( |
|
| 2690 |
- vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize, |
|
| 2691 |
- vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize, |
|
| 2692 |
- dest, uDest, dstW, chrDstW, dstFormat); |
|
| 2693 |
- } else if (isPlanarYUV(dstFormat) || dstFormat==PIX_FMT_GRAY8) { //YV12
|
|
| 2694 |
- const int chrSkipMask= (1<<c->chrDstVSubSample)-1; |
|
| 2695 |
- if ((dstY&chrSkipMask) || isGray(dstFormat)) uDest=vDest= NULL; //FIXME split functions in lumi / chromi |
|
| 2696 |
- if (is16BPS(dstFormat) || isNBPS(dstFormat)) {
|
|
| 2697 |
- yuv2yuvX16inC( |
|
| 2698 |
- vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize, |
|
| 2699 |
- vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize, |
|
| 2700 |
- alpSrcPtr, (uint16_t *) dest, (uint16_t *) uDest, (uint16_t *) vDest, (uint16_t *) aDest, dstW, chrDstW, |
|
| 2701 |
- dstFormat); |
|
| 2702 |
- } else {
|
|
| 2703 |
- yuv2yuvXinC( |
|
| 2704 |
- vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize, |
|
| 2705 |
- vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize, |
|
| 2706 |
- alpSrcPtr, dest, uDest, vDest, aDest, dstW, chrDstW); |
|
| 2707 |
- } |
|
| 2708 |
- } else {
|
|
| 2709 |
- assert(lumSrcPtr + vLumFilterSize - 1 < lumPixBuf + vLumBufSize*2); |
|
| 2710 |
- assert(chrSrcPtr + vChrFilterSize - 1 < chrPixBuf + vChrBufSize*2); |
|
| 2711 |
- if(flags & SWS_FULL_CHR_H_INT) {
|
|
| 2712 |
- yuv2rgbXinC_full(c, |
|
| 2713 |
- vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize, |
|
| 2714 |
- vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize, |
|
| 2715 |
- alpSrcPtr, dest, dstW, dstY); |
|
| 2716 |
- } else {
|
|
| 2717 |
- yuv2packedXinC(c, |
|
| 2718 |
- vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize, |
|
| 2719 |
- vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize, |
|
| 2720 |
- alpSrcPtr, dest, dstW, dstY); |
|
| 2721 |
- } |
|
| 2722 |
- } |
|
| 2723 | 2609 |
} |
| 2724 |
- } |
|
| 2725 |
- |
|
| 2726 |
- if ((dstFormat == PIX_FMT_YUVA420P) && !alpPixBuf) |
|
| 2727 |
- fillPlane(dst[3], dstStride[3], dstW, dstY-lastDstY, lastDstY, 255); |
|
| 2728 |
- |
|
| 2729 |
- if (flags & SWS_CPU_CAPS_MMX2 ) __asm__ volatile("sfence":::"memory");
|
|
| 2730 |
- /* On K6 femms is faster than emms. On K7 femms is directly mapped to emms. */ |
|
| 2731 |
- if (flags & SWS_CPU_CAPS_3DNOW) __asm__ volatile("femms" :::"memory");
|
|
| 2732 |
- else __asm__ volatile("emms" :::"memory");
|
|
| 2733 |
- /* store changed local vars back in the context */ |
|
| 2734 |
- c->dstY= dstY; |
|
| 2735 |
- c->lumBufIndex= lumBufIndex; |
|
| 2736 |
- c->chrBufIndex= chrBufIndex; |
|
| 2737 |
- c->lastInLumBuf= lastInLumBuf; |
|
| 2738 |
- c->lastInChrBuf= lastInChrBuf; |
|
| 2739 |
- |
|
| 2740 |
- return dstY - lastDstY; |
|
| 2741 | 2610 |
} |
| 2611 |
+#endif /* !COMPILE_TEMPLATE_MMX2 */ |
|
| 2742 | 2612 |
|
| 2743 | 2613 |
static void RENAME(sws_init_swScale)(SwsContext *c) |
| 2744 | 2614 |
{
|
| 2745 | 2615 |
enum PixelFormat srcFormat = c->srcFormat; |
| 2746 | 2616 |
|
| 2747 |
- c->yuv2nv12X = RENAME(yuv2nv12X ); |
|
| 2748 |
- c->yuv2yuv1 = RENAME(yuv2yuv1 ); |
|
| 2749 |
- c->yuv2yuvX = RENAME(yuv2yuvX ); |
|
| 2750 |
- c->yuv2packed1 = RENAME(yuv2packed1 ); |
|
| 2751 |
- c->yuv2packed2 = RENAME(yuv2packed2 ); |
|
| 2752 |
- c->yuv2packedX = RENAME(yuv2packedX ); |
|
| 2617 |
+ if (!(c->flags & SWS_BITEXACT)) {
|
|
| 2618 |
+ c->yuv2yuv1 = RENAME(yuv2yuv1 ); |
|
| 2619 |
+ c->yuv2yuvX = RENAME(yuv2yuvX ); |
|
| 2620 |
+ c->yuv2packed1 = RENAME(yuv2packed1 ); |
|
| 2621 |
+ c->yuv2packed2 = RENAME(yuv2packed2 ); |
|
| 2622 |
+ c->yuv2packedX = RENAME(yuv2packedX ); |
|
| 2623 |
+ } |
|
| 2753 | 2624 |
|
| 2754 | 2625 |
c->hScale = RENAME(hScale ); |
| 2755 | 2626 |
|
| 2756 | 2627 |
// Use the new MMX scaler if the MMX2 one can't be used (it is faster than the x86 ASM one). |
| 2628 |
+#if COMPILE_TEMPLATE_MMX2 |
|
| 2757 | 2629 |
if (c->flags & SWS_FAST_BILINEAR && c->canMMX2BeUsed) |
| 2758 | 2630 |
{
|
| 2759 | 2631 |
c->hyscale_fast = RENAME(hyscale_fast); |
| 2760 | 2632 |
c->hcscale_fast = RENAME(hcscale_fast); |
| 2761 | 2633 |
} else {
|
| 2634 |
+#endif /* COMPILE_TEMPLATE_MMX2 */ |
|
| 2762 | 2635 |
c->hyscale_fast = NULL; |
| 2763 | 2636 |
c->hcscale_fast = NULL; |
| 2637 |
+#if COMPILE_TEMPLATE_MMX2 |
|
| 2764 | 2638 |
} |
| 2639 |
+#endif /* COMPILE_TEMPLATE_MMX2 */ |
|
| 2765 | 2640 |
|
| 2766 |
- switch(srcFormat) {
|
|
| 2641 |
+ switch(srcFormat) {
|
|
| 2767 | 2642 |
case PIX_FMT_YUYV422 : c->chrToYV12 = RENAME(yuy2ToUV); break; |
| 2768 | 2643 |
case PIX_FMT_UYVY422 : c->chrToYV12 = RENAME(uyvyToUV); break; |
| 2769 | 2644 |
case PIX_FMT_NV12 : c->chrToYV12 = RENAME(nv12ToUV); break; |
| 2770 | 2645 |
case PIX_FMT_NV21 : c->chrToYV12 = RENAME(nv21ToUV); break; |
| 2771 |
- case PIX_FMT_YUV420P16BE: |
|
| 2772 |
- case PIX_FMT_YUV422P16BE: |
|
| 2773 |
- case PIX_FMT_YUV444P16BE: c->chrToYV12 = RENAME(BEToUV); break; |
|
| 2646 |
+ case PIX_FMT_GRAY16LE : |
|
| 2647 |
+ case PIX_FMT_YUV420P9LE: |
|
| 2648 |
+ case PIX_FMT_YUV422P10LE: |
|
| 2649 |
+ case PIX_FMT_YUV420P10LE: |
|
| 2774 | 2650 |
case PIX_FMT_YUV420P16LE: |
| 2775 | 2651 |
case PIX_FMT_YUV422P16LE: |
| 2776 |
- case PIX_FMT_YUV444P16LE: c->chrToYV12 = RENAME(LEToUV); break; |
|
| 2777 |
- default: break; |
|
| 2778 |
- } |
|
| 2779 |
- if (c->chrSrcHSubSample) {
|
|
| 2780 |
- switch(srcFormat) {
|
|
| 2781 |
- case PIX_FMT_BGR24 : c->chrToYV12 = RENAME(bgr24ToUV_half); break; |
|
| 2782 |
- case PIX_FMT_RGB24 : c->chrToYV12 = RENAME(rgb24ToUV_half); break; |
|
| 2783 |
- default: break; |
|
| 2784 |
- } |
|
| 2785 |
- } else {
|
|
| 2652 |
+ case PIX_FMT_YUV444P16LE: c->hScale16= RENAME(hScale16); break; |
|
| 2653 |
+ } |
|
| 2654 |
+ if (!c->chrSrcHSubSample) {
|
|
| 2786 | 2655 |
switch(srcFormat) {
|
| 2787 | 2656 |
case PIX_FMT_BGR24 : c->chrToYV12 = RENAME(bgr24ToUV); break; |
| 2788 | 2657 |
case PIX_FMT_RGB24 : c->chrToYV12 = RENAME(rgb24ToUV); break; |
| ... | ... |
@@ -2792,16 +2334,10 @@ static void RENAME(sws_init_swScale)(SwsContext *c) |
| 2792 | 2792 |
|
| 2793 | 2793 |
switch (srcFormat) {
|
| 2794 | 2794 |
case PIX_FMT_YUYV422 : |
| 2795 |
- case PIX_FMT_YUV420P16BE: |
|
| 2796 |
- case PIX_FMT_YUV422P16BE: |
|
| 2797 |
- case PIX_FMT_YUV444P16BE: |
|
| 2798 | 2795 |
case PIX_FMT_Y400A : |
| 2799 |
- case PIX_FMT_GRAY16BE : c->lumToYV12 = RENAME(yuy2ToY); break; |
|
| 2796 |
+ c->lumToYV12 = RENAME(yuy2ToY); break; |
|
| 2800 | 2797 |
case PIX_FMT_UYVY422 : |
| 2801 |
- case PIX_FMT_YUV420P16LE: |
|
| 2802 |
- case PIX_FMT_YUV422P16LE: |
|
| 2803 |
- case PIX_FMT_YUV444P16LE: |
|
| 2804 |
- case PIX_FMT_GRAY16LE : c->lumToYV12 = RENAME(uyvyToY); break; |
|
| 2798 |
+ c->lumToYV12 = RENAME(uyvyToY); break; |
|
| 2805 | 2799 |
case PIX_FMT_BGR24 : c->lumToYV12 = RENAME(bgr24ToY); break; |
| 2806 | 2800 |
case PIX_FMT_RGB24 : c->lumToYV12 = RENAME(rgb24ToY); break; |
| 2807 | 2801 |
default: break; |
| ... | ... |
@@ -2812,14 +2348,4 @@ static void RENAME(sws_init_swScale)(SwsContext *c) |
| 2812 | 2812 |
default: break; |
| 2813 | 2813 |
} |
| 2814 | 2814 |
} |
| 2815 |
- |
|
| 2816 |
- if (c->srcRange != c->dstRange && !isAnyRGB(c->dstFormat)) {
|
|
| 2817 |
- if (c->srcRange) {
|
|
| 2818 |
- c->lumConvertRange = RENAME(lumRangeFromJpeg); |
|
| 2819 |
- c->chrConvertRange = RENAME(chrRangeFromJpeg); |
|
| 2820 |
- } else {
|
|
| 2821 |
- c->lumConvertRange = RENAME(lumRangeToJpeg); |
|
| 2822 |
- c->chrConvertRange = RENAME(chrRangeToJpeg); |
|
| 2823 |
- } |
|
| 2824 |
- } |
|
| 2825 | 2815 |
} |
| ... | ... |
@@ -34,6 +34,7 @@ |
| 34 | 34 |
#include "libswscale/swscale.h" |
| 35 | 35 |
#include "libswscale/swscale_internal.h" |
| 36 | 36 |
#include "libavutil/x86_cpu.h" |
| 37 |
+#include "libavutil/cpu.h" |
|
| 37 | 38 |
|
| 38 | 39 |
#define DITHER1XBPP // only for MMX |
| 39 | 40 |
|
| ... | ... |
@@ -46,57 +47,58 @@ DECLARE_ASM_CONST(8, uint64_t, pb_03) = 0x0303030303030303ULL; |
| 46 | 46 |
DECLARE_ASM_CONST(8, uint64_t, pb_07) = 0x0707070707070707ULL; |
| 47 | 47 |
|
| 48 | 48 |
//MMX versions |
| 49 |
+#if HAVE_MMX |
|
| 49 | 50 |
#undef RENAME |
| 50 |
-#undef HAVE_MMX2 |
|
| 51 |
-#undef HAVE_AMD3DNOW |
|
| 52 |
-#define HAVE_MMX2 0 |
|
| 53 |
-#define HAVE_AMD3DNOW 0 |
|
| 51 |
+#undef COMPILE_TEMPLATE_MMX2 |
|
| 52 |
+#define COMPILE_TEMPLATE_MMX2 0 |
|
| 54 | 53 |
#define RENAME(a) a ## _MMX |
| 55 | 54 |
#include "yuv2rgb_template.c" |
| 55 |
+#endif /* HAVE_MMX */ |
|
| 56 | 56 |
|
| 57 | 57 |
//MMX2 versions |
| 58 |
+#if HAVE_MMX2 |
|
| 58 | 59 |
#undef RENAME |
| 59 |
-#undef HAVE_MMX2 |
|
| 60 |
-#define HAVE_MMX2 1 |
|
| 60 |
+#undef COMPILE_TEMPLATE_MMX2 |
|
| 61 |
+#define COMPILE_TEMPLATE_MMX2 1 |
|
| 61 | 62 |
#define RENAME(a) a ## _MMX2 |
| 62 | 63 |
#include "yuv2rgb_template.c" |
| 64 |
+#endif /* HAVE_MMX2 */ |
|
| 63 | 65 |
|
| 64 | 66 |
SwsFunc ff_yuv2rgb_init_mmx(SwsContext *c) |
| 65 | 67 |
{
|
| 66 |
- if (c->flags & SWS_CPU_CAPS_MMX2) {
|
|
| 68 |
+ int cpu_flags = av_get_cpu_flags(); |
|
| 69 |
+ |
|
| 70 |
+ if (c->srcFormat != PIX_FMT_YUV420P && |
|
| 71 |
+ c->srcFormat != PIX_FMT_YUVA420P) |
|
| 72 |
+ return NULL; |
|
| 73 |
+ |
|
| 74 |
+ if (HAVE_MMX2 && cpu_flags & AV_CPU_FLAG_MMX2) {
|
|
| 67 | 75 |
switch (c->dstFormat) {
|
| 68 |
- case PIX_FMT_RGB32: |
|
| 69 |
- if (CONFIG_SWSCALE_ALPHA && c->srcFormat == PIX_FMT_YUVA420P) {
|
|
| 70 |
- if (HAVE_7REGS) return yuva420_rgb32_MMX2; |
|
| 71 |
- break; |
|
| 72 |
- } else return yuv420_rgb32_MMX2; |
|
| 73 |
- case PIX_FMT_BGR32: |
|
| 74 |
- if (CONFIG_SWSCALE_ALPHA && c->srcFormat == PIX_FMT_YUVA420P) {
|
|
| 75 |
- if (HAVE_7REGS) return yuva420_bgr32_MMX2; |
|
| 76 |
- break; |
|
| 77 |
- } else return yuv420_bgr32_MMX2; |
|
| 78 | 76 |
case PIX_FMT_RGB24: return yuv420_rgb24_MMX2; |
| 79 | 77 |
case PIX_FMT_BGR24: return yuv420_bgr24_MMX2; |
| 80 |
- case PIX_FMT_RGB565: return yuv420_rgb16_MMX2; |
|
| 81 |
- case PIX_FMT_RGB555: return yuv420_rgb15_MMX2; |
|
| 82 | 78 |
} |
| 83 | 79 |
} |
| 84 |
- if (c->flags & SWS_CPU_CAPS_MMX) {
|
|
| 80 |
+ |
|
| 81 |
+ if (HAVE_MMX && cpu_flags & AV_CPU_FLAG_MMX) {
|
|
| 85 | 82 |
switch (c->dstFormat) {
|
| 86 |
- case PIX_FMT_RGB32: |
|
| 87 |
- if (CONFIG_SWSCALE_ALPHA && c->srcFormat == PIX_FMT_YUVA420P) {
|
|
| 88 |
- if (HAVE_7REGS) return yuva420_rgb32_MMX; |
|
| 89 |
- break; |
|
| 90 |
- } else return yuv420_rgb32_MMX; |
|
| 91 |
- case PIX_FMT_BGR32: |
|
| 92 |
- if (CONFIG_SWSCALE_ALPHA && c->srcFormat == PIX_FMT_YUVA420P) {
|
|
| 93 |
- if (HAVE_7REGS) return yuva420_bgr32_MMX; |
|
| 94 |
- break; |
|
| 95 |
- } else return yuv420_bgr32_MMX; |
|
| 96 |
- case PIX_FMT_RGB24: return yuv420_rgb24_MMX; |
|
| 97 |
- case PIX_FMT_BGR24: return yuv420_bgr24_MMX; |
|
| 98 |
- case PIX_FMT_RGB565: return yuv420_rgb16_MMX; |
|
| 99 |
- case PIX_FMT_RGB555: return yuv420_rgb15_MMX; |
|
| 83 |
+ case PIX_FMT_RGB32: |
|
| 84 |
+ if (CONFIG_SWSCALE_ALPHA && c->srcFormat == PIX_FMT_YUVA420P) {
|
|
| 85 |
+#if HAVE_7REGS |
|
| 86 |
+ return yuva420_rgb32_MMX; |
|
| 87 |
+#endif |
|
| 88 |
+ break; |
|
| 89 |
+ } else return yuv420_rgb32_MMX; |
|
| 90 |
+ case PIX_FMT_BGR32: |
|
| 91 |
+ if (CONFIG_SWSCALE_ALPHA && c->srcFormat == PIX_FMT_YUVA420P) {
|
|
| 92 |
+#if HAVE_7REGS |
|
| 93 |
+ return yuva420_bgr32_MMX; |
|
| 94 |
+#endif |
|
| 95 |
+ break; |
|
| 96 |
+ } else return yuv420_bgr32_MMX; |
|
| 97 |
+ case PIX_FMT_RGB24: return yuv420_rgb24_MMX; |
|
| 98 |
+ case PIX_FMT_BGR24: return yuv420_bgr24_MMX; |
|
| 99 |
+ case PIX_FMT_RGB565: return yuv420_rgb16_MMX; |
|
| 100 |
+ case PIX_FMT_RGB555: return yuv420_rgb15_MMX; |
|
| 100 | 101 |
} |
| 101 | 102 |
} |
| 102 | 103 |
|
| ... | ... |
@@ -25,14 +25,7 @@ |
| 25 | 25 |
#undef EMMS |
| 26 | 26 |
#undef SFENCE |
| 27 | 27 |
|
| 28 |
-#if HAVE_AMD3DNOW |
|
| 29 |
-/* On K6 femms is faster than emms. On K7 femms is directly mapped to emms. */ |
|
| 30 |
-#define EMMS "femms" |
|
| 31 |
-#else |
|
| 32 |
-#define EMMS "emms" |
|
| 33 |
-#endif |
|
| 34 |
- |
|
| 35 |
-#if HAVE_MMX2 |
|
| 28 |
+#if COMPILE_TEMPLATE_MMX2 |
|
| 36 | 29 |
#define MOVNTQ "movntq" |
| 37 | 30 |
#define SFENCE "sfence" |
| 38 | 31 |
#else |
| ... | ... |
@@ -159,7 +152,8 @@ |
| 159 | 159 |
} \ |
| 160 | 160 |
|
| 161 | 161 |
#define YUV2RGB_ENDFUNC \ |
| 162 |
- __asm__ volatile (SFENCE"\n\t"EMMS); \ |
|
| 162 |
+ __asm__ volatile (SFENCE"\n\t" \ |
|
| 163 |
+ "emms \n\t"); \ |
|
| 163 | 164 |
return srcSliceH; \ |
| 164 | 165 |
|
| 165 | 166 |
#define IF0(x) |
| ... | ... |
@@ -188,6 +182,7 @@ |
| 188 | 188 |
"paddusb "GREEN_DITHER"(%4), %%mm2\n\t" \ |
| 189 | 189 |
"paddusb "RED_DITHER"(%4), %%mm1\n\t" \ |
| 190 | 190 |
|
| 191 |
+#if !COMPILE_TEMPLATE_MMX2 |
|
| 191 | 192 |
static inline int RENAME(yuv420_rgb15)(SwsContext *c, const uint8_t *src[], |
| 192 | 193 |
int srcStride[], |
| 193 | 194 |
int srcSliceY, int srcSliceH, |
| ... | ... |
@@ -243,6 +238,7 @@ static inline int RENAME(yuv420_rgb16)(SwsContext *c, const uint8_t *src[], |
| 243 | 243 |
YUV2RGB_OPERANDS |
| 244 | 244 |
YUV2RGB_ENDFUNC |
| 245 | 245 |
} |
| 246 |
+#endif /* !COMPILE_TEMPLATE_MMX2 */ |
|
| 246 | 247 |
|
| 247 | 248 |
#define RGB_PACK24(blue, red)\ |
| 248 | 249 |
"packuswb %%mm3, %%mm0 \n" /* R0 R2 R4 R6 R1 R3 R5 R7 */\ |
| ... | ... |
@@ -259,7 +255,7 @@ static inline int RENAME(yuv420_rgb16)(SwsContext *c, const uint8_t *src[], |
| 259 | 259 |
"punpckhwd %%mm6, %%mm5 \n" /* R4 G4 B4 R5 R6 G6 B6 R7 */\ |
| 260 | 260 |
RGB_PACK24_B |
| 261 | 261 |
|
| 262 |
-#if HAVE_MMX2 |
|
| 262 |
+#if COMPILE_TEMPLATE_MMX2 |
|
| 263 | 263 |
DECLARE_ASM_CONST(8, int16_t, mask1101[4]) = {-1,-1, 0,-1};
|
| 264 | 264 |
DECLARE_ASM_CONST(8, int16_t, mask0010[4]) = { 0, 0,-1, 0};
|
| 265 | 265 |
DECLARE_ASM_CONST(8, int16_t, mask0110[4]) = { 0,-1,-1, 0};
|
| ... | ... |
@@ -366,6 +362,7 @@ static inline int RENAME(yuv420_bgr24)(SwsContext *c, const uint8_t *src[], |
| 366 | 366 |
MOVNTQ " %%mm5, 16(%1)\n\t" \ |
| 367 | 367 |
MOVNTQ " %%mm"alpha", 24(%1)\n\t" \ |
| 368 | 368 |
|
| 369 |
+#if !COMPILE_TEMPLATE_MMX2 |
|
| 369 | 370 |
static inline int RENAME(yuv420_rgb32)(SwsContext *c, const uint8_t *src[], |
| 370 | 371 |
int srcStride[], |
| 371 | 372 |
int srcSliceY, int srcSliceH, |
| ... | ... |
@@ -386,12 +383,12 @@ static inline int RENAME(yuv420_rgb32)(SwsContext *c, const uint8_t *src[], |
| 386 | 386 |
YUV2RGB_ENDFUNC |
| 387 | 387 |
} |
| 388 | 388 |
|
| 389 |
+#if HAVE_7REGS && CONFIG_SWSCALE_ALPHA |
|
| 389 | 390 |
static inline int RENAME(yuva420_rgb32)(SwsContext *c, const uint8_t *src[], |
| 390 | 391 |
int srcStride[], |
| 391 | 392 |
int srcSliceY, int srcSliceH, |
| 392 | 393 |
uint8_t *dst[], int dstStride[]) |
| 393 | 394 |
{
|
| 394 |
-#if HAVE_7REGS |
|
| 395 | 395 |
int y, h_size; |
| 396 | 396 |
|
| 397 | 397 |
YUV2RGB_LOOP(4) |
| ... | ... |
@@ -406,9 +403,8 @@ static inline int RENAME(yuva420_rgb32)(SwsContext *c, const uint8_t *src[], |
| 406 | 406 |
YUV2RGB_ENDLOOP(4) |
| 407 | 407 |
YUV2RGB_OPERANDS_ALPHA |
| 408 | 408 |
YUV2RGB_ENDFUNC |
| 409 |
-#endif |
|
| 410 |
- return 0; |
|
| 411 | 409 |
} |
| 410 |
+#endif |
|
| 412 | 411 |
|
| 413 | 412 |
static inline int RENAME(yuv420_bgr32)(SwsContext *c, const uint8_t *src[], |
| 414 | 413 |
int srcStride[], |
| ... | ... |
@@ -430,12 +426,12 @@ static inline int RENAME(yuv420_bgr32)(SwsContext *c, const uint8_t *src[], |
| 430 | 430 |
YUV2RGB_ENDFUNC |
| 431 | 431 |
} |
| 432 | 432 |
|
| 433 |
+#if HAVE_7REGS && CONFIG_SWSCALE_ALPHA |
|
| 433 | 434 |
static inline int RENAME(yuva420_bgr32)(SwsContext *c, const uint8_t *src[], |
| 434 | 435 |
int srcStride[], |
| 435 | 436 |
int srcSliceY, int srcSliceH, |
| 436 | 437 |
uint8_t *dst[], int dstStride[]) |
| 437 | 438 |
{
|
| 438 |
-#if HAVE_7REGS |
|
| 439 | 439 |
int y, h_size; |
| 440 | 440 |
|
| 441 | 441 |
YUV2RGB_LOOP(4) |
| ... | ... |
@@ -450,6 +446,7 @@ static inline int RENAME(yuva420_bgr32)(SwsContext *c, const uint8_t *src[], |
| 450 | 450 |
YUV2RGB_ENDLOOP(4) |
| 451 | 451 |
YUV2RGB_OPERANDS_ALPHA |
| 452 | 452 |
YUV2RGB_ENDFUNC |
| 453 |
-#endif |
|
| 454 |
- return 0; |
|
| 455 | 453 |
} |
| 454 |
+#endif |
|
| 455 |
+ |
|
| 456 |
+#endif /* !COMPILE_TEMPLATE_MMX2 */ |
| ... | ... |
@@ -32,7 +32,7 @@ |
| 32 | 32 |
#include "rgb2rgb.h" |
| 33 | 33 |
#include "swscale.h" |
| 34 | 34 |
#include "swscale_internal.h" |
| 35 |
-#include "libavutil/x86_cpu.h" |
|
| 35 |
+#include "libavutil/cpu.h" |
|
| 36 | 36 |
#include "libavutil/bswap.h" |
| 37 | 37 |
|
| 38 | 38 |
extern const uint8_t dither_4x4_16[4][8]; |
| ... | ... |
@@ -579,24 +579,18 @@ CLOSEYUV2RGBFUNC(1) |
| 579 | 579 |
SwsFunc ff_yuv2rgb_get_func_ptr(SwsContext *c) |
| 580 | 580 |
{
|
| 581 | 581 |
SwsFunc t = NULL; |
| 582 |
-#if HAVE_MMX |
|
| 583 |
- t = ff_yuv2rgb_init_mmx(c); |
|
| 584 |
-#endif |
|
| 585 |
-#if HAVE_VIS |
|
| 586 |
- t = ff_yuv2rgb_init_vis(c); |
|
| 587 |
-#endif |
|
| 588 |
-#if CONFIG_MLIB |
|
| 589 |
- t = ff_yuv2rgb_init_mlib(c); |
|
| 590 |
-#endif |
|
| 591 |
-#if HAVE_ALTIVEC |
|
| 592 |
- if (c->flags & SWS_CPU_CAPS_ALTIVEC) |
|
| 593 |
- t = ff_yuv2rgb_init_altivec(c); |
|
| 594 |
-#endif |
|
| 595 | 582 |
|
| 596 |
-#if ARCH_BFIN |
|
| 597 |
- if (c->flags & SWS_CPU_CAPS_BFIN) |
|
| 583 |
+ if (HAVE_MMX) {
|
|
| 584 |
+ t = ff_yuv2rgb_init_mmx(c); |
|
| 585 |
+ } else if (HAVE_VIS) {
|
|
| 586 |
+ t = ff_yuv2rgb_init_vis(c); |
|
| 587 |
+ } else if (CONFIG_MLIB) {
|
|
| 588 |
+ t = ff_yuv2rgb_init_mlib(c); |
|
| 589 |
+ } else if (HAVE_ALTIVEC) {
|
|
| 590 |
+ t = ff_yuv2rgb_init_altivec(c); |
|
| 591 |
+ } else if (ARCH_BFIN) {
|
|
| 598 | 592 |
t = ff_yuv2rgb_get_func_ptr_bfin(c); |
| 599 |
-#endif |
|
| 593 |
+ } |
|
| 600 | 594 |
|
| 601 | 595 |
if (t) |
| 602 | 596 |
return t; |