For certain types of filters where the intermediate sum of coefficients
can go above the fixed-point equivalent of 1.0 in the middle of a filter,
the sum of a 31-bit calculation can overflow in both directions and thus
cannot be represented in a 32-bit signed or unsigned integer. To work
around this, we subtract 0x40000000 from a signed integer base, so that
we're halfway signed/unsigned, which makes it fit even if it overflows.
After the filter finishes, we shift the result down and then add the
correspondingly scaled bias back (0x40000000 >> 14 = 0x10000).
We use the same trick for the 16-bpc YUV output routines.
Signed-off-by: Mans Rullgard <mans@mansr.com>
| ... | ... |
@@ -754,8 +754,8 @@ yuv2rgb48_X_c_template(SwsContext *c, const int16_t *lumFilter, |
| 754 | 754 |
|
| 755 | 755 |
for (i = 0; i < (dstW >> 1); i++) {
|
| 756 | 756 |
int j; |
| 757 |
- int Y1 = 0; |
|
| 758 |
- int Y2 = 0; |
|
| 757 |
+ int Y1 = -0x40000000; |
|
| 758 |
+ int Y2 = -0x40000000; |
|
| 759 | 759 |
int U = -128 << 23; // 19 |
| 760 | 760 |
int V = -128 << 23; |
| 761 | 761 |
int R, G, B; |
| ... | ... |
@@ -771,7 +771,9 @@ yuv2rgb48_X_c_template(SwsContext *c, const int16_t *lumFilter, |
| 771 | 771 |
|
| 772 | 772 |
// 8bit: 12+15=27; 16-bit: 12+19=31 |
| 773 | 773 |
Y1 >>= 14; // 10 |
| 774 |
+ Y1 += 0x10000; |
|
| 774 | 775 |
Y2 >>= 14; |
| 776 |
+ Y2 += 0x10000; |
|
| 775 | 777 |
U >>= 14; |
| 776 | 778 |
V >>= 14; |
| 777 | 779 |
|