... | ... |
@@ -35,21 +35,24 @@ |
35 | 35 |
#include "video.h" |
36 | 36 |
|
37 | 37 |
/*
 * Private state of one hqdn3d filter instance.
 */
typedef struct {
    /* One lookup table per strength[] entry. Filled by precalc_coefs() in
     * config_input() and released in uninit(). */
    int16_t *coefs[4];
    /* Buffer checked for allocation failure in config_input() and released
     * in uninit(). NOTE(review): presumably the per-line scratch buffer of
     * the spatial pass -- confirm against the caller. */
    uint16_t *line;
    /* Buffers released in uninit(). NOTE(review): presumably the previous
     * filtered frame, one per plane -- confirm. */
    uint16_t *frame_prev[3];
    /* Filter strengths stored by init():
     * [0] luma spatial, [1] luma temporal,
     * [2] chroma spatial, [3] chroma temporal. */
    double strength[4];
    /* NOTE(review): presumably the chroma subsampling shifts; their use is
     * not visible in this part of the file. */
    int hsub, vsub;
    /* Bit depth handed to precalc_coefs() when the tables are built. */
    int depth;
} HQDN3DContext;
44 | 45 |
|
46 |
/* log2 of the number of lookup-table entries per unit of 8-bit difference.
 * Expands to an expression on a variable named `depth`, which must be in
 * scope wherever the macro is used. */
#define LUT_BITS (depth==16 ? 8 : 4)
/* Shift a right by b bits after adding ((1<<b)-1)>>1, i.e. round to the
 * nearest integer with exact halves rounded down. */
#define RIGHTSHIFT(a,b) (((a)+(((1<<(b))-1)>>1))>>(b))
/* Read sample x from `src` (one byte per sample when depth==8, otherwise a
 * 16-bit value read with AV_RN16A) and scale it up by 16-depth bits.
 * Requires `src` and `depth` in scope. */
#define LOAD(x) ((depth==8 ? src[x] : AV_RN16A(src+(x)*2)) << (16-depth))
/* Inverse of LOAD(): round val back down by 16-depth bits and write it as
 * sample x of `dst`. Requires `dst` and `depth` in scope. */
#define STORE(x,val) (depth==8 ? dst[x] = RIGHTSHIFT(val, 16-depth)\
                    : AV_WN16A(dst+(x)*2, RIGHTSHIFT(val, 16-depth)))
49 | 51 |
|
50 |
-static inline uint32_t lowpass(int prev, int cur, int16_t *coef) |
|
52 |
+av_always_inline |
|
53 |
+static inline uint32_t lowpass(int prev, int cur, int16_t *coef, int depth) |
|
51 | 54 |
{ |
52 |
- int d = (prev-cur)>>4; |
|
55 |
+ int d = (prev - cur) >> (8 - LUT_BITS); |
|
53 | 56 |
return cur + coef[d]; |
54 | 57 |
} |
55 | 58 |
|
... | ... |
@@ -62,11 +65,11 @@ static void denoise_temporal(uint8_t *src, uint8_t *dst, |
62 | 62 |
long x, y; |
63 | 63 |
uint32_t tmp; |
64 | 64 |
|
65 |
- temporal += 0x1000; |
|
65 |
+ temporal += 256 << LUT_BITS; |
|
66 | 66 |
|
67 | 67 |
for (y = 0; y < h; y++) { |
68 | 68 |
for (x = 0; x < w; x++) { |
69 |
- frame_ant[x] = tmp = lowpass(frame_ant[x], LOAD(x), temporal); |
|
69 |
+ frame_ant[x] = tmp = lowpass(frame_ant[x], LOAD(x), temporal, depth); |
|
70 | 70 |
STORE(x, tmp); |
71 | 71 |
} |
72 | 72 |
src += sstride; |
... | ... |
@@ -85,15 +88,15 @@ static void denoise_spatial(uint8_t *src, uint8_t *dst, |
85 | 85 |
uint32_t pixel_ant; |
86 | 86 |
uint32_t tmp; |
87 | 87 |
|
88 |
- spatial += 0x1000; |
|
89 |
- temporal += 0x1000; |
|
88 |
+ spatial += 256 << LUT_BITS; |
|
89 |
+ temporal += 256 << LUT_BITS; |
|
90 | 90 |
|
91 | 91 |
/* First line has no top neighbor. Only left one for each tmp and |
92 | 92 |
* last frame */ |
93 | 93 |
pixel_ant = LOAD(0); |
94 | 94 |
for (x = 0; x < w; x++) { |
95 |
- line_ant[x] = tmp = pixel_ant = lowpass(pixel_ant, LOAD(x), spatial); |
|
96 |
- frame_ant[x] = tmp = lowpass(frame_ant[x], tmp, temporal); |
|
95 |
+ line_ant[x] = tmp = pixel_ant = lowpass(pixel_ant, LOAD(x), spatial, depth); |
|
96 |
+ frame_ant[x] = tmp = lowpass(frame_ant[x], tmp, temporal, depth); |
|
97 | 97 |
STORE(x, tmp); |
98 | 98 |
} |
99 | 99 |
|
... | ... |
@@ -103,13 +106,13 @@ static void denoise_spatial(uint8_t *src, uint8_t *dst, |
103 | 103 |
frame_ant += w; |
104 | 104 |
pixel_ant = LOAD(0); |
105 | 105 |
for (x = 0; x < w-1; x++) { |
106 |
- line_ant[x] = tmp = lowpass(line_ant[x], pixel_ant, spatial); |
|
107 |
- pixel_ant = lowpass(pixel_ant, LOAD(x+1), spatial); |
|
108 |
- frame_ant[x] = tmp = lowpass(frame_ant[x], tmp, temporal); |
|
106 |
+ line_ant[x] = tmp = lowpass(line_ant[x], pixel_ant, spatial, depth); |
|
107 |
+ pixel_ant = lowpass(pixel_ant, LOAD(x+1), spatial, depth); |
|
108 |
+ frame_ant[x] = tmp = lowpass(frame_ant[x], tmp, temporal, depth); |
|
109 | 109 |
STORE(x, tmp); |
110 | 110 |
} |
111 |
- line_ant[x] = tmp = lowpass(line_ant[x], pixel_ant, spatial); |
|
112 |
- frame_ant[x] = tmp = lowpass(frame_ant[x], tmp, temporal); |
|
111 |
+ line_ant[x] = tmp = lowpass(line_ant[x], pixel_ant, spatial, depth); |
|
112 |
+ frame_ant[x] = tmp = lowpass(frame_ant[x], tmp, temporal, depth); |
|
113 | 113 |
STORE(x, tmp); |
114 | 114 |
} |
115 | 115 |
} |
... | ... |
@@ -120,6 +123,8 @@ static void denoise_depth(uint8_t *src, uint8_t *dst, |
120 | 120 |
int w, int h, int sstride, int dstride, |
121 | 121 |
int16_t *spatial, int16_t *temporal, int depth) |
122 | 122 |
{ |
123 |
+ // FIXME: For 16bit depth, frame_ant could be a pointer to the previous |
|
124 |
+ // filtered frame rather than a separate buffer. |
|
123 | 125 |
long x, y; |
124 | 126 |
uint16_t *frame_ant = *frame_ant_ptr; |
125 | 127 |
if (!frame_ant) { |
... | ... |
@@ -145,24 +150,28 @@ static void denoise_depth(uint8_t *src, uint8_t *dst, |
145 | 145 |
case 8: denoise_depth(__VA_ARGS__, 8); break;\ |
146 | 146 |
case 9: denoise_depth(__VA_ARGS__, 9); break;\ |
147 | 147 |
case 10: denoise_depth(__VA_ARGS__, 10); break;\ |
148 |
+ case 16: denoise_depth(__VA_ARGS__, 16); break;\ |
|
148 | 149 |
} |
149 | 150 |
|
150 |
-static void precalc_coefs(int16_t *ct, double dist25) |
|
151 |
+static int16_t *precalc_coefs(double dist25, int depth) |
|
151 | 152 |
{ |
152 | 153 |
int i; |
153 | 154 |
double gamma, simil, C; |
155 |
+ int16_t *ct = av_malloc((512<<LUT_BITS)*sizeof(int16_t)); |
|
156 |
+ if (!ct) |
|
157 |
+ return NULL; |
|
154 | 158 |
|
155 | 159 |
gamma = log(0.25) / log(1.0 - FFMIN(dist25,252.0)/255.0 - 0.00001); |
156 | 160 |
|
157 |
- for (i = -255*16; i <= 255*16; i++) { |
|
158 |
- // lowpass() truncates (not rounds) the diff, so +15/32 for the midpoint of the bin. |
|
159 |
- double f = (i + 15.0/32.0) / 16.0; |
|
161 |
+ for (i = -255<<LUT_BITS; i <= 255<<LUT_BITS; i++) { |
|
162 |
+ double f = ((i<<(9-LUT_BITS)) + (1<<(8-LUT_BITS)) - 1) / 512.0; // midpoint of the bin |
|
160 | 163 |
simil = 1.0 - FFABS(f) / 255.0; |
161 | 164 |
C = pow(simil, gamma) * 256.0 * f; |
162 |
- ct[16*256+i] = lrint(C); |
|
165 |
+ ct[(256<<LUT_BITS)+i] = lrint(C); |
|
163 | 166 |
} |
164 | 167 |
|
165 | 168 |
ct[0] = !!dist25; |
169 |
+ return ct; |
|
166 | 170 |
} |
167 | 171 |
|
168 | 172 |
#define PARAM1_DEFAULT 4.0 |
... | ... |
@@ -210,6 +219,11 @@ static int init(AVFilterContext *ctx, const char *args) |
210 | 210 |
} |
211 | 211 |
} |
212 | 212 |
|
213 |
+ hqdn3d->strength[0] = lum_spac; |
|
214 |
+ hqdn3d->strength[1] = lum_tmp; |
|
215 |
+ hqdn3d->strength[2] = chrom_spac; |
|
216 |
+ hqdn3d->strength[3] = chrom_tmp; |
|
217 |
+ |
|
213 | 218 |
av_log(ctx, AV_LOG_VERBOSE, "ls:%lf cs:%lf lt:%lf ct:%lf\n", |
214 | 219 |
lum_spac, chrom_spac, lum_tmp, chrom_tmp); |
215 | 220 |
if (lum_spac < 0 || chrom_spac < 0 || isnan(chrom_tmp)) { |
... | ... |
@@ -219,11 +233,6 @@ static int init(AVFilterContext *ctx, const char *args) |
219 | 219 |
return AVERROR(EINVAL); |
220 | 220 |
} |
221 | 221 |
|
222 |
- precalc_coefs(hqdn3d->coefs[0], lum_spac); |
|
223 |
- precalc_coefs(hqdn3d->coefs[1], lum_tmp); |
|
224 |
- precalc_coefs(hqdn3d->coefs[2], chrom_spac); |
|
225 |
- precalc_coefs(hqdn3d->coefs[3], chrom_tmp); |
|
226 |
- |
|
227 | 222 |
return 0; |
228 | 223 |
} |
229 | 224 |
|
... | ... |
@@ -231,6 +240,10 @@ static void uninit(AVFilterContext *ctx) |
231 | 231 |
{ |
232 | 232 |
HQDN3DContext *hqdn3d = ctx->priv; |
233 | 233 |
|
234 |
+ av_freep(&hqdn3d->coefs[0]); |
|
235 |
+ av_freep(&hqdn3d->coefs[1]); |
|
236 |
+ av_freep(&hqdn3d->coefs[2]); |
|
237 |
+ av_freep(&hqdn3d->coefs[3]); |
|
234 | 238 |
av_freep(&hqdn3d->line); |
235 | 239 |
av_freep(&hqdn3d->frame_prev[0]); |
236 | 240 |
av_freep(&hqdn3d->frame_prev[1]); |
... | ... |
@@ -256,6 +269,9 @@ static int query_formats(AVFilterContext *ctx) |
256 | 256 |
AV_NE( PIX_FMT_YUV420P10BE, PIX_FMT_YUV420P10LE ), |
257 | 257 |
AV_NE( PIX_FMT_YUV422P10BE, PIX_FMT_YUV422P10LE ), |
258 | 258 |
AV_NE( PIX_FMT_YUV444P10BE, PIX_FMT_YUV444P10LE ), |
259 |
+ AV_NE( PIX_FMT_YUV420P16BE, PIX_FMT_YUV420P16LE ), |
|
260 |
+ AV_NE( PIX_FMT_YUV422P16BE, PIX_FMT_YUV422P16LE ), |
|
261 |
+ AV_NE( PIX_FMT_YUV444P16BE, PIX_FMT_YUV444P16LE ), |
|
259 | 262 |
PIX_FMT_NONE |
260 | 263 |
}; |
261 | 264 |
|
... | ... |
@@ -276,6 +292,12 @@ static int config_input(AVFilterLink *inlink) |
276 | 276 |
if (!hqdn3d->line) |
277 | 277 |
return AVERROR(ENOMEM); |
278 | 278 |
|
279 |
+ for (int i=0; i<4; i++) { |
|
280 |
+ hqdn3d->coefs[i] = precalc_coefs(hqdn3d->strength[i], hqdn3d->depth); |
|
281 |
+ if (!hqdn3d->coefs[i]) |
|
282 |
+ return AVERROR(ENOMEM); |
|
283 |
+ } |
|
284 |
+ |
|
279 | 285 |
return 0; |
280 | 286 |
} |
281 | 287 |
|