...
@@ -610,7 +610,7 @@ OBJS-$(CONFIG_VP8_DECODER) += vp8.o vp56rac.o
 OBJS-$(CONFIG_VP8_CUVID_DECODER)       += cuvid.o
 OBJS-$(CONFIG_VP8_MEDIACODEC_DECODER)  += mediacodecdec.o
 OBJS-$(CONFIG_VP8_VAAPI_ENCODER)       += vaapi_encode_vp8.o
-OBJS-$(CONFIG_VP9_DECODER)             += vp9.o vp9data.o vp9dsp.o vp9lpf.o \
+OBJS-$(CONFIG_VP9_DECODER)             += vp9.o vp9data.o vp9dsp.o vp9lpf.o vp9recon.o \
                                           vp9block.o vp9prob.o vp9mvs.o vp56rac.o \
                                           vp9dsp_8bpp.o vp9dsp_10bpp.o vp9dsp_12bpp.o
 OBJS-$(CONFIG_VP9_CUVID_DECODER)       += cuvid.o
...
@@ -405,8 +405,10 @@ static void FN(inter_pred)(AVCodecContext *avctx)
         }
     } else {
         int bwl = bwlog_tab[0][b->bs];
-        int bw = bwh_tab[0][b->bs][0] * 4, bh = bwh_tab[0][b->bs][1] * 4;
-        int uvbw = bwh_tab[s->ss_h][b->bs][0] * 4, uvbh = bwh_tab[s->ss_v][b->bs][1] * 4;
+        int bw = ff_vp9_bwh_tab[0][b->bs][0] * 4;
+        int bh = ff_vp9_bwh_tab[0][b->bs][1] * 4;
+        int uvbw = ff_vp9_bwh_tab[s->ss_h][b->bs][0] * 4;
+        int uvbh = ff_vp9_bwh_tab[s->ss_v][b->bs][1] * 4;

         mc_luma_dir(s, mc[bwl][b->filter][0], s->dst[0], ls_y,
                     ref1->data[0], ref1->linesize[0], tref1,
...
@@ -31,16 +31,6 @@
 #include "vp9data.h"
 #include "vp9dec.h"

-static const uint8_t bwh_tab[2][N_BS_SIZES][2] = {
-    {
-        { 16, 16 }, { 16, 8 }, { 8, 16 }, { 8, 8 }, { 8, 4 }, { 4, 8 },
-        { 4, 4 }, { 4, 2 }, { 2, 4 }, { 2, 2 }, { 2, 1 }, { 1, 2 }, { 1, 1 },
-    }, {
-        { 8, 8 }, { 8, 4 }, { 4, 8 }, { 4, 4 }, { 4, 2 }, { 2, 4 },
-        { 2, 2 }, { 2, 1 }, { 1, 2 }, { 1, 1 }, { 1, 1 }, { 1, 1 }, { 1, 1 },
-    }
-};
-
 static av_always_inline void setctx_2d(uint8_t *ptr, int w, int h,
                                        ptrdiff_t stride, int v)
 {
...
@@ -103,8 +93,8 @@ static void decode_mode(AVCodecContext *avctx)
     VP9Block *b = s->b;
     int row = s->row, col = s->col, row7 = s->row7;
     enum TxfmMode max_tx = max_tx_for_bl_bp[b->bs];
-    int bw4 = bwh_tab[1][b->bs][0], w4 = FFMIN(s->cols - col, bw4);
-    int bh4 = bwh_tab[1][b->bs][1], h4 = FFMIN(s->rows - row, bh4), y;
+    int bw4 = ff_vp9_bwh_tab[1][b->bs][0], w4 = FFMIN(s->cols - col, bw4);
+    int bh4 = ff_vp9_bwh_tab[1][b->bs][1], h4 = FFMIN(s->rows - row, bh4), y;
     int have_a = row > 0, have_l = col > s->tile_col_start;
     int vref, filter_id;

...
@@ -272,8 +262,8 @@ static void decode_mode(AVCodecContext *avctx)
         b->mode[2] =
         b->mode[1] = b->mode[0];
         // FIXME this can probably be optimized
-        memset(a, b->mode[0], bwh_tab[0][b->bs][0]);
-        memset(l, b->mode[0], bwh_tab[0][b->bs][1]);
+        memset(a, b->mode[0], ff_vp9_bwh_tab[0][b->bs][0]);
+        memset(l, b->mode[0], ff_vp9_bwh_tab[0][b->bs][1]);
     }
     b->uvmode = vp8_rac_get_tree(&s->c, ff_vp9_intramode_tree,
                                  ff_vp9_default_kf_uvmode_probs[b->mode[3]]);
...
@@ -725,7 +715,7 @@ static void decode_mode(AVCodecContext *avctx)
     }
 #endif

-    switch (bwh_tab[1][b->bs][0]) {
+    switch (ff_vp9_bwh_tab[1][b->bs][0]) {
 #define SET_CTXS(dir, off, n) \
     do { \
         SPLAT_CTX(s->dir##_skip_ctx[off], b->skip, n); \
...
@@ -748,7 +738,7 @@ static void decode_mode(AVCodecContext *avctx)
     case 4: SET_CTXS(above, col, 4); break;
     case 8: SET_CTXS(above, col, 8); break;
     }
-    switch (bwh_tab[1][b->bs][1]) {
+    switch (ff_vp9_bwh_tab[1][b->bs][1]) {
     case 1: SET_CTXS(left, row7, 1); break;
     case 2: SET_CTXS(left, row7, 2); break;
     case 4: SET_CTXS(left, row7, 4); break;
...
@@ -983,7 +973,7 @@ static av_always_inline int decode_coeffs(AVCodecContext *avctx, int is8bitsperpixel)
     uint8_t (*p)[6][11] = s->prob.coef[b->tx][0 /* y */][!b->intra];
     unsigned (*c)[6][3] = s->counts.coef[b->tx][0 /* y */][!b->intra];
     unsigned (*e)[6][2] = s->counts.eob[b->tx][0 /* y */][!b->intra];
-    int w4 = bwh_tab[1][b->bs][0] << 1, h4 = bwh_tab[1][b->bs][1] << 1;
+    int w4 = ff_vp9_bwh_tab[1][b->bs][0] << 1, h4 = ff_vp9_bwh_tab[1][b->bs][1] << 1;
     int end_x = FFMIN(2 * (s->cols - col), w4);
     int end_y = FFMIN(2 * (s->rows - row), h4);
     int n, pl, x, y, ret;
...
@@ -1152,615 +1142,6 @@ static int decode_coeffs_16bpp(AVCodecContext *avctx)
     return decode_coeffs(avctx, 0);
 }

-static av_always_inline int check_intra_mode(VP9Context *s, int mode, uint8_t **a,
-                                             uint8_t *dst_edge, ptrdiff_t stride_edge,
-                                             uint8_t *dst_inner, ptrdiff_t stride_inner,
-                                             uint8_t *l, int col, int x, int w,
-                                             int row, int y, enum TxfmMode tx,
-                                             int p, int ss_h, int ss_v, int bytesperpixel)
-{
-    int have_top = row > 0 || y > 0;
-    int have_left = col > s->tile_col_start || x > 0;
-    int have_right = x < w - 1;
-    int bpp = s->s.h.bpp;
-    static const uint8_t mode_conv[10][2 /* have_left */][2 /* have_top */] = {
-        [VERT_PRED]            = { { DC_127_PRED, VERT_PRED },
-                                   { DC_127_PRED, VERT_PRED } },
-        [HOR_PRED]             = { { DC_129_PRED, DC_129_PRED },
-                                   { HOR_PRED, HOR_PRED } },
-        [DC_PRED]              = { { DC_128_PRED, TOP_DC_PRED },
-                                   { LEFT_DC_PRED, DC_PRED } },
-        [DIAG_DOWN_LEFT_PRED]  = { { DC_127_PRED, DIAG_DOWN_LEFT_PRED },
-                                   { DC_127_PRED, DIAG_DOWN_LEFT_PRED } },
-        [DIAG_DOWN_RIGHT_PRED] = { { DIAG_DOWN_RIGHT_PRED, DIAG_DOWN_RIGHT_PRED },
-                                   { DIAG_DOWN_RIGHT_PRED, DIAG_DOWN_RIGHT_PRED } },
-        [VERT_RIGHT_PRED]      = { { VERT_RIGHT_PRED, VERT_RIGHT_PRED },
-                                   { VERT_RIGHT_PRED, VERT_RIGHT_PRED } },
-        [HOR_DOWN_PRED]        = { { HOR_DOWN_PRED, HOR_DOWN_PRED },
-                                   { HOR_DOWN_PRED, HOR_DOWN_PRED } },
-        [VERT_LEFT_PRED]       = { { DC_127_PRED, VERT_LEFT_PRED },
-                                   { DC_127_PRED, VERT_LEFT_PRED } },
-        [HOR_UP_PRED]          = { { DC_129_PRED, DC_129_PRED },
-                                   { HOR_UP_PRED, HOR_UP_PRED } },
-        [TM_VP8_PRED]          = { { DC_129_PRED, VERT_PRED },
-                                   { HOR_PRED, TM_VP8_PRED } },
-    };
-    static const struct {
-        uint8_t needs_left:1;
-        uint8_t needs_top:1;
-        uint8_t needs_topleft:1;
-        uint8_t needs_topright:1;
-        uint8_t invert_left:1;
-    } edges[N_INTRA_PRED_MODES] = {
-        [VERT_PRED]            = { .needs_top = 1 },
-        [HOR_PRED]             = { .needs_left = 1 },
-        [DC_PRED]              = { .needs_top = 1, .needs_left = 1 },
-        [DIAG_DOWN_LEFT_PRED]  = { .needs_top = 1, .needs_topright = 1 },
-        [DIAG_DOWN_RIGHT_PRED] = { .needs_left = 1, .needs_top = 1,
-                                   .needs_topleft = 1 },
-        [VERT_RIGHT_PRED]      = { .needs_left = 1, .needs_top = 1,
-                                   .needs_topleft = 1 },
-        [HOR_DOWN_PRED]        = { .needs_left = 1, .needs_top = 1,
-                                   .needs_topleft = 1 },
-        [VERT_LEFT_PRED]       = { .needs_top = 1, .needs_topright = 1 },
-        [HOR_UP_PRED]          = { .needs_left = 1, .invert_left = 1 },
-        [TM_VP8_PRED]          = { .needs_left = 1, .needs_top = 1,
-                                   .needs_topleft = 1 },
-        [LEFT_DC_PRED]         = { .needs_left = 1 },
-        [TOP_DC_PRED]          = { .needs_top = 1 },
-        [DC_128_PRED]          = { 0 },
-        [DC_127_PRED]          = { 0 },
-        [DC_129_PRED]          = { 0 }
-    };
-
-    av_assert2(mode >= 0 && mode < 10);
-    mode = mode_conv[mode][have_left][have_top];
-    if (edges[mode].needs_top) {
-        uint8_t *top, *topleft;
-        int n_px_need = 4 << tx, n_px_have = (((s->cols - col) << !ss_h) - x) * 4;
-        int n_px_need_tr = 0;
-
-        if (tx == TX_4X4 && edges[mode].needs_topright && have_right)
-            n_px_need_tr = 4;
-
-        // if top of sb64-row, use s->intra_pred_data[] instead of
-        // dst[-stride] for intra prediction (it contains pre- instead of
-        // post-loopfilter data)
-        if (have_top) {
-            top = !(row & 7) && !y ?
-                s->intra_pred_data[p] + (col * (8 >> ss_h) + x * 4) * bytesperpixel :
-                y == 0 ? &dst_edge[-stride_edge] : &dst_inner[-stride_inner];
-            if (have_left)
-                topleft = !(row & 7) && !y ?
-                    s->intra_pred_data[p] + (col * (8 >> ss_h) + x * 4) * bytesperpixel :
-                    y == 0 || x == 0 ? &dst_edge[-stride_edge] :
-                    &dst_inner[-stride_inner];
-        }
-
-        if (have_top &&
-            (!edges[mode].needs_topleft || (have_left && top == topleft)) &&
-            (tx != TX_4X4 || !edges[mode].needs_topright || have_right) &&
-            n_px_need + n_px_need_tr <= n_px_have) {
-            *a = top;
-        } else {
-            if (have_top) {
-                if (n_px_need <= n_px_have) {
-                    memcpy(*a, top, n_px_need * bytesperpixel);
-                } else {
-#define memset_bpp(c, i1, v, i2, num) do { \
-    if (bytesperpixel == 1) { \
-        memset(&(c)[(i1)], (v)[(i2)], (num)); \
-    } else { \
-        int n, val = AV_RN16A(&(v)[(i2) * 2]); \
-        for (n = 0; n < (num); n++) { \
-            AV_WN16A(&(c)[((i1) + n) * 2], val); \
-        } \
-    } \
-} while (0)
-                    memcpy(*a, top, n_px_have * bytesperpixel);
-                    memset_bpp(*a, n_px_have, (*a), n_px_have - 1, n_px_need - n_px_have);
-                }
-            } else {
-#define memset_val(c, val, num) do { \
-    if (bytesperpixel == 1) { \
-        memset((c), (val), (num)); \
-    } else { \
-        int n; \
-        for (n = 0; n < (num); n++) { \
-            AV_WN16A(&(c)[n * 2], (val)); \
-        } \
-    } \
-} while (0)
-                memset_val(*a, (128 << (bpp - 8)) - 1, n_px_need);
-            }
-            if (edges[mode].needs_topleft) {
-                if (have_left && have_top) {
-#define assign_bpp(c, i1, v, i2) do { \
-    if (bytesperpixel == 1) { \
-        (c)[(i1)] = (v)[(i2)]; \
-    } else { \
-        AV_COPY16(&(c)[(i1) * 2], &(v)[(i2) * 2]); \
-    } \
-} while (0)
-                    assign_bpp(*a, -1, topleft, -1);
-                } else {
-#define assign_val(c, i, v) do { \
-    if (bytesperpixel == 1) { \
-        (c)[(i)] = (v); \
-    } else { \
-        AV_WN16A(&(c)[(i) * 2], (v)); \
-    } \
-} while (0)
-                    assign_val((*a), -1, (128 << (bpp - 8)) + (have_top ? +1 : -1));
-                }
-            }
-            if (tx == TX_4X4 && edges[mode].needs_topright) {
-                if (have_top && have_right &&
-                    n_px_need + n_px_need_tr <= n_px_have) {
-                    memcpy(&(*a)[4 * bytesperpixel], &top[4 * bytesperpixel], 4 * bytesperpixel);
-                } else {
-                    memset_bpp(*a, 4, *a, 3, 4);
-                }
-            }
-        }
-    }
-    if (edges[mode].needs_left) {
-        if (have_left) {
-            int n_px_need = 4 << tx, i, n_px_have = (((s->rows - row) << !ss_v) - y) * 4;
-            uint8_t *dst = x == 0 ? dst_edge : dst_inner;
-            ptrdiff_t stride = x == 0 ? stride_edge : stride_inner;
-
-            if (edges[mode].invert_left) {
-                if (n_px_need <= n_px_have) {
-                    for (i = 0; i < n_px_need; i++)
-                        assign_bpp(l, i, &dst[i * stride], -1);
-                } else {
-                    for (i = 0; i < n_px_have; i++)
-                        assign_bpp(l, i, &dst[i * stride], -1);
-                    memset_bpp(l, n_px_have, l, n_px_have - 1, n_px_need - n_px_have);
-                }
-            } else {
-                if (n_px_need <= n_px_have) {
-                    for (i = 0; i < n_px_need; i++)
-                        assign_bpp(l, n_px_need - 1 - i, &dst[i * stride], -1);
-                } else {
-                    for (i = 0; i < n_px_have; i++)
-                        assign_bpp(l, n_px_need - 1 - i, &dst[i * stride], -1);
-                    memset_bpp(l, 0, l, n_px_need - n_px_have, n_px_need - n_px_have);
-                }
-            }
-        } else {
-            memset_val(l, (128 << (bpp - 8)) + 1, 4 << tx);
-        }
-    }
-
-    return mode;
-}
-
-static av_always_inline void intra_recon(AVCodecContext *avctx, ptrdiff_t y_off,
-                                         ptrdiff_t uv_off, int bytesperpixel)
-{
-    VP9Context *s = avctx->priv_data;
-    VP9Block *b = s->b;
-    int row = s->row, col = s->col;
-    int w4 = bwh_tab[1][b->bs][0] << 1, step1d = 1 << b->tx, n;
-    int h4 = bwh_tab[1][b->bs][1] << 1, x, y, step = 1 << (b->tx * 2);
-    int end_x = FFMIN(2 * (s->cols - col), w4);
-    int end_y = FFMIN(2 * (s->rows - row), h4);
-    int tx = 4 * s->s.h.lossless + b->tx, uvtx = b->uvtx + 4 * s->s.h.lossless;
-    int uvstep1d = 1 << b->uvtx, p;
-    uint8_t *dst = s->dst[0], *dst_r = s->s.frames[CUR_FRAME].tf.f->data[0] + y_off;
-    LOCAL_ALIGNED_32(uint8_t, a_buf, [96]);
-    LOCAL_ALIGNED_32(uint8_t, l, [64]);
-
-    for (n = 0, y = 0; y < end_y; y += step1d) {
-        uint8_t *ptr = dst, *ptr_r = dst_r;
-        for (x = 0; x < end_x; x += step1d, ptr += 4 * step1d * bytesperpixel,
-             ptr_r += 4 * step1d * bytesperpixel, n += step) {
-            int mode = b->mode[b->bs > BS_8x8 && b->tx == TX_4X4 ?
-                               y * 2 + x : 0];
-            uint8_t *a = &a_buf[32];
-            enum TxfmType txtp = ff_vp9_intra_txfm_type[mode];
-            int eob = b->skip ? 0 : b->tx > TX_8X8 ? AV_RN16A(&s->eob[n]) : s->eob[n];
-
-            mode = check_intra_mode(s, mode, &a, ptr_r,
-                                    s->s.frames[CUR_FRAME].tf.f->linesize[0],
-                                    ptr, s->y_stride, l,
-                                    col, x, w4, row, y, b->tx, 0, 0, 0, bytesperpixel);
-            s->dsp.intra_pred[b->tx][mode](ptr, s->y_stride, l, a);
-            if (eob)
-                s->dsp.itxfm_add[tx][txtp](ptr, s->y_stride,
-                                           s->block + 16 * n * bytesperpixel, eob);
-        }
-        dst_r += 4 * step1d * s->s.frames[CUR_FRAME].tf.f->linesize[0];
-        dst += 4 * step1d * s->y_stride;
-    }
-
-    // U/V
-    w4 >>= s->ss_h;
-    end_x >>= s->ss_h;
-    end_y >>= s->ss_v;
-    step = 1 << (b->uvtx * 2);
-    for (p = 0; p < 2; p++) {
-        dst = s->dst[1 + p];
-        dst_r = s->s.frames[CUR_FRAME].tf.f->data[1 + p] + uv_off;
-        for (n = 0, y = 0; y < end_y; y += uvstep1d) {
-            uint8_t *ptr = dst, *ptr_r = dst_r;
-            for (x = 0; x < end_x; x += uvstep1d, ptr += 4 * uvstep1d * bytesperpixel,
-                 ptr_r += 4 * uvstep1d * bytesperpixel, n += step) {
-                int mode = b->uvmode;
-                uint8_t *a = &a_buf[32];
-                int eob = b->skip ? 0 : b->uvtx > TX_8X8 ? AV_RN16A(&s->uveob[p][n]) : s->uveob[p][n];
-
-                mode = check_intra_mode(s, mode, &a, ptr_r,
-                                        s->s.frames[CUR_FRAME].tf.f->linesize[1],
-                                        ptr, s->uv_stride, l, col, x, w4, row, y,
-                                        b->uvtx, p + 1, s->ss_h, s->ss_v, bytesperpixel);
-                s->dsp.intra_pred[b->uvtx][mode](ptr, s->uv_stride, l, a);
-                if (eob)
-                    s->dsp.itxfm_add[uvtx][DCT_DCT](ptr, s->uv_stride,
-                                                    s->uvblock[p] + 16 * n * bytesperpixel, eob);
-            }
-            dst_r += 4 * uvstep1d * s->s.frames[CUR_FRAME].tf.f->linesize[1];
-            dst += 4 * uvstep1d * s->uv_stride;
-        }
-    }
-}
-
-static void intra_recon_8bpp(AVCodecContext *avctx, ptrdiff_t y_off, ptrdiff_t uv_off)
-{
-    intra_recon(avctx, y_off, uv_off, 1);
-}
-
-static void intra_recon_16bpp(AVCodecContext *avctx, ptrdiff_t y_off, ptrdiff_t uv_off)
-{
-    intra_recon(avctx, y_off, uv_off, 2);
-}
-
-static av_always_inline void mc_luma_unscaled(VP9Context *s, vp9_mc_func (*mc)[2],
-                                              uint8_t *dst, ptrdiff_t dst_stride,
-                                              const uint8_t *ref, ptrdiff_t ref_stride,
-                                              ThreadFrame *ref_frame,
-                                              ptrdiff_t y, ptrdiff_t x, const VP56mv *mv,
-                                              int bw, int bh, int w, int h, int bytesperpixel)
-{
-    int mx = mv->x, my = mv->y, th;
-
-    y += my >> 3;
-    x += mx >> 3;
-    ref += y * ref_stride + x * bytesperpixel;
-    mx &= 7;
-    my &= 7;
-    // FIXME bilinear filter only needs 0/1 pixels, not 3/4
-    // we use +7 because the last 7 pixels of each sbrow can be changed in
-    // the longest loopfilter of the next sbrow
-    th = (y + bh + 4 * !!my + 7) >> 6;
-    ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
-    // The arm/aarch64 _hv filters read one more row than what actually is
-    // needed, so switch to emulated edge one pixel sooner vertically
-    // (!!my * 5) than horizontally (!!mx * 4).
-    if (x < !!mx * 3 || y < !!my * 3 ||
-        x + !!mx * 4 > w - bw || y + !!my * 5 > h - bh) {
-        s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
-                                 ref - !!my * 3 * ref_stride - !!mx * 3 * bytesperpixel,
-                                 160, ref_stride,
-                                 bw + !!mx * 7, bh + !!my * 7,
-                                 x - !!mx * 3, y - !!my * 3, w, h);
-        ref = s->edge_emu_buffer + !!my * 3 * 160 + !!mx * 3 * bytesperpixel;
-        ref_stride = 160;
-    }
-    mc[!!mx][!!my](dst, dst_stride, ref, ref_stride, bh, mx << 1, my << 1);
-}
-
-static av_always_inline void mc_chroma_unscaled(VP9Context *s, vp9_mc_func (*mc)[2],
-                                                uint8_t *dst_u, uint8_t *dst_v,
-                                                ptrdiff_t dst_stride,
-                                                const uint8_t *ref_u, ptrdiff_t src_stride_u,
-                                                const uint8_t *ref_v, ptrdiff_t src_stride_v,
-                                                ThreadFrame *ref_frame,
-                                                ptrdiff_t y, ptrdiff_t x, const VP56mv *mv,
-                                                int bw, int bh, int w, int h, int bytesperpixel)
-{
-    int mx = mv->x * (1 << !s->ss_h), my = mv->y * (1 << !s->ss_v), th;
-
-    y += my >> 4;
-    x += mx >> 4;
-    ref_u += y * src_stride_u + x * bytesperpixel;
-    ref_v += y * src_stride_v + x * bytesperpixel;
-    mx &= 15;
-    my &= 15;
-    // FIXME bilinear filter only needs 0/1 pixels, not 3/4
-    // we use +7 because the last 7 pixels of each sbrow can be changed in
-    // the longest loopfilter of the next sbrow
-    th = (y + bh + 4 * !!my + 7) >> (6 - s->ss_v);
-    ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
-    // The arm/aarch64 _hv filters read one more row than what actually is
-    // needed, so switch to emulated edge one pixel sooner vertically
-    // (!!my * 5) than horizontally (!!mx * 4).
-    if (x < !!mx * 3 || y < !!my * 3 ||
-        x + !!mx * 4 > w - bw || y + !!my * 5 > h - bh) {
-        s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
-                                 ref_u - !!my * 3 * src_stride_u - !!mx * 3 * bytesperpixel,
-                                 160, src_stride_u,
-                                 bw + !!mx * 7, bh + !!my * 7,
-                                 x - !!mx * 3, y - !!my * 3, w, h);
-        ref_u = s->edge_emu_buffer + !!my * 3 * 160 + !!mx * 3 * bytesperpixel;
-        mc[!!mx][!!my](dst_u, dst_stride, ref_u, 160, bh, mx, my);
-
-        s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
-                                 ref_v - !!my * 3 * src_stride_v - !!mx * 3 * bytesperpixel,
-                                 160, src_stride_v,
-                                 bw + !!mx * 7, bh + !!my * 7,
-                                 x - !!mx * 3, y - !!my * 3, w, h);
-        ref_v = s->edge_emu_buffer + !!my * 3 * 160 + !!mx * 3 * bytesperpixel;
-        mc[!!mx][!!my](dst_v, dst_stride, ref_v, 160, bh, mx, my);
-    } else {
-        mc[!!mx][!!my](dst_u, dst_stride, ref_u, src_stride_u, bh, mx, my);
-        mc[!!mx][!!my](dst_v, dst_stride, ref_v, src_stride_v, bh, mx, my);
-    }
-}
-
-#define mc_luma_dir(s, mc, dst, dst_ls, src, src_ls, tref, row, col, mv, \
-                    px, py, pw, ph, bw, bh, w, h, i) \
-    mc_luma_unscaled(s, s->dsp.mc, dst, dst_ls, src, src_ls, tref, row, col, \
-                     mv, bw, bh, w, h, bytesperpixel)
-#define mc_chroma_dir(s, mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
-                      row, col, mv, px, py, pw, ph, bw, bh, w, h, i) \
-    mc_chroma_unscaled(s, s->dsp.mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
-                       row, col, mv, bw, bh, w, h, bytesperpixel)
-#define SCALED 0
-#define FN(x) x##_8bpp
-#define BYTES_PER_PIXEL 1
-#include "vp9_mc_template.c"
-#undef FN
-#undef BYTES_PER_PIXEL
-#define FN(x) x##_16bpp
-#define BYTES_PER_PIXEL 2
-#include "vp9_mc_template.c"
-#undef mc_luma_dir
-#undef mc_chroma_dir
-#undef FN
-#undef BYTES_PER_PIXEL
-#undef SCALED
-
-static av_always_inline void mc_luma_scaled(VP9Context *s, vp9_scaled_mc_func smc,
-                                            vp9_mc_func (*mc)[2],
-                                            uint8_t *dst, ptrdiff_t dst_stride,
-                                            const uint8_t *ref, ptrdiff_t ref_stride,
-                                            ThreadFrame *ref_frame,
-                                            ptrdiff_t y, ptrdiff_t x, const VP56mv *in_mv,
-                                            int px, int py, int pw, int ph,
-                                            int bw, int bh, int w, int h, int bytesperpixel,
-                                            const uint16_t *scale, const uint8_t *step)
-{
-    if (s->s.frames[CUR_FRAME].tf.f->width == ref_frame->f->width &&
-        s->s.frames[CUR_FRAME].tf.f->height == ref_frame->f->height) {
-        mc_luma_unscaled(s, mc, dst, dst_stride, ref, ref_stride, ref_frame,
-                         y, x, in_mv, bw, bh, w, h, bytesperpixel);
-    } else {
-#define scale_mv(n, dim) (((int64_t)(n) * scale[dim]) >> 14)
-        int mx, my;
-        int refbw_m1, refbh_m1;
-        int th;
-        VP56mv mv;
-
-        mv.x = av_clip(in_mv->x, -(x + pw - px + 4) * 8, (s->cols * 8 - x + px + 3) * 8);
-        mv.y = av_clip(in_mv->y, -(y + ph - py + 4) * 8, (s->rows * 8 - y + py + 3) * 8);
-        // BUG libvpx seems to scale the two components separately. This introduces
-        // rounding errors but we have to reproduce them to be exactly compatible
-        // with the output from libvpx...
-        mx = scale_mv(mv.x * 2, 0) + scale_mv(x * 16, 0);
-        my = scale_mv(mv.y * 2, 1) + scale_mv(y * 16, 1);
-
-        y = my >> 4;
-        x = mx >> 4;
-        ref += y * ref_stride + x * bytesperpixel;
-        mx &= 15;
-        my &= 15;
-        refbw_m1 = ((bw - 1) * step[0] + mx) >> 4;
-        refbh_m1 = ((bh - 1) * step[1] + my) >> 4;
-        // FIXME bilinear filter only needs 0/1 pixels, not 3/4
-        // we use +7 because the last 7 pixels of each sbrow can be changed in
-        // the longest loopfilter of the next sbrow
-        th = (y + refbh_m1 + 4 + 7) >> 6;
-        ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
-        // The arm/aarch64 _hv filters read one more row than what actually is
-        // needed, so switch to emulated edge one pixel sooner vertically
-        // (y + 5 >= h - refbh_m1) than horizontally (x + 4 >= w - refbw_m1).
-        if (x < 3 || y < 3 || x + 4 >= w - refbw_m1 || y + 5 >= h - refbh_m1) {
-            s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
-                                     ref - 3 * ref_stride - 3 * bytesperpixel,
-                                     288, ref_stride,
-                                     refbw_m1 + 8, refbh_m1 + 8,
-                                     x - 3, y - 3, w, h);
-            ref = s->edge_emu_buffer + 3 * 288 + 3 * bytesperpixel;
-            ref_stride = 288;
-        }
-        smc(dst, dst_stride, ref, ref_stride, bh, mx, my, step[0], step[1]);
-    }
-}
-
-static av_always_inline void mc_chroma_scaled(VP9Context *s, vp9_scaled_mc_func smc,
-                                              vp9_mc_func (*mc)[2],
-                                              uint8_t *dst_u, uint8_t *dst_v,
-                                              ptrdiff_t dst_stride,
-                                              const uint8_t *ref_u, ptrdiff_t src_stride_u,
-                                              const uint8_t *ref_v, ptrdiff_t src_stride_v,
-                                              ThreadFrame *ref_frame,
-                                              ptrdiff_t y, ptrdiff_t x, const VP56mv *in_mv,
-                                              int px, int py, int pw, int ph,
-                                              int bw, int bh, int w, int h, int bytesperpixel,
-                                              const uint16_t *scale, const uint8_t *step)
-{
-    if (s->s.frames[CUR_FRAME].tf.f->width == ref_frame->f->width &&
-        s->s.frames[CUR_FRAME].tf.f->height == ref_frame->f->height) {
-        mc_chroma_unscaled(s, mc, dst_u, dst_v, dst_stride, ref_u, src_stride_u,
-                           ref_v, src_stride_v, ref_frame,
-                           y, x, in_mv, bw, bh, w, h, bytesperpixel);
-    } else {
-        int mx, my;
-        int refbw_m1, refbh_m1;
-        int th;
-        VP56mv mv;
-
-        if (s->ss_h) {
-            // BUG https://code.google.com/p/webm/issues/detail?id=820
-            mv.x = av_clip(in_mv->x, -(x + pw - px + 4) * 16, (s->cols * 4 - x + px + 3) * 16);
-            mx = scale_mv(mv.x, 0) + (scale_mv(x * 16, 0) & ~15) + (scale_mv(x * 32, 0) & 15);
-        } else {
-            mv.x = av_clip(in_mv->x, -(x + pw - px + 4) * 8, (s->cols * 8 - x + px + 3) * 8);
-            mx = scale_mv(mv.x * 2, 0) + scale_mv(x * 16, 0);
-        }
-        if (s->ss_v) {
-            // BUG https://code.google.com/p/webm/issues/detail?id=820
-            mv.y = av_clip(in_mv->y, -(y + ph - py + 4) * 16, (s->rows * 4 - y + py + 3) * 16);
-            my = scale_mv(mv.y, 1) + (scale_mv(y * 16, 1) & ~15) + (scale_mv(y * 32, 1) & 15);
-        } else {
-            mv.y = av_clip(in_mv->y, -(y + ph - py + 4) * 8, (s->rows * 8 - y + py + 3) * 8);
-            my = scale_mv(mv.y * 2, 1) + scale_mv(y * 16, 1);
-        }
-#undef scale_mv
-        y = my >> 4;
-        x = mx >> 4;
-        ref_u += y * src_stride_u + x * bytesperpixel;
-        ref_v += y * src_stride_v + x * bytesperpixel;
-        mx &= 15;
-        my &= 15;
-        refbw_m1 = ((bw - 1) * step[0] + mx) >> 4;
-        refbh_m1 = ((bh - 1) * step[1] + my) >> 4;
-        // FIXME bilinear filter only needs 0/1 pixels, not 3/4
-        // we use +7 because the last 7 pixels of each sbrow can be changed in
-        // the longest loopfilter of the next sbrow
-        th = (y + refbh_m1 + 4 + 7) >> (6 - s->ss_v);
-        ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
-        // The arm/aarch64 _hv filters read one more row than what actually is
-        // needed, so switch to emulated edge one pixel sooner vertically
-        // (y + 5 >= h - refbh_m1) than horizontally (x + 4 >= w - refbw_m1).
-        if (x < 3 || y < 3 || x + 4 >= w - refbw_m1 || y + 5 >= h - refbh_m1) {
-            s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
-                                     ref_u - 3 * src_stride_u - 3 * bytesperpixel,
-                                     288, src_stride_u,
-                                     refbw_m1 + 8, refbh_m1 + 8,
-                                     x - 3, y - 3, w, h);
-            ref_u = s->edge_emu_buffer + 3 * 288 + 3 * bytesperpixel;
-            smc(dst_u, dst_stride, ref_u, 288, bh, mx, my, step[0], step[1]);
-
-            s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
-                                     ref_v - 3 * src_stride_v - 3 * bytesperpixel,
-                                     288, src_stride_v,
-                                     refbw_m1 + 8, refbh_m1 + 8,
-                                     x - 3, y - 3, w, h);
-            ref_v = s->edge_emu_buffer + 3 * 288 + 3 * bytesperpixel;
-            smc(dst_v, dst_stride, ref_v, 288, bh, mx, my, step[0], step[1]);
-        } else {
-            smc(dst_u, dst_stride, ref_u, src_stride_u, bh, mx, my, step[0], step[1]);
-            smc(dst_v, dst_stride, ref_v, src_stride_v, bh, mx, my, step[0], step[1]);
-        }
-    }
-}
-
-#define mc_luma_dir(s, mc, dst, dst_ls, src, src_ls, tref, row, col, mv, \
-                    px, py, pw, ph, bw, bh, w, h, i) \
-    mc_luma_scaled(s, s->dsp.s##mc, s->dsp.mc, dst, dst_ls, src, src_ls, tref, row, col, \
-                   mv, px, py, pw, ph, bw, bh, w, h, bytesperpixel, \
-                   s->mvscale[b->ref[i]], s->mvstep[b->ref[i]])
-#define mc_chroma_dir(s, mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
-                      row, col, mv, px, py, pw, ph, bw, bh, w, h, i) \
-    mc_chroma_scaled(s, s->dsp.s##mc, s->dsp.mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
-                     row, col, mv, px, py, pw, ph, bw, bh, w, h, bytesperpixel, \
-                     s->mvscale[b->ref[i]], s->mvstep[b->ref[i]])
-#define SCALED 1
-#define FN(x) x##_scaled_8bpp
-#define BYTES_PER_PIXEL 1
-#include "vp9_mc_template.c"
-#undef FN
-#undef BYTES_PER_PIXEL
-#define FN(x) x##_scaled_16bpp
-#define BYTES_PER_PIXEL 2
-#include "vp9_mc_template.c"
-#undef mc_luma_dir
-#undef mc_chroma_dir
-#undef FN
-#undef BYTES_PER_PIXEL
-#undef SCALED
-
-static av_always_inline void inter_recon(AVCodecContext *avctx, int bytesperpixel)
-{
-    VP9Context *s = avctx->priv_data;
-    VP9Block *b = s->b;
-    int row = s->row, col = s->col;
-
-    if (s->mvscale[b->ref[0]][0] || (b->comp && s->mvscale[b->ref[1]][0])) {
-        if (bytesperpixel == 1) {
-            inter_pred_scaled_8bpp(avctx);
-        } else {
-            inter_pred_scaled_16bpp(avctx);
-        }
-    } else {
-        if (bytesperpixel == 1) {
-            inter_pred_8bpp(avctx);
-        } else {
-            inter_pred_16bpp(avctx);
-        }
-    }
-
-    if (!b->skip) {
-        /* mostly copied intra_recon() */
-
-        int w4 = bwh_tab[1][b->bs][0] << 1, step1d = 1 << b->tx, n;
-        int h4 = bwh_tab[1][b->bs][1] << 1, x, y, step = 1 << (b->tx * 2);
-        int end_x = FFMIN(2 * (s->cols - col), w4);
-        int end_y = FFMIN(2 * (s->rows - row), h4);
-        int tx = 4 * s->s.h.lossless + b->tx, uvtx = b->uvtx + 4 * s->s.h.lossless;
-        int uvstep1d = 1 << b->uvtx, p;
-        uint8_t *dst = s->dst[0];
-
-        // y itxfm add
-        for (n = 0, y = 0; y < end_y; y += step1d) {
-            uint8_t *ptr = dst;
-            for (x = 0; x < end_x; x += step1d,
-                 ptr += 4 * step1d * bytesperpixel, n += step) {
-                int eob = b->tx > TX_8X8 ? AV_RN16A(&s->eob[n]) : s->eob[n];
-
-                if (eob)
-                    s->dsp.itxfm_add[tx][DCT_DCT](ptr, s->y_stride,
-                                                  s->block + 16 * n * bytesperpixel, eob);
-            }
-            dst += 4 * s->y_stride * step1d;
-        }
-
-        // uv itxfm add
-        end_x >>= s->ss_h;
-        end_y >>= s->ss_v;
-        step = 1 << (b->uvtx * 2);
-        for (p = 0; p < 2; p++) {
-            dst = s->dst[p + 1];
-            for (n = 0, y = 0; y < end_y; y += uvstep1d) {
-                uint8_t *ptr = dst;
-                for (x = 0; x < end_x; x += uvstep1d,
-                     ptr += 4 * uvstep1d * bytesperpixel, n += step) {
-                    int eob = b->uvtx > TX_8X8 ? AV_RN16A(&s->uveob[p][n]) : s->uveob[p][n];
-
-                    if (eob)
-                        s->dsp.itxfm_add[uvtx][DCT_DCT](ptr, s->uv_stride,
-                                                        s->uvblock[p] + 16 * n * bytesperpixel, eob);
-                }
-                dst += 4 * uvstep1d * s->uv_stride;
-            }
-        }
-    }
-}
-
-static void inter_recon_8bpp(AVCodecContext *avctx)
-{
-    inter_recon(avctx, 1);
-}
-
-static void inter_recon_16bpp(AVCodecContext *avctx)
-{
-    inter_recon(avctx, 2);
-}
-
 static av_always_inline void mask_edges(uint8_t (*mask)[8][4], int ss_h, int ss_v,
                                         int row_and_7, int col_and_7,
                                         int w, int h, int col_end, int row_end,
...
@@ -1891,7 +1272,7 @@ void ff_vp9_decode_block(AVCodecContext *avctx, int row, int col,
     VP9Block *b = s->b;
     enum BlockSize bs = bl * 3 + bp;
     int bytesperpixel = s->bytesperpixel;
-    int w4 = bwh_tab[1][bs][0], h4 = bwh_tab[1][bs][1], lvl;
+    int w4 = ff_vp9_bwh_tab[1][bs][0], h4 = ff_vp9_bwh_tab[1][bs][1], lvl;
     int emu[2];
     AVFrame *f = s->s.frames[CUR_FRAME].tf.f;

...
@@ -2001,15 +1382,15 @@ void ff_vp9_decode_block(AVCodecContext *avctx, int row, int col,
     }
     if (b->intra) {
         if (s->s.h.bpp > 8) {
-            intra_recon_16bpp(avctx, yoff, uvoff);
+            ff_vp9_intra_recon_16bpp(avctx, yoff, uvoff);
         } else {
-            intra_recon_8bpp(avctx, yoff, uvoff);
+            ff_vp9_intra_recon_8bpp(avctx, yoff, uvoff);
         }
     } else {
         if (s->s.h.bpp > 8) {
-            inter_recon_16bpp(avctx);
+            ff_vp9_inter_recon_16bpp(avctx);
         } else {
-            inter_recon_8bpp(avctx);
+            ff_vp9_inter_recon_8bpp(avctx);
         }
     }
     if (emu[0]) {
...
@@ -22,6 +22,16 @@
 #include "vp9.h"
 #include "vp9data.h"

+const uint8_t ff_vp9_bwh_tab[2][N_BS_SIZES][2] = {
+    {
+        { 16, 16 }, { 16, 8 }, { 8, 16 }, { 8, 8 }, { 8, 4 }, { 4, 8 },
+        { 4, 4 }, { 4, 2 }, { 2, 4 }, { 2, 2 }, { 2, 1 }, { 1, 2 }, { 1, 1 },
+    }, {
+        { 8, 8 }, { 8, 4 }, { 4, 8 }, { 4, 4 }, { 4, 2 }, { 2, 4 },
+        { 2, 2 }, { 2, 1 }, { 1, 2 }, { 1, 1 }, { 1, 1 }, { 1, 1 }, { 1, 1 },
+    }
+};
+
 const int8_t ff_vp9_partition_tree[3][2] = {
     { -PARTITION_NONE, 1 },         // '0'
     { -PARTITION_H, 2 },            // '10'
...
@@ -26,6 +26,7 @@

 #include "vp9dec.h"

+extern const uint8_t ff_vp9_bwh_tab[2][N_BS_SIZES][2];
 extern const int8_t ff_vp9_partition_tree[3][2];
 extern const uint8_t ff_vp9_default_kf_partition_probs[4][4][3];
 extern const int8_t ff_vp9_segmentation_tree[7][2];
...
@@ -206,4 +206,11 @@ void ff_vp9_decode_block(AVCodecContext *ctx, int row, int col,
 void ff_vp9_loopfilter_sb(AVCodecContext *avctx, VP9Filter *lflvl,
                           int row, int col, ptrdiff_t yoff, ptrdiff_t uvoff);

+void ff_vp9_intra_recon_8bpp(AVCodecContext *avctx,
+                             ptrdiff_t y_off, ptrdiff_t uv_off);
+void ff_vp9_intra_recon_16bpp(AVCodecContext *avctx,
+                              ptrdiff_t y_off, ptrdiff_t uv_off);
+void ff_vp9_inter_recon_8bpp(AVCodecContext *avctx);
+void ff_vp9_inter_recon_16bpp(AVCodecContext *avctx);
+
 #endif /* AVCODEC_VP9DEC_H */
...
new file mode 100644
@@ -0,0 +1,639 @@ |
0 |
+/* |
|
1 |
+ * VP9 compatible video decoder |
|
2 |
+ * |
|
3 |
+ * Copyright (C) 2013 Ronald S. Bultje <rsbultje gmail com> |
|
4 |
+ * Copyright (C) 2013 Clément Bœsch <u pkh me> |
|
5 |
+ * |
|
6 |
+ * This file is part of FFmpeg. |
|
7 |
+ * |
|
8 |
+ * FFmpeg is free software; you can redistribute it and/or |
|
9 |
+ * modify it under the terms of the GNU Lesser General Public |
|
10 |
+ * License as published by the Free Software Foundation; either |
|
11 |
+ * version 2.1 of the License, or (at your option) any later version. |
|
12 |
+ * |
|
13 |
+ * FFmpeg is distributed in the hope that it will be useful, |
|
14 |
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
15 |
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|
16 |
+ * Lesser General Public License for more details. |
|
17 |
+ * |
|
18 |
+ * You should have received a copy of the GNU Lesser General Public |
|
19 |
+ * License along with FFmpeg; if not, write to the Free Software |
|
20 |
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
|
21 |
+ */ |
|
22 |
+ |
|
23 |
+#include "libavutil/avassert.h" |
|
24 |
+ |
|
25 |
+#include "avcodec.h" |
|
26 |
+#include "internal.h" |
|
27 |
+#include "videodsp.h" |
|
28 |
+#include "vp9data.h" |
|
29 |
+#include "vp9dec.h" |
|
30 |
+ |
|
31 |
+static av_always_inline int check_intra_mode(VP9Context *s, int mode, uint8_t **a, |
|
32 |
+ uint8_t *dst_edge, ptrdiff_t stride_edge, |
|
33 |
+ uint8_t *dst_inner, ptrdiff_t stride_inner, |
|
34 |
+ uint8_t *l, int col, int x, int w, |
|
35 |
+ int row, int y, enum TxfmMode tx, |
|
36 |
+ int p, int ss_h, int ss_v, int bytesperpixel) |
|
37 |
+{ |
|
38 |
+ int have_top = row > 0 || y > 0; |
|
39 |
+ int have_left = col > s->tile_col_start || x > 0; |
|
40 |
+ int have_right = x < w - 1; |
|
41 |
+ int bpp = s->s.h.bpp; |
|
42 |
+ static const uint8_t mode_conv[10][2 /* have_left */][2 /* have_top */] = { |
|
43 |
+ [VERT_PRED] = { { DC_127_PRED, VERT_PRED }, |
|
44 |
+ { DC_127_PRED, VERT_PRED } }, |
|
45 |
+ [HOR_PRED] = { { DC_129_PRED, DC_129_PRED }, |
|
46 |
+ { HOR_PRED, HOR_PRED } }, |
|
47 |
+ [DC_PRED] = { { DC_128_PRED, TOP_DC_PRED }, |
|
48 |
+ { LEFT_DC_PRED, DC_PRED } }, |
|
49 |
+ [DIAG_DOWN_LEFT_PRED] = { { DC_127_PRED, DIAG_DOWN_LEFT_PRED }, |
|
50 |
+ { DC_127_PRED, DIAG_DOWN_LEFT_PRED } }, |
|
51 |
+ [DIAG_DOWN_RIGHT_PRED] = { { DIAG_DOWN_RIGHT_PRED, DIAG_DOWN_RIGHT_PRED }, |
|
52 |
+ { DIAG_DOWN_RIGHT_PRED, DIAG_DOWN_RIGHT_PRED } }, |
|
53 |
+ [VERT_RIGHT_PRED] = { { VERT_RIGHT_PRED, VERT_RIGHT_PRED }, |
|
54 |
+ { VERT_RIGHT_PRED, VERT_RIGHT_PRED } }, |
|
55 |
+ [HOR_DOWN_PRED] = { { HOR_DOWN_PRED, HOR_DOWN_PRED }, |
|
56 |
+ { HOR_DOWN_PRED, HOR_DOWN_PRED } }, |
|
57 |
+ [VERT_LEFT_PRED] = { { DC_127_PRED, VERT_LEFT_PRED }, |
|
58 |
+ { DC_127_PRED, VERT_LEFT_PRED } }, |
|
59 |
+ [HOR_UP_PRED] = { { DC_129_PRED, DC_129_PRED }, |
|
60 |
+ { HOR_UP_PRED, HOR_UP_PRED } }, |
|
61 |
+ [TM_VP8_PRED] = { { DC_129_PRED, VERT_PRED }, |
|
62 |
+ { HOR_PRED, TM_VP8_PRED } }, |
|
63 |
+ }; |
|
64 |
+ static const struct { |
|
65 |
+ uint8_t needs_left:1; |
|
66 |
+ uint8_t needs_top:1; |
|
67 |
+ uint8_t needs_topleft:1; |
|
68 |
+ uint8_t needs_topright:1; |
|
69 |
+ uint8_t invert_left:1; |
|
70 |
+ } edges[N_INTRA_PRED_MODES] = { |
|
71 |
+ [VERT_PRED] = { .needs_top = 1 }, |
|
72 |
+ [HOR_PRED] = { .needs_left = 1 }, |
|
73 |
+ [DC_PRED] = { .needs_top = 1, .needs_left = 1 }, |
|
74 |
+ [DIAG_DOWN_LEFT_PRED] = { .needs_top = 1, .needs_topright = 1 }, |
|
75 |
+ [DIAG_DOWN_RIGHT_PRED] = { .needs_left = 1, .needs_top = 1, |
|
76 |
+ .needs_topleft = 1 }, |
|
77 |
+ [VERT_RIGHT_PRED] = { .needs_left = 1, .needs_top = 1, |
|
78 |
+ .needs_topleft = 1 }, |
|
79 |
+ [HOR_DOWN_PRED] = { .needs_left = 1, .needs_top = 1, |
|
80 |
+ .needs_topleft = 1 }, |
|
81 |
+ [VERT_LEFT_PRED] = { .needs_top = 1, .needs_topright = 1 }, |
|
82 |
+ [HOR_UP_PRED] = { .needs_left = 1, .invert_left = 1 }, |
|
83 |
+ [TM_VP8_PRED] = { .needs_left = 1, .needs_top = 1, |
|
84 |
+ .needs_topleft = 1 }, |
|
85 |
+ [LEFT_DC_PRED] = { .needs_left = 1 }, |
|
86 |
+ [TOP_DC_PRED] = { .needs_top = 1 }, |
|
87 |
+ [DC_128_PRED] = { 0 }, |
|
88 |
+ [DC_127_PRED] = { 0 }, |
|
89 |
+ [DC_129_PRED] = { 0 } |
|
90 |
+ }; |
|
91 |
+ |
|
92 |
+ av_assert2(mode >= 0 && mode < 10); |
|
93 |
+ mode = mode_conv[mode][have_left][have_top]; |
|
94 |
+ if (edges[mode].needs_top) { |
|
95 |
+ uint8_t *top, *topleft; |
|
96 |
+ int n_px_need = 4 << tx, n_px_have = (((s->cols - col) << !ss_h) - x) * 4; |
|
97 |
+ int n_px_need_tr = 0; |
|
98 |
+ |
|
99 |
+ if (tx == TX_4X4 && edges[mode].needs_topright && have_right) |
|
100 |
+ n_px_need_tr = 4; |
|
101 |
+ |
|
102 |
+ // if top of sb64-row, use s->intra_pred_data[] instead of |
|
103 |
+ // dst[-stride] for intra prediction (it contains pre- instead of |
|
104 |
+ // post-loopfilter data) |
|
105 |
+ if (have_top) { |
|
106 |
+ top = !(row & 7) && !y ? |
|
107 |
+ s->intra_pred_data[p] + (col * (8 >> ss_h) + x * 4) * bytesperpixel : |
|
108 |
+ y == 0 ? &dst_edge[-stride_edge] : &dst_inner[-stride_inner]; |
|
109 |
+ if (have_left) |
|
110 |
+ topleft = !(row & 7) && !y ? |
|
111 |
+ s->intra_pred_data[p] + (col * (8 >> ss_h) + x * 4) * bytesperpixel : |
|
112 |
+ y == 0 || x == 0 ? &dst_edge[-stride_edge] : |
|
113 |
+ &dst_inner[-stride_inner]; |
|
114 |
+ } |
|
115 |
+ |
|
116 |
+ if (have_top && |
|
117 |
+ (!edges[mode].needs_topleft || (have_left && top == topleft)) && |
|
118 |
+ (tx != TX_4X4 || !edges[mode].needs_topright || have_right) && |
|
119 |
+ n_px_need + n_px_need_tr <= n_px_have) { |
|
120 |
+ *a = top; |
|
121 |
+ } else { |
|
122 |
+ if (have_top) { |
|
123 |
+ if (n_px_need <= n_px_have) { |
|
124 |
+ memcpy(*a, top, n_px_need * bytesperpixel); |
|
125 |
+ } else { |
|
126 |
+#define memset_bpp(c, i1, v, i2, num) do { \ |
|
127 |
+ if (bytesperpixel == 1) { \ |
|
128 |
+ memset(&(c)[(i1)], (v)[(i2)], (num)); \ |
|
129 |
+ } else { \ |
|
130 |
+ int n, val = AV_RN16A(&(v)[(i2) * 2]); \ |
|
131 |
+ for (n = 0; n < (num); n++) { \ |
|
132 |
+ AV_WN16A(&(c)[((i1) + n) * 2], val); \ |
|
133 |
+ } \ |
|
134 |
+ } \ |
|
135 |
+} while (0) |
|
136 |
+ memcpy(*a, top, n_px_have * bytesperpixel); |
|
137 |
+ memset_bpp(*a, n_px_have, (*a), n_px_have - 1, n_px_need - n_px_have); |
|
138 |
+ } |
|
139 |
+ } else { |
|
140 |
+#define memset_val(c, val, num) do { \ |
|
141 |
+ if (bytesperpixel == 1) { \ |
|
142 |
+ memset((c), (val), (num)); \ |
|
143 |
+ } else { \ |
|
144 |
+ int n; \ |
|
145 |
+ for (n = 0; n < (num); n++) { \ |
|
146 |
+ AV_WN16A(&(c)[n * 2], (val)); \ |
|
147 |
+ } \ |
|
148 |
+ } \ |
|
149 |
+} while (0) |
|
150 |
+ memset_val(*a, (128 << (bpp - 8)) - 1, n_px_need); |
|
151 |
+ } |
|
152 |
+ if (edges[mode].needs_topleft) { |
|
153 |
+ if (have_left && have_top) { |
|
154 |
+#define assign_bpp(c, i1, v, i2) do { \ |
|
155 |
+ if (bytesperpixel == 1) { \ |
|
156 |
+ (c)[(i1)] = (v)[(i2)]; \ |
|
157 |
+ } else { \ |
|
158 |
+ AV_COPY16(&(c)[(i1) * 2], &(v)[(i2) * 2]); \ |
|
159 |
+ } \ |
|
160 |
+} while (0) |
|
161 |
+ assign_bpp(*a, -1, topleft, -1); |
|
162 |
+ } else { |
|
163 |
+#define assign_val(c, i, v) do { \ |
|
164 |
+ if (bytesperpixel == 1) { \ |
|
165 |
+ (c)[(i)] = (v); \ |
|
166 |
+ } else { \ |
|
167 |
+ AV_WN16A(&(c)[(i) * 2], (v)); \ |
|
168 |
+ } \ |
|
169 |
+} while (0) |
|
170 |
+ assign_val((*a), -1, (128 << (bpp - 8)) + (have_top ? +1 : -1)); |
|
171 |
+ } |
|
172 |
+ } |
|
173 |
+ if (tx == TX_4X4 && edges[mode].needs_topright) { |
|
174 |
+ if (have_top && have_right && |
|
175 |
+ n_px_need + n_px_need_tr <= n_px_have) { |
|
176 |
+ memcpy(&(*a)[4 * bytesperpixel], &top[4 * bytesperpixel], 4 * bytesperpixel); |
|
177 |
+ } else { |
|
178 |
+ memset_bpp(*a, 4, *a, 3, 4); |
|
179 |
+ } |
|
180 |
+ } |
|
181 |
+ } |
|
182 |
+ } |
|
183 |
+ if (edges[mode].needs_left) { |
|
184 |
+ if (have_left) { |
|
185 |
+ int n_px_need = 4 << tx, i, n_px_have = (((s->rows - row) << !ss_v) - y) * 4; |
|
186 |
+ uint8_t *dst = x == 0 ? dst_edge : dst_inner; |
|
187 |
+ ptrdiff_t stride = x == 0 ? stride_edge : stride_inner; |
|
188 |
+ |
|
189 |
+ if (edges[mode].invert_left) { |
|
190 |
+ if (n_px_need <= n_px_have) { |
|
191 |
+ for (i = 0; i < n_px_need; i++) |
|
192 |
+ assign_bpp(l, i, &dst[i * stride], -1); |
|
193 |
+ } else { |
|
194 |
+ for (i = 0; i < n_px_have; i++) |
|
195 |
+ assign_bpp(l, i, &dst[i * stride], -1); |
|
196 |
+ memset_bpp(l, n_px_have, l, n_px_have - 1, n_px_need - n_px_have); |
|
197 |
+ } |
|
198 |
+ } else { |
|
199 |
+ if (n_px_need <= n_px_have) { |
|
200 |
+ for (i = 0; i < n_px_need; i++) |
|
201 |
+ assign_bpp(l, n_px_need - 1 - i, &dst[i * stride], -1); |
|
202 |
+ } else { |
|
203 |
+ for (i = 0; i < n_px_have; i++) |
|
204 |
+ assign_bpp(l, n_px_need - 1 - i, &dst[i * stride], -1); |
|
205 |
+ memset_bpp(l, 0, l, n_px_need - n_px_have, n_px_need - n_px_have); |
|
206 |
+ } |
|
207 |
+ } |
|
208 |
+ } else { |
|
209 |
+ memset_val(l, (128 << (bpp - 8)) + 1, 4 << tx); |
|
210 |
+ } |
|
211 |
+ } |
|
212 |
+ |
|
213 |
+ return mode; |
|
214 |
+} |
|
215 |
+ |
|
216 |
+static av_always_inline void intra_recon(AVCodecContext *avctx, ptrdiff_t y_off, |
|
217 |
+ ptrdiff_t uv_off, int bytesperpixel) |
|
218 |
+{ |
|
219 |
+ VP9Context *s = avctx->priv_data; |
|
220 |
+ VP9Block *b = s->b; |
|
221 |
+ int row = s->row, col = s->col; |
|
222 |
+ int w4 = ff_vp9_bwh_tab[1][b->bs][0] << 1, step1d = 1 << b->tx, n; |
|
223 |
+ int h4 = ff_vp9_bwh_tab[1][b->bs][1] << 1, x, y, step = 1 << (b->tx * 2); |
|
224 |
+ int end_x = FFMIN(2 * (s->cols - col), w4); |
|
225 |
+ int end_y = FFMIN(2 * (s->rows - row), h4); |
|
226 |
+ int tx = 4 * s->s.h.lossless + b->tx, uvtx = b->uvtx + 4 * s->s.h.lossless; |
|
227 |
+ int uvstep1d = 1 << b->uvtx, p; |
|
228 |
+ uint8_t *dst = s->dst[0], *dst_r = s->s.frames[CUR_FRAME].tf.f->data[0] + y_off; |
|
229 |
+ LOCAL_ALIGNED_32(uint8_t, a_buf, [96]); |
|
230 |
+ LOCAL_ALIGNED_32(uint8_t, l, [64]); |
|
231 |
+ |
|
232 |
+ for (n = 0, y = 0; y < end_y; y += step1d) { |
|
233 |
+ uint8_t *ptr = dst, *ptr_r = dst_r; |
|
234 |
+ for (x = 0; x < end_x; x += step1d, ptr += 4 * step1d * bytesperpixel, |
|
235 |
+ ptr_r += 4 * step1d * bytesperpixel, n += step) { |
|
236 |
+ int mode = b->mode[b->bs > BS_8x8 && b->tx == TX_4X4 ? |
|
237 |
+ y * 2 + x : 0]; |
|
238 |
+ uint8_t *a = &a_buf[32]; |
|
239 |
+ enum TxfmType txtp = ff_vp9_intra_txfm_type[mode]; |
|
240 |
+ int eob = b->skip ? 0 : b->tx > TX_8X8 ? AV_RN16A(&s->eob[n]) : s->eob[n]; |
|
241 |
+ |
|
242 |
+ mode = check_intra_mode(s, mode, &a, ptr_r, |
|
243 |
+ s->s.frames[CUR_FRAME].tf.f->linesize[0], |
|
244 |
+ ptr, s->y_stride, l, |
|
245 |
+ col, x, w4, row, y, b->tx, 0, 0, 0, bytesperpixel); |
|
246 |
+ s->dsp.intra_pred[b->tx][mode](ptr, s->y_stride, l, a); |
|
247 |
+ if (eob) |
|
248 |
+ s->dsp.itxfm_add[tx][txtp](ptr, s->y_stride, |
|
249 |
+ s->block + 16 * n * bytesperpixel, eob); |
|
250 |
+ } |
|
251 |
+ dst_r += 4 * step1d * s->s.frames[CUR_FRAME].tf.f->linesize[0]; |
|
252 |
+ dst += 4 * step1d * s->y_stride; |
|
253 |
+ } |
|
254 |
+ |
|
255 |
+ // U/V |
|
256 |
+ w4 >>= s->ss_h; |
|
257 |
+ end_x >>= s->ss_h; |
|
258 |
+ end_y >>= s->ss_v; |
|
259 |
+ step = 1 << (b->uvtx * 2); |
|
260 |
+ for (p = 0; p < 2; p++) { |
|
261 |
+ dst = s->dst[1 + p]; |
|
262 |
+ dst_r = s->s.frames[CUR_FRAME].tf.f->data[1 + p] + uv_off; |
|
263 |
+ for (n = 0, y = 0; y < end_y; y += uvstep1d) { |
|
264 |
+ uint8_t *ptr = dst, *ptr_r = dst_r; |
|
265 |
+ for (x = 0; x < end_x; x += uvstep1d, ptr += 4 * uvstep1d * bytesperpixel, |
|
266 |
+ ptr_r += 4 * uvstep1d * bytesperpixel, n += step) { |
|
267 |
+ int mode = b->uvmode; |
|
268 |
+ uint8_t *a = &a_buf[32]; |
|
269 |
+ int eob = b->skip ? 0 : b->uvtx > TX_8X8 ? AV_RN16A(&s->uveob[p][n]) : s->uveob[p][n]; |
|
270 |
+ |
|
271 |
+ mode = check_intra_mode(s, mode, &a, ptr_r, |
|
272 |
+ s->s.frames[CUR_FRAME].tf.f->linesize[1], |
|
273 |
+ ptr, s->uv_stride, l, col, x, w4, row, y, |
|
274 |
+ b->uvtx, p + 1, s->ss_h, s->ss_v, bytesperpixel); |
|
275 |
+ s->dsp.intra_pred[b->uvtx][mode](ptr, s->uv_stride, l, a); |
|
276 |
+ if (eob) |
|
277 |
+ s->dsp.itxfm_add[uvtx][DCT_DCT](ptr, s->uv_stride, |
|
278 |
+ s->uvblock[p] + 16 * n * bytesperpixel, eob); |
|
279 |
+ } |
|
280 |
+ dst_r += 4 * uvstep1d * s->s.frames[CUR_FRAME].tf.f->linesize[1]; |
|
281 |
+ dst += 4 * uvstep1d * s->uv_stride; |
|
282 |
+ } |
|
283 |
+ } |
|
284 |
+} |
|
285 |
+ |
|
286 |
+void ff_vp9_intra_recon_8bpp(AVCodecContext *avctx, ptrdiff_t y_off, ptrdiff_t uv_off) |
|
287 |
+{ |
|
288 |
+ intra_recon(avctx, y_off, uv_off, 1); |
|
289 |
+} |
|
290 |
+ |
|
291 |
+void ff_vp9_intra_recon_16bpp(AVCodecContext *avctx, ptrdiff_t y_off, ptrdiff_t uv_off) |
|
292 |
+{ |
|
293 |
+ intra_recon(avctx, y_off, uv_off, 2); |
|
294 |
+} |
|
295 |
+ |
|
296 |
+static av_always_inline void mc_luma_unscaled(VP9Context *s, vp9_mc_func (*mc)[2], |
|
297 |
+ uint8_t *dst, ptrdiff_t dst_stride, |
|
298 |
+ const uint8_t *ref, ptrdiff_t ref_stride, |
|
299 |
+ ThreadFrame *ref_frame, |
|
300 |
+ ptrdiff_t y, ptrdiff_t x, const VP56mv *mv, |
|
301 |
+ int bw, int bh, int w, int h, int bytesperpixel) |
|
302 |
+{ |
|
303 |
+ int mx = mv->x, my = mv->y, th; |
|
304 |
+ |
|
305 |
+ y += my >> 3; |
|
306 |
+ x += mx >> 3; |
|
307 |
+ ref += y * ref_stride + x * bytesperpixel; |
|
308 |
+ mx &= 7; |
|
309 |
+ my &= 7; |
|
310 |
+ // FIXME bilinear filter only needs 0/1 pixels, not 3/4 |
|
311 |
+ // we use +7 because the last 7 pixels of each sbrow can be changed in |
|
312 |
+ // the longest loopfilter of the next sbrow |
|
313 |
+ th = (y + bh + 4 * !!my + 7) >> 6; |
|
314 |
+ ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0); |
|
315 |
+ // The arm/aarch64 _hv filters read one more row than what actually is |
|
316 |
+ // needed, so switch to emulated edge one pixel sooner vertically |
|
317 |
+ // (!!my * 5) than horizontally (!!mx * 4). |
|
318 |
+ if (x < !!mx * 3 || y < !!my * 3 || |
|
319 |
+ x + !!mx * 4 > w - bw || y + !!my * 5 > h - bh) { |
|
320 |
+ s->vdsp.emulated_edge_mc(s->edge_emu_buffer, |
|
321 |
+ ref - !!my * 3 * ref_stride - !!mx * 3 * bytesperpixel, |
|
322 |
+ 160, ref_stride, |
|
323 |
+ bw + !!mx * 7, bh + !!my * 7, |
|
324 |
+ x - !!mx * 3, y - !!my * 3, w, h); |
|
325 |
+ ref = s->edge_emu_buffer + !!my * 3 * 160 + !!mx * 3 * bytesperpixel; |
|
326 |
+ ref_stride = 160; |
|
327 |
+ } |
|
328 |
+ mc[!!mx][!!my](dst, dst_stride, ref, ref_stride, bh, mx << 1, my << 1); |
|
329 |
+} |
|
330 |
+ |
|
331 |
+static av_always_inline void mc_chroma_unscaled(VP9Context *s, vp9_mc_func (*mc)[2], |
|
332 |
+ uint8_t *dst_u, uint8_t *dst_v, |
|
333 |
+ ptrdiff_t dst_stride, |
|
334 |
+ const uint8_t *ref_u, ptrdiff_t src_stride_u, |
|
335 |
+ const uint8_t *ref_v, ptrdiff_t src_stride_v, |
|
336 |
+ ThreadFrame *ref_frame, |
|
337 |
+ ptrdiff_t y, ptrdiff_t x, const VP56mv *mv, |
|
338 |
+ int bw, int bh, int w, int h, int bytesperpixel) |
|
339 |
+{ |
|
340 |
+ int mx = mv->x * (1 << !s->ss_h), my = mv->y * (1 << !s->ss_v), th; |
|
341 |
+ |
|
342 |
+ y += my >> 4; |
|
343 |
+ x += mx >> 4; |
|
344 |
+ ref_u += y * src_stride_u + x * bytesperpixel; |
|
345 |
+ ref_v += y * src_stride_v + x * bytesperpixel; |
|
346 |
+ mx &= 15; |
|
347 |
+ my &= 15; |
|
348 |
+ // FIXME bilinear filter only needs 0/1 pixels, not 3/4 |
|
349 |
+ // we use +7 because the last 7 pixels of each sbrow can be changed in |
|
350 |
+ // the longest loopfilter of the next sbrow |
|
351 |
+ th = (y + bh + 4 * !!my + 7) >> (6 - s->ss_v); |
|
352 |
+ ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0); |
|
353 |
+ // The arm/aarch64 _hv filters read one more row than what actually is |
|
354 |
+ // needed, so switch to emulated edge one pixel sooner vertically |
|
355 |
+ // (!!my * 5) than horizontally (!!mx * 4). |
|
356 |
+ if (x < !!mx * 3 || y < !!my * 3 || |
|
357 |
+ x + !!mx * 4 > w - bw || y + !!my * 5 > h - bh) { |
|
358 |
+ s->vdsp.emulated_edge_mc(s->edge_emu_buffer, |
|
359 |
+ ref_u - !!my * 3 * src_stride_u - !!mx * 3 * bytesperpixel, |
|
360 |
+ 160, src_stride_u, |
|
361 |
+ bw + !!mx * 7, bh + !!my * 7, |
|
362 |
+ x - !!mx * 3, y - !!my * 3, w, h); |
|
363 |
+ ref_u = s->edge_emu_buffer + !!my * 3 * 160 + !!mx * 3 * bytesperpixel; |
|
364 |
+ mc[!!mx][!!my](dst_u, dst_stride, ref_u, 160, bh, mx, my); |
|
365 |
+ |
|
366 |
+ s->vdsp.emulated_edge_mc(s->edge_emu_buffer, |
|
367 |
+ ref_v - !!my * 3 * src_stride_v - !!mx * 3 * bytesperpixel, |
|
368 |
+ 160, src_stride_v, |
|
369 |
+ bw + !!mx * 7, bh + !!my * 7, |
|
370 |
+ x - !!mx * 3, y - !!my * 3, w, h); |
|
371 |
+ ref_v = s->edge_emu_buffer + !!my * 3 * 160 + !!mx * 3 * bytesperpixel; |
|
372 |
+ mc[!!mx][!!my](dst_v, dst_stride, ref_v, 160, bh, mx, my); |
|
373 |
+ } else { |
|
374 |
+ mc[!!mx][!!my](dst_u, dst_stride, ref_u, src_stride_u, bh, mx, my); |
|
375 |
+ mc[!!mx][!!my](dst_v, dst_stride, ref_v, src_stride_v, bh, mx, my); |
|
376 |
+ } |
|
377 |
+} |
|
378 |
+ |
|
379 |
+#define mc_luma_dir(s, mc, dst, dst_ls, src, src_ls, tref, row, col, mv, \ |
|
380 |
+ px, py, pw, ph, bw, bh, w, h, i) \ |
|
381 |
+ mc_luma_unscaled(s, s->dsp.mc, dst, dst_ls, src, src_ls, tref, row, col, \ |
|
382 |
+ mv, bw, bh, w, h, bytesperpixel) |
|
383 |
+#define mc_chroma_dir(s, mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \ |
|
384 |
+ row, col, mv, px, py, pw, ph, bw, bh, w, h, i) \ |
|
385 |
+ mc_chroma_unscaled(s, s->dsp.mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \ |
|
386 |
+ row, col, mv, bw, bh, w, h, bytesperpixel) |
|
387 |
+#define SCALED 0 |
|
388 |
+#define FN(x) x##_8bpp |
|
389 |
+#define BYTES_PER_PIXEL 1 |
|
390 |
+#include "vp9_mc_template.c" |
|
391 |
+#undef FN |
|
392 |
+#undef BYTES_PER_PIXEL |
|
393 |
+#define FN(x) x##_16bpp |
|
394 |
+#define BYTES_PER_PIXEL 2 |
|
395 |
+#include "vp9_mc_template.c" |
|
396 |
+#undef mc_luma_dir |
|
397 |
+#undef mc_chroma_dir |
|
398 |
+#undef FN |
|
399 |
+#undef BYTES_PER_PIXEL |
|
400 |
+#undef SCALED |
|
401 |
+ |
|
402 |
+static av_always_inline void mc_luma_scaled(VP9Context *s, vp9_scaled_mc_func smc, |
|
403 |
+ vp9_mc_func (*mc)[2], |
|
404 |
+ uint8_t *dst, ptrdiff_t dst_stride, |
|
405 |
+ const uint8_t *ref, ptrdiff_t ref_stride, |
|
406 |
+ ThreadFrame *ref_frame, |
|
407 |
+ ptrdiff_t y, ptrdiff_t x, const VP56mv *in_mv, |
|
408 |
+ int px, int py, int pw, int ph, |
|
409 |
+ int bw, int bh, int w, int h, int bytesperpixel, |
|
410 |
+ const uint16_t *scale, const uint8_t *step) |
|
411 |
+{ |
|
412 |
+ if (s->s.frames[CUR_FRAME].tf.f->width == ref_frame->f->width && |
|
413 |
+ s->s.frames[CUR_FRAME].tf.f->height == ref_frame->f->height) { |
|
414 |
+ mc_luma_unscaled(s, mc, dst, dst_stride, ref, ref_stride, ref_frame, |
|
415 |
+ y, x, in_mv, bw, bh, w, h, bytesperpixel); |
|
416 |
+ } else { |
|
417 |
+#define scale_mv(n, dim) (((int64_t)(n) * scale[dim]) >> 14)
+        int mx, my;
+        int refbw_m1, refbh_m1;
+        int th;
+        VP56mv mv;
+
+        mv.x = av_clip(in_mv->x, -(x + pw - px + 4) * 8, (s->cols * 8 - x + px + 3) * 8);
+        mv.y = av_clip(in_mv->y, -(y + ph - py + 4) * 8, (s->rows * 8 - y + py + 3) * 8);
+        // BUG libvpx seems to scale the two components separately. This introduces
+        // rounding errors but we have to reproduce them to be exactly compatible
+        // with the output from libvpx...
+        mx = scale_mv(mv.x * 2, 0) + scale_mv(x * 16, 0);
+        my = scale_mv(mv.y * 2, 1) + scale_mv(y * 16, 1);
+
+        y = my >> 4;
+        x = mx >> 4;
+        ref += y * ref_stride + x * bytesperpixel;
+        mx &= 15;
+        my &= 15;
+        refbw_m1 = ((bw - 1) * step[0] + mx) >> 4;
+        refbh_m1 = ((bh - 1) * step[1] + my) >> 4;
+        // FIXME bilinear filter only needs 0/1 pixels, not 3/4
+        // we use +7 because the last 7 pixels of each sbrow can be changed in
+        // the longest loopfilter of the next sbrow
+        th = (y + refbh_m1 + 4 + 7) >> 6;
+        ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
+        // The arm/aarch64 _hv filters read one more row than what actually is
+        // needed, so switch to emulated edge one pixel sooner vertically
+        // (y + 5 >= h - refbh_m1) than horizontally (x + 4 >= w - refbw_m1).
+        if (x < 3 || y < 3 || x + 4 >= w - refbw_m1 || y + 5 >= h - refbh_m1) {
+            s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
+                                     ref - 3 * ref_stride - 3 * bytesperpixel,
+                                     288, ref_stride,
+                                     refbw_m1 + 8, refbh_m1 + 8,
+                                     x - 3, y - 3, w, h);
+            ref = s->edge_emu_buffer + 3 * 288 + 3 * bytesperpixel;
+            ref_stride = 288;
+        }
+        smc(dst, dst_stride, ref, ref_stride, bh, mx, my, step[0], step[1]);
+    }
+}
+
+static av_always_inline void mc_chroma_scaled(VP9Context *s, vp9_scaled_mc_func smc,
+                                              vp9_mc_func (*mc)[2],
+                                              uint8_t *dst_u, uint8_t *dst_v,
+                                              ptrdiff_t dst_stride,
+                                              const uint8_t *ref_u, ptrdiff_t src_stride_u,
+                                              const uint8_t *ref_v, ptrdiff_t src_stride_v,
+                                              ThreadFrame *ref_frame,
+                                              ptrdiff_t y, ptrdiff_t x, const VP56mv *in_mv,
+                                              int px, int py, int pw, int ph,
+                                              int bw, int bh, int w, int h, int bytesperpixel,
+                                              const uint16_t *scale, const uint8_t *step)
+{
+    if (s->s.frames[CUR_FRAME].tf.f->width == ref_frame->f->width &&
+        s->s.frames[CUR_FRAME].tf.f->height == ref_frame->f->height) {
+        mc_chroma_unscaled(s, mc, dst_u, dst_v, dst_stride, ref_u, src_stride_u,
+                           ref_v, src_stride_v, ref_frame,
+                           y, x, in_mv, bw, bh, w, h, bytesperpixel);
+    } else {
+        int mx, my;
+        int refbw_m1, refbh_m1;
+        int th;
+        VP56mv mv;
+
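+        // With chroma subsampling, x/y are in chroma-plane units, so the
+        // clipping ranges double relative to the luma case.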
+        if (s->ss_h) {
+            // BUG https://code.google.com/p/webm/issues/detail?id=820
+            mv.x = av_clip(in_mv->x, -(x + pw - px + 4) * 16, (s->cols * 4 - x + px + 3) * 16);
+            mx = scale_mv(mv.x, 0) + (scale_mv(x * 16, 0) & ~15) + (scale_mv(x * 32, 0) & 15);
+        } else {
+            mv.x = av_clip(in_mv->x, -(x + pw - px + 4) * 8, (s->cols * 8 - x + px + 3) * 8);
+            mx = scale_mv(mv.x * 2, 0) + scale_mv(x * 16, 0);
+        }
+        if (s->ss_v) {
+            // BUG https://code.google.com/p/webm/issues/detail?id=820
+            mv.y = av_clip(in_mv->y, -(y + ph - py + 4) * 16, (s->rows * 4 - y + py + 3) * 16);
+            my = scale_mv(mv.y, 1) + (scale_mv(y * 16, 1) & ~15) + (scale_mv(y * 32, 1) & 15);
+        } else {
+            mv.y = av_clip(in_mv->y, -(y + ph - py + 4) * 8, (s->rows * 8 - y + py + 3) * 8);
+            my = scale_mv(mv.y * 2, 1) + scale_mv(y * 16, 1);
+        }
+#undef scale_mv
+        y = my >> 4;
+        x = mx >> 4;
+        ref_u += y * src_stride_u + x * bytesperpixel;
+        ref_v += y * src_stride_v + x * bytesperpixel;
+        mx &= 15;
+        my &= 15;
+        refbw_m1 = ((bw - 1) * step[0] + mx) >> 4;
+        refbh_m1 = ((bh - 1) * step[1] + my) >> 4;
+        // FIXME bilinear filter only needs 0/1 pixels, not 3/4
+        // we use +7 because the last 7 pixels of each sbrow can be changed in
+        // the longest loopfilter of the next sbrow
+        th = (y + refbh_m1 + 4 + 7) >> (6 - s->ss_v);
+        ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
+        // The arm/aarch64 _hv filters read one more row than what actually is
+        // needed, so switch to emulated edge one pixel sooner vertically
+        // (y + 5 >= h - refbh_m1) than horizontally (x + 4 >= w - refbw_m1).
+        if (x < 3 || y < 3 || x + 4 >= w - refbw_m1 || y + 5 >= h - refbh_m1) {
+            s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
+                                     ref_u - 3 * src_stride_u - 3 * bytesperpixel,
+                                     288, src_stride_u,
+                                     refbw_m1 + 8, refbh_m1 + 8,
+                                     x - 3, y - 3, w, h);
+            ref_u = s->edge_emu_buffer + 3 * 288 + 3 * bytesperpixel;
+            smc(dst_u, dst_stride, ref_u, 288, bh, mx, my, step[0], step[1]);
+
+            s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
+                                     ref_v - 3 * src_stride_v - 3 * bytesperpixel,
+                                     288, src_stride_v,
+                                     refbw_m1 + 8, refbh_m1 + 8,
+                                     x - 3, y - 3, w, h);
+            ref_v = s->edge_emu_buffer + 3 * 288 + 3 * bytesperpixel;
+            smc(dst_v, dst_stride, ref_v, 288, bh, mx, my, step[0], step[1]);
+        } else {
+            smc(dst_u, dst_stride, ref_u, src_stride_u, bh, mx, my, step[0], step[1]);
+            smc(dst_v, dst_stride, ref_v, src_stride_v, bh, mx, my, step[0], step[1]);
+        }
+    }
+}
+
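+// Same template, now with SCALED set: the s##mc token-paste routes each call
+// through the scaled-MC function table and forwards the per-reference
+// mvscale/mvstep factors.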
+#define mc_luma_dir(s, mc, dst, dst_ls, src, src_ls, tref, row, col, mv, \
+                    px, py, pw, ph, bw, bh, w, h, i) \
+    mc_luma_scaled(s, s->dsp.s##mc, s->dsp.mc, dst, dst_ls, src, src_ls, tref, row, col, \
+                   mv, px, py, pw, ph, bw, bh, w, h, bytesperpixel, \
+                   s->mvscale[b->ref[i]], s->mvstep[b->ref[i]])
+#define mc_chroma_dir(s, mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
+                      row, col, mv, px, py, pw, ph, bw, bh, w, h, i) \
+    mc_chroma_scaled(s, s->dsp.s##mc, s->dsp.mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
+                     row, col, mv, px, py, pw, ph, bw, bh, w, h, bytesperpixel, \
+                     s->mvscale[b->ref[i]], s->mvstep[b->ref[i]])
+#define SCALED 1
+#define FN(x) x##_scaled_8bpp
+#define BYTES_PER_PIXEL 1
+#include "vp9_mc_template.c"
+#undef FN
+#undef BYTES_PER_PIXEL
+#define FN(x) x##_scaled_16bpp
+#define BYTES_PER_PIXEL 2
+#include "vp9_mc_template.c"
+#undef mc_luma_dir
+#undef mc_chroma_dir
+#undef FN
+#undef BYTES_PER_PIXEL
+#undef SCALED
+
+static av_always_inline void inter_recon(AVCodecContext *avctx, int bytesperpixel)
+{
+    VP9Context *s = avctx->priv_data;
+    VP9Block *b = s->b;
+    int row = s->row, col = s->col;
+
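+    // mvscale[ref][0] is only nonzero when that reference frame differs in
+    // size from the current frame, i.e. motion vectors must be rescaled.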
+    if (s->mvscale[b->ref[0]][0] || (b->comp && s->mvscale[b->ref[1]][0])) {
+        if (bytesperpixel == 1) {
+            inter_pred_scaled_8bpp(avctx);
+        } else {
+            inter_pred_scaled_16bpp(avctx);
+        }
+    } else {
+        if (bytesperpixel == 1) {
+            inter_pred_8bpp(avctx);
+        } else {
+            inter_pred_16bpp(avctx);
+        }
+    }
+
+    if (!b->skip) {
+        /* mostly copied intra_recon() */
+
+        int w4 = ff_vp9_bwh_tab[1][b->bs][0] << 1, step1d = 1 << b->tx, n;
+        int h4 = ff_vp9_bwh_tab[1][b->bs][1] << 1, x, y, step = 1 << (b->tx * 2);
+        int end_x = FFMIN(2 * (s->cols - col), w4);
+        int end_y = FFMIN(2 * (s->rows - row), h4);
+        int tx = 4 * s->s.h.lossless + b->tx, uvtx = b->uvtx + 4 * s->s.h.lossless;
+        int uvstep1d = 1 << b->uvtx, p;
+        uint8_t *dst = s->dst[0];
+
+        // y itxfm add
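+        // Transforms larger than 8x8 can have more than 255 coefficients,
+        // so their end-of-block count is stored as an aligned 16-bit value.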
+        for (n = 0, y = 0; y < end_y; y += step1d) {
+            uint8_t *ptr = dst;
+            for (x = 0; x < end_x; x += step1d,
+                 ptr += 4 * step1d * bytesperpixel, n += step) {
+                int eob = b->tx > TX_8X8 ? AV_RN16A(&s->eob[n]) : s->eob[n];
+
+                if (eob)
+                    s->dsp.itxfm_add[tx][DCT_DCT](ptr, s->y_stride,
+                                                  s->block + 16 * n * bytesperpixel, eob);
+            }
+            dst += 4 * s->y_stride * step1d;
+        }
+
+        // uv itxfm add
+        end_x >>= s->ss_h;
+        end_y >>= s->ss_v;
+        step = 1 << (b->uvtx * 2);
+        for (p = 0; p < 2; p++) {
+            dst = s->dst[p + 1];
+            for (n = 0, y = 0; y < end_y; y += uvstep1d) {
+                uint8_t *ptr = dst;
+                for (x = 0; x < end_x; x += uvstep1d,
+                     ptr += 4 * uvstep1d * bytesperpixel, n += step) {
+                    int eob = b->uvtx > TX_8X8 ? AV_RN16A(&s->uveob[p][n]) : s->uveob[p][n];
+
+                    if (eob)
+                        s->dsp.itxfm_add[uvtx][DCT_DCT](ptr, s->uv_stride,
+                                                        s->uvblock[p] + 16 * n * bytesperpixel, eob);
+                }
+                dst += 4 * uvstep1d * s->uv_stride;
+            }
+        }
+    }
+}
+
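+// Public wrappers: with bytesperpixel constant, the always-inline
+// inter_recon() body is specialized at compile time for each bit depth.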
+void ff_vp9_inter_recon_8bpp(AVCodecContext *avctx)
+{
+    inter_recon(avctx, 1);
+}
+
+void ff_vp9_inter_recon_16bpp(AVCodecContext *avctx)
+{
+    inter_recon(avctx, 2);
+}