Speed up ff_emulated_edge_mc() from ~780 cycles to 551 cycles, mostly just by
using libc memcpy() instead of manually shuffling individual bytes around.
| ... | ... |
@@ -355,38 +355,45 @@ void ff_emulated_edge_mc(uint8_t *buf, const uint8_t *src, int linesize, int blo |
| 355 | 355 |
start_x= FFMAX(0, -src_x); |
| 356 | 356 |
end_y= FFMIN(block_h, h-src_y); |
| 357 | 357 |
end_x= FFMIN(block_w, w-src_x); |
| 358 |
+ assert(start_y < end_y && block_h); |
|
| 359 |
+ assert(start_x < end_x && block_w); |
|
| 358 | 360 |
|
| 359 |
- // copy existing part |
|
| 360 |
- for(y=start_y; y<end_y; y++){
|
|
| 361 |
- for(x=start_x; x<end_x; x++){
|
|
| 362 |
- buf[x + y*linesize]= src[x + y*linesize]; |
|
| 363 |
- } |
|
| 364 |
- } |
|
| 361 |
+ w = end_x - start_x; |
|
| 362 |
+ src += start_y*linesize + start_x; |
|
| 363 |
+ buf += start_x; |
|
| 365 | 364 |
|
| 366 | 365 |
//top |
| 367 | 366 |
for(y=0; y<start_y; y++){
|
| 368 |
- for(x=start_x; x<end_x; x++){
|
|
| 369 |
- buf[x + y*linesize]= buf[x + start_y*linesize]; |
|
| 370 |
- } |
|
| 367 |
+ memcpy(buf, src, w); |
|
| 368 |
+ buf += linesize; |
|
| 369 |
+ } |
|
| 370 |
+ |
|
| 371 |
+ // copy existing part |
|
| 372 |
+ for(; y<end_y; y++){
|
|
| 373 |
+ memcpy(buf, src, w); |
|
| 374 |
+ src += linesize; |
|
| 375 |
+ buf += linesize; |
|
| 371 | 376 |
} |
| 372 | 377 |
|
| 373 | 378 |
//bottom |
| 374 |
- for(y=end_y; y<block_h; y++){
|
|
| 375 |
- for(x=start_x; x<end_x; x++){
|
|
| 376 |
- buf[x + y*linesize]= buf[x + (end_y-1)*linesize]; |
|
| 377 |
- } |
|
| 379 |
+ src -= linesize; |
|
| 380 |
+ for(; y<block_h; y++){
|
|
| 381 |
+ memcpy(buf, src, w); |
|
| 382 |
+ buf += linesize; |
|
| 378 | 383 |
} |
| 379 | 384 |
|
| 380 |
- for(y=0; y<block_h; y++){
|
|
| 385 |
+ buf -= block_h * linesize + start_x; |
|
| 386 |
+ while (block_h--){
|
|
| 381 | 387 |
//left |
| 382 | 388 |
for(x=0; x<start_x; x++){
|
| 383 |
- buf[x + y*linesize]= buf[start_x + y*linesize]; |
|
| 389 |
+ buf[x] = buf[start_x]; |
|
| 384 | 390 |
} |
| 385 | 391 |
|
| 386 | 392 |
//right |
| 387 | 393 |
for(x=end_x; x<block_w; x++){
|
| 388 |
- buf[x + y*linesize]= buf[end_x - 1 + y*linesize]; |
|
| 394 |
+ buf[x] = buf[end_x - 1]; |
|
| 389 | 395 |
} |
| 396 |
+ buf += linesize; |
|
| 390 | 397 |
} |
| 391 | 398 |
} |
| 392 | 399 |
|