This eliminates all aliasing violation warnings in the h264 code.
No measurable speed difference with gcc-4.4.3 on an i7.
Originally committed as revision 21881 to svn://svn.ffmpeg.org/ffmpeg/trunk
... | ... |
@@ -156,11 +156,11 @@ const uint8_t *ff_h264_decode_nal(H264Context *h, const uint8_t *src, int *dst_l |
156 | 156 |
# if HAVE_FAST_64BIT |
157 | 157 |
# define RS 7 |
158 | 158 |
for(i=0; i+1<length; i+=9){ |
159 |
- if(!((~*(const uint64_t*)(src+i) & (*(const uint64_t*)(src+i) - 0x0100010001000101ULL)) & 0x8000800080008080ULL)) |
|
159 |
+ if(!((~AV_RN64A(src+i) & (AV_RN64A(src+i) - 0x0100010001000101ULL)) & 0x8000800080008080ULL)) |
|
160 | 160 |
# else |
161 | 161 |
# define RS 3 |
162 | 162 |
for(i=0; i+1<length; i+=5){ |
163 |
- if(!((~*(const uint32_t*)(src+i) & (*(const uint32_t*)(src+i) - 0x01000101U)) & 0x80008080U)) |
|
163 |
+ if(!((~AV_RN32A(src+i) & (AV_RN32A(src+i) - 0x01000101U)) & 0x80008080U)) |
|
164 | 164 |
# endif |
165 | 165 |
continue; |
166 | 166 |
if(i>0 && !src[i]) i--; |
... | ... |
@@ -28,6 +28,7 @@ |
28 | 28 |
#ifndef AVCODEC_H264_H |
29 | 29 |
#define AVCODEC_H264_H |
30 | 30 |
|
31 |
+#include "libavutil/intreadwrite.h" |
|
31 | 32 |
#include "dsputil.h" |
32 | 33 |
#include "cabac.h" |
33 | 34 |
#include "mpegvideo.h" |
... | ... |
@@ -921,7 +922,7 @@ static void fill_decode_caches(H264Context *h, int mb_type){ |
921 | 921 |
*/ |
922 | 922 |
//FIXME constraint_intra_pred & partitioning & nnz (let us hope this is just a typo in the spec) |
923 | 923 |
if(top_type){ |
924 |
- *(uint32_t*)&h->non_zero_count_cache[4+8*0]= *(uint32_t*)&h->non_zero_count[top_xy][4+3*8]; |
|
924 |
+ AV_COPY32(&h->non_zero_count_cache[4+8*0], &h->non_zero_count[top_xy][4+3*8]); |
|
925 | 925 |
h->non_zero_count_cache[1+8*0]= h->non_zero_count[top_xy][1+1*8]; |
926 | 926 |
h->non_zero_count_cache[2+8*0]= h->non_zero_count[top_xy][2+1*8]; |
927 | 927 |
|
... | ... |
@@ -933,7 +934,7 @@ static void fill_decode_caches(H264Context *h, int mb_type){ |
933 | 933 |
|
934 | 934 |
h->non_zero_count_cache[1+8*3]= |
935 | 935 |
h->non_zero_count_cache[2+8*3]= |
936 |
- *(uint32_t*)&h->non_zero_count_cache[4+8*0]= CABAC && !IS_INTRA(mb_type) ? 0 : 0x40404040; |
|
936 |
+ AV_WN32A(&h->non_zero_count_cache[4+8*0], CABAC && !IS_INTRA(mb_type) ? 0 : 0x40404040); |
|
937 | 937 |
} |
938 | 938 |
|
939 | 939 |
for (i=0; i<2; i++) { |
... | ... |
@@ -1002,7 +1003,7 @@ static void fill_decode_caches(H264Context *h, int mb_type){ |
1002 | 1002 |
h->ref_cache[list][scan8[0] + 3 - 1*8]= s->current_picture.ref_index[list][b8_xy + 1]; |
1003 | 1003 |
}else{ |
1004 | 1004 |
AV_ZERO128(h->mv_cache[list][scan8[0] + 0 - 1*8]); |
1005 |
- *(uint32_t*)&h->ref_cache[list][scan8[0] + 0 - 1*8]= ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101; |
|
1005 |
+ AV_WN32A(&h->ref_cache[list][scan8[0] + 0 - 1*8], ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101); |
|
1006 | 1006 |
} |
1007 | 1007 |
|
1008 | 1008 |
for(i=0; i<2; i++){ |
... | ... |
@@ -1010,13 +1011,13 @@ static void fill_decode_caches(H264Context *h, int mb_type){ |
1010 | 1010 |
if(USES_LIST(left_type[i], list)){ |
1011 | 1011 |
const int b_xy= h->mb2b_xy[left_xy[i]] + 3; |
1012 | 1012 |
const int b8_xy= h->mb2b8_xy[left_xy[i]] + 1; |
1013 |
- *(uint32_t*)h->mv_cache[list][cache_idx ]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0+i*2]]; |
|
1014 |
- *(uint32_t*)h->mv_cache[list][cache_idx+8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[1+i*2]]; |
|
1013 |
+ AV_COPY32(h->mv_cache[list][cache_idx ], s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0+i*2]]); |
|
1014 |
+ AV_COPY32(h->mv_cache[list][cache_idx+8], s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[1+i*2]]); |
|
1015 | 1015 |
h->ref_cache[list][cache_idx ]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[0+i*2]>>1)]; |
1016 | 1016 |
h->ref_cache[list][cache_idx+8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[1+i*2]>>1)]; |
1017 | 1017 |
}else{ |
1018 |
- *(uint32_t*)h->mv_cache [list][cache_idx ]= |
|
1019 |
- *(uint32_t*)h->mv_cache [list][cache_idx+8]= 0; |
|
1018 |
+ AV_ZERO32(h->mv_cache [list][cache_idx ]); |
|
1019 |
+ AV_ZERO32(h->mv_cache [list][cache_idx+8]); |
|
1020 | 1020 |
h->ref_cache[list][cache_idx ]= |
1021 | 1021 |
h->ref_cache[list][cache_idx+8]= (left_type[i]) ? LIST_NOT_USED : PART_NOT_AVAILABLE; |
1022 | 1022 |
} |
... | ... |
@@ -1025,20 +1026,20 @@ static void fill_decode_caches(H264Context *h, int mb_type){ |
1025 | 1025 |
if(USES_LIST(topleft_type, list)){ |
1026 | 1026 |
const int b_xy = h->mb2b_xy [topleft_xy] + 3 + h->b_stride + (h->topleft_partition & 2*h->b_stride); |
1027 | 1027 |
const int b8_xy= h->mb2b8_xy[topleft_xy] + 1 + (h->topleft_partition & h->b8_stride); |
1028 |
- *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy]; |
|
1028 |
+ AV_COPY32(h->mv_cache[list][scan8[0] - 1 - 1*8], s->current_picture.motion_val[list][b_xy]); |
|
1029 | 1029 |
h->ref_cache[list][scan8[0] - 1 - 1*8]= s->current_picture.ref_index[list][b8_xy]; |
1030 | 1030 |
}else{ |
1031 |
- *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= 0; |
|
1031 |
+ AV_ZERO32(h->mv_cache[list][scan8[0] - 1 - 1*8]); |
|
1032 | 1032 |
h->ref_cache[list][scan8[0] - 1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE; |
1033 | 1033 |
} |
1034 | 1034 |
|
1035 | 1035 |
if(USES_LIST(topright_type, list)){ |
1036 | 1036 |
const int b_xy= h->mb2b_xy[topright_xy] + 3*h->b_stride; |
1037 | 1037 |
const int b8_xy= h->mb2b8_xy[topright_xy] + h->b8_stride; |
1038 |
- *(uint32_t*)h->mv_cache[list][scan8[0] + 4 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy]; |
|
1038 |
+ AV_COPY32(h->mv_cache[list][scan8[0] + 4 - 1*8], s->current_picture.motion_val[list][b_xy]); |
|
1039 | 1039 |
h->ref_cache[list][scan8[0] + 4 - 1*8]= s->current_picture.ref_index[list][b8_xy]; |
1040 | 1040 |
}else{ |
1041 |
- *(uint32_t*)h->mv_cache [list][scan8[0] + 4 - 1*8]= 0; |
|
1041 |
+ AV_ZERO32(h->mv_cache [list][scan8[0] + 4 - 1*8]); |
|
1042 | 1042 |
h->ref_cache[list][scan8[0] + 4 - 1*8]= topright_type ? LIST_NOT_USED : PART_NOT_AVAILABLE; |
1043 | 1043 |
} |
1044 | 1044 |
|
... | ... |
@@ -1051,11 +1052,11 @@ static void fill_decode_caches(H264Context *h, int mb_type){ |
1051 | 1051 |
h->ref_cache[list][scan8[13]+1] = //FIXME remove past 3 (init somewhere else) |
1052 | 1052 |
h->ref_cache[list][scan8[4 ]] = |
1053 | 1053 |
h->ref_cache[list][scan8[12]] = PART_NOT_AVAILABLE; |
1054 |
- *(uint32_t*)h->mv_cache [list][scan8[5 ]+1]= |
|
1055 |
- *(uint32_t*)h->mv_cache [list][scan8[7 ]+1]= |
|
1056 |
- *(uint32_t*)h->mv_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else) |
|
1057 |
- *(uint32_t*)h->mv_cache [list][scan8[4 ]]= |
|
1058 |
- *(uint32_t*)h->mv_cache [list][scan8[12]]= 0; |
|
1054 |
+ AV_ZERO32(h->mv_cache [list][scan8[5 ]+1]); |
|
1055 |
+ AV_ZERO32(h->mv_cache [list][scan8[7 ]+1]); |
|
1056 |
+ AV_ZERO32(h->mv_cache [list][scan8[13]+1]); //FIXME remove past 3 (init somewhere else) |
|
1057 |
+ AV_ZERO32(h->mv_cache [list][scan8[4 ]]); |
|
1058 |
+ AV_ZERO32(h->mv_cache [list][scan8[12]]); |
|
1059 | 1059 |
|
1060 | 1060 |
if( CABAC ) { |
1061 | 1061 |
/* XXX beurk, Load mvd */ |
... | ... |
@@ -1067,37 +1068,37 @@ static void fill_decode_caches(H264Context *h, int mb_type){ |
1067 | 1067 |
} |
1068 | 1068 |
if(USES_LIST(left_type[0], list)){ |
1069 | 1069 |
const int b_xy= h->mb2b_xy[left_xy[0]] + 3; |
1070 |
- *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[0]]; |
|
1071 |
- *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[1]]; |
|
1070 |
+ AV_COPY32(h->mvd_cache[list][scan8[0] - 1 + 0*8], h->mvd_table[list][b_xy + h->b_stride*left_block[0]]); |
|
1071 |
+ AV_COPY32(h->mvd_cache[list][scan8[0] - 1 + 1*8], h->mvd_table[list][b_xy + h->b_stride*left_block[1]]); |
|
1072 | 1072 |
}else{ |
1073 |
- *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 0*8]= |
|
1074 |
- *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 1*8]= 0; |
|
1073 |
+ AV_ZERO32(h->mvd_cache [list][scan8[0] - 1 + 0*8]); |
|
1074 |
+ AV_ZERO32(h->mvd_cache [list][scan8[0] - 1 + 1*8]); |
|
1075 | 1075 |
} |
1076 | 1076 |
if(USES_LIST(left_type[1], list)){ |
1077 | 1077 |
const int b_xy= h->mb2b_xy[left_xy[1]] + 3; |
1078 |
- *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[2]]; |
|
1079 |
- *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[3]]; |
|
1078 |
+ AV_COPY32(h->mvd_cache[list][scan8[0] - 1 + 2*8], h->mvd_table[list][b_xy + h->b_stride*left_block[2]]); |
|
1079 |
+ AV_COPY32(h->mvd_cache[list][scan8[0] - 1 + 3*8], h->mvd_table[list][b_xy + h->b_stride*left_block[3]]); |
|
1080 | 1080 |
}else{ |
1081 |
- *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 2*8]= |
|
1082 |
- *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 3*8]= 0; |
|
1081 |
+ AV_ZERO32(h->mvd_cache [list][scan8[0] - 1 + 2*8]); |
|
1082 |
+ AV_ZERO32(h->mvd_cache [list][scan8[0] - 1 + 3*8]); |
|
1083 | 1083 |
} |
1084 |
- *(uint32_t*)h->mvd_cache [list][scan8[5 ]+1]= |
|
1085 |
- *(uint32_t*)h->mvd_cache [list][scan8[7 ]+1]= |
|
1086 |
- *(uint32_t*)h->mvd_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else) |
|
1087 |
- *(uint32_t*)h->mvd_cache [list][scan8[4 ]]= |
|
1088 |
- *(uint32_t*)h->mvd_cache [list][scan8[12]]= 0; |
|
1084 |
+ AV_ZERO32(h->mvd_cache [list][scan8[5 ]+1]); |
|
1085 |
+ AV_ZERO32(h->mvd_cache [list][scan8[7 ]+1]); |
|
1086 |
+ AV_ZERO32(h->mvd_cache [list][scan8[13]+1]); //FIXME remove past 3 (init somewhere else) |
|
1087 |
+ AV_ZERO32(h->mvd_cache [list][scan8[4 ]]); |
|
1088 |
+ AV_ZERO32(h->mvd_cache [list][scan8[12]]); |
|
1089 | 1089 |
|
1090 | 1090 |
if(h->slice_type_nos == FF_B_TYPE){ |
1091 | 1091 |
fill_rectangle(&h->direct_cache[scan8[0]], 4, 4, 8, MB_TYPE_16x16>>1, 1); |
1092 | 1092 |
|
1093 | 1093 |
if(IS_DIRECT(top_type)){ |
1094 |
- *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0x01010101*(MB_TYPE_DIRECT2>>1); |
|
1094 |
+ AV_WN32A(&h->direct_cache[scan8[0] - 1*8], 0x01010101*(MB_TYPE_DIRECT2>>1)); |
|
1095 | 1095 |
}else if(IS_8X8(top_type)){ |
1096 | 1096 |
int b8_xy = h->mb2b8_xy[top_xy] + h->b8_stride; |
1097 | 1097 |
h->direct_cache[scan8[0] + 0 - 1*8]= h->direct_table[b8_xy]; |
1098 | 1098 |
h->direct_cache[scan8[0] + 2 - 1*8]= h->direct_table[b8_xy + 1]; |
1099 | 1099 |
}else{ |
1100 |
- *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0x01010101*(MB_TYPE_16x16>>1); |
|
1100 |
+ AV_WN32A(&h->direct_cache[scan8[0] - 1*8], 0x01010101*(MB_TYPE_16x16>>1)); |
|
1101 | 1101 |
} |
1102 | 1102 |
|
1103 | 1103 |
if(IS_DIRECT(left_type[0])) |
... | ... |
@@ -1223,8 +1224,8 @@ static int fill_filter_caches(H264Context *h, int mb_type){ |
1223 | 1223 |
|
1224 | 1224 |
AV_COPY64(&h->non_zero_count_cache[0+8*1], &h->non_zero_count[mb_xy][ 0]); |
1225 | 1225 |
AV_COPY64(&h->non_zero_count_cache[0+8*2], &h->non_zero_count[mb_xy][ 8]); |
1226 |
- *((uint32_t*)&h->non_zero_count_cache[0+8*5])= *((uint32_t*)&h->non_zero_count[mb_xy][16]); |
|
1227 |
- *((uint32_t*)&h->non_zero_count_cache[4+8*3])= *((uint32_t*)&h->non_zero_count[mb_xy][20]); |
|
1226 |
+ AV_COPY32(&h->non_zero_count_cache[0+8*5], &h->non_zero_count[mb_xy][16]); |
|
1227 |
+ AV_COPY32(&h->non_zero_count_cache[4+8*3], &h->non_zero_count[mb_xy][20]); |
|
1228 | 1228 |
AV_COPY64(&h->non_zero_count_cache[0+8*4], &h->non_zero_count[mb_xy][24]); |
1229 | 1229 |
|
1230 | 1230 |
h->cbp= h->cbp_table[mb_xy]; |
... | ... |
@@ -1239,21 +1240,21 @@ static int fill_filter_caches(H264Context *h, int mb_type){ |
1239 | 1239 |
|
1240 | 1240 |
if(!USES_LIST(mb_type, list)){ |
1241 | 1241 |
fill_rectangle( h->mv_cache[list][scan8[0]], 4, 4, 8, pack16to32(0,0), 4); |
1242 |
- *(uint32_t*)&h->ref_cache[list][scan8[ 0]] = |
|
1243 |
- *(uint32_t*)&h->ref_cache[list][scan8[ 2]] = |
|
1244 |
- *(uint32_t*)&h->ref_cache[list][scan8[ 8]] = |
|
1245 |
- *(uint32_t*)&h->ref_cache[list][scan8[10]] = ((LIST_NOT_USED)&0xFF)*0x01010101U; |
|
1242 |
+ AV_WN32A(&h->ref_cache[list][scan8[ 0]], ((LIST_NOT_USED)&0xFF)*0x01010101u); |
|
1243 |
+ AV_WN32A(&h->ref_cache[list][scan8[ 2]], ((LIST_NOT_USED)&0xFF)*0x01010101u); |
|
1244 |
+ AV_WN32A(&h->ref_cache[list][scan8[ 8]], ((LIST_NOT_USED)&0xFF)*0x01010101u); |
|
1245 |
+ AV_WN32A(&h->ref_cache[list][scan8[10]], ((LIST_NOT_USED)&0xFF)*0x01010101u); |
|
1246 | 1246 |
continue; |
1247 | 1247 |
} |
1248 | 1248 |
|
1249 | 1249 |
ref = &s->current_picture.ref_index[list][h->mb2b8_xy[mb_xy]]; |
1250 | 1250 |
{ |
1251 | 1251 |
int (*ref2frm)[64] = h->ref2frm[ h->slice_num&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2); |
1252 |
- *(uint32_t*)&h->ref_cache[list][scan8[ 0]] = |
|
1253 |
- *(uint32_t*)&h->ref_cache[list][scan8[ 2]] = (pack16to32(ref2frm[list][ref[0]],ref2frm[list][ref[1]])&0x00FF00FF)*0x0101; |
|
1252 |
+ AV_WN32A(&h->ref_cache[list][scan8[ 0]], (pack16to32(ref2frm[list][ref[0]],ref2frm[list][ref[1]])&0x00FF00FF)*0x0101); |
|
1253 |
+ AV_WN32A(&h->ref_cache[list][scan8[ 2]], (pack16to32(ref2frm[list][ref[0]],ref2frm[list][ref[1]])&0x00FF00FF)*0x0101); |
|
1254 | 1254 |
ref += h->b8_stride; |
1255 |
- *(uint32_t*)&h->ref_cache[list][scan8[ 8]] = |
|
1256 |
- *(uint32_t*)&h->ref_cache[list][scan8[10]] = (pack16to32(ref2frm[list][ref[0]],ref2frm[list][ref[1]])&0x00FF00FF)*0x0101; |
|
1255 |
+ AV_WN32A(&h->ref_cache[list][scan8[ 8]], (pack16to32(ref2frm[list][ref[0]],ref2frm[list][ref[1]])&0x00FF00FF)*0x0101); |
|
1256 |
+ AV_WN32A(&h->ref_cache[list][scan8[10]], (pack16to32(ref2frm[list][ref[0]],ref2frm[list][ref[1]])&0x00FF00FF)*0x0101); |
|
1257 | 1257 |
} |
1258 | 1258 |
|
1259 | 1259 |
b_stride = h->b_stride; |
... | ... |
@@ -1277,7 +1278,7 @@ static int fill_filter_caches(H264Context *h, int mb_type){ |
1277 | 1277 |
*/ |
1278 | 1278 |
//FIXME constraint_intra_pred & partitioning & nnz (let us hope this is just a typo in the spec) |
1279 | 1279 |
if(top_type){ |
1280 |
- *(uint32_t*)&h->non_zero_count_cache[4+8*0]= *(uint32_t*)&h->non_zero_count[top_xy][4+3*8]; |
|
1280 |
+ AV_COPY32(&h->non_zero_count_cache[4+8*0], &h->non_zero_count[top_xy][4+3*8]); |
|
1281 | 1281 |
} |
1282 | 1282 |
|
1283 | 1283 |
if(left_type[0]){ |
... | ... |
@@ -1333,7 +1334,7 @@ static int fill_filter_caches(H264Context *h, int mb_type){ |
1333 | 1333 |
h->ref_cache[list][scan8[0] + 3 - 1*8]= ref2frm[list][s->current_picture.ref_index[list][b8_xy + 1]]; |
1334 | 1334 |
}else{ |
1335 | 1335 |
AV_ZERO128(h->mv_cache[list][scan8[0] + 0 - 1*8]); |
1336 |
- *(uint32_t*)&h->ref_cache[list][scan8[0] + 0 - 1*8]= ((LIST_NOT_USED)&0xFF)*0x01010101U; |
|
1336 |
+ AV_WN32A(&h->ref_cache[list][scan8[0] + 0 - 1*8], ((LIST_NOT_USED)&0xFF)*0x01010101u); |
|
1337 | 1337 |
} |
1338 | 1338 |
|
1339 | 1339 |
if(!IS_INTERLACED(mb_type^left_type[0])){ |
... | ... |
@@ -1341,19 +1342,19 @@ static int fill_filter_caches(H264Context *h, int mb_type){ |
1341 | 1341 |
const int b_xy= h->mb2b_xy[left_xy[0]] + 3; |
1342 | 1342 |
const int b8_xy= h->mb2b8_xy[left_xy[0]] + 1; |
1343 | 1343 |
int (*ref2frm)[64] = h->ref2frm[ h->slice_table[left_xy[0]]&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2); |
1344 |
- *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 0 ]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*0]; |
|
1345 |
- *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 8 ]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*1]; |
|
1346 |
- *(uint32_t*)h->mv_cache[list][scan8[0] - 1 +16 ]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*2]; |
|
1347 |
- *(uint32_t*)h->mv_cache[list][scan8[0] - 1 +24 ]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*3]; |
|
1344 |
+ AV_COPY32(h->mv_cache[list][scan8[0] - 1 + 0 ], s->current_picture.motion_val[list][b_xy + h->b_stride*0]); |
|
1345 |
+ AV_COPY32(h->mv_cache[list][scan8[0] - 1 + 8 ], s->current_picture.motion_val[list][b_xy + h->b_stride*1]); |
|
1346 |
+ AV_COPY32(h->mv_cache[list][scan8[0] - 1 +16 ], s->current_picture.motion_val[list][b_xy + h->b_stride*2]); |
|
1347 |
+ AV_COPY32(h->mv_cache[list][scan8[0] - 1 +24 ], s->current_picture.motion_val[list][b_xy + h->b_stride*3]); |
|
1348 | 1348 |
h->ref_cache[list][scan8[0] - 1 + 0 ]= |
1349 | 1349 |
h->ref_cache[list][scan8[0] - 1 + 8 ]= ref2frm[list][s->current_picture.ref_index[list][b8_xy + h->b8_stride*0]]; |
1350 | 1350 |
h->ref_cache[list][scan8[0] - 1 +16 ]= |
1351 | 1351 |
h->ref_cache[list][scan8[0] - 1 +24 ]= ref2frm[list][s->current_picture.ref_index[list][b8_xy + h->b8_stride*1]]; |
1352 | 1352 |
}else{ |
1353 |
- *(uint32_t*)h->mv_cache [list][scan8[0] - 1 + 0 ]= |
|
1354 |
- *(uint32_t*)h->mv_cache [list][scan8[0] - 1 + 8 ]= |
|
1355 |
- *(uint32_t*)h->mv_cache [list][scan8[0] - 1 +16 ]= |
|
1356 |
- *(uint32_t*)h->mv_cache [list][scan8[0] - 1 +24 ]= 0; |
|
1353 |
+ AV_ZERO32(h->mv_cache [list][scan8[0] - 1 + 0 ]); |
|
1354 |
+ AV_ZERO32(h->mv_cache [list][scan8[0] - 1 + 8 ]); |
|
1355 |
+ AV_ZERO32(h->mv_cache [list][scan8[0] - 1 +16 ]); |
|
1356 |
+ AV_ZERO32(h->mv_cache [list][scan8[0] - 1 +24 ]); |
|
1357 | 1357 |
h->ref_cache[list][scan8[0] - 1 + 0 ]= |
1358 | 1358 |
h->ref_cache[list][scan8[0] - 1 + 8 ]= |
1359 | 1359 |
h->ref_cache[list][scan8[0] - 1 + 16 ]= |
... | ... |
@@ -1386,8 +1387,8 @@ static inline void write_back_non_zero_count(H264Context *h){ |
1386 | 1386 |
|
1387 | 1387 |
AV_COPY64(&h->non_zero_count[mb_xy][ 0], &h->non_zero_count_cache[0+8*1]); |
1388 | 1388 |
AV_COPY64(&h->non_zero_count[mb_xy][ 8], &h->non_zero_count_cache[0+8*2]); |
1389 |
- *((uint32_t*)&h->non_zero_count[mb_xy][16]) = *((uint32_t*)&h->non_zero_count_cache[0+8*5]); |
|
1390 |
- *((uint32_t*)&h->non_zero_count[mb_xy][20]) = *((uint32_t*)&h->non_zero_count_cache[4+8*3]); |
|
1389 |
+ AV_COPY32(&h->non_zero_count[mb_xy][16], &h->non_zero_count_cache[0+8*5]); |
|
1390 |
+ AV_COPY32(&h->non_zero_count[mb_xy][20], &h->non_zero_count_cache[4+8*3]); |
|
1391 | 1391 |
AV_COPY64(&h->non_zero_count[mb_xy][24], &h->non_zero_count_cache[0+8*4]); |
1392 | 1392 |
} |
1393 | 1393 |
|
... | ... |
@@ -1446,9 +1447,9 @@ static inline void write_back_motion(H264Context *h, int mb_type){ |
1446 | 1446 |
|
1447 | 1447 |
static inline int get_dct8x8_allowed(H264Context *h){ |
1448 | 1448 |
if(h->sps.direct_8x8_inference_flag) |
1449 |
- return !(*(uint64_t*)h->sub_mb_type & ((MB_TYPE_16x8|MB_TYPE_8x16|MB_TYPE_8x8 )*0x0001000100010001ULL)); |
|
1449 |
+ return !(AV_RN64A(h->sub_mb_type) & ((MB_TYPE_16x8|MB_TYPE_8x16|MB_TYPE_8x8 )*0x0001000100010001ULL)); |
|
1450 | 1450 |
else |
1451 |
- return !(*(uint64_t*)h->sub_mb_type & ((MB_TYPE_16x8|MB_TYPE_8x16|MB_TYPE_8x8|MB_TYPE_DIRECT2)*0x0001000100010001ULL)); |
|
1451 |
+ return !(AV_RN64A(h->sub_mb_type) & ((MB_TYPE_16x8|MB_TYPE_8x16|MB_TYPE_8x8|MB_TYPE_DIRECT2)*0x0001000100010001ULL)); |
|
1452 | 1452 |
} |
1453 | 1453 |
|
1454 | 1454 |
/** |
... | ... |
@@ -183,11 +183,11 @@ static void pred_spatial_direct_motion(H264Context * const h, int *mb_type){ |
183 | 183 |
}else { |
184 | 184 |
assert(match_count==1); |
185 | 185 |
if(left_ref==ref[list]){ |
186 |
- mv[list]= *(uint32_t*)A; |
|
186 |
+ mv[list]= AV_RN32A(A); |
|
187 | 187 |
}else if(top_ref==ref[list]){ |
188 |
- mv[list]= *(uint32_t*)B; |
|
188 |
+ mv[list]= AV_RN32A(B); |
|
189 | 189 |
}else{ |
190 |
- mv[list]= *(uint32_t*)C; |
|
190 |
+ mv[list]= AV_RN32A(C); |
|
191 | 191 |
} |
192 | 192 |
} |
193 | 193 |
}else{ |
... | ... |
@@ -362,9 +362,9 @@ single_col: |
362 | 362 |
const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*b4_stride]; |
363 | 363 |
if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){ |
364 | 364 |
if(ref[0] == 0) |
365 |
- *(uint32_t*)h->mv_cache[0][scan8[i8*4+i4]] = 0; |
|
365 |
+ AV_ZERO32(h->mv_cache[0][scan8[i8*4+i4]]); |
|
366 | 366 |
if(ref[1] == 0) |
367 |
- *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] = 0; |
|
367 |
+ AV_ZERO32(h->mv_cache[1][scan8[i8*4+i4]]); |
|
368 | 368 |
m++; |
369 | 369 |
} |
370 | 370 |
} |
... | ... |
@@ -571,8 +571,8 @@ single_col: |
571 | 571 |
int16_t *mv_l0 = h->mv_cache[0][scan8[i8*4+i4]]; |
572 | 572 |
mv_l0[0] = (scale * mv_col[0] + 128) >> 8; |
573 | 573 |
mv_l0[1] = (scale * mv_col[1] + 128) >> 8; |
574 |
- *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] = |
|
575 |
- pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]); |
|
574 |
+ AV_WN32A(h->mv_cache[1][scan8[i8*4+i4]], |
|
575 |
+ pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1])); |
|
576 | 576 |
} |
577 | 577 |
} |
578 | 578 |
} |
... | ... |
@@ -25,6 +25,7 @@ |
25 | 25 |
* @author Michael Niedermayer <michaelni@gmx.at> |
26 | 26 |
*/ |
27 | 27 |
|
28 |
+#include "libavutil/intreadwrite.h" |
|
28 | 29 |
#include "internal.h" |
29 | 30 |
#include "dsputil.h" |
30 | 31 |
#include "avcodec.h" |
... | ... |
@@ -368,11 +369,13 @@ void ff_h264_filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, |
368 | 368 |
return; |
369 | 369 |
} else { |
370 | 370 |
LOCAL_ALIGNED_8(int16_t, bS, [2], [4][4]); |
371 |
- uint64_t (*bSv)[4] = (uint64_t(*)[4])bS; |
|
372 | 371 |
int edges; |
373 | 372 |
if( IS_8x8DCT(mb_type) && (h->cbp&7) == 7 ) { |
374 | 373 |
edges = 4; |
375 |
- bSv[0][0] = bSv[0][2] = bSv[1][0] = bSv[1][2] = 0x0002000200020002ULL; |
|
374 |
+ AV_WN64A(bS[0][0], 0x0002000200020002ULL); |
|
375 |
+ AV_WN64A(bS[0][2], 0x0002000200020002ULL); |
|
376 |
+ AV_WN64A(bS[1][0], 0x0002000200020002ULL); |
|
377 |
+ AV_WN64A(bS[1][2], 0x0002000200020002ULL); |
|
376 | 378 |
} else { |
377 | 379 |
int mask_edge1 = (3*(((5*mb_type)>>5)&1)) | (mb_type>>4); //(mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16)) ? 3 : (mb_type & MB_TYPE_16x8) ? 1 : 0; |
378 | 380 |
int mask_edge0 = 3*((mask_edge1>>1) & ((5*left_type)>>5)&1); // (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16)) && (h->left_type[0] & (MB_TYPE_16x16 | MB_TYPE_8x16)) ? 3 : 0; |
... | ... |
@@ -382,12 +385,12 @@ void ff_h264_filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, |
382 | 382 |
h->list_count==2, edges, step, mask_edge0, mask_edge1, FIELD_PICTURE); |
383 | 383 |
} |
384 | 384 |
if( IS_INTRA(left_type) ) |
385 |
- bSv[0][0] = 0x0004000400040004ULL; |
|
385 |
+ AV_WN64A(bS[0][0], 0x0004000400040004ULL); |
|
386 | 386 |
if( IS_INTRA(h->top_type) ) |
387 |
- bSv[1][0] = FIELD_PICTURE ? 0x0003000300030003ULL : 0x0004000400040004ULL; |
|
387 |
+ AV_WN64A(bS[1][0], FIELD_PICTURE ? 0x0003000300030003ULL : 0x0004000400040004ULL); |
|
388 | 388 |
|
389 | 389 |
#define FILTER(hv,dir,edge)\ |
390 |
- if(bSv[dir][edge]) {\ |
|
390 |
+ if(AV_RN64A(bS[dir][edge])) { \ |
|
391 | 391 |
filter_mb_edge##hv( &img_y[4*edge*(dir?linesize:1)], linesize, bS[dir][edge], edge ? qp : qp##dir, h );\ |
392 | 392 |
if(!(edge&1)) {\ |
393 | 393 |
filter_mb_edgec##hv( &img_cb[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir, h );\ |
... | ... |
@@ -477,7 +480,7 @@ static av_always_inline void filter_mb_dir(H264Context *h, int mb_x, int mb_y, u |
477 | 477 |
DECLARE_ALIGNED_8(int16_t, bS)[4]; |
478 | 478 |
int qp; |
479 | 479 |
if( IS_INTRA(mb_type|s->current_picture.mb_type[mbn_xy]) ) { |
480 |
- *(uint64_t*)bS= 0x0003000300030003ULL; |
|
480 |
+ AV_WN64A(bS, 0x0003000300030003ULL); |
|
481 | 481 |
} else { |
482 | 482 |
if(!CABAC && IS_8x8DCT(s->current_picture.mb_type[mbn_xy])){ |
483 | 483 |
bS[0]= 1+((h->cbp_table[mbn_xy] & 4)||h->non_zero_count_cache[scan8[0]+0]); |
... | ... |
@@ -508,17 +511,17 @@ static av_always_inline void filter_mb_dir(H264Context *h, int mb_x, int mb_y, u |
508 | 508 |
int qp; |
509 | 509 |
|
510 | 510 |
if( IS_INTRA(mb_type|mbm_type)) { |
511 |
- *(uint64_t*)bS= 0x0003000300030003ULL; |
|
511 |
+ AV_WN64A(bS, 0x0003000300030003ULL); |
|
512 | 512 |
if ( (!IS_INTERLACED(mb_type|mbm_type)) |
513 | 513 |
|| ((FRAME_MBAFF || (s->picture_structure != PICT_FRAME)) && (dir == 0)) |
514 | 514 |
) |
515 |
- *(uint64_t*)bS= 0x0004000400040004ULL; |
|
515 |
+ AV_WN64A(bS, 0x0004000400040004ULL); |
|
516 | 516 |
} else { |
517 | 517 |
int i; |
518 | 518 |
int mv_done; |
519 | 519 |
|
520 | 520 |
if( dir && FRAME_MBAFF && IS_INTERLACED(mb_type ^ mbm_type)) { |
521 |
- *(uint64_t*)bS= 0x0001000100010001ULL; |
|
521 |
+ AV_WN64A(bS, 0x0001000100010001ULL); |
|
522 | 522 |
mv_done = 1; |
523 | 523 |
} |
524 | 524 |
else if( mask_par0 && ((mbm_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir)))) ) { |
... | ... |
@@ -588,13 +591,13 @@ static av_always_inline void filter_mb_dir(H264Context *h, int mb_x, int mb_y, u |
588 | 588 |
continue; |
589 | 589 |
|
590 | 590 |
if( IS_INTRA(mb_type)) { |
591 |
- *(uint64_t*)bS= 0x0003000300030003ULL; |
|
591 |
+ AV_WN64A(bS, 0x0003000300030003ULL); |
|
592 | 592 |
} else { |
593 | 593 |
int i; |
594 | 594 |
int mv_done; |
595 | 595 |
|
596 | 596 |
if( edge & mask_edge ) { |
597 |
- *(uint64_t*)bS= 0; |
|
597 |
+ AV_ZERO64(bS); |
|
598 | 598 |
mv_done = 1; |
599 | 599 |
} |
600 | 600 |
else if( mask_par0 ) { |
... | ... |
@@ -674,10 +677,10 @@ void ff_h264_filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint |
674 | 674 |
int i; |
675 | 675 |
first_vertical_edge_done = 1; |
676 | 676 |
|
677 |
- if( IS_INTRA(mb_type) ) |
|
678 |
- *(uint64_t*)&bS[0]= |
|
679 |
- *(uint64_t*)&bS[4]= 0x0004000400040004ULL; |
|
680 |
- else { |
|
677 |
+ if( IS_INTRA(mb_type) ) { |
|
678 |
+ AV_WN64A(&bS[0], 0x0004000400040004ULL); |
|
679 |
+ AV_WN64A(&bS[4], 0x0004000400040004ULL); |
|
680 |
+ } else { |
|
681 | 681 |
static const uint8_t offset[2][2][8]={ |
682 | 682 |
{ |
683 | 683 |
{7+8*0, 7+8*0, 7+8*0, 7+8*0, 7+8*1, 7+8*1, 7+8*1, 7+8*1}, |
... | ... |
@@ -58,7 +58,7 @@ static inline int fetch_diagonal_mv(H264Context *h, const int16_t **C, int i, in |
58 | 58 |
&& h->ref_cache[list][scan8[0]-1] != PART_NOT_AVAILABLE){ |
59 | 59 |
const uint32_t *mb_types = s->current_picture_ptr->mb_type; |
60 | 60 |
const int16_t *mv; |
61 |
- *(uint32_t*)h->mv_cache[list][scan8[0]-2] = 0; |
|
61 |
+ AV_ZERO32(h->mv_cache[list][scan8[0]-2]); |
|
62 | 62 |
*C = h->mv_cache[list][scan8[0]-2]; |
63 | 63 |
|
64 | 64 |
if(!MB_FIELD |
... | ... |
@@ -220,8 +220,8 @@ static inline void pred_pskip_motion(H264Context * const h, int * const mx, int |
220 | 220 |
tprintf(h->s.avctx, "pred_pskip: (%d) (%d) at %2d %2d\n", top_ref, left_ref, h->s.mb_x, h->s.mb_y); |
221 | 221 |
|
222 | 222 |
if(top_ref == PART_NOT_AVAILABLE || left_ref == PART_NOT_AVAILABLE |
223 |
- || !( top_ref | *(uint32_t*)h->mv_cache[0][ scan8[0] - 8 ]) |
|
224 |
- || !(left_ref | *(uint32_t*)h->mv_cache[0][ scan8[0] - 1 ])){ |
|
223 |
+ || !( top_ref | AV_RN32A(h->mv_cache[0][ scan8[0] - 8 ])) |
|
224 |
+ || !(left_ref | AV_RN32A(h->mv_cache[0][ scan8[0] - 1 ]))){ |
|
225 | 225 |
|
226 | 226 |
*mx = *my = 0; |
227 | 227 |
return; |