Browse code

Increase alignment of av_malloc() as needed by AVX ASM.

Signed-off-by: Reinhard Tartler <siretart@tauware.de>

Vitor Sessak authored on 2011/04/24 02:24:31
Showing 1 changed file
... ...
@@ -69,21 +69,21 @@ void *av_malloc(size_t size)
69 69
 #endif
70 70
 
71 71
     /* let's disallow possible ambiguous cases */
72
-    if(size > (INT_MAX-16) )
72
+    if(size > (INT_MAX-32) )
73 73
         return NULL;
74 74
 
75 75
 #if CONFIG_MEMALIGN_HACK
76
-    ptr = malloc(size+16);
76
+    ptr = malloc(size+32);
77 77
     if(!ptr)
78 78
         return ptr;
79
-    diff= ((-(long)ptr - 1)&15) + 1;
79
+    diff= ((-(long)ptr - 1)&31) + 1;
80 80
     ptr = (char*)ptr + diff;
81 81
     ((char*)ptr)[-1]= diff;
82 82
 #elif HAVE_POSIX_MEMALIGN
83
-    if (posix_memalign(&ptr,16,size))
83
+    if (posix_memalign(&ptr,32,size))
84 84
         ptr = NULL;
85 85
 #elif HAVE_MEMALIGN
86
-    ptr = memalign(16,size);
86
+    ptr = memalign(32,size);
87 87
     /* Why 64?
88 88
        Indeed, we should align it:
89 89
          on 4 for 386
... ...
@@ -93,10 +93,8 @@ void *av_malloc(size_t size)
93 93
        Because L1 and L2 caches are aligned on those values.
94 94
        But I don't want to code such logic here!
95 95
      */
96
-     /* Why 16?
97
-        Because some CPUs need alignment, for example SSE2 on P4, & most RISC CPUs
98
-        it will just trigger an exception and the unaligned load will be done in the
99
-        exception handler or it will just segfault (SSE2 on P4).
96
+     /* Why 32?
97
+        For AVX ASM. SSE / NEON needs only 16.
100 98
         Why not larger? Because I did not see a difference in benchmarks ...
101 99
      */
102 100
      /* benchmarks with P3