@@ -135,6 +135,13 @@ typedef struct _bigval_t {
135135 size_t sz ;
136136 uintptr_t age : 2 ;
137137 };
138+ #ifdef _P64 // Add padding so that char data[] below is 64-byte aligned
139+ // (8 pointers of 8 bytes each) - (4 other pointers in struct)
140+ void * _padding [8 - 4 ];
141+ #else
142+ // (16 pointers of 4 bytes each) - (4 other pointers in struct)
143+ void * _padding [16 - 4 ];
144+ #endif
138145 //struct buff_t <>;
139146 union {
140147 uintptr_t header ;
@@ -146,7 +153,7 @@ typedef struct _bigval_t {
146153#if !defined(_COMPILER_MICROSOFT_ )
147154 int _dummy [0 ];
148155#endif
149- // must be 16- aligned here, in 32 & 64b
156+ // must be 64-byte aligned here, in 32 & 64 bit modes
150157 char data [];
151158} bigval_t ;
152159
@@ -171,7 +178,7 @@ typedef struct _pool_t {
171178
#define GC_PAGE_LG2 14                // log2(size of a pool page)
#define GC_PAGE_SZ (1 << GC_PAGE_LG2) // 16k bytes per pool page
// Bytes of padding placed after a jl_taggedvalue_t so pool objects start on a
// JL_SMALL_BYTE_ALIGNMENT boundary.
// NOTE(review): when sizeof_jl_taggedvalue_t is already a multiple of the
// alignment this evaluates to JL_SMALL_BYTE_ALIGNMENT rather than 0 -- confirm
// that the extra slot is intended.
#define GC_PAGE_OFFSET (JL_SMALL_BYTE_ALIGNMENT - (sizeof_jl_taggedvalue_t % JL_SMALL_BYTE_ALIGNMENT))
175182
176183// pool page metadata
177184typedef struct _gcpage_t {
@@ -437,15 +444,8 @@ static int jl_gc_finalizers_inhibited; // don't run finalizers during codegen #1

// malloc wrappers, aligned allocation
// Both wrappers delegate to the jl_*_aligned helpers with the cache-line
// alignment constant; freeing is done directly via jl_free_aligned at the
// call sites, so no free_cache_align wrapper is defined.
#define malloc_cache_align(sz) jl_malloc_aligned(sz, JL_CACHE_BYTE_ALIGNMENT)
#define realloc_cache_align(p, sz, oldsz) jl_realloc_aligned(p, sz, oldsz, JL_CACHE_BYTE_ALIGNMENT)
449449
450450static void schedule_finalization (void * o , void * f )
451451{
@@ -1011,10 +1011,10 @@ static NOINLINE void *alloc_big(size_t sz)
10111011{
10121012 maybe_collect ();
10131013 size_t offs = offsetof(bigval_t , header );
1014- size_t allocsz = LLT_ALIGN (sz + offs , 16 );
1014+ size_t allocsz = LLT_ALIGN (sz + offs , JL_CACHE_BYTE_ALIGNMENT );
10151015 if (allocsz < sz ) // overflow in adding offs, size was "negative"
10161016 jl_throw (jl_memory_exception );
1017- bigval_t * v = (bigval_t * )malloc_a16 (allocsz );
1017+ bigval_t * v = (bigval_t * )malloc_cache_align (allocsz );
10181018 if (v == NULL )
10191019 jl_throw (jl_memory_exception );
10201020 jl_atomic_fetch_add (& allocd_bytes , allocsz );
@@ -1074,7 +1074,7 @@ static bigval_t **sweep_big_list(int sweep_mask, bigval_t **pv)
10741074#ifdef MEMDEBUG
10751075 memset (v , 0xbb , v -> sz & ~3 );
10761076#endif
1077- free_a16 (v );
1077+ jl_free_aligned (v );
10781078 big_freed ++ ;
10791079 }
10801080 big_total ++ ;
@@ -1141,7 +1141,7 @@ static void jl_gc_free_array(jl_array_t *a)
11411141 if (a -> flags .how == 2 ) {
11421142 char * d = (char * )a -> data - a -> offset * a -> elsize ;
11431143 if (a -> flags .isaligned )
1144- free_a16 (d );
1144+ jl_free_aligned (d );
11451145 else
11461146 free (d );
11471147 freed_bytes += array_nbytes (a );
@@ -2500,7 +2500,7 @@ void *reallocb(void *b, size_t sz)
25002500 if (allocsz < sz) // overflow in adding offs, size was "negative"
25012501 jl_throw(jl_memory_exception);
25022502 bigval_t *bv = bigval_header(buff);
2503- bv = (bigval_t*)realloc_a16 (bv, allocsz, bv->sz&~3);
2503+ bv = (bigval_t*)realloc_cache_align (bv, allocsz, bv->sz&~3);
25042504 if (bv == NULL)
25052505 jl_throw(jl_memory_exception);
25062506 return &bv->data[0];
@@ -2539,7 +2539,7 @@ JL_DLLEXPORT jl_value_t *jl_gc_alloc_0w(void)
25392539
25402540JL_DLLEXPORT jl_value_t * jl_gc_alloc_1w (void )
25412541{
2542- const int sz = LLT_ALIGN (sizeof_jl_taggedvalue_t + sizeof (void * ), 16 );
2542+ const int sz = LLT_ALIGN (sizeof_jl_taggedvalue_t + sizeof (void * ), JL_SMALL_BYTE_ALIGNMENT );
25432543 void * tag = NULL ;
25442544#ifdef MEMDEBUG
25452545 tag = alloc_big (sz );
@@ -2552,7 +2552,7 @@ JL_DLLEXPORT jl_value_t *jl_gc_alloc_1w(void)
25522552
25532553JL_DLLEXPORT jl_value_t * jl_gc_alloc_2w (void )
25542554{
2555- const int sz = LLT_ALIGN (sizeof_jl_taggedvalue_t + sizeof (void * ) * 2 , 16 );
2555+ const int sz = LLT_ALIGN (sizeof_jl_taggedvalue_t + sizeof (void * ) * 2 , JL_SMALL_BYTE_ALIGNMENT );
25562556 void * tag = NULL ;
25572557#ifdef MEMDEBUG
25582558 tag = alloc_big (sz );
@@ -2565,7 +2565,7 @@ JL_DLLEXPORT jl_value_t *jl_gc_alloc_2w(void)
25652565
25662566JL_DLLEXPORT jl_value_t * jl_gc_alloc_3w (void )
25672567{
2568- const int sz = LLT_ALIGN (sizeof_jl_taggedvalue_t + sizeof (void * ) * 3 , 16 );
2568+ const int sz = LLT_ALIGN (sizeof_jl_taggedvalue_t + sizeof (void * ) * 3 , JL_SMALL_BYTE_ALIGNMENT );
25692569 void * tag = NULL ;
25702570#ifdef MEMDEBUG
25712571 tag = alloc_big (sz );
@@ -2612,7 +2612,7 @@ jl_thread_heap_t *jl_mk_thread_heap(void)
26122612#ifdef JULIA_ENABLE_THREADING
26132613 // Cache-aligned malloc
26142614 jl_thread_heap =
2615- (jl_thread_heap_t * )jl_malloc_aligned (sizeof (jl_thread_heap_t ), 64 );
2615+ (jl_thread_heap_t * )jl_malloc_aligned (sizeof (jl_thread_heap_t ), JL_CACHE_BYTE_ALIGNMENT );
26162616#endif
26172617 FOR_CURRENT_HEAP () {
26182618 const int * szc = sizeclasses ;
@@ -2781,6 +2781,7 @@ static void big_obj_stats(void)
27812781
27822782JL_DLLEXPORT void * jl_gc_counted_malloc (size_t sz )
27832783{
2784+ sz += JL_SMALL_BYTE_ALIGNMENT ;
27842785 maybe_collect ();
27852786 allocd_bytes += sz ;
27862787 gc_num .malloc ++ ;
@@ -2792,6 +2793,7 @@ JL_DLLEXPORT void *jl_gc_counted_malloc(size_t sz)
27922793
27932794JL_DLLEXPORT void * jl_gc_counted_calloc (size_t nm , size_t sz )
27942795{
2796+ nm += JL_SMALL_BYTE_ALIGNMENT ;
27952797 maybe_collect ();
27962798 allocd_bytes += nm * sz ;
27972799 gc_num .malloc ++ ;
@@ -2804,15 +2806,15 @@ JL_DLLEXPORT void *jl_gc_counted_calloc(size_t nm, size_t sz)
28042806JL_DLLEXPORT void jl_gc_counted_free (void * p , size_t sz )
28052807{
28062808 free (p );
2807- freed_bytes += sz ;
2809+ freed_bytes += sz + JL_SMALL_BYTE_ALIGNMENT ;
28082810 gc_num .freecall ++ ;
28092811}
28102812
2811- JL_DLLEXPORT void * jl_gc_counted_realloc_with_old_size (void * p , size_t old ,
2812- size_t sz )
2813+ JL_DLLEXPORT void * jl_gc_counted_realloc_with_old_size (void * p , size_t old , size_t sz )
28132814{
2815+ old += JL_SMALL_BYTE_ALIGNMENT ;
2816+ sz += JL_SMALL_BYTE_ALIGNMENT ;
28142817 maybe_collect ();
2815-
28162818 if (sz < old )
28172819 freed_bytes += (old - sz );
28182820 else
@@ -2826,7 +2828,7 @@ JL_DLLEXPORT void *jl_gc_counted_realloc_with_old_size(void *p, size_t old,
28262828
28272829JL_DLLEXPORT void * jl_malloc (size_t sz )
28282830{
2829- int64_t * p = (int64_t * )jl_gc_counted_malloc (sz + 16 );
2831+ int64_t * p = (int64_t * )jl_gc_counted_malloc (sz );
28302832 p [0 ] = sz ;
28312833 return (void * )(p + 2 );
28322834}
@@ -2835,7 +2837,7 @@ JL_DLLEXPORT void *jl_calloc(size_t nm, size_t sz)
28352837{
28362838 int64_t * p ;
28372839 size_t nmsz = nm * sz ;
2838- p = (int64_t * )jl_gc_counted_calloc (nmsz + 16 , 1 );
2840+ p = (int64_t * )jl_gc_counted_calloc (nmsz , 1 );
28392841 p [0 ] = nmsz ;
28402842 return (void * )(p + 2 );
28412843}
@@ -2844,27 +2846,27 @@ JL_DLLEXPORT void jl_free(void *p)
28442846{
28452847 int64_t * pp = (int64_t * )p - 2 ;
28462848 size_t sz = pp [0 ];
2847- jl_gc_counted_free (pp , sz + 16 );
2849+ jl_gc_counted_free (pp , sz );
28482850}
28492851
28502852JL_DLLEXPORT void * jl_realloc (void * p , size_t sz )
28512853{
28522854 int64_t * pp = (int64_t * )p - 2 ;
28532855 size_t szold = pp [0 ];
2854- int64_t * pnew = (int64_t * )jl_gc_counted_realloc_with_old_size (pp , szold + 16 , sz + 16 );
2856+ int64_t * pnew = (int64_t * )jl_gc_counted_realloc_with_old_size (pp , szold , sz );
28552857 pnew [0 ] = sz ;
28562858 return (void * )(pnew + 2 );
28572859}
28582860
// Allocate a GC-accounted buffer of at least sz bytes, rounded up to
// JL_CACHE_BYTE_ALIGNMENT and allocated cache-aligned. May trigger a
// collection first; throws jl_memory_exception on size overflow or OOM,
// so a non-NULL pointer is always returned.
JL_DLLEXPORT void *jl_gc_managed_malloc(size_t sz)
{
    maybe_collect();
    size_t allocsz = LLT_ALIGN(sz, JL_CACHE_BYTE_ALIGNMENT);
    if (allocsz < sz) // overflow while rounding up to the alignment; size was "negative"
        jl_throw(jl_memory_exception);
    allocd_bytes += allocsz; // charge the rounded-up size to the GC accounting
    gc_num.malloc++;
    void *b = malloc_cache_align(allocsz);
    if (b == NULL)
        jl_throw(jl_memory_exception);
    return b;
}
@@ -2875,7 +2877,7 @@ JL_DLLEXPORT void *jl_gc_managed_realloc(void *d, size_t sz, size_t oldsz,
28752877{
28762878 maybe_collect ();
28772879
2878- size_t allocsz = LLT_ALIGN (sz , 16 );
2880+ size_t allocsz = LLT_ALIGN (sz , JL_CACHE_BYTE_ALIGNMENT );
28792881 if (allocsz < sz ) // overflow in adding offs, size was "negative"
28802882 jl_throw (jl_memory_exception );
28812883
@@ -2891,7 +2893,7 @@ JL_DLLEXPORT void *jl_gc_managed_realloc(void *d, size_t sz, size_t oldsz,
28912893
28922894 void * b ;
28932895 if (isaligned )
2894- b = realloc_a16 (d , allocsz , oldsz );
2896+ b = realloc_cache_align (d , allocsz , oldsz );
28952897 else
28962898 b = realloc (d , allocsz );
28972899 if (b == NULL )