Skip to content

Commit 01a7d27

Browse files
committed
fix excess array object alignment
1 parent ae1b469 commit 01a7d27

File tree

3 files changed

+54
-26
lines changed

3 files changed

+54
-26
lines changed

src/array.c

Lines changed: 13 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -114,20 +114,25 @@ static jl_array_t *_new_array_(jl_value_t *atype, uint32_t ndims, size_t *dims,
114114
}
115115

116116
int ndimwords = jl_array_ndimwords(ndims);
117-
int tsz = JL_ARRAY_ALIGN(sizeof(jl_array_t) + ndimwords*sizeof(size_t), JL_CACHE_BYTE_ALIGNMENT);
117+
int tsz = sizeof(jl_array_t) + ndimwords*sizeof(size_t);
118118
if (tot <= ARRAY_INLINE_NBYTES) {
119+
// align data area
119120
if (isunboxed && elsz >= 4)
120-
tsz = JL_ARRAY_ALIGN(tsz, JL_SMALL_BYTE_ALIGNMENT); // align data area
121+
tsz = JL_ARRAY_ALIGN(tsz, JL_SMALL_BYTE_ALIGNMENT);
122+
if (tsz+tot > GC_MAX_SZCLASS) {
123+
// object won't be pool allocated, so take advantage of larger alignment
124+
tsz = JL_ARRAY_ALIGN(tsz, JL_CACHE_BYTE_ALIGNMENT);
125+
}
121126
size_t doffs = tsz;
122127
tsz += tot;
123-
tsz = JL_ARRAY_ALIGN(tsz, JL_SMALL_BYTE_ALIGNMENT); // align whole object
128+
// jl_array_t is large enough that objects will always be 16-byte aligned
124129
a = (jl_array_t*)jl_gc_alloc(ct->ptls, tsz, atype);
130+
assert(((size_t)a & 15) == 0);
125131
// No allocation or safepoint allowed after this
126132
a->flags.how = 0;
127133
data = (char*)a + doffs;
128134
}
129135
else {
130-
tsz = JL_ARRAY_ALIGN(tsz, JL_CACHE_BYTE_ALIGNMENT); // align whole object
131136
data = jl_gc_managed_malloc(tot);
132137
// Allocate the Array **after** allocating the data
133138
// to make sure the array is still young
@@ -223,7 +228,7 @@ JL_DLLEXPORT jl_array_t *jl_reshape_array(jl_value_t *atype, jl_array_t *data,
223228
assert(jl_types_equal(jl_tparam0(jl_typeof(data)), jl_tparam0(atype)));
224229

225230
int ndimwords = jl_array_ndimwords(ndims);
226-
int tsz = JL_ARRAY_ALIGN(sizeof(jl_array_t) + ndimwords * sizeof(size_t) + sizeof(void*), JL_SMALL_BYTE_ALIGNMENT);
231+
int tsz = sizeof(jl_array_t) + ndimwords * sizeof(size_t) + sizeof(void*);
227232
a = (jl_array_t*)jl_gc_alloc(ct->ptls, tsz, atype);
228233
// No allocation or safepoint allowed after this
229234
a->flags.pooled = tsz <= GC_MAX_SZCLASS;
@@ -304,7 +309,7 @@ JL_DLLEXPORT jl_array_t *jl_string_to_array(jl_value_t *str)
304309
jl_array_t *a;
305310

306311
int ndimwords = jl_array_ndimwords(1);
307-
int tsz = JL_ARRAY_ALIGN(sizeof(jl_array_t) + ndimwords*sizeof(size_t) + sizeof(void*), JL_SMALL_BYTE_ALIGNMENT);
312+
int tsz = sizeof(jl_array_t) + ndimwords*sizeof(size_t) + sizeof(void*);
308313
a = (jl_array_t*)jl_gc_alloc(ct->ptls, tsz, jl_array_uint8_type);
309314
a->flags.pooled = tsz <= GC_MAX_SZCLASS;
310315
a->flags.ndims = 1;
@@ -351,7 +356,7 @@ JL_DLLEXPORT jl_array_t *jl_ptr_to_array_1d(jl_value_t *atype, void *data,
351356
"unsafe_wrap: pointer %p is not properly aligned to %u bytes", data, align);
352357

353358
int ndimwords = jl_array_ndimwords(1);
354-
int tsz = JL_ARRAY_ALIGN(sizeof(jl_array_t) + ndimwords*sizeof(size_t), JL_CACHE_BYTE_ALIGNMENT);
359+
int tsz = sizeof(jl_array_t) + ndimwords*sizeof(size_t);
355360
a = (jl_array_t*)jl_gc_alloc(ct->ptls, tsz, atype);
356361
// No allocation or safepoint allowed after this
357362
a->flags.pooled = tsz <= GC_MAX_SZCLASS;
@@ -418,7 +423,7 @@ JL_DLLEXPORT jl_array_t *jl_ptr_to_array(jl_value_t *atype, void *data,
418423
"unsafe_wrap: pointer %p is not properly aligned to %u bytes", data, align);
419424

420425
int ndimwords = jl_array_ndimwords(ndims);
421-
int tsz = JL_ARRAY_ALIGN(sizeof(jl_array_t) + ndimwords*sizeof(size_t), JL_CACHE_BYTE_ALIGNMENT);
426+
int tsz = sizeof(jl_array_t) + ndimwords*sizeof(size_t);
422427
a = (jl_array_t*)jl_gc_alloc(ct->ptls, tsz, atype);
423428
// No allocation or safepoint allowed after this
424429
a->flags.pooled = tsz <= GC_MAX_SZCLASS;

src/staticdata.c

Lines changed: 31 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -782,14 +782,28 @@ static void jl_write_values(jl_serializer_state *s)
782782
#define JL_ARRAY_ALIGN(jl_value, nbytes) LLT_ALIGN(jl_value, nbytes)
783783
jl_array_t *ar = (jl_array_t*)v;
784784
jl_value_t *et = jl_tparam0(jl_typeof(v));
785+
size_t alen = jl_array_len(ar);
786+
size_t datasize = alen * ar->elsize;
787+
size_t tot = datasize;
788+
int isbitsunion = jl_array_isbitsunion(ar);
789+
if (isbitsunion)
790+
tot += alen;
791+
else if (ar->elsize == 1)
792+
tot += 1;
785793
int ndimwords = jl_array_ndimwords(ar->flags.ndims);
786-
size_t tsz = JL_ARRAY_ALIGN(sizeof(jl_array_t) + ndimwords * sizeof(size_t), JL_CACHE_BYTE_ALIGNMENT);
794+
size_t headersize = sizeof(jl_array_t) + ndimwords*sizeof(size_t);
787795
// copy header
788-
ios_write(s->s, (char*)v, tsz);
796+
ios_write(s->s, (char*)v, headersize);
797+
size_t alignment_amt = 0, align16_padding = 0;
798+
if (!ar->flags.ptrarray && ar->elsize >= 4) {
799+
alignment_amt = JL_SMALL_BYTE_ALIGNMENT;
800+
align16_padding = JL_ARRAY_ALIGN(headersize, JL_SMALL_BYTE_ALIGNMENT) - headersize;
801+
}
802+
// use the same cutoff as array.c for stricter alignment
803+
if (headersize + tot + align16_padding > GC_MAX_SZCLASS)
804+
alignment_amt = JL_CACHE_BYTE_ALIGNMENT;
789805
// make some header modifications in-place
790806
jl_array_t *newa = (jl_array_t*)&s->s->buf[reloc_offset];
791-
size_t alen = jl_array_len(ar);
792-
size_t tot = alen * ar->elsize;
793807
if (newa->flags.ndims == 1)
794808
newa->maxsize = alen;
795809
newa->offset = 0;
@@ -799,8 +813,9 @@ static void jl_write_values(jl_serializer_state *s)
799813

800814
// write data
801815
if (!ar->flags.ptrarray && !ar->flags.hasptr) {
802-
uintptr_t data = LLT_ALIGN(ios_pos(s->const_data), 16);
803-
// realign stream to max(data-align(array), sizeof(void*))
816+
if (alignment_amt < 16)
817+
alignment_amt = 16;
818+
uintptr_t data = LLT_ALIGN(ios_pos(s->const_data), alignment_amt);
804819
write_padding(s->const_data, data - ios_pos(s->const_data));
805820
// write data and relocations
806821
newa->data = NULL; // relocation offset
@@ -815,22 +830,22 @@ static void jl_write_values(jl_serializer_state *s)
815830
write_pointer(s->const_data);
816831
}
817832
else {
818-
int isbitsunion = jl_array_isbitsunion(ar);
819-
if (ar->elsize == 1 && !isbitsunion)
820-
tot += 1;
821833
ios_write(s->const_data, (char*)jl_array_data(ar), tot);
822-
if (isbitsunion)
823-
ios_write(s->const_data, jl_array_typetagdata(ar), alen);
824834
}
825835
}
826836
else {
827-
newa->data = (void*)tsz; // relocation offset
837+
if (alignment_amt) {
838+
size_t aligned_hdr_sz = JL_ARRAY_ALIGN(headersize, alignment_amt);
839+
write_padding(s->s, aligned_hdr_sz - headersize);
840+
headersize = aligned_hdr_sz;
841+
}
842+
newa->data = (void*)headersize; // relocation offset
828843
arraylist_push(&s->relocs_list, (void*)(reloc_offset + offsetof(jl_array_t, data))); // relocation location
829844
arraylist_push(&s->relocs_list, (void*)(((uintptr_t)DataRef << RELOC_TAG_OFFSET) + item)); // relocation target
830845
if (ar->flags.hasptr) {
831846
// copy all of the data first
832847
const char *data = (const char*)jl_array_data(ar);
833-
ios_write(s->s, data, tot);
848+
ios_write(s->s, data, datasize);
834849
// then rewrite all of the embedded pointers to null+relocation
835850
uint16_t elsz = ar->elsize;
836851
size_t j, np = ((jl_datatype_t*)et)->layout->npointers;
@@ -840,12 +855,12 @@ static void jl_write_values(jl_serializer_state *s)
840855
size_t offset = i * elsz + jl_ptr_offset(((jl_datatype_t*)et), j) * sizeof(jl_value_t*);
841856
jl_value_t *fld = *(jl_value_t**)&data[offset];
842857
if (fld != NULL) {
843-
arraylist_push(&s->relocs_list, (void*)(uintptr_t)(reloc_offset + tsz + offset)); // relocation location
858+
arraylist_push(&s->relocs_list, (void*)(uintptr_t)(reloc_offset + headersize + offset)); // relocation location
844859
arraylist_push(&s->relocs_list, (void*)backref_id(s, fld)); // relocation target
845-
memset(&s->s->buf[reloc_offset + tsz + offset], 0, sizeof(fld)); // relocation offset (none)
860+
memset(&s->s->buf[reloc_offset + headersize + offset], 0, sizeof(fld)); // relocation offset (none)
846861
}
847862
else {
848-
assert(*(jl_value_t**)&s->s->buf[reloc_offset + tsz + offset] == NULL);
863+
assert(*(jl_value_t**)&s->s->buf[reloc_offset + headersize + offset] == NULL);
849864
}
850865
}
851866
}

test/cmdlineargs.jl

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -324,7 +324,11 @@ let exename = `$(Base.julia_cmd()) --startup-file=no --color=no`
324324
rm(memfile)
325325
@test popfirst!(got) == " 0 g(x) = x + 123456"
326326
@test popfirst!(got) == " - function f(x)"
327-
@test popfirst!(got) == " 80 []"
327+
if Sys.WORD_SIZE == 64
328+
@test popfirst!(got) == " 48 []"
329+
else
330+
@test popfirst!(got) == " 32 []"
331+
end
328332
if Sys.WORD_SIZE == 64
329333
# P64 pools with 64 bit tags
330334
@test popfirst!(got) == " 16 Base.invokelatest(g, 0)"
@@ -337,7 +341,11 @@ let exename = `$(Base.julia_cmd()) --startup-file=no --color=no`
337341
@test popfirst!(got) == " 8 Base.invokelatest(g, 0)"
338342
@test popfirst!(got) == " 32 Base.invokelatest(g, x)"
339343
end
340-
@test popfirst!(got) == " 80 []"
344+
if Sys.WORD_SIZE == 64
345+
@test popfirst!(got) == " 48 []"
346+
else
347+
@test popfirst!(got) == " 32 []"
348+
end
341349
@test popfirst!(got) == " - end"
342350
@test popfirst!(got) == " - f(1.23)"
343351
@test isempty(got) || got

0 commit comments

Comments
 (0)