diff --git a/ChangeLog b/ChangeLog index 66ebe002e2..c96f699e67 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,21 @@ +2003-07-24 Ulrich Drepper + + * include/link.h (struct link_map): Add l_tls_firstbyte_offset field. + * sysdeps/generic/dl-tls.c [TLS_TCB_AT_TP] (_dl_determine_tlsoffset): + Fix calculation of offsets to take misalignment of first byte in + file into account. + * elf/dl-load.c (_dl_map_object_from_fd): Initialize + l_tls_firstbyte_offset field. + * elf/rtld.c (_dl_start_final, _dl_start, dl_main): Likewise. + * elf/dl-reloc.c (_dl_allocate_static_tls): Change return type to int. + Take l_tls_firstbyte_offset information into account. + (CHECK_STATIC_TLS): _dl_allocate_static_tls can fail now. + * sysdeps/generic/ldsodefs.h: Adjust _dl_allocate_static_tls prototype. + * elf/Makefile: Add rules to build and run tst-tls14. + * elf/tst-tls14.c: New file. + * elf/tst-tlsmod14a.c: New file. + * elf/tst-tlsmod14b.c: New file. + 2003-07-23 Jakub Jelinek * sysdeps/pthread/lio_listio.c (LIO_OPCODE_BASE): Define. 
diff --git a/elf/Makefile b/elf/Makefile index 7c654f86ae..bfecc360e2 100644 --- a/elf/Makefile +++ b/elf/Makefile @@ -148,7 +148,7 @@ tests += loadtest restest1 preloadtest loadfail multiload origtest resolvfail \ neededtest3 neededtest4 unload2 lateglobal initfirst global \ restest2 next dblload dblunload reldep5 reldep6 reldep7 reldep8 \ circleload1 tst-tls3 tst-tls4 tst-tls5 tst-tls6 tst-tls7 tst-tls8 \ - tst-tls10 tst-tls11 tst-tls12 tst-tls13 + tst-tls10 tst-tls11 tst-tls12 tst-tls13 tst-tls14 # reldep9 test-srcs = tst-pathopt tests-vis-yes = vismain @@ -171,7 +171,7 @@ modules-names = testobj1 testobj2 testobj3 testobj4 testobj5 testobj6 \ tst-tlsmod1 tst-tlsmod2 tst-tlsmod3 tst-tlsmod4 \ tst-tlsmod5 tst-tlsmod6 tst-tlsmod7 tst-tlsmod8 \ tst-tlsmod9 tst-tlsmod10 tst-tlsmod11 tst-tlsmod12 \ - tst-tlsmod13 tst-tlsmod13a \ + tst-tlsmod13 tst-tlsmod13a tst-tlsmod14a tst-tlsmod14b \ circlemod1 circlemod1a circlemod2 circlemod2a \ circlemod3 circlemod3a \ reldep8mod1 reldep8mod2 reldep8mod3 \ @@ -428,6 +428,8 @@ tst-tlsmod8.so-no-z-defs = yes tst-tlsmod9.so-no-z-defs = yes tst-tlsmod10.so-no-z-defs = yes tst-tlsmod12.so-no-z-defs = yes +tst-tlsmod14a.so-no-z-defs = yes +tst-tlsmod14b.so-no-z-defs = yes circlemod2.so-no-z-defs = yes circlemod3.so-no-z-defs = yes circlemod3a.so-no-z-defs = yes @@ -633,6 +635,9 @@ $(objpfx)tst-tls12: $(objpfx)tst-tlsmod12.so $(objpfx)tst-tls13: $(libdl) $(objpfx)tst-tls13.out: $(objpfx)tst-tlsmod13a.so +$(objpfx)tst-tls14: $(objpfx)tst-tlsmod14a.so $(libdl) +$(objpfx)tst-tls14.out:$(objpfx)tst-tlsmod14b.so + ifdef libdl $(objpfx)tst-tls9-static: $(common-objpfx)dlfcn/libdl.a $(objpfx)tst-tls9-static.out: $(objpfx)tst-tlsmod5.so $(objpfx)tst-tlsmod6.so diff --git a/elf/dl-load.c b/elf/dl-load.c index f3c9e82423..249ef84639 100644 --- a/elf/dl-load.c +++ b/elf/dl-load.c @@ -987,6 +987,10 @@ _dl_map_object_from_fd (const char *name, int fd, struct filebuf *fbp, l->l_tls_blocksize = ph->p_memsz; l->l_tls_align = ph->p_align; + if 
(ph->p_align == 0) + l->l_tls_firstbyte_offset = 0; + else + l->l_tls_firstbyte_offset = ph->p_vaddr & (ph->p_align - 1); l->l_tls_initimage_size = ph->p_filesz; /* Since we don't know the load address yet only store the offset. We will adjust it later. */ diff --git a/elf/dl-reloc.c b/elf/dl-reloc.c index 6165fe4aca..d82ea108d0 100644 --- a/elf/dl-reloc.c +++ b/elf/dl-reloc.c @@ -41,27 +41,39 @@ the static TLS area already allocated for each running thread. If this object's TLS segment is too big to fit, we fail. If it fits, we set MAP->l_tls_offset and return. */ -void +int internal_function __attribute_noinline__ _dl_allocate_static_tls (struct link_map *map) { size_t offset; size_t used; size_t check; + size_t freebytes; + size_t n; + size_t blsize; + + /* If the alignment requirements are too high fail. */ + if (map->l_tls_align > GL(dl_tls_static_align)) + return 1; # if TLS_TCB_AT_TP - offset = roundup (GL(dl_tls_static_used) + map->l_tls_blocksize, - map->l_tls_align); - used = offset; - check = offset + TLS_TCB_SIZE; + freebytes = GL(dl_tls_static_size) - GL(dl_tls_static_used) - TLS_TCB_SIZE; + + blsize = map->l_tls_blocksize + map->l_tls_firstbyte_offset; + if (freebytes < blsize) + return 1; + + n = (freebytes - blsize) / map->l_tls_align; + + offset = GL(dl_tls_static_used) + (freebytes - n * map->l_tls_align + - map->l_tls_firstbyte_offset); + + map->l_tls_offset = GL(dl_tls_static_used) = offset; # elif TLS_DTV_AT_TP offset = roundup (GL(dl_tls_static_used), map->l_tls_align); used = offset + map->l_tls_blocksize; check = used; /* dl_tls_static_used includes the TCB at the beginning. 
*/ -# else -# error "Either TLS_TCB_AT_TP or TLS_DTV_AT_TP must be defined" -# endif if (check > GL(dl_tls_static_size)) { @@ -72,6 +84,11 @@ shared object cannot be dlopen()ed: static TLS memory too small"); map->l_tls_offset = offset; GL(dl_tls_static_used) = used; +# else +# error "Either TLS_TCB_AT_TP or TLS_DTV_AT_TP must be defined" +# endif + + return 0; } #endif @@ -212,7 +229,9 @@ _dl_relocate_object (struct link_map *l, struct r_scope_elem *scope[], #define CHECK_STATIC_TLS(map, sym_map) \ do { \ if (__builtin_expect ((sym_map)->l_tls_offset == NO_TLS_OFFSET, 0)) \ - _dl_allocate_static_tls (sym_map); \ + if (_dl_allocate_static_tls (sym_map) != 0) \ + INTUSE(_dl_signal_error) (0, sym_map->l_name, NULL, N_("\ +cannot allocate memory in static TLS block")); \ } while (0) #include "dynamic-link.h" diff --git a/elf/rtld.c b/elf/rtld.c index 4a086c2d32..c63405ac99 100644 --- a/elf/rtld.c +++ b/elf/rtld.c @@ -227,6 +227,7 @@ _dl_start_final (void *arg, struct dl_start_final_info *info) assert (info->l.l_tls_modid != 0); GL(dl_rtld_map).l_tls_blocksize = info->l.l_tls_blocksize; GL(dl_rtld_map).l_tls_align = info->l.l_tls_align; + GL(dl_rtld_map).l_tls_firstbyte_offset = info->l.l_tls_firstbyte_offset; GL(dl_rtld_map).l_tls_initimage_size = info->l.l_tls_initimage_size; GL(dl_rtld_map).l_tls_initimage = info->l.l_tls_initimage; GL(dl_rtld_map).l_tls_offset = info->l.l_tls_offset; @@ -347,6 +348,11 @@ _dl_start (void *arg) bootstrap_map.l_tls_blocksize = phdr[cnt].p_memsz; bootstrap_map.l_tls_align = phdr[cnt].p_align; + if (phdr[cnt].p_align == 0) + bootstrap_map.l_tls_firstbyte_offset = 0; + else + bootstrap_map.l_tls_firstbyte_offset = (phdr[cnt].p_vaddr + & (phdr[cnt].p_align - 1)); assert (bootstrap_map.l_tls_blocksize != 0); bootstrap_map.l_tls_initimage_size = phdr[cnt].p_filesz; bootstrap_map.l_tls_initimage = (void *) (bootstrap_map.l_addr @@ -860,6 +866,11 @@ of this helper program; chances are you did not intend to run this program.\n\ check for this 
special but unimportant case. */ GL(dl_loaded)->l_tls_blocksize = ph->p_memsz; GL(dl_loaded)->l_tls_align = ph->p_align; + if (ph->p_align == 0) + GL(dl_loaded)->l_tls_firstbyte_offset = 0; + else + GL(dl_loaded)->l_tls_firstbyte_offset = (ph->p_vaddr + & (ph->p_align - 1)); GL(dl_loaded)->l_tls_initimage_size = ph->p_filesz; GL(dl_loaded)->l_tls_initimage = (void *) ph->p_vaddr; diff --git a/elf/tst-tls14.c b/elf/tst-tls14.c new file mode 100644 index 0000000000..4ae367a38f --- /dev/null +++ b/elf/tst-tls14.c @@ -0,0 +1,56 @@ +/* Check alignment of TLS variable. */ +#include +#include +#include +#include + + +#define AL 4096 +struct foo +{ + int i; +} __attribute ((aligned (AL))); + +static __thread struct foo f; +static struct foo g; + + +extern int in_dso1 (void); + + +static int +do_test (void) +{ + int result = 0; + + int fail = (((uintptr_t) &f) & (AL - 1)) != 0; + printf ("&f = %p %s\n", &f, fail ? "FAIL" : "OK"); + result |= fail; + + fail = (((uintptr_t) &g) & (AL - 1)) != 0; + printf ("&g = %p %s\n", &g, fail ? "FAIL" : "OK"); + result |= fail; + + result |= in_dso1 (); + + void *h = dlopen ("tst-tlsmod14b.so", RTLD_LAZY); + if (h == NULL) + { + printf ("cannot open tst-tlsmod14b.so: %m\n"); + exit (1); + } + + int (*fp) (void) = (int (*) (void)) dlsym (h, "in_dso2"); + if (fp == NULL) + { + puts ("cannot find in_dso2"); + exit (1); + } + + result |= fp (); + + return result; +} + +#define TEST_FUNCTION do_test () +#include "../test-skeleton.c" diff --git a/elf/tst-tlsmod14a.c b/elf/tst-tlsmod14a.c new file mode 100644 index 0000000000..4843e5937e --- /dev/null +++ b/elf/tst-tlsmod14a.c @@ -0,0 +1,36 @@ +#include +#include + + +#define AL 4096 +struct foo +{ + int i; +} __attribute ((aligned (AL))); + +static __thread struct foo f; +static struct foo g; + + +#ifndef FCT +# define FCT in_dso1 +#endif + + +int +FCT (void) +{ + puts (__func__); + + int result = 0; + + int fail = (((uintptr_t) &f) & (AL - 1)) != 0; + printf ("&f = %p %s\n", &f, fail ? 
"FAIL" : "OK"); + result |= fail; + + fail = (((uintptr_t) &g) & (AL - 1)) != 0; + printf ("&g = %p %s\n", &g, fail ? "FAIL" : "OK"); + result |= fail; + + return result; +} diff --git a/elf/tst-tlsmod14b.c b/elf/tst-tlsmod14b.c new file mode 100644 index 0000000000..24d9ceaf7e --- /dev/null +++ b/elf/tst-tlsmod14b.c @@ -0,0 +1,2 @@ +#define FCT in_dso2 +#include "tst-tlsmod14a.c" diff --git a/include/link.h b/include/link.h index cc2387b7a0..cd66222795 100644 --- a/include/link.h +++ b/include/link.h @@ -1,6 +1,6 @@ /* Data structure for communication from the run-time dynamic linker for loaded ELF shared objects. - Copyright (C) 1995-1999,2000,01,02 Free Software Foundation, Inc. + Copyright (C) 1995-2002, 2003 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -268,6 +268,8 @@ struct link_map size_t l_tls_blocksize; /* Alignment requirement of the TLS block. */ size_t l_tls_align; + /* Offset of first byte module alignment. */ + size_t l_tls_firstbyte_offset; # ifndef NO_TLS_OFFSET # define NO_TLS_OFFSET 0 # endif diff --git a/sysdeps/generic/dl-tls.c b/sysdeps/generic/dl-tls.c index a51e3f7bc7..ccad53c59f 100644 --- a/sysdeps/generic/dl-tls.c +++ b/sysdeps/generic/dl-tls.c @@ -117,9 +117,10 @@ internal_function _dl_determine_tlsoffset (void) { struct dtv_slotinfo *slotinfo; - size_t max_align = __alignof__ (void *); + size_t max_align; size_t offset; size_t cnt; + size_t freebytes; /* The first element of the dtv slot info list is allocated. */ assert (GL(dl_tls_dtv_slotinfo_list) != NULL); @@ -127,24 +128,78 @@ _dl_determine_tlsoffset (void) dl_tls_dtv_slotinfo_list list. */ assert (GL(dl_tls_dtv_slotinfo_list)->next == NULL); + /* Determining the offset of the various parts of the static TLS + block has several dependencies. In addition we have to work + around bugs in some toolchains. 
+ + Each TLS block from the objects available at link time has a size + and an alignment requirement. The GNU ld computes the alignment + requirements for the data at the positions *in the file*, though. + I.e., it is not simply possible to allocate a block with the size + of the TLS program header entry. The data is laid out assuming + that the first byte of the TLS block fulfills + + p_vaddr mod p_align == &TLS_BLOCK mod p_align + + This means we have to add artificial padding at the beginning of + the TLS block. These bytes are never used for the TLS data in + this module but the first byte allocated must be aligned + according to mod p_align == 0 so that the first byte of the TLS + block is aligned according to p_vaddr mod p_align. This is ugly + and the linker can help by computing the offsets in the TLS block + assuming the first byte of the TLS block is aligned according to + p_align. + + We can handle this wrong behavior because of another bug in GNU + ld. The p_vaddr field of the TLS segment must be zero (according + to the spec) since the linker does not know the address or offset + where it will end up. Once a linker is available which + handles the alignment correctly it should set p_vaddr to zero and + all will automatically fall into place. + + The extra space which might be allocated before the first byte of + the TLS block need not go unused. The code below tries to use + that memory for the next TLS block. This can work if the total + memory requirement for the next TLS block is smaller than the + gap. */ + # if TLS_TCB_AT_TP /* We simply start with zero. 
*/ + max_align = __alignof (void *); offset = 0; + freebytes = 0; slotinfo = GL(dl_tls_dtv_slotinfo_list)->slotinfo; for (cnt = 1; slotinfo[cnt].map != NULL; ++cnt) { assert (cnt < GL(dl_tls_dtv_slotinfo_list)->len); - max_align = MAX (max_align, slotinfo[cnt].map->l_tls_align); + size_t blsize = (slotinfo[cnt].map->l_tls_blocksize + + slotinfo[cnt].map->l_tls_firstbyte_offset); - /* Compute the offset of the next TLS block. */ - offset = roundup (offset + slotinfo[cnt].map->l_tls_blocksize, - slotinfo[cnt].map->l_tls_align); + if (blsize <= freebytes) + { + /* When we come here the amount of memory which was "wasted" + for the correct alignment of the previous block is larger + than what we need for this module. So use it. */ + size_t n = (freebytes - blsize) / slotinfo[cnt].map->l_tls_align; + freebytes = (n * slotinfo[cnt].map->l_tls_align + + slotinfo[cnt].map->l_tls_firstbyte_offset); + } + else + { + /* There is either no gap from the bottom of the static TLS + block to the first used byte or the gap is too small. + Extend the static TLS block. */ + offset += roundup (blsize, max_align); + freebytes = slotinfo[cnt].map->l_tls_firstbyte_offset; + } + + max_align = MAX (max_align, slotinfo[cnt].map->l_tls_align); /* XXX For some architectures we perhaps should store the negative offset. */ - slotinfo[cnt].map->l_tls_offset = offset; + slotinfo[cnt].map->l_tls_offset = offset - freebytes; } /* The thread descriptor (pointed to by the thread pointer) has its @@ -156,11 +211,12 @@ _dl_determine_tlsoffset (void) // XXX model. GL(dl_tls_static_used) = offset; - GL(dl_tls_static_size) = roundup (offset + TLS_STATIC_SURPLUS + TLS_TCB_SIZE, - TLS_TCB_ALIGN); + GL(dl_tls_static_size) = (offset + roundup (TLS_STATIC_SURPLUS, max_align) + + TLS_TCB_SIZE); # elif TLS_DTV_AT_TP /* The TLS blocks start right after the TCB. */ offset = TLS_TCB_SIZE; + max_align = __alignof (void *); /* The first block starts right after the TCB. 
*/ slotinfo = GL(dl_tls_dtv_slotinfo_list)->slotinfo; @@ -201,7 +257,7 @@ _dl_determine_tlsoffset (void) # endif /* The alignment requirement for the static TLS block. */ - GL(dl_tls_static_align) = MAX (TLS_TCB_ALIGN, max_align); + GL(dl_tls_static_align) = max_align; } diff --git a/sysdeps/generic/ldsodefs.h b/sysdeps/generic/ldsodefs.h index 570b52261b..565edb3fd8 100644 --- a/sysdeps/generic/ldsodefs.h +++ b/sysdeps/generic/ldsodefs.h @@ -797,7 +797,7 @@ rtld_hidden_proto (_dl_allocate_tls) extern void _dl_get_tls_static_info (size_t *sizep, size_t *alignp) internal_function; -extern void _dl_allocate_static_tls (struct link_map *map) +extern int _dl_allocate_static_tls (struct link_map *map) internal_function attribute_hidden; /* These are internal entry points to the two halves of _dl_allocate_tls,