Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
S390: Save and restore fprs/vrs while resolving symbols.
On s390, no fprs/vrs were saved while resolving a symbol
via _dl_runtime_resolve/_dl_runtime_profile.

According to the ABI, the fpr arguments are defined as call-clobbered.
In leaf functions, gcc 4.9 and newer can use fprs for saving/restoring gprs
instead of saving them to the stack.
If gcc does this in one of the resolver functions, then the floating-point
arguments of a library function are invalid for the first call to that function.
Thus, this patch saves/restores the fprs around the resolving code.

The same could occur for vector registers. Furthermore, an ifunc resolver
could also clobber the vector/floating-point argument registers.
Thus, this patch provides the additional variants _dl_runtime_resolve_vx/
_dl_runtime_profile_vx, which are used if the kernel reports that
we are running on a machine with vector registers.

Furthermore, if _dl_runtime_profile calls _dl_call_pltexit,
the pointers to the inregs/outregs structs were set up incorrectly.
Now they point to the correct locations in the stack frame.
Before branching back to the caller, the return values of the resolved
function are now restored, so the caller no longer sees the return values
of the _dl_call_pltexit() call.
On s390-32, an endless loop occurred whenever _dl_call_pltexit() had to be called.
Now, this code path branches to that function instead of to the location just
after the preceding basr instruction.

ChangeLog:

	* sysdeps/s390/s390-32/dl-trampoline.S: Include dl-trampoline.h twice
	to create a non-vector/vector version for _dl_runtime_resolve and
	_dl_runtime_profile. Move implementation to ...
	* sysdeps/s390/s390-32/dl-trampoline.h: ... here.
	(_dl_runtime_resolve) Save and restore fpr/vrs.
	(_dl_runtime_profile) Save and restore vrs and fix some issues
	if _dl_call_pltexit is called.
	* sysdeps/s390/s390-32/dl-machine.h (elf_machine_runtime_setup):
	Choose the correct resolver function if running on a machine with vx.
	* sysdeps/s390/s390-64/dl-trampoline.S: Include dl-trampoline.h twice
	to create a non-vector/vector version for _dl_runtime_resolve and
	_dl_runtime_profile. Move implementation to ...
	* sysdeps/s390/s390-64/dl-trampoline.h: ... here.
	(_dl_runtime_resolve) Save and restore fpr/vrs.
	(_dl_runtime_profile) Save and restore vrs and fix some issues
	if _dl_call_pltexit is called.
	* sysdeps/s390/s390-64/dl-machine.h (elf_machine_runtime_setup):
	Choose the correct resolver function if running on a machine with vx.
  • Loading branch information
Stefan Liebler committed Mar 31, 2016
1 parent e91bd74 commit 4603c51
Show file tree
Hide file tree
Showing 7 changed files with 516 additions and 248 deletions.
20 changes: 20 additions & 0 deletions ChangeLog
@@ -1,3 +1,23 @@
2016-03-31 Stefan Liebler <stli@linux.vnet.ibm.com>

* sysdeps/s390/s390-32/dl-trampoline.S: Include dl-trampoline.h twice
to create a non-vector/vector version for _dl_runtime_resolve and
_dl_runtime_profile. Move implementation to ...
* sysdeps/s390/s390-32/dl-trampoline.h: ... here.
(_dl_runtime_resolve) Save and restore fpr/vrs.
(_dl_runtime_profile) Save and restore vrs and fix some issues
if _dl_call_pltexit is called.
* sysdeps/s390/s390-32/dl-machine.h (elf_machine_runtime_setup):
Choose the correct resolver function if running on a machine with vx.
* sysdeps/s390/s390-64/dl-trampoline.S: Include dl-trampoline.h twice
to create a non-vector/vector version for _dl_runtime_resolve and
_dl_runtime_profile. Move implementation to ...
* sysdeps/s390/s390-64/dl-trampoline.h: ... here.
(_dl_runtime_resolve) Save and restore fpr/vrs.
(_dl_runtime_profile) Save and restore vrs and fix some issues
if _dl_call_pltexit is called.
* sysdeps/s390/s390-64/dl-machine.h (elf_machine_runtime_setup):
Choose the correct resolver function if running on a machine with vx.

2016-03-31 Adhemerval Zanella <adhemerval.zanella@linaro.org>


* elf/tst-dlsym-error.c: Include <string.h> for strchrnul.
Expand Down
27 changes: 24 additions & 3 deletions sysdeps/s390/s390-32/dl-machine.h
Expand Up @@ -89,6 +89,11 @@ elf_machine_runtime_setup (struct link_map *l, int lazy, int profile)
{ {
extern void _dl_runtime_resolve (Elf32_Word); extern void _dl_runtime_resolve (Elf32_Word);
extern void _dl_runtime_profile (Elf32_Word); extern void _dl_runtime_profile (Elf32_Word);
#if defined HAVE_S390_VX_ASM_SUPPORT
extern void _dl_runtime_resolve_vx (Elf32_Word);
extern void _dl_runtime_profile_vx (Elf32_Word);
#endif



if (l->l_info[DT_JMPREL] && lazy) if (l->l_info[DT_JMPREL] && lazy)
{ {
Expand Down Expand Up @@ -116,7 +121,14 @@ elf_machine_runtime_setup (struct link_map *l, int lazy, int profile)
end in this function. */ end in this function. */
if (__glibc_unlikely (profile)) if (__glibc_unlikely (profile))
{ {
#if defined HAVE_S390_VX_ASM_SUPPORT
if (GLRO(dl_hwcap) & HWCAP_S390_VX)
got[2] = (Elf32_Addr) &_dl_runtime_profile_vx;
else
got[2] = (Elf32_Addr) &_dl_runtime_profile;
#else
got[2] = (Elf32_Addr) &_dl_runtime_profile; got[2] = (Elf32_Addr) &_dl_runtime_profile;
#endif


if (GLRO(dl_profile) != NULL if (GLRO(dl_profile) != NULL
&& _dl_name_match_p (GLRO(dl_profile), l)) && _dl_name_match_p (GLRO(dl_profile), l))
Expand All @@ -125,9 +137,18 @@ elf_machine_runtime_setup (struct link_map *l, int lazy, int profile)
GL(dl_profile_map) = l; GL(dl_profile_map) = l;
} }
else else
/* This function will get called to fix up the GOT entry indicated by {
the offset on the stack, and then jump to the resolved address. */ /* This function will get called to fix up the GOT entry indicated by
got[2] = (Elf32_Addr) &_dl_runtime_resolve; the offset on the stack, and then jump to the resolved address. */
#if defined HAVE_S390_VX_ASM_SUPPORT
if (GLRO(dl_hwcap) & HWCAP_S390_VX)
got[2] = (Elf32_Addr) &_dl_runtime_resolve_vx;
else
got[2] = (Elf32_Addr) &_dl_runtime_resolve;
#else
got[2] = (Elf32_Addr) &_dl_runtime_resolve;
#endif
}
} }


return lazy; return lazy;
Expand Down
134 changes: 11 additions & 123 deletions sysdeps/s390/s390-32/dl-trampoline.S
Expand Up @@ -16,130 +16,18 @@
License along with the GNU C Library; if not, see License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */ <http://www.gnu.org/licenses/>. */


/* This code is used in dl-runtime.c to call the `fixup' function
and then redirect to the address it returns. */

/* The PLT stubs will call _dl_runtime_resolve/_dl_runtime_profile
* with the following linkage:
* r2 - r6 : parameter registers
* f0, f2 : floating point parameter registers
* 24(r15), 28(r15) : PLT arguments PLT1, PLT2
* 96(r15) : additional stack parameters
* The normal clobber rules for function calls apply:
* r0 - r5 : call clobbered
* r6 - r13 : call saved
* r14 : return address (call clobbered)
* r15 : stack pointer (call saved)
* f4, f6 : call saved
* f0 - f3, f5, f7 - f15 : call clobbered
*/

#include <sysdep.h> #include <sysdep.h>


.text .text
.globl _dl_runtime_resolve /* Create variant of _dl_runtime_resolve/profile for machines before z13.
.type _dl_runtime_resolve, @function No vector registers are saved/restored. */
cfi_startproc #include <dl-trampoline.h>
.align 16
_dl_runtime_resolve: #if defined HAVE_S390_VX_ASM_SUPPORT
stm %r2,%r5,32(%r15) # save registers /* Create variant of _dl_runtime_resolve/profile for z13 and newer.
st %r14,8(%r15) The vector registers are saved/restored, too.*/
cfi_offset (r14, -88) # define _dl_runtime_resolve _dl_runtime_resolve_vx
lr %r0,%r15 # create stack frame # define _dl_runtime_profile _dl_runtime_profile_vx
ahi %r15,-96 # define RESTORE_VRS
cfi_adjust_cfa_offset (96) # include <dl-trampoline.h>
st 0,0(%r15)
lm %r2,%r3,120(%r15) # load args saved by PLT
basr %r1,0
0: l %r14,1f-0b(%r1)
bas %r14,0(%r14,%r1) # call resolver
lr %r1,%r2 # function addr returned in r2
ahi %r15,96 # remove stack frame
cfi_adjust_cfa_offset (-96)
l %r14,8(15) # restore registers
lm %r2,%r5,32(%r15)
br %r1
1: .long _dl_fixup - 0b
cfi_endproc
.size _dl_runtime_resolve, .-_dl_runtime_resolve


#ifndef PROF
.globl _dl_runtime_profile
.type _dl_runtime_profile, @function
cfi_startproc
.align 16
_dl_runtime_profile:
stm %r2,%r6,32(%r15) # save registers
std %f0,56(%r15)
std %f2,64(%r15)
st %r6,8(%r15)
st %r12,12(%r15)
st %r14,16(%r15)
cfi_offset (r6, -64)
cfi_offset (f0, -40)
cfi_offset (f2, -32)
cfi_offset (r12, -84)
cfi_offset (r14, -80)
lr %r12,%r15 # create stack frame
cfi_def_cfa_register (12)
ahi %r15,-96
st %r12,0(%r15)
lm %r2,%r3,24(%r12) # load arguments saved by PLT
lr %r4,%r14 # return address as third parameter
basr %r1,0
0: l %r14,6f-0b(%r1)
la %r5,32(%r12) # pointer to struct La_s390_32_regs
la %r6,20(%r12) # long int * framesize
bas %r14,0(%r14,%r1) # call resolver
lr %r1,%r2 # function addr returned in r2
icm %r0,15,20(%r12) # load & test framesize
jnm 2f

lm %r2,%r6,32(%r12)
ld %f0,56(%r12)
ld %f2,64(%r12)
lr %r15,%r12 # remove stack frame
cfi_def_cfa_register (15)
l %r14,16(%r15) # restore registers
l %r12,12(%r15)
br %r1 # tail-call to the resolved function

cfi_def_cfa_register (12)
2: jz 4f # framesize == 0 ?
ahi %r0,7 # align framesize to 8
lhi %r2,-8
nr %r0,%r2
slr %r15,%r0 # make room for framesize bytes
st %r12,0(%r15)
la %r2,96(%r15)
la %r3,96(%r12)
srl %r0,3
3: mvc 0(8,%r2),0(%r3) # copy additional parameters
la %r2,8(%r2)
la %r3,8(%r3)
brct %r0,3b
4: lm %r2,%r6,32(%r12) # load register parameters
ld %f0,56(%r12)
ld %f2,64(%r12)
basr %r14,%r1 # call resolved function
stm %r2,%r3,72(%r12)
std %f0,80(%r12)
lm %r2,%r3,24(%r12) # load arguments saved by PLT
basr %r1,0
5: l %r14,7f-5b(%r1)
la %r4,32(%r12) # pointer to struct La_s390_32_regs
la %r5,72(%r12) # pointer to struct La_s390_32_retval
basr %r14,%r1 # call _dl_call_pltexit

lr %r15,%r12 # remove stack frame
cfi_def_cfa_register (15)
l %r14,16(%r15) # restore registers
l %r12,12(%r15)
br %r14

6: .long _dl_profile_fixup - 0b
7: .long _dl_call_pltexit - 5b
cfi_endproc
.size _dl_runtime_profile, .-_dl_runtime_profile
#endif #endif

0 comments on commit 4603c51

Please sign in to comment.