Fu, Chao-Ying
2009-07-31 00:57:38 UTC
Hi All,
We tried to implement an optimization that transforms JALR into BAL
for function calls inside a shared library, in order to improve performance.
It turned out that BFD_RELOC_MIPS_JALR was designed as a hint to help with this JALR
transformation. However, this relocation is currently emitted only for the N32 and N64 ABIs.
So, we made a patch to enable BFD_RELOC_MIPS_JALR for mips32, mips32r2,
mips64, and mips64r2 under all ABIs.
To take advantage of this optimization, GCC must be run with -mno-explicit-relocs
so that the assembler emits BFD_RELOC_MIPS_JALR for shared libraries (-mshared).
The JAL-to-BAL transformation is enabled by the same mechanism in this patch.
Please see the examples below and check whether this patch might break anything. Thanks a lot!
Ex 1: (Calls inside a shared library)
# cat call.c
int t2() { return 1984 + t3(); }
int t3() { return 0; }
# cc1 -quiet call.c -O2 -mabicalls -mshared -G0 -mno-explicit-relocs -o call.s -fno-inline-small-functions
# as-new call.s -o call.o -mips32r2
# objdump -dr call.o
call.o: file format elf32-tradbigmips
Disassembly of section .text:
00000000 <t3>:
0: 3c1c0000 lui gp,0x0
0: R_MIPS_HI16 _gp_disp
4: 279c0000 addiu gp,gp,0
4: R_MIPS_LO16 _gp_disp
8: 0399e021 addu gp,gp,t9
c: 03e00008 jr ra
10: 00001021 move v0,zero
00000014 <t2>:
14: 3c1c0000 lui gp,0x0
14: R_MIPS_HI16 _gp_disp
18: 279c0000 addiu gp,gp,0
18: R_MIPS_LO16 _gp_disp
1c: 0399e021 addu gp,gp,t9
20: 27bdffe0 addiu sp,sp,-32
24: afbf001c sw ra,28(sp)
28: afbc0010 sw gp,16(sp)
2c: 8f990000 lw t9,0(gp)
2c: R_MIPS_CALL16 t3
30: 0320f809 jalr t9 <------------------
30: R_MIPS_JALR t3
34: 00000000 nop
38: 8fbc0010 lw gp,16(sp)
3c: 8fbf001c lw ra,28(sp)
40: 244207c0 addiu v0,v0,1984
44: 03e00008 jr ra
48: 27bd0020 addiu sp,sp,32
# ld-new -shared call.o -o libcall.so
# objdump -dr libcall.so
libcall.so: file format elf32-tradbigmips
Disassembly of section .text:
000002d4 <t3>:
2d4: 3c1c0002 lui gp,0x2
2d8: 279c803c addiu gp,gp,-32708
2dc: 0399e021 addu gp,gp,t9
2e0: 03e00008 jr ra
2e4: 00001021 move v0,zero
000002e8 <t2>:
2e8: 3c1c0002 lui gp,0x2
2ec: 279c8028 addiu gp,gp,-32728
2f0: 0399e021 addu gp,gp,t9
2f4: 27bdffe0 addiu sp,sp,-32
2f8: afbf001c sw ra,28(sp)
2fc: afbc0010 sw gp,16(sp)
300: 8f998018 lw t9,-32744(gp)
304: 0411fff3 bal 2d4 <t3> <--------------------
308: 00000000 nop
30c: 8fbc0010 lw gp,16(sp)
310: 8fbf001c lw ra,28(sp)
314: 244207c0 addiu v0,v0,1984
318: 03e00008 jr ra
31c: 27bd0020 addiu sp,sp,32
Ex 2: (Calls not in a shared library)
# cat call.c
int t2() { return 1984 + t3(); }
int t3() { return 0; }
# cc1 -quiet call.c -O2 -mabicalls -mno-shared -G0 -o call.s -fno-inline-small-functions
# as-new call.s -o call.o -mips32r2
# objdump -dr call.o
call.o: file format elf32-tradbigmips
Disassembly of section .text:
00000000 <t3>:
0: 03e00008 jr ra
4: 00001021 move v0,zero
00000008 <t2>:
8: 27bdffe0 addiu sp,sp,-32
c: afbf001c sw ra,28(sp)
10: 0c000000 jal 0 <t3> <-----------------
10: R_MIPS_26 t3
14: 00000000 nop
18: 8fbf001c lw ra,28(sp)
1c: 244207c0 addiu v0,v0,1984
20: 03e00008 jr ra
24: 27bd0020 addiu sp,sp,32
# ld-new call.o -o call
# objdump -dr call
call: file format elf32-tradbigmips
Disassembly of section .text:
0040006c <t3>:
40006c: 03e00008 jr ra
400070: 00001021 move v0,zero
00400074 <t2>:
400074: 27bdffe0 addiu sp,sp,-32
400078: afbf001c sw ra,28(sp)
40007c: 0411fffb bal 40006c <t3> <-----------------
400080: 00000000 nop
400084: 8fbf001c lw ra,28(sp)
400088: 244207c0 addiu v0,v0,1984
40008c: 03e00008 jr ra
400090: 27bd0020 addiu sp,sp,32
Regards,
Chao-ying
gas/ChangeLog
2009-07-30 Chao-ying Fu <***@mips.com>
* config/tc-mips.c (MIPS_JALR_HINT_P): New define. True for mips32,
mips32r2, mips64, and mips64r2.
(macro_build_jalr): If MIPS_JALR_HINT_P, emit BFD_RELOC_MIPS_JALR.
bfd/ChangeLog
2009-07-30 Chao-ying Fu <***@mips.com>
* elf32-mips.c (mips_reloc_map): Add BFD_RELOC_MIPS_JALR.
* elfxx-mips.c (JAL_JALR_TO_BAL_P): New define to transform JAL/JALR
to BAL for CPUs that include RM9000, mips32, mips32r2, mips64, and mips64r2.
(mips_elf_perform_relocation): Use JAL_JALR_TO_BAL_P to guard the transformation.
Index: src/gas/config/tc-mips.c
===================================================================
--- src.orig/gas/config/tc-mips.c 2009-07-30 16:31:53.379834000 -0700
+++ src/gas/config/tc-mips.c 2009-07-30 16:54:32.814022000 -0700
@@ -290,6 +290,12 @@ static int file_ase_mips16;
|| mips_opts.isa == ISA_MIPS64 \
|| mips_opts.isa == ISA_MIPS64R2)
+/* True if we want to create BFD_RELOC_MIPS_JALR for jalr $25. */
+#define MIPS_JALR_HINT_P (mips_opts.isa == ISA_MIPS32 \
+ || mips_opts.isa == ISA_MIPS32R2 \
+ || mips_opts.isa == ISA_MIPS64 \
+ || mips_opts.isa == ISA_MIPS64R2)
+
/* True if -mips3d was passed or implied by arguments passed on the
command line (e.g., by -march). */
static int file_ase_mips3d;
@@ -3923,12 +3929,11 @@ macro_build_jalr (expressionS *ep)
char *f = NULL;
if (HAVE_NEWABI)
- {
- frag_grow (8);
- f = frag_more (0);
- }
+ frag_grow (8);
+ if (HAVE_NEWABI || MIPS_JALR_HINT_P)
+ f = frag_more (0);
macro_build (NULL, "jalr", "d,s", RA, PIC_CALL_REG);
- if (HAVE_NEWABI)
+ if (HAVE_NEWABI || MIPS_JALR_HINT_P)
fix_new_exp (frag_now, f - frag_now->fr_literal,
4, ep, FALSE, BFD_RELOC_MIPS_JALR);
}
Index: src/bfd/elf32-mips.c
===================================================================
--- src.orig/bfd/elf32-mips.c 2009-07-30 16:31:53.587623000 -0700
+++ src/bfd/elf32-mips.c 2009-07-30 16:40:12.216495000 -0700
@@ -1261,6 +1261,7 @@ static const struct elf_reloc_map mips_r
{ BFD_RELOC_MIPS_GOT_PAGE, R_MIPS_GOT_PAGE },
{ BFD_RELOC_MIPS_GOT_OFST, R_MIPS_GOT_OFST },
{ BFD_RELOC_MIPS_GOT_DISP, R_MIPS_GOT_DISP },
+ { BFD_RELOC_MIPS_JALR, R_MIPS_JALR },
{ BFD_RELOC_MIPS_TLS_DTPMOD32, R_MIPS_TLS_DTPMOD32 },
{ BFD_RELOC_MIPS_TLS_DTPREL32, R_MIPS_TLS_DTPREL32 },
{ BFD_RELOC_MIPS_TLS_DTPMOD64, R_MIPS_TLS_DTPMOD64 },
Index: src/bfd/elfxx-mips.c
===================================================================
--- src.orig/bfd/elfxx-mips.c 2009-07-30 16:31:53.713500000 -0700
+++ src/bfd/elfxx-mips.c 2009-07-30 16:40:12.261451000 -0700
@@ -668,6 +668,16 @@ static bfd *reldyn_sorting_bfd;
( ((elf_elfheader (abfd)->e_flags & EF_MIPS_ARCH) != E_MIPS_ARCH_1) \
|| ((elf_elfheader (abfd)->e_flags & EF_MIPS_MACH) == E_MIPS_MACH_3900))
+/* True if ABFD is for CPUs that are faster if jal/jalr is converted to bal.
+ This should be safe for all architectures, but for now we enable it
+ for RM9000, mips32, mips32r2, mips64, and mips64r2. */
+#define JAL_JALR_TO_BAL_P(abfd) \
+ ( ((elf_elfheader (abfd)->e_flags & EF_MIPS_MACH) == E_MIPS_MACH_9000) \
+ || ((elf_elfheader (abfd)->e_flags & EF_MIPS_ARCH) == E_MIPS_ARCH_32) \
+ || ((elf_elfheader (abfd)->e_flags & EF_MIPS_ARCH) == E_MIPS_ARCH_32R2) \
+ || ((elf_elfheader (abfd)->e_flags & EF_MIPS_ARCH) == E_MIPS_ARCH_64) \
+ || ((elf_elfheader (abfd)->e_flags & EF_MIPS_ARCH) == E_MIPS_ARCH_64R2))
+
/* True if ABFD is a PIC object. */
#define PIC_OBJECT_P(abfd) \
((elf_elfheader (abfd)->e_flags & EF_MIPS_PIC) != 0)
@@ -5590,7 +5600,7 @@ mips_elf_perform_relocation (struct bfd_
prediction hardware. If we are linking for the RM9000, and we
see jal, and bal fits, use it instead. Note that this
transformation should be safe for all architectures. */
- if (bfd_get_mach (input_bfd) == bfd_mach_mips9000
+ if (JAL_JALR_TO_BAL_P (input_bfd)
&& !info->relocatable
&& !require_jalx
&& ((r_type == R_MIPS_26 && (x >> 26) == 0x3) /* jal addr */
We tried to implement an optimization that transforms JALR into BAL
for function calls inside a shared library, in order to improve performance.
It turned out that BFD_RELOC_MIPS_JALR was designed as a hint to help with this JALR
transformation. However, this relocation is currently emitted only for the N32 and N64 ABIs.
So, we made a patch to enable BFD_RELOC_MIPS_JALR for mips32, mips32r2,
mips64, and mips64r2 under all ABIs.
To take advantage of this optimization, GCC must be run with -mno-explicit-relocs
so that the assembler emits BFD_RELOC_MIPS_JALR for shared libraries (-mshared).
The JAL-to-BAL transformation is enabled by the same mechanism in this patch.
Please see the examples below and check whether this patch might break anything. Thanks a lot!
Ex 1: (Calls inside a shared library)
# cat call.c
int t2() { return 1984 + t3(); }
int t3() { return 0; }
# cc1 -quiet call.c -O2 -mabicalls -mshared -G0 -mno-explicit-relocs -o call.s -fno-inline-small-functions
# as-new call.s -o call.o -mips32r2
# objdump -dr call.o
call.o: file format elf32-tradbigmips
Disassembly of section .text:
00000000 <t3>:
0: 3c1c0000 lui gp,0x0
0: R_MIPS_HI16 _gp_disp
4: 279c0000 addiu gp,gp,0
4: R_MIPS_LO16 _gp_disp
8: 0399e021 addu gp,gp,t9
c: 03e00008 jr ra
10: 00001021 move v0,zero
00000014 <t2>:
14: 3c1c0000 lui gp,0x0
14: R_MIPS_HI16 _gp_disp
18: 279c0000 addiu gp,gp,0
18: R_MIPS_LO16 _gp_disp
1c: 0399e021 addu gp,gp,t9
20: 27bdffe0 addiu sp,sp,-32
24: afbf001c sw ra,28(sp)
28: afbc0010 sw gp,16(sp)
2c: 8f990000 lw t9,0(gp)
2c: R_MIPS_CALL16 t3
30: 0320f809 jalr t9 <------------------
30: R_MIPS_JALR t3
34: 00000000 nop
38: 8fbc0010 lw gp,16(sp)
3c: 8fbf001c lw ra,28(sp)
40: 244207c0 addiu v0,v0,1984
44: 03e00008 jr ra
48: 27bd0020 addiu sp,sp,32
# ld-new -shared call.o -o libcall.so
# objdump -dr libcall.so
libcall.so: file format elf32-tradbigmips
Disassembly of section .text:
000002d4 <t3>:
2d4: 3c1c0002 lui gp,0x2
2d8: 279c803c addiu gp,gp,-32708
2dc: 0399e021 addu gp,gp,t9
2e0: 03e00008 jr ra
2e4: 00001021 move v0,zero
000002e8 <t2>:
2e8: 3c1c0002 lui gp,0x2
2ec: 279c8028 addiu gp,gp,-32728
2f0: 0399e021 addu gp,gp,t9
2f4: 27bdffe0 addiu sp,sp,-32
2f8: afbf001c sw ra,28(sp)
2fc: afbc0010 sw gp,16(sp)
300: 8f998018 lw t9,-32744(gp)
304: 0411fff3 bal 2d4 <t3> <--------------------
308: 00000000 nop
30c: 8fbc0010 lw gp,16(sp)
310: 8fbf001c lw ra,28(sp)
314: 244207c0 addiu v0,v0,1984
318: 03e00008 jr ra
31c: 27bd0020 addiu sp,sp,32
Ex 2: (Calls not in a shared library)
# cat call.c
int t2() { return 1984 + t3(); }
int t3() { return 0; }
# cc1 -quiet call.c -O2 -mabicalls -mno-shared -G0 -o call.s -fno-inline-small-functions
# as-new call.s -o call.o -mips32r2
# objdump -dr call.o
call.o: file format elf32-tradbigmips
Disassembly of section .text:
00000000 <t3>:
0: 03e00008 jr ra
4: 00001021 move v0,zero
00000008 <t2>:
8: 27bdffe0 addiu sp,sp,-32
c: afbf001c sw ra,28(sp)
10: 0c000000 jal 0 <t3> <-----------------
10: R_MIPS_26 t3
14: 00000000 nop
18: 8fbf001c lw ra,28(sp)
1c: 244207c0 addiu v0,v0,1984
20: 03e00008 jr ra
24: 27bd0020 addiu sp,sp,32
# ld-new call.o -o call
# objdump -dr call
call: file format elf32-tradbigmips
Disassembly of section .text:
0040006c <t3>:
40006c: 03e00008 jr ra
400070: 00001021 move v0,zero
00400074 <t2>:
400074: 27bdffe0 addiu sp,sp,-32
400078: afbf001c sw ra,28(sp)
40007c: 0411fffb bal 40006c <t3> <-----------------
400080: 00000000 nop
400084: 8fbf001c lw ra,28(sp)
400088: 244207c0 addiu v0,v0,1984
40008c: 03e00008 jr ra
400090: 27bd0020 addiu sp,sp,32
Regards,
Chao-ying
gas/ChangeLog
2009-07-30 Chao-ying Fu <***@mips.com>
* config/tc-mips.c (MIPS_JALR_HINT_P): New define. True for mips32,
mips32r2, mips64, and mips64r2.
(macro_build_jalr): If MIPS_JALR_HINT_P, emit BFD_RELOC_MIPS_JALR.
bfd/ChangeLog
2009-07-30 Chao-ying Fu <***@mips.com>
* elf32-mips.c (mips_reloc_map): Add BFD_RELOC_MIPS_JALR.
* elfxx-mips.c (JAL_JALR_TO_BAL_P): New define to transform JAL/JALR
to BAL for CPUs that include RM9000, mips32, mips32r2, mips64, and mips64r2.
(mips_elf_perform_relocation): Use JAL_JALR_TO_BAL_P to guard the transformation.
Index: src/gas/config/tc-mips.c
===================================================================
--- src.orig/gas/config/tc-mips.c 2009-07-30 16:31:53.379834000 -0700
+++ src/gas/config/tc-mips.c 2009-07-30 16:54:32.814022000 -0700
@@ -290,6 +290,12 @@ static int file_ase_mips16;
|| mips_opts.isa == ISA_MIPS64 \
|| mips_opts.isa == ISA_MIPS64R2)
+/* True if we want to create BFD_RELOC_MIPS_JALR for jalr $25. */
+#define MIPS_JALR_HINT_P (mips_opts.isa == ISA_MIPS32 \
+ || mips_opts.isa == ISA_MIPS32R2 \
+ || mips_opts.isa == ISA_MIPS64 \
+ || mips_opts.isa == ISA_MIPS64R2)
+
/* True if -mips3d was passed or implied by arguments passed on the
command line (e.g., by -march). */
static int file_ase_mips3d;
@@ -3923,12 +3929,11 @@ macro_build_jalr (expressionS *ep)
char *f = NULL;
if (HAVE_NEWABI)
- {
- frag_grow (8);
- f = frag_more (0);
- }
+ frag_grow (8);
+ if (HAVE_NEWABI || MIPS_JALR_HINT_P)
+ f = frag_more (0);
macro_build (NULL, "jalr", "d,s", RA, PIC_CALL_REG);
- if (HAVE_NEWABI)
+ if (HAVE_NEWABI || MIPS_JALR_HINT_P)
fix_new_exp (frag_now, f - frag_now->fr_literal,
4, ep, FALSE, BFD_RELOC_MIPS_JALR);
}
Index: src/bfd/elf32-mips.c
===================================================================
--- src.orig/bfd/elf32-mips.c 2009-07-30 16:31:53.587623000 -0700
+++ src/bfd/elf32-mips.c 2009-07-30 16:40:12.216495000 -0700
@@ -1261,6 +1261,7 @@ static const struct elf_reloc_map mips_r
{ BFD_RELOC_MIPS_GOT_PAGE, R_MIPS_GOT_PAGE },
{ BFD_RELOC_MIPS_GOT_OFST, R_MIPS_GOT_OFST },
{ BFD_RELOC_MIPS_GOT_DISP, R_MIPS_GOT_DISP },
+ { BFD_RELOC_MIPS_JALR, R_MIPS_JALR },
{ BFD_RELOC_MIPS_TLS_DTPMOD32, R_MIPS_TLS_DTPMOD32 },
{ BFD_RELOC_MIPS_TLS_DTPREL32, R_MIPS_TLS_DTPREL32 },
{ BFD_RELOC_MIPS_TLS_DTPMOD64, R_MIPS_TLS_DTPMOD64 },
Index: src/bfd/elfxx-mips.c
===================================================================
--- src.orig/bfd/elfxx-mips.c 2009-07-30 16:31:53.713500000 -0700
+++ src/bfd/elfxx-mips.c 2009-07-30 16:40:12.261451000 -0700
@@ -668,6 +668,16 @@ static bfd *reldyn_sorting_bfd;
( ((elf_elfheader (abfd)->e_flags & EF_MIPS_ARCH) != E_MIPS_ARCH_1) \
|| ((elf_elfheader (abfd)->e_flags & EF_MIPS_MACH) == E_MIPS_MACH_3900))
+/* True if ABFD is for CPUs that are faster if jal/jalr is converted to bal.
+ This should be safe for all architectures, but for now we enable it
+ for RM9000, mips32, mips32r2, mips64, and mips64r2. */
+#define JAL_JALR_TO_BAL_P(abfd) \
+ ( ((elf_elfheader (abfd)->e_flags & EF_MIPS_MACH) == E_MIPS_MACH_9000) \
+ || ((elf_elfheader (abfd)->e_flags & EF_MIPS_ARCH) == E_MIPS_ARCH_32) \
+ || ((elf_elfheader (abfd)->e_flags & EF_MIPS_ARCH) == E_MIPS_ARCH_32R2) \
+ || ((elf_elfheader (abfd)->e_flags & EF_MIPS_ARCH) == E_MIPS_ARCH_64) \
+ || ((elf_elfheader (abfd)->e_flags & EF_MIPS_ARCH) == E_MIPS_ARCH_64R2))
+
/* True if ABFD is a PIC object. */
#define PIC_OBJECT_P(abfd) \
((elf_elfheader (abfd)->e_flags & EF_MIPS_PIC) != 0)
@@ -5590,7 +5600,7 @@ mips_elf_perform_relocation (struct bfd_
prediction hardware. If we are linking for the RM9000, and we
see jal, and bal fits, use it instead. Note that this
transformation should be safe for all architectures. */
- if (bfd_get_mach (input_bfd) == bfd_mach_mips9000
+ if (JAL_JALR_TO_BAL_P (input_bfd)
&& !info->relocatable
&& !require_jalx
&& ((r_type == R_MIPS_26 && (x >> 26) == 0x3) /* jal addr */