Discussion:
[PATCH] opcodes/riscv: Hide '.L0 ' fake symbols
(too old to reply)
Andrew Burgess
2018-12-03 15:23:00 UTC
Permalink
The RISC-V assembler generates fake labels with the name '.L0 ' as
part of the debug information (see
gas/config/tc-riscv.h:FAKE_LABEL_NAME).

The problem is that currently, when disassembling an object file, the
output looks like this (this is an example from the GDB testsuite, but
is pretty representative of anything with debug information):

000000000000001e <main>:
1e: 7179 addi sp,sp,-48
20: f406 sd ra,40(sp)
22: f022 sd s0,32(sp)
24: 1800 addi s0,sp,48

0000000000000026 <.L0 >:
26: 87aa mv a5,a0
28: feb43023 sd a1,-32(s0)
2c: fcc43c23 sd a2,-40(s0)
30: fef42623 sw a5,-20(s0)

0000000000000034 <.L0 >:
34: fec42783 lw a5,-20(s0)
38: 0007871b sext.w a4,a5
3c: 678d lui a5,0x3
3e: 03978793 addi a5,a5,57 # 3039 <.LASF30+0x2a9d>
42: 02f71463 bne a4,a5,6a <.L0 >

0000000000000046 <.L0 >:
46: 000007b7 lui a5,0x0
4a: 0007b783 ld a5,0(a5) # 0 <need_malloc>
4e: 6f9c ld a5,24(a5)

0000000000000050 <.L0 >:
50: 86be mv a3,a5
52: 466d li a2,27
54: 4585 li a1,1
56: 000007b7 lui a5,0x0
5a: 00078513 mv a0,a5
5e: 00000097 auipc ra,0x0
62: 000080e7 jalr ra # 5e <.L0 +0xe>

0000000000000066 <.L0 >:
66: 4785 li a5,1
68: a869 j 102 <.L0 >

000000000000006a <.L0 >:
6a: 000007b7 lui a5,0x0
6e: 00078513 mv a0,a5
72: 00000097 auipc ra,0x0
76: 000080e7 jalr ra # 72 <.L0 +0x8>

The frequent repeated '.L0 ' labels are pointless, as they are
non-unique there's no way to match a use of '.L0 ' to its appearence
in the output, so we'd be better off just not printing it at all.
That's what this patch does by defining a 'symbol_is_valid' method for
RISC-V. With this commit, the same disassembly now looks like this:

000000000000001e <main>:
1e: 7179 addi sp,sp,-48
20: f406 sd ra,40(sp)
22: f022 sd s0,32(sp)
24: 1800 addi s0,sp,48
26: 87aa mv a5,a0
28: feb43023 sd a1,-32(s0)
2c: fcc43c23 sd a2,-40(s0)
30: fef42623 sw a5,-20(s0)
34: fec42783 lw a5,-20(s0)
38: 0007871b sext.w a4,a5
3c: 678d lui a5,0x3
3e: 03978793 addi a5,a5,57 # 3039 <.LASF30+0x2a9d>
42: 02f71463 bne a4,a5,6a <.L4>
46: 000007b7 lui a5,0x0
4a: 0007b783 ld a5,0(a5) # 0 <need_malloc>
4e: 6f9c ld a5,24(a5)
50: 86be mv a3,a5
52: 466d li a2,27
54: 4585 li a1,1
56: 000007b7 lui a5,0x0
5a: 00078513 mv a0,a5
5e: 00000097 auipc ra,0x0
62: 000080e7 jalr ra # 5e <main+0x40>
66: 4785 li a5,1
68: a869 j 102 <.L5>

000000000000006a <.L4>:
6a: 000007b7 lui a5,0x0
6e: 00078513 mv a0,a5
72: 00000097 auipc ra,0x0
76: 000080e7 jalr ra # 72 <.L4+0x8>

include/ChangeLog:

* dis-asm.h (riscv_symbol_is_valid): Declare.

opcodes/ChangeLog:

* disassembler.c (disassemble_init_for_target): Add RISC-V
initialisation.
* riscv-dis.c (riscv_symbol_is_valid): New function.
---
include/ChangeLog | 4 ++++
include/dis-asm.h | 1 +
opcodes/ChangeLog | 6 ++++++
opcodes/disassemble.c | 5 +++++
opcodes/riscv-dis.c | 18 ++++++++++++++++++
5 files changed, 34 insertions(+)

diff --git a/include/dis-asm.h b/include/dis-asm.h
index 84627950c02..6a55f3c1e9a 100644
--- a/include/dis-asm.h
+++ b/include/dis-asm.h
@@ -302,6 +302,7 @@ extern void print_wasm32_disassembler_options (FILE *);
extern bfd_boolean aarch64_symbol_is_valid (asymbol *, struct disassemble_info *);
extern bfd_boolean arm_symbol_is_valid (asymbol *, struct disassemble_info *);
extern bfd_boolean csky_symbol_is_valid (asymbol *, struct disassemble_info *);
+extern bfd_boolean riscv_symbol_is_valid (asymbol *, struct disassemble_info *);
extern void disassemble_init_powerpc (struct disassemble_info *);
extern void disassemble_init_s390 (struct disassemble_info *);
extern void disassemble_init_wasm32 (struct disassemble_info *);
diff --git a/opcodes/disassemble.c b/opcodes/disassemble.c
index 750d76ad865..36617c4977d 100644
--- a/opcodes/disassemble.c
+++ b/opcodes/disassemble.c
@@ -649,6 +649,11 @@ disassemble_init_for_target (struct disassemble_info * info)
#ifdef ARCH_powerpc
case bfd_arch_powerpc:
#endif
+#ifdef ARCH_riscv
+ case bfd_arch_riscv:
+ info->symbol_is_valid = riscv_symbol_is_valid;
+ break;
+#endif
#ifdef ARCH_rs6000
case bfd_arch_rs6000:
#endif
diff --git a/opcodes/riscv-dis.c b/opcodes/riscv-dis.c
index 890f1f8e697..0ae9bbba64c 100644
--- a/opcodes/riscv-dis.c
+++ b/opcodes/riscv-dis.c
@@ -518,6 +518,24 @@ print_insn_riscv (bfd_vma memaddr, struct disassemble_info *info)
return riscv_disassemble_insn (memaddr, insn, info);
}

+/* Prevent use of the fake labels that are generated as part of the DWARF
+ debug in the assembler. The fake labels are edefined in
+ gas/config/tc-riscv.h as FAKE_LABEL_NAME. */
+
+bfd_boolean
+riscv_symbol_is_valid (asymbol * sym,
+ struct disassemble_info * info ATTRIBUTE_UNUSED)
+{
+ const char * name;
+
+ if (sym == NULL)
+ return FALSE;
+
+ name = bfd_asymbol_name (sym);
+
+ return (strcmp (name, ".L0 ") != 0);
+}
+
void
print_riscv_disassembler_options (FILE *stream)
{
--
2.14.5
Andreas Schwab
2018-12-03 16:24:40 UTC
Permalink
Post by Andrew Burgess
diff --git a/opcodes/disassemble.c b/opcodes/disassemble.c
index 750d76ad865..36617c4977d 100644
--- a/opcodes/disassemble.c
+++ b/opcodes/disassemble.c
@@ -649,6 +649,11 @@ disassemble_init_for_target (struct disassemble_info * info)
#ifdef ARCH_powerpc
#endif
+#ifdef ARCH_riscv
+ info->symbol_is_valid = riscv_symbol_is_valid;
+ break;
+#endif
#ifdef ARCH_rs6000
#endif
You are adding code and a break in the middle of two unreleated cases.
That will break the powerpc case. Moreover, there is already another
use of case bfd_arch_riscv, so this cannot even compile.

Andreas.
--
Andreas Schwab, ***@linux-m68k.org
GPG Key fingerprint = 7578 EB47 D4E5 4D69 2510 2552 DF73 E780 A9DA AEC1
"And now for something completely different."
Andrew Burgess
2018-12-03 16:47:26 UTC
Permalink
Post by Andreas Schwab
Post by Andrew Burgess
diff --git a/opcodes/disassemble.c b/opcodes/disassemble.c
index 750d76ad865..36617c4977d 100644
--- a/opcodes/disassemble.c
+++ b/opcodes/disassemble.c
@@ -649,6 +649,11 @@ disassemble_init_for_target (struct disassemble_info * info)
#ifdef ARCH_powerpc
#endif
+#ifdef ARCH_riscv
+ info->symbol_is_valid = riscv_symbol_is_valid;
+ break;
+#endif
#ifdef ARCH_rs6000
#endif
Thanks for your feedback.
Post by Andreas Schwab
You are adding code and a break in the middle of two unreleated cases.
That will break the powerpc case.
Gah! My bad. Apologies for that. I've fixed this below.
Post by Andreas Schwab
Moreover, there is already another
use of case bfd_arch_riscv, so this cannot even compile.
Umm, not in my version of master, last commit (before mine)
b570a287cfb. I guess we're both making mistakes today :)

I've included the version of disassmble_init_for_target as I see it
(before my patch) below, so you can confirm this matches what you
see (maybe you do have something different somehow). I see no other
mention of riscv. My patch definitely builds (that's how I generated
the output in the commit message).

Thanks again for your feedback.

Andrew

---

void
disassemble_init_for_target (struct disassemble_info * info)
{
if (info == NULL)
return;

switch (info->arch)
{
#ifdef ARCH_aarch64
case bfd_arch_aarch64:
info->symbol_is_valid = aarch64_symbol_is_valid;
info->disassembler_needs_relocs = TRUE;
break;
#endif
#ifdef ARCH_arm
case bfd_arch_arm:
info->symbol_is_valid = arm_symbol_is_valid;
info->disassembler_needs_relocs = TRUE;
break;
#endif
#ifdef ARCH_csky
case bfd_arch_csky:
info->symbol_is_valid = csky_symbol_is_valid;
info->disassembler_needs_relocs = TRUE;
break;
#endif

#ifdef ARCH_ia64
case bfd_arch_ia64:
info->skip_zeroes = 16;
break;
#endif
#ifdef ARCH_tic4x
case bfd_arch_tic4x:
info->skip_zeroes = 32;
break;
#endif
#ifdef ARCH_mep
case bfd_arch_mep:
info->skip_zeroes = 256;
info->skip_zeroes_at_end = 0;
break;
#endif
#ifdef ARCH_metag
case bfd_arch_metag:
info->disassembler_needs_relocs = TRUE;
break;
#endif
#ifdef ARCH_m32c
case bfd_arch_m32c:
/* This processor in fact is little endian. The value set here
reflects the way opcodes are written in the cgen description. */
info->endian = BFD_ENDIAN_BIG;
if (! info->insn_sets)
{
info->insn_sets = cgen_bitset_create (ISA_MAX);
if (info->mach == bfd_mach_m16c)
cgen_bitset_set (info->insn_sets, ISA_M16C);
else
cgen_bitset_set (info->insn_sets, ISA_M32C);
}
break;
#endif
#ifdef ARCH_pru
case bfd_arch_pru:
info->disassembler_needs_relocs = TRUE;
break;
#endif
#ifdef ARCH_powerpc
case bfd_arch_powerpc:
#endif
#ifdef ARCH_rs6000
case bfd_arch_rs6000:
#endif
#if defined (ARCH_powerpc) || defined (ARCH_rs6000)
disassemble_init_powerpc (info);
break;
#endif
#ifdef ARCH_wasm32
case bfd_arch_wasm32:
disassemble_init_wasm32 (info);
break;
#endif
#ifdef ARCH_s390
case bfd_arch_s390:
disassemble_init_s390 (info);
break;
#endif
#ifdef ARCH_nds32
case bfd_arch_nds32:
disassemble_init_nds32 (info);
break;
#endif
default:
break;
}
}

---

opcodes/riscv: Hide '.L0 ' fake symbols

The RISC-V assembler generates fake labels with the name '.L0 ' as
part of the debug information (see
gas/config/tc-riscv.h:FAKE_LABEL_NAME).

The problem is that currently, when disassembling an object file, the
output looks like this (this is an example from the GDB testsuite, but
is pretty representative of anything with debug information):

000000000000001e <main>:
1e: 7179 addi sp,sp,-48
20: f406 sd ra,40(sp)
22: f022 sd s0,32(sp)
24: 1800 addi s0,sp,48

0000000000000026 <.L0 >:
26: 87aa mv a5,a0
28: feb43023 sd a1,-32(s0)
2c: fcc43c23 sd a2,-40(s0)
30: fef42623 sw a5,-20(s0)

0000000000000034 <.L0 >:
34: fec42783 lw a5,-20(s0)
38: 0007871b sext.w a4,a5
3c: 678d lui a5,0x3
3e: 03978793 addi a5,a5,57 # 3039 <.LASF30+0x2a9d>
42: 02f71463 bne a4,a5,6a <.L0 >

0000000000000046 <.L0 >:
46: 000007b7 lui a5,0x0
4a: 0007b783 ld a5,0(a5) # 0 <need_malloc>
4e: 6f9c ld a5,24(a5)

0000000000000050 <.L0 >:
50: 86be mv a3,a5
52: 466d li a2,27
54: 4585 li a1,1
56: 000007b7 lui a5,0x0
5a: 00078513 mv a0,a5
5e: 00000097 auipc ra,0x0
62: 000080e7 jalr ra # 5e <.L0 +0xe>

0000000000000066 <.L0 >:
66: 4785 li a5,1
68: a869 j 102 <.L0 >

000000000000006a <.L0 >:
6a: 000007b7 lui a5,0x0
6e: 00078513 mv a0,a5
72: 00000097 auipc ra,0x0
76: 000080e7 jalr ra # 72 <.L0 +0x8>

The frequent repeated '.L0 ' labels are pointless, as they are
non-unique there's no way to match a use of '.L0 ' to its appearence
in the output, so we'd be better off just not printing it at all.
That's what this patch does by defining a 'symbol_is_valid' method for
RISC-V. With this commit, the same disassembly now looks like this:

000000000000001e <main>:
1e: 7179 addi sp,sp,-48
20: f406 sd ra,40(sp)
22: f022 sd s0,32(sp)
24: 1800 addi s0,sp,48
26: 87aa mv a5,a0
28: feb43023 sd a1,-32(s0)
2c: fcc43c23 sd a2,-40(s0)
30: fef42623 sw a5,-20(s0)
34: fec42783 lw a5,-20(s0)
38: 0007871b sext.w a4,a5
3c: 678d lui a5,0x3
3e: 03978793 addi a5,a5,57 # 3039 <.LASF30+0x2a9d>
42: 02f71463 bne a4,a5,6a <.L4>
46: 000007b7 lui a5,0x0
4a: 0007b783 ld a5,0(a5) # 0 <need_malloc>
4e: 6f9c ld a5,24(a5)
50: 86be mv a3,a5
52: 466d li a2,27
54: 4585 li a1,1
56: 000007b7 lui a5,0x0
5a: 00078513 mv a0,a5
5e: 00000097 auipc ra,0x0
62: 000080e7 jalr ra # 5e <main+0x40>
66: 4785 li a5,1
68: a869 j 102 <.L5>

000000000000006a <.L4>:
6a: 000007b7 lui a5,0x0
6e: 00078513 mv a0,a5
72: 00000097 auipc ra,0x0
76: 000080e7 jalr ra # 72 <.L4+0x8>

include/ChangeLog:

* dis-asm.h (riscv_symbol_is_valid): Declare.

opcodes/ChangeLog:

* disassembler.c (disassemble_init_for_target): Add RISC-V
initialisation.
* riscv-dis.c (riscv_symbol_is_valid): New function.
---
include/ChangeLog | 4 ++++
include/dis-asm.h | 1 +
opcodes/ChangeLog | 6 ++++++
opcodes/disassemble.c | 5 +++++
opcodes/riscv-dis.c | 18 ++++++++++++++++++
5 files changed, 34 insertions(+)

diff --git a/include/dis-asm.h b/include/dis-asm.h
index 84627950c02..6a55f3c1e9a 100644
--- a/include/dis-asm.h
+++ b/include/dis-asm.h
@@ -302,6 +302,7 @@ extern void print_wasm32_disassembler_options (FILE *);
extern bfd_boolean aarch64_symbol_is_valid (asymbol *, struct disassemble_info *);
extern bfd_boolean arm_symbol_is_valid (asymbol *, struct disassemble_info *);
extern bfd_boolean csky_symbol_is_valid (asymbol *, struct disassemble_info *);
+extern bfd_boolean riscv_symbol_is_valid (asymbol *, struct disassemble_info *);
extern void disassemble_init_powerpc (struct disassemble_info *);
extern void disassemble_init_s390 (struct disassemble_info *);
extern void disassemble_init_wasm32 (struct disassemble_info *);
diff --git a/opcodes/disassemble.c b/opcodes/disassemble.c
index 750d76ad865..7370bd13593 100644
--- a/opcodes/disassemble.c
+++ b/opcodes/disassemble.c
@@ -656,6 +656,11 @@ disassemble_init_for_target (struct disassemble_info * info)
disassemble_init_powerpc (info);
break;
#endif
+#ifdef ARCH_riscv
+ case bfd_arch_riscv:
+ info->symbol_is_valid = riscv_symbol_is_valid;
+ break;
+#endif
#ifdef ARCH_wasm32
case bfd_arch_wasm32:
disassemble_init_wasm32 (info);
diff --git a/opcodes/riscv-dis.c b/opcodes/riscv-dis.c
index 890f1f8e697..0ae9bbba64c 100644
--- a/opcodes/riscv-dis.c
+++ b/opcodes/riscv-dis.c
@@ -518,6 +518,24 @@ print_insn_riscv (bfd_vma memaddr, struct disassemble_info *info)
return riscv_disassemble_insn (memaddr, insn, info);
}

+/* Prevent use of the fake labels that are generated as part of the DWARF
+ debug in the assembler. The fake labels are edefined in
+ gas/config/tc-riscv.h as FAKE_LABEL_NAME. */
+
+bfd_boolean
+riscv_symbol_is_valid (asymbol * sym,
+ struct disassemble_info * info ATTRIBUTE_UNUSED)
+{
+ const char * name;
+
+ if (sym == NULL)
+ return FALSE;
+
+ name = bfd_asymbol_name (sym);
+
+ return (strcmp (name, ".L0 ") != 0);
+}
+
void
print_riscv_disassembler_options (FILE *stream)
{
--
2.14.5
Post by Andreas Schwab
Andreas.
--
GPG Key fingerprint = 7578 EB47 D4E5 4D69 2510 2552 DF73 E780 A9DA AEC1
"And now for something completely different."
Andreas Schwab
2018-12-03 17:10:43 UTC
Permalink
Post by Andrew Burgess
Umm, not in my version of master, last commit (before mine)
b570a287cfb. I guess we're both making mistakes today :)
$ git grep -C 10 bfd_arch_riscv b570a287cfb -- opcodes/disassemble.c
b570a287cfb:opcodes/disassemble.c- else
b570a287cfb:opcodes/disassemble.c- disassemble = print_insn_little_powerpc;
b570a287cfb:opcodes/disassemble.c- break;
b570a287cfb:opcodes/disassemble.c-#endif
b570a287cfb:opcodes/disassemble.c-#ifdef ARCH_pru
b570a287cfb:opcodes/disassemble.c- case bfd_arch_pru:
b570a287cfb:opcodes/disassemble.c- disassemble = print_insn_pru;
b570a287cfb:opcodes/disassemble.c- break;
b570a287cfb:opcodes/disassemble.c-#endif
b570a287cfb:opcodes/disassemble.c-#ifdef ARCH_riscv
b570a287cfb:opcodes/disassemble.c: case bfd_arch_riscv:
b570a287cfb:opcodes/disassemble.c- disassemble = print_insn_riscv;
b570a287cfb:opcodes/disassemble.c- break;
b570a287cfb:opcodes/disassemble.c-#endif
b570a287cfb:opcodes/disassemble.c-#ifdef ARCH_rl78
b570a287cfb:opcodes/disassemble.c- case bfd_arch_rl78:
b570a287cfb:opcodes/disassemble.c- disassemble = rl78_get_disassembler (abfd);
b570a287cfb:opcodes/disassemble.c- break;
b570a287cfb:opcodes/disassemble.c-#endif
b570a287cfb:opcodes/disassemble.c-#ifdef ARCH_rx
b570a287cfb:opcodes/disassemble.c- case bfd_arch_rx:

Andreas.
--
Andreas Schwab, ***@linux-m68k.org
GPG Key fingerprint = 7578 EB47 D4E5 4D69 2510 2552 DF73 E780 A9DA AEC1
"And now for something completely different."
Andreas Schwab
2018-12-03 17:14:32 UTC
Permalink
Post by Andreas Schwab
Post by Andrew Burgess
Umm, not in my version of master, last commit (before mine)
b570a287cfb. I guess we're both making mistakes today :)
$ git grep -C 10 bfd_arch_riscv b570a287cfb -- opcodes/disassemble.c
b570a287cfb:opcodes/disassemble.c- else
b570a287cfb:opcodes/disassemble.c- disassemble = print_insn_little_powerpc;
b570a287cfb:opcodes/disassemble.c- break;
b570a287cfb:opcodes/disassemble.c-#endif
b570a287cfb:opcodes/disassemble.c-#ifdef ARCH_pru
b570a287cfb:opcodes/disassemble.c- disassemble = print_insn_pru;
b570a287cfb:opcodes/disassemble.c- break;
b570a287cfb:opcodes/disassemble.c-#endif
b570a287cfb:opcodes/disassemble.c-#ifdef ARCH_riscv
b570a287cfb:opcodes/disassemble.c- disassemble = print_insn_riscv;
b570a287cfb:opcodes/disassemble.c- break;
b570a287cfb:opcodes/disassemble.c-#endif
b570a287cfb:opcodes/disassemble.c-#ifdef ARCH_rl78
b570a287cfb:opcodes/disassemble.c- disassemble = rl78_get_disassembler (abfd);
b570a287cfb:opcodes/disassemble.c- break;
b570a287cfb:opcodes/disassemble.c-#endif
b570a287cfb:opcodes/disassemble.c-#ifdef ARCH_rx
Sorry, wrong function. :-/

Andreas.
--
Andreas Schwab, ***@linux-m68k.org
GPG Key fingerprint = 7578 EB47 D4E5 4D69 2510 2552 DF73 E780 A9DA AEC1
"And now for something completely different."
Jim Wilson
2018-12-03 20:24:03 UTC
Permalink
On Mon, Dec 3, 2018 at 7:23 AM Andrew Burgess
Post by Andrew Burgess
The RISC-V assembler generates fake labels with the name '.L0 ' as
part of the debug information (see
gas/config/tc-riscv.h:FAKE_LABEL_NAME).
They are also used for relaxable relocations, such as a medany auipc
instruction.
Post by Andrew Burgess
* dis-asm.h (riscv_symbol_is_valid): Declare.
* disassembler.c (disassemble_init_for_target): Add RISC-V
initialisation.
* riscv-dis.c (riscv_symbol_is_valid): New function.
This looks mostly OK to me, with the fix for the problem that Andreas
pointed out except that ...
Post by Andrew Burgess
+/* Prevent use of the fake labels that are generated as part of the DWARF
+ debug in the assembler. The fake labels are edefined in
+ gas/config/tc-riscv.h as FAKE_LABEL_NAME. */
...
+ return (strcmp (name, ".L0 ") != 0);
You have a comment here pointing at GAS, but there is no comment in
GAS pointing here. The value for FAKE_LABEL_NAME has already changed
at least once in gas, so we should have a better connection here to
make sure that a future gas change won't accidentally break this
disassembler support. Putting a comment in gas pointing here would be
OK. Another possible solution is to move the definition of
FAKE_LABEL_NAME into include/opcodes/riscv.h, and then you can use it
directly here. gas/config/tc-riscv.h already includes this header
file so I think this should work OK.

You also have a typo in the comment, edefined -> defined.

Jim
Andrew Burgess
2018-12-05 19:46:23 UTC
Permalink
Post by Jim Wilson
On Mon, Dec 3, 2018 at 7:23 AM Andrew Burgess
Post by Andrew Burgess
The RISC-V assembler generates fake labels with the name '.L0 ' as
part of the debug information (see
gas/config/tc-riscv.h:FAKE_LABEL_NAME).
They are also used for relaxable relocations, such as a medany auipc
instruction.
Post by Andrew Burgess
* dis-asm.h (riscv_symbol_is_valid): Declare.
* disassembler.c (disassemble_init_for_target): Add RISC-V
initialisation.
* riscv-dis.c (riscv_symbol_is_valid): New function.
This looks mostly OK to me, with the fix for the problem that Andreas
pointed out except that ...
Post by Andrew Burgess
+/* Prevent use of the fake labels that are generated as part of the DWARF
+ debug in the assembler. The fake labels are edefined in
+ gas/config/tc-riscv.h as FAKE_LABEL_NAME. */
...
+ return (strcmp (name, ".L0 ") != 0);
You have a comment here pointing at GAS, but there is no comment in
GAS pointing here. The value for FAKE_LABEL_NAME has already changed
at least once in gas, so we should have a better connection here to
make sure that a future gas change won't accidentally break this
disassembler support. Putting a comment in gas pointing here would be
OK. Another possible solution is to move the definition of
FAKE_LABEL_NAME into include/opcodes/riscv.h, and then you can use it
directly here. gas/config/tc-riscv.h already includes this header
file so I think this should work OK.
You also have a typo in the comment, edefined -> defined.
This revision addresses the feedback from yourself and Andreas.

OK to apply?

Thanks,
Andrew

---

opcodes/riscv: Hide '.L0 ' fake symbols

The RISC-V assembler generates fake labels with the name '.L0 ' as
part of the debug information (see
gas/config/tc-riscv.h:FAKE_LABEL_NAME).

The problem is that currently, when disassembling an object file, the
output looks like this (this is an example from the GDB testsuite, but
is pretty representative of anything with debug information):

000000000000001e <main>:
1e: 7179 addi sp,sp,-48
20: f406 sd ra,40(sp)
22: f022 sd s0,32(sp)
24: 1800 addi s0,sp,48

0000000000000026 <.L0 >:
26: 87aa mv a5,a0
28: feb43023 sd a1,-32(s0)
2c: fcc43c23 sd a2,-40(s0)
30: fef42623 sw a5,-20(s0)

0000000000000034 <.L0 >:
34: fec42783 lw a5,-20(s0)
38: 0007871b sext.w a4,a5
3c: 678d lui a5,0x3
3e: 03978793 addi a5,a5,57 # 3039 <.LASF30+0x2a9d>
42: 02f71463 bne a4,a5,6a <.L0 >

0000000000000046 <.L0 >:
46: 000007b7 lui a5,0x0
4a: 0007b783 ld a5,0(a5) # 0 <need_malloc>
4e: 6f9c ld a5,24(a5)

0000000000000050 <.L0 >:
50: 86be mv a3,a5
52: 466d li a2,27
54: 4585 li a1,1
56: 000007b7 lui a5,0x0
5a: 00078513 mv a0,a5
5e: 00000097 auipc ra,0x0
62: 000080e7 jalr ra # 5e <.L0 +0xe>

0000000000000066 <.L0 >:
66: 4785 li a5,1
68: a869 j 102 <.L0 >

000000000000006a <.L0 >:
6a: 000007b7 lui a5,0x0
6e: 00078513 mv a0,a5
72: 00000097 auipc ra,0x0
76: 000080e7 jalr ra # 72 <.L0 +0x8>

The frequent repeated '.L0 ' labels are pointless, as they are
non-unique there's no way to match a use of '.L0 ' to its appearence
in the output, so we'd be better off just not printing it at all.
That's what this patch does by defining a 'symbol_is_valid' method for
RISC-V. With this commit, the same disassembly now looks like this:

000000000000001e <main>:
1e: 7179 addi sp,sp,-48
20: f406 sd ra,40(sp)
22: f022 sd s0,32(sp)
24: 1800 addi s0,sp,48
26: 87aa mv a5,a0
28: feb43023 sd a1,-32(s0)
2c: fcc43c23 sd a2,-40(s0)
30: fef42623 sw a5,-20(s0)
34: fec42783 lw a5,-20(s0)
38: 0007871b sext.w a4,a5
3c: 678d lui a5,0x3
3e: 03978793 addi a5,a5,57 # 3039 <.LASF30+0x2a9d>
42: 02f71463 bne a4,a5,6a <.L4>
46: 000007b7 lui a5,0x0
4a: 0007b783 ld a5,0(a5) # 0 <need_malloc>
4e: 6f9c ld a5,24(a5)
50: 86be mv a3,a5
52: 466d li a2,27
54: 4585 li a1,1
56: 000007b7 lui a5,0x0
5a: 00078513 mv a0,a5
5e: 00000097 auipc ra,0x0
62: 000080e7 jalr ra # 5e <main+0x40>
66: 4785 li a5,1
68: a869 j 102 <.L5>

000000000000006a <.L4>:
6a: 000007b7 lui a5,0x0
6e: 00078513 mv a0,a5
72: 00000097 auipc ra,0x0
76: 000080e7 jalr ra # 72 <.L4+0x8>

In order to share the fake label between the assembler and the
libopcodes library, I've added some new defines RISCV_FAKE_LABEL_NAME
and RISCV_FAKE_LABEL_CHAR in include/opcode/riscv.h. I could have
just moved FAKE_LABEL_NAME to the include file, however, I thnk this
would be confusing, someone working on the assembler would likely not
expect to find FAKE_LABEL_NAME defined outside of the assembler source
tree. By introducing the RISCV_FAKE_LABEL_* defines I can leave the
assembler standard FAKE_LABEL_ defines in the assembler source, but
still share the RISCV_FAKE_LABEL_* with libopcodes.

gas/ChangeLog:

* config/tc-riscv.h (FAKE_LABEL_NAME): Define as
RISCV_FAKE_LABEL_NAME.
(FAKE_LABEL_CHAR): Define as RISCV_FAKE_LABEL_CHAR.

include/ChangeLog:

* dis-asm.h (riscv_symbol_is_valid): Declare.
* opcode/riscv.h (RISCV_FAKE_LABEL_NAME): Define.
(RISCV_FAKE_LABEL_CHAR): Define.

opcodes/ChangeLog:

* disassembler.c (disassemble_init_for_target): Add RISC-V
initialisation.
* riscv-dis.c (riscv_symbol_is_valid): New function.
---
gas/ChangeLog | 6 ++++++
gas/config/tc-riscv.h | 4 ++--
include/ChangeLog | 6 ++++++
include/dis-asm.h | 1 +
include/opcode/riscv.h | 6 ++++++
opcodes/ChangeLog | 6 ++++++
opcodes/disassemble.c | 5 +++++
opcodes/riscv-dis.c | 17 +++++++++++++++++
8 files changed, 49 insertions(+), 2 deletions(-)

diff --git a/gas/config/tc-riscv.h b/gas/config/tc-riscv.h
index 5e59740e384..41a18be934a 100644
--- a/gas/config/tc-riscv.h
+++ b/gas/config/tc-riscv.h
@@ -38,10 +38,10 @@ struct expressionS;
/* Symbols named FAKE_LABEL_NAME are emitted when generating DWARF, so make
sure FAKE_LABEL_NAME is printable. It still must be distinct from any
real label name. So, append a space, which other labels can't contain. */
-#define FAKE_LABEL_NAME ".L0 "
+#define FAKE_LABEL_NAME RISCV_FAKE_LABEL_NAME
/* Changing the special character in FAKE_LABEL_NAME requires changing
FAKE_LABEL_CHAR too. */
-#define FAKE_LABEL_CHAR ' '
+#define FAKE_LABEL_CHAR RISCV_FAKE_LABEL_CHAR

#define md_relax_frag(segment, fragp, stretch) \
riscv_relax_frag (segment, fragp, stretch)
diff --git a/include/dis-asm.h b/include/dis-asm.h
index 84627950c02..6a55f3c1e9a 100644
--- a/include/dis-asm.h
+++ b/include/dis-asm.h
@@ -302,6 +302,7 @@ extern void print_wasm32_disassembler_options (FILE *);
extern bfd_boolean aarch64_symbol_is_valid (asymbol *, struct disassemble_info *);
extern bfd_boolean arm_symbol_is_valid (asymbol *, struct disassemble_info *);
extern bfd_boolean csky_symbol_is_valid (asymbol *, struct disassemble_info *);
+extern bfd_boolean riscv_symbol_is_valid (asymbol *, struct disassemble_info *);
extern void disassemble_init_powerpc (struct disassemble_info *);
extern void disassemble_init_s390 (struct disassemble_info *);
extern void disassemble_init_wasm32 (struct disassemble_info *);
diff --git a/include/opcode/riscv.h b/include/opcode/riscv.h
index 12cb4ca4544..d57f45beb13 100644
--- a/include/opcode/riscv.h
+++ b/include/opcode/riscv.h
@@ -270,6 +270,12 @@ static const char * const riscv_pred_succ[16] =
#define NGPR 32
#define NFPR 32

+/* These fake label defines are use by both the assembler, and
+ libopcodes. The assembler uses this when it needs to generate a fake
+ label, and libopcodes uses it to hide the fake labels in its output. */
+#define RISCV_FAKE_LABEL_NAME ".L0 "
+#define RISCV_FAKE_LABEL_CHAR ' '
+
/* Replace bits MASK << SHIFT of STRUCT with the equivalent bits in
VALUE << SHIFT. VALUE is evaluated exactly once. */
#define INSERT_BITS(STRUCT, VALUE, MASK, SHIFT) \
diff --git a/opcodes/disassemble.c b/opcodes/disassemble.c
index 750d76ad865..7370bd13593 100644
--- a/opcodes/disassemble.c
+++ b/opcodes/disassemble.c
@@ -656,6 +656,11 @@ disassemble_init_for_target (struct disassemble_info * info)
disassemble_init_powerpc (info);
break;
#endif
+#ifdef ARCH_riscv
+ case bfd_arch_riscv:
+ info->symbol_is_valid = riscv_symbol_is_valid;
+ break;
+#endif
#ifdef ARCH_wasm32
case bfd_arch_wasm32:
disassemble_init_wasm32 (info);
diff --git a/opcodes/riscv-dis.c b/opcodes/riscv-dis.c
index 97908209483..f1bbfdb1f14 100644
--- a/opcodes/riscv-dis.c
+++ b/opcodes/riscv-dis.c
@@ -518,6 +518,23 @@ print_insn_riscv (bfd_vma memaddr, struct disassemble_info *info)
return riscv_disassemble_insn (memaddr, insn, info);
}

+/* Prevent use of the fake labels that are generated as part of the DWARF
+ and for relaxable relocations in the assembler. */
+
+bfd_boolean
+riscv_symbol_is_valid (asymbol * sym,
+ struct disassemble_info * info ATTRIBUTE_UNUSED)
+{
+ const char * name;
+
+ if (sym == NULL)
+ return FALSE;
+
+ name = bfd_asymbol_name (sym);
+
+ return (strcmp (name, RISCV_FAKE_LABEL_NAME) != 0);
+}
+
void
print_riscv_disassembler_options (FILE *stream)
{
--
2.14.5
Jim Wilson
2018-12-05 19:49:20 UTC
Permalink
On Wed, Dec 5, 2018 at 11:46 AM Andrew Burgess
Post by Andrew Burgess
This revision addresses the feedback from yourself and Andreas.
OK to apply?
OK.

Jim

Loading...