|
|
Message-ID: <z6t4ztqt6uhufqoezsmn627lwly45jlh6qwvp4xth2tcj36v4o@txpjfna7inio>
Date: Thu, 20 Mar 2025 12:07:21 +0100
From: Ignacy Gawędzki <ignacy.gawedzki@...en-communications.fr>
To: musl@...ts.openwall.com
Subject: [PATCH 1/1] tools: Rework adding of CFI annotations.
Rework awk scripts used to add CFI annotations to i386 and x86_64
assembly, in order to properly maintain CFA offset across in-function
jumps.
Add arm and aarch64 versions of these scripts.
Signed-off-by: Ignacy Gawędzki <ignacy.gawedzki@...en-communications.fr>
---
tools/add-cfi.aarch64.awk | 287 +++++++++++++++++++++++++++++
tools/add-cfi.arm.awk | 367 ++++++++++++++++++++++++++++++++++++++
tools/add-cfi.common.awk | 36 +++-
tools/add-cfi.i386.awk | 331 +++++++++++++++++++++++-----------
tools/add-cfi.x86_64.awk | 292 ++++++++++++++++++++----------
5 files changed, 1100 insertions(+), 213 deletions(-)
create mode 100644 tools/add-cfi.aarch64.awk
create mode 100644 tools/add-cfi.arm.awk
diff --git a/tools/add-cfi.aarch64.awk b/tools/add-cfi.aarch64.awk
new file mode 100644
index 00000000..cc27e517
--- /dev/null
+++ b/tools/add-cfi.aarch64.awk
@@ -0,0 +1,287 @@
+# Insert GAS CFI directives ("call frame information") into AArch64 asm input.
+#
+# CFI directives tell the assembler how to generate "stack frame" debug info.
+# This information can tell a debugger (like gdb) how to find the current stack
+# frame at any point in the program code, and how to find the values which
+# various registers had at higher points in the call stack.
+# With this information, the debugger can show a backtrace, and you can move up
+# and down the call stack and examine the values of local variables.
+
+BEGIN {
+    # Don't put CFI data in the .eh_frame ELF section (which we don't keep).
+    print ".cfi_sections .debug_frame"
+
+    # Only emit CFI directives inside a function.
+    in_function = ""  # name of the function being processed, "" when outside
+
+    # Emit .loc directives with line numbers from original source.
+    printf ".file 1 \"%s\"\n", ARGV[1]
+    line_number = 0
+
+    re_label = "([0-9]+|[a-zA-Z_][a-zA-Z0-9_]*)"  # numeric local label or identifier
+
+    # Build an associative array of canonical register names.
+    for (i = 0; i < 30; ++i)
+        regname["x" i] = regname["w" i] = "x" i  # wN aliases the low half of xN
+    regname["x30"] = regname["w30"] = regname["lr"] = "x30"
+    regname["xzr"] = regname["wzr"] = "xzr"
+    regname["sp"] = regname["wsp"] = "sp"
+}
+
+{
+ ++line_number
+
+ # Clean the input up before doing anything else.
+ # Delete comments.
+ gsub(/^#.*|\/\/.*|\/\*.*\*\//, "")
+
+ # Canonicalize whitespace.
+ gsub(/[ \t]+/, " ") # Mawk doesn't understand \s.
+ gsub(/ *, */, ",")
+ gsub(/ *: */, ": ")
+ gsub(/ $/, "")
+ gsub(/^ /, "")
+}
+
+# Check for assembler directives which we care about.
+/^\.(section|data|text)/ {
+ # A .cfi_startproc/.cfi_endproc pair should be within the same section
+ # otherwise, clang will choke when generating ELF output.
+ if (in_function) {
+ print ".cfi_endproc"
+ in_function = ""
+ }
+}
+
+# Record each function name.
+/^\.type [a-zA-Z0-9_]+( STT_FUNCTION|,[#@%"]function)/ {
+ functions[substr($2, 1, length($2) - 10)] = 1
+}
+
+# Not interested in assembler directives beyond this, just pass them through.
+/^\./ {
+ print
+ next
+}
+
+# Helper to adjust CFA offset.
+function adjust_sp_offset(delta) {
+ if (in_function) {
+ printf ".cfi_adjust_cfa_offset %d\n", delta
+ cfa_offset[in_function] += delta
+ }
+}
+
+# Helper to invalidate unsaved register.
+function trashed(reg) {
+ if (in_function && !(reg in saved) && !(reg in dirty))
+ printf ".cfi_undefined %s\n", reg
+ dirty[reg] = 1
+}
+
+# Helper to process jumps to labels by saving the current CFA offset.
+function jump_to_label(label) {
+ if (in_function) {
+ if (match(label, /^[0-9]+f$/)) # "forward" label
+ cfa_offset[substr(label, 1, RLENGTH - 1)] = cfa_offset[in_function]
+ else if (match(label, /^[a-zA-Z_][a-zA-Z0-9_]*$/))
+ cfa_offset[label] = cfa_offset[in_function]
+ }
+}
+
+# Helper to set relative offset of registers pushed on the stack.
+function push_regs(regs, numregs, i) {
+ adjust_sp_offset(numregs * 4)
+ for (i = 1; i <= numregs; ++i) {
+ reg = regname[regs[i]]
+ if (!(reg in saved) && !(reg in dirty)) {
+ printf ".cfi_rel_offset %s,%i\n", reg, ((i - 1) * 4)
+ saved[reg] = 1
+ }
+ }
+}
+
+# Helper to invalidate unsaved registers popped from the stack.
+function pop_regs(regs, numregs, i) {
+ adjust_sp_offset(numregs * -4)
+ for (i = 1; i <= numregs; ++i) {
+ reg = regname[regs[i]]
+ trashed(reg)
+ }
+}
+
+# Helper to save a single register saved in SP-relative locations.
+function save_reg(reg, offset) {
+ reg = regname[reg]
+ if (!(reg in saved) && !(reg in dirty)) {
+ printf ".cfi_rel_offset %s,%d\n", reg, offset
+ saved[reg] = 1
+ }
+}
+
+# Process labels.
+$0 ~ "^" re_label ":" {
+    # Parse each leading label.
+    while (match($0, "^" re_label ":")) {
+
+        # Extract label name.
+        label = substr($1, 1, RLENGTH - 1)
+
+        # Remove label from current line.
+        sub("^" re_label ": ?", "")
+
+        if (label in functions) {
+            if (in_function) {
+                print ".cfi_endproc"
+                for (l in called)
+                    delete called[l]
+            }
+
+            in_function = label
+            print ".cfi_startproc"
+
+            for (reg in saved)
+                delete saved[reg]
+            for (reg in dirty)
+                delete dirty[reg]
+        }
+
+        printf "%s:\n", label
+
+        # If this label has been jumped to, define the CFA offset to its
+        # value at the location of the jump.
+        if (!(label in functions) && in_function && label in cfa_offset) {
+            if (cfa_offset[in_function] != cfa_offset[label]) {
+                printf ".cfi_def_cfa_offset %d\n", cfa_offset[label]
+                cfa_offset[in_function] = cfa_offset[label]
+            }
+            delete cfa_offset[label]
+        }
+
+        # If this label has been called, possibly invalidate LR.
+        if (label in called && !(label in functions)) {
+            trashed(regname["lr"])  # canonical name, so a saved x30 is recognised
+            delete called[label]
+        }
+    }
+    # An instruction may follow on the same line, so continue processing.
+}
+
+# Skip empty line.
+/^$/ { next }
+
+# Issue source line number.
+{
+ printf ".loc 1 %d\n", line_number
+ print
+}
+
+# Process jumps to label (using B*).
+/^b[^xrl]/ {
+ jump_to_label($2)
+}
+
+# Process jumps to label (using [CT]BN?Z).
+/^[ct]bn?z / {
+ if (match($2, /,.+$/))
+ jump_to_label(substr($2, RSTART + 1, RLENGTH - 1))
+}
+
+# Issue relative offsets of registers stored in SP-relative locations.
+/^st(n?p|r[bh]?|l[lu]?r|tr|ur) .+,\[(sp|x30)[,\]]/ {
+    if (in_function) {
+        if (match($2, /(,#?[+-]?(0x[0-9a-fA-F]+|[0-9]+))?\]$/)) {
+            # Offset with no write-back.
+            if (RLENGTH == 1)  # only the closing "]" matched: no immediate
+                offset = 0
+            else
+                offset = parse_const(substr($2, RSTART + 2, RLENGTH - 3))
+            split($2, operands, ",")
+            if (match($1, /^stn?p$/)) {
+                if (match(operands[1], /^x/)) {  # only 64-bit stores are recorded
+                    save_reg(operands[1], offset)
+                    save_reg(operands[2], offset + 8)
+                }
+            } else if (match(operands[1], /^x/))  # was /x^/, which never matches
+                save_reg(operands[1], offset)
+        } else if (match($2, /,#?[+-]?(0x[0-9a-fA-F]+|[0-9]+)\]!$/)) {
+            # Pre-index with write-back.
+            offset = parse_const(substr($2, RSTART + 2, RLENGTH - 4))
+            adjust_sp_offset(-offset)  # SP moves first, so slots are at 0 and 8
+            split($2, operands, ",")
+            if ($1 == "stp") {
+                if (match(operands[1], /^x/)) {
+                    save_reg(operands[1], 0)
+                    save_reg(operands[2], 8)
+                }
+            } else if (match(operands[1], /^x/))
+                save_reg(operands[1], 0)
+        } else if (match($2, /,#?[+-]?(0x[0-9a-fA-F]+|[0-9]+)$/)) {
+            # Post-index
+            offset = parse_const(substr($2, RSTART + 2, RLENGTH - 2))
+            split($2, operands, ",")
+            if ($1 == "stp") {
+                if (match(operands[1], /^x/)) {
+                    save_reg(operands[1], 0)
+                    save_reg(operands[2], 8)
+                }
+            } else if (match(operands[1], /^x/))
+                save_reg(operands[1], 0)
+            adjust_sp_offset(-offset)  # SP moves after the store
+        }
+    }
+}
+
+# Adjust CFA offset when decreasing SP.
+/subs?(\.[nw])? sp,sp,/ {
+ if (in_function && match($2, /,#[+-]?(0x[0-9a-fA-F]+|[0-9]+)$/))
+ adjust_sp_offset(parse_const(substr($2, RSTART + 2, RLENGTH - 2)))
+}
+
+# Adjust CFA offset when increasing SP.
+/adds?(\.[nw])? sp,sp,/ {
+ if (in_function && match($2, /,#[+-]?(0x[0-9a-fA-F]+|[0-9]+)$/))
+ adjust_sp_offset(-parse_const(substr($2, RSTART + 2, RLENGTH - 2)))
+}
+
+# Process calls to labels.
+/bl[a-z]* / {
+ if (match($2, /^[0-9]+f$/)) # "forward" label
+ called[substr($2, 1, RLENGTH - 1)] = 1
+ else if (match($2, /^[a-zA-Z_][0-9a-zA-Z_]*$/))
+ called[$2] = 1
+}
+
+# Invalidate unsaved registers being written to.
+/^(adcs?|adds?|adrp?|ands?|asrv?|bfc|bfi|bfm|bfxil|bics?|cin[cv]|cl[sz]|cneg|crc32[a-z]+|csel|csetm?|csin[cv]|csneg|eo[nr]|extr|ldap(r[bh]?|ur(s?[bhw]?))|ldar[bh]?|ldax[pr][bh]?|ldlar[bh]?|ldr((aa)?|s?[bhw])|ldtrs?[bhw]?|ldurs?[bhw]?|ldxr[bh]?|ls[lr]v?|madd|mneg|mov[knz]?|mrs|msub|mul|mvn|negs?|ngcs?|orn|orr|pac[a-z0-9]+|rbit|rev(16|32)?|rorv?|sbcs?|sbfiz|sbfm|sbfx|sdiv|smaddl|smnegl|smsubl|smul[hl]|subs?|sxt[bhw]|sysl|ubfiz|ubfm|ubfx|udiv|umaddl|umnegl|umsubl|umul[hl]|uxt[bhw]) ([xw]([0-9]|[12][0-9]|30)|sp),/ {
+ split($2, args, ",")
+ reg = args[1]
+ if (reg != "sp")
+ trashed(regname[reg])
+}
+
+# Invalidate unsaved registers being written to by atomic operations in memory.
+/^ld(add|clr|eor|set|[su](max|min))/ {
+ split($2, args, ",")
+ trashed(regname[args[2]])
+}
+
+# Invalidate unsaved registers being written to by pair loading.
+/^ld(n|a?x)?p(sw)? / {  # ldp, ldnp, ldxp, ldaxp and ldpsw
+    split($2, args, ",")
+    if (args[1] in regname) trashed(regname[args[1]])  # skip SIMD/FP operands
+    if (args[2] in regname) trashed(regname[args[2]])
+}
+
+# Invalidate unsaved registers being written to by long instructions.
+/^(smlals?|smlal(bb|bt|tb|tt)|smlaldx?|smlsldx?|smull|umaal|umlal|umulls?) / {
+    split($2, args, ",")
+    # A64 long multiplies write only their first operand; the second is a source.
+    if (args[1] in regname) trashed(regname[args[1]])  # skip vector operands
+}
+
+END {
+ # Issue end of function if still inside one.
+ if (in_function)
+ print ".cfi_endproc"
+}
diff --git a/tools/add-cfi.arm.awk b/tools/add-cfi.arm.awk
new file mode 100644
index 00000000..7aa0cf8c
--- /dev/null
+++ b/tools/add-cfi.arm.awk
@@ -0,0 +1,367 @@
+# Insert GAS CFI directives ("call frame information") into ARM asm input.
+#
+# CFI directives tell the assembler how to generate "stack frame" debug info.
+# This information can tell a debugger (like gdb) how to find the current stack
+# frame at any point in the program code, and how to find the values which
+# various registers had at higher points in the call stack.
+# With this information, the debugger can show a backtrace, and you can move up
+# and down the call stack and examine the values of local variables.
+
+BEGIN {
+    # Don't put CFI data in the .eh_frame ELF section (which we don't keep).
+    print ".cfi_sections .debug_frame"
+
+    # Only emit CFI directives inside a function.
+    in_function = ""  # name of the function being processed, "" when outside
+
+    # Emit .loc directives with line numbers from original source.
+    printf ".file 1 \"%s\"\n", ARGV[1]
+    line_number = 0
+
+    re_label = "([0-9]+|[a-zA-Z_][a-zA-Z0-9_]*)"  # numeric local label or identifier
+
+    # Build an associative array of canonical register names.
+    for (i = 0; i < 10; ++i) {
+        regname["r" i] = "r" i
+        regnum["r" i] = i
+    }
+    regname["r10"] = regname["sl"] = "r10"
+    regnum["r10"] = regnum["sl"] = 10
+    regname["r11"] = regname["fp"] = "r11"
+    regnum["r11"] = regnum["fp"] = 11
+    regname["r12"] = regname["ip"] = "r12"
+    regnum["r12"] = regnum["ip"] = 12
+    regname["r13"] = regname["sp"] = "r13"
+    regnum["r13"] = regnum["sp"] = 13
+    regname["r14"] = regname["lr"] = "r14"
+    regnum["r14"] = regnum["lr"] = 14
+    regname["r15"] = regname["pc"] = "r15"
+    regnum["r15"] = regnum["pc"] = 15
+}
+
+{
+ ++line_number
+
+ # Clean the input up before doing anything else.
+ # Delete comments.
+ gsub(/(^#|@|\/\/).*|\/\*.*\*\//, "")
+
+ # Canonicalize whitespace.
+ gsub(/[ \t]+/, " ") # Mawk doesn't understand \s.
+ gsub(/ *, */, ",")
+ gsub(/ *: */, ": ")
+ gsub(/ $/, "")
+ gsub(/^ /, "")
+}
+
+# Check for assembler directives which we care about.
+/^\.(section|data|text)/ {
+ # A .cfi_startproc/.cfi_endproc pair should be within the same section
+ # otherwise, clang will choke when generating ELF output.
+ if (in_function) {
+ print ".cfi_endproc"
+ in_function = ""
+ }
+}
+
+# Record each function name.
+/^\.type [a-zA-Z0-9_]+( STT_FUNCTION|,[#@%"]function)/ {
+ functions[substr($2, 1, length($2) - 10)] = 1
+}
+
+# Not interested in assembler directives beyond this, just pass them through.
+/^\./ {
+ print
+ next
+}
+
+# Helper to adjust CFA offset.
+function adjust_sp_offset(delta) {
+ if (in_function) {
+ printf ".cfi_adjust_cfa_offset %d\n", delta
+ cfa_offset[in_function] += delta
+ }
+}
+
+# Helper to invalidate unsaved register.
+function trashed(reg) {
+ if (in_function && !(reg in saved) && !(reg in dirty))
+ printf ".cfi_undefined %s\n", reg
+ dirty[reg] = 1
+}
+
+# Helper to process jumps to labels by saving the current CFA offset.
+function jump_to_label(label) {
+ if (in_function) {
+ if (match(label, /^[0-9]+f$/)) # "forward" label
+ cfa_offset[substr(label, 1, RLENGTH - 1)] = cfa_offset[in_function]
+ else if (match(label, /^[a-zA-Z_][a-zA-Z0-9_]*$/))
+ cfa_offset[label] = cfa_offset[in_function]
+ }
+}
+
+# Helper to save a single register saved in SP-relative locations.
+function save_reg(reg, offset) {
+ reg = regname[reg]
+ if (!(reg in saved) && !(reg in dirty)) {
+ printf ".cfi_rel_offset %s,%d\n", reg, offset
+ saved[reg] = 1
+ }
+}
+
+# Helper to save registers stored at ascending addresses from SP (no write-back).
+function save_regs(regs, numregs, i) {
+    for (i = 1; i <= numregs; ++i)
+        save_reg(regname[regs[i]], (i - 1) * 4)  # i-th register sits at SP + 4*(i-1)
+}
+
+# Helper to set relative offset of registers pushed on the stack.
+function push_regs(regs, numregs, i) {
+ adjust_sp_offset(numregs * 4)
+ for (i = 1; i <= numregs; ++i)
+ save_reg(regname[regs[i]], (i - 1) * 4)
+}
+
+# Helper to invalidate unsaved registers popped from the stack.
+function pop_regs(regs, numregs, i) {
+ adjust_sp_offset(numregs * -4)
+ for (i = 1; i <= numregs; ++i) {
+ reg = regname[regs[i]]
+ trashed(reg)
+ }
+}
+
+# Helper to parse register lists.
+function split_reglist(arg, regs, num, toks, tmp, dash, i, j) {
+ while (match(arg, /^{[^}]+}/)) {
+ num = split(substr(arg, RSTART + 1, RLENGTH - 2), toks, ",")
+ for (i = 1; i <= num; ++i)
+ if (match(toks[i], /^r([0-9]|1[0-5])-r([0-9]|1[0-5])$/)) {
+ dash = index(toks[i], "-")
+ first = 0 + substr(toks[i], 2, dash - 2)
+ last = 0 + substr(toks[i], dash + 2)
+ for (j = first; j <= last; ++j)
+ tmp[j]
+ } else
+ tmp[regnum[toks[i]]]
+ arg = substr(arg, RSTART + RLENGTH)
+ if (!match(arg, /^[\t ]*[+|][\t ]*/))
+ break
+ arg = substr(arg, RLENGTH + 1)
+ }
+ num = 0
+ for (i = 0; i < 16; ++i) {
+ if (!(i in tmp))
+ continue
+ regs[++num] = regname["r" i]
+ }
+ return num
+}
+
+# Process labels.
+$0 ~ "^" re_label ":" {
+    # Parse each leading label.
+    while (match($0, "^" re_label ":")) {
+
+        # Extract label name.
+        label = substr($1, 1, RLENGTH - 1)
+
+        # Remove label from current line.
+        sub("^" re_label ": ?", "")
+
+        if (label in functions) {
+            if (in_function) {
+                print ".cfi_endproc"
+                for (l in called)
+                    delete called[l]
+            }
+
+            in_function = label
+            print ".cfi_startproc"
+
+            for (reg in saved)
+                delete saved[reg]
+            for (reg in dirty)
+                delete dirty[reg]
+        }
+
+        printf "%s:\n", label
+
+        # If this label has been jumped to, define the CFA offset to its
+        # value at the location of the jump.
+        if (!(label in functions) && in_function && label in cfa_offset) {
+            if (cfa_offset[in_function] != cfa_offset[label]) {
+                printf ".cfi_def_cfa_offset %d\n", cfa_offset[label]
+                cfa_offset[in_function] = cfa_offset[label]
+            }
+            delete cfa_offset[label]
+        }
+
+        # If this label has been called, possibly invalidate LR.
+        if (label in called && !(label in functions)) {
+            trashed(regname["lr"])  # canonical name, so a saved r14 is recognised
+            delete called[label]
+        }
+    }
+    # An instruction may follow on the same line, so continue processing.
+}
+
+# Skip empty line.
+/^$/ { next }
+
+# Issue source line number.
+{
+ printf ".loc 1 %d\n", line_number
+ print
+}
+
+# Process jumps to label (B with optional condition; BLE/BLT/BLS/BLO are jumps, not calls).
+/^b(eq|ne|cs|hs|cc|lo|mi|pl|vs|vc|hi|ls|ge|lt|gt|le|al)?(\.[nw])? / {
+    jump_to_label($2)
+}
+
+# Process jumps to label (using CBNZ?).
+/^cbnz? / {
+ if (match($2, /,.*$/))
+ jump_to_label(substr($2, RSTART + 1, RLENGTH - 1))
+}
+
+# Adjust CFA offset and issue relative offsets of pushed registers using PUSH.
+/^push / {
+ if (in_function) {
+ numregs = split_reglist($2, regs)
+ push_regs(regs, numregs);
+ }
+}
+
+# Adjust CFA offset and Issue relative offsets of pushed registers using STMFD.
+/^stm(fd|db)(al)?(\.[nw])? (sp|r13)!,/ {
+ if (in_function) {
+ numregs = split_reglist(substr($2, index($2, ",") + 1), regs)
+ push_regs(regs, numregs);
+ }
+}
+
+/^stm(ia|ea)?(al)?(\.[nw])? (sp|r13),/ {
+ if (in_function) {
+ numregs = split_reglist(substr($2, index($2, ",") + 1), regs)
+ save_regs(regs, numregs);
+ }
+}
+
+# Adjust CFA offset and invalidate unsaved registers popped using POP.
+/^pop / {
+ if (in_function) {
+ numregs = split_reglist($2, regs)
+ pop_regs(regs, numregs)
+ }
+}
+
+# Adjust CFA offset and invalidate unsaved registers popped using LDMFD.
+/^ldm(fd|ia)(al)?(\.[nw])? (sp|r13)!,/ {
+ if (in_function) {
+ numregs = split_reglist(substr($2, index($2, ",") + 1), regs)
+ pop_regs(regs, numregs)
+ }
+}
+
+# Issue relative offsets of registers stored in SP-relative locations.
+/^str[a-z.]* .*,\[(sp|r13)[,\]]/ {
+ if (in_function && !match($1, /^str(ex)?[bh]/)) {
+ if (match($2, /(,#[+-]?(0x[0-9a-fA-F]+|[0-9]+))?\]$/)) {
+ # Offset with no write-back.
+ if (RLENGTH == 1)
+ offset = 0
+ else
+ offset = parse_const(substr($2, RSTART + 2, RLENGTH - 3))
+ split($2, operands, ",")
+ if (match($1, /^str(ex)?d/)) {
+ save_reg(operands[1], offset)
+ save_reg(operands[2], offset + 4)
+ } else
+ save_reg(operands[1], offset)
+ } else if (match($2, /,#[+-]?(0x[0-9a-fA-F]+|[0-9]+)\]!$/)) {
+ # Pre-index with write-back.
+ offset = parse_const(substr($2, RSTART + 2, RLENGTH - 4))
+ adjust_sp_offset(-offset)
+ split($2, operands, ",")
+ if (match($1, /^str(ex)?d/)) {
+ save_reg(operands[1], 0)
+ save_reg(operands[2], 4)
+ } else
+ save_reg(operands[1], 0)
+ } else if (match($2, /,#[+-]?(0x[0-9a-fA-F]+|[0-9]+)$/)) {
+ # Post-index
+ offset = parse_const(substr($2, RSTART + 2, RLENGTH - 2))
+ split($2, operands, ",")
+ if (match($1, /^str(ex)?d/)) {
+ save_reg(operands[1], 0)
+ save_reg(operands[2], 4)
+ } else
+ save_reg(operands[1], 0)
+ adjust_sp_offset(-offset)
+ }
+ }
+}
+
+# Adjust CFA offset when decreasing SP.
+/subs?(al)?(\.[nw])? (sp|r13),(sp|r13),/ {
+ if (in_function && match($2, /,#[+-]?(0x[0-9a-fA-F]+|[0-9]+)$/))
+ adjust_sp_offset(parse_const(substr($2, RSTART + 2, RLENGTH - 2)))
+}
+
+# Adjust CFA offset when increasing SP.
+/adds?(al)?(\.[nw])? (sp|r13),(sp|r13),/ {
+ if (in_function && match($2, /,#[+-]?(0x[0-9a-fA-F]+|[0-9]+)$/))
+ adjust_sp_offset(-parse_const(substr($2, RSTART + 2, RLENGTH - 2)))
+}
+
+# Process calls to labels (BL/BLX with optional condition, not BLE/BLT/BLS/BLO).
+/^blx?(eq|ne|cs|hs|cc|lo|mi|pl|vs|vc|hi|ls|ge|lt|gt|le|al)?(\.[nw])? / {
+    if (match($2, /^[0-9]+f$/)) # "forward" label
+        called[substr($2, 1, RLENGTH - 1)] = 1  # drop the trailing "f"
+    else if (match($2, /^[a-zA-Z_][0-9a-zA-Z_]*$/))
+        called[$2] = 1
+}
+
+# Invalidate unsaved registers being written to.
+/^((adc|add|and|asr|adr|bic|eor|lsl|lsr|mla|mov|mul|mvn|orn|orr|ror|rrx|rsb|rsc|sbc|sub)s?|bfc|bfi|clz|cpy|ldr[a-z]*|mls|mov[tw]|mrs|neg|pkh(bt|tb)|qadd(8|16)?|qasx|qdadd|qdsub|qsax|qsub(8|16)?|rbit|rev(16)?|revsh|sadd(16|8)|sasx|sbfx|sdiv|sel|shadd(16|8)|shasx|shsax|shsub(16|8)|smla(bb|bt|tb|tt)|smladx?|smlaw[tb]|smlsdx?|smmlar?|smmlsr?|smmulr?|smuadx?|smul(bb|bt|tb|tt)|smulw[bt]|smusdx?|ssat(16)?|ssax|ssub(16|8)|swpb?|sxtab(16)?|sxtah|sxtb(16)?|sxth|uadd(16|8)|uasx|ubfx|udiv|uhadd(16|8)|uhasx|uhsax|uhsub(16|8)|uqadd(16|8)|uqasx|uqsax|uqsub(16|8)|usada?8|usat(16)?|usax|usub(16|8)|uxtab(16)?|uxtah|uxtb(16)?|uxth)(eq|ne|cs|cc|mi|pl|vs|vc|hi|ls|ge|lt|gt|le|al)? (r([0-9]|1[0-5])|sl|fp|ip|sp|lr|pc),/ {
+    split($2, args, ",")
+    reg = args[1]  # destination is the first operand
+    if (regname[reg] != "r13")  # SP (under either spelling) is tracked via the CFA offset
+        trashed(regname[reg])
+}
+
+# Invalidate unsaved registers being written to by long instructions.
+/^(smlals?|smlal(bb|bt|tb|tt)|smlaldx?|smlsldx?|smull|umaal|umlal|umulls?)/ {
+ split($2, args, ",")
+ trashed(regname[args[1]])
+ trashed(regname[args[2]])
+}
+
+# Invalidate unsaved register being modified by write-back on store multiple.
+/^stm[a-z.]* [^,]+!,/ {
+ first_arg = substr($2, 1, index($2, ",") - 1)
+ if (!match(first_arg, /^(sp|r13)/))
+ trashed(regname[substr(first_arg, 1, length(first_arg) - 1)])
+}
+
+# Invalidate unsaved registers being modified by load multiple.
+/^ldm[a-z.]* [^,]+,{.*}$/ {
+ comma = index($2, ",")
+ first_arg = substr($2, 1, comma - 1)
+ other_args = substr($2, comma + 1)
+ if (!match(first_arg, /^(sp|r13)/)) {
+ if (match(first_arg, /!$/))
+ trashed(regname[substr(first_arg, 1, RSTART - 1)])
+ numregs = split_reglist(other_args, regs)
+ for (i = 1; i <= numregs; ++i)
+ trashed(regname[regs[i]])
+ }
+}
+
+END {
+ # Issue end of function if still inside one.
+ if (in_function)
+ print ".cfi_endproc"
+}
diff --git a/tools/add-cfi.common.awk b/tools/add-cfi.common.awk
index 04482d43..fe3aec03 100644
--- a/tools/add-cfi.common.awk
+++ b/tools/add-cfi.common.awk
@@ -1,26 +1,46 @@
-function hex2int(str, i) {
+function hex2int(str, i) {
str = tolower(str)
for (i = 1; i <= 16; i++) {
char = substr("0123456789abcdef", i, 1)
- lookup[char] = i-1
+ lookup[char] = i - 1
}
result = 0
for (i = 1; i <= length(str); i++) {
- result = result * 16
- char = substr(str, i, 1)
- result = result + lookup[char]
+ result *= 16
+ char = substr(str, i, 1)
+ result += lookup[char]
+ }
+ return result
+}
+
+function oct2int(str, i) {
+ str = tolower(str)
+
+ for (i = 1; i <= 8; ++i) {
+ char = substr("01234567", i, 1)
+ lookup[char] = i - 1
+ }
+
+ result = 0
+ for (i = 1; i <= length(str); ++i) {
+ result *= 8
+ char = substr(str, i, 1)
+ result += lookup[char]
}
return result
}
function parse_const(str) {
- sign = sub(/^-/, "", str)
- hex = sub(/^0x/, "", str)
+ neg = (str ~ /^-/); sub(/^[+-]/, "", str) # strip either sign so "+0x10" still parses as hex
+ oct = match(str, /^0[0-7]/) # leading 0 followed by an octal digit
+ hex = sub(/^0x/, "", str)
if (hex)
n = hex2int(str)
+ else if (oct)
+ n = oct2int(str)
else
n = str+0
- return sign ? -n : n
+ return neg? -n: n
}
diff --git a/tools/add-cfi.i386.awk b/tools/add-cfi.i386.awk
index d05037de..d4b59e3f 100644
--- a/tools/add-cfi.i386.awk
+++ b/tools/add-cfi.i386.awk
@@ -1,123 +1,179 @@
-# Insert GAS CFI directives ("control frame information") into x86-32 asm input
+# Insert GAS CFI directives ("control frame information") into x86-32 asm input.
#
-# CFI directives tell the assembler how to generate "stack frame" debug info
+# CFI directives tell the assembler how to generate "stack frame" debug info.
# This information can tell a debugger (like gdb) how to find the current stack
# frame at any point in the program code, and how to find the values which
-# various registers had at higher points in the call stack
+# various registers had at higher points in the call stack.
# With this information, the debugger can show a backtrace, and you can move up
-# and down the call stack and examine the values of local variables
+# and down the call stack and examine the values of local variables.
BEGIN {
- # don't put CFI data in the .eh_frame ELF section (which we don't keep)
+ # Don't put CFI data in the .eh_frame ELF section (which we don't keep).
print ".cfi_sections .debug_frame"
- # only emit CFI directives inside a function
- in_function = 0
+ # Only emit CFI directives inside a function.
+ in_function = ""
- # emit .loc directives with line numbers from original source
+ # Emit .loc directives with line numbers from original source.
printf ".file 1 \"%s\"\n", ARGV[1]
line_number = 0
- # used to detect "call label; label:" trick
- called = ""
+ re_label = "([0-9]+|[a-zA-Z_][a-zA-Z0-9_]*)"
+
+ for (i = 1; i <= 4; ++i) {
+ letter = substr("abcd", i, 1)
+ regname[letter "l"] = regname[letter "h"] = regname[letter "x"] = \
+ regname["e" letter "x"] = "e" letter "x"
+ }
+
+ regname["si"] = regname["esi"] = "esi"
+ regname["di"] = regname["edi"] = "edi"
+ regname["bp"] = regname["ebp"] = "ebp"
+ regname["sp"] = regname["esp"] = "esp"
}
+# For instructions with 2 operands, get 1st operand (assuming it is constant).
function get_const1() {
- # for instructions with 2 operands, get 1st operand (assuming it is constant)
- match($0, /-?(0x[0-9a-fA-F]+|[0-9]+),/)
- return parse_const(substr($0, RSTART, RLENGTH-1))
+ match($2, /^\$[+-]?(0x[0-9a-fA-F]+|[0-9]+),/)
+ return parse_const(substr($2, 2, RLENGTH - 2))
}
-function canonicalize_reg(register) {
- if (match(register, /^e/))
- return register
- else if (match(register, /[hl]$/)) # AH, AL, BH, BL, etc
- return "e" substr(register, 1, 1) "x"
- else # AX, BX, CX, etc
- return "e" register
-}
+# Only use if you already know there is 1 and only 1 register.
function get_reg() {
- # only use if you already know there is 1 and only 1 register
- match($0, /%e?([abcd][hlx]|si|di|bp)/)
- return canonicalize_reg(substr($0, RSTART+1, RLENGTH-1))
+ return regname[substr($2, 2, length($2) - 1)]
}
+
+# For instructions with 2 operands, get 1st operand (assuming it is register).
function get_reg1() {
- # for instructions with 2 operands, get 1st operand (assuming it is register)
- match($0, /%e?([abcd][hlx]|si|di|bp),/)
- return canonicalize_reg(substr($0, RSTART+1, RLENGTH-2))
+ match($2, /^%e?([abcd][hlx]|si|di|bp),/)
+ return regname[substr($2, 2, RLENGTH - 2)]
}
+
+# For instructions with 2 operands, get 2nd operand (assuming it is register).
function get_reg2() {
- # for instructions with 2 operands, get 2nd operand (assuming it is register)
- match($0, /,%e?([abcd][hlx]|si|di|bp)/)
- return canonicalize_reg(substr($0, RSTART+2, RLENGTH-2))
+ match($2, /,%e?([abcd][hlx]|si|di|bp)$/)
+ return regname[substr($2, RSTART + 2, RLENGTH - 2)]
}
+# Helper to adjust CFA offset.
function adjust_sp_offset(delta) {
- if (in_function)
+ if (in_function) {
printf ".cfi_adjust_cfa_offset %d\n", delta
+ cfa_offset[in_function] += delta
+ }
+}
+
+function save_reg(reg, offset) {
+ if (!(reg in saved) && !(reg in dirty)) {
+ printf ".cfi_rel_offset %s,%d\n", reg, offset
+ saved[reg] = 1
+ }
+}
+
+# Helper to process jumps to labels by saving the current CFA offset.
+function jump_to_label(label) {
+ if (in_function) {
+ if (match(label, /^[0-9]+f$/)) # "forward" label
+ cfa_offset[substr(label, 1, RLENGTH - 1)] = cfa_offset[in_function]
+ else if (match(label, /^[a-zA-Z_][a-zA-Z0-9_]*$/))
+ cfa_offset[label] = cfa_offset[in_function]
+ }
}
{
- line_number = line_number + 1
+ ++line_number
- # clean the input up before doing anything else
- # delete comments
- gsub(/(#|\/\/).*/, "")
+ # Clean the input up before doing anything else.
+ # Delete comments.
+ gsub(/#.*|\/\*.*\*\//, "")
- # canonicalize whitespace
- gsub(/[ \t]+/, " ") # mawk doesn't understand \s
+ # Canonicalize whitespace.
+ gsub(/[ \t]+/, " ") # Mawk doesn't understand \s.
gsub(/ *, */, ",")
gsub(/ *: */, ": ")
+ gsub(/%cs: */, "%cs:")
+ gsub(/%ds: */, "%ds:")
+ gsub(/%ss: */, "%ss:")
+ gsub(/%es: */, "%es:")
+ gsub(/%fs: */, "%fs:")
+ gsub(/%gs: */, "%gs:")
gsub(/ $/, "")
gsub(/^ /, "")
}
-# check for assembler directives which we care about
+# Check for assembler directives which we care about.
/^\.(section|data|text)/ {
- # a .cfi_startproc/.cfi_endproc pair should be within the same section
- # otherwise, clang will choke when generating ELF output
+ # A .cfi_startproc/.cfi_endproc pair should be within the same section.
+ # Otherwise, clang will choke when generating ELF output.
if (in_function) {
print ".cfi_endproc"
- in_function = 0
+ in_function = ""
}
}
-/^\.type [a-zA-Z0-9_]+,@function/ {
- functions[substr($2, 1, length($2)-10)] = 1
+
+# Record each function name.
+/^\.type [a-zA-Z0-9_]+( STT_FUNCTION|,[#@%"]function)/ {
+ functions[substr($2, 1, length($2) - 10)] = 1
}
-# not interested in assembler directives beyond this, just pass them through
+
+# Not interested in assembler directives beyond this, just pass them through.
/^\./ {
print
next
}
-/^[a-zA-Z0-9_]+:/ {
- label = substr($1, 1, length($1)-1) # drop trailing :
+$0 ~ "^" re_label ":" {
+ # Parse each leading label.
+ while (match($0, "^" re_label ":")) {
- if (called == label) {
- # note adjustment of stack pointer from "call label; label:"
- adjust_sp_offset(4)
- }
+ # Extract label name.
+ label = substr($1, 1, RLENGTH - 1)
- if (functions[label]) {
- if (in_function)
- print ".cfi_endproc"
+ # Remove label from current line.
+ sub("^" re_label ": ?", "")
- in_function = 1
- print ".cfi_startproc"
+ if (label in functions) {
+ if (in_function) {
+ print ".cfi_endproc"
+ for (l in called)
+ delete called[l]
+ }
- for (register in saved)
- delete saved[register]
- for (register in dirty)
- delete dirty[register]
- }
+ in_function = label
+ print ".cfi_startproc"
+
+ for (reg in saved)
+ delete saved[reg]
+ for (reg in dirty)
+ delete dirty[reg]
+ }
+
+ printf "%s:\n", label
+
+ # If this label has been jumped to, move the CFA offset back to its value
+ # at the jump; relative, because the tracked counter starts at 0, not 4.
+ if (!(label in functions) && in_function && label in cfa_offset) {
+ if (cfa_offset[in_function] != cfa_offset[label]) {
+ printf ".cfi_adjust_cfa_offset %d\n", cfa_offset[label] - cfa_offset[in_function]
+ cfa_offset[in_function] = cfa_offset[label]
+ }
+ delete cfa_offset[label]
+ }
- # an instruction may follow on the same line, so continue processing
+ # If this label has been called, adjust CFA offset.
+ if (label in called && !(label in functions)) {
+ adjust_sp_offset(4);
+ delete called[label]
+ }
+ }
+ # An instruction may follow on the same line, so continue processing.
}
+# Skip empty line.
/^$/ { next }
+# Issue source line number.
{
- called = ""
printf ".loc 1 %d\n", line_number
print
}
@@ -126,82 +182,145 @@ function adjust_sp_offset(delta) {
# We do NOT attempt to understand foolish and ridiculous tricks like stashing
# the stack pointer and then using %esp as a scratch register, or bitshifting
# it or taking its square root or anything stupid like that.
-# %esp should only be adjusted by pushing/popping or adding/subtracting constants
+# %esp should only be adjusted by pushing/popping or adding/subtracting
+# constants.
#
-/pushl?/ {
- if (match($0, / %(ax|bx|cx|dx|di|si|bp|sp)/))
+/^push[wl]? / {
+ if ($1 == "pushw" || match($2, /^%([abcd]x|di|si|bp|sp)$/))
adjust_sp_offset(2)
else
adjust_sp_offset(4)
}
-/popl?/ {
- if (match($0, / %(ax|bx|cx|dx|di|si|bp|sp)/))
+
+/^pop[wl]? / {
+ if ($1 == "popw" || match($2, /^%([abcd]x|di|si|bp|sp)$/))
adjust_sp_offset(-2)
else
adjust_sp_offset(-4)
}
-/addl? \$-?(0x[0-9a-fA-F]+|[0-9]+),%esp/ { adjust_sp_offset(-get_const1()) }
-/subl? \$-?(0x[0-9a-fA-F]+|[0-9]+),%esp/ { adjust_sp_offset(get_const1()) }
-/call/ {
- if (match($0, /call [0-9]+f/)) # "forward" label
- called = substr($0, RSTART+5, RLENGTH-6)
- else if (match($0, /call [0-9a-zA-Z_]+/))
- called = substr($0, RSTART+5, RLENGTH-5)
+/^pushal?$/ {
+ adjust_sp_offset(32)
+ if (in_function) {
+ save_reg("eax", 28)
+ save_reg("ecx", 24)
+ save_reg("edx", 20)
+ save_reg("ebx", 16)
+ save_reg("esp", 12)
+ save_reg("ebp", 8)
+ save_reg("esi", 4)
+ save_reg("edi", 0)
+ }
+}
+
+/^pushaw$/ {
+ adjust_sp_offset(16)
+}
+
+/^popal?$/ {
+ adjust_sp_offset(-32)
+}
+
+/^popaw$/ {
+ adjust_sp_offset(-16)
+}
+
+/^pushfl?$/ {
+ adjust_sp_offset(4)
+}
+
+/^pushfw$/ {
+ adjust_sp_offset(2)
+}
+
+/^popfl?$/ {
+ adjust_sp_offset(-4)
+}
+
+/^popfw$/ {
+ adjust_sp_offset(-2)
+}
+
+/^addl? \$[+-]?(0x[0-9a-fA-F]+|[0-9]+),%esp/ {
+ adjust_sp_offset(-get_const1())
+}
+
+/^subl? \$[+-]?(0x[0-9a-fA-F]+|[0-9]+),%esp/ {
+ adjust_sp_offset(get_const1())
+}
+
+/^call / {
+ if (match($2, /^[0-9]+f$/)) # "forward" label
+ called[substr($2, 1, RLENGTH - 1)] = 1
+ else if (match($2, /^[a-zA-Z_][0-9a-zA-Z_]*$/))
+ called[$2] = 1
+}
+
+/^j/ {
+ jump_to_label($2)
}
# TRACKING REGISTER VALUES FROM THE PREVIOUS STACK FRAME
#
-/pushl? %e(ax|bx|cx|dx|si|di|bp)/ { # don't match "push (%reg)"
- # if a register is being pushed, and its value has not changed since the
+/^pushl? %e([abcd]x|si|di|bp)$/ {
+ # Don't match "push (%reg)"
+ # If a register is being pushed, and its value has not changed since the
# beginning of this function, the pushed value can be used when printing
- # local variables at the next level up the stack
- # emit '.cfi_rel_offset' for that
+ # local variables at the next level up the stack.
+ # Emit '.cfi_rel_offset' for that.
- if (in_function) {
- register = get_reg()
- if (!saved[register] && !dirty[register]) {
- printf ".cfi_rel_offset %s,0\n", register
- saved[register] = 1
- }
- }
+ if (in_function)
+ save_reg(get_reg(), 0)
}
-/movl? %e(ax|bx|cx|dx|si|di|bp),-?(0x[0-9a-fA-F]+|[0-9]+)?\(%esp\)/ {
+/^movl? %e(ax|bx|cx|dx|si|di|bp),[+-]?(0x[0-9a-fA-F]+|[0-9]+)?\(%esp\)$/ {
if (in_function) {
- register = get_reg()
- if (match($0, /-?(0x[0-9a-fA-F]+|[0-9]+)\(%esp\)/)) {
- offset = parse_const(substr($0, RSTART, RLENGTH-6))
+ if (match($2, /,[+-]?(0x[0-9a-fA-F]+|[0-9]+)\(%esp\)$/)) {
+ offset = parse_const(substr($2, RSTART + 1, RLENGTH - 7))
} else {
offset = 0
}
- if (!saved[register] && !dirty[register]) {
- printf ".cfi_rel_offset %s,%d\n", register, offset
- saved[register] = 1
- }
+ save_reg(get_reg1(), offset)
}
}
# IF REGISTER VALUES ARE UNCEREMONIOUSLY TRASHED
# ...then we want to know about it.
#
-function trashed(register) {
- if (in_function && !saved[register] && !dirty[register]) {
- printf ".cfi_undefined %s\n", register
+function trashed(reg) {
+ if (in_function && !(reg in saved) && !(reg in dirty)) {
+ printf ".cfi_undefined %s\n", reg
+ dirty[reg] = 1
}
- dirty[register] = 1
}
-# this does NOT exhaustively check for all possible instructions which could
-# overwrite a register value inherited from the caller (just the common ones)
-/mov.*,%e?([abcd][hlx]|si|di|bp)$/ { trashed(get_reg2()) }
-/(add|addl|sub|subl|and|or|xor|lea|sal|sar|shl|shr).*,%e?([abcd][hlx]|si|di|bp)$/ {
+# This does NOT exhaustively check for all possible instructions which could
+# overwrite a register value inherited from the caller (just the common ones).
+/^mov.*,%e?([abcd][hlx]|si|di|bp)$/ {
trashed(get_reg2())
}
-/^i?mul [^,]*$/ { trashed("eax"); trashed("edx") }
-/^i?mul.*,%e?([abcd][hlx]|si|di|bp)$/ { trashed(get_reg2()) }
-/^i?div/ { trashed("eax"); trashed("edx") }
-/(dec|inc|not|neg|pop) %e?([abcd][hlx]|si|di|bp)/ { trashed(get_reg()) }
-/cpuid/ { trashed("eax"); trashed("ebx"); trashed("ecx"); trashed("edx") }
+/^(add|sub|and|x?or|lea|s[ah][lr])[bwl]? [^,]+,%e?([abcd][hlx]|si|di|bp)$/ {
+ trashed(get_reg2())
+}
+/^i?mul[bwl]? [^,]+$/ {  # one-operand form, with or without a size suffix
+    trashed("eax")
+    trashed("edx")
+}
+/^i?mul[bwl]? [^,]+,%e?([abcd][hlx]|si|di|bp)$/ {
+ trashed(get_reg2())
+}
+/^i?div[bwl]? / {  # accept divl/idivl etc., not only the suffix-less form
+    trashed("eax")
+    trashed("edx")
+}
+/^(dec|inc|not|neg|pop)[bwl]? %e?([abcd][hlx]|si|di|bp)$/ {
+ trashed(get_reg())
+}
+/^cpuid/ {
+ trashed("eax")
+ trashed("ebx")
+ trashed("ecx")
+ trashed("edx")
+}
END {
if (in_function)
diff --git a/tools/add-cfi.x86_64.awk b/tools/add-cfi.x86_64.awk
index 7e1513d6..f484b7ab 100644
--- a/tools/add-cfi.x86_64.awk
+++ b/tools/add-cfi.x86_64.awk
@@ -1,169 +1,246 @@
-# Insert GAS CFI directives ("control frame information") into x86-64 asm input
+# Insert GAS CFI directives ("call frame information") into x86-64 asm input.
BEGIN {
- # don't put CFI data in the .eh_frame ELF section (which we don't keep)
+ # Don't put CFI data in the .eh_frame ELF section (which we don't keep).
print ".cfi_sections .debug_frame"
- # only emit CFI directives inside a function
- in_function = 0
+ # Only emit CFI directives inside a function.
+ in_function = ""
- # emit .loc directives with line numbers from original source
+ # Emit .loc directives with line numbers from original source.
printf ".file 1 \"%s\"\n", ARGV[1]
line_number = 0
- # used to detect "call label; label:" trick
- called = ""
+ re_label = "([0-9]+|[a-zA-Z_][a-zA-Z0-9_]*)"
+
+ for (i = 1; i <= 4; ++i) {
+ letter = substr("abcd", i, 1)
+ regname[letter "l"] = regname[letter "h"] = regname[letter "x"] = \
+ regname["e" letter "x"] = regname["r" letter "x"] = "r" letter "x"
+ }
+
+ regname["si"] = regname["esi"] = regname["rsi"] = "rsi"
+ regname["di"] = regname["edi"] = regname["rdi"] = "rdi"
+ regname["bp"] = regname["ebp"] = regname["rbp"] = "rbp"
+ regname["sp"] = regname["esp"] = regname["rsp"] = "rsp"
+
+ for (i = 8; i <= 15; ++i)
+ regname["r" i] = "r" i
}
+# For instructions with 2 operands, get 1st operand (assuming it is constant).
function get_const1() {
- # for instructions with 2 operands, get 1st operand (assuming it is constant)
- match($0, /-?(0x[0-9a-fA-F]+|[0-9]+),/)
- return parse_const(substr($0, RSTART, RLENGTH-1))
+ match($2, /^\$[+-]?(0x[0-9a-fA-F]+|[0-9]+),/)
+ return parse_const(substr($2, 2, RLENGTH - 2))
}
-function canonicalize_reg(register) {
- if (match(register, /^r/))
- return register
- else if (match(register, /^e/))
- return "r" substr(register, 2, length(register)-1)
- else if (match(register, /[hl]$/)) # AH, AL, BH, BL, etc
- return "r" substr(register, 1, 1) "x"
- else # AX, BX, CX, etc
- return "r" register
-}
+# Only use if you already know there is 1 and only 1 register.
function get_reg() {
- # only use if you already know there is 1 and only 1 register
- match($0, /%[er]?([abcd][xlh]|si|di|bp|8|9|10|11|12|13|14|15)/)
- return canonicalize_reg(substr($0, RSTART+1, RLENGTH-1))
+ return regname[substr($2, 2, length($2) - 1)]
}
+
+# For instructions with 2 operands, get 1st operand (assuming it is register).
function get_reg1() {
- # for instructions with 2 operands, get 1st operand (assuming it is register)
- match($0, /%[er]?([abcd][xlh]|si|di|bp|8|9|10|11|12|13|14|15),/)
- return canonicalize_reg(substr($0, RSTART+1, RLENGTH-2))
+ match($2, /^%[er]?([abcd][xlh]|si|di|bp|[89]|1[0-5]),/)
+ return regname[substr($2, 2, RLENGTH - 2)]
}
+
+# For instructions with 2 operands, get 2nd operand (assuming it is register).
function get_reg2() {
- # for instructions with 2 operands, get 2nd operand (assuming it is register)
- match($0, /,%[er]?([abcd][xlh]|si|di|bp|8|9|10|11|12|13|14|15)/)
- return canonicalize_reg(substr($0, RSTART+2, RLENGTH-2))
+ match($2, /,%[er]?([abcd][xlh]|si|di|bp|[89]|1[0-5])$/)
+ return regname[substr($2, RSTART + 2, RLENGTH - 2)]
}
+# Helper to adjust CFA offset.
function adjust_sp_offset(delta) {
- if (in_function)
+ if (in_function) {
printf ".cfi_adjust_cfa_offset %d\n", delta
+ cfa_offset[in_function] += delta
+ }
+}
+
+# Helper to process jumps to labels by saving the current CFA offset.
+function jump_to_label(label) {
+ if (in_function) {
+ if (match(label, /^[0-9]+f$/)) # "forward" label
+ cfa_offset[substr(label, 1, RLENGTH - 1)] = cfa_offset[in_function]
+ else if (match(label, /^[a-zA-Z_][a-zA-Z0-9_]*$/))
+ cfa_offset[label] = cfa_offset[in_function]
+ }
}
{
- line_number = line_number + 1
+ ++line_number
- # clean the input up before doing anything else
- # delete comments
- gsub(/(#|\/\/).*/, "")
+ # Clean the input up before doing anything else.
+ # Delete comments.
+ gsub(/#.*|\/\*.*\*\//, "")
- # canonicalize whitespace
- gsub(/[ \t]+/, " ") # mawk doesn't understand \s
+ # Canonicalize whitespace.
+ gsub(/[ \t]+/, " ") # Mawk doesn't understand \s.
gsub(/ *, */, ",")
gsub(/ *: */, ": ")
gsub(/ $/, "")
gsub(/^ /, "")
}
-# check for assembler directives which we care about
+# Check for assembler directives which we care about.
/^\.(section|data|text)/ {
- # a .cfi_startproc/.cfi_endproc pair should be within the same section
- # otherwise, clang will choke when generating ELF output
+ # A .cfi_startproc/.cfi_endproc pair should be within the same section.
+ # Otherwise, clang will choke when generating ELF output.
if (in_function) {
print ".cfi_endproc"
- in_function = 0
+ in_function = ""
}
}
-/^\.type [a-zA-Z0-9_]+,@function/ {
- functions[substr($2, 1, length($2)-10)] = 1
+
+# Record each function name.
+# After whitespace canonicalization, $2 is either "name" (the
+# ".type name STT_FUNCTION" form) or "name,<type>" (the comma form).
+/^\.type [a-zA-Z0-9_]+( STT_FUNCTION|,[#@%"]function)/ {
+ # Keep only the part before the first comma rather than assuming a fixed
+ # 10-character suffix, which is wrong for the STT_FUNCTION form (no suffix
+ # in $2) and for the quoted form (11-character suffix).
+ name = $2
+ sub(/,.*$/, "", name)
+ functions[name] = 1
}
-# not interested in assembler directives beyond this, just pass them through
+# Not interested in assembler directives beyond this, just pass them through.
/^\./ {
print
next
}
-/^[a-zA-Z0-9_]+:/ {
- label = substr($1, 1, length($1)-1) # drop trailing :
+$0 ~ "^" re_label ":" {
+ # Parse each leading label.
+ while (match($0, "^" re_label ":")) {
- if (called == label) {
- # note adjustment of stack pointer from "call label; label:"
- adjust_sp_offset(8)
- }
+ # Extract label name.
+ label = substr($1, 1, RLENGTH - 1)
- if (functions[label]) {
- if (in_function)
- print ".cfi_endproc"
+ # Remove label from current line.
+ sub("^" re_label ": ?", "")
- in_function = 1
- print ".cfi_startproc"
+ if (label in functions) {
+ if (in_function) {
+ print ".cfi_endproc"
+ for (l in called)
+ delete called[l]
+ }
- for (register in saved)
- delete saved[register]
- for (register in dirty)
- delete dirty[register]
- }
+ in_function = label
+ print ".cfi_startproc"
+ # On x86-64 the CFA at function entry is %rsp+8 (return address).
+ # Track the absolute offset from that base so that the
+ # .cfi_def_cfa_offset emitted at jump targets is correct, and so that
+ # no stale value recorded by an earlier jump to this name is reused.
+ cfa_offset[label] = 8
- # an instruction may follow on the same line, so continue processing
+ for (reg in saved)
+ delete saved[reg]
+ for (reg in dirty)
+ delete dirty[reg]
+ }
+
+ printf "%s:\n", label
+
+ # If this label has been jumped to, define the CFA offset to its
+ # value at the location of the jump.
+ if (!(label in functions) && in_function && label in cfa_offset) {
+ if (cfa_offset[in_function] != cfa_offset[label]) {
+ printf ".cfi_def_cfa_offset %d\n", cfa_offset[label]
+ cfa_offset[in_function] = cfa_offset[label]
+ }
+ delete cfa_offset[label]
+ }
+
+ # If this label has been called, adjust CFA offset.
+ if (label in called && !(label in functions)) {
+ adjust_sp_offset(8);
+ delete called[label]
+ }
+ }
+ # An instruction may follow on the same line, so continue processing.
}
+# Skip empty line.
/^$/ { next }
+# Issue source line number.
{
- called = ""
printf ".loc 1 %d\n", line_number
print
}
# KEEPING UP WITH THE STACK POINTER
-# %rsp should only be adjusted by pushing/popping or adding/subtracting constants
+# %rsp should only be adjusted by pushing/popping or adding/subtracting
+# constants.
#
-/pushl?/ {
+/^push[wq]? / {
+ if ($1 == "pushw" || match($2, /^%([abcd]x|di|si|bp|sp)$/))
+ adjust_sp_offset(2)
+ else
+ adjust_sp_offset(8)
+}
+
+/^pop[wq]? / {
+ if ($1 == "popw" || match($2, /^%([abcd]x|di|si|bp|sp)$/))
+ adjust_sp_offset(-2)
+ else
+ adjust_sp_offset(-8)
+}
+
+/^pushfq?$/ {
adjust_sp_offset(8)
}
-/popl?/ {
+
+/^pushfw$/ {
+ adjust_sp_offset(2)
+}
+
+/^popfq?$/ {
adjust_sp_offset(-8)
}
-/addl? \$-?(0x[0-9a-fA-F]+|[0-9]+),%rsp/ { adjust_sp_offset(-get_const1()) }
-/subl? \$-?(0x[0-9a-fA-F]+|[0-9]+),%rsp/ { adjust_sp_offset(get_const1()) }
-/call/ {
- if (match($0, /call [0-9]+f/)) # "forward" label
- called = substr($0, RSTART+5, RLENGTH-6)
- else if (match($0, /call [0-9a-zA-Z_]+/))
- called = substr($0, RSTART+5, RLENGTH-5)
+/^popfw$/ {
+ adjust_sp_offset(-2)
+}
+
+/^addq? \$[+-]?(0x[0-9a-fA-F]+|[0-9]+),%rsp$/ {
+ adjust_sp_offset(-get_const1())
+}
+/^subq? \$[+-]?(0x[0-9a-fA-F]+|[0-9]+),%rsp$/ {
+ adjust_sp_offset(get_const1())
+}
+
+/^call / {
+ if (match($2, /^[0-9]+f$/)) # "forward" label
+ called[substr($2, 1, RLENGTH - 1)] = 1
+ else if (match($2, /^[a-zA-Z_][0-9a-zA-Z_]*$/))
+ called[$2] = 1
+}
+
+/^j/ {
+ jump_to_label($2)
}
# TRACKING REGISTER VALUES FROM THE PREVIOUS STACK FRAME
#
-/pushl? %r(ax|bx|cx|dx|si|di|bp|8|9|10|11|12|13|14|15)/ { # don't match "push (%reg)"
- # if a register is being pushed, and its value has not changed since the
+/^pushq? %r([abcd]x|si|di|bp|[89]|1[0-5])$/ {
+ # Don't match "push (%reg)".
+ # If a register is being pushed, and its value has not changed since the
# beginning of this function, the pushed value can be used when printing
- # local variables at the next level up the stack
- # emit '.cfi_rel_offset' for that
+ # local variables at the next level up the stack.
+ # Emit '.cfi_rel_offset' for that.
if (in_function) {
- register = get_reg()
- if (!saved[register] && !dirty[register]) {
- printf ".cfi_rel_offset %s,0\n", register
- saved[register] = 1
+ reg = get_reg()
+ if (!(reg in saved) && !(reg in dirty)) {
+ printf ".cfi_rel_offset %s,0\n", reg
+ saved[reg] = 1
}
}
}
-/movl? %r(ax|bx|cx|dx|si|di|bp|8|9|10|11|12|13|14|15),-?(0x[0-9a-fA-F]+|[0-9]+)?\(%rsp\)/ {
+/^movq? %r([abcd]x|si|di|bp|[89]|1[0-5]),[+-]?(0x[0-9a-fA-F]+|[0-9]+)?\(%rsp\)$/ {
if (in_function) {
- register = get_reg()
- if (match($0, /-?(0x[0-9a-fA-F]+|[0-9]+)\(%rsp\)/)) {
- offset = parse_const(substr($0, RSTART, RLENGTH-6))
+ if (match($2, /,[+-]?(0x[0-9a-fA-F]+|[0-9]+)\(%rsp\)$/)) {
+ # The match spans "," + constant + "(%rsp)": skip the leading comma
+ # and drop the 6-character "(%rsp)" tail to isolate the constant.
+ offset = parse_const(substr($2, RSTART + 1, RLENGTH - 7))
} else {
offset = 0
}
- if (!saved[register] && !dirty[register]) {
- printf ".cfi_rel_offset %s,%d\n", register, offset
- saved[register] = 1
+ reg = get_reg1()
+ if (!(reg in saved) && !(reg in dirty)) {
+ printf ".cfi_rel_offset %s,%d\n", reg, offset
+ saved[reg] = 1
}
}
}
@@ -171,24 +248,41 @@ function adjust_sp_offset(delta) {
# IF REGISTER VALUES ARE UNCEREMONIOUSLY TRASHED
# ...then we want to know about it.
#
-function trashed(register) {
- if (in_function && !saved[register] && !dirty[register]) {
- printf ".cfi_undefined %s\n", register
+function trashed(reg) {
+ if (in_function && !(reg in saved) && !(reg in dirty)) {
+ printf ".cfi_undefined %s\n", reg
}
- dirty[register] = 1
+ dirty[reg] = 1
+}
+# This does NOT exhaustively check for all possible instructions which could
+# overwrite a register value inherited from the caller (just the common ones).
+/^mov.*,%[er]?([abcd][xlh]|si|di|bp|[89]|1[0-5])$/ {
+ trashed(get_reg2())
}
-# this does NOT exhaustively check for all possible instructions which could
-# overwrite a register value inherited from the caller (just the common ones)
-/mov.*,%[er]?([abcd][xlh]|si|di|bp|8|9|10|11|12|13|14|15)$/ { trashed(get_reg2()) }
-/(add|addl|sub|subl|and|or|xor|lea|sal|sar|shl|shr).*,%[er]?([abcd][xlh]|si|di|bp|8|9|10|11|12|13|14|15)$/ {
+/^(add|sub|and|x?or|lea|s[ah][lr])[bwlq]? [^,]+,%[er]?([abcd][xlh]|si|di|bp|[89]|1[0-5])$/ {
trashed(get_reg2())
}
-/^i?mul [^,]*$/ { trashed("rax"); trashed("rdx") }
-/^i?mul.*,%[er]?([abcd][xlh]|si|di|bp|8|9|10|11|12|13|14|15)$/ { trashed(get_reg2()) }
-/^i?div/ { trashed("rax"); trashed("rdx") }
+/^i?mul[bwlq]? [^,]+$/ {
+ trashed("rax")
+ trashed("rdx")
+}
+# Two-operand IMUL: mark the destination register as overwritten. The size
+# suffix is optional ("imul %rbx,%rax"), so it must not be required here.
+/^i?mul[bwlq]? [^,]+,%[er]?([abcd][xlh]|si|di|bp|[89]|1[0-5])$/ {
+ trashed(get_reg2())
+}
+/^i?div[bwlq]? / {
+ trashed("rax")
+ trashed("rdx")
+}
-/(dec|inc|not|neg|pop) %[er]?([abcd][xlh]|si|di|bp|8|9|10|11|12|13|14|15)/ { trashed(get_reg()) }
-/cpuid/ { trashed("rax"); trashed("rbx"); trashed("rcx"); trashed("rdx") }
+/^(dec|inc|not|neg|pop)[bwlq]? %[er]?([abcd][xlh]|si|di|bp|[89]|1[0-5])$/ {
+ trashed(get_reg())
+}
+/^cpuid$/ {
+ trashed("rax")
+ trashed("rbx")
+ trashed("rcx")
+ trashed("rdx")
+}
END {
if (in_function)
--
2.45.2
Powered by blists - more mailing lists
Confused about mailing lists and their use? Read about mailing lists on Wikipedia and check out these guidelines on proper formatting of your messages.