From: Peter Hessler Subject: devel/dyncall fix assembly for armv7 To: ports@openbsd.org Date: Tue, 6 Aug 2024 21:56:35 +0200 Bring in two patches from upstream; this fixes the build for me on armv7. OK? https://dyncall.org/pub/dyncall/dyncall/raw-rev/351bb41d3bb1 - removed %-prefixes for register names from arm assembly files (they were wrong to begin with; gas accepted them, but the clang integrated assembler does not) https://dyncall.org/pub/dyncall/dyncall/raw-rev/7364f285cac8 - use UAL syntax for some arm instructions for armhf targets, as clang's integrated assembler disallows pre-UAL syntax devel/dyncall Index: patches/patch-dyncall_dyncall_call_arm32_arm_S =================================================================== RCS file: patches/patch-dyncall_dyncall_call_arm32_arm_S diff -N patches/patch-dyncall_dyncall_call_arm32_arm_S --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ patches/patch-dyncall_dyncall_call_arm32_arm_S 6 Aug 2024 19:53:37 -0000 @@ -0,0 +1,12 @@ +Index: dyncall/dyncall_call_arm32_arm.S +--- dyncall/dyncall_call_arm32_arm.S.orig ++++ dyncall/dyncall_call_arm32_arm.S +@@ -71,7 +71,7 @@ pushArgs: + bne pushArgs + + call: +- /* 'blx %r4' workaround for ARMv4t: */ ++ /* 'blx r4' workaround for ARMv4t: */ + mov r14, r15 /* Branch return address(r15) -> link register (r14) -- r15 always points to address of current + 2 instructions (= Epilog code). */ + bx r4 /* Call (ARM/THUMB), available for ARMv4t. 
*/ + Index: patches/patch-dyncall_dyncall_call_arm32_arm_armhf_S =================================================================== RCS file: patches/patch-dyncall_dyncall_call_arm32_arm_armhf_S diff -N patches/patch-dyncall_dyncall_call_arm32_arm_armhf_S --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ patches/patch-dyncall_dyncall_call_arm32_arm_armhf_S 6 Aug 2024 19:53:37 -0000 @@ -0,0 +1,21 @@ +Index: dyncall/dyncall_call_arm32_arm_armhf.S +--- dyncall/dyncall_call_arm32_arm_armhf.S.orig ++++ dyncall/dyncall_call_arm32_arm_armhf.S +@@ -59,7 +59,7 @@ ENTRY_C(dcCall_arm32_armhf) + add r5, r1, #16 /* r5 = stack args (after intreg ones) */ + + /* Load 16 single-precision registers (= 8 double-precision registers). */ +- fldmiad r3, {d0-d7} ++ vldmia r3, {d0-d7} + + /* prep stack parameter area (includes room for spill area, callee spills if needed) */ + sub r13, r13, r2 +@@ -77,7 +77,7 @@ armhf_pushArgs: + + armhf_call: + ldmia r1, {r0-r3} /* Load first 4 arguments for new call into r0-r3. */ +- /* 'blx %r4' workaround for ARMv4t: */ ++ /* 'blx r4' workaround for ARMv4t: */ + mov r14, r15 /* Branch return address(r15) -> link register (r14) -- r15 always points to address of current + 2 instructions (= Epilog code). */ + bx r4 /* Call (ARM/THUMB), available for ARMv4t. */ + Index: patches/patch-dyncall_dyncall_call_arm32_arm_armhf_S.orig =================================================================== RCS file: patches/patch-dyncall_dyncall_call_arm32_arm_armhf_S.orig diff -N patches/patch-dyncall_dyncall_call_arm32_arm_armhf_S.orig --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ patches/patch-dyncall_dyncall_call_arm32_arm_armhf_S.orig 6 Aug 2024 19:53:37 -0000 @@ -0,0 +1,12 @@ +Index: dyncall/dyncall_call_arm32_arm_armhf.S +--- dyncall/dyncall_call_arm32_arm_armhf.S.orig ++++ dyncall/dyncall_call_arm32_arm_armhf.S +@@ -77,7 +77,7 @@ armhf_pushArgs: + + armhf_call: + ldmia r1, {r0-r3} /* Load first 4 arguments for new call into r0-r3. 
*/ +- /* 'blx %r4' workaround for ARMv4t: */ ++ /* 'blx r4' workaround for ARMv4t: */ + mov r14, r15 /* Branch return address(r15) -> link register (r14) -- r15 always points to address of current + 2 instructions (= Epilog code). */ + bx r4 /* Call (ARM/THUMB), available for ARMv4t. */ + Index: patches/patch-dyncall_dyncall_call_arm32_thumb_apple_s =================================================================== RCS file: patches/patch-dyncall_dyncall_call_arm32_thumb_apple_s diff -N patches/patch-dyncall_dyncall_call_arm32_thumb_apple_s --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ patches/patch-dyncall_dyncall_call_arm32_thumb_apple_s 6 Aug 2024 19:53:37 -0000 @@ -0,0 +1,12 @@ +Index: dyncall/dyncall_call_arm32_thumb_apple.s +--- dyncall/dyncall_call_arm32_thumb_apple.s.orig ++++ dyncall/dyncall_call_arm32_thumb_apple.s +@@ -71,7 +71,7 @@ pushArgs: + call: + ldmia r5!, {r0-r3} /* Load first 4 arguments for new call into r0-r3. */ + +- /* 'blx %r4' workaround for ARMv4t in THUMB: */ ++ /* 'blx r4' workaround for ARMv4t in THUMB: */ + blx r4 /* Branch and force THUMB-mode return (LR bit 0 set). */ + + /* Epilog. */ Index: patches/patch-dyncall_dyncall_call_arm32_thumb_armhf_S =================================================================== RCS file: patches/patch-dyncall_dyncall_call_arm32_thumb_armhf_S diff -N patches/patch-dyncall_dyncall_call_arm32_thumb_armhf_S --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ patches/patch-dyncall_dyncall_call_arm32_thumb_armhf_S 6 Aug 2024 19:53:37 -0000 @@ -0,0 +1,21 @@ +Index: dyncall/dyncall_call_arm32_thumb_armhf.S +--- dyncall/dyncall_call_arm32_thumb_armhf.S.orig ++++ dyncall/dyncall_call_arm32_thumb_armhf.S +@@ -68,7 +68,7 @@ ENTRY_C(dcCall_arm32_armhf) + mov r5 , r1 /* r5 = 'args' (2nd argument is passed in r1). */ + + /* Load 16 single-precision registers (= 8 double-precision registers). 
*/ +- fldmiad r3, {d0-d7} ++ vldmia r3, {d0-d7} + + sub r2 , #16 + cmp r2, #0 +@@ -99,7 +99,7 @@ armhf_pushArgs: + + armhf_call: + ldmia r5!, {r0-r3} /* Load first 4 arguments for new call into r0-r3. */ +- /* 'blx %r4' workaround for ARMv4t: */ ++ /* 'blx r4' workaround for ARMv4t: */ + // mov r14, r15 /* Branch return address(r15) -> link register (r14) -- r15 always points to address of current + 2 instructions (= Epilog code). */ + mov r6, r15 + add r6, #5 Index: patches/patch-dyncall_dyncall_call_arm32_thumb_armhf_S.orig =================================================================== RCS file: patches/patch-dyncall_dyncall_call_arm32_thumb_armhf_S.orig diff -N patches/patch-dyncall_dyncall_call_arm32_thumb_armhf_S.orig --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ patches/patch-dyncall_dyncall_call_arm32_thumb_armhf_S.orig 6 Aug 2024 19:53:37 -0000 @@ -0,0 +1,12 @@ +Index: dyncall/dyncall_call_arm32_thumb_armhf.S +--- dyncall/dyncall_call_arm32_thumb_armhf.S.orig ++++ dyncall/dyncall_call_arm32_thumb_armhf.S +@@ -99,7 +99,7 @@ armhf_pushArgs: + + armhf_call: + ldmia r5!, {r0-r3} /* Load first 4 arguments for new call into r0-r3. */ +- /* 'blx %r4' workaround for ARMv4t: */ ++ /* 'blx r4' workaround for ARMv4t: */ + // mov r14, r15 /* Branch return address(r15) -> link register (r14) -- r15 always points to address of current + 2 instructions (= Epilog code). 
*/ + mov r6, r15 + add r6, #5 Index: patches/patch-dyncall_dyncall_call_arm32_thumb_gas_s =================================================================== RCS file: patches/patch-dyncall_dyncall_call_arm32_thumb_gas_s diff -N patches/patch-dyncall_dyncall_call_arm32_thumb_gas_s --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ patches/patch-dyncall_dyncall_call_arm32_thumb_gas_s 6 Aug 2024 19:53:37 -0000 @@ -0,0 +1,86 @@ +Index: dyncall/dyncall_call_arm32_thumb_gas.s +--- dyncall/dyncall_call_arm32_thumb_gas.s.orig ++++ dyncall/dyncall_call_arm32_thumb_gas.s +@@ -6,7 +6,7 @@ + Description: ARM Thumb call kernel implementation for GNU assembler. + License: + +- Copyright (c) 2007-2011 Daniel Adler , ++ Copyright (c) 2007-2021 Daniel Adler , + Tassilo Philipp + + Permission to use, copy, modify, and distribute this software for any +@@ -37,45 +37,45 @@ dcCall_arm32_thumb: + + /* Prolog. This function never needs to spill inside its prolog, so just store the permanent registers. */ + /* Code below is not using high registers, so not storing them in prolog, which is more involved with thumb, anyways. */ +- push {%r4-%r7, %r14} /* Frame ptr, permanent registers, link register -> save area on stack. */ +- mov %r7, %r13 /* Set frame ptr. */ +- sub %r13, #4 /* Realign stack to 8 bytes (b/c we stored 5 regs = 20b). */ ++ push {r4-r7, r14} /* Frame ptr, permanent registers, link register -> save area on stack. */ ++ mov r7, r13 /* Set frame ptr. */ ++ sub r13, #4 /* Realign stack to 8 bytes (b/c we stored 5 regs = 20b). */ + + /* Call. */ +- mov %r4, %r0 /* Move 'fptr' to r4 (1st argument is passed in r0). */ +- mov %r5, %r1 /* Move 'args' to r5 (2nd argument is passed in r1). */ +- mov %r6, %r2 /* Move 'size' to r6 (3rd argument is passed in r2). */ ++ mov r4, r0 /* Move 'fptr' to r4 (1st argument is passed in r0). */ ++ mov r5, r1 /* Move 'args' to r5 (2nd argument is passed in r1). */ ++ mov r6, r2 /* Move 'size' to r6 (3rd argument is passed in r2). 
*/ + +- cmp %r6, #16 /* Jump to call if no more than 4 arguments. */ ++ cmp r6, #16 /* Jump to call if no more than 4 arguments. */ + ble call + +- sub %r6, #16 /* Size of remaining arguments. */ +- mov %r0, %r13 /* Set stack pointer to top of stack. */ +- sub %r0, %r0, %r6 +- lsr %r0, #3 /* Align stack on 8 byte boundaries. */ +- lsl %r0, #3 +- mov %r13, %r0 ++ sub r6, #16 /* Size of remaining arguments. */ ++ mov r0, r13 /* Set stack pointer to top of stack. */ ++ sub r0, r0, r6 ++ lsr r0, #3 /* Align stack on 8 byte boundaries. */ ++ lsl r0, #3 ++ mov r13, r0 + +- add %r1, #16 /* Let r1 point to remaining arguments. */ +- mov %r2, #0 /* Init byte counter to 0. */ ++ add r1, #16 /* Let r1 point to remaining arguments. */ ++ mov r2, #0 /* Init byte counter to 0. */ + .thumb_func + pushArgs: +- ldrb %r3, [%r1, %r2] /* Load a byte into r3. */ +- strb %r3, [%r0, %r2] /* Push byte onto stack. */ +- add %r2, %r2, #1 /* Increment byte counter. */ +- cmp %r2, %r6 ++ ldrb r3, [r1, r2] /* Load a byte into r3. */ ++ strb r3, [r0, r2] /* Push byte onto stack. */ ++ add r2, r2, #1 /* Increment byte counter. */ ++ cmp r2, r6 + bne pushArgs + .thumb_func + call: +- ldmia %r5!, {%r0-%r3} /* Load first 4 arguments for new call into r0-r3. */ +- +- /* 'blx %r4' workaround for ARMv4t in THUMB: */ +- mov %r6, %r15 /* Load PC+2 instructions from here */ +- add %r6, #5 /* Increment by 2 instructions (Address of 'Epilog') and set bit 0 (THUMB) */ +- mov %r14, %r6 /* Store in link register. */ +- bx %r4 /* Branch and force THUMB-mode return (LR bit 0 set). */ ++ ldmia r5!, {r0-r3} /* Load first 4 arguments for new call into r0-r3. */ + ++ /* 'blx r4' workaround for ARMv4t in THUMB: */ ++ mov r6, r15 /* Load PC+2 instructions from here */ ++ add r6, #5 /* Increment by 2 instructions (Address of 'Epilog') and set bit 0 (THUMB) */ ++ mov r14, r6 /* Store in link register. */ ++ bx r4 /* Branch and force THUMB-mode return (LR bit 0 set). */ ++ + /* Epilog. 
*/ +- mov %r13, %r7 /* Reset stack ptr. */ +- pop {%r4-%r7, %r15} /* Restore permanent registers and program counter. (Force a stay in THUMB in ARMv4, whether ARMv5 can return in ARM or THUMB depending on the bit 0. */ ++ mov r13, r7 /* Reset stack ptr. */ ++ pop {r4-r7, r15} /* Restore permanent registers and program counter. (Force a stay in THUMB in ARMv4, whether ARMv5 can return in ARM or THUMB depending on the bit 0. */ + Index: patches/patch-dyncallback_dyncall_callback_arm32_arm_gas_S =================================================================== RCS file: patches/patch-dyncallback_dyncall_callback_arm32_arm_gas_S diff -N patches/patch-dyncallback_dyncall_callback_arm32_arm_gas_S --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ patches/patch-dyncallback_dyncall_callback_arm32_arm_gas_S 6 Aug 2024 19:53:37 -0000 @@ -0,0 +1,69 @@ +Index: dyncallback/dyncall_callback_arm32_arm_gas.S +--- dyncallback/dyncall_callback_arm32_arm_gas.S.orig ++++ dyncallback/dyncall_callback_arm32_arm_gas.S +@@ -6,7 +6,7 @@ + Description: Callback Thunk - Implementation for ARM32 (ARM mode) + License: + +- Copyright (c) 2007-2018 Daniel Adler , ++ Copyright (c) 2007-2021 Daniel Adler , + Tassilo Philipp + + Permission to use, copy, modify, and distribute this software for any +@@ -47,37 +47,37 @@ CTX_userdata = 16 + dcCallbackThunkEntry: + + /* Prolog. This function never needs to spill inside its prolog, so just store the permanent registers. */ +- stmdb %r13, {%r4-%r11, %r13, %r14} /* Permanent registers and stack pointer, etc... -> save area on stack (except counter). */ +- mov %r11, %r13 /* Set frame pointer. */ +- sub %r13, %r13, #40 /* Adjust stack pointer. */ ++ stmdb r13, {r4-r11, r13, r14} /* Permanent registers and stack pointer, etc... -> save area on stack (except counter). */ ++ mov r11, r13 /* Set frame pointer. */ ++ sub r13, r13, #40 /* Adjust stack pointer. */ + + /* Grab arguments. 
*/ +- mov %r4, #0 ++ mov r4, #0 + #if defined(DC__ABI_ARM_HF) +- stmdb %r13!, {%r4} /* Init freg_count and dreg_count to 0 */ +- stmdb %r13!, {%r4} +- fstmdbd %r13!, {d0-d7} /* Store all fp-registers in DCArgs' f[16] */ ++ stmdb r13!, {r4} /* Init freg_count and dreg_count to 0 */ ++ stmdb r13!, {r4} ++ vstmdb r13!, {d0-d7} /* Store all fp-registers in DCArgs' f[16] */ + #endif +- stmdb %r13!, {%r0-%r4, %r11} /* Spill first 4 args to DCArgs, along with reg_count (init to 0) and (stack) pointer to remaining args. */ ++ stmdb r13!, {r0-r4, r11} /* Spill first 4 args to DCArgs, along with reg_count (init to 0) and (stack) pointer to remaining args. */ + + /* Prepare callback handler call. */ +- mov %r0, %r12 /* Parameter 0 (r0) = DCCallback pointer (r12, pointer to thunk). */ +- mov %r1, %r13 /* Parameter 1 (r1) = DCArgs pointer (r13, stack pointer). */ +- sub %r13, %r13, #DCValue_size /* Make room for return value. */ +- mov %r2, %r13 /* Parameter 2 (r2) = results pointer. */ +- ldr %r3, [%r12, #CTX_userdata] /* Parameter 3 (r3) = userdata pointer. */ ++ mov r0, r12 /* Parameter 0 (r0) = DCCallback pointer (r12, pointer to thunk). */ ++ mov r1, r13 /* Parameter 1 (r1) = DCArgs pointer (r13, stack pointer). */ ++ sub r13, r13, #DCValue_size /* Make room for return value. */ ++ mov r2, r13 /* Parameter 2 (r2) = results pointer. */ ++ ldr r3, [r12, #CTX_userdata] /* Parameter 3 (r3) = userdata pointer. */ + + /* Call. */ +- ldr %r4, [%r12, #CTX_handler] /* Load callback handler pointer into r4. */ +- mov %r14, %r15 /* Branch return address(r15) -> link register (r14) -- r15 always points to address of current + 2 instructions (= Epilog code). */ +- bx %r4 /* Call. */ ++ ldr r4, [r12, #CTX_handler] /* Load callback handler pointer into r4. */ ++ mov r14, r15 /* Branch return address(r15) -> link register (r14) -- r15 always points to address of current + 2 instructions (= Epilog code). */ ++ bx r4 /* Call. */ + + /* Return value. 
*/ +- ldmia %r13, {%r0, %r1} /* Load return value in r0 and r1. */ ++ ldmia r13, {r0, r1} /* Load return value in r0 and r1. */ + #if defined(DC__ABI_ARM_HF) +- fldmiad %r13, {%d0} /* Same for floating point return value (if any). */ ++ vldmia r13, {d0} /* Same for floating point return value (if any). */ + #endif + + /* Epilog. */ +- ldmdb %r11, {%r4-%r11, %r13, %r15} /* Restore permanent registers (restore stack ptr and program counter).@@@db not needed since we rewrite r13? */ ++ ldmdb r11, {r4-r11, r13, r15} /* Restore permanent registers (restore stack ptr and program counter).@@@db not needed since we rewrite r13? */ + Index: patches/patch-dyncallback_dyncall_callback_arm32_arm_gas_S~ =================================================================== RCS file: patches/patch-dyncallback_dyncall_callback_arm32_arm_gas_S~ diff -N patches/patch-dyncallback_dyncall_callback_arm32_arm_gas_S~ --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ patches/patch-dyncallback_dyncall_callback_arm32_arm_gas_S~ 6 Aug 2024 19:53:37 -0000 @@ -0,0 +1,69 @@ +Index: dyncallback/dyncall_callback_arm32_arm_gas.S +--- dyncallback/dyncall_callback_arm32_arm_gas.S.orig ++++ dyncallback/dyncall_callback_arm32_arm_gas.S +@@ -6,7 +6,7 @@ + Description: Callback Thunk - Implementation for ARM32 (ARM mode) + License: + +- Copyright (c) 2007-2018 Daniel Adler , ++ Copyright (c) 2007-2021 Daniel Adler , + Tassilo Philipp + + Permission to use, copy, modify, and distribute this software for any +@@ -47,37 +47,37 @@ CTX_userdata = 16 + dcCallbackThunkEntry: + + /* Prolog. This function never needs to spill inside its prolog, so just store the permanent registers. */ +- stmdb %r13, {%r4-%r11, %r13, %r14} /* Permanent registers and stack pointer, etc... -> save area on stack (except counter). */ +- mov %r11, %r13 /* Set frame pointer. */ +- sub %r13, %r13, #40 /* Adjust stack pointer. */ ++ stmdb r13, {r4-r11, r13, r14} /* Permanent registers and stack pointer, etc... 
-> save area on stack (except counter). */ ++ mov r11, r13 /* Set frame pointer. */ ++ sub r13, r13, #40 /* Adjust stack pointer. */ + + /* Grab arguments. */ +- mov %r4, #0 ++ mov r4, #0 + #if defined(DC__ABI_ARM_HF) +- stmdb %r13!, {%r4} /* Init freg_count and dreg_count to 0 */ +- stmdb %r13!, {%r4} +- fstmdbd %r13!, {d0-d7} /* Store all fp-registers in DCArgs' f[16] */ ++ stmdb r13!, {r4} /* Init freg_count and dreg_count to 0 */ ++ stmdb r13!, {r4} ++ fstmdbd r13!, {d0-d7} /* Store all fp-registers in DCArgs' f[16] */ + #endif +- stmdb %r13!, {%r0-%r4, %r11} /* Spill first 4 args to DCArgs, along with reg_count (init to 0) and (stack) pointer to remaining args. */ ++ stmdb r13!, {r0-r4, r11} /* Spill first 4 args to DCArgs, along with reg_count (init to 0) and (stack) pointer to remaining args. */ + + /* Prepare callback handler call. */ +- mov %r0, %r12 /* Parameter 0 (r0) = DCCallback pointer (r12, pointer to thunk). */ +- mov %r1, %r13 /* Parameter 1 (r1) = DCArgs pointer (r13, stack pointer). */ +- sub %r13, %r13, #DCValue_size /* Make room for return value. */ +- mov %r2, %r13 /* Parameter 2 (r2) = results pointer. */ +- ldr %r3, [%r12, #CTX_userdata] /* Parameter 3 (r3) = userdata pointer. */ ++ mov r0, r12 /* Parameter 0 (r0) = DCCallback pointer (r12, pointer to thunk). */ ++ mov r1, r13 /* Parameter 1 (r1) = DCArgs pointer (r13, stack pointer). */ ++ sub r13, r13, #DCValue_size /* Make room for return value. */ ++ mov r2, r13 /* Parameter 2 (r2) = results pointer. */ ++ ldr r3, [r12, #CTX_userdata] /* Parameter 3 (r3) = userdata pointer. */ + + /* Call. */ +- ldr %r4, [%r12, #CTX_handler] /* Load callback handler pointer into r4. */ +- mov %r14, %r15 /* Branch return address(r15) -> link register (r14) -- r15 always points to address of current + 2 instructions (= Epilog code). */ +- bx %r4 /* Call. */ ++ ldr r4, [r12, #CTX_handler] /* Load callback handler pointer into r4. 
*/ ++ mov r14, r15 /* Branch return address(r15) -> link register (r14) -- r15 always points to address of current + 2 instructions (= Epilog code). */ ++ bx r4 /* Call. */ + + /* Return value. */ +- ldmia %r13, {%r0, %r1} /* Load return value in r0 and r1. */ ++ ldmia r13, {r0, r1} /* Load return value in r0 and r1. */ + #if defined(DC__ABI_ARM_HF) +- fldmiad %r13, {%d0} /* Same for floating point return value (if any). */ ++ fldmiad r13, {d0} /* Same for floating point return value (if any). */ + #endif + + /* Epilog. */ +- ldmdb %r11, {%r4-%r11, %r13, %r15} /* Restore permanent registers (restore stack ptr and program counter).@@@db not needed since we rewrite r13? */ ++ ldmdb r11, {r4-r11, r13, r15} /* Restore permanent registers (restore stack ptr and program counter).@@@db not needed since we rewrite r13? */ + -- Minnie Mouse is a slow maze learner.