~ruther/qmk_firmware

7e844a4d55ee2e7e0b14a1c3a2aca3eebe259e8b — Stefan Kerkmann 2 years ago 996a900
[Core] RP2040: use built-in integer hardware divider and optimized i64 multiplication (#18464)

1 files changed, 31 insertions(+), 220 deletions(-)

M platforms/chibios/vendors/RP/RP2040.mk
M platforms/chibios/vendors/RP/RP2040.mk => platforms/chibios/vendors/RP/RP2040.mk +31 -220
@@ 66,223 66,34 @@ EXTRAINCDIRS += $(PLATFORM_RP2040_PATH)
# RP2040 optimized compiler intrinsics
##############################################################################

# Enables optimized Compiler intrinsics which are located in the RP2040
# bootrom. This needs startup code and linker script support from ChibiOS,
# which is WIP. Therefore disabled by default for now.
RP2040_INTRINSICS_ENABLED ?= no
ifeq ($(strip $(RP2040_INTRINSICS_ENABLED)), yes)
    PICOSDKINTRINSICSSRC =  $(PICOSDKROOT)/src/rp2_common/pico_float/float_aeabi.S \
                            $(PICOSDKROOT)/src/rp2_common/pico_float/float_math.c \
                            $(PICOSDKROOT)/src/rp2_common/pico_float/float_init_rom.c \
                            $(PICOSDKROOT)/src/rp2_common/pico_float/float_v1_rom_shim.S \
                            $(PICOSDKROOT)/src/rp2_common/pico_double/double_aeabi.S \
                            $(PICOSDKROOT)/src/rp2_common/pico_double/double_math.c \
                            $(PICOSDKROOT)/src/rp2_common/pico_double/double_init_rom.c \
                            $(PICOSDKROOT)/src/rp2_common/pico_double/double_v1_rom_shim.S \
                            $(PICOSDKROOT)/src/rp2_common/pico_divider/divider.S \
                            $(PICOSDKROOT)/src/rp2_common/pico_int64_ops/pico_int64_ops_aeabi.S \
                            $(PICOSDKROOT)/src/rp2_common/pico_mem_ops/mem_ops_aeabi.S \
                            $(PICOSDKROOT)/src/rp2_common/pico_malloc/pico_malloc.c \
                            $(PICOSDKROOT)/src/rp2_common/pico_bit_ops/bit_ops_aeabi.S

    PICOSDKINTRINSICSINC =  $(PICOSDKROOT)/src/common/pico_base/include \
                            $(PICOSDKROOT)/src/rp2_common/pico_platfrom/include \
                            $(PICOSDKROOT)/src/rp2_common/pico_bootrom/include \
                            $(PICOSDKROOT)/src/rp2_common/hardware_divider/include \
                            $(PICOSDKROOT)/src/rp2_common/pico_float/include \
                            $(PICOSDKROOT)/src/rp2_common/pico_double/include \
                            $(PICOSDKROOT)/src/rp2_common/pico_malloc/include

    OPT_DEFS += -DPICO_FLOAT_SUPPORT_ROM_V1=0 -DPICO_DOUBLE_SUPPORT_ROM_V1=0

    CFLAGS += -Wl,--defsym=__StackLimit=__heap_end__
    CFLAGS += -Wl,--defsym=__unhandled_user_irq=_unhandled_exception
    CFLAGS += -Wl,--build-id=none

    # single precision floating point intrinsics
    OPT_DEFS += -DPICO_FLOAT_IN_RAM=1
    OPT_DEFS += -DPICO_FLOAT_PROPAGATE_NANS=0

    CFLAGS += -Wl,--wrap=__aeabi_fdiv
    CFLAGS += -Wl,--wrap=__aeabi_fmul
    CFLAGS += -Wl,--wrap=__aeabi_frsub
    CFLAGS += -Wl,--wrap=__aeabi_fsub
    CFLAGS += -Wl,--wrap=__aeabi_cfcmpeq
    CFLAGS += -Wl,--wrap=__aeabi_cfrcmple
    CFLAGS += -Wl,--wrap=__aeabi_cfcmple
    CFLAGS += -Wl,--wrap=__aeabi_fcmpeq
    CFLAGS += -Wl,--wrap=__aeabi_fcmplt
    CFLAGS += -Wl,--wrap=__aeabi_fcmple
    CFLAGS += -Wl,--wrap=__aeabi_fcmpge
    CFLAGS += -Wl,--wrap=__aeabi_fcmpgt
    CFLAGS += -Wl,--wrap=__aeabi_fcmpun
    CFLAGS += -Wl,--wrap=__aeabi_i2f
    CFLAGS += -Wl,--wrap=__aeabi_l2f
    CFLAGS += -Wl,--wrap=__aeabi_ui2f
    CFLAGS += -Wl,--wrap=__aeabi_ul2f
    CFLAGS += -Wl,--wrap=__aeabi_i2f
    CFLAGS += -Wl,--wrap=__aeabi_f2iz
    CFLAGS += -Wl,--wrap=__aeabi_f2lz
    CFLAGS += -Wl,--wrap=__aeabi_f2uiz
    CFLAGS += -Wl,--wrap=__aeabi_f2ulz
    CFLAGS += -Wl,--wrap=__aeabi_f2d
    CFLAGS += -Wl,--wrap=sqrtf
    CFLAGS += -Wl,--wrap=cosf
    CFLAGS += -Wl,--wrap=sinf
    CFLAGS += -Wl,--wrap=tanf
    CFLAGS += -Wl,--wrap=atan2f
    CFLAGS += -Wl,--wrap=expf
    CFLAGS += -Wl,--wrap=logf
    CFLAGS += -Wl,--wrap=ldexpf
    CFLAGS += -Wl,--wrap=copysignf
    CFLAGS += -Wl,--wrap=truncf
    CFLAGS += -Wl,--wrap=floorf
    CFLAGS += -Wl,--wrap=ceilf
    CFLAGS += -Wl,--wrap=roundf
    CFLAGS += -Wl,--wrap=sincosf
    CFLAGS += -Wl,--wrap=asinf
    CFLAGS += -Wl,--wrap=acosf
    CFLAGS += -Wl,--wrap=atanf
    CFLAGS += -Wl,--wrap=sinhf
    CFLAGS += -Wl,--wrap=coshf
    CFLAGS += -Wl,--wrap=tanhf
    CFLAGS += -Wl,--wrap=asinhf
    CFLAGS += -Wl,--wrap=acoshf
    CFLAGS += -Wl,--wrap=atanhf
    CFLAGS += -Wl,--wrap=exp2f
    CFLAGS += -Wl,--wrap=log2f
    CFLAGS += -Wl,--wrap=exp10f
    CFLAGS += -Wl,--wrap=log10f
    CFLAGS += -Wl,--wrap=powf
    CFLAGS += -Wl,--wrap=powintf
    CFLAGS += -Wl,--wrap=hypotf
    CFLAGS += -Wl,--wrap=cbrtf
    CFLAGS += -Wl,--wrap=fmodf
    CFLAGS += -Wl,--wrap=dremf
    CFLAGS += -Wl,--wrap=remainderf
    CFLAGS += -Wl,--wrap=remquof
    CFLAGS += -Wl,--wrap=expm1f
    CFLAGS += -Wl,--wrap=log1pf
    CFLAGS += -Wl,--wrap=fmaf

    # double precision floating point intrinsics
    OPT_DEFS += -DPICO_DOUBLE_IN_RAM=1
    OPT_DEFS += -DPICO_DOUBLE_PROPAGATE_NANS=0

    CFLAGS += -Wl,--wrap=__aeabi_dadd
    CFLAGS += -Wl,--wrap=__aeabi_ddiv
    CFLAGS += -Wl,--wrap=__aeabi_dmul
    CFLAGS += -Wl,--wrap=__aeabi_drsub
    CFLAGS += -Wl,--wrap=__aeabi_dsub
    CFLAGS += -Wl,--wrap=__aeabi_cdcmpeq
    CFLAGS += -Wl,--wrap=__aeabi_cdrcmple
    CFLAGS += -Wl,--wrap=__aeabi_cdcmple
    CFLAGS += -Wl,--wrap=__aeabi_dcmpeq
    CFLAGS += -Wl,--wrap=__aeabi_dcmplt
    CFLAGS += -Wl,--wrap=__aeabi_dcmple
    CFLAGS += -Wl,--wrap=__aeabi_dcmpge
    CFLAGS += -Wl,--wrap=__aeabi_dcmpgt
    CFLAGS += -Wl,--wrap=__aeabi_dcmpun
    CFLAGS += -Wl,--wrap=__aeabi_i2d
    CFLAGS += -Wl,--wrap=__aeabi_l2d
    CFLAGS += -Wl,--wrap=__aeabi_ui2d
    CFLAGS += -Wl,--wrap=__aeabi_ul2d
    CFLAGS += -Wl,--wrap=__aeabi_d2iz
    CFLAGS += -Wl,--wrap=__aeabi_d2lz
    CFLAGS += -Wl,--wrap=__aeabi_d2uiz
    CFLAGS += -Wl,--wrap=__aeabi_d2ulz
    CFLAGS += -Wl,--wrap=__aeabi_d2f
    CFLAGS += -Wl,--wrap=sqrt
    CFLAGS += -Wl,--wrap=cos
    CFLAGS += -Wl,--wrap=sin
    CFLAGS += -Wl,--wrap=tan
    CFLAGS += -Wl,--wrap=atan2
    CFLAGS += -Wl,--wrap=exp
    CFLAGS += -Wl,--wrap=log
    CFLAGS += -Wl,--wrap=ldexp
    CFLAGS += -Wl,--wrap=copysign
    CFLAGS += -Wl,--wrap=trunc
    CFLAGS += -Wl,--wrap=floor
    CFLAGS += -Wl,--wrap=ceil
    CFLAGS += -Wl,--wrap=round
    CFLAGS += -Wl,--wrap=sincos
    CFLAGS += -Wl,--wrap=asin
    CFLAGS += -Wl,--wrap=acos
    CFLAGS += -Wl,--wrap=atan
    CFLAGS += -Wl,--wrap=sinh
    CFLAGS += -Wl,--wrap=cosh
    CFLAGS += -Wl,--wrap=tanh
    CFLAGS += -Wl,--wrap=asinh
    CFLAGS += -Wl,--wrap=acosh
    CFLAGS += -Wl,--wrap=atanh
    CFLAGS += -Wl,--wrap=exp2
    CFLAGS += -Wl,--wrap=log2
    CFLAGS += -Wl,--wrap=exp10
    CFLAGS += -Wl,--wrap=log10
    CFLAGS += -Wl,--wrap=pow
    CFLAGS += -Wl,--wrap=powint
    CFLAGS += -Wl,--wrap=hypot
    CFLAGS += -Wl,--wrap=cbrt
    CFLAGS += -Wl,--wrap=fmod
    CFLAGS += -Wl,--wrap=drem
    CFLAGS += -Wl,--wrap=remainder
    CFLAGS += -Wl,--wrap=remquo
    CFLAGS += -Wl,--wrap=expm1
    CFLAGS += -Wl,--wrap=log1p
    CFLAGS += -Wl,--wrap=fma

    # bit operation intrinsics
    OPT_DEFS += -DPICO_BITS_IN_RAM=1

    CFLAGS += -Wl,--wrap=__clzsi2
    CFLAGS += -Wl,--wrap=__clzsi2
    CFLAGS += -Wl,--wrap=__clzdi2
    CFLAGS += -Wl,--wrap=__ctzsi2
    CFLAGS += -Wl,--wrap=__ctzdi2
    CFLAGS += -Wl,--wrap=__popcountsi2
    CFLAGS += -Wl,--wrap=__popcountdi2
    CFLAGS += -Wl,--wrap=__clz
    CFLAGS += -Wl,--wrap=__clzl
    CFLAGS += -Wl,--wrap=__clzsi2
    CFLAGS += -Wl,--wrap=__clzll

    # integer division intrinsics
    OPT_DEFS += -DPICO_DIVIDER_IN_RAM=1
    OPT_DEFS += -DPICO_DIVIDER_DISABLE_INTERRUPTS=1

    CFLAGS += -Wl,--wrap=__aeabi_idiv
    CFLAGS += -Wl,--wrap=__aeabi_idivmod
    CFLAGS += -Wl,--wrap=__aeabi_ldivmod
    CFLAGS += -Wl,--wrap=__aeabi_uidiv
    CFLAGS += -Wl,--wrap=__aeabi_uidivmod
    CFLAGS += -Wl,--wrap=__aeabi_uldivmod

    # 64bit integer intrinsics
    OPT_DEFS += -DPICO_INT64_OPS_IN_RAM=1

    CFLAGS += -Wl,--wrap=__aeabi_lmul

    # malloc and friends functions
    OPT_DEFS += -DPICO_USE_MALLOC_MUTEX=0
    OPT_DEFS += -DPICO_DEBUG_MALLOC=0
    OPT_DEFS ?= -DPICO_MALLOC_PANIC=0

    CFLAGS += -Wl,--wrap=malloc
    CFLAGS += -Wl,--wrap=calloc
    CFLAGS += -Wl,--wrap=free

    # memory operation intrinsics
    OPT_DEFS += -DPICO_MEM_IN_RAM=1

    CFLAGS += -Wl,--wrap=memcpy
    CFLAGS += -Wl,--wrap=memset
    CFLAGS += -Wl,--wrap=__aeabi_memcpy
    CFLAGS += -Wl,--wrap=__aeabi_memset
    CFLAGS += -Wl,--wrap=__aeabi_memcpy4
    CFLAGS += -Wl,--wrap=__aeabi_memset4
    CFLAGS += -Wl,--wrap=__aeabi_memcpy8
    CFLAGS += -Wl,--wrap=__aeabi_memset8

    PLATFORM_SRC += $(PICOSDKINTRINSICSSRC)
    EXTRAINCDIRS += $(PICOSDKINTRINSICSINC)
endif
# The RP2040 sdk provides optimized compiler intrinsics which override the GCC
# built-ins. Some of these functions are located in the bootrom of the RP2040.
# Execution of these functions is realized via a vtable that is populated on
# bootup. This mechanism needs startup code and linker script support from
# ChibiOS, which is currently not implemented thus these functions are disabled
# ATM.
PICOSDKINTRINSICSSRC =  $(PICOSDKROOT)/src/rp2_common/pico_divider/divider.S \
                        $(PICOSDKROOT)/src/rp2_common/pico_int64_ops/pico_int64_ops_aeabi.S

PICOSDKINTRINSICSINC =  $(PICOSDKROOT)/src/common/pico_base/include \
                        $(PICOSDKROOT)/src/rp2_common/pico_platfrom/include \
                        $(PICOSDKROOT)/src/rp2_common/hardware_divider/include

# integer division intrinsics utilizing the RP2040 hardware divider
OPT_DEFS += -DPICO_DIVIDER_IN_RAM=1
OPT_DEFS += -DPICO_DIVIDER_DISABLE_INTERRUPTS=1

CFLAGS += -Wl,--wrap=__aeabi_idiv
CFLAGS += -Wl,--wrap=__aeabi_idivmod
CFLAGS += -Wl,--wrap=__aeabi_ldivmod
CFLAGS += -Wl,--wrap=__aeabi_uidiv
CFLAGS += -Wl,--wrap=__aeabi_uidivmod
CFLAGS += -Wl,--wrap=__aeabi_uldivmod

# 64bit integer intrinsics
OPT_DEFS += -DPICO_INT64_OPS_IN_RAM=1

CFLAGS += -Wl,--wrap=__aeabi_lmul

PLATFORM_SRC += $(PICOSDKINTRINSICSSRC)
EXTRAINCDIRS += $(PICOSDKINTRINSICSINC)