Open jserv opened 6 years ago
Piko/RT depends on several string routines such as memcpy and memset. They can be optimized in Thumb2 assembly in consideration of performance and/or size.
memcpy
memset
Sample implementations for memcpy and memset:
/**
 * Copy l bytes from src to dst.
 *
 * On Thumb-2 targets the copy is done in inline assembly: when both
 * pointers are word aligned, whole words are moved first and the remaining
 * 0-3 bytes are moved one at a time; otherwise every byte is moved
 * individually. On any other target a plain C byte loop is used so the
 * routine can still be compiled (e.g. for host-side tests).
 *
 * Unlike the ISO C memcpy, this routine returns nothing.
 *
 * @param dst  destination buffer; must not overlap src (restrict)
 * @param src  source buffer
 * @param l    number of bytes to copy; 0 copies nothing
 */
static inline void memcpy(void *restrict dst, const void *restrict src, size_t l)
{
    unsigned char *d = dst;
    const unsigned char *s = src;

#if defined(__thumb2__)
    unsigned long tmp;

    /*
     * The pointers and the count are read-write, early-clobber operands so
     * the compiler picks the registers and never shares one of them with
     * another operand (post-indexed LDR/STR need Rt != Rn).
     */
    __asm__ volatile(
        "   orr     %[tmp], %[s], %[d]      \n" /* combine low address bits  */
        "   ands    %[tmp], %[tmp], #3      \n" /* either pointer unaligned? */
        "   bne     2f                      \n" /* yes: bytes only           */
        "1: cmp     %[n], #4                \n"
        "   ittt    hs                      \n"
        "   ldrhs   %[tmp], [%[s]], #4      \n"
        "   strhs   %[tmp], [%[d]], #4      \n"
        "   subshs  %[n], %[n], #4          \n" /* n >= 4, so carry stays set */
        "   bhs     1b                      \n"
        "2: cmp     %[n], #0                \n"
        "   ittt    ne                      \n"
        "   ldrbne  %[tmp], [%[s]], #1      \n"
        "   strbne  %[tmp], [%[d]], #1      \n"
        "   subsne  %[n], %[n], #1          \n" /* Z set once n reaches 0    */
        "   bne     2b                      \n"
        : [d] "+&r"(d), [s] "+&r"(s), [n] "+&r"(l), [tmp] "=&r"(tmp)
        :
        : "memory", "cc");
#else
    while (l--) {
        *d++ = *s++;
        /*
         * Empty barrier: guards against the optimizer recognizing this loop
         * as a memcpy idiom and emitting a call back into this function.
         */
        __asm__ volatile("" ::: "memory");
    }
#endif
}
/**
 * Fill l bytes starting at dst with the low 8 bits of v.
 *
 * On Thumb-2 targets the fill is done in inline assembly: when dst is word
 * aligned, whole words are stored first and the remaining 0-3 bytes are
 * stored one at a time; otherwise every byte is stored individually. On any
 * other target a plain C byte loop is used so the routine can still be
 * compiled (e.g. for host-side tests).
 *
 * Unlike the ISO C memset, this routine returns nothing.
 *
 * @param dst  buffer to fill
 * @param v    fill value; only (v & 0xff) is used
 * @param l    number of bytes to fill; 0 stores nothing
 */
static inline void memset(void *dst, int v, size_t l)
{
    unsigned char *d = dst;

#if defined(__thumb2__)
    /* Replicate the fill byte into every byte lane of a 32-bit word. */
    unsigned long fill = (unsigned long)(v & 0xff) * 0x01010101UL;

    /*
     * The pointer and the count are read-write, early-clobber operands so
     * the compiler picks the registers and never shares one of them with
     * the fill value (post-indexed STR needs Rt != Rn).
     */
    __asm__ volatile(
        "   tst     %[d], #3                \n" /* dst unaligned?            */
        "   bne     2f                      \n" /* yes: bytes only           */
        "1: cmp     %[n], #4                \n"
        "   itt     hs                      \n"
        "   strhs   %[fill], [%[d]], #4     \n"
        "   subshs  %[n], %[n], #4          \n" /* n >= 4, so carry stays set */
        "   bhs     1b                      \n"
        "2: cmp     %[n], #0                \n"
        "   itt     ne                      \n"
        "   strbne  %[fill], [%[d]], #1     \n"
        "   subsne  %[n], %[n], #1          \n" /* Z set once n reaches 0    */
        "   bne     2b                      \n"
        : [d] "+&r"(d), [n] "+&r"(l)
        : [fill] "r"(fill)
        : "memory", "cc");
#else
    const unsigned char byte = (unsigned char)(v & 0xff);

    while (l--) {
        *d++ = byte;
        /*
         * Empty barrier: guards against the optimizer recognizing this loop
         * as a memset idiom and emitting a call back into this function.
         */
        __asm__ volatile("" ::: "memory");
    }
#endif
}
Piko/RT depends on several string routines such as memcpy and memset. They can be optimized in Thumb2 assembly in consideration of performance and/or size.
Sample implementation for memcpy: