Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions arch/arm/core/isr_wrapper.S
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,10 @@ SECTION_FUNC(TEXT, _isr_wrapper)

push {lr} /* lr is now the first item on the stack */

#ifdef CONFIG_EXECUTION_BENCHMARKING
bl read_systick_start_of_isr
#endif

#ifdef CONFIG_KERNEL_EVENT_LOGGER_INTERRUPT
bl _sys_k_event_logger_interrupt
#endif
Expand Down Expand Up @@ -105,6 +109,20 @@ _idle_state_cleared:
* in thumb mode */

ldm r1!,{r0,r3} /* arg in r0, ISR in r3 */
#ifdef CONFIG_EXECUTION_BENCHMARKING
#if defined(CONFIG_ARMV6_M)
push {r3}
#endif
push {lr}
bl read_systick_end_of_isr
#if defined(CONFIG_ARMV6_M)
pop {r3}
mov lr,r3
pop {r3}
#else
pop {lr}
#endif
#endif
blx r3 /* call ISR */

#if defined(CONFIG_ARMV6_M)
Expand Down
31 changes: 31 additions & 0 deletions arch/arm/core/swap.S
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,23 @@ _thread_irq_disabled:

msr PSP, ip

#ifdef CONFIG_EXECUTION_BENCHMARKING
#if defined(CONFIG_ARMV6_M)
push {r3}
#endif
push {lr}

bl read_systick_end_of_swap

#if defined(CONFIG_ARMV6_M)
pop {r3}
mov lr,r3
pop {r3}
#else
pop {lr}
#endif
#endif

/* exc return */
bx lr

Expand Down Expand Up @@ -292,6 +309,20 @@ _oops:

SECTION_FUNC(TEXT, __swap)

#ifdef CONFIG_EXECUTION_BENCHMARKING
#if defined(CONFIG_ARMV6_M)
push {r3}
#endif
push {lr}
bl read_systick_start_of_swap
#if defined(CONFIG_ARMV6_M)
pop {r3}
mov lr,r3
pop {r3}
#else
pop {lr}
#endif
#endif
ldr r1, =_kernel
ldr r2, [r1, #_kernel_offset_to_current]
str r0, [r2, #_thread_offset_to_basepri]
Expand Down
22 changes: 22 additions & 0 deletions arch/x86/core/intstub.S
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,15 @@
*/
SECTION_FUNC(TEXT, _interrupt_enter)

#ifdef CONFIG_EXECUTION_BENCHMARKING
pushl %eax
pushl %edx
rdtsc
mov %eax, __start_intr_tsc
mov %edx, __start_intr_tsc+4
pop %edx
pop %eax
#endif
/*
* The gen_idt tool creates an interrupt-gate descriptor for
* all connections. The processor will automatically clear the IF
Expand Down Expand Up @@ -246,6 +255,19 @@ alreadyOnIntStack:
*/
push %eax
#endif
#ifdef CONFIG_EXECUTION_BENCHMARKING
/* Save the eax and edx registers before reading the time stamp
* once done pop the values
*/
pushl %eax
pushl %edx
rdtsc
mov %eax,__end_intr_tsc
mov %edx,__end_intr_tsc+4
pop %edx
pop %eax
#endif

#ifdef CONFIG_NESTED_INTERRUPTS
sti /* re-enable interrupts */
#endif
Expand Down
32 changes: 32 additions & 0 deletions arch/x86/core/swap.S
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,28 @@
*
*/

.macro read_tsc var_name
push %eax
push %edx
rdtsc
mov %eax,\var_name
mov %edx,\var_name+4
pop %edx
pop %eax
.endm
SECTION_FUNC(TEXT, __swap)
#ifdef CONFIG_EXECUTION_BENCHMARKING
/* Save the eax and edx registers before reading the time stamp
* once done pop the values.
*/
push %eax
push %edx
rdtsc
mov %eax,__start_swap_tsc
mov %edx,__start_swap_tsc+4
pop %edx
pop %eax
#endif
#ifdef CONFIG_X86_IAMCU
/* save EFLAGS on stack right before return address, just as SYSV would
* have done
Expand Down Expand Up @@ -335,5 +356,16 @@ skipIntLatencyStop:
*/
popl %edx
movl %edx, (%esp)
#endif

#ifdef CONFIG_EXECUTION_BENCHMARKING
/* Save the eax and edx registers before reading the time stamp
* once done pop the values.
*/
cmp $0x1,__read_swap_end_tsc_value
jne time_read_not_needed
movw $0x2,__read_swap_end_tsc_value
read_tsc __common_var_swap_end_tsc
time_read_not_needed:
#endif
ret
10 changes: 10 additions & 0 deletions drivers/timer/cortex_m_systick.c
Original file line number Diff line number Diff line change
Expand Up @@ -211,6 +211,11 @@ void _timer_int_handler(void *unused)
{
ARG_UNUSED(unused);

#ifdef CONFIG_EXECUTION_BENCHMARKING
extern void read_systick_start_of_tick_handler(void);
read_systick_start_of_tick_handler();
#endif

#ifdef CONFIG_KERNEL_EVENT_LOGGER_INTERRUPT
extern void _sys_k_event_logger_interrupt(void);
_sys_k_event_logger_interrupt();
Expand Down Expand Up @@ -337,6 +342,11 @@ void _timer_int_handler(void *unused)

#endif /* CONFIG_SYS_POWER_MANAGEMENT */

#ifdef CONFIG_EXECUTION_BENCHMARKING
extern void read_systick_end_of_tick_handler(void);
read_systick_end_of_tick_handler();
#endif

extern void _ExcExit(void);
_ExcExit();
}
Expand Down
21 changes: 20 additions & 1 deletion drivers/timer/loapic_timer.c
Original file line number Diff line number Diff line change
Expand Up @@ -252,6 +252,16 @@ static inline void program_max_cycles(void)
void _timer_int_handler(void *unused /* parameter is not used */
)
{
#ifdef CONFIG_EXECUTION_BENCHMARKING
__asm__ __volatile__ (
"pushl %eax\n\t"
"pushl %edx\n\t"
"rdtsc\n\t"
"mov %eax, __start_tick_tsc\n\t"
"mov %edx, __start_tick_tsc+4\n\t"
"pop %edx\n\t"
"pop %eax\n\t");
#endif
ARG_UNUSED(unused);

#if defined(CONFIG_TICKLESS_KERNEL)
Expand Down Expand Up @@ -324,7 +334,16 @@ void _timer_int_handler(void *unused /* parameter is not used */
_sys_clock_tick_announce();
#endif /*CONFIG_TICKLESS_IDLE*/
#endif

#ifdef CONFIG_EXECUTION_BENCHMARKING
__asm__ __volatile__ (
"pushl %eax\n\t"
"pushl %edx\n\t"
"rdtsc\n\t"
"mov %eax, __end_tick_tsc\n\t"
"mov %edx, __end_tick_tsc+4\n\t"
"pop %edx\n\t"
"pop %eax\n\t");
#endif /* CONFIG_EXECUTION_BENCHMARKING */
}

#ifdef CONFIG_TICKLESS_KERNEL
Expand Down
10 changes: 10 additions & 0 deletions kernel/Kconfig
Original file line number Diff line number Diff line change
Expand Up @@ -204,6 +204,16 @@ config INT_LATENCY_BENCHMARK
The metrics are displayed (and a new sampling interval is started)
each time int_latency_show() is called thereafter.

config EXECUTION_BENCHMARKING
bool
prompt "Timing metrics "
default n
help
This option enables the tracking of various times inside the kernel
the exact set of metrics being tracked is board-dependent.
All timing measurements are enabled for X86 and ARM based architectures.
In other architectures only a subset are enabled.

config THREAD_MONITOR
bool
prompt "Thread monitoring [EXPERIMENTAL]"
Expand Down
8 changes: 8 additions & 0 deletions kernel/init.c
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,14 @@ u64_t __noinit __main_tsc; /* timestamp when main task starts */
u64_t __noinit __idle_tsc; /* timestamp when CPU goes idle */
#endif

#ifdef CONFIG_EXECUTION_BENCHMARKING
u64_t __noinit __start_swap_tsc;
u64_t __noinit __end_swap_tsc;
u64_t __noinit __start_intr_tsc;
u64_t __noinit __end_intr_tsc;
u64_t __noinit __start_tick_tsc;
u64_t __noinit __end_tick_tsc;
#endif
/* init/main and idle threads */

#define IDLE_STACK_SIZE CONFIG_IDLE_STACK_SIZE
Expand Down
4 changes: 4 additions & 0 deletions tests/benchmarks/timing_info/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
BOARD ?= qemu_x86
CONF_FILE = prj.conf

include ${ZEPHYR_BASE}/Makefile.inc
111 changes: 111 additions & 0 deletions tests/benchmarks/timing_info/README.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
Title: Timing Information

Description:

Timing measurements for the following features of the OS.
1. Context switch
Time taken to compete the context switch, i.e time spent in _Swap function.
2. Interrupt latency
Time taken from the start of the common interrupt handler till the
actual ISR handler being called.
3. Tick overhead
Time spent by the cpu in the tick handler.
4. Thread Creation
Time spent in creating a thread.
5. Thread cancel
Time taken to cancel the thread which is not yet started execution.
So the time taken to complete the function call is measured.
6. Thread abort
Time taken to abort the thread which has already started execution.
So the time measured is from the start of the function call until the
another thread is swapped in.
7. Thread Suspend
The time measured is from the start of the function call until the current
thread is suspended and another thread is swapped in.
8. Thread Resume
The time measured is from the start of the function call until the required
thread is resumed by swap operation.
9. Thread Yield
The time measured is from the start of the function call until the higher priority
thread is swapped in.
10. Thread Sleep
The time measured is from the start of the function call until the current
thread is put on the timeout queue and another thread is swapped in.
11. Heap Malloc
The time to allocate heap memory in fixed size chunks. Continuously allocate
the memory from the pool. Average time taken to complete the function call
is measured.
12. Heap Free
Time to free heap memory in fixed size chunks. Continuously free
the memory that was allocated. Average time taken to complete the function call
is measured.
13. Semaphore Take with context switch
Taking a semaphore causes a thread waiting on the semaphore to be swapped in.
Thus Time measured is the time taken from the function call till the waiting
thread is swapped in.
14. Semaphore Give with context switch
Giving a semaphore causes a thread waiting on the semaphore to be swapped
in (higher priority).
Thus Time measured is the time taken from the function call till the waiting
thread is swapped in.
15. Semaphore Take without context switch
Time taken to take the semaphore. Thus time to complete the function
call is measured.
16. Semaphore Give without context switch
Time taken to give the semaphore. Thus time to complete the function
call is measured.
17. Mutex lock
Time taken to lock the mutex. Thus time to complete the function
call is measured.
18. Mutex unlock
Time taken to unlock the mutex. Thus time to complete the function
call is measured.
19. Message Queue Put with context switch
A thread is waiting for a message to arrive. The time taken from the start
of the function call till the waiting thread is swapped in is measured.
20. Message Queue Put without context switch
The time taken to complete the function call is measured.
21. Message Queue get with context switch
A thread has gone into waiting because the message queue is full.
When a get occurs this thread gets free to execute. The time taken from
the start of the function call till the waiting thread is
swapped in is measured.
22. Message Queue get without context switch
The time taken to complete the function call is measured.
23. MailBox synchronous put
The time taken from the start of the function call till the waiting thread
is swapped in is measured.
24. MailBox synchronous get
The time taken from the start of the function call till the waiting thread
is swapped in is measured.
25. MailBox asynchronous put
The time taken to complete the function call is measured.
26. MailBox get without context switch
The time taken to complete the function call is measured.


--------------------------------------------------------------------------------

Building and Running Project:

This benchmark outputs to the console. It can be built and executed
on QEMU as follows:

make qemu

--------------------------------------------------------------------------------

Troubleshooting:

Problems caused by out-dated project information can be addressed by
issuing one of the following commands then rebuilding the project:

make clean # discard results of previous builds
# but keep existing configuration info
or
make pristine # discard results of previous builds
# and restore pre-defined configuration info

--------------------------------------------------------------------------------

Sample Output:
4 changes: 4 additions & 0 deletions tests/benchmarks/timing_info/prj.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
CONFIG_EXECUTION_BENCHMARKING=y
CONFIG_TEST_RANDOM_GENERATOR=y
CONFIG_HEAP_MEM_POOL_SIZE=256
CONFIG_MAIN_STACK_SIZE=2048
7 changes: 7 additions & 0 deletions tests/benchmarks/timing_info/src/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
ccflags-y += -I$(ZEPHYR_BASE)/tests/include

obj-$(CONFIG_EXECUTION_BENCHMARKING) += main_benchmark.o
obj-$(CONFIG_EXECUTION_BENCHMARKING) += thread_bench.o
obj-$(CONFIG_EXECUTION_BENCHMARKING) += yield_bench.o
obj-$(CONFIG_EXECUTION_BENCHMARKING) += semaphore_bench.o
obj-$(CONFIG_EXECUTION_BENCHMARKING) += msg_passing_bench.o
Loading