arch/intel64: optimise context switch

The interrupted TCB state is now saved directly in tcb->xcp.regs instead
of on the interrupt stack, which allows us to remove x86_64_savestate()
from up_switch_context() and other places.

Signed-off-by: p-szafonimateusz <p-szafonimateusz@xiaomi.com>
Signed-off-by: hujun5 <hujun5@xiaomi.com>
This commit is contained in:
p-szafonimateusz 2024-09-26 15:15:23 +02:00 committed by Xiang Xiao
parent 1c6daddae2
commit c18f722185
11 changed files with 156 additions and 140 deletions

View file

@ -461,6 +461,10 @@
#define XMMAREA_REGS (25)
/* Aux register used by implementation */
#define REG_AUX (26 + XMMAREA_REG_OFFSET)
/* NOTE 2: This is not really state data. Rather, this is just a convenient
* way to pass parameters from the interrupt handler to C code.
*/
@ -478,6 +482,10 @@
#define XCP_ALIGN_DOWN(a) ((a) & ~XCP_ALIGN_MASK)
#define XCP_ALIGN_UP(a) (((a) + XCP_ALIGN_MASK) & ~XCP_ALIGN_MASK)
/* Aux register flags */
#define REG_AUX_FULLCONTEXT (1 << 0) /* Force full context switch */
/****************************************************************************
* Public Types
****************************************************************************/

View file

@ -220,7 +220,6 @@ void x86_64_boardinitialize(void);
/* Defined in files with the same name as the function */
void x86_64_copystate(uint64_t *dest, uint64_t *src);
void x86_64_savestate(uint64_t *regs);
void x86_64_decodeirq(uint64_t *regs);
#ifdef CONFIG_ARCH_DMA
void weak_function x86_64_dmainitialize(void);

View file

@ -63,17 +63,11 @@ void up_switch_context(struct tcb_s *tcb, struct tcb_s *rtcb)
if (up_interrupt_context())
{
/* Yes, then we have to do things differently.
* Just copy the g_current_regs into the OLD rtcb.
*/
x86_64_savestate(rtcb->xcp.regs);
/* Restore addition x86_64 state */
x86_64_restore_auxstate(tcb);
/* Then switch contexts. Any necessary address environment
* changes will be made when the interrupt returns.
*/
/* Update current regs to signal that we need context switch */
x86_64_restorestate(tcb->xcp.regs);
}

View file

@ -31,7 +31,6 @@ set(SRCS
intel64_releasestack.c
intel64_rtc.c
intel64_restore_auxstate.c
intel64_savestate.c
intel64_stackframe.c
intel64_schedulesigaction.c
intel64_sigdeliver.c

View file

@ -22,7 +22,7 @@ include common/Make.defs
CMN_CSRCS += intel64_createstack.c intel64_initialstate.c intel64_irq.c
CMN_CSRCS += intel64_map_region.c intel64_regdump.c intel64_releasestack.c
CMN_CSRCS += intel64_rtc.c intel64_restore_auxstate.c intel64_savestate.c
CMN_CSRCS += intel64_rtc.c intel64_restore_auxstate.c
CMN_CSRCS += intel64_stackframe.c intel64_schedulesigaction.c
CMN_CSRCS += intel64_sigdeliver.c intel64_usestack.c x86_64_tcbinfo.c
CMN_CSRCS += intel64_systemreset.c intel64_freq.c intel64_cache.c

View file

@ -80,9 +80,8 @@ static uint64_t *common_handler(int irq, uint64_t *regs)
/* Check for a context switch. If a context switch occurred, then
* g_current_regs will have a different value than it did on entry. If an
* interrupt level context switch has occurred, then restore the floating
* point state and the establish the correct address environment before
* returning from the interrupt.
* interrupt level context switch has occurred, then establish the
* correct address environment before returning from the interrupt.
*/
if (regs != up_current_regs())
@ -238,3 +237,27 @@ uint64_t *irq_handler(uint64_t *regs, uint64_t irq_no)
return ret;
#endif
}
/****************************************************************************
 * Name: irq_xcp_regs
 *
 * Description:
 *   Return the XCP register save area of the task currently running on
 *   this CPU (current_task(this_cpu())->xcp.regs).
 *
 * ASSUMPTION:
 *   Interrupts are disabled.
 *
 *   This function should be called only from the intel64_vector.S file!
 *   Any other use must be carefully considered.
 *
 ****************************************************************************/
uint64_t *irq_xcp_regs(void)
{
/* This must be kept as simple as possible so that it does not use too
 * many registers.  As written, this function clobbers only the RAX and
 * RDX registers regardless of the compiler optimization level -- the
 * assembly caller relies on exactly that clobber set.
 */
return (current_task(this_cpu()))->xcp.regs;
}

View file

@ -1,50 +0,0 @@
/****************************************************************************
* arch/x86_64/src/intel64/intel64_savestate.c
*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership. The
* ASF licenses this file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance with the
* License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations
* under the License.
*
****************************************************************************/
/****************************************************************************
* Included Files
****************************************************************************/
#include <nuttx/config.h>
#include <debug.h>
#include <arch/arch.h>
#include <arch/irq.h>
#include "x86_64_internal.h"
/****************************************************************************
* Public Functions
****************************************************************************/
/****************************************************************************
 * Name: x86_64_savestate
 *
 * Description:
 *   Save the interrupt-level register context (up_current_regs()) into
 *   the caller-provided register save area (normally tcb->xcp.regs).
 *   This is just a thin wrapper around x86_64_copystate().
 *
 * Input Parameters:
 *   regs - Destination register save area.
 *
 ****************************************************************************/
void x86_64_savestate(uint64_t *regs)
{
x86_64_copystate(regs, (uint64_t *)up_current_regs());
}

View file

@ -80,13 +80,13 @@ up_saveusercontext:
/* Save the value of SP as will be at the time of the IRET that will
* appear to be the return from this function.
*
* CURRENT STACK IRET STACK
* ------------------------------ -----------------
* CURRENT STACK IRET STACK
* --------------------- -----------------
* RIP
* CS
* RFLAGS
* RSP
* ESP->Return address SS
* RSP->Return address SS
* Argument Alignment (16bytes)
*
*/

View file

@ -126,12 +126,6 @@ void up_schedule_sigaction(struct tcb_s *tcb)
up_current_regs()[REG_RIP] = (uint64_t)x86_64_sigdeliver;
up_current_regs()[REG_RSP] = up_current_regs()[REG_RSP] - 8;
up_current_regs()[REG_RFLAGS] = 0;
/* And make sure that the saved context in the TCB
* is the same as the interrupt return context.
*/
x86_64_savestate(tcb->xcp.regs);
}
}
@ -222,11 +216,14 @@ void up_schedule_sigaction(struct tcb_s *tcb)
up_current_regs()[REG_RSP] = up_current_regs()[REG_RSP] - 8;
up_current_regs()[REG_RFLAGS] = 0;
/* And make sure that the saved context in the TCB
* is the same as the interrupt return context.
/* Mark that a full context switch is necessary when we
* return from the interrupt handler.
* In that case RIP, RSP and RFLAGS are changed, but the
* register area pointer remains the same, so we need an
* additional variable to signal the need for a full context switch.
*/
x86_64_savestate(tcb->xcp.regs);
tcb->xcp.regs[REG_AUX] = REG_AUX_FULLCONTEXT;
}
}

View file

@ -62,7 +62,6 @@ int x86_64_smp_call_handler(int irq, void *c, void *arg)
int cpu = this_cpu();
tcb = current_task(cpu);
x86_64_savestate(tcb->xcp.regs);
nxsched_smp_call_handler(irq, c, arg);
tcb = current_task(cpu);
x86_64_restorestate(tcb->xcp.regs);
@ -97,7 +96,6 @@ int x86_64_smp_sched_handler(int irq, void *c, void *arg)
tcb = current_task(cpu);
nxsched_suspend_scheduler(tcb);
x86_64_savestate(tcb->xcp.regs);
nxsched_process_delivered(cpu);
tcb = current_task(cpu);
nxsched_resume_scheduler(tcb);

View file

@ -40,6 +40,7 @@
.globl irq_handler
.globl isr_handler
.globl irq_xcp_regs
.globl g_interrupt_stack
.globl g_interrupt_stack_end
.globl g_isr_stack
@ -720,6 +721,7 @@ isr_common:
pushq %rbx
pushq %rax
xor %rax, %rax /* Reset rax */
mov %ds, %ax /* Lower 16-bits of rax. */
pushq %rax /* Save the data segment descriptor */
mov %es, %ax /* Lower 16-bits of rax. */
@ -763,66 +765,112 @@ isr_common:
.type irq_common, @function
irq_common:
/* Already swap to the interrupt stack */
/* stack is automatically recovered by iretq using task state */
/* We have already swapped to the interrupt stack; the task stack
* is automatically recovered by iretq using the task state.
*/
/* x86_64 don't have pusha, we have to do things manually */
/* RDI and RSI are pushed above for handling IRQ no */
pushq %rdx
pushq %rcx
pushq %r8
pushq %r9
/* Get current task regs area - this logic assumes that irq_xcp_regs
* corrupts only RAX, RDI and RDX registers.
*/
pushq %r15
pushq %r14
pushq %r13
pushq %r12
pushq %r11
pushq %r10
pushq %rbp
pushq %rbx
pushq %rax
pushq %rdx
call irq_xcp_regs
movq %rax, %rdi
mov %ds, %ax /* Lower 16-bits of rax. */
pushq %rax /* Save the data segment descriptor */
mov %es, %ax /* Lower 16-bits of rax. */
pushq %rax /* Save the data segment descriptor */
mov %gs, %ax /* Lower 16-bits of rax. */
pushq %rax /* Save the data segment descriptor */
mov %fs, %ax /* Lower 16-bits of rax. */
pushq %rax /* Save the data segment descriptor */
/* x86_64 don't have pusha, we have to do things manually.
* RDI and RSI are pushed above for handling IRQ no.
* RAX is on stack now, so we have to pop it.
*/
/* Align to 64-bytes boundary */
leaq -(XMMAREA_REG_ALIGN * 8)(%rsp), %rsp
popq (8*REG_RDX)(%rdi)
popq (8*REG_RAX)(%rdi)
movq %rcx, (8*REG_RCX)(%rdi)
movq %r8, (8*REG_R8)(%rdi)
movq %r9, (8*REG_R9)(%rdi)
movq %r15, (8*REG_R15)(%rdi)
movq %r14, (8*REG_R14)(%rdi)
movq %r13, (8*REG_R13)(%rdi)
movq %r12, (8*REG_R12)(%rdi)
movq %r11, (8*REG_R11)(%rdi)
movq %r10, (8*REG_R10)(%rdi)
movq %rbp, (8*REG_RBP)(%rdi)
movq %rbx, (8*REG_RBX)(%rdi)
xor %rax, %rax /* Reset rax */
mov %ds, %ax /* Lower 16-bits of rax. */
movq %rax, (8*REG_DS)(%rdi) /* Save the data segment descriptor */
mov %es, %ax /* Lower 16-bits of rax. */
movq %rax, (8*REG_ES)(%rdi) /* Save the data segment descriptor */
mov %gs, %ax /* Lower 16-bits of rax. */
movq %rax, (8*REG_GS)(%rdi) /* Save the data segment descriptor */
mov %fs, %ax /* Lower 16-bits of rax. */
movq %rax, (8*REG_FS)(%rdi) /* Save the data segment descriptor */
/* Save registers from stack */
movq 0(%rsp), %rcx
movq %rcx, (8*REG_RSI)(%rdi)
movq 8(%rsp), %rcx
movq %rcx, (8*REG_RDI)(%rdi)
movq 16(%rsp), %rcx
movq %rcx, (8*REG_ERRCODE)(%rdi)
movq 24(%rsp), %rcx
movq %rcx, (8*REG_RIP)(%rdi)
movq 32(%rsp), %rcx
movq %rcx, (8*REG_CS)(%rdi)
movq 40(%rsp), %rcx
movq %rcx, (8*REG_RFLAGS)(%rdi)
movq 48(%rsp), %rcx
movq %rcx, (8*REG_RSP)(%rdi)
movq 56(%rsp), %rcx
movq %rcx, (8*REG_SS)(%rdi)
/* Registers in RDI are already properly aligned */
/* Save xmm registers */
leaq -XCPTCONTEXT_XMM_AREA_SIZE(%rsp), %rsp
#ifndef CONFIG_ARCH_X86_64_HAVE_XSAVE
fxsaveq (%rsp)
fxsaveq (%rdi)
#else
movl $XSAVE_STATE_COMPONENTS, %eax
xor %edx, %edx
xsave (%rsp)
xsave (%rdi)
#endif
/* The current value of the SP points to the beginning of the state save
* structure. Save that in RDI as the input parameter to irq_handler.
/* The current value of RDI points to the beginning of the state save
* structure.  Push this value onto the stack, and also push a dummy value
* so that we don't lose the correct stack alignment for vector operations.
*/
mov %rsp, %rdi
pushq %rdi
pushq $0
call irq_handler
add $8, %rsp
popq %rdi
/* The common return point for both isr_handler and irq_handler */
.Lreturn:
/* Check if full context switch is required for signal handling */
movq (8*REG_AUX)(%rax), %rcx
cmp $(REG_AUX_FULLCONTEXT), %rcx
je .Lfullswitch
/* EAX may possibly hold a pointer to a different register save area on
* return. Are we switching to a new context?
*/
cmp %rax, %rsp
cmp %rax, %rdi
je .Lnoswitch
/* A context switch will be performed. EAX holds the address of the new
.Lfullswitch:
/* Reset flag */
movq $0x0, (8*REG_AUX)(%rdi)
/* A context switch will be performed. RAX holds the address of the new
* register save structure.
*
* Jump to x86_64_fullcontextrestore(). We perform a call here, but that function
@ -830,50 +878,50 @@ irq_common:
* to the x86_64_fullcontextrestore().
*/
mov %rax, %rdi
movq %rax, %rdi
call x86_64_fullcontextrestore
.Lnoswitch:
#ifndef CONFIG_ARCH_X86_64_HAVE_XSAVE
fxrstorq (%rsp)
fxrstorq (%rdi)
#else
movl $XSAVE_STATE_COMPONENTS, %eax
xor %edx, %edx
xrstor (%rsp)
xrstor (%rdi)
#endif
leaq XCPTCONTEXT_XMM_AREA_SIZE(%rsp), %rsp
/* Align to 64-bytes boundary */
leaq (XMMAREA_REG_ALIGN * 8)(%rsp), %rsp
popq %rax
movq (8*REG_FS)(%rdi), %rax
mov %fs, %ax
popq %rax
movq (8*REG_GS)(%rdi), %rax
mov %gs, %ax
popq %rax
movq (8*REG_ES)(%rdi), %rax
mov %es, %ax
popq %rax
movq (8*REG_DS)(%rdi), %rax
mov %ds, %ax
popq %rax
popq %rbx
popq %rbp
popq %r10
popq %r11
popq %r12
popq %r13
popq %r14
popq %r15
movq (8*REG_RAX)(%rdi), %rax
movq (8*REG_RBX)(%rdi), %rbx
movq (8*REG_RBP)(%rdi), %rbp
movq (8*REG_R10)(%rdi), %r10
movq (8*REG_R11)(%rdi), %r11
movq (8*REG_R12)(%rdi), %r12
movq (8*REG_R13)(%rdi), %r13
movq (8*REG_R14)(%rdi), %r14
movq (8*REG_R15)(%rdi), %r15
popq %r9
popq %r8
popq %rcx
popq %rdx
movq (8*REG_R9)(%rdi), %r9
movq (8*REG_R8)(%rdi), %r8
movq (8*REG_RCX)(%rdi), %rcx
movq (8*REG_RDX)(%rdi), %rdx
popq %rsi
popq %rdi
/* Pop RDI and RSI pushed on interrupt entry */
add $8, %rsp /* Cleans up the pushed error code */
popq %rsi
popq %rdi
/* Cleans up the pushed error code */
add $8, %rsp
iretq /* Pops 5 things at once: CS, RIP, RFLAGS and SS and RSP */
.size irq_common, . - irq_common