Add "sysenter" support on x86, and a "vsyscall" page.
[Posted December 18, 2002 by corbet]
| From: | Linux Kernel Mailing List <linux-kernel@vger.kernel.org> |
| To: | BK Commits List:; |
| Subject: | Add "sysenter" support on x86, and a "vsyscall" page. |
| Date: | Tue, 17 Dec 2002 05:39:07 +0000 |
ChangeSet 1.886, 2002/12/16 21:39:07-08:00, torvalds@home.transmeta.com
Add "sysenter" support on x86, and a "vsyscall" page.
Instead of doing a "int 0x80" instruction for system calls,
user space can do a "call 0xfffff000" which will do the right
thing regardless of what kind of system call support the CPU
has.
# This patch includes the following deltas:
# ChangeSet 1.885 -> 1.886
# include/asm-i386/fixmap.h 1.8 -> 1.9
# arch/i386/kernel/head.S 1.18 -> 1.19
# arch/i386/kernel/Makefile 1.30 -> 1.31
# include/asm-i386/segment.h 1.2 -> 1.3
# arch/i386/kernel/entry.S 1.41 -> 1.42
# arch/i386/mm/init.c 1.37 -> 1.38
# (new) -> 1.1 arch/i386/kernel/sysenter.c
#
arch/i386/kernel/Makefile | 1
arch/i386/kernel/entry.S | 63 ++++++++++++++++++++++++++++++++-----
arch/i386/kernel/head.S | 25 ++++++++------
arch/i386/kernel/sysenter.c | 74 ++++++++++++++++++++++++++++++++++++++++++++
arch/i386/mm/init.c | 2 -
include/asm-i386/fixmap.h | 7 ++--
include/asm-i386/segment.h | 36 +++++++++++----------
7 files changed, 167 insertions(+), 41 deletions(-)
diff -Nru a/arch/i386/kernel/Makefile b/arch/i386/kernel/Makefile
--- a/arch/i386/kernel/Makefile Mon Dec 16 22:03:50 2002
+++ b/arch/i386/kernel/Makefile Mon Dec 16 22:03:50 2002
@@ -29,6 +29,7 @@
obj-$(CONFIG_PROFILING) += profile.o
obj-$(CONFIG_EDD) += edd.o
obj-$(CONFIG_MODULES) += module.o
+obj-y += sysenter.o
EXTRA_AFLAGS := -traditional
diff -Nru a/arch/i386/kernel/entry.S b/arch/i386/kernel/entry.S
--- a/arch/i386/kernel/entry.S Mon Dec 16 22:03:50 2002
+++ b/arch/i386/kernel/entry.S Mon Dec 16 22:03:50 2002
@@ -94,7 +94,7 @@
movl %edx, %ds; \
movl %edx, %es;
-#define RESTORE_ALL \
+#define RESTORE_REGS \
popl %ebx; \
popl %ecx; \
popl %edx; \
@@ -104,14 +104,25 @@
popl %eax; \
1: popl %ds; \
2: popl %es; \
- addl $4, %esp; \
-3: iret; \
.section .fixup,"ax"; \
-4: movl $0,(%esp); \
+3: movl $0,(%esp); \
jmp 1b; \
-5: movl $0,(%esp); \
+4: movl $0,(%esp); \
jmp 2b; \
-6: pushl %ss; \
+.previous; \
+.section __ex_table,"a";\
+ .align 4; \
+ .long 1b,3b; \
+ .long 2b,4b; \
+.previous
+
+
+#define RESTORE_ALL \
+ RESTORE_REGS \
+ addl $4, %esp; \
+1: iret; \
+.section .fixup,"ax"; \
+2: pushl %ss; \
popl %ds; \
pushl %ss; \
popl %es; \
@@ -120,11 +131,11 @@
.previous; \
.section __ex_table,"a";\
.align 4; \
- .long 1b,4b; \
- .long 2b,5b; \
- .long 3b,6b; \
+ .long 1b,2b; \
.previous
+
+
ENTRY(lcall7)
pushfl # We get a different stack layout with call
# gates, which has to be cleaned up later..
@@ -219,6 +230,40 @@
cli
jmp need_resched
#endif
+
+/* Points to after the "sysenter" instruction in the vsyscall page */
+#define SYSENTER_RETURN 0xfffff007
+
+ # sysenter call handler stub
+ ALIGN
+ENTRY(sysenter_entry)
+ sti
+ pushl $(__USER_DS)
+ pushl %ebp
+ pushfl
+ pushl $(__USER_CS)
+ pushl $SYSENTER_RETURN
+
+ pushl %eax
+ SAVE_ALL
+ GET_THREAD_INFO(%ebx)
+ cmpl $(NR_syscalls), %eax
+ jae syscall_badsys
+
+ testb $_TIF_SYSCALL_TRACE,TI_FLAGS(%ebx)
+ jnz syscall_trace_entry
+ call *sys_call_table(,%eax,4)
+ movl %eax,EAX(%esp)
+ cli
+ movl TI_FLAGS(%ebx), %ecx
+ testw $_TIF_ALLWORK_MASK, %cx
+ jne syscall_exit_work
+ RESTORE_REGS
+ movl 4(%esp),%edx
+ movl 16(%esp),%ecx
+ sti
+ sysexit
+
# system call handler stub
ALIGN
diff -Nru a/arch/i386/kernel/head.S b/arch/i386/kernel/head.S
--- a/arch/i386/kernel/head.S Mon Dec 16 22:03:50 2002
+++ b/arch/i386/kernel/head.S Mon Dec 16 22:03:50 2002
@@ -414,8 +414,8 @@
.quad 0x0000000000000000 /* 0x0b reserved */
.quad 0x0000000000000000 /* 0x13 reserved */
.quad 0x0000000000000000 /* 0x1b reserved */
- .quad 0x00cffa000000ffff /* 0x23 user 4GB code at 0x00000000 */
- .quad 0x00cff2000000ffff /* 0x2b user 4GB data at 0x00000000 */
+ .quad 0x0000000000000000 /* 0x20 unused */
+ .quad 0x0000000000000000 /* 0x28 unused */
.quad 0x0000000000000000 /* 0x33 TLS entry 1 */
.quad 0x0000000000000000 /* 0x3b TLS entry 2 */
.quad 0x0000000000000000 /* 0x43 TLS entry 3 */
@@ -425,22 +425,25 @@
.quad 0x00cf9a000000ffff /* 0x60 kernel 4GB code at 0x00000000 */
.quad 0x00cf92000000ffff /* 0x68 kernel 4GB data at 0x00000000 */
- .quad 0x0000000000000000 /* 0x70 TSS descriptor */
- .quad 0x0000000000000000 /* 0x78 LDT descriptor */
+ .quad 0x00cffa000000ffff /* 0x73 user 4GB code at 0x00000000 */
+ .quad 0x00cff2000000ffff /* 0x7b user 4GB data at 0x00000000 */
+
+ .quad 0x0000000000000000 /* 0x80 TSS descriptor */
+ .quad 0x0000000000000000 /* 0x88 LDT descriptor */
/* Segments used for calling PnP BIOS */
- .quad 0x00c09a0000000000 /* 0x80 32-bit code */
- .quad 0x00809a0000000000 /* 0x88 16-bit code */
- .quad 0x0080920000000000 /* 0x90 16-bit data */
- .quad 0x0080920000000000 /* 0x98 16-bit data */
+ .quad 0x00c09a0000000000 /* 0x90 32-bit code */
+ .quad 0x00809a0000000000 /* 0x98 16-bit code */
.quad 0x0080920000000000 /* 0xa0 16-bit data */
+ .quad 0x0080920000000000 /* 0xa8 16-bit data */
+ .quad 0x0080920000000000 /* 0xb0 16-bit data */
/*
* The APM segments have byte granularity and their bases
* and limits are set at run time.
*/
- .quad 0x00409a0000000000 /* 0xa8 APM CS code */
- .quad 0x00009a0000000000 /* 0xb0 APM CS 16 code (16 bit) */
- .quad 0x0040920000000000 /* 0xb8 APM DS data */
+ .quad 0x00409a0000000000 /* 0xb8 APM CS code */
+ .quad 0x00009a0000000000 /* 0xc0 APM CS 16 code (16 bit) */
+ .quad 0x0040920000000000 /* 0xc8 APM DS data */
#if CONFIG_SMP
.fill (NR_CPUS-1)*GDT_ENTRIES,8,0 /* other CPU's GDT */
diff -Nru a/arch/i386/kernel/sysenter.c b/arch/i386/kernel/sysenter.c
--- /dev/null Wed Dec 31 16:00:00 1969
+++ b/arch/i386/kernel/sysenter.c Mon Dec 16 22:03:50 2002
@@ -0,0 +1,74 @@
+/*
+ * linux/arch/i386/kernel/sysenter.c
+ *
+ * (C) Copyright 2002 Linus Torvalds
+ *
+ * This file contains the needed initializations to support sysenter.
+ */
+
+#include <linux/init.h>
+#include <linux/smp.h>
+#include <linux/thread_info.h>
+#include <linux/gfp.h>
+#include <linux/string.h>
+
+#include <asm/cpufeature.h>
+#include <asm/msr.h>
+#include <asm/pgtable.h>
+
+extern asmlinkage void sysenter_entry(void);
+
+static void __init enable_sep_cpu(void *info)
+{
+ unsigned long page = __get_free_page(GFP_ATOMIC);
+ int cpu = get_cpu();
+ unsigned long *esp0_ptr = &(init_tss + cpu)->esp0;
+ unsigned long rel32;
+
+ rel32 = (unsigned long) sysenter_entry - (page+11);
+
+
+ *(short *) (page+0) = 0x258b; /* movl xxxxx,%esp */
+ *(long **) (page+2) = esp0_ptr;
+ *(char *) (page+6) = 0xe9; /* jmp rel32 */
+ *(long *) (page+7) = rel32;
+
+ wrmsr(0x174, __KERNEL_CS, 0); /* SYSENTER_CS_MSR */
+ wrmsr(0x175, page+PAGE_SIZE, 0); /* SYSENTER_ESP_MSR */
+ wrmsr(0x176, page, 0); /* SYSENTER_EIP_MSR */
+
+ printk("Enabling SEP on CPU %d\n", cpu);
+ put_cpu();
+}
+
+static int __init sysenter_setup(void)
+{
+ static const char int80[] = {
+ 0xcd, 0x80, /* int $0x80 */
+ 0xc3 /* ret */
+ };
+ static const char sysent[] = {
+ 0x55, /* push %ebp */
+ 0x51, /* push %ecx */
+ 0x52, /* push %edx */
+ 0x89, 0xe5, /* movl %esp,%ebp */
+ 0x0f, 0x34, /* sysenter */
+ 0x5a, /* pop %edx */
+ 0x59, /* pop %ecx */
+ 0x5d, /* pop %ebp */
+ 0xc3 /* ret */
+ };
+ unsigned long page = get_zeroed_page(GFP_ATOMIC);
+
+ __set_fixmap(FIX_VSYSCALL, __pa(page), PAGE_READONLY);
+ memcpy((void *) page, int80, sizeof(int80));
+ if (!boot_cpu_has(X86_FEATURE_SEP))
+ return 0;
+
+ memcpy((void *) page, sysent, sizeof(sysent));
+ enable_sep_cpu(NULL);
+ smp_call_function(enable_sep_cpu, NULL, 1, 1);
+ return 0;
+}
+
+__initcall(sysenter_setup);
diff -Nru a/arch/i386/mm/init.c b/arch/i386/mm/init.c
--- a/arch/i386/mm/init.c Mon Dec 16 22:03:50 2002
+++ b/arch/i386/mm/init.c Mon Dec 16 22:03:50 2002
@@ -72,7 +72,7 @@
static pte_t * __init one_page_table_init(pmd_t *pmd)
{
pte_t *page_table = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE);
- set_pmd(pmd, __pmd(__pa(page_table) | _KERNPG_TABLE));
+ set_pmd(pmd, __pmd(__pa(page_table) | _PAGE_TABLE));
if (page_table != pte_offset_kernel(pmd, 0))
BUG();
diff -Nru a/include/asm-i386/fixmap.h b/include/asm-i386/fixmap.h
--- a/include/asm-i386/fixmap.h Mon Dec 16 22:03:50 2002
+++ b/include/asm-i386/fixmap.h Mon Dec 16 22:03:50 2002
@@ -42,6 +42,8 @@
* task switches.
*/
enum fixed_addresses {
+ FIX_VSYSCALL,
+ FIX_HOLE,
#ifdef CONFIG_X86_LOCAL_APIC
FIX_APIC_BASE, /* local (CPU) APIC) -- required for SMP or not */
#endif
@@ -96,10 +98,9 @@
* used by vmalloc.c.
*
* Leave one empty page between vmalloc'ed areas and
- * the start of the fixmap, and leave one page empty
- * at the top of mem..
+ * the start of the fixmap.
*/
-#define FIXADDR_TOP (0xffffe000UL)
+#define FIXADDR_TOP (0xfffff000UL)
#define __FIXADDR_SIZE (__end_of_permanent_fixed_addresses << PAGE_SHIFT)
#define FIXADDR_START (FIXADDR_TOP - __FIXADDR_SIZE)
diff -Nru a/include/asm-i386/segment.h b/include/asm-i386/segment.h
--- a/include/asm-i386/segment.h Mon Dec 16 22:03:50 2002
+++ b/include/asm-i386/segment.h Mon Dec 16 22:03:50 2002
@@ -9,8 +9,8 @@
* 2 - reserved
* 3 - reserved
*
- * 4 - default user CS <==== new cacheline
- * 5 - default user DS
+ * 4 - unused <==== new cacheline
+ * 5 - unused
*
* ------- start of TLS (Thread-Local Storage) segments:
*
@@ -25,16 +25,18 @@
*
* 12 - kernel code segment <==== new cacheline
* 13 - kernel data segment
- * 14 - TSS
- * 15 - LDT
- * 16 - PNPBIOS support (16->32 gate)
- * 17 - PNPBIOS support
- * 18 - PNPBIOS support
+ * 14 - default user CS
+ * 15 - default user DS
+ * 16 - TSS
+ * 17 - LDT
+ * 18 - PNPBIOS support (16->32 gate)
* 19 - PNPBIOS support
* 20 - PNPBIOS support
- * 21 - APM BIOS support
- * 22 - APM BIOS support
- * 23 - APM BIOS support
+ * 21 - PNPBIOS support
+ * 22 - PNPBIOS support
+ * 23 - APM BIOS support
+ * 24 - APM BIOS support
+ * 25 - APM BIOS support
*/
#define GDT_ENTRY_TLS_ENTRIES 3
#define GDT_ENTRY_TLS_MIN 6
@@ -42,10 +44,10 @@
#define TLS_SIZE (GDT_ENTRY_TLS_ENTRIES * 8)
-#define GDT_ENTRY_DEFAULT_USER_CS 4
+#define GDT_ENTRY_DEFAULT_USER_CS 14
#define __USER_CS (GDT_ENTRY_DEFAULT_USER_CS * 8 + 3)
-#define GDT_ENTRY_DEFAULT_USER_DS 5
+#define GDT_ENTRY_DEFAULT_USER_DS 15
#define __USER_DS (GDT_ENTRY_DEFAULT_USER_DS * 8 + 3)
#define GDT_ENTRY_KERNEL_BASE 12
@@ -56,14 +58,14 @@
#define GDT_ENTRY_KERNEL_DS (GDT_ENTRY_KERNEL_BASE + 1)
#define __KERNEL_DS (GDT_ENTRY_KERNEL_DS * 8)
-#define GDT_ENTRY_TSS (GDT_ENTRY_KERNEL_BASE + 2)
-#define GDT_ENTRY_LDT (GDT_ENTRY_KERNEL_BASE + 3)
+#define GDT_ENTRY_TSS (GDT_ENTRY_KERNEL_BASE + 4)
+#define GDT_ENTRY_LDT (GDT_ENTRY_KERNEL_BASE + 5)
-#define GDT_ENTRY_PNPBIOS_BASE (GDT_ENTRY_KERNEL_BASE + 4)
-#define GDT_ENTRY_APMBIOS_BASE (GDT_ENTRY_KERNEL_BASE + 9)
+#define GDT_ENTRY_PNPBIOS_BASE (GDT_ENTRY_KERNEL_BASE + 6)
+#define GDT_ENTRY_APMBIOS_BASE (GDT_ENTRY_KERNEL_BASE + 11)
/*
- * The GDT has 21 entries but we pad it to cacheline boundary:
+ * The GDT has 23 entries but we pad it to cacheline boundary:
*/
#define GDT_ENTRIES 24
-
To unsubscribe from this list: send the line "unsubscribe bk-commits-head" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html