|
|
Log in / Subscribe / Register

Add "sysenter" support on x86, and a "vsyscall" page.

From:  Linux Kernel Mailing List <linux-kernel@vger.kernel.org>
To:  BK Commits List:;
Subject:  Add "sysenter" support on x86, and a "vsyscall" page.
Date:  Tue, 17 Dec 2002 05:39:07 +0000

ChangeSet 1.886, 2002/12/16 21:39:07-08:00, torvalds@home.transmeta.com

	Add "sysenter" support on x86, and a "vsyscall" page.
	
	Instead of doing an "int 0x80" instruction for system calls,
	user space can do a "call 0xfffff000" which will do the right
	thing regardless of what kind of system call support the CPU
	has.


# This patch includes the following deltas:
#	           ChangeSet	1.885   -> 1.886  
#	include/asm-i386/fixmap.h	1.8     -> 1.9    
#	arch/i386/kernel/head.S	1.18    -> 1.19   
#	arch/i386/kernel/Makefile	1.30    -> 1.31   
#	include/asm-i386/segment.h	1.2     -> 1.3    
#	arch/i386/kernel/entry.S	1.41    -> 1.42   
#	 arch/i386/mm/init.c	1.37    -> 1.38   
#	               (new)	        -> 1.1     arch/i386/kernel/sysenter.c
#

 arch/i386/kernel/Makefile   |    1 
 arch/i386/kernel/entry.S    |   63 ++++++++++++++++++++++++++++++++-----
 arch/i386/kernel/head.S     |   25 ++++++++------
 arch/i386/kernel/sysenter.c |   74 ++++++++++++++++++++++++++++++++++++++++++++
 arch/i386/mm/init.c         |    2 -
 include/asm-i386/fixmap.h   |    7 ++--
 include/asm-i386/segment.h  |   36 +++++++++++----------
 7 files changed, 167 insertions(+), 41 deletions(-)


diff -Nru a/arch/i386/kernel/Makefile b/arch/i386/kernel/Makefile
--- a/arch/i386/kernel/Makefile	Mon Dec 16 22:03:50 2002
+++ b/arch/i386/kernel/Makefile	Mon Dec 16 22:03:50 2002
@@ -29,6 +29,7 @@
 obj-$(CONFIG_PROFILING)		+= profile.o
 obj-$(CONFIG_EDD)             	+= edd.o
 obj-$(CONFIG_MODULES)		+= module.o
+obj-y				+= sysenter.o
 
 EXTRA_AFLAGS   := -traditional
 
diff -Nru a/arch/i386/kernel/entry.S b/arch/i386/kernel/entry.S
--- a/arch/i386/kernel/entry.S	Mon Dec 16 22:03:50 2002
+++ b/arch/i386/kernel/entry.S	Mon Dec 16 22:03:50 2002
@@ -94,7 +94,7 @@
 	movl %edx, %ds; \
 	movl %edx, %es;
 
-#define RESTORE_ALL	\
+#define RESTORE_REGS	\
 	popl %ebx;	\
 	popl %ecx;	\
 	popl %edx;	\
@@ -104,14 +104,25 @@
 	popl %eax;	\
 1:	popl %ds;	\
 2:	popl %es;	\
-	addl $4, %esp;	\
-3:	iret;		\
 .section .fixup,"ax";	\
-4:	movl $0,(%esp);	\
+3:	movl $0,(%esp);	\
 	jmp 1b;		\
-5:	movl $0,(%esp);	\
+4:	movl $0,(%esp);	\
 	jmp 2b;		\
-6:	pushl %ss;	\
+.previous;		\
+.section __ex_table,"a";\
+	.align 4;	\
+	.long 1b,3b;	\
+	.long 2b,4b;	\
+.previous
+
+
+#define RESTORE_ALL	\
+	RESTORE_REGS	\
+	addl $4, %esp;	\
+1:	iret;		\
+.section .fixup,"ax";   \
+2:	pushl %ss;	\
 	popl %ds;	\
 	pushl %ss;	\
 	popl %es;	\
@@ -120,11 +131,11 @@
 .previous;		\
 .section __ex_table,"a";\
 	.align 4;	\
-	.long 1b,4b;	\
-	.long 2b,5b;	\
-	.long 3b,6b;	\
+	.long 1b,2b;	\
 .previous
 
+
+
 ENTRY(lcall7)
 	pushfl			# We get a different stack layout with call
 				# gates, which has to be cleaned up later..
@@ -219,6 +230,40 @@
 	cli
 	jmp need_resched
 #endif
+
+/* Points to after the "sysenter" instruction in the vsyscall page */
+#define SYSENTER_RETURN 0xfffff007
+
+	# sysenter call handler stub
+	ALIGN
+ENTRY(sysenter_entry)			/* kernel entry point for the "sysenter" fast system call path */
+	sti
+	pushl $(__USER_DS)		/* hand-built frame in iret order: SS, ESP, EFLAGS, CS, EIP */
+	pushl %ebp			/* user ESP: the vsyscall stub copies %esp to %ebp before sysenter */
+	pushfl
+	pushl $(__USER_CS)
+	pushl $SYSENTER_RETURN		/* user EIP: the instruction after "sysenter" in the vsyscall page */
+
+	pushl %eax			/* syscall number; this slot is still on the stack after RESTORE_REGS */
+	SAVE_ALL
+	GET_THREAD_INFO(%ebx)
+	cmpl $(NR_syscalls), %eax
+	jae syscall_badsys		/* unsigned compare: numbers >= NR_syscalls are rejected */
+
+	testb $_TIF_SYSCALL_TRACE,TI_FLAGS(%ebx)
+	jnz syscall_trace_entry
+	call *sys_call_table(,%eax,4)
+	movl %eax,EAX(%esp)		/* store the handler's return value in the saved-%eax slot */
+	cli
+	movl TI_FLAGS(%ebx), %ecx
+	testw $_TIF_ALLWORK_MASK, %cx
+	jne syscall_exit_work		/* pending work flags: leave through the common exit-work path */
+	RESTORE_REGS
+	movl 4(%esp),%edx		/* sysexit loads the user EIP from %edx (the pushed SYSENTER_RETURN) */
+	movl 16(%esp),%ecx		/* sysexit loads the user ESP from %ecx (the pushed %ebp) */
+	sti
+	sysexit
+
 
 	# system call handler stub
 	ALIGN
diff -Nru a/arch/i386/kernel/head.S b/arch/i386/kernel/head.S
--- a/arch/i386/kernel/head.S	Mon Dec 16 22:03:50 2002
+++ b/arch/i386/kernel/head.S	Mon Dec 16 22:03:50 2002
@@ -414,8 +414,8 @@
 	.quad 0x0000000000000000	/* 0x0b reserved */
 	.quad 0x0000000000000000	/* 0x13 reserved */
 	.quad 0x0000000000000000	/* 0x1b reserved */
-	.quad 0x00cffa000000ffff	/* 0x23 user 4GB code at 0x00000000 */
-	.quad 0x00cff2000000ffff	/* 0x2b user 4GB data at 0x00000000 */
+	.quad 0x0000000000000000	/* 0x20 unused */
+	.quad 0x0000000000000000	/* 0x28 unused */
 	.quad 0x0000000000000000	/* 0x33 TLS entry 1 */
 	.quad 0x0000000000000000	/* 0x3b TLS entry 2 */
 	.quad 0x0000000000000000	/* 0x43 TLS entry 3 */
@@ -425,22 +425,25 @@
 
 	.quad 0x00cf9a000000ffff	/* 0x60 kernel 4GB code at 0x00000000 */
 	.quad 0x00cf92000000ffff	/* 0x68 kernel 4GB data at 0x00000000 */
-	.quad 0x0000000000000000	/* 0x70 TSS descriptor */
-	.quad 0x0000000000000000	/* 0x78 LDT descriptor */
+	.quad 0x00cffa000000ffff	/* 0x73 user 4GB code at 0x00000000 */
+	.quad 0x00cff2000000ffff	/* 0x7b user 4GB data at 0x00000000 */
+
+	.quad 0x0000000000000000	/* 0x80 TSS descriptor */
+	.quad 0x0000000000000000	/* 0x88 LDT descriptor */
 
 	/* Segments used for calling PnP BIOS */
-	.quad 0x00c09a0000000000	/* 0x80 32-bit code */
-	.quad 0x00809a0000000000	/* 0x88 16-bit code */
-	.quad 0x0080920000000000	/* 0x90 16-bit data */
-	.quad 0x0080920000000000	/* 0x98 16-bit data */
+	.quad 0x00c09a0000000000	/* 0x90 32-bit code */
+	.quad 0x00809a0000000000	/* 0x98 16-bit code */
 	.quad 0x0080920000000000	/* 0xa0 16-bit data */
+	.quad 0x0080920000000000	/* 0xa8 16-bit data */
+	.quad 0x0080920000000000	/* 0xb0 16-bit data */
 	/*
 	 * The APM segments have byte granularity and their bases
 	 * and limits are set at run time.
 	 */
-	.quad 0x00409a0000000000	/* 0xa8 APM CS    code */
-	.quad 0x00009a0000000000	/* 0xb0 APM CS 16 code (16 bit) */
-	.quad 0x0040920000000000	/* 0xb8 APM DS    data */
+	.quad 0x00409a0000000000	/* 0xb8 APM CS    code */
+	.quad 0x00009a0000000000	/* 0xc0 APM CS 16 code (16 bit) */
+	.quad 0x0040920000000000	/* 0xc8 APM DS    data */
 
 #if CONFIG_SMP
 	.fill (NR_CPUS-1)*GDT_ENTRIES,8,0 /* other CPU's GDT */
diff -Nru a/arch/i386/kernel/sysenter.c b/arch/i386/kernel/sysenter.c
--- /dev/null	Wed Dec 31 16:00:00 1969
+++ b/arch/i386/kernel/sysenter.c	Mon Dec 16 22:03:50 2002
@@ -0,0 +1,74 @@
+/*
+ * linux/arch/i386/kernel/sysenter.c
+ *
+ * (C) Copyright 2002 Linus Torvalds
+ *
+ * This file contains the needed initializations to support sysenter.
+ */
+
+#include <linux/init.h>
+#include <linux/smp.h>
+#include <linux/thread_info.h>
+#include <linux/gfp.h>
+#include <linux/string.h>
+
+#include <asm/cpufeature.h>
+#include <asm/msr.h>
+#include <asm/pgtable.h>
+
+extern asmlinkage void sysenter_entry(void);
+
+static void __init enable_sep_cpu(void *info)	/* builds a sysenter trampoline page and programs the SEP MSRs; info is unused */
+{
+	unsigned long page = __get_free_page(GFP_ATOMIC);	/* NOTE(review): the result is used without a failure check */
+	int cpu = get_cpu();
+	unsigned long *esp0_ptr = &(init_tss + cpu)->esp0;
+	unsigned long rel32;
+
+	rel32 = (unsigned long) sysenter_entry - (page+11);	/* relative to the end of the jmp: opcode at +6, rel32 at +7..+10 */
+
+	
+	*(short *) (page+0) = 0x258b;		/* movl xxxxx,%esp */
+	*(long **) (page+2) = esp0_ptr;		/* xxxxx: address of the esp0 field of init_tss[cpu] */
+	*(char *)  (page+6) = 0xe9;		/* jmp rel32 */
+	*(long *)  (page+7) = rel32;
+
+	wrmsr(0x174, __KERNEL_CS, 0);		/* SYSENTER_CS_MSR */
+	wrmsr(0x175, page+PAGE_SIZE, 0);	/* SYSENTER_ESP_MSR: end of the trampoline page */
+	wrmsr(0x176, page, 0);			/* SYSENTER_EIP_MSR: start of the trampoline written above */
+
+	printk("Enabling SEP on CPU %d\n", cpu);
+	put_cpu();	
+}
+
+static int __init sysenter_setup(void)	/* initcall: fills the FIX_VSYSCALL page with a system call stub; always returns 0 */
+{
+	static const char int80[] = {
+		0xcd, 0x80,		/* int $0x80 */
+		0xc3			/* ret */
+	};
+	static const char sysent[] = {
+		0x55,			/* push %ebp */
+		0x51,			/* push %ecx */
+		0x52,			/* push %edx */
+		0x89, 0xe5,		/* movl %esp,%ebp */
+		0x0f, 0x34,		/* sysenter */
+		0x5a,			/* pop %edx -- byte offset 7, matching SYSENTER_RETURN (0xfffff007) in entry.S */
+		0x59,			/* pop %ecx */
+		0x5d,			/* pop %ebp */
+		0xc3			/* ret */
+	};
+	unsigned long page = get_zeroed_page(GFP_ATOMIC);	/* NOTE(review): the result is used without a failure check */
+
+	__set_fixmap(FIX_VSYSCALL, __pa(page), PAGE_READONLY);	/* mapped read-only at the fixmap slot; filled below through 'page' */
+	memcpy((void *) page, int80, sizeof(int80));	/* default stub: int $0x80; ret */
+	if (!boot_cpu_has(X86_FEATURE_SEP))
+		return 0;	/* no SEP feature bit: keep the int $0x80 stub */
+
+	memcpy((void *) page, sysent, sizeof(sysent));	/* overwrite the default with the sysenter stub */
+	enable_sep_cpu(NULL);	/* set up the calling CPU directly */
+	smp_call_function(enable_sep_cpu, NULL, 1, 1);	/* NOTE(review): presumably runs it on every other CPU -- confirm */
+	return 0;
+}
+
+__initcall(sysenter_setup);
diff -Nru a/arch/i386/mm/init.c b/arch/i386/mm/init.c
--- a/arch/i386/mm/init.c	Mon Dec 16 22:03:50 2002
+++ b/arch/i386/mm/init.c	Mon Dec 16 22:03:50 2002
@@ -72,7 +72,7 @@
 static pte_t * __init one_page_table_init(pmd_t *pmd)
 {
 	pte_t *page_table = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE);
-	set_pmd(pmd, __pmd(__pa(page_table) | _KERNPG_TABLE));
+	set_pmd(pmd, __pmd(__pa(page_table) | _PAGE_TABLE));
 	if (page_table != pte_offset_kernel(pmd, 0))
 		BUG();	
 
diff -Nru a/include/asm-i386/fixmap.h b/include/asm-i386/fixmap.h
--- a/include/asm-i386/fixmap.h	Mon Dec 16 22:03:50 2002
+++ b/include/asm-i386/fixmap.h	Mon Dec 16 22:03:50 2002
@@ -42,6 +42,8 @@
  * task switches.
  */
 enum fixed_addresses {
+	FIX_VSYSCALL,
+	FIX_HOLE,
 #ifdef CONFIG_X86_LOCAL_APIC
 	FIX_APIC_BASE,	/* local (CPU) APIC) -- required for SMP or not */
 #endif
@@ -96,10 +98,9 @@
  * used by vmalloc.c.
  *
  * Leave one empty page between vmalloc'ed areas and
- * the start of the fixmap, and leave one page empty
- * at the top of mem..
+ * the start of the fixmap.
  */
-#define FIXADDR_TOP	(0xffffe000UL)
+#define FIXADDR_TOP	(0xfffff000UL)
 #define __FIXADDR_SIZE	(__end_of_permanent_fixed_addresses << PAGE_SHIFT)
 #define FIXADDR_START	(FIXADDR_TOP - __FIXADDR_SIZE)
 
diff -Nru a/include/asm-i386/segment.h b/include/asm-i386/segment.h
--- a/include/asm-i386/segment.h	Mon Dec 16 22:03:50 2002
+++ b/include/asm-i386/segment.h	Mon Dec 16 22:03:50 2002
@@ -9,8 +9,8 @@
  *   2 - reserved
  *   3 - reserved
  *
- *   4 - default user CS		<==== new cacheline
- *   5 - default user DS
+ *   4 - unused			<==== new cacheline
+ *   5 - unused
  *
  *  ------- start of TLS (Thread-Local Storage) segments:
  *
@@ -25,16 +25,18 @@
  *
  *  12 - kernel code segment		<==== new cacheline
  *  13 - kernel data segment
- *  14 - TSS
- *  15 - LDT
- *  16 - PNPBIOS support (16->32 gate)
- *  17 - PNPBIOS support
- *  18 - PNPBIOS support
+ *  14 - default user CS
+ *  15 - default user DS
+ *  16 - TSS
+ *  17 - LDT
+ *  18 - PNPBIOS support (16->32 gate)
  *  19 - PNPBIOS support
  *  20 - PNPBIOS support
- *  21 - APM BIOS support
- *  22 - APM BIOS support
- *  23 - APM BIOS support 
+ *  21 - PNPBIOS support
+ *  22 - PNPBIOS support
+ *  23 - APM BIOS support
+ *  24 - APM BIOS support
+ *  25 - APM BIOS support 
  */
 #define GDT_ENTRY_TLS_ENTRIES	3
 #define GDT_ENTRY_TLS_MIN	6
@@ -42,10 +44,10 @@
 
 #define TLS_SIZE (GDT_ENTRY_TLS_ENTRIES * 8)
 
-#define GDT_ENTRY_DEFAULT_USER_CS	4
+#define GDT_ENTRY_DEFAULT_USER_CS	14
 #define __USER_CS (GDT_ENTRY_DEFAULT_USER_CS * 8 + 3)
 
-#define GDT_ENTRY_DEFAULT_USER_DS	5
+#define GDT_ENTRY_DEFAULT_USER_DS	15
 #define __USER_DS (GDT_ENTRY_DEFAULT_USER_DS * 8 + 3)
 
 #define GDT_ENTRY_KERNEL_BASE	12
@@ -56,14 +58,14 @@
 #define GDT_ENTRY_KERNEL_DS		(GDT_ENTRY_KERNEL_BASE + 1)
 #define __KERNEL_DS (GDT_ENTRY_KERNEL_DS * 8)
 
-#define GDT_ENTRY_TSS			(GDT_ENTRY_KERNEL_BASE + 2)
-#define GDT_ENTRY_LDT			(GDT_ENTRY_KERNEL_BASE + 3)
+#define GDT_ENTRY_TSS			(GDT_ENTRY_KERNEL_BASE + 4)
+#define GDT_ENTRY_LDT			(GDT_ENTRY_KERNEL_BASE + 5)
 
-#define GDT_ENTRY_PNPBIOS_BASE		(GDT_ENTRY_KERNEL_BASE + 4)
-#define GDT_ENTRY_APMBIOS_BASE		(GDT_ENTRY_KERNEL_BASE + 9)
+#define GDT_ENTRY_PNPBIOS_BASE		(GDT_ENTRY_KERNEL_BASE + 6)
+#define GDT_ENTRY_APMBIOS_BASE		(GDT_ENTRY_KERNEL_BASE + 11)
 
 /*
- * The GDT has 21 entries but we pad it to cacheline boundary:
+ * The GDT has 23 entries but we pad it to cacheline boundary:
  */
 #define GDT_ENTRIES 24
 
-
To unsubscribe from this list: send the line "unsubscribe bk-commits-head" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html



to post comments


Copyright © 2002, Eklektix, Inc.
Comments and public postings are copyrighted by their creators.
Linux is a registered trademark of Linus Torvalds