summaryrefslogtreecommitdiff
path: root/arch/x86/vdso
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/vdso')
-rw-r--r--arch/x86/vdso/.gitignore2
-rw-r--r--arch/x86/vdso/Makefile46
-rw-r--r--arch/x86/vdso/vclock_gettime.c135
-rw-r--r--arch/x86/vdso/vdso32-setup.c5
-rw-r--r--arch/x86/vdso/vdsox32.S22
-rw-r--r--arch/x86/vdso/vdsox32.lds.S28
-rw-r--r--arch/x86/vdso/vma.c78
7 files changed, 239 insertions, 77 deletions
diff --git a/arch/x86/vdso/.gitignore b/arch/x86/vdso/.gitignore
index 60274d5..3282874 100644
--- a/arch/x86/vdso/.gitignore
+++ b/arch/x86/vdso/.gitignore
@@ -1,5 +1,7 @@
vdso.lds
vdso-syms.lds
+vdsox32.lds
+vdsox32-syms.lds
vdso32-syms.lds
vdso32-syscall-syms.lds
vdso32-sysenter-syms.lds
diff --git a/arch/x86/vdso/Makefile b/arch/x86/vdso/Makefile
index 5d17950..fd14be1 100644
--- a/arch/x86/vdso/Makefile
+++ b/arch/x86/vdso/Makefile
@@ -3,21 +3,29 @@
#
VDSO64-$(CONFIG_X86_64) := y
+VDSOX32-$(CONFIG_X86_X32_ABI) := y
VDSO32-$(CONFIG_X86_32) := y
VDSO32-$(CONFIG_COMPAT) := y
vdso-install-$(VDSO64-y) += vdso.so
+vdso-install-$(VDSOX32-y) += vdsox32.so
vdso-install-$(VDSO32-y) += $(vdso32-images)
# files to link into the vdso
vobjs-y := vdso-note.o vclock_gettime.o vgetcpu.o
+vobjs-$(VDSOX32-y) += $(vobjx32s-compat)
+
+# Filter out x32 objects.
+vobj64s := $(filter-out $(vobjx32s-compat),$(vobjs-y))
+
# files to link into kernel
obj-$(VDSO64-y) += vma.o vdso.o
+obj-$(VDSOX32-y) += vdsox32.o
obj-$(VDSO32-y) += vdso32.o vdso32-setup.o
-vobjs := $(foreach F,$(vobjs-y),$(obj)/$F)
+vobjs := $(foreach F,$(vobj64s),$(obj)/$F)
$(obj)/vdso.o: $(obj)/vdso.so
@@ -73,6 +81,42 @@ $(obj)/%-syms.lds: $(obj)/%.so.dbg FORCE
$(call if_changed,vdsosym)
#
+# X32 processes use x32 vDSO to access 64bit kernel data.
+#
+# Build x32 vDSO image:
+# 1. Compile x32 vDSO as 64bit.
+# 2. Convert object files to x32.
+# 3. Build x32 VDSO image with x32 objects, which contains 64bit codes
+# so that it can reach 64bit address space with 64bit pointers.
+#
+
+targets += vdsox32-syms.lds
+obj-$(VDSOX32-y) += vdsox32-syms.lds
+
+CPPFLAGS_vdsox32.lds = $(CPPFLAGS_vdso.lds)
+VDSO_LDFLAGS_vdsox32.lds = -Wl,-m,elf32_x86_64 \
+ -Wl,-soname=linux-vdso.so.1 \
+ -Wl,-z,max-page-size=4096 \
+ -Wl,-z,common-page-size=4096
+
+vobjx32s-y := $(vobj64s:.o=-x32.o)
+vobjx32s := $(foreach F,$(vobjx32s-y),$(obj)/$F)
+
+# Convert 64bit object file to x32 for x32 vDSO.
+quiet_cmd_x32 = X32 $@
+ cmd_x32 = $(OBJCOPY) -O elf32-x86-64 $< $@
+
+$(obj)/%-x32.o: $(obj)/%.o FORCE
+ $(call if_changed,x32)
+
+targets += vdsox32.so vdsox32.so.dbg vdsox32.lds $(vobjx32s-y)
+
+$(obj)/vdsox32.o: $(src)/vdsox32.S $(obj)/vdsox32.so
+
+$(obj)/vdsox32.so.dbg: $(src)/vdsox32.lds $(vobjx32s) FORCE
+ $(call if_changed,vdso)
+
+#
# Build multiple 32-bit vDSO images to choose from at boot time.
#
obj-$(VDSO32-y) += vdso32-syms.lds
diff --git a/arch/x86/vdso/vclock_gettime.c b/arch/x86/vdso/vclock_gettime.c
index 6bc0e72..885eff4 100644
--- a/arch/x86/vdso/vclock_gettime.c
+++ b/arch/x86/vdso/vclock_gettime.c
@@ -70,100 +70,98 @@ notrace static long vdso_fallback_gettime(long clock, struct timespec *ts)
return ret;
}
+notrace static long vdso_fallback_gtod(struct timeval *tv, struct timezone *tz)
+{
+ long ret;
+
+ asm("syscall" : "=a" (ret) :
+ "0" (__NR_gettimeofday), "D" (tv), "S" (tz) : "memory");
+ return ret;
+}
+
+
notrace static inline long vgetns(void)
{
long v;
cycles_t cycles;
if (gtod->clock.vclock_mode == VCLOCK_TSC)
cycles = vread_tsc();
- else
+ else if (gtod->clock.vclock_mode == VCLOCK_HPET)
cycles = vread_hpet();
+ else
+ return 0;
v = (cycles - gtod->clock.cycle_last) & gtod->clock.mask;
return (v * gtod->clock.mult) >> gtod->clock.shift;
}
-notrace static noinline int do_realtime(struct timespec *ts)
+/* Code size doesn't matter (vdso is 4k anyway) and this is faster. */
+notrace static int __always_inline do_realtime(struct timespec *ts)
{
unsigned long seq, ns;
+ int mode;
+
do {
- seq = read_seqbegin(&gtod->lock);
+ seq = read_seqcount_begin(&gtod->seq);
+ mode = gtod->clock.vclock_mode;
ts->tv_sec = gtod->wall_time_sec;
ts->tv_nsec = gtod->wall_time_nsec;
ns = vgetns();
- } while (unlikely(read_seqretry(&gtod->lock, seq)));
+ } while (unlikely(read_seqcount_retry(&gtod->seq, seq)));
+
timespec_add_ns(ts, ns);
- return 0;
+ return mode;
}
-notrace static noinline int do_monotonic(struct timespec *ts)
+notrace static int do_monotonic(struct timespec *ts)
{
- unsigned long seq, ns, secs;
+ unsigned long seq, ns;
+ int mode;
+
do {
- seq = read_seqbegin(&gtod->lock);
- secs = gtod->wall_time_sec;
- ns = gtod->wall_time_nsec + vgetns();
- secs += gtod->wall_to_monotonic.tv_sec;
- ns += gtod->wall_to_monotonic.tv_nsec;
- } while (unlikely(read_seqretry(&gtod->lock, seq)));
-
- /* wall_time_nsec, vgetns(), and wall_to_monotonic.tv_nsec
- * are all guaranteed to be nonnegative.
- */
- while (ns >= NSEC_PER_SEC) {
- ns -= NSEC_PER_SEC;
- ++secs;
- }
- ts->tv_sec = secs;
- ts->tv_nsec = ns;
+ seq = read_seqcount_begin(&gtod->seq);
+ mode = gtod->clock.vclock_mode;
+ ts->tv_sec = gtod->monotonic_time_sec;
+ ts->tv_nsec = gtod->monotonic_time_nsec;
+ ns = vgetns();
+ } while (unlikely(read_seqcount_retry(&gtod->seq, seq)));
+ timespec_add_ns(ts, ns);
- return 0;
+ return mode;
}
-notrace static noinline int do_realtime_coarse(struct timespec *ts)
+notrace static int do_realtime_coarse(struct timespec *ts)
{
unsigned long seq;
do {
- seq = read_seqbegin(&gtod->lock);
+ seq = read_seqcount_begin(&gtod->seq);
ts->tv_sec = gtod->wall_time_coarse.tv_sec;
ts->tv_nsec = gtod->wall_time_coarse.tv_nsec;
- } while (unlikely(read_seqretry(&gtod->lock, seq)));
+ } while (unlikely(read_seqcount_retry(&gtod->seq, seq)));
return 0;
}
-notrace static noinline int do_monotonic_coarse(struct timespec *ts)
+notrace static int do_monotonic_coarse(struct timespec *ts)
{
- unsigned long seq, ns, secs;
+ unsigned long seq;
do {
- seq = read_seqbegin(&gtod->lock);
- secs = gtod->wall_time_coarse.tv_sec;
- ns = gtod->wall_time_coarse.tv_nsec;
- secs += gtod->wall_to_monotonic.tv_sec;
- ns += gtod->wall_to_monotonic.tv_nsec;
- } while (unlikely(read_seqretry(&gtod->lock, seq)));
-
- /* wall_time_nsec and wall_to_monotonic.tv_nsec are
- * guaranteed to be between 0 and NSEC_PER_SEC.
- */
- if (ns >= NSEC_PER_SEC) {
- ns -= NSEC_PER_SEC;
- ++secs;
- }
- ts->tv_sec = secs;
- ts->tv_nsec = ns;
+ seq = read_seqcount_begin(&gtod->seq);
+ ts->tv_sec = gtod->monotonic_time_coarse.tv_sec;
+ ts->tv_nsec = gtod->monotonic_time_coarse.tv_nsec;
+ } while (unlikely(read_seqcount_retry(&gtod->seq, seq)));
return 0;
}
notrace int __vdso_clock_gettime(clockid_t clock, struct timespec *ts)
{
+ int ret = VCLOCK_NONE;
+
switch (clock) {
case CLOCK_REALTIME:
- if (likely(gtod->clock.vclock_mode != VCLOCK_NONE))
- return do_realtime(ts);
+ ret = do_realtime(ts);
break;
case CLOCK_MONOTONIC:
- if (likely(gtod->clock.vclock_mode != VCLOCK_NONE))
- return do_monotonic(ts);
+ ret = do_monotonic(ts);
break;
case CLOCK_REALTIME_COARSE:
return do_realtime_coarse(ts);
@@ -171,32 +169,33 @@ notrace int __vdso_clock_gettime(clockid_t clock, struct timespec *ts)
return do_monotonic_coarse(ts);
}
- return vdso_fallback_gettime(clock, ts);
+ if (ret == VCLOCK_NONE)
+ return vdso_fallback_gettime(clock, ts);
+ return 0;
}
int clock_gettime(clockid_t, struct timespec *)
__attribute__((weak, alias("__vdso_clock_gettime")));
notrace int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz)
{
- long ret;
- if (likely(gtod->clock.vclock_mode != VCLOCK_NONE)) {
- if (likely(tv != NULL)) {
- BUILD_BUG_ON(offsetof(struct timeval, tv_usec) !=
- offsetof(struct timespec, tv_nsec) ||
- sizeof(*tv) != sizeof(struct timespec));
- do_realtime((struct timespec *)tv);
- tv->tv_usec /= 1000;
- }
- if (unlikely(tz != NULL)) {
- /* Avoid memcpy. Some old compilers fail to inline it */
- tz->tz_minuteswest = gtod->sys_tz.tz_minuteswest;
- tz->tz_dsttime = gtod->sys_tz.tz_dsttime;
- }
- return 0;
+ long ret = VCLOCK_NONE;
+
+ if (likely(tv != NULL)) {
+ BUILD_BUG_ON(offsetof(struct timeval, tv_usec) !=
+ offsetof(struct timespec, tv_nsec) ||
+ sizeof(*tv) != sizeof(struct timespec));
+ ret = do_realtime((struct timespec *)tv);
+ tv->tv_usec /= 1000;
}
- asm("syscall" : "=a" (ret) :
- "0" (__NR_gettimeofday), "D" (tv), "S" (tz) : "memory");
- return ret;
+ if (unlikely(tz != NULL)) {
+ /* Avoid memcpy. Some old compilers fail to inline it */
+ tz->tz_minuteswest = gtod->sys_tz.tz_minuteswest;
+ tz->tz_dsttime = gtod->sys_tz.tz_dsttime;
+ }
+
+ if (ret == VCLOCK_NONE)
+ return vdso_fallback_gtod(tv, tz);
+ return 0;
}
int gettimeofday(struct timeval *, struct timezone *)
__attribute__((weak, alias("__vdso_gettimeofday")));
diff --git a/arch/x86/vdso/vdso32-setup.c b/arch/x86/vdso/vdso32-setup.c
index a944020..66e6d93 100644
--- a/arch/x86/vdso/vdso32-setup.c
+++ b/arch/x86/vdso/vdso32-setup.c
@@ -311,6 +311,11 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
int ret = 0;
bool compat;
+#ifdef CONFIG_X86_X32_ABI
+ if (test_thread_flag(TIF_X32))
+ return x32_setup_additional_pages(bprm, uses_interp);
+#endif
+
if (vdso_enabled == VDSO_DISABLED)
return 0;
diff --git a/arch/x86/vdso/vdsox32.S b/arch/x86/vdso/vdsox32.S
new file mode 100644
index 0000000..d6b9a7f
--- /dev/null
+++ b/arch/x86/vdso/vdsox32.S
@@ -0,0 +1,22 @@
+#include <asm/page_types.h>
+#include <linux/linkage.h>
+#include <linux/init.h>
+
+__PAGE_ALIGNED_DATA
+
+ .globl vdsox32_start, vdsox32_end
+ .align PAGE_SIZE
+vdsox32_start:
+ .incbin "arch/x86/vdso/vdsox32.so"
+vdsox32_end:
+ .align PAGE_SIZE /* extra data here leaks to userspace. */
+
+.previous
+
+ .globl vdsox32_pages
+ .bss
+ .align 8
+ .type vdsox32_pages, @object
+vdsox32_pages:
+ .zero (vdsox32_end - vdsox32_start + PAGE_SIZE - 1) / PAGE_SIZE * 8
+ .size vdsox32_pages, .-vdsox32_pages
diff --git a/arch/x86/vdso/vdsox32.lds.S b/arch/x86/vdso/vdsox32.lds.S
new file mode 100644
index 0000000..62272aa
--- /dev/null
+++ b/arch/x86/vdso/vdsox32.lds.S
@@ -0,0 +1,28 @@
+/*
+ * Linker script for x32 vDSO.
+ * We #include the file to define the layout details.
+ * Here we only choose the prelinked virtual address.
+ *
+ * This file defines the version script giving the user-exported symbols in
+ * the DSO. We can define local symbols here called VDSO* to make their
+ * values visible using the asm-x86/vdso.h macros from the kernel proper.
+ */
+
+#define VDSO_PRELINK 0
+#include "vdso-layout.lds.S"
+
+/*
+ * This controls what userland symbols we export from the vDSO.
+ */
+VERSION {
+ LINUX_2.6 {
+ global:
+ __vdso_clock_gettime;
+ __vdso_gettimeofday;
+ __vdso_getcpu;
+ __vdso_time;
+ local: *;
+ };
+}
+
+VDSOX32_PRELINK = VDSO_PRELINK;
diff --git a/arch/x86/vdso/vma.c b/arch/x86/vdso/vma.c
index 17e1827..00aaf04 100644
--- a/arch/x86/vdso/vma.c
+++ b/arch/x86/vdso/vma.c
@@ -24,7 +24,44 @@ extern unsigned short vdso_sync_cpuid;
extern struct page *vdso_pages[];
static unsigned vdso_size;
-static void __init patch_vdso(void *vdso, size_t len)
+#ifdef CONFIG_X86_X32_ABI
+extern char vdsox32_start[], vdsox32_end[];
+extern struct page *vdsox32_pages[];
+static unsigned vdsox32_size;
+
+static void __init patch_vdsox32(void *vdso, size_t len)
+{
+ Elf32_Ehdr *hdr = vdso;
+ Elf32_Shdr *sechdrs, *alt_sec = 0;
+ char *secstrings;
+ void *alt_data;
+ int i;
+
+ BUG_ON(len < sizeof(Elf32_Ehdr));
+ BUG_ON(memcmp(hdr->e_ident, ELFMAG, SELFMAG) != 0);
+
+ sechdrs = (void *)hdr + hdr->e_shoff;
+ secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset;
+
+ for (i = 1; i < hdr->e_shnum; i++) {
+ Elf32_Shdr *shdr = &sechdrs[i];
+ if (!strcmp(secstrings + shdr->sh_name, ".altinstructions")) {
+ alt_sec = shdr;
+ goto found;
+ }
+ }
+
+ /* If we get here, it's probably a bug. */
+ pr_warning("patch_vdsox32: .altinstructions not found\n");
+ return; /* nothing to patch */
+
+found:
+ alt_data = (void *)hdr + alt_sec->sh_offset;
+ apply_alternatives(alt_data, alt_data + alt_sec->sh_size);
+}
+#endif
+
+static void __init patch_vdso64(void *vdso, size_t len)
{
Elf64_Ehdr *hdr = vdso;
Elf64_Shdr *sechdrs, *alt_sec = 0;
@@ -47,7 +84,7 @@ static void __init patch_vdso(void *vdso, size_t len)
}
/* If we get here, it's probably a bug. */
- pr_warning("patch_vdso: .altinstructions not found\n");
+ pr_warning("patch_vdso64: .altinstructions not found\n");
return; /* nothing to patch */
found:
@@ -60,12 +97,20 @@ static int __init init_vdso(void)
int npages = (vdso_end - vdso_start + PAGE_SIZE - 1) / PAGE_SIZE;
int i;
- patch_vdso(vdso_start, vdso_end - vdso_start);
+ patch_vdso64(vdso_start, vdso_end - vdso_start);
vdso_size = npages << PAGE_SHIFT;
for (i = 0; i < npages; i++)
vdso_pages[i] = virt_to_page(vdso_start + i*PAGE_SIZE);
+#ifdef CONFIG_X86_X32_ABI
+ patch_vdsox32(vdsox32_start, vdsox32_end - vdsox32_start);
+ npages = (vdsox32_end - vdsox32_start + PAGE_SIZE - 1) / PAGE_SIZE;
+ vdsox32_size = npages << PAGE_SHIFT;
+ for (i = 0; i < npages; i++)
+ vdsox32_pages[i] = virt_to_page(vdsox32_start + i*PAGE_SIZE);
+#endif
+
return 0;
}
subsys_initcall(init_vdso);
@@ -103,7 +148,10 @@ static unsigned long vdso_addr(unsigned long start, unsigned len)
/* Setup a VMA at program startup for the vsyscall page.
Not called for compat tasks */
-int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
+static int setup_additional_pages(struct linux_binprm *bprm,
+ int uses_interp,
+ struct page **pages,
+ unsigned size)
{
struct mm_struct *mm = current->mm;
unsigned long addr;
@@ -113,8 +161,8 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
return 0;
down_write(&mm->mmap_sem);
- addr = vdso_addr(mm->start_stack, vdso_size);
- addr = get_unmapped_area(NULL, addr, vdso_size, 0, 0);
+ addr = vdso_addr(mm->start_stack, size);
+ addr = get_unmapped_area(NULL, addr, size, 0, 0);
if (IS_ERR_VALUE(addr)) {
ret = addr;
goto up_fail;
@@ -122,10 +170,10 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
current->mm->context.vdso = (void *)addr;
- ret = install_special_mapping(mm, addr, vdso_size,
+ ret = install_special_mapping(mm, addr, size,
VM_READ|VM_EXEC|
VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
- vdso_pages);
+ pages);
if (ret) {
current->mm->context.vdso = NULL;
goto up_fail;
@@ -136,6 +184,20 @@ up_fail:
return ret;
}
+int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
+{
+ return setup_additional_pages(bprm, uses_interp, vdso_pages,
+ vdso_size);
+}
+
+#ifdef CONFIG_X86_X32_ABI
+int x32_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
+{
+ return setup_additional_pages(bprm, uses_interp, vdsox32_pages,
+ vdsox32_size);
+}
+#endif
+
static __init int vdso_setup(char *s)
{
vdso_enabled = simple_strtoul(s, NULL, 0);