From 9bf05098ce34e68a9e15f09ad6cdfea4ed64057a Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Mon, 23 May 2011 10:23:32 +0200 Subject: [S390] cio: fix unreg race in set_online path In ccw_device_set_online we basically start path verification and wait for the device to reach a final state. If it turns out that the device has no useable path we schedule the deregistration of the device (which is still in an non-final state) and wake up the waiting process. The deregistration process will set a final state, but if the wake up happens to be prior to this, the device will hang forever in ccw_device_set_online. To fix this just set the final NOT_OPER state prior to the scheduled deregistration of the device. Signed-off-by: Sebastian Ott Signed-off-by: Martin Schwidefsky diff --git a/drivers/s390/cio/device_fsm.c b/drivers/s390/cio/device_fsm.c index 6084103..e087a86 100644 --- a/drivers/s390/cio/device_fsm.c +++ b/drivers/s390/cio/device_fsm.c @@ -408,9 +408,10 @@ ccw_device_done(struct ccw_device *cdev, int state) CIO_MSG_EVENT(0, "Disconnected device %04x on subchannel " "%04x\n", cdev->private->dev_id.devno, sch->schid.sch_no); - if (ccw_device_notify(cdev, CIO_NO_PATH) != NOTIFY_OK) + if (ccw_device_notify(cdev, CIO_NO_PATH) != NOTIFY_OK) { + cdev->private->state = DEV_STATE_NOT_OPER; ccw_device_sched_todo(cdev, CDEV_TODO_UNREG); - else + } else ccw_device_set_disconnected(cdev); cdev->private->flags.donotify = 0; break; -- cgit v0.10.2 From 043d07084b5347a26eab0a07aa13a4a929ad9e71 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Mon, 23 May 2011 10:24:23 +0200 Subject: [S390] Remove data execution protection The noexec support on s390 does not rely on a bit in the page table entry but utilizes the secondary space mode to distinguish between memory accesses for instructions vs. data. The noexec code relies on the assumption that the cpu will always use the secondary space page table for data accesses while it is running in the secondary space mode. Up to the z9-109 class machines this has been the case. Unfortunately this is not true anymore with z10 and later machines. The load-relative-long instructions lrl, lgrl and lgfrl access the memory operand using the same addressing-space mode that has been used to fetch the instruction. This breaks the noexec mode for all user space binaries compiled with march=z10 or later. The only option is to remove the current noexec support. Signed-off-by: Martin Schwidefsky diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 4a7f140..ff2d237 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -230,17 +230,6 @@ config SYSVIPC_COMPAT config AUDIT_ARCH def_bool y -config S390_EXEC_PROTECT - def_bool y - prompt "Data execute protection" - help - This option allows to enable a buffer overflow protection for user - space programs and it also selects the addressing mode option above. - The kernel parameter noexec=on will enable this feature and also - switch the addressing modes, default is disabled. Enabling this (via - kernel parameter) on machines earlier than IBM System z9 this will - reduce system performance. - comment "Code generation options" choice diff --git a/arch/s390/include/asm/elf.h b/arch/s390/include/asm/elf.h index 10c029c..64b61bf 100644 --- a/arch/s390/include/asm/elf.h +++ b/arch/s390/include/asm/elf.h @@ -196,18 +196,6 @@ do { \ } while (0) #endif /* __s390x__ */ -/* - * An executable for which elf_read_implies_exec() returns TRUE will - * have the READ_IMPLIES_EXEC personality flag set automatically. - */ -#define elf_read_implies_exec(ex, executable_stack) \ -({ \ - if (current->mm->context.noexec && \ - executable_stack != EXSTACK_DISABLE_X) \ - disable_noexec(current->mm, current); \ - current->mm->context.noexec == 0; \ -}) - #define STACK_RND_MASK 0x7ffUL #define ARCH_DLINFO \ diff --git a/arch/s390/include/asm/hugetlb.h b/arch/s390/include/asm/hugetlb.h index b56403c..799ed0f 100644 --- a/arch/s390/include/asm/hugetlb.h +++ b/arch/s390/include/asm/hugetlb.h @@ -111,21 +111,10 @@ static inline void huge_ptep_invalidate(struct mm_struct *mm, { pmd_t *pmdp = (pmd_t *) ptep; - if (!MACHINE_HAS_IDTE) { - __pmd_csp(pmdp); - if (mm->context.noexec) { - pmdp = get_shadow_table(pmdp); - __pmd_csp(pmdp); - } - return; - } - - __pmd_idte(address, pmdp); - if (mm->context.noexec) { - pmdp = get_shadow_table(pmdp); + if (MACHINE_HAS_IDTE) __pmd_idte(address, pmdp); - } - return; + else + __pmd_csp(pmdp); } #define huge_ptep_set_access_flags(__vma, __addr, __ptep, __entry, __dirty) \ diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h index 65e172f..b8624d5 100644 --- a/arch/s390/include/asm/lowcore.h +++ b/arch/s390/include/asm/lowcore.h @@ -124,7 +124,7 @@ struct _lowcore { /* Address space pointer. */ __u32 kernel_asce; /* 0x02ac */ __u32 user_asce; /* 0x02b0 */ - __u32 user_exec_asce; /* 0x02b4 */ + __u8 pad_0x02b4[0x02b8-0x02b4]; /* 0x02b4 */ /* SMP info area */ __u32 cpu_nr; /* 0x02b8 */ @@ -255,7 +255,7 @@ struct _lowcore { /* Address space pointer. */ __u64 kernel_asce; /* 0x0310 */ __u64 user_asce; /* 0x0318 */ - __u64 user_exec_asce; /* 0x0320 */ + __u8 pad_0x0320[0x0328-0x0320]; /* 0x0320 */ /* SMP info area */ __u32 cpu_nr; /* 0x0328 */ diff --git a/arch/s390/include/asm/mmu.h b/arch/s390/include/asm/mmu.h index 78522cde..818e829 100644 --- a/arch/s390/include/asm/mmu.h +++ b/arch/s390/include/asm/mmu.h @@ -5,19 +5,16 @@ typedef struct { atomic_t attach_count; unsigned int flush_mm; spinlock_t list_lock; - struct list_head crst_list; struct list_head pgtable_list; unsigned long asce_bits; unsigned long asce_limit; unsigned long vdso_base; - int noexec; int has_pgste; /* The mmu context has extended page tables */ int alloc_pgste; /* cloned contexts will have extended page tables */ } mm_context_t; #define INIT_MM_CONTEXT(name) \ .context.list_lock = __SPIN_LOCK_UNLOCKED(name.context.list_lock), \ - .context.crst_list = LIST_HEAD_INIT(name.context.crst_list), \ .context.pgtable_list = LIST_HEAD_INIT(name.context.pgtable_list), #endif diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h index 8c277ca..5682f16 100644 --- a/arch/s390/include/asm/mmu_context.h +++ b/arch/s390/include/asm/mmu_context.h @@ -35,11 +35,9 @@ static inline int init_new_context(struct task_struct *tsk, * and if has_pgste is set, it will create extended page * tables. */ - mm->context.noexec = 0; mm->context.has_pgste = 1; mm->context.alloc_pgste = 1; } else { - mm->context.noexec = (user_mode == SECONDARY_SPACE_MODE); mm->context.has_pgste = 0; mm->context.alloc_pgste = 0; } @@ -63,10 +61,8 @@ static inline void update_mm(struct mm_struct *mm, struct task_struct *tsk) S390_lowcore.user_asce = mm->context.asce_bits | __pa(pgd); if (user_mode != HOME_SPACE_MODE) { /* Load primary space page table origin. */ - pgd = mm->context.noexec ? get_shadow_table(pgd) : pgd; - S390_lowcore.user_exec_asce = mm->context.asce_bits | __pa(pgd); asm volatile(LCTL_OPCODE" 1,1,%0\n" - : : "m" (S390_lowcore.user_exec_asce) ); + : : "m" (S390_lowcore.user_asce) ); } else /* Load home space page table origin. */ asm volatile(LCTL_OPCODE" 13,13,%0" diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h index 082eb4e..739ff9e 100644 --- a/arch/s390/include/asm/pgalloc.h +++ b/arch/s390/include/asm/pgalloc.h @@ -19,14 +19,13 @@ #define check_pgt_cache() do {} while (0) -unsigned long *crst_table_alloc(struct mm_struct *, int); +unsigned long *crst_table_alloc(struct mm_struct *); void crst_table_free(struct mm_struct *, unsigned long *); void crst_table_free_rcu(struct mm_struct *, unsigned long *); unsigned long *page_table_alloc(struct mm_struct *); void page_table_free(struct mm_struct *, unsigned long *); void page_table_free_rcu(struct mm_struct *, unsigned long *); -void disable_noexec(struct mm_struct *, struct task_struct *); static inline void clear_table(unsigned long *s, unsigned long val, size_t n) { @@ -50,9 +49,6 @@ static inline void clear_table(unsigned long *s, unsigned long val, size_t n) static inline void crst_table_init(unsigned long *crst, unsigned long entry) { clear_table(crst, entry, sizeof(unsigned long)*2048); - crst = get_shadow_table(crst); - if (crst) - clear_table(crst, entry, sizeof(unsigned long)*2048); } #ifndef __s390x__ @@ -90,7 +86,7 @@ void crst_table_downgrade(struct mm_struct *, unsigned long limit); static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long address) { - unsigned long *table = crst_table_alloc(mm, mm->context.noexec); + unsigned long *table = crst_table_alloc(mm); if (table) crst_table_init(table, _REGION3_ENTRY_EMPTY); return (pud_t *) table; @@ -99,7 +95,7 @@ static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long address) static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long vmaddr) { - unsigned long *table = crst_table_alloc(mm, mm->context.noexec); + unsigned long *table = crst_table_alloc(mm); if (table) crst_table_init(table, _SEGMENT_ENTRY_EMPTY); return (pmd_t *) table; @@ -115,11 +111,6 @@ static inline void pgd_populate_kernel(struct mm_struct *mm, static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud) { pgd_populate_kernel(mm, pgd, pud); - if (mm->context.noexec) { - pgd = get_shadow_table(pgd); - pud = get_shadow_table(pud); - pgd_populate_kernel(mm, pgd, pud); - } } static inline void pud_populate_kernel(struct mm_struct *mm, @@ -131,11 +122,6 @@ static inline void pud_populate_kernel(struct mm_struct *mm, static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd) { pud_populate_kernel(mm, pud, pmd); - if (mm->context.noexec) { - pud = get_shadow_table(pud); - pmd = get_shadow_table(pmd); - pud_populate_kernel(mm, pud, pmd); - } } #endif /* __s390x__ */ @@ -143,10 +129,8 @@ static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd) static inline pgd_t *pgd_alloc(struct mm_struct *mm) { spin_lock_init(&mm->context.list_lock); - INIT_LIST_HEAD(&mm->context.crst_list); INIT_LIST_HEAD(&mm->context.pgtable_list); - return (pgd_t *) - crst_table_alloc(mm, user_mode == SECONDARY_SPACE_MODE); + return (pgd_t *) crst_table_alloc(mm); } #define pgd_free(mm, pgd) crst_table_free(mm, (unsigned long *) pgd) @@ -160,10 +144,6 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, pgtable_t pte) { pmd_populate_kernel(mm, pmd, pte); - if (mm->context.noexec) { - pmd = get_shadow_table(pmd); - pmd_populate_kernel(mm, pmd, pte + PTRS_PER_PTE); - } } #define pmd_pgtable(pmd) \ diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 02ace34..763620e 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -256,8 +256,6 @@ extern unsigned long VMALLOC_START; #define _PAGE_TYPE_FILE 0x601 /* bit 0x002 is used for offset !! */ #define _PAGE_TYPE_RO 0x200 #define _PAGE_TYPE_RW 0x000 -#define _PAGE_TYPE_EX_RO 0x202 -#define _PAGE_TYPE_EX_RW 0x002 /* * Only four types for huge pages, using the invalid bit and protection bit @@ -287,8 +285,6 @@ extern unsigned long VMALLOC_START; * _PAGE_TYPE_FILE 11?1 -> 11?1 * _PAGE_TYPE_RO 0100 -> 1100 * _PAGE_TYPE_RW 0000 -> 1000 - * _PAGE_TYPE_EX_RO 0110 -> 1110 - * _PAGE_TYPE_EX_RW 0010 -> 1010 * * pte_none is true for bits combinations 1000, 1010, 1100, 1110 * pte_present is true for bits combinations 0000, 0010, 0100, 0110, 1001 @@ -387,55 +383,33 @@ extern unsigned long VMALLOC_START; #define PAGE_NONE __pgprot(_PAGE_TYPE_NONE) #define PAGE_RO __pgprot(_PAGE_TYPE_RO) #define PAGE_RW __pgprot(_PAGE_TYPE_RW) -#define PAGE_EX_RO __pgprot(_PAGE_TYPE_EX_RO) -#define PAGE_EX_RW __pgprot(_PAGE_TYPE_EX_RW) #define PAGE_KERNEL PAGE_RW #define PAGE_COPY PAGE_RO /* - * Dependent on the EXEC_PROTECT option s390 can do execute protection. - * Write permission always implies read permission. In theory with a - * primary/secondary page table execute only can be implemented but - * it would cost an additional bit in the pte to distinguish all the - * different pte types. To avoid that execute permission currently - * implies read permission as well. + * On s390 the page table entry has an invalid bit and a read-only bit. + * Read permission implies execute permission and write permission + * implies read permission. */ /*xwr*/ #define __P000 PAGE_NONE #define __P001 PAGE_RO #define __P010 PAGE_RO #define __P011 PAGE_RO -#define __P100 PAGE_EX_RO -#define __P101 PAGE_EX_RO -#define __P110 PAGE_EX_RO -#define __P111 PAGE_EX_RO +#define __P100 PAGE_RO +#define __P101 PAGE_RO +#define __P110 PAGE_RO +#define __P111 PAGE_RO #define __S000 PAGE_NONE #define __S001 PAGE_RO #define __S010 PAGE_RW #define __S011 PAGE_RW -#define __S100 PAGE_EX_RO -#define __S101 PAGE_EX_RO -#define __S110 PAGE_EX_RW -#define __S111 PAGE_EX_RW - -#ifndef __s390x__ -# define PxD_SHADOW_SHIFT 1 -#else /* __s390x__ */ -# define PxD_SHADOW_SHIFT 2 -#endif /* __s390x__ */ - -static inline void *get_shadow_table(void *table) -{ - unsigned long addr, offset; - struct page *page; - - addr = (unsigned long) table; - offset = addr & ((PAGE_SIZE << PxD_SHADOW_SHIFT) - 1); - page = virt_to_page((void *)(addr ^ offset)); - return (void *)(addr_t)(page->index ? (page->index | offset) : 0UL); -} +#define __S100 PAGE_RO +#define __S101 PAGE_RO +#define __S110 PAGE_RW +#define __S111 PAGE_RW /* * Certain architectures need to do special things when PTEs @@ -446,14 +420,6 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t entry) { *ptep = entry; - if (mm->context.noexec) { - if (!(pte_val(entry) & _PAGE_INVALID) && - (pte_val(entry) & _PAGE_SWX)) - pte_val(entry) |= _PAGE_RO; - else - pte_val(entry) = _PAGE_TYPE_EMPTY; - ptep[PTRS_PER_PTE] = entry; - } } /* @@ -662,11 +628,7 @@ static inline void pgd_clear_kernel(pgd_t * pgd) static inline void pgd_clear(pgd_t * pgd) { - pgd_t *shadow = get_shadow_table(pgd); - pgd_clear_kernel(pgd); - if (shadow) - pgd_clear_kernel(shadow); } static inline void pud_clear_kernel(pud_t *pud) @@ -677,13 +639,8 @@ static inline void pud_clear_kernel(pud_t *pud) static inline void pud_clear(pud_t *pud) { - pud_t *shadow = get_shadow_table(pud); - pud_clear_kernel(pud); - if (shadow) - pud_clear_kernel(shadow); } - #endif /* __s390x__ */ static inline void pmd_clear_kernel(pmd_t * pmdp) @@ -693,18 +650,12 @@ static inline void pmd_clear_kernel(pmd_t * pmdp) static inline void pmd_clear(pmd_t *pmd) { - pmd_t *shadow = get_shadow_table(pmd); - pmd_clear_kernel(pmd); - if (shadow) - pmd_clear_kernel(shadow); } static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { pte_val(*ptep) = _PAGE_TYPE_EMPTY; - if (mm->context.noexec) - pte_val(ptep[PTRS_PER_PTE]) = _PAGE_TYPE_EMPTY; } /* @@ -903,10 +854,6 @@ static inline void ptep_invalidate(struct mm_struct *mm, } __ptep_ipte(address, ptep); pte_val(*ptep) = _PAGE_TYPE_EMPTY; - if (mm->context.noexec) { - __ptep_ipte(address, ptep + PTRS_PER_PTE); - pte_val(*(ptep + PTRS_PER_PTE)) = _PAGE_TYPE_EMPTY; - } } /* diff --git a/arch/s390/include/asm/tlbflush.h b/arch/s390/include/asm/tlbflush.h index 29d5d6d..4fdcefc 100644 --- a/arch/s390/include/asm/tlbflush.h +++ b/arch/s390/include/asm/tlbflush.h @@ -80,16 +80,11 @@ static inline void __tlb_flush_mm(struct mm_struct * mm) * on all cpus instead of doing a local flush if the mm * only ran on the local cpu. */ - if (MACHINE_HAS_IDTE) { - if (mm->context.noexec) - __tlb_flush_idte((unsigned long) - get_shadow_table(mm->pgd) | - mm->context.asce_bits); + if (MACHINE_HAS_IDTE) __tlb_flush_idte((unsigned long) mm->pgd | mm->context.asce_bits); - return; - } - __tlb_flush_full(mm); + else + __tlb_flush_full(mm); } static inline void __tlb_flush_mm_cond(struct mm_struct * mm) diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c index fe03c14..ef45556 100644 --- a/arch/s390/kernel/asm-offsets.c +++ b/arch/s390/kernel/asm-offsets.c @@ -128,9 +128,6 @@ int main(void) DEFINE(__LC_KERNEL_STACK, offsetof(struct _lowcore, kernel_stack)); DEFINE(__LC_ASYNC_STACK, offsetof(struct _lowcore, async_stack)); DEFINE(__LC_PANIC_STACK, offsetof(struct _lowcore, panic_stack)); - DEFINE(__LC_KERNEL_ASCE, offsetof(struct _lowcore, kernel_asce)); - DEFINE(__LC_USER_ASCE, offsetof(struct _lowcore, user_asce)); - DEFINE(__LC_USER_EXEC_ASCE, offsetof(struct _lowcore, user_exec_asce)); DEFINE(__LC_INT_CLOCK, offsetof(struct _lowcore, int_clock)); DEFINE(__LC_MCCK_CLOCK, offsetof(struct _lowcore, mcck_clock)); DEFINE(__LC_MACHINE_FLAGS, offsetof(struct _lowcore, machine_flags)); diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index f5434d1..0c35dee 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -305,8 +305,7 @@ static int set_amode_and_uaccess(unsigned long user_amode, */ static int __init early_parse_switch_amode(char *p) { - if (user_mode != SECONDARY_SPACE_MODE) - user_mode = PRIMARY_SPACE_MODE; + user_mode = PRIMARY_SPACE_MODE; return 0; } early_param("switch_amode", early_parse_switch_amode); @@ -315,10 +314,6 @@ static int __init early_parse_user_mode(char *p) { if (p && strcmp(p, "primary") == 0) user_mode = PRIMARY_SPACE_MODE; -#ifdef CONFIG_S390_EXEC_PROTECT - else if (p && strcmp(p, "secondary") == 0) - user_mode = SECONDARY_SPACE_MODE; -#endif else if (!p || strcmp(p, "home") == 0) user_mode = HOME_SPACE_MODE; else @@ -327,31 +322,9 @@ static int __init early_parse_user_mode(char *p) } early_param("user_mode", early_parse_user_mode); -#ifdef CONFIG_S390_EXEC_PROTECT -/* - * Enable execute protection? - */ -static int __init early_parse_noexec(char *p) -{ - if (!strncmp(p, "off", 3)) - return 0; - user_mode = SECONDARY_SPACE_MODE; - return 0; -} -early_param("noexec", early_parse_noexec); -#endif /* CONFIG_S390_EXEC_PROTECT */ - static void setup_addressing_mode(void) { - if (user_mode == SECONDARY_SPACE_MODE) { - if (set_amode_and_uaccess(PSW_ASC_SECONDARY, - PSW32_ASC_SECONDARY)) - pr_info("Execute protection active, " - "mvcos available\n"); - else - pr_info("Execute protection active, " - "mvcos not available\n"); - } else if (user_mode == PRIMARY_SPACE_MODE) { + if (user_mode == PRIMARY_SPACE_MODE) { if (set_amode_and_uaccess(PSW_ASC_PRIMARY, PSW32_ASC_PRIMARY)) pr_info("Address spaces switched, " "mvcos available\n"); diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index ab98813..177745c 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -225,33 +225,6 @@ static noinline void do_sigbus(struct pt_regs *regs, long int_code, force_sig_info(SIGBUS, &si, tsk); } -#ifdef CONFIG_S390_EXEC_PROTECT -static noinline int signal_return(struct pt_regs *regs, long int_code, - unsigned long trans_exc_code) -{ - u16 instruction; - int rc; - - rc = __get_user(instruction, (u16 __user *) regs->psw.addr); - - if (!rc && instruction == 0x0a77) { - clear_tsk_thread_flag(current, TIF_PER_TRAP); - if (is_compat_task()) - sys32_sigreturn(); - else - sys_sigreturn(); - } else if (!rc && instruction == 0x0aad) { - clear_tsk_thread_flag(current, TIF_PER_TRAP); - if (is_compat_task()) - sys32_rt_sigreturn(); - else - sys_rt_sigreturn(); - } else - do_sigsegv(regs, int_code, SEGV_MAPERR, trans_exc_code); - return 0; -} -#endif /* CONFIG_S390_EXEC_PROTECT */ - static noinline void do_fault_error(struct pt_regs *regs, long int_code, unsigned long trans_exc_code, int fault) { @@ -259,13 +232,6 @@ static noinline void do_fault_error(struct pt_regs *regs, long int_code, switch (fault) { case VM_FAULT_BADACCESS: -#ifdef CONFIG_S390_EXEC_PROTECT - if ((regs->psw.mask & PSW_MASK_ASC) == PSW_ASC_SECONDARY && - (trans_exc_code & 3) == 0) { - signal_return(regs, int_code, trans_exc_code); - break; - } -#endif /* CONFIG_S390_EXEC_PROTECT */ case VM_FAULT_BADMAP: /* Bad memory access. Check if it is kernel or user space. */ if (regs->psw.mask & PSW_MASK_PSTATE) { @@ -414,11 +380,6 @@ void __kprobes do_dat_exception(struct pt_regs *regs, long pgm_int_code, int access, fault; access = VM_READ | VM_EXEC | VM_WRITE; -#ifdef CONFIG_S390_EXEC_PROTECT - if ((regs->psw.mask & PSW_MASK_ASC) == PSW_ASC_SECONDARY && - (trans_exc_code & 3) == 0) - access = VM_EXEC; -#endif fault = do_exception(regs, access, trans_exc_code); if (unlikely(fault)) do_fault_error(regs, pgm_int_code & 255, trans_exc_code, fault); diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c index 639cd21..a4d856d 100644 --- a/arch/s390/mm/hugetlbpage.c +++ b/arch/s390/mm/hugetlbpage.c @@ -13,7 +13,6 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *pteptr, pte_t pteval) { pmd_t *pmdp = (pmd_t *) pteptr; - pte_t shadow_pteval = pteval; unsigned long mask; if (!MACHINE_HAS_HPAGE) { @@ -21,18 +20,9 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, mask = pte_val(pteval) & (_SEGMENT_ENTRY_INV | _SEGMENT_ENTRY_RO); pte_val(pteval) = (_SEGMENT_ENTRY + __pa(pteptr)) | mask; - if (mm->context.noexec) { - pteptr += PTRS_PER_PTE; - pte_val(shadow_pteval) = - (_SEGMENT_ENTRY + __pa(pteptr)) | mask; - } } pmd_val(*pmdp) = pte_val(pteval); - if (mm->context.noexec) { - pmdp = get_shadow_table(pmdp); - pmd_val(*pmdp) = pte_val(shadow_pteval); - } } int arch_prepare_hugepage(struct page *page) diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index e1850c2..8d43306 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -40,7 +40,6 @@ DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); static DEFINE_PER_CPU(struct rcu_table_freelist *, rcu_table_freelist); static void __page_table_free(struct mm_struct *mm, unsigned long *table); -static void __crst_table_free(struct mm_struct *mm, unsigned long *table); static struct rcu_table_freelist *rcu_table_freelist_get(struct mm_struct *mm) { @@ -67,7 +66,7 @@ static void rcu_table_freelist_callback(struct rcu_head *head) while (batch->pgt_index > 0) __page_table_free(batch->mm, batch->table[--batch->pgt_index]); while (batch->crst_index < RCU_FREELIST_SIZE) - __crst_table_free(batch->mm, batch->table[batch->crst_index++]); + crst_table_free(batch->mm, batch->table[batch->crst_index++]); free_page((unsigned long) batch); } @@ -125,63 +124,33 @@ static int __init parse_vmalloc(char *arg) } early_param("vmalloc", parse_vmalloc); -unsigned long *crst_table_alloc(struct mm_struct *mm, int noexec) +unsigned long *crst_table_alloc(struct mm_struct *mm) { struct page *page = alloc_pages(GFP_KERNEL, ALLOC_ORDER); if (!page) return NULL; - page->index = 0; - if (noexec) { - struct page *shadow = alloc_pages(GFP_KERNEL, ALLOC_ORDER); - if (!shadow) { - __free_pages(page, ALLOC_ORDER); - return NULL; - } - page->index = page_to_phys(shadow); - } - spin_lock_bh(&mm->context.list_lock); - list_add(&page->lru, &mm->context.crst_list); - spin_unlock_bh(&mm->context.list_lock); return (unsigned long *) page_to_phys(page); } -static void __crst_table_free(struct mm_struct *mm, unsigned long *table) -{ - unsigned long *shadow = get_shadow_table(table); - - if (shadow) - free_pages((unsigned long) shadow, ALLOC_ORDER); - free_pages((unsigned long) table, ALLOC_ORDER); -} - void crst_table_free(struct mm_struct *mm, unsigned long *table) { - struct page *page = virt_to_page(table); - - spin_lock_bh(&mm->context.list_lock); - list_del(&page->lru); - spin_unlock_bh(&mm->context.list_lock); - __crst_table_free(mm, table); + free_pages((unsigned long) table, ALLOC_ORDER); } void crst_table_free_rcu(struct mm_struct *mm, unsigned long *table) { struct rcu_table_freelist *batch; - struct page *page = virt_to_page(table); - spin_lock_bh(&mm->context.list_lock); - list_del(&page->lru); - spin_unlock_bh(&mm->context.list_lock); if (atomic_read(&mm->mm_users) < 2 && cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) { - __crst_table_free(mm, table); + crst_table_free(mm, table); return; } batch = rcu_table_freelist_get(mm); if (!batch) { smp_call_function(smp_sync, NULL, 1); - __crst_table_free(mm, table); + crst_table_free(mm, table); return; } batch->table[--batch->crst_index] = table; @@ -197,7 +166,7 @@ int crst_table_upgrade(struct mm_struct *mm, unsigned long limit) BUG_ON(limit > (1UL << 53)); repeat: - table = crst_table_alloc(mm, mm->context.noexec); + table = crst_table_alloc(mm); if (!table) return -ENOMEM; spin_lock_bh(&mm->page_table_lock); @@ -273,7 +242,7 @@ unsigned long *page_table_alloc(struct mm_struct *mm) unsigned long *table; unsigned long bits; - bits = (mm->context.noexec || mm->context.has_pgste) ? 3UL : 1UL; + bits = (mm->context.has_pgste) ? 3UL : 1UL; spin_lock_bh(&mm->context.list_lock); page = NULL; if (!list_empty(&mm->context.pgtable_list)) { @@ -329,7 +298,7 @@ void page_table_free(struct mm_struct *mm, unsigned long *table) struct page *page; unsigned long bits; - bits = (mm->context.noexec || mm->context.has_pgste) ? 3UL : 1UL; + bits = (mm->context.has_pgste) ? 3UL : 1UL; bits <<= (__pa(table) & (PAGE_SIZE - 1)) / 256 / sizeof(unsigned long); page = pfn_to_page(__pa(table) >> PAGE_SHIFT); spin_lock_bh(&mm->context.list_lock); @@ -366,7 +335,7 @@ void page_table_free_rcu(struct mm_struct *mm, unsigned long *table) page_table_free(mm, table); return; } - bits = (mm->context.noexec || mm->context.has_pgste) ? 3UL : 1UL; + bits = (mm->context.has_pgste) ? 3UL : 1UL; bits <<= (__pa(table) & (PAGE_SIZE - 1)) / 256 / sizeof(unsigned long); page = pfn_to_page(__pa(table) >> PAGE_SHIFT); spin_lock_bh(&mm->context.list_lock); @@ -379,25 +348,6 @@ void page_table_free_rcu(struct mm_struct *mm, unsigned long *table) rcu_table_freelist_finish(); } -void disable_noexec(struct mm_struct *mm, struct task_struct *tsk) -{ - struct page *page; - - spin_lock_bh(&mm->context.list_lock); - /* Free shadow region and segment tables. */ - list_for_each_entry(page, &mm->context.crst_list, lru) - if (page->index) { - free_pages((unsigned long) page->index, ALLOC_ORDER); - page->index = 0; - } - /* "Free" second halves of page tables. */ - list_for_each_entry(page, &mm->context.pgtable_list, lru) - page->flags &= ~SECOND_HALVES; - spin_unlock_bh(&mm->context.list_lock); - mm->context.noexec = 0; - update_mm(mm, tsk); -} - /* * switch on pgstes for its userspace process (for kvm) */ -- cgit v0.10.2 From 66ceed5ad1318863c21710f316942bcefff8081c Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 23 May 2011 10:24:24 +0200 Subject: [S390] Remove tape block device driver. Remove the tape block device driver. It's not of real use but has already created some confusion when users wanted to access tape devices and used the block device nodes instead of the character device nodes. Also remove the whole tape documentation since it's completely outdated and we have the device drivers book which is the place where everything is properly documented. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky diff --git a/drivers/s390/char/Kconfig b/drivers/s390/char/Kconfig index dcee3c5..a4f117d 100644 --- a/drivers/s390/char/Kconfig +++ b/drivers/s390/char/Kconfig @@ -119,18 +119,6 @@ config S390_TAPE comment "S/390 tape interface support" depends on S390_TAPE -config S390_TAPE_BLOCK - def_bool y - prompt "Support for tape block devices" - depends on S390_TAPE && BLOCK - help - Select this option if you want to access your channel-attached tape - devices using the block device interface. This interface is similar - to CD-ROM devices on other platforms. The tapes can only be - accessed read-only when using this interface. Have a look at - for further information about creating - volumes for and using this interface. It is safe to say "Y" here. - comment "S/390 tape hardware support" depends on S390_TAPE diff --git a/drivers/s390/char/Makefile b/drivers/s390/char/Makefile index efb500a..af01b5a 100644 --- a/drivers/s390/char/Makefile +++ b/drivers/s390/char/Makefile @@ -22,7 +22,6 @@ obj-$(CONFIG_ZVM_WATCHDOG) += vmwatchdog.o obj-$(CONFIG_VMLOGRDR) += vmlogrdr.o obj-$(CONFIG_VMCP) += vmcp.o -tape-$(CONFIG_S390_TAPE_BLOCK) += tape_block.o tape-$(CONFIG_PROC_FS) += tape_proc.o tape-objs := tape_core.o tape_std.o tape_char.o $(tape-y) obj-$(CONFIG_S390_TAPE) += tape.o tape_class.o diff --git a/drivers/s390/char/tape_block.c b/drivers/s390/char/tape_block.c deleted file mode 100644 index 1b3924c..0000000 --- a/drivers/s390/char/tape_block.c +++ /dev/null @@ -1,444 +0,0 @@ -/* - * drivers/s390/char/tape_block.c - * block device frontend for tape device driver - * - * S390 and zSeries version - * Copyright (C) 2001,2003 IBM Deutschland Entwicklung GmbH, IBM Corporation - * Author(s): Carsten Otte - * Tuan Ngo-Anh - * Martin Schwidefsky - * Stefan Bader - */ - -#define KMSG_COMPONENT "tape" -#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt - -#include -#include -#include -#include -#include -#include -#include - -#include - -#define TAPE_DBF_AREA tape_core_dbf - -#include "tape.h" - -#define TAPEBLOCK_MAX_SEC 100 -#define TAPEBLOCK_MIN_REQUEUE 3 - -/* - * 2003/11/25 Stefan Bader - * - * In 2.5/2.6 the block device request function is very likely to be called - * with disabled interrupts (e.g. generic_unplug_device). So the driver can't - * just call any function that tries to allocate CCW requests from that con- - * text since it might sleep. There are two choices to work around this: - * a) do not allocate with kmalloc but use its own memory pool - * b) take requests from the queue outside that context, knowing that - * allocation might sleep - */ - -/* - * file operation structure for tape block frontend - */ -static DEFINE_MUTEX(tape_block_mutex); -static int tapeblock_open(struct block_device *, fmode_t); -static int tapeblock_release(struct gendisk *, fmode_t); -static unsigned int tapeblock_check_events(struct gendisk *, unsigned int); -static int tapeblock_revalidate_disk(struct gendisk *); - -static const struct block_device_operations tapeblock_fops = { - .owner = THIS_MODULE, - .open = tapeblock_open, - .release = tapeblock_release, - .check_events = tapeblock_check_events, - .revalidate_disk = tapeblock_revalidate_disk, -}; - -static int tapeblock_major = 0; - -static void -tapeblock_trigger_requeue(struct tape_device *device) -{ - /* Protect against rescheduling. */ - if (atomic_cmpxchg(&device->blk_data.requeue_scheduled, 0, 1) != 0) - return; - schedule_work(&device->blk_data.requeue_task); -} - -/* - * Post finished request. - */ -static void -__tapeblock_end_request(struct tape_request *ccw_req, void *data) -{ - struct tape_device *device; - struct request *req; - - DBF_LH(6, "__tapeblock_end_request()\n"); - - device = ccw_req->device; - req = (struct request *) data; - blk_end_request_all(req, (ccw_req->rc == 0) ? 0 : -EIO); - if (ccw_req->rc == 0) - /* Update position. */ - device->blk_data.block_position = - (blk_rq_pos(req) + blk_rq_sectors(req)) >> TAPEBLOCK_HSEC_S2B; - else - /* We lost the position information due to an error. */ - device->blk_data.block_position = -1; - device->discipline->free_bread(ccw_req); - if (!list_empty(&device->req_queue) || - blk_peek_request(device->blk_data.request_queue)) - tapeblock_trigger_requeue(device); -} - -/* - * Feed the tape device CCW queue with requests supplied in a list. - */ -static int -tapeblock_start_request(struct tape_device *device, struct request *req) -{ - struct tape_request * ccw_req; - int rc; - - DBF_LH(6, "tapeblock_start_request(%p, %p)\n", device, req); - - ccw_req = device->discipline->bread(device, req); - if (IS_ERR(ccw_req)) { - DBF_EVENT(1, "TBLOCK: bread failed\n"); - blk_end_request_all(req, -EIO); - return PTR_ERR(ccw_req); - } - ccw_req->callback = __tapeblock_end_request; - ccw_req->callback_data = (void *) req; - ccw_req->retries = TAPEBLOCK_RETRIES; - - rc = tape_do_io_async(device, ccw_req); - if (rc) { - /* - * Start/enqueueing failed. No retries in - * this case. - */ - blk_end_request_all(req, -EIO); - device->discipline->free_bread(ccw_req); - } - - return rc; -} - -/* - * Move requests from the block device request queue to the tape device ccw - * queue. - */ -static void -tapeblock_requeue(struct work_struct *work) { - struct tape_blk_data * blkdat; - struct tape_device * device; - struct request_queue * queue; - int nr_queued; - struct request * req; - struct list_head * l; - int rc; - - blkdat = container_of(work, struct tape_blk_data, requeue_task); - device = blkdat->device; - if (!device) - return; - - spin_lock_irq(get_ccwdev_lock(device->cdev)); - queue = device->blk_data.request_queue; - - /* Count number of requests on ccw queue. */ - nr_queued = 0; - list_for_each(l, &device->req_queue) - nr_queued++; - spin_unlock(get_ccwdev_lock(device->cdev)); - - spin_lock_irq(&device->blk_data.request_queue_lock); - while ( - blk_peek_request(queue) && - nr_queued < TAPEBLOCK_MIN_REQUEUE - ) { - req = blk_fetch_request(queue); - if (rq_data_dir(req) == WRITE) { - DBF_EVENT(1, "TBLOCK: Rejecting write request\n"); - spin_unlock_irq(&device->blk_data.request_queue_lock); - blk_end_request_all(req, -EIO); - spin_lock_irq(&device->blk_data.request_queue_lock); - continue; - } - nr_queued++; - spin_unlock_irq(&device->blk_data.request_queue_lock); - rc = tapeblock_start_request(device, req); - spin_lock_irq(&device->blk_data.request_queue_lock); - } - spin_unlock_irq(&device->blk_data.request_queue_lock); - atomic_set(&device->blk_data.requeue_scheduled, 0); -} - -/* - * Tape request queue function. Called from ll_rw_blk.c - */ -static void -tapeblock_request_fn(struct request_queue *queue) -{ - struct tape_device *device; - - device = (struct tape_device *) queue->queuedata; - DBF_LH(6, "tapeblock_request_fn(device=%p)\n", device); - BUG_ON(device == NULL); - tapeblock_trigger_requeue(device); -} - -/* - * This function is called for every new tapedevice - */ -int -tapeblock_setup_device(struct tape_device * device) -{ - struct tape_blk_data * blkdat; - struct gendisk * disk; - int rc; - - blkdat = &device->blk_data; - blkdat->device = device; - spin_lock_init(&blkdat->request_queue_lock); - atomic_set(&blkdat->requeue_scheduled, 0); - - blkdat->request_queue = blk_init_queue( - tapeblock_request_fn, - &blkdat->request_queue_lock - ); - if (!blkdat->request_queue) - return -ENOMEM; - - rc = elevator_change(blkdat->request_queue, "noop"); - if (rc) - goto cleanup_queue; - - blk_queue_logical_block_size(blkdat->request_queue, TAPEBLOCK_HSEC_SIZE); - blk_queue_max_hw_sectors(blkdat->request_queue, TAPEBLOCK_MAX_SEC); - blk_queue_max_segments(blkdat->request_queue, -1L); - blk_queue_max_segment_size(blkdat->request_queue, -1L); - blk_queue_segment_boundary(blkdat->request_queue, -1L); - - disk = alloc_disk(1); - if (!disk) { - rc = -ENOMEM; - goto cleanup_queue; - } - - disk->major = tapeblock_major; - disk->first_minor = device->first_minor; - disk->fops = &tapeblock_fops; - disk->private_data = tape_get_device(device); - disk->queue = blkdat->request_queue; - set_capacity(disk, 0); - sprintf(disk->disk_name, "btibm%d", - device->first_minor / TAPE_MINORS_PER_DEV); - - blkdat->disk = disk; - blkdat->medium_changed = 1; - blkdat->request_queue->queuedata = tape_get_device(device); - - add_disk(disk); - - tape_get_device(device); - INIT_WORK(&blkdat->requeue_task, tapeblock_requeue); - - return 0; - -cleanup_queue: - blk_cleanup_queue(blkdat->request_queue); - blkdat->request_queue = NULL; - - return rc; -} - -void -tapeblock_cleanup_device(struct tape_device *device) -{ - flush_work_sync(&device->blk_data.requeue_task); - tape_put_device(device); - - if (!device->blk_data.disk) { - goto cleanup_queue; - } - - del_gendisk(device->blk_data.disk); - device->blk_data.disk->private_data = NULL; - tape_put_device(device); - put_disk(device->blk_data.disk); - - device->blk_data.disk = NULL; -cleanup_queue: - device->blk_data.request_queue->queuedata = NULL; - tape_put_device(device); - - blk_cleanup_queue(device->blk_data.request_queue); - device->blk_data.request_queue = NULL; -} - -/* - * Detect number of blocks of the tape. - * FIXME: can we extent this to detect the blocks size as well ? - */ -static int -tapeblock_revalidate_disk(struct gendisk *disk) -{ - struct tape_device * device; - unsigned int nr_of_blks; - int rc; - - device = (struct tape_device *) disk->private_data; - BUG_ON(!device); - - if (!device->blk_data.medium_changed) - return 0; - - rc = tape_mtop(device, MTFSFM, 1); - if (rc) - return rc; - - rc = tape_mtop(device, MTTELL, 1); - if (rc < 0) - return rc; - - pr_info("%s: Determining the size of the recorded area...\n", - dev_name(&device->cdev->dev)); - DBF_LH(3, "Image file ends at %d\n", rc); - nr_of_blks = rc; - - /* This will fail for the first file. Catch the error by checking the - * position. */ - tape_mtop(device, MTBSF, 1); - - rc = tape_mtop(device, MTTELL, 1); - if (rc < 0) - return rc; - - if (rc > nr_of_blks) - return -EINVAL; - - DBF_LH(3, "Image file starts at %d\n", rc); - device->bof = rc; - nr_of_blks -= rc; - - pr_info("%s: The size of the recorded area is %i blocks\n", - dev_name(&device->cdev->dev), nr_of_blks); - set_capacity(device->blk_data.disk, - nr_of_blks*(TAPEBLOCK_HSEC_SIZE/512)); - - device->blk_data.block_position = 0; - device->blk_data.medium_changed = 0; - return 0; -} - -static unsigned int -tapeblock_check_events(struct gendisk *disk, unsigned int clearing) -{ - struct tape_device *device; - - device = (struct tape_device *) disk->private_data; - DBF_LH(6, "tapeblock_medium_changed(%p) = %d\n", - device, device->blk_data.medium_changed); - - return device->blk_data.medium_changed ? DISK_EVENT_MEDIA_CHANGE : 0; -} - -/* - * Block frontend tape device open function. - */ -static int -tapeblock_open(struct block_device *bdev, fmode_t mode) -{ - struct gendisk * disk = bdev->bd_disk; - struct tape_device * device; - int rc; - - mutex_lock(&tape_block_mutex); - device = tape_get_device(disk->private_data); - - if (device->required_tapemarks) { - DBF_EVENT(2, "TBLOCK: missing tapemarks\n"); - pr_warning("%s: Opening the tape failed because of missing " - "end-of-file marks\n", dev_name(&device->cdev->dev)); - rc = -EPERM; - goto put_device; - } - - rc = tape_open(device); - if (rc) - goto put_device; - - rc = tapeblock_revalidate_disk(disk); - if (rc) - goto release; - - /* - * Note: The reference to is hold until the release function - * is called. - */ - tape_state_set(device, TS_BLKUSE); - mutex_unlock(&tape_block_mutex); - return 0; - -release: - tape_release(device); - put_device: - tape_put_device(device); - mutex_unlock(&tape_block_mutex); - return rc; -} - -/* - * Block frontend tape device release function. - * - * Note: One reference to the tape device was made by the open function. So - * we just get the pointer here and release the reference. - */ -static int -tapeblock_release(struct gendisk *disk, fmode_t mode) -{ - struct tape_device *device = disk->private_data; - - mutex_lock(&tape_block_mutex); - tape_state_set(device, TS_IN_USE); - tape_release(device); - tape_put_device(device); - mutex_unlock(&tape_block_mutex); - - return 0; -} - -/* - * Initialize block device frontend. - */ -int -tapeblock_init(void) -{ - int rc; - - /* Register the tape major number to the kernel */ - rc = register_blkdev(tapeblock_major, "tBLK"); - if (rc < 0) - return rc; - - if (tapeblock_major == 0) - tapeblock_major = rc; - return 0; -} - -/* - * Deregister major for block device frontend - */ -void -tapeblock_exit(void) -{ - unregister_blkdev(tapeblock_major, "tBLK"); -} -- cgit v0.10.2 From 7712f83aa904fef0d7d6e5ba7684c3272bddbb19 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 23 May 2011 10:24:25 +0200 Subject: [S390] get rid of unused variables Remove trivially unused variables as detected with -Wunused-but-set-variable. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky diff --git a/arch/s390/appldata/appldata_base.c b/arch/s390/appldata/appldata_base.c index 5c91995..24bff4f 100644 --- a/arch/s390/appldata/appldata_base.c +++ b/arch/s390/appldata/appldata_base.c @@ -130,9 +130,7 @@ static void appldata_work_fn(struct work_struct *work) { struct list_head *lh; struct appldata_ops *ops; - int i; - i = 0; get_online_cpus(); mutex_lock(&appldata_ops_mutex); list_for_each(lh, &appldata_ops_list) { diff --git a/arch/s390/mm/extmem.c b/arch/s390/mm/extmem.c index 3cc95dd..214dd7b 100644 --- a/arch/s390/mm/extmem.c +++ b/arch/s390/mm/extmem.c @@ -681,8 +681,6 @@ void segment_save(char *name) { struct dcss_segment *seg; - int startpfn = 0; - int endpfn = 0; char cmd1[160]; char cmd2[80]; int i, response; @@ -698,8 +696,6 @@ segment_save(char *name) goto out; } - startpfn = seg->start_addr >> PAGE_SHIFT; - endpfn = (seg->end) >> PAGE_SHIFT; sprintf(cmd1, "DEFSEG %s", name); for (i=0; isegcnt; i++) { sprintf(cmd1+strlen(cmd1), " %lX-%lX %s", diff --git a/drivers/s390/block/dasd_alias.c b/drivers/s390/block/dasd_alias.c index 2b771f1..c388eda 100644 --- a/drivers/s390/block/dasd_alias.c +++ b/drivers/s390/block/dasd_alias.c @@ -253,13 +253,11 @@ int dasd_alias_make_device_known_to_lcu(struct dasd_device *device) */ void dasd_alias_lcu_setup_complete(struct dasd_device *device) { - struct dasd_eckd_private *private; unsigned long flags; struct alias_server *server; struct alias_lcu *lcu; struct dasd_uid uid; - private = (struct dasd_eckd_private *) device->private; device->discipline->get_uid(device, &uid); lcu = NULL; spin_lock_irqsave(&aliastree.lock, flags); @@ -279,13 +277,11 @@ void dasd_alias_lcu_setup_complete(struct dasd_device *device) void dasd_alias_wait_for_lcu_setup(struct dasd_device *device) { - struct dasd_eckd_private *private; unsigned long flags; struct alias_server *server; struct alias_lcu *lcu; struct dasd_uid uid; - private = (struct dasd_eckd_private *) device->private; device->discipline->get_uid(device, &uid); lcu = NULL; spin_lock_irqsave(&aliastree.lock, flags); diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c index 3ebdf5f..5820943 100644 --- a/drivers/s390/block/dasd_eckd.c +++ b/drivers/s390/block/dasd_eckd.c @@ -1611,10 +1611,8 @@ static void dasd_eckd_analysis_callback(struct dasd_ccw_req *init_cqr, static int dasd_eckd_start_analysis(struct dasd_block *block) { - struct dasd_eckd_private *private; struct dasd_ccw_req *init_cqr; - private = (struct dasd_eckd_private *) block->base->private; init_cqr = dasd_eckd_analysis_ccw(block->base); if (IS_ERR(init_cqr)) return PTR_ERR(init_cqr); @@ -2264,7 +2262,6 @@ static struct dasd_ccw_req *dasd_eckd_build_cp_cmd_track( unsigned int blk_per_trk, unsigned int blksize) { - struct dasd_eckd_private *private; unsigned long *idaws; struct dasd_ccw_req *cqr; struct ccw1 *ccw; @@ -2283,7 +2280,6 @@ static struct dasd_ccw_req *dasd_eckd_build_cp_cmd_track( unsigned int recoffs; basedev = block->base; - private = (struct dasd_eckd_private *) basedev->private; if (rq_data_dir(req) == READ) cmd = DASD_ECKD_CCW_READ_TRACK_DATA; else if (rq_data_dir(req) == WRITE) @@ -2573,7 +2569,6 @@ static struct dasd_ccw_req *dasd_eckd_build_cp_tpm_track( unsigned int blk_per_trk, unsigned int blksize) { - struct dasd_eckd_private *private; struct dasd_ccw_req *cqr; struct req_iterator iter; struct bio_vec *bv; @@ -2594,7 +2589,6 @@ static struct dasd_ccw_req *dasd_eckd_build_cp_tpm_track( unsigned int count, count_to_trk_end; basedev = block->base; - private = (struct dasd_eckd_private *) basedev->private; if (rq_data_dir(req) == READ) { cmd = DASD_ECKD_CCW_READ_TRACK_DATA; itcw_op = ITCW_OP_READ; @@ -2801,7 +2795,6 @@ static struct dasd_ccw_req *dasd_raw_build_cp(struct dasd_device *startdev, struct dasd_block *block, struct request *req) { - struct dasd_eckd_private *private; unsigned long *idaws; struct dasd_device *basedev; struct dasd_ccw_req *cqr; @@ -2836,7 +2829,6 @@ static struct dasd_ccw_req *dasd_raw_build_cp(struct dasd_device *startdev, trkcount = last_trk - first_trk + 1; first_offs = 0; basedev = block->base; - private = (struct dasd_eckd_private *) basedev->private; if (rq_data_dir(req) == READ) cmd = DASD_ECKD_CCW_READ_TRACK; diff --git a/drivers/s390/char/raw3270.c b/drivers/s390/char/raw3270.c index e21a5c3..810ac38 100644 --- a/drivers/s390/char/raw3270.c +++ b/drivers/s390/char/raw3270.c @@ -598,7 +598,6 @@ __raw3270_size_device(struct raw3270 *rp) static const unsigned char wbuf[] = { 0x00, 0x07, 0x01, 0xff, 0x03, 0x00, 0x81 }; struct raw3270_ua *uap; - unsigned short count; int rc; /* @@ -653,7 +652,6 @@ __raw3270_size_device(struct raw3270 *rp) if (rc) return rc; /* Got a Query Reply */ - count = sizeof(rp->init_data) - rp->init_request.rescnt; uap = (struct raw3270_ua *) (rp->init_data + 1); /* Paranoia check. */ if (rp->init_data[0] != 0x88 || uap->uab.qcode != 0x81) diff --git a/drivers/s390/char/sclp_sdias.c b/drivers/s390/char/sclp_sdias.c index 6a1c58d..fa733ec 100644 --- a/drivers/s390/char/sclp_sdias.c +++ b/drivers/s390/char/sclp_sdias.c @@ -69,9 +69,6 @@ static DEFINE_MUTEX(sdias_mutex); static void sdias_callback(struct sclp_req *request, void *data) { - struct sdias_sccb *cbsccb; - - cbsccb = (struct sdias_sccb *) request->sccb; sclp_req_done = 1; wake_up(&sdias_wq); /* Inform caller, that request is complete */ TRACE("callback done\n"); diff --git a/drivers/s390/char/tape_3590.c b/drivers/s390/char/tape_3590.c index b98dcbd..22db252 100644 --- a/drivers/s390/char/tape_3590.c +++ b/drivers/s390/char/tape_3590.c @@ -796,10 +796,8 @@ static void tape_3590_med_state_set(struct tape_device *device, static int tape_3590_done(struct tape_device *device, struct tape_request *request) { - struct tape_3590_disc_data *disc_data; DBF_EVENT(6, "%s done\n", tape_op_verbose[request->op]); - disc_data = device->discdata; switch (request->op) { case TO_BSB: @@ -1397,11 +1395,9 @@ static int tape_3590_crypt_error(struct tape_device *device, u8 cu_rc, ekm_rc1; u16 ekm_rc2; u32 drv_rc; - const char *bus_id; char *sense; sense = ((struct tape_3590_sense *) irb->ecw)->fmt.data; - bus_id = dev_name(&device->cdev->dev); cu_rc = sense[0]; drv_rc = *((u32*) &sense[5]) & 0xffffff; ekm_rc1 = sense[9]; @@ -1429,7 +1425,6 @@ tape_3590_unit_check(struct tape_device *device, struct tape_request *request, struct irb *irb) { struct tape_3590_sense *sense; - int rc; #ifdef CONFIG_S390_TAPE_BLOCK if (request->op == TO_BLOCK) { @@ -1454,7 +1449,6 @@ tape_3590_unit_check(struct tape_device *device, struct tape_request *request, * - "break": basic error recovery is done * - "goto out:": just print error message if available */ - rc = -EIO; switch (sense->rc_rqc) { case 0x1110: diff --git a/drivers/s390/cio/device_fsm.c b/drivers/s390/cio/device_fsm.c index e087a86..52c233f 100644 --- a/drivers/s390/cio/device_fsm.c +++ b/drivers/s390/cio/device_fsm.c @@ -841,9 +841,6 @@ call_handler: static void ccw_device_killing_irq(struct ccw_device *cdev, enum dev_event dev_event) { - struct subchannel *sch; - - sch = to_subchannel(cdev->dev.parent); ccw_device_set_timeout(cdev, 0); /* Start delayed path verification. */ ccw_device_online_verify(cdev, 0); diff --git a/drivers/s390/cio/device_ops.c b/drivers/s390/cio/device_ops.c index 651976b..f98698d 100644 --- a/drivers/s390/cio/device_ops.c +++ b/drivers/s390/cio/device_ops.c @@ -418,12 +418,9 @@ int ccw_device_resume(struct ccw_device *cdev) int ccw_device_call_handler(struct ccw_device *cdev) { - struct subchannel *sch; unsigned int stctl; int ending_status; - sch = to_subchannel(cdev->dev.parent); - /* * we allow for the device action handler if . * - we received ending status -- cgit v0.10.2 From 364c18d817a6beb3303e6b8f28ea37d9c06ff382 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 23 May 2011 10:24:26 +0200 Subject: [S390] dasd: fix return code handling Check return value of itcw_add_dcw() for error code and return it. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c index 5820943..30fb979 100644 --- a/drivers/s390/block/dasd_eckd.c +++ b/drivers/s390/block/dasd_eckd.c @@ -2552,8 +2552,7 @@ static int prepare_itcw(struct itcw *itcw, dcw = itcw_add_dcw(itcw, pfx_cmd, 0, &pfxdata, sizeof(pfxdata), total_data_size); - - return rc; + return IS_ERR(dcw) ? PTR_ERR(dcw) : 0; } static struct dasd_ccw_req *dasd_eckd_build_cp_tpm_track( -- cgit v0.10.2 From 37fa9975b28a2d86a5b47ec17e6d845dbd899c60 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 23 May 2011 10:24:27 +0200 Subject: [S390] monwriter: fix return code handling Fix return code handling within monwrite_new_hdr(). Return code handling is everwhere implemented, the return code of the diagnose function was just not passed. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky diff --git a/drivers/s390/char/monwriter.c b/drivers/s390/char/monwriter.c index e0702d3..4600aa1 100644 --- a/drivers/s390/char/monwriter.c +++ b/drivers/s390/char/monwriter.c @@ -97,7 +97,7 @@ static int monwrite_new_hdr(struct mon_private *monpriv) { struct monwrite_hdr *monhdr = &monpriv->hdr; struct mon_buf *monbuf; - int rc; + int rc = 0; if (monhdr->datalen > MONWRITE_MAX_DATALEN || monhdr->mon_function > MONWRITE_START_CONFIG || @@ -135,7 +135,7 @@ static int monwrite_new_hdr(struct mon_private *monpriv) mon_buf_count++; } monpriv->current_buf = monbuf; - return 0; + return rc; } static int monwrite_new_data(struct mon_private *monpriv) -- cgit v0.10.2 From 160fbf2e92cccbc1dccb4f837f5174808b2636d2 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 23 May 2011 10:24:28 +0200 Subject: [S390] tape: remove unused/not handled return codes Return codes are on purpose not handled or used. So remove them. Acked-by: Michael Holzheu Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky diff --git a/drivers/s390/char/tape_3590.c b/drivers/s390/char/tape_3590.c index 22db252..a7d5707 100644 --- a/drivers/s390/char/tape_3590.c +++ b/drivers/s390/char/tape_3590.c @@ -1392,15 +1392,12 @@ tape_3590_print_era_msg(struct tape_device *device, struct irb *irb) static int tape_3590_crypt_error(struct tape_device *device, struct tape_request *request, struct irb *irb) { - u8 cu_rc, ekm_rc1; + u8 cu_rc; u16 ekm_rc2; - u32 drv_rc; char *sense; sense = ((struct tape_3590_sense *) irb->ecw)->fmt.data; cu_rc = sense[0]; - drv_rc = *((u32*) &sense[5]) & 0xffffff; - ekm_rc1 = sense[9]; ekm_rc2 = *((u16*) &sense[10]); if ((cu_rc == 0) && (ekm_rc2 == 0xee31)) /* key not defined on EKM */ diff --git a/drivers/s390/char/tape_std.c b/drivers/s390/char/tape_std.c index 3c3f342..e765017 100644 --- a/drivers/s390/char/tape_std.c +++ b/drivers/s390/char/tape_std.c @@ -564,7 +564,6 @@ int tape_std_mtreten(struct tape_device *device, int mt_count) { struct tape_request *request; - int rc; request = tape_alloc_request(4, 0); if (IS_ERR(request)) @@ -576,7 +575,7 @@ tape_std_mtreten(struct tape_device *device, int mt_count) tape_ccw_cc(request->cpaddr + 2, NOP, 0, NULL); tape_ccw_end(request->cpaddr + 3, CCW_CMD_TIC, 0, request->cpaddr); /* execute it, MTRETEN rc gets ignored */ - rc = tape_do_io_interruptible(device, request); + tape_do_io_interruptible(device, request); tape_free_request(request); return tape_mtop(device, MTREW, 1); } -- cgit v0.10.2 From 89db4df160948d005b5efce82ef10f25ab5aac8b Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 23 May 2011 10:24:29 +0200 Subject: [S390] extmem: get rid of compile warning Get rid of these: arch/s390/mm/extmem.c: In function 'segment_modify_shared': arch/s390/mm/extmem.c:622:3: warning: 'end_addr' may be used uninitialized in this function [-Wuninitialized] arch/s390/mm/extmem.c:627:18: warning: 'start_addr' may be used uninitialized in this function [-Wuninitialized] arch/s390/mm/extmem.c: In function 'segment_load': arch/s390/mm/extmem.c:481:11: warning: 'end_addr' may be used uninitialized in this function [-Wuninitialized] arch/s390/mm/extmem.c:480:18: warning: 'start_addr' may be used uninitialized in this function [-Wuninitialized] Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky diff --git a/arch/s390/mm/extmem.c b/arch/s390/mm/extmem.c index 214dd7b..075ddad 100644 --- a/arch/s390/mm/extmem.c +++ b/arch/s390/mm/extmem.c @@ -412,6 +412,7 @@ __segment_load (char *name, int do_nonshared, unsigned long *addr, unsigned long struct dcss_segment *seg; int rc, diag_cc; + start_addr = end_addr = 0; seg = kmalloc(sizeof(*seg), GFP_KERNEL | GFP_DMA); if (seg == NULL) { rc = -ENOMEM; @@ -573,6 +574,7 @@ segment_modify_shared (char *name, int do_nonshared) unsigned long start_addr, end_addr, dummy; int rc, diag_cc; + start_addr = end_addr = 0; mutex_lock(&dcss_lock); seg = segment_by_name (name); if (seg == NULL) { -- cgit v0.10.2 From cf2d007bd43bc254d5254fe9d30af3e73ed5b98a Mon Sep 17 00:00:00 2001 From: Holger Dengler Date: Mon, 23 May 2011 10:24:30 +0200 Subject: [S390] ap: skip device registration on type probe failure The registration of an ap device will be skipped, if the device type probing fails. Add names of current crypto adapters to the Kconfig help. Signed-off-by: Holger Dengler Signed-off-by: Martin Schwidefsky diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig index c64c380..e0b25de 100644 --- a/drivers/crypto/Kconfig +++ b/drivers/crypto/Kconfig @@ -74,6 +74,8 @@ config ZCRYPT + PCI-X Cryptographic Coprocessor (PCIXCC) + Crypto Express2 Coprocessor (CEX2C) + Crypto Express2 Accelerator (CEX2A) + + Crypto Express3 Coprocessor (CEX3C) + + Crypto Express3 Accelerator (CEX3A) config ZCRYPT_MONOLITHIC bool "Monolithic zcrypt module" diff --git a/drivers/s390/crypto/ap_bus.c b/drivers/s390/crypto/ap_bus.c index 67302b9..16e4a25 100644 --- a/drivers/s390/crypto/ap_bus.c +++ b/drivers/s390/crypto/ap_bus.c @@ -1183,8 +1183,12 @@ static void ap_scan_bus(struct work_struct *unused) INIT_LIST_HEAD(&ap_dev->list); setup_timer(&ap_dev->timeout, ap_request_timeout, (unsigned long) ap_dev); - if (device_type == 0) - ap_probe_device_type(ap_dev); + if (device_type == 0) { + if (ap_probe_device_type(ap_dev)) { + kfree(ap_dev); + continue; + } + } else ap_dev->device_type = device_type; -- cgit v0.10.2 From add7490c273650c3aa8012f3e082aa64af86f17c Mon Sep 17 00:00:00 2001 From: Peter Oberparleiter Date: Mon, 23 May 2011 10:24:31 +0200 Subject: [S390] vdso: disable gcov profiling The concepts of VDSO and gcov-based profiling don't mix: the former includes kernel-provided code running in userspace, the latter adds instructions that modify counters in kernel data segments. On s390 this has not been a problem so far due to VDSO code being written in all-assembler which is exempt from gcov-based profiling. This could change in the future, so disable profiling excplicitly for VDSO code. Signed-off-by: Peter Oberparleiter Signed-off-by: Martin Schwidefsky diff --git a/arch/s390/kernel/vdso32/Makefile b/arch/s390/kernel/vdso32/Makefile index d13e875..8ad2b34 100644 --- a/arch/s390/kernel/vdso32/Makefile +++ b/arch/s390/kernel/vdso32/Makefile @@ -22,6 +22,9 @@ obj-y += vdso32_wrapper.o extra-y += vdso32.lds CPPFLAGS_vdso32.lds += -P -C -U$(ARCH) +# Disable gcov profiling for VDSO code +GCOV_PROFILE := n + # Force dependency (incbin is bad) $(obj)/vdso32_wrapper.o : $(obj)/vdso32.so diff --git a/arch/s390/kernel/vdso64/Makefile b/arch/s390/kernel/vdso64/Makefile index 449352d..2a8ddfd 100644 --- a/arch/s390/kernel/vdso64/Makefile +++ b/arch/s390/kernel/vdso64/Makefile @@ -22,6 +22,9 @@ obj-y += vdso64_wrapper.o extra-y += vdso64.lds CPPFLAGS_vdso64.lds += -P -C -U$(ARCH) +# Disable gcov profiling for VDSO code +GCOV_PROFILE := n + # Force dependency (incbin is bad) $(obj)/vdso64_wrapper.o : $(obj)/vdso64.so -- cgit v0.10.2 From 4c2241fd42298007d7c3a92318806a4a9490a93c Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 23 May 2011 10:24:32 +0200 Subject: [S390] percpu: implement arch specific irqsafe_cpu_ops Implement arch specific irqsafe_cpu ops. The arch specific ops do not disable/enable interrupts since that is an expensive operation. Instead we disable preemption and perform a compare and swap loop. Since on server distros (the ones we care about) preemption is disabled the preempt_disable()/preempt_enable() pair is a nop. In the end this code should be faster than the generic one. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky diff --git a/arch/s390/include/asm/cmpxchg.h b/arch/s390/include/asm/cmpxchg.h index 7488e52..81d7908 100644 --- a/arch/s390/include/asm/cmpxchg.h +++ b/arch/s390/include/asm/cmpxchg.h @@ -167,7 +167,6 @@ static inline unsigned long __cmpxchg(void *ptr, unsigned long old, #ifdef CONFIG_64BIT #define cmpxchg64(ptr, o, n) \ ({ \ - BUILD_BUG_ON(sizeof(*(ptr)) != 8); \ cmpxchg((ptr), (o), (n)); \ }) #else /* CONFIG_64BIT */ diff --git a/arch/s390/include/asm/percpu.h b/arch/s390/include/asm/percpu.h index f7ad871..5325c89 100644 --- a/arch/s390/include/asm/percpu.h +++ b/arch/s390/include/asm/percpu.h @@ -1,6 +1,9 @@ #ifndef __ARCH_S390_PERCPU__ #define __ARCH_S390_PERCPU__ +#include +#include + /* * s390 uses its own implementation for per cpu data, the offset of * the cpu local data area is cached in the cpu's lowcore memory. @@ -16,6 +19,71 @@ #define ARCH_NEEDS_WEAK_PER_CPU #endif +#define arch_irqsafe_cpu_to_op(pcp, val, op) \ +do { \ + typedef typeof(pcp) pcp_op_T__; \ + pcp_op_T__ old__, new__, prev__; \ + pcp_op_T__ *ptr__; \ + preempt_disable(); \ + ptr__ = __this_cpu_ptr(&(pcp)); \ + prev__ = *ptr__; \ + do { \ + old__ = prev__; \ + new__ = old__ op (val); \ + switch (sizeof(*ptr__)) { \ + case 8: \ + prev__ = cmpxchg64(ptr__, old__, new__); \ + break; \ + default: \ + prev__ = cmpxchg(ptr__, old__, new__); \ + } \ + } while (prev__ != old__); \ + preempt_enable(); \ +} while (0) + +#define irqsafe_cpu_add_1(pcp, val) arch_irqsafe_cpu_to_op(pcp, val, +) +#define irqsafe_cpu_add_2(pcp, val) arch_irqsafe_cpu_to_op(pcp, val, +) +#define irqsafe_cpu_add_4(pcp, val) arch_irqsafe_cpu_to_op(pcp, val, +) +#define irqsafe_cpu_add_8(pcp, val) arch_irqsafe_cpu_to_op(pcp, val, +) + +#define irqsafe_cpu_and_1(pcp, val) arch_irqsafe_cpu_to_op(pcp, val, &) +#define irqsafe_cpu_and_2(pcp, val) arch_irqsafe_cpu_to_op(pcp, val, &) +#define irqsafe_cpu_and_4(pcp, val) arch_irqsafe_cpu_to_op(pcp, val, &) +#define irqsafe_cpu_and_8(pcp, val) arch_irqsafe_cpu_to_op(pcp, val, &) + +#define irqsafe_cpu_or_1(pcp, val) arch_irqsafe_cpu_to_op(pcp, val, |) +#define irqsafe_cpu_or_2(pcp, val) arch_irqsafe_cpu_to_op(pcp, val, |) +#define irqsafe_cpu_or_4(pcp, val) arch_irqsafe_cpu_to_op(pcp, val, |) +#define irqsafe_cpu_or_8(pcp, val) arch_irqsafe_cpu_to_op(pcp, val, |) + +#define irqsafe_cpu_xor_1(pcp, val) arch_irqsafe_cpu_to_op(pcp, val, ^) +#define irqsafe_cpu_xor_2(pcp, val) arch_irqsafe_cpu_to_op(pcp, val, ^) +#define irqsafe_cpu_xor_4(pcp, val) arch_irqsafe_cpu_to_op(pcp, val, ^) +#define irqsafe_cpu_xor_8(pcp, val) arch_irqsafe_cpu_to_op(pcp, val, ^) + +#define arch_irqsafe_cpu_cmpxchg(pcp, oval, nval) \ +({ \ + typedef typeof(pcp) pcp_op_T__; \ + pcp_op_T__ ret__; \ + pcp_op_T__ *ptr__; \ + preempt_disable(); \ + ptr__ = __this_cpu_ptr(&(pcp)); \ + switch (sizeof(*ptr__)) { \ + case 8: \ + ret__ = cmpxchg64(ptr__, oval, nval); \ + break; \ + default: \ + ret__ = cmpxchg(ptr__, oval, nval); \ + } \ + preempt_enable(); \ + ret__; \ +}) + +#define irqsafe_cpu_cmpxchg_1(pcp, oval, nval) arch_irqsafe_cpu_cmpxchg(pcp, oval, nval) +#define irqsafe_cpu_cmpxchg_2(pcp, oval, nval) arch_irqsafe_cpu_cmpxchg(pcp, oval, nval) +#define irqsafe_cpu_cmpxchg_4(pcp, oval, nval) arch_irqsafe_cpu_cmpxchg(pcp, oval, nval) +#define irqsafe_cpu_cmpxchg_8(pcp, oval, nval) arch_irqsafe_cpu_cmpxchg(pcp, oval, nval) + #include #endif /* __ARCH_S390_PERCPU__ */ -- cgit v0.10.2 From b456d94a9757db54eca4677c1b3a13e7170c9bb3 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 23 May 2011 10:24:33 +0200 Subject: [S390] smp: add __noreturn attribute to cpu_die() Add missing __noreturn attribute to cpu_die(): arch/s390/kernel/smp.c:691:6: error: symbol 'cpu_die' redeclared with different type Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 63c7d9f..2310f07 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -681,7 +681,7 @@ void __cpu_die(unsigned int cpu) atomic_dec(&init_mm.context.attach_count); } -void cpu_die(void) +void __noreturn cpu_die(void) { idle_task_exit(); while (sigp(smp_processor_id(), sigp_stop) == sigp_busy) -- cgit v0.10.2 From f2db2e6cb3f5f766cbb3788af44705685ff2445a Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 23 May 2011 10:24:34 +0200 Subject: [S390] pfault: cpu hotplug vs missing completion interrupts On cpu hot remove a PFAULT CANCEL command is sent to the hypervisor which in turn will cancel all outstanding pfault requests that have been issued on that cpu (the same happens with a SIGP cpu reset). The result is that we end up with uninterruptible processes where the interrupt that would wake up these processes never arrives. In order to solve this all processes which wait for a pfault completion interrupt get woken up after a cpu hot remove. The worst case that could happen is that they fault again and in turn need to wait again. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h index b8624d5..228cf0b 100644 --- a/arch/s390/include/asm/lowcore.h +++ b/arch/s390/include/asm/lowcore.h @@ -124,7 +124,7 @@ struct _lowcore { /* Address space pointer. */ __u32 kernel_asce; /* 0x02ac */ __u32 user_asce; /* 0x02b0 */ - __u8 pad_0x02b4[0x02b8-0x02b4]; /* 0x02b4 */ + __u32 current_pid; /* 0x02b4 */ /* SMP info area */ __u32 cpu_nr; /* 0x02b8 */ @@ -255,7 +255,7 @@ struct _lowcore { /* Address space pointer. */ __u64 kernel_asce; /* 0x0310 */ __u64 user_asce; /* 0x0318 */ - __u8 pad_0x0320[0x0328-0x0320]; /* 0x0320 */ + __u64 current_pid; /* 0x0320 */ /* SMP info area */ __u32 cpu_nr; /* 0x0328 */ diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index 2c79b64..1300c30 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -84,6 +84,7 @@ struct thread_struct { struct per_event per_event; /* Cause of the last PER trap */ /* pfault_wait is used to block the process on a pfault event */ unsigned long pfault_wait; + struct list_head list; }; typedef struct thread_struct thread_struct; diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c index ef45556..edfbd17 100644 --- a/arch/s390/kernel/asm-offsets.c +++ b/arch/s390/kernel/asm-offsets.c @@ -124,6 +124,7 @@ int main(void) DEFINE(__LC_LAST_UPDATE_TIMER, offsetof(struct _lowcore, last_update_timer)); DEFINE(__LC_LAST_UPDATE_CLOCK, offsetof(struct _lowcore, last_update_clock)); DEFINE(__LC_CURRENT, offsetof(struct _lowcore, current_task)); + DEFINE(__LC_CURRENT_PID, offsetof(struct _lowcore, current_pid)); DEFINE(__LC_THREAD_INFO, offsetof(struct _lowcore, thread_info)); DEFINE(__LC_KERNEL_STACK, offsetof(struct _lowcore, kernel_stack)); DEFINE(__LC_ASYNC_STACK, offsetof(struct _lowcore, async_stack)); diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 1b67fc6..0476174 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -212,6 +212,7 @@ __switch_to: lctl %c4,%c4,__TASK_pid(%r3) # load pid to control reg. 4 lm %r6,%r15,__SF_GPRS(%r15) # load gprs of next task st %r3,__LC_CURRENT # store task struct of next + mvc __LC_CURRENT_PID(4,%r0),__TASK_pid(%r3) # store pid of next st %r5,__LC_THREAD_INFO # store thread info of next ahi %r5,STACK_SIZE # end of kernel stack of next st %r5,__LC_KERNEL_STACK # store end of kernel stack diff --git a/arch/s390/kernel/entry64.S b/arch/s390/kernel/entry64.S index 9fd8645..d61967e 100644 --- a/arch/s390/kernel/entry64.S +++ b/arch/s390/kernel/entry64.S @@ -220,6 +220,7 @@ __switch_to: lctl %c4,%c4,__TASK_pid(%r3) # load pid to control reg. 4 lmg %r6,%r15,__SF_GPRS(%r15) # load gprs of next task stg %r3,__LC_CURRENT # store task struct of next + mvc __LC_CURRENT_PID+4(4,%r0),__TASK_pid(%r3) # store pid of next stg %r5,__LC_THREAD_INFO # store thread info of next aghi %r5,STACK_SIZE # end of kernel stack of next stg %r5,__LC_KERNEL_STACK # store end of kernel stack diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index 177745c..1ca6564 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -466,7 +466,7 @@ typedef struct { int pfault_init(void) { pfault_refbk_t refbk = - { 0x258, 0, 5, 2, __LC_CURRENT, 1ULL << 48, 1ULL << 48, + { 0x258, 0, 5, 2, __LC_CURRENT_PID, 1ULL << 48, 1ULL << 48, __PF_RES_FIELD }; int rc; @@ -498,11 +498,15 @@ void pfault_fini(void) : : "a" (&refbk), "m" (refbk) : "cc"); } +static DEFINE_SPINLOCK(pfault_lock); +static LIST_HEAD(pfault_list); + static void pfault_interrupt(unsigned int ext_int_code, unsigned int param32, unsigned long param64) { struct task_struct *tsk; __u16 subcode; + pid_t pid; /* * Get the external interruption subcode & pfault @@ -514,44 +518,79 @@ static void pfault_interrupt(unsigned int ext_int_code, if ((subcode & 0xff00) != __SUBCODE_MASK) return; kstat_cpu(smp_processor_id()).irqs[EXTINT_PFL]++; - - /* - * Get the token (= address of the task structure of the affected task). - */ -#ifdef CONFIG_64BIT - tsk = (struct task_struct *) param64; -#else - tsk = (struct task_struct *) param32; -#endif - + if (subcode & 0x0080) { + /* Get the token (= pid of the affected task). */ + pid = sizeof(void *) == 4 ? param32 : param64; + rcu_read_lock(); + tsk = find_task_by_pid_ns(pid, &init_pid_ns); + if (tsk) + get_task_struct(tsk); + rcu_read_unlock(); + if (!tsk) + return; + } else { + tsk = current; + } + spin_lock(&pfault_lock); if (subcode & 0x0080) { /* signal bit is set -> a page has been swapped in by VM */ - if (xchg(&tsk->thread.pfault_wait, -1) != 0) { + if (tsk->thread.pfault_wait == 1) { /* Initial interrupt was faster than the completion * interrupt. pfault_wait is valid. Set pfault_wait * back to zero and wake up the process. This can * safely be done because the task is still sleeping * and can't produce new pfaults. */ tsk->thread.pfault_wait = 0; + list_del(&tsk->thread.list); wake_up_process(tsk); - put_task_struct(tsk); + } else { + /* Completion interrupt was faster than initial + * interrupt. Set pfault_wait to -1 so the initial + * interrupt doesn't put the task to sleep. */ + tsk->thread.pfault_wait = -1; } + put_task_struct(tsk); } else { /* signal bit not set -> a real page is missing. */ - get_task_struct(tsk); - set_task_state(tsk, TASK_UNINTERRUPTIBLE); - if (xchg(&tsk->thread.pfault_wait, 1) != 0) { + if (tsk->thread.pfault_wait == -1) { /* Completion interrupt was faster than the initial - * interrupt (swapped in a -1 for pfault_wait). Set - * pfault_wait back to zero and exit. This can be - * done safely because tsk is running in kernel - * mode and can't produce new pfaults. */ + * interrupt (pfault_wait == -1). Set pfault_wait + * back to zero and exit. */ tsk->thread.pfault_wait = 0; - set_task_state(tsk, TASK_RUNNING); - put_task_struct(tsk); - } else + } else { + /* Initial interrupt arrived before completion + * interrupt. Let the task sleep. */ + tsk->thread.pfault_wait = 1; + list_add(&tsk->thread.list, &pfault_list); + set_task_state(tsk, TASK_UNINTERRUPTIBLE); set_tsk_need_resched(tsk); + } + } + spin_unlock(&pfault_lock); +} + +static int __cpuinit pfault_cpu_notify(struct notifier_block *self, + unsigned long action, void *hcpu) +{ + struct thread_struct *thread, *next; + struct task_struct *tsk; + + switch (action) { + case CPU_DEAD: + case CPU_DEAD_FROZEN: + spin_lock_irq(&pfault_lock); + list_for_each_entry_safe(thread, next, &pfault_list, list) { + thread->pfault_wait = 0; + list_del(&thread->list); + tsk = container_of(thread, struct task_struct, thread); + wake_up_process(tsk); + } + spin_unlock_irq(&pfault_lock); + break; + default: + break; } + return NOTIFY_OK; } static int __init pfault_irq_init(void) @@ -568,8 +607,10 @@ static int __init pfault_irq_init(void) pfault_disable = 1; return rc; } - if (pfault_init() == 0) + if (pfault_init() == 0) { + hotcpu_notifier(pfault_cpu_notify, 0); return 0; + } /* Tough luck, no pfault. */ pfault_disable = 1; -- cgit v0.10.2 From 7dd8fe1f910f9644167ef91ddab44107d0d668c5 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 23 May 2011 10:24:35 +0200 Subject: [S390] pfault: cleanup code Small code cleanup. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index 1ca6564..a0f9e73 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -452,22 +452,28 @@ static int __init nopfault(char *str) __setup("nopfault", nopfault); -typedef struct { - __u16 refdiagc; - __u16 reffcode; - __u16 refdwlen; - __u16 refversn; - __u64 refgaddr; - __u64 refselmk; - __u64 refcmpmk; - __u64 reserved; -} __attribute__ ((packed, aligned(8))) pfault_refbk_t; +struct pfault_refbk { + u16 refdiagc; + u16 reffcode; + u16 refdwlen; + u16 refversn; + u64 refgaddr; + u64 refselmk; + u64 refcmpmk; + u64 reserved; +} __attribute__ ((packed, aligned(8))); int pfault_init(void) { - pfault_refbk_t refbk = - { 0x258, 0, 5, 2, __LC_CURRENT_PID, 1ULL << 48, 1ULL << 48, - __PF_RES_FIELD }; + struct pfault_refbk refbk = { + .refdiagc = 0x258, + .reffcode = 0, + .refdwlen = 5, + .refversn = 2, + .refgaddr = __LC_CURRENT_PID, + .refselmk = 1ULL << 48, + .refcmpmk = 1ULL << 48, + .reserved = __PF_RES_FIELD }; int rc; if (!MACHINE_IS_VM || pfault_disable) @@ -485,8 +491,12 @@ int pfault_init(void) void pfault_fini(void) { - pfault_refbk_t refbk = - { 0x258, 1, 5, 2, 0ULL, 0ULL, 0ULL, 0ULL }; + struct pfault_refbk refbk = { + .refdiagc = 0x258, + .reffcode = 1, + .refdwlen = 5, + .refversn = 2, + }; if (!MACHINE_IS_VM || pfault_disable) return; @@ -599,24 +609,21 @@ static int __init pfault_irq_init(void) if (!MACHINE_IS_VM) return 0; - /* - * Try to get pfault pseudo page faults going. - */ rc = register_external_interrupt(0x2603, pfault_interrupt); - if (rc) { - pfault_disable = 1; - return rc; - } - if (pfault_init() == 0) { - hotcpu_notifier(pfault_cpu_notify, 0); - return 0; - } + if (rc) + goto out_extint; + rc = pfault_init() == 0 ? 0 : -EOPNOTSUPP; + if (rc) + goto out_pfault; + hotcpu_notifier(pfault_cpu_notify, 0); + return 0; - /* Tough luck, no pfault. */ - pfault_disable = 1; +out_pfault: unregister_external_interrupt(0x2603, pfault_interrupt); - return 0; +out_extint: + pfault_disable = 1; + return rc; } early_initcall(pfault_irq_init); -#endif +#endif /* CONFIG_PFAULT */ -- cgit v0.10.2 From 0f1959f50646612b247d624bdbf8b0c8816f2a93 Mon Sep 17 00:00:00 2001 From: KOSAKI Motohiro Date: Mon, 23 May 2011 10:24:36 +0200 Subject: [S390] convert old cpumask API into new one Adapt new API. Signed-off-by: KOSAKI Motohiro Signed-off-by: Martin Schwidefsky diff --git a/arch/s390/include/asm/tlbflush.h b/arch/s390/include/asm/tlbflush.h index 4fdcefc..b7a4f2e 100644 --- a/arch/s390/include/asm/tlbflush.h +++ b/arch/s390/include/asm/tlbflush.h @@ -50,7 +50,7 @@ static inline void __tlb_flush_full(struct mm_struct *mm) /* * If the process only ran on the local cpu, do a local flush. */ - local_cpumask = cpumask_of_cpu(smp_processor_id()); + cpumask_copy(&local_cpumask, cpumask_of(smp_processor_id())); if (cpumask_equal(mm_cpumask(mm), &local_cpumask)) __tlb_flush_local(); else diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 2310f07..fed71dc 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -335,7 +335,7 @@ static int smp_rescan_cpus_sigp(cpumask_t avail) smp_cpu_polarization[logical_cpu] = POLARIZATION_UNKNWN; if (!cpu_stopped(logical_cpu)) continue; - cpu_set(logical_cpu, cpu_present_map); + set_cpu_present(logical_cpu, true); smp_cpu_state[logical_cpu] = CPU_STATE_CONFIGURED; logical_cpu = cpumask_next(logical_cpu, &avail); if (logical_cpu >= nr_cpu_ids) @@ -367,7 +367,7 @@ static int smp_rescan_cpus_sclp(cpumask_t avail) continue; __cpu_logical_map[logical_cpu] = cpu_id; smp_cpu_polarization[logical_cpu] = POLARIZATION_UNKNWN; - cpu_set(logical_cpu, cpu_present_map); + set_cpu_present(logical_cpu, true); if (cpu >= info->configured) smp_cpu_state[logical_cpu] = CPU_STATE_STANDBY; else @@ -385,7 +385,7 @@ static int __smp_rescan_cpus(void) { cpumask_t avail; - cpus_xor(avail, cpu_possible_map, cpu_present_map); + cpumask_xor(&avail, cpu_possible_mask, cpu_present_mask); if (smp_use_sigp_detection) return smp_rescan_cpus_sigp(avail); else @@ -467,7 +467,7 @@ int __cpuinit start_secondary(void *cpuvoid) notify_cpu_starting(smp_processor_id()); /* Mark this cpu as online */ ipi_call_lock(); - cpu_set(smp_processor_id(), cpu_online_map); + set_cpu_online(smp_processor_id(), true); ipi_call_unlock(); /* Switch on interrupts */ local_irq_enable(); @@ -644,7 +644,7 @@ int __cpu_disable(void) struct ec_creg_mask_parms cr_parms; int cpu = smp_processor_id(); - cpu_clear(cpu, cpu_online_map); + set_cpu_online(cpu, false); /* Disable pfault pseudo page faults on this cpu. */ pfault_fini(); @@ -738,8 +738,8 @@ void __init smp_prepare_boot_cpu(void) BUG_ON(smp_processor_id() != 0); current_thread_info()->cpu = 0; - cpu_set(0, cpu_present_map); - cpu_set(0, cpu_online_map); + set_cpu_present(0, true); + set_cpu_online(0, true); S390_lowcore.percpu_offset = __per_cpu_offset[0]; current_set[0] = current; smp_cpu_state[0] = CPU_STATE_CONFIGURED; @@ -1016,21 +1016,21 @@ int __ref smp_rescan_cpus(void) get_online_cpus(); mutex_lock(&smp_cpu_state_mutex); - newcpus = cpu_present_map; + cpumask_copy(&newcpus, cpu_present_mask); rc = __smp_rescan_cpus(); if (rc) goto out; - cpus_andnot(newcpus, cpu_present_map, newcpus); - for_each_cpu_mask(cpu, newcpus) { + cpumask_andnot(&newcpus, cpu_present_mask, &newcpus); + for_each_cpu(cpu, &newcpus) { rc = smp_add_present_cpu(cpu); if (rc) - cpu_clear(cpu, cpu_present_map); + set_cpu_present(cpu, false); } rc = 0; out: mutex_unlock(&smp_cpu_state_mutex); put_online_cpus(); - if (!cpus_empty(newcpus)) + if (!cpumask_empty(&newcpus)) topology_schedule_update(); return rc; } diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index 87be655..a59557f 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -810,7 +810,7 @@ static int etr_sync_clock_stop(struct etr_aib *aib, int port) etr_sync.etr_port = port; get_online_cpus(); atomic_set(&etr_sync.cpus, num_online_cpus() - 1); - rc = stop_machine(etr_sync_clock, &etr_sync, &cpu_online_map); + rc = stop_machine(etr_sync_clock, &etr_sync, cpu_online_mask); put_online_cpus(); return rc; } @@ -1579,7 +1579,7 @@ static void stp_work_fn(struct work_struct *work) memset(&stp_sync, 0, sizeof(stp_sync)); get_online_cpus(); atomic_set(&stp_sync.cpus, num_online_cpus() - 1); - stop_machine(stp_sync_clock, &stp_sync, &cpu_online_map); + stop_machine(stp_sync_clock, &stp_sync, cpu_online_mask); put_online_cpus(); if (!check_sync_clock()) diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c index 94b06c3..2eafb8c 100644 --- a/arch/s390/kernel/topology.c +++ b/arch/s390/kernel/topology.c @@ -52,20 +52,20 @@ static cpumask_t cpu_group_map(struct mask_info *info, unsigned int cpu) { cpumask_t mask; - cpus_clear(mask); + cpumask_clear(&mask); if (!topology_enabled || !MACHINE_HAS_TOPOLOGY) { cpumask_copy(&mask, cpumask_of(cpu)); return mask; } while (info) { - if (cpu_isset(cpu, info->mask)) { + if (cpumask_test_cpu(cpu, &info->mask)) { mask = info->mask; break; } info = info->next; } - if (cpus_empty(mask)) - mask = cpumask_of_cpu(cpu); + if (cpumask_empty(&mask)) + cpumask_copy(&mask, cpumask_of(cpu)); return mask; } @@ -85,10 +85,10 @@ static void add_cpus_to_mask(struct topology_cpu *tl_cpu, if (cpu_logical_map(lcpu) != rcpu) continue; #ifdef CONFIG_SCHED_BOOK - cpu_set(lcpu, book->mask); + cpumask_set_cpu(lcpu, &book->mask); cpu_book_id[lcpu] = book->id; #endif - cpu_set(lcpu, core->mask); + cpumask_set_cpu(lcpu, &core->mask); cpu_core_id[lcpu] = core->id; smp_cpu_polarization[lcpu] = tl_cpu->pp; } @@ -101,13 +101,13 @@ static void clear_masks(void) info = &core_info; while (info) { - cpus_clear(info->mask); + cpumask_clear(&info->mask); info = info->next; } #ifdef CONFIG_SCHED_BOOK info = &book_info; while (info) { - cpus_clear(info->mask); + cpumask_clear(&info->mask); info = info->next; } #endif -- cgit v0.10.2 From 1b60f68f6674ecc5c86f7dfe7e87941bc4236b9b Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 23 May 2011 10:24:37 +0200 Subject: [S390] sclp: remove unnecessary sendmask check The sendmask check is not needed. Remove it. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky diff --git a/drivers/s390/char/sclp_config.c b/drivers/s390/char/sclp_config.c index 16e232a..95b909a 100644 --- a/drivers/s390/char/sclp_config.c +++ b/drivers/s390/char/sclp_config.c @@ -71,21 +71,9 @@ static struct sclp_register sclp_conf_register = static int __init sclp_conf_init(void) { - int rc; - INIT_WORK(&sclp_cpu_capability_work, sclp_cpu_capability_notify); INIT_WORK(&sclp_cpu_change_work, sclp_cpu_change_notify); - - rc = sclp_register(&sclp_conf_register); - if (rc) - return rc; - - if (!(sclp_conf_register.sclp_send_mask & EVTYP_CONFMGMDATA_MASK)) { - pr_warning("no configuration management.\n"); - sclp_unregister(&sclp_conf_register); - rc = -ENOSYS; - } - return rc; + return sclp_register(&sclp_conf_register); } __initcall(sclp_conf_init); -- cgit v0.10.2 From c26001d4e9133fe45e47eee18cfd826219e71fb9 Mon Sep 17 00:00:00 2001 From: Jan Glauber Date: Mon, 23 May 2011 10:24:38 +0200 Subject: [S390] qdio: prevent compile warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Prevent the following compile warning for !CONFIG_64BIT: CC drivers/s390/cio/qdio_main.o drivers/s390/cio/qdio_main.c: In function ‘handle_outbound’: drivers/s390/cio/qdio_main.c:1449: warning: ‘state’ may be used uninitialized in this function Signed-off-by: Jan Glauber Signed-off-by: Martin Schwidefsky diff --git a/drivers/s390/cio/qdio_main.c b/drivers/s390/cio/qdio_main.c index e8f267e..55e8f72 100644 --- a/drivers/s390/cio/qdio_main.c +++ b/drivers/s390/cio/qdio_main.c @@ -1446,7 +1446,7 @@ set: static int handle_outbound(struct qdio_q *q, unsigned int callflags, int bufnr, int count) { - unsigned char state; + unsigned char state = 0; int used, rc = 0; qperf_inc(q, outbound_call); -- cgit v0.10.2 From 2d42552d1c1659b014851cf449ad2fe458509128 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Mon, 23 May 2011 10:24:39 +0200 Subject: [S390] merge page_test_dirty and page_clear_dirty The page_clear_dirty primitive always sets the default storage key which resets the access control bits and the fetch protection bit. That will surprise a KVM guest that sets non-zero access control bits or the fetch protection bit. Merge page_test_dirty and page_clear_dirty back to a single function and only clear the dirty bit from the storage key. In addition move the function page_test_and_clear_dirty and page_test_and_clear_young to page.h where they belong. This requires to change the parameter from a struct page * to a page frame number. Signed-off-by: Martin Schwidefsky diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h index 3c987e9..81ee277 100644 --- a/arch/s390/include/asm/page.h +++ b/arch/s390/include/asm/page.h @@ -107,8 +107,8 @@ typedef pte_t *pgtable_t; #define __pgd(x) ((pgd_t) { (x) } ) #define __pgprot(x) ((pgprot_t) { (x) } ) -static inline void -page_set_storage_key(unsigned long addr, unsigned int skey, int mapped) +static inline void page_set_storage_key(unsigned long addr, + unsigned char skey, int mapped) { if (!mapped) asm volatile(".insn rrf,0xb22b0000,%0,%1,8,0" @@ -117,15 +117,59 @@ page_set_storage_key(unsigned long addr, unsigned int skey, int mapped) asm volatile("sske %0,%1" : : "d" (skey), "a" (addr)); } -static inline unsigned int -page_get_storage_key(unsigned long addr) +static inline unsigned char page_get_storage_key(unsigned long addr) { - unsigned int skey; + unsigned char skey; - asm volatile("iske %0,%1" : "=d" (skey) : "a" (addr), "0" (0)); + asm volatile("iske %0,%1" : "=d" (skey) : "a" (addr)); return skey; } +static inline int page_reset_referenced(unsigned long addr) +{ + unsigned int ipm; + + asm volatile( + " rrbe 0,%1\n" + " ipm %0\n" + : "=d" (ipm) : "a" (addr) : "cc"); + return !!(ipm & 0x20000000); +} + +/* Bits int the storage key */ +#define _PAGE_CHANGED 0x02 /* HW changed bit */ +#define _PAGE_REFERENCED 0x04 /* HW referenced bit */ +#define _PAGE_FP_BIT 0x08 /* HW fetch protection bit */ +#define _PAGE_ACC_BITS 0xf0 /* HW access control bits */ + +/* + * Test and clear dirty bit in storage key. + * We can't clear the changed bit atomically. This is a potential + * race against modification of the referenced bit. This function + * should therefore only be called if it is not mapped in any + * address space. + */ +#define __HAVE_ARCH_PAGE_TEST_AND_CLEAR_DIRTY +static inline int page_test_and_clear_dirty(unsigned long pfn, int mapped) +{ + unsigned char skey; + + skey = page_get_storage_key(pfn << PAGE_SHIFT); + if (!(skey & _PAGE_CHANGED)) + return 0; + page_set_storage_key(pfn << PAGE_SHIFT, skey & ~_PAGE_CHANGED, mapped); + return 1; +} + +/* + * Test and clear referenced bit in storage key. + */ +#define __HAVE_ARCH_PAGE_TEST_AND_CLEAR_YOUNG +static inline int page_test_and_clear_young(unsigned long pfn) +{ + return page_reset_referenced(pfn << PAGE_SHIFT); +} + struct page; void arch_free_page(struct page *page, int order); void arch_alloc_page(struct page *page, int order); diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 763620e..4ca4dd2 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -373,10 +373,6 @@ extern unsigned long VMALLOC_START; #define _ASCE_USER_BITS (_ASCE_SPACE_SWITCH | _ASCE_PRIVATE_SPACE | \ _ASCE_ALT_EVENT) -/* Bits int the storage key */ -#define _PAGE_CHANGED 0x02 /* HW changed bit */ -#define _PAGE_REFERENCED 0x04 /* HW referenced bit */ - /* * Page protection definitions. */ @@ -555,8 +551,6 @@ static inline void rcp_unlock(pte_t *ptep) #endif } -/* forward declaration for SetPageUptodate in page-flags.h*/ -static inline void page_clear_dirty(struct page *page, int mapped); #include static inline void ptep_rcp_copy(pte_t *ptep) @@ -566,7 +560,7 @@ static inline void ptep_rcp_copy(pte_t *ptep) unsigned int skey; unsigned long *pgste = (unsigned long *) (ptep + PTRS_PER_PTE); - skey = page_get_storage_key(page_to_phys(page)); + skey = page_get_storage_key(pte_val(*ptep) >> PAGE_SHIFT); if (skey & _PAGE_CHANGED) { set_bit_simple(RCP_GC_BIT, pgste); set_bit_simple(KVM_UD_BIT, pgste); @@ -760,6 +754,7 @@ static inline int kvm_s390_test_and_clear_page_dirty(struct mm_struct *mm, { int dirty; unsigned long *pgste; + unsigned long pfn; struct page *page; unsigned int skey; @@ -767,8 +762,9 @@ static inline int kvm_s390_test_and_clear_page_dirty(struct mm_struct *mm, return -EINVAL; rcp_lock(ptep); pgste = (unsigned long *) (ptep + PTRS_PER_PTE); - page = virt_to_page(pte_val(*ptep)); - skey = page_get_storage_key(page_to_phys(page)); + pfn = pte_val(*ptep) >> PAGE_SHIFT; + page = pfn_to_page(pfn); + skey = page_get_storage_key(pfn); if (skey & _PAGE_CHANGED) { set_bit_simple(RCP_GC_BIT, pgste); set_bit_simple(KVM_UD_BIT, pgste); @@ -779,7 +775,7 @@ static inline int kvm_s390_test_and_clear_page_dirty(struct mm_struct *mm, } dirty = test_and_clear_bit_simple(KVM_UD_BIT, pgste); if (skey & _PAGE_CHANGED) - page_clear_dirty(page, 1); + page_set_storage_key(pfn, skey & ~_PAGE_CHANGED, 1); rcp_unlock(ptep); return dirty; } @@ -790,16 +786,16 @@ static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep) { #ifdef CONFIG_PGSTE - unsigned long physpage; + unsigned long pfn; int young; unsigned long *pgste; if (!vma->vm_mm->context.has_pgste) return 0; - physpage = pte_val(*ptep) & PAGE_MASK; + pfn = pte_val(*ptep) >> PAGE_SHIFT; pgste = (unsigned long *) (ptep + PTRS_PER_PTE); - young = ((page_get_storage_key(physpage) & _PAGE_REFERENCED) != 0); + young = ((page_get_storage_key(pfn) & _PAGE_REFERENCED) != 0); rcp_lock(ptep); if (young) set_bit_simple(RCP_GR_BIT, pgste); @@ -937,42 +933,6 @@ static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm, }) /* - * Test and clear dirty bit in storage key. - * We can't clear the changed bit atomically. This is a potential - * race against modification of the referenced bit. This function - * should therefore only be called if it is not mapped in any - * address space. - */ -#define __HAVE_ARCH_PAGE_TEST_DIRTY -static inline int page_test_dirty(struct page *page) -{ - return (page_get_storage_key(page_to_phys(page)) & _PAGE_CHANGED) != 0; -} - -#define __HAVE_ARCH_PAGE_CLEAR_DIRTY -static inline void page_clear_dirty(struct page *page, int mapped) -{ - page_set_storage_key(page_to_phys(page), PAGE_DEFAULT_KEY, mapped); -} - -/* - * Test and clear referenced bit in storage key. - */ -#define __HAVE_ARCH_PAGE_TEST_AND_CLEAR_YOUNG -static inline int page_test_and_clear_young(struct page *page) -{ - unsigned long physpage = page_to_phys(page); - int ccode; - - asm volatile( - " rrbe 0,%1\n" - " ipm %0\n" - " srl %0,28\n" - : "=d" (ccode) : "a" (physpage) : "cc" ); - return ccode & 2; -} - -/* * Conversion functions: convert a page and protection to a page entry, * and a page entry and page directory to the page they refer to. */ diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h index b4bfe33..e9b8e59 100644 --- a/include/asm-generic/pgtable.h +++ b/include/asm-generic/pgtable.h @@ -184,22 +184,18 @@ static inline int pmd_same(pmd_t pmd_a, pmd_t pmd_b) #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ #endif -#ifndef __HAVE_ARCH_PAGE_TEST_DIRTY -#define page_test_dirty(page) (0) +#ifndef __HAVE_ARCH_PAGE_TEST_AND_CLEAR_DIRTY +#define page_test_and_clear_dirty(pfn, mapped) (0) #endif -#ifndef __HAVE_ARCH_PAGE_CLEAR_DIRTY -#define page_clear_dirty(page, mapped) do { } while (0) -#endif - -#ifndef __HAVE_ARCH_PAGE_TEST_DIRTY +#ifndef __HAVE_ARCH_PAGE_TEST_AND_CLEAR_DIRTY #define pte_maybe_dirty(pte) pte_dirty(pte) #else #define pte_maybe_dirty(pte) (1) #endif #ifndef __HAVE_ARCH_PAGE_TEST_AND_CLEAR_YOUNG -#define page_test_and_clear_young(page) (0) +#define page_test_and_clear_young(pfn) (0) #endif #ifndef __HAVE_ARCH_PGD_OFFSET_GATE diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 811183d..79a6700 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -308,7 +308,7 @@ static inline void SetPageUptodate(struct page *page) { #ifdef CONFIG_S390 if (!test_and_set_bit(PG_uptodate, &page->flags)) - page_clear_dirty(page, 0); + page_set_storage_key(page_to_pfn(page), PAGE_DEFAULT_KEY, 0); #else /* * Memory barrier must be issued before setting the PG_uptodate bit, diff --git a/mm/rmap.c b/mm/rmap.c index 8da044a..522e4a9 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -719,7 +719,7 @@ int page_referenced(struct page *page, unlock_page(page); } out: - if (page_test_and_clear_young(page)) + if (page_test_and_clear_young(page_to_pfn(page))) referenced++; return referenced; @@ -785,10 +785,8 @@ int page_mkclean(struct page *page) struct address_space *mapping = page_mapping(page); if (mapping) { ret = page_mkclean_file(mapping, page); - if (page_test_dirty(page)) { - page_clear_dirty(page, 1); + if (page_test_and_clear_dirty(page_to_pfn(page), 1)) ret = 1; - } } } @@ -981,10 +979,9 @@ void page_remove_rmap(struct page *page) * not if it's in swapcache - there might be another pte slot * containing the swap entry, but page not yet written to swap. */ - if ((!PageAnon(page) || PageSwapCache(page)) && page_test_dirty(page)) { - page_clear_dirty(page, 1); + if ((!PageAnon(page) || PageSwapCache(page)) && + page_test_and_clear_dirty(page_to_pfn(page), 1)) set_page_dirty(page); - } /* * Hugepages are not counted in NR_ANON_PAGES nor NR_FILE_MAPPED * and not charged by memcg for now. -- cgit v0.10.2 From b2fa47e6bf5148aa6dbf22ec79f18141b421eeba Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Mon, 23 May 2011 10:24:40 +0200 Subject: [S390] refactor page table functions for better pgste support Rework the architecture page table functions to access the bits in the page table extension array (pgste). There are a number of changes: 1) Fix missing pgste update if the attach_count for the mm is <= 1. 2) For every operation that affects the invalid bit in the pte or the rcp byte in the pgste the pcl lock needs to be acquired. The function pgste_get_lock gets the pcl lock and returns the current pgste value for a pte pointer. The function pgste_set_unlock stores the pgste and releases the lock. Between these two calls the bits in the pgste can be shuffled. 3) Define two software bits in the pte _PAGE_SWR and _PAGE_SWC to avoid calling SetPageDirty and SetPageReferenced from pgtable.h. If the host reference backup bit or the host change backup bit has been set the dirty/referenced state is transfered to the pte. The common code will pick up the state from the pte. 4) Add ptep_modify_prot_start and ptep_modify_prot_commit for mprotect. 5) Remove pgd_populate_kernel, pud_populate_kernel, pmd_populate_kernel pgd_clear_kernel, pud_clear_kernel, pmd_clear_kernel and ptep_invalidate. 6) Rename kvm_s390_test_and_clear_page_dirty to ptep_test_and_clear_user_dirty and add ptep_test_and_clear_user_young. 7) Define mm_exclusive() and mm_has_pgste() helper to improve readability. Signed-off-by: Martin Schwidefsky diff --git a/arch/s390/include/asm/mmu.h b/arch/s390/include/asm/mmu.h index 818e829..82d0847 100644 --- a/arch/s390/include/asm/mmu.h +++ b/arch/s390/include/asm/mmu.h @@ -9,8 +9,10 @@ typedef struct { unsigned long asce_bits; unsigned long asce_limit; unsigned long vdso_base; - int has_pgste; /* The mmu context has extended page tables */ - int alloc_pgste; /* cloned contexts will have extended page tables */ + /* Cloned contexts will be created with extended page tables. */ + unsigned int alloc_pgste:1; + /* The mmu context has extended page tables. */ + unsigned int has_pgste:1; } mm_context_t; #define INIT_MM_CONTEXT(name) \ diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h index 81ee277..accb372 100644 --- a/arch/s390/include/asm/page.h +++ b/arch/s390/include/asm/page.h @@ -90,6 +90,7 @@ static inline void copy_page(void *to, void *from) */ typedef struct { unsigned long pgprot; } pgprot_t; +typedef struct { unsigned long pgste; } pgste_t; typedef struct { unsigned long pte; } pte_t; typedef struct { unsigned long pmd; } pmd_t; typedef struct { unsigned long pud; } pud_t; @@ -97,13 +98,16 @@ typedef struct { unsigned long pgd; } pgd_t; typedef pte_t *pgtable_t; #define pgprot_val(x) ((x).pgprot) +#define pgste_val(x) ((x).pgste) #define pte_val(x) ((x).pte) #define pmd_val(x) ((x).pmd) #define pud_val(x) ((x).pud) #define pgd_val(x) ((x).pgd) +#define __pgste(x) ((pgste_t) { (x) } ) #define __pte(x) ((pte_t) { (x) } ) #define __pmd(x) ((pmd_t) { (x) } ) +#define __pud(x) ((pud_t) { (x) } ) #define __pgd(x) ((pgd_t) { (x) } ) #define __pgprot(x) ((pgprot_t) { (x) } ) diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h index 739ff9e..f6314af 100644 --- a/arch/s390/include/asm/pgalloc.h +++ b/arch/s390/include/asm/pgalloc.h @@ -65,10 +65,7 @@ static inline unsigned long pgd_entry_type(struct mm_struct *mm) #define pmd_free(mm, x) do { } while (0) #define pgd_populate(mm, pgd, pud) BUG() -#define pgd_populate_kernel(mm, pgd, pud) BUG() - #define pud_populate(mm, pud, pmd) BUG() -#define pud_populate_kernel(mm, pud, pmd) BUG() #else /* __s390x__ */ @@ -102,26 +99,14 @@ static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long vmaddr) } #define pmd_free(mm, pmd) crst_table_free(mm, (unsigned long *) pmd) -static inline void pgd_populate_kernel(struct mm_struct *mm, - pgd_t *pgd, pud_t *pud) -{ - pgd_val(*pgd) = _REGION2_ENTRY | __pa(pud); -} - static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud) { - pgd_populate_kernel(mm, pgd, pud); -} - -static inline void pud_populate_kernel(struct mm_struct *mm, - pud_t *pud, pmd_t *pmd) -{ - pud_val(*pud) = _REGION3_ENTRY | __pa(pmd); + pgd_val(*pgd) = _REGION2_ENTRY | __pa(pud); } static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd) { - pud_populate_kernel(mm, pud, pmd); + pud_val(*pud) = _REGION3_ENTRY | __pa(pmd); } #endif /* __s390x__ */ @@ -134,18 +119,14 @@ static inline pgd_t *pgd_alloc(struct mm_struct *mm) } #define pgd_free(mm, pgd) crst_table_free(mm, (unsigned long *) pgd) -static inline void pmd_populate_kernel(struct mm_struct *mm, - pmd_t *pmd, pte_t *pte) -{ - pmd_val(*pmd) = _SEGMENT_ENTRY + __pa(pte); -} - static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, pgtable_t pte) { - pmd_populate_kernel(mm, pmd, pte); + pmd_val(*pmd) = _SEGMENT_ENTRY + __pa(pte); } +#define pmd_populate_kernel(mm, pmd, pte) pmd_populate(mm, pmd, pte) + #define pmd_pgtable(pmd) \ (pgtable_t)(pmd_val(pmd) & -sizeof(pte_t)*PTRS_PER_PTE) diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 4ca4dd2..c4773a2 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -31,9 +31,8 @@ #ifndef __ASSEMBLY__ #include #include -#include #include -#include +#include extern pgd_t swapper_pg_dir[] __attribute__ ((aligned (4096))); extern void paging_init(void); @@ -243,11 +242,13 @@ extern unsigned long VMALLOC_START; /* Software bits in the page table entry */ #define _PAGE_SWT 0x001 /* SW pte type bit t */ #define _PAGE_SWX 0x002 /* SW pte type bit x */ -#define _PAGE_SPECIAL 0x004 /* SW associated with special page */ +#define _PAGE_SWC 0x004 /* SW pte changed bit (for KVM) */ +#define _PAGE_SWR 0x008 /* SW pte referenced bit (for KVM) */ +#define _PAGE_SPECIAL 0x010 /* SW associated with special page */ #define __HAVE_ARCH_PTE_SPECIAL /* Set of bits not changed in pte_modify */ -#define _PAGE_CHG_MASK (PAGE_MASK | _PAGE_SPECIAL) +#define _PAGE_CHG_MASK (PAGE_MASK | _PAGE_SPECIAL | _PAGE_SWC | _PAGE_SWR) /* Six different types of pages. */ #define _PAGE_TYPE_EMPTY 0x400 @@ -293,14 +294,17 @@ extern unsigned long VMALLOC_START; */ /* Page status table bits for virtualization */ -#define RCP_PCL_BIT 55 -#define RCP_HR_BIT 54 -#define RCP_HC_BIT 53 -#define RCP_GR_BIT 50 -#define RCP_GC_BIT 49 - -/* User dirty bit for KVM's migration feature */ -#define KVM_UD_BIT 47 +#define RCP_ACC_BITS 0xf000000000000000UL +#define RCP_FP_BIT 0x0800000000000000UL +#define RCP_PCL_BIT 0x0080000000000000UL +#define RCP_HR_BIT 0x0040000000000000UL +#define RCP_HC_BIT 0x0020000000000000UL +#define RCP_GR_BIT 0x0004000000000000UL +#define RCP_GC_BIT 0x0002000000000000UL + +/* User dirty / referenced bit for KVM's migration feature */ +#define KVM_UR_BIT 0x0000800000000000UL +#define KVM_UC_BIT 0x0000400000000000UL #ifndef __s390x__ @@ -407,17 +411,20 @@ extern unsigned long VMALLOC_START; #define __S110 PAGE_RW #define __S111 PAGE_RW -/* - * Certain architectures need to do special things when PTEs - * within a page table are directly modified. Thus, the following - * hook is made available. - */ -static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, - pte_t *ptep, pte_t entry) +static inline int mm_exclusive(struct mm_struct *mm) { - *ptep = entry; + return likely(mm == current->active_mm && + atomic_read(&mm->context.attach_count) <= 1); } +static inline int mm_has_pgste(struct mm_struct *mm) +{ +#ifdef CONFIG_PGSTE + if (unlikely(mm->context.has_pgste)) + return 1; +#endif + return 0; +} /* * pgd/pmd/pte query functions */ @@ -530,53 +537,130 @@ static inline int pte_special(pte_t pte) } #define __HAVE_ARCH_PTE_SAME -#define pte_same(a,b) (pte_val(a) == pte_val(b)) +static inline int pte_same(pte_t a, pte_t b) +{ + return pte_val(a) == pte_val(b); +} -static inline void rcp_lock(pte_t *ptep) +static inline pgste_t pgste_get_lock(pte_t *ptep) { + unsigned long new = 0; #ifdef CONFIG_PGSTE - unsigned long *pgste = (unsigned long *) (ptep + PTRS_PER_PTE); + unsigned long old; + preempt_disable(); - while (test_and_set_bit(RCP_PCL_BIT, pgste)) - ; + asm( + " lg %0,%2\n" + "0: lgr %1,%0\n" + " nihh %0,0xff7f\n" /* clear RCP_PCL_BIT in old */ + " oihh %1,0x0080\n" /* set RCP_PCL_BIT in new */ + " csg %0,%1,%2\n" + " jl 0b\n" + : "=&d" (old), "=&d" (new), "=Q" (ptep[PTRS_PER_PTE]) + : "Q" (ptep[PTRS_PER_PTE]) : "cc"); #endif + return __pgste(new); } -static inline void rcp_unlock(pte_t *ptep) +static inline void pgste_set_unlock(pte_t *ptep, pgste_t pgste) { #ifdef CONFIG_PGSTE - unsigned long *pgste = (unsigned long *) (ptep + PTRS_PER_PTE); - clear_bit(RCP_PCL_BIT, pgste); + asm( + " nihh %1,0xff7f\n" /* clear RCP_PCL_BIT */ + " stg %1,%0\n" + : "=Q" (ptep[PTRS_PER_PTE]) + : "d" (pgste_val(pgste)), "Q" (ptep[PTRS_PER_PTE]) : "cc"); preempt_enable(); #endif } -#include - -static inline void ptep_rcp_copy(pte_t *ptep) +static inline pgste_t pgste_update_all(pte_t *ptep, pgste_t pgste) { #ifdef CONFIG_PGSTE - struct page *page = virt_to_page(pte_val(*ptep)); - unsigned int skey; - unsigned long *pgste = (unsigned long *) (ptep + PTRS_PER_PTE); - - skey = page_get_storage_key(pte_val(*ptep) >> PAGE_SHIFT); - if (skey & _PAGE_CHANGED) { - set_bit_simple(RCP_GC_BIT, pgste); - set_bit_simple(KVM_UD_BIT, pgste); - } - if (skey & _PAGE_REFERENCED) - set_bit_simple(RCP_GR_BIT, pgste); - if (test_and_clear_bit_simple(RCP_HC_BIT, pgste)) { - SetPageDirty(page); - set_bit_simple(KVM_UD_BIT, pgste); + unsigned long pfn, bits; + unsigned char skey; + + pfn = pte_val(*ptep) >> PAGE_SHIFT; + skey = page_get_storage_key(pfn); + bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED); + /* Clear page changed & referenced bit in the storage key */ + if (bits) { + skey ^= bits; + page_set_storage_key(pfn, skey, 1); } - if (test_and_clear_bit_simple(RCP_HR_BIT, pgste)) - SetPageReferenced(page); + /* Transfer page changed & referenced bit to guest bits in pgste */ + pgste_val(pgste) |= bits << 48; /* RCP_GR_BIT & RCP_GC_BIT */ + /* Get host changed & referenced bits from pgste */ + bits |= (pgste_val(pgste) & (RCP_HR_BIT | RCP_HC_BIT)) >> 52; + /* Clear host bits in pgste. */ + pgste_val(pgste) &= ~(RCP_HR_BIT | RCP_HC_BIT); + pgste_val(pgste) &= ~(RCP_ACC_BITS | RCP_FP_BIT); + /* Copy page access key and fetch protection bit to pgste */ + pgste_val(pgste) |= + (unsigned long) (skey & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56; + /* Transfer changed and referenced to kvm user bits */ + pgste_val(pgste) |= bits << 45; /* KVM_UR_BIT & KVM_UC_BIT */ + /* Transfer changed & referenced to pte sofware bits */ + pte_val(*ptep) |= bits << 1; /* _PAGE_SWR & _PAGE_SWC */ +#endif + return pgste; + +} + +static inline pgste_t pgste_update_young(pte_t *ptep, pgste_t pgste) +{ +#ifdef CONFIG_PGSTE + int young; + + young = page_reset_referenced(pte_val(*ptep) & PAGE_MASK); + /* Transfer page referenced bit to pte software bit (host view) */ + if (young || (pgste_val(pgste) & RCP_HR_BIT)) + pte_val(*ptep) |= _PAGE_SWR; + /* Clear host referenced bit in pgste. */ + pgste_val(pgste) &= ~RCP_HR_BIT; + /* Transfer page referenced bit to guest bit in pgste */ + pgste_val(pgste) |= (unsigned long) young << 50; /* set RCP_GR_BIT */ +#endif + return pgste; + +} + +static inline void pgste_set_pte(pte_t *ptep, pgste_t pgste) +{ +#ifdef CONFIG_PGSTE + unsigned long pfn; + unsigned long okey, nkey; + + pfn = pte_val(*ptep) >> PAGE_SHIFT; + okey = nkey = page_get_storage_key(pfn); + nkey &= ~(_PAGE_ACC_BITS | _PAGE_FP_BIT); + /* Set page access key and fetch protection bit from pgste */ + nkey |= (pgste_val(pgste) & (RCP_ACC_BITS | RCP_FP_BIT)) >> 56; + if (okey != nkey) + page_set_storage_key(pfn, nkey, 1); #endif } /* + * Certain architectures need to do special things when PTEs + * within a page table are directly modified. Thus, the following + * hook is made available. + */ +static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t entry) +{ + pgste_t pgste; + + if (mm_has_pgste(mm)) { + pgste = pgste_get_lock(ptep); + pgste_set_pte(ptep, pgste); + *ptep = entry; + pgste_set_unlock(ptep, pgste); + } else + *ptep = entry; +} + +/* * query functions pte_write/pte_dirty/pte_young only work if * pte_present() is true. Undefined behaviour if not.. */ @@ -587,19 +671,19 @@ static inline int pte_write(pte_t pte) static inline int pte_dirty(pte_t pte) { - /* A pte is neither clean nor dirty on s/390. The dirty bit - * is in the storage key. See page_test_and_clear_dirty for - * details. - */ +#ifdef CONFIG_PGSTE + if (pte_val(pte) & _PAGE_SWC) + return 1; +#endif return 0; } static inline int pte_young(pte_t pte) { - /* A pte is neither young nor old on s/390. The young bit - * is in the storage key. See page_test_and_clear_young for - * details. - */ +#ifdef CONFIG_PGSTE + if (pte_val(pte) & _PAGE_SWR) + return 1; +#endif return 0; } @@ -607,46 +691,27 @@ static inline int pte_young(pte_t pte) * pgd/pmd/pte modification functions */ -#ifndef __s390x__ - -#define pgd_clear(pgd) do { } while (0) -#define pud_clear(pud) do { } while (0) - -#else /* __s390x__ */ - -static inline void pgd_clear_kernel(pgd_t * pgd) +static inline void pgd_clear(pgd_t *pgd) { +#ifdef __s390x__ if ((pgd_val(*pgd) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R2) pgd_val(*pgd) = _REGION2_ENTRY_EMPTY; +#endif } -static inline void pgd_clear(pgd_t * pgd) -{ - pgd_clear_kernel(pgd); -} - -static inline void pud_clear_kernel(pud_t *pud) +static inline void pud_clear(pud_t *pud) { +#ifdef __s390x__ if ((pud_val(*pud) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3) pud_val(*pud) = _REGION3_ENTRY_EMPTY; +#endif } -static inline void pud_clear(pud_t *pud) -{ - pud_clear_kernel(pud); -} -#endif /* __s390x__ */ - -static inline void pmd_clear_kernel(pmd_t * pmdp) +static inline void pmd_clear(pmd_t *pmdp) { pmd_val(*pmdp) = _SEGMENT_ENTRY_EMPTY; } -static inline void pmd_clear(pmd_t *pmd) -{ - pmd_clear_kernel(pmd); -} - static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { pte_val(*ptep) = _PAGE_TYPE_EMPTY; @@ -679,35 +744,27 @@ static inline pte_t pte_mkwrite(pte_t pte) static inline pte_t pte_mkclean(pte_t pte) { - /* The only user of pte_mkclean is the fork() code. - We must *not* clear the *physical* page dirty bit - just because fork() wants to clear the dirty bit in - *one* of the page's mappings. So we just do nothing. */ +#ifdef CONFIG_PGSTE + pte_val(pte) &= ~_PAGE_SWC; +#endif return pte; } static inline pte_t pte_mkdirty(pte_t pte) { - /* We do not explicitly set the dirty bit because the - * sske instruction is slow. It is faster to let the - * next instruction set the dirty bit. - */ return pte; } static inline pte_t pte_mkold(pte_t pte) { - /* S/390 doesn't keep its dirty/referenced bit in the pte. - * There is no point in clearing the real referenced bit. - */ +#ifdef CONFIG_PGSTE + pte_val(pte) &= ~_PAGE_SWR; +#endif return pte; } static inline pte_t pte_mkyoung(pte_t pte) { - /* S/390 doesn't keep its dirty/referenced bit in the pte. - * There is no point in setting the real referenced bit. - */ return pte; } @@ -745,64 +802,60 @@ static inline pte_t pte_mkhuge(pte_t pte) } #endif -#ifdef CONFIG_PGSTE /* - * Get (and clear) the user dirty bit for a PTE. + * Get (and clear) the user dirty bit for a pte. */ -static inline int kvm_s390_test_and_clear_page_dirty(struct mm_struct *mm, - pte_t *ptep) +static inline int ptep_test_and_clear_user_dirty(struct mm_struct *mm, + pte_t *ptep) { - int dirty; - unsigned long *pgste; - unsigned long pfn; - struct page *page; - unsigned int skey; - - if (!mm->context.has_pgste) - return -EINVAL; - rcp_lock(ptep); - pgste = (unsigned long *) (ptep + PTRS_PER_PTE); - pfn = pte_val(*ptep) >> PAGE_SHIFT; - page = pfn_to_page(pfn); - skey = page_get_storage_key(pfn); - if (skey & _PAGE_CHANGED) { - set_bit_simple(RCP_GC_BIT, pgste); - set_bit_simple(KVM_UD_BIT, pgste); - } - if (test_and_clear_bit_simple(RCP_HC_BIT, pgste)) { - SetPageDirty(page); - set_bit_simple(KVM_UD_BIT, pgste); + pgste_t pgste; + int dirty = 0; + + if (mm_has_pgste(mm)) { + pgste = pgste_get_lock(ptep); + pgste = pgste_update_all(ptep, pgste); + dirty = !!(pgste_val(pgste) & KVM_UC_BIT); + pgste_val(pgste) &= ~KVM_UC_BIT; + pgste_set_unlock(ptep, pgste); + return dirty; } - dirty = test_and_clear_bit_simple(KVM_UD_BIT, pgste); - if (skey & _PAGE_CHANGED) - page_set_storage_key(pfn, skey & ~_PAGE_CHANGED, 1); - rcp_unlock(ptep); return dirty; } -#endif + +/* + * Get (and clear) the user referenced bit for a pte. + */ +static inline int ptep_test_and_clear_user_young(struct mm_struct *mm, + pte_t *ptep) +{ + pgste_t pgste; + int young = 0; + + if (mm_has_pgste(mm)) { + pgste = pgste_get_lock(ptep); + pgste = pgste_update_young(ptep, pgste); + young = !!(pgste_val(pgste) & KVM_UR_BIT); + pgste_val(pgste) &= ~KVM_UR_BIT; + pgste_set_unlock(ptep, pgste); + } + return young; +} #define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep) { -#ifdef CONFIG_PGSTE - unsigned long pfn; - int young; - unsigned long *pgste; + pgste_t pgste; + pte_t pte; - if (!vma->vm_mm->context.has_pgste) - return 0; - pfn = pte_val(*ptep) >> PAGE_SHIFT; - pgste = (unsigned long *) (ptep + PTRS_PER_PTE); - - young = ((page_get_storage_key(pfn) & _PAGE_REFERENCED) != 0); - rcp_lock(ptep); - if (young) - set_bit_simple(RCP_GR_BIT, pgste); - young |= test_and_clear_bit_simple(RCP_HR_BIT, pgste); - rcp_unlock(ptep); - return young; -#endif + if (mm_has_pgste(vma->vm_mm)) { + pgste = pgste_get_lock(ptep); + pgste = pgste_update_young(ptep, pgste); + pte = *ptep; + *ptep = pte_mkold(pte); + pgste_set_unlock(ptep, pgste); + return pte_young(pte); + } return 0; } @@ -814,10 +867,7 @@ static inline int ptep_clear_flush_young(struct vm_area_struct *vma, * On s390 reference bits are in storage key and never in TLB * With virtualization we handle the reference bit, without we * we can simply return */ -#ifdef CONFIG_PGSTE return ptep_test_and_clear_young(vma, address, ptep); -#endif - return 0; } static inline void __ptep_ipte(unsigned long address, pte_t *ptep) @@ -837,21 +887,6 @@ static inline void __ptep_ipte(unsigned long address, pte_t *ptep) } } -static inline void ptep_invalidate(struct mm_struct *mm, - unsigned long address, pte_t *ptep) -{ - if (mm->context.has_pgste) { - rcp_lock(ptep); - __ptep_ipte(address, ptep); - ptep_rcp_copy(ptep); - pte_val(*ptep) = _PAGE_TYPE_EMPTY; - rcp_unlock(ptep); - return; - } - __ptep_ipte(address, ptep); - pte_val(*ptep) = _PAGE_TYPE_EMPTY; -} - /* * This is hard to understand. ptep_get_and_clear and ptep_clear_flush * both clear the TLB for the unmapped pte. The reason is that @@ -866,24 +901,72 @@ static inline void ptep_invalidate(struct mm_struct *mm, * is a nop. */ #define __HAVE_ARCH_PTEP_GET_AND_CLEAR -#define ptep_get_and_clear(__mm, __address, __ptep) \ -({ \ - pte_t __pte = *(__ptep); \ - (__mm)->context.flush_mm = 1; \ - if (atomic_read(&(__mm)->context.attach_count) > 1 || \ - (__mm) != current->active_mm) \ - ptep_invalidate(__mm, __address, __ptep); \ - else \ - pte_clear((__mm), (__address), (__ptep)); \ - __pte; \ -}) +static inline pte_t ptep_get_and_clear(struct mm_struct *mm, + unsigned long address, pte_t *ptep) +{ + pgste_t pgste; + pte_t pte; + + mm->context.flush_mm = 1; + if (mm_has_pgste(mm)) + pgste = pgste_get_lock(ptep); + + pte = *ptep; + if (!mm_exclusive(mm)) + __ptep_ipte(address, ptep); + pte_val(*ptep) = _PAGE_TYPE_EMPTY; + + if (mm_has_pgste(mm)) { + pgste = pgste_update_all(&pte, pgste); + pgste_set_unlock(ptep, pgste); + } + return pte; +} + +#define __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION +static inline pte_t ptep_modify_prot_start(struct mm_struct *mm, + unsigned long address, + pte_t *ptep) +{ + pte_t pte; + + mm->context.flush_mm = 1; + if (mm_has_pgste(mm)) + pgste_get_lock(ptep); + + pte = *ptep; + if (!mm_exclusive(mm)) + __ptep_ipte(address, ptep); + return pte; +} + +static inline void ptep_modify_prot_commit(struct mm_struct *mm, + unsigned long address, + pte_t *ptep, pte_t pte) +{ + *ptep = pte; + if (mm_has_pgste(mm)) + pgste_set_unlock(ptep, *(pgste_t *)(ptep + PTRS_PER_PTE)); +} #define __HAVE_ARCH_PTEP_CLEAR_FLUSH static inline pte_t ptep_clear_flush(struct vm_area_struct *vma, unsigned long address, pte_t *ptep) { - pte_t pte = *ptep; - ptep_invalidate(vma->vm_mm, address, ptep); + pgste_t pgste; + pte_t pte; + + if (mm_has_pgste(vma->vm_mm)) + pgste = pgste_get_lock(ptep); + + pte = *ptep; + __ptep_ipte(address, ptep); + pte_val(*ptep) = _PAGE_TYPE_EMPTY; + + if (mm_has_pgste(vma->vm_mm)) { + pgste = pgste_update_all(&pte, pgste); + pgste_set_unlock(ptep, pgste); + } return pte; } @@ -896,41 +979,68 @@ static inline pte_t ptep_clear_flush(struct vm_area_struct *vma, */ #define __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm, - unsigned long addr, + unsigned long address, pte_t *ptep, int full) { - pte_t pte = *ptep; + pgste_t pgste; + pte_t pte; - if (full) - pte_clear(mm, addr, ptep); - else - ptep_invalidate(mm, addr, ptep); + if (mm_has_pgste(mm)) + pgste = pgste_get_lock(ptep); + + pte = *ptep; + if (!full) + __ptep_ipte(address, ptep); + pte_val(*ptep) = _PAGE_TYPE_EMPTY; + + if (mm_has_pgste(mm)) { + pgste = pgste_update_all(&pte, pgste); + pgste_set_unlock(ptep, pgste); + } return pte; } #define __HAVE_ARCH_PTEP_SET_WRPROTECT -#define ptep_set_wrprotect(__mm, __addr, __ptep) \ -({ \ - pte_t __pte = *(__ptep); \ - if (pte_write(__pte)) { \ - (__mm)->context.flush_mm = 1; \ - if (atomic_read(&(__mm)->context.attach_count) > 1 || \ - (__mm) != current->active_mm) \ - ptep_invalidate(__mm, __addr, __ptep); \ - set_pte_at(__mm, __addr, __ptep, pte_wrprotect(__pte)); \ - } \ -}) +static inline pte_t ptep_set_wrprotect(struct mm_struct *mm, + unsigned long address, pte_t *ptep) +{ + pgste_t pgste; + pte_t pte = *ptep; + + if (pte_write(pte)) { + mm->context.flush_mm = 1; + if (mm_has_pgste(mm)) + pgste = pgste_get_lock(ptep); + + if (!mm_exclusive(mm)) + __ptep_ipte(address, ptep); + *ptep = pte_wrprotect(pte); + + if (mm_has_pgste(mm)) + pgste_set_unlock(ptep, pgste); + } + return pte; +} #define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS -#define ptep_set_access_flags(__vma, __addr, __ptep, __entry, __dirty) \ -({ \ - int __changed = !pte_same(*(__ptep), __entry); \ - if (__changed) { \ - ptep_invalidate((__vma)->vm_mm, __addr, __ptep); \ - set_pte_at((__vma)->vm_mm, __addr, __ptep, __entry); \ - } \ - __changed; \ -}) +static inline int ptep_set_access_flags(struct vm_area_struct *vma, + unsigned long address, pte_t *ptep, + pte_t entry, int dirty) +{ + pgste_t pgste; + + if (pte_same(*ptep, entry)) + return 0; + if (mm_has_pgste(vma->vm_mm)) + pgste = pgste_get_lock(ptep); + + __ptep_ipte(address, ptep); + *ptep = entry; + + if (mm_has_pgste(vma->vm_mm)) + pgste_set_unlock(ptep, pgste); + return 1; +} /* * Conversion functions: convert a page and protection to a page entry, diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index bb40933..dfefc21 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -175,7 +175,8 @@ void kernel_map_pages(struct page *page, int numpages, int enable) pmd = pmd_offset(pud, address); pte = pte_offset_kernel(pmd, address); if (!enable) { - ptep_invalidate(&init_mm, address, pte); + __ptep_ipte(address, pte); + pte_val(*pte) = _PAGE_TYPE_EMPTY; continue; } *pte = mk_pte_phys(address, __pgprot(_PAGE_TYPE_RW)); diff --git a/arch/s390/mm/pageattr.c b/arch/s390/mm/pageattr.c index f05edcc..d013ed3 100644 --- a/arch/s390/mm/pageattr.c +++ b/arch/s390/mm/pageattr.c @@ -28,7 +28,7 @@ static void change_page_attr(unsigned long addr, int numpages, pte = *ptep; pte = set(pte); - ptep_invalidate(&init_mm, addr, ptep); + __ptep_ipte(addr, ptep); *ptep = pte; addr += PAGE_SIZE; } diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c index 34c43f2..8c1970d 100644 --- a/arch/s390/mm/vmem.c +++ b/arch/s390/mm/vmem.c @@ -95,7 +95,7 @@ static int vmem_add_mem(unsigned long start, unsigned long size, int ro) pu_dir = vmem_pud_alloc(); if (!pu_dir) goto out; - pgd_populate_kernel(&init_mm, pg_dir, pu_dir); + pgd_populate(&init_mm, pg_dir, pu_dir); } pu_dir = pud_offset(pg_dir, address); @@ -103,7 +103,7 @@ static int vmem_add_mem(unsigned long start, unsigned long size, int ro) pm_dir = vmem_pmd_alloc(); if (!pm_dir) goto out; - pud_populate_kernel(&init_mm, pu_dir, pm_dir); + pud_populate(&init_mm, pu_dir, pm_dir); } pte = mk_pte_phys(address, __pgprot(ro ? _PAGE_RO : 0)); @@ -123,7 +123,7 @@ static int vmem_add_mem(unsigned long start, unsigned long size, int ro) pt_dir = vmem_pte_alloc(); if (!pt_dir) goto out; - pmd_populate_kernel(&init_mm, pm_dir, pt_dir); + pmd_populate(&init_mm, pm_dir, pt_dir); } pt_dir = pte_offset_kernel(pm_dir, address); @@ -159,7 +159,7 @@ static void vmem_remove_range(unsigned long start, unsigned long size) continue; if (pmd_huge(*pm_dir)) { - pmd_clear_kernel(pm_dir); + pmd_clear(pm_dir); address += HPAGE_SIZE - PAGE_SIZE; continue; } @@ -192,7 +192,7 @@ int __meminit vmemmap_populate(struct page *start, unsigned long nr, int node) pu_dir = vmem_pud_alloc(); if (!pu_dir) goto out; - pgd_populate_kernel(&init_mm, pg_dir, pu_dir); + pgd_populate(&init_mm, pg_dir, pu_dir); } pu_dir = pud_offset(pg_dir, address); @@ -200,7 +200,7 @@ int __meminit vmemmap_populate(struct page *start, unsigned long nr, int node) pm_dir = vmem_pmd_alloc(); if (!pm_dir) goto out; - pud_populate_kernel(&init_mm, pu_dir, pm_dir); + pud_populate(&init_mm, pu_dir, pm_dir); } pm_dir = pmd_offset(pu_dir, address); @@ -208,7 +208,7 @@ int __meminit vmemmap_populate(struct page *start, unsigned long nr, int node) pt_dir = vmem_pte_alloc(); if (!pt_dir) goto out; - pmd_populate_kernel(&init_mm, pm_dir, pt_dir); + pmd_populate(&init_mm, pm_dir, pt_dir); } pt_dir = pte_offset_kernel(pm_dir, address); -- cgit v0.10.2 From fca894edd7566f5c548598c8fad7f329278c23b4 Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Mon, 23 May 2011 10:24:41 +0200 Subject: [S390] chsc: process channel-path-availability information Update affected channel path descriptors when receiving channel path availability information. Signed-off-by: Sebastian Ott Signed-off-by: Martin Schwidefsky diff --git a/drivers/s390/cio/chsc.c b/drivers/s390/cio/chsc.c index 0689fcf..75c3f1f 100644 --- a/drivers/s390/cio/chsc.c +++ b/drivers/s390/cio/chsc.c @@ -326,6 +326,36 @@ static void chsc_process_sei_res_acc(struct chsc_sei_area *sei_area) s390_process_res_acc(&link); } +static void chsc_process_sei_chp_avail(struct chsc_sei_area *sei_area) +{ + struct channel_path *chp; + struct chp_id chpid; + u8 *data; + int num; + + CIO_CRW_EVENT(4, "chsc: channel path availability information\n"); + if (sei_area->rs != 0) + return; + data = sei_area->ccdf; + chp_id_init(&chpid); + for (num = 0; num <= __MAX_CHPID; num++) { + if (!chp_test_bit(data, num)) + continue; + chpid.id = num; + + CIO_CRW_EVENT(4, "Update information for channel path " + "%x.%02x\n", chpid.cssid, chpid.id); + chp = chpid_to_chp(chpid); + if (!chp) { + chp_new(chpid); + continue; + } + mutex_lock(&chp->lock); + chsc_determine_base_channel_path_desc(chpid, &chp->desc); + mutex_unlock(&chp->lock); + } +} + struct chp_config_data { u8 map[32]; u8 op; @@ -376,9 +406,12 @@ static void chsc_process_sei(struct chsc_sei_area *sei_area) case 1: /* link incident*/ chsc_process_sei_link_incident(sei_area); break; - case 2: /* i/o resource accessibiliy */ + case 2: /* i/o resource accessibility */ chsc_process_sei_res_acc(sei_area); break; + case 7: /* channel-path-availability information */ + chsc_process_sei_chp_avail(sei_area); + break; case 8: /* channel-path-configuration notification */ chsc_process_sei_chp_config(sei_area); break; -- cgit v0.10.2 From 30c2df51173ea4e4755ad52be7f2914f01e32404 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Mon, 23 May 2011 10:24:42 +0200 Subject: [S390] sclp: event buffer dissection Move gds vector/subvector find functions to the sclp header file. Simplify event buffer dissection in sclp tty code. Signed-off-by: Martin Schwidefsky diff --git a/drivers/s390/char/sclp.h b/drivers/s390/char/sclp.h index 6bb5a6b..bc23b05 100644 --- a/drivers/s390/char/sclp.h +++ b/drivers/s390/char/sclp.h @@ -186,4 +186,26 @@ sclp_ascebc_str(unsigned char *str, int nr) (MACHINE_IS_VM) ? ASCEBC(str, nr) : ASCEBC_500(str, nr); } +static inline struct gds_vector * +sclp_find_gds_vector(void *start, void *end, u16 id) +{ + struct gds_vector *v; + + for (v = start; (void *) v < end; v = (void *) v + v->length) + if (v->gds_id == id) + return v; + return NULL; +} + +static inline struct gds_subvector * +sclp_find_gds_subvector(void *start, void *end, u8 key) +{ + struct gds_subvector *sv; + + for (sv = start; (void *) sv < end; sv = (void *) sv + sv->length) + if (sv->key == key) + return sv; + return NULL; +} + #endif /* __SCLP_H__ */ diff --git a/drivers/s390/char/sclp_tty.c b/drivers/s390/char/sclp_tty.c index 8258d59..a879c13 100644 --- a/drivers/s390/char/sclp_tty.c +++ b/drivers/s390/char/sclp_tty.c @@ -408,118 +408,72 @@ static int sclp_switch_cases(unsigned char *buf, int count) return op - buf; } -static void -sclp_get_input(unsigned char *start, unsigned char *end) +static void sclp_get_input(struct gds_subvector *sv) { + unsigned char *str; int count; - count = end - start; + str = (unsigned char *) (sv + 1); + count = sv->length - sizeof(*sv); if (sclp_tty_tolower) - EBC_TOLOWER(start, count); - count = sclp_switch_cases(start, count); + EBC_TOLOWER(str, count); + count = sclp_switch_cases(str, count); /* convert EBCDIC to ASCII (modify original input in SCCB) */ - sclp_ebcasc_str(start, count); + sclp_ebcasc_str(str, count); /* transfer input to high level driver */ - sclp_tty_input(start, count); -} - -static inline struct gds_vector * -find_gds_vector(struct gds_vector *start, struct gds_vector *end, u16 id) -{ - struct gds_vector *vec; - - for (vec = start; vec < end; vec = (void *) vec + vec->length) - if (vec->gds_id == id) - return vec; - return NULL; + sclp_tty_input(str, count); } -static inline struct gds_subvector * -find_gds_subvector(struct gds_subvector *start, - struct gds_subvector *end, u8 key) +static inline void sclp_eval_selfdeftextmsg(struct gds_subvector *sv) { - struct gds_subvector *subvec; + void *end; - for (subvec = start; subvec < end; - subvec = (void *) subvec + subvec->length) - if (subvec->key == key) - return subvec; - return NULL; + end = (void *) sv + sv->length; + for (sv = sv + 1; (void *) sv < end; sv = (void *) sv + sv->length) + if (sv->key == 0x30) + sclp_get_input(sv); } -static inline void -sclp_eval_selfdeftextmsg(struct gds_subvector *start, - struct gds_subvector *end) +static inline void sclp_eval_textcmd(struct gds_vector *v) { - struct gds_subvector *subvec; - - subvec = start; - while (subvec < end) { - subvec = find_gds_subvector(subvec, end, 0x30); - if (!subvec) - break; - sclp_get_input((unsigned char *)(subvec + 1), - (unsigned char *) subvec + subvec->length); - subvec = (void *) subvec + subvec->length; - } -} + struct gds_subvector *sv; + void *end; -static inline void -sclp_eval_textcmd(struct gds_subvector *start, - struct gds_subvector *end) -{ - struct gds_subvector *subvec; + end = (void *) v + v->length; + for (sv = (struct gds_subvector *) (v + 1); + (void *) sv < end; sv = (void *) sv + sv->length) + if (sv->key == GDS_KEY_SELFDEFTEXTMSG) + sclp_eval_selfdeftextmsg(sv); - subvec = start; - while (subvec < end) { - subvec = find_gds_subvector(subvec, end, - GDS_KEY_SELFDEFTEXTMSG); - if (!subvec) - break; - sclp_eval_selfdeftextmsg((struct gds_subvector *)(subvec + 1), - (void *)subvec + subvec->length); - subvec = (void *) subvec + subvec->length; - } } -static inline void -sclp_eval_cpmsu(struct gds_vector *start, struct gds_vector *end) +static inline void sclp_eval_cpmsu(struct gds_vector *v) { - struct gds_vector *vec; + void *end; - vec = start; - while (vec < end) { - vec = find_gds_vector(vec, end, GDS_ID_TEXTCMD); - if (!vec) - break; - sclp_eval_textcmd((struct gds_subvector *)(vec + 1), - (void *) vec + vec->length); - vec = (void *) vec + vec->length; - } + end = (void *) v + v->length; + for (v = v + 1; (void *) v < end; v = (void *) v + v->length) + if (v->gds_id == GDS_ID_TEXTCMD) + sclp_eval_textcmd(v); } -static inline void -sclp_eval_mdsmu(struct gds_vector *start, void *end) +static inline void sclp_eval_mdsmu(struct gds_vector *v) { - struct gds_vector *vec; - - vec = find_gds_vector(start, end, GDS_ID_CPMSU); - if (vec) - sclp_eval_cpmsu(vec + 1, (void *) vec + vec->length); + v = sclp_find_gds_vector(v + 1, (void *) v + v->length, GDS_ID_CPMSU); + if (v) + sclp_eval_cpmsu(v); } -static void -sclp_tty_receiver(struct evbuf_header *evbuf) +static void sclp_tty_receiver(struct evbuf_header *evbuf) { - struct gds_vector *start, *end, *vec; + struct gds_vector *v; - start = (struct gds_vector *)(evbuf + 1); - end = (void *) evbuf + evbuf->length; - vec = find_gds_vector(start, end, GDS_ID_MDSMU); - if (vec) - sclp_eval_mdsmu(vec + 1, (void *) vec + vec->length); + v = sclp_find_gds_vector(evbuf + 1, (void *) evbuf + evbuf->length, + GDS_ID_MDSMU); + if (v) + sclp_eval_mdsmu(v); } static void -- cgit v0.10.2 From 7eb9d5bec552eff896ebf079386dc47e9dc2fc89 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Mon, 23 May 2011 10:24:43 +0200 Subject: [S390] get CPC image name Provide sysfs attributes that contain the CPC name and the HMC network name of the machine the operating system is running on. This information is retrieved with the operation communication parameters (OCF) sclp interface. Signed-off-by: Martin Schwidefsky diff --git a/drivers/s390/char/Makefile b/drivers/s390/char/Makefile index af01b5a..f3c3252 100644 --- a/drivers/s390/char/Makefile +++ b/drivers/s390/char/Makefile @@ -3,7 +3,7 @@ # obj-y += ctrlchar.o keyboard.o defkeymap.o sclp.o sclp_rw.o sclp_quiesce.o \ - sclp_cmd.o sclp_config.o sclp_cpi_sys.o + sclp_cmd.o sclp_config.o sclp_cpi_sys.o sclp_ocf.o obj-$(CONFIG_TN3270) += raw3270.o obj-$(CONFIG_TN3270_CONSOLE) += con3270.o diff --git a/drivers/s390/char/sclp.h b/drivers/s390/char/sclp.h index bc23b05..49a1bb5 100644 --- a/drivers/s390/char/sclp.h +++ b/drivers/s390/char/sclp.h @@ -28,6 +28,7 @@ #define EVTYP_CONFMGMDATA 0x04 #define EVTYP_SDIAS 0x1C #define EVTYP_ASYNC 0x0A +#define EVTYP_OCF 0x1E #define EVTYP_OPCMD_MASK 0x80000000 #define EVTYP_MSG_MASK 0x40000000 @@ -40,6 +41,7 @@ #define EVTYP_CONFMGMDATA_MASK 0x10000000 #define EVTYP_SDIAS_MASK 0x00000010 #define EVTYP_ASYNC_MASK 0x00400000 +#define EVTYP_OCF_MASK 0x00000004 #define GNRLMSGFLGS_DOM 0x8000 #define GNRLMSGFLGS_SNDALRM 0x4000 diff --git a/drivers/s390/char/sclp_ocf.c b/drivers/s390/char/sclp_ocf.c new file mode 100644 index 0000000..ab294d5 --- /dev/null +++ b/drivers/s390/char/sclp_ocf.c @@ -0,0 +1,145 @@ +/* + * drivers/s390/char/sclp_ocf.c + * SCLP OCF communication parameters sysfs interface + * + * Copyright IBM Corp. 2011 + * Author(s): Martin Schwidefsky + */ + +#define KMSG_COMPONENT "sclp_ocf" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "sclp.h" + +#define OCF_LENGTH_HMC_NETWORK 8UL +#define OCF_LENGTH_CPC_NAME 8UL + +static char hmc_network[OCF_LENGTH_HMC_NETWORK + 1]; +static char cpc_name[OCF_LENGTH_CPC_NAME + 1]; + +static DEFINE_SPINLOCK(sclp_ocf_lock); +static struct work_struct sclp_ocf_change_work; + +static struct kset *ocf_kset; + +static void sclp_ocf_change_notify(struct work_struct *work) +{ + kobject_uevent(&ocf_kset->kobj, KOBJ_CHANGE); +} + +/* Handler for OCF event. Look for the CPC image name. */ +static void sclp_ocf_handler(struct evbuf_header *evbuf) +{ + struct gds_vector *v; + struct gds_subvector *sv, *netid, *cpc; + size_t size; + + /* Find the 0x9f00 block. */ + v = sclp_find_gds_vector(evbuf + 1, (void *) evbuf + evbuf->length, + 0x9f00); + if (!v) + return; + /* Find the 0x9f22 block inside the 0x9f00 block. */ + v = sclp_find_gds_vector(v + 1, (void *) v + v->length, 0x9f22); + if (!v) + return; + /* Find the 0x81 block inside the 0x9f22 block. */ + sv = sclp_find_gds_subvector(v + 1, (void *) v + v->length, 0x81); + if (!sv) + return; + /* Find the 0x01 block inside the 0x81 block. */ + netid = sclp_find_gds_subvector(sv + 1, (void *) sv + sv->length, 1); + /* Find the 0x02 block inside the 0x81 block. */ + cpc = sclp_find_gds_subvector(sv + 1, (void *) sv + sv->length, 2); + /* Copy network name and cpc name. */ + spin_lock(&sclp_ocf_lock); + if (netid) { + size = min(OCF_LENGTH_HMC_NETWORK, (size_t) netid->length); + memcpy(hmc_network, netid + 1, size); + EBCASC(hmc_network, size); + hmc_network[size] = 0; + } + if (cpc) { + size = min(OCF_LENGTH_CPC_NAME, (size_t) cpc->length); + memcpy(cpc_name, cpc + 1, size); + EBCASC(cpc_name, size); + cpc_name[size] = 0; + } + spin_unlock(&sclp_ocf_lock); + schedule_work(&sclp_ocf_change_work); +} + +static struct sclp_register sclp_ocf_event = { + .receive_mask = EVTYP_OCF_MASK, + .receiver_fn = sclp_ocf_handler, +}; + +static ssize_t cpc_name_show(struct kobject *kobj, + struct kobj_attribute *attr, char *page) +{ + int rc; + + spin_lock_irq(&sclp_ocf_lock); + rc = snprintf(page, PAGE_SIZE, "%s\n", cpc_name); + spin_unlock_irq(&sclp_ocf_lock); + return rc; +} + +static struct kobj_attribute cpc_name_attr = + __ATTR(cpc_name, 0444, cpc_name_show, NULL); + +static ssize_t hmc_network_show(struct kobject *kobj, + struct kobj_attribute *attr, char *page) +{ + int rc; + + spin_lock_irq(&sclp_ocf_lock); + rc = snprintf(page, PAGE_SIZE, "%s\n", hmc_network); + spin_unlock_irq(&sclp_ocf_lock); + return rc; +} + +static struct kobj_attribute hmc_network_attr = + __ATTR(hmc_network, 0444, hmc_network_show, NULL); + +static struct attribute *ocf_attrs[] = { + &cpc_name_attr.attr, + &hmc_network_attr.attr, + NULL, +}; + +static struct attribute_group ocf_attr_group = { + .attrs = ocf_attrs, +}; + +static int __init ocf_init(void) +{ + int rc; + + INIT_WORK(&sclp_ocf_change_work, sclp_ocf_change_notify); + ocf_kset = kset_create_and_add("ocf", NULL, firmware_kobj); + if (!ocf_kset) + return -ENOMEM; + + rc = sysfs_create_group(&ocf_kset->kobj, &ocf_attr_group); + if (rc) { + kset_unregister(ocf_kset); + return rc; + } + + return sclp_register(&sclp_ocf_event); +} + +device_initcall(ocf_init); -- cgit v0.10.2 From 3af6fb687b06393c00390d8d779c5d97ced00cde Mon Sep 17 00:00:00 2001 From: Jan Glauber Date: Mon, 23 May 2011 10:24:44 +0200 Subject: [S390] Remove unused includes in process.c Remove unsused includes from arch/s390/kernel/process.c. Signed-off-by: Jan Glauber Signed-off-by: Martin Schwidefsky diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c index a895e69..541a750 100644 --- a/arch/s390/kernel/process.c +++ b/arch/s390/kernel/process.c @@ -9,41 +9,26 @@ #include #include -#include #include #include #include -#include #include -#include #include -#include -#include -#include -#include #include -#include -#include -#include -#include -#include #include -#include -#include #include #include #include #include #include -#include -#include -#include +#include #include #include #include #include #include #include +#include #include #include "entry.h" -- cgit v0.10.2 From 9529cdc51f36ca0c87ce0246627c5951c40cbc01 Mon Sep 17 00:00:00 2001 From: Jan Glauber Date: Mon, 23 May 2011 10:24:45 +0200 Subject: [S390] s390,oprofile: fix alert counter increment The counter for requested interrupts should be incremented if the program-request-alert bit is set and not the invalid-address-entry bit. Signed-off-by: Jan Glauber Signed-off-by: Martin Schwidefsky diff --git a/arch/s390/oprofile/hwsampler.c b/arch/s390/oprofile/hwsampler.c index 33cbd37..90c5a87 100644 --- a/arch/s390/oprofile/hwsampler.c +++ b/arch/s390/oprofile/hwsampler.c @@ -764,7 +764,7 @@ static int worker_check_error(unsigned int cpu, int ext_params) if (!sdbt || !*sdbt) return -EINVAL; - if (ext_params & EI_IEA) + if (ext_params & EI_PRA) cb->req_alert++; if (ext_params & EI_LSDA) -- cgit v0.10.2 From 43a679d6c9f59d7aa20443d43d8041ea9d9aa363 Mon Sep 17 00:00:00 2001 From: Jan Glauber Date: Mon, 23 May 2011 10:24:46 +0200 Subject: [S390] s390,oprofile: fix compile error for !CONFIG_SMP MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use ctl_set_bit instead of the smp_ctl_set_bit (likewise for clear bit) to prevent the following build error for !CONFIG_SMP: CC arch/s390/oprofile/hwsampler.o arch/s390/oprofile/hwsampler.c: In function ‘hwsampler_deallocate’: arch/s390/oprofile/hwsampler.c:1012: error: implicit declaration of function ‘smp_ctl_clear_bit’ arch/s390/oprofile/hwsampler.c: In function ‘hwsampler_start_all’: arch/s390/oprofile/hwsampler.c:1201: error: implicit declaration of function ‘smp_ctl_set_bit’ CC kernel/seccomp.o make[1]: *** [arch/s390/oprofile/hwsampler.o] Error 1 make: *** [arch/s390/oprofile] Error 2 Signed-off-by: Jan Glauber Signed-off-by: Martin Schwidefsky diff --git a/arch/s390/oprofile/hwsampler.c b/arch/s390/oprofile/hwsampler.c index 90c5a87..8e686bf 100644 --- a/arch/s390/oprofile/hwsampler.c +++ b/arch/s390/oprofile/hwsampler.c @@ -1009,7 +1009,7 @@ int hwsampler_deallocate() if (hws_state != HWS_STOPPED) goto deallocate_exit; - smp_ctl_clear_bit(0, 5); /* set bit 58 CR0 off */ + ctl_clear_bit(0, 5); /* set bit 58 CR0 off */ deallocate_sdbt(); hws_state = HWS_DEALLOCATED; @@ -1123,7 +1123,7 @@ int hwsampler_shutdown() mutex_lock(&hws_sem); if (hws_state == HWS_STOPPED) { - smp_ctl_clear_bit(0, 5); /* set bit 58 CR0 off */ + ctl_clear_bit(0, 5); /* set bit 58 CR0 off */ deallocate_sdbt(); } if (hws_wq) { @@ -1198,7 +1198,7 @@ start_all_exit: hws_oom = 1; hws_flush_all = 0; /* now let them in, 1407 CPUMF external interrupts */ - smp_ctl_set_bit(0, 5); /* set CR0 bit 58 */ + ctl_set_bit(0, 5); /* set CR0 bit 58 */ return 0; } -- cgit v0.10.2 From bfac1d2d34990bf6ce0abf947fd5043cf412256b Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 23 May 2011 10:24:47 +0200 Subject: [S390] Ignore sendmmsg system call note wired up warning sendmmsg is reachable via the socket system call. We don't enable a second way on s390 to reach the same system call. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky diff --git a/arch/s390/include/asm/unistd.h b/arch/s390/include/asm/unistd.h index e821525..9208e69 100644 --- a/arch/s390/include/asm/unistd.h +++ b/arch/s390/include/asm/unistd.h @@ -385,6 +385,7 @@ /* Ignore system calls that are also reachable via sys_socket */ #define __IGNORE_recvmmsg +#define __IGNORE_sendmmsg #define __ARCH_WANT_IPC_PARSE_VERSION #define __ARCH_WANT_OLD_READDIR -- cgit v0.10.2 From fcdd65b0e7bbbd6aef9be2f20e7c238759cf0b41 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 23 May 2011 10:24:48 +0200 Subject: [S390] oprofile: add missing irq stats counter Count CPU measurement external interrupts as well. Signed-off-by: Heiko Carstens diff --git a/arch/s390/include/asm/irq.h b/arch/s390/include/asm/irq.h index db14a31..1544b90 100644 --- a/arch/s390/include/asm/irq.h +++ b/arch/s390/include/asm/irq.h @@ -15,6 +15,7 @@ enum interruption_class { EXTINT_VRT, EXTINT_SCP, EXTINT_IUC, + EXTINT_CPM, IOINT_QAI, IOINT_QDI, IOINT_DAS, diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c index ea5099c..e204f95 100644 --- a/arch/s390/kernel/irq.c +++ b/arch/s390/kernel/irq.c @@ -32,6 +32,7 @@ static const struct irq_class intrclass_names[] = { {.name = "VRT", .desc = "[EXT] Virtio" }, {.name = "SCP", .desc = "[EXT] Service Call" }, {.name = "IUC", .desc = "[EXT] IUCV" }, + {.name = "CPM", .desc = "[EXT] CPU Measurement" }, {.name = "QAI", .desc = "[I/O] QDIO Adapter Interrupt" }, {.name = "QDI", .desc = "[I/O] QDIO Interrupt" }, {.name = "DAS", .desc = "[I/O] DASD" }, diff --git a/arch/s390/oprofile/hwsampler.c b/arch/s390/oprofile/hwsampler.c index 8e686bf..cb4338c 100644 --- a/arch/s390/oprofile/hwsampler.c +++ b/arch/s390/oprofile/hwsampler.c @@ -5,6 +5,7 @@ * Author: Heinz Graalfs */ +#include #include #include #include @@ -677,6 +678,7 @@ static void hws_ext_handler(unsigned int ext_int_code, int cpu; struct hws_cpu_buffer *cb; + kstat_cpu(smp_processor_id()).irqs[EXTINT_CPM]++; cpu = smp_processor_id(); cb = &per_cpu(sampler_cpu_buffer, cpu); -- cgit v0.10.2 From 5f420c5bd15c948302aafd3341903bc06895f1bb Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 23 May 2011 10:24:49 +0200 Subject: [S390] oprofile: dont access lowcore The external interrupt parameter is passed as function call parameter. No need to access lowcore. Signed-off-by: Heiko Carstens diff --git a/arch/s390/oprofile/hwsampler.c b/arch/s390/oprofile/hwsampler.c index cb4338c..053caa0 100644 --- a/arch/s390/oprofile/hwsampler.c +++ b/arch/s390/oprofile/hwsampler.c @@ -675,18 +675,11 @@ int hwsampler_activate(unsigned int cpu) static void hws_ext_handler(unsigned int ext_int_code, unsigned int param32, unsigned long param64) { - int cpu; struct hws_cpu_buffer *cb; kstat_cpu(smp_processor_id()).irqs[EXTINT_CPM]++; - cpu = smp_processor_id(); - cb = &per_cpu(sampler_cpu_buffer, cpu); - - atomic_xchg( - &cb->ext_params, - atomic_read(&cb->ext_params) - | S390_lowcore.ext_params); - + cb = &__get_cpu_var(sampler_cpu_buffer); + atomic_xchg(&cb->ext_params, atomic_read(&cb->ext_params) | param32); if (hws_wq) queue_work(hws_wq, &cb->worker); } -- cgit v0.10.2 From 5bd418784a2764a8d9de177a5462bfc008fd334a Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 23 May 2011 10:24:50 +0200 Subject: [S390] cpu hotplug: fix external interrupt subclass mask handling When disabling a cpu all external interrupt subclass masks in control register 0 get cleared. However instead of the service signal subclass mask bit an unused bit got cleared. Accidently (or luckily) the service subclass mask gets cleared with the pfault_fini() call that happens just before the rest of the subclass mask bits get cleared. Signed-off-by: Heiko Carstens diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index fed71dc..f8e85ec 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -654,8 +654,8 @@ int __cpu_disable(void) /* disable all external interrupts */ cr_parms.orvals[0] = 0; - cr_parms.andvals[0] = ~(1 << 15 | 1 << 14 | 1 << 13 | 1 << 12 | - 1 << 11 | 1 << 10 | 1 << 6 | 1 << 4); + cr_parms.andvals[0] = ~(1 << 15 | 1 << 14 | 1 << 13 | 1 << 11 | + 1 << 10 | 1 << 9 | 1 << 6 | 1 << 4); /* disable all I/O interrupts */ cr_parms.orvals[6] = 0; cr_parms.andvals[6] = ~(1 << 31 | 1 << 30 | 1 << 29 | 1 << 28 | -- cgit v0.10.2