From bc29b7ac1d9f09f5024b0e257e91bf5df611ccd4 Mon Sep 17 00:00:00 2001 From: Gerald Schaefer Date: Mon, 18 Jul 2016 14:35:13 +0200 Subject: s390/mm: clean up pte/pmd encoding The hugetlbfs pte<->pmd conversion functions currently assume that the pmd bit layout is consistent with the pte layout, which is not really true. The SW read and write bits are encoded as the sequence "wr" in a pte, but in a pmd it is "rw". The hugetlbfs conversion assumes that the sequence is identical in both cases, which results in swapped read and write bits in the pmd. In practice this is not a problem, because those pmd bits are only relevant for THP pmds and not for hugetlbfs pmds. The hugetlbfs code works on (fake) ptes, and the converted pte bits are correct. There is another variation in pte/pmd encoding which affects dirty prot-none ptes/pmds. In this case, a pmd has both its HW read-only and invalid bit set, while it is only the invalid bit for a pte. This also has no effect in practice, but it should better be consistent. This patch fixes both inconsistencies by changing the SW read/write bit layout for pmds as well as the PAGE_NONE encoding for ptes. It also makes the hugetlbfs conversion functions more robust by introducing a move_set_bit() macro that uses the pte/pmd bit #defines instead of constant shifts. Signed-off-by: Gerald Schaefer Signed-off-by: Martin Schwidefsky diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index ea1533e..48d383a 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -242,8 +242,8 @@ static inline int is_module_addr(void *addr) * swap .11..ttttt.0 * prot-none, clean, old .11.xx0000.1 * prot-none, clean, young .11.xx0001.1 - * prot-none, dirty, old .10.xx0010.1 - * prot-none, dirty, young .10.xx0011.1 + * prot-none, dirty, old .11.xx0010.1 + * prot-none, dirty, young .11.xx0011.1 * read-only, clean, old .11.xx0100.1 * read-only, clean, young .01.xx0101.1 * read-only, dirty, old .11.xx0110.1 @@ -323,8 +323,8 @@ static inline int is_module_addr(void *addr) #define _SEGMENT_ENTRY_DIRTY 0x2000 /* SW segment dirty bit */ #define _SEGMENT_ENTRY_YOUNG 0x1000 /* SW segment young bit */ #define _SEGMENT_ENTRY_LARGE 0x0400 /* STE-format control, large page */ -#define _SEGMENT_ENTRY_READ 0x0002 /* SW segment read bit */ -#define _SEGMENT_ENTRY_WRITE 0x0001 /* SW segment write bit */ +#define _SEGMENT_ENTRY_WRITE 0x0002 /* SW segment write bit */ +#define _SEGMENT_ENTRY_READ 0x0001 /* SW segment read bit */ #ifdef CONFIG_MEM_SOFT_DIRTY #define _SEGMENT_ENTRY_SOFT_DIRTY 0x4000 /* SW segment soft dirty bit */ @@ -335,15 +335,15 @@ static inline int is_module_addr(void *addr) /* * Segment table and region3 table entry encoding * (R = read-only, I = invalid, y = young bit): - * dy..R...I...rw + * dy..R...I...wr * prot-none, clean, old 00..1...1...00 * prot-none, clean, young 01..1...1...00 * prot-none, dirty, old 10..1...1...00 * prot-none, dirty, young 11..1...1...00 - * read-only, clean, old 00..1...1...10 - * read-only, clean, young 01..1...0...10 - * read-only, dirty, old 10..1...1...10 - * read-only, dirty, young 11..1...0...10 + * read-only, clean, old 00..1...1...01 + * read-only, clean, young 01..1...0...01 + * read-only, dirty, old 10..1...1...01 + * read-only, dirty, young 11..1...0...01 * read-write, clean, old 00..1...1...11 * read-write, clean, young 01..1...0...11 * read-write, dirty, old 10..0...1...11 @@ -382,7 +382,7 @@ static inline int is_module_addr(void *addr) /* * Page protection definitions. */ -#define PAGE_NONE __pgprot(_PAGE_PRESENT | _PAGE_INVALID) +#define PAGE_NONE __pgprot(_PAGE_PRESENT | _PAGE_INVALID | _PAGE_PROTECT) #define PAGE_READ __pgprot(_PAGE_PRESENT | _PAGE_READ | \ _PAGE_INVALID | _PAGE_PROTECT) #define PAGE_WRITE __pgprot(_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | \ diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c index e19d853..cd404aa 100644 --- a/arch/s390/mm/hugetlbpage.c +++ b/arch/s390/mm/hugetlbpage.c @@ -11,6 +11,12 @@ #include #include +/* + * If the bit selected by single-bit bitmask "a" is set within "x", move + * it to the position indicated by single-bit bitmask "b". + */ +#define move_set_bit(x, a, b) (((x) & (a)) >> ilog2(a) << ilog2(b)) + static inline unsigned long __pte_to_rste(pte_t pte) { unsigned long rste; @@ -37,13 +43,22 @@ static inline unsigned long __pte_to_rste(pte_t pte) */ if (pte_present(pte)) { rste = pte_val(pte) & PAGE_MASK; - rste |= (pte_val(pte) & _PAGE_READ) >> 4; - rste |= (pte_val(pte) & _PAGE_WRITE) >> 4; - rste |= (pte_val(pte) & _PAGE_INVALID) >> 5; - rste |= (pte_val(pte) & _PAGE_PROTECT); - rste |= (pte_val(pte) & _PAGE_DIRTY) << 10; - rste |= (pte_val(pte) & _PAGE_YOUNG) << 10; - rste |= (pte_val(pte) & _PAGE_SOFT_DIRTY) << 13; + rste |= move_set_bit(pte_val(pte), _PAGE_READ, + _SEGMENT_ENTRY_READ); + rste |= move_set_bit(pte_val(pte), _PAGE_WRITE, + _SEGMENT_ENTRY_WRITE); + rste |= move_set_bit(pte_val(pte), _PAGE_INVALID, + _SEGMENT_ENTRY_INVALID); + rste |= move_set_bit(pte_val(pte), _PAGE_PROTECT, + _SEGMENT_ENTRY_PROTECT); + rste |= move_set_bit(pte_val(pte), _PAGE_DIRTY, + _SEGMENT_ENTRY_DIRTY); + rste |= move_set_bit(pte_val(pte), _PAGE_YOUNG, + _SEGMENT_ENTRY_YOUNG); +#ifdef CONFIG_MEM_SOFT_DIRTY + rste |= move_set_bit(pte_val(pte), _PAGE_SOFT_DIRTY, + _SEGMENT_ENTRY_SOFT_DIRTY); +#endif } else rste = _SEGMENT_ENTRY_INVALID; return rste; @@ -82,13 +97,22 @@ static inline pte_t __rste_to_pte(unsigned long rste) if (present) { pte_val(pte) = rste & _SEGMENT_ENTRY_ORIGIN_LARGE; pte_val(pte) |= _PAGE_LARGE | _PAGE_PRESENT; - pte_val(pte) |= (rste & _SEGMENT_ENTRY_READ) << 4; - pte_val(pte) |= (rste & _SEGMENT_ENTRY_WRITE) << 4; - pte_val(pte) |= (rste & _SEGMENT_ENTRY_INVALID) << 5; - pte_val(pte) |= (rste & _SEGMENT_ENTRY_PROTECT); - pte_val(pte) |= (rste & _SEGMENT_ENTRY_DIRTY) >> 10; - pte_val(pte) |= (rste & _SEGMENT_ENTRY_YOUNG) >> 10; - pte_val(pte) |= (rste & _SEGMENT_ENTRY_SOFT_DIRTY) >> 13; + pte_val(pte) |= move_set_bit(rste, _SEGMENT_ENTRY_READ, + _PAGE_READ); + pte_val(pte) |= move_set_bit(rste, _SEGMENT_ENTRY_WRITE, + _PAGE_WRITE); + pte_val(pte) |= move_set_bit(rste, _SEGMENT_ENTRY_INVALID, + _PAGE_INVALID); + pte_val(pte) |= move_set_bit(rste, _SEGMENT_ENTRY_PROTECT, + _PAGE_PROTECT); + pte_val(pte) |= move_set_bit(rste, _SEGMENT_ENTRY_DIRTY, + _PAGE_DIRTY); + pte_val(pte) |= move_set_bit(rste, _SEGMENT_ENTRY_YOUNG, + _PAGE_YOUNG); +#ifdef CONFIG_MEM_SOFT_DIRTY + pte_val(pte) |= move_set_bit(rste, _SEGMENT_ENTRY_SOFT_DIRTY, + _PAGE_DIRTY); +#endif } else pte_val(pte) = _PAGE_INVALID; return pte; -- cgit v0.10.2 From 837c5220557270e652d89f68a9fb12a5e72e8a7a Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Tue, 19 Jul 2016 10:43:26 +0200 Subject: s390/cio: convert cfg_lock mutex to spinlock cfg_lock is never held long and we don't want to sleep while the lock is being held. Thus it can be converted to a simple spinlock. In addition we can now use the lock during the evaluation of a wake_up condition. Signed-off-by: Sebastian Ott Reviewed-by: Peter Oberparleiter Signed-off-by: Martin Schwidefsky diff --git a/drivers/s390/cio/chp.c b/drivers/s390/cio/chp.c index e96aced..c602211 100644 --- a/drivers/s390/cio/chp.c +++ b/drivers/s390/cio/chp.c @@ -37,7 +37,7 @@ enum cfg_task_t { /* Map for pending configure tasks. */ static enum cfg_task_t chp_cfg_task[__MAX_CSSID + 1][__MAX_CHPID + 1]; -static DEFINE_MUTEX(cfg_lock); +static DEFINE_SPINLOCK(cfg_lock); static int cfg_busy; /* Map for channel-path status. */ @@ -674,7 +674,7 @@ static void cfg_func(struct work_struct *work) enum cfg_task_t t; int rc; - mutex_lock(&cfg_lock); + spin_lock(&cfg_lock); t = cfg_none; chp_id_for_each(&chpid) { t = cfg_get_task(chpid); @@ -683,7 +683,7 @@ static void cfg_func(struct work_struct *work) break; } } - mutex_unlock(&cfg_lock); + spin_unlock(&cfg_lock); switch (t) { case cfg_configure: @@ -709,9 +709,9 @@ static void cfg_func(struct work_struct *work) case cfg_none: /* Get updated information after last change. */ info_update(); - mutex_lock(&cfg_lock); + spin_lock(&cfg_lock); cfg_busy = 0; - mutex_unlock(&cfg_lock); + spin_unlock(&cfg_lock); wake_up_interruptible(&cfg_wait_queue); return; } @@ -729,10 +729,10 @@ void chp_cfg_schedule(struct chp_id chpid, int configure) { CIO_MSG_EVENT(2, "chp_cfg_sched%x.%02x=%d\n", chpid.cssid, chpid.id, configure); - mutex_lock(&cfg_lock); + spin_lock(&cfg_lock); cfg_set_task(chpid, configure ? cfg_configure : cfg_deconfigure); cfg_busy = 1; - mutex_unlock(&cfg_lock); + spin_unlock(&cfg_lock); schedule_work(&cfg_work); } @@ -746,10 +746,10 @@ void chp_cfg_schedule(struct chp_id chpid, int configure) void chp_cfg_cancel_deconfigure(struct chp_id chpid) { CIO_MSG_EVENT(2, "chp_cfg_cancel:%x.%02x\n", chpid.cssid, chpid.id); - mutex_lock(&cfg_lock); + spin_lock(&cfg_lock); if (cfg_get_task(chpid) == cfg_deconfigure) cfg_set_task(chpid, cfg_none); - mutex_unlock(&cfg_lock); + spin_unlock(&cfg_lock); } static int cfg_wait_idle(void) -- cgit v0.10.2 From 4475aeb8b77db34dea96b09900ba0cb245b6fb42 Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Tue, 19 Jul 2016 10:53:35 +0200 Subject: s390/cio: fix premature wakeup during chp configure We store requests for channel path configure operations in an array but maintain an additional cfg_busy variable (indicating if we have requests stored in said array). When 2 tasks request a channel path configure operation cfg_busy could be set to 0 even if we still have unprocessed requests. This would lead to the second task being woken up although its request was not processed yet. Fix that by getting rid of cfg_busy and use the chp_cfg_task array in the wake up condition. Signed-off-by: Sebastian Ott Reviewed-by: Peter Oberparleiter Signed-off-by: Martin Schwidefsky diff --git a/drivers/s390/cio/chp.c b/drivers/s390/cio/chp.c index c602211..46be25c 100644 --- a/drivers/s390/cio/chp.c +++ b/drivers/s390/cio/chp.c @@ -38,7 +38,6 @@ enum cfg_task_t { /* Map for pending configure tasks. */ static enum cfg_task_t chp_cfg_task[__MAX_CSSID + 1][__MAX_CHPID + 1]; static DEFINE_SPINLOCK(cfg_lock); -static int cfg_busy; /* Map for channel-path status. */ static struct sclp_chp_info chp_info; @@ -666,6 +665,20 @@ static void cfg_set_task(struct chp_id chpid, enum cfg_task_t cfg) chp_cfg_task[chpid.cssid][chpid.id] = cfg; } +/* Fetch the first configure task. Set chpid accordingly. */ +static enum cfg_task_t chp_cfg_fetch_task(struct chp_id *chpid) +{ + enum cfg_task_t t = cfg_none; + + chp_id_for_each(chpid) { + t = cfg_get_task(*chpid); + if (t != cfg_none) + break; + } + + return t; +} + /* Perform one configure/deconfigure request. Reschedule work function until * last request. */ static void cfg_func(struct work_struct *work) @@ -675,14 +688,7 @@ static void cfg_func(struct work_struct *work) int rc; spin_lock(&cfg_lock); - t = cfg_none; - chp_id_for_each(&chpid) { - t = cfg_get_task(chpid); - if (t != cfg_none) { - cfg_set_task(chpid, cfg_none); - break; - } - } + t = chp_cfg_fetch_task(&chpid); spin_unlock(&cfg_lock); switch (t) { @@ -709,12 +715,13 @@ static void cfg_func(struct work_struct *work) case cfg_none: /* Get updated information after last change. */ info_update(); - spin_lock(&cfg_lock); - cfg_busy = 0; - spin_unlock(&cfg_lock); wake_up_interruptible(&cfg_wait_queue); return; } + spin_lock(&cfg_lock); + if (t == cfg_get_task(chpid)) + cfg_set_task(chpid, cfg_none); + spin_unlock(&cfg_lock); schedule_work(&cfg_work); } @@ -731,7 +738,6 @@ void chp_cfg_schedule(struct chp_id chpid, int configure) configure); spin_lock(&cfg_lock); cfg_set_task(chpid, configure ? cfg_configure : cfg_deconfigure); - cfg_busy = 1; spin_unlock(&cfg_lock); schedule_work(&cfg_work); } @@ -752,9 +758,21 @@ void chp_cfg_cancel_deconfigure(struct chp_id chpid) spin_unlock(&cfg_lock); } +static bool cfg_idle(void) +{ + struct chp_id chpid; + enum cfg_task_t t; + + spin_lock(&cfg_lock); + t = chp_cfg_fetch_task(&chpid); + spin_unlock(&cfg_lock); + + return t == cfg_none; +} + static int cfg_wait_idle(void) { - if (wait_event_interruptible(cfg_wait_queue, !cfg_busy)) + if (wait_event_interruptible(cfg_wait_queue, cfg_idle())) return -ERESTARTSYS; return 0; } -- cgit v0.10.2 From d6d86c57d77d466df2096b134e5f54463d3f0fb8 Mon Sep 17 00:00:00 2001 From: Ingo Tuchscherer Date: Mon, 25 Jul 2016 14:52:28 +0200 Subject: s390/zcrypt: Fix zcrypt suspend/resume behavior The device suspend call triggers all ap devices to fetch potentially available response messages from the queues. Therefore the corresponding zcrypt device, that is allocated asynchronously after ap device probing, needs to be fully prepared. This race condition could lead to uninitialized response buffers while trying to read from the queues. Introduce a new callback within the ap layer to get noticed when a zcrypt device is fully prepared. Additional checks prevent reading from devices that are not fully prepared. Signed-off-by: Ingo Tuchscherer Signed-off-by: Martin Schwidefsky diff --git a/drivers/s390/crypto/ap_bus.c b/drivers/s390/crypto/ap_bus.c index 4feb272..03e4d62 100644 --- a/drivers/s390/crypto/ap_bus.c +++ b/drivers/s390/crypto/ap_bus.c @@ -468,6 +468,8 @@ int ap_recv(ap_qid_t qid, unsigned long long *psmid, void *msg, size_t length) { struct ap_queue_status status; + if (msg == NULL) + return -EINVAL; status = __ap_recv(qid, psmid, msg, length); switch (status.response_code) { case AP_RESPONSE_NORMAL: @@ -617,6 +619,8 @@ static enum ap_wait ap_sm_read(struct ap_device *ap_dev) { struct ap_queue_status status; + if (!ap_dev->reply) + return AP_WAIT_NONE; status = ap_sm_recv(ap_dev); switch (status.response_code) { case AP_RESPONSE_NORMAL: @@ -638,6 +642,31 @@ static enum ap_wait ap_sm_read(struct ap_device *ap_dev) } /** + * ap_sm_suspend_read(): Receive pending reply messages from an AP device + * without changing the device state in between. In suspend mode we don't + * allow sending new requests, therefore just fetch pending replies. + * @ap_dev: pointer to the AP device + * + * Returns AP_WAIT_NONE or AP_WAIT_AGAIN + */ +static enum ap_wait ap_sm_suspend_read(struct ap_device *ap_dev) +{ + struct ap_queue_status status; + + if (!ap_dev->reply) + return AP_WAIT_NONE; + status = ap_sm_recv(ap_dev); + switch (status.response_code) { + case AP_RESPONSE_NORMAL: + if (ap_dev->queue_count > 0) + return AP_WAIT_AGAIN; + /* fall through */ + default: + return AP_WAIT_NONE; + } +} + +/** * ap_sm_write(): Send messages from the request queue to an AP device. * @ap_dev: pointer to the AP device * @@ -738,7 +767,7 @@ static enum ap_wait ap_sm_reset_wait(struct ap_device *ap_dev) struct ap_queue_status status; unsigned long info; - if (ap_dev->queue_count > 0) + if (ap_dev->queue_count > 0 && ap_dev->reply) /* Try to read a completed message and get the status */ status = ap_sm_recv(ap_dev); else @@ -778,7 +807,7 @@ static enum ap_wait ap_sm_setirq_wait(struct ap_device *ap_dev) struct ap_queue_status status; unsigned long info; - if (ap_dev->queue_count > 0) + if (ap_dev->queue_count > 0 && ap_dev->reply) /* Try to read a completed message and get the status */ status = ap_sm_recv(ap_dev); else @@ -834,7 +863,7 @@ static ap_func_t *ap_jumptable[NR_AP_STATES][NR_AP_EVENTS] = { [AP_EVENT_TIMEOUT] = ap_sm_reset, }, [AP_STATE_SUSPEND_WAIT] = { - [AP_EVENT_POLL] = ap_sm_read, + [AP_EVENT_POLL] = ap_sm_suspend_read, [AP_EVENT_TIMEOUT] = ap_sm_nop, }, [AP_STATE_BORKED] = { @@ -1335,6 +1364,17 @@ static struct bus_type ap_bus_type = { .resume = ap_dev_resume, }; +void ap_device_init_reply(struct ap_device *ap_dev, + struct ap_message *reply) +{ + ap_dev->reply = reply; + + spin_lock_bh(&ap_dev->lock); + ap_sm_wait(ap_sm_event(ap_dev, AP_EVENT_POLL)); + spin_unlock_bh(&ap_dev->lock); +} +EXPORT_SYMBOL(ap_device_init_reply); + static int ap_device_probe(struct device *dev) { struct ap_device *ap_dev = to_ap_dev(dev); diff --git a/drivers/s390/crypto/ap_bus.h b/drivers/s390/crypto/ap_bus.h index 6adcbdf..d7fdf5c 100644 --- a/drivers/s390/crypto/ap_bus.h +++ b/drivers/s390/crypto/ap_bus.h @@ -262,6 +262,7 @@ void ap_queue_message(struct ap_device *ap_dev, struct ap_message *ap_msg); void ap_cancel_message(struct ap_device *ap_dev, struct ap_message *ap_msg); void ap_flush_queue(struct ap_device *ap_dev); void ap_bus_force_rescan(void); +void ap_device_init_reply(struct ap_device *ap_dev, struct ap_message *ap_msg); int ap_module_init(void); void ap_module_exit(void); diff --git a/drivers/s390/crypto/zcrypt_cex2a.c b/drivers/s390/crypto/zcrypt_cex2a.c index 1e849d6..15104aa 100644 --- a/drivers/s390/crypto/zcrypt_cex2a.c +++ b/drivers/s390/crypto/zcrypt_cex2a.c @@ -126,7 +126,7 @@ static int zcrypt_cex2a_probe(struct ap_device *ap_dev) MSGTYPE50_VARIANT_DEFAULT); zdev->ap_dev = ap_dev; zdev->online = 1; - ap_dev->reply = &zdev->reply; + ap_device_init_reply(ap_dev, &zdev->reply); ap_dev->private = zdev; rc = zcrypt_device_register(zdev); if (rc) { diff --git a/drivers/s390/crypto/zcrypt_cex4.c b/drivers/s390/crypto/zcrypt_cex4.c index bb39088..ccb2e78 100644 --- a/drivers/s390/crypto/zcrypt_cex4.c +++ b/drivers/s390/crypto/zcrypt_cex4.c @@ -147,7 +147,7 @@ static int zcrypt_cex4_probe(struct ap_device *ap_dev) return -ENODEV; zdev->ap_dev = ap_dev; zdev->online = 1; - ap_dev->reply = &zdev->reply; + ap_device_init_reply(ap_dev, &zdev->reply); ap_dev->private = zdev; rc = zcrypt_device_register(zdev); if (rc) { diff --git a/drivers/s390/crypto/zcrypt_pcixcc.c b/drivers/s390/crypto/zcrypt_pcixcc.c index f418527..df8f0c4 100644 --- a/drivers/s390/crypto/zcrypt_pcixcc.c +++ b/drivers/s390/crypto/zcrypt_pcixcc.c @@ -327,7 +327,7 @@ static int zcrypt_pcixcc_probe(struct ap_device *ap_dev) else zdev->ops = zcrypt_msgtype_request(MSGTYPE06_NAME, MSGTYPE06_VARIANT_NORNG); - ap_dev->reply = &zdev->reply; + ap_device_init_reply(ap_dev, &zdev->reply); ap_dev->private = zdev; rc = zcrypt_device_register(zdev); if (rc) -- cgit v0.10.2 From cf9fdfea5f01bfebeeb28b4b27dc5151c7ea29d3 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Thu, 30 Jun 2016 10:24:18 +0200 Subject: s390/sclp: move uninitialized data to data section The early sclp code may be called before the bss section is cleared. Therefore move all variables to the data section. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky diff --git a/arch/s390/kernel/sclp.c b/arch/s390/kernel/sclp.c index d88db40..f08af67 100644 --- a/arch/s390/kernel/sclp.c +++ b/arch/s390/kernel/sclp.c @@ -12,8 +12,9 @@ #define EVTYP_VT220MSG_MASK 0x00000040 #define EVTYP_MSG_MASK 0x40000000 -static char _sclp_work_area[4096] __aligned(PAGE_SIZE); -static bool have_vt220, have_linemode; +static char _sclp_work_area[4096] __aligned(PAGE_SIZE) __section(data); +static bool have_vt220 __section(data); +static bool have_linemode __section(data); static void _sclp_wait_int(void) { -- cgit v0.10.2 From be2412c247c16c0ea4dd3293e8046e1b383c3bb6 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 27 Jun 2016 15:52:38 +0200 Subject: s390/als: convert architecture level set code to C There is no reason to have this code in assembly language. Therefore convert it to C. Note that this code needs special treatment: it is called very early and one of the side effects is that e.g. the bss section is not cleared. Therefore the preferred way for static variables is to put them on the stack which has a size of 16KB. There is no functional change with this patch. Signed-off-by: Heiko Carstens Reviewed-by: Sascha Silbe Signed-off-by: Martin Schwidefsky diff --git a/arch/s390/boot/compressed/Makefile b/arch/s390/boot/compressed/Makefile index 98ec652..13723c3 100644 --- a/arch/s390/boot/compressed/Makefile +++ b/arch/s390/boot/compressed/Makefile @@ -18,7 +18,7 @@ KBUILD_CFLAGS += $(call cc-option,-ffreestanding) GCOV_PROFILE := n -OBJECTS := $(addprefix $(objtree)/arch/s390/kernel/, head.o sclp.o ebcdic.o) +OBJECTS := $(addprefix $(objtree)/arch/s390/kernel/, head.o sclp.o ebcdic.o als.o) OBJECTS += $(obj)/head.o $(obj)/misc.o $(obj)/piggy.o LDFLAGS_vmlinux := --oformat $(LD_BFD) -e startup -T diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile index f37be37..07fca5e 100644 --- a/arch/s390/kernel/Makefile +++ b/arch/s390/kernel/Makefile @@ -4,6 +4,7 @@ KCOV_INSTRUMENT_early.o := n KCOV_INSTRUMENT_sclp.o := n +KCOV_INSTRUMENT_als.o := n ifdef CONFIG_FUNCTION_TRACER # Don't trace early setup code and tracing code @@ -32,13 +33,16 @@ CFLAGS_ptrace.o += -DUTS_MACHINE='"$(UTS_MACHINE)"' CFLAGS_sysinfo.o += -w # -# Use -march=z900 for sclp.c to be able to print an error message if -# the kernel is started on a machine which is too old +# Use -march=z900 for sclp.c and als.c to be able to print an error +# message if the kernel is started on a machine which is too old # CFLAGS_REMOVE_sclp.o = $(CC_FLAGS_FTRACE) +CFLAGS_REMOVE_als.o = $(CC_FLAGS_FTRACE) ifneq ($(CC_FLAGS_MARCH),-march=z900) CFLAGS_REMOVE_sclp.o += $(CC_FLAGS_MARCH) CFLAGS_sclp.o += -march=z900 +CFLAGS_REMOVE_als.o += $(CC_FLAGS_MARCH) +CFLAGS_als.o += -march=z900 AFLAGS_REMOVE_head.o += $(CC_FLAGS_MARCH) AFLAGS_head.o += -march=z900 endif @@ -46,7 +50,7 @@ GCOV_PROFILE_sclp.o := n obj-y := traps.o time.o process.o base.o early.o setup.o idle.o vtime.o obj-y += processor.o sys_s390.o ptrace.o signal.o cpcmd.o ebcdic.o nmi.o -obj-y += debug.o irq.o ipl.o dis.o diag.o sclp.o vdso.o +obj-y += debug.o irq.o ipl.o dis.o diag.o sclp.o vdso.o als.o obj-y += sysinfo.o jump_label.o lgr.o os_info.o machine_kexec.o pgm_check.o obj-y += runtime_instr.o cache.o fpu.o dumpstack.o obj-y += entry.o reipl.o relocate_kernel.o diff --git a/arch/s390/kernel/als.c b/arch/s390/kernel/als.c new file mode 100644 index 0000000..f96a577 --- /dev/null +++ b/arch/s390/kernel/als.c @@ -0,0 +1,46 @@ +/* + * Copyright IBM Corp. 2016 + */ +#include +#include +#include +#include +#include +#include +#include "entry.h" + +/* + * The code within this file will be called very early. It may _not_ + * access anything within the bss section, since that is not cleared + * yet and may contain data (e.g. initrd) that must be saved by other + * code. + * For temporary objects the stack (16k) should be used. + */ + +static unsigned long als[] __initdata = { FACILITIES_ALS }; + +void __init verify_facilities(void) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(S390_lowcore.stfle_fac_list); i++) + S390_lowcore.stfle_fac_list[i] = 0; + asm volatile( + " stfl 0(0)\n" + : "=m" (S390_lowcore.stfl_fac_list)); + S390_lowcore.stfle_fac_list[0] = (u64)S390_lowcore.stfl_fac_list << 32; + if (S390_lowcore.stfl_fac_list & 0x01000000) { + register unsigned long reg0 asm("0") = ARRAY_SIZE(als) - 1; + + asm volatile(".insn s,0xb2b00000,0(%1)" /* stfle */ + : "+d" (reg0) + : "a" (&S390_lowcore.stfle_fac_list) + : "memory", "cc"); + } + for (i = 0; i < ARRAY_SIZE(als); i++) { + if ((S390_lowcore.stfle_fac_list[i] & als[i]) == als[i]) + continue; + _sclp_print_early("The Linux kernel requires more recent processor hardware"); + disabled_wait(0x8badcccc); + } +} diff --git a/arch/s390/kernel/entry.h b/arch/s390/kernel/entry.h index bedd2f5..e79f030 100644 --- a/arch/s390/kernel/entry.h +++ b/arch/s390/kernel/entry.h @@ -79,4 +79,6 @@ long sys_s390_pci_mmio_read(unsigned long, void __user *, size_t); DECLARE_PER_CPU(u64, mt_cycles[8]); +void verify_facilities(void); + #endif /* _ENTRY_H */ diff --git a/arch/s390/kernel/head.S b/arch/s390/kernel/head.S index fcaefb0..56e4d82 100644 --- a/arch/s390/kernel/head.S +++ b/arch/s390/kernel/head.S @@ -306,49 +306,14 @@ ENTRY(startup_kdump) stck __LC_LAST_UPDATE_CLOCK spt 6f-.LPG0(%r13) mvc __LC_LAST_UPDATE_TIMER(8),6f-.LPG0(%r13) - stfl 0(%r0) # store facilities @ __LC_STFL_FAC_LIST - mvc __LC_STFLE_FAC_LIST(4),__LC_STFL_FAC_LIST - tm __LC_STFLE_FAC_LIST,0x01 # stfle available ? - jz 0f - lghi %r0,FACILITIES_ALS_DWORDS-1 - .insn s,0xb2b00000,__LC_STFLE_FAC_LIST # store facility list extended - # verify if all required facilities are supported by the machine -0: la %r1,__LC_STFLE_FAC_LIST - la %r2,3f+8-.LPG0(%r13) - lhi %r3,FACILITIES_ALS_DWORDS -1: lg %r0,0(%r1) - ng %r0,0(%r2) - clg %r0,0(%r2) - jne 2f - la %r1,8(%r1) - la %r2,8(%r2) - ahi %r3,-1 - jnz 1b - j 4f -2: l %r15,.Lstack-.LPG0(%r13) + l %r15,.Lstack-.LPG0(%r13) ahi %r15,-STACK_FRAME_OVERHEAD - la %r2,.Lals_string-.LPG0(%r13) - l %r3,.Lsclp_print-.LPG0(%r13) - basr %r14,%r3 - lpsw 3f-.LPG0(%r13) # machine type not good enough, crash -.Lals_string: - .asciz "The Linux kernel requires more recent processor hardware" -.Lsclp_print: - .long _sclp_print_early -.Lstack: - .long 0x8000 + (1<<(PAGE_SHIFT+THREAD_ORDER)) - .align 16 -3: .long 0x000a0000,0x8badcccc - -# List of facilities that are required. If not all facilities are present -# the kernel will crash. - - .quad FACILITIES_ALS - -4: + brasl %r14,verify_facilities /* Continue with startup code in head64.S */ jg startup_continue +.Lstack: + .long 0x8000 + (1<<(PAGE_SHIFT+THREAD_ORDER)) .align 8 6: .long 0x7fffffff,0xffffffff -- cgit v0.10.2 From 06ed5512a26347f7b570295c68d48f0369bdb754 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 28 Jun 2016 14:40:07 +0200 Subject: s390/als: print machine type on facility mismatch If we have a facility mismatch the kernel only emits a warning that the processor is not recent enough and stops operating. This doesn't give us a lot of an idea of what actually went wrong. As a first step print the machine type in addition. Signed-off-by: Heiko Carstens Reviewed-by: Sascha Silbe Signed-off-by: Martin Schwidefsky diff --git a/arch/s390/kernel/als.c b/arch/s390/kernel/als.c index f96a577..0eff858 100644 --- a/arch/s390/kernel/als.c +++ b/arch/s390/kernel/als.c @@ -19,6 +19,38 @@ static unsigned long als[] __initdata = { FACILITIES_ALS }; +static void __init u16_to_hex(char *str, u16 val) +{ + int i, num; + + for (i = 1; i <= 4; i++) { + num = (val >> (16 - 4 * i)) & 0xf; + if (num >= 10) + num += 7; + *str++ = '0' + num; + } + *str = '\0'; +} + +static void __init print_machine_type(void) +{ + static char mach_str[80] __initdata = "Detected machine-type number: "; + char type_str[5]; + struct cpuid id; + + get_cpu_id(&id); + u16_to_hex(type_str, id.machine); + strcat(mach_str, type_str); + _sclp_print_early(mach_str); +} + +static void __init facility_mismatch(void) +{ + _sclp_print_early("The Linux kernel requires more recent processor hardware"); + print_machine_type(); + disabled_wait(0x8badcccc); +} + void __init verify_facilities(void) { int i; @@ -38,9 +70,7 @@ void __init verify_facilities(void) : "memory", "cc"); } for (i = 0; i < ARRAY_SIZE(als); i++) { - if ((S390_lowcore.stfle_fac_list[i] & als[i]) == als[i]) - continue; - _sclp_print_early("The Linux kernel requires more recent processor hardware"); - disabled_wait(0x8badcccc); + if ((S390_lowcore.stfle_fac_list[i] & als[i]) != als[i]) + facility_mismatch(); } } -- cgit v0.10.2 From a02d1988b56f87fcce292f4f2f89b063a97ee7ea Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 29 Jun 2016 23:56:47 +0200 Subject: s390/als: print missing facilities on facility mismatch If the kernel needs more facilities to run than the machine provides it is running on, print the facility bit numbers which are missing. This allows to easily tell what went wrong and if simply the machine does not provide a required facility or if either the kernel or the hypervisor may have a bug. Signed-off-by: Heiko Carstens Reviewed-by: Sascha Silbe Signed-off-by: Martin Schwidefsky diff --git a/arch/s390/kernel/als.c b/arch/s390/kernel/als.c index 0eff858..a16e9d1 100644 --- a/arch/s390/kernel/als.c +++ b/arch/s390/kernel/als.c @@ -44,10 +44,58 @@ static void __init print_machine_type(void) _sclp_print_early(mach_str); } +static void __init u16_to_decimal(char *str, u16 val) +{ + int div = 1; + + while (div * 10 <= val) + div *= 10; + while (div) { + *str++ = '0' + val / div; + val %= div; + div /= 10; + } + *str = '\0'; +} + +static void __init print_missing_facilities(void) +{ + static char als_str[80] __initdata = "Missing facilities: "; + unsigned long val; + char val_str[6]; + int i, j, first; + + first = 1; + for (i = 0; i < ARRAY_SIZE(als); i++) { + val = ~S390_lowcore.stfle_fac_list[i] & als[i]; + for (j = 0; j < BITS_PER_LONG; j++) { + if (!(val & (1UL << (BITS_PER_LONG - 1 - j)))) + continue; + if (!first) + strcat(als_str, ","); + /* + * Make sure we stay within one line. Consider that + * each facility bit adds up to five characters and + * z/VM adds a four character prefix. + */ + if (strlen(als_str) > 70) { + _sclp_print_early(als_str); + *als_str = '\0'; + } + u16_to_decimal(val_str, i * BITS_PER_LONG + j); + strcat(als_str, val_str); + first = 0; + } + } + _sclp_print_early(als_str); + _sclp_print_early("See Principles of Operations for facility bits"); +} + static void __init facility_mismatch(void) { _sclp_print_early("The Linux kernel requires more recent processor hardware"); print_machine_type(); + print_missing_facilities(); disabled_wait(0x8badcccc); } -- cgit v0.10.2 From fbd6534ce07fa66da6585f9b536504c422527c8c Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Thu, 30 Jun 2016 00:48:42 +0200 Subject: s390/facilities: do not generate DWORDS define anymore The architecture level set code has been converted to C and doesn't need a define to figure out array sizes. Since the old code was the only user of the DWORDS define, we can get rid of it again. Signed-off-by: Heiko Carstens Reviewed-by: Sascha Silbe Signed-off-by: Martin Schwidefsky diff --git a/arch/s390/tools/gen_facilities.c b/arch/s390/tools/gen_facilities.c index e2660d2..fe4e6c9 100644 --- a/arch/s390/tools/gen_facilities.c +++ b/arch/s390/tools/gen_facilities.c @@ -39,7 +39,6 @@ static void print_facility_list(struct facility_def *def) printf("#define %s ", def->name); for (i = 0; i <= high; i++) printf("_AC(0x%016llx,UL)%c", array[i], i < high ? ',' : '\n'); - printf("#define %s_DWORDS %d\n", def->name, high + 1); free(array); } -- cgit v0.10.2 From 8814309163345830fa571fd8c153a8230b94db36 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Fri, 29 Jul 2016 08:36:35 +0200 Subject: s390/als: fix compile with gcov enabled Fix this one when gcov is enabled: arch/s390/kernel/als.o:(.data+0x118): undefined reference to `__gcov_merge_add' arch/s390/kernel/als.o: In function `_GLOBAL__sub_I_65535_0_verify_facilities': (.text.startup+0x8): undefined reference to `__gcov_init' Please merge with "s390/als: convert architecture level set code to C". Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile index 07fca5e..3234817 100644 --- a/arch/s390/kernel/Makefile +++ b/arch/s390/kernel/Makefile @@ -47,6 +47,7 @@ AFLAGS_REMOVE_head.o += $(CC_FLAGS_MARCH) AFLAGS_head.o += -march=z900 endif GCOV_PROFILE_sclp.o := n +GCOV_PROFILE_als.o := n obj-y := traps.o time.o process.o base.o early.o setup.o idle.o vtime.o obj-y += processor.o sys_s390.o ptrace.o signal.o cpcmd.o ebcdic.o nmi.o -- cgit v0.10.2 From ef4423ce70b8a78768a6e854e4d84c082800317d Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Thu, 28 Jul 2016 18:14:29 +0200 Subject: s390/numa: only set possible nodes within node_possible_map Make sure that only those nodes appear in the node_possible_map that may actually be used. Usually that means that the node online and possible maps are identical. For mode "plain" we only have one node, for mode "emu" we have "emu_nodes" nodes. Before this the possible map included (with default config) 16 nodes while usually only one was used. That made a couple of loops that iterated over all possible nodes do more work than necessary. Signed-off-by: Heiko Carstens Acked-by: Michael Holzheu Signed-off-by: Martin Schwidefsky diff --git a/arch/s390/numa/mode_emu.c b/arch/s390/numa/mode_emu.c index fbc394e..37e0bb8 100644 --- a/arch/s390/numa/mode_emu.c +++ b/arch/s390/numa/mode_emu.c @@ -482,8 +482,12 @@ static int emu_setup_nodes_adjust(int nodes) */ static void emu_setup(void) { + int nid; + emu_size = emu_setup_size_adjust(emu_size); emu_nodes = emu_setup_nodes_adjust(emu_nodes); + for (nid = 0; nid < emu_nodes; nid++) + node_set(nid, node_possible_map); pr_info("Creating %d nodes with memory stripe size %ld MB\n", emu_nodes, emu_size >> 20); } diff --git a/arch/s390/numa/numa.c b/arch/s390/numa/numa.c index 2794845..f576f10 100644 --- a/arch/s390/numa/numa.c +++ b/arch/s390/numa/numa.c @@ -26,8 +26,14 @@ EXPORT_SYMBOL(node_data); cpumask_t node_to_cpumask_map[MAX_NUMNODES]; EXPORT_SYMBOL(node_to_cpumask_map); +static void plain_setup(void) +{ + node_set(0, node_possible_map); +} + const struct numa_mode numa_mode_plain = { .name = "plain", + .setup = plain_setup, }; static const struct numa_mode *mode = &numa_mode_plain; @@ -126,13 +132,13 @@ static void __init numa_setup_memory(void) void __init numa_setup(void) { pr_info("NUMA mode: %s\n", mode->name); + nodes_clear(node_possible_map); if (mode->setup) mode->setup(); numa_setup_memory(); memblock_dump_all(); } - /* * numa_init_early() - Initialization initcall * -- cgit v0.10.2 From 33c388b81ce4b2f249730014b9b9f103a7578ca2 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Sun, 31 Jul 2016 11:47:06 +0200 Subject: s390/zcrypt: fix possible memory leak in ap_module_init() ap_configuration is malloced in ap_module_init() and should be freed before leaving from the error handling cases, otherwise it may cause memory leak. Signed-off-by: Wei Yongjun Signed-off-by: Martin Schwidefsky diff --git a/drivers/s390/crypto/ap_bus.c b/drivers/s390/crypto/ap_bus.c index 03e4d62..ed92fb0 100644 --- a/drivers/s390/crypto/ap_bus.c +++ b/drivers/s390/crypto/ap_bus.c @@ -1819,7 +1819,8 @@ int __init ap_module_init(void) if (ap_domain_index < -1 || ap_domain_index > max_domain_id) { pr_warn("%d is not a valid cryptographic domain\n", ap_domain_index); - return -EINVAL; + rc = -EINVAL; + goto out_free; } /* In resume callback we need to know if the user had set the domain. * If so, we can not just reset it. @@ -1892,6 +1893,7 @@ out: unregister_reset_call(&ap_reset_call); if (ap_using_interrupts()) unregister_adapter_interrupt(&ap_airq); +out_free: kfree(ap_configuration); return rc; } -- cgit v0.10.2 From 68c5cf5a6091c2c3fabccfd42ca844d730ec24c6 Mon Sep 17 00:00:00 2001 From: James Hogan Date: Sun, 31 Jul 2016 14:11:11 +0200 Subject: s390: Define AT_VECTOR_SIZE_ARCH for ARCH_DLINFO AT_VECTOR_SIZE_ARCH should be defined with the maximum number of NEW_AUX_ENT entries that ARCH_DLINFO can contain, but it wasn't defined for s390 at all even though ARCH_DLINFO can contain one NEW_AUX_ENT when VDSO is enabled. This shouldn't be a problem as AT_VECTOR_SIZE_BASE includes space for AT_BASE_PLATFORM which s390 doesn't use, but lets define it now and add the comment above ARCH_DLINFO as found in several other architectures to remind future modifiers of ARCH_DLINFO to keep AT_VECTOR_SIZE_ARCH up to date. Fixes: b020632e40c3 ("[S390] introduce vdso on s390") Signed-off-by: James Hogan Signed-off-by: Martin Schwidefsky Cc: Heiko Carstens Cc: linux-s390@vger.kernel.org diff --git a/arch/s390/include/asm/elf.h b/arch/s390/include/asm/elf.h index 563ab9f..1736c7d 100644 --- a/arch/s390/include/asm/elf.h +++ b/arch/s390/include/asm/elf.h @@ -225,6 +225,7 @@ do { \ #define MMAP_ALIGN_MASK (is_compat_task() ? 0 : 0x7fUL) #define STACK_RND_MASK MMAP_RND_MASK +/* update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT entries changes */ #define ARCH_DLINFO \ do { \ if (vdso_enabled) \ diff --git a/arch/s390/include/uapi/asm/auxvec.h b/arch/s390/include/uapi/asm/auxvec.h index a1f153e..c53e084 100644 --- a/arch/s390/include/uapi/asm/auxvec.h +++ b/arch/s390/include/uapi/asm/auxvec.h @@ -3,4 +3,6 @@ #define AT_SYSINFO_EHDR 33 +#define AT_VECTOR_SIZE_ARCH 1 /* entries in ARCH_DLINFO */ + #endif -- cgit v0.10.2 From e64a5470dcd2900ab8f8f83638c00098b10e6300 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sun, 31 Jul 2016 14:52:53 +0200 Subject: s390/ftrace/jprobes: Fix conflict between jprobes and function graph tracing This fixes the same issue Steven already fixed for x86 in following commit: 237d28db036e ftrace/jprobes/x86: Fix conflict between jprobes and function graph tracing It fixes the crash, that happens when function graph tracing and jprobes are used simultaneously. Please refer to above commit for details. Signed-off-by: Jiri Olsa Signed-off-by: Martin Schwidefsky Acked-by: Steven Rostedt diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c index 250f597..dd6306c 100644 --- a/arch/s390/kernel/kprobes.c +++ b/arch/s390/kernel/kprobes.c @@ -690,6 +690,15 @@ int setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs) stack = (unsigned long) regs->gprs[15]; memcpy(kcb->jprobes_stack, (void *) stack, MIN_STACK_SIZE(stack)); + + /* + * jprobes use jprobe_return() which skips the normal return + * path of the function, and this messes up the accounting of the + * function graph tracer to get messed up. + * + * Pause function graph tracing while performing the jprobe function. + */ + pause_graph_tracing(); return 1; } NOKPROBE_SYMBOL(setjmp_pre_handler); @@ -705,6 +714,9 @@ int longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); unsigned long stack; + /* It's OK to start function graph tracing again */ + unpause_graph_tracing(); + stack = (unsigned long) kcb->jprobe_saved_regs.gprs[15]; /* Put the regs back */ -- cgit v0.10.2