From 800252976be89611ef86d6d04442a821018ed949 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 14 Nov 2011 11:18:59 +0100 Subject: [PATCH 01/10] [S390] wire up process_vm syscalls Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/unistd.h | 4 +++- arch/s390/kernel/compat_wrapper.S | 20 ++++++++++++++++++++ arch/s390/kernel/syscalls.S | 2 ++ 3 files changed, 25 insertions(+), 1 deletion(-) diff --git a/arch/s390/include/asm/unistd.h b/arch/s390/include/asm/unistd.h index 404bdb9671b4..58de4c91c333 100644 --- a/arch/s390/include/asm/unistd.h +++ b/arch/s390/include/asm/unistd.h @@ -277,7 +277,9 @@ #define __NR_clock_adjtime 337 #define __NR_syncfs 338 #define __NR_setns 339 -#define NR_syscalls 340 +#define __NR_process_vm_readv 340 +#define __NR_process_vm_writev 341 +#define NR_syscalls 342 /* * There are some system calls that are not present on 64 bit, some diff --git a/arch/s390/kernel/compat_wrapper.S b/arch/s390/kernel/compat_wrapper.S index 5006a1d9f5d0..18c51df9fe06 100644 --- a/arch/s390/kernel/compat_wrapper.S +++ b/arch/s390/kernel/compat_wrapper.S @@ -1627,3 +1627,23 @@ ENTRY(sys_setns_wrapper) lgfr %r2,%r2 # int lgfr %r3,%r3 # int jg sys_setns + +ENTRY(compat_sys_process_vm_readv_wrapper) + lgfr %r2,%r2 # compat_pid_t + llgtr %r3,%r3 # struct compat_iovec __user * + llgfr %r4,%r4 # unsigned long + llgtr %r5,%r5 # struct compat_iovec __user * + llgfr %r6,%r6 # unsigned long + llgf %r0,164(%r15) # unsigned long + stg %r0,160(%r15) + jg sys_process_vm_readv + +ENTRY(compat_sys_process_vm_writev_wrapper) + lgfr %r2,%r2 # compat_pid_t + llgtr %r3,%r3 # struct compat_iovec __user * + llgfr %r4,%r4 # unsigned long + llgtr %r5,%r5 # struct compat_iovec __user * + llgfr %r6,%r6 # unsigned long + llgf %r0,164(%r15) # unsigned long + stg %r0,160(%r15) + jg sys_process_vm_writev diff --git a/arch/s390/kernel/syscalls.S b/arch/s390/kernel/syscalls.S index 73eb08c874fb..bcab2f04ba58 100644 --- a/arch/s390/kernel/syscalls.S +++ b/arch/s390/kernel/syscalls.S @@ -348,3 +348,5 @@ SYSCALL(sys_open_by_handle_at,sys_open_by_handle_at,compat_sys_open_by_handle_at SYSCALL(sys_clock_adjtime,sys_clock_adjtime,compat_sys_clock_adjtime_wrapper) SYSCALL(sys_syncfs,sys_syncfs,sys_syncfs_wrapper) SYSCALL(sys_setns,sys_setns,sys_setns_wrapper) +SYSCALL(sys_process_vm_readv,sys_process_vm_readv,compat_sys_process_vm_readv_wrapper) /* 340 */ +SYSCALL(sys_process_vm_writev,sys_process_vm_writev,compat_sys_process_vm_writev_wrapper) From 09b538833b85521d937a06faf61e6a3273253cc0 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Mon, 14 Nov 2011 11:19:00 +0100 Subject: [PATCH 02/10] [S390] fix pgste update logic The pgste_update_all / pgste_update_young and pgste_set_pte need to check if the pte entry contains a valid page address before the storage key can be accessed. In addition pgste_set_pte needs to set the access key and fetch protection bit of the new pte entry, not the old entry. Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/pgtable.h | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 34ede0ea85a9..524d23b8610c 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -593,6 +593,8 @@ static inline pgste_t pgste_update_all(pte_t *ptep, pgste_t pgste) unsigned long address, bits; unsigned char skey; + if (!pte_present(*ptep)) + return pgste; address = pte_val(*ptep) & PAGE_MASK; skey = page_get_storage_key(address); bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED); @@ -625,6 +627,8 @@ static inline pgste_t pgste_update_young(pte_t *ptep, pgste_t pgste) #ifdef CONFIG_PGSTE int young; + if (!pte_present(*ptep)) + return pgste; young = page_reset_referenced(pte_val(*ptep) & PAGE_MASK); /* Transfer page referenced bit to pte software bit (host view) */ if (young || (pgste_val(pgste) & RCP_HR_BIT)) @@ -638,13 +642,15 @@ static inline pgste_t pgste_update_young(pte_t *ptep, pgste_t pgste) } -static inline void pgste_set_pte(pte_t *ptep, pgste_t pgste) +static inline void pgste_set_pte(pte_t *ptep, pgste_t pgste, pte_t entry) { #ifdef CONFIG_PGSTE unsigned long address; unsigned long okey, nkey; - address = pte_val(*ptep) & PAGE_MASK; + if (!pte_present(entry)) + return; + address = pte_val(entry) & PAGE_MASK; okey = nkey = page_get_storage_key(address); nkey &= ~(_PAGE_ACC_BITS | _PAGE_FP_BIT); /* Set page access key and fetch protection bit from pgste */ @@ -712,7 +718,7 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, if (mm_has_pgste(mm)) { pgste = pgste_get_lock(ptep); - pgste_set_pte(ptep, pgste); + pgste_set_pte(ptep, pgste, entry); *ptep = entry; pgste_set_unlock(ptep, pgste); } else From fa2fb2f4a599c402bb2670dde27867dbbb7e3c45 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 14 Nov 2011 11:19:01 +0100 Subject: [PATCH 03/10] [S390] pfault: ignore leftover completion interrupts Ignore completion interrupts if the initial interrupt hasn't been received and the addressed task is not running. This case can only happen if leftover (pending) completion interrupt gets delivered which wasn't removed with the PFAULT CANCEL operation during cpu hotplug. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/mm/fault.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index 1766def5bc3f..a9a301866b3c 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -587,8 +587,13 @@ static void pfault_interrupt(unsigned int ext_int_code, } else { /* Completion interrupt was faster than initial * interrupt. Set pfault_wait to -1 so the initial - * interrupt doesn't put the task to sleep. */ - tsk->thread.pfault_wait = -1; + * interrupt doesn't put the task to sleep. + * If the task is not running, ignore the completion + * interrupt since it must be a leftover of a PFAULT + * CANCEL operation which didn't remove all pending + * completion interrupts. */ + if (tsk->state == TASK_RUNNING) + tsk->thread.pfault_wait = -1; } put_task_struct(tsk); } else { From 7a2512b744e72377c3fa5976f06a3f343e155d1f Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Mon, 14 Nov 2011 11:19:02 +0100 Subject: [PATCH 04/10] [S390] incorrect note program header 'readelf -n' on the s390 vmlinux file generates lots of warnings about corrupt notes. The reason is that the 'NOTE' program header has incorrect file and memory sizes. The problem is that the section following the NOTES section do not switch to a different phdr and they get added to the NOTE program section. Add a dummy entry to the linker script that switches to the data phdr before the start of the RODATA section. Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/vmlinux.lds.S | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S index 56fe6bc81fee..e4c79ebb40e6 100644 --- a/arch/s390/kernel/vmlinux.lds.S +++ b/arch/s390/kernel/vmlinux.lds.S @@ -43,6 +43,8 @@ SECTIONS NOTES :text :note + .dummy : { *(.dummy) } :data + RODATA #ifdef CONFIG_SHARED_KERNEL From 96603b505cb6c54782a27599afef65cc108ef5f2 Mon Sep 17 00:00:00 2001 From: Michael Holzheu Date: Mon, 14 Nov 2011 11:19:03 +0100 Subject: [PATCH 05/10] [S390] Kconfig: Select CONFIG_KEXEC for CONFIG_CRASH_DUMP The kdump infrastructure is built on top of kexec. Therefore CONFIG_KEXEC has to be enabled when CONFIG_CRASH_DUMP is selected. Signed-off-by: Michael Holzheu Signed-off-by: Martin Schwidefsky --- arch/s390/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index a9fbd43395f7..373679b3744a 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -572,6 +572,7 @@ config KEXEC config CRASH_DUMP bool "kernel crash dumps" depends on 64BIT + select KEXEC help Generate crash dump after being started by kexec. Crash dump kernels are loaded in the main kernel with kexec-tools From bc615deaf35ab06e7fe5672b0efb3c7a0b2dcf1a Mon Sep 17 00:00:00 2001 From: Holger Dengler Date: Mon, 14 Nov 2011 11:19:04 +0100 Subject: [PATCH 06/10] [S390] ap: Setup processing for messages in request queue. Setup timer for processing messages in request queue, if sending an AP message returns with reason code AP_RESPONSE_RESET_IN_PROGRESS. Signed-off-by: Holger Dengler Signed-off-by: Martin Schwidefsky --- drivers/s390/crypto/ap_bus.c | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/drivers/s390/crypto/ap_bus.c b/drivers/s390/crypto/ap_bus.c index b77ae519d79c..ec94f049e995 100644 --- a/drivers/s390/crypto/ap_bus.c +++ b/drivers/s390/crypto/ap_bus.c @@ -1271,18 +1271,16 @@ ap_config_timeout(unsigned long ptr) } /** - * ap_schedule_poll_timer(): Schedule poll timer. + * __ap_schedule_poll_timer(): Schedule poll timer. * * Set up the timer to run the poll tasklet */ -static inline void ap_schedule_poll_timer(void) +static inline void __ap_schedule_poll_timer(void) { ktime_t hr_time; spin_lock_bh(&ap_poll_timer_lock); - if (ap_using_interrupts() || ap_suspend_flag) - goto out; - if (hrtimer_is_queued(&ap_poll_timer)) + if (hrtimer_is_queued(&ap_poll_timer) || ap_suspend_flag) goto out; if (ktime_to_ns(hrtimer_expires_remaining(&ap_poll_timer)) <= 0) { hr_time = ktime_set(0, poll_timeout); @@ -1293,6 +1291,18 @@ out: spin_unlock_bh(&ap_poll_timer_lock); } +/** + * ap_schedule_poll_timer(): Schedule poll timer. + * + * Set up the timer to run the poll tasklet + */ +static inline void ap_schedule_poll_timer(void) +{ + if (ap_using_interrupts()) + return; + __ap_schedule_poll_timer(); +} + /** * ap_poll_read(): Receive pending reply messages from an AP device. * @ap_dev: pointer to the AP device @@ -1374,8 +1384,9 @@ static int ap_poll_write(struct ap_device *ap_dev, unsigned long *flags) *flags |= 1; *flags |= 2; break; - case AP_RESPONSE_Q_FULL: case AP_RESPONSE_RESET_IN_PROGRESS: + __ap_schedule_poll_timer(); + case AP_RESPONSE_Q_FULL: *flags |= 2; break; case AP_RESPONSE_MESSAGE_TOO_BIG: From 3f25dc4fcbc371f86a61a6af759003ebd4965908 Mon Sep 17 00:00:00 2001 From: Michael Holzheu Date: Mon, 14 Nov 2011 11:19:05 +0100 Subject: [PATCH 07/10] [S390] zfcpdump: Do not initialize zfcpdump in kdump mode When the kernel is started in kdump mode, zfcpdump should not be initialized because both dump methods can't be used at the same time. Signed-off-by: Michael Holzheu Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/setup.c | 4 +++- drivers/s390/char/zcore.c | 2 ++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 8ac6bfa2786c..e58a462949b1 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -211,6 +211,8 @@ static void __init setup_zfcpdump(unsigned int console_devno) if (ipl_info.type != IPL_TYPE_FCP_DUMP) return; + if (OLDMEM_BASE) + return; if (console_devno != -1) sprintf(str, " cio_ignore=all,!0.0.%04x,!0.0.%04x", ipl_info.data.fcp.dev_id.devno, console_devno); @@ -482,7 +484,7 @@ static void __init setup_memory_end(void) #ifdef CONFIG_ZFCPDUMP - if (ipl_info.type == IPL_TYPE_FCP_DUMP) { + if (ipl_info.type == IPL_TYPE_FCP_DUMP && !OLDMEM_BASE) { memory_end = ZFCPDUMP_HSA_SIZE; memory_end_set = 1; } diff --git a/drivers/s390/char/zcore.c b/drivers/s390/char/zcore.c index 43068fbd0baa..1b6d9247fdc7 100644 --- a/drivers/s390/char/zcore.c +++ b/drivers/s390/char/zcore.c @@ -641,6 +641,8 @@ static int __init zcore_init(void) if (ipl_info.type != IPL_TYPE_FCP_DUMP) return -ENODATA; + if (OLDMEM_BASE) + return -ENODATA; zcore_dbf = debug_register("zcore", 4, 1, 4 * sizeof(long)); debug_register_view(zcore_dbf, &debug_sprintf_view); From cfa1e7e1d49c6f5f0b00b2cb890b521e5c0dc7ea Mon Sep 17 00:00:00 2001 From: Jan Glauber Date: Mon, 14 Nov 2011 11:19:06 +0100 Subject: [PATCH 08/10] [S390] avoid STCKF if running in ESA mode In ESA mode STCKF is not defined even if the facility bit is enabled. To prevent an illegal operation we must also check if we run a 64 bit kernel. To make the check perform well add the STCKF bit to the machine flags. Signed-off-by: Jan Glauber Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/setup.h | 3 +++ arch/s390/include/asm/timex.h | 2 +- arch/s390/kernel/early.c | 2 ++ 3 files changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h index 5a099714df04..097183c70407 100644 --- a/arch/s390/include/asm/setup.h +++ b/arch/s390/include/asm/setup.h @@ -82,6 +82,7 @@ extern unsigned int user_mode; #define MACHINE_FLAG_LPAR (1UL << 12) #define MACHINE_FLAG_SPP (1UL << 13) #define MACHINE_FLAG_TOPOLOGY (1UL << 14) +#define MACHINE_FLAG_STCKF (1UL << 15) #define MACHINE_IS_VM (S390_lowcore.machine_flags & MACHINE_FLAG_VM) #define MACHINE_IS_KVM (S390_lowcore.machine_flags & MACHINE_FLAG_KVM) @@ -100,6 +101,7 @@ extern unsigned int user_mode; #define MACHINE_HAS_PFMF (0) #define MACHINE_HAS_SPP (0) #define MACHINE_HAS_TOPOLOGY (0) +#define MACHINE_HAS_STCKF (0) #else /* __s390x__ */ #define MACHINE_HAS_IEEE (1) #define MACHINE_HAS_CSP (1) @@ -111,6 +113,7 @@ extern unsigned int user_mode; #define MACHINE_HAS_PFMF (S390_lowcore.machine_flags & MACHINE_FLAG_PFMF) #define MACHINE_HAS_SPP (S390_lowcore.machine_flags & MACHINE_FLAG_SPP) #define MACHINE_HAS_TOPOLOGY (S390_lowcore.machine_flags & MACHINE_FLAG_TOPOLOGY) +#define MACHINE_HAS_STCKF (S390_lowcore.machine_flags & MACHINE_FLAG_STCKF) #endif /* __s390x__ */ #define ZFCPDUMP_HSA_SIZE (32UL<<20) diff --git a/arch/s390/include/asm/timex.h b/arch/s390/include/asm/timex.h index d610bef9c5e9..c447a27a7fdb 100644 --- a/arch/s390/include/asm/timex.h +++ b/arch/s390/include/asm/timex.h @@ -90,7 +90,7 @@ static inline unsigned long long get_clock_fast(void) { unsigned long long clk; - if (test_facility(25)) + if (MACHINE_HAS_STCKF) asm volatile(".insn s,0xb27c0000,%0" : "=Q" (clk) : : "cc"); else clk = get_clock(); diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c index 37394b3413e2..c9ffe0025197 100644 --- a/arch/s390/kernel/early.c +++ b/arch/s390/kernel/early.c @@ -390,6 +390,8 @@ static __init void detect_machine_facilities(void) S390_lowcore.machine_flags |= MACHINE_FLAG_MVCOS; if (test_facility(40)) S390_lowcore.machine_flags |= MACHINE_FLAG_SPP; + if (test_facility(25)) + S390_lowcore.machine_flags |= MACHINE_FLAG_STCKF; #endif } From 6ed54387dc470fc439cb154724a1ac81d251c126 Mon Sep 17 00:00:00 2001 From: Jan Glauber Date: Mon, 14 Nov 2011 11:19:07 +0100 Subject: [PATCH 09/10] [S390] crypto: avoid MSA3 and MSA4 instructions in ESA mode MSA3 and MSA4 instructions are only available under CONFIG_64BIT. Bail out before using any of these instructions if the kernel is running in 31 bit mode. Signed-off-by: Jan Glauber Signed-off-by: Martin Schwidefsky --- arch/s390/crypto/crypt_s390.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/arch/s390/crypto/crypt_s390.h b/arch/s390/crypto/crypt_s390.h index 49676771bd66..ffd1ac255f19 100644 --- a/arch/s390/crypto/crypt_s390.h +++ b/arch/s390/crypto/crypt_s390.h @@ -368,9 +368,12 @@ static inline int crypt_s390_func_available(int func, if (facility_mask & CRYPT_S390_MSA && !test_facility(17)) return 0; - if (facility_mask & CRYPT_S390_MSA3 && !test_facility(76)) + + if (facility_mask & CRYPT_S390_MSA3 && + (!test_facility(2) || !test_facility(76))) return 0; - if (facility_mask & CRYPT_S390_MSA4 && !test_facility(77)) + if (facility_mask & CRYPT_S390_MSA4 && + (!test_facility(2) || !test_facility(77))) return 0; switch (func & CRYPT_S390_OP_MASK) { From f6bf1a8acd2cb3a92a7b7c9ab03e56a32ac5ece5 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 14 Nov 2011 11:19:08 +0100 Subject: [PATCH 10/10] [S390] topology: fix topology on z10 machines Make sure that all cpus in a book on a z10 appear as book siblings and not as core siblings. This fixes some performance regressions that appeared after the book scheduling domain got introduced. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/topology.c | 45 +++++++++++++++++++++++++++++++------ 1 file changed, 38 insertions(+), 7 deletions(-) diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c index 77b8942b9a15..fdb5b8cb260f 100644 --- a/arch/s390/kernel/topology.c +++ b/arch/s390/kernel/topology.c @@ -68,8 +68,10 @@ static cpumask_t cpu_group_map(struct mask_info *info, unsigned int cpu) return mask; } -static void add_cpus_to_mask(struct topology_cpu *tl_cpu, - struct mask_info *book, struct mask_info *core) +static struct mask_info *add_cpus_to_mask(struct topology_cpu *tl_cpu, + struct mask_info *book, + struct mask_info *core, + int z10) { unsigned int cpu; @@ -88,10 +90,16 @@ static void add_cpus_to_mask(struct topology_cpu *tl_cpu, cpu_book_id[lcpu] = book->id; #endif cpumask_set_cpu(lcpu, &core->mask); - cpu_core_id[lcpu] = core->id; + if (z10) { + cpu_core_id[lcpu] = rcpu; + core = core->next; + } else { + cpu_core_id[lcpu] = core->id; + } smp_cpu_polarization[lcpu] = tl_cpu->pp; } } + return core; } static void clear_masks(void) @@ -123,18 +131,41 @@ static void tl_to_cores(struct sysinfo_15_1_x *info) { #ifdef CONFIG_SCHED_BOOK struct mask_info *book = &book_info; + struct cpuid cpu_id; #else struct mask_info *book = NULL; #endif struct mask_info *core = &core_info; union topology_entry *tle, *end; + int z10 = 0; - +#ifdef CONFIG_SCHED_BOOK + get_cpu_id(&cpu_id); + z10 = cpu_id.machine == 0x2097 || cpu_id.machine == 0x2098; +#endif spin_lock_irq(&topology_lock); clear_masks(); tle = info->tle; end = (union topology_entry *)((unsigned long)info + info->length); while (tle < end) { +#ifdef CONFIG_SCHED_BOOK + if (z10) { + switch (tle->nl) { + case 1: + book = book->next; + book->id = tle->container.id; + break; + case 0: + core = add_cpus_to_mask(&tle->cpu, book, core, z10); + break; + default: + clear_masks(); + goto out; + } + tle = next_tle(tle); + continue; + } +#endif switch (tle->nl) { #ifdef CONFIG_SCHED_BOOK case 2: @@ -147,7 +178,7 @@ static void tl_to_cores(struct sysinfo_15_1_x *info) core->id = tle->container.id; break; case 0: - add_cpus_to_mask(&tle->cpu, book, core); + add_cpus_to_mask(&tle->cpu, book, core, z10); break; default: clear_masks(); @@ -328,8 +359,8 @@ void __init s390_init_cpu_topology(void) for (i = 0; i < TOPOLOGY_NR_MAG; i++) printk(" %d", info->mag[i]); printk(" / %d\n", info->mnest); - alloc_masks(info, &core_info, 2); + alloc_masks(info, &core_info, 1); #ifdef CONFIG_SCHED_BOOK - alloc_masks(info, &book_info, 3); + alloc_masks(info, &book_info, 2); #endif }