path: root/arch/s390/kernel/smp.c
author	Martin Schwidefsky <schwidefsky@de.ibm.com>	2015-01-14 16:52:10 (GMT)
committer	Martin Schwidefsky <schwidefsky@de.ibm.com>	2015-01-22 11:16:01 (GMT)
commit	10ad34bc76dfbc49bda327a13012f6754c0c72e0 (patch)
tree	4d76fb6882da2eb3e77b53c4fe64f6367ffbd79d /arch/s390/kernel/smp.c
parent	1f6b83e5e4d3aed46eac1d219322fba9c7341cd8 (diff)
download	linux-10ad34bc76dfbc49bda327a13012f6754c0c72e0.tar.xz
s390: add SMT support
The multi-threading facility is introduced with the z13 processor family.
This patch adds code to detect the multi-threading facility. With the
facility enabled, each core will surface multiple hardware threads to the
system. Each hardware thread looks like a normal CPU to the operating
system with all its registers and properties.

The SCLP interface reports the SMT topology indirectly via the maximum
thread id. Each CPU reported in the result of a read-scp-information
is a core representing a number of hardware threads.

To reflect the reduced CPU capacity when two hardware threads run on a
single core, the MT utilization counter set is used to normalize the raw
cputime obtained by the CPU timer deltas. This scaled cputime is reported
via the taskstats interface. The normal /proc/stat numbers are based on
the raw cputime and are not affected by the normalization.

Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
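To illustrate the thread addressing this patch introduces: each core reported by SCLP expands into smp_cpu_mtid + 1 hardware threads, and thread j of a core gets the CPU address (core_id << smp_cpu_mt_shift) + j. A minimal standalone sketch of that arithmetic, assuming mtid = 1 (two threads per core, an illustrative value, not read from hardware):

    #include <stdio.h>

    /* Editorial sketch (not part of the patch): smp_cpu_mtid is the
     * maximum thread id per core; smp_cpu_mt_shift is the smallest
     * shift with mtid < (1 << shift), computed as in pcpu_set_smt(). */
    int main(void)
    {
            unsigned int mtid = 1;  /* example: two hardware threads per core */
            unsigned int shift = 0;
            unsigned int core_id, j;

            while (mtid >= (1U << shift))
                    shift++;
            for (core_id = 0; core_id < 3; core_id++)
                    for (j = 0; j <= mtid; j++)
                            printf("core %u thread %u -> CPU address %u\n",
                                   core_id, j, (core_id << shift) + j);
            return 0;
    }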
Diffstat (limited to 'arch/s390/kernel/smp.c')
-rw-r--r--	arch/s390/kernel/smp.c	261
1 file changed, 205 insertions(+), 56 deletions(-)
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index 0b499f5..370ff3a 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -71,9 +71,30 @@ struct pcpu {
};
static u8 boot_cpu_type;
-static u16 boot_cpu_address;
static struct pcpu pcpu_devices[NR_CPUS];
+unsigned int smp_cpu_mt_shift;
+EXPORT_SYMBOL(smp_cpu_mt_shift);
+
+unsigned int smp_cpu_mtid;
+EXPORT_SYMBOL(smp_cpu_mtid);
+
+static unsigned int smp_max_threads __initdata = -1U;
+
+static int __init early_nosmt(char *s)
+{
+ smp_max_threads = 1;
+ return 0;
+}
+early_param("nosmt", early_nosmt);
+
+static int __init early_smt(char *s)
+{
+ get_option(&s, &smp_max_threads);
+ return 0;
+}
+early_param("smt", early_smt);
+
/*
* The smp_cpu_state_mutex must be held when changing the state or polarization
* member of a pcpu data structure within the pcpu_devices array.
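For reference, the two early parameters added above cap the number of hardware threads the kernel will use; get_option() parses the single integer of smt=<n> into smp_max_threads, and nosmt behaves like smt=1 since both leave smp_max_threads at 1. Illustrative kernel command lines:

    nosmt        # one thread per core, multi-threading effectively off
    smt=2        # use at most two hardware threads per core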
@@ -132,7 +153,7 @@ static inline int pcpu_running(struct pcpu *pcpu)
/*
* Find struct pcpu by cpu address.
*/
-static struct pcpu *pcpu_find_address(const struct cpumask *mask, int address)
+static struct pcpu *pcpu_find_address(const struct cpumask *mask, u16 address)
{
int cpu;
@@ -299,6 +320,32 @@ static void pcpu_delegate(struct pcpu *pcpu, void (*func)(void *),
}
/*
+ * Enable additional logical cpus for multi-threading.
+ */
+static int pcpu_set_smt(unsigned int mtid)
+{
+ register unsigned long reg1 asm ("1") = (unsigned long) mtid;
+ int cc;
+
+ if (smp_cpu_mtid == mtid)
+ return 0;
+ asm volatile(
+ " sigp %1,0,%2 # sigp set multi-threading\n"
+ " ipm %0\n"
+ " srl %0,28\n"
+ : "=d" (cc) : "d" (reg1), "K" (SIGP_SET_MULTI_THREADING)
+ : "cc");
+ if (cc == 0) {
+ smp_cpu_mtid = mtid;
+ smp_cpu_mt_shift = 0;
+ while (smp_cpu_mtid >= (1U << smp_cpu_mt_shift))
+ smp_cpu_mt_shift++;
+ pcpu_devices[0].address = stap();
+ }
+ return cc;
+}
+
+/*
* Call function on an online CPU.
*/
void smp_call_online_cpu(void (*func)(void *), void *data)
@@ -512,22 +559,17 @@ EXPORT_SYMBOL(smp_ctl_clear_bit);
#ifdef CONFIG_CRASH_DUMP
-static void __init smp_get_save_area(int cpu, u16 address)
+static inline void __smp_store_cpu_state(int cpu, u16 address, int is_boot_cpu)
{
void *lc = pcpu_devices[0].lowcore;
struct save_area_ext *sa_ext;
unsigned long vx_sa;
- if (is_kdump_kernel())
- return;
- if (!OLDMEM_BASE && (address == boot_cpu_address ||
- ipl_info.type != IPL_TYPE_FCP_DUMP))
- return;
sa_ext = dump_save_area_create(cpu);
if (!sa_ext)
panic("could not allocate memory for save area\n");
- if (address == boot_cpu_address) {
- /* Copy the registers of the boot cpu. */
+ if (is_boot_cpu) {
+ /* Copy the registers of the boot CPU. */
copy_oldmem_page(1, (void *) &sa_ext->sa, sizeof(sa_ext->sa),
SAVE_AREA_BASE - PAGE_SIZE, 0);
if (MACHINE_HAS_VX)
@@ -548,6 +590,64 @@ static void __init smp_get_save_area(int cpu, u16 address)
free_page(vx_sa);
}
+/*
+ * Collect CPU state of the previous, crashed system.
+ * There are four cases:
+ * 1) standard zfcp dump
+ * condition: OLDMEM_BASE == NULL && ipl_info.type == IPL_TYPE_FCP_DUMP
+ * The state for all CPUs except the boot CPU needs to be collected
+ * with sigp stop-and-store-status. The boot CPU state is located in
+ * the absolute lowcore of the memory stored in the HSA. The zcore code
+ * will allocate the save area and copy the boot CPU state from the HSA.
+ * 2) stand-alone kdump for SCSI (zfcp dump with swapped memory)
+ * condition: OLDMEM_BASE != NULL && ipl_info.type == IPL_TYPE_FCP_DUMP
+ * The state for all CPUs except the boot CPU needs to be collected
+ * with sigp stop-and-store-status. The firmware or the boot-loader
+ * stored the registers of the boot CPU in the absolute lowcore in the
+ * memory of the old system.
+ * 3) kdump and the old kernel did not store the CPU state,
+ * or stand-alone kdump for DASD
+ * condition: OLDMEM_BASE != NULL && !is_kdump_kernel()
+ * The state for all CPUs except the boot CPU needs to be collected
+ * with sigp stop-and-store-status. The kexec code or the boot-loader
+ * stored the registers of the boot CPU in the memory of the old system.
+ * 4) kdump and the old kernel stored the CPU state
+ * condition: OLDMEM_BASE != NULL && is_kdump_kernel()
+ * The state of all CPUs is stored in ELF sections in the memory of the
+ * old system. The ELF sections are picked up by the crash_dump code
+ * via elfcorehdr_addr.
+ */
+static void __init smp_store_cpu_states(struct sclp_cpu_info *info)
+{
+ unsigned int cpu, address, i, j;
+ int is_boot_cpu;
+
+ if (is_kdump_kernel())
+ /* Previous system stored the CPU states. Nothing to do. */
+ return;
+ if (!(OLDMEM_BASE || ipl_info.type == IPL_TYPE_FCP_DUMP))
+ /* No previous system present, normal boot. */
+ return;
+ /* Set multi-threading state to the previous system. */
+ pcpu_set_smt(sclp_get_mtid_prev());
+ /* Collect CPU states. */
+ cpu = 0;
+ for (i = 0; i < info->configured; i++) {
+ /* Skip CPUs with different CPU type. */
+ if (info->has_cpu_type && info->cpu[i].type != boot_cpu_type)
+ continue;
+ for (j = 0; j <= smp_cpu_mtid; j++, cpu++) {
+ address = (info->cpu[i].core_id << smp_cpu_mt_shift) + j;
+ is_boot_cpu = (address == pcpu_devices[0].address);
+ if (is_boot_cpu && !OLDMEM_BASE)
+ /* Skip boot CPU for standard zfcp dump. */
+ continue;
+ /* Get state for this CPU. */
+ __smp_store_cpu_state(cpu, address, is_boot_cpu);
+ }
+ }
+}
+
int smp_store_status(int cpu)
{
unsigned long vx_sa;
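A condensed, editorial restatement of the four-case analysis in the comment above, as a standalone sketch; the flags stand in for OLDMEM_BASE, ipl_info.type == IPL_TYPE_FCP_DUMP and is_kdump_kernel(), reduced to plain integers:

    #include <stdio.h>

    /* Hypothetical restatement of the smp_store_cpu_states() dispatch. */
    static const char *classify(int oldmem, int fcp_dump, int kdump)
    {
            if (kdump)
                    return "case 4: states already in ELF sections, nothing to do";
            if (!oldmem && !fcp_dump)
                    return "normal boot: no previous system, nothing to do";
            /* cases 1-3: collect with sigp stop-and-store-status; for the
             * standard zfcp dump (case 1, !oldmem) the boot CPU is skipped
             * because zcore copies its state from the HSA. */
            return "collect CPU states via sigp";
    }

    int main(void)
    {
            printf("%s\n", classify(0, 1, 0));      /* standard zfcp dump */
            printf("%s\n", classify(1, 0, 1));      /* kdump, state prestored */
            return 0;
    }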
@@ -565,10 +665,6 @@ int smp_store_status(int cpu)
return 0;
}
-#else /* CONFIG_CRASH_DUMP */
-
-static inline void smp_get_save_area(int cpu, u16 address) { }
-
#endif /* CONFIG_CRASH_DUMP */
void smp_cpu_set_polarization(int cpu, int val)
@@ -590,11 +686,13 @@ static struct sclp_cpu_info *smp_get_cpu_info(void)
info = kzalloc(sizeof(*info), GFP_KERNEL);
if (info && (use_sigp_detection || sclp_get_cpu_info(info))) {
use_sigp_detection = 1;
- for (address = 0; address <= MAX_CPU_ADDRESS; address++) {
+ for (address = 0; address <= MAX_CPU_ADDRESS;
+ address += (1U << smp_cpu_mt_shift)) {
if (__pcpu_sigp_relax(address, SIGP_SENSE, 0, NULL) ==
SIGP_CC_NOT_OPERATIONAL)
continue;
- info->cpu[info->configured].address = address;
+ info->cpu[info->configured].core_id =
+ address >> smp_cpu_mt_shift;
info->configured++;
}
info->combined = info->configured;
@@ -608,7 +706,8 @@ static int __smp_rescan_cpus(struct sclp_cpu_info *info, int sysfs_add)
{
struct pcpu *pcpu;
cpumask_t avail;
- int cpu, nr, i;
+ int cpu, nr, i, j;
+ u16 address;
nr = 0;
cpumask_xor(&avail, cpu_possible_mask, cpu_present_mask);
@@ -616,51 +715,76 @@ static int __smp_rescan_cpus(struct sclp_cpu_info *info, int sysfs_add)
for (i = 0; (i < info->combined) && (cpu < nr_cpu_ids); i++) {
if (info->has_cpu_type && info->cpu[i].type != boot_cpu_type)
continue;
- if (pcpu_find_address(cpu_present_mask, info->cpu[i].address))
- continue;
- pcpu = pcpu_devices + cpu;
- pcpu->address = info->cpu[i].address;
- pcpu->state = (i >= info->configured) ?
- CPU_STATE_STANDBY : CPU_STATE_CONFIGURED;
- smp_cpu_set_polarization(cpu, POLARIZATION_UNKNOWN);
- set_cpu_present(cpu, true);
- if (sysfs_add && smp_add_present_cpu(cpu) != 0)
- set_cpu_present(cpu, false);
- else
- nr++;
- cpu = cpumask_next(cpu, &avail);
+ address = info->cpu[i].core_id << smp_cpu_mt_shift;
+ for (j = 0; j <= smp_cpu_mtid; j++) {
+ if (pcpu_find_address(cpu_present_mask, address + j))
+ continue;
+ pcpu = pcpu_devices + cpu;
+ pcpu->address = address + j;
+ pcpu->state =
+ (cpu >= info->configured*(smp_cpu_mtid + 1)) ?
+ CPU_STATE_STANDBY : CPU_STATE_CONFIGURED;
+ smp_cpu_set_polarization(cpu, POLARIZATION_UNKNOWN);
+ set_cpu_present(cpu, true);
+ if (sysfs_add && smp_add_present_cpu(cpu) != 0)
+ set_cpu_present(cpu, false);
+ else
+ nr++;
+ cpu = cpumask_next(cpu, &avail);
+ if (cpu >= nr_cpu_ids)
+ break;
+ }
}
return nr;
}
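A worked example of the standby test above, with illustrative values: for info->configured = 4 cores and smp_cpu_mtid = 1, logical CPUs 0..7 (4 cores x 2 threads) satisfy cpu < configured * (mtid + 1) and become CPU_STATE_CONFIGURED; any further reported cores expand to CPUs 8 and up and become CPU_STATE_STANDBY.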
static void __init smp_detect_cpus(void)
{
- unsigned int cpu, c_cpus, s_cpus;
+ unsigned int cpu, mtid, c_cpus, s_cpus;
struct sclp_cpu_info *info;
+ u16 address;
+ /* Get CPU information */
info = smp_get_cpu_info();
if (!info)
panic("smp_detect_cpus failed to allocate memory\n");
+
+ /* Find boot CPU type */
if (info->has_cpu_type) {
- for (cpu = 0; cpu < info->combined; cpu++) {
- if (info->cpu[cpu].address != boot_cpu_address)
- continue;
- /* The boot cpu dictates the cpu type. */
- boot_cpu_type = info->cpu[cpu].type;
- break;
- }
+ address = stap();
+ for (cpu = 0; cpu < info->combined; cpu++)
+ if (info->cpu[cpu].core_id == address) {
+ /* The boot cpu dictates the cpu type. */
+ boot_cpu_type = info->cpu[cpu].type;
+ break;
+ }
+ if (cpu >= info->combined)
+ panic("Could not find boot CPU type");
}
+
+#if defined(CONFIG_ZFCPDUMP) || defined(CONFIG_CRASH_DUMP)
+ /* Collect CPU state of previous system */
+ smp_store_cpu_states(info);
+#endif
+
+ /* Set multi-threading state for the current system */
+ mtid = sclp_get_mtid(boot_cpu_type);
+ mtid = (mtid < smp_max_threads) ? mtid : smp_max_threads - 1;
+ pcpu_set_smt(mtid);
+
+ /* Print number of CPUs */
c_cpus = s_cpus = 0;
for (cpu = 0; cpu < info->combined; cpu++) {
if (info->has_cpu_type && info->cpu[cpu].type != boot_cpu_type)
continue;
- if (cpu < info->configured) {
- smp_get_save_area(c_cpus, info->cpu[cpu].address);
- c_cpus++;
- } else
- s_cpus++;
+ if (cpu < info->configured)
+ c_cpus += smp_cpu_mtid + 1;
+ else
+ s_cpus += smp_cpu_mtid + 1;
}
pr_info("%d configured CPUs, %d standby CPUs\n", c_cpus, s_cpus);
+
+ /* Add CPUs present at boot */
get_online_cpus();
__smp_rescan_cpus(info, 0);
put_online_cpus();
@@ -696,12 +820,23 @@ static void smp_start_secondary(void *cpuvoid)
int __cpu_up(unsigned int cpu, struct task_struct *tidle)
{
struct pcpu *pcpu;
- int rc;
+ int base, i, rc;
pcpu = pcpu_devices + cpu;
if (pcpu->state != CPU_STATE_CONFIGURED)
return -EIO;
- if (pcpu_sigp_retry(pcpu, SIGP_INITIAL_CPU_RESET, 0) !=
+ base = cpu - (cpu % (smp_cpu_mtid + 1));
+ for (i = 0; i <= smp_cpu_mtid; i++) {
+ if (base + i < nr_cpu_ids)
+ if (cpu_online(base + i))
+ break;
+ }
+ /*
+ * If this is the first CPU of the core to get online
+ * do an initial CPU reset.
+ */
+ if (i > smp_cpu_mtid &&
+ pcpu_sigp_retry(pcpu_devices + base, SIGP_INITIAL_CPU_RESET, 0) !=
SIGP_CC_ORDER_CODE_ACCEPTED)
return -EIO;
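A worked example of the core-base computation above, assuming smp_cpu_mtid = 1 (two threads per core): for cpu = 5, base = 5 - (5 % 2) = 4, so cpu 5 is the second thread of the core whose first thread is cpu 4. The initial CPU reset is issued to the core only when neither cpu 4 nor cpu 5 is already online, i.e. the loop runs past smp_cpu_mtid without finding an online thread.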
@@ -774,7 +909,8 @@ void __init smp_fill_possible_mask(void)
{
unsigned int possible, sclp, cpu;
- sclp = sclp_get_max_cpu() ?: nr_cpu_ids;
+ sclp = min(smp_max_threads, sclp_get_mtid_max() + 1);
+ sclp = sclp_get_max_cpu()*sclp ?: nr_cpu_ids;
possible = setup_possible_cpus ?: nr_cpu_ids;
possible = min(possible, sclp);
for (cpu = 0; cpu < possible && cpu < nr_cpu_ids; cpu++)
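A worked example of the possible-CPU computation above, with illustrative values: for sclp_get_max_cpu() = 16 cores, sclp_get_mtid_max() = 1 and smp_max_threads left at its -1U default, the thread count is min(-1U, 1 + 1) = 2, giving 16 * 2 = 32 possible CPUs; booting with smt=1 would reduce that to 16.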
@@ -796,9 +932,8 @@ void __init smp_prepare_boot_cpu(void)
{
struct pcpu *pcpu = pcpu_devices;
- boot_cpu_address = stap();
pcpu->state = CPU_STATE_CONFIGURED;
- pcpu->address = boot_cpu_address;
+ pcpu->address = stap();
pcpu->lowcore = (struct _lowcore *)(unsigned long) store_prefix();
pcpu->async_stack = S390_lowcore.async_stack - ASYNC_SIZE
+ STACK_FRAME_OVERHEAD + sizeof(struct pt_regs);
@@ -848,7 +983,7 @@ static ssize_t cpu_configure_store(struct device *dev,
const char *buf, size_t count)
{
struct pcpu *pcpu;
- int cpu, val, rc;
+ int cpu, val, rc, i;
char delim;
if (sscanf(buf, "%d %c", &val, &delim) != 1)
@@ -860,29 +995,43 @@ static ssize_t cpu_configure_store(struct device *dev,
rc = -EBUSY;
/* disallow configuration changes of online cpus and cpu 0 */
cpu = dev->id;
- if (cpu_online(cpu) || cpu == 0)
+ cpu -= cpu % (smp_cpu_mtid + 1);
+ if (cpu == 0)
goto out;
+ for (i = 0; i <= smp_cpu_mtid; i++)
+ if (cpu_online(cpu + i))
+ goto out;
pcpu = pcpu_devices + cpu;
rc = 0;
switch (val) {
case 0:
if (pcpu->state != CPU_STATE_CONFIGURED)
break;
- rc = sclp_cpu_deconfigure(pcpu->address);
+ rc = sclp_cpu_deconfigure(pcpu->address >> smp_cpu_mt_shift);
if (rc)
break;
- pcpu->state = CPU_STATE_STANDBY;
- smp_cpu_set_polarization(cpu, POLARIZATION_UNKNOWN);
+ for (i = 0; i <= smp_cpu_mtid; i++) {
+ if (cpu + i >= nr_cpu_ids || !cpu_present(cpu + i))
+ continue;
+ pcpu[i].state = CPU_STATE_STANDBY;
+ smp_cpu_set_polarization(cpu + i,
+ POLARIZATION_UNKNOWN);
+ }
topology_expect_change();
break;
case 1:
if (pcpu->state != CPU_STATE_STANDBY)
break;
- rc = sclp_cpu_configure(pcpu->address);
+ rc = sclp_cpu_configure(pcpu->address >> smp_cpu_mt_shift);
if (rc)
break;
- pcpu->state = CPU_STATE_CONFIGURED;
- smp_cpu_set_polarization(cpu, POLARIZATION_UNKNOWN);
+ for (i = 0; i <= smp_cpu_mtid; i++) {
+ if (cpu + i >= nr_cpu_ids || !cpu_present(cpu + i))
+ continue;
+ pcpu[i].state = CPU_STATE_CONFIGURED;
+ smp_cpu_set_polarization(cpu + i,
+ POLARIZATION_UNKNOWN);
+ }
topology_expect_change();
break;
default: