From 32346f47dd46bed291464e194a6c47da6fdd1bc3 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Mon, 22 Oct 2007 12:52:38 +0200 Subject: [S390] Update default configuration. Signed-off-by: Martin Schwidefsky diff --git a/arch/s390/defconfig b/arch/s390/defconfig index 2aae23d..ece7b99 100644 --- a/arch/s390/defconfig +++ b/arch/s390/defconfig @@ -1,7 +1,7 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.22 -# Tue Jul 17 12:50:23 2007 +# Linux kernel version: 2.6.23 +# Mon Oct 22 12:10:44 2007 # CONFIG_MMU=y CONFIG_ZONE_DMA=y @@ -19,15 +19,11 @@ CONFIG_S390=y CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config" # -# Code maturity level options +# General setup # CONFIG_EXPERIMENTAL=y CONFIG_LOCK_KERNEL=y CONFIG_INIT_ENV_ARG_LIMIT=32 - -# -# General setup -# CONFIG_LOCALVERSION="" CONFIG_LOCALVERSION_AUTO=y CONFIG_SWAP=y @@ -42,7 +38,14 @@ CONFIG_AUDIT=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_LOG_BUF_SHIFT=17 +CONFIG_CGROUPS=y +# CONFIG_CGROUP_DEBUG is not set +CONFIG_CGROUP_NS=y +CONFIG_CGROUP_CPUACCT=y # CONFIG_CPUSETS is not set +CONFIG_FAIR_GROUP_SCHED=y +CONFIG_FAIR_USER_SCHED=y +# CONFIG_FAIR_CGROUP_SCHED is not set CONFIG_SYSFS_DEPRECATED=y # CONFIG_RELAY is not set CONFIG_BLK_DEV_INITRD=y @@ -63,7 +66,6 @@ CONFIG_FUTEX=y CONFIG_ANON_INODES=y CONFIG_EPOLL=y CONFIG_SIGNALFD=y -CONFIG_TIMERFD=y CONFIG_EVENTFD=y CONFIG_SHMEM=y CONFIG_VM_EVENT_COUNTERS=y @@ -83,6 +85,7 @@ CONFIG_STOP_MACHINE=y CONFIG_BLOCK=y # CONFIG_BLK_DEV_IO_TRACE is not set CONFIG_BLK_DEV_BSG=y +CONFIG_BLOCK_COMPAT=y # # IO Schedulers @@ -108,7 +111,6 @@ CONFIG_64BIT=y CONFIG_SMP=y CONFIG_NR_CPUS=32 CONFIG_HOTPLUG_CPU=y -CONFIG_DEFAULT_MIGRATION_COST=1000000 CONFIG_COMPAT=y CONFIG_SYSVIPC_COMPAT=y CONFIG_AUDIT_ARCH=y @@ -143,9 +145,11 @@ CONFIG_FLATMEM_MANUAL=y CONFIG_FLATMEM=y CONFIG_FLAT_NODE_MEM_MAP=y # CONFIG_SPARSEMEM_STATIC is not set +# CONFIG_SPARSEMEM_VMEMMAP_ENABLE is not set CONFIG_SPLIT_PTLOCK_CPUS=4 CONFIG_RESOURCES_64BIT=y CONFIG_ZONE_DMA_FLAG=1 +CONFIG_BOUNCE=y CONFIG_VIRT_TO_BUS=y CONFIG_HOLES_IN_ZONE=y @@ -219,12 +223,14 @@ CONFIG_INET_TUNNEL=y CONFIG_INET_XFRM_MODE_TRANSPORT=y CONFIG_INET_XFRM_MODE_TUNNEL=y CONFIG_INET_XFRM_MODE_BEET=y +CONFIG_INET_LRO=y CONFIG_INET_DIAG=y CONFIG_INET_TCP_DIAG=y # CONFIG_TCP_CONG_ADVANCED is not set CONFIG_TCP_CONG_CUBIC=y CONFIG_DEFAULT_TCP_CONG="cubic" # CONFIG_TCP_MD5SIG is not set +# CONFIG_IP_VS is not set CONFIG_IPV6=y # CONFIG_IPV6_PRIVACY is not set # CONFIG_IPV6_ROUTER_PREF is not set @@ -243,7 +249,48 @@ CONFIG_IPV6_SIT=y # CONFIG_IPV6_TUNNEL is not set # CONFIG_IPV6_MULTIPLE_TABLES is not set # CONFIG_NETWORK_SECMARK is not set -# CONFIG_NETFILTER is not set +CONFIG_NETFILTER=y +# CONFIG_NETFILTER_DEBUG is not set + +# +# Core Netfilter Configuration +# +CONFIG_NETFILTER_NETLINK=m +CONFIG_NETFILTER_NETLINK_QUEUE=m +CONFIG_NETFILTER_NETLINK_LOG=m +CONFIG_NF_CONNTRACK_ENABLED=m +CONFIG_NF_CONNTRACK=m +# CONFIG_NF_CT_ACCT is not set +# CONFIG_NF_CONNTRACK_MARK is not set +# CONFIG_NF_CONNTRACK_EVENTS is not set +# CONFIG_NF_CT_PROTO_SCTP is not set +# CONFIG_NF_CT_PROTO_UDPLITE is not set +# CONFIG_NF_CONNTRACK_AMANDA is not set +# CONFIG_NF_CONNTRACK_FTP is not set +# CONFIG_NF_CONNTRACK_H323 is not set +# CONFIG_NF_CONNTRACK_IRC is not set +# CONFIG_NF_CONNTRACK_NETBIOS_NS is not set +# CONFIG_NF_CONNTRACK_PPTP is not set +# CONFIG_NF_CONNTRACK_SANE is not set +# CONFIG_NF_CONNTRACK_SIP is not set +# CONFIG_NF_CONNTRACK_TFTP is not set +# CONFIG_NF_CT_NETLINK is not set +# CONFIG_NETFILTER_XTABLES is 
not set + +# +# IP: Netfilter Configuration +# +# CONFIG_NF_CONNTRACK_IPV4 is not set +# CONFIG_IP_NF_QUEUE is not set +# CONFIG_IP_NF_IPTABLES is not set +# CONFIG_IP_NF_ARPTABLES is not set + +# +# IPv6: Netfilter Configuration (EXPERIMENTAL) +# +# CONFIG_NF_CONNTRACK_IPV6 is not set +# CONFIG_IP6_NF_QUEUE is not set +# CONFIG_IP6_NF_IPTABLES is not set # CONFIG_IP_DCCP is not set CONFIG_IP_SCTP=m # CONFIG_SCTP_DBG_MSG is not set @@ -263,12 +310,7 @@ CONFIG_SCTP_HMAC_MD5=y # CONFIG_LAPB is not set # CONFIG_ECONET is not set # CONFIG_WAN_ROUTER is not set - -# -# QoS and/or fair queueing -# CONFIG_NET_SCHED=y -CONFIG_NET_SCH_FIFO=y # # Queueing/Scheduling @@ -306,10 +348,12 @@ CONFIG_NET_CLS_ACT=y CONFIG_NET_ACT_POLICE=y # CONFIG_NET_ACT_GACT is not set # CONFIG_NET_ACT_MIRRED is not set +CONFIG_NET_ACT_NAT=m # CONFIG_NET_ACT_PEDIT is not set # CONFIG_NET_ACT_SIMP is not set CONFIG_NET_CLS_POLICE=y # CONFIG_NET_CLS_IND is not set +CONFIG_NET_SCH_FIFO=y # # Network testing @@ -329,6 +373,7 @@ CONFIG_CCW=y # # Generic Driver Options # +CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_STANDALONE=y CONFIG_PREVENT_FIRMWARE_BUILD=y # CONFIG_FW_LOADER is not set @@ -400,17 +445,11 @@ CONFIG_SCSI_FC_ATTRS=y # CONFIG_SCSI_ISCSI_ATTRS is not set # CONFIG_SCSI_SAS_ATTRS is not set # CONFIG_SCSI_SAS_LIBSAS is not set - -# -# SCSI low-level drivers -# +# CONFIG_SCSI_SRP_ATTRS is not set +CONFIG_SCSI_LOWLEVEL=y # CONFIG_ISCSI_TCP is not set # CONFIG_SCSI_DEBUG is not set CONFIG_ZFCP=y - -# -# Multi-device support (RAID and LVM) -# CONFIG_MD=y CONFIG_BLK_DEV_MD=y CONFIG_MD_LINEAR=m @@ -429,7 +468,9 @@ CONFIG_DM_ZERO=y CONFIG_DM_MULTIPATH=y # CONFIG_DM_MULTIPATH_EMC is not set # CONFIG_DM_MULTIPATH_RDAC is not set +# CONFIG_DM_MULTIPATH_HP is not set # CONFIG_DM_DELAY is not set +# CONFIG_DM_UEVENT is not set CONFIG_NETDEVICES=y # CONFIG_NETDEVICES_MULTIQUEUE is not set # CONFIG_IFB is not set @@ -438,8 +479,13 @@ CONFIG_BONDING=m # CONFIG_MACVLAN is not set CONFIG_EQUALIZER=m CONFIG_TUN=m +CONFIG_VETH=m CONFIG_NET_ETHERNET=y # CONFIG_MII is not set +# CONFIG_IBM_NEW_EMAC_ZMII is not set +# CONFIG_IBM_NEW_EMAC_RGMII is not set +# CONFIG_IBM_NEW_EMAC_TAH is not set +# CONFIG_IBM_NEW_EMAC_EMAC4 is not set CONFIG_NETDEV_1000=y CONFIG_NETDEV_10000=y # CONFIG_TR is not set @@ -473,7 +519,6 @@ CONFIG_CCWGROUP=y CONFIG_UNIX98_PTYS=y CONFIG_LEGACY_PTYS=y CONFIG_LEGACY_PTY_COUNT=256 -# CONFIG_WATCHDOG is not set CONFIG_HW_RANDOM=m # CONFIG_R3964 is not set CONFIG_RAW_DRIVER=m @@ -490,7 +535,6 @@ CONFIG_TN3270_CONSOLE=y CONFIG_TN3215=y CONFIG_TN3215_CONSOLE=y CONFIG_CCW_CONSOLE=y -CONFIG_SCLP=y CONFIG_SCLP_TTY=y CONFIG_SCLP_CONSOLE=y CONFIG_SCLP_VT220_TTY=y @@ -514,6 +558,11 @@ CONFIG_S390_TAPE_34XX=m CONFIG_MONWRITER=m CONFIG_S390_VMUR=m # CONFIG_POWER_SUPPLY is not set +# CONFIG_WATCHDOG is not set + +# +# Sonics Silicon Backplane +# # # File systems @@ -569,7 +618,6 @@ CONFIG_SYSFS=y CONFIG_TMPFS=y CONFIG_TMPFS_POSIX_ACL=y # CONFIG_HUGETLB_PAGE is not set -CONFIG_RAMFS=y CONFIG_CONFIGFS_FS=m # @@ -588,10 +636,7 @@ CONFIG_CONFIGFS_FS=m # CONFIG_QNX4FS_FS is not set # CONFIG_SYSV_FS is not set # CONFIG_UFS_FS is not set - -# -# Network File Systems -# +CONFIG_NETWORK_FILESYSTEMS=y CONFIG_NFS_FS=y CONFIG_NFS_V3=y # CONFIG_NFS_V3_ACL is not set @@ -638,27 +683,13 @@ CONFIG_MSDOS_PARTITION=y # CONFIG_KARMA_PARTITION is not set # CONFIG_EFI_PARTITION is not set # CONFIG_SYSV68_PARTITION is not set - -# -# Native Language Support -# # CONFIG_NLS is not set - -# -# Distributed Lock Manager -# CONFIG_DLM=m # 
CONFIG_DLM_DEBUG is not set - -# -# Instrumentation Support -# - -# -# Profiling support -# +CONFIG_INSTRUMENTATION=y # CONFIG_PROFILING is not set CONFIG_KPROBES=y +# CONFIG_MARKERS is not set # # Kernel hacking @@ -682,6 +713,7 @@ CONFIG_DEBUG_SPINLOCK=y CONFIG_DEBUG_MUTEXES=y # CONFIG_DEBUG_LOCK_ALLOC is not set # CONFIG_PROVE_LOCKING is not set +# CONFIG_LOCK_STAT is not set CONFIG_DEBUG_SPINLOCK_SLEEP=y # CONFIG_DEBUG_LOCKING_API_SELFTESTS is not set # CONFIG_DEBUG_KOBJECT is not set @@ -694,14 +726,17 @@ CONFIG_FORCED_INLINING=y # CONFIG_RCU_TORTURE_TEST is not set # CONFIG_LKDTM is not set # CONFIG_FAULT_INJECTION is not set +CONFIG_SAMPLES=y # # Security options # # CONFIG_KEYS is not set # CONFIG_SECURITY is not set +# CONFIG_SECURITY_FILE_CAPABILITIES is not set CONFIG_CRYPTO=y CONFIG_CRYPTO_ALGAPI=y +CONFIG_CRYPTO_AEAD=m CONFIG_CRYPTO_BLKCIPHER=y CONFIG_CRYPTO_HASH=m CONFIG_CRYPTO_MANAGER=y @@ -720,6 +755,7 @@ CONFIG_CRYPTO_ECB=m CONFIG_CRYPTO_CBC=y CONFIG_CRYPTO_PCBC=m # CONFIG_CRYPTO_LRW is not set +# CONFIG_CRYPTO_XTS is not set # CONFIG_CRYPTO_CRYPTD is not set # CONFIG_CRYPTO_DES is not set CONFIG_CRYPTO_FCRYPT=m @@ -733,11 +769,13 @@ CONFIG_CRYPTO_FCRYPT=m # CONFIG_CRYPTO_ARC4 is not set # CONFIG_CRYPTO_KHAZAD is not set # CONFIG_CRYPTO_ANUBIS is not set +CONFIG_CRYPTO_SEED=m # CONFIG_CRYPTO_DEFLATE is not set # CONFIG_CRYPTO_MICHAEL_MIC is not set # CONFIG_CRYPTO_CRC32C is not set CONFIG_CRYPTO_CAMELLIA=m # CONFIG_CRYPTO_TEST is not set +CONFIG_CRYPTO_AUTHENC=m CONFIG_CRYPTO_HW=y # CONFIG_CRYPTO_SHA1_S390 is not set # CONFIG_CRYPTO_SHA256_S390 is not set @@ -755,5 +793,6 @@ CONFIG_BITREVERSE=m # CONFIG_CRC16 is not set # CONFIG_CRC_ITU_T is not set CONFIG_CRC32=m +CONFIG_CRC7=m # CONFIG_LIBCRC32C is not set CONFIG_PLIST=y -- cgit v0.10.2 From fae8b22d3e3e3a3d317a7746493997af02a3f35c Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 22 Oct 2007 12:52:39 +0200 Subject: [S390] Add per-cpu idle time / idle count sysfs attributes. Add two new sysfs entries per cpu: idle_count and idle_time. idle_count contains the number of times a cpu went into idle state. idle_time contains the time a cpu spent in idle state in microseconds. This can be used e.g. by powertop to tell how often idle state is entered and left. # cat /sys/devices/system/cpu/cpu0/idle_count 504 # cat /sys/devices/system/cpu/cpu0/idle_time 469734037 us Cc: Arjan van de Ven Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c index 70c5737..cc7c4ba 100644 --- a/arch/s390/kernel/process.c +++ b/arch/s390/kernel/process.c @@ -44,6 +44,7 @@ #include #include #include +#include asmlinkage void ret_from_fork(void) asm ("ret_from_fork"); @@ -91,6 +92,14 @@ EXPORT_SYMBOL(unregister_idle_notifier); void do_monitor_call(struct pt_regs *regs, long interruption_code) { + struct s390_idle_data *idle; + + idle = &__get_cpu_var(s390_idle); + spin_lock(&idle->lock); + idle->idle_time += get_clock() - idle->idle_enter; + idle->in_idle = 0; + spin_unlock(&idle->lock); + /* disable monitor call class 0 */ __ctl_clear_bit(8, 15); @@ -105,6 +114,7 @@ extern void s390_handle_mcck(void); static void default_idle(void) { int cpu, rc; + struct s390_idle_data *idle; /* CPU is going idle. 
*/ cpu = smp_processor_id(); @@ -142,6 +152,12 @@ static void default_idle(void) return; } + idle = &__get_cpu_var(s390_idle); + spin_lock(&idle->lock); + idle->idle_count++; + idle->in_idle = 1; + idle->idle_enter = get_clock(); + spin_unlock(&idle->lock); trace_hardirqs_on(); /* Wait for external, I/O or machine check interrupt. */ __load_psw_mask(psw_kernel_bits | PSW_MASK_WAIT | diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 35edbef..ba3fff0 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -42,6 +42,7 @@ #include #include #include +#include /* * An array with a pointer the lowcore of every CPU. @@ -494,6 +495,8 @@ int __cpuinit start_secondary(void *cpuvoid) return 0; } +DEFINE_PER_CPU(struct s390_idle_data, s390_idle); + static void __init smp_create_idle(unsigned int cpu) { struct task_struct *p; @@ -506,6 +509,7 @@ static void __init smp_create_idle(unsigned int cpu) if (IS_ERR(p)) panic("failed fork for CPU %u: %li", cpu, PTR_ERR(p)); current_set[cpu] = p; + spin_lock_init(&(&per_cpu(s390_idle, cpu))->lock); } static int cpu_stopped(int cpu) @@ -724,6 +728,7 @@ void __init smp_prepare_boot_cpu(void) cpu_set(0, cpu_online_map); S390_lowcore.percpu_offset = __per_cpu_offset[0]; current_set[0] = current; + spin_lock_init(&(&__get_cpu_var(s390_idle))->lock); } void __init smp_cpus_done(unsigned int max_cpus) @@ -756,22 +761,71 @@ static ssize_t show_capability(struct sys_device *dev, char *buf) } static SYSDEV_ATTR(capability, 0444, show_capability, NULL); +static ssize_t show_idle_count(struct sys_device *dev, char *buf) +{ + struct s390_idle_data *idle; + unsigned long long idle_count; + + idle = &per_cpu(s390_idle, dev->id); + spin_lock_irq(&idle->lock); + idle_count = idle->idle_count; + spin_unlock_irq(&idle->lock); + return sprintf(buf, "%llu\n", idle_count); +} +static SYSDEV_ATTR(idle_count, 0444, show_idle_count, NULL); + +static ssize_t show_idle_time(struct sys_device *dev, char *buf) +{ + struct s390_idle_data *idle; + unsigned long long new_time; + + idle = &per_cpu(s390_idle, dev->id); + spin_lock_irq(&idle->lock); + if (idle->in_idle) { + new_time = get_clock(); + idle->idle_time += new_time - idle->idle_enter; + idle->idle_enter = new_time; + } + new_time = idle->idle_time; + spin_unlock_irq(&idle->lock); + return sprintf(buf, "%llu us\n", new_time >> 12); +} +static SYSDEV_ATTR(idle_time, 0444, show_idle_time, NULL); + +static struct attribute *cpu_attrs[] = { + &attr_capability.attr, + &attr_idle_count.attr, + &attr_idle_time.attr, + NULL, +}; + +static struct attribute_group cpu_attr_group = { + .attrs = cpu_attrs, +}; + static int __cpuinit smp_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu) { unsigned int cpu = (unsigned int)(long)hcpu; struct cpu *c = &per_cpu(cpu_devices, cpu); struct sys_device *s = &c->sysdev; + struct s390_idle_data *idle; switch (action) { case CPU_ONLINE: case CPU_ONLINE_FROZEN: - if (sysdev_create_file(s, &attr_capability)) + idle = &per_cpu(s390_idle, cpu); + spin_lock_irq(&idle->lock); + idle->idle_enter = 0; + idle->idle_time = 0; + idle->idle_count = 0; + spin_unlock_irq(&idle->lock); + if (sysfs_create_group(&s->kobj, &cpu_attr_group)) return NOTIFY_BAD; break; case CPU_DEAD: case CPU_DEAD_FROZEN: - sysdev_remove_file(s, &attr_capability); + sysfs_remove_group(&s->kobj, &cpu_attr_group); break; } return NOTIFY_OK; @@ -784,6 +838,7 @@ static struct notifier_block __cpuinitdata smp_cpu_nb = { static int __init topology_init(void) { int cpu; + int rc; 
register_cpu_notifier(&smp_cpu_nb); @@ -796,7 +851,9 @@ static int __init topology_init(void) if (!cpu_online(cpu)) continue; s = &c->sysdev; - sysdev_create_file(s, &attr_capability); + rc = sysfs_create_group(&s->kobj, &cpu_attr_group); + if (rc) + return rc; } return 0; } diff --git a/include/asm-s390/cpu.h b/include/asm-s390/cpu.h new file mode 100644 index 0000000..352dde1 --- /dev/null +++ b/include/asm-s390/cpu.h @@ -0,0 +1,25 @@ +/* + * include/asm-s390/cpu.h + * + * Copyright IBM Corp. 2007 + * Author(s): Heiko Carstens + */ + +#ifndef _ASM_S390_CPU_H_ +#define _ASM_S390_CPU_H_ + +#include +#include +#include + +struct s390_idle_data { + spinlock_t lock; + unsigned int in_idle; + unsigned long long idle_count; + unsigned long long idle_enter; + unsigned long long idle_time; +}; + +DECLARE_PER_CPU(struct s390_idle_data, s390_idle); + +#endif /* _ASM_S390_CPU_H_ */ -- cgit v0.10.2 From 054696077a6c4f9e306321d45b5762d6ea7940e1 Mon Sep 17 00:00:00 2001 From: Cornelia Huck Date: Mon, 22 Oct 2007 12:52:40 +0200 Subject: [S390] cio: Use to_channelpath() for device to channel path conversion. We already have a macro for that, so let's use it consistently... Signed-off-by: Cornelia Huck Signed-off-by: Martin Schwidefsky diff --git a/drivers/s390/cio/chp.c b/drivers/s390/cio/chp.c index 42c1f46..297cdce 100644 --- a/drivers/s390/cio/chp.c +++ b/drivers/s390/cio/chp.c @@ -246,7 +246,7 @@ int chp_add_cmg_attr(struct channel_path *chp) static ssize_t chp_status_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct channel_path *chp = container_of(dev, struct channel_path, dev); + struct channel_path *chp = to_channelpath(dev); if (!chp) return 0; @@ -258,7 +258,7 @@ static ssize_t chp_status_write(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { - struct channel_path *cp = container_of(dev, struct channel_path, dev); + struct channel_path *cp = to_channelpath(dev); char cmd[10]; int num_args; int error; @@ -286,7 +286,7 @@ static ssize_t chp_configure_show(struct device *dev, struct channel_path *cp; int status; - cp = container_of(dev, struct channel_path, dev); + cp = to_channelpath(dev); status = chp_info_get_status(cp->chpid); if (status < 0) return status; @@ -308,7 +308,7 @@ static ssize_t chp_configure_write(struct device *dev, return -EINVAL; if (val != 0 && val != 1) return -EINVAL; - cp = container_of(dev, struct channel_path, dev); + cp = to_channelpath(dev); chp_cfg_schedule(cp->chpid, val); cfg_wait_idle(); @@ -320,7 +320,7 @@ static DEVICE_ATTR(configure, 0644, chp_configure_show, chp_configure_write); static ssize_t chp_type_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct channel_path *chp = container_of(dev, struct channel_path, dev); + struct channel_path *chp = to_channelpath(dev); if (!chp) return 0; @@ -374,7 +374,7 @@ static void chp_release(struct device *dev) { struct channel_path *cp; - cp = container_of(dev, struct channel_path, dev); + cp = to_channelpath(dev); kfree(cp); } -- cgit v0.10.2 From 5bf04b2062c5b441d7154216694fea7dc2a6a7f3 Mon Sep 17 00:00:00 2001 From: Cornelia Huck Date: Mon, 22 Oct 2007 12:52:41 +0200 Subject: [S390] cio: Fix incomplete commit for uevent suppression. Commit fa1a8c23eb7d3ded8a3c6d0e653339a2bc7fca9e intended to introduce uevent suppression for subchannels, but half of it was lost somewhere. 
Now, we end up with two uevents for every registered subchannel :( So we should better add the missing part from http://marc.info/?l=linux-kernel&m=117515953113974&w=2. Signed-off-by: Cornelia Huck Signed-off-by: Martin Schwidefsky diff --git a/drivers/s390/cio/css.c b/drivers/s390/cio/css.c index 5d83dd4..838f7ac 100644 --- a/drivers/s390/cio/css.c +++ b/drivers/s390/cio/css.c @@ -182,6 +182,15 @@ static int css_register_subchannel(struct subchannel *sch) sch->dev.bus = &css_bus_type; sch->dev.release = &css_subchannel_release; sch->dev.groups = subch_attr_groups; + /* + * We don't want to generate uevents for I/O subchannels that don't + * have a working ccw device behind them since they will be + * unregistered before they can be used anyway, so we delay the add + * uevent until after device recognition was successful. + */ + if (!cio_is_console(sch->schid)) + /* Console is special, no need to suppress. */ + sch->dev.uevent_suppress = 1; css_update_ssd_info(sch); /* make it known to the system */ ret = css_sch_device_register(sch); -- cgit v0.10.2 From 7f021ce1957504cacc78896de857b90293badabc Mon Sep 17 00:00:00 2001 From: Cornelia Huck Date: Mon, 22 Oct 2007 12:52:42 +0200 Subject: [S390] struct class_device -> struct device conversion. Convert struct class_device users under drivers/s390/char to use struct device. Signed-off-by: Cornelia Huck Signed-off-by: Martin Schwidefsky diff --git a/drivers/s390/char/raw3270.c b/drivers/s390/char/raw3270.c index 2edd5fb..8d1c64a 100644 --- a/drivers/s390/char/raw3270.c +++ b/drivers/s390/char/raw3270.c @@ -48,8 +48,8 @@ struct raw3270 { struct timer_list timer; /* Device timer. */ unsigned char *ascebc; /* ascii -> ebcdic table */ - struct class_device *clttydev; /* 3270-class tty device ptr */ - struct class_device *cltubdev; /* 3270-class tub device ptr */ + struct device *clttydev; /* 3270-class tty device ptr */ + struct device *cltubdev; /* 3270-class tub device ptr */ struct raw3270_request init_request; unsigned char init_data[256]; @@ -1107,11 +1107,9 @@ raw3270_delete_device(struct raw3270 *rp) /* Remove from device chain. 
*/ mutex_lock(&raw3270_mutex); if (rp->clttydev && !IS_ERR(rp->clttydev)) - class_device_destroy(class3270, - MKDEV(IBM_TTY3270_MAJOR, rp->minor)); + device_destroy(class3270, MKDEV(IBM_TTY3270_MAJOR, rp->minor)); if (rp->cltubdev && !IS_ERR(rp->cltubdev)) - class_device_destroy(class3270, - MKDEV(IBM_FS3270_MAJOR, rp->minor)); + device_destroy(class3270, MKDEV(IBM_FS3270_MAJOR, rp->minor)); list_del_init(&rp->list); mutex_unlock(&raw3270_mutex); @@ -1181,24 +1179,22 @@ static int raw3270_create_attributes(struct raw3270 *rp) if (rc) goto out; - rp->clttydev = class_device_create(class3270, NULL, - MKDEV(IBM_TTY3270_MAJOR, rp->minor), - &rp->cdev->dev, "tty%s", - rp->cdev->dev.bus_id); + rp->clttydev = device_create(class3270, &rp->cdev->dev, + MKDEV(IBM_TTY3270_MAJOR, rp->minor), + "tty%s", rp->cdev->dev.bus_id); if (IS_ERR(rp->clttydev)) { rc = PTR_ERR(rp->clttydev); goto out_ttydev; } - rp->cltubdev = class_device_create(class3270, NULL, - MKDEV(IBM_FS3270_MAJOR, rp->minor), - &rp->cdev->dev, "tub%s", - rp->cdev->dev.bus_id); + rp->cltubdev = device_create(class3270, &rp->cdev->dev, + MKDEV(IBM_FS3270_MAJOR, rp->minor), + "tub%s", rp->cdev->dev.bus_id); if (!IS_ERR(rp->cltubdev)) goto out; rc = PTR_ERR(rp->cltubdev); - class_device_destroy(class3270, MKDEV(IBM_TTY3270_MAJOR, rp->minor)); + device_destroy(class3270, MKDEV(IBM_TTY3270_MAJOR, rp->minor)); out_ttydev: sysfs_remove_group(&rp->cdev->dev.kobj, &raw3270_attr_group); diff --git a/drivers/s390/char/tape_class.c b/drivers/s390/char/tape_class.c index 2e0d297..aa7f166 100644 --- a/drivers/s390/char/tape_class.c +++ b/drivers/s390/char/tape_class.c @@ -69,12 +69,9 @@ struct tape_class_device *register_tape_dev( if (rc) goto fail_with_cdev; - tcd->class_device = class_device_create( - tape_class, - NULL, - tcd->char_device->dev, - device, - "%s", tcd->device_name + tcd->class_device = device_create(tape_class, device, + tcd->char_device->dev, + "%s", tcd->device_name ); rc = IS_ERR(tcd->class_device) ? 
PTR_ERR(tcd->class_device) : 0; if (rc) @@ -90,7 +87,7 @@ struct tape_class_device *register_tape_dev( return tcd; fail_with_class_device: - class_device_destroy(tape_class, tcd->char_device->dev); + device_destroy(tape_class, tcd->char_device->dev); fail_with_cdev: cdev_del(tcd->char_device); @@ -105,11 +102,9 @@ EXPORT_SYMBOL(register_tape_dev); void unregister_tape_dev(struct tape_class_device *tcd) { if (tcd != NULL && !IS_ERR(tcd)) { - sysfs_remove_link( - &tcd->class_device->dev->kobj, - tcd->mode_name - ); - class_device_destroy(tape_class, tcd->char_device->dev); + sysfs_remove_link(&tcd->class_device->kobj, + tcd->mode_name); + device_destroy(tape_class, tcd->char_device->dev); cdev_del(tcd->char_device); kfree(tcd); } diff --git a/drivers/s390/char/tape_class.h b/drivers/s390/char/tape_class.h index a8bd9b4..e2b5ac9 100644 --- a/drivers/s390/char/tape_class.h +++ b/drivers/s390/char/tape_class.h @@ -24,8 +24,8 @@ #define TAPECLASS_NAME_LEN 32 struct tape_class_device { - struct cdev * char_device; - struct class_device * class_device; + struct cdev *char_device; + struct device *class_device; char device_name[TAPECLASS_NAME_LEN]; char mode_name[TAPECLASS_NAME_LEN]; }; diff --git a/drivers/s390/char/vmlogrdr.c b/drivers/s390/char/vmlogrdr.c index 12f7a4c..e0c4c50 100644 --- a/drivers/s390/char/vmlogrdr.c +++ b/drivers/s390/char/vmlogrdr.c @@ -74,7 +74,7 @@ struct vmlogrdr_priv_t { int dev_in_use; /* 1: already opened, 0: not opened*/ spinlock_t priv_lock; struct device *device; - struct class_device *class_device; + struct device *class_device; int autorecording; int autopurge; }; @@ -762,12 +762,10 @@ static int vmlogrdr_register_device(struct vmlogrdr_priv_t *priv) device_unregister(dev); return ret; } - priv->class_device = class_device_create( - vmlogrdr_class, - NULL, - MKDEV(vmlogrdr_major, priv->minor_num), - dev, - "%s", dev->bus_id ); + priv->class_device = device_create(vmlogrdr_class, dev, + MKDEV(vmlogrdr_major, + priv->minor_num), + "%s", dev->bus_id); if (IS_ERR(priv->class_device)) { ret = PTR_ERR(priv->class_device); priv->class_device=NULL; @@ -783,8 +781,7 @@ static int vmlogrdr_register_device(struct vmlogrdr_priv_t *priv) static int vmlogrdr_unregister_device(struct vmlogrdr_priv_t *priv) { - class_device_destroy(vmlogrdr_class, - MKDEV(vmlogrdr_major, priv->minor_num)); + device_destroy(vmlogrdr_class, MKDEV(vmlogrdr_major, priv->minor_num)); if (priv->device != NULL) { sysfs_remove_group(&priv->device->kobj, &vmlogrdr_attr_group); device_unregister(priv->device); -- cgit v0.10.2 From e3d3683d1402c1737687cb698451d545f57c32a7 Mon Sep 17 00:00:00 2001 From: Michael Holzheu Date: Mon, 22 Oct 2007 12:52:43 +0200 Subject: [S390] kernel: Fix dump on panic for DASDs under LPAR. Currently the ccw method is used to ipl the DASD dump record under LPAR. This mechanism is not reliable, which can cause dump failures. This fix now uses the diag 308 ipl method for all machines, which have diag308 subcode 5 and 4 support. 
Signed-off-by: Michael Holzheu
Signed-off-by: Martin Schwidefsky

diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c
index 66b5190..ce0856d 100644
--- a/arch/s390/kernel/ipl.c
+++ b/arch/s390/kernel/ipl.c
@@ -648,6 +648,8 @@ static int dump_set_type(enum dump_type type)
 	case DUMP_TYPE_CCW:
 		if (MACHINE_IS_VM)
 			dump_method = DUMP_METHOD_CCW_VM;
+		else if (diag308_set_works)
+			dump_method = DUMP_METHOD_CCW_DIAG;
 		else
 			dump_method = DUMP_METHOD_CCW_CIO;
 		break;
-- cgit v0.10.2

From ba8a9229ab9e80278c28ad68b15053f65b2b0a7c Mon Sep 17 00:00:00 2001
From: Martin Schwidefsky
Date: Mon, 22 Oct 2007 12:52:44 +0200
Subject: [S390] tlb flush fix.

The current tlb flushing code for page table entries violates the s390 architecture in a small detail. The relevant section from the principles of operation (SA22-7832-02 page 3-47):

"A valid table entry must not be changed while it is attached to any CPU and may be used for translation by that CPU except to (1) invalidate the entry by using INVALIDATE PAGE TABLE ENTRY or INVALIDATE DAT TABLE ENTRY, (2) alter bits 56-63 of a page-table entry, or (3) make a change by means of a COMPARE AND SWAP AND PURGE instruction that purges the TLB."

That means if one thread of a multithreaded application uses a vma while another thread does an unmap on it, the page table entries of that vma need to be removed with IPTE, IDTE or CSP. In some strange and rare situations a cpu could check-stop (die) because an entry has been pushed out of the TLB that is still needed to complete a (milli-coded) instruction. I've never seen it happen with the current code on any of the supported machines, so right now this is a theoretical problem. But I want to fix it nevertheless, to avoid headaches in the future.

To get this implemented correctly without changing common code, the primitives ptep_get_and_clear, ptep_get_and_clear_full and ptep_set_wrprotect need to use the IPTE instruction to invalidate the pte before the new pte value gets stored. If IPTE is always used for the three primitives, three important operations will take a performance hit: fork, mprotect and exit_mmap. Time for some workarounds:

* 1: ptep_get_and_clear_full is used in unmap_vmas to remove page table entries in a batched tlb gather operation. If the mmu_gather context passed to unmap_vmas has been started with full_mm_flush==1, or if only one cpu is online, or if the only user of the mm_struct is the current process, then the fullmm indication in the mmu_gather context is set to one. All TLBs for the mm_struct are flushed by the tlb_gather_mmu call. No new TLBs can be created while the unmap is in progress. In this case ptep_get_and_clear_full clears the ptes with a simple store.

* 2: ptep_get_and_clear is used in change_protection to clear the ptes from the page tables before they are reentered with the new access flags. At the end of the update flush_tlb_range clears the remaining TLBs. In general ptep_get_and_clear has to issue IPTE for each pte and flush_tlb_range is a nop. But if there is only one user of the mm_struct, then ptep_get_and_clear uses simple stores to do the update and flush_tlb_range will flush the TLBs.

* 3: Similar to 2, ptep_set_wrprotect is used in copy_page_range for a fork to make all ptes of a cow mapping read-only. At the end of copy_page_range, dup_mmap will flush the TLBs with a call to flush_tlb_mm. Check mm->mm_users, and if there is only one user, avoid using IPTE in ptep_set_wrprotect and let flush_tlb_mm clear the TLBs, following the pattern sketched below.
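The three workarounds share one pattern: issue IPTE only when another thread may be using the mm concurrently, and fall back to a plain store when the current process is the sole user. A minimal C sketch of that pattern follows; the patch itself implements the primitives as macros in include/asm-s390/pgtable.h (shown further below), mm_is_exclusive is an illustrative helper name that is not part of the patch, and the sketch assumes ptep_invalidate and pte_clear behave as described above:

static inline int mm_is_exclusive(struct mm_struct *mm)
{
	/* True when no other thread can be using the mm's TLB entries. */
	return atomic_read(&mm->mm_users) <= 1 && mm == current->active_mm;
}

static inline pte_t sketch_ptep_get_and_clear(struct mm_struct *mm,
					      unsigned long addr, pte_t *ptep)
{
	pte_t pte = *ptep;

	if (mm_is_exclusive(mm))
		/* Simple store; flush_tlb_mm/flush_tlb_range flushes later. */
		pte_clear(mm, addr, ptep);
	else
		/* IPTE invalidates the pte and purges the TLB in one go. */
		ptep_invalidate(addr, ptep);
	return pte;
}

ptep_get_and_clear_full and ptep_set_wrprotect in the patch below follow the same shape, keyed off the fullmm flag and pte_write() respectively.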
Overall for single threaded programs the tlb flush code now performs better, for multi threaded programs it is slightly worse. In particular exit_mmap() now does a single IDTE for the mm and then just frees every page cache reference and every page table page directly without a delay over the mmu_gather structure. Signed-off-by: Martin Schwidefsky diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index ba3fff0..1d97fe1 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -326,7 +326,7 @@ static void smp_ext_bitcall(int cpu, ec_bit_sig sig) */ void smp_ptlb_callback(void *info) { - local_flush_tlb(); + __tlb_flush_local(); } void smp_ptlb_all(void) diff --git a/include/asm-s390/pgalloc.h b/include/asm-s390/pgalloc.h index e45d3c9..6cbbfe4 100644 --- a/include/asm-s390/pgalloc.h +++ b/include/asm-s390/pgalloc.h @@ -82,7 +82,6 @@ static inline void pgd_free(pgd_t *pgd) */ #define pmd_alloc_one(mm,address) ({ BUG(); ((pmd_t *)2); }) #define pmd_free(x) do { } while (0) -#define __pmd_free_tlb(tlb,x) do { } while (0) #define pgd_populate(mm, pmd, pte) BUG() #define pgd_populate_kernel(mm, pmd, pte) BUG() #else /* __s390x__ */ @@ -118,12 +117,6 @@ static inline void pmd_free (pmd_t *pmd) free_pages((unsigned long) pmd, PMD_ALLOC_ORDER); } -#define __pmd_free_tlb(tlb,pmd) \ - do { \ - tlb_flush_mmu(tlb, 0, 0); \ - pmd_free(pmd); \ - } while (0) - static inline void pgd_populate_kernel(struct mm_struct *mm, pgd_t *pgd, pmd_t *pmd) { @@ -224,14 +217,4 @@ static inline void pte_free(struct page *pte) __free_page(pte); } -#define __pte_free_tlb(tlb, pte) \ -({ \ - struct mmu_gather *__tlb = (tlb); \ - struct page *__pte = (pte); \ - struct page *shadow_page = get_shadow_page(__pte); \ - if (shadow_page) \ - tlb_remove_page(__tlb, shadow_page); \ - tlb_remove_page(__tlb, __pte); \ -}) - #endif /* _S390_PGALLOC_H */ diff --git a/include/asm-s390/pgtable.h b/include/asm-s390/pgtable.h index 39bb519..b424ab2 100644 --- a/include/asm-s390/pgtable.h +++ b/include/asm-s390/pgtable.h @@ -424,7 +424,8 @@ static inline pgd_t *get_shadow_pgd(pgd_t *pgdp) * within a page table are directly modified. Thus, the following * hook is made available. 
*/ -static inline void set_pte(pte_t *pteptr, pte_t pteval) +static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, + pte_t *pteptr, pte_t pteval) { pte_t *shadow_pte = get_shadow_pte(pteptr); @@ -437,7 +438,6 @@ static inline void set_pte(pte_t *pteptr, pte_t pteval) pte_val(*shadow_pte) = _PAGE_TYPE_EMPTY; } } -#define set_pte_at(mm,addr,ptep,pteval) set_pte(ptep,pteval) /* * pgd/pmd/pte query functions @@ -508,7 +508,8 @@ static inline int pte_file(pte_t pte) return (pte_val(pte) & mask) == _PAGE_TYPE_FILE; } -#define pte_same(a,b) (pte_val(a) == pte_val(b)) +#define __HAVE_ARCH_PTE_SAME +#define pte_same(a,b) (pte_val(a) == pte_val(b)) /* * query functions pte_write/pte_dirty/pte_young only work if @@ -663,24 +664,19 @@ static inline pte_t pte_mkyoung(pte_t pte) return pte; } -static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep) +#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG +static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep) { return 0; } -static inline int -ptep_clear_flush_young(struct vm_area_struct *vma, - unsigned long address, pte_t *ptep) +#define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH +static inline int ptep_clear_flush_young(struct vm_area_struct *vma, + unsigned long address, pte_t *ptep) { /* No need to flush TLB; bits are in storage key */ - return ptep_test_and_clear_young(vma, address, ptep); -} - -static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) -{ - pte_t pte = *ptep; - pte_clear(mm, addr, ptep); - return pte; + return 0; } static inline void __ptep_ipte(unsigned long address, pte_t *ptep) @@ -709,6 +705,32 @@ static inline void ptep_invalidate(unsigned long address, pte_t *ptep) __ptep_ipte(address, ptep); } +/* + * This is hard to understand. ptep_get_and_clear and ptep_clear_flush + * both clear the TLB for the unmapped pte. The reason is that + * ptep_get_and_clear is used in common code (e.g. change_pte_range) + * to modify an active pte. The sequence is + * 1) ptep_get_and_clear + * 2) set_pte_at + * 3) flush_tlb_range + * On s390 the tlb needs to get flushed with the modification of the pte + * if the pte is active. The only way how this can be implemented is to + * have ptep_get_and_clear do the tlb flush. In exchange flush_tlb_range + * is a nop. + */ +#define __HAVE_ARCH_PTEP_GET_AND_CLEAR +#define ptep_get_and_clear(__mm, __address, __ptep) \ +({ \ + pte_t __pte = *(__ptep); \ + if (atomic_read(&(__mm)->mm_users) > 1 || \ + (__mm) != current->active_mm) \ + ptep_invalidate(__address, __ptep); \ + else \ + pte_clear((__mm), (__address), (__ptep)); \ + __pte; \ +}) + +#define __HAVE_ARCH_PTEP_CLEAR_FLUSH static inline pte_t ptep_clear_flush(struct vm_area_struct *vma, unsigned long address, pte_t *ptep) { @@ -717,12 +739,40 @@ static inline pte_t ptep_clear_flush(struct vm_area_struct *vma, return pte; } -static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep) +/* + * The batched pte unmap code uses ptep_get_and_clear_full to clear the + * ptes. Here an optimization is possible. tlb_gather_mmu flushes all + * tlbs of an mm if it can guarantee that the ptes of the mm_struct + * cannot be accessed while the batched unmap is running. In this case + * full==1 and a simple pte_clear is enough. See tlb.h. 
+ */ +#define __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL +static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm, + unsigned long addr, + pte_t *ptep, int full) { - pte_t old_pte = *ptep; - set_pte_at(mm, addr, ptep, pte_wrprotect(old_pte)); + pte_t pte = *ptep; + + if (full) + pte_clear(mm, addr, ptep); + else + ptep_invalidate(addr, ptep); + return pte; } +#define __HAVE_ARCH_PTEP_SET_WRPROTECT +#define ptep_set_wrprotect(__mm, __addr, __ptep) \ +({ \ + pte_t __pte = *(__ptep); \ + if (pte_write(__pte)) { \ + if (atomic_read(&(__mm)->mm_users) > 1 || \ + (__mm) != current->active_mm) \ + ptep_invalidate(__addr, __ptep); \ + set_pte_at(__mm, __addr, __ptep, pte_wrprotect(__pte)); \ + } \ +}) + +#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS #define ptep_set_access_flags(__vma, __addr, __ptep, __entry, __dirty) \ ({ \ int __changed = !pte_same(*(__ptep), __entry); \ @@ -740,11 +790,13 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, * should therefore only be called if it is not mapped in any * address space. */ +#define __HAVE_ARCH_PAGE_TEST_DIRTY static inline int page_test_dirty(struct page *page) { return (page_get_storage_key(page_to_phys(page)) & _PAGE_CHANGED) != 0; } +#define __HAVE_ARCH_PAGE_CLEAR_DIRTY static inline void page_clear_dirty(struct page *page) { page_set_storage_key(page_to_phys(page), PAGE_DEFAULT_KEY); @@ -753,6 +805,7 @@ static inline void page_clear_dirty(struct page *page) /* * Test and clear referenced bit in storage key. */ +#define __HAVE_ARCH_PAGE_TEST_AND_CLEAR_YOUNG static inline int page_test_and_clear_young(struct page *page) { unsigned long physpage = page_to_phys(page); @@ -930,16 +983,6 @@ extern int remove_shared_memory(unsigned long start, unsigned long size); #define __HAVE_ARCH_MEMMAP_INIT extern void memmap_init(unsigned long, int, unsigned long, unsigned long); -#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS -#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG -#define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH -#define __HAVE_ARCH_PTEP_GET_AND_CLEAR -#define __HAVE_ARCH_PTEP_CLEAR_FLUSH -#define __HAVE_ARCH_PTEP_SET_WRPROTECT -#define __HAVE_ARCH_PTE_SAME -#define __HAVE_ARCH_PAGE_TEST_DIRTY -#define __HAVE_ARCH_PAGE_CLEAR_DIRTY -#define __HAVE_ARCH_PAGE_TEST_AND_CLEAR_YOUNG #include #endif /* _S390_PAGE_H */ diff --git a/include/asm-s390/tlb.h b/include/asm-s390/tlb.h index 51bd957..55ae45e 100644 --- a/include/asm-s390/tlb.h +++ b/include/asm-s390/tlb.h @@ -2,19 +2,128 @@ #define _S390_TLB_H /* - * s390 doesn't need any special per-pte or - * per-vma handling.. + * TLB flushing on s390 is complicated. The following requirement + * from the principles of operation is the most arduous: + * + * "A valid table entry must not be changed while it is attached + * to any CPU and may be used for translation by that CPU except to + * (1) invalidate the entry by using INVALIDATE PAGE TABLE ENTRY, + * or INVALIDATE DAT TABLE ENTRY, (2) alter bits 56-63 of a page + * table entry, or (3) make a change by means of a COMPARE AND SWAP + * AND PURGE instruction that purges the TLB." + * + * The modification of a pte of an active mm struct therefore is + * a two step process: i) invalidate the pte, ii) store the new pte. + * This is true for the page protection bit as well. + * The only possible optimization is to flush at the beginning of + * a tlb_gather_mmu cycle if the mm_struct is currently not in use. + * + * Pages used for the page tables is a different story. 
FIXME: more */ -#define tlb_start_vma(tlb, vma) do { } while (0) -#define tlb_end_vma(tlb, vma) do { } while (0) -#define __tlb_remove_tlb_entry(tlb, ptep, address) do { } while (0) + +#include +#include +#include +#include +#include +#include + +#ifndef CONFIG_SMP +#define TLB_NR_PTRS 1 +#else +#define TLB_NR_PTRS 508 +#endif + +struct mmu_gather { + struct mm_struct *mm; + unsigned int fullmm; + unsigned int nr_ptes; + unsigned int nr_pmds; + void *array[TLB_NR_PTRS]; +}; + +DECLARE_PER_CPU(struct mmu_gather, mmu_gathers); + +static inline struct mmu_gather *tlb_gather_mmu(struct mm_struct *mm, + unsigned int full_mm_flush) +{ + struct mmu_gather *tlb = &get_cpu_var(mmu_gathers); + + tlb->mm = mm; + tlb->fullmm = full_mm_flush || (num_online_cpus() == 1) || + (atomic_read(&mm->mm_users) <= 1 && mm == current->active_mm); + tlb->nr_ptes = 0; + tlb->nr_pmds = TLB_NR_PTRS; + if (tlb->fullmm) + __tlb_flush_mm(mm); + return tlb; +} + +static inline void tlb_flush_mmu(struct mmu_gather *tlb, + unsigned long start, unsigned long end) +{ + if (!tlb->fullmm && (tlb->nr_ptes > 0 || tlb->nr_pmds < TLB_NR_PTRS)) + __tlb_flush_mm(tlb->mm); + while (tlb->nr_ptes > 0) + pte_free(tlb->array[--tlb->nr_ptes]); + while (tlb->nr_pmds < TLB_NR_PTRS) + pmd_free((pmd_t *) tlb->array[tlb->nr_pmds++]); +} + +static inline void tlb_finish_mmu(struct mmu_gather *tlb, + unsigned long start, unsigned long end) +{ + tlb_flush_mmu(tlb, start, end); + + /* keep the page table cache within bounds */ + check_pgt_cache(); + + put_cpu_var(mmu_gathers); +} /* - * .. because we flush the whole mm when it - * fills up. + * Release the page cache reference for a pte removed by + * tlb_ptep_clear_flush. In both flush modes the tlb fo a page cache page + * has already been freed, so just do free_page_and_swap_cache. */ -#define tlb_flush(tlb) flush_tlb_mm((tlb)->mm) +static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page) +{ + free_page_and_swap_cache(page); +} -#include +/* + * pte_free_tlb frees a pte table and clears the CRSTE for the + * page table from the tlb. + */ +static inline void pte_free_tlb(struct mmu_gather *tlb, struct page *page) +{ + if (!tlb->fullmm) { + tlb->array[tlb->nr_ptes++] = page; + if (tlb->nr_ptes >= tlb->nr_pmds) + tlb_flush_mmu(tlb, 0, 0); + } else + pte_free(page); +} +/* + * pmd_free_tlb frees a pmd table and clears the CRSTE for the + * segment table entry from the tlb. 
+ */ +static inline void pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd) +{ +#ifdef __s390x__ + if (!tlb->fullmm) { + tlb->array[--tlb->nr_pmds] = (struct page *) pmd; + if (tlb->nr_ptes >= tlb->nr_pmds) + tlb_flush_mmu(tlb, 0, 0); + } else + pmd_free(pmd); #endif +} + +#define tlb_start_vma(tlb, vma) do { } while (0) +#define tlb_end_vma(tlb, vma) do { } while (0) +#define tlb_remove_tlb_entry(tlb, ptep, addr) do { } while (0) +#define tlb_migrate_finish(mm) do { } while (0) + +#endif /* _S390_TLB_H */ diff --git a/include/asm-s390/tlbflush.h b/include/asm-s390/tlbflush.h index 6de2632..3a9985f 100644 --- a/include/asm-s390/tlbflush.h +++ b/include/asm-s390/tlbflush.h @@ -6,68 +6,19 @@ #include /* - * TLB flushing: - * - * - flush_tlb() flushes the current mm struct TLBs - * - flush_tlb_all() flushes all processes TLBs - * - flush_tlb_mm(mm) flushes the specified mm context TLB's - * - flush_tlb_page(vma, vmaddr) flushes one page - * - flush_tlb_range(vma, start, end) flushes a range of pages - * - flush_tlb_kernel_range(start, end) flushes a range of kernel pages - */ - -/* - * S/390 has three ways of flushing TLBs - * 'ptlb' does a flush of the local processor - * 'csp' flushes the TLBs on all PUs of a SMP - * 'ipte' invalidates a pte in a page table and flushes that out of - * the TLBs of all PUs of a SMP - */ - -#define local_flush_tlb() \ -do { asm volatile("ptlb": : :"memory"); } while (0) - -#ifndef CONFIG_SMP - -/* - * We always need to flush, since s390 does not flush tlb - * on each context switch + * Flush all tlb entries on the local cpu. */ - -static inline void flush_tlb(void) +static inline void __tlb_flush_local(void) { - local_flush_tlb(); + asm volatile("ptlb" : : : "memory"); } -static inline void flush_tlb_all(void) -{ - local_flush_tlb(); -} -static inline void flush_tlb_mm(struct mm_struct *mm) -{ - local_flush_tlb(); -} -static inline void flush_tlb_page(struct vm_area_struct *vma, - unsigned long addr) -{ - local_flush_tlb(); -} -static inline void flush_tlb_range(struct vm_area_struct *vma, - unsigned long start, unsigned long end) -{ - local_flush_tlb(); -} - -#define flush_tlb_kernel_range(start, end) \ - local_flush_tlb(); - -#else -#include - -extern void smp_ptlb_all(void); - -static inline void global_flush_tlb(void) +/* + * Flush all tlb entries on all cpus. + */ +static inline void __tlb_flush_global(void) { + extern void smp_ptlb_all(void); register unsigned long reg2 asm("2"); register unsigned long reg3 asm("3"); register unsigned long reg4 asm("4"); @@ -89,66 +40,75 @@ static inline void global_flush_tlb(void) } /* - * We only have to do global flush of tlb if process run since last - * flush on any other pu than current. - * If we have threads (mm->count > 1) we always do a global flush, - * since the process runs on more than one processor at the same time. + * Flush all tlb entries of a page table on all cpus. */ +static inline void __tlb_flush_idte(pgd_t *pgd) +{ + asm volatile( + " .insn rrf,0xb98e0000,0,%0,%1,0" + : : "a" (2048), "a" (__pa(pgd) & PAGE_MASK) : "cc" ); +} -static inline void __flush_tlb_mm(struct mm_struct * mm) +static inline void __tlb_flush_mm(struct mm_struct * mm) { cpumask_t local_cpumask; if (unlikely(cpus_empty(mm->cpu_vm_mask))) return; + /* + * If the machine has IDTE we prefer to do a per mm flush + * on all cpus instead of doing a local flush if the mm + * only ran on the local cpu. 
+ */ if (MACHINE_HAS_IDTE) { pgd_t *shadow_pgd = get_shadow_pgd(mm->pgd); - if (shadow_pgd) { - asm volatile( - " .insn rrf,0xb98e0000,0,%0,%1,0" - : : "a" (2048), - "a" (__pa(shadow_pgd) & PAGE_MASK) : "cc" ); - } - asm volatile( - " .insn rrf,0xb98e0000,0,%0,%1,0" - : : "a" (2048), "a" (__pa(mm->pgd)&PAGE_MASK) : "cc"); + if (shadow_pgd) + __tlb_flush_idte(shadow_pgd); + __tlb_flush_idte(mm->pgd); return; } preempt_disable(); + /* + * If the process only ran on the local cpu, do a local flush. + */ local_cpumask = cpumask_of_cpu(smp_processor_id()); if (cpus_equal(mm->cpu_vm_mask, local_cpumask)) - local_flush_tlb(); + __tlb_flush_local(); else - global_flush_tlb(); + __tlb_flush_global(); preempt_enable(); } -static inline void flush_tlb(void) -{ - __flush_tlb_mm(current->mm); -} -static inline void flush_tlb_all(void) -{ - global_flush_tlb(); -} -static inline void flush_tlb_mm(struct mm_struct *mm) -{ - __flush_tlb_mm(mm); -} -static inline void flush_tlb_page(struct vm_area_struct *vma, - unsigned long addr) -{ - __flush_tlb_mm(vma->vm_mm); -} -static inline void flush_tlb_range(struct vm_area_struct *vma, - unsigned long start, unsigned long end) +static inline void __tlb_flush_mm_cond(struct mm_struct * mm) { - __flush_tlb_mm(vma->vm_mm); + if (atomic_read(&mm->mm_users) <= 1 && mm == current->active_mm) + __tlb_flush_mm(mm); } -#define flush_tlb_kernel_range(start, end) global_flush_tlb() +/* + * TLB flushing: + * flush_tlb() - flushes the current mm struct TLBs + * flush_tlb_all() - flushes all processes TLBs + * flush_tlb_mm(mm) - flushes the specified mm context TLB's + * flush_tlb_page(vma, vmaddr) - flushes one page + * flush_tlb_range(vma, start, end) - flushes a range of pages + * flush_tlb_kernel_range(start, end) - flushes a range of kernel pages + */ -#endif +/* + * flush_tlb_mm goes together with ptep_set_wrprotect for the + * copy_page_range operation and flush_tlb_range is related to + * ptep_get_and_clear for change_protection. ptep_set_wrprotect and + * ptep_get_and_clear do not flush the TLBs directly if the mm has + * only one user. At the end of the update the flush_tlb_mm and + * flush_tlb_range functions need to do the flush. + */ +#define flush_tlb() do { } while (0) +#define flush_tlb_all() do { } while (0) +#define flush_tlb_mm(mm) __tlb_flush_mm_cond(mm) +#define flush_tlb_page(vma, addr) do { } while (0) +#define flush_tlb_range(vma, start, end) __tlb_flush_mm_cond(mm) +#define flush_tlb_kernel_range(start, end) __tlb_flush_mm(&init_mm) #endif /* _S390_TLBFLUSH_H */ -- cgit v0.10.2 From 6f3fa3f0eb8fe4675f8543dd4be3365577e1d487 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Mon, 22 Oct 2007 12:52:45 +0200 Subject: [S390] Remove unused user_seg from thread structure. Signed-off-by: Martin Schwidefsky diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c index cc7c4ba..96492cf 100644 --- a/arch/s390/kernel/process.c +++ b/arch/s390/kernel/process.c @@ -270,14 +270,12 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long new_stackp, save_fp_regs(¤t->thread.fp_regs); memcpy(&p->thread.fp_regs, ¤t->thread.fp_regs, sizeof(s390_fp_regs)); - p->thread.user_seg = __pa((unsigned long) p->mm->pgd) | _SEGMENT_TABLE; /* Set a new TLS ? */ if (clone_flags & CLONE_SETTLS) p->thread.acrs[0] = regs->gprs[6]; #else /* CONFIG_64BIT */ /* Save the fpu registers to new thread structure. */ save_fp_regs(&p->thread.fp_regs); - p->thread.user_seg = __pa((unsigned long) p->mm->pgd) | _REGION_TABLE; /* Set a new TLS ? 
*/ if (clone_flags & CLONE_SETTLS) { if (test_thread_flag(TIF_31BIT)) { diff --git a/include/asm-s390/processor.h b/include/asm-s390/processor.h index 3b972d4..81efccc 100644 --- a/include/asm-s390/processor.h +++ b/include/asm-s390/processor.h @@ -93,7 +93,6 @@ struct thread_struct { s390_fp_regs fp_regs; unsigned int acrs[NUM_ACRS]; unsigned long ksp; /* kernel stack pointer */ - unsigned long user_seg; /* HSTD */ mm_segment_t mm_segment; unsigned long prot_addr; /* address of protection-excep. */ unsigned int error_code; /* error-code of last prog-excep. */ @@ -134,16 +133,9 @@ struct stack_frame { # define __SWAPPER_PG_DIR __pa(&swapper_pg_dir[0]) + _REGION_TABLE #endif /* __s390x__ */ -#define INIT_THREAD {{0,{{0},{0},{0},{0},{0},{0},{0},{0},{0},{0}, \ - {0},{0},{0},{0},{0},{0}}}, \ - {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, \ - sizeof(init_stack) + (unsigned long) &init_stack, \ - __SWAPPER_PG_DIR, \ - {0}, \ - 0,0,0, \ - (per_struct) {{{{0,}}},0,0,0,0,{{0,}}}, \ - 0, 0 \ -} +#define INIT_THREAD { \ + .ksp = sizeof(init_stack) + (unsigned long) &init_stack, \ +} /* * Do necessary setup to start up a new thread. -- cgit v0.10.2 From e4aa402e7a3b6b87d8df6243a37171cdcd2f01c2 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Mon, 22 Oct 2007 12:52:46 +0200 Subject: [S390] Introduce follow_table in uaccess_pt.c Define and use follow_table inline in uaccess_pt.c to simplify the code. Signed-off-by: Martin Schwidefsky diff --git a/arch/s390/lib/uaccess_pt.c b/arch/s390/lib/uaccess_pt.c index b159a9d..dc37ea8 100644 --- a/arch/s390/lib/uaccess_pt.c +++ b/arch/s390/lib/uaccess_pt.c @@ -15,6 +15,22 @@ #include #include "uaccess.h" +static inline pte_t *follow_table(struct mm_struct *mm, unsigned long addr) +{ + pgd_t *pgd; + pmd_t *pmd; + + pgd = pgd_offset(mm, addr); + if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd))) + return NULL; + + pmd = pmd_offset(pgd, addr); + if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd))) + return NULL; + + return pte_offset_map(pmd, addr); +} + static int __handle_fault(struct mm_struct *mm, unsigned long address, int write_access) { @@ -85,8 +101,6 @@ static size_t __user_copy_pt(unsigned long uaddr, void *kptr, { struct mm_struct *mm = current->mm; unsigned long offset, pfn, done, size; - pgd_t *pgd; - pmd_t *pmd; pte_t *pte; void *from, *to; @@ -94,15 +108,7 @@ static size_t __user_copy_pt(unsigned long uaddr, void *kptr, retry: spin_lock(&mm->page_table_lock); do { - pgd = pgd_offset(mm, uaddr); - if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd))) - goto fault; - - pmd = pmd_offset(pgd, uaddr); - if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd))) - goto fault; - - pte = pte_offset_map(pmd, uaddr); + pte = follow_table(mm, uaddr); if (!pte || !pte_present(*pte) || (write_user && !pte_write(*pte))) goto fault; @@ -142,22 +148,12 @@ static unsigned long __dat_user_addr(unsigned long uaddr) { struct mm_struct *mm = current->mm; unsigned long pfn, ret; - pgd_t *pgd; - pmd_t *pmd; pte_t *pte; int rc; ret = 0; retry: - pgd = pgd_offset(mm, uaddr); - if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd))) - goto fault; - - pmd = pmd_offset(pgd, uaddr); - if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd))) - goto fault; - - pte = pte_offset_map(pmd, uaddr); + pte = follow_table(mm, uaddr); if (!pte || !pte_present(*pte)) goto fault; @@ -229,8 +225,6 @@ static size_t strnlen_user_pt(size_t count, const char __user *src) unsigned long uaddr = (unsigned long) src; struct mm_struct *mm = current->mm; unsigned long offset, pfn, done, len; - pgd_t *pgd; - pmd_t *pmd; pte_t *pte; size_t 
len_str; @@ -240,15 +234,7 @@ static size_t strnlen_user_pt(size_t count, const char __user *src) retry: spin_lock(&mm->page_table_lock); do { - pgd = pgd_offset(mm, uaddr); - if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd))) - goto fault; - - pmd = pmd_offset(pgd, uaddr); - if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd))) - goto fault; - - pte = pte_offset_map(pmd, uaddr); + pte = follow_table(mm, uaddr); if (!pte || !pte_present(*pte)) goto fault; @@ -308,8 +294,6 @@ static size_t copy_in_user_pt(size_t n, void __user *to, uaddr, done, size; unsigned long uaddr_from = (unsigned long) from; unsigned long uaddr_to = (unsigned long) to; - pgd_t *pgd_from, *pgd_to; - pmd_t *pmd_from, *pmd_to; pte_t *pte_from, *pte_to; int write_user; @@ -317,39 +301,14 @@ static size_t copy_in_user_pt(size_t n, void __user *to, retry: spin_lock(&mm->page_table_lock); do { - pgd_from = pgd_offset(mm, uaddr_from); - if (pgd_none(*pgd_from) || unlikely(pgd_bad(*pgd_from))) { - uaddr = uaddr_from; - write_user = 0; - goto fault; - } - pgd_to = pgd_offset(mm, uaddr_to); - if (pgd_none(*pgd_to) || unlikely(pgd_bad(*pgd_to))) { - uaddr = uaddr_to; - write_user = 1; - goto fault; - } - - pmd_from = pmd_offset(pgd_from, uaddr_from); - if (pmd_none(*pmd_from) || unlikely(pmd_bad(*pmd_from))) { - uaddr = uaddr_from; - write_user = 0; - goto fault; - } - pmd_to = pmd_offset(pgd_to, uaddr_to); - if (pmd_none(*pmd_to) || unlikely(pmd_bad(*pmd_to))) { - uaddr = uaddr_to; - write_user = 1; - goto fault; - } - - pte_from = pte_offset_map(pmd_from, uaddr_from); + pte_from = follow_table(mm, uaddr_from); if (!pte_from || !pte_present(*pte_from)) { uaddr = uaddr_from; write_user = 0; goto fault; } - pte_to = pte_offset_map(pmd_to, uaddr_to); + + pte_to = follow_table(mm, uaddr_to); if (!pte_to || !pte_present(*pte_to) || !pte_write(*pte_to)) { uaddr = uaddr_to; write_user = 1; -- cgit v0.10.2 From 3610cce87af0693603db171d5b6f6735f5e3dc5b Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Mon, 22 Oct 2007 12:52:47 +0200 Subject: [S390] Cleanup page table definitions. - De-confuse the defines for the address-space-control-elements and the segment/region table entries. - Create out of line functions for page table allocation / freeing. - Simplify get_shadow_xxx functions. Signed-off-by: Martin Schwidefsky diff --git a/arch/s390/mm/Makefile b/arch/s390/mm/Makefile index f95449b..6640193 100644 --- a/arch/s390/mm/Makefile +++ b/arch/s390/mm/Makefile @@ -2,6 +2,6 @@ # Makefile for the linux s390-specific parts of the memory manager. 
# -obj-y := init.o fault.o extmem.o mmap.o vmem.o +obj-y := init.o fault.o extmem.o mmap.o vmem.o pgtable.o obj-$(CONFIG_CMM) += cmm.o diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index 3a25bbf..90ec058 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -103,32 +103,28 @@ static void __init setup_ro_region(void) */ void __init paging_init(void) { - pgd_t *pg_dir; - int i; - unsigned long pgdir_k; static const int ssm_mask = 0x04000000L; unsigned long max_zone_pfns[MAX_NR_ZONES]; + unsigned long pgd_type; - pg_dir = swapper_pg_dir; - + init_mm.pgd = swapper_pg_dir; + S390_lowcore.kernel_asce = __pa(init_mm.pgd) & PAGE_MASK; #ifdef CONFIG_64BIT - pgdir_k = (__pa(swapper_pg_dir) & PAGE_MASK) | _KERN_REGION_TABLE; - for (i = 0; i < PTRS_PER_PGD; i++) - pgd_clear_kernel(pg_dir + i); + S390_lowcore.kernel_asce |= _ASCE_TYPE_REGION3 | _ASCE_TABLE_LENGTH; + pgd_type = _REGION3_ENTRY_EMPTY; #else - pgdir_k = (__pa(swapper_pg_dir) & PAGE_MASK) | _KERNSEG_TABLE; - for (i = 0; i < PTRS_PER_PGD; i++) - pmd_clear_kernel((pmd_t *)(pg_dir + i)); + S390_lowcore.kernel_asce |= _ASCE_TABLE_LENGTH; + pgd_type = _SEGMENT_ENTRY_EMPTY; #endif + clear_table((unsigned long *) init_mm.pgd, pgd_type, + sizeof(unsigned long)*2048); vmem_map_init(); setup_ro_region(); - S390_lowcore.kernel_asce = pgdir_k; - /* enable virtual mapping in kernel mode */ - __ctl_load(pgdir_k, 1, 1); - __ctl_load(pgdir_k, 7, 7); - __ctl_load(pgdir_k, 13, 13); + __ctl_load(S390_lowcore.kernel_asce, 1, 1); + __ctl_load(S390_lowcore.kernel_asce, 7, 7); + __ctl_load(S390_lowcore.kernel_asce, 13, 13); __raw_local_irq_ssm(ssm_mask); memset(max_zone_pfns, 0, sizeof(max_zone_pfns)); diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c new file mode 100644 index 0000000..e60e0ae --- /dev/null +++ b/arch/s390/mm/pgtable.c @@ -0,0 +1,94 @@ +/* + * arch/s390/mm/pgtable.c + * + * Copyright IBM Corp. 2007 + * Author(s): Martin Schwidefsky + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#ifndef CONFIG_64BIT +#define ALLOC_ORDER 1 +#else +#define ALLOC_ORDER 2 +#endif + +unsigned long *crst_table_alloc(struct mm_struct *mm, int noexec) +{ + struct page *page = alloc_pages(GFP_KERNEL, ALLOC_ORDER); + + if (!page) + return NULL; + page->index = 0; + if (noexec) { + struct page *shadow = alloc_pages(GFP_KERNEL, ALLOC_ORDER); + if (!shadow) { + __free_pages(page, ALLOC_ORDER); + return NULL; + } + page->index = page_to_phys(shadow); + } + return (unsigned long *) page_to_phys(page); +} + +void crst_table_free(unsigned long *table) +{ + unsigned long *shadow = get_shadow_table(table); + + if (shadow) + free_pages((unsigned long) shadow, ALLOC_ORDER); + free_pages((unsigned long) table, ALLOC_ORDER); +} + +/* + * page table entry allocation/free routines. 
+ */ +unsigned long *page_table_alloc(int noexec) +{ + struct page *page = alloc_page(GFP_KERNEL); + unsigned long *table; + + if (!page) + return NULL; + page->index = 0; + if (noexec) { + struct page *shadow = alloc_page(GFP_KERNEL); + if (!shadow) { + __free_page(page); + return NULL; + } + table = (unsigned long *) page_to_phys(shadow); + clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE); + page->index = (addr_t) table; + } + table = (unsigned long *) page_to_phys(page); + clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE); + return table; +} + +void page_table_free(unsigned long *table) +{ + unsigned long *shadow = get_shadow_pte(table); + + if (shadow) + free_page((unsigned long) shadow); + free_page((unsigned long) table); + +} diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c index fd594d5..1bd51d8 100644 --- a/arch/s390/mm/vmem.c +++ b/arch/s390/mm/vmem.c @@ -75,29 +75,24 @@ static void __init_refok *vmem_alloc_pages(unsigned int order) static inline pmd_t *vmem_pmd_alloc(void) { - pmd_t *pmd; - int i; + pmd_t *pmd = NULL; - pmd = vmem_alloc_pages(PMD_ALLOC_ORDER); +#ifdef CONFIG_64BIT + pmd = vmem_alloc_pages(2); if (!pmd) return NULL; - for (i = 0; i < PTRS_PER_PMD; i++) - pmd_clear_kernel(pmd + i); + clear_table((unsigned long *) pmd, _SEGMENT_ENTRY_EMPTY, PAGE_SIZE*4); +#endif return pmd; } static inline pte_t *vmem_pte_alloc(void) { - pte_t *pte; - pte_t empty_pte; - int i; + pte_t *pte = vmem_alloc_pages(0); - pte = vmem_alloc_pages(PTE_ALLOC_ORDER); if (!pte) return NULL; - pte_val(empty_pte) = _PAGE_TYPE_EMPTY; - for (i = 0; i < PTRS_PER_PTE; i++) - pte[i] = empty_pte; + clear_table((unsigned long *) pte, _PAGE_TYPE_EMPTY, PAGE_SIZE); return pte; } diff --git a/include/asm-s390/mmu_context.h b/include/asm-s390/mmu_context.h index 501cb9b..05b8421 100644 --- a/include/asm-s390/mmu_context.h +++ b/include/asm-s390/mmu_context.h @@ -21,45 +21,43 @@ #ifndef __s390x__ #define LCTL_OPCODE "lctl" -#define PGTABLE_BITS (_SEGMENT_TABLE|USER_STD_MASK) #else #define LCTL_OPCODE "lctlg" -#define PGTABLE_BITS (_REGION_TABLE|USER_STD_MASK) #endif -static inline void enter_lazy_tlb(struct mm_struct *mm, - struct task_struct *tsk) +static inline void update_mm(struct mm_struct *mm, struct task_struct *tsk) { + pgd_t *pgd = mm->pgd; + unsigned long asce_bits; + + /* Calculate asce bits from the first pgd table entry. */ + asce_bits = _ASCE_TABLE_LENGTH | _ASCE_USER_BITS; +#ifdef CONFIG_64BIT + asce_bits |= _ASCE_TYPE_REGION3; +#endif + S390_lowcore.user_asce = asce_bits | __pa(pgd); + if (switch_amode) { + /* Load primary space page table origin. */ + pgd_t *shadow_pgd = get_shadow_table(pgd) ? : pgd; + S390_lowcore.user_exec_asce = asce_bits | __pa(shadow_pgd); + asm volatile(LCTL_OPCODE" 1,1,%0\n" + : : "m" (S390_lowcore.user_exec_asce) ); + } else + /* Load home space page table origin. */ + asm volatile(LCTL_OPCODE" 13,13,%0" + : : "m" (S390_lowcore.user_asce) ); } static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, struct task_struct *tsk) { - pgd_t *shadow_pgd = get_shadow_pgd(next->pgd); - - if (prev != next) { - S390_lowcore.user_asce = (__pa(next->pgd) & PAGE_MASK) | - PGTABLE_BITS; - if (shadow_pgd) { - /* Load primary/secondary space page table origin. */ - S390_lowcore.user_exec_asce = - (__pa(shadow_pgd) & PAGE_MASK) | PGTABLE_BITS; - asm volatile(LCTL_OPCODE" 1,1,%0\n" - LCTL_OPCODE" 7,7,%1" - : : "m" (S390_lowcore.user_exec_asce), - "m" (S390_lowcore.user_asce) ); - } else if (switch_amode) { - /* Load primary space page table origin. 
diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c
index fd594d5..1bd51d8 100644
--- a/arch/s390/mm/vmem.c
+++ b/arch/s390/mm/vmem.c
@@ -75,29 +75,24 @@ static void __init_refok *vmem_alloc_pages(unsigned int order)
 
 static inline pmd_t *vmem_pmd_alloc(void)
 {
-	pmd_t *pmd;
-	int i;
+	pmd_t *pmd = NULL;
 
-	pmd = vmem_alloc_pages(PMD_ALLOC_ORDER);
+#ifdef CONFIG_64BIT
+	pmd = vmem_alloc_pages(2);
 	if (!pmd)
 		return NULL;
-	for (i = 0; i < PTRS_PER_PMD; i++)
-		pmd_clear_kernel(pmd + i);
+	clear_table((unsigned long *) pmd, _SEGMENT_ENTRY_EMPTY, PAGE_SIZE*4);
+#endif
 	return pmd;
 }
 
 static inline pte_t *vmem_pte_alloc(void)
 {
-	pte_t *pte;
-	pte_t empty_pte;
-	int i;
+	pte_t *pte = vmem_alloc_pages(0);
 
-	pte = vmem_alloc_pages(PTE_ALLOC_ORDER);
 	if (!pte)
 		return NULL;
-	pte_val(empty_pte) = _PAGE_TYPE_EMPTY;
-	for (i = 0; i < PTRS_PER_PTE; i++)
-		pte[i] = empty_pte;
+	clear_table((unsigned long *) pte, _PAGE_TYPE_EMPTY, PAGE_SIZE);
 	return pte;
 }
 
diff --git a/include/asm-s390/mmu_context.h b/include/asm-s390/mmu_context.h
index 501cb9b..05b8421 100644
--- a/include/asm-s390/mmu_context.h
+++ b/include/asm-s390/mmu_context.h
@@ -21,45 +21,43 @@
 #ifndef __s390x__
 #define LCTL_OPCODE "lctl"
-#define PGTABLE_BITS (_SEGMENT_TABLE|USER_STD_MASK)
 #else
 #define LCTL_OPCODE "lctlg"
-#define PGTABLE_BITS (_REGION_TABLE|USER_STD_MASK)
 #endif
 
-static inline void enter_lazy_tlb(struct mm_struct *mm,
-                                  struct task_struct *tsk)
+static inline void update_mm(struct mm_struct *mm, struct task_struct *tsk)
 {
+	pgd_t *pgd = mm->pgd;
+	unsigned long asce_bits;
+
+	/* Calculate asce bits from the first pgd table entry. */
+	asce_bits = _ASCE_TABLE_LENGTH | _ASCE_USER_BITS;
+#ifdef CONFIG_64BIT
+	asce_bits |= _ASCE_TYPE_REGION3;
+#endif
+	S390_lowcore.user_asce = asce_bits | __pa(pgd);
+	if (switch_amode) {
+		/* Load primary space page table origin. */
+		pgd_t *shadow_pgd = get_shadow_table(pgd) ? : pgd;
+		S390_lowcore.user_exec_asce = asce_bits | __pa(shadow_pgd);
+		asm volatile(LCTL_OPCODE" 1,1,%0\n"
+			     : : "m" (S390_lowcore.user_exec_asce) );
+	} else
+		/* Load home space page table origin. */
+		asm volatile(LCTL_OPCODE" 13,13,%0"
+			     : : "m" (S390_lowcore.user_asce) );
 }
 
 static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
 			     struct task_struct *tsk)
 {
-	pgd_t *shadow_pgd = get_shadow_pgd(next->pgd);
-
-	if (prev != next) {
-		S390_lowcore.user_asce = (__pa(next->pgd) & PAGE_MASK) |
-					 PGTABLE_BITS;
-		if (shadow_pgd) {
-			/* Load primary/secondary space page table origin. */
-			S390_lowcore.user_exec_asce =
-				(__pa(shadow_pgd) & PAGE_MASK) | PGTABLE_BITS;
-			asm volatile(LCTL_OPCODE" 1,1,%0\n"
-				     LCTL_OPCODE" 7,7,%1"
-				     : : "m" (S390_lowcore.user_exec_asce),
-					 "m" (S390_lowcore.user_asce) );
-		} else if (switch_amode) {
-			/* Load primary space page table origin. */
-			asm volatile(LCTL_OPCODE" 1,1,%0"
-				     : : "m" (S390_lowcore.user_asce) );
-		} else
-			/* Load home space page table origin. */
-			asm volatile(LCTL_OPCODE" 13,13,%0"
-				     : : "m" (S390_lowcore.user_asce) );
-	}
+	if (unlikely(prev == next))
+		return;
 	cpu_set(smp_processor_id(), next->cpu_vm_mask);
+	update_mm(next, tsk);
 }
 
+#define enter_lazy_tlb(mm,tsk)	do { } while (0)
 #define deactivate_mm(tsk,mm)	do { } while (0)
 
 static inline void activate_mm(struct mm_struct *prev,
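
Two notes on update_mm(), hedged as reader aids rather than part of the patch. The control-register numbers select the address space: CR1 holds the primary-space ASCE, CR7 the secondary-space ASCE, and CR13 the home-space ASCE, which is why the switch_amode path loads register 1 and the default path loads register 13. And the GNU C conditional with an omitted middle operand used above is shorthand:

	/* "get_shadow_table(pgd) ? : pgd" is equivalent to: */
	pgd_t *shadow_pgd = get_shadow_table(pgd);
	if (!shadow_pgd)
		shadow_pgd = pgd;	/* no shadow -> use the pgd itself */
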
diff --git a/include/asm-s390/pgalloc.h b/include/asm-s390/pgalloc.h
index 6cbbfe4..229b0bd 100644
--- a/include/asm-s390/pgalloc.h
+++ b/include/asm-s390/pgalloc.h
@@ -19,114 +19,75 @@
 
 #define check_pgt_cache()	do {} while (0)
 
-/*
- * Page allocation orders.
- */
-#ifndef __s390x__
-# define PTE_ALLOC_ORDER	0
-# define PMD_ALLOC_ORDER	0
-# define PGD_ALLOC_ORDER	1
-#else /* __s390x__ */
-# define PTE_ALLOC_ORDER	0
-# define PMD_ALLOC_ORDER	2
-# define PGD_ALLOC_ORDER	2
-#endif /* __s390x__ */
+unsigned long *crst_table_alloc(struct mm_struct *, int);
+void crst_table_free(unsigned long *);
 
-/*
- * Allocate and free page tables. The xxx_kernel() versions are
- * used to allocate a kernel page table - this turns on ASN bits
- * if any.
- */
+unsigned long *page_table_alloc(int);
+void page_table_free(unsigned long *);
 
-static inline pgd_t *pgd_alloc(struct mm_struct *mm)
+static inline void clear_table(unsigned long *s, unsigned long val, size_t n)
 {
-	pgd_t *pgd = (pgd_t *) __get_free_pages(GFP_KERNEL, PGD_ALLOC_ORDER);
-	int i;
-
-	if (!pgd)
-		return NULL;
-	if (s390_noexec) {
-		pgd_t *shadow_pgd = (pgd_t *)
-			__get_free_pages(GFP_KERNEL, PGD_ALLOC_ORDER);
-		struct page *page = virt_to_page(pgd);
-
-		if (!shadow_pgd) {
-			free_pages((unsigned long) pgd, PGD_ALLOC_ORDER);
-			return NULL;
-		}
-		page->lru.next = (void *) shadow_pgd;
-	}
-	for (i = 0; i < PTRS_PER_PGD; i++)
-#ifndef __s390x__
-		pmd_clear(pmd_offset(pgd + i, i*PGDIR_SIZE));
+	*s = val;
+	n = (n / 256) - 1;
+	asm volatile(
+#ifdef CONFIG_64BIT
+	"	mvc	8(248,%0),0(%0)\n"
 #else
-		pgd_clear(pgd + i);
+	"	mvc	4(252,%0),0(%0)\n"
 #endif
-	return pgd;
+	"0:	mvc	256(256,%0),0(%0)\n"
+	"	la	%0,256(%0)\n"
+	"	brct	%1,0b\n"
+	: "+a" (s), "+d" (n));
 }
 
-static inline void pgd_free(pgd_t *pgd)
+static inline void crst_table_init(unsigned long *crst, unsigned long entry)
 {
-	pgd_t *shadow_pgd = get_shadow_pgd(pgd);
-
-	if (shadow_pgd)
-		free_pages((unsigned long) shadow_pgd, PGD_ALLOC_ORDER);
-	free_pages((unsigned long) pgd, PGD_ALLOC_ORDER);
+	clear_table(crst, entry, sizeof(unsigned long)*2048);
+	crst = get_shadow_table(crst);
+	if (crst)
+		clear_table(crst, entry, sizeof(unsigned long)*2048);
 }
 
 #ifndef __s390x__
 
-/*
- * page middle directory allocation/free routines.
- * We use pmd cache only on s390x, so these are dummy routines. This
- * code never triggers because the pgd will always be present.
- */
-#define pmd_alloc_one(mm,address)	({ BUG(); ((pmd_t *)2); })
-#define pmd_free(x)			do { } while (0)
-#define pgd_populate(mm, pmd, pte)	BUG()
+
+static inline unsigned long pgd_entry_type(struct mm_struct *mm)
+{
+	return _SEGMENT_ENTRY_EMPTY;
+}
+
+#define pmd_alloc_one(mm,address)	({ BUG(); ((pmd_t *)2); })
+#define pmd_free(x)			do { } while (0)
+
+#define pgd_populate(mm, pmd, pte)	BUG()
 #define pgd_populate_kernel(mm, pmd, pte)	BUG()
+
 #else /* __s390x__ */
-static inline pmd_t * pmd_alloc_one(struct mm_struct *mm, unsigned long vmaddr)
+
+static inline unsigned long pgd_entry_type(struct mm_struct *mm)
 {
-	pmd_t *pmd = (pmd_t *) __get_free_pages(GFP_KERNEL, PMD_ALLOC_ORDER);
-	int i;
-
-	if (!pmd)
-		return NULL;
-	if (s390_noexec) {
-		pmd_t *shadow_pmd = (pmd_t *)
-			__get_free_pages(GFP_KERNEL, PMD_ALLOC_ORDER);
-		struct page *page = virt_to_page(pmd);
-
-		if (!shadow_pmd) {
-			free_pages((unsigned long) pmd, PMD_ALLOC_ORDER);
-			return NULL;
-		}
-		page->lru.next = (void *) shadow_pmd;
-	}
-	for (i=0; i < PTRS_PER_PMD; i++)
-		pmd_clear(pmd + i);
-	return pmd;
+	return _REGION3_ENTRY_EMPTY;
 }
 
-static inline void pmd_free (pmd_t *pmd)
+static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long vmaddr)
 {
-	pmd_t *shadow_pmd = get_shadow_pmd(pmd);
-
-	if (shadow_pmd)
-		free_pages((unsigned long) shadow_pmd, PMD_ALLOC_ORDER);
-	free_pages((unsigned long) pmd, PMD_ALLOC_ORDER);
+	unsigned long *crst = crst_table_alloc(mm, s390_noexec);
+	if (crst)
+		crst_table_init(crst, _SEGMENT_ENTRY_EMPTY);
+	return (pmd_t *) crst;
 }
+#define pmd_free(pmd) crst_table_free((unsigned long *) pmd)
 
-static inline void
-pgd_populate_kernel(struct mm_struct *mm, pgd_t *pgd, pmd_t *pmd)
+static inline void pgd_populate_kernel(struct mm_struct *mm,
+				       pgd_t *pgd, pmd_t *pmd)
 {
-	pgd_val(*pgd) = _PGD_ENTRY | __pa(pmd);
+	pgd_val(*pgd) = _REGION3_ENTRY | __pa(pmd);
 }
 
 static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pmd_t *pmd)
 {
-	pgd_t *shadow_pgd = get_shadow_pgd(pgd);
-	pmd_t *shadow_pmd = get_shadow_pmd(pmd);
+	pgd_t *shadow_pgd = get_shadow_table(pgd);
+	pmd_t *shadow_pmd = get_shadow_table(pmd);
 
 	if (shadow_pgd && shadow_pmd)
 		pgd_populate_kernel(mm, shadow_pgd, shadow_pmd);
@@ -135,17 +96,26 @@ static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pmd_t *pmd)
 
 #endif /* __s390x__ */
 
+static inline pgd_t *pgd_alloc(struct mm_struct *mm)
+{
+	unsigned long *crst = crst_table_alloc(mm, s390_noexec);
+	if (crst)
+		crst_table_init(crst, pgd_entry_type(mm));
+	return (pgd_t *) crst;
+}
+#define pgd_free(pgd) crst_table_free((unsigned long *) pgd)
+
 static inline void
 pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, pte_t *pte)
 {
 #ifndef __s390x__
-	pmd_val(pmd[0]) = _PAGE_TABLE + __pa(pte);
-	pmd_val(pmd[1]) = _PAGE_TABLE + __pa(pte+256);
-	pmd_val(pmd[2]) = _PAGE_TABLE + __pa(pte+512);
-	pmd_val(pmd[3]) = _PAGE_TABLE + __pa(pte+768);
+	pmd_val(pmd[0]) = _SEGMENT_ENTRY + __pa(pte);
+	pmd_val(pmd[1]) = _SEGMENT_ENTRY + __pa(pte+256);
+	pmd_val(pmd[2]) = _SEGMENT_ENTRY + __pa(pte+512);
+	pmd_val(pmd[3]) = _SEGMENT_ENTRY + __pa(pte+768);
 #else /* __s390x__ */
-	pmd_val(*pmd) = _PMD_ENTRY + __pa(pte);
-	pmd_val1(*pmd) = _PMD_ENTRY + __pa(pte+256);
+	pmd_val(*pmd) = _SEGMENT_ENTRY + __pa(pte);
+	pmd_val1(*pmd) = _SEGMENT_ENTRY + __pa(pte+256);
 #endif /* __s390x__ */
 }
 
@@ -153,7 +123,7 @@ static inline void
 pmd_populate(struct mm_struct *mm, pmd_t *pmd, struct page *page)
 {
 	pte_t *pte = (pte_t *)page_to_phys(page);
-	pmd_t *shadow_pmd = get_shadow_pmd(pmd);
+	pmd_t *shadow_pmd = get_shadow_table(pmd);
 	pte_t *shadow_pte = get_shadow_pte(pte);
 
 	pmd_populate_kernel(mm, pmd, pte);
@@ -164,57 +134,14 @@ pmd_populate(struct mm_struct *mm, pmd_t *pmd, struct page *page)
 
 /*
  * page table entry allocation/free routines.
  */
-static inline pte_t *
-pte_alloc_one_kernel(struct mm_struct *mm, unsigned long vmaddr)
-{
-	pte_t *pte = (pte_t *) __get_free_page(GFP_KERNEL|__GFP_REPEAT);
-	int i;
-
-	if (!pte)
-		return NULL;
-	if (s390_noexec) {
-		pte_t *shadow_pte = (pte_t *)
-			__get_free_page(GFP_KERNEL|__GFP_REPEAT);
-		struct page *page = virt_to_page(pte);
-
-		if (!shadow_pte) {
-			free_page((unsigned long) pte);
-			return NULL;
-		}
-		page->lru.next = (void *) shadow_pte;
-	}
-	for (i=0; i < PTRS_PER_PTE; i++) {
-		pte_clear(mm, vmaddr, pte + i);
-		vmaddr += PAGE_SIZE;
-	}
-	return pte;
-}
-
-static inline struct page *
-pte_alloc_one(struct mm_struct *mm, unsigned long vmaddr)
-{
-	pte_t *pte = pte_alloc_one_kernel(mm, vmaddr);
-	if (pte)
-		return virt_to_page(pte);
-	return NULL;
-}
-
-static inline void pte_free_kernel(pte_t *pte)
-{
-	pte_t *shadow_pte = get_shadow_pte(pte);
-
-	if (shadow_pte)
-		free_page((unsigned long) shadow_pte);
-	free_page((unsigned long) pte);
-}
-
-static inline void pte_free(struct page *pte)
-{
-	struct page *shadow_page = get_shadow_page(pte);
-
-	if (shadow_page)
-		__free_page(shadow_page);
-	__free_page(pte);
-}
+#define pte_alloc_one_kernel(mm, vmaddr) \
+	((pte_t *) page_table_alloc(s390_noexec))
+#define pte_alloc_one(mm, vmaddr) \
+	virt_to_page(page_table_alloc(s390_noexec))
+
+#define pte_free_kernel(pte) \
+	page_table_free((unsigned long *) pte)
+#define pte_free(pte) \
+	page_table_free((unsigned long *) page_to_phys((struct page *) pte))
 
 #endif /* _S390_PGALLOC_H */
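
The mvc-based clear_table() above stores the entry pattern once and then lets the machine propagate it through the table, copying 256 bytes at a time from already-initialized storage. A portable sketch of the same semantics (illustrative only, not a drop-in replacement):

	/*
	 * Fill n bytes starting at s with the entry value val, one
	 * unsigned long per slot - what the overlapping mvc achieves
	 * without a per-entry loop in C.
	 */
	static inline void clear_table_slow(unsigned long *s,
					    unsigned long val, size_t n)
	{
		size_t i;

		for (i = 0; i < n / sizeof(unsigned long); i++)
			s[i] = val;
	}
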
diff --git a/include/asm-s390/pgtable.h b/include/asm-s390/pgtable.h
index b424ab2..f9f59a8 100644
--- a/include/asm-s390/pgtable.h
+++ b/include/asm-s390/pgtable.h
@@ -35,9 +35,6 @@
 #include
 #include
 
-struct vm_area_struct; /* forward declaration (include/linux/mm.h) */
-struct mm_struct;
-
 extern pgd_t swapper_pg_dir[] __attribute__ ((aligned (4096)));
 extern void paging_init(void);
 extern void vmem_map_init(void);
@@ -221,6 +218,8 @@ extern unsigned long vmalloc_end;
 /* Hardware bits in the page table entry */
 #define _PAGE_RO	0x200		/* HW read-only bit  */
 #define _PAGE_INVALID	0x400		/* HW invalid bit    */
+
+/* Software bits in the page table entry */
 #define _PAGE_SWT	0x001		/* SW pte type bit t */
 #define _PAGE_SWX	0x002		/* SW pte type bit x */
 
@@ -264,60 +263,75 @@ extern unsigned long vmalloc_end;
 
 #ifndef __s390x__
 
-/* Bits in the segment table entry */
-#define _PAGE_TABLE_LEN 0xf		/* only full page-tables            */
-#define _PAGE_TABLE_COM 0x10		/* common page-table                */
-#define _PAGE_TABLE_INV 0x20		/* invalid page-table               */
-#define _SEG_PRESENT    0x001		/* Software (overlap with PTL)      */
-
-/* Bits int the storage key */
-#define _PAGE_CHANGED    0x02		/* HW changed bit                   */
-#define _PAGE_REFERENCED 0x04		/* HW referenced bit                */
-
-#define _USER_SEG_TABLE_LEN	0x7f	/* user-segment-table up to 2 GB    */
-#define _KERNEL_SEG_TABLE_LEN	0x7f	/* kernel-segment-table up to 2 GB  */
-
-/*
- * User and Kernel pagetables are identical
- */
-#define _PAGE_TABLE	_PAGE_TABLE_LEN
-#define _KERNPG_TABLE	_PAGE_TABLE_LEN
-
-/*
- * The Kernel segment-tables includes the User segment-table
- */
+/* Bits in the segment table address-space-control-element */
+#define _ASCE_SPACE_SWITCH	0x80000000UL	/* space switch event       */
+#define _ASCE_ORIGIN_MASK	0x7ffff000UL	/* segment table origin     */
+#define _ASCE_PRIVATE_SPACE	0x100	/* private space control            */
+#define _ASCE_ALT_EVENT		0x80	/* storage alteration event control */
+#define _ASCE_TABLE_LENGTH	0x7f	/* 128 x 64 entries = 8k            */
 
-#define _SEGMENT_TABLE	(_USER_SEG_TABLE_LEN|0x80000000|0x100)
-#define _KERNSEG_TABLE	_KERNEL_SEG_TABLE_LEN
+/* Bits in the segment table entry */
+#define _SEGMENT_ENTRY_ORIGIN	0x7fffffc0UL	/* page table origin        */
+#define _SEGMENT_ENTRY_INV	0x20	/* invalid segment table entry      */
+#define _SEGMENT_ENTRY_COMMON	0x10	/* common segment bit               */
+#define _SEGMENT_ENTRY_PTL	0x0f	/* page table length                */
 
-#define USER_STD_MASK		0x00000080UL
+#define _SEGMENT_ENTRY		(_SEGMENT_ENTRY_PTL)
+#define _SEGMENT_ENTRY_EMPTY	(_SEGMENT_ENTRY_INV)
 
 #else /* __s390x__ */
 
+/* Bits in the segment/region table address-space-control-element */
+#define _ASCE_ORIGIN		~0xfffUL/* segment table origin             */
+#define _ASCE_PRIVATE_SPACE	0x100	/* private space control            */
+#define _ASCE_ALT_EVENT		0x80	/* storage alteration event control */
+#define _ASCE_SPACE_SWITCH	0x40	/* space switch event               */
+#define _ASCE_REAL_SPACE	0x20	/* real space control               */
+#define _ASCE_TYPE_MASK		0x0c	/* asce table type mask             */
+#define _ASCE_TYPE_REGION1	0x0c	/* region first table type          */
+#define _ASCE_TYPE_REGION2	0x08	/* region second table type         */
+#define _ASCE_TYPE_REGION3	0x04	/* region third table type          */
+#define _ASCE_TYPE_SEGMENT	0x00	/* segment table type               */
+#define _ASCE_TABLE_LENGTH	0x03	/* region table length              */
+
+/* Bits in the region table entry */
+#define _REGION_ENTRY_ORIGIN	~0xfffUL/* region/segment table origin      */
+#define _REGION_ENTRY_INV	0x20	/* invalid region table entry       */
+#define _REGION_ENTRY_TYPE_MASK	0x0c	/* region/segment table type mask   */
+#define _REGION_ENTRY_TYPE_R1	0x0c	/* region first table type          */
+#define _REGION_ENTRY_TYPE_R2	0x08	/* region second table type         */
+#define _REGION_ENTRY_TYPE_R3	0x04	/* region third table type          */
+#define _REGION_ENTRY_LENGTH	0x03	/* region third length              */
+
+#define _REGION1_ENTRY		(_REGION_ENTRY_TYPE_R1 | _REGION_ENTRY_LENGTH)
+#define _REGION1_ENTRY_EMPTY	(_REGION_ENTRY_TYPE_R1 | _REGION_ENTRY_INV)
+#define _REGION2_ENTRY		(_REGION_ENTRY_TYPE_R2 | _REGION_ENTRY_LENGTH)
+#define _REGION2_ENTRY_EMPTY	(_REGION_ENTRY_TYPE_R2 | _REGION_ENTRY_INV)
+#define _REGION3_ENTRY		(_REGION_ENTRY_TYPE_R3 | _REGION_ENTRY_LENGTH)
+#define _REGION3_ENTRY_EMPTY	(_REGION_ENTRY_TYPE_R3 | _REGION_ENTRY_INV)
+
 /* Bits in the segment table entry */
-#define _PMD_ENTRY_INV	0x20		/* invalid segment table entry      */
-#define _PMD_ENTRY	0x00
+#define _SEGMENT_ENTRY_ORIGIN	~0x7ffUL/* segment table origin             */
+#define _SEGMENT_ENTRY_RO	0x200	/* page protection bit              */
+#define _SEGMENT_ENTRY_INV	0x20	/* invalid segment table entry      */
 
-/* Bits in the region third table entry */
-#define _PGD_ENTRY_INV	0x20		/* invalid region table entry       */
-#define _PGD_ENTRY	0x07
+#define _SEGMENT_ENTRY		(0)
+#define _SEGMENT_ENTRY_EMPTY	(_SEGMENT_ENTRY_INV)
+
+#endif /* __s390x__ */
 
 /*
- * User and kernel page directory
+ * A user page table pointer has the space-switch-event bit, the
+ * private-space-control bit and the storage-alteration-event-control
+ * bit set. A kernel page table pointer doesn't need them.
  */
-#define _REGION_THIRD		0x4
-#define _REGION_THIRD_LEN	0x3
-#define _REGION_TABLE		(_REGION_THIRD|_REGION_THIRD_LEN|0x40|0x100)
-#define _KERN_REGION_TABLE	(_REGION_THIRD|_REGION_THIRD_LEN)
-
-#define USER_STD_MASK		0x0000000000000080UL
+#define _ASCE_USER_BITS		(_ASCE_SPACE_SWITCH | _ASCE_PRIVATE_SPACE | \
+				 _ASCE_ALT_EVENT)
 
-/* Bits in the storage key */
+/* Bits in the storage key */
 #define _PAGE_CHANGED    0x02		/* HW changed bit                   */
 #define _PAGE_REFERENCED 0x04		/* HW referenced bit                */
 
-#endif /* __s390x__ */
-
 /*
  * Page protection definitions.
  */
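
A worked check of the renamed bit definitions (illustrative, derived from the values above):

	/*
	 * The new systematic names encode the same hardware bits as the
	 * old ad-hoc ones.  On 64 bit, the old _PGD_ENTRY was 0x07; the
	 * new _REGION3_ENTRY is
	 *   _REGION_ENTRY_TYPE_R3 | _REGION_ENTRY_LENGTH = 0x04 | 0x03 = 0x07.
	 * An empty entry sets the invalid bit instead:
	 *   _REGION3_ENTRY_EMPTY = 0x04 | 0x20 = 0x24.
	 * Likewise the old _PMD_ENTRY (0x00) matches the new
	 * _SEGMENT_ENTRY (0).
	 */
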
@@ -358,65 +372,38 @@ extern unsigned long vmalloc_end;
 #define __S111	PAGE_EX_RW
 
 #ifndef __s390x__
-# define PMD_SHADOW_SHIFT	1
-# define PGD_SHADOW_SHIFT	1
+# define PxD_SHADOW_SHIFT	1
 #else /* __s390x__ */
-# define PMD_SHADOW_SHIFT	2
-# define PGD_SHADOW_SHIFT	2
+# define PxD_SHADOW_SHIFT	2
 #endif /* __s390x__ */
 
 static inline struct page *get_shadow_page(struct page *page)
 {
-	if (s390_noexec && !list_empty(&page->lru))
-		return virt_to_page(page->lru.next);
-	return NULL;
-}
-
-static inline pte_t *get_shadow_pte(pte_t *ptep)
-{
-	unsigned long pteptr = (unsigned long) (ptep);
-
-	if (s390_noexec) {
-		unsigned long offset = pteptr & (PAGE_SIZE - 1);
-		void *addr = (void *) (pteptr ^ offset);
-		struct page *page = virt_to_page(addr);
-		if (!list_empty(&page->lru))
-			return (pte_t *) ((unsigned long) page->lru.next |
-					  offset);
-	}
+	if (s390_noexec && page->index)
+		return virt_to_page((void *)(addr_t) page->index);
 	return NULL;
 }
 
-static inline pmd_t *get_shadow_pmd(pmd_t *pmdp)
+static inline void *get_shadow_pte(void *table)
 {
-	unsigned long pmdptr = (unsigned long) (pmdp);
+	unsigned long addr, offset;
+	struct page *page;
 
-	if (s390_noexec) {
-		unsigned long offset = pmdptr &
-			((PAGE_SIZE << PMD_SHADOW_SHIFT) - 1);
-		void *addr = (void *) (pmdptr ^ offset);
-		struct page *page = virt_to_page(addr);
-		if (!list_empty(&page->lru))
-			return (pmd_t *) ((unsigned long) page->lru.next |
-					  offset);
-	}
-	return NULL;
+	addr = (unsigned long) table;
+	offset = addr & (PAGE_SIZE - 1);
+	page = virt_to_page((void *)(addr ^ offset));
+	return (void *)(addr_t)(page->index ? (page->index | offset) : 0UL);
 }
 
-static inline pgd_t *get_shadow_pgd(pgd_t *pgdp)
+static inline void *get_shadow_table(void *table)
 {
-	unsigned long pgdptr = (unsigned long) (pgdp);
+	unsigned long addr, offset;
+	struct page *page;
 
-	if (s390_noexec) {
-		unsigned long offset = pgdptr &
-			((PAGE_SIZE << PGD_SHADOW_SHIFT) - 1);
-		void *addr = (void *) (pgdptr ^ offset);
-		struct page *page = virt_to_page(addr);
-		if (!list_empty(&page->lru))
-			return (pgd_t *) ((unsigned long) page->lru.next |
-					  offset);
-	}
-	return NULL;
+	addr = (unsigned long) table;
+	offset = addr & ((PAGE_SIZE << PxD_SHADOW_SHIFT) - 1);
+	page = virt_to_page((void *)(addr ^ offset));
+	return (void *)(addr_t)(page->index ? (page->index | offset) : 0UL);
 }
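
How the lookup above recovers a shadow entry, as a worked example with hypothetical addresses (PxD_SHADOW_SHIFT is 2 on 64 bit, so a crst table spans 4 pages):

	/*
	 * Assume a crst table at 0x20000..0x23fff whose first struct page
	 * has page->index = 0x80000 (the shadow table's origin).  For an
	 * entry at 0x21008:
	 *   offset = 0x21008 & ((4096 << 2) - 1) = 0x1008
	 *   origin = 0x21008 ^ 0x1008           = 0x20000
	 *   result = 0x80000 | 0x1008           = 0x81008
	 * i.e. the matching entry inside the shadow table.
	 */
	static unsigned long example_shadow_entry(unsigned long entry,
						  unsigned long shadow_origin)
	{
		unsigned long offset = entry & ((PAGE_SIZE << 2) - 1);

		return shadow_origin | offset;
	}
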
 
 /*
@@ -448,47 +435,42 @@ static inline int pgd_present(pgd_t pgd) { return 1; }
 static inline int pgd_none(pgd_t pgd)	 { return 0; }
 static inline int pgd_bad(pgd_t pgd)	 { return 0; }
 
-static inline int pmd_present(pmd_t pmd) { return pmd_val(pmd) & _SEG_PRESENT; }
-static inline int pmd_none(pmd_t pmd)	 { return pmd_val(pmd) & _PAGE_TABLE_INV; }
-static inline int pmd_bad(pmd_t pmd)
-{
-	return (pmd_val(pmd) & (~PAGE_MASK & ~_PAGE_TABLE_INV)) != _PAGE_TABLE;
-}
-
 #else /* __s390x__ */
 
 static inline int pgd_present(pgd_t pgd)
 {
-	return (pgd_val(pgd) & ~PAGE_MASK) == _PGD_ENTRY;
+	return pgd_val(pgd) & _REGION_ENTRY_ORIGIN;
 }
 
 static inline int pgd_none(pgd_t pgd)
 {
-	return pgd_val(pgd) & _PGD_ENTRY_INV;
+	return pgd_val(pgd) & _REGION_ENTRY_INV;
 }
 
 static inline int pgd_bad(pgd_t pgd)
 {
-	return (pgd_val(pgd) & (~PAGE_MASK & ~_PGD_ENTRY_INV)) != _PGD_ENTRY;
+	unsigned long mask = ~_REGION_ENTRY_ORIGIN & ~_REGION_ENTRY_INV;
+	return (pgd_val(pgd) & mask) != _REGION3_ENTRY;
 }
 
+#endif /* __s390x__ */
+
 static inline int pmd_present(pmd_t pmd)
 {
-	return (pmd_val(pmd) & ~PAGE_MASK) == _PMD_ENTRY;
+	return pmd_val(pmd) & _SEGMENT_ENTRY_ORIGIN;
 }
 
 static inline int pmd_none(pmd_t pmd)
 {
-	return pmd_val(pmd) & _PMD_ENTRY_INV;
+	return pmd_val(pmd) & _SEGMENT_ENTRY_INV;
 }
 
 static inline int pmd_bad(pmd_t pmd)
 {
-	return (pmd_val(pmd) & (~PAGE_MASK & ~_PMD_ENTRY_INV)) != _PMD_ENTRY;
+	unsigned long mask = ~_SEGMENT_ENTRY_ORIGIN & ~_SEGMENT_ENTRY_INV;
+	return (pmd_val(pmd) & mask) != _SEGMENT_ENTRY;
 }
 
-#endif /* __s390x__ */
-
 static inline int pte_none(pte_t pte)
 {
 	return (pte_val(pte) & _PAGE_INVALID) && !(pte_val(pte) & _PAGE_SWT);
@@ -548,31 +530,22 @@ static inline void pgd_clear(pgd_t * pgdp) { }
 
 static inline void pmd_clear_kernel(pmd_t * pmdp)
 {
-	pmd_val(pmdp[0]) = _PAGE_TABLE_INV;
-	pmd_val(pmdp[1]) = _PAGE_TABLE_INV;
-	pmd_val(pmdp[2]) = _PAGE_TABLE_INV;
-	pmd_val(pmdp[3]) = _PAGE_TABLE_INV;
-}
-
-static inline void pmd_clear(pmd_t * pmdp)
-{
-	pmd_t *shadow_pmd = get_shadow_pmd(pmdp);
-
-	pmd_clear_kernel(pmdp);
-	if (shadow_pmd)
-		pmd_clear_kernel(shadow_pmd);
+	pmd_val(pmdp[0]) = _SEGMENT_ENTRY_EMPTY;
+	pmd_val(pmdp[1]) = _SEGMENT_ENTRY_EMPTY;
+	pmd_val(pmdp[2]) = _SEGMENT_ENTRY_EMPTY;
+	pmd_val(pmdp[3]) = _SEGMENT_ENTRY_EMPTY;
 }
 
 #else /* __s390x__ */
 
 static inline void pgd_clear_kernel(pgd_t * pgdp)
 {
-	pgd_val(*pgdp) = _PGD_ENTRY_INV | _PGD_ENTRY;
+	pgd_val(*pgdp) = _REGION3_ENTRY_EMPTY;
 }
 
 static inline void pgd_clear(pgd_t * pgdp)
 {
-	pgd_t *shadow_pgd = get_shadow_pgd(pgdp);
+	pgd_t *shadow_pgd = get_shadow_table(pgdp);
 
 	pgd_clear_kernel(pgdp);
 	if (shadow_pgd)
@@ -581,21 +554,21 @@ static inline void pgd_clear(pgd_t * pgdp)
 
 static inline void pmd_clear_kernel(pmd_t * pmdp)
 {
-	pmd_val(*pmdp) = _PMD_ENTRY_INV | _PMD_ENTRY;
-	pmd_val1(*pmdp) = _PMD_ENTRY_INV | _PMD_ENTRY;
+	pmd_val(*pmdp) = _SEGMENT_ENTRY_EMPTY;
+	pmd_val1(*pmdp) = _SEGMENT_ENTRY_EMPTY;
 }
 
+#endif /* __s390x__ */
+
 static inline void pmd_clear(pmd_t * pmdp)
 {
-	pmd_t *shadow_pmd = get_shadow_pmd(pmdp);
+	pmd_t *shadow_pmd = get_shadow_table(pmdp);
 
 	pmd_clear_kernel(pmdp);
 	if (shadow_pmd)
 		pmd_clear_kernel(shadow_pmd);
 }
 
-#endif /* __s390x__ */
-
 static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
 {
 	pte_t *shadow_pte = get_shadow_pte(ptep);
diff --git a/include/asm-s390/processor.h b/include/asm-s390/processor.h
index 81efccc..21d40a1 100644
--- a/include/asm-s390/processor.h
+++ b/include/asm-s390/processor.h
@@ -127,12 +127,6 @@ struct stack_frame {
 
 #define ARCH_MIN_TASKALIGN	8
 
-#ifndef __s390x__
-# define __SWAPPER_PG_DIR __pa(&swapper_pg_dir[0]) + _SEGMENT_TABLE
-#else /* __s390x__ */
-# define __SWAPPER_PG_DIR __pa(&swapper_pg_dir[0]) + _REGION_TABLE
-#endif /* __s390x__ */
-
 #define INIT_THREAD { \
 	.ksp = sizeof(init_stack) + (unsigned long) &init_stack, \
 }
diff --git a/include/asm-s390/tlbflush.h b/include/asm-s390/tlbflush.h
index 3a9985f..a69bd24 100644
--- a/include/asm-s390/tlbflush.h
+++ b/include/asm-s390/tlbflush.h
@@ -61,7 +61,7 @@ static inline void __tlb_flush_mm(struct mm_struct * mm)
 	 * only ran on the local cpu.
 	 */
 	if (MACHINE_HAS_IDTE) {
-		pgd_t *shadow_pgd = get_shadow_pgd(mm->pgd);
+		pgd_t *shadow_pgd = get_shadow_table(mm->pgd);
 
 		if (shadow_pgd)
 			__tlb_flush_idte(shadow_pgd);
-- 
cgit v0.10.2

From 190a1d722a59725706daf832bc8a511ed62f249d Mon Sep 17 00:00:00 2001
From: Martin Schwidefsky
Date: Mon, 22 Oct 2007 12:52:48 +0200
Subject: [S390] 4level-fixup cleanup

Get independent of asm-generic/4level-fixup.h

Signed-off-by: Martin Schwidefsky

diff --git a/arch/s390/lib/uaccess_pt.c b/arch/s390/lib/uaccess_pt.c
index dc37ea8..7e8efaa 100644
--- a/arch/s390/lib/uaccess_pt.c
+++ b/arch/s390/lib/uaccess_pt.c
@@ -18,13 +18,18 @@
 static inline pte_t *follow_table(struct mm_struct *mm, unsigned long addr)
 {
 	pgd_t *pgd;
+	pud_t *pud;
 	pmd_t *pmd;
 
 	pgd = pgd_offset(mm, addr);
 	if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd)))
 		return NULL;
 
-	pmd = pmd_offset(pgd, addr);
+	pud = pud_offset(pgd, addr);
+	if (pud_none(*pud) || unlikely(pud_bad(*pud)))
+		return NULL;
+
+	pmd = pmd_offset(pud, addr);
 	if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd)))
 		return NULL;
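
A hedged note on the walk above: with the pud level introduced, the extra step costs nothing on s390. On 31 bit, pud_offset() is a plain cast and pud_none()/pud_bad() are the constant stubs defined later in this patch, so the compiler folds the walk back to pgd/pmd/pte; on 64 bit, the region-third checks formerly done by the pgd_* helpers simply move into the pud_* helpers. Sketch:

	/* Illustration only, not part of the patch: */
	static inline pmd_t *example_pmd_lookup(struct mm_struct *mm,
						unsigned long addr)
	{
		pgd_t *pgd = pgd_offset(mm, addr);
		pud_t *pud = pud_offset(pgd, addr);	/* (pud_t *) pgd */

		return pud_none(*pud) ? NULL : pmd_offset(pud, addr);
	}
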
diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c
index 90ec058..b234bb4 100644
--- a/arch/s390/mm/init.c
+++ b/arch/s390/mm/init.c
@@ -81,6 +81,7 @@ void show_mem(void)
 static void __init setup_ro_region(void)
 {
 	pgd_t *pgd;
+	pud_t *pud;
 	pmd_t *pmd;
 	pte_t *pte;
 	pte_t new_pte;
@@ -91,7 +92,8 @@ static void __init setup_ro_region(void)
 
 	for (; address < end; address += PAGE_SIZE) {
 		pgd = pgd_offset_k(address);
-		pmd = pmd_offset(pgd, address);
+		pud = pud_offset(pgd, address);
+		pmd = pmd_offset(pud, address);
 		pte = pte_offset_kernel(pmd, address);
 		new_pte = mk_pte_phys(address, __pgprot(_PAGE_RO));
 		*pte = new_pte;
diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c
index 1bd51d8..fb9c5a8 100644
--- a/arch/s390/mm/vmem.c
+++ b/arch/s390/mm/vmem.c
@@ -73,6 +73,8 @@ static void __init_refok *vmem_alloc_pages(unsigned int order)
 	return alloc_bootmem_pages((1 << order) * PAGE_SIZE);
 }
 
+#define vmem_pud_alloc()	({ BUG(); ((pud_t *) NULL); })
+
 static inline pmd_t *vmem_pmd_alloc(void)
 {
 	pmd_t *pmd = NULL;
@@ -103,6 +105,7 @@ static int vmem_add_range(unsigned long start, unsigned long size)
 {
 	unsigned long address;
 	pgd_t *pg_dir;
+	pud_t *pu_dir;
 	pmd_t *pm_dir;
 	pte_t *pt_dir;
 	pte_t  pte;
@@ -111,13 +114,21 @@ static int vmem_add_range(unsigned long start, unsigned long size)
 	for (address = start; address < start + size; address += PAGE_SIZE) {
 		pg_dir = pgd_offset_k(address);
 		if (pgd_none(*pg_dir)) {
+			pu_dir = vmem_pud_alloc();
+			if (!pu_dir)
+				goto out;
+			pgd_populate_kernel(&init_mm, pg_dir, pu_dir);
+		}
+
+		pu_dir = pud_offset(pg_dir, address);
+		if (pud_none(*pu_dir)) {
 			pm_dir = vmem_pmd_alloc();
 			if (!pm_dir)
 				goto out;
-			pgd_populate_kernel(&init_mm, pg_dir, pm_dir);
+			pud_populate_kernel(&init_mm, pu_dir, pm_dir);
 		}
 
-		pm_dir = pmd_offset(pg_dir, address);
+		pm_dir = pmd_offset(pu_dir, address);
 		if (pmd_none(*pm_dir)) {
 			pt_dir = vmem_pte_alloc();
 			if (!pt_dir)
@@ -143,6 +154,7 @@ static void vmem_remove_range(unsigned long start, unsigned long size)
 {
 	unsigned long address;
 	pgd_t *pg_dir;
+	pud_t *pu_dir;
 	pmd_t *pm_dir;
 	pte_t *pt_dir;
 	pte_t  pte;
@@ -150,9 +162,10 @@ static void vmem_remove_range(unsigned long start, unsigned long size)
 	pte_val(pte) = _PAGE_TYPE_EMPTY;
 	for (address = start; address < start + size; address += PAGE_SIZE) {
 		pg_dir = pgd_offset_k(address);
-		if (pgd_none(*pg_dir))
+		pu_dir = pud_offset(pg_dir, address);
+		if (pud_none(*pu_dir))
 			continue;
-		pm_dir = pmd_offset(pg_dir, address);
+		pm_dir = pmd_offset(pu_dir, address);
 		if (pmd_none(*pm_dir))
 			continue;
 		pt_dir = pte_offset_kernel(pm_dir, address);
@@ -169,6 +182,7 @@ static int vmem_add_mem_map(unsigned long start, unsigned long size)
 	unsigned long address, start_addr, end_addr;
 	struct page *map_start, *map_end;
 	pgd_t *pg_dir;
+	pud_t *pu_dir;
 	pmd_t *pm_dir;
 	pte_t *pt_dir;
 	pte_t  pte;
@@ -183,13 +197,21 @@ static int vmem_add_mem_map(unsigned long start, unsigned long size)
 	for (address = start_addr; address < end_addr; address += PAGE_SIZE) {
 		pg_dir = pgd_offset_k(address);
 		if (pgd_none(*pg_dir)) {
+			pu_dir = vmem_pud_alloc();
+			if (!pu_dir)
+				goto out;
+			pgd_populate_kernel(&init_mm, pg_dir, pu_dir);
+		}
+
+		pu_dir = pud_offset(pg_dir, address);
+		if (pud_none(*pu_dir)) {
 			pm_dir = vmem_pmd_alloc();
 			if (!pm_dir)
 				goto out;
-			pgd_populate_kernel(&init_mm, pg_dir, pm_dir);
+			pud_populate_kernel(&init_mm, pu_dir, pm_dir);
 		}
 
-		pm_dir = pmd_offset(pg_dir, address);
+		pm_dir = pmd_offset(pu_dir, address);
 		if (pmd_none(*pm_dir)) {
 			pt_dir = vmem_pte_alloc();
 			if (!pt_dir)
diff --git a/include/asm-s390/page.h b/include/asm-s390/page.h
index ceec382..584d0ee 100644
--- a/include/asm-s390/page.h
+++ b/include/asm-s390/page.h
@@ -82,6 +82,7 @@ typedef struct { unsigned long pte; } pte_t;
 #ifndef __s390x__
 
 typedef struct { unsigned long pmd; } pmd_t;
+typedef struct { unsigned long pud; } pud_t;
 typedef struct {
 	unsigned long pgd0;
 	unsigned long pgd1;
@@ -90,6 +91,7 @@ typedef struct {
 } pgd_t;
 
 #define pmd_val(x)	((x).pmd)
+#define pud_val(x)	((x).pud)
 #define pgd_val(x)	((x).pgd0)
 
 #else /* __s390x__ */
@@ -98,10 +100,12 @@ typedef struct {
 	unsigned long pmd0;
 	unsigned long pmd1;
 } pmd_t;
+typedef struct { unsigned long pud; } pud_t;
 typedef struct { unsigned long pgd; } pgd_t;
 
 #define pmd_val(x)	((x).pmd0)
 #define pmd_val1(x)	((x).pmd1)
+#define pud_val(x)	((x).pud)
 #define pgd_val(x)	((x).pgd)
 
 #endif /* __s390x__ */
diff --git a/include/asm-s390/pgalloc.h b/include/asm-s390/pgalloc.h
index 229b0bd..709dd17 100644
--- a/include/asm-s390/pgalloc.h
+++ b/include/asm-s390/pgalloc.h
@@ -56,11 +56,17 @@ static inline unsigned long pgd_entry_type(struct mm_struct *mm)
 	return _SEGMENT_ENTRY_EMPTY;
 }
 
+#define pud_alloc_one(mm,address)	({ BUG(); ((pud_t *)2); })
+#define pud_free(x)			do { } while (0)
+
 #define pmd_alloc_one(mm,address)	({ BUG(); ((pmd_t *)2); })
 #define pmd_free(x)			do { } while (0)
 
-#define pgd_populate(mm, pmd, pte)	BUG()
-#define pgd_populate_kernel(mm, pmd, pte)	BUG()
+#define pgd_populate(mm, pgd, pud)	BUG()
+#define pgd_populate_kernel(mm, pgd, pud)	BUG()
+
+#define pud_populate(mm, pud, pmd)	BUG()
+#define pud_populate_kernel(mm, pud, pmd)	BUG()
 
 #else /* __s390x__ */
 
@@ -69,6 +75,9 @@ static inline unsigned long pgd_entry_type(struct mm_struct *mm)
 	return _REGION3_ENTRY_EMPTY;
 }
 
+#define pud_alloc_one(mm,address)	({ BUG(); ((pud_t *)2); })
+#define pud_free(x)			do { } while (0)
+
 static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long vmaddr)
 {
 	unsigned long *crst = crst_table_alloc(mm, s390_noexec);
@@ -78,20 +87,23 @@ static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long vmaddr)
 }
 #define pmd_free(pmd) crst_table_free((unsigned long *) pmd)
 
-static inline void pgd_populate_kernel(struct mm_struct *mm,
-				       pgd_t *pgd, pmd_t *pmd)
+#define pgd_populate(mm, pgd, pud)		BUG()
+#define pgd_populate_kernel(mm, pgd, pud)	BUG()
+
+static inline void pud_populate_kernel(struct mm_struct *mm,
+				       pud_t *pud, pmd_t *pmd)
 {
-	pgd_val(*pgd) = _REGION3_ENTRY | __pa(pmd);
+	pud_val(*pud) = _REGION3_ENTRY | __pa(pmd);
 }
 
-static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pmd_t *pmd)
+static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
 {
-	pgd_t *shadow_pgd = get_shadow_table(pgd);
+	pud_t *shadow_pud = get_shadow_table(pud);
 	pmd_t *shadow_pmd = get_shadow_table(pmd);
 
-	if (shadow_pgd && shadow_pmd)
-		pgd_populate_kernel(mm, shadow_pgd, shadow_pmd);
-	pgd_populate_kernel(mm, pgd, pmd);
+	if (shadow_pud && shadow_pmd)
+		pud_populate_kernel(mm, shadow_pud, shadow_pmd);
+	pud_populate_kernel(mm, pud, pmd);
 }
 
 #endif /* __s390x__ */
diff --git a/include/asm-s390/pgtable.h b/include/asm-s390/pgtable.h
index f9f59a8..f2cc25b 100644
--- a/include/asm-s390/pgtable.h
+++ b/include/asm-s390/pgtable.h
@@ -13,8 +13,6 @@
 #ifndef _ASM_S390_PGTABLE_H
 #define _ASM_S390_PGTABLE_H
 
-#include
-
 /*
  * The Linux memory management assumes a three-level page table setup. For
  * s390 31 bit we "fold" the mid level into the top-level page table, so
@@ -60,14 +58,18 @@ extern char empty_zero_page[PAGE_SIZE];
  */
 #ifndef __s390x__
 # define PMD_SHIFT	22
+# define PUD_SHIFT	22
 # define PGDIR_SHIFT	22
 #else /* __s390x__ */
 # define PMD_SHIFT	21
+# define PUD_SHIFT	31
 # define PGDIR_SHIFT	31
 #endif /* __s390x__ */
 
 #define PMD_SIZE	(1UL << PMD_SHIFT)
 #define PMD_MASK	(~(PMD_SIZE-1))
+#define PUD_SIZE	(1UL << PUD_SHIFT)
+#define PUD_MASK	(~(PUD_SIZE-1))
 #define PGDIR_SIZE	(1UL << PGDIR_SHIFT)
 #define PGDIR_MASK	(~(PGDIR_SIZE-1))
 
@@ -80,10 +82,12 @@ extern char empty_zero_page[PAGE_SIZE];
 #ifndef __s390x__
 # define PTRS_PER_PTE	1024
 # define PTRS_PER_PMD	1
+# define PTRS_PER_PUD	1
 # define PTRS_PER_PGD	512
 #else /* __s390x__ */
 # define PTRS_PER_PTE	512
 # define PTRS_PER_PMD	1024
+# define PTRS_PER_PUD	1
 # define PTRS_PER_PGD	2048
 #endif /* __s390x__ */
 
@@ -93,6 +97,8 @@ extern char empty_zero_page[PAGE_SIZE];
 	printk("%s:%d: bad pte %p.\n", __FILE__, __LINE__, (void *) pte_val(e))
 #define pmd_ERROR(e) \
 	printk("%s:%d: bad pmd %p.\n", __FILE__, __LINE__, (void *) pmd_val(e))
+#define pud_ERROR(e) \
+	printk("%s:%d: bad pud %p.\n", __FILE__, __LINE__, (void *) pud_val(e))
 #define pgd_ERROR(e) \
 	printk("%s:%d: bad pgd %p.\n", __FILE__, __LINE__, (void *) pgd_val(e))
 
@@ -192,7 +198,7 @@ extern unsigned long vmalloc_end;
 * I Segment-Invalid Bit:    Segment is not available for address-translation
 * TT Type 01
 * TF
- * TL Table lenght
+ * TL Table length
 *
 * The 64 bit regiontable origin of S390 has following format:
 * |      region table origon                          |       DTTL
@@ -435,22 +441,30 @@ static inline int pgd_present(pgd_t pgd) { return 1; }
 static inline int pgd_none(pgd_t pgd)	 { return 0; }
 static inline int pgd_bad(pgd_t pgd)	 { return 0; }
 
+static inline int pud_present(pud_t pud) { return 1; }
+static inline int pud_none(pud_t pud)	 { return 0; }
+static inline int pud_bad(pud_t pud)	 { return 0; }
+
 #else /* __s390x__ */
 
-static inline int pgd_present(pgd_t pgd)
+static inline int pgd_present(pgd_t pgd) { return 1; }
+static inline int pgd_none(pgd_t pgd)	 { return 0; }
+static inline int pgd_bad(pgd_t pgd)	 { return 0; }
+
+static inline int pud_present(pud_t pud)
 {
-	return pgd_val(pgd) & _REGION_ENTRY_ORIGIN;
+	return pud_val(pud) & _REGION_ENTRY_ORIGIN;
 }
 
-static inline int pgd_none(pgd_t pgd)
+static inline int pud_none(pud_t pud)
 {
-	return pgd_val(pgd) & _REGION_ENTRY_INV;
+	return pud_val(pud) & _REGION_ENTRY_INV;
 }
 
-static inline int pgd_bad(pgd_t pgd)
+static inline int pud_bad(pud_t pud)
 {
 	unsigned long mask = ~_REGION_ENTRY_ORIGIN & ~_REGION_ENTRY_INV;
-	return (pgd_val(pgd) & mask) != _REGION3_ENTRY;
+	return (pud_val(pud) & mask) != _REGION3_ENTRY;
 }
 
 #endif /* __s390x__ */
@@ -526,7 +540,8 @@ static inline int pte_young(pte_t pte)
 
 #ifndef __s390x__
 
-static inline void pgd_clear(pgd_t * pgdp)	{ }
+#define pgd_clear(pgd)		do { } while (0)
+#define pud_clear(pud)		do { } while (0)
 
 static inline void pmd_clear_kernel(pmd_t * pmdp)
 {
@@ -538,18 +553,20 @@ static inline void pmd_clear_kernel(pmd_t * pmdp)
 
 #else /* __s390x__ */
 
-static inline void pgd_clear_kernel(pgd_t * pgdp)
+#define pgd_clear(pgd)		do { } while (0)
+
+static inline void pud_clear_kernel(pud_t *pud)
 {
-	pgd_val(*pgdp) = _REGION3_ENTRY_EMPTY;
+	pud_val(*pud) = _REGION3_ENTRY_EMPTY;
 }
 
-static inline void pgd_clear(pgd_t * pgdp)
+static inline void pud_clear(pud_t * pud)
 {
-	pgd_t *shadow_pgd = get_shadow_table(pgdp);
+	pud_t *shadow = get_shadow_table(pud);
 
-	pgd_clear_kernel(pgdp);
-	if (shadow_pgd)
-		pgd_clear_kernel(shadow_pgd);
+	pud_clear_kernel(pud);
+	if (shadow)
+		pud_clear_kernel(shadow);
 }
 
 static inline void pmd_clear_kernel(pmd_t * pmdp)
@@ -810,63 +827,48 @@ static inline pte_t mk_pte(struct page *page, pgprot_t pgprot)
 	return mk_pte_phys(physpage, pgprot);
 }
 
-static inline pte_t pfn_pte(unsigned long pfn, pgprot_t pgprot)
-{
-	unsigned long physpage = __pa((pfn) << PAGE_SHIFT);
-
-	return mk_pte_phys(physpage, pgprot);
-}
-
-#ifdef __s390x__
-
-static inline pmd_t pfn_pmd(unsigned long pfn, pgprot_t pgprot)
-{
-	unsigned long physpage = __pa((pfn) << PAGE_SHIFT);
-
-	return __pmd(physpage + pgprot_val(pgprot));
-}
-
-#endif /* __s390x__ */
-
-#define pte_pfn(x) (pte_val(x) >> PAGE_SHIFT)
-#define pte_page(x) pfn_to_page(pte_pfn(x))
+#define pgd_index(address) (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD-1))
+#define pud_index(address) (((address) >> PUD_SHIFT) & (PTRS_PER_PUD-1))
+#define pmd_index(address) (((address) >> PMD_SHIFT) & (PTRS_PER_PMD-1))
+#define pte_index(address) (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE-1))
 
-#define pmd_page_vaddr(pmd) (pmd_val(pmd) & PAGE_MASK)
+#define pgd_offset(mm, address) ((mm)->pgd + pgd_index(address))
+#define pgd_offset_k(address) pgd_offset(&init_mm, address)
 
-#define pmd_page(pmd) pfn_to_page(pmd_val(pmd) >> PAGE_SHIFT)
+#ifndef __s390x__
 
-#define pgd_page_vaddr(pgd) (pgd_val(pgd) & PAGE_MASK)
+#define pmd_deref(pmd) (pmd_val(pmd) & _SEGMENT_ENTRY_ORIGIN)
+#define pud_deref(pmd) ({ BUG(); 0UL; })
+#define pgd_deref(pmd) ({ BUG(); 0UL; })
 
-#define pgd_page(pgd) pfn_to_page(pgd_val(pgd) >> PAGE_SHIFT)
+#define pud_offset(pgd, address) ((pud_t *) pgd)
+#define pmd_offset(pud, address) ((pmd_t *) pud + pmd_index(address))
 
-/* to find an entry in a page-table-directory */
-#define pgd_index(address) (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD-1))
-#define pgd_offset(mm, address) ((mm)->pgd+pgd_index(address))
+#else /* __s390x__ */
 
-/* to find an entry in a kernel page-table-directory */
-#define pgd_offset_k(address) pgd_offset(&init_mm, address)
+#define pmd_deref(pmd) (pmd_val(pmd) & _SEGMENT_ENTRY_ORIGIN)
+#define pud_deref(pud) (pud_val(pud) & _REGION_ENTRY_ORIGIN)
+#define pgd_deref(pgd) ({ BUG(); 0UL; })
 
-#ifndef __s390x__
+#define pud_offset(pgd, address) ((pud_t *) pgd)
 
-/* Find an entry in the second-level page table.. */
-static inline pmd_t * pmd_offset(pgd_t * dir, unsigned long address)
+static inline pmd_t *pmd_offset(pud_t *pud, unsigned long address)
 {
-	return (pmd_t *) dir;
+	pmd_t *pmd = (pmd_t *) pud_deref(*pud);
+	return pmd + pmd_index(address);
 }
 
-#else /* __s390x__ */
+#endif /* __s390x__ */
 
-/* Find an entry in the second-level page table.. */
-#define pmd_index(address) (((address) >> PMD_SHIFT) & (PTRS_PER_PMD-1))
-#define pmd_offset(dir,addr) \
-	((pmd_t *) pgd_page_vaddr(*(dir)) + pmd_index(addr))
+#define pfn_pte(pfn,pgprot) mk_pte_phys(__pa((pfn) << PAGE_SHIFT),(pgprot))
+#define pte_pfn(x) (pte_val(x) >> PAGE_SHIFT)
+#define pte_page(x) pfn_to_page(pte_pfn(x))
 
-#endif /* __s390x__ */
+#define pmd_page(pmd) pfn_to_page(pmd_val(pmd) >> PAGE_SHIFT)
 
-/* Find an entry in the third-level page table.. */
-#define pte_index(address) (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE-1))
-#define pte_offset_kernel(pmd, address) \
-	((pte_t *) pmd_page_vaddr(*(pmd)) + pte_index(address))
+/* Find an entry in the lowest level page table.. */
+#define pte_offset(pmd, addr) ((pte_t *) pmd_deref(*(pmd)) + pte_index(addr))
+#define pte_offset_kernel(pmd, address) pte_offset(pmd,address)
 #define pte_offset_map(pmd, address) pte_offset_kernel(pmd, address)
 #define pte_offset_map_nested(pmd, address) pte_offset_kernel(pmd, address)
 #define pte_unmap(pte) do { } while (0)
@@ -959,4 +961,3 @@ extern void memmap_init(unsigned long, int, unsigned long, unsigned long);
 #include
 
 #endif /* _S390_PAGE_H */
-
diff --git a/include/asm-s390/tlb.h b/include/asm-s390/tlb.h
index 55ae45e..618693c 100644
--- a/include/asm-s390/tlb.h
+++ b/include/asm-s390/tlb.h
@@ -121,6 +121,8 @@ static inline void pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd)
 #endif
 }
 
+#define pud_free_tlb(tlb, pud)	do { } while (0)
+
 #define tlb_start_vma(tlb, vma)	do { } while (0)
 #define tlb_end_vma(tlb, vma)	do { } while (0)
 #define tlb_remove_tlb_entry(tlb, ptep, addr)	do { } while (0)
-- 
cgit v0.10.2
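
A closing sketch, offered as a hedged reading of the folding contract these two patches establish: pud_alloc_one() may BUG() and pud_free_tlb() may be a no-op because generic mm code only allocates a pud when a pgd entry is empty, and the pgd stubs above make that condition impossible on s390.

	/* Illustration only (hypothetical helper, not in the patches): */
	static inline pud_t *example_pud_alloc(struct mm_struct *mm,
					       pgd_t *pgd, unsigned long addr)
	{
		if (pgd_none(*pgd))		/* constant 0 on s390 */
			return pud_alloc_one(mm, addr);	/* would BUG() */
		return pud_offset(pgd, addr);	/* just (pud_t *) pgd */
	}
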