From 06e44840368efdb359224b5d04db6c92f73bd373 Mon Sep 17 00:00:00 2001 From: Jesper Nilsson Date: Fri, 1 Apr 2011 17:13:17 +0200 Subject: CRISv32: Correct name of read_mostly section. 54cb27a71f51d304342c79e62fd7667f2171062b renamed .data.read_mostly to .data..read_mostly for all architectures for 2.6.33. Reported-by: Ralf Baechle Signed-off-by: Jesper Nilsson diff --git a/arch/cris/include/arch-v32/arch/cache.h b/arch/cris/include/arch-v32/arch/cache.h index 1de779f..7caf25d 100644 --- a/arch/cris/include/arch-v32/arch/cache.h +++ b/arch/cris/include/arch-v32/arch/cache.h @@ -7,7 +7,7 @@ #define L1_CACHE_BYTES 32 #define L1_CACHE_SHIFT 5 -#define __read_mostly __attribute__((__section__(".data.read_mostly"))) +#define __read_mostly __attribute__((__section__(".data..read_mostly"))) void flush_dma_list(dma_descr_data *descr); void flush_dma_descr(dma_descr_data *descr, int flush_buf); -- cgit v0.10.2 From 4bade0481cc378eee8f1f9412dc10dcc1b401f23 Mon Sep 17 00:00:00 2001 From: Wanlong Gao Date: Sun, 10 Apr 2011 16:10:25 +0200 Subject: cris:removed the unused variable removed the unused variable "unsigned long tmp". Signed-off-by: Wanlong Gao Signed-off-by: Jesper Nilsson diff --git a/arch/cris/arch-v32/kernel/ptrace.c b/arch/cris/arch-v32/kernel/ptrace.c index f7ad9e8..f085229 100644 --- a/arch/cris/arch-v32/kernel/ptrace.c +++ b/arch/cris/arch-v32/kernel/ptrace.c @@ -114,8 +114,6 @@ void user_disable_single_step(struct task_struct *child) void ptrace_disable(struct task_struct *child) { - unsigned long tmp; - /* Deconfigure SPC and S-bit. */ user_disable_single_step(child); put_reg(child, PT_SPC, 0); -- cgit v0.10.2 From 17b8c8c00b5d7c0d087db3c847aff16a91c714b1 Mon Sep 17 00:00:00 2001 From: Wanlong Gao Date: Sun, 10 Apr 2011 16:10:27 +0200 Subject: net:removed the unused variable removed the unused variable "np" of eth_v10.c. Signed-off-by: Wanlong Gao Signed-off-by: Jesper Nilsson diff --git a/drivers/net/cris/eth_v10.c b/drivers/net/cris/eth_v10.c index ec03b40..9c755db 100644 --- a/drivers/net/cris/eth_v10.c +++ b/drivers/net/cris/eth_v10.c @@ -1131,7 +1131,6 @@ static irqreturn_t e100rxtx_interrupt(int irq, void *dev_id) { struct net_device *dev = (struct net_device *)dev_id; - struct net_local *np = netdev_priv(dev); unsigned long irqbits; /* -- cgit v0.10.2 From a52bdec35b9431f1b6939b35c31e04b2fd57e2f2 Mon Sep 17 00:00:00 2001 From: Jesper Nilsson Date: Tue, 12 Apr 2011 10:50:19 +0200 Subject: cris/arch-v32: cryptocop: Use kzalloc This avoids unnecessary explicit initialization by allocating zeroed memory. Signed-off-by: Maxin B. John Signed-off-by: Jesper Nilsson diff --git a/arch/cris/arch-v32/drivers/cryptocop.c b/arch/cris/arch-v32/drivers/cryptocop.c index 642c6fe..f8476d9 100644 --- a/arch/cris/arch-v32/drivers/cryptocop.c +++ b/arch/cris/arch-v32/drivers/cryptocop.c @@ -1394,11 +1394,10 @@ static int create_md5_pad(int alloc_flag, unsigned long long hashed_length, char if (padlen < MD5_MIN_PAD_LENGTH) padlen += MD5_BLOCK_LENGTH; - p = kmalloc(padlen, alloc_flag); + p = kzalloc(padlen, alloc_flag); if (!p) return -ENOMEM; *p = 0x80; - memset(p+1, 0, padlen - 1); DEBUG(printk("create_md5_pad: hashed_length=%lld bits == %lld bytes\n", bit_length, hashed_length)); @@ -1426,11 +1425,10 @@ static int create_sha1_pad(int alloc_flag, unsigned long long hashed_length, cha if (padlen < SHA1_MIN_PAD_LENGTH) padlen += SHA1_BLOCK_LENGTH; - p = kmalloc(padlen, alloc_flag); + p = kzalloc(padlen, alloc_flag); if (!p) return -ENOMEM; *p = 0x80; - memset(p+1, 0, padlen - 1); DEBUG(printk("create_sha1_pad: hashed_length=%lld bits == %lld bytes\n", bit_length, hashed_length)); -- cgit v0.10.2 From 14c70c3de28747d6c4e7e4877bc4a5a2fdbe6f88 Mon Sep 17 00:00:00 2001 From: Jesper Nilsson Date: Tue, 12 Apr 2011 12:05:19 +0200 Subject: CRIS: Update documentation Change some broken links, remove obsolete information and add information about the CRISv32 architecture. Signed-off-by: Jesper Nilsson diff --git a/Documentation/cris/README b/Documentation/cris/README index d9b0868..8dbdb1a 100644 --- a/Documentation/cris/README +++ b/Documentation/cris/README @@ -1,38 +1,34 @@ -Linux 2.4 on the CRIS architecture -================================== -$Id: README,v 1.7 2001/04/19 12:38:32 bjornw Exp $ +Linux on the CRIS architecture +============================== -This is a port of Linux 2.4 to Axis Communications ETRAX 100LX embedded -network CPU. For more information about CRIS and ETRAX please see further -below. +This is a port of Linux to Axis Communications ETRAX 100LX, +ETRAX FS and ARTPEC-3 embedded network CPUs. + +For more information about CRIS and ETRAX please see further below. In order to compile this you need a version of gcc with support for the -ETRAX chip family. Please see this link for more information on how to +ETRAX chip family. Please see this link for more information on how to download the compiler and other tools useful when building and booting software for the ETRAX platform: -http://developer.axis.com/doc/software/devboard_lx/install-howto.html - - +http://developer.axis.com/wiki/doku.php?id=axis:install-howto-2_20 What is CRIS ? -------------- CRIS is an acronym for 'Code Reduced Instruction Set'. It is the CPU architecture in Axis Communication AB's range of embedded network CPU's, -called ETRAX. The latest CPU is called ETRAX 100LX, where LX stands for -'Linux' because the chip was designed to be a good host for the Linux -operating system. +called ETRAX. The ETRAX 100LX chip -------------------- -For reference, please see the press-release: +For reference, please see the following link: -http://www.axis.com/news/us/001101_etrax.htm +http://www.axis.com/products/dev_etrax_100lx/index.htm -The ETRAX 100LX is a 100 MIPS processor with 8kB cache, MMU, and a very broad -range of built-in interfaces, all with modern scatter/gather DMA. +The ETRAX 100LX is a 100 MIPS processor with 8kB cache, MMU, and a very broad +range of built-in interfaces, all with modern scatter/gather DMA. Memory interfaces: @@ -51,20 +47,28 @@ I/O interfaces: * SCSI * two parallel-ports * two generic 8-bit ports - - (not all interfaces are available at the same time due to chip pin + + (not all interfaces are available at the same time due to chip pin multiplexing) -The previous version of the ETRAX, the ETRAX 100, sits in almost all of -Axis shipping thin-servers like the Axis 2100 web camera or the ETRAX 100 -developer-board. It lacks an MMU so the Linux we run on that is a version -of uClinux (Linux 2.0 without MM-support) ported to the CRIS architecture. -The new Linux 2.4 port has full MM and needs a CPU with an MMU, so it will -not run on the ETRAX 100. +ETRAX 100LX is CRISv10 architecture. + + +The ETRAX FS and ARTPEC-3 chips +------------------------------- -A version of the Axis developer-board with ETRAX 100LX (running Linux -2.4) is now available. For more information please see developer.axis.com. +The ETRAX FS is a 200MHz 32-bit RISC processor with on-chip 16kB +I-cache and 16kB D-cache and with a wide range of device interfaces +including multiple high speed serial ports and an integrated USB 1.1 PHY. +The ARTPEC-3 is a variant of the ETRAX FS with additional IO-units +used by the Axis Communications network cameras. + +See below link for more information: + +http://www.axis.com/products/dev_etrax_fs/index.htm + +ETRAX FS and ARTPEC-3 are both CRISv32 architectures. Bootlog ------- @@ -182,10 +186,6 @@ SwapFree: 0 kB -rwxr-xr-x 1 342 100 16252 Jan 01 00:00 telnetd -(All programs are statically linked to the libc at this point - we have not ported the - shared libraries yet) - - -- cgit v0.10.2 From 74f077d2a7651409c44bb323471f219a4b0d2aab Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Mon, 2 Apr 2012 13:40:17 +0200 Subject: cris: posix_types.h, include asm-generic/posix_types.h Without that I cannot build anything: In file included from include/linux/page-flags.h:8:0, from kernel/bounds.c:9: include/linux/types.h:25:1: error: unknown type name '__kernel_ino_t' include/linux/types.h:29:1: error: unknown type name '__kernel_off_t' ... Signed-off-by: Jiri Slaby Cc: Mikael Starvik Signed-off-by: Jesper Nilsson Cc: linux-cris-kernel@axis.com diff --git a/arch/cris/include/asm/posix_types.h b/arch/cris/include/asm/posix_types.h index 72b3cd6..234891c 100644 --- a/arch/cris/include/asm/posix_types.h +++ b/arch/cris/include/asm/posix_types.h @@ -33,4 +33,6 @@ typedef int __kernel_ptrdiff_t; typedef unsigned short __kernel_old_dev_t; #define __kernel_old_dev_t __kernel_old_dev_t +#include + #endif /* __ARCH_CRIS_POSIX_TYPES_H */ -- cgit v0.10.2 From 473e162eea465e60578edb93341752e7f1c1dacc Mon Sep 17 00:00:00 2001 From: Jesper Nilsson Date: Mon, 24 Oct 2011 11:19:25 +0200 Subject: CRIS: Add _sdata to vmlinux.lds.S Fixes link error: LD vmlinux kernel/built-in.o: In function `core_kernel_data': (.text+0x13e44): undefined reference to `_sdata' Signed-off-by: Jesper Nilsson diff --git a/arch/cris/kernel/vmlinux.lds.S b/arch/cris/kernel/vmlinux.lds.S index a6990cb..a68b983 100644 --- a/arch/cris/kernel/vmlinux.lds.S +++ b/arch/cris/kernel/vmlinux.lds.S @@ -52,6 +52,7 @@ SECTIONS EXCEPTION_TABLE(4) + _sdata = .; RODATA . = ALIGN (4); -- cgit v0.10.2 From dfb73a071f0259ba81ceea7b7f312f63dd18c73c Mon Sep 17 00:00:00 2001 From: Wanlong Gao Date: Wed, 3 Aug 2011 04:43:21 +0200 Subject: cris:fix the wrong function declear Fix wrong declear of write_stack_register. Signed-off-by: Wanlong Gao Signed-off-by: Jesper Nilsson diff --git a/arch/cris/arch-v10/kernel/kgdb.c b/arch/cris/arch-v10/kernel/kgdb.c index b579dd0..37e6d2c 100644 --- a/arch/cris/arch-v10/kernel/kgdb.c +++ b/arch/cris/arch-v10/kernel/kgdb.c @@ -264,7 +264,7 @@ static int write_register (int regno, char *val); /* Write a value to a specified register in the stack of a thread other than the current thread. */ -static write_stack_register (int thread_id, int regno, char *valptr); +static int write_stack_register(int thread_id, int regno, char *valptr); /* Read a value from a specified register in the register image. Returns the status of the read operation. The register value is returned in valptr. */ -- cgit v0.10.2 From 4d5914d628360c607dc426ccb1acaf23909ac546 Mon Sep 17 00:00:00 2001 From: Kautuk Consul Date: Tue, 20 Mar 2012 14:24:05 +0100 Subject: cris/mm/fault.c: Port OOM changes to do_page_fault Commit d065bd810b6deb67d4897a14bfe21f8eb526ba99 (mm: retry page fault when blocking on disk transfer) and commit 37b23e0525d393d48a7d59f870b3bc061a30ccdb (x86,mm: make pagefault killable) The above commits introduced changes into the x86 pagefault handler for making the page fault handler retryable as well as killable. These changes reduce the mmap_sem hold time, which is crucial during OOM killer invocation. Port these changes to CRIS. Signed-off-by: Kautuk Consul Signed-off-by: Jesper Nilsson diff --git a/arch/cris/mm/fault.c b/arch/cris/mm/fault.c index b4760d8..45fd542 100644 --- a/arch/cris/mm/fault.c +++ b/arch/cris/mm/fault.c @@ -58,6 +58,8 @@ do_page_fault(unsigned long address, struct pt_regs *regs, struct vm_area_struct * vma; siginfo_t info; int fault; + unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE | + ((writeaccess & 1) ? FAULT_FLAG_WRITE : 0); D(printk(KERN_DEBUG "Page fault for %lX on %X at %lX, prot %d write %d\n", @@ -115,6 +117,7 @@ do_page_fault(unsigned long address, struct pt_regs *regs, if (in_atomic() || !mm) goto no_context; +retry: down_read(&mm->mmap_sem); vma = find_vma(mm, address); if (!vma) @@ -163,7 +166,11 @@ do_page_fault(unsigned long address, struct pt_regs *regs, * the fault. */ - fault = handle_mm_fault(mm, vma, address, (writeaccess & 1) ? FAULT_FLAG_WRITE : 0); + fault = handle_mm_fault(mm, vma, address, flags); + + if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current)) + return; + if (unlikely(fault & VM_FAULT_ERROR)) { if (fault & VM_FAULT_OOM) goto out_of_memory; @@ -171,10 +178,24 @@ do_page_fault(unsigned long address, struct pt_regs *regs, goto do_sigbus; BUG(); } - if (fault & VM_FAULT_MAJOR) - tsk->maj_flt++; - else - tsk->min_flt++; + + if (flags & FAULT_FLAG_ALLOW_RETRY) { + if (fault & VM_FAULT_MAJOR) + tsk->maj_flt++; + else + tsk->min_flt++; + if (fault & VM_FAULT_RETRY) { + flags &= ~FAULT_FLAG_ALLOW_RETRY; + + /* + * No need to up_read(&mm->mmap_sem) as we would + * have already released it in __lock_page_or_retry + * in mm/filemap.c. + */ + + goto retry; + } + } up_read(&mm->mmap_sem); return; -- cgit v0.10.2 From e1f80f57443838f5f420c774744c50c81c178e2c Mon Sep 17 00:00:00 2001 From: KyongHo Cho Date: Wed, 4 Apr 2012 09:23:02 -0700 Subject: S5P: SYSMMU: Remove System MMU device driver This patch removes System MMU device driver from arm/plat-s5p tree for transition to implement IOMMU driver. Although controlling System MMU in this removing driver is similar to the new IOMMU driver in the following patch, the new one is almost rewrite of this driver and there is no benefit to moving the driver file from arch/arm/ to drivers/iommu. Signed-off-by: KyongHo Cho Signed-off-by: Kukjin Kim diff --git a/arch/arm/plat-s5p/Kconfig b/arch/arm/plat-s5p/Kconfig index 96bea32..2c1193c 100644 --- a/arch/arm/plat-s5p/Kconfig +++ b/arch/arm/plat-s5p/Kconfig @@ -50,14 +50,6 @@ config S5P_PM Common code for power management support on S5P and newer SoCs Note: Do not select this for S5P6440 and S5P6450. -comment "System MMU" - -config S5P_SYSTEM_MMU - bool "S5P SYSTEM MMU" - depends on ARCH_EXYNOS4 - help - Say Y here if you want to enable System MMU - config S5P_SLEEP bool help diff --git a/arch/arm/plat-s5p/Makefile b/arch/arm/plat-s5p/Makefile index 4bd8241..4953d50 100644 --- a/arch/arm/plat-s5p/Makefile +++ b/arch/arm/plat-s5p/Makefile @@ -16,7 +16,6 @@ obj-y += clock.o obj-y += irq.o obj-$(CONFIG_S5P_EXT_INT) += irq-eint.o obj-$(CONFIG_S5P_GPIO_INT) += irq-gpioint.o -obj-$(CONFIG_S5P_SYSTEM_MMU) += sysmmu.o obj-$(CONFIG_S5P_PM) += pm.o irq-pm.o obj-$(CONFIG_S5P_SLEEP) += sleep.o obj-$(CONFIG_S5P_HRT) += s5p-time.o diff --git a/arch/arm/plat-s5p/sysmmu.c b/arch/arm/plat-s5p/sysmmu.c deleted file mode 100644 index c8bec9c..0000000 --- a/arch/arm/plat-s5p/sysmmu.c +++ /dev/null @@ -1,313 +0,0 @@ -/* linux/arch/arm/plat-s5p/sysmmu.c - * - * Copyright (c) 2010 Samsung Electronics Co., Ltd. - * http://www.samsung.com - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#include -#include -#include -#include - -#include - -#include -#include -#include - -#define CTRL_ENABLE 0x5 -#define CTRL_BLOCK 0x7 -#define CTRL_DISABLE 0x0 - -static struct device *dev; - -static unsigned short fault_reg_offset[SYSMMU_FAULTS_NUM] = { - S5P_PAGE_FAULT_ADDR, - S5P_AR_FAULT_ADDR, - S5P_AW_FAULT_ADDR, - S5P_DEFAULT_SLAVE_ADDR, - S5P_AR_FAULT_ADDR, - S5P_AR_FAULT_ADDR, - S5P_AW_FAULT_ADDR, - S5P_AW_FAULT_ADDR -}; - -static char *sysmmu_fault_name[SYSMMU_FAULTS_NUM] = { - "PAGE FAULT", - "AR MULTI-HIT FAULT", - "AW MULTI-HIT FAULT", - "BUS ERROR", - "AR SECURITY PROTECTION FAULT", - "AR ACCESS PROTECTION FAULT", - "AW SECURITY PROTECTION FAULT", - "AW ACCESS PROTECTION FAULT" -}; - -static int (*fault_handlers[S5P_SYSMMU_TOTAL_IPNUM])( - enum S5P_SYSMMU_INTERRUPT_TYPE itype, - unsigned long pgtable_base, - unsigned long fault_addr); - -/* - * If adjacent 2 bits are true, the system MMU is enabled. - * The system MMU is disabled, otherwise. - */ -static unsigned long sysmmu_states; - -static inline void set_sysmmu_active(sysmmu_ips ips) -{ - sysmmu_states |= 3 << (ips * 2); -} - -static inline void set_sysmmu_inactive(sysmmu_ips ips) -{ - sysmmu_states &= ~(3 << (ips * 2)); -} - -static inline int is_sysmmu_active(sysmmu_ips ips) -{ - return sysmmu_states & (3 << (ips * 2)); -} - -static void __iomem *sysmmusfrs[S5P_SYSMMU_TOTAL_IPNUM]; - -static inline void sysmmu_block(sysmmu_ips ips) -{ - __raw_writel(CTRL_BLOCK, sysmmusfrs[ips] + S5P_MMU_CTRL); - dev_dbg(dev, "%s is blocked.\n", sysmmu_ips_name[ips]); -} - -static inline void sysmmu_unblock(sysmmu_ips ips) -{ - __raw_writel(CTRL_ENABLE, sysmmusfrs[ips] + S5P_MMU_CTRL); - dev_dbg(dev, "%s is unblocked.\n", sysmmu_ips_name[ips]); -} - -static inline void __sysmmu_tlb_invalidate(sysmmu_ips ips) -{ - __raw_writel(0x1, sysmmusfrs[ips] + S5P_MMU_FLUSH); - dev_dbg(dev, "TLB of %s is invalidated.\n", sysmmu_ips_name[ips]); -} - -static inline void __sysmmu_set_ptbase(sysmmu_ips ips, unsigned long pgd) -{ - if (unlikely(pgd == 0)) { - pgd = (unsigned long)ZERO_PAGE(0); - __raw_writel(0x20, sysmmusfrs[ips] + S5P_MMU_CFG); /* 4KB LV1 */ - } else { - __raw_writel(0x0, sysmmusfrs[ips] + S5P_MMU_CFG); /* 16KB LV1 */ - } - - __raw_writel(pgd, sysmmusfrs[ips] + S5P_PT_BASE_ADDR); - - dev_dbg(dev, "Page table base of %s is initialized with 0x%08lX.\n", - sysmmu_ips_name[ips], pgd); - __sysmmu_tlb_invalidate(ips); -} - -void sysmmu_set_fault_handler(sysmmu_ips ips, - int (*handler)(enum S5P_SYSMMU_INTERRUPT_TYPE itype, - unsigned long pgtable_base, - unsigned long fault_addr)) -{ - BUG_ON(!((ips >= SYSMMU_MDMA) && (ips < S5P_SYSMMU_TOTAL_IPNUM))); - fault_handlers[ips] = handler; -} - -static irqreturn_t s5p_sysmmu_irq(int irq, void *dev_id) -{ - /* SYSMMU is in blocked when interrupt occurred. */ - unsigned long base = 0; - sysmmu_ips ips = (sysmmu_ips)dev_id; - enum S5P_SYSMMU_INTERRUPT_TYPE itype; - - itype = (enum S5P_SYSMMU_INTERRUPT_TYPE) - __ffs(__raw_readl(sysmmusfrs[ips] + S5P_INT_STATUS)); - - BUG_ON(!((itype >= 0) && (itype < 8))); - - dev_alert(dev, "%s occurred by %s.\n", sysmmu_fault_name[itype], - sysmmu_ips_name[ips]); - - if (fault_handlers[ips]) { - unsigned long addr; - - base = __raw_readl(sysmmusfrs[ips] + S5P_PT_BASE_ADDR); - addr = __raw_readl(sysmmusfrs[ips] + fault_reg_offset[itype]); - - if (fault_handlers[ips](itype, base, addr)) { - __raw_writel(1 << itype, - sysmmusfrs[ips] + S5P_INT_CLEAR); - dev_notice(dev, "%s from %s is resolved." - " Retrying translation.\n", - sysmmu_fault_name[itype], sysmmu_ips_name[ips]); - } else { - base = 0; - } - } - - sysmmu_unblock(ips); - - if (!base) - dev_notice(dev, "%s from %s is not handled.\n", - sysmmu_fault_name[itype], sysmmu_ips_name[ips]); - - return IRQ_HANDLED; -} - -void s5p_sysmmu_set_tablebase_pgd(sysmmu_ips ips, unsigned long pgd) -{ - if (is_sysmmu_active(ips)) { - sysmmu_block(ips); - __sysmmu_set_ptbase(ips, pgd); - sysmmu_unblock(ips); - } else { - dev_dbg(dev, "%s is disabled. " - "Skipping initializing page table base.\n", - sysmmu_ips_name[ips]); - } -} - -void s5p_sysmmu_enable(sysmmu_ips ips, unsigned long pgd) -{ - if (!is_sysmmu_active(ips)) { - sysmmu_clk_enable(ips); - - __sysmmu_set_ptbase(ips, pgd); - - __raw_writel(CTRL_ENABLE, sysmmusfrs[ips] + S5P_MMU_CTRL); - - set_sysmmu_active(ips); - dev_dbg(dev, "%s is enabled.\n", sysmmu_ips_name[ips]); - } else { - dev_dbg(dev, "%s is already enabled.\n", sysmmu_ips_name[ips]); - } -} - -void s5p_sysmmu_disable(sysmmu_ips ips) -{ - if (is_sysmmu_active(ips)) { - __raw_writel(CTRL_DISABLE, sysmmusfrs[ips] + S5P_MMU_CTRL); - set_sysmmu_inactive(ips); - sysmmu_clk_disable(ips); - dev_dbg(dev, "%s is disabled.\n", sysmmu_ips_name[ips]); - } else { - dev_dbg(dev, "%s is already disabled.\n", sysmmu_ips_name[ips]); - } -} - -void s5p_sysmmu_tlb_invalidate(sysmmu_ips ips) -{ - if (is_sysmmu_active(ips)) { - sysmmu_block(ips); - __sysmmu_tlb_invalidate(ips); - sysmmu_unblock(ips); - } else { - dev_dbg(dev, "%s is disabled. " - "Skipping invalidating TLB.\n", sysmmu_ips_name[ips]); - } -} - -static int s5p_sysmmu_probe(struct platform_device *pdev) -{ - int i, ret; - struct resource *res, *mem; - - dev = &pdev->dev; - - for (i = 0; i < S5P_SYSMMU_TOTAL_IPNUM; i++) { - int irq; - - sysmmu_clk_init(dev, i); - sysmmu_clk_disable(i); - - res = platform_get_resource(pdev, IORESOURCE_MEM, i); - if (!res) { - dev_err(dev, "Failed to get the resource of %s.\n", - sysmmu_ips_name[i]); - ret = -ENODEV; - goto err_res; - } - - mem = request_mem_region(res->start, resource_size(res), - pdev->name); - if (!mem) { - dev_err(dev, "Failed to request the memory region of %s.\n", - sysmmu_ips_name[i]); - ret = -EBUSY; - goto err_res; - } - - sysmmusfrs[i] = ioremap(res->start, resource_size(res)); - if (!sysmmusfrs[i]) { - dev_err(dev, "Failed to ioremap() for %s.\n", - sysmmu_ips_name[i]); - ret = -ENXIO; - goto err_reg; - } - - irq = platform_get_irq(pdev, i); - if (irq <= 0) { - dev_err(dev, "Failed to get the IRQ resource of %s.\n", - sysmmu_ips_name[i]); - ret = -ENOENT; - goto err_map; - } - - if (request_irq(irq, s5p_sysmmu_irq, IRQF_DISABLED, - pdev->name, (void *)i)) { - dev_err(dev, "Failed to request IRQ for %s.\n", - sysmmu_ips_name[i]); - ret = -ENOENT; - goto err_map; - } - } - - return 0; - -err_map: - iounmap(sysmmusfrs[i]); -err_reg: - release_mem_region(mem->start, resource_size(mem)); -err_res: - return ret; -} - -static int s5p_sysmmu_remove(struct platform_device *pdev) -{ - return 0; -} -int s5p_sysmmu_runtime_suspend(struct device *dev) -{ - return 0; -} - -int s5p_sysmmu_runtime_resume(struct device *dev) -{ - return 0; -} - -const struct dev_pm_ops s5p_sysmmu_pm_ops = { - .runtime_suspend = s5p_sysmmu_runtime_suspend, - .runtime_resume = s5p_sysmmu_runtime_resume, -}; - -static struct platform_driver s5p_sysmmu_driver = { - .probe = s5p_sysmmu_probe, - .remove = s5p_sysmmu_remove, - .driver = { - .owner = THIS_MODULE, - .name = "s5p-sysmmu", - .pm = &s5p_sysmmu_pm_ops, - } -}; - -static int __init s5p_sysmmu_init(void) -{ - return platform_driver_register(&s5p_sysmmu_driver); -} -arch_initcall(s5p_sysmmu_init); diff --git a/arch/arm/plat-samsung/include/plat/devs.h b/arch/arm/plat-samsung/include/plat/devs.h index 2155d4a..4067d1d 100644 --- a/arch/arm/plat-samsung/include/plat/devs.h +++ b/arch/arm/plat-samsung/include/plat/devs.h @@ -133,7 +133,6 @@ extern struct platform_device exynos4_device_pcm1; extern struct platform_device exynos4_device_pcm2; extern struct platform_device exynos4_device_pd[]; extern struct platform_device exynos4_device_spdif; -extern struct platform_device exynos4_device_sysmmu; extern struct platform_device samsung_asoc_dma; extern struct platform_device samsung_asoc_idma; diff --git a/arch/arm/plat-samsung/include/plat/sysmmu.h b/arch/arm/plat-samsung/include/plat/sysmmu.h deleted file mode 100644 index 5fe8ee0..0000000 --- a/arch/arm/plat-samsung/include/plat/sysmmu.h +++ /dev/null @@ -1,95 +0,0 @@ -/* linux/arch/arm/plat-samsung/include/plat/sysmmu.h - * - * Copyright (c) 2010-2011 Samsung Electronics Co., Ltd. - * http://www.samsung.com - * - * Samsung System MMU driver for S5P platform - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. -*/ - -#ifndef __PLAT_SAMSUNG_SYSMMU_H -#define __PLAT_SAMSUNG_SYSMMU_H __FILE__ - -enum S5P_SYSMMU_INTERRUPT_TYPE { - SYSMMU_PAGEFAULT, - SYSMMU_AR_MULTIHIT, - SYSMMU_AW_MULTIHIT, - SYSMMU_BUSERROR, - SYSMMU_AR_SECURITY, - SYSMMU_AR_ACCESS, - SYSMMU_AW_SECURITY, - SYSMMU_AW_PROTECTION, /* 7 */ - SYSMMU_FAULTS_NUM -}; - -#ifdef CONFIG_S5P_SYSTEM_MMU - -#include - -/** - * s5p_sysmmu_enable() - enable system mmu of ip - * @ips: The ip connected system mmu. - * #pgd: Base physical address of the 1st level page table - * - * This function enable system mmu to transfer address - * from virtual address to physical address - */ -void s5p_sysmmu_enable(sysmmu_ips ips, unsigned long pgd); - -/** - * s5p_sysmmu_disable() - disable sysmmu mmu of ip - * @ips: The ip connected system mmu. - * - * This function disable system mmu to transfer address - * from virtual address to physical address - */ -void s5p_sysmmu_disable(sysmmu_ips ips); - -/** - * s5p_sysmmu_set_tablebase_pgd() - set page table base address to refer page table - * @ips: The ip connected system mmu. - * @pgd: The page table base address. - * - * This function set page table base address - * When system mmu transfer address from virtaul address to physical address, - * system mmu refer address information from page table - */ -void s5p_sysmmu_set_tablebase_pgd(sysmmu_ips ips, unsigned long pgd); - -/** - * s5p_sysmmu_tlb_invalidate() - flush all TLB entry in system mmu - * @ips: The ip connected system mmu. - * - * This function flush all TLB entry in system mmu - */ -void s5p_sysmmu_tlb_invalidate(sysmmu_ips ips); - -/** s5p_sysmmu_set_fault_handler() - Fault handler for System MMUs - * @itype: type of fault. - * @pgtable_base: the physical address of page table base. This is 0 if @ips is - * SYSMMU_BUSERROR. - * @fault_addr: the device (virtual) address that the System MMU tried to - * translated. This is 0 if @ips is SYSMMU_BUSERROR. - * Called when interrupt occurred by the System MMUs - * The device drivers of peripheral devices that has a System MMU can implement - * a fault handler to resolve address translation fault by System MMU. - * The meanings of return value and parameters are described below. - - * return value: non-zero if the fault is correctly resolved. - * zero if the fault is not handled. - */ -void s5p_sysmmu_set_fault_handler(sysmmu_ips ips, - int (*handler)(enum S5P_SYSMMU_INTERRUPT_TYPE itype, - unsigned long pgtable_base, - unsigned long fault_addr)); -#else -#define s5p_sysmmu_enable(ips, pgd) do { } while (0) -#define s5p_sysmmu_disable(ips) do { } while (0) -#define s5p_sysmmu_set_tablebase_pgd(ips, pgd) do { } while (0) -#define s5p_sysmmu_tlb_invalidate(ips) do { } while (0) -#define s5p_sysmmu_set_fault_handler(ips, handler) do { } while (0) -#endif -#endif /* __ASM_PLAT_SYSMMU_H */ -- cgit v0.10.2 From bca10b906f8d2e4f177bff047b9d623941e454f7 Mon Sep 17 00:00:00 2001 From: KyongHo Cho Date: Wed, 4 Apr 2012 09:23:02 -0700 Subject: ARM: EXYNOS: Change System MMU platform device definitions Handling System MMUs with an identifier is not flexible to manage System MMU platform devices because of the following reasons: 1. A device driver which needs to handle System MMU must know the ID. 2. A System MMU may not present in some implementations of Exynos family. 3. Handling System MMU with IOMMU API does not require an ID. This patch is the result of removing ID of System MMUs. Instead, a device driver that needs to handle its System MMU must use IOMMU API while its descriptor of platform device is given. This patch also includes the following enhancements: - A System MMU device becomes a child if its power domain device. - clkdev Signed-off-by: KyongHo Cho Signed-off-by: Kukjin Kim diff --git a/arch/arm/mach-exynos/Kconfig b/arch/arm/mach-exynos/Kconfig index 0491cee..801c738 100644 --- a/arch/arm/mach-exynos/Kconfig +++ b/arch/arm/mach-exynos/Kconfig @@ -85,10 +85,10 @@ config EXYNOS4_SETUP_FIMD0 help Common setup code for FIMD0. -config EXYNOS4_DEV_SYSMMU +config EXYNOS_DEV_SYSMMU bool help - Common setup code for SYSTEM MMU in EXYNOS4 + Common setup code for SYSTEM MMU in EXYNOS platforms config EXYNOS4_DEV_DWMCI bool @@ -200,12 +200,12 @@ config MACH_SMDKV310 select S3C_DEV_HSMMC2 select S3C_DEV_HSMMC3 select SAMSUNG_DEV_BACKLIGHT + select EXYNOS_DEV_SYSMMU select EXYNOS4_DEV_AHCI select SAMSUNG_DEV_KEYPAD select EXYNOS4_DEV_DMA select SAMSUNG_DEV_PWM select EXYNOS4_DEV_USB_OHCI - select EXYNOS4_DEV_SYSMMU select EXYNOS4_SETUP_FIMD0 select EXYNOS4_SETUP_I2C1 select EXYNOS4_SETUP_KEYPAD @@ -224,7 +224,6 @@ config MACH_ARMLEX4210 select S3C_DEV_HSMMC3 select EXYNOS4_DEV_AHCI select EXYNOS4_DEV_DMA - select EXYNOS4_DEV_SYSMMU select EXYNOS4_SETUP_SDHCI help Machine support for Samsung ARMLEX4210 based on EXYNOS4210 @@ -251,6 +250,7 @@ config MACH_UNIVERSAL_C210 select S5P_DEV_MFC select S5P_DEV_ONENAND select S5P_DEV_TV + select EXYNOS_DEV_SYSMMU select EXYNOS4_DEV_DMA select EXYNOS4_SETUP_FIMD0 select EXYNOS4_SETUP_I2C1 @@ -322,6 +322,7 @@ config MACH_ORIGEN select S5P_DEV_USB_EHCI select SAMSUNG_DEV_BACKLIGHT select SAMSUNG_DEV_PWM + select EXYNOS_DEV_SYSMMU select EXYNOS4_DEV_DMA select EXYNOS4_DEV_USB_OHCI select EXYNOS4_SETUP_FIMD0 @@ -345,6 +346,7 @@ config MACH_SMDK4212 select SAMSUNG_DEV_BACKLIGHT select SAMSUNG_DEV_KEYPAD select SAMSUNG_DEV_PWM + select EXYNOS_DEV_SYSMMU select EXYNOS4_DEV_DMA select EXYNOS4_SETUP_I2C1 select EXYNOS4_SETUP_I2C3 diff --git a/arch/arm/mach-exynos/Makefile b/arch/arm/mach-exynos/Makefile index 8631840..2726252 100644 --- a/arch/arm/mach-exynos/Makefile +++ b/arch/arm/mach-exynos/Makefile @@ -50,7 +50,7 @@ obj-$(CONFIG_MACH_EXYNOS5_DT) += mach-exynos5-dt.o obj-y += dev-uart.o obj-$(CONFIG_ARCH_EXYNOS4) += dev-audio.o obj-$(CONFIG_EXYNOS4_DEV_AHCI) += dev-ahci.o -obj-$(CONFIG_EXYNOS4_DEV_SYSMMU) += dev-sysmmu.o +obj-$(CONFIG_EXYNOS_DEV_SYSMMU) += dev-sysmmu.o obj-$(CONFIG_EXYNOS4_DEV_DWMCI) += dev-dwmci.o obj-$(CONFIG_EXYNOS4_DEV_DMA) += dma.o obj-$(CONFIG_EXYNOS4_DEV_USB_OHCI) += dev-ohci.o diff --git a/arch/arm/mach-exynos/clock-exynos4.c b/arch/arm/mach-exynos/clock-exynos4.c index df54c2a..4287311 100644 --- a/arch/arm/mach-exynos/clock-exynos4.c +++ b/arch/arm/mach-exynos/clock-exynos4.c @@ -168,7 +168,7 @@ static int exynos4_clk_ip_tv_ctrl(struct clk *clk, int enable) return s5p_gatectrl(EXYNOS4_CLKGATE_IP_TV, clk, enable); } -static int exynos4_clk_ip_image_ctrl(struct clk *clk, int enable) +int exynos4_clk_ip_image_ctrl(struct clk *clk, int enable) { return s5p_gatectrl(EXYNOS4_CLKGATE_IP_IMAGE, clk, enable); } @@ -198,6 +198,11 @@ static int exynos4_clk_ip_perir_ctrl(struct clk *clk, int enable) return s5p_gatectrl(EXYNOS4_CLKGATE_IP_PERIR, clk, enable); } +int exynos4_clk_ip_dmc_ctrl(struct clk *clk, int enable) +{ + return s5p_gatectrl(EXYNOS4_CLKGATE_IP_DMC, clk, enable); +} + static int exynos4_clk_hdmiphy_ctrl(struct clk *clk, int enable) { return s5p_gatectrl(S5P_HDMI_PHY_CONTROL, clk, enable); @@ -678,61 +683,55 @@ static struct clk exynos4_init_clocks_off[] = { .enable = exynos4_clk_ip_peril_ctrl, .ctrlbit = (1 << 14), }, { - .name = "SYSMMU_MDMA", + .name = SYSMMU_CLOCK_NAME, + .devname = SYSMMU_CLOCK_DEVNAME(mfc_l, 0), + .enable = exynos4_clk_ip_mfc_ctrl, + .ctrlbit = (1 << 1), + }, { + .name = SYSMMU_CLOCK_NAME, + .devname = SYSMMU_CLOCK_DEVNAME(mfc_r, 1), + .enable = exynos4_clk_ip_mfc_ctrl, + .ctrlbit = (1 << 2), + }, { + .name = SYSMMU_CLOCK_NAME, + .devname = SYSMMU_CLOCK_DEVNAME(tv, 2), + .enable = exynos4_clk_ip_tv_ctrl, + .ctrlbit = (1 << 4), + }, { + .name = SYSMMU_CLOCK_NAME, + .devname = SYSMMU_CLOCK_DEVNAME(jpeg, 3), + .enable = exynos4_clk_ip_cam_ctrl, + .ctrlbit = (1 << 11), + }, { + .name = SYSMMU_CLOCK_NAME, + .devname = SYSMMU_CLOCK_DEVNAME(rot, 4), .enable = exynos4_clk_ip_image_ctrl, - .ctrlbit = (1 << 5), + .ctrlbit = (1 << 4), }, { - .name = "SYSMMU_FIMC0", + .name = SYSMMU_CLOCK_NAME, + .devname = SYSMMU_CLOCK_DEVNAME(fimc0, 5), .enable = exynos4_clk_ip_cam_ctrl, .ctrlbit = (1 << 7), }, { - .name = "SYSMMU_FIMC1", + .name = SYSMMU_CLOCK_NAME, + .devname = SYSMMU_CLOCK_DEVNAME(fimc1, 6), .enable = exynos4_clk_ip_cam_ctrl, .ctrlbit = (1 << 8), }, { - .name = "SYSMMU_FIMC2", + .name = SYSMMU_CLOCK_NAME, + .devname = SYSMMU_CLOCK_DEVNAME(fimc2, 7), .enable = exynos4_clk_ip_cam_ctrl, .ctrlbit = (1 << 9), }, { - .name = "SYSMMU_FIMC3", + .name = SYSMMU_CLOCK_NAME, + .devname = SYSMMU_CLOCK_DEVNAME(fimc3, 8), .enable = exynos4_clk_ip_cam_ctrl, .ctrlbit = (1 << 10), }, { - .name = "SYSMMU_JPEG", - .enable = exynos4_clk_ip_cam_ctrl, - .ctrlbit = (1 << 11), - }, { - .name = "SYSMMU_FIMD0", + .name = SYSMMU_CLOCK_NAME, + .devname = SYSMMU_CLOCK_DEVNAME(fimd0, 10), .enable = exynos4_clk_ip_lcd0_ctrl, .ctrlbit = (1 << 4), - }, { - .name = "SYSMMU_FIMD1", - .enable = exynos4_clk_ip_lcd1_ctrl, - .ctrlbit = (1 << 4), - }, { - .name = "SYSMMU_PCIe", - .enable = exynos4_clk_ip_fsys_ctrl, - .ctrlbit = (1 << 18), - }, { - .name = "SYSMMU_G2D", - .enable = exynos4_clk_ip_image_ctrl, - .ctrlbit = (1 << 3), - }, { - .name = "SYSMMU_ROTATOR", - .enable = exynos4_clk_ip_image_ctrl, - .ctrlbit = (1 << 4), - }, { - .name = "SYSMMU_TV", - .enable = exynos4_clk_ip_tv_ctrl, - .ctrlbit = (1 << 4), - }, { - .name = "SYSMMU_MFC_L", - .enable = exynos4_clk_ip_mfc_ctrl, - .ctrlbit = (1 << 1), - }, { - .name = "SYSMMU_MFC_R", - .enable = exynos4_clk_ip_mfc_ctrl, - .ctrlbit = (1 << 2), } }; diff --git a/arch/arm/mach-exynos/clock-exynos4.h b/arch/arm/mach-exynos/clock-exynos4.h index cb71c29..28a1197 100644 --- a/arch/arm/mach-exynos/clock-exynos4.h +++ b/arch/arm/mach-exynos/clock-exynos4.h @@ -26,5 +26,7 @@ extern struct clk *exynos4_clkset_group_list[]; extern int exynos4_clksrc_mask_fsys_ctrl(struct clk *clk, int enable); extern int exynos4_clk_ip_fsys_ctrl(struct clk *clk, int enable); extern int exynos4_clk_ip_lcd1_ctrl(struct clk *clk, int enable); +extern int exynos4_clk_ip_image_ctrl(struct clk *clk, int enable); +extern int exynos4_clk_ip_dmc_ctrl(struct clk *clk, int enable); #endif /* __ASM_ARCH_CLOCK_H */ diff --git a/arch/arm/mach-exynos/clock-exynos4210.c b/arch/arm/mach-exynos/clock-exynos4210.c index 3b131e4..b8689ff 100644 --- a/arch/arm/mach-exynos/clock-exynos4210.c +++ b/arch/arm/mach-exynos/clock-exynos4210.c @@ -26,6 +26,7 @@ #include #include #include +#include #include "common.h" #include "clock-exynos4.h" @@ -94,6 +95,16 @@ static struct clk init_clocks_off[] = { .devname = "exynos4-fb.1", .enable = exynos4_clk_ip_lcd1_ctrl, .ctrlbit = (1 << 0), + }, { + .name = SYSMMU_CLOCK_NAME, + .devname = SYSMMU_CLOCK_DEVNAME(2d, 14), + .enable = exynos4_clk_ip_image_ctrl, + .ctrlbit = (1 << 3), + }, { + .name = SYSMMU_CLOCK_NAME, + .devname = SYSMMU_CLOCK_DEVNAME(fimd1, 11), + .enable = exynos4_clk_ip_lcd1_ctrl, + .ctrlbit = (1 << 4), }, }; diff --git a/arch/arm/mach-exynos/clock-exynos4212.c b/arch/arm/mach-exynos/clock-exynos4212.c index 3ecc01e..9882312 100644 --- a/arch/arm/mach-exynos/clock-exynos4212.c +++ b/arch/arm/mach-exynos/clock-exynos4212.c @@ -26,6 +26,7 @@ #include #include #include +#include #include "common.h" #include "clock-exynos4.h" @@ -39,6 +40,16 @@ static struct sleep_save exynos4212_clock_save[] = { }; #endif +static int exynos4212_clk_ip_isp0_ctrl(struct clk *clk, int enable) +{ + return s5p_gatectrl(EXYNOS4_CLKGATE_IP_ISP0, clk, enable); +} + +static int exynos4212_clk_ip_isp1_ctrl(struct clk *clk, int enable) +{ + return s5p_gatectrl(EXYNOS4_CLKGATE_IP_ISP1, clk, enable); +} + static struct clk *clk_src_mpll_user_list[] = { [0] = &clk_fin_mpll, [1] = &exynos4_clk_mout_mpll.clk, @@ -66,7 +77,22 @@ static struct clksrc_clk clksrcs[] = { }; static struct clk init_clocks_off[] = { - /* nothing here yet */ + { + .name = SYSMMU_CLOCK_NAME, + .devname = SYSMMU_CLOCK_DEVNAME(2d, 14), + .enable = exynos4_clk_ip_dmc_ctrl, + .ctrlbit = (1 << 24), + }, { + .name = SYSMMU_CLOCK_NAME, + .devname = SYSMMU_CLOCK_DEVNAME(isp, 9), + .enable = exynos4212_clk_ip_isp0_ctrl, + .ctrlbit = (7 << 8), + }, { + .name = SYSMMU_CLOCK_NAME2, + .devname = SYSMMU_CLOCK_DEVNAME(isp, 9), + .enable = exynos4212_clk_ip_isp1_ctrl, + .ctrlbit = (1 << 4), + } }; #ifdef CONFIG_PM_SLEEP diff --git a/arch/arm/mach-exynos/clock-exynos5.c b/arch/arm/mach-exynos/clock-exynos5.c index d013982..3320ad1 100644 --- a/arch/arm/mach-exynos/clock-exynos5.c +++ b/arch/arm/mach-exynos/clock-exynos5.c @@ -82,6 +82,11 @@ static int exynos5_clksrc_mask_peric0_ctrl(struct clk *clk, int enable) return s5p_gatectrl(EXYNOS5_CLKSRC_MASK_PERIC0, clk, enable); } +static int exynos5_clk_ip_acp_ctrl(struct clk *clk, int enable) +{ + return s5p_gatectrl(EXYNOS5_CLKGATE_IP_ACP, clk, enable); +} + static int exynos5_clk_ip_core_ctrl(struct clk *clk, int enable) { return s5p_gatectrl(EXYNOS5_CLKGATE_IP_CORE, clk, enable); @@ -127,6 +132,21 @@ static int exynos5_clk_ip_peris_ctrl(struct clk *clk, int enable) return s5p_gatectrl(EXYNOS5_CLKGATE_IP_PERIS, clk, enable); } +static int exynos5_clk_ip_gscl_ctrl(struct clk *clk, int enable) +{ + return s5p_gatectrl(EXYNOS5_CLKGATE_IP_GSCL, clk, enable); +} + +static int exynos5_clk_ip_isp0_ctrl(struct clk *clk, int enable) +{ + return s5p_gatectrl(EXYNOS5_CLKGATE_IP_ISP0, clk, enable); +} + +static int exynos5_clk_ip_isp1_ctrl(struct clk *clk, int enable) +{ + return s5p_gatectrl(EXYNOS5_CLKGATE_IP_ISP1, clk, enable); +} + /* Core list of CMU_CPU side */ static struct clksrc_clk exynos5_clk_mout_apll = { @@ -630,6 +650,76 @@ static struct clk exynos5_init_clocks_off[] = { .parent = &exynos5_clk_aclk_66.clk, .enable = exynos5_clk_ip_peric_ctrl, .ctrlbit = (1 << 14), + }, { + .name = SYSMMU_CLOCK_NAME, + .devname = SYSMMU_CLOCK_DEVNAME(mfc_l, 0), + .enable = &exynos5_clk_ip_mfc_ctrl, + .ctrlbit = (1 << 1), + }, { + .name = SYSMMU_CLOCK_NAME, + .devname = SYSMMU_CLOCK_DEVNAME(mfc_r, 1), + .enable = &exynos5_clk_ip_mfc_ctrl, + .ctrlbit = (1 << 2), + }, { + .name = SYSMMU_CLOCK_NAME, + .devname = SYSMMU_CLOCK_DEVNAME(tv, 2), + .enable = &exynos5_clk_ip_disp1_ctrl, + .ctrlbit = (1 << 9) + }, { + .name = SYSMMU_CLOCK_NAME, + .devname = SYSMMU_CLOCK_DEVNAME(jpeg, 3), + .enable = &exynos5_clk_ip_gen_ctrl, + .ctrlbit = (1 << 7), + }, { + .name = SYSMMU_CLOCK_NAME, + .devname = SYSMMU_CLOCK_DEVNAME(rot, 4), + .enable = &exynos5_clk_ip_gen_ctrl, + .ctrlbit = (1 << 6) + }, { + .name = SYSMMU_CLOCK_NAME, + .devname = SYSMMU_CLOCK_DEVNAME(gsc0, 5), + .enable = &exynos5_clk_ip_gscl_ctrl, + .ctrlbit = (1 << 7), + }, { + .name = SYSMMU_CLOCK_NAME, + .devname = SYSMMU_CLOCK_DEVNAME(gsc1, 6), + .enable = &exynos5_clk_ip_gscl_ctrl, + .ctrlbit = (1 << 8), + }, { + .name = SYSMMU_CLOCK_NAME, + .devname = SYSMMU_CLOCK_DEVNAME(gsc2, 7), + .enable = &exynos5_clk_ip_gscl_ctrl, + .ctrlbit = (1 << 9), + }, { + .name = SYSMMU_CLOCK_NAME, + .devname = SYSMMU_CLOCK_DEVNAME(gsc3, 8), + .enable = &exynos5_clk_ip_gscl_ctrl, + .ctrlbit = (1 << 10), + }, { + .name = SYSMMU_CLOCK_NAME, + .devname = SYSMMU_CLOCK_DEVNAME(isp, 9), + .enable = &exynos5_clk_ip_isp0_ctrl, + .ctrlbit = (0x3F << 8), + }, { + .name = SYSMMU_CLOCK_NAME2, + .devname = SYSMMU_CLOCK_DEVNAME(isp, 9), + .enable = &exynos5_clk_ip_isp1_ctrl, + .ctrlbit = (0xF << 4), + }, { + .name = SYSMMU_CLOCK_NAME, + .devname = SYSMMU_CLOCK_DEVNAME(camif0, 12), + .enable = &exynos5_clk_ip_gscl_ctrl, + .ctrlbit = (1 << 11), + }, { + .name = SYSMMU_CLOCK_NAME, + .devname = SYSMMU_CLOCK_DEVNAME(camif1, 13), + .enable = &exynos5_clk_ip_gscl_ctrl, + .ctrlbit = (1 << 12), + }, { + .name = SYSMMU_CLOCK_NAME, + .devname = SYSMMU_CLOCK_DEVNAME(2d, 14), + .enable = &exynos5_clk_ip_acp_ctrl, + .ctrlbit = (1 << 7) } }; diff --git a/arch/arm/mach-exynos/dev-sysmmu.c b/arch/arm/mach-exynos/dev-sysmmu.c index 781563f..c5b1ea3 100644 --- a/arch/arm/mach-exynos/dev-sysmmu.c +++ b/arch/arm/mach-exynos/dev-sysmmu.c @@ -1,9 +1,9 @@ -/* linux/arch/arm/mach-exynos4/dev-sysmmu.c +/* linux/arch/arm/mach-exynos/dev-sysmmu.c * - * Copyright (c) 2010 Samsung Electronics Co., Ltd. + * Copyright (c) 2010-2012 Samsung Electronics Co., Ltd. * http://www.samsung.com * - * EXYNOS4 - System MMU support + * EXYNOS - System MMU support * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -12,222 +12,263 @@ #include #include -#include + +#include #include #include #include -#include - -/* These names must be equal to the clock names in mach-exynos4/clock.c */ -const char *sysmmu_ips_name[EXYNOS4_SYSMMU_TOTAL_IPNUM] = { - "SYSMMU_MDMA" , - "SYSMMU_SSS" , - "SYSMMU_FIMC0" , - "SYSMMU_FIMC1" , - "SYSMMU_FIMC2" , - "SYSMMU_FIMC3" , - "SYSMMU_JPEG" , - "SYSMMU_FIMD0" , - "SYSMMU_FIMD1" , - "SYSMMU_PCIe" , - "SYSMMU_G2D" , - "SYSMMU_ROTATOR", - "SYSMMU_MDMA2" , - "SYSMMU_TV" , - "SYSMMU_MFC_L" , - "SYSMMU_MFC_R" , -}; -static struct resource exynos4_sysmmu_resource[] = { - [0] = { - .start = EXYNOS4_PA_SYSMMU_MDMA, - .end = EXYNOS4_PA_SYSMMU_MDMA + SZ_64K - 1, - .flags = IORESOURCE_MEM, - }, - [1] = { - .start = IRQ_SYSMMU_MDMA0_0, - .end = IRQ_SYSMMU_MDMA0_0, - .flags = IORESOURCE_IRQ, - }, - [2] = { - .start = EXYNOS4_PA_SYSMMU_SSS, - .end = EXYNOS4_PA_SYSMMU_SSS + SZ_64K - 1, - .flags = IORESOURCE_MEM, - }, - [3] = { - .start = IRQ_SYSMMU_SSS_0, - .end = IRQ_SYSMMU_SSS_0, - .flags = IORESOURCE_IRQ, - }, - [4] = { - .start = EXYNOS4_PA_SYSMMU_FIMC0, - .end = EXYNOS4_PA_SYSMMU_FIMC0 + SZ_64K - 1, - .flags = IORESOURCE_MEM, - }, - [5] = { - .start = IRQ_SYSMMU_FIMC0_0, - .end = IRQ_SYSMMU_FIMC0_0, - .flags = IORESOURCE_IRQ, - }, - [6] = { - .start = EXYNOS4_PA_SYSMMU_FIMC1, - .end = EXYNOS4_PA_SYSMMU_FIMC1 + SZ_64K - 1, - .flags = IORESOURCE_MEM, - }, - [7] = { - .start = IRQ_SYSMMU_FIMC1_0, - .end = IRQ_SYSMMU_FIMC1_0, - .flags = IORESOURCE_IRQ, - }, - [8] = { - .start = EXYNOS4_PA_SYSMMU_FIMC2, - .end = EXYNOS4_PA_SYSMMU_FIMC2 + SZ_64K - 1, - .flags = IORESOURCE_MEM, - }, - [9] = { - .start = IRQ_SYSMMU_FIMC2_0, - .end = IRQ_SYSMMU_FIMC2_0, - .flags = IORESOURCE_IRQ, - }, - [10] = { - .start = EXYNOS4_PA_SYSMMU_FIMC3, - .end = EXYNOS4_PA_SYSMMU_FIMC3 + SZ_64K - 1, - .flags = IORESOURCE_MEM, - }, - [11] = { - .start = IRQ_SYSMMU_FIMC3_0, - .end = IRQ_SYSMMU_FIMC3_0, - .flags = IORESOURCE_IRQ, - }, - [12] = { - .start = EXYNOS4_PA_SYSMMU_JPEG, - .end = EXYNOS4_PA_SYSMMU_JPEG + SZ_64K - 1, - .flags = IORESOURCE_MEM, - }, - [13] = { - .start = IRQ_SYSMMU_JPEG_0, - .end = IRQ_SYSMMU_JPEG_0, - .flags = IORESOURCE_IRQ, - }, - [14] = { - .start = EXYNOS4_PA_SYSMMU_FIMD0, - .end = EXYNOS4_PA_SYSMMU_FIMD0 + SZ_64K - 1, - .flags = IORESOURCE_MEM, - }, - [15] = { - .start = IRQ_SYSMMU_LCD0_M0_0, - .end = IRQ_SYSMMU_LCD0_M0_0, - .flags = IORESOURCE_IRQ, - }, - [16] = { - .start = EXYNOS4_PA_SYSMMU_FIMD1, - .end = EXYNOS4_PA_SYSMMU_FIMD1 + SZ_64K - 1, - .flags = IORESOURCE_MEM, - }, - [17] = { - .start = IRQ_SYSMMU_LCD1_M1_0, - .end = IRQ_SYSMMU_LCD1_M1_0, - .flags = IORESOURCE_IRQ, - }, - [18] = { - .start = EXYNOS4_PA_SYSMMU_PCIe, - .end = EXYNOS4_PA_SYSMMU_PCIe + SZ_64K - 1, - .flags = IORESOURCE_MEM, - }, - [19] = { - .start = IRQ_SYSMMU_PCIE_0, - .end = IRQ_SYSMMU_PCIE_0, - .flags = IORESOURCE_IRQ, - }, - [20] = { - .start = EXYNOS4_PA_SYSMMU_G2D, - .end = EXYNOS4_PA_SYSMMU_G2D + SZ_64K - 1, - .flags = IORESOURCE_MEM, - }, - [21] = { - .start = IRQ_SYSMMU_2D_0, - .end = IRQ_SYSMMU_2D_0, - .flags = IORESOURCE_IRQ, - }, - [22] = { - .start = EXYNOS4_PA_SYSMMU_ROTATOR, - .end = EXYNOS4_PA_SYSMMU_ROTATOR + SZ_64K - 1, - .flags = IORESOURCE_MEM, - }, - [23] = { - .start = IRQ_SYSMMU_ROTATOR_0, - .end = IRQ_SYSMMU_ROTATOR_0, - .flags = IORESOURCE_IRQ, - }, - [24] = { - .start = EXYNOS4_PA_SYSMMU_MDMA2, - .end = EXYNOS4_PA_SYSMMU_MDMA2 + SZ_64K - 1, - .flags = IORESOURCE_MEM, - }, - [25] = { - .start = IRQ_SYSMMU_MDMA1_0, - .end = IRQ_SYSMMU_MDMA1_0, - .flags = IORESOURCE_IRQ, - }, - [26] = { - .start = EXYNOS4_PA_SYSMMU_TV, - .end = EXYNOS4_PA_SYSMMU_TV + SZ_64K - 1, - .flags = IORESOURCE_MEM, - }, - [27] = { - .start = IRQ_SYSMMU_TV_M0_0, - .end = IRQ_SYSMMU_TV_M0_0, - .flags = IORESOURCE_IRQ, - }, - [28] = { - .start = EXYNOS4_PA_SYSMMU_MFC_L, - .end = EXYNOS4_PA_SYSMMU_MFC_L + SZ_64K - 1, - .flags = IORESOURCE_MEM, - }, - [29] = { - .start = IRQ_SYSMMU_MFC_M0_0, - .end = IRQ_SYSMMU_MFC_M0_0, - .flags = IORESOURCE_IRQ, - }, - [30] = { - .start = EXYNOS4_PA_SYSMMU_MFC_R, - .end = EXYNOS4_PA_SYSMMU_MFC_R + SZ_64K - 1, - .flags = IORESOURCE_MEM, - }, - [31] = { - .start = IRQ_SYSMMU_MFC_M1_0, - .end = IRQ_SYSMMU_MFC_M1_0, - .flags = IORESOURCE_IRQ, - }, -}; +static u64 exynos_sysmmu_dma_mask = DMA_BIT_MASK(32); + +#define SYSMMU_PLATFORM_DEVICE(ipname, devid) \ +static struct sysmmu_platform_data platdata_##ipname = { \ + .dbgname = #ipname, \ +}; \ +struct platform_device SYSMMU_PLATDEV(ipname) = \ +{ \ + .name = SYSMMU_DEVNAME_BASE, \ + .id = devid, \ + .dev = { \ + .dma_mask = &exynos_sysmmu_dma_mask, \ + .coherent_dma_mask = DMA_BIT_MASK(32), \ + .platform_data = &platdata_##ipname, \ + }, \ +} + +SYSMMU_PLATFORM_DEVICE(mfc_l, 0); +SYSMMU_PLATFORM_DEVICE(mfc_r, 1); +SYSMMU_PLATFORM_DEVICE(tv, 2); +SYSMMU_PLATFORM_DEVICE(jpeg, 3); +SYSMMU_PLATFORM_DEVICE(rot, 4); +SYSMMU_PLATFORM_DEVICE(fimc0, 5); /* fimc* and gsc* exist exclusively */ +SYSMMU_PLATFORM_DEVICE(fimc1, 6); +SYSMMU_PLATFORM_DEVICE(fimc2, 7); +SYSMMU_PLATFORM_DEVICE(fimc3, 8); +SYSMMU_PLATFORM_DEVICE(gsc0, 5); +SYSMMU_PLATFORM_DEVICE(gsc1, 6); +SYSMMU_PLATFORM_DEVICE(gsc2, 7); +SYSMMU_PLATFORM_DEVICE(gsc3, 8); +SYSMMU_PLATFORM_DEVICE(isp, 9); +SYSMMU_PLATFORM_DEVICE(fimd0, 10); +SYSMMU_PLATFORM_DEVICE(fimd1, 11); +SYSMMU_PLATFORM_DEVICE(camif0, 12); +SYSMMU_PLATFORM_DEVICE(camif1, 13); +SYSMMU_PLATFORM_DEVICE(2d, 14); + +#define SYSMMU_RESOURCE_NAME(core, ipname) sysmmures_##core##_##ipname + +#define SYSMMU_RESOURCE(core, ipname) \ + static struct resource SYSMMU_RESOURCE_NAME(core, ipname)[] __initdata = + +#define DEFINE_SYSMMU_RESOURCE(core, mem, irq) \ + DEFINE_RES_MEM_NAMED(core##_PA_SYSMMU_##mem, SZ_4K, #mem), \ + DEFINE_RES_IRQ_NAMED(core##_IRQ_SYSMMU_##irq##_0, #mem) + +#define SYSMMU_RESOURCE_DEFINE(core, ipname, mem, irq) \ + SYSMMU_RESOURCE(core, ipname) { \ + DEFINE_SYSMMU_RESOURCE(core, mem, irq) \ + } -struct platform_device exynos4_device_sysmmu = { - .name = "s5p-sysmmu", - .id = 32, - .num_resources = ARRAY_SIZE(exynos4_sysmmu_resource), - .resource = exynos4_sysmmu_resource, +struct sysmmu_resource_map { + struct platform_device *pdev; + struct resource *res; + u32 rnum; + struct device *pdd; + char *clocknames; }; -EXPORT_SYMBOL(exynos4_device_sysmmu); -static struct clk *sysmmu_clk[S5P_SYSMMU_TOTAL_IPNUM]; -void sysmmu_clk_init(struct device *dev, sysmmu_ips ips) -{ - sysmmu_clk[ips] = clk_get(dev, sysmmu_ips_name[ips]); - if (IS_ERR(sysmmu_clk[ips])) - sysmmu_clk[ips] = NULL; - else - clk_put(sysmmu_clk[ips]); +#define SYSMMU_RESOURCE_MAPPING(core, ipname, resname) { \ + .pdev = &SYSMMU_PLATDEV(ipname), \ + .res = SYSMMU_RESOURCE_NAME(EXYNOS##core, resname), \ + .rnum = ARRAY_SIZE(SYSMMU_RESOURCE_NAME(EXYNOS##core, resname)),\ + .clocknames = SYSMMU_CLOCK_NAME, \ } -void sysmmu_clk_enable(sysmmu_ips ips) -{ - if (sysmmu_clk[ips]) - clk_enable(sysmmu_clk[ips]); +#define SYSMMU_RESOURCE_MAPPING_MC(core, ipname, resname, pdata) { \ + .pdev = &SYSMMU_PLATDEV(ipname), \ + .res = SYSMMU_RESOURCE_NAME(EXYNOS##core, resname), \ + .rnum = ARRAY_SIZE(SYSMMU_RESOURCE_NAME(EXYNOS##core, resname)),\ + .clocknames = SYSMMU_CLOCK_NAME "," SYSMMU_CLOCK_NAME2, \ +} + +#ifdef CONFIG_EXYNOS_DEV_PD +#define SYSMMU_RESOURCE_MAPPING_PD(core, ipname, resname, pd) { \ + .pdev = &SYSMMU_PLATDEV(ipname), \ + .res = &SYSMMU_RESOURCE_NAME(EXYNOS##core, resname), \ + .rnum = ARRAY_SIZE(SYSMMU_RESOURCE_NAME(EXYNOS##core, resname)),\ + .clocknames = SYSMMU_CLOCK_NAME, \ + .pdd = &exynos##core##_device_pd[pd].dev, \ +} + +#define SYSMMU_RESOURCE_MAPPING_MCPD(core, ipname, resname, pd, pdata) {\ + .pdev = &SYSMMU_PLATDEV(ipname), \ + .res = &SYSMMU_RESOURCE_NAME(EXYNOS##core, resname), \ + .rnum = ARRAY_SIZE(SYSMMU_RESOURCE_NAME(EXYNOS##core, resname)),\ + .clocknames = SYSMMU_CLOCK_NAME "," SYSMMU_CLOCK_NAME2, \ + .pdd = &exynos##core##_device_pd[pd].dev, \ } +#else +#define SYSMMU_RESOURCE_MAPPING_PD(core, ipname, resname, pd) \ + SYSMMU_RESOURCE_MAPPING(core, ipname, resname) +#define SYSMMU_RESOURCE_MAPPING_MCPD(core, ipname, resname, pd, pdata) \ + SYSMMU_RESOURCE_MAPPING_MC(core, ipname, resname, pdata) + +#endif /* CONFIG_EXYNOS_DEV_PD */ + +#ifdef CONFIG_ARCH_EXYNOS4 +SYSMMU_RESOURCE_DEFINE(EXYNOS4, fimc0, FIMC0, FIMC0); +SYSMMU_RESOURCE_DEFINE(EXYNOS4, fimc1, FIMC1, FIMC1); +SYSMMU_RESOURCE_DEFINE(EXYNOS4, fimc2, FIMC2, FIMC2); +SYSMMU_RESOURCE_DEFINE(EXYNOS4, fimc3, FIMC3, FIMC3); +SYSMMU_RESOURCE_DEFINE(EXYNOS4, jpeg, JPEG, JPEG); +SYSMMU_RESOURCE_DEFINE(EXYNOS4, 2d, G2D, 2D); +SYSMMU_RESOURCE_DEFINE(EXYNOS4, tv, TV, TV_M0); +SYSMMU_RESOURCE_DEFINE(EXYNOS4, 2d_acp, 2D_ACP, 2D); +SYSMMU_RESOURCE_DEFINE(EXYNOS4, rot, ROTATOR, ROTATOR); +SYSMMU_RESOURCE_DEFINE(EXYNOS4, fimd0, FIMD0, LCD0_M0); +SYSMMU_RESOURCE_DEFINE(EXYNOS4, fimd1, FIMD1, LCD1_M1); +SYSMMU_RESOURCE_DEFINE(EXYNOS4, flite0, FIMC_LITE0, FIMC_LITE0); +SYSMMU_RESOURCE_DEFINE(EXYNOS4, flite1, FIMC_LITE1, FIMC_LITE1); +SYSMMU_RESOURCE_DEFINE(EXYNOS4, mfc_r, MFC_R, MFC_M0); +SYSMMU_RESOURCE_DEFINE(EXYNOS4, mfc_l, MFC_L, MFC_M1); +SYSMMU_RESOURCE(EXYNOS4, isp) { + DEFINE_SYSMMU_RESOURCE(EXYNOS4, FIMC_ISP, FIMC_ISP), + DEFINE_SYSMMU_RESOURCE(EXYNOS4, FIMC_DRC, FIMC_DRC), + DEFINE_SYSMMU_RESOURCE(EXYNOS4, FIMC_FD, FIMC_FD), + DEFINE_SYSMMU_RESOURCE(EXYNOS4, ISPCPU, FIMC_CX), +}; + +static struct sysmmu_resource_map sysmmu_resmap4[] __initdata = { + SYSMMU_RESOURCE_MAPPING_PD(4, fimc0, fimc0, PD_CAM), + SYSMMU_RESOURCE_MAPPING_PD(4, fimc1, fimc1, PD_CAM), + SYSMMU_RESOURCE_MAPPING_PD(4, fimc2, fimc2, PD_CAM), + SYSMMU_RESOURCE_MAPPING_PD(4, fimc3, fimc3, PD_CAM), + SYSMMU_RESOURCE_MAPPING_PD(4, tv, tv, PD_TV), + SYSMMU_RESOURCE_MAPPING_PD(4, mfc_r, mfc_r, PD_MFC), + SYSMMU_RESOURCE_MAPPING_PD(4, mfc_l, mfc_l, PD_MFC), + SYSMMU_RESOURCE_MAPPING_PD(4, rot, rot, PD_LCD0), + SYSMMU_RESOURCE_MAPPING_PD(4, jpeg, jpeg, PD_CAM), + SYSMMU_RESOURCE_MAPPING_PD(4, fimd0, fimd0, PD_LCD0), +}; + +static struct sysmmu_resource_map sysmmu_resmap4210[] __initdata = { + SYSMMU_RESOURCE_MAPPING_PD(4, 2d, 2d, PD_LCD0), + SYSMMU_RESOURCE_MAPPING_PD(4, fimd1, fimd1, PD_LCD1), +}; + +static struct sysmmu_resource_map sysmmu_resmap4212[] __initdata = { + SYSMMU_RESOURCE_MAPPING(4, 2d, 2d_acp), + SYSMMU_RESOURCE_MAPPING_PD(4, camif0, flite0, PD_ISP), + SYSMMU_RESOURCE_MAPPING_PD(4, camif1, flite1, PD_ISP), + SYSMMU_RESOURCE_MAPPING_PD(4, isp, isp, PD_ISP), +}; +#endif /* CONFIG_ARCH_EXYNOS4 */ -void sysmmu_clk_disable(sysmmu_ips ips) +#ifdef CONFIG_ARCH_EXYNOS5 +SYSMMU_RESOURCE_DEFINE(EXYNOS5, jpeg, JPEG, JPEG); +SYSMMU_RESOURCE_DEFINE(EXYNOS5, fimd1, FIMD1, FIMD1); +SYSMMU_RESOURCE_DEFINE(EXYNOS5, 2d, 2D, 2D); +SYSMMU_RESOURCE_DEFINE(EXYNOS5, rot, ROTATOR, ROTATOR); +SYSMMU_RESOURCE_DEFINE(EXYNOS5, tv, TV, TV); +SYSMMU_RESOURCE_DEFINE(EXYNOS5, flite0, LITE0, LITE0); +SYSMMU_RESOURCE_DEFINE(EXYNOS5, flite1, LITE1, LITE1); +SYSMMU_RESOURCE_DEFINE(EXYNOS5, gsc0, GSC0, GSC0); +SYSMMU_RESOURCE_DEFINE(EXYNOS5, gsc1, GSC1, GSC1); +SYSMMU_RESOURCE_DEFINE(EXYNOS5, gsc2, GSC2, GSC2); +SYSMMU_RESOURCE_DEFINE(EXYNOS5, gsc3, GSC3, GSC3); +SYSMMU_RESOURCE_DEFINE(EXYNOS5, mfc_r, MFC_R, MFC_R); +SYSMMU_RESOURCE_DEFINE(EXYNOS5, mfc_l, MFC_L, MFC_L); +SYSMMU_RESOURCE(EXYNOS5, isp) { + DEFINE_SYSMMU_RESOURCE(EXYNOS5, ISP, ISP), + DEFINE_SYSMMU_RESOURCE(EXYNOS5, DRC, DRC), + DEFINE_SYSMMU_RESOURCE(EXYNOS5, FD, FD), + DEFINE_SYSMMU_RESOURCE(EXYNOS5, ISPCPU, MCUISP), + DEFINE_SYSMMU_RESOURCE(EXYNOS5, SCALERC, SCALERCISP), + DEFINE_SYSMMU_RESOURCE(EXYNOS5, SCALERP, SCALERPISP), + DEFINE_SYSMMU_RESOURCE(EXYNOS5, ODC, ODC), + DEFINE_SYSMMU_RESOURCE(EXYNOS5, DIS0, DIS0), + DEFINE_SYSMMU_RESOURCE(EXYNOS5, DIS1, DIS1), + DEFINE_SYSMMU_RESOURCE(EXYNOS5, 3DNR, 3DNR), +}; + +static struct sysmmu_resource_map sysmmu_resmap5[] __initdata = { + SYSMMU_RESOURCE_MAPPING(5, jpeg, jpeg), + SYSMMU_RESOURCE_MAPPING(5, fimd1, fimd1), + SYSMMU_RESOURCE_MAPPING(5, 2d, 2d), + SYSMMU_RESOURCE_MAPPING(5, rot, rot), + SYSMMU_RESOURCE_MAPPING_PD(5, tv, tv, PD_DISP1), + SYSMMU_RESOURCE_MAPPING_PD(5, camif0, flite0, PD_GSCL), + SYSMMU_RESOURCE_MAPPING_PD(5, camif1, flite1, PD_GSCL), + SYSMMU_RESOURCE_MAPPING_PD(5, gsc0, gsc0, PD_GSCL), + SYSMMU_RESOURCE_MAPPING_PD(5, gsc1, gsc1, PD_GSCL), + SYSMMU_RESOURCE_MAPPING_PD(5, gsc2, gsc2, PD_GSCL), + SYSMMU_RESOURCE_MAPPING_PD(5, gsc3, gsc3, PD_GSCL), + SYSMMU_RESOURCE_MAPPING_PD(5, mfc_r, mfc_r, PD_MFC), + SYSMMU_RESOURCE_MAPPING_PD(5, mfc_l, mfc_l, PD_MFC), + SYSMMU_RESOURCE_MAPPING_MCPD(5, isp, isp, PD_ISP, mc_platdata), +}; +#endif /* CONFIG_ARCH_EXYNOS5 */ + +static int __init init_sysmmu_platform_device(void) { - if (sysmmu_clk[ips]) - clk_disable(sysmmu_clk[ips]); + int i, j; + struct sysmmu_resource_map *resmap[2] = {NULL, NULL}; + int nmap[2] = {0, 0}; + +#ifdef CONFIG_ARCH_EXYNOS5 + if (soc_is_exynos5250()) { + resmap[0] = sysmmu_resmap5; + nmap[0] = ARRAY_SIZE(sysmmu_resmap5); + nmap[1] = 0; + } +#endif + +#ifdef CONFIG_ARCH_EXYNOS4 + if (resmap[0] == NULL) { + resmap[0] = sysmmu_resmap4; + nmap[0] = ARRAY_SIZE(sysmmu_resmap4); + } + + if (soc_is_exynos4210()) { + resmap[1] = sysmmu_resmap4210; + nmap[1] = ARRAY_SIZE(sysmmu_resmap4210); + } + + if (soc_is_exynos4412() || soc_is_exynos4212()) { + resmap[1] = sysmmu_resmap4212; + nmap[1] = ARRAY_SIZE(sysmmu_resmap4212); + } +#endif + + for (j = 0; j < 2; j++) { + for (i = 0; i < nmap[j]; i++) { + struct sysmmu_resource_map *map; + struct sysmmu_platform_data *platdata; + + map = &resmap[j][i]; + + map->pdev->dev.parent = map->pdd; + + platdata = map->pdev->dev.platform_data; + platdata->clockname = map->clocknames; + + if (platform_device_add_resources(map->pdev, map->res, + map->rnum)) { + pr_err("%s: Failed to add device resources for " + "%s.%d\n", __func__, + map->pdev->name, map->pdev->id); + continue; + } + + if (platform_device_register(map->pdev)) { + pr_err("%s: Failed to register %s.%d\n", + __func__, map->pdev->name, + map->pdev->id); + } + } + } + + return 0; } +arch_initcall(init_sysmmu_platform_device); diff --git a/arch/arm/mach-exynos/include/mach/irqs.h b/arch/arm/mach-exynos/include/mach/irqs.h index 9bee853..f140e1a 100644 --- a/arch/arm/mach-exynos/include/mach/irqs.h +++ b/arch/arm/mach-exynos/include/mach/irqs.h @@ -154,6 +154,13 @@ #define EXYNOS4_IRQ_SYSMMU_MFC_M1_0 COMBINER_IRQ(5, 6) #define EXYNOS4_IRQ_SYSMMU_PCIE_0 COMBINER_IRQ(5, 7) +#define EXYNOS4_IRQ_SYSMMU_FIMC_LITE0_0 COMBINER_IRQ(16, 0) +#define EXYNOS4_IRQ_SYSMMU_FIMC_LITE1_0 COMBINER_IRQ(16, 1) +#define EXYNOS4_IRQ_SYSMMU_FIMC_ISP_0 COMBINER_IRQ(16, 2) +#define EXYNOS4_IRQ_SYSMMU_FIMC_DRC_0 COMBINER_IRQ(16, 3) +#define EXYNOS4_IRQ_SYSMMU_FIMC_FD_0 COMBINER_IRQ(16, 4) +#define EXYNOS4_IRQ_SYSMMU_FIMC_CX_0 COMBINER_IRQ(16, 5) + #define EXYNOS4_IRQ_FIMD0_FIFO COMBINER_IRQ(11, 0) #define EXYNOS4_IRQ_FIMD0_VSYNC COMBINER_IRQ(11, 1) #define EXYNOS4_IRQ_FIMD0_SYSTEM COMBINER_IRQ(11, 2) @@ -218,24 +225,6 @@ #define IRQ_KEYPAD EXYNOS4_IRQ_KEYPAD #define IRQ_PMU EXYNOS4_IRQ_PMU -#define IRQ_SYSMMU_MDMA0_0 EXYNOS4_IRQ_SYSMMU_MDMA0_0 -#define IRQ_SYSMMU_SSS_0 EXYNOS4_IRQ_SYSMMU_SSS_0 -#define IRQ_SYSMMU_FIMC0_0 EXYNOS4_IRQ_SYSMMU_FIMC0_0 -#define IRQ_SYSMMU_FIMC1_0 EXYNOS4_IRQ_SYSMMU_FIMC1_0 -#define IRQ_SYSMMU_FIMC2_0 EXYNOS4_IRQ_SYSMMU_FIMC2_0 -#define IRQ_SYSMMU_FIMC3_0 EXYNOS4_IRQ_SYSMMU_FIMC3_0 -#define IRQ_SYSMMU_JPEG_0 EXYNOS4_IRQ_SYSMMU_JPEG_0 -#define IRQ_SYSMMU_2D_0 EXYNOS4_IRQ_SYSMMU_2D_0 - -#define IRQ_SYSMMU_ROTATOR_0 EXYNOS4_IRQ_SYSMMU_ROTATOR_0 -#define IRQ_SYSMMU_MDMA1_0 EXYNOS4_IRQ_SYSMMU_MDMA1_0 -#define IRQ_SYSMMU_LCD0_M0_0 EXYNOS4_IRQ_SYSMMU_LCD0_M0_0 -#define IRQ_SYSMMU_LCD1_M1_0 EXYNOS4_IRQ_SYSMMU_LCD1_M1_0 -#define IRQ_SYSMMU_TV_M0_0 EXYNOS4_IRQ_SYSMMU_TV_M0_0 -#define IRQ_SYSMMU_MFC_M0_0 EXYNOS4_IRQ_SYSMMU_MFC_M0_0 -#define IRQ_SYSMMU_MFC_M1_0 EXYNOS4_IRQ_SYSMMU_MFC_M1_0 -#define IRQ_SYSMMU_PCIE_0 EXYNOS4_IRQ_SYSMMU_PCIE_0 - #define IRQ_FIMD0_FIFO EXYNOS4_IRQ_FIMD0_FIFO #define IRQ_FIMD0_VSYNC EXYNOS4_IRQ_FIMD0_VSYNC #define IRQ_FIMD0_SYSTEM EXYNOS4_IRQ_FIMD0_SYSTEM diff --git a/arch/arm/mach-exynos/include/mach/map.h b/arch/arm/mach-exynos/include/mach/map.h index 024d38f..69f2ea6 100644 --- a/arch/arm/mach-exynos/include/mach/map.h +++ b/arch/arm/mach-exynos/include/mach/map.h @@ -91,6 +91,7 @@ #define EXYNOS4_PA_PDMA1 0x12690000 #define EXYNOS4_PA_SYSMMU_MDMA 0x10A40000 +#define EXYNOS4_PA_SYSMMU_2D_ACP 0x10A40000 #define EXYNOS4_PA_SYSMMU_SSS 0x10A50000 #define EXYNOS4_PA_SYSMMU_FIMC0 0x11A20000 #define EXYNOS4_PA_SYSMMU_FIMC1 0x11A30000 @@ -99,6 +100,12 @@ #define EXYNOS4_PA_SYSMMU_JPEG 0x11A60000 #define EXYNOS4_PA_SYSMMU_FIMD0 0x11E20000 #define EXYNOS4_PA_SYSMMU_FIMD1 0x12220000 +#define EXYNOS4_PA_SYSMMU_FIMC_ISP 0x12260000 +#define EXYNOS4_PA_SYSMMU_FIMC_DRC 0x12270000 +#define EXYNOS4_PA_SYSMMU_FIMC_FD 0x122A0000 +#define EXYNOS4_PA_SYSMMU_ISPCPU 0x122B0000 +#define EXYNOS4_PA_SYSMMU_FIMC_LITE0 0x123B0000 +#define EXYNOS4_PA_SYSMMU_FIMC_LITE1 0x123C0000 #define EXYNOS4_PA_SYSMMU_PCIe 0x12620000 #define EXYNOS4_PA_SYSMMU_G2D 0x12A20000 #define EXYNOS4_PA_SYSMMU_ROTATOR 0x12A30000 @@ -106,6 +113,37 @@ #define EXYNOS4_PA_SYSMMU_TV 0x12E20000 #define EXYNOS4_PA_SYSMMU_MFC_L 0x13620000 #define EXYNOS4_PA_SYSMMU_MFC_R 0x13630000 + +#define EXYNOS5_PA_SYSMMU_MDMA1 0x10A40000 +#define EXYNOS5_PA_SYSMMU_SSS 0x10A50000 +#define EXYNOS5_PA_SYSMMU_2D 0x10A60000 +#define EXYNOS5_PA_SYSMMU_MFC_L 0x11200000 +#define EXYNOS5_PA_SYSMMU_MFC_R 0x11210000 +#define EXYNOS5_PA_SYSMMU_ROTATOR 0x11D40000 +#define EXYNOS5_PA_SYSMMU_MDMA2 0x11D50000 +#define EXYNOS5_PA_SYSMMU_JPEG 0x11F20000 +#define EXYNOS5_PA_SYSMMU_IOP 0x12360000 +#define EXYNOS5_PA_SYSMMU_RTIC 0x12370000 +#define EXYNOS5_PA_SYSMMU_GPS 0x12630000 +#define EXYNOS5_PA_SYSMMU_ISP 0x13260000 +#define EXYNOS5_PA_SYSMMU_DRC 0x12370000 +#define EXYNOS5_PA_SYSMMU_SCALERC 0x13280000 +#define EXYNOS5_PA_SYSMMU_SCALERP 0x13290000 +#define EXYNOS5_PA_SYSMMU_FD 0x132A0000 +#define EXYNOS5_PA_SYSMMU_ISPCPU 0x132B0000 +#define EXYNOS5_PA_SYSMMU_ODC 0x132C0000 +#define EXYNOS5_PA_SYSMMU_DIS0 0x132D0000 +#define EXYNOS5_PA_SYSMMU_DIS1 0x132E0000 +#define EXYNOS5_PA_SYSMMU_3DNR 0x132F0000 +#define EXYNOS5_PA_SYSMMU_LITE0 0x13C40000 +#define EXYNOS5_PA_SYSMMU_LITE1 0x13C50000 +#define EXYNOS5_PA_SYSMMU_GSC0 0x13E80000 +#define EXYNOS5_PA_SYSMMU_GSC1 0x13E90000 +#define EXYNOS5_PA_SYSMMU_GSC2 0x13EA0000 +#define EXYNOS5_PA_SYSMMU_GSC3 0x13EB0000 +#define EXYNOS5_PA_SYSMMU_FIMD1 0x14640000 +#define EXYNOS5_PA_SYSMMU_TV 0x14650000 + #define EXYNOS4_PA_SPI0 0x13920000 #define EXYNOS4_PA_SPI1 0x13930000 #define EXYNOS4_PA_SPI2 0x13940000 diff --git a/arch/arm/mach-exynos/include/mach/regs-clock.h b/arch/arm/mach-exynos/include/mach/regs-clock.h index e141c1f..7395236 100644 --- a/arch/arm/mach-exynos/include/mach/regs-clock.h +++ b/arch/arm/mach-exynos/include/mach/regs-clock.h @@ -135,6 +135,9 @@ #define EXYNOS4_CLKGATE_SCLKCPU EXYNOS_CLKREG(0x14800) #define EXYNOS4_CLKGATE_IP_CPU EXYNOS_CLKREG(0x14900) +#define EXYNOS4_CLKGATE_IP_ISP0 EXYNOS_CLKREG(0x18800) +#define EXYNOS4_CLKGATE_IP_ISP1 EXYNOS_CLKREG(0x18804) + #define EXYNOS4_APLL_LOCKTIME (0x1C20) /* 300us */ #define EXYNOS4_APLLCON0_ENABLE_SHIFT (31) @@ -297,6 +300,8 @@ #define EXYNOS5_CLKDIV_PERIC0 EXYNOS_CLKREG(0x10558) #define EXYNOS5_CLKGATE_IP_ACP EXYNOS_CLKREG(0x08800) +#define EXYNOS5_CLKGATE_IP_ISP0 EXYNOS_CLKREG(0x0C800) +#define EXYNOS5_CLKGATE_IP_ISP1 EXYNOS_CLKREG(0x0C804) #define EXYNOS5_CLKGATE_IP_GSCL EXYNOS_CLKREG(0x10920) #define EXYNOS5_CLKGATE_IP_DISP1 EXYNOS_CLKREG(0x10928) #define EXYNOS5_CLKGATE_IP_MFC EXYNOS_CLKREG(0x1092C) diff --git a/arch/arm/mach-exynos/include/mach/regs-sysmmu.h b/arch/arm/mach-exynos/include/mach/regs-sysmmu.h deleted file mode 100644 index 68ff6ad..0000000 --- a/arch/arm/mach-exynos/include/mach/regs-sysmmu.h +++ /dev/null @@ -1,28 +0,0 @@ -/* linux/arch/arm/mach-exynos4/include/mach/regs-sysmmu.h - * - * Copyright (c) 2010-2011 Samsung Electronics Co., Ltd. - * http://www.samsung.com - * - * EXYNOS4 - System MMU register - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. -*/ - -#ifndef __ASM_ARCH_REGS_SYSMMU_H -#define __ASM_ARCH_REGS_SYSMMU_H __FILE__ - -#define S5P_MMU_CTRL 0x000 -#define S5P_MMU_CFG 0x004 -#define S5P_MMU_STATUS 0x008 -#define S5P_MMU_FLUSH 0x00C -#define S5P_PT_BASE_ADDR 0x014 -#define S5P_INT_STATUS 0x018 -#define S5P_INT_CLEAR 0x01C -#define S5P_PAGE_FAULT_ADDR 0x024 -#define S5P_AW_FAULT_ADDR 0x028 -#define S5P_AR_FAULT_ADDR 0x02C -#define S5P_DEFAULT_SLAVE_ADDR 0x030 - -#endif /* __ASM_ARCH_REGS_SYSMMU_H */ diff --git a/arch/arm/mach-exynos/include/mach/sysmmu.h b/arch/arm/mach-exynos/include/mach/sysmmu.h index 6a5fbb5..998daf2 100644 --- a/arch/arm/mach-exynos/include/mach/sysmmu.h +++ b/arch/arm/mach-exynos/include/mach/sysmmu.h @@ -1,46 +1,66 @@ -/* linux/arch/arm/mach-exynos4/include/mach/sysmmu.h - * - * Copyright (c) 2010-2011 Samsung Electronics Co., Ltd. +/* + * Copyright (c) 2011-2012 Samsung Electronics Co., Ltd. * http://www.samsung.com * - * Samsung sysmmu driver for EXYNOS4 + * EXYNOS - System MMU support * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. -*/ - -#ifndef __ASM_ARM_ARCH_SYSMMU_H -#define __ASM_ARM_ARCH_SYSMMU_H __FILE__ - -enum exynos4_sysmmu_ips { - SYSMMU_MDMA, - SYSMMU_SSS, - SYSMMU_FIMC0, - SYSMMU_FIMC1, - SYSMMU_FIMC2, - SYSMMU_FIMC3, - SYSMMU_JPEG, - SYSMMU_FIMD0, - SYSMMU_FIMD1, - SYSMMU_PCIe, - SYSMMU_G2D, - SYSMMU_ROTATOR, - SYSMMU_MDMA2, - SYSMMU_TV, - SYSMMU_MFC_L, - SYSMMU_MFC_R, - EXYNOS4_SYSMMU_TOTAL_IPNUM, + */ + +#ifndef _ARM_MACH_EXYNOS_SYSMMU_H_ +#define _ARM_MACH_EXYNOS_SYSMMU_H_ + +struct sysmmu_platform_data { + char *dbgname; + /* comma(,) separated list of clock names for clock gating */ + char *clockname; }; -#define S5P_SYSMMU_TOTAL_IPNUM EXYNOS4_SYSMMU_TOTAL_IPNUM +#define SYSMMU_DEVNAME_BASE "exynos-sysmmu" + +#define SYSMMU_CLOCK_NAME "sysmmu" +#define SYSMMU_CLOCK_NAME2 "sysmmu_mc" + +#ifdef CONFIG_EXYNOS_DEV_SYSMMU +#include +struct platform_device; + +#define SYSMMU_PLATDEV(ipname) exynos_device_sysmmu_##ipname + +extern struct platform_device SYSMMU_PLATDEV(mfc_l); +extern struct platform_device SYSMMU_PLATDEV(mfc_r); +extern struct platform_device SYSMMU_PLATDEV(tv); +extern struct platform_device SYSMMU_PLATDEV(jpeg); +extern struct platform_device SYSMMU_PLATDEV(rot); +extern struct platform_device SYSMMU_PLATDEV(fimc0); +extern struct platform_device SYSMMU_PLATDEV(fimc1); +extern struct platform_device SYSMMU_PLATDEV(fimc2); +extern struct platform_device SYSMMU_PLATDEV(fimc3); +extern struct platform_device SYSMMU_PLATDEV(gsc0); +extern struct platform_device SYSMMU_PLATDEV(gsc1); +extern struct platform_device SYSMMU_PLATDEV(gsc2); +extern struct platform_device SYSMMU_PLATDEV(gsc3); +extern struct platform_device SYSMMU_PLATDEV(isp); +extern struct platform_device SYSMMU_PLATDEV(fimd0); +extern struct platform_device SYSMMU_PLATDEV(fimd1); +extern struct platform_device SYSMMU_PLATDEV(camif0); +extern struct platform_device SYSMMU_PLATDEV(camif1); +extern struct platform_device SYSMMU_PLATDEV(2d); -extern const char *sysmmu_ips_name[EXYNOS4_SYSMMU_TOTAL_IPNUM]; +#ifdef CONFIG_IOMMU_API +static inline void platform_set_sysmmu( + struct device *sysmmu, struct device *dev) +{ + dev->archdata.iommu = sysmmu; +} +#endif -typedef enum exynos4_sysmmu_ips sysmmu_ips; +#else /* !CONFIG_EXYNOS_DEV_SYSMMU */ +#define platform_set_sysmmu(dev, sysmmu) do { } while (0) +#endif -void sysmmu_clk_init(struct device *dev, sysmmu_ips ips); -void sysmmu_clk_enable(sysmmu_ips ips); -void sysmmu_clk_disable(sysmmu_ips ips); +#define SYSMMU_CLOCK_DEVNAME(ipname, id) (SYSMMU_DEVNAME_BASE "." #id) -#endif /* __ASM_ARM_ARCH_SYSMMU_H */ +#endif /* _ARM_MACH_EXYNOS_SYSMMU_H_ */ diff --git a/arch/arm/mach-exynos/mach-armlex4210.c b/arch/arm/mach-exynos/mach-armlex4210.c index d726fcd..6ce2148 100644 --- a/arch/arm/mach-exynos/mach-armlex4210.c +++ b/arch/arm/mach-exynos/mach-armlex4210.c @@ -157,7 +157,6 @@ static struct platform_device *armlex4210_devices[] __initdata = { &s3c_device_hsmmc3, &s3c_device_rtc, &s3c_device_wdt, - &exynos4_device_sysmmu, &samsung_asoc_dma, &armlex4210_smsc911x, &exynos4_device_ahci, diff --git a/arch/arm/mach-exynos/mach-smdkv310.c b/arch/arm/mach-exynos/mach-smdkv310.c index 83b91fa..495c7e5 100644 --- a/arch/arm/mach-exynos/mach-smdkv310.c +++ b/arch/arm/mach-exynos/mach-smdkv310.c @@ -281,7 +281,6 @@ static struct platform_device *smdkv310_devices[] __initdata = { &s5p_device_mfc_l, &s5p_device_mfc_r, &exynos4_device_spdif, - &exynos4_device_sysmmu, &samsung_asoc_dma, &samsung_asoc_idma, &s5p_device_fimd0, -- cgit v0.10.2 From 9c75fc8c5c8c50775fc8b89418219221335b758f Mon Sep 17 00:00:00 2001 From: Jesper Nilsson Date: Thu, 5 Apr 2012 11:52:55 +0200 Subject: CRIS: Remove legacy RTC drivers These old drivers are not used anymore, we use the ones in drivers/rtc. This allows us to remove some cruft in the CRIS timekeeping code. Signed-off-by: Jesper Nilsson diff --git a/arch/cris/arch-v10/drivers/ds1302.c b/arch/cris/arch-v10/drivers/ds1302.c deleted file mode 100644 index 74f99c6..0000000 --- a/arch/cris/arch-v10/drivers/ds1302.c +++ /dev/null @@ -1,515 +0,0 @@ -/*!*************************************************************************** -*! -*! FILE NAME : ds1302.c -*! -*! DESCRIPTION: Implements an interface for the DS1302 RTC through Etrax I/O -*! -*! Functions exported: ds1302_readreg, ds1302_writereg, ds1302_init -*! -*! --------------------------------------------------------------------------- -*! -*! (C) Copyright 1999-2007 Axis Communications AB, LUND, SWEDEN -*! -*!***************************************************************************/ - - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include - -#include "i2c.h" - -#define RTC_MAJOR_NR 121 /* local major, change later */ - -static DEFINE_MUTEX(ds1302_mutex); -static const char ds1302_name[] = "ds1302"; - -/* The DS1302 might be connected to different bits on different products. - * It has three signals - SDA, SCL and RST. RST and SCL are always outputs, - * but SDA can have a selected direction. - * For now, only PORT_PB is hardcoded. - */ - -/* The RST bit may be on either the Generic Port or Port PB. */ -#ifdef CONFIG_ETRAX_DS1302_RST_ON_GENERIC_PORT -#define TK_RST_OUT(x) REG_SHADOW_SET(R_PORT_G_DATA, port_g_data_shadow, CONFIG_ETRAX_DS1302_RSTBIT, x) -#define TK_RST_DIR(x) -#else -#define TK_RST_OUT(x) REG_SHADOW_SET(R_PORT_PB_DATA, port_pb_data_shadow, CONFIG_ETRAX_DS1302_RSTBIT, x) -#define TK_RST_DIR(x) REG_SHADOW_SET(R_PORT_PB_DIR, port_pb_dir_shadow, CONFIG_ETRAX_DS1302_RSTBIT, x) -#endif - - -#define TK_SDA_OUT(x) REG_SHADOW_SET(R_PORT_PB_DATA, port_pb_data_shadow, CONFIG_ETRAX_DS1302_SDABIT, x) -#define TK_SCL_OUT(x) REG_SHADOW_SET(R_PORT_PB_DATA, port_pb_data_shadow, CONFIG_ETRAX_DS1302_SCLBIT, x) - -#define TK_SDA_IN() ((*R_PORT_PB_READ >> CONFIG_ETRAX_DS1302_SDABIT) & 1) -/* 1 is out, 0 is in */ -#define TK_SDA_DIR(x) REG_SHADOW_SET(R_PORT_PB_DIR, port_pb_dir_shadow, CONFIG_ETRAX_DS1302_SDABIT, x) -#define TK_SCL_DIR(x) REG_SHADOW_SET(R_PORT_PB_DIR, port_pb_dir_shadow, CONFIG_ETRAX_DS1302_SCLBIT, x) - - -/* - * The reason for tempudelay and not udelay is that loops_per_usec - * (used in udelay) is not set when functions here are called from time.c - */ - -static void tempudelay(int usecs) -{ - volatile int loops; - - for(loops = usecs * 12; loops > 0; loops--) - /* nothing */; -} - - -/* Send 8 bits. */ -static void -out_byte(unsigned char x) -{ - int i; - TK_SDA_DIR(1); - for (i = 8; i--;) { - /* The chip latches incoming bits on the rising edge of SCL. */ - TK_SCL_OUT(0); - TK_SDA_OUT(x & 1); - tempudelay(1); - TK_SCL_OUT(1); - tempudelay(1); - x >>= 1; - } - TK_SDA_DIR(0); -} - -static unsigned char -in_byte(void) -{ - unsigned char x = 0; - int i; - - /* Read byte. Bits come LSB first, on the falling edge of SCL. - * Assume SDA is in input direction already. - */ - TK_SDA_DIR(0); - - for (i = 8; i--;) { - TK_SCL_OUT(0); - tempudelay(1); - x >>= 1; - x |= (TK_SDA_IN() << 7); - TK_SCL_OUT(1); - tempudelay(1); - } - - return x; -} - -/* Prepares for a transaction by de-activating RST (active-low). */ - -static void -start(void) -{ - TK_SCL_OUT(0); - tempudelay(1); - TK_RST_OUT(0); - tempudelay(5); - TK_RST_OUT(1); -} - -/* Ends a transaction by taking RST active again. */ - -static void -stop(void) -{ - tempudelay(2); - TK_RST_OUT(0); -} - -/* Enable writing. */ - -static void -ds1302_wenable(void) -{ - start(); - out_byte(0x8e); /* Write control register */ - out_byte(0x00); /* Disable write protect bit 7 = 0 */ - stop(); -} - -/* Disable writing. */ - -static void -ds1302_wdisable(void) -{ - start(); - out_byte(0x8e); /* Write control register */ - out_byte(0x80); /* Disable write protect bit 7 = 0 */ - stop(); -} - - - -/* Read a byte from the selected register in the DS1302. */ - -unsigned char -ds1302_readreg(int reg) -{ - unsigned char x; - - start(); - out_byte(0x81 | (reg << 1)); /* read register */ - x = in_byte(); - stop(); - - return x; -} - -/* Write a byte to the selected register. */ - -void -ds1302_writereg(int reg, unsigned char val) -{ -#ifndef CONFIG_ETRAX_RTC_READONLY - int do_writereg = 1; -#else - int do_writereg = 0; - - if (reg == RTC_TRICKLECHARGER) - do_writereg = 1; -#endif - - if (do_writereg) { - ds1302_wenable(); - start(); - out_byte(0x80 | (reg << 1)); /* write register */ - out_byte(val); - stop(); - ds1302_wdisable(); - } -} - -void -get_rtc_time(struct rtc_time *rtc_tm) -{ - unsigned long flags; - - local_irq_save(flags); - - rtc_tm->tm_sec = CMOS_READ(RTC_SECONDS); - rtc_tm->tm_min = CMOS_READ(RTC_MINUTES); - rtc_tm->tm_hour = CMOS_READ(RTC_HOURS); - rtc_tm->tm_mday = CMOS_READ(RTC_DAY_OF_MONTH); - rtc_tm->tm_mon = CMOS_READ(RTC_MONTH); - rtc_tm->tm_year = CMOS_READ(RTC_YEAR); - - local_irq_restore(flags); - - rtc_tm->tm_sec = bcd2bin(rtc_tm->tm_sec); - rtc_tm->tm_min = bcd2bin(rtc_tm->tm_min); - rtc_tm->tm_hour = bcd2bin(rtc_tm->tm_hour); - rtc_tm->tm_mday = bcd2bin(rtc_tm->tm_mday); - rtc_tm->tm_mon = bcd2bin(rtc_tm->tm_mon); - rtc_tm->tm_year = bcd2bin(rtc_tm->tm_year); - - /* - * Account for differences between how the RTC uses the values - * and how they are defined in a struct rtc_time; - */ - - if (rtc_tm->tm_year <= 69) - rtc_tm->tm_year += 100; - - rtc_tm->tm_mon--; -} - -static unsigned char days_in_mo[] = - {0, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31}; - -/* ioctl that supports RTC_RD_TIME and RTC_SET_TIME (read and set time/date). */ - -static int rtc_ioctl(struct file *file, unsigned int cmd, unsigned long arg) -{ - unsigned long flags; - - switch(cmd) { - case RTC_RD_TIME: /* read the time/date from RTC */ - { - struct rtc_time rtc_tm; - - memset(&rtc_tm, 0, sizeof (struct rtc_time)); - get_rtc_time(&rtc_tm); - if (copy_to_user((struct rtc_time*)arg, &rtc_tm, sizeof(struct rtc_time))) - return -EFAULT; - return 0; - } - - case RTC_SET_TIME: /* set the RTC */ - { - struct rtc_time rtc_tm; - unsigned char mon, day, hrs, min, sec, leap_yr; - unsigned int yrs; - - if (!capable(CAP_SYS_TIME)) - return -EPERM; - - if (copy_from_user(&rtc_tm, (struct rtc_time*)arg, sizeof(struct rtc_time))) - return -EFAULT; - - yrs = rtc_tm.tm_year + 1900; - mon = rtc_tm.tm_mon + 1; /* tm_mon starts at zero */ - day = rtc_tm.tm_mday; - hrs = rtc_tm.tm_hour; - min = rtc_tm.tm_min; - sec = rtc_tm.tm_sec; - - - if ((yrs < 1970) || (yrs > 2069)) - return -EINVAL; - - leap_yr = ((!(yrs % 4) && (yrs % 100)) || !(yrs % 400)); - - if ((mon > 12) || (day == 0)) - return -EINVAL; - - if (day > (days_in_mo[mon] + ((mon == 2) && leap_yr))) - return -EINVAL; - - if ((hrs >= 24) || (min >= 60) || (sec >= 60)) - return -EINVAL; - - if (yrs >= 2000) - yrs -= 2000; /* RTC (0, 1, ... 69) */ - else - yrs -= 1900; /* RTC (70, 71, ... 99) */ - - sec = bin2bcd(sec); - min = bin2bcd(min); - hrs = bin2bcd(hrs); - day = bin2bcd(day); - mon = bin2bcd(mon); - yrs = bin2bcd(yrs); - - local_irq_save(flags); - CMOS_WRITE(yrs, RTC_YEAR); - CMOS_WRITE(mon, RTC_MONTH); - CMOS_WRITE(day, RTC_DAY_OF_MONTH); - CMOS_WRITE(hrs, RTC_HOURS); - CMOS_WRITE(min, RTC_MINUTES); - CMOS_WRITE(sec, RTC_SECONDS); - local_irq_restore(flags); - - /* Notice that at this point, the RTC is updated but - * the kernel is still running with the old time. - * You need to set that separately with settimeofday - * or adjtimex. - */ - return 0; - } - - case RTC_SET_CHARGE: /* set the RTC TRICKLE CHARGE register */ - { - int tcs_val; - - if (!capable(CAP_SYS_TIME)) - return -EPERM; - - if(copy_from_user(&tcs_val, (int*)arg, sizeof(int))) - return -EFAULT; - - tcs_val = RTC_TCR_PATTERN | (tcs_val & 0x0F); - ds1302_writereg(RTC_TRICKLECHARGER, tcs_val); - return 0; - } - case RTC_VL_READ: - { - /* TODO: - * Implement voltage low detection support - */ - printk(KERN_WARNING "DS1302: RTC Voltage Low detection" - " is not supported\n"); - return 0; - } - case RTC_VL_CLR: - { - /* TODO: - * Nothing to do since Voltage Low detection is not supported - */ - return 0; - } - default: - return -ENOIOCTLCMD; - } -} - -static long rtc_unlocked_ioctl(struct file *file, unsigned int cmd, unsigned long arg) -{ - int ret; - - mutex_lock(&ds1302_mutex); - ret = rtc_ioctl(file, cmd, arg); - mutex_unlock(&ds1302_mutex); - - return ret; -} - -static void -print_rtc_status(void) -{ - struct rtc_time tm; - - get_rtc_time(&tm); - - /* - * There is no way to tell if the luser has the RTC set for local - * time or for Universal Standard Time (GMT). Probably local though. - */ - - printk(KERN_INFO "rtc_time\t: %02d:%02d:%02d\n", - tm.tm_hour, tm.tm_min, tm.tm_sec); - printk(KERN_INFO "rtc_date\t: %04d-%02d-%02d\n", - tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday); -} - -/* The various file operations we support. */ - -static const struct file_operations rtc_fops = { - .owner = THIS_MODULE, - .unlocked_ioctl = rtc_unlocked_ioctl, - .llseek = noop_llseek, -}; - -/* Probe for the chip by writing something to its RAM and try reading it back. */ - -#define MAGIC_PATTERN 0x42 - -static int __init -ds1302_probe(void) -{ - int retval, res; - - TK_RST_DIR(1); - TK_SCL_DIR(1); - TK_SDA_DIR(0); - - /* Try to talk to timekeeper. */ - - ds1302_wenable(); - start(); - out_byte(0xc0); /* write RAM byte 0 */ - out_byte(MAGIC_PATTERN); /* write something magic */ - start(); - out_byte(0xc1); /* read RAM byte 0 */ - - if((res = in_byte()) == MAGIC_PATTERN) { - stop(); - ds1302_wdisable(); - printk(KERN_INFO "%s: RTC found.\n", ds1302_name); - printk(KERN_INFO "%s: SDA, SCL, RST on PB%i, PB%i, %s%i\n", - ds1302_name, - CONFIG_ETRAX_DS1302_SDABIT, - CONFIG_ETRAX_DS1302_SCLBIT, -#ifdef CONFIG_ETRAX_DS1302_RST_ON_GENERIC_PORT - "GENIO", -#else - "PB", -#endif - CONFIG_ETRAX_DS1302_RSTBIT); - print_rtc_status(); - retval = 1; - } else { - stop(); - retval = 0; - } - - return retval; -} - - -/* Just probe for the RTC and register the device to handle the ioctl needed. */ - -int __init -ds1302_init(void) -{ -#ifdef CONFIG_ETRAX_I2C - i2c_init(); -#endif - - if (!ds1302_probe()) { -#ifdef CONFIG_ETRAX_DS1302_RST_ON_GENERIC_PORT -#if CONFIG_ETRAX_DS1302_RSTBIT == 27 - /* - * The only way to set g27 to output is to enable ATA. - * - * Make sure that R_GEN_CONFIG is setup correct. - */ - /* Allocating the ATA interface will grab almost all - * pins in I/O groups a, b, c and d. A consequence of - * allocating the ATA interface is that the fixed - * interfaces shared RAM, parallel port 0, parallel - * port 1, parallel port W, SCSI-8 port 0, SCSI-8 port - * 1, SCSI-W, serial port 2, serial port 3, - * synchronous serial port 3 and USB port 2 and almost - * all GPIO pins on port g cannot be used. - */ - if (cris_request_io_interface(if_ata, "ds1302/ATA")) { - printk(KERN_WARNING "ds1302: Failed to get IO interface\n"); - return -1; - } - -#elif CONFIG_ETRAX_DS1302_RSTBIT == 0 - if (cris_io_interface_allocate_pins(if_gpio_grp_a, - 'g', - CONFIG_ETRAX_DS1302_RSTBIT, - CONFIG_ETRAX_DS1302_RSTBIT)) { - printk(KERN_WARNING "ds1302: Failed to get IO interface\n"); - return -1; - } - - /* Set the direction of this bit to out. */ - genconfig_shadow = ((genconfig_shadow & - ~IO_MASK(R_GEN_CONFIG, g0dir)) | - (IO_STATE(R_GEN_CONFIG, g0dir, out))); - *R_GEN_CONFIG = genconfig_shadow; -#endif - if (!ds1302_probe()) { - printk(KERN_WARNING "%s: RTC not found.\n", ds1302_name); - return -1; - } -#else - printk(KERN_WARNING "%s: RTC not found.\n", ds1302_name); - return -1; -#endif - } - /* Initialise trickle charger */ - ds1302_writereg(RTC_TRICKLECHARGER, - RTC_TCR_PATTERN |(CONFIG_ETRAX_DS1302_TRICKLE_CHARGE & 0x0F)); - /* Start clock by resetting CLOCK_HALT */ - ds1302_writereg(RTC_SECONDS, (ds1302_readreg(RTC_SECONDS) & 0x7F)); - return 0; -} - -static int __init ds1302_register(void) -{ - ds1302_init(); - if (register_chrdev(RTC_MAJOR_NR, ds1302_name, &rtc_fops)) { - printk(KERN_INFO "%s: unable to get major %d for rtc\n", - ds1302_name, RTC_MAJOR_NR); - return -1; - } - return 0; - -} - -module_init(ds1302_register); diff --git a/arch/cris/arch-v10/drivers/pcf8563.c b/arch/cris/arch-v10/drivers/pcf8563.c deleted file mode 100644 index 9da0568..0000000 --- a/arch/cris/arch-v10/drivers/pcf8563.c +++ /dev/null @@ -1,380 +0,0 @@ -/* - * PCF8563 RTC - * - * From Phillips' datasheet: - * - * The PCF8563 is a CMOS real-time clock/calendar optimized for low power - * consumption. A programmable clock output, interrupt output and voltage - * low detector are also provided. All address and data are transferred - * serially via two-line bidirectional I2C-bus. Maximum bus speed is - * 400 kbits/s. The built-in word address register is incremented - * automatically after each written or read byte. - * - * Copyright (c) 2002-2007, Axis Communications AB - * All rights reserved. - * - * Author: Tobias Anderberg . - * - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#include "i2c.h" - -#define PCF8563_MAJOR 121 /* Local major number. */ -#define DEVICE_NAME "rtc" /* Name which is registered in /proc/devices. */ -#define PCF8563_NAME "PCF8563" -#define DRIVER_VERSION "$Revision: 1.24 $" - -/* I2C bus slave registers. */ -#define RTC_I2C_READ 0xa3 -#define RTC_I2C_WRITE 0xa2 - -/* Two simple wrapper macros, saves a few keystrokes. */ -#define rtc_read(x) i2c_readreg(RTC_I2C_READ, x) -#define rtc_write(x,y) i2c_writereg(RTC_I2C_WRITE, x, y) - -static DEFINE_MUTEX(pcf8563_mutex); -static DEFINE_MUTEX(rtc_lock); /* Protect state etc */ - -static const unsigned char days_in_month[] = - { 0, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 }; - -static long pcf8563_unlocked_ioctl(struct file *, unsigned int, unsigned long); - -/* Cache VL bit value read at driver init since writing the RTC_SECOND - * register clears the VL status. - */ -static int voltage_low; - -static const struct file_operations pcf8563_fops = { - .owner = THIS_MODULE, - .unlocked_ioctl = pcf8563_unlocked_ioctl, - .llseek = noop_llseek, -}; - -unsigned char -pcf8563_readreg(int reg) -{ - unsigned char res = rtc_read(reg); - - /* The PCF8563 does not return 0 for unimplemented bits. */ - switch (reg) { - case RTC_SECONDS: - case RTC_MINUTES: - res &= 0x7F; - break; - case RTC_HOURS: - case RTC_DAY_OF_MONTH: - res &= 0x3F; - break; - case RTC_WEEKDAY: - res &= 0x07; - break; - case RTC_MONTH: - res &= 0x1F; - break; - case RTC_CONTROL1: - res &= 0xA8; - break; - case RTC_CONTROL2: - res &= 0x1F; - break; - case RTC_CLOCKOUT_FREQ: - case RTC_TIMER_CONTROL: - res &= 0x83; - break; - } - return res; -} - -void -pcf8563_writereg(int reg, unsigned char val) -{ - rtc_write(reg, val); -} - -void -get_rtc_time(struct rtc_time *tm) -{ - tm->tm_sec = rtc_read(RTC_SECONDS); - tm->tm_min = rtc_read(RTC_MINUTES); - tm->tm_hour = rtc_read(RTC_HOURS); - tm->tm_mday = rtc_read(RTC_DAY_OF_MONTH); - tm->tm_wday = rtc_read(RTC_WEEKDAY); - tm->tm_mon = rtc_read(RTC_MONTH); - tm->tm_year = rtc_read(RTC_YEAR); - - if (tm->tm_sec & 0x80) { - printk(KERN_ERR "%s: RTC Voltage Low - reliable date/time " - "information is no longer guaranteed!\n", PCF8563_NAME); - } - - tm->tm_year = bcd2bin(tm->tm_year) + - ((tm->tm_mon & 0x80) ? 100 : 0); - tm->tm_sec &= 0x7F; - tm->tm_min &= 0x7F; - tm->tm_hour &= 0x3F; - tm->tm_mday &= 0x3F; - tm->tm_wday &= 0x07; /* Not coded in BCD. */ - tm->tm_mon &= 0x1F; - - tm->tm_sec = bcd2bin(tm->tm_sec); - tm->tm_min = bcd2bin(tm->tm_min); - tm->tm_hour = bcd2bin(tm->tm_hour); - tm->tm_mday = bcd2bin(tm->tm_mday); - tm->tm_mon = bcd2bin(tm->tm_mon); - tm->tm_mon--; /* Month is 1..12 in RTC but 0..11 in linux */ -} - -int __init -pcf8563_init(void) -{ - static int res; - static int first = 1; - - if (!first) - return res; - first = 0; - - /* Initiate the i2c protocol. */ - res = i2c_init(); - if (res < 0) { - printk(KERN_CRIT "pcf8563_init: Failed to init i2c.\n"); - return res; - } - - /* - * First of all we need to reset the chip. This is done by - * clearing control1, control2 and clk freq and resetting - * all alarms. - */ - if (rtc_write(RTC_CONTROL1, 0x00) < 0) - goto err; - - if (rtc_write(RTC_CONTROL2, 0x00) < 0) - goto err; - - if (rtc_write(RTC_CLOCKOUT_FREQ, 0x00) < 0) - goto err; - - if (rtc_write(RTC_TIMER_CONTROL, 0x03) < 0) - goto err; - - /* Reset the alarms. */ - if (rtc_write(RTC_MINUTE_ALARM, 0x80) < 0) - goto err; - - if (rtc_write(RTC_HOUR_ALARM, 0x80) < 0) - goto err; - - if (rtc_write(RTC_DAY_ALARM, 0x80) < 0) - goto err; - - if (rtc_write(RTC_WEEKDAY_ALARM, 0x80) < 0) - goto err; - - /* Check for low voltage, and warn about it. */ - if (rtc_read(RTC_SECONDS) & 0x80) { - voltage_low = 1; - printk(KERN_WARNING "%s: RTC Voltage Low - reliable " - "date/time information is no longer guaranteed!\n", - PCF8563_NAME); - } - - return res; - -err: - printk(KERN_INFO "%s: Error initializing chip.\n", PCF8563_NAME); - res = -1; - return res; -} - -void __exit -pcf8563_exit(void) -{ - unregister_chrdev(PCF8563_MAJOR, DEVICE_NAME); -} - -/* - * ioctl calls for this driver. Why return -ENOTTY upon error? Because - * POSIX says so! - */ -static int pcf8563_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) -{ - /* Some sanity checks. */ - if (_IOC_TYPE(cmd) != RTC_MAGIC) - return -ENOTTY; - - if (_IOC_NR(cmd) > RTC_MAX_IOCTL) - return -ENOTTY; - - switch (cmd) { - case RTC_RD_TIME: - { - struct rtc_time tm; - - mutex_lock(&rtc_lock); - memset(&tm, 0, sizeof tm); - get_rtc_time(&tm); - - if (copy_to_user((struct rtc_time *) arg, &tm, - sizeof tm)) { - mutex_unlock(&rtc_lock); - return -EFAULT; - } - - mutex_unlock(&rtc_lock); - - return 0; - } - case RTC_SET_TIME: - { - int leap; - int year; - int century; - struct rtc_time tm; - - memset(&tm, 0, sizeof tm); - if (!capable(CAP_SYS_TIME)) - return -EPERM; - - if (copy_from_user(&tm, (struct rtc_time *) arg, sizeof tm)) - return -EFAULT; - - /* Convert from struct tm to struct rtc_time. */ - tm.tm_year += 1900; - tm.tm_mon += 1; - - /* - * Check if tm.tm_year is a leap year. A year is a leap - * year if it is divisible by 4 but not 100, except - * that years divisible by 400 _are_ leap years. - */ - year = tm.tm_year; - leap = (tm.tm_mon == 2) && - ((year % 4 == 0 && year % 100 != 0) || year % 400 == 0); - - /* Perform some sanity checks. */ - if ((tm.tm_year < 1970) || - (tm.tm_mon > 12) || - (tm.tm_mday == 0) || - (tm.tm_mday > days_in_month[tm.tm_mon] + leap) || - (tm.tm_wday >= 7) || - (tm.tm_hour >= 24) || - (tm.tm_min >= 60) || - (tm.tm_sec >= 60)) - return -EINVAL; - - century = (tm.tm_year >= 2000) ? 0x80 : 0; - tm.tm_year = tm.tm_year % 100; - - tm.tm_year = bin2bcd(tm.tm_year); - tm.tm_mon = bin2bcd(tm.tm_mon); - tm.tm_mday = bin2bcd(tm.tm_mday); - tm.tm_hour = bin2bcd(tm.tm_hour); - tm.tm_min = bin2bcd(tm.tm_min); - tm.tm_sec = bin2bcd(tm.tm_sec); - tm.tm_mon |= century; - - mutex_lock(&rtc_lock); - - rtc_write(RTC_YEAR, tm.tm_year); - rtc_write(RTC_MONTH, tm.tm_mon); - rtc_write(RTC_WEEKDAY, tm.tm_wday); /* Not coded in BCD. */ - rtc_write(RTC_DAY_OF_MONTH, tm.tm_mday); - rtc_write(RTC_HOURS, tm.tm_hour); - rtc_write(RTC_MINUTES, tm.tm_min); - rtc_write(RTC_SECONDS, tm.tm_sec); - - mutex_unlock(&rtc_lock); - - return 0; - } - case RTC_VL_READ: - if (voltage_low) { - printk(KERN_ERR "%s: RTC Voltage Low - " - "reliable date/time information is no " - "longer guaranteed!\n", PCF8563_NAME); - } - - if (copy_to_user((int *) arg, &voltage_low, sizeof(int))) - return -EFAULT; - return 0; - - case RTC_VL_CLR: - { - /* Clear the VL bit in the seconds register in case - * the time has not been set already (which would - * have cleared it). This does not really matter - * because of the cached voltage_low value but do it - * anyway for consistency. */ - - int ret = rtc_read(RTC_SECONDS); - - rtc_write(RTC_SECONDS, (ret & 0x7F)); - - /* Clear the cached value. */ - voltage_low = 0; - - return 0; - } - default: - return -ENOTTY; - } - - return 0; -} - -static long pcf8563_unlocked_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) -{ - int ret; - - mutex_lock(&pcf8563_mutex); - ret = pcf8563_ioctl(filp, cmd, arg); - mutex_unlock(&pcf8563_mutex); - - return ret; -} - -static int __init pcf8563_register(void) -{ - if (pcf8563_init() < 0) { - printk(KERN_INFO "%s: Unable to initialize Real-Time Clock " - "Driver, %s\n", PCF8563_NAME, DRIVER_VERSION); - return -1; - } - - if (register_chrdev(PCF8563_MAJOR, DEVICE_NAME, &pcf8563_fops) < 0) { - printk(KERN_INFO "%s: Unable to get major number %d for RTC device.\n", - PCF8563_NAME, PCF8563_MAJOR); - return -1; - } - - printk(KERN_INFO "%s Real-Time Clock Driver, %s\n", PCF8563_NAME, - DRIVER_VERSION); - - /* Check for low voltage, and warn about it. */ - if (voltage_low) { - printk(KERN_WARNING "%s: RTC Voltage Low - reliable date/time " - "information is no longer guaranteed!\n", PCF8563_NAME); - } - - return 0; -} - -module_init(pcf8563_register); -module_exit(pcf8563_exit); diff --git a/arch/cris/arch-v10/kernel/fasttimer.c b/arch/cris/arch-v10/kernel/fasttimer.c index 8a8196e..082f189 100644 --- a/arch/cris/arch-v10/kernel/fasttimer.c +++ b/arch/cris/arch-v10/kernel/fasttimer.c @@ -21,8 +21,6 @@ #include #include #include -#include - #include #include diff --git a/arch/cris/arch-v10/kernel/time.c b/arch/cris/arch-v10/kernel/time.c index 20c85b5..bcffcb6 100644 --- a/arch/cris/arch-v10/kernel/time.c +++ b/arch/cris/arch-v10/kernel/time.c @@ -19,16 +19,12 @@ #include #include #include -#include #include /* define this if you need to use print_timestamp */ /* it will make jiffies at 96 hz instead of 100 hz though */ #undef USE_CASCADE_TIMERS -extern int set_rtc_mmss(unsigned long nowtime); -extern int have_rtc; - unsigned long get_ns_in_jiffie(void) { unsigned char timer_count, t1; @@ -203,11 +199,6 @@ time_init(void) */ loops_per_usec = 50; - if(RTC_INIT() < 0) - have_rtc = 0; - else - have_rtc = 1; - /* Setup the etrax timers * Base frequency is 25000 hz, divider 250 -> 100 HZ * In normal mode, we use timer0, so timer1 is free. In cascade diff --git a/arch/cris/arch-v32/kernel/time.c b/arch/cris/arch-v32/kernel/time.c index 6773fc8..8c4b45e 100644 --- a/arch/cris/arch-v32/kernel/time.c +++ b/arch/cris/arch-v32/kernel/time.c @@ -18,7 +18,6 @@ #include #include #include -#include #include #include @@ -67,7 +66,6 @@ unsigned long timer_regs[NR_CPUS] = }; extern int set_rtc_mmss(unsigned long nowtime); -extern int have_rtc; #ifdef CONFIG_CPU_FREQ static int @@ -265,11 +263,6 @@ void __init time_init(void) */ loops_per_usec = 50; - if(RTC_INIT() < 0) - have_rtc = 0; - else - have_rtc = 1; - /* Start CPU local timer. */ cris_timer_init(); diff --git a/arch/cris/include/asm/Kbuild b/arch/cris/include/asm/Kbuild index 956eea2..04d02a5 100644 --- a/arch/cris/include/asm/Kbuild +++ b/arch/cris/include/asm/Kbuild @@ -6,5 +6,4 @@ header-y += arch-v32/ header-y += ethernet.h header-y += etraxgpio.h header-y += rs485.h -header-y += rtc.h header-y += sync_serial.h diff --git a/arch/cris/include/asm/rtc.h b/arch/cris/include/asm/rtc.h deleted file mode 100644 index 17d3019..0000000 --- a/arch/cris/include/asm/rtc.h +++ /dev/null @@ -1,107 +0,0 @@ - -#ifndef __RTC_H__ -#define __RTC_H__ - -#ifdef CONFIG_ETRAX_DS1302 - /* Dallas DS1302 clock/calendar register numbers. */ -# define RTC_SECONDS 0 -# define RTC_MINUTES 1 -# define RTC_HOURS 2 -# define RTC_DAY_OF_MONTH 3 -# define RTC_MONTH 4 -# define RTC_WEEKDAY 5 -# define RTC_YEAR 6 -# define RTC_CONTROL 7 - - /* Bits in CONTROL register. */ -# define RTC_CONTROL_WRITEPROTECT 0x80 -# define RTC_TRICKLECHARGER 8 - - /* Bits in TRICKLECHARGER register TCS TCS TCS TCS DS DS RS RS. */ -# define RTC_TCR_PATTERN 0xA0 /* 1010xxxx */ -# define RTC_TCR_1DIOD 0x04 /* xxxx01xx */ -# define RTC_TCR_2DIOD 0x08 /* xxxx10xx */ -# define RTC_TCR_DISABLED 0x00 /* xxxxxx00 Disabled */ -# define RTC_TCR_2KOHM 0x01 /* xxxxxx01 2KOhm */ -# define RTC_TCR_4KOHM 0x02 /* xxxxxx10 4kOhm */ -# define RTC_TCR_8KOHM 0x03 /* xxxxxx11 8kOhm */ - -#elif defined(CONFIG_ETRAX_PCF8563) - /* I2C bus slave registers. */ -# define RTC_I2C_READ 0xa3 -# define RTC_I2C_WRITE 0xa2 - - /* Phillips PCF8563 registers. */ -# define RTC_CONTROL1 0x00 /* Control/Status register 1. */ -# define RTC_CONTROL2 0x01 /* Control/Status register 2. */ -# define RTC_CLOCKOUT_FREQ 0x0d /* CLKOUT frequency. */ -# define RTC_TIMER_CONTROL 0x0e /* Timer control. */ -# define RTC_TIMER_CNTDOWN 0x0f /* Timer countdown. */ - - /* BCD encoded clock registers. */ -# define RTC_SECONDS 0x02 -# define RTC_MINUTES 0x03 -# define RTC_HOURS 0x04 -# define RTC_DAY_OF_MONTH 0x05 -# define RTC_WEEKDAY 0x06 /* Not coded in BCD! */ -# define RTC_MONTH 0x07 -# define RTC_YEAR 0x08 -# define RTC_MINUTE_ALARM 0x09 -# define RTC_HOUR_ALARM 0x0a -# define RTC_DAY_ALARM 0x0b -# define RTC_WEEKDAY_ALARM 0x0c - -#endif - -#ifdef CONFIG_ETRAX_DS1302 -extern unsigned char ds1302_readreg(int reg); -extern void ds1302_writereg(int reg, unsigned char val); -extern int ds1302_init(void); -# define CMOS_READ(x) ds1302_readreg(x) -# define CMOS_WRITE(val,reg) ds1302_writereg(reg,val) -# define RTC_INIT() ds1302_init() -#elif defined(CONFIG_ETRAX_PCF8563) -extern unsigned char pcf8563_readreg(int reg); -extern void pcf8563_writereg(int reg, unsigned char val); -extern int pcf8563_init(void); -# define CMOS_READ(x) pcf8563_readreg(x) -# define CMOS_WRITE(val,reg) pcf8563_writereg(reg,val) -# define RTC_INIT() pcf8563_init() -#else - /* No RTC configured so we shouldn't try to access any. */ -# define CMOS_READ(x) 42 -# define CMOS_WRITE(x,y) -# define RTC_INIT() (-1) -#endif - -/* - * The struct used to pass data via the following ioctl. Similar to the - * struct tm in , but it needs to be here so that the kernel - * source is self contained, allowing cross-compiles, etc. etc. - */ -struct rtc_time { - int tm_sec; - int tm_min; - int tm_hour; - int tm_mday; - int tm_mon; - int tm_year; - int tm_wday; - int tm_yday; - int tm_isdst; -}; - -/* ioctl() calls that are permitted to the /dev/rtc interface. */ -#define RTC_MAGIC 'p' -/* Read RTC time. */ -#define RTC_RD_TIME _IOR(RTC_MAGIC, 0x09, struct rtc_time) -/* Set RTC time. */ -#define RTC_SET_TIME _IOW(RTC_MAGIC, 0x0a, struct rtc_time) -#define RTC_SET_CHARGE _IOW(RTC_MAGIC, 0x0b, int) -/* Voltage low detector */ -#define RTC_VL_READ _IOR(RTC_MAGIC, 0x13, int) -/* Clear voltage low information */ -#define RTC_VL_CLR _IO(RTC_MAGIC, 0x14) -#define RTC_MAX_IOCTL 0x14 - -#endif /* __RTC_H__ */ diff --git a/arch/cris/kernel/time.c b/arch/cris/kernel/time.c index 4e73092..277ffc4 100644 --- a/arch/cris/kernel/time.c +++ b/arch/cris/kernel/time.c @@ -21,7 +21,6 @@ * */ -#include #include #include #include @@ -32,7 +31,8 @@ #include #include /* just for sched_clock() - funny that */ -int have_rtc; /* used to remember if we have an RTC or not */; + +#define D(x) #define TICK_SIZE tick @@ -50,78 +50,16 @@ u32 arch_gettimeoffset(void) } #endif -/* - * BUG: This routine does not handle hour overflow properly; it just - * sets the minutes. Usually you'll only notice that after reboot! - */ - int set_rtc_mmss(unsigned long nowtime) { - int retval = 0; - int real_seconds, real_minutes, cmos_minutes; - - printk(KERN_DEBUG "set_rtc_mmss(%lu)\n", nowtime); - - if(!have_rtc) - return 0; - - cmos_minutes = CMOS_READ(RTC_MINUTES); - cmos_minutes = bcd2bin(cmos_minutes); - - /* - * since we're only adjusting minutes and seconds, - * don't interfere with hour overflow. This avoids - * messing with unknown time zones but requires your - * RTC not to be off by more than 15 minutes - */ - real_seconds = nowtime % 60; - real_minutes = nowtime / 60; - if (((abs(real_minutes - cmos_minutes) + 15)/30) & 1) - real_minutes += 30; /* correct for half hour time zone */ - real_minutes %= 60; - - if (abs(real_minutes - cmos_minutes) < 30) { - real_seconds = bin2bcd(real_seconds); - real_minutes = bin2bcd(real_minutes); - CMOS_WRITE(real_seconds,RTC_SECONDS); - CMOS_WRITE(real_minutes,RTC_MINUTES); - } else { - printk_once(KERN_NOTICE - "set_rtc_mmss: can't update from %d to %d\n", - cmos_minutes, real_minutes); - retval = -1; - } - - return retval; + D(printk(KERN_DEBUG "set_rtc_mmss(%lu)\n", nowtime)); + return 0; } /* grab the time from the RTC chip */ - -unsigned long -get_cmos_time(void) +unsigned long get_cmos_time(void) { - unsigned int year, mon, day, hour, min, sec; - if(!have_rtc) - return 0; - - sec = CMOS_READ(RTC_SECONDS); - min = CMOS_READ(RTC_MINUTES); - hour = CMOS_READ(RTC_HOURS); - day = CMOS_READ(RTC_DAY_OF_MONTH); - mon = CMOS_READ(RTC_MONTH); - year = CMOS_READ(RTC_YEAR); - - sec = bcd2bin(sec); - min = bcd2bin(min); - hour = bcd2bin(hour); - day = bcd2bin(day); - mon = bcd2bin(mon); - year = bcd2bin(year); - - if ((year += 1900) < 1970) - year += 100; - - return mktime(year, mon, day, hour, min, sec); + return 0; } @@ -132,7 +70,7 @@ int update_persistent_clock(struct timespec now) void read_persistent_clock(struct timespec *ts) { - ts->tv_sec = get_cmos_time(); + ts->tv_sec = 0; ts->tv_nsec = 0; } -- cgit v0.10.2 From 3f3aaea29ff7ee2d43b430338427f30ba7f60ff9 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Fri, 30 Mar 2012 11:45:01 -0400 Subject: xen/p2m: Move code around to allow for better re-usage. We are going to be using the early_alloc_p2m (and early_alloc_p2m_middle) code in follow up patches which are not related to setting identity pages. Hence lets move the code out in its own function and rename them as appropiate. Signed-off-by: Konrad Rzeszutek Wilk diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c index 1b267e7..3cc3afe 100644 --- a/arch/x86/xen/p2m.c +++ b/arch/x86/xen/p2m.c @@ -499,7 +499,7 @@ static bool alloc_p2m(unsigned long pfn) return true; } -static bool __init __early_alloc_p2m(unsigned long pfn) +static bool __init early_alloc_p2m_middle(unsigned long pfn) { unsigned topidx, mididx, idx; @@ -541,6 +541,36 @@ static bool __init __early_alloc_p2m(unsigned long pfn) } return idx != 0; } + +static bool __init early_alloc_p2m(unsigned long pfn) +{ + unsigned topidx = p2m_top_index(pfn); + unsigned long *mid_mfn_p; + unsigned long **mid; + + mid = p2m_top[topidx]; + mid_mfn_p = p2m_top_mfn_p[topidx]; + if (mid == p2m_mid_missing) { + mid = extend_brk(PAGE_SIZE, PAGE_SIZE); + + p2m_mid_init(mid); + + p2m_top[topidx] = mid; + + BUG_ON(mid_mfn_p != p2m_mid_missing_mfn); + } + /* And the save/restore P2M tables.. */ + if (mid_mfn_p == p2m_mid_missing_mfn) { + mid_mfn_p = extend_brk(PAGE_SIZE, PAGE_SIZE); + p2m_mid_mfn_init(mid_mfn_p); + + p2m_top_mfn_p[topidx] = mid_mfn_p; + p2m_top_mfn[topidx] = virt_to_mfn(mid_mfn_p); + /* Note: we don't set mid_mfn_p[midix] here, + * look in early_alloc_p2m_middle */ + } + return true; +} unsigned long __init set_phys_range_identity(unsigned long pfn_s, unsigned long pfn_e) { @@ -559,35 +589,11 @@ unsigned long __init set_phys_range_identity(unsigned long pfn_s, pfn < ALIGN(pfn_e, (P2M_MID_PER_PAGE * P2M_PER_PAGE)); pfn += P2M_MID_PER_PAGE * P2M_PER_PAGE) { - unsigned topidx = p2m_top_index(pfn); - unsigned long *mid_mfn_p; - unsigned long **mid; - - mid = p2m_top[topidx]; - mid_mfn_p = p2m_top_mfn_p[topidx]; - if (mid == p2m_mid_missing) { - mid = extend_brk(PAGE_SIZE, PAGE_SIZE); - - p2m_mid_init(mid); - - p2m_top[topidx] = mid; - - BUG_ON(mid_mfn_p != p2m_mid_missing_mfn); - } - /* And the save/restore P2M tables.. */ - if (mid_mfn_p == p2m_mid_missing_mfn) { - mid_mfn_p = extend_brk(PAGE_SIZE, PAGE_SIZE); - p2m_mid_mfn_init(mid_mfn_p); - - p2m_top_mfn_p[topidx] = mid_mfn_p; - p2m_top_mfn[topidx] = virt_to_mfn(mid_mfn_p); - /* Note: we don't set mid_mfn_p[midix] here, - * look in __early_alloc_p2m */ - } + WARN_ON(!early_alloc_p2m(pfn)); } - __early_alloc_p2m(pfn_s); - __early_alloc_p2m(pfn_e); + early_alloc_p2m_middle(pfn_s); + early_alloc_p2m_middle(pfn_e); for (pfn = pfn_s; pfn < pfn_e; pfn++) if (!__set_phys_to_machine(pfn, IDENTITY_FRAME(pfn))) -- cgit v0.10.2 From cef4cca551d652b7f69c9d76337c5fae24e069dc Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Fri, 30 Mar 2012 14:15:14 -0400 Subject: xen/p2m: Allow alloc_p2m_middle to call reserve_brk depending on argument For identity cases we want to call reserve_brk only on the boundary conditions of the middle P2M (so P2M[x][y][0] = extend_brk). This is to work around identify regions (PCI spaces, gaps in E820) which are not aligned on 2MB regions. However for the case were we want to allocate P2M middle leafs at the early bootup stage, irregardless of this alignment check we need some means of doing that. For that we provide the new argument. Signed-off-by: Konrad Rzeszutek Wilk diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c index 3cc3afe..8b3a395 100644 --- a/arch/x86/xen/p2m.c +++ b/arch/x86/xen/p2m.c @@ -499,7 +499,7 @@ static bool alloc_p2m(unsigned long pfn) return true; } -static bool __init early_alloc_p2m_middle(unsigned long pfn) +static bool __init early_alloc_p2m_middle(unsigned long pfn, bool check_boundary) { unsigned topidx, mididx, idx; @@ -508,7 +508,7 @@ static bool __init early_alloc_p2m_middle(unsigned long pfn) idx = p2m_index(pfn); /* Pfff.. No boundary cross-over, lets get out. */ - if (!idx) + if (!idx && check_boundary) return false; WARN(p2m_top[topidx][mididx] == p2m_identity, @@ -531,7 +531,7 @@ static bool __init early_alloc_p2m_middle(unsigned long pfn) p2m_top[topidx][mididx] = p2m; /* For save/restore we need to MFN of the P2M saved */ - + mid_mfn_p = p2m_top_mfn_p[topidx]; WARN(mid_mfn_p[mididx] != virt_to_mfn(p2m_missing), "P2M_TOP_P[%d][%d] != MFN of p2m_missing!\n", @@ -592,8 +592,8 @@ unsigned long __init set_phys_range_identity(unsigned long pfn_s, WARN_ON(!early_alloc_p2m(pfn)); } - early_alloc_p2m_middle(pfn_s); - early_alloc_p2m_middle(pfn_e); + early_alloc_p2m_middle(pfn_s, true); + early_alloc_p2m_middle(pfn_e, true); for (pfn = pfn_s; pfn < pfn_e; pfn++) if (!__set_phys_to_machine(pfn, IDENTITY_FRAME(pfn))) -- cgit v0.10.2 From d5096850b47424fb0f1c6a75b8f7184f7169319a Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Fri, 30 Mar 2012 14:16:49 -0400 Subject: xen/p2m: Collapse early_alloc_p2m_middle redundant checks. At the start of the function we were checking for idx != 0 and bailing out. And later calling extend_brk if idx != 0. That is unnecessary so remove that checks. Signed-off-by: Konrad Rzeszutek Wilk diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c index 8b3a395..952edef 100644 --- a/arch/x86/xen/p2m.c +++ b/arch/x86/xen/p2m.c @@ -502,6 +502,8 @@ static bool alloc_p2m(unsigned long pfn) static bool __init early_alloc_p2m_middle(unsigned long pfn, bool check_boundary) { unsigned topidx, mididx, idx; + unsigned long *p2m; + unsigned long *mid_mfn_p; topidx = p2m_top_index(pfn); mididx = p2m_mid_index(pfn); @@ -522,24 +524,21 @@ static bool __init early_alloc_p2m_middle(unsigned long pfn, bool check_boundary return false; /* Boundary cross-over for the edges: */ - if (idx) { - unsigned long *p2m = extend_brk(PAGE_SIZE, PAGE_SIZE); - unsigned long *mid_mfn_p; + p2m = extend_brk(PAGE_SIZE, PAGE_SIZE); - p2m_init(p2m); + p2m_init(p2m); - p2m_top[topidx][mididx] = p2m; + p2m_top[topidx][mididx] = p2m; - /* For save/restore we need to MFN of the P2M saved */ + /* For save/restore we need to MFN of the P2M saved */ - mid_mfn_p = p2m_top_mfn_p[topidx]; - WARN(mid_mfn_p[mididx] != virt_to_mfn(p2m_missing), - "P2M_TOP_P[%d][%d] != MFN of p2m_missing!\n", - topidx, mididx); - mid_mfn_p[mididx] = virt_to_mfn(p2m); + mid_mfn_p = p2m_top_mfn_p[topidx]; + WARN(mid_mfn_p[mididx] != virt_to_mfn(p2m_missing), + "P2M_TOP_P[%d][%d] != MFN of p2m_missing!\n", + topidx, mididx); + mid_mfn_p[mididx] = virt_to_mfn(p2m); - } - return idx != 0; + return true; } static bool __init early_alloc_p2m(unsigned long pfn) -- cgit v0.10.2 From 940713bb2ce3033f468a220094a07250a2f69bdd Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Fri, 30 Mar 2012 14:33:14 -0400 Subject: xen/p2m: An early bootup variant of set_phys_to_machine During early bootup we can't use alloc_page, so to allocate leaf pages in the P2M we need to use extend_brk. For that we are utilizing the early_alloc_p2m and early_alloc_p2m_middle functions to do the job for us. This function follows the same logic as set_phys_to_machine. Signed-off-by: Konrad Rzeszutek Wilk diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h index c34f96c..93971e8 100644 --- a/arch/x86/include/asm/xen/page.h +++ b/arch/x86/include/asm/xen/page.h @@ -44,6 +44,7 @@ extern unsigned long machine_to_phys_nr; extern unsigned long get_phys_to_machine(unsigned long pfn); extern bool set_phys_to_machine(unsigned long pfn, unsigned long mfn); +extern bool __init early_set_phys_to_machine(unsigned long pfn, unsigned long mfn); extern bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn); extern unsigned long set_phys_range_identity(unsigned long pfn_s, unsigned long pfn_e); diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c index 952edef..ffd08c4 100644 --- a/arch/x86/xen/p2m.c +++ b/arch/x86/xen/p2m.c @@ -570,6 +570,21 @@ static bool __init early_alloc_p2m(unsigned long pfn) } return true; } +bool __init early_set_phys_to_machine(unsigned long pfn, unsigned long mfn) +{ + if (unlikely(!__set_phys_to_machine(pfn, mfn))) { + if (!early_alloc_p2m(pfn)) + return false; + + if (!early_alloc_p2m_middle(pfn, false /* boundary crossover OK!*/)) + return false; + + if (!__set_phys_to_machine(pfn, mfn)) + return false; + } + + return true; +} unsigned long __init set_phys_range_identity(unsigned long pfn_s, unsigned long pfn_e) { -- cgit v0.10.2 From 83c529151ab0d4a813e3f6a3e293fff75d468519 Mon Sep 17 00:00:00 2001 From: "Liu, Jinsong" Date: Tue, 28 Feb 2012 05:15:46 +0000 Subject: KVM: x86: expose Intel cpu new features (HLE, RTM) to guest Intel recently release 2 new features, HLE and RTM. Refer to http://software.intel.com/file/41417. This patch expose them to guest. Signed-off-by: Liu, Jinsong Signed-off-by: Marcelo Tosatti Signed-off-by: Avi Kivity diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 9fed5be..c2134b8 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -247,7 +247,8 @@ static int do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, /* cpuid 7.0.ebx */ const u32 kvm_supported_word9_x86_features = - F(FSGSBASE) | F(BMI1) | F(AVX2) | F(SMEP) | F(BMI2) | F(ERMS); + F(FSGSBASE) | F(BMI1) | F(HLE) | F(AVX2) | F(SMEP) | + F(BMI2) | F(ERMS) | F(RTM); /* all calls to cpuid_count() should be made on the same cpu */ get_cpu(); -- cgit v0.10.2 From a13007160f1b9ec7c67e28ec9254f197c5c08d7d Mon Sep 17 00:00:00 2001 From: Amos Kong Date: Fri, 9 Mar 2012 12:17:32 +0800 Subject: KVM: resize kvm_io_range array dynamically This patch makes the kvm_io_range array can be resized dynamically. Signed-off-by: Amos Kong Signed-off-by: Marcelo Tosatti Signed-off-by: Avi Kivity diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 665a260..ba9fb4a 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -68,10 +68,11 @@ struct kvm_io_range { struct kvm_io_device *dev; }; +#define NR_IOBUS_DEVS 300 + struct kvm_io_bus { int dev_count; -#define NR_IOBUS_DEVS 300 - struct kvm_io_range range[NR_IOBUS_DEVS]; + struct kvm_io_range range[]; }; enum kvm_bus { diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 42b7393..a9565e2 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2393,9 +2393,6 @@ int kvm_io_bus_sort_cmp(const void *p1, const void *p2) int kvm_io_bus_insert_dev(struct kvm_io_bus *bus, struct kvm_io_device *dev, gpa_t addr, int len) { - if (bus->dev_count == NR_IOBUS_DEVS) - return -ENOSPC; - bus->range[bus->dev_count++] = (struct kvm_io_range) { .addr = addr, .len = len, @@ -2495,12 +2492,15 @@ int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, struct kvm_io_bus *new_bus, *bus; bus = kvm->buses[bus_idx]; - if (bus->dev_count > NR_IOBUS_DEVS-1) + if (bus->dev_count > NR_IOBUS_DEVS - 1) return -ENOSPC; - new_bus = kmemdup(bus, sizeof(struct kvm_io_bus), GFP_KERNEL); + new_bus = kzalloc(sizeof(*bus) + ((bus->dev_count + 1) * + sizeof(struct kvm_io_range)), GFP_KERNEL); if (!new_bus) return -ENOMEM; + memcpy(new_bus, bus, sizeof(*bus) + (bus->dev_count * + sizeof(struct kvm_io_range))); kvm_io_bus_insert_dev(new_bus, dev, addr, len); rcu_assign_pointer(kvm->buses[bus_idx], new_bus); synchronize_srcu_expedited(&kvm->srcu); @@ -2517,27 +2517,25 @@ int kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx, struct kvm_io_bus *new_bus, *bus; bus = kvm->buses[bus_idx]; - - new_bus = kmemdup(bus, sizeof(*bus), GFP_KERNEL); - if (!new_bus) - return -ENOMEM; - r = -ENOENT; - for (i = 0; i < new_bus->dev_count; i++) - if (new_bus->range[i].dev == dev) { + for (i = 0; i < bus->dev_count; i++) + if (bus->range[i].dev == dev) { r = 0; - new_bus->dev_count--; - new_bus->range[i] = new_bus->range[new_bus->dev_count]; - sort(new_bus->range, new_bus->dev_count, - sizeof(struct kvm_io_range), - kvm_io_bus_sort_cmp, NULL); break; } - if (r) { - kfree(new_bus); + if (r) return r; - } + + new_bus = kzalloc(sizeof(*bus) + ((bus->dev_count - 1) * + sizeof(struct kvm_io_range)), GFP_KERNEL); + if (!new_bus) + return -ENOMEM; + + memcpy(new_bus, bus, sizeof(*bus) + i * sizeof(struct kvm_io_range)); + new_bus->dev_count--; + memcpy(new_bus->range + i, bus->range + i + 1, + (new_bus->dev_count - i) * sizeof(struct kvm_io_range)); rcu_assign_pointer(kvm->buses[bus_idx], new_bus); synchronize_srcu_expedited(&kvm->srcu); -- cgit v0.10.2 From 786a9f888bfbe70a36d0592b26037ca1e8c8da7f Mon Sep 17 00:00:00 2001 From: Amos Kong Date: Fri, 9 Mar 2012 12:17:40 +0800 Subject: KVM: set upper bounds for iobus dev to limit userspace kvm_io_bus devices are used for ioevent, pit, pic, ioapic, coalesced_mmio. Currently Qemu only emulates one PCI bus, it contains 32 slots, one slot contains 8 functions, maximum of supported PCI devices: 1 * 32 * 8 = 256. One virtio-blk takes one iobus device, one virtio-net(vhost=on) takes two iobus devices. The maximum of coalesced mmio zone is 100, each zone has an iobus devices. So 300 io_bus devices are not enough. Set an upper bounds for kvm_io_range to limit userspace. 1000 is a very large limit and not bloat the typical user. Signed-off-by: Amos Kong Signed-off-by: Marcelo Tosatti Signed-off-by: Avi Kivity diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index ba9fb4a..3a2cea6 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -68,7 +68,7 @@ struct kvm_io_range { struct kvm_io_device *dev; }; -#define NR_IOBUS_DEVS 300 +#define NR_IOBUS_DEVS 1000 struct kvm_io_bus { int dev_count; -- cgit v0.10.2 From 675acb758ab2381c72fe3ceb5c091cbd0879d4dd Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Thu, 8 Mar 2012 18:07:56 +0800 Subject: KVM: SVM: count all irq windows exit Also count the exits of fast-path. Signed-off-by: Jason Wang Acked-by: Joerg Roedel Signed-off-by: Marcelo Tosatti Signed-off-by: Avi Kivity diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index e334389..f316720 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -3240,6 +3240,7 @@ static int interrupt_window_interception(struct vcpu_svm *svm) svm_clear_vintr(svm); svm->vmcb->control.int_ctl &= ~V_IRQ_MASK; mark_dirty(svm->vmcb, VMCB_INTR); + ++svm->vcpu.stat.irq_window_exits; /* * If the user space waits to inject interrupts, exit as soon as * possible @@ -3247,7 +3248,6 @@ static int interrupt_window_interception(struct vcpu_svm *svm) if (!irqchip_in_kernel(svm->vcpu.kvm) && kvm_run->request_interrupt_window && !kvm_cpu_has_interrupt(&svm->vcpu)) { - ++svm->vcpu.stat.irq_window_exits; kvm_run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN; return 0; } -- cgit v0.10.2 From 66ef89315f121cda9bf5b65a4ef02ad1b4fb16d9 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Sun, 8 Apr 2012 12:47:32 +0300 Subject: KVM: schedule debugfs statistics for removal Deprecated in favour of tracepoints. Signed-off-by: Avi Kivity Signed-off-by: Marcelo Tosatti Signed-off-by: Avi Kivity diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt index 709e08e..d6dd84f 100644 --- a/Documentation/feature-removal-schedule.txt +++ b/Documentation/feature-removal-schedule.txt @@ -531,3 +531,10 @@ Why: There appear to be no production users of the get_robust_list syscall, of ASLR. It was only ever intended for debugging, so it should be removed. Who: Kees Cook + +---------------------------- + +What: KVM debugfs statistics +When: 2013 +Why: KVM tracepoints provide mostly equivalent information in a much more + flexible fashion. -- cgit v0.10.2 From b6d33834bd4e8bdf4a199812e31b3e36da53c794 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Thu, 8 Mar 2012 16:44:24 -0500 Subject: KVM: Factor out kvm_vcpu_kick to arch-generic code The kvm_vcpu_kick function performs roughly the same funcitonality on most all architectures, so we shouldn't have separate copies. PowerPC keeps a pointer to interchanging waitqueues on the vcpu_arch structure and to accomodate this special need a __KVM_HAVE_ARCH_VCPU_GET_WQ define and accompanying function kvm_arch_vcpu_wq have been defined. For all other architectures this is a generic inline that just returns &vcpu->wq; Acked-by: Scott Wood Signed-off-by: Christoffer Dall Signed-off-by: Marcelo Tosatti Signed-off-by: Avi Kivity diff --git a/arch/ia64/include/asm/kvm_host.h b/arch/ia64/include/asm/kvm_host.h index e35b3a8..c4b4bac 100644 --- a/arch/ia64/include/asm/kvm_host.h +++ b/arch/ia64/include/asm/kvm_host.h @@ -365,6 +365,7 @@ struct thash_cb { }; struct kvm_vcpu_stat { + u32 halt_wakeup; }; struct kvm_vcpu_arch { diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c index f5104b7..9d80ff8 100644 --- a/arch/ia64/kvm/kvm-ia64.c +++ b/arch/ia64/kvm/kvm-ia64.c @@ -1872,21 +1872,6 @@ void kvm_arch_hardware_unsetup(void) { } -void kvm_vcpu_kick(struct kvm_vcpu *vcpu) -{ - int me; - int cpu = vcpu->cpu; - - if (waitqueue_active(&vcpu->wq)) - wake_up_interruptible(&vcpu->wq); - - me = get_cpu(); - if (cpu != me && (unsigned) cpu < nr_cpu_ids && cpu_online(cpu)) - if (!test_and_set_bit(KVM_REQ_KICK, &vcpu->requests)) - smp_send_reschedule(cpu); - put_cpu(); -} - int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq) { return __apic_accept_irq(vcpu, irq->vector); @@ -1956,6 +1941,11 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) (kvm_highest_pending_irq(vcpu) != -1); } +int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu) +{ + return (!test_and_set_bit(KVM_REQ_KICK, &vcpu->requests)); +} + int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, struct kvm_mp_state *mp_state) { diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 52eb9c1..8893837 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -498,4 +498,10 @@ struct kvm_vcpu_arch { #define KVM_MMIO_REG_QPR 0x0040 #define KVM_MMIO_REG_FQPR 0x0060 +#define __KVM_HAVE_ARCH_VCPU_GET_WQ 1 +static inline wait_queue_head *kvm_arch_vcpu_wq(struct kvm_vcpu *vcpu) +{ + return vcpu->arch.wqp; +} + #endif /* __POWERPC_KVM_HOST_H__ */ diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 00d7e34..b5e9046 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -43,6 +43,11 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *v) v->requests; } +int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu) +{ + return 1; +} + int kvmppc_kvm_pv(struct kvm_vcpu *vcpu) { int nr = kvmppc_get_gpr(vcpu, 11); @@ -588,21 +593,6 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) return r; } -void kvm_vcpu_kick(struct kvm_vcpu *vcpu) -{ - int me; - int cpu = vcpu->cpu; - - me = get_cpu(); - if (waitqueue_active(vcpu->arch.wqp)) { - wake_up_interruptible(vcpu->arch.wqp); - vcpu->stat.halt_wakeup++; - } else if (cpu != me && cpu != -1) { - smp_send_reschedule(vcpu->cpu); - } - put_cpu(); -} - int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq) { if (irq->irq == KVM_INTERRUPT_UNSET) { @@ -611,6 +601,7 @@ int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq) } kvmppc_core_queue_external(vcpu, irq); + kvm_vcpu_kick(vcpu); return 0; diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 217ce44..d30c835 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -423,6 +423,14 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) return 0; } +int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu) +{ + /* kvm common code refers to this, but never calls it */ + BUG(); + return 0; +} + + static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu) { kvm_s390_vcpu_initial_reset(vcpu); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 4044ce0..511031d 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -6403,21 +6403,9 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) kvm_cpu_has_interrupt(vcpu)); } -void kvm_vcpu_kick(struct kvm_vcpu *vcpu) +int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu) { - int me; - int cpu = vcpu->cpu; - - if (waitqueue_active(&vcpu->wq)) { - wake_up_interruptible(&vcpu->wq); - ++vcpu->stat.halt_wakeup; - } - - me = get_cpu(); - if (cpu != me && (unsigned)cpu < nr_cpu_ids && cpu_online(cpu)) - if (kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE) - smp_send_reschedule(cpu); - put_cpu(); + return kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE; } int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu) diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 3a2cea6..5b624e1 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -439,6 +439,7 @@ void mark_page_dirty_in_slot(struct kvm *kvm, struct kvm_memory_slot *memslot, gfn_t gfn); void kvm_vcpu_block(struct kvm_vcpu *vcpu); +void kvm_vcpu_kick(struct kvm_vcpu *vcpu); void kvm_vcpu_on_spin(struct kvm_vcpu *vcpu); void kvm_resched(struct kvm_vcpu *vcpu); void kvm_load_guest_fpu(struct kvm_vcpu *vcpu); @@ -507,6 +508,7 @@ int kvm_arch_hardware_setup(void); void kvm_arch_hardware_unsetup(void); void kvm_arch_check_processor_compat(void *rtn); int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu); +int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu); void kvm_free_physmem(struct kvm *kvm); @@ -522,6 +524,13 @@ static inline void kvm_arch_free_vm(struct kvm *kvm) } #endif +#ifndef __KVM_HAVE_ARCH_VCPU_GET_WQ +static inline wait_queue_head_t *kvm_arch_vcpu_wq(struct kvm_vcpu *vcpu) +{ + return &vcpu->wq; +} +#endif + int kvm_arch_init_vm(struct kvm *kvm, unsigned long type); void kvm_arch_destroy_vm(struct kvm *kvm); void kvm_free_all_assigned_devices(struct kvm *kvm); diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index a9565e2..7149a2e 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1514,6 +1514,28 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu) finish_wait(&vcpu->wq, &wait); } +/* + * Kick a sleeping VCPU, or a guest VCPU in guest mode, into host kernel mode. + */ +void kvm_vcpu_kick(struct kvm_vcpu *vcpu) +{ + int me; + int cpu = vcpu->cpu; + wait_queue_head_t *wqp; + + wqp = kvm_arch_vcpu_wq(vcpu); + if (waitqueue_active(wqp)) { + wake_up_interruptible(wqp); + ++vcpu->stat.halt_wakeup; + } + + me = get_cpu(); + if (cpu != me && (unsigned)cpu < nr_cpu_ids && cpu_online(cpu)) + if (kvm_arch_vcpu_should_kick(vcpu)) + smp_send_reschedule(cpu); + put_cpu(); +} + void kvm_resched(struct kvm_vcpu *vcpu) { if (!need_resched()) -- cgit v0.10.2 From 2246f8b56315befa30f3d3d2800e0734c774f70e Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Tue, 13 Mar 2012 22:35:01 +0100 Subject: KVM: PPC: Rework wqp conditional code On PowerPC, we sometimes use a waitqueue per core, not per thread, so we can't always use the vcpu internal waitqueue. This code has been generalized by Christoffer Dall recently, but unfortunately broke compilation for PowerPC. At the time the helper function is defined, struct kvm_vcpu is not declared yet, so we can't dereference it. This patch moves all logic into the generic inline function, at which time we have all information necessary. Signed-off-by: Alexander Graf Signed-off-by: Marcelo Tosatti Signed-off-by: Avi Kivity diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 8893837..20ab5b2 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -498,10 +498,6 @@ struct kvm_vcpu_arch { #define KVM_MMIO_REG_QPR 0x0040 #define KVM_MMIO_REG_FQPR 0x0060 -#define __KVM_HAVE_ARCH_VCPU_GET_WQ 1 -static inline wait_queue_head *kvm_arch_vcpu_wq(struct kvm_vcpu *vcpu) -{ - return vcpu->arch.wqp; -} +#define __KVM_HAVE_ARCH_WQP #endif /* __POWERPC_KVM_HOST_H__ */ diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 5b624e1..5184817 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -524,12 +524,14 @@ static inline void kvm_arch_free_vm(struct kvm *kvm) } #endif -#ifndef __KVM_HAVE_ARCH_VCPU_GET_WQ static inline wait_queue_head_t *kvm_arch_vcpu_wq(struct kvm_vcpu *vcpu) { +#ifdef __KVM_HAVE_ARCH_WQP + return vcpu->arch.wqp; +#else return &vcpu->wq; -} #endif +} int kvm_arch_init_vm(struct kvm *kvm, unsigned long type); void kvm_arch_destroy_vm(struct kvm *kvm); -- cgit v0.10.2 From eae3ee7d8a7c59cf63441dedf28674889f5fc477 Mon Sep 17 00:00:00 2001 From: Eric B Munson Date: Sat, 10 Mar 2012 14:37:25 -0500 Subject: x86: pvclock: Add flag to indicate that a vm was stopped by the host This flag will be used to check if the vm was stopped by the host when a soft lockup was detected. The host will set the flag when it stops the guest. On resume, the guest will check this flag if a soft lockup is detected and skip issuing the warning. Signed-off-by: Eric B Munson Signed-off-by: Marcelo Tosatti Signed-off-by: Avi Kivity diff --git a/arch/x86/include/asm/pvclock-abi.h b/arch/x86/include/asm/pvclock-abi.h index 35f2d19..6167fd7 100644 --- a/arch/x86/include/asm/pvclock-abi.h +++ b/arch/x86/include/asm/pvclock-abi.h @@ -40,5 +40,6 @@ struct pvclock_wall_clock { } __attribute__((__packed__)); #define PVCLOCK_TSC_STABLE_BIT (1 << 0) +#define PVCLOCK_GUEST_STOPPED (1 << 1) #endif /* __ASSEMBLY__ */ #endif /* _ASM_X86_PVCLOCK_ABI_H */ -- cgit v0.10.2 From 3b5d56b9317fa7b5407dff1aa7b115bf6cdbd494 Mon Sep 17 00:00:00 2001 From: Eric B Munson Date: Sat, 10 Mar 2012 14:37:26 -0500 Subject: kvmclock: Add functions to check if the host has stopped the vm When a host stops or suspends a VM it will set a flag to show this. The watchdog will use these functions to determine if a softlockup is real, or the result of a suspended VM. Signed-off-by: Eric B Munson asm-generic changes Acked-by: Arnd Bergmann Signed-off-by: Marcelo Tosatti Signed-off-by: Avi Kivity diff --git a/arch/alpha/include/asm/kvm_para.h b/arch/alpha/include/asm/kvm_para.h new file mode 100644 index 0000000..14fab8f --- /dev/null +++ b/arch/alpha/include/asm/kvm_para.h @@ -0,0 +1 @@ +#include diff --git a/arch/arm/include/asm/kvm_para.h b/arch/arm/include/asm/kvm_para.h new file mode 100644 index 0000000..14fab8f --- /dev/null +++ b/arch/arm/include/asm/kvm_para.h @@ -0,0 +1 @@ +#include diff --git a/arch/avr32/include/asm/kvm_para.h b/arch/avr32/include/asm/kvm_para.h new file mode 100644 index 0000000..14fab8f --- /dev/null +++ b/arch/avr32/include/asm/kvm_para.h @@ -0,0 +1 @@ +#include diff --git a/arch/blackfin/include/asm/kvm_para.h b/arch/blackfin/include/asm/kvm_para.h new file mode 100644 index 0000000..14fab8f --- /dev/null +++ b/arch/blackfin/include/asm/kvm_para.h @@ -0,0 +1 @@ +#include diff --git a/arch/c6x/include/asm/kvm_para.h b/arch/c6x/include/asm/kvm_para.h new file mode 100644 index 0000000..14fab8f --- /dev/null +++ b/arch/c6x/include/asm/kvm_para.h @@ -0,0 +1 @@ +#include diff --git a/arch/frv/include/asm/kvm_para.h b/arch/frv/include/asm/kvm_para.h new file mode 100644 index 0000000..14fab8f --- /dev/null +++ b/arch/frv/include/asm/kvm_para.h @@ -0,0 +1 @@ +#include diff --git a/arch/h8300/include/asm/kvm_para.h b/arch/h8300/include/asm/kvm_para.h new file mode 100644 index 0000000..14fab8f --- /dev/null +++ b/arch/h8300/include/asm/kvm_para.h @@ -0,0 +1 @@ +#include diff --git a/arch/hexagon/include/asm/kvm_para.h b/arch/hexagon/include/asm/kvm_para.h new file mode 100644 index 0000000..14fab8f --- /dev/null +++ b/arch/hexagon/include/asm/kvm_para.h @@ -0,0 +1 @@ +#include diff --git a/arch/ia64/include/asm/kvm_para.h b/arch/ia64/include/asm/kvm_para.h index 1588aee..2019cb9 100644 --- a/arch/ia64/include/asm/kvm_para.h +++ b/arch/ia64/include/asm/kvm_para.h @@ -26,6 +26,11 @@ static inline unsigned int kvm_arch_para_features(void) return 0; } +static inline bool kvm_check_and_clear_guest_paused(void) +{ + return false; +} + #endif #endif diff --git a/arch/m68k/include/asm/kvm_para.h b/arch/m68k/include/asm/kvm_para.h new file mode 100644 index 0000000..14fab8f --- /dev/null +++ b/arch/m68k/include/asm/kvm_para.h @@ -0,0 +1 @@ +#include diff --git a/arch/microblaze/include/asm/kvm_para.h b/arch/microblaze/include/asm/kvm_para.h new file mode 100644 index 0000000..14fab8f --- /dev/null +++ b/arch/microblaze/include/asm/kvm_para.h @@ -0,0 +1 @@ +#include diff --git a/arch/mips/include/asm/kvm_para.h b/arch/mips/include/asm/kvm_para.h new file mode 100644 index 0000000..14fab8f --- /dev/null +++ b/arch/mips/include/asm/kvm_para.h @@ -0,0 +1 @@ +#include diff --git a/arch/mn10300/include/asm/kvm_para.h b/arch/mn10300/include/asm/kvm_para.h new file mode 100644 index 0000000..14fab8f --- /dev/null +++ b/arch/mn10300/include/asm/kvm_para.h @@ -0,0 +1 @@ +#include diff --git a/arch/openrisc/include/asm/kvm_para.h b/arch/openrisc/include/asm/kvm_para.h new file mode 100644 index 0000000..14fab8f --- /dev/null +++ b/arch/openrisc/include/asm/kvm_para.h @@ -0,0 +1 @@ +#include diff --git a/arch/parisc/include/asm/kvm_para.h b/arch/parisc/include/asm/kvm_para.h new file mode 100644 index 0000000..14fab8f --- /dev/null +++ b/arch/parisc/include/asm/kvm_para.h @@ -0,0 +1 @@ +#include diff --git a/arch/powerpc/include/asm/kvm_para.h b/arch/powerpc/include/asm/kvm_para.h index 7b754e7..c18916b 100644 --- a/arch/powerpc/include/asm/kvm_para.h +++ b/arch/powerpc/include/asm/kvm_para.h @@ -206,6 +206,11 @@ static inline unsigned int kvm_arch_para_features(void) return r; } +static inline bool kvm_check_and_clear_guest_paused(void) +{ + return false; +} + #endif /* __KERNEL__ */ #endif /* __POWERPC_KVM_PARA_H__ */ diff --git a/arch/s390/include/asm/kvm_para.h b/arch/s390/include/asm/kvm_para.h index 6964db2..a988329 100644 --- a/arch/s390/include/asm/kvm_para.h +++ b/arch/s390/include/asm/kvm_para.h @@ -149,6 +149,11 @@ static inline unsigned int kvm_arch_para_features(void) return 0; } +static inline bool kvm_check_and_clear_guest_paused(void) +{ + return false; +} + #endif #endif /* __S390_KVM_PARA_H */ diff --git a/arch/score/include/asm/kvm_para.h b/arch/score/include/asm/kvm_para.h new file mode 100644 index 0000000..14fab8f --- /dev/null +++ b/arch/score/include/asm/kvm_para.h @@ -0,0 +1 @@ +#include diff --git a/arch/sh/include/asm/kvm_para.h b/arch/sh/include/asm/kvm_para.h new file mode 100644 index 0000000..14fab8f --- /dev/null +++ b/arch/sh/include/asm/kvm_para.h @@ -0,0 +1 @@ +#include diff --git a/arch/sparc/include/asm/kvm_para.h b/arch/sparc/include/asm/kvm_para.h new file mode 100644 index 0000000..14fab8f --- /dev/null +++ b/arch/sparc/include/asm/kvm_para.h @@ -0,0 +1 @@ +#include diff --git a/arch/tile/include/asm/kvm_para.h b/arch/tile/include/asm/kvm_para.h new file mode 100644 index 0000000..14fab8f --- /dev/null +++ b/arch/tile/include/asm/kvm_para.h @@ -0,0 +1 @@ +#include diff --git a/arch/um/include/asm/kvm_para.h b/arch/um/include/asm/kvm_para.h new file mode 100644 index 0000000..14fab8f --- /dev/null +++ b/arch/um/include/asm/kvm_para.h @@ -0,0 +1 @@ +#include diff --git a/arch/unicore32/include/asm/kvm_para.h b/arch/unicore32/include/asm/kvm_para.h new file mode 100644 index 0000000..14fab8f --- /dev/null +++ b/arch/unicore32/include/asm/kvm_para.h @@ -0,0 +1 @@ +#include diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h index 734c376..99c4bbe 100644 --- a/arch/x86/include/asm/kvm_para.h +++ b/arch/x86/include/asm/kvm_para.h @@ -95,6 +95,14 @@ struct kvm_vcpu_pv_apf_data { extern void kvmclock_init(void); extern int kvm_register_clock(char *txt); +#ifdef CONFIG_KVM_CLOCK +bool kvm_check_and_clear_guest_paused(void); +#else +static inline bool kvm_check_and_clear_guest_paused(void) +{ + return false; +} +#endif /* CONFIG_KVMCLOCK */ /* This instruction is vmcall. On non-VT architectures, it will generate a * trap that we will then rewrite to the appropriate instruction. diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c index f8492da..4ba090c 100644 --- a/arch/x86/kernel/kvmclock.c +++ b/arch/x86/kernel/kvmclock.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include @@ -114,6 +115,26 @@ static void kvm_get_preset_lpj(void) preset_lpj = lpj; } +bool kvm_check_and_clear_guest_paused(void) +{ + bool ret = false; + struct pvclock_vcpu_time_info *src; + + /* + * per_cpu() is safe here because this function is only called from + * timer functions where preemption is already disabled. + */ + WARN_ON(!in_atomic()); + src = &__get_cpu_var(hv_clock); + if ((src->flags & PVCLOCK_GUEST_STOPPED) != 0) { + __this_cpu_and(hv_clock.flags, ~PVCLOCK_GUEST_STOPPED); + ret = true; + } + + return ret; +} +EXPORT_SYMBOL_GPL(kvm_check_and_clear_guest_paused); + static struct clocksource kvm_clock = { .name = "kvm-clock", .read = kvm_clock_get_cycles, diff --git a/arch/xtensa/include/asm/kvm_para.h b/arch/xtensa/include/asm/kvm_para.h new file mode 100644 index 0000000..14fab8f --- /dev/null +++ b/arch/xtensa/include/asm/kvm_para.h @@ -0,0 +1 @@ +#include diff --git a/include/asm-generic/kvm_para.h b/include/asm-generic/kvm_para.h new file mode 100644 index 0000000..05ef7e7 --- /dev/null +++ b/include/asm-generic/kvm_para.h @@ -0,0 +1,14 @@ +#ifndef _ASM_GENERIC_KVM_PARA_H +#define _ASM_GENERIC_KVM_PARA_H + + +/* + * This function is used by architectures that support kvm to avoid issuing + * false soft lockup messages. + */ +static inline bool kvm_check_and_clear_guest_paused(void) +{ + return false; +} + +#endif -- cgit v0.10.2 From 1c0b28c2a46d98cd258d96b8c222144b22876c46 Mon Sep 17 00:00:00 2001 From: Eric B Munson Date: Sat, 10 Mar 2012 14:37:27 -0500 Subject: KVM: x86: Add ioctl for KVM_KVMCLOCK_CTRL Now that we have a flag that will tell the guest it was suspended, create an interface for that communication using a KVM ioctl. Signed-off-by: Eric B Munson Signed-off-by: Marcelo Tosatti Signed-off-by: Avi Kivity diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 6386f8c..81ff39f 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -1669,6 +1669,26 @@ at the memory location pointed to by "addr". The list of registers accessible using this interface is identical to the list in 4.64. +4.70 KVM_KVMCLOCK_CTRL + +Capability: KVM_CAP_KVMCLOCK_CTRL +Architectures: Any that implement pvclocks (currently x86 only) +Type: vcpu ioctl +Parameters: None +Returns: 0 on success, -1 on error + +This signals to the host kernel that the specified guest is being paused by +userspace. The host will set a flag in the pvclock structure that is checked +from the soft lockup watchdog. The flag is part of the pvclock structure that +is shared between guest and host, specifically the second bit of the flags +field of the pvclock_vcpu_time_info structure. It will be set exclusively by +the host and read/cleared exclusively by the guest. The guest operation of +checking and clearing the flag must an atomic operation so +load-link/store-conditional, or equivalent must be used. There are two cases +where the guest will clear the flag: when the soft lockup watchdog timer resets +itself or when a soft lockup is detected. This ioctl can be called any time +after pausing the vcpu, but before it is resumed. + 5. The kvm_run structure Application code obtains a pointer to the kvm_run structure by diff --git a/Documentation/virtual/kvm/msr.txt b/Documentation/virtual/kvm/msr.txt index 5031780..96b41bd 100644 --- a/Documentation/virtual/kvm/msr.txt +++ b/Documentation/virtual/kvm/msr.txt @@ -109,6 +109,10 @@ MSR_KVM_SYSTEM_TIME_NEW: 0x4b564d01 0 | 24 | multiple cpus are guaranteed to | | be monotonic ------------------------------------------------------------- + | | guest vcpu has been paused by + 1 | N/A | the host + | | See 4.70 in api.txt + ------------------------------------------------------------- Availability of this MSR must be checked via bit 3 in 0x4000001 cpuid leaf prior to usage. diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 511031d..99b7380 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2147,6 +2147,7 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_ASYNC_PF: case KVM_CAP_GET_TSC_KHZ: case KVM_CAP_PCI_2_3: + case KVM_CAP_KVMCLOCK_CTRL: r = 1; break; case KVM_CAP_COALESCED_MMIO: @@ -2597,6 +2598,23 @@ static int kvm_vcpu_ioctl_x86_set_xcrs(struct kvm_vcpu *vcpu, return r; } +/* + * kvm_set_guest_paused() indicates to the guest kernel that it has been + * stopped by the hypervisor. This function will be called from the host only. + * EINVAL is returned when the host attempts to set the flag for a guest that + * does not support pv clocks. + */ +static int kvm_set_guest_paused(struct kvm_vcpu *vcpu) +{ + struct pvclock_vcpu_time_info *src = &vcpu->arch.hv_clock; + if (!vcpu->arch.time_page) + return -EINVAL; + src->flags |= PVCLOCK_GUEST_STOPPED; + mark_page_dirty(vcpu->kvm, vcpu->arch.time >> PAGE_SHIFT); + kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu); + return 0; +} + long kvm_arch_vcpu_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { @@ -2873,6 +2891,10 @@ long kvm_arch_vcpu_ioctl(struct file *filp, r = vcpu->arch.virtual_tsc_khz; goto out; } + case KVM_KVMCLOCK_CTRL: { + r = kvm_set_guest_paused(vcpu); + goto out; + } default: r = -EINVAL; } diff --git a/include/linux/kvm.h b/include/linux/kvm.h index 6c322a9..7a9dd4b 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -589,6 +589,7 @@ struct kvm_ppc_pvinfo { #define KVM_CAP_S390_UCONTROL 73 #define KVM_CAP_SYNC_REGS 74 #define KVM_CAP_PCI_2_3 75 +#define KVM_CAP_KVMCLOCK_CTRL 76 #ifdef KVM_CAP_IRQ_ROUTING @@ -859,6 +860,8 @@ struct kvm_s390_ucas_mapping { /* Available with KVM_CAP_ONE_REG */ #define KVM_GET_ONE_REG _IOW(KVMIO, 0xab, struct kvm_one_reg) #define KVM_SET_ONE_REG _IOW(KVMIO, 0xac, struct kvm_one_reg) +/* VM is being stopped by host */ +#define KVM_KVMCLOCK_CTRL _IO(KVMIO, 0xad) #define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0) #define KVM_DEV_ASSIGN_PCI_2_3 (1 << 1) -- cgit v0.10.2 From 5d1c0f4a80a6df73395fb3fc2c302510f8f09d36 Mon Sep 17 00:00:00 2001 From: Eric B Munson Date: Sat, 10 Mar 2012 14:37:28 -0500 Subject: watchdog: add check for suspended vm in softlockup detector A suspended VM can cause spurious soft lockup warnings. To avoid these, the watchdog now checks if the kernel knows it was stopped by the host and skips the warning if so. When the watchdog is reset successfully, clear the guest paused flag. Signed-off-by: Eric B Munson Signed-off-by: Marcelo Tosatti Signed-off-by: Avi Kivity diff --git a/kernel/watchdog.c b/kernel/watchdog.c index df30ee0..e5e1d85 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -24,6 +24,7 @@ #include #include +#include #include int watchdog_enabled = 1; @@ -280,6 +281,9 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer) __this_cpu_write(softlockup_touch_sync, false); sched_clock_tick(); } + + /* Clear the guest paused flag on watchdog reset */ + kvm_check_and_clear_guest_paused(); __touch_watchdog(); return HRTIMER_RESTART; } @@ -292,6 +296,14 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer) */ duration = is_softlockup(touch_ts); if (unlikely(duration)) { + /* + * If a virtual machine is stopped by the host it can look to + * the watchdog like a soft lockup, check to see if the host + * stopped the vm before we issue the warning + */ + if (kvm_check_and_clear_guest_paused()) + return HRTIMER_RESTART; + /* only warn once */ if (__this_cpu_read(soft_watchdog_warn) == true) return HRTIMER_RESTART; -- cgit v0.10.2 From 8c84780df909433777ea9463b8350bc0aab34940 Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Wed, 14 Mar 2012 17:58:48 -0300 Subject: KVM: fix kvm_vcpu_kick build failure on S390 S390's kvm_vcpu_stat does not contain halt_wakeup member. Signed-off-by: Marcelo Tosatti Signed-off-by: Avi Kivity diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 7149a2e..a612bc8 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1514,6 +1514,7 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu) finish_wait(&vcpu->wq, &wait); } +#ifndef CONFIG_S390 /* * Kick a sleeping VCPU, or a guest VCPU in guest mode, into host kernel mode. */ @@ -1535,6 +1536,7 @@ void kvm_vcpu_kick(struct kvm_vcpu *vcpu) smp_send_reschedule(cpu); put_cpu(); } +#endif /* !CONFIG_S390 */ void kvm_resched(struct kvm_vcpu *vcpu) { -- cgit v0.10.2 From 248997095d652576f1213028a95ca5fff85d089f Mon Sep 17 00:00:00 2001 From: Eric B Munson Date: Thu, 15 Mar 2012 18:16:49 -0400 Subject: kvmclock: remove unneeded EXPORT macro check_and_clear_guest_paused does not need to be exported as it isn't used by any modules, remove the export. Signed-off-by: Eric B Munson Signed-off-by: Marcelo Tosatti Signed-off-by: Avi Kivity diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c index 4ba090c..086eb58 100644 --- a/arch/x86/kernel/kvmclock.c +++ b/arch/x86/kernel/kvmclock.c @@ -133,7 +133,6 @@ bool kvm_check_and_clear_guest_paused(void) return ret; } -EXPORT_SYMBOL_GPL(kvm_check_and_clear_guest_paused); static struct clocksource kvm_clock = { .name = "kvm-clock", -- cgit v0.10.2 From a0ed46073c14f66dbf0707aaa7588b78da83d7c6 Mon Sep 17 00:00:00 2001 From: Takuya Yoshikawa Date: Thu, 1 Mar 2012 19:31:22 +0900 Subject: KVM: MMU: Split the main body of rmap_write_protect() off from others We will use this in the following patch to implement another function which needs to write protect pages using the rmap information. Note that there is a small change in debug printing for large pages: we do not differentiate them from others to avoid duplicating code. Signed-off-by: Takuya Yoshikawa Signed-off-by: Avi Kivity diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 4cb1642..c8b5694 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -1010,42 +1010,43 @@ static void drop_spte(struct kvm *kvm, u64 *sptep) rmap_remove(kvm, sptep); } -int kvm_mmu_rmap_write_protect(struct kvm *kvm, u64 gfn, - struct kvm_memory_slot *slot) +static int __rmap_write_protect(struct kvm *kvm, unsigned long *rmapp, int level) { - unsigned long *rmapp; - u64 *spte; - int i, write_protected = 0; + u64 *spte = NULL; + int write_protected = 0; - rmapp = __gfn_to_rmap(gfn, PT_PAGE_TABLE_LEVEL, slot); - spte = rmap_next(rmapp, NULL); - while (spte) { + while ((spte = rmap_next(rmapp, spte))) { BUG_ON(!(*spte & PT_PRESENT_MASK)); rmap_printk("rmap_write_protect: spte %p %llx\n", spte, *spte); - if (is_writable_pte(*spte)) { + + if (!is_writable_pte(*spte)) + continue; + + if (level == PT_PAGE_TABLE_LEVEL) { mmu_spte_update(spte, *spte & ~PT_WRITABLE_MASK); - write_protected = 1; + } else { + BUG_ON(!is_large_pte(*spte)); + drop_spte(kvm, spte); + --kvm->stat.lpages; + spte = NULL; } - spte = rmap_next(rmapp, spte); + + write_protected = 1; } - /* check for huge page mappings */ - for (i = PT_DIRECTORY_LEVEL; + return write_protected; +} + +int kvm_mmu_rmap_write_protect(struct kvm *kvm, u64 gfn, + struct kvm_memory_slot *slot) +{ + unsigned long *rmapp; + int i, write_protected = 0; + + for (i = PT_PAGE_TABLE_LEVEL; i < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++i) { rmapp = __gfn_to_rmap(gfn, i, slot); - spte = rmap_next(rmapp, NULL); - while (spte) { - BUG_ON(!(*spte & PT_PRESENT_MASK)); - BUG_ON(!is_large_pte(*spte)); - pgprintk("rmap_write_protect(large): spte %p %llx %lld\n", spte, *spte, gfn); - if (is_writable_pte(*spte)) { - drop_spte(kvm, spte); - --kvm->stat.lpages; - spte = NULL; - write_protected = 1; - } - spte = rmap_next(rmapp, spte); - } + write_protected |= __rmap_write_protect(kvm, rmapp, i); } return write_protected; -- cgit v0.10.2 From 5dc99b2380d59b8aeafa98791f92b96400ed3187 Mon Sep 17 00:00:00 2001 From: Takuya Yoshikawa Date: Thu, 1 Mar 2012 19:32:16 +0900 Subject: KVM: Avoid checking huge page mappings in get_dirty_log() Dropped such mappings when we enabled dirty logging and we will never create new ones until we stop the logging. For this we introduce a new function which can be used to write protect a range of PT level pages: although we do not need to care about a range of pages at this point, the following patch will need this feature to optimize the write protection of many pages. Signed-off-by: Takuya Yoshikawa Signed-off-by: Avi Kivity diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index e216ba0..f624ca7 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -712,8 +712,9 @@ void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask, int kvm_mmu_reset_context(struct kvm_vcpu *vcpu); void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot); -int kvm_mmu_rmap_write_protect(struct kvm *kvm, u64 gfn, - struct kvm_memory_slot *slot); +void kvm_mmu_write_protect_pt_masked(struct kvm *kvm, + struct kvm_memory_slot *slot, + gfn_t gfn_offset, unsigned long mask); void kvm_mmu_zap_all(struct kvm *kvm); unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm); void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages); diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index c8b5694..dc5f245 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -1037,27 +1037,47 @@ static int __rmap_write_protect(struct kvm *kvm, unsigned long *rmapp, int level return write_protected; } -int kvm_mmu_rmap_write_protect(struct kvm *kvm, u64 gfn, - struct kvm_memory_slot *slot) +/** + * kvm_mmu_write_protect_pt_masked - write protect selected PT level pages + * @kvm: kvm instance + * @slot: slot to protect + * @gfn_offset: start of the BITS_PER_LONG pages we care about + * @mask: indicates which pages we should protect + * + * Used when we do not need to care about huge page mappings: e.g. during dirty + * logging we do not have any such mappings. + */ +void kvm_mmu_write_protect_pt_masked(struct kvm *kvm, + struct kvm_memory_slot *slot, + gfn_t gfn_offset, unsigned long mask) { unsigned long *rmapp; - int i, write_protected = 0; - for (i = PT_PAGE_TABLE_LEVEL; - i < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++i) { - rmapp = __gfn_to_rmap(gfn, i, slot); - write_protected |= __rmap_write_protect(kvm, rmapp, i); - } + while (mask) { + rmapp = &slot->rmap[gfn_offset + __ffs(mask)]; + __rmap_write_protect(kvm, rmapp, PT_PAGE_TABLE_LEVEL); - return write_protected; + /* clear the first set bit */ + mask &= mask - 1; + } } static int rmap_write_protect(struct kvm *kvm, u64 gfn) { struct kvm_memory_slot *slot; + unsigned long *rmapp; + int i; + int write_protected = 0; slot = gfn_to_memslot(kvm, gfn); - return kvm_mmu_rmap_write_protect(kvm, gfn, slot); + + for (i = PT_PAGE_TABLE_LEVEL; + i < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++i) { + rmapp = __gfn_to_rmap(gfn, i, slot); + write_protected |= __rmap_write_protect(kvm, rmapp, i); + } + + return write_protected; } static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp, diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 99b7380..813ebf1 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3095,13 +3095,11 @@ static void write_protect_slot(struct kvm *kvm, /* Not many dirty pages compared to # of shadow pages. */ if (nr_dirty_pages < kvm->arch.n_used_mmu_pages) { - unsigned long gfn_offset; + gfn_t offset; - for_each_set_bit(gfn_offset, dirty_bitmap, memslot->npages) { - unsigned long gfn = memslot->base_gfn + gfn_offset; + for_each_set_bit(offset, dirty_bitmap, memslot->npages) + kvm_mmu_write_protect_pt_masked(kvm, memslot, offset, 1); - kvm_mmu_rmap_write_protect(kvm, gfn, memslot); - } kvm_flush_remote_tlbs(kvm); } else kvm_mmu_slot_remove_write_access(kvm, memslot->id); -- cgit v0.10.2 From 60c34612b70711fb14a8dcbc6a79509902450d2e Mon Sep 17 00:00:00 2001 From: Takuya Yoshikawa Date: Sat, 3 Mar 2012 14:21:48 +0900 Subject: KVM: Switch to srcu-less get_dirty_log() We have seen some problems of the current implementation of get_dirty_log() which uses synchronize_srcu_expedited() for updating dirty bitmaps; e.g. it is noticeable that this sometimes gives us ms order of latency when we use VGA displays. Furthermore the recent discussion on the following thread "srcu: Implement call_srcu()" http://lkml.org/lkml/2012/1/31/211 also motivated us to implement get_dirty_log() without SRCU. This patch achieves this goal without sacrificing the performance of both VGA and live migration: in practice the new code is much faster than the old one unless we have too many dirty pages. Implementation: The key part of the implementation is the use of xchg() operation for clearing dirty bits atomically. Since this allows us to update only BITS_PER_LONG pages at once, we need to iterate over the dirty bitmap until every dirty bit is cleared again for the next call. Although some people may worry about the problem of using the atomic memory instruction many times to the concurrently accessible bitmap, it is usually accessed with mmu_lock held and we rarely see concurrent accesses: so what we need to care about is the pure xchg() overheads. Another point to note is that we do not use for_each_set_bit() to check which ones in each BITS_PER_LONG pages are actually dirty. Instead we simply use __ffs() in a loop. This is much faster than repeatedly call find_next_bit(). Performance: The dirty-log-perf unit test showed nice improvements, some times faster than before, except for some extreme cases; for such cases the speed of getting dirty page information is much faster than we process it in the userspace. For real workloads, both VGA and live migration, we have observed pure improvements: when the guest was reading a file during live migration, we originally saw a few ms of latency, but with the new method the latency was less than 200us. Signed-off-by: Takuya Yoshikawa Signed-off-by: Avi Kivity diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 813ebf1..0d9a578 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3067,55 +3067,32 @@ static int kvm_vm_ioctl_reinject(struct kvm *kvm, } /** - * write_protect_slot - write protect a slot for dirty logging - * @kvm: the kvm instance - * @memslot: the slot we protect - * @dirty_bitmap: the bitmap indicating which pages are dirty - * @nr_dirty_pages: the number of dirty pages + * kvm_vm_ioctl_get_dirty_log - get and clear the log of dirty pages in a slot + * @kvm: kvm instance + * @log: slot id and address to which we copy the log * - * We have two ways to find all sptes to protect: - * 1. Use kvm_mmu_slot_remove_write_access() which walks all shadow pages and - * checks ones that have a spte mapping a page in the slot. - * 2. Use kvm_mmu_rmap_write_protect() for each gfn found in the bitmap. + * We need to keep it in mind that VCPU threads can write to the bitmap + * concurrently. So, to avoid losing data, we keep the following order for + * each bit: * - * Generally speaking, if there are not so many dirty pages compared to the - * number of shadow pages, we should use the latter. + * 1. Take a snapshot of the bit and clear it if needed. + * 2. Write protect the corresponding page. + * 3. Flush TLB's if needed. + * 4. Copy the snapshot to the userspace. * - * Note that letting others write into a page marked dirty in the old bitmap - * by using the remaining tlb entry is not a problem. That page will become - * write protected again when we flush the tlb and then be reported dirty to - * the user space by copying the old bitmap. + * Between 2 and 3, the guest may write to the page using the remaining TLB + * entry. This is not a problem because the page will be reported dirty at + * step 4 using the snapshot taken before and step 3 ensures that successive + * writes will be logged for the next call. */ -static void write_protect_slot(struct kvm *kvm, - struct kvm_memory_slot *memslot, - unsigned long *dirty_bitmap, - unsigned long nr_dirty_pages) -{ - spin_lock(&kvm->mmu_lock); - - /* Not many dirty pages compared to # of shadow pages. */ - if (nr_dirty_pages < kvm->arch.n_used_mmu_pages) { - gfn_t offset; - - for_each_set_bit(offset, dirty_bitmap, memslot->npages) - kvm_mmu_write_protect_pt_masked(kvm, memslot, offset, 1); - - kvm_flush_remote_tlbs(kvm); - } else - kvm_mmu_slot_remove_write_access(kvm, memslot->id); - - spin_unlock(&kvm->mmu_lock); -} - -/* - * Get (and clear) the dirty memory log for a memory slot. - */ -int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, - struct kvm_dirty_log *log) +int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log) { int r; struct kvm_memory_slot *memslot; - unsigned long n, nr_dirty_pages; + unsigned long n, i; + unsigned long *dirty_bitmap; + unsigned long *dirty_bitmap_buffer; + bool is_dirty = false; mutex_lock(&kvm->slots_lock); @@ -3124,49 +3101,42 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, goto out; memslot = id_to_memslot(kvm->memslots, log->slot); + + dirty_bitmap = memslot->dirty_bitmap; r = -ENOENT; - if (!memslot->dirty_bitmap) + if (!dirty_bitmap) goto out; n = kvm_dirty_bitmap_bytes(memslot); - nr_dirty_pages = memslot->nr_dirty_pages; - /* If nothing is dirty, don't bother messing with page tables. */ - if (nr_dirty_pages) { - struct kvm_memslots *slots, *old_slots; - unsigned long *dirty_bitmap, *dirty_bitmap_head; + dirty_bitmap_buffer = dirty_bitmap + n / sizeof(long); + memset(dirty_bitmap_buffer, 0, n); - dirty_bitmap = memslot->dirty_bitmap; - dirty_bitmap_head = memslot->dirty_bitmap_head; - if (dirty_bitmap == dirty_bitmap_head) - dirty_bitmap_head += n / sizeof(long); - memset(dirty_bitmap_head, 0, n); + spin_lock(&kvm->mmu_lock); - r = -ENOMEM; - slots = kmemdup(kvm->memslots, sizeof(*kvm->memslots), GFP_KERNEL); - if (!slots) - goto out; + for (i = 0; i < n / sizeof(long); i++) { + unsigned long mask; + gfn_t offset; - memslot = id_to_memslot(slots, log->slot); - memslot->nr_dirty_pages = 0; - memslot->dirty_bitmap = dirty_bitmap_head; - update_memslots(slots, NULL); + if (!dirty_bitmap[i]) + continue; - old_slots = kvm->memslots; - rcu_assign_pointer(kvm->memslots, slots); - synchronize_srcu_expedited(&kvm->srcu); - kfree(old_slots); + is_dirty = true; - write_protect_slot(kvm, memslot, dirty_bitmap, nr_dirty_pages); + mask = xchg(&dirty_bitmap[i], 0); + dirty_bitmap_buffer[i] = mask; - r = -EFAULT; - if (copy_to_user(log->dirty_bitmap, dirty_bitmap, n)) - goto out; - } else { - r = -EFAULT; - if (clear_user(log->dirty_bitmap, n)) - goto out; + offset = i * BITS_PER_LONG; + kvm_mmu_write_protect_pt_masked(kvm, memslot, offset, mask); } + if (is_dirty) + kvm_flush_remote_tlbs(kvm); + + spin_unlock(&kvm->mmu_lock); + + r = -EFAULT; + if (copy_to_user(log->dirty_bitmap, dirty_bitmap_buffer, n)) + goto out; r = 0; out: -- cgit v0.10.2 From 93474b25af1eabf5b14743793156e8d307bfcd6b Mon Sep 17 00:00:00 2001 From: Takuya Yoshikawa Date: Thu, 1 Mar 2012 19:34:45 +0900 Subject: KVM: Remove unused dirty_bitmap_head and nr_dirty_pages Now that we do neither double buffering nor heuristic selection of the write protection method these are not needed anymore. Note: some drivers have their own implementation of set_bit_le() and making it generic needs a bit of work; so we use test_and_set_bit_le() and will later replace it with generic set_bit_le(). Signed-off-by: Takuya Yoshikawa Signed-off-by: Avi Kivity diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 5184817..49c2f2fd 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -179,8 +179,6 @@ struct kvm_memory_slot { unsigned long flags; unsigned long *rmap; unsigned long *dirty_bitmap; - unsigned long *dirty_bitmap_head; - unsigned long nr_dirty_pages; struct kvm_arch_memory_slot arch; unsigned long userspace_addr; int user_alloc; diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index a612bc8..6bd34a6 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -522,12 +522,11 @@ static void kvm_destroy_dirty_bitmap(struct kvm_memory_slot *memslot) return; if (2 * kvm_dirty_bitmap_bytes(memslot) > PAGE_SIZE) - vfree(memslot->dirty_bitmap_head); + vfree(memslot->dirty_bitmap); else - kfree(memslot->dirty_bitmap_head); + kfree(memslot->dirty_bitmap); memslot->dirty_bitmap = NULL; - memslot->dirty_bitmap_head = NULL; } /* @@ -611,8 +610,7 @@ static int kvm_vm_release(struct inode *inode, struct file *filp) /* * Allocation size is twice as large as the actual dirty bitmap size. - * This makes it possible to do double buffering: see x86's - * kvm_vm_ioctl_get_dirty_log(). + * See x86's kvm_vm_ioctl_get_dirty_log() why this is needed. */ static int kvm_create_dirty_bitmap(struct kvm_memory_slot *memslot) { @@ -627,8 +625,6 @@ static int kvm_create_dirty_bitmap(struct kvm_memory_slot *memslot) if (!memslot->dirty_bitmap) return -ENOMEM; - memslot->dirty_bitmap_head = memslot->dirty_bitmap; - memslot->nr_dirty_pages = 0; #endif /* !CONFIG_S390 */ return 0; } @@ -1476,8 +1472,8 @@ void mark_page_dirty_in_slot(struct kvm *kvm, struct kvm_memory_slot *memslot, if (memslot && memslot->dirty_bitmap) { unsigned long rel_gfn = gfn - memslot->base_gfn; - if (!test_and_set_bit_le(rel_gfn, memslot->dirty_bitmap)) - memslot->nr_dirty_pages++; + /* TODO: introduce set_bit_le() and use it */ + test_and_set_bit_le(rel_gfn, memslot->dirty_bitmap); } } -- cgit v0.10.2 From 52b066fa4e9cbfe45243dd4259b053d3fa7e21f1 Mon Sep 17 00:00:00 2001 From: Scott Wood Date: Tue, 20 Dec 2011 15:34:12 +0000 Subject: powerpc/booke: Set CPU_FTR_DEBUG_LVL_EXC on 32-bit Currently 32-bit only cares about this for choice of exception vector, which is done in core-specific code. However, KVM will want to distinguish as well. Signed-off-by: Scott Wood Signed-off-by: Alexander Graf Signed-off-by: Avi Kivity diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h index b9219e9..3be45cd 100644 --- a/arch/powerpc/include/asm/cputable.h +++ b/arch/powerpc/include/asm/cputable.h @@ -376,7 +376,8 @@ extern const char *powerpc_base_platform; #define CPU_FTRS_47X (CPU_FTRS_440x6) #define CPU_FTRS_E200 (CPU_FTR_USE_TB | CPU_FTR_SPE_COMP | \ CPU_FTR_NODSISRALIGN | CPU_FTR_COHERENT_ICACHE | \ - CPU_FTR_UNIFIED_ID_CACHE | CPU_FTR_NOEXECUTE) + CPU_FTR_UNIFIED_ID_CACHE | CPU_FTR_NOEXECUTE | \ + CPU_FTR_DEBUG_LVL_EXC) #define CPU_FTRS_E500 (CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_USE_TB | \ CPU_FTR_SPE_COMP | CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_NODSISRALIGN | \ CPU_FTR_NOEXECUTE) @@ -385,7 +386,7 @@ extern const char *powerpc_base_platform; CPU_FTR_NODSISRALIGN | CPU_FTR_NOEXECUTE) #define CPU_FTRS_E500MC (CPU_FTR_USE_TB | CPU_FTR_NODSISRALIGN | \ CPU_FTR_L2CSR | CPU_FTR_LWSYNC | CPU_FTR_NOEXECUTE | \ - CPU_FTR_DBELL) + CPU_FTR_DBELL | CPU_FTR_DEBUG_LVL_EXC) #define CPU_FTRS_E5500 (CPU_FTR_USE_TB | CPU_FTR_NODSISRALIGN | \ CPU_FTR_L2CSR | CPU_FTR_LWSYNC | CPU_FTR_NOEXECUTE | \ CPU_FTR_DBELL | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \ -- cgit v0.10.2 From 06aae86799c1b37f216371e05a1eacb2188bee9d Mon Sep 17 00:00:00 2001 From: Scott Wood Date: Tue, 20 Dec 2011 15:34:14 +0000 Subject: powerpc/e500: split CPU_FTRS_ALWAYS/CPU_FTRS_POSSIBLE Split e500 (v1/v2) and e500mc/e5500 to allow optimization of feature checks that differ between the two. Signed-off-by: Scott Wood Signed-off-by: Alexander Graf Signed-off-by: Avi Kivity diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h index 3be45cd..7108a9c 100644 --- a/arch/powerpc/include/asm/cputable.h +++ b/arch/powerpc/include/asm/cputable.h @@ -487,8 +487,10 @@ enum { CPU_FTRS_E200 | #endif #ifdef CONFIG_E500 - CPU_FTRS_E500 | CPU_FTRS_E500_2 | CPU_FTRS_E500MC | - CPU_FTRS_E5500 | CPU_FTRS_E6500 | + CPU_FTRS_E500 | CPU_FTRS_E500_2 | +#endif +#ifdef CONFIG_PPC_E500MC + CPU_FTRS_E500MC | CPU_FTRS_E5500 | CPU_FTRS_E6500 | #endif 0, }; @@ -532,8 +534,10 @@ enum { CPU_FTRS_E200 & #endif #ifdef CONFIG_E500 - CPU_FTRS_E500 & CPU_FTRS_E500_2 & CPU_FTRS_E500MC & - CPU_FTRS_E5500 & CPU_FTRS_E6500 & + CPU_FTRS_E500 & CPU_FTRS_E500_2 & +#endif +#ifdef CONFIG_PPC_E500MC + CPU_FTRS_E500MC & CPU_FTRS_E5500 & CPU_FTRS_E6500 & #endif CPU_FTRS_POSSIBLE, }; -- cgit v0.10.2 From 043cc4d724da6bb9e4f417c735accec58dfa40bf Mon Sep 17 00:00:00 2001 From: Scott Wood Date: Tue, 20 Dec 2011 15:34:20 +0000 Subject: KVM: PPC: factor out lpid allocator from book3s_64_mmu_hv We'll use it on e500mc as well. Signed-off-by: Scott Wood Signed-off-by: Alexander Graf Signed-off-by: Avi Kivity diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h index aa795cc..046041f 100644 --- a/arch/powerpc/include/asm/kvm_book3s.h +++ b/arch/powerpc/include/asm/kvm_book3s.h @@ -452,4 +452,7 @@ static inline bool kvmppc_critical_section(struct kvm_vcpu *vcpu) #define INS_DCBZ 0x7c0007ec +/* LPIDs we support with this build -- runtime limit may be lower */ +#define KVMPPC_NR_LPIDS (LPID_RSVD + 1) + #endif /* __ASM_KVM_BOOK3S_H__ */ diff --git a/arch/powerpc/include/asm/kvm_booke.h b/arch/powerpc/include/asm/kvm_booke.h index a90e091..b7cd335 100644 --- a/arch/powerpc/include/asm/kvm_booke.h +++ b/arch/powerpc/include/asm/kvm_booke.h @@ -23,6 +23,9 @@ #include #include +/* LPIDs we support with this build -- runtime limit may be lower */ +#define KVMPPC_NR_LPIDS 64 + static inline void kvmppc_set_gpr(struct kvm_vcpu *vcpu, int num, ulong val) { vcpu->arch.gpr[num] = val; diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 9d6dee0..731e920 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -204,4 +204,9 @@ int kvm_vcpu_ioctl_config_tlb(struct kvm_vcpu *vcpu, int kvm_vcpu_ioctl_dirty_tlb(struct kvm_vcpu *vcpu, struct kvm_dirty_tlb *cfg); +long kvmppc_alloc_lpid(void); +void kvmppc_claim_lpid(long lpid); +void kvmppc_free_lpid(long lpid); +void kvmppc_init_lpid(unsigned long nr_lpids); + #endif /* __POWERPC_KVM_PPC_H__ */ diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index ddc485a..d031ce1 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -36,13 +36,11 @@ /* POWER7 has 10-bit LPIDs, PPC970 has 6-bit LPIDs */ #define MAX_LPID_970 63 -#define NR_LPIDS (LPID_RSVD + 1) -unsigned long lpid_inuse[BITS_TO_LONGS(NR_LPIDS)]; long kvmppc_alloc_hpt(struct kvm *kvm) { unsigned long hpt; - unsigned long lpid; + long lpid; struct revmap_entry *rev; struct kvmppc_linear_info *li; @@ -72,14 +70,9 @@ long kvmppc_alloc_hpt(struct kvm *kvm) } kvm->arch.revmap = rev; - /* Allocate the guest's logical partition ID */ - do { - lpid = find_first_zero_bit(lpid_inuse, NR_LPIDS); - if (lpid >= NR_LPIDS) { - pr_err("kvm_alloc_hpt: No LPIDs free\n"); - goto out_freeboth; - } - } while (test_and_set_bit(lpid, lpid_inuse)); + lpid = kvmppc_alloc_lpid(); + if (lpid < 0) + goto out_freeboth; kvm->arch.sdr1 = __pa(hpt) | (HPT_ORDER - 18); kvm->arch.lpid = lpid; @@ -96,7 +89,7 @@ long kvmppc_alloc_hpt(struct kvm *kvm) void kvmppc_free_hpt(struct kvm *kvm) { - clear_bit(kvm->arch.lpid, lpid_inuse); + kvmppc_free_lpid(kvm->arch.lpid); vfree(kvm->arch.revmap); if (kvm->arch.hpt_li) kvm_release_hpt(kvm->arch.hpt_li); @@ -171,8 +164,7 @@ int kvmppc_mmu_hv_init(void) if (!cpu_has_feature(CPU_FTR_HVMODE)) return -EINVAL; - memset(lpid_inuse, 0, sizeof(lpid_inuse)); - + /* POWER7 has 10-bit LPIDs, PPC970 and e500mc have 6-bit LPIDs */ if (cpu_has_feature(CPU_FTR_ARCH_206)) { host_lpid = mfspr(SPRN_LPID); /* POWER7 */ rsvd_lpid = LPID_RSVD; @@ -181,9 +173,11 @@ int kvmppc_mmu_hv_init(void) rsvd_lpid = MAX_LPID_970; } - set_bit(host_lpid, lpid_inuse); + kvmppc_init_lpid(rsvd_lpid + 1); + + kvmppc_claim_lpid(host_lpid); /* rsvd_lpid is reserved for use in partition switching */ - set_bit(rsvd_lpid, lpid_inuse); + kvmppc_claim_lpid(rsvd_lpid); return 0; } diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index b5e9046..cd53e08 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -799,6 +799,40 @@ out: return r; } +static unsigned long lpid_inuse[BITS_TO_LONGS(KVMPPC_NR_LPIDS)]; +static unsigned long nr_lpids; + +long kvmppc_alloc_lpid(void) +{ + long lpid; + + do { + lpid = find_first_zero_bit(lpid_inuse, KVMPPC_NR_LPIDS); + if (lpid >= nr_lpids) { + pr_err("%s: No LPIDs free\n", __func__); + return -ENOMEM; + } + } while (test_and_set_bit(lpid, lpid_inuse)); + + return lpid; +} + +void kvmppc_claim_lpid(long lpid) +{ + set_bit(lpid, lpid_inuse); +} + +void kvmppc_free_lpid(long lpid) +{ + clear_bit(lpid, lpid_inuse); +} + +void kvmppc_init_lpid(unsigned long nr_lpids_param) +{ + nr_lpids = min_t(unsigned long, KVMPPC_NR_LPIDS, nr_lpids_param); + memset(lpid_inuse, 0, sizeof(lpid_inuse)); +} + int kvm_arch_init(void *opaque) { return 0; -- cgit v0.10.2 From 94fa9d9927627a948cef3eff7ebd228dcab5a316 Mon Sep 17 00:00:00 2001 From: Scott Wood Date: Tue, 20 Dec 2011 15:34:22 +0000 Subject: KVM: PPC: booke: add booke-level vcpu load/put This gives us a place to put load/put actions that correspond to code that is booke-specific but not specific to a particular core. Signed-off-by: Scott Wood Signed-off-by: Alexander Graf Signed-off-by: Avi Kivity diff --git a/arch/powerpc/kvm/44x.c b/arch/powerpc/kvm/44x.c index 7b612a7..879a1a7 100644 --- a/arch/powerpc/kvm/44x.c +++ b/arch/powerpc/kvm/44x.c @@ -29,15 +29,18 @@ #include #include "44x_tlb.h" +#include "booke.h" void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { + kvmppc_booke_vcpu_load(vcpu, cpu); kvmppc_44x_tlb_load(vcpu); } void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu) { kvmppc_44x_tlb_put(vcpu); + kvmppc_booke_vcpu_put(vcpu); } int kvmppc_core_check_processor_compat(void) diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index ee9e1ee..a2456c7 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -968,6 +968,14 @@ void kvmppc_decrementer_func(unsigned long data) kvmppc_set_tsr_bits(vcpu, TSR_DIS); } +void kvmppc_booke_vcpu_load(struct kvm_vcpu *vcpu, int cpu) +{ +} + +void kvmppc_booke_vcpu_put(struct kvm_vcpu *vcpu) +{ +} + int __init kvmppc_booke_init(void) { unsigned long ivor[16]; diff --git a/arch/powerpc/kvm/booke.h b/arch/powerpc/kvm/booke.h index 2fe2027..05d1d99 100644 --- a/arch/powerpc/kvm/booke.h +++ b/arch/powerpc/kvm/booke.h @@ -71,4 +71,7 @@ void kvmppc_save_guest_spe(struct kvm_vcpu *vcpu); /* high-level function, manages flags, host state */ void kvmppc_vcpu_disable_spe(struct kvm_vcpu *vcpu); +void kvmppc_booke_vcpu_load(struct kvm_vcpu *vcpu, int cpu); +void kvmppc_booke_vcpu_put(struct kvm_vcpu *vcpu); + #endif /* __KVM_BOOKE_H__ */ diff --git a/arch/powerpc/kvm/e500.c b/arch/powerpc/kvm/e500.c index ddcd896..2d5fe04 100644 --- a/arch/powerpc/kvm/e500.c +++ b/arch/powerpc/kvm/e500.c @@ -36,6 +36,7 @@ void kvmppc_core_load_guest_debugstate(struct kvm_vcpu *vcpu) void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { + kvmppc_booke_vcpu_load(vcpu, cpu); kvmppc_e500_tlb_load(vcpu, cpu); } @@ -47,6 +48,8 @@ void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu) if (vcpu->arch.shadow_msr & MSR_SPE) kvmppc_vcpu_disable_spe(vcpu); #endif + + kvmppc_booke_vcpu_put(vcpu); } int kvmppc_core_check_processor_compat(void) -- cgit v0.10.2 From fafd68327858bf30c846d38c7ea144f0827f552e Mon Sep 17 00:00:00 2001 From: Scott Wood Date: Tue, 20 Dec 2011 15:34:26 +0000 Subject: KVM: PPC: booke: Move vm core init/destroy out of booke.c e500mc will want to do lpid allocation/deallocation here. Signed-off-by: Scott Wood Signed-off-by: Alexander Graf Signed-off-by: Avi Kivity diff --git a/arch/powerpc/kvm/44x.c b/arch/powerpc/kvm/44x.c index 879a1a7..50e7dbc 100644 --- a/arch/powerpc/kvm/44x.c +++ b/arch/powerpc/kvm/44x.c @@ -163,6 +163,15 @@ void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu) kmem_cache_free(kvm_vcpu_cache, vcpu_44x); } +int kvmppc_core_init_vm(struct kvm *kvm) +{ + return 0; +} + +void kvmppc_core_destroy_vm(struct kvm *kvm) +{ +} + static int __init kvmppc_44x_init(void) { int r; diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index a2456c7..2ee9bae 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -932,15 +932,6 @@ void kvmppc_core_commit_memory_region(struct kvm *kvm, { } -int kvmppc_core_init_vm(struct kvm *kvm) -{ - return 0; -} - -void kvmppc_core_destroy_vm(struct kvm *kvm) -{ -} - void kvmppc_set_tcr(struct kvm_vcpu *vcpu, u32 new_tcr) { vcpu->arch.tcr = new_tcr; diff --git a/arch/powerpc/kvm/e500.c b/arch/powerpc/kvm/e500.c index 2d5fe04..ac6c9ae 100644 --- a/arch/powerpc/kvm/e500.c +++ b/arch/powerpc/kvm/e500.c @@ -226,6 +226,15 @@ void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu) kmem_cache_free(kvm_vcpu_cache, vcpu_e500); } +int kvmppc_core_init_vm(struct kvm *kvm) +{ + return 0; +} + +void kvmppc_core_destroy_vm(struct kvm *kvm) +{ +} + static int __init kvmppc_e500_init(void) { int r, i; -- cgit v0.10.2 From 29a5a6f9102aed97a06aa984cc294e0e603b3a79 Mon Sep 17 00:00:00 2001 From: Scott Wood Date: Tue, 20 Dec 2011 15:34:29 +0000 Subject: KVM: PPC: e500: rename e500_tlb.h to e500.h This is in preparation for merging in the contents of arch/powerpc/include/asm/kvm_e500.h. Signed-off-by: Scott Wood Signed-off-by: Alexander Graf Signed-off-by: Avi Kivity diff --git a/arch/powerpc/kvm/e500.c b/arch/powerpc/kvm/e500.c index ac6c9ae..5c450ba 100644 --- a/arch/powerpc/kvm/e500.c +++ b/arch/powerpc/kvm/e500.c @@ -24,7 +24,7 @@ #include #include "booke.h" -#include "e500_tlb.h" +#include "e500.h" void kvmppc_core_load_host_debugstate(struct kvm_vcpu *vcpu) { diff --git a/arch/powerpc/kvm/e500.h b/arch/powerpc/kvm/e500.h new file mode 100644 index 0000000..02ecde2 --- /dev/null +++ b/arch/powerpc/kvm/e500.h @@ -0,0 +1,174 @@ +/* + * Copyright (C) 2008-2011 Freescale Semiconductor, Inc. All rights reserved. + * + * Author: Yu Liu, yu.liu@freescale.com + * + * Description: + * This file is based on arch/powerpc/kvm/44x_tlb.h, + * by Hollis Blanchard . + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + */ + +#ifndef KVM_E500_H +#define KVM_E500_H + +#include +#include +#include +#include + +/* This geometry is the legacy default -- can be overridden by userspace */ +#define KVM_E500_TLB0_WAY_SIZE 128 +#define KVM_E500_TLB0_WAY_NUM 2 + +#define KVM_E500_TLB0_SIZE (KVM_E500_TLB0_WAY_SIZE * KVM_E500_TLB0_WAY_NUM) +#define KVM_E500_TLB1_SIZE 16 + +#define index_of(tlbsel, esel) (((tlbsel) << 16) | ((esel) & 0xFFFF)) +#define tlbsel_of(index) ((index) >> 16) +#define esel_of(index) ((index) & 0xFFFF) + +#define E500_TLB_USER_PERM_MASK (MAS3_UX|MAS3_UR|MAS3_UW) +#define E500_TLB_SUPER_PERM_MASK (MAS3_SX|MAS3_SR|MAS3_SW) +#define MAS2_ATTRIB_MASK \ + (MAS2_X0 | MAS2_X1) +#define MAS3_ATTRIB_MASK \ + (MAS3_U0 | MAS3_U1 | MAS3_U2 | MAS3_U3 \ + | E500_TLB_USER_PERM_MASK | E500_TLB_SUPER_PERM_MASK) + +extern void kvmppc_dump_tlbs(struct kvm_vcpu *); +extern int kvmppc_e500_emul_mt_mmucsr0(struct kvmppc_vcpu_e500 *, ulong); +extern int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *); +extern int kvmppc_e500_emul_tlbre(struct kvm_vcpu *); +extern int kvmppc_e500_emul_tlbivax(struct kvm_vcpu *, int, int); +extern int kvmppc_e500_emul_tlbsx(struct kvm_vcpu *, int); +extern int kvmppc_e500_tlb_search(struct kvm_vcpu *, gva_t, unsigned int, int); +extern void kvmppc_e500_tlb_put(struct kvm_vcpu *); +extern void kvmppc_e500_tlb_load(struct kvm_vcpu *, int); +extern int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *); +extern void kvmppc_e500_tlb_uninit(struct kvmppc_vcpu_e500 *); +extern void kvmppc_e500_tlb_setup(struct kvmppc_vcpu_e500 *); +extern void kvmppc_e500_recalc_shadow_pid(struct kvmppc_vcpu_e500 *); + +/* TLB helper functions */ +static inline unsigned int +get_tlb_size(const struct kvm_book3e_206_tlb_entry *tlbe) +{ + return (tlbe->mas1 >> 7) & 0x1f; +} + +static inline gva_t get_tlb_eaddr(const struct kvm_book3e_206_tlb_entry *tlbe) +{ + return tlbe->mas2 & 0xfffff000; +} + +static inline u64 get_tlb_bytes(const struct kvm_book3e_206_tlb_entry *tlbe) +{ + unsigned int pgsize = get_tlb_size(tlbe); + return 1ULL << 10 << pgsize; +} + +static inline gva_t get_tlb_end(const struct kvm_book3e_206_tlb_entry *tlbe) +{ + u64 bytes = get_tlb_bytes(tlbe); + return get_tlb_eaddr(tlbe) + bytes - 1; +} + +static inline u64 get_tlb_raddr(const struct kvm_book3e_206_tlb_entry *tlbe) +{ + return tlbe->mas7_3 & ~0xfffULL; +} + +static inline unsigned int +get_tlb_tid(const struct kvm_book3e_206_tlb_entry *tlbe) +{ + return (tlbe->mas1 >> 16) & 0xff; +} + +static inline unsigned int +get_tlb_ts(const struct kvm_book3e_206_tlb_entry *tlbe) +{ + return (tlbe->mas1 >> 12) & 0x1; +} + +static inline unsigned int +get_tlb_v(const struct kvm_book3e_206_tlb_entry *tlbe) +{ + return (tlbe->mas1 >> 31) & 0x1; +} + +static inline unsigned int +get_tlb_iprot(const struct kvm_book3e_206_tlb_entry *tlbe) +{ + return (tlbe->mas1 >> 30) & 0x1; +} + +static inline unsigned int get_cur_pid(struct kvm_vcpu *vcpu) +{ + return vcpu->arch.pid & 0xff; +} + +static inline unsigned int get_cur_as(struct kvm_vcpu *vcpu) +{ + return !!(vcpu->arch.shared->msr & (MSR_IS | MSR_DS)); +} + +static inline unsigned int get_cur_pr(struct kvm_vcpu *vcpu) +{ + return !!(vcpu->arch.shared->msr & MSR_PR); +} + +static inline unsigned int get_cur_spid(const struct kvm_vcpu *vcpu) +{ + return (vcpu->arch.shared->mas6 >> 16) & 0xff; +} + +static inline unsigned int get_cur_sas(const struct kvm_vcpu *vcpu) +{ + return vcpu->arch.shared->mas6 & 0x1; +} + +static inline unsigned int get_tlb_tlbsel(const struct kvm_vcpu *vcpu) +{ + /* + * Manual says that tlbsel has 2 bits wide. + * Since we only have two TLBs, only lower bit is used. + */ + return (vcpu->arch.shared->mas0 >> 28) & 0x1; +} + +static inline unsigned int get_tlb_nv_bit(const struct kvm_vcpu *vcpu) +{ + return vcpu->arch.shared->mas0 & 0xfff; +} + +static inline unsigned int get_tlb_esel_bit(const struct kvm_vcpu *vcpu) +{ + return (vcpu->arch.shared->mas0 >> 16) & 0xfff; +} + +static inline int tlbe_is_host_safe(const struct kvm_vcpu *vcpu, + const struct kvm_book3e_206_tlb_entry *tlbe) +{ + gpa_t gpa; + + if (!get_tlb_v(tlbe)) + return 0; + + /* Does it match current guest AS? */ + /* XXX what about IS != DS? */ + if (get_tlb_ts(tlbe) != !!(vcpu->arch.shared->msr & MSR_IS)) + return 0; + + gpa = get_tlb_raddr(tlbe); + if (!gfn_to_memslot(vcpu->kvm, gpa >> PAGE_SHIFT)) + /* Mapping is not for RAM. */ + return 0; + + return 1; +} + +#endif /* KVM_E500_H */ diff --git a/arch/powerpc/kvm/e500_emulate.c b/arch/powerpc/kvm/e500_emulate.c index 6d0b2bd..2a1a228 100644 --- a/arch/powerpc/kvm/e500_emulate.c +++ b/arch/powerpc/kvm/e500_emulate.c @@ -17,7 +17,7 @@ #include #include "booke.h" -#include "e500_tlb.h" +#include "e500.h" #define XOP_TLBIVAX 786 #define XOP_TLBSX 914 diff --git a/arch/powerpc/kvm/e500_tlb.c b/arch/powerpc/kvm/e500_tlb.c index 6e53e41..1d623a0 100644 --- a/arch/powerpc/kvm/e500_tlb.c +++ b/arch/powerpc/kvm/e500_tlb.c @@ -29,7 +29,7 @@ #include #include "../mm/mmu_decl.h" -#include "e500_tlb.h" +#include "e500.h" #include "trace.h" #include "timing.h" diff --git a/arch/powerpc/kvm/e500_tlb.h b/arch/powerpc/kvm/e500_tlb.h deleted file mode 100644 index 5c6d2d7..0000000 --- a/arch/powerpc/kvm/e500_tlb.h +++ /dev/null @@ -1,174 +0,0 @@ -/* - * Copyright (C) 2008-2011 Freescale Semiconductor, Inc. All rights reserved. - * - * Author: Yu Liu, yu.liu@freescale.com - * - * Description: - * This file is based on arch/powerpc/kvm/44x_tlb.h, - * by Hollis Blanchard . - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License, version 2, as - * published by the Free Software Foundation. - */ - -#ifndef __KVM_E500_TLB_H__ -#define __KVM_E500_TLB_H__ - -#include -#include -#include -#include - -/* This geometry is the legacy default -- can be overridden by userspace */ -#define KVM_E500_TLB0_WAY_SIZE 128 -#define KVM_E500_TLB0_WAY_NUM 2 - -#define KVM_E500_TLB0_SIZE (KVM_E500_TLB0_WAY_SIZE * KVM_E500_TLB0_WAY_NUM) -#define KVM_E500_TLB1_SIZE 16 - -#define index_of(tlbsel, esel) (((tlbsel) << 16) | ((esel) & 0xFFFF)) -#define tlbsel_of(index) ((index) >> 16) -#define esel_of(index) ((index) & 0xFFFF) - -#define E500_TLB_USER_PERM_MASK (MAS3_UX|MAS3_UR|MAS3_UW) -#define E500_TLB_SUPER_PERM_MASK (MAS3_SX|MAS3_SR|MAS3_SW) -#define MAS2_ATTRIB_MASK \ - (MAS2_X0 | MAS2_X1) -#define MAS3_ATTRIB_MASK \ - (MAS3_U0 | MAS3_U1 | MAS3_U2 | MAS3_U3 \ - | E500_TLB_USER_PERM_MASK | E500_TLB_SUPER_PERM_MASK) - -extern void kvmppc_dump_tlbs(struct kvm_vcpu *); -extern int kvmppc_e500_emul_mt_mmucsr0(struct kvmppc_vcpu_e500 *, ulong); -extern int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *); -extern int kvmppc_e500_emul_tlbre(struct kvm_vcpu *); -extern int kvmppc_e500_emul_tlbivax(struct kvm_vcpu *, int, int); -extern int kvmppc_e500_emul_tlbsx(struct kvm_vcpu *, int); -extern int kvmppc_e500_tlb_search(struct kvm_vcpu *, gva_t, unsigned int, int); -extern void kvmppc_e500_tlb_put(struct kvm_vcpu *); -extern void kvmppc_e500_tlb_load(struct kvm_vcpu *, int); -extern int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *); -extern void kvmppc_e500_tlb_uninit(struct kvmppc_vcpu_e500 *); -extern void kvmppc_e500_tlb_setup(struct kvmppc_vcpu_e500 *); -extern void kvmppc_e500_recalc_shadow_pid(struct kvmppc_vcpu_e500 *); - -/* TLB helper functions */ -static inline unsigned int -get_tlb_size(const struct kvm_book3e_206_tlb_entry *tlbe) -{ - return (tlbe->mas1 >> 7) & 0x1f; -} - -static inline gva_t get_tlb_eaddr(const struct kvm_book3e_206_tlb_entry *tlbe) -{ - return tlbe->mas2 & 0xfffff000; -} - -static inline u64 get_tlb_bytes(const struct kvm_book3e_206_tlb_entry *tlbe) -{ - unsigned int pgsize = get_tlb_size(tlbe); - return 1ULL << 10 << pgsize; -} - -static inline gva_t get_tlb_end(const struct kvm_book3e_206_tlb_entry *tlbe) -{ - u64 bytes = get_tlb_bytes(tlbe); - return get_tlb_eaddr(tlbe) + bytes - 1; -} - -static inline u64 get_tlb_raddr(const struct kvm_book3e_206_tlb_entry *tlbe) -{ - return tlbe->mas7_3 & ~0xfffULL; -} - -static inline unsigned int -get_tlb_tid(const struct kvm_book3e_206_tlb_entry *tlbe) -{ - return (tlbe->mas1 >> 16) & 0xff; -} - -static inline unsigned int -get_tlb_ts(const struct kvm_book3e_206_tlb_entry *tlbe) -{ - return (tlbe->mas1 >> 12) & 0x1; -} - -static inline unsigned int -get_tlb_v(const struct kvm_book3e_206_tlb_entry *tlbe) -{ - return (tlbe->mas1 >> 31) & 0x1; -} - -static inline unsigned int -get_tlb_iprot(const struct kvm_book3e_206_tlb_entry *tlbe) -{ - return (tlbe->mas1 >> 30) & 0x1; -} - -static inline unsigned int get_cur_pid(struct kvm_vcpu *vcpu) -{ - return vcpu->arch.pid & 0xff; -} - -static inline unsigned int get_cur_as(struct kvm_vcpu *vcpu) -{ - return !!(vcpu->arch.shared->msr & (MSR_IS | MSR_DS)); -} - -static inline unsigned int get_cur_pr(struct kvm_vcpu *vcpu) -{ - return !!(vcpu->arch.shared->msr & MSR_PR); -} - -static inline unsigned int get_cur_spid(const struct kvm_vcpu *vcpu) -{ - return (vcpu->arch.shared->mas6 >> 16) & 0xff; -} - -static inline unsigned int get_cur_sas(const struct kvm_vcpu *vcpu) -{ - return vcpu->arch.shared->mas6 & 0x1; -} - -static inline unsigned int get_tlb_tlbsel(const struct kvm_vcpu *vcpu) -{ - /* - * Manual says that tlbsel has 2 bits wide. - * Since we only have two TLBs, only lower bit is used. - */ - return (vcpu->arch.shared->mas0 >> 28) & 0x1; -} - -static inline unsigned int get_tlb_nv_bit(const struct kvm_vcpu *vcpu) -{ - return vcpu->arch.shared->mas0 & 0xfff; -} - -static inline unsigned int get_tlb_esel_bit(const struct kvm_vcpu *vcpu) -{ - return (vcpu->arch.shared->mas0 >> 16) & 0xfff; -} - -static inline int tlbe_is_host_safe(const struct kvm_vcpu *vcpu, - const struct kvm_book3e_206_tlb_entry *tlbe) -{ - gpa_t gpa; - - if (!get_tlb_v(tlbe)) - return 0; - - /* Does it match current guest AS? */ - /* XXX what about IS != DS? */ - if (get_tlb_ts(tlbe) != !!(vcpu->arch.shared->msr & MSR_IS)) - return 0; - - gpa = get_tlb_raddr(tlbe); - if (!gfn_to_memslot(vcpu->kvm, gpa >> PAGE_SHIFT)) - /* Mapping is not for RAM. */ - return 0; - - return 1; -} - -#endif /* __KVM_E500_TLB_H__ */ -- cgit v0.10.2 From fc6cf99509eb8e5f16e0f81db0c71f5301193005 Mon Sep 17 00:00:00 2001 From: Scott Wood Date: Tue, 20 Dec 2011 15:34:31 +0000 Subject: KVM: PPC: e500: merge into arch/powerpc/kvm/e500.h Keeping two separate headers for e500-specific things was a pain, and wasn't even organized along any logical boundary. There was TLB stuff in despite the existence of arch/powerpc/kvm/e500_tlb.h, and nothing in needed to be referenced from outside arch/powerpc/kvm. Signed-off-by: Scott Wood [agraf: fix bisectability] Signed-off-by: Alexander Graf Signed-off-by: Avi Kivity diff --git a/arch/powerpc/include/asm/kvm_e500.h b/arch/powerpc/include/asm/kvm_e500.h deleted file mode 100644 index 8cd50a5..0000000 --- a/arch/powerpc/include/asm/kvm_e500.h +++ /dev/null @@ -1,96 +0,0 @@ -/* - * Copyright (C) 2008-2011 Freescale Semiconductor, Inc. All rights reserved. - * - * Author: Yu Liu, - * - * Description: - * This file is derived from arch/powerpc/include/asm/kvm_44x.h, - * by Hollis Blanchard . - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License, version 2, as - * published by the Free Software Foundation. - */ - -#ifndef __ASM_KVM_E500_H__ -#define __ASM_KVM_E500_H__ - -#include - -#define BOOKE_INTERRUPT_SIZE 36 - -#define E500_PID_NUM 3 -#define E500_TLB_NUM 2 - -#define E500_TLB_VALID 1 -#define E500_TLB_DIRTY 2 - -struct tlbe_ref { - pfn_t pfn; - unsigned int flags; /* E500_TLB_* */ -}; - -struct tlbe_priv { - struct tlbe_ref ref; /* TLB0 only -- TLB1 uses tlb_refs */ -}; - -struct vcpu_id_table; - -struct kvmppc_e500_tlb_params { - int entries, ways, sets; -}; - -struct kvmppc_vcpu_e500 { - /* Unmodified copy of the guest's TLB -- shared with host userspace. */ - struct kvm_book3e_206_tlb_entry *gtlb_arch; - - /* Starting entry number in gtlb_arch[] */ - int gtlb_offset[E500_TLB_NUM]; - - /* KVM internal information associated with each guest TLB entry */ - struct tlbe_priv *gtlb_priv[E500_TLB_NUM]; - - struct kvmppc_e500_tlb_params gtlb_params[E500_TLB_NUM]; - - unsigned int gtlb_nv[E500_TLB_NUM]; - - /* - * information associated with each host TLB entry -- - * TLB1 only for now. If/when guest TLB1 entries can be - * mapped with host TLB0, this will be used for that too. - * - * We don't want to use this for guest TLB0 because then we'd - * have the overhead of doing the translation again even if - * the entry is still in the guest TLB (e.g. we swapped out - * and back, and our host TLB entries got evicted). - */ - struct tlbe_ref *tlb_refs[E500_TLB_NUM]; - unsigned int host_tlb1_nv; - - u32 host_pid[E500_PID_NUM]; - u32 pid[E500_PID_NUM]; - u32 svr; - - /* vcpu id table */ - struct vcpu_id_table *idt; - - u32 l1csr0; - u32 l1csr1; - u32 hid0; - u32 hid1; - u32 tlb0cfg; - u32 tlb1cfg; - u64 mcar; - - struct page **shared_tlb_pages; - int num_shared_tlb_pages; - - struct kvm_vcpu vcpu; -}; - -static inline struct kvmppc_vcpu_e500 *to_e500(struct kvm_vcpu *vcpu) -{ - return container_of(vcpu, struct kvmppc_vcpu_e500, vcpu); -} - -#endif /* __ASM_KVM_E500_H__ */ diff --git a/arch/powerpc/kvm/e500.c b/arch/powerpc/kvm/e500.c index 5c450ba..76b35d8 100644 --- a/arch/powerpc/kvm/e500.c +++ b/arch/powerpc/kvm/e500.c @@ -20,7 +20,6 @@ #include #include #include -#include #include #include "booke.h" diff --git a/arch/powerpc/kvm/e500.h b/arch/powerpc/kvm/e500.h index 02ecde2..51d13bd 100644 --- a/arch/powerpc/kvm/e500.h +++ b/arch/powerpc/kvm/e500.h @@ -1,11 +1,12 @@ /* * Copyright (C) 2008-2011 Freescale Semiconductor, Inc. All rights reserved. * - * Author: Yu Liu, yu.liu@freescale.com + * Author: Yu Liu * * Description: - * This file is based on arch/powerpc/kvm/44x_tlb.h, - * by Hollis Blanchard . + * This file is based on arch/powerpc/kvm/44x_tlb.h and + * arch/powerpc/include/asm/kvm_44x.h by Hollis Blanchard , + * Copyright IBM Corp. 2007-2008 * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License, version 2, as @@ -18,7 +19,80 @@ #include #include #include -#include + +#define E500_PID_NUM 3 +#define E500_TLB_NUM 2 + +#define E500_TLB_VALID 1 +#define E500_TLB_DIRTY 2 + +struct tlbe_ref { + pfn_t pfn; + unsigned int flags; /* E500_TLB_* */ +}; + +struct tlbe_priv { + struct tlbe_ref ref; /* TLB0 only -- TLB1 uses tlb_refs */ +}; + +struct vcpu_id_table; + +struct kvmppc_e500_tlb_params { + int entries, ways, sets; +}; + +struct kvmppc_vcpu_e500 { + /* Unmodified copy of the guest's TLB -- shared with host userspace. */ + struct kvm_book3e_206_tlb_entry *gtlb_arch; + + /* Starting entry number in gtlb_arch[] */ + int gtlb_offset[E500_TLB_NUM]; + + /* KVM internal information associated with each guest TLB entry */ + struct tlbe_priv *gtlb_priv[E500_TLB_NUM]; + + struct kvmppc_e500_tlb_params gtlb_params[E500_TLB_NUM]; + + unsigned int gtlb_nv[E500_TLB_NUM]; + + /* + * information associated with each host TLB entry -- + * TLB1 only for now. If/when guest TLB1 entries can be + * mapped with host TLB0, this will be used for that too. + * + * We don't want to use this for guest TLB0 because then we'd + * have the overhead of doing the translation again even if + * the entry is still in the guest TLB (e.g. we swapped out + * and back, and our host TLB entries got evicted). + */ + struct tlbe_ref *tlb_refs[E500_TLB_NUM]; + unsigned int host_tlb1_nv; + + u32 host_pid[E500_PID_NUM]; + u32 pid[E500_PID_NUM]; + u32 svr; + + /* vcpu id table */ + struct vcpu_id_table *idt; + + u32 l1csr0; + u32 l1csr1; + u32 hid0; + u32 hid1; + u32 tlb0cfg; + u32 tlb1cfg; + u64 mcar; + + struct page **shared_tlb_pages; + int num_shared_tlb_pages; + + struct kvm_vcpu vcpu; +}; + +static inline struct kvmppc_vcpu_e500 *to_e500(struct kvm_vcpu *vcpu) +{ + return container_of(vcpu, struct kvmppc_vcpu_e500, vcpu); +} /* This geometry is the legacy default -- can be overridden by userspace */ #define KVM_E500_TLB0_WAY_SIZE 128 diff --git a/arch/powerpc/kvm/e500_emulate.c b/arch/powerpc/kvm/e500_emulate.c index 2a1a228..7e2d592 100644 --- a/arch/powerpc/kvm/e500_emulate.c +++ b/arch/powerpc/kvm/e500_emulate.c @@ -14,7 +14,6 @@ #include #include -#include #include "booke.h" #include "e500.h" diff --git a/arch/powerpc/kvm/e500_tlb.c b/arch/powerpc/kvm/e500_tlb.c index 1d623a0..7d4a918 100644 --- a/arch/powerpc/kvm/e500_tlb.c +++ b/arch/powerpc/kvm/e500_tlb.c @@ -26,7 +26,6 @@ #include #include #include -#include #include "../mm/mmu_decl.h" #include "e500.h" -- cgit v0.10.2 From 52e1718c6fd1a1f54c676c2107dc931e93865fe8 Mon Sep 17 00:00:00 2001 From: Scott Wood Date: Tue, 20 Dec 2011 15:34:32 +0000 Subject: KVM: PPC: e500: clean up arch/powerpc/kvm/e500.h Move vcpu to the beginning of vcpu_e500 to give it appropriate prominence, especially if more fields end up getting added to the end of vcpu_e500 (and vcpu ends up in the middle). Remove gratuitous "extern" and add parameter names to prototypes. Signed-off-by: Scott Wood [agraf: fix bisectability] Signed-off-by: Alexander Graf Signed-off-by: Avi Kivity diff --git a/arch/powerpc/kvm/e500.h b/arch/powerpc/kvm/e500.h index 51d13bd..a48af00 100644 --- a/arch/powerpc/kvm/e500.h +++ b/arch/powerpc/kvm/e500.h @@ -42,6 +42,8 @@ struct kvmppc_e500_tlb_params { }; struct kvmppc_vcpu_e500 { + struct kvm_vcpu vcpu; + /* Unmodified copy of the guest's TLB -- shared with host userspace. */ struct kvm_book3e_206_tlb_entry *gtlb_arch; @@ -85,8 +87,6 @@ struct kvmppc_vcpu_e500 { struct page **shared_tlb_pages; int num_shared_tlb_pages; - - struct kvm_vcpu vcpu; }; static inline struct kvmppc_vcpu_e500 *to_e500(struct kvm_vcpu *vcpu) @@ -113,19 +113,22 @@ static inline struct kvmppc_vcpu_e500 *to_e500(struct kvm_vcpu *vcpu) (MAS3_U0 | MAS3_U1 | MAS3_U2 | MAS3_U3 \ | E500_TLB_USER_PERM_MASK | E500_TLB_SUPER_PERM_MASK) -extern void kvmppc_dump_tlbs(struct kvm_vcpu *); -extern int kvmppc_e500_emul_mt_mmucsr0(struct kvmppc_vcpu_e500 *, ulong); -extern int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *); -extern int kvmppc_e500_emul_tlbre(struct kvm_vcpu *); -extern int kvmppc_e500_emul_tlbivax(struct kvm_vcpu *, int, int); -extern int kvmppc_e500_emul_tlbsx(struct kvm_vcpu *, int); -extern int kvmppc_e500_tlb_search(struct kvm_vcpu *, gva_t, unsigned int, int); extern void kvmppc_e500_tlb_put(struct kvm_vcpu *); extern void kvmppc_e500_tlb_load(struct kvm_vcpu *, int); -extern int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *); -extern void kvmppc_e500_tlb_uninit(struct kvmppc_vcpu_e500 *); extern void kvmppc_e500_tlb_setup(struct kvmppc_vcpu_e500 *); extern void kvmppc_e500_recalc_shadow_pid(struct kvmppc_vcpu_e500 *); +int kvmppc_e500_emul_mt_mmucsr0(struct kvmppc_vcpu_e500 *vcpu_e500, + ulong value); +int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *vcpu); +int kvmppc_e500_emul_tlbre(struct kvm_vcpu *vcpu); +int kvmppc_e500_emul_tlbivax(struct kvm_vcpu *vcpu, int ra, int rb); +int kvmppc_e500_emul_tlbsx(struct kvm_vcpu *vcpu, int rb); +int kvmppc_e500_tlb_search(struct kvm_vcpu *, gva_t, unsigned int, int); +int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *vcpu_e500); +void kvmppc_e500_tlb_uninit(struct kvmppc_vcpu_e500 *vcpu_e500); + +void kvmppc_get_sregs_e500_tlb(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs); +int kvmppc_set_sregs_e500_tlb(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs); /* TLB helper functions */ static inline unsigned int -- cgit v0.10.2 From 8fdd21a26876ea6c486c38bfa75fdd18ba299351 Mon Sep 17 00:00:00 2001 From: Scott Wood Date: Tue, 20 Dec 2011 15:34:34 +0000 Subject: KVM: PPC: e500: refactor core-specific TLB code The PID handling is e500v1/v2-specific, and is moved to e500.c. The MMU sregs code and kvmppc_core_vcpu_translate will be shared with e500mc, and is moved from e500.c to e500_tlb.c. Partially based on patches from Liu Yu . Signed-off-by: Scott Wood [agraf: fix bisectability] Signed-off-by: Alexander Graf Signed-off-by: Avi Kivity diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 20ab5b2..5b81cbc 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -426,6 +426,8 @@ struct kvm_vcpu_arch { ulong fault_esr; ulong queued_dear; ulong queued_esr; + u32 tlbcfg[4]; + u32 mmucfg; #endif gpa_t paddr_accessed; diff --git a/arch/powerpc/kvm/e500.c b/arch/powerpc/kvm/e500.c index 76b35d8..b479ed7 100644 --- a/arch/powerpc/kvm/e500.c +++ b/arch/powerpc/kvm/e500.c @@ -22,9 +22,281 @@ #include #include +#include "../mm/mmu_decl.h" #include "booke.h" #include "e500.h" +struct id { + unsigned long val; + struct id **pentry; +}; + +#define NUM_TIDS 256 + +/* + * This table provide mappings from: + * (guestAS,guestTID,guestPR) --> ID of physical cpu + * guestAS [0..1] + * guestTID [0..255] + * guestPR [0..1] + * ID [1..255] + * Each vcpu keeps one vcpu_id_table. + */ +struct vcpu_id_table { + struct id id[2][NUM_TIDS][2]; +}; + +/* + * This table provide reversed mappings of vcpu_id_table: + * ID --> address of vcpu_id_table item. + * Each physical core has one pcpu_id_table. + */ +struct pcpu_id_table { + struct id *entry[NUM_TIDS]; +}; + +static DEFINE_PER_CPU(struct pcpu_id_table, pcpu_sids); + +/* This variable keeps last used shadow ID on local core. + * The valid range of shadow ID is [1..255] */ +static DEFINE_PER_CPU(unsigned long, pcpu_last_used_sid); + +/* + * Allocate a free shadow id and setup a valid sid mapping in given entry. + * A mapping is only valid when vcpu_id_table and pcpu_id_table are match. + * + * The caller must have preemption disabled, and keep it that way until + * it has finished with the returned shadow id (either written into the + * TLB or arch.shadow_pid, or discarded). + */ +static inline int local_sid_setup_one(struct id *entry) +{ + unsigned long sid; + int ret = -1; + + sid = ++(__get_cpu_var(pcpu_last_used_sid)); + if (sid < NUM_TIDS) { + __get_cpu_var(pcpu_sids).entry[sid] = entry; + entry->val = sid; + entry->pentry = &__get_cpu_var(pcpu_sids).entry[sid]; + ret = sid; + } + + /* + * If sid == NUM_TIDS, we've run out of sids. We return -1, and + * the caller will invalidate everything and start over. + * + * sid > NUM_TIDS indicates a race, which we disable preemption to + * avoid. + */ + WARN_ON(sid > NUM_TIDS); + + return ret; +} + +/* + * Check if given entry contain a valid shadow id mapping. + * An ID mapping is considered valid only if + * both vcpu and pcpu know this mapping. + * + * The caller must have preemption disabled, and keep it that way until + * it has finished with the returned shadow id (either written into the + * TLB or arch.shadow_pid, or discarded). + */ +static inline int local_sid_lookup(struct id *entry) +{ + if (entry && entry->val != 0 && + __get_cpu_var(pcpu_sids).entry[entry->val] == entry && + entry->pentry == &__get_cpu_var(pcpu_sids).entry[entry->val]) + return entry->val; + return -1; +} + +/* Invalidate all id mappings on local core -- call with preempt disabled */ +static inline void local_sid_destroy_all(void) +{ + __get_cpu_var(pcpu_last_used_sid) = 0; + memset(&__get_cpu_var(pcpu_sids), 0, sizeof(__get_cpu_var(pcpu_sids))); +} + +static void *kvmppc_e500_id_table_alloc(struct kvmppc_vcpu_e500 *vcpu_e500) +{ + vcpu_e500->idt = kzalloc(sizeof(struct vcpu_id_table), GFP_KERNEL); + return vcpu_e500->idt; +} + +static void kvmppc_e500_id_table_free(struct kvmppc_vcpu_e500 *vcpu_e500) +{ + kfree(vcpu_e500->idt); + vcpu_e500->idt = NULL; +} + +/* Map guest pid to shadow. + * We use PID to keep shadow of current guest non-zero PID, + * and use PID1 to keep shadow of guest zero PID. + * So that guest tlbe with TID=0 can be accessed at any time */ +static void kvmppc_e500_recalc_shadow_pid(struct kvmppc_vcpu_e500 *vcpu_e500) +{ + preempt_disable(); + vcpu_e500->vcpu.arch.shadow_pid = kvmppc_e500_get_sid(vcpu_e500, + get_cur_as(&vcpu_e500->vcpu), + get_cur_pid(&vcpu_e500->vcpu), + get_cur_pr(&vcpu_e500->vcpu), 1); + vcpu_e500->vcpu.arch.shadow_pid1 = kvmppc_e500_get_sid(vcpu_e500, + get_cur_as(&vcpu_e500->vcpu), 0, + get_cur_pr(&vcpu_e500->vcpu), 1); + preempt_enable(); +} + +/* Invalidate all mappings on vcpu */ +static void kvmppc_e500_id_table_reset_all(struct kvmppc_vcpu_e500 *vcpu_e500) +{ + memset(vcpu_e500->idt, 0, sizeof(struct vcpu_id_table)); + + /* Update shadow pid when mappings are changed */ + kvmppc_e500_recalc_shadow_pid(vcpu_e500); +} + +/* Invalidate one ID mapping on vcpu */ +static inline void kvmppc_e500_id_table_reset_one( + struct kvmppc_vcpu_e500 *vcpu_e500, + int as, int pid, int pr) +{ + struct vcpu_id_table *idt = vcpu_e500->idt; + + BUG_ON(as >= 2); + BUG_ON(pid >= NUM_TIDS); + BUG_ON(pr >= 2); + + idt->id[as][pid][pr].val = 0; + idt->id[as][pid][pr].pentry = NULL; + + /* Update shadow pid when mappings are changed */ + kvmppc_e500_recalc_shadow_pid(vcpu_e500); +} + +/* + * Map guest (vcpu,AS,ID,PR) to physical core shadow id. + * This function first lookup if a valid mapping exists, + * if not, then creates a new one. + * + * The caller must have preemption disabled, and keep it that way until + * it has finished with the returned shadow id (either written into the + * TLB or arch.shadow_pid, or discarded). + */ +unsigned int kvmppc_e500_get_sid(struct kvmppc_vcpu_e500 *vcpu_e500, + unsigned int as, unsigned int gid, + unsigned int pr, int avoid_recursion) +{ + struct vcpu_id_table *idt = vcpu_e500->idt; + int sid; + + BUG_ON(as >= 2); + BUG_ON(gid >= NUM_TIDS); + BUG_ON(pr >= 2); + + sid = local_sid_lookup(&idt->id[as][gid][pr]); + + while (sid <= 0) { + /* No mapping yet */ + sid = local_sid_setup_one(&idt->id[as][gid][pr]); + if (sid <= 0) { + _tlbil_all(); + local_sid_destroy_all(); + } + + /* Update shadow pid when mappings are changed */ + if (!avoid_recursion) + kvmppc_e500_recalc_shadow_pid(vcpu_e500); + } + + return sid; +} + +unsigned int kvmppc_e500_get_tlb_stid(struct kvm_vcpu *vcpu, + struct kvm_book3e_206_tlb_entry *gtlbe) +{ + return kvmppc_e500_get_sid(to_e500(vcpu), get_tlb_ts(gtlbe), + get_tlb_tid(gtlbe), get_cur_pr(vcpu), 0); +} + +void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 pid) +{ + struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); + + if (vcpu->arch.pid != pid) { + vcpu_e500->pid[0] = vcpu->arch.pid = pid; + kvmppc_e500_recalc_shadow_pid(vcpu_e500); + } +} + +/* gtlbe must not be mapped by more than one host tlbe */ +void kvmppc_e500_tlbil_one(struct kvmppc_vcpu_e500 *vcpu_e500, + struct kvm_book3e_206_tlb_entry *gtlbe) +{ + struct vcpu_id_table *idt = vcpu_e500->idt; + unsigned int pr, tid, ts, pid; + u32 val, eaddr; + unsigned long flags; + + ts = get_tlb_ts(gtlbe); + tid = get_tlb_tid(gtlbe); + + preempt_disable(); + + /* One guest ID may be mapped to two shadow IDs */ + for (pr = 0; pr < 2; pr++) { + /* + * The shadow PID can have a valid mapping on at most one + * host CPU. In the common case, it will be valid on this + * CPU, in which case we do a local invalidation of the + * specific address. + * + * If the shadow PID is not valid on the current host CPU, + * we invalidate the entire shadow PID. + */ + pid = local_sid_lookup(&idt->id[ts][tid][pr]); + if (pid <= 0) { + kvmppc_e500_id_table_reset_one(vcpu_e500, ts, tid, pr); + continue; + } + + /* + * The guest is invalidating a 4K entry which is in a PID + * that has a valid shadow mapping on this host CPU. We + * search host TLB to invalidate it's shadow TLB entry, + * similar to __tlbil_va except that we need to look in AS1. + */ + val = (pid << MAS6_SPID_SHIFT) | MAS6_SAS; + eaddr = get_tlb_eaddr(gtlbe); + + local_irq_save(flags); + + mtspr(SPRN_MAS6, val); + asm volatile("tlbsx 0, %[eaddr]" : : [eaddr] "r" (eaddr)); + val = mfspr(SPRN_MAS1); + if (val & MAS1_VALID) { + mtspr(SPRN_MAS1, val & ~MAS1_VALID); + asm volatile("tlbwe"); + } + + local_irq_restore(flags); + } + + preempt_enable(); +} + +void kvmppc_e500_tlbil_all(struct kvmppc_vcpu_e500 *vcpu_e500) +{ + kvmppc_e500_id_table_reset_all(vcpu_e500); +} + +void kvmppc_mmu_msr_notify(struct kvm_vcpu *vcpu, u32 old_msr) +{ + /* Recalc shadow pid since MSR changes */ + kvmppc_e500_recalc_shadow_pid(to_e500(vcpu)); +} + void kvmppc_core_load_host_debugstate(struct kvm_vcpu *vcpu) { } @@ -36,13 +308,13 @@ void kvmppc_core_load_guest_debugstate(struct kvm_vcpu *vcpu) void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { kvmppc_booke_vcpu_load(vcpu, cpu); - kvmppc_e500_tlb_load(vcpu, cpu); + + /* Shadow PID may be expired on local core */ + kvmppc_e500_recalc_shadow_pid(to_e500(vcpu)); } void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu) { - kvmppc_e500_tlb_put(vcpu); - #ifdef CONFIG_SPE if (vcpu->arch.shadow_msr & MSR_SPE) kvmppc_vcpu_disable_spe(vcpu); @@ -63,6 +335,23 @@ int kvmppc_core_check_processor_compat(void) return r; } +static void kvmppc_e500_tlb_setup(struct kvmppc_vcpu_e500 *vcpu_e500) +{ + struct kvm_book3e_206_tlb_entry *tlbe; + + /* Insert large initial mapping for guest. */ + tlbe = get_entry(vcpu_e500, 1, 0); + tlbe->mas1 = MAS1_VALID | MAS1_TSIZE(BOOK3E_PAGESZ_256M); + tlbe->mas2 = 0; + tlbe->mas7_3 = E500_TLB_SUPER_PERM_MASK; + + /* 4K map for serial output. Used by kernel wrapper. */ + tlbe = get_entry(vcpu_e500, 1, 1); + tlbe->mas1 = MAS1_VALID | MAS1_TSIZE(BOOK3E_PAGESZ_4K); + tlbe->mas2 = (0xe0004500 & 0xFFFFF000) | MAS2_I | MAS2_G; + tlbe->mas7_3 = (0xe0004500 & 0xFFFFF000) | E500_TLB_SUPER_PERM_MASK; +} + int kvmppc_core_vcpu_setup(struct kvm_vcpu *vcpu) { struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); @@ -78,32 +367,6 @@ int kvmppc_core_vcpu_setup(struct kvm_vcpu *vcpu) return 0; } -/* 'linear_address' is actually an encoding of AS|PID|EADDR . */ -int kvmppc_core_vcpu_translate(struct kvm_vcpu *vcpu, - struct kvm_translation *tr) -{ - int index; - gva_t eaddr; - u8 pid; - u8 as; - - eaddr = tr->linear_address; - pid = (tr->linear_address >> 32) & 0xff; - as = (tr->linear_address >> 40) & 0x1; - - index = kvmppc_e500_tlb_search(vcpu, eaddr, pid, as); - if (index < 0) { - tr->valid = 0; - return 0; - } - - tr->physical_address = kvmppc_mmu_xlate(vcpu, index, eaddr); - /* XXX what does "writeable" and "usermode" even mean? */ - tr->valid = 1; - - return 0; -} - void kvmppc_core_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) { struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); @@ -117,19 +380,6 @@ void kvmppc_core_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) sregs->u.e.impl.fsl.hid0 = vcpu_e500->hid0; sregs->u.e.impl.fsl.mcar = vcpu_e500->mcar; - sregs->u.e.mas0 = vcpu->arch.shared->mas0; - sregs->u.e.mas1 = vcpu->arch.shared->mas1; - sregs->u.e.mas2 = vcpu->arch.shared->mas2; - sregs->u.e.mas7_3 = vcpu->arch.shared->mas7_3; - sregs->u.e.mas4 = vcpu->arch.shared->mas4; - sregs->u.e.mas6 = vcpu->arch.shared->mas6; - - sregs->u.e.mmucfg = mfspr(SPRN_MMUCFG); - sregs->u.e.tlbcfg[0] = vcpu_e500->tlb0cfg; - sregs->u.e.tlbcfg[1] = vcpu_e500->tlb1cfg; - sregs->u.e.tlbcfg[2] = 0; - sregs->u.e.tlbcfg[3] = 0; - sregs->u.e.ivor_high[0] = vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_UNAVAIL]; sregs->u.e.ivor_high[1] = vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_DATA]; sregs->u.e.ivor_high[2] = vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_ROUND]; @@ -137,11 +387,13 @@ void kvmppc_core_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) vcpu->arch.ivor[BOOKE_IRQPRIO_PERFORMANCE_MONITOR]; kvmppc_get_sregs_ivor(vcpu, sregs); + kvmppc_get_sregs_e500_tlb(vcpu, sregs); } int kvmppc_core_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) { struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); + int ret; if (sregs->u.e.impl_id == KVM_SREGS_E_IMPL_FSL) { vcpu_e500->svr = sregs->u.e.impl.fsl.svr; @@ -149,14 +401,9 @@ int kvmppc_core_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) vcpu_e500->mcar = sregs->u.e.impl.fsl.mcar; } - if (sregs->u.e.features & KVM_SREGS_E_ARCH206_MMU) { - vcpu->arch.shared->mas0 = sregs->u.e.mas0; - vcpu->arch.shared->mas1 = sregs->u.e.mas1; - vcpu->arch.shared->mas2 = sregs->u.e.mas2; - vcpu->arch.shared->mas7_3 = sregs->u.e.mas7_3; - vcpu->arch.shared->mas4 = sregs->u.e.mas4; - vcpu->arch.shared->mas6 = sregs->u.e.mas6; - } + ret = kvmppc_set_sregs_e500_tlb(vcpu, sregs); + if (ret < 0) + return ret; if (!(sregs->u.e.features & KVM_SREGS_E_IVOR)) return 0; @@ -195,9 +442,12 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id) if (err) goto free_vcpu; + if (kvmppc_e500_id_table_alloc(vcpu_e500) == NULL) + goto uninit_vcpu; + err = kvmppc_e500_tlb_init(vcpu_e500); if (err) - goto uninit_vcpu; + goto uninit_id; vcpu->arch.shared = (void*)__get_free_page(GFP_KERNEL|__GFP_ZERO); if (!vcpu->arch.shared) @@ -207,6 +457,8 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id) uninit_tlb: kvmppc_e500_tlb_uninit(vcpu_e500); +uninit_id: + kvmppc_e500_id_table_free(vcpu_e500); uninit_vcpu: kvm_vcpu_uninit(vcpu); free_vcpu: @@ -220,8 +472,9 @@ void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu) struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); free_page((unsigned long)vcpu->arch.shared); - kvm_vcpu_uninit(vcpu); kvmppc_e500_tlb_uninit(vcpu_e500); + kvmppc_e500_id_table_free(vcpu_e500); + kvm_vcpu_uninit(vcpu); kmem_cache_free(kvm_vcpu_cache, vcpu_e500); } diff --git a/arch/powerpc/kvm/e500.h b/arch/powerpc/kvm/e500.h index a48af00..34cef08 100644 --- a/arch/powerpc/kvm/e500.h +++ b/arch/powerpc/kvm/e500.h @@ -35,7 +35,9 @@ struct tlbe_priv { struct tlbe_ref ref; /* TLB0 only -- TLB1 uses tlb_refs */ }; +#ifdef CONFIG_KVM_E500 struct vcpu_id_table; +#endif struct kvmppc_e500_tlb_params { int entries, ways, sets; @@ -70,23 +72,22 @@ struct kvmppc_vcpu_e500 { struct tlbe_ref *tlb_refs[E500_TLB_NUM]; unsigned int host_tlb1_nv; - u32 host_pid[E500_PID_NUM]; - u32 pid[E500_PID_NUM]; u32 svr; - - /* vcpu id table */ - struct vcpu_id_table *idt; - u32 l1csr0; u32 l1csr1; u32 hid0; u32 hid1; - u32 tlb0cfg; - u32 tlb1cfg; u64 mcar; struct page **shared_tlb_pages; int num_shared_tlb_pages; + +#ifdef CONFIG_KVM_E500 + u32 pid[E500_PID_NUM]; + + /* vcpu id table */ + struct vcpu_id_table *idt; +#endif }; static inline struct kvmppc_vcpu_e500 *to_e500(struct kvm_vcpu *vcpu) @@ -113,23 +114,25 @@ static inline struct kvmppc_vcpu_e500 *to_e500(struct kvm_vcpu *vcpu) (MAS3_U0 | MAS3_U1 | MAS3_U2 | MAS3_U3 \ | E500_TLB_USER_PERM_MASK | E500_TLB_SUPER_PERM_MASK) -extern void kvmppc_e500_tlb_put(struct kvm_vcpu *); -extern void kvmppc_e500_tlb_load(struct kvm_vcpu *, int); -extern void kvmppc_e500_tlb_setup(struct kvmppc_vcpu_e500 *); -extern void kvmppc_e500_recalc_shadow_pid(struct kvmppc_vcpu_e500 *); int kvmppc_e500_emul_mt_mmucsr0(struct kvmppc_vcpu_e500 *vcpu_e500, ulong value); int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *vcpu); int kvmppc_e500_emul_tlbre(struct kvm_vcpu *vcpu); int kvmppc_e500_emul_tlbivax(struct kvm_vcpu *vcpu, int ra, int rb); int kvmppc_e500_emul_tlbsx(struct kvm_vcpu *vcpu, int rb); -int kvmppc_e500_tlb_search(struct kvm_vcpu *, gva_t, unsigned int, int); int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *vcpu_e500); void kvmppc_e500_tlb_uninit(struct kvmppc_vcpu_e500 *vcpu_e500); void kvmppc_get_sregs_e500_tlb(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs); int kvmppc_set_sregs_e500_tlb(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs); + +#ifdef CONFIG_KVM_E500 +unsigned int kvmppc_e500_get_sid(struct kvmppc_vcpu_e500 *vcpu_e500, + unsigned int as, unsigned int gid, + unsigned int pr, int avoid_recursion); +#endif + /* TLB helper functions */ static inline unsigned int get_tlb_size(const struct kvm_book3e_206_tlb_entry *tlbe) @@ -183,6 +186,12 @@ get_tlb_iprot(const struct kvm_book3e_206_tlb_entry *tlbe) return (tlbe->mas1 >> 30) & 0x1; } +static inline unsigned int +get_tlb_tsize(const struct kvm_book3e_206_tlb_entry *tlbe) +{ + return (tlbe->mas1 & MAS1_TSIZE_MASK) >> MAS1_TSIZE_SHIFT; +} + static inline unsigned int get_cur_pid(struct kvm_vcpu *vcpu) { return vcpu->arch.pid & 0xff; @@ -248,4 +257,31 @@ static inline int tlbe_is_host_safe(const struct kvm_vcpu *vcpu, return 1; } +static inline struct kvm_book3e_206_tlb_entry *get_entry( + struct kvmppc_vcpu_e500 *vcpu_e500, int tlbsel, int entry) +{ + int offset = vcpu_e500->gtlb_offset[tlbsel]; + return &vcpu_e500->gtlb_arch[offset + entry]; +} + +void kvmppc_e500_tlbil_one(struct kvmppc_vcpu_e500 *vcpu_e500, + struct kvm_book3e_206_tlb_entry *gtlbe); +void kvmppc_e500_tlbil_all(struct kvmppc_vcpu_e500 *vcpu_e500); + +#ifdef CONFIG_KVM_E500 +unsigned int kvmppc_e500_get_tlb_stid(struct kvm_vcpu *vcpu, + struct kvm_book3e_206_tlb_entry *gtlbe); + +static inline unsigned int get_tlbmiss_tid(struct kvm_vcpu *vcpu) +{ + struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); + unsigned int tidseld = (vcpu->arch.shared->mas4 >> 16) & 0xf; + + return vcpu_e500->pid[tidseld]; +} + +/* Force TS=1 for all guest mappings. */ +#define get_tlb_sts(gtlbe) (MAS1_TS) +#endif /* CONFIG_KVM_E500 */ + #endif /* KVM_E500_H */ diff --git a/arch/powerpc/kvm/e500_emulate.c b/arch/powerpc/kvm/e500_emulate.c index 7e2d592..c80794d 100644 --- a/arch/powerpc/kvm/e500_emulate.c +++ b/arch/powerpc/kvm/e500_emulate.c @@ -174,9 +174,9 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt) kvmppc_set_gpr(vcpu, rt, val); break; case SPRN_TLB0CFG: - kvmppc_set_gpr(vcpu, rt, vcpu_e500->tlb0cfg); break; + kvmppc_set_gpr(vcpu, rt, vcpu->arch.tlbcfg[0]); break; case SPRN_TLB1CFG: - kvmppc_set_gpr(vcpu, rt, vcpu_e500->tlb1cfg); break; + kvmppc_set_gpr(vcpu, rt, vcpu->arch.tlbcfg[1]); break; case SPRN_L1CSR0: kvmppc_set_gpr(vcpu, rt, vcpu_e500->l1csr0); break; case SPRN_L1CSR1: @@ -192,7 +192,7 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt) kvmppc_set_gpr(vcpu, rt, 0); break; case SPRN_MMUCFG: - kvmppc_set_gpr(vcpu, rt, mfspr(SPRN_MMUCFG)); break; + kvmppc_set_gpr(vcpu, rt, vcpu->arch.mmucfg); break; /* extra exceptions */ case SPRN_IVOR32: diff --git a/arch/powerpc/kvm/e500_tlb.c b/arch/powerpc/kvm/e500_tlb.c index 7d4a918..9925fc6 100644 --- a/arch/powerpc/kvm/e500_tlb.c +++ b/arch/powerpc/kvm/e500_tlb.c @@ -27,208 +27,14 @@ #include #include -#include "../mm/mmu_decl.h" #include "e500.h" #include "trace.h" #include "timing.h" #define to_htlb1_esel(esel) (host_tlb_params[1].entries - (esel) - 1) -struct id { - unsigned long val; - struct id **pentry; -}; - -#define NUM_TIDS 256 - -/* - * This table provide mappings from: - * (guestAS,guestTID,guestPR) --> ID of physical cpu - * guestAS [0..1] - * guestTID [0..255] - * guestPR [0..1] - * ID [1..255] - * Each vcpu keeps one vcpu_id_table. - */ -struct vcpu_id_table { - struct id id[2][NUM_TIDS][2]; -}; - -/* - * This table provide reversed mappings of vcpu_id_table: - * ID --> address of vcpu_id_table item. - * Each physical core has one pcpu_id_table. - */ -struct pcpu_id_table { - struct id *entry[NUM_TIDS]; -}; - -static DEFINE_PER_CPU(struct pcpu_id_table, pcpu_sids); - -/* This variable keeps last used shadow ID on local core. - * The valid range of shadow ID is [1..255] */ -static DEFINE_PER_CPU(unsigned long, pcpu_last_used_sid); - static struct kvmppc_e500_tlb_params host_tlb_params[E500_TLB_NUM]; -static struct kvm_book3e_206_tlb_entry *get_entry( - struct kvmppc_vcpu_e500 *vcpu_e500, int tlbsel, int entry) -{ - int offset = vcpu_e500->gtlb_offset[tlbsel]; - return &vcpu_e500->gtlb_arch[offset + entry]; -} - -/* - * Allocate a free shadow id and setup a valid sid mapping in given entry. - * A mapping is only valid when vcpu_id_table and pcpu_id_table are match. - * - * The caller must have preemption disabled, and keep it that way until - * it has finished with the returned shadow id (either written into the - * TLB or arch.shadow_pid, or discarded). - */ -static inline int local_sid_setup_one(struct id *entry) -{ - unsigned long sid; - int ret = -1; - - sid = ++(__get_cpu_var(pcpu_last_used_sid)); - if (sid < NUM_TIDS) { - __get_cpu_var(pcpu_sids).entry[sid] = entry; - entry->val = sid; - entry->pentry = &__get_cpu_var(pcpu_sids).entry[sid]; - ret = sid; - } - - /* - * If sid == NUM_TIDS, we've run out of sids. We return -1, and - * the caller will invalidate everything and start over. - * - * sid > NUM_TIDS indicates a race, which we disable preemption to - * avoid. - */ - WARN_ON(sid > NUM_TIDS); - - return ret; -} - -/* - * Check if given entry contain a valid shadow id mapping. - * An ID mapping is considered valid only if - * both vcpu and pcpu know this mapping. - * - * The caller must have preemption disabled, and keep it that way until - * it has finished with the returned shadow id (either written into the - * TLB or arch.shadow_pid, or discarded). - */ -static inline int local_sid_lookup(struct id *entry) -{ - if (entry && entry->val != 0 && - __get_cpu_var(pcpu_sids).entry[entry->val] == entry && - entry->pentry == &__get_cpu_var(pcpu_sids).entry[entry->val]) - return entry->val; - return -1; -} - -/* Invalidate all id mappings on local core -- call with preempt disabled */ -static inline void local_sid_destroy_all(void) -{ - __get_cpu_var(pcpu_last_used_sid) = 0; - memset(&__get_cpu_var(pcpu_sids), 0, sizeof(__get_cpu_var(pcpu_sids))); -} - -static void *kvmppc_e500_id_table_alloc(struct kvmppc_vcpu_e500 *vcpu_e500) -{ - vcpu_e500->idt = kzalloc(sizeof(struct vcpu_id_table), GFP_KERNEL); - return vcpu_e500->idt; -} - -static void kvmppc_e500_id_table_free(struct kvmppc_vcpu_e500 *vcpu_e500) -{ - kfree(vcpu_e500->idt); -} - -/* Invalidate all mappings on vcpu */ -static void kvmppc_e500_id_table_reset_all(struct kvmppc_vcpu_e500 *vcpu_e500) -{ - memset(vcpu_e500->idt, 0, sizeof(struct vcpu_id_table)); - - /* Update shadow pid when mappings are changed */ - kvmppc_e500_recalc_shadow_pid(vcpu_e500); -} - -/* Invalidate one ID mapping on vcpu */ -static inline void kvmppc_e500_id_table_reset_one( - struct kvmppc_vcpu_e500 *vcpu_e500, - int as, int pid, int pr) -{ - struct vcpu_id_table *idt = vcpu_e500->idt; - - BUG_ON(as >= 2); - BUG_ON(pid >= NUM_TIDS); - BUG_ON(pr >= 2); - - idt->id[as][pid][pr].val = 0; - idt->id[as][pid][pr].pentry = NULL; - - /* Update shadow pid when mappings are changed */ - kvmppc_e500_recalc_shadow_pid(vcpu_e500); -} - -/* - * Map guest (vcpu,AS,ID,PR) to physical core shadow id. - * This function first lookup if a valid mapping exists, - * if not, then creates a new one. - * - * The caller must have preemption disabled, and keep it that way until - * it has finished with the returned shadow id (either written into the - * TLB or arch.shadow_pid, or discarded). - */ -static unsigned int kvmppc_e500_get_sid(struct kvmppc_vcpu_e500 *vcpu_e500, - unsigned int as, unsigned int gid, - unsigned int pr, int avoid_recursion) -{ - struct vcpu_id_table *idt = vcpu_e500->idt; - int sid; - - BUG_ON(as >= 2); - BUG_ON(gid >= NUM_TIDS); - BUG_ON(pr >= 2); - - sid = local_sid_lookup(&idt->id[as][gid][pr]); - - while (sid <= 0) { - /* No mapping yet */ - sid = local_sid_setup_one(&idt->id[as][gid][pr]); - if (sid <= 0) { - _tlbil_all(); - local_sid_destroy_all(); - } - - /* Update shadow pid when mappings are changed */ - if (!avoid_recursion) - kvmppc_e500_recalc_shadow_pid(vcpu_e500); - } - - return sid; -} - -/* Map guest pid to shadow. - * We use PID to keep shadow of current guest non-zero PID, - * and use PID1 to keep shadow of guest zero PID. - * So that guest tlbe with TID=0 can be accessed at any time */ -void kvmppc_e500_recalc_shadow_pid(struct kvmppc_vcpu_e500 *vcpu_e500) -{ - preempt_disable(); - vcpu_e500->vcpu.arch.shadow_pid = kvmppc_e500_get_sid(vcpu_e500, - get_cur_as(&vcpu_e500->vcpu), - get_cur_pid(&vcpu_e500->vcpu), - get_cur_pr(&vcpu_e500->vcpu), 1); - vcpu_e500->vcpu.arch.shadow_pid1 = kvmppc_e500_get_sid(vcpu_e500, - get_cur_as(&vcpu_e500->vcpu), 0, - get_cur_pr(&vcpu_e500->vcpu), 1); - preempt_enable(); -} - static inline unsigned int gtlb0_get_next_victim( struct kvmppc_vcpu_e500 *vcpu_e500) { @@ -336,6 +142,7 @@ static inline void write_host_tlbe(struct kvmppc_vcpu_e500 *vcpu_e500, } } +#ifdef CONFIG_KVM_E500 void kvmppc_map_magic(struct kvm_vcpu *vcpu) { struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); @@ -360,75 +167,21 @@ void kvmppc_map_magic(struct kvm_vcpu *vcpu) __write_host_tlbe(&magic, MAS0_TLBSEL(1) | MAS0_ESEL(tlbcam_index)); preempt_enable(); } - -void kvmppc_e500_tlb_load(struct kvm_vcpu *vcpu, int cpu) -{ - struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); - - /* Shadow PID may be expired on local core */ - kvmppc_e500_recalc_shadow_pid(vcpu_e500); -} - -void kvmppc_e500_tlb_put(struct kvm_vcpu *vcpu) -{ -} +#endif static void inval_gtlbe_on_host(struct kvmppc_vcpu_e500 *vcpu_e500, int tlbsel, int esel) { struct kvm_book3e_206_tlb_entry *gtlbe = get_entry(vcpu_e500, tlbsel, esel); - struct vcpu_id_table *idt = vcpu_e500->idt; - unsigned int pr, tid, ts, pid; - u32 val, eaddr; - unsigned long flags; - - ts = get_tlb_ts(gtlbe); - tid = get_tlb_tid(gtlbe); - - preempt_disable(); - - /* One guest ID may be mapped to two shadow IDs */ - for (pr = 0; pr < 2; pr++) { - /* - * The shadow PID can have a valid mapping on at most one - * host CPU. In the common case, it will be valid on this - * CPU, in which case (for TLB0) we do a local invalidation - * of the specific address. - * - * If the shadow PID is not valid on the current host CPU, or - * if we're invalidating a TLB1 entry, we invalidate the - * entire shadow PID. - */ - if (tlbsel == 1 || - (pid = local_sid_lookup(&idt->id[ts][tid][pr])) <= 0) { - kvmppc_e500_id_table_reset_one(vcpu_e500, ts, tid, pr); - continue; - } - - /* - * The guest is invalidating a TLB0 entry which is in a PID - * that has a valid shadow mapping on this host CPU. We - * search host TLB0 to invalidate it's shadow TLB entry, - * similar to __tlbil_va except that we need to look in AS1. - */ - val = (pid << MAS6_SPID_SHIFT) | MAS6_SAS; - eaddr = get_tlb_eaddr(gtlbe); - - local_irq_save(flags); - - mtspr(SPRN_MAS6, val); - asm volatile("tlbsx 0, %[eaddr]" : : [eaddr] "r" (eaddr)); - val = mfspr(SPRN_MAS1); - if (val & MAS1_VALID) { - mtspr(SPRN_MAS1, val & ~MAS1_VALID); - asm volatile("tlbwe"); - } - local_irq_restore(flags); + if (tlbsel == 1) { + kvmppc_e500_tlbil_all(vcpu_e500); + return; } - preempt_enable(); + /* Guest tlbe is backed by at most one host tlbe per shadow pid. */ + kvmppc_e500_tlbil_one(vcpu_e500, gtlbe); } static int tlb0_set_base(gva_t addr, int sets, int ways) @@ -546,7 +299,7 @@ static void clear_tlb_refs(struct kvmppc_vcpu_e500 *vcpu_e500) int stlbsel = 1; int i; - kvmppc_e500_id_table_reset_all(vcpu_e500); + kvmppc_e500_tlbil_all(vcpu_e500); for (i = 0; i < host_tlb_params[stlbsel].entries; i++) { struct tlbe_ref *ref = @@ -561,19 +314,18 @@ static inline void kvmppc_e500_deliver_tlb_miss(struct kvm_vcpu *vcpu, unsigned int eaddr, int as) { struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); - unsigned int victim, pidsel, tsized; + unsigned int victim, tsized; int tlbsel; /* since we only have two TLBs, only lower bit is used. */ tlbsel = (vcpu->arch.shared->mas4 >> 28) & 0x1; victim = (tlbsel == 0) ? gtlb0_get_next_victim(vcpu_e500) : 0; - pidsel = (vcpu->arch.shared->mas4 >> 16) & 0xf; tsized = (vcpu->arch.shared->mas4 >> 7) & 0x1f; vcpu->arch.shared->mas0 = MAS0_TLBSEL(tlbsel) | MAS0_ESEL(victim) | MAS0_NV(vcpu_e500->gtlb_nv[tlbsel]); vcpu->arch.shared->mas1 = MAS1_VALID | (as ? MAS1_TS : 0) - | MAS1_TID(vcpu_e500->pid[pidsel]) + | MAS1_TID(get_tlbmiss_tid(vcpu)) | MAS1_TSIZE(tsized); vcpu->arch.shared->mas2 = (eaddr & MAS2_EPN) | (vcpu->arch.shared->mas4 & MAS2_ATTRIB_MASK); @@ -585,23 +337,22 @@ static inline void kvmppc_e500_deliver_tlb_miss(struct kvm_vcpu *vcpu, /* TID must be supplied by the caller */ static inline void kvmppc_e500_setup_stlbe( - struct kvmppc_vcpu_e500 *vcpu_e500, + struct kvm_vcpu *vcpu, struct kvm_book3e_206_tlb_entry *gtlbe, int tsize, struct tlbe_ref *ref, u64 gvaddr, struct kvm_book3e_206_tlb_entry *stlbe) { pfn_t pfn = ref->pfn; + u32 pr = vcpu->arch.shared->msr & MSR_PR; BUG_ON(!(ref->flags & E500_TLB_VALID)); - /* Force TS=1 IPROT=0 for all guest mappings. */ - stlbe->mas1 = MAS1_TSIZE(tsize) | MAS1_TS | MAS1_VALID; - stlbe->mas2 = (gvaddr & MAS2_EPN) - | e500_shadow_mas2_attrib(gtlbe->mas2, - vcpu_e500->vcpu.arch.shared->msr & MSR_PR); - stlbe->mas7_3 = ((u64)pfn << PAGE_SHIFT) - | e500_shadow_mas3_attrib(gtlbe->mas7_3, - vcpu_e500->vcpu.arch.shared->msr & MSR_PR); + /* Force IPROT=0 for all guest mappings. */ + stlbe->mas1 = MAS1_TSIZE(tsize) | get_tlb_sts(gtlbe) | MAS1_VALID; + stlbe->mas2 = (gvaddr & MAS2_EPN) | + e500_shadow_mas2_attrib(gtlbe->mas2, pr); + stlbe->mas7_3 = ((u64)pfn << PAGE_SHIFT) | + e500_shadow_mas3_attrib(gtlbe->mas7_3, pr); } static inline void kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500, @@ -735,7 +486,8 @@ static inline void kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500, kvmppc_e500_ref_release(ref); kvmppc_e500_ref_setup(ref, gtlbe, pfn); - kvmppc_e500_setup_stlbe(vcpu_e500, gtlbe, tsize, ref, gvaddr, stlbe); + kvmppc_e500_setup_stlbe(&vcpu_e500->vcpu, gtlbe, tsize, + ref, gvaddr, stlbe); } /* XXX only map the one-one case, for now use TLB0 */ @@ -775,14 +527,6 @@ static int kvmppc_e500_tlb1_map(struct kvmppc_vcpu_e500 *vcpu_e500, return victim; } -void kvmppc_mmu_msr_notify(struct kvm_vcpu *vcpu, u32 old_msr) -{ - struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); - - /* Recalc shadow pid since MSR changes */ - kvmppc_e500_recalc_shadow_pid(vcpu_e500); -} - static inline int kvmppc_e500_gtlbe_invalidate( struct kvmppc_vcpu_e500 *vcpu_e500, int tlbsel, int esel) @@ -810,7 +554,7 @@ int kvmppc_e500_emul_mt_mmucsr0(struct kvmppc_vcpu_e500 *vcpu_e500, ulong value) kvmppc_e500_gtlbe_invalidate(vcpu_e500, 1, esel); /* Invalidate all vcpu id mappings */ - kvmppc_e500_id_table_reset_all(vcpu_e500); + kvmppc_e500_tlbil_all(vcpu_e500); return EMULATE_DONE; } @@ -843,7 +587,7 @@ int kvmppc_e500_emul_tlbivax(struct kvm_vcpu *vcpu, int ra, int rb) } /* Invalidate all vcpu id mappings */ - kvmppc_e500_id_table_reset_all(vcpu_e500); + kvmppc_e500_tlbil_all(vcpu_e500); return EMULATE_DONE; } @@ -928,9 +672,7 @@ static void write_stlbe(struct kvmppc_vcpu_e500 *vcpu_e500, int stid; preempt_disable(); - stid = kvmppc_e500_get_sid(vcpu_e500, get_tlb_ts(gtlbe), - get_tlb_tid(gtlbe), - get_cur_pr(&vcpu_e500->vcpu), 0); + stid = kvmppc_e500_get_tlb_stid(&vcpu_e500->vcpu, gtlbe); stlbe->mas1 |= MAS1_TID(stid); write_host_tlbe(vcpu_e500, stlbsel, sesel, stlbe); @@ -940,8 +682,8 @@ static void write_stlbe(struct kvmppc_vcpu_e500 *vcpu_e500, int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *vcpu) { struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); - struct kvm_book3e_206_tlb_entry *gtlbe; - int tlbsel, esel; + struct kvm_book3e_206_tlb_entry *gtlbe, stlbe; + int tlbsel, esel, stlbsel, sesel; tlbsel = get_tlb_tlbsel(vcpu); esel = get_tlb_esel(vcpu, tlbsel); @@ -960,8 +702,6 @@ int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *vcpu) /* Invalidate shadow mappings for the about-to-be-clobbered TLBE. */ if (tlbe_is_host_safe(vcpu, gtlbe)) { - struct kvm_book3e_206_tlb_entry stlbe; - int stlbsel, sesel; u64 eaddr; u64 raddr; @@ -988,7 +728,7 @@ int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *vcpu) * are mapped on the fly. */ stlbsel = 1; sesel = kvmppc_e500_tlb1_map(vcpu_e500, eaddr, - raddr >> PAGE_SHIFT, gtlbe, &stlbe); + raddr >> PAGE_SHIFT, gtlbe, &stlbe); break; default: @@ -1002,6 +742,48 @@ int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *vcpu) return EMULATE_DONE; } +static int kvmppc_e500_tlb_search(struct kvm_vcpu *vcpu, + gva_t eaddr, unsigned int pid, int as) +{ + struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); + int esel, tlbsel; + + for (tlbsel = 0; tlbsel < 2; tlbsel++) { + esel = kvmppc_e500_tlb_index(vcpu_e500, eaddr, tlbsel, pid, as); + if (esel >= 0) + return index_of(tlbsel, esel); + } + + return -1; +} + +/* 'linear_address' is actually an encoding of AS|PID|EADDR . */ +int kvmppc_core_vcpu_translate(struct kvm_vcpu *vcpu, + struct kvm_translation *tr) +{ + int index; + gva_t eaddr; + u8 pid; + u8 as; + + eaddr = tr->linear_address; + pid = (tr->linear_address >> 32) & 0xff; + as = (tr->linear_address >> 40) & 0x1; + + index = kvmppc_e500_tlb_search(vcpu, eaddr, pid, as); + if (index < 0) { + tr->valid = 0; + return 0; + } + + tr->physical_address = kvmppc_mmu_xlate(vcpu, index, eaddr); + /* XXX what does "writeable" and "usermode" even mean? */ + tr->valid = 1; + + return 0; +} + + int kvmppc_mmu_itlb_index(struct kvm_vcpu *vcpu, gva_t eaddr) { unsigned int as = !!(vcpu->arch.shared->msr & MSR_IS); @@ -1065,7 +847,7 @@ void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 eaddr, gpa_t gpaddr, sesel = 0; /* unused */ priv = &vcpu_e500->gtlb_priv[tlbsel][esel]; - kvmppc_e500_setup_stlbe(vcpu_e500, gtlbe, BOOK3E_PAGESZ_4K, + kvmppc_e500_setup_stlbe(vcpu, gtlbe, BOOK3E_PAGESZ_4K, &priv->ref, eaddr, &stlbe); break; @@ -1086,48 +868,6 @@ void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 eaddr, gpa_t gpaddr, write_stlbe(vcpu_e500, gtlbe, &stlbe, stlbsel, sesel); } -int kvmppc_e500_tlb_search(struct kvm_vcpu *vcpu, - gva_t eaddr, unsigned int pid, int as) -{ - struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); - int esel, tlbsel; - - for (tlbsel = 0; tlbsel < 2; tlbsel++) { - esel = kvmppc_e500_tlb_index(vcpu_e500, eaddr, tlbsel, pid, as); - if (esel >= 0) - return index_of(tlbsel, esel); - } - - return -1; -} - -void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 pid) -{ - struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); - - if (vcpu->arch.pid != pid) { - vcpu_e500->pid[0] = vcpu->arch.pid = pid; - kvmppc_e500_recalc_shadow_pid(vcpu_e500); - } -} - -void kvmppc_e500_tlb_setup(struct kvmppc_vcpu_e500 *vcpu_e500) -{ - struct kvm_book3e_206_tlb_entry *tlbe; - - /* Insert large initial mapping for guest. */ - tlbe = get_entry(vcpu_e500, 1, 0); - tlbe->mas1 = MAS1_VALID | MAS1_TSIZE(BOOK3E_PAGESZ_256M); - tlbe->mas2 = 0; - tlbe->mas7_3 = E500_TLB_SUPER_PERM_MASK; - - /* 4K map for serial output. Used by kernel wrapper. */ - tlbe = get_entry(vcpu_e500, 1, 1); - tlbe->mas1 = MAS1_VALID | MAS1_TSIZE(BOOK3E_PAGESZ_4K); - tlbe->mas2 = (0xe0004500 & 0xFFFFF000) | MAS2_I | MAS2_G; - tlbe->mas7_3 = (0xe0004500 & 0xFFFFF000) | E500_TLB_SUPER_PERM_MASK; -} - static void free_gtlb(struct kvmppc_vcpu_e500 *vcpu_e500) { int i; @@ -1154,6 +894,36 @@ static void free_gtlb(struct kvmppc_vcpu_e500 *vcpu_e500) vcpu_e500->gtlb_arch = NULL; } +void kvmppc_get_sregs_e500_tlb(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) +{ + sregs->u.e.mas0 = vcpu->arch.shared->mas0; + sregs->u.e.mas1 = vcpu->arch.shared->mas1; + sregs->u.e.mas2 = vcpu->arch.shared->mas2; + sregs->u.e.mas7_3 = vcpu->arch.shared->mas7_3; + sregs->u.e.mas4 = vcpu->arch.shared->mas4; + sregs->u.e.mas6 = vcpu->arch.shared->mas6; + + sregs->u.e.mmucfg = vcpu->arch.mmucfg; + sregs->u.e.tlbcfg[0] = vcpu->arch.tlbcfg[0]; + sregs->u.e.tlbcfg[1] = vcpu->arch.tlbcfg[1]; + sregs->u.e.tlbcfg[2] = 0; + sregs->u.e.tlbcfg[3] = 0; +} + +int kvmppc_set_sregs_e500_tlb(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) +{ + if (sregs->u.e.features & KVM_SREGS_E_ARCH206_MMU) { + vcpu->arch.shared->mas0 = sregs->u.e.mas0; + vcpu->arch.shared->mas1 = sregs->u.e.mas1; + vcpu->arch.shared->mas2 = sregs->u.e.mas2; + vcpu->arch.shared->mas7_3 = sregs->u.e.mas7_3; + vcpu->arch.shared->mas4 = sregs->u.e.mas4; + vcpu->arch.shared->mas6 = sregs->u.e.mas6; + } + + return 0; +} + int kvm_vcpu_ioctl_config_tlb(struct kvm_vcpu *vcpu, struct kvm_config_tlb *cfg) { @@ -1237,14 +1007,16 @@ int kvm_vcpu_ioctl_config_tlb(struct kvm_vcpu *vcpu, vcpu_e500->gtlb_offset[0] = 0; vcpu_e500->gtlb_offset[1] = params.tlb_sizes[0]; - vcpu_e500->tlb0cfg &= ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC); + vcpu->arch.mmucfg = mfspr(SPRN_MMUCFG) & ~MMUCFG_LPIDSIZE; + + vcpu->arch.tlbcfg[0] &= ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC); if (params.tlb_sizes[0] <= 2048) - vcpu_e500->tlb0cfg |= params.tlb_sizes[0]; - vcpu_e500->tlb0cfg |= params.tlb_ways[0] << TLBnCFG_ASSOC_SHIFT; + vcpu->arch.tlbcfg[0] |= params.tlb_sizes[0]; + vcpu->arch.tlbcfg[0] |= params.tlb_ways[0] << TLBnCFG_ASSOC_SHIFT; - vcpu_e500->tlb1cfg &= ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC); - vcpu_e500->tlb1cfg |= params.tlb_sizes[1]; - vcpu_e500->tlb1cfg |= params.tlb_ways[1] << TLBnCFG_ASSOC_SHIFT; + vcpu->arch.tlbcfg[1] &= ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC); + vcpu->arch.tlbcfg[1] |= params.tlb_sizes[1]; + vcpu->arch.tlbcfg[1] |= params.tlb_ways[1] << TLBnCFG_ASSOC_SHIFT; vcpu_e500->shared_tlb_pages = pages; vcpu_e500->num_shared_tlb_pages = num_pages; @@ -1280,6 +1052,7 @@ int kvm_vcpu_ioctl_dirty_tlb(struct kvm_vcpu *vcpu, int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *vcpu_e500) { + struct kvm_vcpu *vcpu = &vcpu_e500->vcpu; int entry_size = sizeof(struct kvm_book3e_206_tlb_entry); int entries = KVM_E500_TLB0_SIZE + KVM_E500_TLB1_SIZE; @@ -1356,20 +1129,17 @@ int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *vcpu_e500) if (!vcpu_e500->gtlb_priv[1]) goto err; - if (kvmppc_e500_id_table_alloc(vcpu_e500) == NULL) - goto err; - /* Init TLB configuration register */ - vcpu_e500->tlb0cfg = mfspr(SPRN_TLB0CFG) & + vcpu->arch.tlbcfg[0] = mfspr(SPRN_TLB0CFG) & ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC); - vcpu_e500->tlb0cfg |= vcpu_e500->gtlb_params[0].entries; - vcpu_e500->tlb0cfg |= + vcpu->arch.tlbcfg[0] |= vcpu_e500->gtlb_params[0].entries; + vcpu->arch.tlbcfg[0] |= vcpu_e500->gtlb_params[0].ways << TLBnCFG_ASSOC_SHIFT; - vcpu_e500->tlb1cfg = mfspr(SPRN_TLB1CFG) & + vcpu->arch.tlbcfg[1] = mfspr(SPRN_TLB1CFG) & ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC); - vcpu_e500->tlb0cfg |= vcpu_e500->gtlb_params[1].entries; - vcpu_e500->tlb0cfg |= + vcpu->arch.tlbcfg[0] |= vcpu_e500->gtlb_params[1].entries; + vcpu->arch.tlbcfg[0] |= vcpu_e500->gtlb_params[1].ways << TLBnCFG_ASSOC_SHIFT; return 0; @@ -1384,8 +1154,6 @@ err: void kvmppc_e500_tlb_uninit(struct kvmppc_vcpu_e500 *vcpu_e500) { free_gtlb(vcpu_e500); - kvmppc_e500_id_table_free(vcpu_e500); - kfree(vcpu_e500->tlb_refs[0]); kfree(vcpu_e500->tlb_refs[1]); } -- cgit v0.10.2 From 4f802fe98bd5bd4fe1dd86df3e5c58546e65ad09 Mon Sep 17 00:00:00 2001 From: Scott Wood Date: Tue, 20 Dec 2011 15:34:37 +0000 Subject: KVM: PPC: e500: Track TLB1 entries with a bitmap Rather than invalidate everything when a TLB1 entry needs to be taken down, keep track of which host TLB1 entries are used for a given guest TLB1 entry, and invalidate just those entries. Based on code from Ashish Kalra and Liu Yu . Signed-off-by: Scott Wood Signed-off-by: Alexander Graf Signed-off-by: Avi Kivity diff --git a/arch/powerpc/kvm/e500.h b/arch/powerpc/kvm/e500.h index 34cef08..f4dee55 100644 --- a/arch/powerpc/kvm/e500.h +++ b/arch/powerpc/kvm/e500.h @@ -2,6 +2,7 @@ * Copyright (C) 2008-2011 Freescale Semiconductor, Inc. All rights reserved. * * Author: Yu Liu + * Ashish Kalra * * Description: * This file is based on arch/powerpc/kvm/44x_tlb.h and @@ -25,6 +26,7 @@ #define E500_TLB_VALID 1 #define E500_TLB_DIRTY 2 +#define E500_TLB_BITMAP 4 struct tlbe_ref { pfn_t pfn; @@ -82,6 +84,9 @@ struct kvmppc_vcpu_e500 { struct page **shared_tlb_pages; int num_shared_tlb_pages; + u64 *g2h_tlb1_map; + unsigned int *h2g_tlb1_rmap; + #ifdef CONFIG_KVM_E500 u32 pid[E500_PID_NUM]; diff --git a/arch/powerpc/kvm/e500_tlb.c b/arch/powerpc/kvm/e500_tlb.c index 9925fc6..c8ce51d 100644 --- a/arch/powerpc/kvm/e500_tlb.c +++ b/arch/powerpc/kvm/e500_tlb.c @@ -2,6 +2,7 @@ * Copyright (C) 2008-2011 Freescale Semiconductor, Inc. All rights reserved. * * Author: Yu Liu, yu.liu@freescale.com + * Ashish Kalra, ashish.kalra@freescale.com * * Description: * This file is based on arch/powerpc/kvm/44x_tlb.c, @@ -175,8 +176,28 @@ static void inval_gtlbe_on_host(struct kvmppc_vcpu_e500 *vcpu_e500, struct kvm_book3e_206_tlb_entry *gtlbe = get_entry(vcpu_e500, tlbsel, esel); - if (tlbsel == 1) { - kvmppc_e500_tlbil_all(vcpu_e500); + if (tlbsel == 1 && + vcpu_e500->gtlb_priv[1][esel].ref.flags & E500_TLB_BITMAP) { + u64 tmp = vcpu_e500->g2h_tlb1_map[esel]; + int hw_tlb_indx; + unsigned long flags; + + local_irq_save(flags); + while (tmp) { + hw_tlb_indx = __ilog2_u64(tmp & -tmp); + mtspr(SPRN_MAS0, + MAS0_TLBSEL(1) | + MAS0_ESEL(to_htlb1_esel(hw_tlb_indx))); + mtspr(SPRN_MAS1, 0); + asm volatile("tlbwe"); + vcpu_e500->h2g_tlb1_rmap[hw_tlb_indx] = 0; + tmp &= tmp - 1; + } + mb(); + vcpu_e500->g2h_tlb1_map[esel] = 0; + vcpu_e500->gtlb_priv[1][esel].ref.flags &= ~E500_TLB_BITMAP; + local_irq_restore(flags); + return; } @@ -282,6 +303,16 @@ static inline void kvmppc_e500_ref_release(struct tlbe_ref *ref) } } +static void clear_tlb1_bitmap(struct kvmppc_vcpu_e500 *vcpu_e500) +{ + if (vcpu_e500->g2h_tlb1_map) + memset(vcpu_e500->g2h_tlb1_map, + sizeof(u64) * vcpu_e500->gtlb_params[1].entries, 0); + if (vcpu_e500->h2g_tlb1_rmap) + memset(vcpu_e500->h2g_tlb1_rmap, + sizeof(unsigned int) * host_tlb_params[1].entries, 0); +} + static void clear_tlb_privs(struct kvmppc_vcpu_e500 *vcpu_e500) { int tlbsel = 0; @@ -511,7 +542,7 @@ static void kvmppc_e500_tlb0_map(struct kvmppc_vcpu_e500 *vcpu_e500, /* XXX for both one-one and one-to-many , for now use TLB1 */ static int kvmppc_e500_tlb1_map(struct kvmppc_vcpu_e500 *vcpu_e500, u64 gvaddr, gfn_t gfn, struct kvm_book3e_206_tlb_entry *gtlbe, - struct kvm_book3e_206_tlb_entry *stlbe) + struct kvm_book3e_206_tlb_entry *stlbe, int esel) { struct tlbe_ref *ref; unsigned int victim; @@ -524,6 +555,14 @@ static int kvmppc_e500_tlb1_map(struct kvmppc_vcpu_e500 *vcpu_e500, ref = &vcpu_e500->tlb_refs[1][victim]; kvmppc_e500_shadow_map(vcpu_e500, gvaddr, gfn, gtlbe, 1, stlbe, ref); + vcpu_e500->g2h_tlb1_map[esel] |= (u64)1 << victim; + vcpu_e500->gtlb_priv[1][esel].ref.flags |= E500_TLB_BITMAP; + if (vcpu_e500->h2g_tlb1_rmap[victim]) { + unsigned int idx = vcpu_e500->h2g_tlb1_rmap[victim]; + vcpu_e500->g2h_tlb1_map[idx] &= ~(1ULL << victim); + } + vcpu_e500->h2g_tlb1_rmap[victim] = esel; + return victim; } @@ -728,7 +767,7 @@ int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *vcpu) * are mapped on the fly. */ stlbsel = 1; sesel = kvmppc_e500_tlb1_map(vcpu_e500, eaddr, - raddr >> PAGE_SHIFT, gtlbe, &stlbe); + raddr >> PAGE_SHIFT, gtlbe, &stlbe, esel); break; default: @@ -856,7 +895,7 @@ void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 eaddr, gpa_t gpaddr, stlbsel = 1; sesel = kvmppc_e500_tlb1_map(vcpu_e500, eaddr, gfn, - gtlbe, &stlbe); + gtlbe, &stlbe, esel); break; } @@ -872,6 +911,9 @@ static void free_gtlb(struct kvmppc_vcpu_e500 *vcpu_e500) { int i; + clear_tlb1_bitmap(vcpu_e500); + kfree(vcpu_e500->g2h_tlb1_map); + clear_tlb_refs(vcpu_e500); kfree(vcpu_e500->gtlb_priv[0]); kfree(vcpu_e500->gtlb_priv[1]); @@ -932,6 +974,7 @@ int kvm_vcpu_ioctl_config_tlb(struct kvm_vcpu *vcpu, char *virt; struct page **pages; struct tlbe_priv *privs[2] = {}; + u64 *g2h_bitmap = NULL; size_t array_len; u32 sets; int num_pages, ret, i; @@ -993,10 +1036,16 @@ int kvm_vcpu_ioctl_config_tlb(struct kvm_vcpu *vcpu, if (!privs[0] || !privs[1]) goto err_put_page; + g2h_bitmap = kzalloc(sizeof(u64) * params.tlb_sizes[1], + GFP_KERNEL); + if (!g2h_bitmap) + goto err_put_page; + free_gtlb(vcpu_e500); vcpu_e500->gtlb_priv[0] = privs[0]; vcpu_e500->gtlb_priv[1] = privs[1]; + vcpu_e500->g2h_tlb1_map = g2h_bitmap; vcpu_e500->gtlb_arch = (struct kvm_book3e_206_tlb_entry *) (virt + (cfg->array & (PAGE_SIZE - 1))); @@ -1129,6 +1178,18 @@ int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *vcpu_e500) if (!vcpu_e500->gtlb_priv[1]) goto err; + vcpu_e500->g2h_tlb1_map = kzalloc(sizeof(unsigned int) * + vcpu_e500->gtlb_params[1].entries, + GFP_KERNEL); + if (!vcpu_e500->g2h_tlb1_map) + goto err; + + vcpu_e500->h2g_tlb1_rmap = kzalloc(sizeof(unsigned int) * + host_tlb_params[1].entries, + GFP_KERNEL); + if (!vcpu_e500->h2g_tlb1_rmap) + goto err; + /* Init TLB configuration register */ vcpu->arch.tlbcfg[0] = mfspr(SPRN_TLB0CFG) & ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC); @@ -1154,6 +1215,7 @@ err: void kvmppc_e500_tlb_uninit(struct kvmppc_vcpu_e500 *vcpu_e500) { free_gtlb(vcpu_e500); + kfree(vcpu_e500->h2g_tlb1_rmap); kfree(vcpu_e500->tlb_refs[0]); kfree(vcpu_e500->tlb_refs[1]); } -- cgit v0.10.2 From ab9fc4056af338248640ddb18497be386360363d Mon Sep 17 00:00:00 2001 From: Scott Wood Date: Tue, 20 Dec 2011 15:34:39 +0000 Subject: KVM: PPC: e500: emulate tlbilx tlbilx is the new, preferred invalidation instruction. It is not found on e500 prior to e500mc, but there should be no harm in supporting it on all e500. Based on code from Ashish Kalra . Signed-off-by: Scott Wood Signed-off-by: Alexander Graf Signed-off-by: Avi Kivity diff --git a/arch/powerpc/kvm/e500.h b/arch/powerpc/kvm/e500.h index f4dee55..ce3f163 100644 --- a/arch/powerpc/kvm/e500.h +++ b/arch/powerpc/kvm/e500.h @@ -124,6 +124,7 @@ int kvmppc_e500_emul_mt_mmucsr0(struct kvmppc_vcpu_e500 *vcpu_e500, int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *vcpu); int kvmppc_e500_emul_tlbre(struct kvm_vcpu *vcpu); int kvmppc_e500_emul_tlbivax(struct kvm_vcpu *vcpu, int ra, int rb); +int kvmppc_e500_emul_tlbilx(struct kvm_vcpu *vcpu, int rt, int ra, int rb); int kvmppc_e500_emul_tlbsx(struct kvm_vcpu *vcpu, int rb); int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *vcpu_e500); void kvmppc_e500_tlb_uninit(struct kvmppc_vcpu_e500 *vcpu_e500); diff --git a/arch/powerpc/kvm/e500_emulate.c b/arch/powerpc/kvm/e500_emulate.c index c80794d..af02c18 100644 --- a/arch/powerpc/kvm/e500_emulate.c +++ b/arch/powerpc/kvm/e500_emulate.c @@ -22,6 +22,7 @@ #define XOP_TLBSX 914 #define XOP_TLBRE 946 #define XOP_TLBWE 978 +#define XOP_TLBILX 18 int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, unsigned int inst, int *advance) @@ -29,6 +30,7 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, int emulated = EMULATE_DONE; int ra; int rb; + int rt; switch (get_op(inst)) { case 31: @@ -47,6 +49,13 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, emulated = kvmppc_e500_emul_tlbsx(vcpu,rb); break; + case XOP_TLBILX: + ra = get_ra(inst); + rb = get_rb(inst); + rt = get_rt(inst); + emulated = kvmppc_e500_emul_tlbilx(vcpu, rt, ra, rb); + break; + case XOP_TLBIVAX: ra = get_ra(inst); rb = get_rb(inst); diff --git a/arch/powerpc/kvm/e500_tlb.c b/arch/powerpc/kvm/e500_tlb.c index c8ce51d..6eb5d65 100644 --- a/arch/powerpc/kvm/e500_tlb.c +++ b/arch/powerpc/kvm/e500_tlb.c @@ -631,6 +631,58 @@ int kvmppc_e500_emul_tlbivax(struct kvm_vcpu *vcpu, int ra, int rb) return EMULATE_DONE; } +static void tlbilx_all(struct kvmppc_vcpu_e500 *vcpu_e500, int tlbsel, + int pid, int rt) +{ + struct kvm_book3e_206_tlb_entry *tlbe; + int tid, esel; + + /* invalidate all entries */ + for (esel = 0; esel < vcpu_e500->gtlb_params[tlbsel].entries; esel++) { + tlbe = get_entry(vcpu_e500, tlbsel, esel); + tid = get_tlb_tid(tlbe); + if (rt == 0 || tid == pid) { + inval_gtlbe_on_host(vcpu_e500, tlbsel, esel); + kvmppc_e500_gtlbe_invalidate(vcpu_e500, tlbsel, esel); + } + } +} + +static void tlbilx_one(struct kvmppc_vcpu_e500 *vcpu_e500, int pid, + int ra, int rb) +{ + int tlbsel, esel; + gva_t ea; + + ea = kvmppc_get_gpr(&vcpu_e500->vcpu, rb); + if (ra) + ea += kvmppc_get_gpr(&vcpu_e500->vcpu, ra); + + for (tlbsel = 0; tlbsel < 2; tlbsel++) { + esel = kvmppc_e500_tlb_index(vcpu_e500, ea, tlbsel, pid, -1); + if (esel >= 0) { + inval_gtlbe_on_host(vcpu_e500, tlbsel, esel); + kvmppc_e500_gtlbe_invalidate(vcpu_e500, tlbsel, esel); + break; + } + } +} + +int kvmppc_e500_emul_tlbilx(struct kvm_vcpu *vcpu, int rt, int ra, int rb) +{ + struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); + int pid = get_cur_spid(vcpu); + + if (rt == 0 || rt == 1) { + tlbilx_all(vcpu_e500, 0, pid, rt); + tlbilx_all(vcpu_e500, 1, pid, rt); + } else if (rt == 3) { + tlbilx_one(vcpu_e500, pid, ra, rb); + } + + return EMULATE_DONE; +} + int kvmppc_e500_emul_tlbre(struct kvm_vcpu *vcpu) { struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); -- cgit v0.10.2 From cfac57847a67c4903f34a77e971521531bbc7c77 Mon Sep 17 00:00:00 2001 From: Scott Wood Date: Tue, 20 Dec 2011 15:34:40 +0000 Subject: powerpc/booke: Provide exception macros with interrupt name DO_KVM will need to identify the particular exception type. There is an existing set of arbitrary numbers that Linux passes, but it's an undocumented mess that sort of corresponds to server/classic exception vectors but not really. Signed-off-by: Scott Wood Signed-off-by: Alexander Graf Signed-off-by: Avi Kivity diff --git a/arch/powerpc/kernel/head_44x.S b/arch/powerpc/kernel/head_44x.S index 7dd2981..d1192c5 100644 --- a/arch/powerpc/kernel/head_44x.S +++ b/arch/powerpc/kernel/head_44x.S @@ -248,10 +248,11 @@ _ENTRY(_start); interrupt_base: /* Critical Input Interrupt */ - CRITICAL_EXCEPTION(0x0100, CriticalInput, unknown_exception) + CRITICAL_EXCEPTION(0x0100, CRITICAL, CriticalInput, unknown_exception) /* Machine Check Interrupt */ - CRITICAL_EXCEPTION(0x0200, MachineCheck, machine_check_exception) + CRITICAL_EXCEPTION(0x0200, MACHINE_CHECK, MachineCheck, \ + machine_check_exception) MCHECK_EXCEPTION(0x0210, MachineCheckA, machine_check_exception) /* Data Storage Interrupt */ @@ -261,7 +262,8 @@ interrupt_base: INSTRUCTION_STORAGE_EXCEPTION /* External Input Interrupt */ - EXCEPTION(0x0500, ExternalInput, do_IRQ, EXC_XFER_LITE) + EXCEPTION(0x0500, BOOKE_INTERRUPT_EXTERNAL, ExternalInput, \ + do_IRQ, EXC_XFER_LITE) /* Alignment Interrupt */ ALIGNMENT_EXCEPTION @@ -273,29 +275,32 @@ interrupt_base: #ifdef CONFIG_PPC_FPU FP_UNAVAILABLE_EXCEPTION #else - EXCEPTION(0x2010, FloatingPointUnavailable, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x2010, BOOKE_INTERRUPT_FP_UNAVAIL, \ + FloatingPointUnavailable, unknown_exception, EXC_XFER_EE) #endif /* System Call Interrupt */ START_EXCEPTION(SystemCall) - NORMAL_EXCEPTION_PROLOG + NORMAL_EXCEPTION_PROLOG(BOOKE_INTERRUPT_SYSCALL) EXC_XFER_EE_LITE(0x0c00, DoSyscall) /* Auxiliary Processor Unavailable Interrupt */ - EXCEPTION(0x2020, AuxillaryProcessorUnavailable, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x2020, BOOKE_INTERRUPT_AP_UNAVAIL, \ + AuxillaryProcessorUnavailable, unknown_exception, EXC_XFER_EE) /* Decrementer Interrupt */ DECREMENTER_EXCEPTION /* Fixed Internal Timer Interrupt */ /* TODO: Add FIT support */ - EXCEPTION(0x1010, FixedIntervalTimer, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1010, BOOKE_INTERRUPT_FIT, FixedIntervalTimer, \ + unknown_exception, EXC_XFER_EE) /* Watchdog Timer Interrupt */ /* TODO: Add watchdog support */ #ifdef CONFIG_BOOKE_WDT - CRITICAL_EXCEPTION(0x1020, WatchdogTimer, WatchdogException) + CRITICAL_EXCEPTION(0x1020, WATCHDOG, WatchdogTimer, WatchdogException) #else - CRITICAL_EXCEPTION(0x1020, WatchdogTimer, unknown_exception) + CRITICAL_EXCEPTION(0x1020, WATCHDOG, WatchdogTimer, unknown_exception) #endif /* Data TLB Error Interrupt */ diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h index 0e41753..51fd072 100644 --- a/arch/powerpc/kernel/head_booke.h +++ b/arch/powerpc/kernel/head_booke.h @@ -2,6 +2,8 @@ #define __HEAD_BOOKE_H__ #include /* for STACK_FRAME_REGS_MARKER */ +#include + /* * Macros used for common Book-e exception handling */ @@ -28,7 +30,7 @@ */ #define THREAD_NORMSAVE(offset) (THREAD_NORMSAVES + (offset * 4)) -#define NORMAL_EXCEPTION_PROLOG \ +#define NORMAL_EXCEPTION_PROLOG(intno) \ mtspr SPRN_SPRG_WSCRATCH0, r10; /* save one register */ \ mfspr r10, SPRN_SPRG_THREAD; \ stw r11, THREAD_NORMSAVE(0)(r10); \ @@ -113,7 +115,7 @@ * registers as the normal prolog above. Instead we use a portion of the * critical/machine check exception stack at low physical addresses. */ -#define EXC_LEVEL_EXCEPTION_PROLOG(exc_level, exc_level_srr0, exc_level_srr1) \ +#define EXC_LEVEL_EXCEPTION_PROLOG(exc_level, intno, exc_level_srr0, exc_level_srr1) \ mtspr SPRN_SPRG_WSCRATCH_##exc_level,r8; \ BOOKE_LOAD_EXC_LEVEL_STACK(exc_level);/* r8 points to the exc_level stack*/ \ stw r9,GPR9(r8); /* save various registers */\ @@ -162,12 +164,13 @@ SAVE_4GPRS(3, r11); \ SAVE_2GPRS(7, r11) -#define CRITICAL_EXCEPTION_PROLOG \ - EXC_LEVEL_EXCEPTION_PROLOG(CRIT, SPRN_CSRR0, SPRN_CSRR1) +#define CRITICAL_EXCEPTION_PROLOG(intno) \ + EXC_LEVEL_EXCEPTION_PROLOG(CRIT, intno, SPRN_CSRR0, SPRN_CSRR1) #define DEBUG_EXCEPTION_PROLOG \ - EXC_LEVEL_EXCEPTION_PROLOG(DBG, SPRN_DSRR0, SPRN_DSRR1) + EXC_LEVEL_EXCEPTION_PROLOG(DBG, DEBUG, SPRN_DSRR0, SPRN_DSRR1) #define MCHECK_EXCEPTION_PROLOG \ - EXC_LEVEL_EXCEPTION_PROLOG(MC, SPRN_MCSRR0, SPRN_MCSRR1) + EXC_LEVEL_EXCEPTION_PROLOG(MC, MACHINE_CHECK, \ + SPRN_MCSRR0, SPRN_MCSRR1) /* * Exception vectors. @@ -181,16 +184,16 @@ label: .long func; \ .long ret_from_except_full -#define EXCEPTION(n, label, hdlr, xfer) \ +#define EXCEPTION(n, intno, label, hdlr, xfer) \ START_EXCEPTION(label); \ - NORMAL_EXCEPTION_PROLOG; \ + NORMAL_EXCEPTION_PROLOG(intno); \ addi r3,r1,STACK_FRAME_OVERHEAD; \ xfer(n, hdlr) -#define CRITICAL_EXCEPTION(n, label, hdlr) \ - START_EXCEPTION(label); \ - CRITICAL_EXCEPTION_PROLOG; \ - addi r3,r1,STACK_FRAME_OVERHEAD; \ +#define CRITICAL_EXCEPTION(n, intno, label, hdlr) \ + START_EXCEPTION(label); \ + CRITICAL_EXCEPTION_PROLOG(intno); \ + addi r3,r1,STACK_FRAME_OVERHEAD; \ EXC_XFER_TEMPLATE(hdlr, n+2, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), \ NOCOPY, crit_transfer_to_handler, \ ret_from_crit_exc) @@ -302,7 +305,7 @@ label: #define DEBUG_CRIT_EXCEPTION \ START_EXCEPTION(DebugCrit); \ - CRITICAL_EXCEPTION_PROLOG; \ + CRITICAL_EXCEPTION_PROLOG(DEBUG); \ \ /* \ * If there is a single step or branch-taken exception in an \ @@ -355,7 +358,7 @@ label: #define DATA_STORAGE_EXCEPTION \ START_EXCEPTION(DataStorage) \ - NORMAL_EXCEPTION_PROLOG; \ + NORMAL_EXCEPTION_PROLOG(DATA_STORAGE); \ mfspr r5,SPRN_ESR; /* Grab the ESR and save it */ \ stw r5,_ESR(r11); \ mfspr r4,SPRN_DEAR; /* Grab the DEAR */ \ @@ -363,7 +366,7 @@ label: #define INSTRUCTION_STORAGE_EXCEPTION \ START_EXCEPTION(InstructionStorage) \ - NORMAL_EXCEPTION_PROLOG; \ + NORMAL_EXCEPTION_PROLOG(INST_STORAGE); \ mfspr r5,SPRN_ESR; /* Grab the ESR and save it */ \ stw r5,_ESR(r11); \ mr r4,r12; /* Pass SRR0 as arg2 */ \ @@ -372,7 +375,7 @@ label: #define ALIGNMENT_EXCEPTION \ START_EXCEPTION(Alignment) \ - NORMAL_EXCEPTION_PROLOG; \ + NORMAL_EXCEPTION_PROLOG(ALIGNMENT); \ mfspr r4,SPRN_DEAR; /* Grab the DEAR and save it */ \ stw r4,_DEAR(r11); \ addi r3,r1,STACK_FRAME_OVERHEAD; \ @@ -380,7 +383,7 @@ label: #define PROGRAM_EXCEPTION \ START_EXCEPTION(Program) \ - NORMAL_EXCEPTION_PROLOG; \ + NORMAL_EXCEPTION_PROLOG(PROGRAM); \ mfspr r4,SPRN_ESR; /* Grab the ESR and save it */ \ stw r4,_ESR(r11); \ addi r3,r1,STACK_FRAME_OVERHEAD; \ @@ -388,7 +391,7 @@ label: #define DECREMENTER_EXCEPTION \ START_EXCEPTION(Decrementer) \ - NORMAL_EXCEPTION_PROLOG; \ + NORMAL_EXCEPTION_PROLOG(DECREMENTER); \ lis r0,TSR_DIS@h; /* Setup the DEC interrupt mask */ \ mtspr SPRN_TSR,r0; /* Clear the DEC interrupt */ \ addi r3,r1,STACK_FRAME_OVERHEAD; \ @@ -396,7 +399,7 @@ label: #define FP_UNAVAILABLE_EXCEPTION \ START_EXCEPTION(FloatingPointUnavailable) \ - NORMAL_EXCEPTION_PROLOG; \ + NORMAL_EXCEPTION_PROLOG(FP_UNAVAIL); \ beq 1f; \ bl load_up_fpu; /* if from user, just load it up */ \ b fast_exception_return; \ diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S index 28e6259..7c406dd 100644 --- a/arch/powerpc/kernel/head_fsl_booke.S +++ b/arch/powerpc/kernel/head_fsl_booke.S @@ -301,19 +301,20 @@ _ENTRY(__early_start) interrupt_base: /* Critical Input Interrupt */ - CRITICAL_EXCEPTION(0x0100, CriticalInput, unknown_exception) + CRITICAL_EXCEPTION(0x0100, CRITICAL, CriticalInput, unknown_exception) /* Machine Check Interrupt */ #ifdef CONFIG_E200 /* no RFMCI, MCSRRs on E200 */ - CRITICAL_EXCEPTION(0x0200, MachineCheck, machine_check_exception) + CRITICAL_EXCEPTION(0x0200, MACHINE_CHECK, MachineCheck, \ + machine_check_exception) #else MCHECK_EXCEPTION(0x0200, MachineCheck, machine_check_exception) #endif /* Data Storage Interrupt */ START_EXCEPTION(DataStorage) - NORMAL_EXCEPTION_PROLOG + NORMAL_EXCEPTION_PROLOG(DATA_STORAGE) mfspr r5,SPRN_ESR /* Grab the ESR, save it, pass arg3 */ stw r5,_ESR(r11) mfspr r4,SPRN_DEAR /* Grab the DEAR, save it, pass arg2 */ @@ -328,7 +329,7 @@ interrupt_base: INSTRUCTION_STORAGE_EXCEPTION /* External Input Interrupt */ - EXCEPTION(0x0500, ExternalInput, do_IRQ, EXC_XFER_LITE) + EXCEPTION(0x0500, EXTERNAL, ExternalInput, do_IRQ, EXC_XFER_LITE) /* Alignment Interrupt */ ALIGNMENT_EXCEPTION @@ -342,32 +343,36 @@ interrupt_base: #else #ifdef CONFIG_E200 /* E200 treats 'normal' floating point instructions as FP Unavail exception */ - EXCEPTION(0x0800, FloatingPointUnavailable, program_check_exception, EXC_XFER_EE) + EXCEPTION(0x0800, FP_UNAVAIL, FloatingPointUnavailable, \ + program_check_exception, EXC_XFER_EE) #else - EXCEPTION(0x0800, FloatingPointUnavailable, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x0800, FP_UNAVAIL, FloatingPointUnavailable, \ + unknown_exception, EXC_XFER_EE) #endif #endif /* System Call Interrupt */ START_EXCEPTION(SystemCall) - NORMAL_EXCEPTION_PROLOG + NORMAL_EXCEPTION_PROLOG(SYSCALL) EXC_XFER_EE_LITE(0x0c00, DoSyscall) /* Auxiliary Processor Unavailable Interrupt */ - EXCEPTION(0x2900, AuxillaryProcessorUnavailable, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x2900, AP_UNAVAIL, AuxillaryProcessorUnavailable, \ + unknown_exception, EXC_XFER_EE) /* Decrementer Interrupt */ DECREMENTER_EXCEPTION /* Fixed Internal Timer Interrupt */ /* TODO: Add FIT support */ - EXCEPTION(0x3100, FixedIntervalTimer, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x3100, FIT, FixedIntervalTimer, \ + unknown_exception, EXC_XFER_EE) /* Watchdog Timer Interrupt */ #ifdef CONFIG_BOOKE_WDT - CRITICAL_EXCEPTION(0x3200, WatchdogTimer, WatchdogException) + CRITICAL_EXCEPTION(0x3200, WATCHDOG, WatchdogTimer, WatchdogException) #else - CRITICAL_EXCEPTION(0x3200, WatchdogTimer, unknown_exception) + CRITICAL_EXCEPTION(0x3200, WATCHDOG, WatchdogTimer, unknown_exception) #endif /* Data TLB Error Interrupt */ @@ -538,31 +543,38 @@ interrupt_base: #ifdef CONFIG_SPE /* SPE Unavailable */ START_EXCEPTION(SPEUnavailable) - NORMAL_EXCEPTION_PROLOG + NORMAL_EXCEPTION_PROLOG(SPE_UNAVAIL) bne load_up_spe addi r3,r1,STACK_FRAME_OVERHEAD EXC_XFER_EE_LITE(0x2010, KernelSPE) #else - EXCEPTION(0x2020, SPEUnavailable, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x2020, SPE_UNAVAIL, SPEUnavailable, \ + unknown_exception, EXC_XFER_EE) #endif /* CONFIG_SPE */ /* SPE Floating Point Data */ #ifdef CONFIG_SPE - EXCEPTION(0x2030, SPEFloatingPointData, SPEFloatingPointException, EXC_XFER_EE); + EXCEPTION(0x2030, SPE_FP_DATA, SPEFloatingPointData, \ + SPEFloatingPointException, EXC_XFER_EE); /* SPE Floating Point Round */ - EXCEPTION(0x2050, SPEFloatingPointRound, SPEFloatingPointRoundException, EXC_XFER_EE) + EXCEPTION(0x2050, SPE_FP_ROUND, SPEFloatingPointRound, \ + SPEFloatingPointRoundException, EXC_XFER_EE) #else - EXCEPTION(0x2040, SPEFloatingPointData, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x2050, SPEFloatingPointRound, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x2040, SPE_FP_DATA, SPEFloatingPointData, \ + unknown_exception, EXC_XFER_EE) + EXCEPTION(0x2050, SPE_FP_ROUND, SPEFloatingPointRound, \ + unknown_exception, EXC_XFER_EE) #endif /* CONFIG_SPE */ /* Performance Monitor */ - EXCEPTION(0x2060, PerformanceMonitor, performance_monitor_exception, EXC_XFER_STD) + EXCEPTION(0x2060, PERFORMANCE_MONITOR, PerformanceMonitor, \ + performance_monitor_exception, EXC_XFER_STD) - EXCEPTION(0x2070, Doorbell, doorbell_exception, EXC_XFER_STD) + EXCEPTION(0x2070, DOORBELL, Doorbell, doorbell_exception, EXC_XFER_STD) - CRITICAL_EXCEPTION(0x2080, CriticalDoorbell, unknown_exception) + CRITICAL_EXCEPTION(0x2080, DOORBELL_CRITICAL, \ + CriticalDoorbell, unknown_exception) /* Debug Interrupt */ DEBUG_DEBUG_EXCEPTION -- cgit v0.10.2 From d30f6e480055e5be12e7a03fd11ea912a451daa5 Mon Sep 17 00:00:00 2001 From: Scott Wood Date: Tue, 20 Dec 2011 15:34:43 +0000 Subject: KVM: PPC: booke: category E.HV (GS-mode) support Chips such as e500mc that implement category E.HV in Power ISA 2.06 provide hardware virtualization features, including a new MSR mode for guest state. The guest OS can perform many operations without trapping into the hypervisor, including transitions to and from guest userspace. Since we can use SRR1[GS] to reliably tell whether an exception came from guest state, instead of messing around with IVPR, we use DO_KVM similarly to book3s. Current issues include: - Machine checks from guest state are not routed to the host handler. - The guest can cause a host oops by executing an emulated instruction in a page that lacks read permission. Existing e500/4xx support has the same problem. Includes work by Ashish Kalra , Varun Sethi , and Liu Yu . Signed-off-by: Scott Wood [agraf: remove pt_regs usage] Signed-off-by: Alexander Graf Signed-off-by: Avi Kivity diff --git a/arch/powerpc/include/asm/dbell.h b/arch/powerpc/include/asm/dbell.h index efa74ac..d7365b0 100644 --- a/arch/powerpc/include/asm/dbell.h +++ b/arch/powerpc/include/asm/dbell.h @@ -19,6 +19,7 @@ #define PPC_DBELL_MSG_BRDCAST (0x04000000) #define PPC_DBELL_TYPE(x) (((x) & 0xf) << (63-36)) +#define PPC_DBELL_LPID(x) ((x) << (63 - 49)) enum ppc_dbell { PPC_DBELL = 0, /* doorbell */ PPC_DBELL_CRIT = 1, /* critical doorbell */ diff --git a/arch/powerpc/include/asm/kvm_asm.h b/arch/powerpc/include/asm/kvm_asm.h index 7b1f0e0..0978152 100644 --- a/arch/powerpc/include/asm/kvm_asm.h +++ b/arch/powerpc/include/asm/kvm_asm.h @@ -48,6 +48,14 @@ #define BOOKE_INTERRUPT_SPE_FP_DATA 33 #define BOOKE_INTERRUPT_SPE_FP_ROUND 34 #define BOOKE_INTERRUPT_PERFORMANCE_MONITOR 35 +#define BOOKE_INTERRUPT_DOORBELL 36 +#define BOOKE_INTERRUPT_DOORBELL_CRITICAL 37 + +/* booke_hv */ +#define BOOKE_INTERRUPT_GUEST_DBELL 38 +#define BOOKE_INTERRUPT_GUEST_DBELL_CRIT 39 +#define BOOKE_INTERRUPT_HV_SYSCALL 40 +#define BOOKE_INTERRUPT_HV_PRIV 41 /* book3s */ diff --git a/arch/powerpc/include/asm/kvm_booke_hv_asm.h b/arch/powerpc/include/asm/kvm_booke_hv_asm.h new file mode 100644 index 0000000..30a600f --- /dev/null +++ b/arch/powerpc/include/asm/kvm_booke_hv_asm.h @@ -0,0 +1,49 @@ +/* + * Copyright 2010-2011 Freescale Semiconductor, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + */ + +#ifndef ASM_KVM_BOOKE_HV_ASM_H +#define ASM_KVM_BOOKE_HV_ASM_H + +#ifdef __ASSEMBLY__ + +/* + * All exceptions from guest state must go through KVM + * (except for those which are delivered directly to the guest) -- + * there are no exceptions for which we fall through directly to + * the normal host handler. + * + * Expected inputs (normal exceptions): + * SCRATCH0 = saved r10 + * r10 = thread struct + * r11 = appropriate SRR1 variant (currently used as scratch) + * r13 = saved CR + * *(r10 + THREAD_NORMSAVE(0)) = saved r11 + * *(r10 + THREAD_NORMSAVE(2)) = saved r13 + * + * Expected inputs (crit/mcheck/debug exceptions): + * appropriate SCRATCH = saved r8 + * r8 = exception level stack frame + * r9 = *(r8 + _CCR) = saved CR + * r11 = appropriate SRR1 variant (currently used as scratch) + * *(r8 + GPR9) = saved r9 + * *(r8 + GPR10) = saved r10 (r10 not yet clobbered) + * *(r8 + GPR11) = saved r11 + */ +.macro DO_KVM intno srr1 +#ifdef CONFIG_KVM_BOOKE_HV +BEGIN_FTR_SECTION + mtocrf 0x80, r11 /* check MSR[GS] without clobbering reg */ + bf 3, kvmppc_resume_\intno\()_\srr1 + b kvmppc_handler_\intno\()_\srr1 +kvmppc_resume_\intno\()_\srr1: +END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV) +#endif +.endm + +#endif /*__ASSEMBLY__ */ +#endif /* ASM_KVM_BOOKE_HV_ASM_H */ diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 5b81cbc..e645623 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -106,6 +106,8 @@ struct kvm_vcpu_stat { u32 dec_exits; u32 ext_intr_exits; u32 halt_wakeup; + u32 dbell_exits; + u32 gdbell_exits; #ifdef CONFIG_PPC_BOOK3S u32 pf_storage; u32 pf_instruc; @@ -140,6 +142,7 @@ enum kvm_exit_types { EMULATED_TLBSX_EXITS, EMULATED_TLBWE_EXITS, EMULATED_RFI_EXITS, + EMULATED_RFCI_EXITS, DEC_EXITS, EXT_INTR_EXITS, HALT_WAKEUP, @@ -147,6 +150,8 @@ enum kvm_exit_types { FP_UNAVAIL, DEBUG_EXITS, TIMEINGUEST, + DBELL_EXITS, + GDBELL_EXITS, __NUMBER_OF_KVM_EXIT_TYPES }; @@ -217,10 +222,10 @@ struct kvm_arch_memory_slot { }; struct kvm_arch { + unsigned int lpid; #ifdef CONFIG_KVM_BOOK3S_64_HV unsigned long hpt_virt; struct revmap_entry *revmap; - unsigned int lpid; unsigned int host_lpid; unsigned long host_lpcr; unsigned long sdr1; @@ -345,6 +350,17 @@ struct kvm_vcpu_arch { u64 vsr[64]; #endif +#ifdef CONFIG_KVM_BOOKE_HV + u32 host_mas4; + u32 host_mas6; + u32 shadow_epcr; + u32 epcr; + u32 shadow_msrp; + u32 eplc; + u32 epsc; + u32 oldpir; +#endif + #ifdef CONFIG_PPC_BOOK3S /* For Gekko paired singles */ u32 qpr[32]; @@ -428,6 +444,7 @@ struct kvm_vcpu_arch { ulong queued_esr; u32 tlbcfg[4]; u32 mmucfg; + u32 epr; #endif gpa_t paddr_accessed; diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 731e920..e709975 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -139,6 +139,9 @@ extern int kvmppc_core_prepare_memory_region(struct kvm *kvm, extern void kvmppc_core_commit_memory_region(struct kvm *kvm, struct kvm_userspace_memory_region *mem); +extern int kvmppc_bookehv_init(void); +extern void kvmppc_bookehv_exit(void); + /* * Cuts out inst bits with ordering according to spec. * That means the leftmost bit is zero. All given bits are included. diff --git a/arch/powerpc/include/asm/mmu-book3e.h b/arch/powerpc/include/asm/mmu-book3e.h index cdb5421..eeabcdb 100644 --- a/arch/powerpc/include/asm/mmu-book3e.h +++ b/arch/powerpc/include/asm/mmu-book3e.h @@ -104,6 +104,8 @@ #define MAS4_TSIZED_MASK 0x00000f80 /* Default TSIZE */ #define MAS4_TSIZED_SHIFT 7 +#define MAS5_SGS 0x80000000 + #define MAS6_SPID0 0x3FFF0000 #define MAS6_SPID1 0x00007FFE #define MAS6_ISIZE(x) MAS1_TSIZE(x) @@ -118,6 +120,10 @@ #define MAS7_RPN 0xFFFFFFFF +#define MAS8_TGS 0x80000000 /* Guest space */ +#define MAS8_VF 0x40000000 /* Virtualization Fault */ +#define MAS8_TLPID 0x000000ff + /* Bit definitions for MMUCFG */ #define MMUCFG_MAVN 0x00000003 /* MMU Architecture Version Number */ #define MMUCFG_MAVN_V1 0x00000000 /* v1.0 */ diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h index 8e2d037..2a25ab0 100644 --- a/arch/powerpc/include/asm/processor.h +++ b/arch/powerpc/include/asm/processor.h @@ -243,6 +243,9 @@ struct thread_struct { #ifdef CONFIG_KVM_BOOK3S_32_HANDLER void* kvm_shadow_vcpu; /* KVM internal data */ #endif /* CONFIG_KVM_BOOK3S_32_HANDLER */ +#if defined(CONFIG_KVM) && defined(CONFIG_BOOKE) + struct kvm_vcpu *kvm_vcpu; +#endif #ifdef CONFIG_PPC64 unsigned long dscr; int dscr_inherit; diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index 9d7f0fb..f0cb7f4 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -257,7 +257,9 @@ #define LPCR_LPES_SH 2 #define LPCR_RMI 0x00000002 /* real mode is cache inhibit */ #define LPCR_HDICE 0x00000001 /* Hyp Decr enable (HV,PR,EE) */ +#ifndef SPRN_LPID #define SPRN_LPID 0x13F /* Logical Partition Identifier */ +#endif #define LPID_RSVD 0x3ff /* Reserved LPID for partn switching */ #define SPRN_HMER 0x150 /* Hardware m? error recovery */ #define SPRN_HMEER 0x151 /* Hardware m? enable error recovery */ diff --git a/arch/powerpc/include/asm/reg_booke.h b/arch/powerpc/include/asm/reg_booke.h index b86faa9..815e404 100644 --- a/arch/powerpc/include/asm/reg_booke.h +++ b/arch/powerpc/include/asm/reg_booke.h @@ -61,18 +61,30 @@ extern u32 booke_wdt_period; #define SPRN_SPRG7W 0x117 /* Special Purpose Register General 7 Write */ #define SPRN_EPCR 0x133 /* Embedded Processor Control Register */ #define SPRN_DBCR2 0x136 /* Debug Control Register 2 */ +#define SPRN_MSRP 0x137 /* MSR Protect Register */ #define SPRN_IAC3 0x13A /* Instruction Address Compare 3 */ #define SPRN_IAC4 0x13B /* Instruction Address Compare 4 */ #define SPRN_DVC1 0x13E /* Data Value Compare Register 1 */ #define SPRN_DVC2 0x13F /* Data Value Compare Register 2 */ +#define SPRN_LPID 0x152 /* Logical Partition ID */ #define SPRN_MAS8 0x155 /* MMU Assist Register 8 */ #define SPRN_TLB0PS 0x158 /* TLB 0 Page Size Register */ #define SPRN_TLB1PS 0x159 /* TLB 1 Page Size Register */ #define SPRN_MAS5_MAS6 0x15c /* MMU Assist Register 5 || 6 */ #define SPRN_MAS8_MAS1 0x15d /* MMU Assist Register 8 || 1 */ #define SPRN_EPTCFG 0x15e /* Embedded Page Table Config */ +#define SPRN_GSPRG0 0x170 /* Guest SPRG0 */ +#define SPRN_GSPRG1 0x171 /* Guest SPRG1 */ +#define SPRN_GSPRG2 0x172 /* Guest SPRG2 */ +#define SPRN_GSPRG3 0x173 /* Guest SPRG3 */ #define SPRN_MAS7_MAS3 0x174 /* MMU Assist Register 7 || 3 */ #define SPRN_MAS0_MAS1 0x175 /* MMU Assist Register 0 || 1 */ +#define SPRN_GSRR0 0x17A /* Guest SRR0 */ +#define SPRN_GSRR1 0x17B /* Guest SRR1 */ +#define SPRN_GEPR 0x17C /* Guest EPR */ +#define SPRN_GDEAR 0x17D /* Guest DEAR */ +#define SPRN_GPIR 0x17E /* Guest PIR */ +#define SPRN_GESR 0x17F /* Guest Exception Syndrome Register */ #define SPRN_IVOR0 0x190 /* Interrupt Vector Offset Register 0 */ #define SPRN_IVOR1 0x191 /* Interrupt Vector Offset Register 1 */ #define SPRN_IVOR2 0x192 /* Interrupt Vector Offset Register 2 */ @@ -93,6 +105,13 @@ extern u32 booke_wdt_period; #define SPRN_IVOR39 0x1B1 /* Interrupt Vector Offset Register 39 */ #define SPRN_IVOR40 0x1B2 /* Interrupt Vector Offset Register 40 */ #define SPRN_IVOR41 0x1B3 /* Interrupt Vector Offset Register 41 */ +#define SPRN_GIVOR2 0x1B8 /* Guest IVOR2 */ +#define SPRN_GIVOR3 0x1B9 /* Guest IVOR3 */ +#define SPRN_GIVOR4 0x1BA /* Guest IVOR4 */ +#define SPRN_GIVOR8 0x1BB /* Guest IVOR8 */ +#define SPRN_GIVOR13 0x1BC /* Guest IVOR13 */ +#define SPRN_GIVOR14 0x1BD /* Guest IVOR14 */ +#define SPRN_GIVPR 0x1BF /* Guest IVPR */ #define SPRN_SPEFSCR 0x200 /* SPE & Embedded FP Status & Control */ #define SPRN_BBEAR 0x201 /* Branch Buffer Entry Address Register */ #define SPRN_BBTAR 0x202 /* Branch Buffer Target Address Register */ @@ -245,6 +264,10 @@ extern u32 booke_wdt_period; #define MCSR_LDG 0x00002000UL /* Guarded Load */ #define MCSR_TLBSYNC 0x00000002UL /* Multiple tlbsyncs detected */ #define MCSR_BSL2_ERR 0x00000001UL /* Backside L2 cache error */ + +#define MSRP_UCLEP 0x04000000 /* Protect MSR[UCLE] */ +#define MSRP_DEP 0x00000200 /* Protect MSR[DE] */ +#define MSRP_PMMP 0x00000004 /* Protect MSR[PMM] */ #endif #ifdef CONFIG_E200 @@ -599,6 +622,17 @@ extern u32 booke_wdt_period; #define SPRN_EPCR_DMIUH 0x00400000 /* Disable MAS Interrupt updates * for hypervisor */ +/* Bit definitions for EPLC/EPSC */ +#define EPC_EPR 0x80000000 /* 1 = user, 0 = kernel */ +#define EPC_EPR_SHIFT 31 +#define EPC_EAS 0x40000000 /* Address Space */ +#define EPC_EAS_SHIFT 30 +#define EPC_EGS 0x20000000 /* 1 = guest, 0 = hypervisor */ +#define EPC_EGS_SHIFT 29 +#define EPC_ELPID 0x00ff0000 +#define EPC_ELPID_SHIFT 16 +#define EPC_EPID 0x00003fff +#define EPC_EPID_SHIFT 0 /* * The IBM-403 is an even more odd special case, as it is much diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 34b8afe9..bbede58 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -116,6 +116,9 @@ int main(void) #ifdef CONFIG_KVM_BOOK3S_32_HANDLER DEFINE(THREAD_KVM_SVCPU, offsetof(struct thread_struct, kvm_shadow_vcpu)); #endif +#ifdef CONFIG_KVM_BOOKE_HV + DEFINE(THREAD_KVM_VCPU, offsetof(struct thread_struct, kvm_vcpu)); +#endif DEFINE(TI_FLAGS, offsetof(struct thread_info, flags)); DEFINE(TI_LOCAL_FLAGS, offsetof(struct thread_info, local_flags)); @@ -387,6 +390,7 @@ int main(void) #ifdef CONFIG_KVM DEFINE(VCPU_HOST_STACK, offsetof(struct kvm_vcpu, arch.host_stack)); DEFINE(VCPU_HOST_PID, offsetof(struct kvm_vcpu, arch.host_pid)); + DEFINE(VCPU_GUEST_PID, offsetof(struct kvm_vcpu, arch.pid)); DEFINE(VCPU_GPRS, offsetof(struct kvm_vcpu, arch.gpr)); DEFINE(VCPU_VRSAVE, offsetof(struct kvm_vcpu, arch.vrsave)); DEFINE(VCPU_FPRS, offsetof(struct kvm_vcpu, arch.fpr)); @@ -429,9 +433,11 @@ int main(void) DEFINE(VCPU_SHARED_MAS4, offsetof(struct kvm_vcpu_arch_shared, mas4)); DEFINE(VCPU_SHARED_MAS6, offsetof(struct kvm_vcpu_arch_shared, mas6)); + DEFINE(VCPU_KVM, offsetof(struct kvm_vcpu, kvm)); + DEFINE(KVM_LPID, offsetof(struct kvm, arch.lpid)); + /* book3s */ #ifdef CONFIG_KVM_BOOK3S_64_HV - DEFINE(KVM_LPID, offsetof(struct kvm, arch.lpid)); DEFINE(KVM_SDR1, offsetof(struct kvm, arch.sdr1)); DEFINE(KVM_HOST_LPID, offsetof(struct kvm, arch.host_lpid)); DEFINE(KVM_HOST_LPCR, offsetof(struct kvm, arch.host_lpcr)); @@ -446,7 +452,6 @@ int main(void) DEFINE(VCPU_DAR, offsetof(struct kvm_vcpu, arch.shregs.dar)); #endif #ifdef CONFIG_PPC_BOOK3S - DEFINE(VCPU_KVM, offsetof(struct kvm_vcpu, kvm)); DEFINE(VCPU_VCPUID, offsetof(struct kvm_vcpu, vcpu_id)); DEFINE(VCPU_PURR, offsetof(struct kvm_vcpu, arch.purr)); DEFINE(VCPU_SPURR, offsetof(struct kvm_vcpu, arch.spurr)); @@ -597,6 +602,12 @@ int main(void) DEFINE(VCPU_HOST_SPEFSCR, offsetof(struct kvm_vcpu, arch.host_spefscr)); #endif +#ifdef CONFIG_KVM_BOOKE_HV + DEFINE(VCPU_HOST_MAS4, offsetof(struct kvm_vcpu, arch.host_mas4)); + DEFINE(VCPU_HOST_MAS6, offsetof(struct kvm_vcpu, arch.host_mas6)); + DEFINE(VCPU_EPLC, offsetof(struct kvm_vcpu, arch.eplc)); +#endif + #ifdef CONFIG_KVM_EXIT_TIMING DEFINE(VCPU_TIMING_EXIT_TBU, offsetof(struct kvm_vcpu, arch.timing_exit.tv32.tbu)); diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h index 51fd072..5f051ee 100644 --- a/arch/powerpc/kernel/head_booke.h +++ b/arch/powerpc/kernel/head_booke.h @@ -3,6 +3,7 @@ #include /* for STACK_FRAME_REGS_MARKER */ #include +#include /* * Macros used for common Book-e exception handling @@ -36,8 +37,9 @@ stw r11, THREAD_NORMSAVE(0)(r10); \ stw r13, THREAD_NORMSAVE(2)(r10); \ mfcr r13; /* save CR in r13 for now */\ - mfspr r11,SPRN_SRR1; /* check whether user or kernel */\ - andi. r11,r11,MSR_PR; \ + mfspr r11, SPRN_SRR1; \ + DO_KVM BOOKE_INTERRUPT_##intno SPRN_SRR1; \ + andi. r11, r11, MSR_PR; /* check whether user or kernel */\ mr r11, r1; \ beq 1f; \ /* if from user, start at top of this thread's kernel stack */ \ @@ -123,8 +125,9 @@ stw r10,GPR10(r8); \ stw r11,GPR11(r8); \ stw r9,_CCR(r8); /* save CR on stack */\ - mfspr r10,exc_level_srr1; /* check whether user or kernel */\ - andi. r10,r10,MSR_PR; \ + mfspr r11,exc_level_srr1; /* check whether user or kernel */\ + DO_KVM BOOKE_INTERRUPT_##intno exc_level_srr1; \ + andi. r11,r11,MSR_PR; \ mfspr r11,SPRN_SPRG_THREAD; /* if from user, start at top of */\ lwz r11,THREAD_INFO-THREAD(r11); /* this thread's kernel stack */\ addi r11,r11,EXC_LVL_FRAME_OVERHEAD; /* allocate stack frame */\ @@ -173,6 +176,23 @@ SPRN_MCSRR0, SPRN_MCSRR1) /* + * Guest Doorbell -- this is a bit odd in that uses GSRR0/1 despite + * being delivered to the host. This exception can only happen + * inside a KVM guest -- so we just handle up to the DO_KVM rather + * than try to fit this into one of the existing prolog macros. + */ +#define GUEST_DOORBELL_EXCEPTION \ + START_EXCEPTION(GuestDoorbell); \ + mtspr SPRN_SPRG_WSCRATCH0, r10; /* save one register */ \ + mfspr r10, SPRN_SPRG_THREAD; \ + stw r11, THREAD_NORMSAVE(0)(r10); \ + mfspr r11, SPRN_SRR1; \ + stw r13, THREAD_NORMSAVE(2)(r10); \ + mfcr r13; /* save CR in r13 for now */\ + DO_KVM BOOKE_INTERRUPT_GUEST_DBELL SPRN_GSRR1; \ + trap + +/* * Exception vectors. */ #define START_EXCEPTION(label) \ diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig index 8f64709..2c33cd3 100644 --- a/arch/powerpc/kvm/Kconfig +++ b/arch/powerpc/kvm/Kconfig @@ -90,6 +90,9 @@ config KVM_BOOK3S_64_PR depends on KVM_BOOK3S_64 && !KVM_BOOK3S_64_HV select KVM_BOOK3S_PR +config KVM_BOOKE_HV + bool + config KVM_440 bool "KVM support for PowerPC 440 processors" depends on EXPERIMENTAL && 44x diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 2ee9bae..75dbaeb 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -17,6 +17,8 @@ * * Authors: Hollis Blanchard * Christian Ehrhardt + * Scott Wood + * Varun Sethi */ #include @@ -30,9 +32,12 @@ #include #include #include -#include "timing.h" #include +#include +#include +#include +#include "timing.h" #include "booke.h" unsigned long kvmppc_booke_handlers; @@ -55,6 +60,8 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { { "dec", VCPU_STAT(dec_exits) }, { "ext_intr", VCPU_STAT(ext_intr_exits) }, { "halt_wakeup", VCPU_STAT(halt_wakeup) }, + { "doorbell", VCPU_STAT(dbell_exits) }, + { "guest doorbell", VCPU_STAT(gdbell_exits) }, { NULL } }; @@ -121,6 +128,10 @@ void kvmppc_set_msr(struct kvm_vcpu *vcpu, u32 new_msr) { u32 old_msr = vcpu->arch.shared->msr; +#ifdef CONFIG_KVM_BOOKE_HV + new_msr |= MSR_GS; +#endif + vcpu->arch.shared->msr = new_msr; kvmppc_mmu_msr_notify(vcpu, old_msr); @@ -195,6 +206,75 @@ void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu, clear_bit(BOOKE_IRQPRIO_EXTERNAL_LEVEL, &vcpu->arch.pending_exceptions); } +static void set_guest_srr(struct kvm_vcpu *vcpu, unsigned long srr0, u32 srr1) +{ +#ifdef CONFIG_KVM_BOOKE_HV + mtspr(SPRN_GSRR0, srr0); + mtspr(SPRN_GSRR1, srr1); +#else + vcpu->arch.shared->srr0 = srr0; + vcpu->arch.shared->srr1 = srr1; +#endif +} + +static void set_guest_csrr(struct kvm_vcpu *vcpu, unsigned long srr0, u32 srr1) +{ + vcpu->arch.csrr0 = srr0; + vcpu->arch.csrr1 = srr1; +} + +static void set_guest_dsrr(struct kvm_vcpu *vcpu, unsigned long srr0, u32 srr1) +{ + if (cpu_has_feature(CPU_FTR_DEBUG_LVL_EXC)) { + vcpu->arch.dsrr0 = srr0; + vcpu->arch.dsrr1 = srr1; + } else { + set_guest_csrr(vcpu, srr0, srr1); + } +} + +static void set_guest_mcsrr(struct kvm_vcpu *vcpu, unsigned long srr0, u32 srr1) +{ + vcpu->arch.mcsrr0 = srr0; + vcpu->arch.mcsrr1 = srr1; +} + +static unsigned long get_guest_dear(struct kvm_vcpu *vcpu) +{ +#ifdef CONFIG_KVM_BOOKE_HV + return mfspr(SPRN_GDEAR); +#else + return vcpu->arch.shared->dar; +#endif +} + +static void set_guest_dear(struct kvm_vcpu *vcpu, unsigned long dear) +{ +#ifdef CONFIG_KVM_BOOKE_HV + mtspr(SPRN_GDEAR, dear); +#else + vcpu->arch.shared->dar = dear; +#endif +} + +static unsigned long get_guest_esr(struct kvm_vcpu *vcpu) +{ +#ifdef CONFIG_KVM_BOOKE_HV + return mfspr(SPRN_GESR); +#else + return vcpu->arch.shared->esr; +#endif +} + +static void set_guest_esr(struct kvm_vcpu *vcpu, u32 esr) +{ +#ifdef CONFIG_KVM_BOOKE_HV + mtspr(SPRN_GESR, esr); +#else + vcpu->arch.shared->esr = esr; +#endif +} + /* Deliver the interrupt of the corresponding priority, if possible. */ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu, unsigned int priority) @@ -206,6 +286,7 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu, ulong crit_r1 = kvmppc_get_gpr(vcpu, 1); bool crit; bool keep_irq = false; + enum int_class int_class; /* Truncate crit indicators in 32 bit mode */ if (!(vcpu->arch.shared->msr & MSR_SF)) { @@ -241,16 +322,20 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu, case BOOKE_IRQPRIO_AP_UNAVAIL: case BOOKE_IRQPRIO_ALIGNMENT: allowed = 1; - msr_mask = MSR_CE|MSR_ME|MSR_DE; + msr_mask = MSR_GS | MSR_CE | MSR_ME | MSR_DE; + int_class = INT_CLASS_NONCRIT; break; case BOOKE_IRQPRIO_CRITICAL: - case BOOKE_IRQPRIO_WATCHDOG: allowed = vcpu->arch.shared->msr & MSR_CE; - msr_mask = MSR_ME; + allowed = allowed && !crit; + msr_mask = MSR_GS | MSR_ME; + int_class = INT_CLASS_CRIT; break; case BOOKE_IRQPRIO_MACHINE_CHECK: allowed = vcpu->arch.shared->msr & MSR_ME; - msr_mask = 0; + allowed = allowed && !crit; + msr_mask = MSR_GS; + int_class = INT_CLASS_MC; break; case BOOKE_IRQPRIO_DECREMENTER: case BOOKE_IRQPRIO_FIT: @@ -259,28 +344,62 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu, case BOOKE_IRQPRIO_EXTERNAL: allowed = vcpu->arch.shared->msr & MSR_EE; allowed = allowed && !crit; - msr_mask = MSR_CE|MSR_ME|MSR_DE; + msr_mask = MSR_GS | MSR_CE | MSR_ME | MSR_DE; + int_class = INT_CLASS_NONCRIT; break; case BOOKE_IRQPRIO_DEBUG: allowed = vcpu->arch.shared->msr & MSR_DE; - msr_mask = MSR_ME; + allowed = allowed && !crit; + msr_mask = MSR_GS | MSR_ME; + int_class = INT_CLASS_CRIT; break; } if (allowed) { - vcpu->arch.shared->srr0 = vcpu->arch.pc; - vcpu->arch.shared->srr1 = vcpu->arch.shared->msr; + switch (int_class) { + case INT_CLASS_NONCRIT: + set_guest_srr(vcpu, vcpu->arch.pc, + vcpu->arch.shared->msr); + break; + case INT_CLASS_CRIT: + set_guest_csrr(vcpu, vcpu->arch.pc, + vcpu->arch.shared->msr); + break; + case INT_CLASS_DBG: + set_guest_dsrr(vcpu, vcpu->arch.pc, + vcpu->arch.shared->msr); + break; + case INT_CLASS_MC: + set_guest_mcsrr(vcpu, vcpu->arch.pc, + vcpu->arch.shared->msr); + break; + } + vcpu->arch.pc = vcpu->arch.ivpr | vcpu->arch.ivor[priority]; if (update_esr == true) - vcpu->arch.shared->esr = vcpu->arch.queued_esr; + set_guest_esr(vcpu, vcpu->arch.queued_esr); if (update_dear == true) - vcpu->arch.shared->dar = vcpu->arch.queued_dear; + set_guest_dear(vcpu, vcpu->arch.queued_dear); kvmppc_set_msr(vcpu, vcpu->arch.shared->msr & msr_mask); if (!keep_irq) clear_bit(priority, &vcpu->arch.pending_exceptions); } +#ifdef CONFIG_KVM_BOOKE_HV + /* + * If an interrupt is pending but masked, raise a guest doorbell + * so that we are notified when the guest enables the relevant + * MSR bit. + */ + if (vcpu->arch.pending_exceptions & BOOKE_IRQMASK_EE) + kvmppc_set_pending_interrupt(vcpu, INT_CLASS_NONCRIT); + if (vcpu->arch.pending_exceptions & BOOKE_IRQMASK_CE) + kvmppc_set_pending_interrupt(vcpu, INT_CLASS_CRIT); + if (vcpu->arch.pending_exceptions & BOOKE_IRQPRIO_MACHINE_CHECK) + kvmppc_set_pending_interrupt(vcpu, INT_CLASS_MC); +#endif + return allowed; } @@ -344,6 +463,11 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) return -EINVAL; } + if (!current->thread.kvm_vcpu) { + WARN(1, "no vcpu\n"); + return -EPERM; + } + local_irq_disable(); kvmppc_core_prepare_to_enter(vcpu); @@ -363,6 +487,38 @@ out: return ret; } +static int emulation_exit(struct kvm_run *run, struct kvm_vcpu *vcpu) +{ + enum emulation_result er; + + er = kvmppc_emulate_instruction(run, vcpu); + switch (er) { + case EMULATE_DONE: + /* don't overwrite subtypes, just account kvm_stats */ + kvmppc_account_exit_stat(vcpu, EMULATED_INST_EXITS); + /* Future optimization: only reload non-volatiles if + * they were actually modified by emulation. */ + return RESUME_GUEST_NV; + + case EMULATE_DO_DCR: + run->exit_reason = KVM_EXIT_DCR; + return RESUME_HOST; + + case EMULATE_FAIL: + /* XXX Deliver Program interrupt to guest. */ + printk(KERN_CRIT "%s: emulation at %lx failed (%08x)\n", + __func__, vcpu->arch.pc, vcpu->arch.last_inst); + /* For debugging, encode the failing instruction and + * report it to userspace. */ + run->hw.hardware_exit_reason = ~0ULL << 32; + run->hw.hardware_exit_reason |= vcpu->arch.last_inst; + return RESUME_HOST; + + default: + BUG(); + } +} + /** * kvmppc_handle_exit * @@ -371,12 +527,30 @@ out: int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, unsigned int exit_nr) { - enum emulation_result er; int r = RESUME_HOST; /* update before a new last_exit_type is rewritten */ kvmppc_update_timing_stats(vcpu); + switch (exit_nr) { + case BOOKE_INTERRUPT_EXTERNAL: + do_IRQ(current->thread.regs); + break; + + case BOOKE_INTERRUPT_DECREMENTER: + timer_interrupt(current->thread.regs); + break; + +#if defined(CONFIG_PPC_FSL_BOOK3E) || defined(CONFIG_PPC_BOOK3E_64) + case BOOKE_INTERRUPT_DOORBELL: + doorbell_exception(current->thread.regs); + break; +#endif + case BOOKE_INTERRUPT_MACHINE_CHECK: + /* FIXME */ + break; + } + local_irq_enable(); run->exit_reason = KVM_EXIT_UNKNOWN; @@ -384,30 +558,56 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, switch (exit_nr) { case BOOKE_INTERRUPT_MACHINE_CHECK: - printk("MACHINE CHECK: %lx\n", mfspr(SPRN_MCSR)); - kvmppc_dump_vcpu(vcpu); - r = RESUME_HOST; + kvm_resched(vcpu); + r = RESUME_GUEST; break; case BOOKE_INTERRUPT_EXTERNAL: kvmppc_account_exit(vcpu, EXT_INTR_EXITS); - if (need_resched()) - cond_resched(); + kvm_resched(vcpu); r = RESUME_GUEST; break; case BOOKE_INTERRUPT_DECREMENTER: - /* Since we switched IVPR back to the host's value, the host - * handled this interrupt the moment we enabled interrupts. - * Now we just offer it a chance to reschedule the guest. */ kvmppc_account_exit(vcpu, DEC_EXITS); - if (need_resched()) - cond_resched(); + kvm_resched(vcpu); r = RESUME_GUEST; break; + case BOOKE_INTERRUPT_DOORBELL: + kvmppc_account_exit(vcpu, DBELL_EXITS); + kvm_resched(vcpu); + r = RESUME_GUEST; + break; + + case BOOKE_INTERRUPT_GUEST_DBELL_CRIT: + kvmppc_account_exit(vcpu, GDBELL_EXITS); + + /* + * We are here because there is a pending guest interrupt + * which could not be delivered as MSR_CE or MSR_ME was not + * set. Once we break from here we will retry delivery. + */ + r = RESUME_GUEST; + break; + + case BOOKE_INTERRUPT_GUEST_DBELL: + kvmppc_account_exit(vcpu, GDBELL_EXITS); + + /* + * We are here because there is a pending guest interrupt + * which could not be delivered as MSR_EE was not set. Once + * we break from here we will retry delivery. + */ + r = RESUME_GUEST; + break; + + case BOOKE_INTERRUPT_HV_PRIV: + r = emulation_exit(run, vcpu); + break; + case BOOKE_INTERRUPT_PROGRAM: - if (vcpu->arch.shared->msr & MSR_PR) { + if (vcpu->arch.shared->msr & (MSR_PR | MSR_GS)) { /* Program traps generated by user-level software must be handled * by the guest kernel. */ kvmppc_core_queue_program(vcpu, vcpu->arch.fault_esr); @@ -416,32 +616,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, break; } - er = kvmppc_emulate_instruction(run, vcpu); - switch (er) { - case EMULATE_DONE: - /* don't overwrite subtypes, just account kvm_stats */ - kvmppc_account_exit_stat(vcpu, EMULATED_INST_EXITS); - /* Future optimization: only reload non-volatiles if - * they were actually modified by emulation. */ - r = RESUME_GUEST_NV; - break; - case EMULATE_DO_DCR: - run->exit_reason = KVM_EXIT_DCR; - r = RESUME_HOST; - break; - case EMULATE_FAIL: - /* XXX Deliver Program interrupt to guest. */ - printk(KERN_CRIT "%s: emulation at %lx failed (%08x)\n", - __func__, vcpu->arch.pc, vcpu->arch.last_inst); - /* For debugging, encode the failing instruction and - * report it to userspace. */ - run->hw.hardware_exit_reason = ~0ULL << 32; - run->hw.hardware_exit_reason |= vcpu->arch.last_inst; - r = RESUME_HOST; - break; - default: - BUG(); - } + r = emulation_exit(run, vcpu); break; case BOOKE_INTERRUPT_FP_UNAVAIL: @@ -506,6 +681,21 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, r = RESUME_GUEST; break; +#ifdef CONFIG_KVM_BOOKE_HV + case BOOKE_INTERRUPT_HV_SYSCALL: + if (!(vcpu->arch.shared->msr & MSR_PR)) { + kvmppc_set_gpr(vcpu, 3, kvmppc_kvm_pv(vcpu)); + } else { + /* + * hcall from guest userspace -- send privileged + * instruction program check. + */ + kvmppc_core_queue_program(vcpu, ESR_PPR); + } + + r = RESUME_GUEST; + break; +#else case BOOKE_INTERRUPT_SYSCALL: if (!(vcpu->arch.shared->msr & MSR_PR) && (((u32)kvmppc_get_gpr(vcpu, 0)) == KVM_SC_MAGIC_R0)) { @@ -519,6 +709,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, kvmppc_account_exit(vcpu, SYSCALL_EXITS); r = RESUME_GUEST; break; +#endif case BOOKE_INTERRUPT_DTLB_MISS: { unsigned long eaddr = vcpu->arch.fault_dear; @@ -659,12 +850,15 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) int r; vcpu->arch.pc = 0; - vcpu->arch.shared->msr = 0; - vcpu->arch.shadow_msr = MSR_USER | MSR_DE | MSR_IS | MSR_DS; vcpu->arch.shared->pir = vcpu->vcpu_id; kvmppc_set_gpr(vcpu, 1, (16<<20) - 8); /* -8 for the callee-save LR slot */ + kvmppc_set_msr(vcpu, 0); +#ifndef CONFIG_KVM_BOOKE_HV + vcpu->arch.shadow_msr = MSR_USER | MSR_DE | MSR_IS | MSR_DS; vcpu->arch.shadow_pid = 1; + vcpu->arch.shared->msr = 0; +#endif /* Eye-catching numbers so we know if the guest takes an interrupt * before it's programmed its own IVPR/IVORs. */ @@ -745,8 +939,8 @@ static void get_sregs_base(struct kvm_vcpu *vcpu, sregs->u.e.csrr0 = vcpu->arch.csrr0; sregs->u.e.csrr1 = vcpu->arch.csrr1; sregs->u.e.mcsr = vcpu->arch.mcsr; - sregs->u.e.esr = vcpu->arch.shared->esr; - sregs->u.e.dear = vcpu->arch.shared->dar; + sregs->u.e.esr = get_guest_esr(vcpu); + sregs->u.e.dear = get_guest_dear(vcpu); sregs->u.e.tsr = vcpu->arch.tsr; sregs->u.e.tcr = vcpu->arch.tcr; sregs->u.e.dec = kvmppc_get_dec(vcpu, tb); @@ -763,8 +957,8 @@ static int set_sregs_base(struct kvm_vcpu *vcpu, vcpu->arch.csrr0 = sregs->u.e.csrr0; vcpu->arch.csrr1 = sregs->u.e.csrr1; vcpu->arch.mcsr = sregs->u.e.mcsr; - vcpu->arch.shared->esr = sregs->u.e.esr; - vcpu->arch.shared->dar = sregs->u.e.dear; + set_guest_esr(vcpu, sregs->u.e.esr); + set_guest_dear(vcpu, sregs->u.e.dear); vcpu->arch.vrsave = sregs->u.e.vrsave; kvmppc_set_tcr(vcpu, sregs->u.e.tcr); @@ -961,14 +1155,17 @@ void kvmppc_decrementer_func(unsigned long data) void kvmppc_booke_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { + current->thread.kvm_vcpu = vcpu; } void kvmppc_booke_vcpu_put(struct kvm_vcpu *vcpu) { + current->thread.kvm_vcpu = NULL; } int __init kvmppc_booke_init(void) { +#ifndef CONFIG_KVM_BOOKE_HV unsigned long ivor[16]; unsigned long max_ivor = 0; int i; @@ -1011,7 +1208,7 @@ int __init kvmppc_booke_init(void) } flush_icache_range(kvmppc_booke_handlers, kvmppc_booke_handlers + max_ivor + kvmppc_handler_len); - +#endif /* !BOOKE_HV */ return 0; } diff --git a/arch/powerpc/kvm/booke.h b/arch/powerpc/kvm/booke.h index 05d1d99..d53bcf2 100644 --- a/arch/powerpc/kvm/booke.h +++ b/arch/powerpc/kvm/booke.h @@ -48,7 +48,20 @@ #define BOOKE_IRQPRIO_PERFORMANCE_MONITOR 19 /* Internal pseudo-irqprio for level triggered externals */ #define BOOKE_IRQPRIO_EXTERNAL_LEVEL 20 -#define BOOKE_IRQPRIO_MAX 20 +#define BOOKE_IRQPRIO_DBELL 21 +#define BOOKE_IRQPRIO_DBELL_CRIT 22 +#define BOOKE_IRQPRIO_MAX 23 + +#define BOOKE_IRQMASK_EE ((1 << BOOKE_IRQPRIO_EXTERNAL_LEVEL) | \ + (1 << BOOKE_IRQPRIO_PERFORMANCE_MONITOR) | \ + (1 << BOOKE_IRQPRIO_DBELL) | \ + (1 << BOOKE_IRQPRIO_DECREMENTER) | \ + (1 << BOOKE_IRQPRIO_FIT) | \ + (1 << BOOKE_IRQPRIO_EXTERNAL)) + +#define BOOKE_IRQMASK_CE ((1 << BOOKE_IRQPRIO_DBELL_CRIT) | \ + (1 << BOOKE_IRQPRIO_WATCHDOG) | \ + (1 << BOOKE_IRQPRIO_CRITICAL)) extern unsigned long kvmppc_booke_handlers; @@ -74,4 +87,13 @@ void kvmppc_vcpu_disable_spe(struct kvm_vcpu *vcpu); void kvmppc_booke_vcpu_load(struct kvm_vcpu *vcpu, int cpu); void kvmppc_booke_vcpu_put(struct kvm_vcpu *vcpu); +enum int_class { + INT_CLASS_NONCRIT, + INT_CLASS_CRIT, + INT_CLASS_MC, + INT_CLASS_DBG, +}; + +void kvmppc_set_pending_interrupt(struct kvm_vcpu *vcpu, enum int_class type); + #endif /* __KVM_BOOKE_H__ */ diff --git a/arch/powerpc/kvm/booke_emulate.c b/arch/powerpc/kvm/booke_emulate.c index 3e652da..904412b 100644 --- a/arch/powerpc/kvm/booke_emulate.c +++ b/arch/powerpc/kvm/booke_emulate.c @@ -99,6 +99,12 @@ int kvmppc_booke_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, return emulated; } +/* + * NOTE: some of these registers are not emulated on BOOKE_HV (GS-mode). + * Their backing store is in real registers, and these functions + * will return the wrong result if called for them in another context + * (such as debugging). + */ int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs) { int emulated = EMULATE_DONE; @@ -122,9 +128,11 @@ int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs) kvmppc_set_tcr(vcpu, spr_val); break; - /* Note: SPRG4-7 are user-readable. These values are - * loaded into the real SPRGs when resuming the - * guest. */ + /* + * Note: SPRG4-7 are user-readable. + * These values are loaded into the real SPRGs when resuming the + * guest (PR-mode only). + */ case SPRN_SPRG4: vcpu->arch.shared->sprg4 = spr_val; break; case SPRN_SPRG5: @@ -136,6 +144,9 @@ int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs) case SPRN_IVPR: vcpu->arch.ivpr = spr_val; +#ifdef CONFIG_KVM_BOOKE_HV + mtspr(SPRN_GIVPR, spr_val); +#endif break; case SPRN_IVOR0: vcpu->arch.ivor[BOOKE_IRQPRIO_CRITICAL] = spr_val; @@ -145,6 +156,9 @@ int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs) break; case SPRN_IVOR2: vcpu->arch.ivor[BOOKE_IRQPRIO_DATA_STORAGE] = spr_val; +#ifdef CONFIG_KVM_BOOKE_HV + mtspr(SPRN_GIVOR2, spr_val); +#endif break; case SPRN_IVOR3: vcpu->arch.ivor[BOOKE_IRQPRIO_INST_STORAGE] = spr_val; @@ -163,6 +177,9 @@ int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs) break; case SPRN_IVOR8: vcpu->arch.ivor[BOOKE_IRQPRIO_SYSCALL] = spr_val; +#ifdef CONFIG_KVM_BOOKE_HV + mtspr(SPRN_GIVOR8, spr_val); +#endif break; case SPRN_IVOR9: vcpu->arch.ivor[BOOKE_IRQPRIO_AP_UNAVAIL] = spr_val; diff --git a/arch/powerpc/kvm/bookehv_interrupts.S b/arch/powerpc/kvm/bookehv_interrupts.S new file mode 100644 index 0000000..9eaeebd --- /dev/null +++ b/arch/powerpc/kvm/bookehv_interrupts.S @@ -0,0 +1,587 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * Copyright (C) 2010-2011 Freescale Semiconductor, Inc. + * + * Author: Varun Sethi + * Author: Scott Wood + * + * This file is derived from arch/powerpc/kvm/booke_interrupts.S + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "../kernel/head_booke.h" /* for THREAD_NORMSAVE() */ + +#define GET_VCPU(vcpu, thread) \ + PPC_LL vcpu, THREAD_KVM_VCPU(thread) + +#define SET_VCPU(vcpu) \ + PPC_STL vcpu, (THREAD + THREAD_KVM_VCPU)(r2) + +#define LONGBYTES (BITS_PER_LONG / 8) + +#define VCPU_GPR(n) (VCPU_GPRS + (n * LONGBYTES)) +#define VCPU_GUEST_SPRG(n) (VCPU_GUEST_SPRGS + (n * LONGBYTES)) + +/* The host stack layout: */ +#define HOST_R1 (0 * LONGBYTES) /* Implied by stwu. */ +#define HOST_CALLEE_LR (1 * LONGBYTES) +#define HOST_RUN (2 * LONGBYTES) /* struct kvm_run */ +/* + * r2 is special: it holds 'current', and it made nonvolatile in the + * kernel with the -ffixed-r2 gcc option. + */ +#define HOST_R2 (3 * LONGBYTES) +#define HOST_NV_GPRS (4 * LONGBYTES) +#define HOST_NV_GPR(n) (HOST_NV_GPRS + ((n - 14) * LONGBYTES)) +#define HOST_MIN_STACK_SIZE (HOST_NV_GPR(31) + LONGBYTES) +#define HOST_STACK_SIZE ((HOST_MIN_STACK_SIZE + 15) & ~15) /* Align. */ +#define HOST_STACK_LR (HOST_STACK_SIZE + LONGBYTES) /* In caller stack frame. */ + +#define NEED_EMU 0x00000001 /* emulation -- save nv regs */ +#define NEED_DEAR 0x00000002 /* save faulting DEAR */ +#define NEED_ESR 0x00000004 /* save faulting ESR */ + +/* + * On entry: + * r4 = vcpu, r5 = srr0, r6 = srr1 + * saved in vcpu: cr, ctr, r3-r13 + */ +.macro kvm_handler_common intno, srr0, flags + mfspr r10, SPRN_PID + lwz r8, VCPU_HOST_PID(r4) + PPC_LL r11, VCPU_SHARED(r4) + PPC_STL r14, VCPU_GPR(r14)(r4) /* We need a non-volatile GPR. */ + li r14, \intno + + stw r10, VCPU_GUEST_PID(r4) + mtspr SPRN_PID, r8 + + .if \flags & NEED_EMU + lwz r9, VCPU_KVM(r4) + .endif + +#ifdef CONFIG_KVM_EXIT_TIMING + /* save exit time */ +1: mfspr r7, SPRN_TBRU + mfspr r8, SPRN_TBRL + mfspr r9, SPRN_TBRU + cmpw r9, r7 + PPC_STL r8, VCPU_TIMING_EXIT_TBL(r4) + bne- 1b + PPC_STL r9, VCPU_TIMING_EXIT_TBU(r4) +#endif + + oris r8, r6, MSR_CE@h +#ifndef CONFIG_64BIT + stw r6, (VCPU_SHARED_MSR + 4)(r11) +#else + std r6, (VCPU_SHARED_MSR)(r11) +#endif + ori r8, r8, MSR_ME | MSR_RI + PPC_STL r5, VCPU_PC(r4) + + /* + * Make sure CE/ME/RI are set (if appropriate for exception type) + * whether or not the guest had it set. Since mfmsr/mtmsr are + * somewhat expensive, skip in the common case where the guest + * had all these bits set (and thus they're still set if + * appropriate for the exception type). + */ + cmpw r6, r8 + .if \flags & NEED_EMU + lwz r9, KVM_LPID(r9) + .endif + beq 1f + mfmsr r7 + .if \srr0 != SPRN_MCSRR0 && \srr0 != SPRN_CSRR0 + oris r7, r7, MSR_CE@h + .endif + .if \srr0 != SPRN_MCSRR0 + ori r7, r7, MSR_ME | MSR_RI + .endif + mtmsr r7 +1: + + .if \flags & NEED_EMU + /* + * This assumes you have external PID support. + * To support a bookehv CPU without external PID, you'll + * need to look up the TLB entry and create a temporary mapping. + * + * FIXME: we don't currently handle if the lwepx faults. PR-mode + * booke doesn't handle it either. Since Linux doesn't use + * broadcast tlbivax anymore, the only way this should happen is + * if the guest maps its memory execute-but-not-read, or if we + * somehow take a TLB miss in the middle of this entry code and + * evict the relevant entry. On e500mc, all kernel lowmem is + * bolted into TLB1 large page mappings, and we don't use + * broadcast invalidates, so we should not take a TLB miss here. + * + * Later we'll need to deal with faults here. Disallowing guest + * mappings that are execute-but-not-read could be an option on + * e500mc, but not on chips with an LRAT if it is used. + */ + + mfspr r3, SPRN_EPLC /* will already have correct ELPID and EGS */ + PPC_STL r15, VCPU_GPR(r15)(r4) + PPC_STL r16, VCPU_GPR(r16)(r4) + PPC_STL r17, VCPU_GPR(r17)(r4) + PPC_STL r18, VCPU_GPR(r18)(r4) + PPC_STL r19, VCPU_GPR(r19)(r4) + mr r8, r3 + PPC_STL r20, VCPU_GPR(r20)(r4) + rlwimi r8, r6, EPC_EAS_SHIFT - MSR_IR_LG, EPC_EAS + PPC_STL r21, VCPU_GPR(r21)(r4) + rlwimi r8, r6, EPC_EPR_SHIFT - MSR_PR_LG, EPC_EPR + PPC_STL r22, VCPU_GPR(r22)(r4) + rlwimi r8, r10, EPC_EPID_SHIFT, EPC_EPID + PPC_STL r23, VCPU_GPR(r23)(r4) + PPC_STL r24, VCPU_GPR(r24)(r4) + PPC_STL r25, VCPU_GPR(r25)(r4) + PPC_STL r26, VCPU_GPR(r26)(r4) + PPC_STL r27, VCPU_GPR(r27)(r4) + PPC_STL r28, VCPU_GPR(r28)(r4) + PPC_STL r29, VCPU_GPR(r29)(r4) + PPC_STL r30, VCPU_GPR(r30)(r4) + PPC_STL r31, VCPU_GPR(r31)(r4) + mtspr SPRN_EPLC, r8 + isync + lwepx r9, 0, r5 + mtspr SPRN_EPLC, r3 + stw r9, VCPU_LAST_INST(r4) + .endif + + .if \flags & NEED_ESR + mfspr r8, SPRN_ESR + PPC_STL r8, VCPU_FAULT_ESR(r4) + .endif + + .if \flags & NEED_DEAR + mfspr r9, SPRN_DEAR + PPC_STL r9, VCPU_FAULT_DEAR(r4) + .endif + + b kvmppc_resume_host +.endm + +/* + * For input register values, see arch/powerpc/include/asm/kvm_booke_hv_asm.h + */ +.macro kvm_handler intno srr0, srr1, flags +_GLOBAL(kvmppc_handler_\intno\()_\srr1) + GET_VCPU(r11, r10) + PPC_STL r3, VCPU_GPR(r3)(r11) + mfspr r3, SPRN_SPRG_RSCRATCH0 + PPC_STL r4, VCPU_GPR(r4)(r11) + PPC_LL r4, THREAD_NORMSAVE(0)(r10) + PPC_STL r5, VCPU_GPR(r5)(r11) + PPC_STL r13, VCPU_CR(r11) + mfspr r5, \srr0 + PPC_STL r3, VCPU_GPR(r10)(r11) + PPC_LL r3, THREAD_NORMSAVE(2)(r10) + PPC_STL r6, VCPU_GPR(r6)(r11) + PPC_STL r4, VCPU_GPR(r11)(r11) + mfspr r6, \srr1 + PPC_STL r7, VCPU_GPR(r7)(r11) + PPC_STL r8, VCPU_GPR(r8)(r11) + PPC_STL r9, VCPU_GPR(r9)(r11) + PPC_STL r3, VCPU_GPR(r13)(r11) + mfctr r7 + PPC_STL r12, VCPU_GPR(r12)(r11) + PPC_STL r7, VCPU_CTR(r11) + mr r4, r11 + kvm_handler_common \intno, \srr0, \flags +.endm + +.macro kvm_lvl_handler intno scratch srr0, srr1, flags +_GLOBAL(kvmppc_handler_\intno\()_\srr1) + mfspr r10, SPRN_SPRG_THREAD + GET_VCPU(r11, r10) + PPC_STL r3, VCPU_GPR(r3)(r11) + mfspr r3, \scratch + PPC_STL r4, VCPU_GPR(r4)(r11) + PPC_LL r4, GPR9(r8) + PPC_STL r5, VCPU_GPR(r5)(r11) + PPC_STL r9, VCPU_CR(r11) + mfspr r5, \srr0 + PPC_STL r3, VCPU_GPR(r8)(r11) + PPC_LL r3, GPR10(r8) + PPC_STL r6, VCPU_GPR(r6)(r11) + PPC_STL r4, VCPU_GPR(r9)(r11) + mfspr r6, \srr1 + PPC_LL r4, GPR11(r8) + PPC_STL r7, VCPU_GPR(r7)(r11) + PPC_STL r8, VCPU_GPR(r8)(r11) + PPC_STL r3, VCPU_GPR(r10)(r11) + mfctr r7 + PPC_STL r12, VCPU_GPR(r12)(r11) + PPC_STL r4, VCPU_GPR(r11)(r11) + PPC_STL r7, VCPU_CTR(r11) + mr r4, r11 + kvm_handler_common \intno, \srr0, \flags +.endm + +kvm_lvl_handler BOOKE_INTERRUPT_CRITICAL, \ + SPRN_SPRG_RSCRATCH_CRIT, SPRN_CSRR0, SPRN_CSRR1, 0 +kvm_lvl_handler BOOKE_INTERRUPT_MACHINE_CHECK, \ + SPRN_SPRG_RSCRATCH_MC, SPRN_MCSRR0, SPRN_MCSRR1, 0 +kvm_handler BOOKE_INTERRUPT_DATA_STORAGE, \ + SPRN_SRR0, SPRN_SRR1, (NEED_EMU | NEED_DEAR) +kvm_handler BOOKE_INTERRUPT_INST_STORAGE, SPRN_SRR0, SPRN_SRR1, NEED_ESR +kvm_handler BOOKE_INTERRUPT_EXTERNAL, SPRN_SRR0, SPRN_SRR1, 0 +kvm_handler BOOKE_INTERRUPT_ALIGNMENT, \ + SPRN_SRR0, SPRN_SRR1, (NEED_DEAR | NEED_ESR) +kvm_handler BOOKE_INTERRUPT_PROGRAM, SPRN_SRR0, SPRN_SRR1, NEED_ESR +kvm_handler BOOKE_INTERRUPT_FP_UNAVAIL, SPRN_SRR0, SPRN_SRR1, 0 +kvm_handler BOOKE_INTERRUPT_SYSCALL, SPRN_SRR0, SPRN_SRR1, 0 +kvm_handler BOOKE_INTERRUPT_AP_UNAVAIL, SPRN_SRR0, SPRN_SRR1, 0 +kvm_handler BOOKE_INTERRUPT_DECREMENTER, SPRN_SRR0, SPRN_SRR1, 0 +kvm_handler BOOKE_INTERRUPT_FIT, SPRN_SRR0, SPRN_SRR1, 0 +kvm_lvl_handler BOOKE_INTERRUPT_WATCHDOG, \ + SPRN_SPRG_RSCRATCH_CRIT, SPRN_CSRR0, SPRN_CSRR1, 0 +kvm_handler BOOKE_INTERRUPT_DTLB_MISS, \ + SPRN_SRR0, SPRN_SRR1, (NEED_EMU | NEED_DEAR | NEED_ESR) +kvm_handler BOOKE_INTERRUPT_ITLB_MISS, SPRN_SRR0, SPRN_SRR1, 0 +kvm_handler BOOKE_INTERRUPT_SPE_UNAVAIL, SPRN_SRR0, SPRN_SRR1, 0 +kvm_handler BOOKE_INTERRUPT_SPE_FP_DATA, SPRN_SRR0, SPRN_SRR1, 0 +kvm_handler BOOKE_INTERRUPT_SPE_FP_ROUND, SPRN_SRR0, SPRN_SRR1, 0 +kvm_handler BOOKE_INTERRUPT_PERFORMANCE_MONITOR, SPRN_SRR0, SPRN_SRR1, 0 +kvm_handler BOOKE_INTERRUPT_DOORBELL, SPRN_SRR0, SPRN_SRR1, 0 +kvm_lvl_handler BOOKE_INTERRUPT_DOORBELL_CRITICAL, \ + SPRN_SPRG_RSCRATCH_CRIT, SPRN_CSRR0, SPRN_CSRR1, 0 +kvm_handler BOOKE_INTERRUPT_HV_PRIV, SPRN_SRR0, SPRN_SRR1, NEED_EMU +kvm_handler BOOKE_INTERRUPT_HV_SYSCALL, SPRN_SRR0, SPRN_SRR1, 0 +kvm_handler BOOKE_INTERRUPT_GUEST_DBELL, SPRN_GSRR0, SPRN_GSRR1, 0 +kvm_lvl_handler BOOKE_INTERRUPT_GUEST_DBELL_CRIT, \ + SPRN_SPRG_RSCRATCH_CRIT, SPRN_CSRR0, SPRN_CSRR1, 0 +kvm_lvl_handler BOOKE_INTERRUPT_DEBUG, \ + SPRN_SPRG_RSCRATCH_CRIT, SPRN_CSRR0, SPRN_CSRR1, 0 +kvm_lvl_handler BOOKE_INTERRUPT_DEBUG, \ + SPRN_SPRG_RSCRATCH_DBG, SPRN_DSRR0, SPRN_DSRR1, 0 + + +/* Registers: + * SPRG_SCRATCH0: guest r10 + * r4: vcpu pointer + * r11: vcpu->arch.shared + * r14: KVM exit number + */ +_GLOBAL(kvmppc_resume_host) + /* Save remaining volatile guest register state to vcpu. */ + mfspr r3, SPRN_VRSAVE + PPC_STL r0, VCPU_GPR(r0)(r4) + PPC_STL r1, VCPU_GPR(r1)(r4) + mflr r5 + mfspr r6, SPRN_SPRG4 + PPC_STL r2, VCPU_GPR(r2)(r4) + PPC_STL r5, VCPU_LR(r4) + mfspr r7, SPRN_SPRG5 + PPC_STL r3, VCPU_VRSAVE(r4) + PPC_STL r6, VCPU_SHARED_SPRG4(r11) + mfspr r8, SPRN_SPRG6 + PPC_STL r7, VCPU_SHARED_SPRG5(r11) + mfspr r9, SPRN_SPRG7 + PPC_STL r8, VCPU_SHARED_SPRG6(r11) + mfxer r3 + PPC_STL r9, VCPU_SHARED_SPRG7(r11) + + /* save guest MAS registers and restore host mas4 & mas6 */ + mfspr r5, SPRN_MAS0 + PPC_STL r3, VCPU_XER(r4) + mfspr r6, SPRN_MAS1 + stw r5, VCPU_SHARED_MAS0(r11) + mfspr r7, SPRN_MAS2 + stw r6, VCPU_SHARED_MAS1(r11) +#ifndef CONFIG_64BIT + stw r7, (VCPU_SHARED_MAS2 + 4)(r11) +#else + std r7, (VCPU_SHARED_MAS2)(r11) +#endif + mfspr r5, SPRN_MAS3 + mfspr r6, SPRN_MAS4 + stw r5, VCPU_SHARED_MAS7_3+4(r11) + mfspr r7, SPRN_MAS6 + stw r6, VCPU_SHARED_MAS4(r11) + mfspr r5, SPRN_MAS7 + lwz r6, VCPU_HOST_MAS4(r4) + stw r7, VCPU_SHARED_MAS6(r11) + lwz r8, VCPU_HOST_MAS6(r4) + mtspr SPRN_MAS4, r6 + stw r5, VCPU_SHARED_MAS7_3+0(r11) + mtspr SPRN_MAS6, r8 + mfspr r3, SPRN_EPCR + rlwinm r3, r3, 0, ~SPRN_EPCR_DMIUH + mtspr SPRN_EPCR, r3 + isync + + /* Restore host stack pointer */ + PPC_LL r1, VCPU_HOST_STACK(r4) + PPC_LL r2, HOST_R2(r1) + + /* Switch to kernel stack and jump to handler. */ + PPC_LL r3, HOST_RUN(r1) + mr r5, r14 /* intno */ + mr r14, r4 /* Save vcpu pointer. */ + bl kvmppc_handle_exit + + /* Restore vcpu pointer and the nonvolatiles we used. */ + mr r4, r14 + PPC_LL r14, VCPU_GPR(r14)(r4) + + andi. r5, r3, RESUME_FLAG_NV + beq skip_nv_load + PPC_LL r15, VCPU_GPR(r15)(r4) + PPC_LL r16, VCPU_GPR(r16)(r4) + PPC_LL r17, VCPU_GPR(r17)(r4) + PPC_LL r18, VCPU_GPR(r18)(r4) + PPC_LL r19, VCPU_GPR(r19)(r4) + PPC_LL r20, VCPU_GPR(r20)(r4) + PPC_LL r21, VCPU_GPR(r21)(r4) + PPC_LL r22, VCPU_GPR(r22)(r4) + PPC_LL r23, VCPU_GPR(r23)(r4) + PPC_LL r24, VCPU_GPR(r24)(r4) + PPC_LL r25, VCPU_GPR(r25)(r4) + PPC_LL r26, VCPU_GPR(r26)(r4) + PPC_LL r27, VCPU_GPR(r27)(r4) + PPC_LL r28, VCPU_GPR(r28)(r4) + PPC_LL r29, VCPU_GPR(r29)(r4) + PPC_LL r30, VCPU_GPR(r30)(r4) + PPC_LL r31, VCPU_GPR(r31)(r4) +skip_nv_load: + /* Should we return to the guest? */ + andi. r5, r3, RESUME_FLAG_HOST + beq lightweight_exit + + srawi r3, r3, 2 /* Shift -ERR back down. */ + +heavyweight_exit: + /* Not returning to guest. */ + PPC_LL r5, HOST_STACK_LR(r1) + + /* + * We already saved guest volatile register state; now save the + * non-volatiles. + */ + + PPC_STL r15, VCPU_GPR(r15)(r4) + PPC_STL r16, VCPU_GPR(r16)(r4) + PPC_STL r17, VCPU_GPR(r17)(r4) + PPC_STL r18, VCPU_GPR(r18)(r4) + PPC_STL r19, VCPU_GPR(r19)(r4) + PPC_STL r20, VCPU_GPR(r20)(r4) + PPC_STL r21, VCPU_GPR(r21)(r4) + PPC_STL r22, VCPU_GPR(r22)(r4) + PPC_STL r23, VCPU_GPR(r23)(r4) + PPC_STL r24, VCPU_GPR(r24)(r4) + PPC_STL r25, VCPU_GPR(r25)(r4) + PPC_STL r26, VCPU_GPR(r26)(r4) + PPC_STL r27, VCPU_GPR(r27)(r4) + PPC_STL r28, VCPU_GPR(r28)(r4) + PPC_STL r29, VCPU_GPR(r29)(r4) + PPC_STL r30, VCPU_GPR(r30)(r4) + PPC_STL r31, VCPU_GPR(r31)(r4) + + /* Load host non-volatile register state from host stack. */ + PPC_LL r14, HOST_NV_GPR(r14)(r1) + PPC_LL r15, HOST_NV_GPR(r15)(r1) + PPC_LL r16, HOST_NV_GPR(r16)(r1) + PPC_LL r17, HOST_NV_GPR(r17)(r1) + PPC_LL r18, HOST_NV_GPR(r18)(r1) + PPC_LL r19, HOST_NV_GPR(r19)(r1) + PPC_LL r20, HOST_NV_GPR(r20)(r1) + PPC_LL r21, HOST_NV_GPR(r21)(r1) + PPC_LL r22, HOST_NV_GPR(r22)(r1) + PPC_LL r23, HOST_NV_GPR(r23)(r1) + PPC_LL r24, HOST_NV_GPR(r24)(r1) + PPC_LL r25, HOST_NV_GPR(r25)(r1) + PPC_LL r26, HOST_NV_GPR(r26)(r1) + PPC_LL r27, HOST_NV_GPR(r27)(r1) + PPC_LL r28, HOST_NV_GPR(r28)(r1) + PPC_LL r29, HOST_NV_GPR(r29)(r1) + PPC_LL r30, HOST_NV_GPR(r30)(r1) + PPC_LL r31, HOST_NV_GPR(r31)(r1) + + /* Return to kvm_vcpu_run(). */ + mtlr r5 + addi r1, r1, HOST_STACK_SIZE + /* r3 still contains the return code from kvmppc_handle_exit(). */ + blr + +/* Registers: + * r3: kvm_run pointer + * r4: vcpu pointer + */ +_GLOBAL(__kvmppc_vcpu_run) + stwu r1, -HOST_STACK_SIZE(r1) + PPC_STL r1, VCPU_HOST_STACK(r4) /* Save stack pointer to vcpu. */ + + /* Save host state to stack. */ + PPC_STL r3, HOST_RUN(r1) + mflr r3 + PPC_STL r3, HOST_STACK_LR(r1) + + /* Save host non-volatile register state to stack. */ + PPC_STL r14, HOST_NV_GPR(r14)(r1) + PPC_STL r15, HOST_NV_GPR(r15)(r1) + PPC_STL r16, HOST_NV_GPR(r16)(r1) + PPC_STL r17, HOST_NV_GPR(r17)(r1) + PPC_STL r18, HOST_NV_GPR(r18)(r1) + PPC_STL r19, HOST_NV_GPR(r19)(r1) + PPC_STL r20, HOST_NV_GPR(r20)(r1) + PPC_STL r21, HOST_NV_GPR(r21)(r1) + PPC_STL r22, HOST_NV_GPR(r22)(r1) + PPC_STL r23, HOST_NV_GPR(r23)(r1) + PPC_STL r24, HOST_NV_GPR(r24)(r1) + PPC_STL r25, HOST_NV_GPR(r25)(r1) + PPC_STL r26, HOST_NV_GPR(r26)(r1) + PPC_STL r27, HOST_NV_GPR(r27)(r1) + PPC_STL r28, HOST_NV_GPR(r28)(r1) + PPC_STL r29, HOST_NV_GPR(r29)(r1) + PPC_STL r30, HOST_NV_GPR(r30)(r1) + PPC_STL r31, HOST_NV_GPR(r31)(r1) + + /* Load guest non-volatiles. */ + PPC_LL r14, VCPU_GPR(r14)(r4) + PPC_LL r15, VCPU_GPR(r15)(r4) + PPC_LL r16, VCPU_GPR(r16)(r4) + PPC_LL r17, VCPU_GPR(r17)(r4) + PPC_LL r18, VCPU_GPR(r18)(r4) + PPC_LL r19, VCPU_GPR(r19)(r4) + PPC_LL r20, VCPU_GPR(r20)(r4) + PPC_LL r21, VCPU_GPR(r21)(r4) + PPC_LL r22, VCPU_GPR(r22)(r4) + PPC_LL r23, VCPU_GPR(r23)(r4) + PPC_LL r24, VCPU_GPR(r24)(r4) + PPC_LL r25, VCPU_GPR(r25)(r4) + PPC_LL r26, VCPU_GPR(r26)(r4) + PPC_LL r27, VCPU_GPR(r27)(r4) + PPC_LL r28, VCPU_GPR(r28)(r4) + PPC_LL r29, VCPU_GPR(r29)(r4) + PPC_LL r30, VCPU_GPR(r30)(r4) + PPC_LL r31, VCPU_GPR(r31)(r4) + + +lightweight_exit: + PPC_STL r2, HOST_R2(r1) + + mfspr r3, SPRN_PID + stw r3, VCPU_HOST_PID(r4) + lwz r3, VCPU_GUEST_PID(r4) + mtspr SPRN_PID, r3 + + /* Save vcpu pointer for the exception handlers + * must be done before loading guest r2. + */ +// SET_VCPU(r4) + + PPC_LL r11, VCPU_SHARED(r4) + /* Save host mas4 and mas6 and load guest MAS registers */ + mfspr r3, SPRN_MAS4 + stw r3, VCPU_HOST_MAS4(r4) + mfspr r3, SPRN_MAS6 + stw r3, VCPU_HOST_MAS6(r4) + lwz r3, VCPU_SHARED_MAS0(r11) + lwz r5, VCPU_SHARED_MAS1(r11) +#ifndef CONFIG_64BIT + lwz r6, (VCPU_SHARED_MAS2 + 4)(r11) +#else + ld r6, (VCPU_SHARED_MAS2)(r11) +#endif + lwz r7, VCPU_SHARED_MAS7_3+4(r11) + lwz r8, VCPU_SHARED_MAS4(r11) + mtspr SPRN_MAS0, r3 + mtspr SPRN_MAS1, r5 + mtspr SPRN_MAS2, r6 + mtspr SPRN_MAS3, r7 + mtspr SPRN_MAS4, r8 + lwz r3, VCPU_SHARED_MAS6(r11) + lwz r5, VCPU_SHARED_MAS7_3+0(r11) + mtspr SPRN_MAS6, r3 + mtspr SPRN_MAS7, r5 + /* Disable MAS register updates via exception */ + mfspr r3, SPRN_EPCR + oris r3, r3, SPRN_EPCR_DMIUH@h + mtspr SPRN_EPCR, r3 + + /* + * Host interrupt handlers may have clobbered these guest-readable + * SPRGs, so we need to reload them here with the guest's values. + */ + lwz r3, VCPU_VRSAVE(r4) + lwz r5, VCPU_SHARED_SPRG4(r11) + mtspr SPRN_VRSAVE, r3 + lwz r6, VCPU_SHARED_SPRG5(r11) + mtspr SPRN_SPRG4W, r5 + lwz r7, VCPU_SHARED_SPRG6(r11) + mtspr SPRN_SPRG5W, r6 + lwz r8, VCPU_SHARED_SPRG7(r11) + mtspr SPRN_SPRG6W, r7 + mtspr SPRN_SPRG7W, r8 + + /* Load some guest volatiles. */ + PPC_LL r3, VCPU_LR(r4) + PPC_LL r5, VCPU_XER(r4) + PPC_LL r6, VCPU_CTR(r4) + PPC_LL r7, VCPU_CR(r4) + PPC_LL r8, VCPU_PC(r4) +#ifndef CONFIG_64BIT + lwz r9, (VCPU_SHARED_MSR + 4)(r11) +#else + ld r9, (VCPU_SHARED_MSR)(r11) +#endif + PPC_LL r0, VCPU_GPR(r0)(r4) + PPC_LL r1, VCPU_GPR(r1)(r4) + PPC_LL r2, VCPU_GPR(r2)(r4) + PPC_LL r10, VCPU_GPR(r10)(r4) + PPC_LL r11, VCPU_GPR(r11)(r4) + PPC_LL r12, VCPU_GPR(r12)(r4) + PPC_LL r13, VCPU_GPR(r13)(r4) + mtlr r3 + mtxer r5 + mtctr r6 + mtcr r7 + mtsrr0 r8 + mtsrr1 r9 + +#ifdef CONFIG_KVM_EXIT_TIMING + /* save enter time */ +1: + mfspr r6, SPRN_TBRU + mfspr r7, SPRN_TBRL + mfspr r8, SPRN_TBRU + cmpw r8, r6 + PPC_STL r7, VCPU_TIMING_LAST_ENTER_TBL(r4) + bne 1b + PPC_STL r8, VCPU_TIMING_LAST_ENTER_TBU(r4) +#endif + + /* Finish loading guest volatiles and jump to guest. */ + PPC_LL r5, VCPU_GPR(r5)(r4) + PPC_LL r6, VCPU_GPR(r6)(r4) + PPC_LL r7, VCPU_GPR(r7)(r4) + PPC_LL r8, VCPU_GPR(r8)(r4) + PPC_LL r9, VCPU_GPR(r9)(r4) + + PPC_LL r3, VCPU_GPR(r3)(r4) + PPC_LL r4, VCPU_GPR(r4)(r4) + rfi diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index cd53e08..6a530e4 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -114,6 +114,11 @@ int kvmppc_sanity_check(struct kvm_vcpu *vcpu) goto out; #endif +#ifdef CONFIG_KVM_BOOKE_HV + if (!cpu_has_feature(CPU_FTR_EMB_HV)) + goto out; +#endif + r = true; out: diff --git a/arch/powerpc/kvm/timing.h b/arch/powerpc/kvm/timing.h index 8167d42..bf191e7 100644 --- a/arch/powerpc/kvm/timing.h +++ b/arch/powerpc/kvm/timing.h @@ -93,6 +93,12 @@ static inline void kvmppc_account_exit_stat(struct kvm_vcpu *vcpu, int type) case SIGNAL_EXITS: vcpu->stat.signal_exits++; break; + case DBELL_EXITS: + vcpu->stat.dbell_exits++; + break; + case GDBELL_EXITS: + vcpu->stat.gdbell_exits++; + break; } } -- cgit v0.10.2 From 8fae845f4956de0becc115e926d33eff46722e94 Mon Sep 17 00:00:00 2001 From: Scott Wood Date: Tue, 20 Dec 2011 15:34:45 +0000 Subject: KVM: PPC: booke: standard PPC floating point support e500mc has a normal PPC FPU, rather than SPE which is found on e500v1/v2. Based on code from Liu Yu . Signed-off-by: Scott Wood Signed-off-by: Alexander Graf Signed-off-by: Avi Kivity diff --git a/arch/powerpc/include/asm/switch_to.h b/arch/powerpc/include/asm/switch_to.h index caf82d0..1622c35 100644 --- a/arch/powerpc/include/asm/switch_to.h +++ b/arch/powerpc/include/asm/switch_to.h @@ -17,6 +17,7 @@ extern struct task_struct *_switch(struct thread_struct *prev, struct thread_struct *next); extern void giveup_fpu(struct task_struct *); +extern void load_up_fpu(void); extern void disable_kernel_fp(void); extern void enable_kernel_fp(void); extern void flush_fp_to_thread(struct task_struct *); diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 75dbaeb..0b77be1 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -457,6 +457,11 @@ void kvmppc_core_prepare_to_enter(struct kvm_vcpu *vcpu) int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) { int ret; +#ifdef CONFIG_PPC_FPU + unsigned int fpscr; + int fpexc_mode; + u64 fpr[32]; +#endif if (!vcpu->arch.sane) { kvm_run->exit_reason = KVM_EXIT_INTERNAL_ERROR; @@ -479,7 +484,46 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) } kvm_guest_enter(); + +#ifdef CONFIG_PPC_FPU + /* Save userspace FPU state in stack */ + enable_kernel_fp(); + memcpy(fpr, current->thread.fpr, sizeof(current->thread.fpr)); + fpscr = current->thread.fpscr.val; + fpexc_mode = current->thread.fpexc_mode; + + /* Restore guest FPU state to thread */ + memcpy(current->thread.fpr, vcpu->arch.fpr, sizeof(vcpu->arch.fpr)); + current->thread.fpscr.val = vcpu->arch.fpscr; + + /* + * Since we can't trap on MSR_FP in GS-mode, we consider the guest + * as always using the FPU. Kernel usage of FP (via + * enable_kernel_fp()) in this thread must not occur while + * vcpu->fpu_active is set. + */ + vcpu->fpu_active = 1; + + kvmppc_load_guest_fp(vcpu); +#endif + ret = __kvmppc_vcpu_run(kvm_run, vcpu); + +#ifdef CONFIG_PPC_FPU + kvmppc_save_guest_fp(vcpu); + + vcpu->fpu_active = 0; + + /* Save guest FPU state from thread */ + memcpy(vcpu->arch.fpr, current->thread.fpr, sizeof(vcpu->arch.fpr)); + vcpu->arch.fpscr = current->thread.fpscr.val; + + /* Restore userspace FPU state from stack */ + memcpy(current->thread.fpr, fpr, sizeof(current->thread.fpr)); + current->thread.fpscr.val = fpscr; + current->thread.fpexc_mode = fpexc_mode; +#endif + kvm_guest_exit(); out: diff --git a/arch/powerpc/kvm/booke.h b/arch/powerpc/kvm/booke.h index d53bcf2..62c4fe5 100644 --- a/arch/powerpc/kvm/booke.h +++ b/arch/powerpc/kvm/booke.h @@ -23,6 +23,7 @@ #include #include #include +#include #include "timing.h" /* interrupt priortity ordering */ @@ -96,4 +97,34 @@ enum int_class { void kvmppc_set_pending_interrupt(struct kvm_vcpu *vcpu, enum int_class type); +/* + * Load up guest vcpu FP state if it's needed. + * It also set the MSR_FP in thread so that host know + * we're holding FPU, and then host can help to save + * guest vcpu FP state if other threads require to use FPU. + * This simulates an FP unavailable fault. + * + * It requires to be called with preemption disabled. + */ +static inline void kvmppc_load_guest_fp(struct kvm_vcpu *vcpu) +{ +#ifdef CONFIG_PPC_FPU + if (vcpu->fpu_active && !(current->thread.regs->msr & MSR_FP)) { + load_up_fpu(); + current->thread.regs->msr |= MSR_FP; + } +#endif +} + +/* + * Save guest vcpu FP state into thread. + * It requires to be called with preemption disabled. + */ +static inline void kvmppc_save_guest_fp(struct kvm_vcpu *vcpu) +{ +#ifdef CONFIG_PPC_FPU + if (vcpu->fpu_active && (current->thread.regs->msr & MSR_FP)) + giveup_fpu(current); +#endif +} #endif /* __KVM_BOOKE_H__ */ -- cgit v0.10.2 From 73196cd364a2d972d73fa08da9d81ca3215bed68 Mon Sep 17 00:00:00 2001 From: Scott Wood Date: Tue, 20 Dec 2011 15:34:47 +0000 Subject: KVM: PPC: e500mc support Add processor support for e500mc, using hardware virtualization support (GS-mode). Current issues include: - No support for external proxy (coreint) interrupt mode in the guest. Includes work by Ashish Kalra , Varun Sethi , and Liu Yu . Signed-off-by: Scott Wood Signed-off-by: Alexander Graf Signed-off-by: Avi Kivity diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h index 7108a9c..67c34af 100644 --- a/arch/powerpc/include/asm/cputable.h +++ b/arch/powerpc/include/asm/cputable.h @@ -168,6 +168,7 @@ extern const char *powerpc_base_platform; #define CPU_FTR_LWSYNC ASM_CONST(0x0000000008000000) #define CPU_FTR_NOEXECUTE ASM_CONST(0x0000000010000000) #define CPU_FTR_INDEXED_DCR ASM_CONST(0x0000000020000000) +#define CPU_FTR_EMB_HV ASM_CONST(0x0000000040000000) /* * Add the 64-bit processor unique features in the top half of the word; @@ -386,11 +387,11 @@ extern const char *powerpc_base_platform; CPU_FTR_NODSISRALIGN | CPU_FTR_NOEXECUTE) #define CPU_FTRS_E500MC (CPU_FTR_USE_TB | CPU_FTR_NODSISRALIGN | \ CPU_FTR_L2CSR | CPU_FTR_LWSYNC | CPU_FTR_NOEXECUTE | \ - CPU_FTR_DBELL | CPU_FTR_DEBUG_LVL_EXC) + CPU_FTR_DBELL | CPU_FTR_DEBUG_LVL_EXC | CPU_FTR_EMB_HV) #define CPU_FTRS_E5500 (CPU_FTR_USE_TB | CPU_FTR_NODSISRALIGN | \ CPU_FTR_L2CSR | CPU_FTR_LWSYNC | CPU_FTR_NOEXECUTE | \ CPU_FTR_DBELL | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \ - CPU_FTR_DEBUG_LVL_EXC) + CPU_FTR_DEBUG_LVL_EXC | CPU_FTR_EMB_HV) #define CPU_FTRS_E6500 (CPU_FTR_USE_TB | CPU_FTR_NODSISRALIGN | \ CPU_FTR_L2CSR | CPU_FTR_LWSYNC | CPU_FTR_NOEXECUTE | \ CPU_FTR_DBELL | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \ @@ -539,6 +540,7 @@ enum { #ifdef CONFIG_PPC_E500MC CPU_FTRS_E500MC & CPU_FTRS_E5500 & CPU_FTRS_E6500 & #endif + ~CPU_FTR_EMB_HV & /* can be removed at runtime */ CPU_FTRS_POSSIBLE, }; #endif /* __powerpc64__ */ diff --git a/arch/powerpc/include/asm/kvm.h b/arch/powerpc/include/asm/kvm.h index b921c3f..1bea4d8 100644 --- a/arch/powerpc/include/asm/kvm.h +++ b/arch/powerpc/include/asm/kvm.h @@ -277,6 +277,7 @@ struct kvm_sync_regs { #define KVM_CPU_E500V2 2 #define KVM_CPU_3S_32 3 #define KVM_CPU_3S_64 4 +#define KVM_CPU_E500MC 5 /* for KVM_CAP_SPAPR_TCE */ struct kvm_create_spapr_tce { diff --git a/arch/powerpc/kernel/cpu_setup_fsl_booke.S b/arch/powerpc/kernel/cpu_setup_fsl_booke.S index 8053db0..69fdd23 100644 --- a/arch/powerpc/kernel/cpu_setup_fsl_booke.S +++ b/arch/powerpc/kernel/cpu_setup_fsl_booke.S @@ -73,6 +73,7 @@ _GLOBAL(__setup_cpu_e500v2) mtlr r4 blr _GLOBAL(__setup_cpu_e500mc) + mr r5, r4 mflr r4 bl __e500_icache_setup bl __e500_dcache_setup diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S index 7c406dd..89c6d6f 100644 --- a/arch/powerpc/kernel/head_fsl_booke.S +++ b/arch/powerpc/kernel/head_fsl_booke.S @@ -380,10 +380,16 @@ interrupt_base: mtspr SPRN_SPRG_WSCRATCH0, r10 /* Save some working registers */ mfspr r10, SPRN_SPRG_THREAD stw r11, THREAD_NORMSAVE(0)(r10) +#ifdef CONFIG_KVM_BOOKE_HV +BEGIN_FTR_SECTION + mfspr r11, SPRN_SRR1 +END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV) +#endif stw r12, THREAD_NORMSAVE(1)(r10) stw r13, THREAD_NORMSAVE(2)(r10) mfcr r13 stw r13, THREAD_NORMSAVE(3)(r10) + DO_KVM BOOKE_INTERRUPT_DTLB_MISS SPRN_SRR1 mfspr r10, SPRN_DEAR /* Get faulting address */ /* If we are faulting a kernel address, we have to use the @@ -468,10 +474,16 @@ interrupt_base: mtspr SPRN_SPRG_WSCRATCH0, r10 /* Save some working registers */ mfspr r10, SPRN_SPRG_THREAD stw r11, THREAD_NORMSAVE(0)(r10) +#ifdef CONFIG_KVM_BOOKE_HV +BEGIN_FTR_SECTION + mfspr r11, SPRN_SRR1 +END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV) +#endif stw r12, THREAD_NORMSAVE(1)(r10) stw r13, THREAD_NORMSAVE(2)(r10) mfcr r13 stw r13, THREAD_NORMSAVE(3)(r10) + DO_KVM BOOKE_INTERRUPT_ITLB_MISS SPRN_SRR1 mfspr r10, SPRN_SRR0 /* Get faulting address */ /* If we are faulting a kernel address, we have to use the @@ -580,6 +592,17 @@ interrupt_base: DEBUG_DEBUG_EXCEPTION DEBUG_CRIT_EXCEPTION + GUEST_DOORBELL_EXCEPTION + + CRITICAL_EXCEPTION(0, GUEST_DBELL_CRIT, CriticalGuestDoorbell, \ + unknown_exception) + + /* Hypercall */ + EXCEPTION(0, HV_SYSCALL, Hypercall, unknown_exception, EXC_XFER_EE) + + /* Embedded Hypervisor Privilege */ + EXCEPTION(0, HV_PRIV, Ehvpriv, unknown_exception, EXC_XFER_EE) + /* * Local functions */ @@ -883,8 +906,31 @@ _GLOBAL(__setup_e500mc_ivors) mtspr SPRN_IVOR36,r3 li r3,CriticalDoorbell@l mtspr SPRN_IVOR37,r3 + + /* + * We only want to touch IVOR38-41 if we're running on hardware + * that supports category E.HV. The architectural way to determine + * this is MMUCFG[LPIDSIZE]. + */ + mfspr r3, SPRN_MMUCFG + andis. r3, r3, MMUCFG_LPIDSIZE@h + beq no_hv + li r3,GuestDoorbell@l + mtspr SPRN_IVOR38,r3 + li r3,CriticalGuestDoorbell@l + mtspr SPRN_IVOR39,r3 + li r3,Hypercall@l + mtspr SPRN_IVOR40,r3 + li r3,Ehvpriv@l + mtspr SPRN_IVOR41,r3 +skip_hv_ivors: sync blr +no_hv: + lwz r3, CPU_SPEC_FEATURES(r5) + rlwinm r3, r3, 0, ~CPU_FTR_EMB_HV + stw r3, CPU_SPEC_FEATURES(r5) + b skip_hv_ivors /* * extern void giveup_altivec(struct task_struct *prev) diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig index 2c33cd3..58f6e68 100644 --- a/arch/powerpc/kvm/Kconfig +++ b/arch/powerpc/kvm/Kconfig @@ -109,7 +109,7 @@ config KVM_440 config KVM_EXIT_TIMING bool "Detailed exit timing" - depends on KVM_440 || KVM_E500 + depends on KVM_440 || KVM_E500 || KVM_E500MC ---help--- Calculate elapsed time for every exit/enter cycle. A per-vcpu report is available in debugfs kvm/vm#_vcpu#_timing. @@ -132,6 +132,21 @@ config KVM_E500 If unsure, say N. +config KVM_E500MC + bool "KVM support for PowerPC E500MC/E5500 processors" + depends on EXPERIMENTAL && PPC_E500MC + select KVM + select KVM_MMIO + select KVM_BOOKE_HV + ---help--- + Support running unmodified E500MC/E5500 (32-bit) guest kernels in + virtual machines on E500MC/E5500 host processors. + + This module provides access to the hardware capabilities through + a character device node named /dev/kvm. + + If unsure, say N. + source drivers/vhost/Kconfig endif # VIRTUALIZATION diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile index 3688aee..62febd7 100644 --- a/arch/powerpc/kvm/Makefile +++ b/arch/powerpc/kvm/Makefile @@ -38,6 +38,16 @@ kvm-e500-objs := \ e500_emulate.o kvm-objs-$(CONFIG_KVM_E500) := $(kvm-e500-objs) +kvm-e500mc-objs := \ + $(common-objs-y) \ + booke.o \ + booke_emulate.o \ + bookehv_interrupts.o \ + e500mc.o \ + e500_tlb.o \ + e500_emulate.o +kvm-objs-$(CONFIG_KVM_E500MC) := $(kvm-e500mc-objs) + kvm-book3s_64-objs-$(CONFIG_KVM_BOOK3S_64_PR) := \ ../../../virt/kvm/coalesced_mmio.o \ fpu.o \ @@ -89,6 +99,7 @@ kvm-objs := $(kvm-objs-m) $(kvm-objs-y) obj-$(CONFIG_KVM_440) += kvm.o obj-$(CONFIG_KVM_E500) += kvm.o +obj-$(CONFIG_KVM_E500MC) += kvm.o obj-$(CONFIG_KVM_BOOK3S_64) += kvm.o obj-$(CONFIG_KVM_BOOK3S_32) += kvm.o diff --git a/arch/powerpc/kvm/e500.h b/arch/powerpc/kvm/e500.h index ce3f163..3143085 100644 --- a/arch/powerpc/kvm/e500.h +++ b/arch/powerpc/kvm/e500.h @@ -2,7 +2,9 @@ * Copyright (C) 2008-2011 Freescale Semiconductor, Inc. All rights reserved. * * Author: Yu Liu + * Scott Wood * Ashish Kalra + * Varun Sethi * * Description: * This file is based on arch/powerpc/kvm/44x_tlb.h and @@ -100,6 +102,7 @@ static inline struct kvmppc_vcpu_e500 *to_e500(struct kvm_vcpu *vcpu) return container_of(vcpu, struct kvmppc_vcpu_e500, vcpu); } + /* This geometry is the legacy default -- can be overridden by userspace */ #define KVM_E500_TLB0_WAY_SIZE 128 #define KVM_E500_TLB0_WAY_NUM 2 @@ -250,10 +253,12 @@ static inline int tlbe_is_host_safe(const struct kvm_vcpu *vcpu, if (!get_tlb_v(tlbe)) return 0; +#ifndef CONFIG_KVM_BOOKE_HV /* Does it match current guest AS? */ /* XXX what about IS != DS? */ if (get_tlb_ts(tlbe) != !!(vcpu->arch.shared->msr & MSR_IS)) return 0; +#endif gpa = get_tlb_raddr(tlbe); if (!gfn_to_memslot(vcpu->kvm, gpa >> PAGE_SHIFT)) @@ -274,7 +279,11 @@ void kvmppc_e500_tlbil_one(struct kvmppc_vcpu_e500 *vcpu_e500, struct kvm_book3e_206_tlb_entry *gtlbe); void kvmppc_e500_tlbil_all(struct kvmppc_vcpu_e500 *vcpu_e500); -#ifdef CONFIG_KVM_E500 +#ifdef CONFIG_KVM_BOOKE_HV +#define kvmppc_e500_get_tlb_stid(vcpu, gtlbe) get_tlb_tid(gtlbe) +#define get_tlbmiss_tid(vcpu) get_cur_pid(vcpu) +#define get_tlb_sts(gtlbe) (gtlbe->mas1 & MAS1_TS) +#else unsigned int kvmppc_e500_get_tlb_stid(struct kvm_vcpu *vcpu, struct kvm_book3e_206_tlb_entry *gtlbe); @@ -288,6 +297,6 @@ static inline unsigned int get_tlbmiss_tid(struct kvm_vcpu *vcpu) /* Force TS=1 for all guest mappings. */ #define get_tlb_sts(gtlbe) (MAS1_TS) -#endif /* CONFIG_KVM_E500 */ +#endif /* !BOOKE_HV */ #endif /* KVM_E500_H */ diff --git a/arch/powerpc/kvm/e500_emulate.c b/arch/powerpc/kvm/e500_emulate.c index af02c18..98b6c1c 100644 --- a/arch/powerpc/kvm/e500_emulate.c +++ b/arch/powerpc/kvm/e500_emulate.c @@ -85,6 +85,7 @@ int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs) ulong spr_val = kvmppc_get_gpr(vcpu, rs); switch (sprn) { +#ifndef CONFIG_KVM_BOOKE_HV case SPRN_PID: kvmppc_set_pid(vcpu, spr_val); break; @@ -114,6 +115,7 @@ int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs) vcpu->arch.shared->mas7_3 &= (u64)0xffffffff; vcpu->arch.shared->mas7_3 |= (u64)spr_val << 32; break; +#endif case SPRN_L1CSR0: vcpu_e500->l1csr0 = spr_val; vcpu_e500->l1csr0 &= ~(L1CSR0_DCFI | L1CSR0_CLFC); @@ -143,7 +145,14 @@ int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs) case SPRN_IVOR35: vcpu->arch.ivor[BOOKE_IRQPRIO_PERFORMANCE_MONITOR] = spr_val; break; - +#ifdef CONFIG_KVM_BOOKE_HV + case SPRN_IVOR36: + vcpu->arch.ivor[BOOKE_IRQPRIO_DBELL] = spr_val; + break; + case SPRN_IVOR37: + vcpu->arch.ivor[BOOKE_IRQPRIO_DBELL_CRIT] = spr_val; + break; +#endif default: emulated = kvmppc_booke_emulate_mtspr(vcpu, sprn, rs); } @@ -155,9 +164,11 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt) { struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); int emulated = EMULATE_DONE; - unsigned long val; switch (sprn) { +#ifndef CONFIG_KVM_BOOKE_HV + unsigned long val; + case SPRN_PID: kvmppc_set_gpr(vcpu, rt, vcpu_e500->pid[0]); break; case SPRN_PID1: @@ -182,6 +193,7 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt) val = vcpu->arch.shared->mas7_3 >> 32; kvmppc_set_gpr(vcpu, rt, val); break; +#endif case SPRN_TLB0CFG: kvmppc_set_gpr(vcpu, rt, vcpu->arch.tlbcfg[0]); break; case SPRN_TLB1CFG: @@ -216,6 +228,14 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt) case SPRN_IVOR35: kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_PERFORMANCE_MONITOR]); break; +#ifdef CONFIG_KVM_BOOKE_HV + case SPRN_IVOR36: + kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_DBELL]); + break; + case SPRN_IVOR37: + kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_DBELL_CRIT]); + break; +#endif default: emulated = kvmppc_booke_emulate_mfspr(vcpu, sprn, rt); } diff --git a/arch/powerpc/kvm/e500_tlb.c b/arch/powerpc/kvm/e500_tlb.c index 6eb5d65..e232bb4 100644 --- a/arch/powerpc/kvm/e500_tlb.c +++ b/arch/powerpc/kvm/e500_tlb.c @@ -2,7 +2,9 @@ * Copyright (C) 2008-2011 Freescale Semiconductor, Inc. All rights reserved. * * Author: Yu Liu, yu.liu@freescale.com + * Scott Wood, scottwood@freescale.com * Ashish Kalra, ashish.kalra@freescale.com + * Varun Sethi, varun.sethi@freescale.com * * Description: * This file is based on arch/powerpc/kvm/44x_tlb.c, @@ -64,6 +66,7 @@ static inline u32 e500_shadow_mas3_attrib(u32 mas3, int usermode) /* Mask off reserved bits. */ mas3 &= MAS3_ATTRIB_MASK; +#ifndef CONFIG_KVM_BOOKE_HV if (!usermode) { /* Guest is in supervisor mode, * so we need to translate guest @@ -71,8 +74,9 @@ static inline u32 e500_shadow_mas3_attrib(u32 mas3, int usermode) mas3 &= ~E500_TLB_USER_PERM_MASK; mas3 |= (mas3 & E500_TLB_SUPER_PERM_MASK) << 1; } - - return mas3 | E500_TLB_SUPER_PERM_MASK; + mas3 |= E500_TLB_SUPER_PERM_MASK; +#endif + return mas3; } static inline u32 e500_shadow_mas2_attrib(u32 mas2, int usermode) @@ -98,7 +102,16 @@ static inline void __write_host_tlbe(struct kvm_book3e_206_tlb_entry *stlbe, mtspr(SPRN_MAS2, (unsigned long)stlbe->mas2); mtspr(SPRN_MAS3, (u32)stlbe->mas7_3); mtspr(SPRN_MAS7, (u32)(stlbe->mas7_3 >> 32)); +#ifdef CONFIG_KVM_BOOKE_HV + mtspr(SPRN_MAS8, stlbe->mas8); +#endif asm volatile("isync; tlbwe" : : : "memory"); + +#ifdef CONFIG_KVM_BOOKE_HV + /* Must clear mas8 for other host tlbwe's */ + mtspr(SPRN_MAS8, 0); + isync(); +#endif local_irq_restore(flags); trace_kvm_booke206_stlb_write(mas0, stlbe->mas8, stlbe->mas1, @@ -384,6 +397,10 @@ static inline void kvmppc_e500_setup_stlbe( e500_shadow_mas2_attrib(gtlbe->mas2, pr); stlbe->mas7_3 = ((u64)pfn << PAGE_SHIFT) | e500_shadow_mas3_attrib(gtlbe->mas7_3, pr); + +#ifdef CONFIG_KVM_BOOKE_HV + stlbe->mas8 = MAS8_TGS | vcpu->kvm->arch.lpid; +#endif } static inline void kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500, diff --git a/arch/powerpc/kvm/e500mc.c b/arch/powerpc/kvm/e500mc.c new file mode 100644 index 0000000..fe6c1de --- /dev/null +++ b/arch/powerpc/kvm/e500mc.c @@ -0,0 +1,342 @@ +/* + * Copyright (C) 2010 Freescale Semiconductor, Inc. All rights reserved. + * + * Author: Varun Sethi, + * + * Description: + * This file is derived from arch/powerpc/kvm/e500.c, + * by Yu Liu . + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include "booke.h" +#include "e500.h" + +void kvmppc_set_pending_interrupt(struct kvm_vcpu *vcpu, enum int_class type) +{ + enum ppc_dbell dbell_type; + unsigned long tag; + + switch (type) { + case INT_CLASS_NONCRIT: + dbell_type = PPC_G_DBELL; + break; + case INT_CLASS_CRIT: + dbell_type = PPC_G_DBELL_CRIT; + break; + case INT_CLASS_MC: + dbell_type = PPC_G_DBELL_MC; + break; + default: + WARN_ONCE(1, "%s: unknown int type %d\n", __func__, type); + return; + } + + + tag = PPC_DBELL_LPID(vcpu->kvm->arch.lpid) | vcpu->vcpu_id; + mb(); + ppc_msgsnd(dbell_type, 0, tag); +} + +/* gtlbe must not be mapped by more than one host tlb entry */ +void kvmppc_e500_tlbil_one(struct kvmppc_vcpu_e500 *vcpu_e500, + struct kvm_book3e_206_tlb_entry *gtlbe) +{ + unsigned int tid, ts; + u32 val, eaddr, lpid; + unsigned long flags; + + ts = get_tlb_ts(gtlbe); + tid = get_tlb_tid(gtlbe); + lpid = vcpu_e500->vcpu.kvm->arch.lpid; + + /* We search the host TLB to invalidate its shadow TLB entry */ + val = (tid << 16) | ts; + eaddr = get_tlb_eaddr(gtlbe); + + local_irq_save(flags); + + mtspr(SPRN_MAS6, val); + mtspr(SPRN_MAS5, MAS5_SGS | lpid); + + asm volatile("tlbsx 0, %[eaddr]\n" : : [eaddr] "r" (eaddr)); + val = mfspr(SPRN_MAS1); + if (val & MAS1_VALID) { + mtspr(SPRN_MAS1, val & ~MAS1_VALID); + asm volatile("tlbwe"); + } + mtspr(SPRN_MAS5, 0); + /* NOTE: tlbsx also updates mas8, so clear it for host tlbwe */ + mtspr(SPRN_MAS8, 0); + isync(); + + local_irq_restore(flags); +} + +void kvmppc_e500_tlbil_all(struct kvmppc_vcpu_e500 *vcpu_e500) +{ + unsigned long flags; + + local_irq_save(flags); + mtspr(SPRN_MAS5, MAS5_SGS | vcpu_e500->vcpu.kvm->arch.lpid); + asm volatile("tlbilxlpid"); + mtspr(SPRN_MAS5, 0); + local_irq_restore(flags); +} + +void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 pid) +{ + vcpu->arch.pid = pid; +} + +void kvmppc_mmu_msr_notify(struct kvm_vcpu *vcpu, u32 old_msr) +{ +} + +void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu) +{ + struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); + + kvmppc_booke_vcpu_load(vcpu, cpu); + + mtspr(SPRN_LPID, vcpu->kvm->arch.lpid); + mtspr(SPRN_EPCR, vcpu->arch.shadow_epcr); + mtspr(SPRN_GPIR, vcpu->vcpu_id); + mtspr(SPRN_MSRP, vcpu->arch.shadow_msrp); + mtspr(SPRN_EPLC, vcpu->arch.eplc); + mtspr(SPRN_EPSC, vcpu->arch.epsc); + + mtspr(SPRN_GIVPR, vcpu->arch.ivpr); + mtspr(SPRN_GIVOR2, vcpu->arch.ivor[BOOKE_IRQPRIO_DATA_STORAGE]); + mtspr(SPRN_GIVOR8, vcpu->arch.ivor[BOOKE_IRQPRIO_SYSCALL]); + mtspr(SPRN_GSPRG0, (unsigned long)vcpu->arch.shared->sprg0); + mtspr(SPRN_GSPRG1, (unsigned long)vcpu->arch.shared->sprg1); + mtspr(SPRN_GSPRG2, (unsigned long)vcpu->arch.shared->sprg2); + mtspr(SPRN_GSPRG3, (unsigned long)vcpu->arch.shared->sprg3); + + mtspr(SPRN_GSRR0, vcpu->arch.shared->srr0); + mtspr(SPRN_GSRR1, vcpu->arch.shared->srr1); + + mtspr(SPRN_GEPR, vcpu->arch.epr); + mtspr(SPRN_GDEAR, vcpu->arch.shared->dar); + mtspr(SPRN_GESR, vcpu->arch.shared->esr); + + if (vcpu->arch.oldpir != mfspr(SPRN_PIR)) + kvmppc_e500_tlbil_all(vcpu_e500); + + kvmppc_load_guest_fp(vcpu); +} + +void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu) +{ + vcpu->arch.eplc = mfspr(SPRN_EPLC); + vcpu->arch.epsc = mfspr(SPRN_EPSC); + + vcpu->arch.shared->sprg0 = mfspr(SPRN_GSPRG0); + vcpu->arch.shared->sprg1 = mfspr(SPRN_GSPRG1); + vcpu->arch.shared->sprg2 = mfspr(SPRN_GSPRG2); + vcpu->arch.shared->sprg3 = mfspr(SPRN_GSPRG3); + + vcpu->arch.shared->srr0 = mfspr(SPRN_GSRR0); + vcpu->arch.shared->srr1 = mfspr(SPRN_GSRR1); + + vcpu->arch.epr = mfspr(SPRN_GEPR); + vcpu->arch.shared->dar = mfspr(SPRN_GDEAR); + vcpu->arch.shared->esr = mfspr(SPRN_GESR); + + vcpu->arch.oldpir = mfspr(SPRN_PIR); + + kvmppc_booke_vcpu_put(vcpu); +} + +int kvmppc_core_check_processor_compat(void) +{ + int r; + + if (strcmp(cur_cpu_spec->cpu_name, "e500mc") == 0) + r = 0; + else if (strcmp(cur_cpu_spec->cpu_name, "e5500") == 0) + r = 0; + else + r = -ENOTSUPP; + + return r; +} + +int kvmppc_core_vcpu_setup(struct kvm_vcpu *vcpu) +{ + struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); + + vcpu->arch.shadow_epcr = SPRN_EPCR_DSIGS | SPRN_EPCR_DGTMI | \ + SPRN_EPCR_DUVD; + vcpu->arch.shadow_msrp = MSRP_UCLEP | MSRP_DEP | MSRP_PMMP; + vcpu->arch.eplc = EPC_EGS | (vcpu->kvm->arch.lpid << EPC_ELPID_SHIFT); + vcpu->arch.epsc = vcpu->arch.eplc; + + vcpu->arch.pvr = mfspr(SPRN_PVR); + vcpu_e500->svr = mfspr(SPRN_SVR); + + vcpu->arch.cpu_type = KVM_CPU_E500MC; + + return 0; +} + +void kvmppc_core_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) +{ + struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); + + sregs->u.e.features |= KVM_SREGS_E_ARCH206_MMU | KVM_SREGS_E_PM | + KVM_SREGS_E_PC; + sregs->u.e.impl_id = KVM_SREGS_E_IMPL_FSL; + + sregs->u.e.impl.fsl.features = 0; + sregs->u.e.impl.fsl.svr = vcpu_e500->svr; + sregs->u.e.impl.fsl.hid0 = vcpu_e500->hid0; + sregs->u.e.impl.fsl.mcar = vcpu_e500->mcar; + + kvmppc_get_sregs_e500_tlb(vcpu, sregs); + + sregs->u.e.ivor_high[3] = + vcpu->arch.ivor[BOOKE_IRQPRIO_PERFORMANCE_MONITOR]; + sregs->u.e.ivor_high[4] = vcpu->arch.ivor[BOOKE_IRQPRIO_DBELL]; + sregs->u.e.ivor_high[5] = vcpu->arch.ivor[BOOKE_IRQPRIO_DBELL_CRIT]; + + kvmppc_get_sregs_ivor(vcpu, sregs); +} + +int kvmppc_core_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) +{ + struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); + int ret; + + if (sregs->u.e.impl_id == KVM_SREGS_E_IMPL_FSL) { + vcpu_e500->svr = sregs->u.e.impl.fsl.svr; + vcpu_e500->hid0 = sregs->u.e.impl.fsl.hid0; + vcpu_e500->mcar = sregs->u.e.impl.fsl.mcar; + } + + ret = kvmppc_set_sregs_e500_tlb(vcpu, sregs); + if (ret < 0) + return ret; + + if (!(sregs->u.e.features & KVM_SREGS_E_IVOR)) + return 0; + + if (sregs->u.e.features & KVM_SREGS_E_PM) { + vcpu->arch.ivor[BOOKE_IRQPRIO_PERFORMANCE_MONITOR] = + sregs->u.e.ivor_high[3]; + } + + if (sregs->u.e.features & KVM_SREGS_E_PC) { + vcpu->arch.ivor[BOOKE_IRQPRIO_DBELL] = + sregs->u.e.ivor_high[4]; + vcpu->arch.ivor[BOOKE_IRQPRIO_DBELL_CRIT] = + sregs->u.e.ivor_high[5]; + } + + return kvmppc_set_sregs_ivor(vcpu, sregs); +} + +struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id) +{ + struct kvmppc_vcpu_e500 *vcpu_e500; + struct kvm_vcpu *vcpu; + int err; + + vcpu_e500 = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL); + if (!vcpu_e500) { + err = -ENOMEM; + goto out; + } + vcpu = &vcpu_e500->vcpu; + + /* Invalid PIR value -- this LPID dosn't have valid state on any cpu */ + vcpu->arch.oldpir = 0xffffffff; + + err = kvm_vcpu_init(vcpu, kvm, id); + if (err) + goto free_vcpu; + + err = kvmppc_e500_tlb_init(vcpu_e500); + if (err) + goto uninit_vcpu; + + vcpu->arch.shared = (void *)__get_free_page(GFP_KERNEL | __GFP_ZERO); + if (!vcpu->arch.shared) + goto uninit_tlb; + + return vcpu; + +uninit_tlb: + kvmppc_e500_tlb_uninit(vcpu_e500); +uninit_vcpu: + kvm_vcpu_uninit(vcpu); + +free_vcpu: + kmem_cache_free(kvm_vcpu_cache, vcpu_e500); +out: + return ERR_PTR(err); +} + +void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu) +{ + struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); + + free_page((unsigned long)vcpu->arch.shared); + kvmppc_e500_tlb_uninit(vcpu_e500); + kvm_vcpu_uninit(vcpu); + kmem_cache_free(kvm_vcpu_cache, vcpu_e500); +} + +int kvmppc_core_init_vm(struct kvm *kvm) +{ + int lpid; + + lpid = kvmppc_alloc_lpid(); + if (lpid < 0) + return lpid; + + kvm->arch.lpid = lpid; + return 0; +} + +void kvmppc_core_destroy_vm(struct kvm *kvm) +{ + kvmppc_free_lpid(kvm->arch.lpid); +} + +static int __init kvmppc_e500mc_init(void) +{ + int r; + + r = kvmppc_booke_init(); + if (r) + return r; + + kvmppc_init_lpid(64); + kvmppc_claim_lpid(0); /* host */ + + return kvm_init(NULL, sizeof(struct kvmppc_vcpu_e500), 0, THIS_MODULE); +} + +static void __exit kvmppc_e500mc_exit(void) +{ + kvmppc_booke_exit(); +} + +module_init(kvmppc_e500mc_init); +module_exit(kvmppc_e500mc_exit); diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 6a530e4..14bf8d5 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -235,7 +235,7 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_PPC_PAIRED_SINGLES: case KVM_CAP_PPC_OSI: case KVM_CAP_PPC_GET_PVINFO: -#ifdef CONFIG_KVM_E500 +#if defined(CONFIG_KVM_E500) || defined(CONFIG_KVM_E500MC) case KVM_CAP_SW_TLB: #endif r = 1; @@ -629,7 +629,7 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu, r = 0; vcpu->arch.papr_enabled = true; break; -#ifdef CONFIG_KVM_E500 +#if defined(CONFIG_KVM_E500) || defined(CONFIG_KVM_E500MC) case KVM_CAP_SW_TLB: { struct kvm_config_tlb cfg; void __user *user_ptr = (void __user *)(uintptr_t)cap->args[0]; @@ -706,7 +706,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp, break; } -#ifdef CONFIG_KVM_E500 +#if defined(CONFIG_KVM_E500) || defined(CONFIG_KVM_E500MC) case KVM_DIRTY_TLB: { struct kvm_dirty_tlb dirty; r = -EFAULT; -- cgit v0.10.2 From 4ab969199ec6a14604ceaffb21fe78cc4881d3b8 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Wed, 15 Feb 2012 13:28:48 +0000 Subject: KVM: PPC: e500mc: Add doorbell emulation support When one vcpu wants to kick another, it can issue a special IPI instruction called msgsnd. This patch emulates this instruction, its clearing counterpart and the infrastructure required to actually trigger that interrupt inside a guest vcpu. With this patch, SMP guests on e500mc work. Signed-off-by: Alexander Graf Signed-off-by: Avi Kivity diff --git a/arch/powerpc/include/asm/dbell.h b/arch/powerpc/include/asm/dbell.h index d7365b0..154c067 100644 --- a/arch/powerpc/include/asm/dbell.h +++ b/arch/powerpc/include/asm/dbell.h @@ -19,7 +19,9 @@ #define PPC_DBELL_MSG_BRDCAST (0x04000000) #define PPC_DBELL_TYPE(x) (((x) & 0xf) << (63-36)) +#define PPC_DBELL_TYPE_MASK PPC_DBELL_TYPE(0xf) #define PPC_DBELL_LPID(x) ((x) << (63 - 49)) +#define PPC_DBELL_PIR_MASK 0x3fff enum ppc_dbell { PPC_DBELL = 0, /* doorbell */ PPC_DBELL_CRIT = 1, /* critical doorbell */ diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 0b77be1..85bd5b8 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -326,6 +326,7 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu, int_class = INT_CLASS_NONCRIT; break; case BOOKE_IRQPRIO_CRITICAL: + case BOOKE_IRQPRIO_DBELL_CRIT: allowed = vcpu->arch.shared->msr & MSR_CE; allowed = allowed && !crit; msr_mask = MSR_GS | MSR_ME; @@ -342,6 +343,7 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu, keep_irq = true; /* fall through */ case BOOKE_IRQPRIO_EXTERNAL: + case BOOKE_IRQPRIO_DBELL: allowed = vcpu->arch.shared->msr & MSR_EE; allowed = allowed && !crit; msr_mask = MSR_GS | MSR_CE | MSR_ME | MSR_DE; diff --git a/arch/powerpc/kvm/e500_emulate.c b/arch/powerpc/kvm/e500_emulate.c index 98b6c1c..99155f8 100644 --- a/arch/powerpc/kvm/e500_emulate.c +++ b/arch/powerpc/kvm/e500_emulate.c @@ -14,16 +14,74 @@ #include #include +#include #include "booke.h" #include "e500.h" +#define XOP_MSGSND 206 +#define XOP_MSGCLR 238 #define XOP_TLBIVAX 786 #define XOP_TLBSX 914 #define XOP_TLBRE 946 #define XOP_TLBWE 978 #define XOP_TLBILX 18 +#ifdef CONFIG_KVM_E500MC +static int dbell2prio(ulong param) +{ + int msg = param & PPC_DBELL_TYPE_MASK; + int prio = -1; + + switch (msg) { + case PPC_DBELL_TYPE(PPC_DBELL): + prio = BOOKE_IRQPRIO_DBELL; + break; + case PPC_DBELL_TYPE(PPC_DBELL_CRIT): + prio = BOOKE_IRQPRIO_DBELL_CRIT; + break; + default: + break; + } + + return prio; +} + +static int kvmppc_e500_emul_msgclr(struct kvm_vcpu *vcpu, int rb) +{ + ulong param = vcpu->arch.gpr[rb]; + int prio = dbell2prio(param); + + if (prio < 0) + return EMULATE_FAIL; + + clear_bit(prio, &vcpu->arch.pending_exceptions); + return EMULATE_DONE; +} + +static int kvmppc_e500_emul_msgsnd(struct kvm_vcpu *vcpu, int rb) +{ + ulong param = vcpu->arch.gpr[rb]; + int prio = dbell2prio(rb); + int pir = param & PPC_DBELL_PIR_MASK; + int i; + struct kvm_vcpu *cvcpu; + + if (prio < 0) + return EMULATE_FAIL; + + kvm_for_each_vcpu(i, cvcpu, vcpu->kvm) { + int cpir = cvcpu->arch.shared->pir; + if ((param & PPC_DBELL_MSG_BRDCAST) || (cpir == pir)) { + set_bit(prio, &cvcpu->arch.pending_exceptions); + kvm_vcpu_kick(cvcpu); + } + } + + return EMULATE_DONE; +} +#endif + int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, unsigned int inst, int *advance) { @@ -36,6 +94,16 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, case 31: switch (get_xop(inst)) { +#ifdef CONFIG_KVM_E500MC + case XOP_MSGSND: + emulated = kvmppc_e500_emul_msgsnd(vcpu, get_rb(inst)); + break; + + case XOP_MSGCLR: + emulated = kvmppc_e500_emul_msgclr(vcpu, get_rb(inst)); + break; +#endif + case XOP_TLBRE: emulated = kvmppc_e500_emul_tlbre(vcpu); break; -- cgit v0.10.2 From 79300f8cb9be201f916d075b3ef2e032d83a0d75 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Wed, 15 Feb 2012 19:12:29 +0000 Subject: KVM: PPC: e500mc: implicitly set MSR_GS When setting MSR for an e500mc guest, we implicitly always set MSR_GS to make sure the guest is in guest state. Since we have this implicit rule there, we don't need to explicitly pass MSR_GS to set_msr(). Remove all explicit setters of MSR_GS. Signed-off-by: Alexander Graf Signed-off-by: Avi Kivity diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 85bd5b8..fcbe928 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -280,7 +280,7 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu, unsigned int priority) { int allowed = 0; - ulong uninitialized_var(msr_mask); + ulong msr_mask = 0; bool update_esr = false, update_dear = false; ulong crit_raw = vcpu->arch.shared->critical; ulong crit_r1 = kvmppc_get_gpr(vcpu, 1); @@ -322,20 +322,19 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu, case BOOKE_IRQPRIO_AP_UNAVAIL: case BOOKE_IRQPRIO_ALIGNMENT: allowed = 1; - msr_mask = MSR_GS | MSR_CE | MSR_ME | MSR_DE; + msr_mask = MSR_CE | MSR_ME | MSR_DE; int_class = INT_CLASS_NONCRIT; break; case BOOKE_IRQPRIO_CRITICAL: case BOOKE_IRQPRIO_DBELL_CRIT: allowed = vcpu->arch.shared->msr & MSR_CE; allowed = allowed && !crit; - msr_mask = MSR_GS | MSR_ME; + msr_mask = MSR_ME; int_class = INT_CLASS_CRIT; break; case BOOKE_IRQPRIO_MACHINE_CHECK: allowed = vcpu->arch.shared->msr & MSR_ME; allowed = allowed && !crit; - msr_mask = MSR_GS; int_class = INT_CLASS_MC; break; case BOOKE_IRQPRIO_DECREMENTER: @@ -346,13 +345,13 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu, case BOOKE_IRQPRIO_DBELL: allowed = vcpu->arch.shared->msr & MSR_EE; allowed = allowed && !crit; - msr_mask = MSR_GS | MSR_CE | MSR_ME | MSR_DE; + msr_mask = MSR_CE | MSR_ME | MSR_DE; int_class = INT_CLASS_NONCRIT; break; case BOOKE_IRQPRIO_DEBUG: allowed = vcpu->arch.shared->msr & MSR_DE; allowed = allowed && !crit; - msr_mask = MSR_GS | MSR_ME; + msr_mask = MSR_ME; int_class = INT_CLASS_CRIT; break; } -- cgit v0.10.2 From a2723ce7fe4b99bc2df492067c3f81de2ee89aab Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Wed, 15 Feb 2012 23:06:24 +0000 Subject: KVM: PPC: e500mc: Move r1/r2 restoration very early If we hit any exception whatsoever in the restore path and r1/r2 aren't the host registers, we don't get a working oops. So it's always a good idea to restore them as early as possible. This time, it actually has practical reasons to do so too, since we need to have the host page fault handler fix up our guest instruction read code. And for that to work we need r1/r2 restored. Signed-off-by: Alexander Graf Signed-off-by: Avi Kivity diff --git a/arch/powerpc/kvm/bookehv_interrupts.S b/arch/powerpc/kvm/bookehv_interrupts.S index 9eaeebd..63023ae 100644 --- a/arch/powerpc/kvm/bookehv_interrupts.S +++ b/arch/powerpc/kvm/bookehv_interrupts.S @@ -67,6 +67,12 @@ * saved in vcpu: cr, ctr, r3-r13 */ .macro kvm_handler_common intno, srr0, flags + /* Restore host stack pointer */ + PPC_STL r1, VCPU_GPR(r1)(r4) + PPC_STL r2, VCPU_GPR(r2)(r4) + PPC_LL r1, VCPU_HOST_STACK(r4) + PPC_LL r2, HOST_R2(r1) + mfspr r10, SPRN_PID lwz r8, VCPU_HOST_PID(r4) PPC_LL r11, VCPU_SHARED(r4) @@ -290,10 +296,8 @@ _GLOBAL(kvmppc_resume_host) /* Save remaining volatile guest register state to vcpu. */ mfspr r3, SPRN_VRSAVE PPC_STL r0, VCPU_GPR(r0)(r4) - PPC_STL r1, VCPU_GPR(r1)(r4) mflr r5 mfspr r6, SPRN_SPRG4 - PPC_STL r2, VCPU_GPR(r2)(r4) PPC_STL r5, VCPU_LR(r4) mfspr r7, SPRN_SPRG5 PPC_STL r3, VCPU_VRSAVE(r4) @@ -334,10 +338,6 @@ _GLOBAL(kvmppc_resume_host) mtspr SPRN_EPCR, r3 isync - /* Restore host stack pointer */ - PPC_LL r1, VCPU_HOST_STACK(r4) - PPC_LL r2, HOST_R2(r1) - /* Switch to kernel stack and jump to handler. */ PPC_LL r3, HOST_RUN(r1) mr r5, r14 /* intno */ -- cgit v0.10.2 From 1d628af78a28c77143bcdd4ed09e93bb235d4198 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Wed, 15 Feb 2012 23:24:28 +0000 Subject: KVM: PPC: e500mc: add load inst fixup There's always a chance we're unable to read a guest instruction. The guest could have its TLB mapped execute-, but not readable, something odd happens and our TLB gets flushed. So it's a good idea to be prepared for that case and have a fallback that allows us to fix things up in that case. Add fixup code that keeps guest code from potentially crashing our host kernel. Signed-off-by: Alexander Graf Signed-off-by: Avi Kivity diff --git a/arch/powerpc/kvm/bookehv_interrupts.S b/arch/powerpc/kvm/bookehv_interrupts.S index 63023ae..e9e7350 100644 --- a/arch/powerpc/kvm/bookehv_interrupts.S +++ b/arch/powerpc/kvm/bookehv_interrupts.S @@ -28,6 +28,7 @@ #include #include #include +#include #include "../kernel/head_booke.h" /* for THREAD_NORMSAVE() */ @@ -171,9 +172,36 @@ PPC_STL r30, VCPU_GPR(r30)(r4) PPC_STL r31, VCPU_GPR(r31)(r4) mtspr SPRN_EPLC, r8 + + /* disable preemption, so we are sure we hit the fixup handler */ +#ifdef CONFIG_PPC64 + clrrdi r8,r1,THREAD_SHIFT +#else + rlwinm r8,r1,0,0,31-THREAD_SHIFT /* current thread_info */ +#endif + li r7, 1 + stw r7, TI_PREEMPT(r8) + isync - lwepx r9, 0, r5 + + /* + * In case the read goes wrong, we catch it and write an invalid value + * in LAST_INST instead. + */ +1: lwepx r9, 0, r5 +2: +.section .fixup, "ax" +3: li r9, KVM_INST_FETCH_FAILED + b 2b +.previous +.section __ex_table,"a" + PPC_LONG_ALIGN + PPC_LONG 1b,3b +.previous + mtspr SPRN_EPLC, r3 + li r7, 0 + stw r7, TI_PREEMPT(r8) stw r9, VCPU_LAST_INST(r4) .endif -- cgit v0.10.2 From bf7ca4bdcb8f1eb15d11879efa824b45443ddb69 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Wed, 15 Feb 2012 23:40:00 +0000 Subject: KVM: PPC: rename CONFIG_KVM_E500 -> CONFIG_KVM_E500V2 The CONFIG_KVM_E500 option really indicates that we're running on a V2 machine, not on a machine of the generic E500 class. So indicate that properly and change the config name accordingly. Signed-off-by: Alexander Graf Signed-off-by: Avi Kivity diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig index 58f6e68..44a998d 100644 --- a/arch/powerpc/kvm/Kconfig +++ b/arch/powerpc/kvm/Kconfig @@ -109,7 +109,7 @@ config KVM_440 config KVM_EXIT_TIMING bool "Detailed exit timing" - depends on KVM_440 || KVM_E500 || KVM_E500MC + depends on KVM_440 || KVM_E500V2 || KVM_E500MC ---help--- Calculate elapsed time for every exit/enter cycle. A per-vcpu report is available in debugfs kvm/vm#_vcpu#_timing. @@ -118,14 +118,14 @@ config KVM_EXIT_TIMING If unsure, say N. -config KVM_E500 - bool "KVM support for PowerPC E500 processors" +config KVM_E500V2 + bool "KVM support for PowerPC E500v2 processors" depends on EXPERIMENTAL && E500 select KVM select KVM_MMIO ---help--- Support running unmodified E500 guest kernels in virtual machines on - E500 host processors. + E500v2 host processors. This module provides access to the hardware capabilities through a character device node named /dev/kvm. diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile index 62febd7..25225ae 100644 --- a/arch/powerpc/kvm/Makefile +++ b/arch/powerpc/kvm/Makefile @@ -36,7 +36,7 @@ kvm-e500-objs := \ e500.o \ e500_tlb.o \ e500_emulate.o -kvm-objs-$(CONFIG_KVM_E500) := $(kvm-e500-objs) +kvm-objs-$(CONFIG_KVM_E500V2) := $(kvm-e500-objs) kvm-e500mc-objs := \ $(common-objs-y) \ @@ -98,7 +98,7 @@ kvm-objs-$(CONFIG_KVM_BOOK3S_32) := $(kvm-book3s_32-objs) kvm-objs := $(kvm-objs-m) $(kvm-objs-y) obj-$(CONFIG_KVM_440) += kvm.o -obj-$(CONFIG_KVM_E500) += kvm.o +obj-$(CONFIG_KVM_E500V2) += kvm.o obj-$(CONFIG_KVM_E500MC) += kvm.o obj-$(CONFIG_KVM_BOOK3S_64) += kvm.o obj-$(CONFIG_KVM_BOOK3S_32) += kvm.o diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index fcbe928..9fcc760 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -762,7 +762,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, gpa_t gpaddr; gfn_t gfn; -#ifdef CONFIG_KVM_E500 +#ifdef CONFIG_KVM_E500V2 if (!(vcpu->arch.shared->msr & MSR_PR) && (eaddr & PAGE_MASK) == vcpu->arch.magic_page_ea) { kvmppc_map_magic(vcpu); diff --git a/arch/powerpc/kvm/e500.h b/arch/powerpc/kvm/e500.h index 3143085..7967f3f 100644 --- a/arch/powerpc/kvm/e500.h +++ b/arch/powerpc/kvm/e500.h @@ -39,7 +39,7 @@ struct tlbe_priv { struct tlbe_ref ref; /* TLB0 only -- TLB1 uses tlb_refs */ }; -#ifdef CONFIG_KVM_E500 +#ifdef CONFIG_KVM_E500V2 struct vcpu_id_table; #endif @@ -89,7 +89,7 @@ struct kvmppc_vcpu_e500 { u64 *g2h_tlb1_map; unsigned int *h2g_tlb1_rmap; -#ifdef CONFIG_KVM_E500 +#ifdef CONFIG_KVM_E500V2 u32 pid[E500_PID_NUM]; /* vcpu id table */ @@ -136,7 +136,7 @@ void kvmppc_get_sregs_e500_tlb(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs); int kvmppc_set_sregs_e500_tlb(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs); -#ifdef CONFIG_KVM_E500 +#ifdef CONFIG_KVM_E500V2 unsigned int kvmppc_e500_get_sid(struct kvmppc_vcpu_e500 *vcpu_e500, unsigned int as, unsigned int gid, unsigned int pr, int avoid_recursion); diff --git a/arch/powerpc/kvm/e500_tlb.c b/arch/powerpc/kvm/e500_tlb.c index e232bb4..279e10a 100644 --- a/arch/powerpc/kvm/e500_tlb.c +++ b/arch/powerpc/kvm/e500_tlb.c @@ -156,7 +156,7 @@ static inline void write_host_tlbe(struct kvmppc_vcpu_e500 *vcpu_e500, } } -#ifdef CONFIG_KVM_E500 +#ifdef CONFIG_KVM_E500V2 void kvmppc_map_magic(struct kvm_vcpu *vcpu) { struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 14bf8d5..58ad860 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -79,7 +79,7 @@ int kvmppc_kvm_pv(struct kvm_vcpu *vcpu) } case HC_VENDOR_KVM | KVM_HC_FEATURES: r = HC_EV_SUCCESS; -#if defined(CONFIG_PPC_BOOK3S) || defined(CONFIG_KVM_E500) +#if defined(CONFIG_PPC_BOOK3S) || defined(CONFIG_KVM_E500V2) /* XXX Missing magic page on 44x */ r2 |= (1 << KVM_FEATURE_MAGIC_PAGE); #endif @@ -235,7 +235,7 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_PPC_PAIRED_SINGLES: case KVM_CAP_PPC_OSI: case KVM_CAP_PPC_GET_PVINFO: -#if defined(CONFIG_KVM_E500) || defined(CONFIG_KVM_E500MC) +#if defined(CONFIG_KVM_E500V2) || defined(CONFIG_KVM_E500MC) case KVM_CAP_SW_TLB: #endif r = 1; @@ -629,7 +629,7 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu, r = 0; vcpu->arch.papr_enabled = true; break; -#if defined(CONFIG_KVM_E500) || defined(CONFIG_KVM_E500MC) +#if defined(CONFIG_KVM_E500V2) || defined(CONFIG_KVM_E500MC) case KVM_CAP_SW_TLB: { struct kvm_config_tlb cfg; void __user *user_ptr = (void __user *)(uintptr_t)cap->args[0]; @@ -706,7 +706,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp, break; } -#if defined(CONFIG_KVM_E500) || defined(CONFIG_KVM_E500MC) +#if defined(CONFIG_KVM_E500V2) || defined(CONFIG_KVM_E500MC) case KVM_DIRTY_TLB: { struct kvm_dirty_tlb dirty; r = -EFAULT; -- cgit v0.10.2 From b2e19b20708edb6413dea38e6285a6e546dce06b Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Wed, 15 Feb 2012 23:41:20 +0000 Subject: KVM: PPC: make e500v2 kvm and e500mc cpu mutually exclusive We can't run e500v2 kvm on e500mc kernels, so indicate that by making the 2 options mutually exclusive in kconfig. Signed-off-by: Alexander Graf Signed-off-by: Avi Kivity diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig index 44a998d..f4dacb9 100644 --- a/arch/powerpc/kvm/Kconfig +++ b/arch/powerpc/kvm/Kconfig @@ -120,7 +120,7 @@ config KVM_EXIT_TIMING config KVM_E500V2 bool "KVM support for PowerPC E500v2 processors" - depends on EXPERIMENTAL && E500 + depends on EXPERIMENTAL && E500 && !PPC_E500MC select KVM select KVM_MMIO ---help--- -- cgit v0.10.2 From acab05290696db0a5431a9ad171be649ab56e87b Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Thu, 16 Feb 2012 13:07:32 +0000 Subject: KVM: PPC: booke: remove leftover debugging The e500mc patches left some debug code in that we don't need. Remove it. Signed-off-by: Alexander Graf Signed-off-by: Avi Kivity diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 9fcc760..17d5318 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -469,11 +469,6 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) return -EINVAL; } - if (!current->thread.kvm_vcpu) { - WARN(1, "no vcpu\n"); - return -EPERM; - } - local_irq_disable(); kvmppc_core_prepare_to_enter(vcpu); -- cgit v0.10.2 From d1ff54992d3008f4253ab3176913bb85d770e935 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Thu, 16 Feb 2012 13:24:03 +0000 Subject: KVM: PPC: booke: deliver program int on emulation failure When we fail to emulate an instruction for the guest, we better go in and tell it that we failed to emulate it, by throwing an illegal instruction exception. Please beware that we basically never get around to telling the guest that we failed thanks to the debugging code right above it. If user space however decides that it wants to ignore the debug, we would at least do "the right thing" afterwards. Signed-off-by: Alexander Graf Signed-off-by: Avi Kivity diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 17d5318..9979be1 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -545,13 +545,13 @@ static int emulation_exit(struct kvm_run *run, struct kvm_vcpu *vcpu) return RESUME_HOST; case EMULATE_FAIL: - /* XXX Deliver Program interrupt to guest. */ printk(KERN_CRIT "%s: emulation at %lx failed (%08x)\n", __func__, vcpu->arch.pc, vcpu->arch.last_inst); /* For debugging, encode the failing instruction and * report it to userspace. */ run->hw.hardware_exit_reason = ~0ULL << 32; run->hw.hardware_exit_reason |= vcpu->arch.last_inst; + kvmppc_core_queue_program(vcpu, ESR_PIL); return RESUME_HOST; default: -- cgit v0.10.2 From a8e4ef841429d338b8700998afb3dfc18c1f25d9 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Thu, 16 Feb 2012 14:07:37 +0000 Subject: KVM: PPC: booke: rework rescheduling checks Instead of checking whether we should reschedule only when we exited due to an interrupt, let's always check before entering the guest back again. This gets the target more in line with the other archs. Also while at it, generalize the whole thing so that eventually we could have a single kvmppc_prepare_to_enter function for all ppc targets that does signal and reschedule checking for us. Signed-off-by: Alexander Graf Signed-off-by: Avi Kivity diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index e709975..7f0a3da 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -95,7 +95,7 @@ extern int kvmppc_core_vcpu_translate(struct kvm_vcpu *vcpu, extern void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu); extern void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu); -extern void kvmppc_core_prepare_to_enter(struct kvm_vcpu *vcpu); +extern int kvmppc_core_prepare_to_enter(struct kvm_vcpu *vcpu); extern int kvmppc_core_pending_dec(struct kvm_vcpu *vcpu); extern void kvmppc_core_queue_program(struct kvm_vcpu *vcpu, ulong flags); extern void kvmppc_core_queue_dec(struct kvm_vcpu *vcpu); diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index 7d54f4e..c8ead7b 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -258,7 +258,7 @@ static bool clear_irqprio(struct kvm_vcpu *vcpu, unsigned int priority) return true; } -void kvmppc_core_prepare_to_enter(struct kvm_vcpu *vcpu) +int kvmppc_core_prepare_to_enter(struct kvm_vcpu *vcpu) { unsigned long *pending = &vcpu->arch.pending_exceptions; unsigned long old_pending = vcpu->arch.pending_exceptions; @@ -283,6 +283,8 @@ void kvmppc_core_prepare_to_enter(struct kvm_vcpu *vcpu) /* Tell the guest about our interrupt status */ kvmppc_update_int_pending(vcpu, *pending, old_pending); + + return 0; } pfn_t kvmppc_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn) diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 9979be1..3da0e42 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -439,8 +439,9 @@ static void kvmppc_core_check_exceptions(struct kvm_vcpu *vcpu) } /* Check pending exceptions and deliver one, if possible. */ -void kvmppc_core_prepare_to_enter(struct kvm_vcpu *vcpu) +int kvmppc_core_prepare_to_enter(struct kvm_vcpu *vcpu) { + int r = 0; WARN_ON_ONCE(!irqs_disabled()); kvmppc_core_check_exceptions(vcpu); @@ -451,8 +452,46 @@ void kvmppc_core_prepare_to_enter(struct kvm_vcpu *vcpu) local_irq_disable(); kvmppc_set_exit_type(vcpu, EMULATED_MTMSRWE_EXITS); - kvmppc_core_check_exceptions(vcpu); + r = 1; }; + + return r; +} + +/* + * Common checks before entering the guest world. Call with interrupts + * disabled. + * + * returns !0 if a signal is pending and check_signal is true + */ +static int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu, bool check_signal) +{ + int r = 0; + + WARN_ON_ONCE(!irqs_disabled()); + while (true) { + if (need_resched()) { + local_irq_enable(); + cond_resched(); + local_irq_disable(); + continue; + } + + if (check_signal && signal_pending(current)) { + r = 1; + break; + } + + if (kvmppc_core_prepare_to_enter(vcpu)) { + /* interrupts got enabled in between, so we + are back at square 1 */ + continue; + } + + break; + } + + return r; } int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) @@ -470,10 +509,7 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) } local_irq_disable(); - - kvmppc_core_prepare_to_enter(vcpu); - - if (signal_pending(current)) { + if (kvmppc_prepare_to_enter(vcpu, true)) { kvm_run->exit_reason = KVM_EXIT_INTR; ret = -EINTR; goto out; @@ -598,25 +634,21 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, switch (exit_nr) { case BOOKE_INTERRUPT_MACHINE_CHECK: - kvm_resched(vcpu); r = RESUME_GUEST; break; case BOOKE_INTERRUPT_EXTERNAL: kvmppc_account_exit(vcpu, EXT_INTR_EXITS); - kvm_resched(vcpu); r = RESUME_GUEST; break; case BOOKE_INTERRUPT_DECREMENTER: kvmppc_account_exit(vcpu, DEC_EXITS); - kvm_resched(vcpu); r = RESUME_GUEST; break; case BOOKE_INTERRUPT_DOORBELL: kvmppc_account_exit(vcpu, DBELL_EXITS); - kvm_resched(vcpu); r = RESUME_GUEST; break; @@ -865,19 +897,15 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, BUG(); } + /* + * To avoid clobbering exit_reason, only check for signals if we + * aren't already exiting to userspace for some other reason. + */ local_irq_disable(); - - kvmppc_core_prepare_to_enter(vcpu); - - if (!(r & RESUME_HOST)) { - /* To avoid clobbering exit_reason, only check for signals if - * we aren't already exiting to userspace for some other - * reason. */ - if (signal_pending(current)) { - run->exit_reason = KVM_EXIT_INTR; - r = (-EINTR << 2) | RESUME_HOST | (r & RESUME_FLAG_NV); - kvmppc_account_exit(vcpu, SIGNAL_EXITS); - } + if (kvmppc_prepare_to_enter(vcpu, !(r & RESUME_HOST))) { + run->exit_reason = KVM_EXIT_INTR; + r = (-EINTR << 2) | RESUME_HOST | (r & RESUME_FLAG_NV); + kvmppc_account_exit(vcpu, SIGNAL_EXITS); } return r; -- cgit v0.10.2 From 8b3a00fcd3c9ea4e2cbae12af3cd8c9d7d1e109a Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Thu, 16 Feb 2012 14:12:46 +0000 Subject: KVM: PPC: booke: BOOKE_IRQPRIO_MAX is n+1 The semantics of BOOKE_IRQPRIO_MAX changed to denote the highest available irqprio + 1, so let's reflect that in the code too. Signed-off-by: Alexander Graf Signed-off-by: Avi Kivity diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 3da0e42..11b0625 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -425,7 +425,7 @@ static void kvmppc_core_check_exceptions(struct kvm_vcpu *vcpu) } priority = __ffs(*pending); - while (priority <= BOOKE_IRQPRIO_MAX) { + while (priority < BOOKE_IRQPRIO_MAX) { if (kvmppc_booke_irqprio_deliver(vcpu, priority)) break; -- cgit v0.10.2 From 73ede8d32be6adc298fe3c2716e77c352c504c8c Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Thu, 16 Feb 2012 14:34:55 +0000 Subject: KVM: PPC: bookehv: fix exit timing When using exit timing stats, we clobber r9 in the NEED_EMU case, so better move that part down a few lines and fix it that way. Signed-off-by: Alexander Graf Signed-off-by: Avi Kivity diff --git a/arch/powerpc/kvm/bookehv_interrupts.S b/arch/powerpc/kvm/bookehv_interrupts.S index e9e7350..e4a1172 100644 --- a/arch/powerpc/kvm/bookehv_interrupts.S +++ b/arch/powerpc/kvm/bookehv_interrupts.S @@ -83,10 +83,6 @@ stw r10, VCPU_GUEST_PID(r4) mtspr SPRN_PID, r8 - .if \flags & NEED_EMU - lwz r9, VCPU_KVM(r4) - .endif - #ifdef CONFIG_KVM_EXIT_TIMING /* save exit time */ 1: mfspr r7, SPRN_TBRU @@ -98,6 +94,10 @@ PPC_STL r9, VCPU_TIMING_EXIT_TBU(r4) #endif + .if \flags & NEED_EMU + lwz r9, VCPU_KVM(r4) + .endif + oris r8, r6, MSR_CE@h #ifndef CONFIG_64BIT stw r6, (VCPU_SHARED_MSR + 4)(r11) -- cgit v0.10.2 From 8764b46ee3873b685a7823fc79388bae7d19e51e Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Thu, 16 Feb 2012 14:40:26 +0000 Subject: KVM: PPC: bookehv: remove negation for CONFIG_64BIT Instead if doing #ifndef CONFIG_64BIT ... #else ... #endif we should rather do #ifdef CONFIG_64BIT ... #else ... #endif which is a lot easier to read. Change the bookehv implementation to stick with this rule. Signed-off-by: Alexander Graf Signed-off-by: Avi Kivity diff --git a/arch/powerpc/kvm/bookehv_interrupts.S b/arch/powerpc/kvm/bookehv_interrupts.S index e4a1172..af771de 100644 --- a/arch/powerpc/kvm/bookehv_interrupts.S +++ b/arch/powerpc/kvm/bookehv_interrupts.S @@ -99,10 +99,10 @@ .endif oris r8, r6, MSR_CE@h -#ifndef CONFIG_64BIT - stw r6, (VCPU_SHARED_MSR + 4)(r11) -#else +#ifdef CONFIG_64BIT std r6, (VCPU_SHARED_MSR)(r11) +#else + stw r6, (VCPU_SHARED_MSR + 4)(r11) #endif ori r8, r8, MSR_ME | MSR_RI PPC_STL r5, VCPU_PC(r4) @@ -344,10 +344,10 @@ _GLOBAL(kvmppc_resume_host) stw r5, VCPU_SHARED_MAS0(r11) mfspr r7, SPRN_MAS2 stw r6, VCPU_SHARED_MAS1(r11) -#ifndef CONFIG_64BIT - stw r7, (VCPU_SHARED_MAS2 + 4)(r11) -#else +#ifdef CONFIG_64BIT std r7, (VCPU_SHARED_MAS2)(r11) +#else + stw r7, (VCPU_SHARED_MAS2 + 4)(r11) #endif mfspr r5, SPRN_MAS3 mfspr r6, SPRN_MAS4 @@ -530,10 +530,10 @@ lightweight_exit: stw r3, VCPU_HOST_MAS6(r4) lwz r3, VCPU_SHARED_MAS0(r11) lwz r5, VCPU_SHARED_MAS1(r11) -#ifndef CONFIG_64BIT - lwz r6, (VCPU_SHARED_MAS2 + 4)(r11) -#else +#ifdef CONFIG_64BIT ld r6, (VCPU_SHARED_MAS2)(r11) +#else + lwz r6, (VCPU_SHARED_MAS2 + 4)(r11) #endif lwz r7, VCPU_SHARED_MAS7_3+4(r11) lwz r8, VCPU_SHARED_MAS4(r11) @@ -572,10 +572,10 @@ lightweight_exit: PPC_LL r6, VCPU_CTR(r4) PPC_LL r7, VCPU_CR(r4) PPC_LL r8, VCPU_PC(r4) -#ifndef CONFIG_64BIT - lwz r9, (VCPU_SHARED_MSR + 4)(r11) -#else +#ifdef CONFIG_64BIT ld r9, (VCPU_SHARED_MSR)(r11) +#else + lwz r9, (VCPU_SHARED_MSR + 4)(r11) #endif PPC_LL r0, VCPU_GPR(r0)(r4) PPC_LL r1, VCPU_GPR(r1)(r4) -- cgit v0.10.2 From 8a3da55784cf2aea8ef6acdd1f50e5ad52f76574 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Thu, 16 Feb 2012 14:45:33 +0000 Subject: KVM: PPC: bookehv: remove SET_VCPU The SET_VCPU macro is a leftover from times when the vcpu struct wasn't stored in the thread on vcpu_load/put. It's not needed anymore. Remove it. Signed-off-by: Alexander Graf Signed-off-by: Avi Kivity diff --git a/arch/powerpc/kvm/bookehv_interrupts.S b/arch/powerpc/kvm/bookehv_interrupts.S index af771de..dfa606d 100644 --- a/arch/powerpc/kvm/bookehv_interrupts.S +++ b/arch/powerpc/kvm/bookehv_interrupts.S @@ -35,9 +35,6 @@ #define GET_VCPU(vcpu, thread) \ PPC_LL vcpu, THREAD_KVM_VCPU(thread) -#define SET_VCPU(vcpu) \ - PPC_STL vcpu, (THREAD + THREAD_KVM_VCPU)(r2) - #define LONGBYTES (BITS_PER_LONG / 8) #define VCPU_GPR(n) (VCPU_GPRS + (n * LONGBYTES)) @@ -517,11 +514,6 @@ lightweight_exit: lwz r3, VCPU_GUEST_PID(r4) mtspr SPRN_PID, r3 - /* Save vcpu pointer for the exception handlers - * must be done before loading guest r2. - */ -// SET_VCPU(r4) - PPC_LL r11, VCPU_SHARED(r4) /* Save host mas4 and mas6 and load guest MAS registers */ mfspr r3, SPRN_MAS4 -- cgit v0.10.2 From e9ba39c1f3dff93efddacbd4569ada05633e2a9b Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Thu, 16 Feb 2012 14:53:04 +0000 Subject: KVM: PPC: bookehv: disable MAS register updates early We need to make sure that no MAS updates happen automatically while we have the guest MAS registers loaded. So move the disabling code a bit higher up so that it covers the full time we have guest values in MAS registers. The race this patch fixes should never occur, but it makes the code a bit more logical to do it this way around. Signed-off-by: Alexander Graf Signed-off-by: Avi Kivity diff --git a/arch/powerpc/kvm/bookehv_interrupts.S b/arch/powerpc/kvm/bookehv_interrupts.S index dfa606d..3a1db90 100644 --- a/arch/powerpc/kvm/bookehv_interrupts.S +++ b/arch/powerpc/kvm/bookehv_interrupts.S @@ -358,6 +358,7 @@ _GLOBAL(kvmppc_resume_host) mtspr SPRN_MAS4, r6 stw r5, VCPU_SHARED_MAS7_3+0(r11) mtspr SPRN_MAS6, r8 + /* Enable MAS register updates via exception */ mfspr r3, SPRN_EPCR rlwinm r3, r3, 0, ~SPRN_EPCR_DMIUH mtspr SPRN_EPCR, r3 @@ -515,6 +516,11 @@ lightweight_exit: mtspr SPRN_PID, r3 PPC_LL r11, VCPU_SHARED(r4) + /* Disable MAS register updates via exception */ + mfspr r3, SPRN_EPCR + oris r3, r3, SPRN_EPCR_DMIUH@h + mtspr SPRN_EPCR, r3 + isync /* Save host mas4 and mas6 and load guest MAS registers */ mfspr r3, SPRN_MAS4 stw r3, VCPU_HOST_MAS4(r4) @@ -538,10 +544,6 @@ lightweight_exit: lwz r5, VCPU_SHARED_MAS7_3+0(r11) mtspr SPRN_MAS6, r3 mtspr SPRN_MAS7, r5 - /* Disable MAS register updates via exception */ - mfspr r3, SPRN_EPCR - oris r3, r3, SPRN_EPCR_DMIUH@h - mtspr SPRN_EPCR, r3 /* * Host interrupt handlers may have clobbered these guest-readable -- cgit v0.10.2 From 5fd8505ea4b7456d57eacefbf00b669f15f5f0c0 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Thu, 16 Feb 2012 15:04:54 +0000 Subject: KVM: PPC: bookehv: add comment about shadow_msr For BookE HV the guest visible MSR is shared->msr and is identical to the MSR that is in use while the guest is running, because we can't trap reads from/to MSR. So shadow_msr is unused there. Indicate that with a comment. Signed-off-by: Alexander Graf Signed-off-by: Avi Kivity diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index e645623..97ecdaf 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -386,6 +386,7 @@ struct kvm_vcpu_arch { #endif u32 vrsave; /* also USPRG0 */ u32 mmucr; + /* shadow_msr is unused for BookE HV */ ulong shadow_msr; ulong csrr0; ulong csrr1; -- cgit v0.10.2 From c35c9d84cf141ebf05e2e481c0faccedc4f8f7ff Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Mon, 20 Feb 2012 12:21:18 +0100 Subject: KVM: PPC: booke: Readd debug abort code for machine check When during guest execution we get a machine check interrupt, we don't know how to handle it yet. So let's add the error printing code back again that we dropped accidently earlier and tell user space that something went really wrong. Signed-off-by: Alexander Graf Signed-off-by: Avi Kivity diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 11b0625..af02d9d 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -634,7 +634,12 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, switch (exit_nr) { case BOOKE_INTERRUPT_MACHINE_CHECK: - r = RESUME_GUEST; + printk("MACHINE CHECK: %lx\n", mfspr(SPRN_MCSR)); + kvmppc_dump_vcpu(vcpu); + /* For debugging, send invalid exit reason to user space */ + run->hw.hardware_exit_reason = ~1ULL << 32; + run->hw.hardware_exit_reason |= mfspr(SPRN_MCSR); + r = RESUME_HOST; break; case BOOKE_INTERRUPT_EXTERNAL: -- cgit v0.10.2 From 0268597c811ccf55e0bda20907c1a9e6001365cf Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Mon, 20 Feb 2012 12:33:22 +0100 Subject: KVM: PPC: booke: add GS documentation for program interrupt The comment for program interrupts triggered when using bookehv was misleading. Update it to mention why MSR_GS indicates that we have to inject an interrupt into the guest again, not emulate it. Signed-off-by: Alexander Graf Signed-off-by: Avi Kivity diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index af02d9d..7df3f3a 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -685,8 +685,14 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, case BOOKE_INTERRUPT_PROGRAM: if (vcpu->arch.shared->msr & (MSR_PR | MSR_GS)) { - /* Program traps generated by user-level software must be handled - * by the guest kernel. */ + /* + * Program traps generated by user-level software must + * be handled by the guest kernel. + * + * In GS mode, hypervisor privileged instructions trap + * on BOOKE_INTERRUPT_HV_PRIV, not here, so these are + * actual program interrupts, handled by the guest. + */ kvmppc_core_queue_program(vcpu, vcpu->arch.fault_esr); r = RESUME_GUEST; kvmppc_account_exit(vcpu, USR_PR_INST); -- cgit v0.10.2 From 55cdf08b9a6c99f1335fa6ad42372dcfb3715b56 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Mon, 20 Feb 2012 12:39:36 +0100 Subject: KVM: PPC: bookehv: remove unused code There was some unused code in the exit code path that must have been a leftover from earlier iterations. While it did no harm, it's superfluous and thus should be removed. Signed-off-by: Alexander Graf Signed-off-by: Avi Kivity diff --git a/arch/powerpc/kvm/bookehv_interrupts.S b/arch/powerpc/kvm/bookehv_interrupts.S index 3a1db90..2d1f56c 100644 --- a/arch/powerpc/kvm/bookehv_interrupts.S +++ b/arch/powerpc/kvm/bookehv_interrupts.S @@ -91,10 +91,6 @@ PPC_STL r9, VCPU_TIMING_EXIT_TBU(r4) #endif - .if \flags & NEED_EMU - lwz r9, VCPU_KVM(r4) - .endif - oris r8, r6, MSR_CE@h #ifdef CONFIG_64BIT std r6, (VCPU_SHARED_MSR)(r11) @@ -112,9 +108,6 @@ * appropriate for the exception type). */ cmpw r6, r8 - .if \flags & NEED_EMU - lwz r9, KVM_LPID(r9) - .endif beq 1f mfmsr r7 .if \srr0 != SPRN_MCSRR0 && \srr0 != SPRN_CSRR0 -- cgit v0.10.2 From c6b3733bef2ffece49336dba7220fefdae5fa908 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Mon, 20 Feb 2012 17:48:47 +0100 Subject: KVM: PPC: e500: fix typo in tlb code The tlbncfg registers should be populated with their respective TLB's values. Fix the obvious typo. Signed-off-by: Alexander Graf Signed-off-by: Avi Kivity diff --git a/arch/powerpc/kvm/e500_tlb.c b/arch/powerpc/kvm/e500_tlb.c index 279e10a..e05232b 100644 --- a/arch/powerpc/kvm/e500_tlb.c +++ b/arch/powerpc/kvm/e500_tlb.c @@ -1268,8 +1268,8 @@ int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *vcpu_e500) vcpu->arch.tlbcfg[1] = mfspr(SPRN_TLB1CFG) & ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC); - vcpu->arch.tlbcfg[0] |= vcpu_e500->gtlb_params[1].entries; - vcpu->arch.tlbcfg[0] |= + vcpu->arch.tlbcfg[1] |= vcpu_e500->gtlb_params[1].entries; + vcpu->arch.tlbcfg[1] |= vcpu_e500->gtlb_params[1].ways << TLBnCFG_ASSOC_SHIFT; return 0; -- cgit v0.10.2 From 95f2e921446dbc2e9a785734049ee349a67434bd Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Mon, 20 Feb 2012 22:45:12 +0100 Subject: KVM: PPC: booke: Support perfmon interrupts When during guest context we get a performance monitor interrupt, we currently bail out and oops. Let's route it to its correct handler instead. Signed-off-by: Alexander Graf Signed-off-by: Avi Kivity diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 7df3f3a..ee39c8a 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -679,6 +679,10 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, r = RESUME_GUEST; break; + case BOOKE_INTERRUPT_PERFORMANCE_MONITOR: + r = RESUME_GUEST; + break; + case BOOKE_INTERRUPT_HV_PRIV: r = emulation_exit(run, vcpu); break; -- cgit v0.10.2 From 4e642ccbd6a3f1410155c7700f54b56b6c7df9a2 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Mon, 20 Feb 2012 23:57:26 +0100 Subject: KVM: PPC: booke: expose good state on irq reinject When reinjecting an interrupt into the host interrupt handler after we're back in host kernel land, we need to tell the kernel where the interrupt happened. We can't tell it that we were in guest state, because that might lead to random code walking host addresses. So instead, we tell it that we came from the interrupt reinject code. This helps getting reasonable numbers out of perf. Signed-off-by: Alexander Graf Signed-off-by: Avi Kivity diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index ee39c8a..488936b 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -595,37 +595,63 @@ static int emulation_exit(struct kvm_run *run, struct kvm_vcpu *vcpu) } } -/** - * kvmppc_handle_exit - * - * Return value is in the form (errcode<<2 | RESUME_FLAG_HOST | RESUME_FLAG_NV) - */ -int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, - unsigned int exit_nr) +static void kvmppc_fill_pt_regs(struct pt_regs *regs) { - int r = RESUME_HOST; + ulong r1, ip, msr, lr; + + asm("mr %0, 1" : "=r"(r1)); + asm("mflr %0" : "=r"(lr)); + asm("mfmsr %0" : "=r"(msr)); + asm("bl 1f; 1: mflr %0" : "=r"(ip)); + + memset(regs, 0, sizeof(*regs)); + regs->gpr[1] = r1; + regs->nip = ip; + regs->msr = msr; + regs->link = lr; +} - /* update before a new last_exit_type is rewritten */ - kvmppc_update_timing_stats(vcpu); +static void kvmppc_restart_interrupt(struct kvm_vcpu *vcpu, + unsigned int exit_nr) +{ + struct pt_regs regs; switch (exit_nr) { case BOOKE_INTERRUPT_EXTERNAL: - do_IRQ(current->thread.regs); + kvmppc_fill_pt_regs(®s); + do_IRQ(®s); break; - case BOOKE_INTERRUPT_DECREMENTER: - timer_interrupt(current->thread.regs); + kvmppc_fill_pt_regs(®s); + timer_interrupt(®s); break; - #if defined(CONFIG_PPC_FSL_BOOK3E) || defined(CONFIG_PPC_BOOK3E_64) case BOOKE_INTERRUPT_DOORBELL: - doorbell_exception(current->thread.regs); + kvmppc_fill_pt_regs(®s); + doorbell_exception(®s); break; #endif case BOOKE_INTERRUPT_MACHINE_CHECK: /* FIXME */ break; } +} + +/** + * kvmppc_handle_exit + * + * Return value is in the form (errcode<<2 | RESUME_FLAG_HOST | RESUME_FLAG_NV) + */ +int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, + unsigned int exit_nr) +{ + int r = RESUME_HOST; + + /* update before a new last_exit_type is rewritten */ + kvmppc_update_timing_stats(vcpu); + + /* restart interrupts if they were meant for the host */ + kvmppc_restart_interrupt(vcpu, exit_nr); local_irq_enable(); -- cgit v0.10.2 From 7cc1e8ee78f469ecff8aa29465325f1e4c5e1b5f Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Wed, 22 Feb 2012 16:26:34 +0100 Subject: KVM: PPC: booke: Reinject performance monitor interrupts When we get a performance monitor interrupt, we need to make sure that the host receives it. So reinject it like we reinject the other host destined interrupts. Signed-off-by: Alexander Graf Signed-off-by: Avi Kivity diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h index 51010bf..c9aac24 100644 --- a/arch/powerpc/include/asm/hw_irq.h +++ b/arch/powerpc/include/asm/hw_irq.h @@ -33,6 +33,7 @@ extern void __replay_interrupt(unsigned int vector); extern void timer_interrupt(struct pt_regs *); +extern void performance_monitor_exception(struct pt_regs *regs); #ifdef CONFIG_PPC64 #include diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 488936b..8e8aa4c 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -634,6 +634,10 @@ static void kvmppc_restart_interrupt(struct kvm_vcpu *vcpu, case BOOKE_INTERRUPT_MACHINE_CHECK: /* FIXME */ break; + case BOOKE_INTERRUPT_PERFORMANCE_MONITOR: + kvmppc_fill_pt_regs(®s); + performance_monitor_exception(®s); + break; } } -- cgit v0.10.2 From 03660ba27020250eae0b5a2722e0c7bec4968c3c Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Tue, 28 Feb 2012 12:00:41 +0100 Subject: KVM: PPC: Booke: only prepare to enter when we enter So far, we've always called prepare_to_enter even when all we did was return to the host. This patch changes that semantic to only call prepare_to_enter when we actually want to get back into the guest. Signed-off-by: Alexander Graf Signed-off-by: Avi Kivity diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 8e8aa4c..9f27258 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -464,7 +464,7 @@ int kvmppc_core_prepare_to_enter(struct kvm_vcpu *vcpu) * * returns !0 if a signal is pending and check_signal is true */ -static int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu, bool check_signal) +static int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu) { int r = 0; @@ -477,7 +477,7 @@ static int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu, bool check_signal) continue; } - if (check_signal && signal_pending(current)) { + if (signal_pending(current)) { r = 1; break; } @@ -509,7 +509,7 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) } local_irq_disable(); - if (kvmppc_prepare_to_enter(vcpu, true)) { + if (kvmppc_prepare_to_enter(vcpu)) { kvm_run->exit_reason = KVM_EXIT_INTR; ret = -EINTR; goto out; @@ -946,11 +946,13 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, * To avoid clobbering exit_reason, only check for signals if we * aren't already exiting to userspace for some other reason. */ - local_irq_disable(); - if (kvmppc_prepare_to_enter(vcpu, !(r & RESUME_HOST))) { - run->exit_reason = KVM_EXIT_INTR; - r = (-EINTR << 2) | RESUME_HOST | (r & RESUME_FLAG_NV); - kvmppc_account_exit(vcpu, SIGNAL_EXITS); + if (!(r & RESUME_HOST)) { + local_irq_disable(); + if (kvmppc_prepare_to_enter(vcpu)) { + run->exit_reason = KVM_EXIT_INTR; + r = (-EINTR << 2) | RESUME_HOST | (r & RESUME_FLAG_NV); + kvmppc_account_exit(vcpu, SIGNAL_EXITS); + } } return r; -- cgit v0.10.2 From 3aaefef200f618dc455cdf18053a7aeb262b5a11 Mon Sep 17 00:00:00 2001 From: Matt Evans Date: Mon, 30 Jan 2012 20:25:31 +0000 Subject: KVM: PPC: Book3s: PR: Add SPAPR H_BULK_REMOVE support SPAPR support includes various in-kernel hypercalls, improving performance by cutting out the exit to userspace. H_BULK_REMOVE is implemented in this patch. Signed-off-by: Matt Evans Signed-off-by: Alexander Graf Signed-off-by: Avi Kivity diff --git a/arch/powerpc/kvm/book3s_pr_papr.c b/arch/powerpc/kvm/book3s_pr_papr.c index b958932..6d1bfe2 100644 --- a/arch/powerpc/kvm/book3s_pr_papr.c +++ b/arch/powerpc/kvm/book3s_pr_papr.c @@ -98,6 +98,83 @@ static int kvmppc_h_pr_remove(struct kvm_vcpu *vcpu) return EMULATE_DONE; } +/* Request defs for kvmppc_h_pr_bulk_remove() */ +#define H_BULK_REMOVE_TYPE 0xc000000000000000ULL +#define H_BULK_REMOVE_REQUEST 0x4000000000000000ULL +#define H_BULK_REMOVE_RESPONSE 0x8000000000000000ULL +#define H_BULK_REMOVE_END 0xc000000000000000ULL +#define H_BULK_REMOVE_CODE 0x3000000000000000ULL +#define H_BULK_REMOVE_SUCCESS 0x0000000000000000ULL +#define H_BULK_REMOVE_NOT_FOUND 0x1000000000000000ULL +#define H_BULK_REMOVE_PARM 0x2000000000000000ULL +#define H_BULK_REMOVE_HW 0x3000000000000000ULL +#define H_BULK_REMOVE_RC 0x0c00000000000000ULL +#define H_BULK_REMOVE_FLAGS 0x0300000000000000ULL +#define H_BULK_REMOVE_ABSOLUTE 0x0000000000000000ULL +#define H_BULK_REMOVE_ANDCOND 0x0100000000000000ULL +#define H_BULK_REMOVE_AVPN 0x0200000000000000ULL +#define H_BULK_REMOVE_PTEX 0x00ffffffffffffffULL +#define H_BULK_REMOVE_MAX_BATCH 4 + +static int kvmppc_h_pr_bulk_remove(struct kvm_vcpu *vcpu) +{ + int i; + int paramnr = 4; + int ret = H_SUCCESS; + + for (i = 0; i < H_BULK_REMOVE_MAX_BATCH; i++) { + unsigned long tsh = kvmppc_get_gpr(vcpu, paramnr+(2*i)); + unsigned long tsl = kvmppc_get_gpr(vcpu, paramnr+(2*i)+1); + unsigned long pteg, rb, flags; + unsigned long pte[2]; + unsigned long v = 0; + + if ((tsh & H_BULK_REMOVE_TYPE) == H_BULK_REMOVE_END) { + break; /* Exit success */ + } else if ((tsh & H_BULK_REMOVE_TYPE) != + H_BULK_REMOVE_REQUEST) { + ret = H_PARAMETER; + break; /* Exit fail */ + } + + tsh &= H_BULK_REMOVE_PTEX | H_BULK_REMOVE_FLAGS; + tsh |= H_BULK_REMOVE_RESPONSE; + + if ((tsh & H_BULK_REMOVE_ANDCOND) && + (tsh & H_BULK_REMOVE_AVPN)) { + tsh |= H_BULK_REMOVE_PARM; + kvmppc_set_gpr(vcpu, paramnr+(2*i), tsh); + ret = H_PARAMETER; + break; /* Exit fail */ + } + + pteg = get_pteg_addr(vcpu, tsh & H_BULK_REMOVE_PTEX); + copy_from_user(pte, (void __user *)pteg, sizeof(pte)); + + /* tsl = AVPN */ + flags = (tsh & H_BULK_REMOVE_FLAGS) >> 26; + + if ((pte[0] & HPTE_V_VALID) == 0 || + ((flags & H_AVPN) && (pte[0] & ~0x7fUL) != tsl) || + ((flags & H_ANDCOND) && (pte[0] & tsl) != 0)) { + tsh |= H_BULK_REMOVE_NOT_FOUND; + } else { + /* Splat the pteg in (userland) hpt */ + copy_to_user((void __user *)pteg, &v, sizeof(v)); + + rb = compute_tlbie_rb(pte[0], pte[1], + tsh & H_BULK_REMOVE_PTEX); + vcpu->arch.mmu.tlbie(vcpu, rb, rb & 1 ? true : false); + tsh |= H_BULK_REMOVE_SUCCESS; + tsh |= (pte[1] & (HPTE_R_C | HPTE_R_R)) << 43; + } + kvmppc_set_gpr(vcpu, paramnr+(2*i), tsh); + } + kvmppc_set_gpr(vcpu, 3, ret); + + return EMULATE_DONE; +} + static int kvmppc_h_pr_protect(struct kvm_vcpu *vcpu) { unsigned long flags = kvmppc_get_gpr(vcpu, 4); @@ -144,10 +221,7 @@ int kvmppc_h_pr(struct kvm_vcpu *vcpu, unsigned long cmd) case H_PROTECT: return kvmppc_h_pr_protect(vcpu); case H_BULK_REMOVE: - /* We just flush all PTEs, so user space can - handle the HPT modifications */ - kvmppc_mmu_pte_flush(vcpu, 0, 0); - break; + return kvmppc_h_pr_bulk_remove(vcpu); case H_CEDE: kvm_vcpu_block(vcpu); vcpu->stat.halt_wakeup++; -- cgit v0.10.2 From f6127716c346c73ab1513edee53231800188c5ba Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Mon, 5 Mar 2012 16:00:28 +0100 Subject: KVM: PPC: Save/Restore CR over vcpu_run On PPC, CR2-CR4 are nonvolatile, thus have to be saved across function calls. We didn't respect that for any architecture until Paul spotted it in his patch for Book3S-HV. This patch saves/restores CR for all KVM capable PPC hosts. Signed-off-by: Alexander Graf Signed-off-by: Avi Kivity diff --git a/arch/powerpc/kvm/bookehv_interrupts.S b/arch/powerpc/kvm/bookehv_interrupts.S index 2d1f56c..57e2fa4 100644 --- a/arch/powerpc/kvm/bookehv_interrupts.S +++ b/arch/powerpc/kvm/bookehv_interrupts.S @@ -49,7 +49,8 @@ * kernel with the -ffixed-r2 gcc option. */ #define HOST_R2 (3 * LONGBYTES) -#define HOST_NV_GPRS (4 * LONGBYTES) +#define HOST_CR (4 * LONGBYTES) +#define HOST_NV_GPRS (5 * LONGBYTES) #define HOST_NV_GPR(n) (HOST_NV_GPRS + ((n - 14) * LONGBYTES)) #define HOST_MIN_STACK_SIZE (HOST_NV_GPR(31) + LONGBYTES) #define HOST_STACK_SIZE ((HOST_MIN_STACK_SIZE + 15) & ~15) /* Align. */ @@ -396,6 +397,7 @@ skip_nv_load: heavyweight_exit: /* Not returning to guest. */ PPC_LL r5, HOST_STACK_LR(r1) + lwz r6, HOST_CR(r1) /* * We already saved guest volatile register state; now save the @@ -442,6 +444,7 @@ heavyweight_exit: /* Return to kvm_vcpu_run(). */ mtlr r5 + mtcr r6 addi r1, r1, HOST_STACK_SIZE /* r3 still contains the return code from kvmppc_handle_exit(). */ blr @@ -457,8 +460,11 @@ _GLOBAL(__kvmppc_vcpu_run) /* Save host state to stack. */ PPC_STL r3, HOST_RUN(r1) mflr r3 + mfcr r5 PPC_STL r3, HOST_STACK_LR(r1) + stw r5, HOST_CR(r1) + /* Save host non-volatile register state to stack. */ PPC_STL r14, HOST_NV_GPR(r14)(r1) PPC_STL r15, HOST_NV_GPR(r15)(r1) -- cgit v0.10.2 From f0888f70151c7f53de2b45ee20ff1905837943e8 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Fri, 3 Feb 2012 00:54:17 +0000 Subject: KVM: PPC: Book3S HV: Make secondary threads more robust against stray IPIs Currently on POWER7, if we are running the guest on a core and we don't need all the hardware threads, we do nothing to ensure that the unused threads aren't executing in the kernel (other than checking that they are offline). We just assume they're napping and we don't do anything to stop them trying to enter the kernel while the guest is running. This means that a stray IPI can wake up the hardware thread and it will then try to enter the kernel, but since the core is in guest context, it will execute code from the guest in hypervisor mode once it turns the MMU on, which tends to lead to crashes or hangs in the host. This fixes the problem by adding two new one-byte flags in the kvmppc_host_state structure in the PACA which are used to interlock between the primary thread and the unused secondary threads when entering the guest. With these flags, the primary thread can ensure that the unused secondaries are not already in kernel mode (i.e. handling a stray IPI) and then indicate that they should not try to enter the kernel if they do get woken for any reason. Instead they will go into KVM code, find that there is no vcpu to run, acknowledge and clear the IPI and go back to nap mode. Signed-off-by: Paul Mackerras Signed-off-by: Alexander Graf Signed-off-by: Avi Kivity diff --git a/arch/powerpc/include/asm/kvm_book3s_asm.h b/arch/powerpc/include/asm/kvm_book3s_asm.h index 1f2f5b6..88609b2 100644 --- a/arch/powerpc/include/asm/kvm_book3s_asm.h +++ b/arch/powerpc/include/asm/kvm_book3s_asm.h @@ -79,6 +79,9 @@ struct kvmppc_host_state { u8 napping; #ifdef CONFIG_KVM_BOOK3S_64_HV + u8 hwthread_req; + u8 hwthread_state; + struct kvm_vcpu *kvm_vcpu; struct kvmppc_vcore *kvm_vcore; unsigned long xics_phys; @@ -122,4 +125,9 @@ struct kvmppc_book3s_shadow_vcpu { #endif /*__ASSEMBLY__ */ +/* Values for kvm_state */ +#define KVM_HWTHREAD_IN_KERNEL 0 +#define KVM_HWTHREAD_IN_NAP 1 +#define KVM_HWTHREAD_IN_KVM 2 + #endif /* __ASM_KVM_BOOK3S_ASM_H__ */ diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index bbede58..2abcf7d 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -540,6 +540,8 @@ int main(void) HSTATE_FIELD(HSTATE_IN_GUEST, in_guest); HSTATE_FIELD(HSTATE_RESTORE_HID5, restore_hid5); HSTATE_FIELD(HSTATE_NAPPING, napping); + HSTATE_FIELD(HSTATE_HWTHREAD_REQ, hwthread_req); + HSTATE_FIELD(HSTATE_HWTHREAD_STATE, hwthread_state); #ifdef CONFIG_KVM_BOOK3S_64_HV HSTATE_FIELD(HSTATE_KVM_VCPU, kvm_vcpu); diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index cb705fd..8829b10 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -63,11 +63,13 @@ BEGIN_FTR_SECTION GET_PACA(r13) #ifdef CONFIG_KVM_BOOK3S_64_HV - lbz r0,PACAPROCSTART(r13) - cmpwi r0,0x80 - bne 1f - li r0,1 - stb r0,PACAPROCSTART(r13) + li r0,KVM_HWTHREAD_IN_KERNEL + stb r0,HSTATE_HWTHREAD_STATE(r13) + /* Order setting hwthread_state vs. testing hwthread_req */ + sync + lbz r0,HSTATE_HWTHREAD_REQ(r13) + cmpwi r0,0 + beq 1f b kvm_start_guest 1: #endif diff --git a/arch/powerpc/kernel/idle_power7.S b/arch/powerpc/kernel/idle_power7.S index 0cdc9a3..7140d83 100644 --- a/arch/powerpc/kernel/idle_power7.S +++ b/arch/powerpc/kernel/idle_power7.S @@ -16,6 +16,7 @@ #include #include #include +#include #undef DEBUG @@ -81,6 +82,12 @@ _GLOBAL(power7_idle) std r9,_MSR(r1) std r1,PACAR1(r13) +#ifdef CONFIG_KVM_BOOK3S_64_HV + /* Tell KVM we're napping */ + li r4,KVM_HWTHREAD_IN_NAP + stb r4,HSTATE_HWTHREAD_STATE(r13) +#endif + /* Magic NAP mode enter sequence */ std r0,0(r1) ptesync diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 01294a5..e87f619 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -569,6 +569,45 @@ static void kvmppc_remove_runnable(struct kvmppc_vcore *vc, list_del(&vcpu->arch.run_list); } +static int kvmppc_grab_hwthread(int cpu) +{ + struct paca_struct *tpaca; + long timeout = 1000; + + tpaca = &paca[cpu]; + + /* Ensure the thread won't go into the kernel if it wakes */ + tpaca->kvm_hstate.hwthread_req = 1; + + /* + * If the thread is already executing in the kernel (e.g. handling + * a stray interrupt), wait for it to get back to nap mode. + * The smp_mb() is to ensure that our setting of hwthread_req + * is visible before we look at hwthread_state, so if this + * races with the code at system_reset_pSeries and the thread + * misses our setting of hwthread_req, we are sure to see its + * setting of hwthread_state, and vice versa. + */ + smp_mb(); + while (tpaca->kvm_hstate.hwthread_state == KVM_HWTHREAD_IN_KERNEL) { + if (--timeout <= 0) { + pr_err("KVM: couldn't grab cpu %d\n", cpu); + return -EBUSY; + } + udelay(1); + } + return 0; +} + +static void kvmppc_release_hwthread(int cpu) +{ + struct paca_struct *tpaca; + + tpaca = &paca[cpu]; + tpaca->kvm_hstate.hwthread_req = 0; + tpaca->kvm_hstate.kvm_vcpu = NULL; +} + static void kvmppc_start_thread(struct kvm_vcpu *vcpu) { int cpu; @@ -588,8 +627,7 @@ static void kvmppc_start_thread(struct kvm_vcpu *vcpu) smp_wmb(); #if defined(CONFIG_PPC_ICP_NATIVE) && defined(CONFIG_SMP) if (vcpu->arch.ptid) { - tpaca->cpu_start = 0x80; - wmb(); + kvmppc_grab_hwthread(cpu); xics_wake_cpu(cpu); ++vc->n_woken; } @@ -639,7 +677,7 @@ static int kvmppc_run_core(struct kvmppc_vcore *vc) struct kvm_vcpu *vcpu, *vcpu0, *vnext; long ret; u64 now; - int ptid; + int ptid, i; /* don't start if any threads have a signal pending */ list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) @@ -686,12 +724,17 @@ static int kvmppc_run_core(struct kvmppc_vcore *vc) vc->napping_threads = 0; list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) kvmppc_start_thread(vcpu); + /* Grab any remaining hw threads so they can't go into the kernel */ + for (i = ptid; i < threads_per_core; ++i) + kvmppc_grab_hwthread(vc->pcpu + i); preempt_disable(); spin_unlock(&vc->lock); kvm_guest_enter(); __kvmppc_vcore_entry(NULL, vcpu0); + for (i = 0; i < threads_per_core; ++i) + kvmppc_release_hwthread(vc->pcpu + i); spin_lock(&vc->lock); /* disable sending of IPIs on virtual external irqs */ diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index b70bf22..d595033 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -26,6 +26,7 @@ #include #include #include +#include /***************************************************************************** * * @@ -82,6 +83,7 @@ _GLOBAL(kvmppc_hv_entry_trampoline) #define XICS_XIRR 4 #define XICS_QIRR 0xc +#define XICS_IPI 2 /* interrupt source # for IPIs */ /* * We come in here when wakened from nap mode on a secondary hw thread. @@ -94,26 +96,54 @@ kvm_start_guest: subi r1,r1,STACK_FRAME_OVERHEAD ld r2,PACATOC(r13) - /* were we napping due to cede? */ - lbz r0,HSTATE_NAPPING(r13) - cmpwi r0,0 - bne kvm_end_cede + li r0,KVM_HWTHREAD_IN_KVM + stb r0,HSTATE_HWTHREAD_STATE(r13) - /* get vcpu pointer */ - ld r4, HSTATE_KVM_VCPU(r13) + /* NV GPR values from power7_idle() will no longer be valid */ + li r0,1 + stb r0,PACA_NAPSTATELOST(r13) - /* We got here with an IPI; clear it */ - ld r5, HSTATE_XICS_PHYS(r13) - li r0, 0xff - li r6, XICS_QIRR - li r7, XICS_XIRR - lwzcix r8, r5, r7 /* ack the interrupt */ + /* get vcpu pointer, NULL if we have no vcpu to run */ + ld r4,HSTATE_KVM_VCPU(r13) + cmpdi cr1,r4,0 + + /* Check the wake reason in SRR1 to see why we got here */ + mfspr r3,SPRN_SRR1 + rlwinm r3,r3,44-31,0x7 /* extract wake reason field */ + cmpwi r3,4 /* was it an external interrupt? */ + bne 27f + + /* + * External interrupt - for now assume it is an IPI, since we + * should never get any other interrupts sent to offline threads. + * Only do this for secondary threads. + */ + beq cr1,25f + lwz r3,VCPU_PTID(r4) + cmpwi r3,0 + beq 27f +25: ld r5,HSTATE_XICS_PHYS(r13) + li r0,0xff + li r6,XICS_QIRR + li r7,XICS_XIRR + lwzcix r8,r5,r7 /* get and ack the interrupt */ sync - stbcix r0, r5, r6 /* clear it */ - stwcix r8, r5, r7 /* EOI it */ + clrldi. r9,r8,40 /* get interrupt source ID. */ + beq 27f /* none there? */ + cmpwi r9,XICS_IPI + bne 26f + stbcix r0,r5,r6 /* clear IPI */ +26: stwcix r8,r5,r7 /* EOI the interrupt */ - /* NV GPR values from power7_idle() will no longer be valid */ - stb r0, PACA_NAPSTATELOST(r13) +27: /* XXX should handle hypervisor maintenance interrupts etc. here */ + + /* if we have no vcpu to run, go back to sleep */ + beq cr1,kvm_no_guest + + /* were we napping due to cede? */ + lbz r0,HSTATE_NAPPING(r13) + cmpwi r0,0 + bne kvm_end_cede .global kvmppc_hv_entry kvmppc_hv_entry: @@ -1445,8 +1475,8 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_206) * Take a nap until a decrementer or external interrupt occurs, * with PECE1 (wake on decr) and PECE0 (wake on external) set in LPCR */ - li r0,0x80 - stb r0,PACAPROCSTART(r13) + li r0,1 + stb r0,HSTATE_HWTHREAD_REQ(r13) mfspr r5,SPRN_LPCR ori r5,r5,LPCR_PECE0 | LPCR_PECE1 mtspr SPRN_LPCR,r5 @@ -1463,26 +1493,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_206) kvm_end_cede: /* Woken by external or decrementer interrupt */ ld r1, HSTATE_HOST_R1(r13) - ld r2, PACATOC(r13) - /* If we're a secondary thread and we got here by an IPI, ack it */ - ld r4,HSTATE_KVM_VCPU(r13) - lwz r3,VCPU_PTID(r4) - cmpwi r3,0 - beq 27f - mfspr r3,SPRN_SRR1 - rlwinm r3,r3,44-31,0x7 /* extract wake reason field */ - cmpwi r3,4 /* was it an external interrupt? */ - bne 27f - ld r5, HSTATE_XICS_PHYS(r13) - li r0,0xff - li r6,XICS_QIRR - li r7,XICS_XIRR - lwzcix r8,r5,r7 /* ack the interrupt */ - sync - stbcix r0,r5,r6 /* clear it */ - stwcix r8,r5,r7 /* EOI it */ -27: /* load up FP state */ bl kvmppc_load_fp @@ -1580,12 +1591,17 @@ secondary_nap: stwcx. r3, 0, r4 bne 51b +kvm_no_guest: + li r0, KVM_HWTHREAD_IN_NAP + stb r0, HSTATE_HWTHREAD_STATE(r13) + li r0, 0 + std r0, HSTATE_KVM_VCPU(r13) + li r3, LPCR_PECE0 mfspr r4, SPRN_LPCR rlwimi r4, r3, 0, LPCR_PECE0 | LPCR_PECE1 mtspr SPRN_LPCR, r4 isync - li r0, 0 std r0, HSTATE_SCRATCH0(r13) ptesync ld r0, HSTATE_SCRATCH0(r13) -- cgit v0.10.2 From 2e25aa5f64b18a97f35266e51c71ff4dc644db0c Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Sun, 19 Feb 2012 17:46:32 +0000 Subject: KVM: PPC: Book3S HV: Make virtual processor area registration more robust The PAPR API allows three sorts of per-virtual-processor areas to be registered (VPA, SLB shadow buffer, and dispatch trace log), and furthermore, these can be registered and unregistered for another virtual CPU. Currently we just update the vcpu fields pointing to these areas at the time of registration or unregistration. If this is done on another vcpu, there is the possibility that the target vcpu is using those fields at the time and could end up using a bogus pointer and corrupting memory. This fixes the race by making the target cpu itself do the update, so we can be sure that the update happens at a time when the fields aren't being used. Each area now has a struct kvmppc_vpa which is used to manage these updates. There is also a spinlock which protects access to all of the kvmppc_vpa structs, other than to the pinned_addr fields. (We could have just taken the spinlock when using the vpa, slb_shadow or dtl fields, but that would mean taking the spinlock on every guest entry and exit.) This also changes 'struct dtl' (which was undefined) to 'struct dtl_entry', which is what the rest of the kernel uses. Thanks to Michael Ellerman for pointing out the need to initialize vcpu->arch.vpa_update_lock. Signed-off-by: Paul Mackerras Signed-off-by: Alexander Graf Signed-off-by: Avi Kivity diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h index 1c324ff..318bac9 100644 --- a/arch/powerpc/include/asm/hvcall.h +++ b/arch/powerpc/include/asm/hvcall.h @@ -114,6 +114,16 @@ #define H_PP1 (1UL<<(63-62)) #define H_PP2 (1UL<<(63-63)) +/* Flags for H_REGISTER_VPA subfunction field */ +#define H_VPA_FUNC_SHIFT (63-18) /* Bit posn of subfunction code */ +#define H_VPA_FUNC_MASK 7UL +#define H_VPA_REG_VPA 1UL /* Register Virtual Processor Area */ +#define H_VPA_REG_DTL 2UL /* Register Dispatch Trace Log */ +#define H_VPA_REG_SLB 3UL /* Register SLB shadow buffer */ +#define H_VPA_DEREG_VPA 5UL /* Deregister Virtual Processor Area */ +#define H_VPA_DEREG_DTL 6UL /* Deregister Dispatch Trace Log */ +#define H_VPA_DEREG_SLB 7UL /* Deregister SLB shadow buffer */ + /* VASI States */ #define H_VASI_INVALID 0 #define H_VASI_ENABLED 1 diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 97ecdaf..93ffd8d 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -82,7 +82,7 @@ struct kvm_vcpu; struct lppaca; struct slb_shadow; -struct dtl; +struct dtl_entry; struct kvm_vm_stat { u32 remote_tlb_flush; @@ -279,6 +279,19 @@ struct kvmppc_vcore { #define VCORE_EXITING 2 #define VCORE_SLEEPING 3 +/* + * Struct used to manage memory for a virtual processor area + * registered by a PAPR guest. There are three types of area + * that a guest can register. + */ +struct kvmppc_vpa { + void *pinned_addr; /* Address in kernel linear mapping */ + void *pinned_end; /* End of region */ + unsigned long next_gpa; /* Guest phys addr for update */ + unsigned long len; /* Number of bytes required */ + u8 update_pending; /* 1 => update pinned_addr from next_gpa */ +}; + struct kvmppc_pte { ulong eaddr; u64 vpage; @@ -473,11 +486,6 @@ struct kvm_vcpu_arch { u8 prodded; u32 last_inst; - struct lppaca *vpa; - struct slb_shadow *slb_shadow; - struct dtl *dtl; - struct dtl *dtl_end; - wait_queue_head_t *wqp; struct kvmppc_vcore *vcore; int ret; @@ -502,6 +510,13 @@ struct kvm_vcpu_arch { struct task_struct *run_task; struct kvm_run *kvm_run; pgd_t *pgdir; + + spinlock_t vpa_update_lock; + struct kvmppc_vpa vpa; + struct kvmppc_vpa dtl; + struct dtl_entry *dtl_ptr; + unsigned long dtl_index; + struct kvmppc_vpa slb_shadow; #endif }; diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 2abcf7d..502e038 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -466,7 +466,7 @@ int main(void) DEFINE(VCPU_PENDING_EXC, offsetof(struct kvm_vcpu, arch.pending_exceptions)); DEFINE(VCPU_CEDED, offsetof(struct kvm_vcpu, arch.ceded)); DEFINE(VCPU_PRODDED, offsetof(struct kvm_vcpu, arch.prodded)); - DEFINE(VCPU_VPA, offsetof(struct kvm_vcpu, arch.vpa)); + DEFINE(VCPU_VPA, offsetof(struct kvm_vcpu, arch.vpa.pinned_addr)); DEFINE(VCPU_MMCR, offsetof(struct kvm_vcpu, arch.mmcr)); DEFINE(VCPU_PMC, offsetof(struct kvm_vcpu, arch.pmc)); DEFINE(VCPU_SLB, offsetof(struct kvm_vcpu, arch.slb)); diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index e87f619..2444a9c 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -134,6 +134,22 @@ static void init_vpa(struct kvm_vcpu *vcpu, struct lppaca *vpa) vpa->yield_count = 1; } +/* Length for a per-processor buffer is passed in at offset 4 in the buffer */ +struct reg_vpa { + u32 dummy; + union { + u16 hword; + u32 word; + } length; +}; + +static int vpa_is_registered(struct kvmppc_vpa *vpap) +{ + if (vpap->update_pending) + return vpap->next_gpa != 0; + return vpap->pinned_addr != NULL; +} + static unsigned long do_h_register_vpa(struct kvm_vcpu *vcpu, unsigned long flags, unsigned long vcpuid, unsigned long vpa) @@ -142,88 +158,153 @@ static unsigned long do_h_register_vpa(struct kvm_vcpu *vcpu, unsigned long len, nb; void *va; struct kvm_vcpu *tvcpu; - int err = H_PARAMETER; + int err; + int subfunc; + struct kvmppc_vpa *vpap; tvcpu = kvmppc_find_vcpu(kvm, vcpuid); if (!tvcpu) return H_PARAMETER; - flags >>= 63 - 18; - flags &= 7; - if (flags == 0 || flags == 4) - return H_PARAMETER; - if (flags < 4) { - if (vpa & 0x7f) + subfunc = (flags >> H_VPA_FUNC_SHIFT) & H_VPA_FUNC_MASK; + if (subfunc == H_VPA_REG_VPA || subfunc == H_VPA_REG_DTL || + subfunc == H_VPA_REG_SLB) { + /* Registering new area - address must be cache-line aligned */ + if ((vpa & (L1_CACHE_BYTES - 1)) || !vpa) return H_PARAMETER; - if (flags >= 2 && !tvcpu->arch.vpa) - return H_RESOURCE; - /* registering new area; convert logical addr to real */ + + /* convert logical addr to kernel addr and read length */ va = kvmppc_pin_guest_page(kvm, vpa, &nb); if (va == NULL) return H_PARAMETER; - if (flags <= 1) - len = *(unsigned short *)(va + 4); + if (subfunc == H_VPA_REG_VPA) + len = ((struct reg_vpa *)va)->length.hword; else - len = *(unsigned int *)(va + 4); - if (len > nb) - goto out_unpin; - switch (flags) { - case 1: /* register VPA */ - if (len < 640) - goto out_unpin; - if (tvcpu->arch.vpa) - kvmppc_unpin_guest_page(kvm, vcpu->arch.vpa); - tvcpu->arch.vpa = va; - init_vpa(vcpu, va); - break; - case 2: /* register DTL */ - if (len < 48) - goto out_unpin; - len -= len % 48; - if (tvcpu->arch.dtl) - kvmppc_unpin_guest_page(kvm, vcpu->arch.dtl); - tvcpu->arch.dtl = va; - tvcpu->arch.dtl_end = va + len; + len = ((struct reg_vpa *)va)->length.word; + kvmppc_unpin_guest_page(kvm, va); + + /* Check length */ + if (len > nb || len < sizeof(struct reg_vpa)) + return H_PARAMETER; + } else { + vpa = 0; + len = 0; + } + + err = H_PARAMETER; + vpap = NULL; + spin_lock(&tvcpu->arch.vpa_update_lock); + + switch (subfunc) { + case H_VPA_REG_VPA: /* register VPA */ + if (len < sizeof(struct lppaca)) break; - case 3: /* register SLB shadow buffer */ - if (len < 16) - goto out_unpin; - if (tvcpu->arch.slb_shadow) - kvmppc_unpin_guest_page(kvm, vcpu->arch.slb_shadow); - tvcpu->arch.slb_shadow = va; + vpap = &tvcpu->arch.vpa; + err = 0; + break; + + case H_VPA_REG_DTL: /* register DTL */ + if (len < sizeof(struct dtl_entry)) break; - } - } else { - switch (flags) { - case 5: /* unregister VPA */ - if (tvcpu->arch.slb_shadow || tvcpu->arch.dtl) - return H_RESOURCE; - if (!tvcpu->arch.vpa) - break; - kvmppc_unpin_guest_page(kvm, tvcpu->arch.vpa); - tvcpu->arch.vpa = NULL; + len -= len % sizeof(struct dtl_entry); + + /* Check that they have previously registered a VPA */ + err = H_RESOURCE; + if (!vpa_is_registered(&tvcpu->arch.vpa)) break; - case 6: /* unregister DTL */ - if (!tvcpu->arch.dtl) - break; - kvmppc_unpin_guest_page(kvm, tvcpu->arch.dtl); - tvcpu->arch.dtl = NULL; + + vpap = &tvcpu->arch.dtl; + err = 0; + break; + + case H_VPA_REG_SLB: /* register SLB shadow buffer */ + /* Check that they have previously registered a VPA */ + err = H_RESOURCE; + if (!vpa_is_registered(&tvcpu->arch.vpa)) break; - case 7: /* unregister SLB shadow buffer */ - if (!tvcpu->arch.slb_shadow) - break; - kvmppc_unpin_guest_page(kvm, tvcpu->arch.slb_shadow); - tvcpu->arch.slb_shadow = NULL; + + vpap = &tvcpu->arch.slb_shadow; + err = 0; + break; + + case H_VPA_DEREG_VPA: /* deregister VPA */ + /* Check they don't still have a DTL or SLB buf registered */ + err = H_RESOURCE; + if (vpa_is_registered(&tvcpu->arch.dtl) || + vpa_is_registered(&tvcpu->arch.slb_shadow)) break; - } + + vpap = &tvcpu->arch.vpa; + err = 0; + break; + + case H_VPA_DEREG_DTL: /* deregister DTL */ + vpap = &tvcpu->arch.dtl; + err = 0; + break; + + case H_VPA_DEREG_SLB: /* deregister SLB shadow buffer */ + vpap = &tvcpu->arch.slb_shadow; + err = 0; + break; } - return H_SUCCESS; - out_unpin: - kvmppc_unpin_guest_page(kvm, va); + if (vpap) { + vpap->next_gpa = vpa; + vpap->len = len; + vpap->update_pending = 1; + } + + spin_unlock(&tvcpu->arch.vpa_update_lock); + return err; } +static void kvmppc_update_vpa(struct kvm *kvm, struct kvmppc_vpa *vpap) +{ + void *va; + unsigned long nb; + + vpap->update_pending = 0; + va = NULL; + if (vpap->next_gpa) { + va = kvmppc_pin_guest_page(kvm, vpap->next_gpa, &nb); + if (nb < vpap->len) { + /* + * If it's now too short, it must be that userspace + * has changed the mappings underlying guest memory, + * so unregister the region. + */ + kvmppc_unpin_guest_page(kvm, va); + va = NULL; + } + } + if (vpap->pinned_addr) + kvmppc_unpin_guest_page(kvm, vpap->pinned_addr); + vpap->pinned_addr = va; + if (va) + vpap->pinned_end = va + vpap->len; +} + +static void kvmppc_update_vpas(struct kvm_vcpu *vcpu) +{ + struct kvm *kvm = vcpu->kvm; + + spin_lock(&vcpu->arch.vpa_update_lock); + if (vcpu->arch.vpa.update_pending) { + kvmppc_update_vpa(kvm, &vcpu->arch.vpa); + init_vpa(vcpu, vcpu->arch.vpa.pinned_addr); + } + if (vcpu->arch.dtl.update_pending) { + kvmppc_update_vpa(kvm, &vcpu->arch.dtl); + vcpu->arch.dtl_ptr = vcpu->arch.dtl.pinned_addr; + vcpu->arch.dtl_index = 0; + } + if (vcpu->arch.slb_shadow.update_pending) + kvmppc_update_vpa(kvm, &vcpu->arch.slb_shadow); + spin_unlock(&vcpu->arch.vpa_update_lock); +} + int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu) { unsigned long req = kvmppc_get_gpr(vcpu, 3); @@ -468,6 +549,7 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id) /* default to host PVR, since we can't spoof it */ vcpu->arch.pvr = mfspr(SPRN_PVR); kvmppc_set_pvr(vcpu, vcpu->arch.pvr); + spin_lock_init(&vcpu->arch.vpa_update_lock); kvmppc_mmu_book3s_hv_init(vcpu); @@ -512,12 +594,14 @@ out: void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu) { - if (vcpu->arch.dtl) - kvmppc_unpin_guest_page(vcpu->kvm, vcpu->arch.dtl); - if (vcpu->arch.slb_shadow) - kvmppc_unpin_guest_page(vcpu->kvm, vcpu->arch.slb_shadow); - if (vcpu->arch.vpa) - kvmppc_unpin_guest_page(vcpu->kvm, vcpu->arch.vpa); + spin_lock(&vcpu->arch.vpa_update_lock); + if (vcpu->arch.dtl.pinned_addr) + kvmppc_unpin_guest_page(vcpu->kvm, vcpu->arch.dtl.pinned_addr); + if (vcpu->arch.slb_shadow.pinned_addr) + kvmppc_unpin_guest_page(vcpu->kvm, vcpu->arch.slb_shadow.pinned_addr); + if (vcpu->arch.vpa.pinned_addr) + kvmppc_unpin_guest_page(vcpu->kvm, vcpu->arch.vpa.pinned_addr); + spin_unlock(&vcpu->arch.vpa_update_lock); kvm_vcpu_uninit(vcpu); kmem_cache_free(kvm_vcpu_cache, vcpu); } @@ -722,8 +806,13 @@ static int kvmppc_run_core(struct kvmppc_vcore *vc) vc->in_guest = 0; vc->pcpu = smp_processor_id(); vc->napping_threads = 0; - list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) + list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) { kvmppc_start_thread(vcpu); + if (vcpu->arch.vpa.update_pending || + vcpu->arch.slb_shadow.update_pending || + vcpu->arch.dtl.update_pending) + kvmppc_update_vpas(vcpu); + } /* Grab any remaining hw threads so they can't go into the kernel */ for (i = ptid; i < threads_per_core; ++i) kvmppc_grab_hwthread(vc->pcpu + i); -- cgit v0.10.2 From 0456ec4ff2b832ab9ff476ed687fea704500f1cd Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Fri, 3 Feb 2012 00:56:21 +0000 Subject: KVM: PPC: Book3S HV: Report stolen time to guest through dispatch trace log This adds code to measure "stolen" time per virtual core in units of timebase ticks, and to report the stolen time to the guest using the dispatch trace log (DTL). The guest can register an area of memory for the DTL for a given vcpu. The DTL is a ring buffer where KVM fills in one entry every time it enters the guest for that vcpu. Stolen time is measured as time when the virtual core is not running, either because the vcore is not runnable (e.g. some of its vcpus are executing elsewhere in the kernel or in userspace), or when the vcpu thread that is running the vcore is preempted. This includes time when all the vcpus are idle (i.e. have executed the H_CEDE hypercall), which is OK because the guest accounts stolen time while idle as idle time. Each vcpu keeps a record of how much stolen time has been reported to the guest for that vcpu so far. When we are about to enter the guest, we create a new DTL entry (if the guest vcpu has a DTL) and report the difference between total stolen time for the vcore and stolen time reported so far for the vcpu as the "enqueue to dispatch" time in the DTL entry. Signed-off-by: Paul Mackerras Signed-off-by: Alexander Graf Signed-off-by: Avi Kivity diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 93ffd8d..014eaf2 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -268,6 +268,9 @@ struct kvmppc_vcore { struct list_head runnable_threads; spinlock_t lock; wait_queue_head_t wq; + u64 stolen_tb; + u64 preempt_tb; + struct kvm_vcpu *runner; }; #define VCORE_ENTRY_COUNT(vc) ((vc)->entry_exit_count & 0xff) @@ -516,6 +519,7 @@ struct kvm_vcpu_arch { struct kvmppc_vpa dtl; struct dtl_entry *dtl_ptr; unsigned long dtl_index; + u64 stolen_logged; struct kvmppc_vpa slb_shadow; #endif }; diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 2444a9c..9079357 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -60,12 +60,20 @@ static int kvmppc_hv_setup_rma(struct kvm_vcpu *vcpu); void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { + struct kvmppc_vcore *vc = vcpu->arch.vcore; + local_paca->kvm_hstate.kvm_vcpu = vcpu; - local_paca->kvm_hstate.kvm_vcore = vcpu->arch.vcore; + local_paca->kvm_hstate.kvm_vcore = vc; + if (vc->runner == vcpu && vc->vcore_state != VCORE_INACTIVE) + vc->stolen_tb += mftb() - vc->preempt_tb; } void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu) { + struct kvmppc_vcore *vc = vcpu->arch.vcore; + + if (vc->runner == vcpu && vc->vcore_state != VCORE_INACTIVE) + vc->preempt_tb = mftb(); } void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 msr) @@ -305,6 +313,35 @@ static void kvmppc_update_vpas(struct kvm_vcpu *vcpu) spin_unlock(&vcpu->arch.vpa_update_lock); } +static void kvmppc_create_dtl_entry(struct kvm_vcpu *vcpu, + struct kvmppc_vcore *vc) +{ + struct dtl_entry *dt; + struct lppaca *vpa; + unsigned long old_stolen; + + dt = vcpu->arch.dtl_ptr; + vpa = vcpu->arch.vpa.pinned_addr; + old_stolen = vcpu->arch.stolen_logged; + vcpu->arch.stolen_logged = vc->stolen_tb; + if (!dt || !vpa) + return; + memset(dt, 0, sizeof(struct dtl_entry)); + dt->dispatch_reason = 7; + dt->processor_id = vc->pcpu + vcpu->arch.ptid; + dt->timebase = mftb(); + dt->enqueue_to_dispatch_time = vc->stolen_tb - old_stolen; + dt->srr0 = kvmppc_get_pc(vcpu); + dt->srr1 = vcpu->arch.shregs.msr; + ++dt; + if (dt == vcpu->arch.dtl.pinned_end) + dt = vcpu->arch.dtl.pinned_addr; + vcpu->arch.dtl_ptr = dt; + /* order writing *dt vs. writing vpa->dtl_idx */ + smp_wmb(); + vpa->dtl_idx = ++vcpu->arch.dtl_index; +} + int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu) { unsigned long req = kvmppc_get_gpr(vcpu, 3); @@ -568,6 +605,7 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id) INIT_LIST_HEAD(&vcore->runnable_threads); spin_lock_init(&vcore->lock); init_waitqueue_head(&vcore->wq); + vcore->preempt_tb = mftb(); } kvm->arch.vcores[core] = vcore; } @@ -580,6 +618,7 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id) ++vcore->num_threads; spin_unlock(&vcore->lock); vcpu->arch.vcore = vcore; + vcpu->arch.stolen_logged = vcore->stolen_tb; vcpu->arch.cpu_type = KVM_CPU_3S_64; kvmppc_sanity_check(vcpu); @@ -803,6 +842,7 @@ static int kvmppc_run_core(struct kvmppc_vcore *vc) vc->nap_count = 0; vc->entry_exit_count = 0; vc->vcore_state = VCORE_RUNNING; + vc->stolen_tb += mftb() - vc->preempt_tb; vc->in_guest = 0; vc->pcpu = smp_processor_id(); vc->napping_threads = 0; @@ -812,6 +852,7 @@ static int kvmppc_run_core(struct kvmppc_vcore *vc) vcpu->arch.slb_shadow.update_pending || vcpu->arch.dtl.update_pending) kvmppc_update_vpas(vcpu); + kvmppc_create_dtl_entry(vcpu, vc); } /* Grab any remaining hw threads so they can't go into the kernel */ for (i = ptid; i < threads_per_core; ++i) @@ -869,6 +910,7 @@ static int kvmppc_run_core(struct kvmppc_vcore *vc) spin_lock(&vc->lock); out: vc->vcore_state = VCORE_INACTIVE; + vc->preempt_tb = mftb(); list_for_each_entry_safe(vcpu, vnext, &vc->runnable_threads, arch.run_list) { if (vcpu->arch.ret != RESUME_GUEST) { @@ -967,6 +1009,7 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) spin_lock(&vc->lock); continue; } + vc->runner = vcpu; n_ceded = 0; list_for_each_entry(v, &vc->runnable_threads, arch.run_list) n_ceded += v->arch.ceded; @@ -986,6 +1029,7 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) wake_up(&v->arch.cpu_run); } } + vc->runner = NULL; } if (signal_pending(current)) { -- cgit v0.10.2 From c0fe7b099931c6c05c98a05c277185ee25254f35 Mon Sep 17 00:00:00 2001 From: Bharat Bhushan Date: Mon, 5 Mar 2012 01:34:08 +0000 Subject: Restore guest CR after exit timing calculation No instruction which can change Condition Register (CR) should be executed after Guest CR is loaded. So the guest CR is restored after the Exit Timing in lightweight_exit executes cmpw, which can clobber CR. Signed-off-by: Bharat Bhushan Signed-off-by: Alexander Graf Signed-off-by: Avi Kivity diff --git a/arch/powerpc/kvm/bookehv_interrupts.S b/arch/powerpc/kvm/bookehv_interrupts.S index 57e2fa4..909e96e 100644 --- a/arch/powerpc/kvm/bookehv_interrupts.S +++ b/arch/powerpc/kvm/bookehv_interrupts.S @@ -580,7 +580,6 @@ lightweight_exit: mtlr r3 mtxer r5 mtctr r6 - mtcr r7 mtsrr0 r8 mtsrr1 r9 @@ -588,14 +587,20 @@ lightweight_exit: /* save enter time */ 1: mfspr r6, SPRN_TBRU - mfspr r7, SPRN_TBRL + mfspr r9, SPRN_TBRL mfspr r8, SPRN_TBRU cmpw r8, r6 - PPC_STL r7, VCPU_TIMING_LAST_ENTER_TBL(r4) + PPC_STL r9, VCPU_TIMING_LAST_ENTER_TBL(r4) bne 1b PPC_STL r8, VCPU_TIMING_LAST_ENTER_TBU(r4) #endif + /* + * Don't execute any instruction which can change CR after + * below instruction. + */ + mtcr r7 + /* Finish loading guest volatiles and jump to guest. */ PPC_LL r5, VCPU_GPR(r5)(r4) PPC_LL r6, VCPU_GPR(r6)(r4) -- cgit v0.10.2 From 7657f4089b097846cc37bfa2b74fc0bd2bd60e30 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 5 Mar 2012 21:42:25 +0000 Subject: KVM: PPC: Book 3S: Fix compilation for !HV configs Commits 2f5cdd5487 ("KVM: PPC: Book3S HV: Make secondary threads more robust against stray IPIs") and 1c2066b0f7 ("KVM: PPC: Book3S HV: Make virtual processor area registration more robust") added fields to struct kvm_vcpu_arch inside #ifdef CONFIG_KVM_BOOK3S_64_HV regions, and added lines to arch/powerpc/kernel/asm-offsets.c to generate assembler constants for their offsets. Unfortunately this led to compile errors on Book 3S machines for configs that had KVM enabled but not CONFIG_KVM_BOOK3S_64_HV. This fixes the problem by moving the offending lines inside #ifdef CONFIG_KVM_BOOK3S_64_HV regions. Signed-off-by: Paul Mackerras Signed-off-by: Alexander Graf Signed-off-by: Avi Kivity diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 502e038..694af3e 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -450,6 +450,7 @@ int main(void) DEFINE(KVM_VRMA_SLB_V, offsetof(struct kvm, arch.vrma_slb_v)); DEFINE(VCPU_DSISR, offsetof(struct kvm_vcpu, arch.shregs.dsisr)); DEFINE(VCPU_DAR, offsetof(struct kvm_vcpu, arch.shregs.dar)); + DEFINE(VCPU_VPA, offsetof(struct kvm_vcpu, arch.vpa.pinned_addr)); #endif #ifdef CONFIG_PPC_BOOK3S DEFINE(VCPU_VCPUID, offsetof(struct kvm_vcpu, vcpu_id)); @@ -466,7 +467,6 @@ int main(void) DEFINE(VCPU_PENDING_EXC, offsetof(struct kvm_vcpu, arch.pending_exceptions)); DEFINE(VCPU_CEDED, offsetof(struct kvm_vcpu, arch.ceded)); DEFINE(VCPU_PRODDED, offsetof(struct kvm_vcpu, arch.prodded)); - DEFINE(VCPU_VPA, offsetof(struct kvm_vcpu, arch.vpa.pinned_addr)); DEFINE(VCPU_MMCR, offsetof(struct kvm_vcpu, arch.mmcr)); DEFINE(VCPU_PMC, offsetof(struct kvm_vcpu, arch.pmc)); DEFINE(VCPU_SLB, offsetof(struct kvm_vcpu, arch.slb)); @@ -540,10 +540,10 @@ int main(void) HSTATE_FIELD(HSTATE_IN_GUEST, in_guest); HSTATE_FIELD(HSTATE_RESTORE_HID5, restore_hid5); HSTATE_FIELD(HSTATE_NAPPING, napping); - HSTATE_FIELD(HSTATE_HWTHREAD_REQ, hwthread_req); - HSTATE_FIELD(HSTATE_HWTHREAD_STATE, hwthread_state); #ifdef CONFIG_KVM_BOOK3S_64_HV + HSTATE_FIELD(HSTATE_HWTHREAD_REQ, hwthread_req); + HSTATE_FIELD(HSTATE_HWTHREAD_STATE, hwthread_state); HSTATE_FIELD(HSTATE_KVM_VCPU, kvm_vcpu); HSTATE_FIELD(HSTATE_KVM_VCORE, kvm_vcore); HSTATE_FIELD(HSTATE_XICS_PHYS, xics_phys); -- cgit v0.10.2 From 8943633cf9b87980d261a022e90d94bc2c55df35 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Fri, 2 Mar 2012 01:38:23 +0000 Subject: KVM: PPC: Work around POWER7 DABR corruption problem It turns out that on POWER7, writing to the DABR can cause a corrupted value to be written if the PMU is active and updating SDAR in continuous sampling mode. To work around this, we make sure that the PMU is inactive and SDAR updates are disabled (via MMCRA) when we are context-switching DABR. When the guest sets DABR via the H_SET_DABR hypercall, we use a slightly different workaround, which is to read back the DABR and write it again if it got corrupted. While we are at it, make it consistent that the saving and restoring of the guest's non-volatile GPRs and the FPRs are done with the guest setup of the PMU active. Signed-off-by: Paul Mackerras Signed-off-by: Alexander Graf Signed-off-by: Avi Kivity diff --git a/arch/powerpc/kvm/book3s_hv_interrupts.S b/arch/powerpc/kvm/book3s_hv_interrupts.S index d3fb4df..84035a5 100644 --- a/arch/powerpc/kvm/book3s_hv_interrupts.S +++ b/arch/powerpc/kvm/book3s_hv_interrupts.S @@ -68,19 +68,24 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) rotldi r10,r10,16 mtmsrd r10,1 - /* Save host PMU registers and load guest PMU registers */ + /* Save host PMU registers */ /* R4 is live here (vcpu pointer) but not r3 or r5 */ li r3, 1 sldi r3, r3, 31 /* MMCR0_FC (freeze counters) bit */ mfspr r7, SPRN_MMCR0 /* save MMCR0 */ mtspr SPRN_MMCR0, r3 /* freeze all counters, disable interrupts */ + mfspr r6, SPRN_MMCRA +BEGIN_FTR_SECTION + /* On P7, clear MMCRA in order to disable SDAR updates */ + li r5, 0 + mtspr SPRN_MMCRA, r5 +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) isync ld r3, PACALPPACAPTR(r13) /* is the host using the PMU? */ lbz r5, LPPACA_PMCINUSE(r3) cmpwi r5, 0 beq 31f /* skip if not */ mfspr r5, SPRN_MMCR1 - mfspr r6, SPRN_MMCRA std r7, HSTATE_MMCR(r13) std r5, HSTATE_MMCR + 8(r13) std r6, HSTATE_MMCR + 16(r13) diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index d595033..a84aafc 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -159,24 +159,15 @@ kvmppc_hv_entry: mflr r0 std r0, HSTATE_VMHANDLER(r13) - ld r14, VCPU_GPR(r14)(r4) - ld r15, VCPU_GPR(r15)(r4) - ld r16, VCPU_GPR(r16)(r4) - ld r17, VCPU_GPR(r17)(r4) - ld r18, VCPU_GPR(r18)(r4) - ld r19, VCPU_GPR(r19)(r4) - ld r20, VCPU_GPR(r20)(r4) - ld r21, VCPU_GPR(r21)(r4) - ld r22, VCPU_GPR(r22)(r4) - ld r23, VCPU_GPR(r23)(r4) - ld r24, VCPU_GPR(r24)(r4) - ld r25, VCPU_GPR(r25)(r4) - ld r26, VCPU_GPR(r26)(r4) - ld r27, VCPU_GPR(r27)(r4) - ld r28, VCPU_GPR(r28)(r4) - ld r29, VCPU_GPR(r29)(r4) - ld r30, VCPU_GPR(r30)(r4) - ld r31, VCPU_GPR(r31)(r4) + /* Set partition DABR */ + /* Do this before re-enabling PMU to avoid P7 DABR corruption bug */ + li r5,3 + ld r6,VCPU_DABR(r4) + mtspr SPRN_DABRX,r5 + mtspr SPRN_DABR,r6 +BEGIN_FTR_SECTION + isync +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) /* Load guest PMU registers */ /* R4 is live here (vcpu pointer) */ @@ -215,6 +206,25 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) /* Load up FP, VMX and VSX registers */ bl kvmppc_load_fp + ld r14, VCPU_GPR(r14)(r4) + ld r15, VCPU_GPR(r15)(r4) + ld r16, VCPU_GPR(r16)(r4) + ld r17, VCPU_GPR(r17)(r4) + ld r18, VCPU_GPR(r18)(r4) + ld r19, VCPU_GPR(r19)(r4) + ld r20, VCPU_GPR(r20)(r4) + ld r21, VCPU_GPR(r21)(r4) + ld r22, VCPU_GPR(r22)(r4) + ld r23, VCPU_GPR(r23)(r4) + ld r24, VCPU_GPR(r24)(r4) + ld r25, VCPU_GPR(r25)(r4) + ld r26, VCPU_GPR(r26)(r4) + ld r27, VCPU_GPR(r27)(r4) + ld r28, VCPU_GPR(r28)(r4) + ld r29, VCPU_GPR(r29)(r4) + ld r30, VCPU_GPR(r30)(r4) + ld r31, VCPU_GPR(r31)(r4) + BEGIN_FTR_SECTION /* Switch DSCR to guest value */ ld r5, VCPU_DSCR(r4) @@ -256,12 +266,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) mtspr SPRN_DAR, r5 mtspr SPRN_DSISR, r6 - /* Set partition DABR */ - li r5,3 - ld r6,VCPU_DABR(r4) - mtspr SPRN_DABRX,r5 - mtspr SPRN_DABR,r6 - BEGIN_FTR_SECTION /* Restore AMR and UAMOR, set AMOR to all 1s */ ld r5,VCPU_AMR(r4) @@ -955,12 +959,6 @@ BEGIN_FTR_SECTION mtspr SPRN_AMR,r6 END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) - /* Restore host DABR and DABRX */ - ld r5,HSTATE_DABR(r13) - li r6,7 - mtspr SPRN_DABR,r5 - mtspr SPRN_DABRX,r6 - /* Switch DSCR back to host value */ BEGIN_FTR_SECTION mfspr r8, SPRN_DSCR @@ -999,6 +997,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) std r5, VCPU_SPRG2(r9) std r6, VCPU_SPRG3(r9) + /* save FP state */ + mr r3, r9 + bl .kvmppc_save_fp + /* Increment yield count if they have a VPA */ ld r8, VCPU_VPA(r9) /* do they have a VPA? */ cmpdi r8, 0 @@ -1013,6 +1015,12 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) sldi r3, r3, 31 /* MMCR0_FC (freeze counters) bit */ mfspr r4, SPRN_MMCR0 /* save MMCR0 */ mtspr SPRN_MMCR0, r3 /* freeze all counters, disable ints */ + mfspr r6, SPRN_MMCRA +BEGIN_FTR_SECTION + /* On P7, clear MMCRA in order to disable SDAR updates */ + li r7, 0 + mtspr SPRN_MMCRA, r7 +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) isync beq 21f /* if no VPA, save PMU stuff anyway */ lbz r7, LPPACA_PMCINUSE(r8) @@ -1021,7 +1029,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) std r3, VCPU_MMCR(r9) /* if not, set saved MMCR0 to FC */ b 22f 21: mfspr r5, SPRN_MMCR1 - mfspr r6, SPRN_MMCRA std r4, VCPU_MMCR(r9) std r5, VCPU_MMCR + 8(r9) std r6, VCPU_MMCR + 16(r9) @@ -1046,17 +1053,20 @@ BEGIN_FTR_SECTION stw r11, VCPU_PMC + 28(r9) END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) 22: - /* save FP state */ - mr r3, r9 - bl .kvmppc_save_fp /* Secondary threads go off to take a nap on POWER7 */ BEGIN_FTR_SECTION - lwz r0,VCPU_PTID(r3) + lwz r0,VCPU_PTID(r9) cmpwi r0,0 bne secondary_nap END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) + /* Restore host DABR and DABRX */ + ld r5,HSTATE_DABR(r13) + li r6,7 + mtspr SPRN_DABR,r5 + mtspr SPRN_DABRX,r6 + /* * Reload DEC. HDEC interrupts were disabled when * we reloaded the host's LPCR value. @@ -1393,7 +1403,12 @@ bounce_ext_interrupt: _GLOBAL(kvmppc_h_set_dabr) std r4,VCPU_DABR(r3) - mtspr SPRN_DABR,r4 + /* Work around P7 bug where DABR can get corrupted on mtspr */ +1: mtspr SPRN_DABR,r4 + mfspr r5, SPRN_DABR + cmpd r4, r5 + bne 1b + isync li r3,0 blr @@ -1615,8 +1630,8 @@ kvm_no_guest: * r3 = vcpu pointer */ _GLOBAL(kvmppc_save_fp) - mfmsr r9 - ori r8,r9,MSR_FP + mfmsr r5 + ori r8,r5,MSR_FP #ifdef CONFIG_ALTIVEC BEGIN_FTR_SECTION oris r8,r8,MSR_VEC@h @@ -1665,7 +1680,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) #endif mfspr r6,SPRN_VRSAVE stw r6,VCPU_VRSAVE(r3) - mtmsrd r9 + mtmsrd r5 isync blr -- cgit v0.10.2 From 6020c0f6e78888b6023559e9bf633ad0092a1709 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Mon, 12 Mar 2012 02:26:30 +0100 Subject: KVM: PPC: Pass EA to updating emulation ops MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When emulating updating load/store instructions (lwzu, stwu, ...) we need to write the effective address of the load/store into a register. Currently, we write the physical address in there, which is very wrong. So instead let's save off where the virtual fault was on MMIO and use that information as value to put into the register. While at it, also move the XOP variants of the above instructions to the new scheme of using the already known vaddr instead of calculating it themselves. Reported-by: Jörg Sommer Signed-off-by: Alexander Graf Signed-off-by: Avi Kivity diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 014eaf2..42a527e 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -464,6 +464,7 @@ struct kvm_vcpu_arch { u32 epr; #endif gpa_t paddr_accessed; + gva_t vaddr_accessed; u8 io_gpr; /* GPR used as IO source/target */ u8 mmio_is_bigendian; diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index d031ce1..8e6401f 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -447,7 +447,7 @@ static int instruction_is_store(unsigned int instr) } static int kvmppc_hv_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu, - unsigned long gpa, int is_store) + unsigned long gpa, gva_t ea, int is_store) { int ret; u32 last_inst; @@ -494,6 +494,7 @@ static int kvmppc_hv_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu, */ vcpu->arch.paddr_accessed = gpa; + vcpu->arch.vaddr_accessed = ea; return kvmppc_emulate_mmio(run, vcpu); } @@ -547,7 +548,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, /* No memslot means it's an emulated MMIO region */ if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID)) { unsigned long gpa = (gfn << PAGE_SHIFT) | (ea & (psize - 1)); - return kvmppc_hv_emulate_mmio(run, vcpu, gpa, + return kvmppc_hv_emulate_mmio(run, vcpu, gpa, ea, dsisr & DSISR_ISSTORE); } diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index 7759053..158047f 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -351,6 +351,7 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu, /* MMIO */ vcpu->stat.mmio_exits++; vcpu->arch.paddr_accessed = pte.raddr; + vcpu->arch.vaddr_accessed = pte.eaddr; r = kvmppc_emulate_mmio(run, vcpu); if ( r == RESUME_HOST_NV ) r = RESUME_HOST; diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 9f27258..2675dcb 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -875,6 +875,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, /* Guest has mapped and accessed a page which is not * actually RAM. */ vcpu->arch.paddr_accessed = gpaddr; + vcpu->arch.vaddr_accessed = eaddr; r = kvmppc_emulate_mmio(run, vcpu); kvmppc_account_exit(vcpu, MMIO_EXITS); } diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c index 968f401..e79a620 100644 --- a/arch/powerpc/kvm/emulate.c +++ b/arch/powerpc/kvm/emulate.c @@ -141,7 +141,6 @@ u32 kvmppc_get_dec(struct kvm_vcpu *vcpu, u64 tb) int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) { u32 inst = kvmppc_get_last_inst(vcpu); - u32 ea; int ra; int rb; int rs; @@ -185,12 +184,8 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) ra = get_ra(inst); rb = get_rb(inst); - ea = kvmppc_get_gpr(vcpu, rb); - if (ra) - ea += kvmppc_get_gpr(vcpu, ra); - emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1); - kvmppc_set_gpr(vcpu, ra, ea); + kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); break; case OP_31_XOP_STWX: @@ -212,14 +207,10 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) ra = get_ra(inst); rb = get_rb(inst); - ea = kvmppc_get_gpr(vcpu, rb); - if (ra) - ea += kvmppc_get_gpr(vcpu, ra); - emulated = kvmppc_handle_store(run, vcpu, kvmppc_get_gpr(vcpu, rs), 1, 1); - kvmppc_set_gpr(vcpu, rs, ea); + kvmppc_set_gpr(vcpu, rs, vcpu->arch.vaddr_accessed); break; case OP_31_XOP_LHAX: @@ -237,12 +228,8 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) ra = get_ra(inst); rb = get_rb(inst); - ea = kvmppc_get_gpr(vcpu, rb); - if (ra) - ea += kvmppc_get_gpr(vcpu, ra); - emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1); - kvmppc_set_gpr(vcpu, ra, ea); + kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); break; case OP_31_XOP_MFSPR: @@ -318,14 +305,10 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) ra = get_ra(inst); rb = get_rb(inst); - ea = kvmppc_get_gpr(vcpu, rb); - if (ra) - ea += kvmppc_get_gpr(vcpu, ra); - emulated = kvmppc_handle_store(run, vcpu, kvmppc_get_gpr(vcpu, rs), 2, 1); - kvmppc_set_gpr(vcpu, ra, ea); + kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); break; case OP_31_XOP_MTSPR: @@ -429,7 +412,7 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) ra = get_ra(inst); rt = get_rt(inst); emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1); - kvmppc_set_gpr(vcpu, ra, vcpu->arch.paddr_accessed); + kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); break; case OP_LBZ: @@ -441,7 +424,7 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) ra = get_ra(inst); rt = get_rt(inst); emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1); - kvmppc_set_gpr(vcpu, ra, vcpu->arch.paddr_accessed); + kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); break; case OP_STW: @@ -457,7 +440,7 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) emulated = kvmppc_handle_store(run, vcpu, kvmppc_get_gpr(vcpu, rs), 4, 1); - kvmppc_set_gpr(vcpu, ra, vcpu->arch.paddr_accessed); + kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); break; case OP_STB: @@ -473,7 +456,7 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) emulated = kvmppc_handle_store(run, vcpu, kvmppc_get_gpr(vcpu, rs), 1, 1); - kvmppc_set_gpr(vcpu, ra, vcpu->arch.paddr_accessed); + kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); break; case OP_LHZ: @@ -485,7 +468,7 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) ra = get_ra(inst); rt = get_rt(inst); emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1); - kvmppc_set_gpr(vcpu, ra, vcpu->arch.paddr_accessed); + kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); break; case OP_LHA: @@ -497,7 +480,7 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) ra = get_ra(inst); rt = get_rt(inst); emulated = kvmppc_handle_loads(run, vcpu, rt, 2, 1); - kvmppc_set_gpr(vcpu, ra, vcpu->arch.paddr_accessed); + kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); break; case OP_STH: @@ -513,7 +496,7 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) emulated = kvmppc_handle_store(run, vcpu, kvmppc_get_gpr(vcpu, rs), 2, 1); - kvmppc_set_gpr(vcpu, ra, vcpu->arch.paddr_accessed); + kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); break; default: -- cgit v0.10.2 From 6df79df5b27d74e0c9803d7f47bb878370996548 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Tue, 13 Mar 2012 22:15:45 +0100 Subject: KVM: PPC: Emulate tw and td instructions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There are 4 conditional trapping instructions: tw, twi, td, tdi. The ones with an i take an immediate comparison, the others compare two registers. All of them arrive in the emulator when the condition to trap was successfully fulfilled. Unfortunately, we were only implementing the i versions so far, so let's also add support for the other two. This fixes kernel booting with recents book3s_32 guest kernels. Reported-by: Jörg Sommer Signed-off-by: Alexander Graf Signed-off-by: Avi Kivity diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c index e79a620..afc9154 100644 --- a/arch/powerpc/kvm/emulate.c +++ b/arch/powerpc/kvm/emulate.c @@ -35,7 +35,9 @@ #define OP_TRAP 3 #define OP_TRAP_64 2 +#define OP_31_XOP_TRAP 4 #define OP_31_XOP_LWZX 23 +#define OP_31_XOP_TRAP_64 68 #define OP_31_XOP_LBZX 87 #define OP_31_XOP_STWX 151 #define OP_31_XOP_STBX 215 @@ -169,6 +171,18 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) case 31: switch (get_xop(inst)) { + case OP_31_XOP_TRAP: +#ifdef CONFIG_64BIT + case OP_31_XOP_TRAP_64: +#endif +#ifdef CONFIG_PPC_BOOK3S + kvmppc_core_queue_program(vcpu, SRR1_PROGTRAP); +#else + kvmppc_core_queue_program(vcpu, + vcpu->arch.shared->esr | ESR_PTR); +#endif + advance = 0; + break; case OP_31_XOP_LWZX: rt = get_rt(inst); emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1); -- cgit v0.10.2 From 4f225ae06e7f39a523ec500c3cf127e50797983e Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Tue, 13 Mar 2012 23:05:16 +0100 Subject: KVM: PPC: Book3s: PR: Add HV traps so we can run in HV=1 mode on p7 When running PR KVM on a p7 system in bare metal, we get HV exits instead of normal supervisor traps. Semantically they are identical though and the HSRR vs SRR difference is already taken care of in the exit code. So all we need to do is handle them in addition to our normal exits. Signed-off-by: Alexander Graf Signed-off-by: Avi Kivity diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index 158047f..a7f031b 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -618,10 +618,13 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, break; /* We're good on these - the host merely wanted to get our attention */ case BOOK3S_INTERRUPT_DECREMENTER: + case BOOK3S_INTERRUPT_HV_DECREMENTER: vcpu->stat.dec_exits++; r = RESUME_GUEST; break; case BOOK3S_INTERRUPT_EXTERNAL: + case BOOK3S_INTERRUPT_EXTERNAL_LEVEL: + case BOOK3S_INTERRUPT_EXTERNAL_HV: vcpu->stat.ext_intr_exits++; r = RESUME_GUEST; break; @@ -629,6 +632,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, r = RESUME_GUEST; break; case BOOK3S_INTERRUPT_PROGRAM: + case BOOK3S_INTERRUPT_H_EMUL_ASSIST: { enum emulation_result er; struct kvmppc_book3s_shadow_vcpu *svcpu; -- cgit v0.10.2 From 966cd0f3bdd422f0b10686fb59d0d456fbbb6398 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Wed, 14 Mar 2012 16:55:08 +0100 Subject: KVM: PPC: Ignore unhalt request from kvm_vcpu_block When running kvm_vcpu_block and it realizes that the CPU is actually good to run, we get a request bit set for KVM_REQ_UNHALT. Right now, there's nothing we can do with that bit, so let's unset it right after the call again so we don't get confused in our later checks for pending work. Signed-off-by: Alexander Graf Signed-off-by: Avi Kivity diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index a7f031b..912e10f 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -120,6 +120,7 @@ void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 msr) if (msr & MSR_POW) { if (!vcpu->arch.pending_exceptions) { kvm_vcpu_block(vcpu); + clear_bit(KVM_REQ_UNHALT, &vcpu->requests); vcpu->stat.halt_wakeup++; /* Unset POW bit after we woke up */ diff --git a/arch/powerpc/kvm/book3s_pr_papr.c b/arch/powerpc/kvm/book3s_pr_papr.c index 6d1bfe2..60ac0e7 100644 --- a/arch/powerpc/kvm/book3s_pr_papr.c +++ b/arch/powerpc/kvm/book3s_pr_papr.c @@ -224,6 +224,7 @@ int kvmppc_h_pr(struct kvm_vcpu *vcpu, unsigned long cmd) return kvmppc_h_pr_bulk_remove(vcpu); case H_CEDE: kvm_vcpu_block(vcpu); + clear_bit(KVM_REQ_UNHALT, &vcpu->requests); vcpu->stat.halt_wakeup++; return EMULATE_DONE; } diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 2675dcb..72f13f4 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -449,6 +449,7 @@ int kvmppc_core_prepare_to_enter(struct kvm_vcpu *vcpu) if (vcpu->arch.shared->msr & MSR_WE) { local_irq_enable(); kvm_vcpu_block(vcpu); + clear_bit(KVM_REQ_UNHALT, &vcpu->requests); local_irq_disable(); kvmppc_set_exit_type(vcpu, EMULATED_MTMSRWE_EXITS); -- cgit v0.10.2 From bbcc9c06695243ea23d30de36842df9200c33857 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 13 Mar 2012 21:52:44 +0000 Subject: powerpc/kvm: Fix magic page vs. 32-bit RTAS on ppc64 When the kernel calls into RTAS, it switches to 32-bit mode. The magic page was is longer accessible in that case, causing the patched instructions in the RTAS call wrapper to crash. This fixes it by making available a 32-bit mapping of the magic page in that case. This mapping is flushed whenever we switch the kernel back to 64-bit mode. Signed-off-by: Benjamin Herrenschmidt [agraf: add a check if the magic page is mapped] Signed-off-by: Alexander Graf Signed-off-by: Avi Kivity diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index c8ead7b..3f2a836 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -291,6 +291,9 @@ pfn_t kvmppc_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn) { ulong mp_pa = vcpu->arch.magic_page_pa; + if (!(vcpu->arch.shared->msr & MSR_SF)) + mp_pa = (uint32_t)mp_pa; + /* Magic page override */ if (unlikely(mp_pa) && unlikely(((gfn << PAGE_SHIFT) & KVM_PAM) == diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index 912e10f..dba282e 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -145,6 +145,21 @@ void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 msr) } } + /* + * When switching from 32 to 64-bit, we may have a stale 32-bit + * magic page around, we need to flush it. Typically 32-bit magic + * page will be instanciated when calling into RTAS. Note: We + * assume that such transition only happens while in kernel mode, + * ie, we never transition from user 32-bit to kernel 64-bit with + * a 32-bit magic page around. + */ + if (vcpu->arch.magic_page_pa && + !(old_msr & MSR_PR) && !(old_msr & MSR_SF) && (msr & MSR_SF)) { + /* going from RTAS to normal kernel code */ + kvmppc_mmu_pte_flush(vcpu, (uint32_t)vcpu->arch.magic_page_pa, + ~0xFFFUL); + } + /* Preload FPU if it's enabled */ if (vcpu->arch.shared->msr & MSR_FP) kvmppc_handle_ext(vcpu, BOOK3S_INTERRUPT_FP_UNAVAIL, MSR_FP); @@ -252,6 +267,9 @@ static int kvmppc_visible_gfn(struct kvm_vcpu *vcpu, gfn_t gfn) { ulong mp_pa = vcpu->arch.magic_page_pa; + if (!(vcpu->arch.shared->msr & MSR_SF)) + mp_pa = (uint32_t)mp_pa; + if (unlikely(mp_pa) && unlikely((mp_pa & KVM_PAM) >> PAGE_SHIFT == gfn)) { return 1; -- cgit v0.10.2 From e9bda3b3d0ce775afe15eaf71922d342cc74991c Mon Sep 17 00:00:00 2001 From: Josh Triplett Date: Tue, 20 Mar 2012 23:33:51 -0700 Subject: KVM: VMX: Auto-load on CPUs with VMX Enable x86 feature-based autoloading for the kvm-intel module on CPUs with X86_FEATURE_VMX. Signed-off-by: Josh Triplett Acked-By: Kay Sievers Signed-off-by: Avi Kivity diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index ad85adf..52f6856 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include @@ -51,6 +52,12 @@ MODULE_AUTHOR("Qumranet"); MODULE_LICENSE("GPL"); +static const struct x86_cpu_id vmx_cpu_id[] = { + X86_FEATURE_MATCH(X86_FEATURE_VMX), + {} +}; +MODULE_DEVICE_TABLE(x86cpu, vmx_cpu_id); + static bool __read_mostly enable_vpid = 1; module_param_named(vpid, enable_vpid, bool, 0444); -- cgit v0.10.2 From c36fc04ef558c95cff46a8c89d2f804f217335f5 Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Thu, 8 Mar 2012 12:45:54 +0100 Subject: KVM: x86: add paging gcc optimization Since most guests will have paging enabled for memory management, add likely() optimization around CR0.PG checks. Signed-off-by: Davidlohr Bueso Signed-off-by: Avi Kivity diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index cb80c29..3d1134d 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h @@ -64,7 +64,7 @@ static inline int is_pse(struct kvm_vcpu *vcpu) static inline int is_paging(struct kvm_vcpu *vcpu) { - return kvm_read_cr0_bits(vcpu, X86_CR0_PG); + return likely(kvm_read_cr0_bits(vcpu, X86_CR0_PG)); } static inline u32 bit(int bitno) -- cgit v0.10.2 From 220f773a0013bf6fe2eefd9718ac7471f368fd8e Mon Sep 17 00:00:00 2001 From: Takuya Yoshikawa Date: Wed, 21 Mar 2012 23:49:39 +0900 Subject: KVM: MMU: Make pte_list_desc fit cache lines well We have PTE_LIST_EXT + 1 pointers in this structure and these 40/20 bytes do not fit cache lines well. Furthermore, some allocators may use 64/32-byte objects for the pte_list_desc cache. This patch solves this problem by changing PTE_LIST_EXT from 4 to 3. For shadow paging, the new size is still large enough to hold both the kernel and process mappings for usual anonymous pages. For file mappings, there may be a slight change in the cache usage. Note: with EPT/NPT we almost always have a single spte in each reverse mapping and we will not see any change by this. Signed-off-by: Takuya Yoshikawa Signed-off-by: Avi Kivity diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index dc5f245..3213348 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -135,8 +135,6 @@ module_param(dbg, bool, 0644); #define PT64_PERM_MASK (PT_PRESENT_MASK | PT_WRITABLE_MASK | PT_USER_MASK \ | PT64_NX_MASK) -#define PTE_LIST_EXT 4 - #define ACC_EXEC_MASK 1 #define ACC_WRITE_MASK PT_WRITABLE_MASK #define ACC_USER_MASK PT_USER_MASK @@ -151,6 +149,9 @@ module_param(dbg, bool, 0644); #define SHADOW_PT_INDEX(addr, level) PT64_INDEX(addr, level) +/* make pte_list_desc fit well in cache line */ +#define PTE_LIST_EXT 3 + struct pte_list_desc { u64 *sptes[PTE_LIST_EXT]; struct pte_list_desc *more; -- cgit v0.10.2 From 1e3f42f03c38c29c1814199a6f0a2f01b919ea3f Mon Sep 17 00:00:00 2001 From: Takuya Yoshikawa Date: Wed, 21 Mar 2012 23:50:34 +0900 Subject: KVM: MMU: Improve iteration through sptes from rmap Iteration using rmap_next(), the actual body is pte_list_next(), is inefficient: every time we call it we start from checking whether rmap holds a single spte or points to a descriptor which links more sptes. In the case of shadow paging, this quadratic total iteration cost is a problem. Even for two dimensional paging, with EPT/NPT on, in which we almost always have a single mapping, the extra checks at the end of the iteration should be eliminated. This patch fixes this by introducing rmap_iterator which keeps the iteration context for the next search. Furthermore the implementation of rmap_next() is splitted into two functions, rmap_get_first() and rmap_get_next(), to avoid repeatedly checking whether the rmap being iterated on has only one spte. Although there seemed to be only a slight change for EPT/NPT, the actual improvement was significant: we observed that GET_DIRTY_LOG for 1GB dirty memory became 15% faster than before. This is probably because the new code is easy to make branch predictions. Note: we just remove pte_list_next() because we can think of parent_ptes as a reverse mapping. Signed-off-by: Takuya Yoshikawa Signed-off-by: Avi Kivity diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 3213348..29ad6f9 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -842,32 +842,6 @@ static int pte_list_add(struct kvm_vcpu *vcpu, u64 *spte, return count; } -static u64 *pte_list_next(unsigned long *pte_list, u64 *spte) -{ - struct pte_list_desc *desc; - u64 *prev_spte; - int i; - - if (!*pte_list) - return NULL; - else if (!(*pte_list & 1)) { - if (!spte) - return (u64 *)*pte_list; - return NULL; - } - desc = (struct pte_list_desc *)(*pte_list & ~1ul); - prev_spte = NULL; - while (desc) { - for (i = 0; i < PTE_LIST_EXT && desc->sptes[i]; ++i) { - if (prev_spte == spte) - return desc->sptes[i]; - prev_spte = desc->sptes[i]; - } - desc = desc->more; - } - return NULL; -} - static void pte_list_desc_remove_entry(unsigned long *pte_list, struct pte_list_desc *desc, int i, struct pte_list_desc *prev_desc) @@ -988,11 +962,6 @@ static int rmap_add(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn) return pte_list_add(vcpu, spte, rmapp); } -static u64 *rmap_next(unsigned long *rmapp, u64 *spte) -{ - return pte_list_next(rmapp, spte); -} - static void rmap_remove(struct kvm *kvm, u64 *spte) { struct kvm_mmu_page *sp; @@ -1005,6 +974,67 @@ static void rmap_remove(struct kvm *kvm, u64 *spte) pte_list_remove(spte, rmapp); } +/* + * Used by the following functions to iterate through the sptes linked by a + * rmap. All fields are private and not assumed to be used outside. + */ +struct rmap_iterator { + /* private fields */ + struct pte_list_desc *desc; /* holds the sptep if not NULL */ + int pos; /* index of the sptep */ +}; + +/* + * Iteration must be started by this function. This should also be used after + * removing/dropping sptes from the rmap link because in such cases the + * information in the itererator may not be valid. + * + * Returns sptep if found, NULL otherwise. + */ +static u64 *rmap_get_first(unsigned long rmap, struct rmap_iterator *iter) +{ + if (!rmap) + return NULL; + + if (!(rmap & 1)) { + iter->desc = NULL; + return (u64 *)rmap; + } + + iter->desc = (struct pte_list_desc *)(rmap & ~1ul); + iter->pos = 0; + return iter->desc->sptes[iter->pos]; +} + +/* + * Must be used with a valid iterator: e.g. after rmap_get_first(). + * + * Returns sptep if found, NULL otherwise. + */ +static u64 *rmap_get_next(struct rmap_iterator *iter) +{ + if (iter->desc) { + if (iter->pos < PTE_LIST_EXT - 1) { + u64 *sptep; + + ++iter->pos; + sptep = iter->desc->sptes[iter->pos]; + if (sptep) + return sptep; + } + + iter->desc = iter->desc->more; + + if (iter->desc) { + iter->pos = 0; + /* desc->sptes[0] cannot be NULL */ + return iter->desc->sptes[iter->pos]; + } + } + + return NULL; +} + static void drop_spte(struct kvm *kvm, u64 *sptep) { if (mmu_spte_clear_track_bits(sptep)) @@ -1013,23 +1043,27 @@ static void drop_spte(struct kvm *kvm, u64 *sptep) static int __rmap_write_protect(struct kvm *kvm, unsigned long *rmapp, int level) { - u64 *spte = NULL; + u64 *sptep; + struct rmap_iterator iter; int write_protected = 0; - while ((spte = rmap_next(rmapp, spte))) { - BUG_ON(!(*spte & PT_PRESENT_MASK)); - rmap_printk("rmap_write_protect: spte %p %llx\n", spte, *spte); + for (sptep = rmap_get_first(*rmapp, &iter); sptep;) { + BUG_ON(!(*sptep & PT_PRESENT_MASK)); + rmap_printk("rmap_write_protect: spte %p %llx\n", sptep, *sptep); - if (!is_writable_pte(*spte)) + if (!is_writable_pte(*sptep)) { + sptep = rmap_get_next(&iter); continue; + } if (level == PT_PAGE_TABLE_LEVEL) { - mmu_spte_update(spte, *spte & ~PT_WRITABLE_MASK); + mmu_spte_update(sptep, *sptep & ~PT_WRITABLE_MASK); + sptep = rmap_get_next(&iter); } else { - BUG_ON(!is_large_pte(*spte)); - drop_spte(kvm, spte); + BUG_ON(!is_large_pte(*sptep)); + drop_spte(kvm, sptep); --kvm->stat.lpages; - spte = NULL; + sptep = rmap_get_first(*rmapp, &iter); } write_protected = 1; @@ -1084,48 +1118,57 @@ static int rmap_write_protect(struct kvm *kvm, u64 gfn) static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp, unsigned long data) { - u64 *spte; + u64 *sptep; + struct rmap_iterator iter; int need_tlb_flush = 0; - while ((spte = rmap_next(rmapp, NULL))) { - BUG_ON(!(*spte & PT_PRESENT_MASK)); - rmap_printk("kvm_rmap_unmap_hva: spte %p %llx\n", spte, *spte); - drop_spte(kvm, spte); + while ((sptep = rmap_get_first(*rmapp, &iter))) { + BUG_ON(!(*sptep & PT_PRESENT_MASK)); + rmap_printk("kvm_rmap_unmap_hva: spte %p %llx\n", sptep, *sptep); + + drop_spte(kvm, sptep); need_tlb_flush = 1; } + return need_tlb_flush; } static int kvm_set_pte_rmapp(struct kvm *kvm, unsigned long *rmapp, unsigned long data) { + u64 *sptep; + struct rmap_iterator iter; int need_flush = 0; - u64 *spte, new_spte; + u64 new_spte; pte_t *ptep = (pte_t *)data; pfn_t new_pfn; WARN_ON(pte_huge(*ptep)); new_pfn = pte_pfn(*ptep); - spte = rmap_next(rmapp, NULL); - while (spte) { - BUG_ON(!is_shadow_present_pte(*spte)); - rmap_printk("kvm_set_pte_rmapp: spte %p %llx\n", spte, *spte); + + for (sptep = rmap_get_first(*rmapp, &iter); sptep;) { + BUG_ON(!is_shadow_present_pte(*sptep)); + rmap_printk("kvm_set_pte_rmapp: spte %p %llx\n", sptep, *sptep); + need_flush = 1; + if (pte_write(*ptep)) { - drop_spte(kvm, spte); - spte = rmap_next(rmapp, NULL); + drop_spte(kvm, sptep); + sptep = rmap_get_first(*rmapp, &iter); } else { - new_spte = *spte &~ (PT64_BASE_ADDR_MASK); + new_spte = *sptep & ~PT64_BASE_ADDR_MASK; new_spte |= (u64)new_pfn << PAGE_SHIFT; new_spte &= ~PT_WRITABLE_MASK; new_spte &= ~SPTE_HOST_WRITEABLE; new_spte &= ~shadow_accessed_mask; - mmu_spte_clear_track_bits(spte); - mmu_spte_set(spte, new_spte); - spte = rmap_next(rmapp, spte); + + mmu_spte_clear_track_bits(sptep); + mmu_spte_set(sptep, new_spte); + sptep = rmap_get_next(&iter); } } + if (need_flush) kvm_flush_remote_tlbs(kvm); @@ -1184,7 +1227,8 @@ void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte) static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp, unsigned long data) { - u64 *spte; + u64 *sptep; + struct rmap_iterator iter; int young = 0; /* @@ -1197,25 +1241,24 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp, if (!shadow_accessed_mask) return kvm_unmap_rmapp(kvm, rmapp, data); - spte = rmap_next(rmapp, NULL); - while (spte) { - int _young; - u64 _spte = *spte; - BUG_ON(!(_spte & PT_PRESENT_MASK)); - _young = _spte & PT_ACCESSED_MASK; - if (_young) { + for (sptep = rmap_get_first(*rmapp, &iter); sptep; + sptep = rmap_get_next(&iter)) { + BUG_ON(!(*sptep & PT_PRESENT_MASK)); + + if (*sptep & PT_ACCESSED_MASK) { young = 1; - clear_bit(PT_ACCESSED_SHIFT, (unsigned long *)spte); + clear_bit(PT_ACCESSED_SHIFT, (unsigned long *)sptep); } - spte = rmap_next(rmapp, spte); } + return young; } static int kvm_test_age_rmapp(struct kvm *kvm, unsigned long *rmapp, unsigned long data) { - u64 *spte; + u64 *sptep; + struct rmap_iterator iter; int young = 0; /* @@ -1226,16 +1269,14 @@ static int kvm_test_age_rmapp(struct kvm *kvm, unsigned long *rmapp, if (!shadow_accessed_mask) goto out; - spte = rmap_next(rmapp, NULL); - while (spte) { - u64 _spte = *spte; - BUG_ON(!(_spte & PT_PRESENT_MASK)); - young = _spte & PT_ACCESSED_MASK; - if (young) { + for (sptep = rmap_get_first(*rmapp, &iter); sptep; + sptep = rmap_get_next(&iter)) { + BUG_ON(!(*sptep & PT_PRESENT_MASK)); + + if (*sptep & PT_ACCESSED_MASK) { young = 1; break; } - spte = rmap_next(rmapp, spte); } out: return young; @@ -1887,10 +1928,11 @@ static void kvm_mmu_put_page(struct kvm_mmu_page *sp, u64 *parent_pte) static void kvm_mmu_unlink_parents(struct kvm *kvm, struct kvm_mmu_page *sp) { - u64 *parent_pte; + u64 *sptep; + struct rmap_iterator iter; - while ((parent_pte = pte_list_next(&sp->parent_ptes, NULL))) - drop_parent_pte(sp, parent_pte); + while ((sptep = rmap_get_first(sp->parent_ptes, &iter))) + drop_parent_pte(sp, sptep); } static int mmu_zap_unsync_children(struct kvm *kvm, diff --git a/arch/x86/kvm/mmu_audit.c b/arch/x86/kvm/mmu_audit.c index 715da5a..7d7d0b9 100644 --- a/arch/x86/kvm/mmu_audit.c +++ b/arch/x86/kvm/mmu_audit.c @@ -192,7 +192,8 @@ static void audit_write_protection(struct kvm *kvm, struct kvm_mmu_page *sp) { struct kvm_memory_slot *slot; unsigned long *rmapp; - u64 *spte; + u64 *sptep; + struct rmap_iterator iter; if (sp->role.direct || sp->unsync || sp->role.invalid) return; @@ -200,13 +201,12 @@ static void audit_write_protection(struct kvm *kvm, struct kvm_mmu_page *sp) slot = gfn_to_memslot(kvm, sp->gfn); rmapp = &slot->rmap[sp->gfn - slot->base_gfn]; - spte = rmap_next(rmapp, NULL); - while (spte) { - if (is_writable_pte(*spte)) + for (sptep = rmap_get_first(*rmapp, &iter); sptep; + sptep = rmap_get_next(&iter)) { + if (is_writable_pte(*sptep)) audit_printk(kvm, "shadow page has writable " "mappings: gfn %llx role %x\n", sp->gfn, sp->role.word); - spte = rmap_next(rmapp, spte); } } -- cgit v0.10.2 From 80881dae52d05d3d6c920624157d68006390f01e Mon Sep 17 00:00:00 2001 From: Stephen Warren Date: Mon, 26 Mar 2012 12:49:57 -0600 Subject: ARM: tegra: uncompress.h: Implement TEGRA_DEBUG_UART_AUTO_ODMDATA Tegra has 5 UARTS which could be used for low-level debug output. Commit fe26398 "ARM: tegra: uncompress.h: Choose a UART at runtime" implemented one method for the kernel to automatically determine which of these to use at run-time, so that the same DEBUG_LL-enabled kernel image could be used across multiple Tegra boards. The required bootloader-side setup for that option is implemented in NVIDIA's various downstream U-Boot branches, but the U-Boot maintainers have refused to accept it upstream. This change implements an alternative automatic UART selection option using ODMDATA. This is a 32-bit value programmed into Tegra's boot memory which provides a few pieces of basic board-specific information, including a field that indicates the console UART. Setting up this value is part of the standard Tegra boot architecture, and so requires no Tegra-specific hacks in the bootloader's UART driver. Note that in theory, the format of ODMDATA is board-specific. However, in practice all boards use the same location/size/values for the UART field. ODMDATA[19:18] (which drive the type of debug console) is more problematic, since some boards use value 2 for UART and others use 3. This patch just accepts either value; if this doesn't work well for a given board, I'd suggest simply not enabling this debug option when building for that board. Note that the kernel assumes the bootloader has already set up any required pinmux settings for the UART; there is no way the kernel can do this for itself prior to knowing which board it's running on. In practice, people using this feature are highly likely to be using bootloaders that have indeed configured the pinmux. This assumption existed prior to this patch. Signed-off-by: Stephen Warren diff --git a/arch/arm/mach-tegra/Kconfig b/arch/arm/mach-tegra/Kconfig index d0f2546..204d3d4 100644 --- a/arch/arm/mach-tegra/Kconfig +++ b/arch/arm/mach-tegra/Kconfig @@ -111,7 +111,7 @@ config MACH_VENTANA Support for the nVidia Ventana development platform choice - prompt "Low-level debug console UART" + prompt "Default low-level debug console UART" default TEGRA_DEBUG_UART_NONE config TEGRA_DEBUG_UART_NONE @@ -134,6 +134,33 @@ config TEGRA_DEBUG_UARTE endchoice +choice + prompt "Automatic low-level debug console UART" + default TEGRA_DEBUG_UART_AUTO_NONE + +config TEGRA_DEBUG_UART_AUTO_NONE + bool "None" + +config TEGRA_DEBUG_UART_AUTO_ODMDATA + bool "Via ODMDATA" + help + Automatically determines which UART to use for low-level debug based + on the ODMDATA value. This value is part of the BCT, and is written + to the boot memory device using nvflash, or other flashing tool. + When bits 19:18 are 3, then bits 17:15 indicate which UART to use; + 0/1/2/3/4 are UART A/B/C/D/E. + +config TEGRA_DEBUG_UART_AUTO_SCRATCH + bool "Via UART scratch register" + help + Automatically determines which UART to use for low-level debug based + on the UART scratch register value. Some bootloaders put ASCII 'D' + in this register when they initialize their own console UART output. + Using this option allows the kernel to automatically pick the same + UART. + +endchoice + config TEGRA_SYSTEM_DMA bool "Enable system DMA driver for NVIDIA Tegra SoCs" default y diff --git a/arch/arm/mach-tegra/include/mach/uncompress.h b/arch/arm/mach-tegra/include/mach/uncompress.h index 5a440f3..937c4c5 100644 --- a/arch/arm/mach-tegra/include/mach/uncompress.h +++ b/arch/arm/mach-tegra/include/mach/uncompress.h @@ -63,52 +63,86 @@ static inline void save_uart_address(void) buf[0] = 0; } -/* - * Setup before decompression. This is where we do UART selection for - * earlyprintk and init the uart_base register. - */ -static inline void arch_decomp_setup(void) +static const struct { + u32 base; + u32 reset_reg; + u32 clock_reg; + u32 bit; +} uarts[] = { + { + TEGRA_UARTA_BASE, + TEGRA_CLK_RESET_BASE + 0x04, + TEGRA_CLK_RESET_BASE + 0x10, + 6, + }, + { + TEGRA_UARTB_BASE, + TEGRA_CLK_RESET_BASE + 0x04, + TEGRA_CLK_RESET_BASE + 0x10, + 7, + }, + { + TEGRA_UARTC_BASE, + TEGRA_CLK_RESET_BASE + 0x08, + TEGRA_CLK_RESET_BASE + 0x14, + 23, + }, + { + TEGRA_UARTD_BASE, + TEGRA_CLK_RESET_BASE + 0x0c, + TEGRA_CLK_RESET_BASE + 0x18, + 1, + }, + { + TEGRA_UARTE_BASE, + TEGRA_CLK_RESET_BASE + 0x0c, + TEGRA_CLK_RESET_BASE + 0x18, + 2, + }, +}; + +static inline bool uart_clocked(int i) +{ + if (*(u8 *)uarts[i].reset_reg & BIT(uarts[i].bit)) + return false; + + if (!(*(u8 *)uarts[i].clock_reg & BIT(uarts[i].bit))) + return false; + + return true; +} + +#ifdef CONFIG_TEGRA_DEBUG_UART_AUTO_ODMDATA +int auto_odmdata(void) +{ + volatile u32 *pmc = (volatile u32 *)TEGRA_PMC_BASE; + u32 odmdata = pmc[0xa0 / 4]; + + /* + * Bits 19:18 are the console type: 0=default, 1=none, 2==DCC, 3==UART + * Some boards apparently swap the last two values, but we don't have + * any way of catering for that here, so we just accept either. If this + * doesn't make sense for your board, just don't enable this feature. + * + * Bits 17:15 indicate the UART to use, 0/1/2/3/4 are UART A/B/C/D/E. + */ + + switch ((odmdata >> 18) & 3) { + case 2: + case 3: + break; + default: + return -1; + } + + return (odmdata >> 15) & 7; +} +#endif + +#ifdef CONFIG_TEGRA_DEBUG_UART_AUTO_SCRATCH +int auto_scratch(void) { - static const struct { - u32 base; - u32 reset_reg; - u32 clock_reg; - u32 bit; - } uarts[] = { - { - TEGRA_UARTA_BASE, - TEGRA_CLK_RESET_BASE + 0x04, - TEGRA_CLK_RESET_BASE + 0x10, - 6, - }, - { - TEGRA_UARTB_BASE, - TEGRA_CLK_RESET_BASE + 0x04, - TEGRA_CLK_RESET_BASE + 0x10, - 7, - }, - { - TEGRA_UARTC_BASE, - TEGRA_CLK_RESET_BASE + 0x08, - TEGRA_CLK_RESET_BASE + 0x14, - 23, - }, - { - TEGRA_UARTD_BASE, - TEGRA_CLK_RESET_BASE + 0x0c, - TEGRA_CLK_RESET_BASE + 0x18, - 1, - }, - { - TEGRA_UARTE_BASE, - TEGRA_CLK_RESET_BASE + 0x0c, - TEGRA_CLK_RESET_BASE + 0x18, - 2, - }, - }; int i; - volatile u32 *apb_misc = (volatile u32 *)TEGRA_APB_MISC_BASE; - u32 chip, div; /* * Look for the first UART that: @@ -125,20 +159,60 @@ static inline void arch_decomp_setup(void) * back to what's specified in TEGRA_DEBUG_UART_BASE. */ for (i = 0; i < ARRAY_SIZE(uarts); i++) { - if (*(u8 *)uarts[i].reset_reg & BIT(uarts[i].bit)) - continue; - - if (!(*(u8 *)uarts[i].clock_reg & BIT(uarts[i].bit))) + if (!uart_clocked(i)) continue; uart = (volatile u8 *)uarts[i].base; if (uart[UART_SCR << DEBUG_UART_SHIFT] != 'D') continue; - break; + return i; } - if (i == ARRAY_SIZE(uarts)) - uart = (volatile u8 *)TEGRA_DEBUG_UART_BASE; + + return -1; +} +#endif + +/* + * Setup before decompression. This is where we do UART selection for + * earlyprintk and init the uart_base register. + */ +static inline void arch_decomp_setup(void) +{ + int uart_id, auto_uart_id; + volatile u32 *apb_misc = (volatile u32 *)TEGRA_APB_MISC_BASE; + u32 chip, div; + +#if defined(CONFIG_TEGRA_DEBUG_UARTA) + uart_id = 0; +#elif defined(CONFIG_TEGRA_DEBUG_UARTB) + uart_id = 1; +#elif defined(CONFIG_TEGRA_DEBUG_UARTC) + uart_id = 2; +#elif defined(CONFIG_TEGRA_DEBUG_UARTD) + uart_id = 3; +#elif defined(CONFIG_TEGRA_DEBUG_UARTE) + uart_id = 4; +#else + uart_id = -1; +#endif + +#if defined(CONFIG_TEGRA_DEBUG_UART_AUTO_ODMDATA) + auto_uart_id = auto_odmdata(); +#elif defined(CONFIG_TEGRA_DEBUG_UART_AUTO_SCRATCH) + auto_uart_id = auto_scratch(); +#else + auto_uart_id = -1; +#endif + if (auto_uart_id != -1) + uart_id = auto_uart_id; + + if (uart_id < 0 || uart_id >= ARRAY_SIZE(uarts) || + !uart_clocked(uart_id)) + uart = NULL; + else + uart = (volatile u8 *)uarts[uart_id].base; + save_uart_address(); if (uart == NULL) return; -- cgit v0.10.2 From 5528a8469fbc01db218fbc00ebec1e3fc088e759 Mon Sep 17 00:00:00 2001 From: Rafal Prylowski Date: Thu, 5 Apr 2012 13:44:07 +0200 Subject: arm: ep93xx: Don't try to release not acquired GPIO lines Fail path of ep93xx_keypad_acquire_gpio() tries to release GPIO lines not acquired successfully before. Fix this. Signed-off-by: Rafal Prylowski Acked-by: H Hartley Sweeten Signed-off-by: Ryan Mallon diff --git a/arch/arm/mach-ep93xx/core.c b/arch/arm/mach-ep93xx/core.c index 8d25895..2ea2ffc 100644 --- a/arch/arm/mach-ep93xx/core.c +++ b/arch/arm/mach-ep93xx/core.c @@ -734,7 +734,7 @@ int ep93xx_keypad_acquire_gpio(struct platform_device *pdev) fail_gpio_d: gpio_free(EP93XX_GPIO_LINE_C(i)); fail_gpio_c: - for ( ; i >= 0; --i) { + for (--i; i >= 0; --i) { gpio_free(EP93XX_GPIO_LINE_C(i)); gpio_free(EP93XX_GPIO_LINE_D(i)); } -- cgit v0.10.2 From 2db938bee32e7469ca8ed9bfb3a05535f28c680d Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 21 Feb 2011 17:25:37 +0100 Subject: jbd: Refine commit writeout logic Currently we write out all journal buffers in WRITE_SYNC mode. This improves performance for fsync heavy workloads but hinders performance when writes are mostly asynchronous, most noticably it slows down readers and users complain about slow desktop response etc. So submit writes as asynchronous in the normal case and only submit writes as WRITE_SYNC if we detect someone is waiting for current transaction commit. I've gathered some numbers to back this change. The first is the read latency test. It measures time to read 1 MB after several seconds of sleeping in presence of streaming writes. Top 10 times (out of 90) in us: Before After 2131586 697473 1709932 557487 1564598 535642 1480462 347573 1478579 323153 1408496 222181 1388960 181273 1329565 181070 1252486 172832 1223265 172278 Average: 619377 82180 So the improvement in both maximum and average latency is massive. I've measured fsync throughput by: fs_mark -n 100 -t 1 -s 16384 -d /mnt/fsync/ -S 1 -L 4 in presence of streaming reader. The numbers (fsyncs/s) are: Before After 9.9 6.3 6.8 6.0 6.3 6.2 5.8 6.1 So fsync performance seems unharmed by this change. Signed-off-by: Jan Kara diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c index f2b9a57..9d31e6a 100644 --- a/fs/jbd/commit.c +++ b/fs/jbd/commit.c @@ -298,6 +298,7 @@ void journal_commit_transaction(journal_t *journal) int tag_flag; int i; struct blk_plug plug; + int write_op = WRITE; /* * First job: lock down the current transaction and wait for @@ -413,13 +414,16 @@ void journal_commit_transaction(journal_t *journal) jbd_debug (3, "JBD: commit phase 2\n"); + if (tid_geq(journal->j_commit_waited, commit_transaction->t_tid)) + write_op = WRITE_SYNC; + /* * Now start flushing things to disk, in the order they appear * on the transaction lists. Data blocks go first. */ blk_start_plug(&plug); err = journal_submit_data_buffers(journal, commit_transaction, - WRITE_SYNC); + write_op); blk_finish_plug(&plug); /* @@ -478,7 +482,7 @@ void journal_commit_transaction(journal_t *journal) blk_start_plug(&plug); - journal_write_revoke_records(journal, commit_transaction, WRITE_SYNC); + journal_write_revoke_records(journal, commit_transaction, write_op); /* * If we found any dirty or locked buffers, then we should have @@ -649,7 +653,7 @@ start_journal_io: clear_buffer_dirty(bh); set_buffer_uptodate(bh); bh->b_end_io = journal_end_buffer_io_sync; - submit_bh(WRITE_SYNC, bh); + submit_bh(write_op, bh); } cond_resched(); diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c index 0971e92..2047fd7 100644 --- a/fs/jbd/journal.c +++ b/fs/jbd/journal.c @@ -563,6 +563,8 @@ int log_wait_commit(journal_t *journal, tid_t tid) spin_unlock(&journal->j_state_lock); #endif spin_lock(&journal->j_state_lock); + if (!tid_geq(journal->j_commit_waited, tid)) + journal->j_commit_waited = tid; while (tid_gt(tid, journal->j_commit_sequence)) { jbd_debug(1, "JBD: want %d, j_commit_sequence=%d\n", tid, journal->j_commit_sequence); diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c index b2a7e52..febc10d 100644 --- a/fs/jbd/transaction.c +++ b/fs/jbd/transaction.c @@ -1433,8 +1433,6 @@ int journal_stop(handle_t *handle) } } - if (handle->h_sync) - transaction->t_synchronous_commit = 1; current->journal_info = NULL; spin_lock(&journal->j_state_lock); spin_lock(&transaction->t_handle_lock); diff --git a/include/linux/jbd.h b/include/linux/jbd.h index d211732..f265682 100644 --- a/include/linux/jbd.h +++ b/include/linux/jbd.h @@ -479,12 +479,6 @@ struct transaction_s * How many handles used this transaction? [t_handle_lock] */ int t_handle_count; - - /* - * This transaction is being forced and some process is - * waiting for it to finish. - */ - unsigned int t_synchronous_commit:1; }; /** @@ -531,6 +525,8 @@ struct transaction_s * transaction * @j_commit_request: Sequence number of the most recent transaction wanting * commit + * @j_commit_waited: Sequence number of the most recent transaction someone + * is waiting for to commit. * @j_uuid: Uuid of client object. * @j_task: Pointer to the current commit thread for this journal * @j_max_transaction_buffers: Maximum number of metadata buffers to allow in a @@ -696,6 +692,13 @@ struct journal_s tid_t j_commit_request; /* + * Sequence number of the most recent transaction someone is waiting + * for to commit. + * [j_state_lock] + */ + tid_t j_commit_waited; + + /* * Journal uuid: identifies the object (filesystem, LVM volume etc) * backed by this journal. This will eventually be replaced by an array * of uuids, allowing us to index multiple devices within a single diff --git a/include/trace/events/jbd.h b/include/trace/events/jbd.h index aff64d8..9305e1b 100644 --- a/include/trace/events/jbd.h +++ b/include/trace/events/jbd.h @@ -36,19 +36,17 @@ DECLARE_EVENT_CLASS(jbd_commit, TP_STRUCT__entry( __field( dev_t, dev ) - __field( char, sync_commit ) __field( int, transaction ) ), TP_fast_assign( __entry->dev = journal->j_fs_dev->bd_dev; - __entry->sync_commit = commit_transaction->t_synchronous_commit; __entry->transaction = commit_transaction->t_tid; ), - TP_printk("dev %d,%d transaction %d sync %d", + TP_printk("dev %d,%d transaction %d", MAJOR(__entry->dev), MINOR(__entry->dev), - __entry->transaction, __entry->sync_commit) + __entry->transaction) ); DEFINE_EVENT(jbd_commit, jbd_start_commit, @@ -87,19 +85,17 @@ TRACE_EVENT(jbd_drop_transaction, TP_STRUCT__entry( __field( dev_t, dev ) - __field( char, sync_commit ) __field( int, transaction ) ), TP_fast_assign( __entry->dev = journal->j_fs_dev->bd_dev; - __entry->sync_commit = commit_transaction->t_synchronous_commit; __entry->transaction = commit_transaction->t_tid; ), - TP_printk("dev %d,%d transaction %d sync %d", + TP_printk("dev %d,%d transaction %d", MAJOR(__entry->dev), MINOR(__entry->dev), - __entry->transaction, __entry->sync_commit) + __entry->transaction) ); TRACE_EVENT(jbd_end_commit, @@ -109,21 +105,19 @@ TRACE_EVENT(jbd_end_commit, TP_STRUCT__entry( __field( dev_t, dev ) - __field( char, sync_commit ) __field( int, transaction ) __field( int, head ) ), TP_fast_assign( __entry->dev = journal->j_fs_dev->bd_dev; - __entry->sync_commit = commit_transaction->t_synchronous_commit; __entry->transaction = commit_transaction->t_tid; __entry->head = journal->j_tail_sequence; ), - TP_printk("dev %d,%d transaction %d sync %d head %d", + TP_printk("dev %d,%d transaction %d head %d", MAJOR(__entry->dev), MINOR(__entry->dev), - __entry->transaction, __entry->sync_commit, __entry->head) + __entry->transaction, __entry->head) ); TRACE_EVENT(jbd_do_submit_data, @@ -133,19 +127,17 @@ TRACE_EVENT(jbd_do_submit_data, TP_STRUCT__entry( __field( dev_t, dev ) - __field( char, sync_commit ) __field( int, transaction ) ), TP_fast_assign( __entry->dev = journal->j_fs_dev->bd_dev; - __entry->sync_commit = commit_transaction->t_synchronous_commit; __entry->transaction = commit_transaction->t_tid; ), - TP_printk("dev %d,%d transaction %d sync %d", + TP_printk("dev %d,%d transaction %d", MAJOR(__entry->dev), MINOR(__entry->dev), - __entry->transaction, __entry->sync_commit) + __entry->transaction) ); TRACE_EVENT(jbd_cleanup_journal_tail, -- cgit v0.10.2 From ac0dd2474822fbd7d8e8cca12495e6b4760556cd Mon Sep 17 00:00:00 2001 From: Akira Fujita Date: Tue, 27 Mar 2012 16:09:16 +0900 Subject: ext3: remove max_debt in find_group_orlov() max_debt, involved variables and calculations are no longer needed, clean them up. Signed-off-by: Akira Fujita Signed-off-by: Jan Kara diff --git a/fs/ext3/ialloc.c b/fs/ext3/ialloc.c index e3c39e4..082afd7 100644 --- a/fs/ext3/ialloc.c +++ b/fs/ext3/ialloc.c @@ -180,8 +180,7 @@ error_return: * It's OK to put directory into a group unless * it has too many directories already (max_dirs) or * it has too few free inodes left (min_inodes) or - * it has too few free blocks left (min_blocks) or - * it's already running too large debt (max_debt). + * it has too few free blocks left (min_blocks). * Parent's group is preferred, if it doesn't satisfy these * conditions we search cyclically through the rest. If none * of the groups look good we just look for a group with more @@ -191,21 +190,16 @@ error_return: * when we allocate an inode, within 0--255. */ -#define INODE_COST 64 -#define BLOCK_COST 256 - static int find_group_orlov(struct super_block *sb, struct inode *parent) { int parent_group = EXT3_I(parent)->i_block_group; struct ext3_sb_info *sbi = EXT3_SB(sb); - struct ext3_super_block *es = sbi->s_es; int ngroups = sbi->s_groups_count; int inodes_per_group = EXT3_INODES_PER_GROUP(sb); unsigned int freei, avefreei; ext3_fsblk_t freeb, avefreeb; - ext3_fsblk_t blocks_per_dir; unsigned int ndirs; - int max_debt, max_dirs, min_inodes; + int max_dirs, min_inodes; ext3_grpblk_t min_blocks; int group = -1, i; struct ext3_group_desc *desc; @@ -242,20 +236,10 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent) goto fallback; } - blocks_per_dir = (le32_to_cpu(es->s_blocks_count) - freeb) / ndirs; - max_dirs = ndirs / ngroups + inodes_per_group / 16; min_inodes = avefreei - inodes_per_group / 4; min_blocks = avefreeb - EXT3_BLOCKS_PER_GROUP(sb) / 4; - max_debt = EXT3_BLOCKS_PER_GROUP(sb) / max(blocks_per_dir, (ext3_fsblk_t)BLOCK_COST); - if (max_debt * INODE_COST > inodes_per_group) - max_debt = inodes_per_group / INODE_COST; - if (max_debt > 255) - max_debt = 255; - if (max_debt == 0) - max_debt = 1; - for (i = 0; i < ngroups; i++) { group = (parent_group + i) % ngroups; desc = ext3_get_group_desc (sb, group, NULL); -- cgit v0.10.2 From ee65244b210e00060c2d4d8eff4ad9b18400f588 Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Wed, 28 Mar 2012 09:32:32 +0100 Subject: ext3: update documentation with barrier=1 default Commit 00eacd6 ("ext3: make ext3 mount default to barrier=1") changed the default barrier mount option for ext3. The documentation needs to be updated, so this patch does that. Reviewed-by: Christoph Hellwig Signed-off-by: Stefan Hajnoczi Signed-off-by: Jan Kara diff --git a/Documentation/filesystems/ext3.txt b/Documentation/filesystems/ext3.txt index b100adc..293855e 100644 --- a/Documentation/filesystems/ext3.txt +++ b/Documentation/filesystems/ext3.txt @@ -59,9 +59,9 @@ commit=nrsec (*) Ext3 can be told to sync all its data and metadata Setting it to very large values will improve performance. -barrier=<0(*)|1> This enables/disables the use of write barriers in -barrier the jbd code. barrier=0 disables, barrier=1 enables. -nobarrier (*) This also requires an IO stack which can support +barrier=<0|1(*)> This enables/disables the use of write barriers in +barrier (*) the jbd code. barrier=0 disables, barrier=1 enables. +nobarrier This also requires an IO stack which can support barriers, and if jbd gets an error on a barrier write, it will disable again with a warning. Write barriers enforce proper on-disk ordering -- cgit v0.10.2 From f2b2242081314ee4385f3b49d92b0adff8324d80 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Wed, 21 Mar 2012 18:14:30 +0200 Subject: ext2: write superblock only once on unmount Currently on unmount if we are mounted R/W, we first write the superblock to the media if it is dirty, and then write it again, which is not optimal. This patch makes ext2 write the superblock on unmount less times. Signed-off-by: Artem Bityutskiy Signed-off-by: Jan Kara diff --git a/fs/ext2/super.c b/fs/ext2/super.c index e1025c7..12a7916 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c @@ -130,9 +130,6 @@ static void ext2_put_super (struct super_block * sb) dquot_disable(sb, -1, DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED); - if (sb->s_dirt) - ext2_write_super(sb); - ext2_xattr_put_super(sb); if (!(sb->s_flags & MS_RDONLY)) { struct ext2_super_block *es = sbi->s_es; -- cgit v0.10.2 From b838ec2232b764a4903707e212c62f681b32cd51 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Sat, 31 Mar 2012 14:22:10 +0200 Subject: ext2: Remove s_dirt handling Places which modify superblock feature / state fields mark the superblock buffer dirty so it is written out by flusher thread. Thus there's no need to set s_dirt there. The only other fields changing in the superblock are the numbers of free blocks, free inodes and s_wtime. There's no real need to write (or even compute) these periodically. Free blocks / inodes counters are recomputed on every mount from group counters anyway and value of s_wtime is only informational and imprecise anyway. So it should be enough to write these opportunistically on mount, remount, umount, and sync_fs times. Signed-off-by: Jan Kara diff --git a/fs/ext2/balloc.c b/fs/ext2/balloc.c index a8cbe1b..a9bba1e 100644 --- a/fs/ext2/balloc.c +++ b/fs/ext2/balloc.c @@ -165,7 +165,6 @@ static void release_blocks(struct super_block *sb, int count) struct ext2_sb_info *sbi = EXT2_SB(sb); percpu_counter_add(&sbi->s_freeblocks_counter, count); - sb->s_dirt = 1; } } @@ -180,7 +179,6 @@ static void group_adjust_blocks(struct super_block *sb, int group_no, free_blocks = le16_to_cpu(desc->bg_free_blocks_count); desc->bg_free_blocks_count = cpu_to_le16(free_blocks + count); spin_unlock(sb_bgl_lock(sbi, group_no)); - sb->s_dirt = 1; mark_buffer_dirty(bh); } } diff --git a/fs/ext2/ialloc.c b/fs/ext2/ialloc.c index 8b15cf8..c13eb7b 100644 --- a/fs/ext2/ialloc.c +++ b/fs/ext2/ialloc.c @@ -81,7 +81,6 @@ static void ext2_release_inode(struct super_block *sb, int group, int dir) spin_unlock(sb_bgl_lock(EXT2_SB(sb), group)); if (dir) percpu_counter_dec(&EXT2_SB(sb)->s_dirs_counter); - sb->s_dirt = 1; mark_buffer_dirty(bh); } @@ -543,7 +542,6 @@ got: } spin_unlock(sb_bgl_lock(sbi, group)); - sb->s_dirt = 1; mark_buffer_dirty(bh2); if (test_opt(sb, GRPID)) { inode->i_mode = mode; diff --git a/fs/ext2/super.c b/fs/ext2/super.c index 12a7916..a43f9ad 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c @@ -1158,7 +1158,6 @@ static void ext2_sync_super(struct super_block *sb, struct ext2_super_block *es, mark_buffer_dirty(EXT2_SB(sb)->s_sbh); if (wait) sync_dirty_buffer(EXT2_SB(sb)->s_sbh); - sb->s_dirt = 0; } /* @@ -1191,8 +1190,6 @@ void ext2_write_super(struct super_block *sb) { if (!(sb->s_flags & MS_RDONLY)) ext2_sync_fs(sb, 1); - else - sb->s_dirt = 0; } static int ext2_remount (struct super_block * sb, int * flags, char * data) diff --git a/fs/ext2/xattr.c b/fs/ext2/xattr.c index 6dcafc7..b6754db 100644 --- a/fs/ext2/xattr.c +++ b/fs/ext2/xattr.c @@ -339,7 +339,6 @@ static void ext2_xattr_update_super_block(struct super_block *sb) spin_lock(&EXT2_SB(sb)->s_lock); EXT2_SET_COMPAT_FEATURE(sb, EXT2_FEATURE_COMPAT_EXT_ATTR); spin_unlock(&EXT2_SB(sb)->s_lock); - sb->s_dirt = 1; mark_buffer_dirty(EXT2_SB(sb)->s_sbh); } -- cgit v0.10.2 From f72cf5e223a28d3b3ea7dc9e40464fd534e359e8 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Tue, 3 Apr 2012 12:49:18 +0300 Subject: ext2: do not register write_super within VFS Jan Kara removed 'sb->s_dirt' VFS flag references, so we do not need to register the ext2 'ext2_write_super()' method in the VFS superblock operations, because 'sb->s_dirt' won't be ever set to 1 and VFS won't ever call '->write_super()' anyway. Thus, remove the method. Tested using xfstests. Signed-off-by: Artem Bityutskiy Signed-off-by: Jan Kara diff --git a/fs/ext2/super.c b/fs/ext2/super.c index a43f9ad..e0e8f45 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c @@ -302,7 +302,6 @@ static const struct super_operations ext2_sops = { .write_inode = ext2_write_inode, .evict_inode = ext2_evict_inode, .put_super = ext2_put_super, - .write_super = ext2_write_super, .sync_fs = ext2_sync_fs, .statfs = ext2_statfs, .remount_fs = ext2_remount, -- cgit v0.10.2 From 9de6fe91afcdc38efe398a9d42014a7c920a64db Mon Sep 17 00:00:00 2001 From: Scott Wood Date: Wed, 11 Apr 2012 15:27:52 -0500 Subject: KVM: PPC: add CPU_FTR_EMB_HV to CPU table e6500 support (commit 10241842fbe900276634fee8d37ec48a7d8a762f, "powerpc: Add initial e6500 cpu support" and the introduction of CPU_FTR_EMB_HV (commit 73196cd364a2d972d73fa08da9d81ca3215bed68, "KVM: PPC: e500mc support") collided during merge, leaving e6500's CPU table entry missing CPU_FTR_EMB_HV. Signed-off-by: Scott Wood Signed-off-by: Marcelo Tosatti diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h index 67c34af..50d82c8 100644 --- a/arch/powerpc/include/asm/cputable.h +++ b/arch/powerpc/include/asm/cputable.h @@ -395,7 +395,7 @@ extern const char *powerpc_base_platform; #define CPU_FTRS_E6500 (CPU_FTR_USE_TB | CPU_FTR_NODSISRALIGN | \ CPU_FTR_L2CSR | CPU_FTR_LWSYNC | CPU_FTR_NOEXECUTE | \ CPU_FTR_DBELL | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \ - CPU_FTR_DEBUG_LVL_EXC) + CPU_FTR_DEBUG_LVL_EXC | CPU_FTR_EMB_HV) #define CPU_FTRS_GENERIC_32 (CPU_FTR_COMMON | CPU_FTR_NODSISRALIGN) /* 64-bit CPUs */ -- cgit v0.10.2 From 18cf8cf8bab1296f477ee4dd8f78b5b23c5a192e Mon Sep 17 00:00:00 2001 From: H Hartley Sweeten Date: Thu, 12 Apr 2012 13:44:20 -0700 Subject: mm: page-writeback.c: local functions should not be exposed globally The function global_dirtyable_memory is only referenced in this file and should be marked static to prevent it from being exposed globally. This quiets the sparse warning: warning: symbol 'global_dirtyable_memory' was not declared. Should it be static? Signed-off-by: H Hartley Sweeten Signed-off-by: Fengguang Wu diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 26adea8..9dec97f 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -204,7 +204,7 @@ static unsigned long highmem_dirtyable_memory(unsigned long total) * Returns the global number of pages potentially available for dirty * page cache. This is the base value for the global dirty limits. */ -unsigned long global_dirtyable_memory(void) +static unsigned long global_dirtyable_memory(void) { unsigned long x; -- cgit v0.10.2 From ae75954457eee0a608072368c5b477e40f378d7b Mon Sep 17 00:00:00 2001 From: Josh Triplett Date: Wed, 28 Mar 2012 11:32:28 -0700 Subject: KVM: SVM: Auto-load on CPUs with SVM Enable x86 feature-based autoloading for the kvm-amd module on CPUs with X86_FEATURE_SVM. Signed-off-by: Josh Triplett Signed-off-by: Marcelo Tosatti diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index f316720..f75af40 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -22,6 +22,7 @@ #include "x86.h" #include +#include #include #include #include @@ -42,6 +43,12 @@ MODULE_AUTHOR("Qumranet"); MODULE_LICENSE("GPL"); +static const struct x86_cpu_id svm_cpu_id[] = { + X86_FEATURE_MATCH(X86_FEATURE_SVM), + {} +}; +MODULE_DEVICE_TABLE(x86cpu, svm_cpu_id); + #define IOPM_ALLOC_ORDER 2 #define MSRPM_ALLOC_ORDER 1 -- cgit v0.10.2 From 1c11b37669a5209bd11fb857a103634afef971e8 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Mon, 9 Apr 2012 18:39:59 +0300 Subject: KVM: x86 emulator: add support for vector alignment x86 defines three classes of vector instructions: explicitly aligned (#GP(0) if unaligned, explicitly unaligned, and default (which depends on the encoding: AVX is unaligned, SSE is aligned). Add support for marking an instruction as explicitly aligned or unaligned, and mark MOVDQU as unaligned. Signed-off-by: Avi Kivity Signed-off-by: Marcelo Tosatti diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 8375622..6302e5c 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -142,6 +142,9 @@ #define Src2FS (OpFS << Src2Shift) #define Src2GS (OpGS << Src2Shift) #define Src2Mask (OpMask << Src2Shift) +#define Aligned ((u64)1 << 41) /* Explicitly aligned (e.g. MOVDQA) */ +#define Unaligned ((u64)1 << 42) /* Explicitly unaligned (e.g. MOVDQU) */ +#define Avx ((u64)1 << 43) /* Advanced Vector Extensions */ #define X2(x...) x, x #define X3(x...) X2(x), x @@ -557,6 +560,29 @@ static void set_segment_selector(struct x86_emulate_ctxt *ctxt, u16 selector, ctxt->ops->set_segment(ctxt, selector, &desc, base3, seg); } +/* + * x86 defines three classes of vector instructions: explicitly + * aligned, explicitly unaligned, and the rest, which change behaviour + * depending on whether they're AVX encoded or not. + * + * Also included is CMPXCHG16B which is not a vector instruction, yet it is + * subject to the same check. + */ +static bool insn_aligned(struct x86_emulate_ctxt *ctxt, unsigned size) +{ + if (likely(size < 16)) + return false; + + if (ctxt->d & Aligned) + return true; + else if (ctxt->d & Unaligned) + return false; + else if (ctxt->d & Avx) + return false; + else + return true; +} + static int __linearize(struct x86_emulate_ctxt *ctxt, struct segmented_address addr, unsigned size, bool write, bool fetch, @@ -621,6 +647,8 @@ static int __linearize(struct x86_emulate_ctxt *ctxt, } if (fetch ? ctxt->mode != X86EMUL_MODE_PROT64 : ctxt->ad_bytes != 8) la &= (u32)-1; + if (insn_aligned(ctxt, size) && ((la & (size - 1)) != 0)) + return emulate_gp(ctxt, 0); *linear = la; return X86EMUL_CONTINUE; bad: @@ -3415,7 +3443,7 @@ static struct opcode group11[] = { }; static struct gprefix pfx_0f_6f_0f_7f = { - N, N, N, I(Sse, em_movdqu), + N, N, N, I(Sse | Unaligned, em_movdqu), }; static struct opcode opcode_table[256] = { -- cgit v0.10.2 From 49597d8116ad70aabb598e606b218ddd9315b0af Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Mon, 9 Apr 2012 18:40:00 +0300 Subject: KVM: x86: emulate movdqa An Ubuntu 9.10 Karmic Koala guest is unable to boot or install due to missing movdqa emulation: kvm_exit: reason EXCEPTION_NMI rip 0x7fef3e025a7b info 7fef3e799000 80000b0e kvm_page_fault: address 7fef3e799000 error_code f kvm_emulate_insn: 0:7fef3e025a7b: 66 0f 7f 07 (prot64) movdqa %xmm0,(%rdi) [avi: mark it explicitly aligned] Signed-off-by: Stefan Hajnoczi Signed-off-by: Avi Kivity Signed-off-by: Marcelo Tosatti diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 6302e5c..b160fb1f 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -2818,7 +2818,7 @@ static int em_rdpmc(struct x86_emulate_ctxt *ctxt) static int em_mov(struct x86_emulate_ctxt *ctxt) { - ctxt->dst.val = ctxt->src.val; + memcpy(ctxt->dst.valptr, ctxt->src.valptr, ctxt->op_bytes); return X86EMUL_CONTINUE; } @@ -2898,12 +2898,6 @@ static int em_mov_sreg_rm(struct x86_emulate_ctxt *ctxt) return load_segment_descriptor(ctxt, sel, ctxt->modrm_reg); } -static int em_movdqu(struct x86_emulate_ctxt *ctxt) -{ - memcpy(&ctxt->dst.vec_val, &ctxt->src.vec_val, ctxt->op_bytes); - return X86EMUL_CONTINUE; -} - static int em_invlpg(struct x86_emulate_ctxt *ctxt) { int rc; @@ -3443,7 +3437,7 @@ static struct opcode group11[] = { }; static struct gprefix pfx_0f_6f_0f_7f = { - N, N, N, I(Sse | Unaligned, em_movdqu), + N, I(Sse | Aligned, em_mov), N, I(Sse | Unaligned, em_mov), }; static struct opcode opcode_table[256] = { -- cgit v0.10.2 From 3e114eb4db3a33141b8c91bb53dae9ba6b015a32 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Mon, 9 Apr 2012 18:40:01 +0300 Subject: KVM: x86 emulator: implement movntps Used to write to framebuffers (by at least Icaros). Signed-off-by: Avi Kivity Signed-off-by: Marcelo Tosatti diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index b160fb1f..fb39e0b 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -3440,6 +3440,10 @@ static struct gprefix pfx_0f_6f_0f_7f = { N, I(Sse | Aligned, em_mov), N, I(Sse | Unaligned, em_mov), }; +static struct gprefix pfx_vmovntpx = { + I(0, em_mov), N, N, N, +}; + static struct opcode opcode_table[256] = { /* 0x00 - 0x07 */ I6ALU(Lock, em_add), @@ -3571,7 +3575,8 @@ static struct opcode twobyte_table[256] = { IIP(ModRM | SrcMem | Priv | Op3264, em_cr_write, cr_write, check_cr_write), IIP(ModRM | SrcMem | Priv | Op3264, em_dr_write, dr_write, check_dr_write), N, N, N, N, - N, N, N, N, N, N, N, N, + N, N, N, GP(ModRM | DstMem | SrcReg | Sse | Mov | Aligned, &pfx_vmovntpx), + N, N, N, N, /* 0x30 - 0x3F */ II(ImplicitOps | Priv, em_wrmsr, wrmsr), IIP(ImplicitOps, em_rdtsc, rdtsc, check_rdtsc), -- cgit v0.10.2 From cbe2c9d30aa69b0551247ddb0fb450b6e8080ec4 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Mon, 9 Apr 2012 18:40:02 +0300 Subject: KVM: x86 emulator: MMX support General support for the MMX instruction set. Special care is taken to trap pending x87 exceptions so that they are properly reflected to the guest. Signed-off-by: Avi Kivity Signed-off-by: Marcelo Tosatti diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h index c222e1a..1ac46c22 100644 --- a/arch/x86/include/asm/kvm_emulate.h +++ b/arch/x86/include/asm/kvm_emulate.h @@ -200,7 +200,7 @@ typedef u32 __attribute__((vector_size(16))) sse128_t; /* Type, address-of, and value of an instruction's operand. */ struct operand { - enum { OP_REG, OP_MEM, OP_IMM, OP_XMM, OP_NONE } type; + enum { OP_REG, OP_MEM, OP_IMM, OP_XMM, OP_MM, OP_NONE } type; unsigned int bytes; union { unsigned long orig_val; @@ -213,12 +213,14 @@ struct operand { unsigned seg; } mem; unsigned xmm; + unsigned mm; } addr; union { unsigned long val; u64 val64; char valptr[sizeof(unsigned long) + 2]; sse128_t vec_val; + u64 mm_val; }; }; diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index fb39e0b..0011b4a 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -142,6 +142,7 @@ #define Src2FS (OpFS << Src2Shift) #define Src2GS (OpGS << Src2Shift) #define Src2Mask (OpMask << Src2Shift) +#define Mmx ((u64)1 << 40) /* MMX Vector instruction */ #define Aligned ((u64)1 << 41) /* Explicitly aligned (e.g. MOVDQA) */ #define Unaligned ((u64)1 << 42) /* Explicitly unaligned (e.g. MOVDQU) */ #define Avx ((u64)1 << 43) /* Advanced Vector Extensions */ @@ -887,6 +888,40 @@ static void write_sse_reg(struct x86_emulate_ctxt *ctxt, sse128_t *data, ctxt->ops->put_fpu(ctxt); } +static void read_mmx_reg(struct x86_emulate_ctxt *ctxt, u64 *data, int reg) +{ + ctxt->ops->get_fpu(ctxt); + switch (reg) { + case 0: asm("movq %%mm0, %0" : "=m"(*data)); break; + case 1: asm("movq %%mm1, %0" : "=m"(*data)); break; + case 2: asm("movq %%mm2, %0" : "=m"(*data)); break; + case 3: asm("movq %%mm3, %0" : "=m"(*data)); break; + case 4: asm("movq %%mm4, %0" : "=m"(*data)); break; + case 5: asm("movq %%mm5, %0" : "=m"(*data)); break; + case 6: asm("movq %%mm6, %0" : "=m"(*data)); break; + case 7: asm("movq %%mm7, %0" : "=m"(*data)); break; + default: BUG(); + } + ctxt->ops->put_fpu(ctxt); +} + +static void write_mmx_reg(struct x86_emulate_ctxt *ctxt, u64 *data, int reg) +{ + ctxt->ops->get_fpu(ctxt); + switch (reg) { + case 0: asm("movq %0, %%mm0" : : "m"(*data)); break; + case 1: asm("movq %0, %%mm1" : : "m"(*data)); break; + case 2: asm("movq %0, %%mm2" : : "m"(*data)); break; + case 3: asm("movq %0, %%mm3" : : "m"(*data)); break; + case 4: asm("movq %0, %%mm4" : : "m"(*data)); break; + case 5: asm("movq %0, %%mm5" : : "m"(*data)); break; + case 6: asm("movq %0, %%mm6" : : "m"(*data)); break; + case 7: asm("movq %0, %%mm7" : : "m"(*data)); break; + default: BUG(); + } + ctxt->ops->put_fpu(ctxt); +} + static void decode_register_operand(struct x86_emulate_ctxt *ctxt, struct operand *op) { @@ -903,6 +938,13 @@ static void decode_register_operand(struct x86_emulate_ctxt *ctxt, read_sse_reg(ctxt, &op->vec_val, reg); return; } + if (ctxt->d & Mmx) { + reg &= 7; + op->type = OP_MM; + op->bytes = 8; + op->addr.mm = reg; + return; + } op->type = OP_REG; if (ctxt->d & ByteOp) { @@ -948,6 +990,12 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt, read_sse_reg(ctxt, &op->vec_val, ctxt->modrm_rm); return rc; } + if (ctxt->d & Mmx) { + op->type = OP_MM; + op->bytes = 8; + op->addr.xmm = ctxt->modrm_rm & 7; + return rc; + } fetch_register_operand(op); return rc; } @@ -1415,6 +1463,9 @@ static int writeback(struct x86_emulate_ctxt *ctxt) case OP_XMM: write_sse_reg(ctxt, &ctxt->dst.vec_val, ctxt->dst.addr.xmm); break; + case OP_MM: + write_mmx_reg(ctxt, &ctxt->dst.mm_val, ctxt->dst.addr.mm); + break; case OP_NONE: /* no writeback */ break; @@ -3987,6 +4038,8 @@ done_prefixes: if (ctxt->d & Sse) ctxt->op_bytes = 16; + else if (ctxt->d & Mmx) + ctxt->op_bytes = 8; /* ModRM and SIB bytes. */ if (ctxt->d & ModRM) { @@ -4057,6 +4110,35 @@ static bool string_insn_completed(struct x86_emulate_ctxt *ctxt) return false; } +static int flush_pending_x87_faults(struct x86_emulate_ctxt *ctxt) +{ + bool fault = false; + + ctxt->ops->get_fpu(ctxt); + asm volatile("1: fwait \n\t" + "2: \n\t" + ".pushsection .fixup,\"ax\" \n\t" + "3: \n\t" + "movb $1, %[fault] \n\t" + "jmp 2b \n\t" + ".popsection \n\t" + _ASM_EXTABLE(1b, 3b) + : [fault]"+rm"(fault)); + ctxt->ops->put_fpu(ctxt); + + if (unlikely(fault)) + return emulate_exception(ctxt, MF_VECTOR, 0, false); + + return X86EMUL_CONTINUE; +} + +static void fetch_possible_mmx_operand(struct x86_emulate_ctxt *ctxt, + struct operand *op) +{ + if (op->type == OP_MM) + read_mmx_reg(ctxt, &op->mm_val, op->addr.mm); +} + int x86_emulate_insn(struct x86_emulate_ctxt *ctxt) { struct x86_emulate_ops *ops = ctxt->ops; @@ -4081,18 +4163,31 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt) goto done; } - if ((ctxt->d & Sse) - && ((ops->get_cr(ctxt, 0) & X86_CR0_EM) - || !(ops->get_cr(ctxt, 4) & X86_CR4_OSFXSR))) { + if (((ctxt->d & (Sse|Mmx)) && ((ops->get_cr(ctxt, 0) & X86_CR0_EM))) + || ((ctxt->d & Sse) && !(ops->get_cr(ctxt, 4) & X86_CR4_OSFXSR))) { rc = emulate_ud(ctxt); goto done; } - if ((ctxt->d & Sse) && (ops->get_cr(ctxt, 0) & X86_CR0_TS)) { + if ((ctxt->d & (Sse|Mmx)) && (ops->get_cr(ctxt, 0) & X86_CR0_TS)) { rc = emulate_nm(ctxt); goto done; } + if (ctxt->d & Mmx) { + rc = flush_pending_x87_faults(ctxt); + if (rc != X86EMUL_CONTINUE) + goto done; + /* + * Now that we know the fpu is exception safe, we can fetch + * operands from it. + */ + fetch_possible_mmx_operand(ctxt, &ctxt->src); + fetch_possible_mmx_operand(ctxt, &ctxt->src2); + if (!(ctxt->d & Mov)) + fetch_possible_mmx_operand(ctxt, &ctxt->dst); + } + if (unlikely(ctxt->guest_mode) && ctxt->intercept) { rc = emulator_check_intercept(ctxt, ctxt->intercept, X86_ICPT_PRE_EXCEPT); -- cgit v0.10.2 From e59717550e5cf0e7159c5b7af1d1ead35fef49dd Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Mon, 9 Apr 2012 18:40:03 +0300 Subject: KVM: x86 emulator: implement MMX MOVQ (opcodes 0f 6f, 0f 7f) Needed by some framebuffer drivers. See https://bugzilla.kernel.org/show_bug.cgi?id=42779 Signed-off-by: Avi Kivity Signed-off-by: Marcelo Tosatti diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 0011b4a..d5729a9 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -3488,7 +3488,7 @@ static struct opcode group11[] = { }; static struct gprefix pfx_0f_6f_0f_7f = { - N, I(Sse | Aligned, em_mov), N, I(Sse | Unaligned, em_mov), + I(Mmx, em_mov), I(Sse | Aligned, em_mov), N, I(Sse | Unaligned, em_mov), }; static struct gprefix pfx_vmovntpx = { -- cgit v0.10.2 From a0c9a822bf37e6282eb6006b407ec5aec22e08fb Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Wed, 11 Apr 2012 18:49:55 +0300 Subject: KVM: dont clear TMR on EOI Intel spec says that TMR needs to be set/cleared when IRR is set, but kvm also clears it on EOI. I did some tests on a real (AMD based) system, and I see same TMR values both before and after EOI, so I think it's a minor bug in kvm. This patch fixes TMR to be set/cleared on IRR set only as per spec. And now that we don't clear TMR, we can save an atomic read of TMR on EOI that's not propagated to ioapic, by checking whether ioapic needs a specific vector first and calculating the mode afterwards. Signed-off-by: Michael S. Tsirkin Signed-off-by: Marcelo Tosatti diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 8584322..992b4ea 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -92,6 +92,11 @@ static inline int apic_test_and_clear_vector(int vec, void *bitmap) return test_and_clear_bit(VEC_POS(vec), (bitmap) + REG_POS(vec)); } +static inline int apic_test_vector(int vec, void *bitmap) +{ + return test_bit(VEC_POS(vec), (bitmap) + REG_POS(vec)); +} + static inline void apic_set_vector(int vec, void *bitmap) { set_bit(VEC_POS(vec), (bitmap) + REG_POS(vec)); @@ -480,7 +485,6 @@ int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2) static void apic_set_eoi(struct kvm_lapic *apic) { int vector = apic_find_highest_isr(apic); - int trigger_mode; /* * Not every write EOI will has corresponding ISR, * one example is when Kernel check timer on setup_IO_APIC @@ -491,12 +495,15 @@ static void apic_set_eoi(struct kvm_lapic *apic) apic_clear_vector(vector, apic->regs + APIC_ISR); apic_update_ppr(apic); - if (apic_test_and_clear_vector(vector, apic->regs + APIC_TMR)) - trigger_mode = IOAPIC_LEVEL_TRIG; - else - trigger_mode = IOAPIC_EDGE_TRIG; - if (!(apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_DIRECTED_EOI)) + if (!(apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_DIRECTED_EOI) && + kvm_ioapic_handles_vector(apic->vcpu->kvm, vector)) { + int trigger_mode; + if (apic_test_vector(vector, apic->regs + APIC_TMR)) + trigger_mode = IOAPIC_LEVEL_TRIG; + else + trigger_mode = IOAPIC_EDGE_TRIG; kvm_ioapic_update_eoi(apic->vcpu->kvm, vector, trigger_mode); + } kvm_make_request(KVM_REQ_EVENT, apic->vcpu); } diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c index dcaf272c26..26fd54d 100644 --- a/virt/kvm/ioapic.c +++ b/virt/kvm/ioapic.c @@ -254,13 +254,17 @@ static void __kvm_ioapic_update_eoi(struct kvm_ioapic *ioapic, int vector, } } +bool kvm_ioapic_handles_vector(struct kvm *kvm, int vector) +{ + struct kvm_ioapic *ioapic = kvm->arch.vioapic; + smp_rmb(); + return test_bit(vector, ioapic->handled_vectors); +} + void kvm_ioapic_update_eoi(struct kvm *kvm, int vector, int trigger_mode) { struct kvm_ioapic *ioapic = kvm->arch.vioapic; - smp_rmb(); - if (!test_bit(vector, ioapic->handled_vectors)) - return; spin_lock(&ioapic->lock); __kvm_ioapic_update_eoi(ioapic, vector, trigger_mode); spin_unlock(&ioapic->lock); diff --git a/virt/kvm/ioapic.h b/virt/kvm/ioapic.h index 0b190c3..32872a0 100644 --- a/virt/kvm/ioapic.h +++ b/virt/kvm/ioapic.h @@ -71,6 +71,7 @@ int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source, int short_hand, int dest, int dest_mode); int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2); void kvm_ioapic_update_eoi(struct kvm *kvm, int vector, int trigger_mode); +bool kvm_ioapic_handles_vector(struct kvm *kvm, int vector); int kvm_ioapic_init(struct kvm *kvm); void kvm_ioapic_destroy(struct kvm *kvm); int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level); -- cgit v0.10.2 From 9fe2a7015393dc0203ac39242ae9c89038994f3c Mon Sep 17 00:00:00 2001 From: Srivatsa Vaddagiri Date: Fri, 23 Mar 2012 13:36:28 +0530 Subject: debugfs: Add support to print u32 array in debugfs Move the code from Xen to debugfs to make the code common for other users as well. Accked-by: Greg Kroah-Hartman Signed-off-by: Srivatsa Vaddagiri Signed-off-by: Suzuki Poulose [v1: Fixed rebase issues] [v2: Fixed PPC compile issues] Signed-off-by: Raghavendra K T Signed-off-by: Konrad Rzeszutek Wilk diff --git a/arch/x86/xen/debugfs.c b/arch/x86/xen/debugfs.c index ef1db19..c8377fb 100644 --- a/arch/x86/xen/debugfs.c +++ b/arch/x86/xen/debugfs.c @@ -19,107 +19,3 @@ struct dentry * __init xen_init_debugfs(void) return d_xen_debug; } -struct array_data -{ - void *array; - unsigned elements; -}; - -static int u32_array_open(struct inode *inode, struct file *file) -{ - file->private_data = NULL; - return nonseekable_open(inode, file); -} - -static size_t format_array(char *buf, size_t bufsize, const char *fmt, - u32 *array, unsigned array_size) -{ - size_t ret = 0; - unsigned i; - - for(i = 0; i < array_size; i++) { - size_t len; - - len = snprintf(buf, bufsize, fmt, array[i]); - len++; /* ' ' or '\n' */ - ret += len; - - if (buf) { - buf += len; - bufsize -= len; - buf[-1] = (i == array_size-1) ? '\n' : ' '; - } - } - - ret++; /* \0 */ - if (buf) - *buf = '\0'; - - return ret; -} - -static char *format_array_alloc(const char *fmt, u32 *array, unsigned array_size) -{ - size_t len = format_array(NULL, 0, fmt, array, array_size); - char *ret; - - ret = kmalloc(len, GFP_KERNEL); - if (ret == NULL) - return NULL; - - format_array(ret, len, fmt, array, array_size); - return ret; -} - -static ssize_t u32_array_read(struct file *file, char __user *buf, size_t len, - loff_t *ppos) -{ - struct inode *inode = file->f_path.dentry->d_inode; - struct array_data *data = inode->i_private; - size_t size; - - if (*ppos == 0) { - if (file->private_data) { - kfree(file->private_data); - file->private_data = NULL; - } - - file->private_data = format_array_alloc("%u", data->array, data->elements); - } - - size = 0; - if (file->private_data) - size = strlen(file->private_data); - - return simple_read_from_buffer(buf, len, ppos, file->private_data, size); -} - -static int xen_array_release(struct inode *inode, struct file *file) -{ - kfree(file->private_data); - - return 0; -} - -static const struct file_operations u32_array_fops = { - .owner = THIS_MODULE, - .open = u32_array_open, - .release= xen_array_release, - .read = u32_array_read, - .llseek = no_llseek, -}; - -struct dentry *xen_debugfs_create_u32_array(const char *name, umode_t mode, - struct dentry *parent, - u32 *array, unsigned elements) -{ - struct array_data *data = kmalloc(sizeof(*data), GFP_KERNEL); - - if (data == NULL) - return NULL; - - data->array = array; - data->elements = elements; - - return debugfs_create_file(name, mode, parent, data, &u32_array_fops); -} diff --git a/arch/x86/xen/debugfs.h b/arch/x86/xen/debugfs.h index 78d2549..12ebf33 100644 --- a/arch/x86/xen/debugfs.h +++ b/arch/x86/xen/debugfs.h @@ -3,8 +3,4 @@ struct dentry * __init xen_init_debugfs(void); -struct dentry *xen_debugfs_create_u32_array(const char *name, umode_t mode, - struct dentry *parent, - u32 *array, unsigned elements); - #endif /* _XEN_DEBUGFS_H */ diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c index d69cc6c..83e866d 100644 --- a/arch/x86/xen/spinlock.c +++ b/arch/x86/xen/spinlock.c @@ -440,12 +440,12 @@ static int __init xen_spinlock_debugfs(void) debugfs_create_u64("time_total", 0444, d_spin_debug, &spinlock_stats.time_total); - xen_debugfs_create_u32_array("histo_total", 0444, d_spin_debug, - spinlock_stats.histo_spin_total, HISTO_BUCKETS + 1); - xen_debugfs_create_u32_array("histo_spinning", 0444, d_spin_debug, - spinlock_stats.histo_spin_spinning, HISTO_BUCKETS + 1); - xen_debugfs_create_u32_array("histo_blocked", 0444, d_spin_debug, - spinlock_stats.histo_spin_blocked, HISTO_BUCKETS + 1); + debugfs_create_u32_array("histo_total", 0444, d_spin_debug, + spinlock_stats.histo_spin_total, HISTO_BUCKETS + 1); + debugfs_create_u32_array("histo_spinning", 0444, d_spin_debug, + spinlock_stats.histo_spin_spinning, HISTO_BUCKETS + 1); + debugfs_create_u32_array("histo_blocked", 0444, d_spin_debug, + spinlock_stats.histo_spin_blocked, HISTO_BUCKETS + 1); return 0; } diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c index 5dfafdd..2340f69 100644 --- a/fs/debugfs/file.c +++ b/fs/debugfs/file.c @@ -20,6 +20,7 @@ #include #include #include +#include static ssize_t default_read_file(struct file *file, char __user *buf, size_t count, loff_t *ppos) @@ -520,6 +521,133 @@ struct dentry *debugfs_create_blob(const char *name, umode_t mode, } EXPORT_SYMBOL_GPL(debugfs_create_blob); +struct array_data { + void *array; + u32 elements; +}; + +static int u32_array_open(struct inode *inode, struct file *file) +{ + file->private_data = NULL; + return nonseekable_open(inode, file); +} + +static size_t format_array(char *buf, size_t bufsize, const char *fmt, + u32 *array, u32 array_size) +{ + size_t ret = 0; + u32 i; + + for (i = 0; i < array_size; i++) { + size_t len; + + len = snprintf(buf, bufsize, fmt, array[i]); + len++; /* ' ' or '\n' */ + ret += len; + + if (buf) { + buf += len; + bufsize -= len; + buf[-1] = (i == array_size-1) ? '\n' : ' '; + } + } + + ret++; /* \0 */ + if (buf) + *buf = '\0'; + + return ret; +} + +static char *format_array_alloc(const char *fmt, u32 *array, + u32 array_size) +{ + size_t len = format_array(NULL, 0, fmt, array, array_size); + char *ret; + + ret = kmalloc(len, GFP_KERNEL); + if (ret == NULL) + return NULL; + + format_array(ret, len, fmt, array, array_size); + return ret; +} + +static ssize_t u32_array_read(struct file *file, char __user *buf, size_t len, + loff_t *ppos) +{ + struct inode *inode = file->f_path.dentry->d_inode; + struct array_data *data = inode->i_private; + size_t size; + + if (*ppos == 0) { + if (file->private_data) { + kfree(file->private_data); + file->private_data = NULL; + } + + file->private_data = format_array_alloc("%u", data->array, + data->elements); + } + + size = 0; + if (file->private_data) + size = strlen(file->private_data); + + return simple_read_from_buffer(buf, len, ppos, + file->private_data, size); +} + +static int u32_array_release(struct inode *inode, struct file *file) +{ + kfree(file->private_data); + + return 0; +} + +static const struct file_operations u32_array_fops = { + .owner = THIS_MODULE, + .open = u32_array_open, + .release = u32_array_release, + .read = u32_array_read, + .llseek = no_llseek, +}; + +/** + * debugfs_create_u32_array - create a debugfs file that is used to read u32 + * array. + * @name: a pointer to a string containing the name of the file to create. + * @mode: the permission that the file should have. + * @parent: a pointer to the parent dentry for this file. This should be a + * directory dentry if set. If this parameter is %NULL, then the + * file will be created in the root of the debugfs filesystem. + * @array: u32 array that provides data. + * @elements: total number of elements in the array. + * + * This function creates a file in debugfs with the given name that exports + * @array as data. If the @mode variable is so set it can be read from. + * Writing is not supported. Seek within the file is also not supported. + * Once array is created its size can not be changed. + * + * The function returns a pointer to dentry on success. If debugfs is not + * enabled in the kernel, the value -%ENODEV will be returned. + */ +struct dentry *debugfs_create_u32_array(const char *name, umode_t mode, + struct dentry *parent, + u32 *array, u32 elements) +{ + struct array_data *data = kmalloc(sizeof(*data), GFP_KERNEL); + + if (data == NULL) + return NULL; + + data->array = array; + data->elements = elements; + + return debugfs_create_file(name, mode, parent, data, &u32_array_fops); +} +EXPORT_SYMBOL_GPL(debugfs_create_u32_array); + #ifdef CONFIG_HAS_IOMEM /* diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h index ae36b72..66c434f 100644 --- a/include/linux/debugfs.h +++ b/include/linux/debugfs.h @@ -93,6 +93,10 @@ struct dentry *debugfs_create_regset32(const char *name, umode_t mode, int debugfs_print_regs32(struct seq_file *s, const struct debugfs_reg32 *regs, int nregs, void __iomem *base, char *prefix); +struct dentry *debugfs_create_u32_array(const char *name, umode_t mode, + struct dentry *parent, + u32 *array, u32 elements); + bool debugfs_initialized(void); #else @@ -219,6 +223,13 @@ static inline bool debugfs_initialized(void) return false; } +static inline struct dentry *debugfs_create_u32_array(const char *name, umode_t mode, + struct dentry *parent, + u32 *array, u32 elements) +{ + return ERR_PTR(-ENODEV); +} + #endif #endif -- cgit v0.10.2 From 569ca5b3f94cd0b3295ec5943aa457cf4a4f6a3a Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Thu, 5 Apr 2012 16:10:07 +0100 Subject: xen/gnttab: add deferred freeing logic Rather than just leaking pages that can't be freed at the point where access permission for the backend domain gets revoked, put them on a list and run a timer to (infrequently) retry freeing them. (This can particularly happen when unloading a frontend driver when devices are still present, and the backend still has them in non-closed state or hasn't finished closing them yet.) Signed-off-by: Jan Beulich Signed-off-by: Konrad Rzeszutek Wilk diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c index b4d4eac..9f514bb 100644 --- a/drivers/xen/grant-table.c +++ b/drivers/xen/grant-table.c @@ -426,10 +426,8 @@ static int gnttab_end_foreign_access_ref_v1(grant_ref_t ref, int readonly) nflags = *pflags; do { flags = nflags; - if (flags & (GTF_reading|GTF_writing)) { - printk(KERN_ALERT "WARNING: g.e. still in use!\n"); + if (flags & (GTF_reading|GTF_writing)) return 0; - } } while ((nflags = sync_cmpxchg(pflags, flags, 0)) != flags); return 1; @@ -458,12 +456,103 @@ static int gnttab_end_foreign_access_ref_v2(grant_ref_t ref, int readonly) return 1; } -int gnttab_end_foreign_access_ref(grant_ref_t ref, int readonly) +static inline int _gnttab_end_foreign_access_ref(grant_ref_t ref, int readonly) { return gnttab_interface->end_foreign_access_ref(ref, readonly); } + +int gnttab_end_foreign_access_ref(grant_ref_t ref, int readonly) +{ + if (_gnttab_end_foreign_access_ref(ref, readonly)) + return 1; + pr_warn("WARNING: g.e. %#x still in use!\n", ref); + return 0; +} EXPORT_SYMBOL_GPL(gnttab_end_foreign_access_ref); +struct deferred_entry { + struct list_head list; + grant_ref_t ref; + bool ro; + uint16_t warn_delay; + struct page *page; +}; +static LIST_HEAD(deferred_list); +static void gnttab_handle_deferred(unsigned long); +static DEFINE_TIMER(deferred_timer, gnttab_handle_deferred, 0, 0); + +static void gnttab_handle_deferred(unsigned long unused) +{ + unsigned int nr = 10; + struct deferred_entry *first = NULL; + unsigned long flags; + + spin_lock_irqsave(&gnttab_list_lock, flags); + while (nr--) { + struct deferred_entry *entry + = list_first_entry(&deferred_list, + struct deferred_entry, list); + + if (entry == first) + break; + list_del(&entry->list); + spin_unlock_irqrestore(&gnttab_list_lock, flags); + if (_gnttab_end_foreign_access_ref(entry->ref, entry->ro)) { + put_free_entry(entry->ref); + if (entry->page) { + pr_debug("freeing g.e. %#x (pfn %#lx)\n", + entry->ref, page_to_pfn(entry->page)); + __free_page(entry->page); + } else + pr_info("freeing g.e. %#x\n", entry->ref); + kfree(entry); + entry = NULL; + } else { + if (!--entry->warn_delay) + pr_info("g.e. %#x still pending\n", + entry->ref); + if (!first) + first = entry; + } + spin_lock_irqsave(&gnttab_list_lock, flags); + if (entry) + list_add_tail(&entry->list, &deferred_list); + else if (list_empty(&deferred_list)) + break; + } + if (!list_empty(&deferred_list) && !timer_pending(&deferred_timer)) { + deferred_timer.expires = jiffies + HZ; + add_timer(&deferred_timer); + } + spin_unlock_irqrestore(&gnttab_list_lock, flags); +} + +static void gnttab_add_deferred(grant_ref_t ref, bool readonly, + struct page *page) +{ + struct deferred_entry *entry = kmalloc(sizeof(*entry), GFP_ATOMIC); + const char *what = KERN_WARNING "leaking"; + + if (entry) { + unsigned long flags; + + entry->ref = ref; + entry->ro = readonly; + entry->page = page; + entry->warn_delay = 60; + spin_lock_irqsave(&gnttab_list_lock, flags); + list_add_tail(&entry->list, &deferred_list); + if (!timer_pending(&deferred_timer)) { + deferred_timer.expires = jiffies + HZ; + add_timer(&deferred_timer); + } + spin_unlock_irqrestore(&gnttab_list_lock, flags); + what = KERN_DEBUG "deferring"; + } + printk("%s g.e. %#x (pfn %#lx)\n", + what, ref, page ? page_to_pfn(page) : -1); +} + void gnttab_end_foreign_access(grant_ref_t ref, int readonly, unsigned long page) { @@ -471,12 +560,9 @@ void gnttab_end_foreign_access(grant_ref_t ref, int readonly, put_free_entry(ref); if (page != 0) free_page(page); - } else { - /* XXX This needs to be fixed so that the ref and page are - placed on a list to be freed up later. */ - printk(KERN_WARNING - "WARNING: leaking g.e. and page still in use!\n"); - } + } else + gnttab_add_deferred(ref, readonly, + page ? virt_to_page(page) : NULL); } EXPORT_SYMBOL_GPL(gnttab_end_foreign_access); -- cgit v0.10.2 From f71fa31f9f7ac33cba12b8897983f950ad2c7a5b Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Wed, 18 Apr 2012 12:24:29 +0200 Subject: KVM: MMU: use page table level macro Its much cleaner to use PT_PAGE_TABLE_LEVEL than its numeric value. Signed-off-by: Davidlohr Bueso Signed-off-by: Marcelo Tosatti diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 29ad6f9..07424cf 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -3618,7 +3618,7 @@ static bool detect_write_flooding(struct kvm_mmu_page *sp) * Skip write-flooding detected for the sp whose level is 1, because * it can become unsync, then the guest page is not write-protected. */ - if (sp->role.level == 1) + if (sp->role.level == PT_PAGE_TABLE_LEVEL) return false; return ++sp->write_flooding_count >= 3; diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index df5a703..34f9709 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -658,7 +658,7 @@ static gpa_t FNAME(get_level1_sp_gpa)(struct kvm_mmu_page *sp) { int offset = 0; - WARN_ON(sp->role.level != 1); + WARN_ON(sp->role.level != PT_PAGE_TABLE_LEVEL); if (PTTYPE == 32) offset = sp->role.quadrant << PT64_LEVEL_BITS; -- cgit v0.10.2 From 7b91747d42a1012e3781dd09fa638d113809e3fd Mon Sep 17 00:00:00 2001 From: Paul Gortmaker Date: Wed, 18 Apr 2012 21:58:43 +0200 Subject: cris: Remove old legacy "-traditional" flag from arch-v10/lib/Makefile Most of these have been purged years ago. This one silently lived on until commit 69349c2dc01c489eccaa4c472542c08e370c6d7e "kconfig: fix IS_ENABLED to not require all options to be defined" In the above, we use some macro trickery to create a conditional that is valid in CPP and in C usage. However that trickery doesn't sit well if you have the legacy "-traditional" flag enabled. You'll get: AS arch/cris/arch-v10/lib/checksum.o In file included from :4:0: include/linux/kconfig.h:23:0: error: syntax error in macro parameter list make[2]: *** [arch/cris/arch-v10/lib/checksum.o] Error 1 Everything builds fine w/o "-traditional" so simply drop it from this location as well. Signed-off-by: Paul Gortmaker Signed-off-by: Jesper Nilsson diff --git a/arch/cris/arch-v10/lib/Makefile b/arch/cris/arch-v10/lib/Makefile index 36e9a9c..725153e 100644 --- a/arch/cris/arch-v10/lib/Makefile +++ b/arch/cris/arch-v10/lib/Makefile @@ -2,8 +2,5 @@ # Makefile for Etrax-specific library files.. # - -EXTRA_AFLAGS := -traditional - lib-y = checksum.o checksumcopy.o string.o usercopy.o memset.o csumcpfruser.o -- cgit v0.10.2 From 95022b8cf6ed7f3292b60c8e85fe59a12bfb1c9e Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Wed, 18 Apr 2012 15:19:40 -0700 Subject: x86/mce: Avoid reading every machine check bank register twice. Reading machine check bank registers is slow. There is a trend of increasing the number of banks, and the number of cores. The main section of do_machine_check() is a serialized section where each cpu in turn checks every bank. Even on a little two socket SandyBridge-EP system that multiplies out as: 2 sockets * 8 cores * 2 hyperthreads * 20 banks = 640 MSRs We already scan the banks in parallel in mce_no_way_out() to see if there is a fatal error anywhere in the system. If we build a cache of VALID bits during this scan, we can avoid uselessly re-reading banks that have no data. Note that this cache is only a hint. If the valid bit is set in a shared bank, all cpus that share that bank will see it during the parallel scan, but the first to find it in the sequential scan will (usually) clear the bank. Acked-by: Borislav Petkov Signed-off-by: Tony Luck diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index d086a09..66e1c51 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -641,16 +641,18 @@ EXPORT_SYMBOL_GPL(machine_check_poll); * Do a quick check if any of the events requires a panic. * This decides if we keep the events around or clear them. */ -static int mce_no_way_out(struct mce *m, char **msg) +static int mce_no_way_out(struct mce *m, char **msg, unsigned long *validp) { - int i; + int i, ret = 0; for (i = 0; i < banks; i++) { m->status = mce_rdmsrl(MSR_IA32_MCx_STATUS(i)); + if (m->status & MCI_STATUS_VAL) + __set_bit(i, validp); if (mce_severity(m, tolerant, msg) >= MCE_PANIC_SEVERITY) - return 1; + ret = 1; } - return 0; + return ret; } /* @@ -1011,6 +1013,7 @@ void do_machine_check(struct pt_regs *regs, long error_code) */ int kill_it = 0; DECLARE_BITMAP(toclear, MAX_NR_BANKS); + DECLARE_BITMAP(valid_banks, MAX_NR_BANKS); char *msg = "Unknown"; atomic_inc(&mce_entry); @@ -1025,7 +1028,8 @@ void do_machine_check(struct pt_regs *regs, long error_code) final = &__get_cpu_var(mces_seen); *final = m; - no_way_out = mce_no_way_out(&m, &msg); + memset(valid_banks, 0, sizeof(valid_banks)); + no_way_out = mce_no_way_out(&m, &msg, valid_banks); barrier(); @@ -1045,6 +1049,8 @@ void do_machine_check(struct pt_regs *regs, long error_code) order = mce_start(&no_way_out); for (i = 0; i < banks; i++) { __clear_bit(i, toclear); + if (!test_bit(i, valid_banks)) + continue; if (!mce_banks[i].ctl) continue; -- cgit v0.10.2 From 2b3c83efc9a653af6a24799eeb1a2900ba0439e6 Mon Sep 17 00:00:00 2001 From: Rafal Prylowski Date: Thu, 19 Apr 2012 11:19:00 +0200 Subject: dmaengine/ep93xx_dma: Implement double buffering for M2M DMA channels Add double buffering support for M2M DMA channels. Implement this by using EP93xx M2M DMA Buffer and Control Finite State Machines to be sure that we are not disabling the channel when it's actually operating. Signed-off-by: Rafal Prylowski Tested-by: H Hartley Sweeten Acked-by: H Hartley Sweeten Acked-by: Mika Westerberg Signed-off-by: Ryan Mallon diff --git a/drivers/dma/ep93xx_dma.c b/drivers/dma/ep93xx_dma.c index e6f133b..bbfbb06 100644 --- a/drivers/dma/ep93xx_dma.c +++ b/drivers/dma/ep93xx_dma.c @@ -71,6 +71,7 @@ #define M2M_CONTROL_TM_SHIFT 13 #define M2M_CONTROL_TM_TX (1 << M2M_CONTROL_TM_SHIFT) #define M2M_CONTROL_TM_RX (2 << M2M_CONTROL_TM_SHIFT) +#define M2M_CONTROL_NFBINT BIT(21) #define M2M_CONTROL_RSS_SHIFT 22 #define M2M_CONTROL_RSS_SSPRX (1 << M2M_CONTROL_RSS_SHIFT) #define M2M_CONTROL_RSS_SSPTX (2 << M2M_CONTROL_RSS_SHIFT) @@ -79,7 +80,22 @@ #define M2M_CONTROL_PWSC_SHIFT 25 #define M2M_INTERRUPT 0x0004 -#define M2M_INTERRUPT_DONEINT BIT(1) +#define M2M_INTERRUPT_MASK 6 + +#define M2M_STATUS 0x000c +#define M2M_STATUS_CTL_SHIFT 1 +#define M2M_STATUS_CTL_IDLE (0 << M2M_STATUS_CTL_SHIFT) +#define M2M_STATUS_CTL_STALL (1 << M2M_STATUS_CTL_SHIFT) +#define M2M_STATUS_CTL_MEMRD (2 << M2M_STATUS_CTL_SHIFT) +#define M2M_STATUS_CTL_MEMWR (3 << M2M_STATUS_CTL_SHIFT) +#define M2M_STATUS_CTL_BWCWAIT (4 << M2M_STATUS_CTL_SHIFT) +#define M2M_STATUS_CTL_MASK (7 << M2M_STATUS_CTL_SHIFT) +#define M2M_STATUS_BUF_SHIFT 4 +#define M2M_STATUS_BUF_NO (0 << M2M_STATUS_BUF_SHIFT) +#define M2M_STATUS_BUF_ON (1 << M2M_STATUS_BUF_SHIFT) +#define M2M_STATUS_BUF_NEXT (2 << M2M_STATUS_BUF_SHIFT) +#define M2M_STATUS_BUF_MASK (3 << M2M_STATUS_BUF_SHIFT) +#define M2M_STATUS_DONE BIT(6) #define M2M_BCR0 0x0010 #define M2M_BCR1 0x0014 @@ -426,15 +442,6 @@ static int m2p_hw_interrupt(struct ep93xx_dma_chan *edmac) /* * M2M DMA implementation - * - * For the M2M transfers we don't use NFB at all. This is because it simply - * doesn't work well with memcpy transfers. When you submit both buffers it is - * extremely unlikely that you get an NFB interrupt, but it instead reports - * DONE interrupt and both buffers are already transferred which means that we - * weren't able to update the next buffer. - * - * So for now we "simulate" NFB by just submitting buffer after buffer - * without double buffering. */ static int m2m_hw_setup(struct ep93xx_dma_chan *edmac) @@ -543,6 +550,11 @@ static void m2m_hw_submit(struct ep93xx_dma_chan *edmac) m2m_fill_desc(edmac); control |= M2M_CONTROL_DONEINT; + if (ep93xx_dma_advance_active(edmac)) { + m2m_fill_desc(edmac); + control |= M2M_CONTROL_NFBINT; + } + /* * Now we can finally enable the channel. For M2M channel this must be * done _after_ the BCRx registers are programmed. @@ -560,32 +572,89 @@ static void m2m_hw_submit(struct ep93xx_dma_chan *edmac) } } +/* + * According to EP93xx User's Guide, we should receive DONE interrupt when all + * M2M DMA controller transactions complete normally. This is not always the + * case - sometimes EP93xx M2M DMA asserts DONE interrupt when the DMA channel + * is still running (channel Buffer FSM in DMA_BUF_ON state, and channel + * Control FSM in DMA_MEM_RD state, observed at least in IDE-DMA operation). + * In effect, disabling the channel when only DONE bit is set could stop + * currently running DMA transfer. To avoid this, we use Buffer FSM and + * Control FSM to check current state of DMA channel. + */ static int m2m_hw_interrupt(struct ep93xx_dma_chan *edmac) { + u32 status = readl(edmac->regs + M2M_STATUS); + u32 ctl_fsm = status & M2M_STATUS_CTL_MASK; + u32 buf_fsm = status & M2M_STATUS_BUF_MASK; + bool done = status & M2M_STATUS_DONE; + bool last_done; u32 control; + struct ep93xx_dma_desc *desc; - if (!(readl(edmac->regs + M2M_INTERRUPT) & M2M_INTERRUPT_DONEINT)) + /* Accept only DONE and NFB interrupts */ + if (!(readl(edmac->regs + M2M_INTERRUPT) & M2M_INTERRUPT_MASK)) return INTERRUPT_UNKNOWN; - /* Clear the DONE bit */ - writel(0, edmac->regs + M2M_INTERRUPT); + if (done) { + /* Clear the DONE bit */ + writel(0, edmac->regs + M2M_INTERRUPT); + } - /* Disable interrupts and the channel */ - control = readl(edmac->regs + M2M_CONTROL); - control &= ~(M2M_CONTROL_DONEINT | M2M_CONTROL_ENABLE); - writel(control, edmac->regs + M2M_CONTROL); + /* + * Check whether we are done with descriptors or not. This, together + * with DMA channel state, determines action to take in interrupt. + */ + desc = ep93xx_dma_get_active(edmac); + last_done = !desc || desc->txd.cookie; /* - * Since we only get DONE interrupt we have to find out ourselves - * whether there still is something to process. So we try to advance - * the chain an see whether it succeeds. + * Use M2M DMA Buffer FSM and Control FSM to check current state of + * DMA channel. Using DONE and NFB bits from channel status register + * or bits from channel interrupt register is not reliable. */ - if (ep93xx_dma_advance_active(edmac)) { - edmac->edma->hw_submit(edmac); - return INTERRUPT_NEXT_BUFFER; + if (!last_done && + (buf_fsm == M2M_STATUS_BUF_NO || + buf_fsm == M2M_STATUS_BUF_ON)) { + /* + * Two buffers are ready for update when Buffer FSM is in + * DMA_NO_BUF state. Only one buffer can be prepared without + * disabling the channel or polling the DONE bit. + * To simplify things, always prepare only one buffer. + */ + if (ep93xx_dma_advance_active(edmac)) { + m2m_fill_desc(edmac); + if (done && !edmac->chan.private) { + /* Software trigger for memcpy channel */ + control = readl(edmac->regs + M2M_CONTROL); + control |= M2M_CONTROL_START; + writel(control, edmac->regs + M2M_CONTROL); + } + return INTERRUPT_NEXT_BUFFER; + } else { + last_done = true; + } + } + + /* + * Disable the channel only when Buffer FSM is in DMA_NO_BUF state + * and Control FSM is in DMA_STALL state. + */ + if (last_done && + buf_fsm == M2M_STATUS_BUF_NO && + ctl_fsm == M2M_STATUS_CTL_STALL) { + /* Disable interrupts and the channel */ + control = readl(edmac->regs + M2M_CONTROL); + control &= ~(M2M_CONTROL_DONEINT | M2M_CONTROL_NFBINT + | M2M_CONTROL_ENABLE); + writel(control, edmac->regs + M2M_CONTROL); + return INTERRUPT_DONE; } - return INTERRUPT_DONE; + /* + * Nothing to do this time. + */ + return INTERRUPT_NEXT_BUFFER; } /* -- cgit v0.10.2 From f78146b0f9230765c6315b2e14f56112513389ad Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Wed, 18 Apr 2012 19:22:47 +0300 Subject: KVM: Fix page-crossing MMIO MMIO that are split across a page boundary are currently broken - the code does not expect to be aborted by the exit to userspace for the first MMIO fragment. This patch fixes the problem by generalizing the current code for handling 16-byte MMIOs to handle a number of "fragments", and changes the MMIO code to create those fragments. Signed-off-by: Avi Kivity Signed-off-by: Marcelo Tosatti diff --git a/arch/ia64/include/asm/kvm_host.h b/arch/ia64/include/asm/kvm_host.h index c4b4bac..6d6a5ac 100644 --- a/arch/ia64/include/asm/kvm_host.h +++ b/arch/ia64/include/asm/kvm_host.h @@ -449,6 +449,8 @@ struct kvm_vcpu_arch { char log_buf[VMM_LOG_LEN]; union context host; union context guest; + + char mmio_data[8]; }; struct kvm_vm_stat { diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c index 9d80ff8..882ab21 100644 --- a/arch/ia64/kvm/kvm-ia64.c +++ b/arch/ia64/kvm/kvm-ia64.c @@ -232,12 +232,12 @@ static int handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) if ((p->addr & PAGE_MASK) == IOAPIC_DEFAULT_BASE_ADDRESS) goto mmio; vcpu->mmio_needed = 1; - vcpu->mmio_phys_addr = kvm_run->mmio.phys_addr = p->addr; - vcpu->mmio_size = kvm_run->mmio.len = p->size; + vcpu->mmio_fragments[0].gpa = kvm_run->mmio.phys_addr = p->addr; + vcpu->mmio_fragments[0].len = kvm_run->mmio.len = p->size; vcpu->mmio_is_write = kvm_run->mmio.is_write = !p->dir; if (vcpu->mmio_is_write) - memcpy(vcpu->mmio_data, &p->data, p->size); + memcpy(vcpu->arch.mmio_data, &p->data, p->size); memcpy(kvm_run->mmio.data, &p->data, p->size); kvm_run->exit_reason = KVM_EXIT_MMIO; return 0; @@ -719,7 +719,7 @@ static void kvm_set_mmio_data(struct kvm_vcpu *vcpu) struct kvm_mmio_req *p = kvm_get_vcpu_ioreq(vcpu); if (!vcpu->mmio_is_write) - memcpy(&p->data, vcpu->mmio_data, 8); + memcpy(&p->data, vcpu->arch.mmio_data, 8); p->state = STATE_IORESP_READY; } @@ -739,7 +739,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) } if (vcpu->mmio_needed) { - memcpy(vcpu->mmio_data, kvm_run->mmio.data, 8); + memcpy(vcpu->arch.mmio_data, kvm_run->mmio.data, 8); kvm_set_mmio_data(vcpu); vcpu->mmio_read_completed = 1; vcpu->mmio_needed = 0; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 0d9a578..4de705c 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3718,9 +3718,8 @@ struct read_write_emulator_ops { static int read_prepare(struct kvm_vcpu *vcpu, void *val, int bytes) { if (vcpu->mmio_read_completed) { - memcpy(val, vcpu->mmio_data, bytes); trace_kvm_mmio(KVM_TRACE_MMIO_READ, bytes, - vcpu->mmio_phys_addr, *(u64 *)val); + vcpu->mmio_fragments[0].gpa, *(u64 *)val); vcpu->mmio_read_completed = 0; return 1; } @@ -3756,8 +3755,9 @@ static int read_exit_mmio(struct kvm_vcpu *vcpu, gpa_t gpa, static int write_exit_mmio(struct kvm_vcpu *vcpu, gpa_t gpa, void *val, int bytes) { - memcpy(vcpu->mmio_data, val, bytes); - memcpy(vcpu->run->mmio.data, vcpu->mmio_data, 8); + struct kvm_mmio_fragment *frag = &vcpu->mmio_fragments[0]; + + memcpy(vcpu->run->mmio.data, frag->data, frag->len); return X86EMUL_CONTINUE; } @@ -3784,10 +3784,7 @@ static int emulator_read_write_onepage(unsigned long addr, void *val, gpa_t gpa; int handled, ret; bool write = ops->write; - - if (ops->read_write_prepare && - ops->read_write_prepare(vcpu, val, bytes)) - return X86EMUL_CONTINUE; + struct kvm_mmio_fragment *frag; ret = vcpu_mmio_gva_to_gpa(vcpu, addr, &gpa, exception, write); @@ -3813,15 +3810,19 @@ mmio: bytes -= handled; val += handled; - vcpu->mmio_needed = 1; - vcpu->run->exit_reason = KVM_EXIT_MMIO; - vcpu->run->mmio.phys_addr = vcpu->mmio_phys_addr = gpa; - vcpu->mmio_size = bytes; - vcpu->run->mmio.len = min(vcpu->mmio_size, 8); - vcpu->run->mmio.is_write = vcpu->mmio_is_write = write; - vcpu->mmio_index = 0; + while (bytes) { + unsigned now = min(bytes, 8U); - return ops->read_write_exit_mmio(vcpu, gpa, val, bytes); + frag = &vcpu->mmio_fragments[vcpu->mmio_nr_fragments++]; + frag->gpa = gpa; + frag->data = val; + frag->len = now; + + gpa += now; + val += now; + bytes -= now; + } + return X86EMUL_CONTINUE; } int emulator_read_write(struct x86_emulate_ctxt *ctxt, unsigned long addr, @@ -3830,10 +3831,18 @@ int emulator_read_write(struct x86_emulate_ctxt *ctxt, unsigned long addr, struct read_write_emulator_ops *ops) { struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); + gpa_t gpa; + int rc; + + if (ops->read_write_prepare && + ops->read_write_prepare(vcpu, val, bytes)) + return X86EMUL_CONTINUE; + + vcpu->mmio_nr_fragments = 0; /* Crossing a page boundary? */ if (((addr + bytes - 1) ^ addr) & PAGE_MASK) { - int rc, now; + int now; now = -addr & ~PAGE_MASK; rc = emulator_read_write_onepage(addr, val, now, exception, @@ -3846,8 +3855,25 @@ int emulator_read_write(struct x86_emulate_ctxt *ctxt, unsigned long addr, bytes -= now; } - return emulator_read_write_onepage(addr, val, bytes, exception, - vcpu, ops); + rc = emulator_read_write_onepage(addr, val, bytes, exception, + vcpu, ops); + if (rc != X86EMUL_CONTINUE) + return rc; + + if (!vcpu->mmio_nr_fragments) + return rc; + + gpa = vcpu->mmio_fragments[0].gpa; + + vcpu->mmio_needed = 1; + vcpu->mmio_cur_fragment = 0; + + vcpu->run->mmio.len = vcpu->mmio_fragments[0].len; + vcpu->run->mmio.is_write = vcpu->mmio_is_write = ops->write; + vcpu->run->exit_reason = KVM_EXIT_MMIO; + vcpu->run->mmio.phys_addr = gpa; + + return ops->read_write_exit_mmio(vcpu, gpa, val, bytes); } static int emulator_read_emulated(struct x86_emulate_ctxt *ctxt, @@ -5446,33 +5472,55 @@ static int __vcpu_run(struct kvm_vcpu *vcpu) return r; } +/* + * Implements the following, as a state machine: + * + * read: + * for each fragment + * write gpa, len + * exit + * copy data + * execute insn + * + * write: + * for each fragment + * write gpa, len + * copy data + * exit + */ static int complete_mmio(struct kvm_vcpu *vcpu) { struct kvm_run *run = vcpu->run; + struct kvm_mmio_fragment *frag; int r; if (!(vcpu->arch.pio.count || vcpu->mmio_needed)) return 1; if (vcpu->mmio_needed) { - vcpu->mmio_needed = 0; + /* Complete previous fragment */ + frag = &vcpu->mmio_fragments[vcpu->mmio_cur_fragment++]; if (!vcpu->mmio_is_write) - memcpy(vcpu->mmio_data + vcpu->mmio_index, - run->mmio.data, 8); - vcpu->mmio_index += 8; - if (vcpu->mmio_index < vcpu->mmio_size) { - run->exit_reason = KVM_EXIT_MMIO; - run->mmio.phys_addr = vcpu->mmio_phys_addr + vcpu->mmio_index; - memcpy(run->mmio.data, vcpu->mmio_data + vcpu->mmio_index, 8); - run->mmio.len = min(vcpu->mmio_size - vcpu->mmio_index, 8); - run->mmio.is_write = vcpu->mmio_is_write; - vcpu->mmio_needed = 1; - return 0; + memcpy(frag->data, run->mmio.data, frag->len); + if (vcpu->mmio_cur_fragment == vcpu->mmio_nr_fragments) { + vcpu->mmio_needed = 0; + if (vcpu->mmio_is_write) + return 1; + vcpu->mmio_read_completed = 1; + goto done; } + /* Initiate next fragment */ + ++frag; + run->exit_reason = KVM_EXIT_MMIO; + run->mmio.phys_addr = frag->gpa; if (vcpu->mmio_is_write) - return 1; - vcpu->mmio_read_completed = 1; + memcpy(run->mmio.data, frag->data, frag->len); + run->mmio.len = frag->len; + run->mmio.is_write = vcpu->mmio_is_write; + return 0; + } +done: vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); r = emulate_instruction(vcpu, EMULTYPE_NO_DECODE); srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index a2d00b1..186ffab 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -35,6 +35,20 @@ #endif /* + * If we support unaligned MMIO, at most one fragment will be split into two: + */ +#ifdef KVM_UNALIGNED_MMIO +# define KVM_EXTRA_MMIO_FRAGMENTS 1 +#else +# define KVM_EXTRA_MMIO_FRAGMENTS 0 +#endif + +#define KVM_USER_MMIO_SIZE 8 + +#define KVM_MAX_MMIO_FRAGMENTS \ + (KVM_MMIO_SIZE / KVM_USER_MMIO_SIZE + KVM_EXTRA_MMIO_FRAGMENTS) + +/* * vcpu->requests bit members */ #define KVM_REQ_TLB_FLUSH 0 @@ -117,6 +131,16 @@ enum { EXITING_GUEST_MODE }; +/* + * Sometimes a large or cross-page mmio needs to be broken up into separate + * exits for userspace servicing. + */ +struct kvm_mmio_fragment { + gpa_t gpa; + void *data; + unsigned len; +}; + struct kvm_vcpu { struct kvm *kvm; #ifdef CONFIG_PREEMPT_NOTIFIERS @@ -144,10 +168,9 @@ struct kvm_vcpu { int mmio_needed; int mmio_read_completed; int mmio_is_write; - int mmio_size; - int mmio_index; - unsigned char mmio_data[KVM_MMIO_SIZE]; - gpa_t mmio_phys_addr; + int mmio_cur_fragment; + int mmio_nr_fragments; + struct kvm_mmio_fragment mmio_fragments[KVM_MAX_MMIO_FRAGMENTS]; #endif #ifdef CONFIG_KVM_ASYNC_PF -- cgit v0.10.2 From 8281715b4109b5ee26032ff7b77c0d575c4150f7 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Wed, 18 Apr 2012 19:23:50 +0300 Subject: KVM: ia64: fix build due to typo s/kcm/kvm/. Signed-off-by: Avi Kivity Signed-off-by: Marcelo Tosatti diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c index 882ab21..bd77cb5 100644 --- a/arch/ia64/kvm/kvm-ia64.c +++ b/arch/ia64/kvm/kvm-ia64.c @@ -1174,7 +1174,7 @@ out: bool kvm_vcpu_compatible(struct kvm_vcpu *vcpu) { - return irqchip_in_kernel(vcpu->kcm) == (vcpu->arch.apic != NULL); + return irqchip_in_kernel(vcpu->kvm) == (vcpu->arch.apic != NULL); } int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) -- cgit v0.10.2 From 1f15d10984c854e077da5aa1a23f901496b49773 Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Fri, 20 Apr 2012 18:21:46 -0300 Subject: KVM: add kvm_arch_para_features stub to asm-generic/kvm_para.h Needed by kvm_para_has_feature(). Reported-by: Stephen Rothwell Signed-off-by: Marcelo Tosatti diff --git a/include/asm-generic/kvm_para.h b/include/asm-generic/kvm_para.h index 05ef7e7..9a7bbad 100644 --- a/include/asm-generic/kvm_para.h +++ b/include/asm-generic/kvm_para.h @@ -11,4 +11,9 @@ static inline bool kvm_check_and_clear_guest_paused(void) return false; } +static inline unsigned int kvm_arch_para_features(void) +{ + return 0; +} + #endif -- cgit v0.10.2 From 4ccf4beab8c447f8cd33d46afb6e10e1aa3befc6 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Wed, 31 Aug 2011 20:35:40 +0200 Subject: lib: add support for stmp-style devices MX23/28 use IP cores which follow a register layout I have first seen on STMP3xxx SoCs. In this layout, every register actually has four u32: 1.) to store a value directly 2.) a SET register where every 1-bit sets the corresponding bit, others are unaffected 3.) same with a CLR register 4.) same with a TOG (toggle) register Also, the 2 MSBs in register 0 are always the same and can be used to reset the IP core. All this is strictly speaking not mach-specific (but IP core specific) and, thus, doesn't need to be in mach-mxs/include. At least mx6 also uses IP cores following this stmp-style. So: Introduce a stmp-style device, put the code and defines for that in a public place (lib/), and let drivers for stmp-style devices select that code. To avoid regressions and ease reviewing, the actual code is simply copied from mach-mxs. It definately wants updates, but those need a seperate patch series. Voila, mach dependency gone, reusable code introduced. Note that I didn't remove the duplicated code from mach-mxs yet, first the drivers have to be converted. Signed-off-by: Wolfram Sang Acked-by: Shawn Guo Acked-by: Dong Aisheng diff --git a/include/linux/stmp_device.h b/include/linux/stmp_device.h new file mode 100644 index 0000000..6cf7ec9 --- /dev/null +++ b/include/linux/stmp_device.h @@ -0,0 +1,20 @@ +/* + * basic functions for devices following the "stmp" style register layout + * + * Copyright (C) 2011 Wolfram Sang, Pengutronix e.K. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#ifndef __STMP_DEVICE_H__ +#define __STMP_DEVICE_H__ + +#define STMP_OFFSET_REG_SET 0x4 +#define STMP_OFFSET_REG_CLR 0x8 +#define STMP_OFFSET_REG_TOG 0xc + +extern int stmp_reset_block(void __iomem *); +#endif /* __STMP_DEVICE_H__ */ diff --git a/lib/Kconfig b/lib/Kconfig index 4a8aba2..c5da154 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -33,6 +33,9 @@ config GENERIC_IO boolean default n +config STMP_DEVICE + bool + config CRC_CCITT tristate "CRC-CCITT functions" help diff --git a/lib/Makefile b/lib/Makefile index 18515f0..f78dbcd 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -123,6 +123,8 @@ obj-$(CONFIG_SIGNATURE) += digsig.o obj-$(CONFIG_CLZ_TAB) += clz_tab.o +obj-$(CONFIG_STMP_DEVICE) += stmp_device.o + hostprogs-y := gen_crc32table clean-files := crc32table.h diff --git a/lib/stmp_device.c b/lib/stmp_device.c new file mode 100644 index 0000000..8ac9bcc --- /dev/null +++ b/lib/stmp_device.c @@ -0,0 +1,80 @@ +/* + * Copyright (C) 1999 ARM Limited + * Copyright (C) 2000 Deep Blue Solutions Ltd + * Copyright 2006-2007,2010 Freescale Semiconductor, Inc. All Rights Reserved. + * Copyright 2008 Juergen Beisert, kernel@pengutronix.de + * Copyright 2009 Ilya Yanok, Emcraft Systems Ltd, yanok@emcraft.com + * Copyright (C) 2011 Wolfram Sang, Pengutronix e.K. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include +#include +#include +#include +#include + +#define STMP_MODULE_CLKGATE (1 << 30) +#define STMP_MODULE_SFTRST (1 << 31) + +/* + * Clear the bit and poll it cleared. This is usually called with + * a reset address and mask being either SFTRST(bit 31) or CLKGATE + * (bit 30). + */ +static int stmp_clear_poll_bit(void __iomem *addr, u32 mask) +{ + int timeout = 0x400; + + writel(mask, addr + STMP_OFFSET_REG_CLR); + udelay(1); + while ((readl(addr) & mask) && --timeout) + /* nothing */; + + return !timeout; +} + +int stmp_reset_block(void __iomem *reset_addr) +{ + int ret; + int timeout = 0x400; + + /* clear and poll SFTRST */ + ret = stmp_clear_poll_bit(reset_addr, STMP_MODULE_SFTRST); + if (unlikely(ret)) + goto error; + + /* clear CLKGATE */ + writel(STMP_MODULE_CLKGATE, reset_addr + STMP_OFFSET_REG_CLR); + + /* set SFTRST to reset the block */ + writel(STMP_MODULE_SFTRST, reset_addr + STMP_OFFSET_REG_SET); + udelay(1); + + /* poll CLKGATE becoming set */ + while ((!(readl(reset_addr) & STMP_MODULE_CLKGATE)) && --timeout) + /* nothing */; + if (unlikely(!timeout)) + goto error; + + /* clear and poll SFTRST */ + ret = stmp_clear_poll_bit(reset_addr, STMP_MODULE_SFTRST); + if (unlikely(ret)) + goto error; + + /* clear and poll CLKGATE */ + ret = stmp_clear_poll_bit(reset_addr, STMP_MODULE_CLKGATE); + if (unlikely(ret)) + goto error; + + return 0; + +error: + pr_err("%s(%p): module reset timeout\n", __func__, reset_addr); + return -ETIMEDOUT; +} +EXPORT_SYMBOL(stmp_reset_block); -- cgit v0.10.2 From 6b866c151ad9ea44799b32c3a8c90a03c5b981da Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Wed, 31 Aug 2011 20:37:50 +0200 Subject: i2c: mxs: use global reset function The former mach specific reset_block function has been converted to a global one. Use the new one to remove mach dependency from the driver. Signed-off-by: Wolfram Sang diff --git a/drivers/i2c/busses/Kconfig b/drivers/i2c/busses/Kconfig index d2c5095..ccfd37d 100644 --- a/drivers/i2c/busses/Kconfig +++ b/drivers/i2c/busses/Kconfig @@ -483,6 +483,7 @@ config I2C_MV64XXX config I2C_MXS tristate "Freescale i.MX28 I2C interface" depends on SOC_IMX28 + select STMP_DEVICE help Say Y here if you want to use the I2C bus controller on the Freescale i.MX28 processors. diff --git a/drivers/i2c/busses/i2c-mxs.c b/drivers/i2c/busses/i2c-mxs.c index 3d471d5..0834802 100644 --- a/drivers/i2c/busses/i2c-mxs.c +++ b/drivers/i2c/busses/i2c-mxs.c @@ -26,8 +26,7 @@ #include #include #include - -#include +#include #define DRIVER_NAME "mxs-i2c" @@ -111,13 +110,9 @@ struct mxs_i2c_dev { struct i2c_adapter adapter; }; -/* - * TODO: check if calls to here are really needed. If not, we could get rid of - * mxs_reset_block and the mach-dependency. Needs an I2C analyzer, probably. - */ static void mxs_i2c_reset(struct mxs_i2c_dev *i2c) { - mxs_reset_block(i2c->regs); + stmp_reset_block(i2c->regs); writel(MXS_I2C_IRQ_MASK << 8, i2c->regs + MXS_I2C_CTRL1_SET); writel(MXS_I2C_QUEUECTRL_PIO_QUEUE_MODE, i2c->regs + MXS_I2C_QUEUECTRL_SET); -- cgit v0.10.2 From 8571723a698dcc0ee16c1c63908aa99dd940ce5c Mon Sep 17 00:00:00 2001 From: Chen Gong Date: Fri, 20 Apr 2012 16:02:05 -0700 Subject: x86/mce Add validation check before GHES error is recorded When GHES error record is logged into mcelog kernel buffer, a validation check for physical address is necessary, which prevents reporting an invalid physical address. [Since physical address is the only useful element in this error record, we drop generating the record completely if we don't have a valid address] Signed-off-by: Chen Gong Signed-off-by: Tony Luck diff --git a/arch/x86/kernel/cpu/mcheck/mce-apei.c b/arch/x86/kernel/cpu/mcheck/mce-apei.c index 507ea58..cd8b166 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-apei.c +++ b/arch/x86/kernel/cpu/mcheck/mce-apei.c @@ -42,7 +42,8 @@ void apei_mce_report_mem_error(int corrected, struct cper_sec_mem_err *mem_err) struct mce m; /* Only corrected MC is reported */ - if (!corrected) + if (!corrected || !(mem_err->validation_bits & + CPER_MEM_VALID_PHYSICAL_ADDRESS)) return; mce_setup(&m); -- cgit v0.10.2 From dba3c29ea4a1d5d544f59b94fd8a41662135e071 Mon Sep 17 00:00:00 2001 From: Balaji T K Date: Mon, 9 Apr 2012 12:08:32 +0530 Subject: mmc: omap_hsmmc: Enable Auto CMD12 Enable Auto-CMD12 for multi block read/write on HSMMC. Tested on OMAP4430, OMAP3430 and OMAP2430 SDP Signed-off-by: Balaji T K Signed-off-by: Venkatraman S Signed-off-by: Chris Ball diff --git a/drivers/mmc/host/omap_hsmmc.c b/drivers/mmc/host/omap_hsmmc.c index 56d4499..dfa6f87 100644 --- a/drivers/mmc/host/omap_hsmmc.c +++ b/drivers/mmc/host/omap_hsmmc.c @@ -85,6 +85,7 @@ #define BRR_ENABLE (1 << 5) #define DTO_ENABLE (1 << 20) #define INIT_STREAM (1 << 1) +#define ACEN_ACMD12 (1 << 2) #define DP_SELECT (1 << 21) #define DDIR (1 << 4) #define DMA_EN 0x1 @@ -115,6 +116,7 @@ #define OMAP_MMC_MAX_CLOCK 52000000 #define DRIVER_NAME "omap_hsmmc" +#define AUTO_CMD12 (1 << 0) /* Auto CMD12 support */ /* * One controller can have multiple slots, like on some omap boards using * omap.c controller driver. Luckily this is not currently done on any known @@ -175,6 +177,7 @@ struct omap_hsmmc_host { int reqs_blocked; int use_reg; int req_in_progress; + unsigned int flags; struct omap_hsmmc_next next_data; struct omap_mmc_platform_data *pdata; @@ -766,6 +769,8 @@ omap_hsmmc_start_command(struct omap_hsmmc_host *host, struct mmc_command *cmd, cmdtype = 0x3; cmdreg = (cmd->opcode << 24) | (resptype << 16) | (cmdtype << 22); + if ((host->flags & AUTO_CMD12) && mmc_op_multi(cmd->opcode)) + cmdreg |= ACEN_ACMD12; if (data) { cmdreg |= DP_SELECT | MSBS | BCE; @@ -837,11 +842,14 @@ omap_hsmmc_xfer_done(struct omap_hsmmc_host *host, struct mmc_data *data) else data->bytes_xfered = 0; - if (!data->stop) { + if (data->stop && ((!(host->flags & AUTO_CMD12)) || data->error)) { + omap_hsmmc_start_command(host, data->stop, NULL); + } else { + if (data->stop) + data->stop->resp[0] = OMAP_HSMMC_READ(host->base, + RSP76); omap_hsmmc_request_done(host, data->mrq); - return; } - omap_hsmmc_start_command(host, data->stop, NULL); } /* @@ -1844,6 +1852,7 @@ static int __devinit omap_hsmmc_probe(struct platform_device *pdev) host->mapbase = res->start + pdata->reg_offset; host->base = ioremap(host->mapbase, SZ_4K); host->power_mode = MMC_POWER_OFF; + host->flags = AUTO_CMD12; host->next_data.cookie = 1; platform_set_drvdata(pdev, host); -- cgit v0.10.2 From 03b5d924b926dd994b16f30f7a13bfb71ee0f478 Mon Sep 17 00:00:00 2001 From: Balaji T K Date: Mon, 9 Apr 2012 12:08:33 +0530 Subject: mmc: omap_hsmmc: add DDR support Add Dual data rate support for omap_hsmmc. Signed-off-by: Balaji T K Signed-off-by: Venkatraman S Signed-off-by: Chris Ball diff --git a/drivers/mmc/host/omap_hsmmc.c b/drivers/mmc/host/omap_hsmmc.c index dfa6f87..dc41b9e 100644 --- a/drivers/mmc/host/omap_hsmmc.c +++ b/drivers/mmc/host/omap_hsmmc.c @@ -92,6 +92,7 @@ #define MSBS (1 << 5) #define BCE (1 << 1) #define FOUR_BIT (1 << 1) +#define DDR (1 << 19) #define DW8 (1 << 5) #define CC 0x1 #define TC 0x02 @@ -523,6 +524,10 @@ static void omap_hsmmc_set_bus_width(struct omap_hsmmc_host *host) u32 con; con = OMAP_HSMMC_READ(host->base, CON); + if (ios->timing == MMC_TIMING_UHS_DDR50) + con |= DDR; /* configure in DDR mode */ + else + con &= ~DDR; switch (ios->bus_width) { case MMC_BUS_WIDTH_8: OMAP_HSMMC_WRITE(host->base, CON, con | DW8); -- cgit v0.10.2 From 31463b141587001781d86b2ef360544f101bd998 Mon Sep 17 00:00:00 2001 From: Venkatraman S Date: Mon, 9 Apr 2012 12:08:34 +0530 Subject: mmc: omap_hsmmc: use spinlock IRQ safe variant Prevent possible races between HSMMC/DMA IRQs and next requests. Signed-off-by: Venkatraman S Signed-off-by: Chris Ball diff --git a/drivers/mmc/host/omap_hsmmc.c b/drivers/mmc/host/omap_hsmmc.c index dc41b9e..a5bca72 100644 --- a/drivers/mmc/host/omap_hsmmc.c +++ b/drivers/mmc/host/omap_hsmmc.c @@ -806,11 +806,12 @@ omap_hsmmc_get_dma_dir(struct omap_hsmmc_host *host, struct mmc_data *data) static void omap_hsmmc_request_done(struct omap_hsmmc_host *host, struct mmc_request *mrq) { int dma_ch; + unsigned long flags; - spin_lock(&host->irq_lock); + spin_lock_irqsave(&host->irq_lock, flags); host->req_in_progress = 0; dma_ch = host->dma_ch; - spin_unlock(&host->irq_lock); + spin_unlock_irqrestore(&host->irq_lock, flags); omap_hsmmc_disable_irq(host); /* Do not complete the request if DMA is still in progress */ @@ -887,13 +888,14 @@ omap_hsmmc_cmd_done(struct omap_hsmmc_host *host, struct mmc_command *cmd) static void omap_hsmmc_dma_cleanup(struct omap_hsmmc_host *host, int errno) { int dma_ch; + unsigned long flags; host->data->error = errno; - spin_lock(&host->irq_lock); + spin_lock_irqsave(&host->irq_lock, flags); dma_ch = host->dma_ch; host->dma_ch = -1; - spin_unlock(&host->irq_lock); + spin_unlock_irqrestore(&host->irq_lock, flags); if (host->use_dma && dma_ch != -1) { dma_unmap_sg(mmc_dev(host->mmc), host->data->sg, @@ -1247,6 +1249,7 @@ static void omap_hsmmc_dma_cb(int lch, u16 ch_status, void *cb_data) struct omap_hsmmc_host *host = cb_data; struct mmc_data *data; int dma_ch, req_in_progress; + unsigned long flags; if (!(ch_status & OMAP_DMA_BLOCK_IRQ)) { dev_warn(mmc_dev(host->mmc), "unexpected dma status %x\n", @@ -1254,9 +1257,9 @@ static void omap_hsmmc_dma_cb(int lch, u16 ch_status, void *cb_data) return; } - spin_lock(&host->irq_lock); + spin_lock_irqsave(&host->irq_lock, flags); if (host->dma_ch < 0) { - spin_unlock(&host->irq_lock); + spin_unlock_irqrestore(&host->irq_lock, flags); return; } @@ -1266,7 +1269,7 @@ static void omap_hsmmc_dma_cb(int lch, u16 ch_status, void *cb_data) /* Fire up the next transfer. */ omap_hsmmc_config_dma_params(host, data, data->sg + host->dma_sg_idx); - spin_unlock(&host->irq_lock); + spin_unlock_irqrestore(&host->irq_lock, flags); return; } @@ -1277,7 +1280,7 @@ static void omap_hsmmc_dma_cb(int lch, u16 ch_status, void *cb_data) req_in_progress = host->req_in_progress; dma_ch = host->dma_ch; host->dma_ch = -1; - spin_unlock(&host->irq_lock); + spin_unlock_irqrestore(&host->irq_lock, flags); omap_free_dma(dma_ch); -- cgit v0.10.2 From cd03d9a85802c0023e522c21a1dabaf3e5785010 Mon Sep 17 00:00:00 2001 From: Rajendra Nayak Date: Mon, 9 Apr 2012 12:08:35 +0530 Subject: mmc: omap_hsmmc: Cleanup use of cpu_is_* for debounce_clock There really does not seem to be a need to use cpu_is_* check for getting the debounce clock as clkdev is perfectly capable of handling situations when certain clocks are only available on select platforms. Also get rid of the 'got_dbclk' flag and instead use the dbclk clock pointer to know if a valid debounce clock exists for the platform. Signed-off-by: Rajendra Nayak Signed-off-by: Venkatraman S Signed-off-by: Chris Ball diff --git a/drivers/mmc/host/omap_hsmmc.c b/drivers/mmc/host/omap_hsmmc.c index a5bca72..4254b6f 100644 --- a/drivers/mmc/host/omap_hsmmc.c +++ b/drivers/mmc/host/omap_hsmmc.c @@ -170,7 +170,6 @@ struct omap_hsmmc_host { int use_dma, dma_ch; int dma_line_tx, dma_line_rx; int slot_id; - int got_dbclk; int response_busy; int context_loss; int vdd; @@ -1097,7 +1096,7 @@ static int omap_hsmmc_switch_opcond(struct omap_hsmmc_host *host, int vdd) /* Disable the clocks */ pm_runtime_put_sync(host->dev); - if (host->got_dbclk) + if (host->dbclk) clk_disable(host->dbclk); /* Turn the power off */ @@ -1108,7 +1107,7 @@ static int omap_hsmmc_switch_opcond(struct omap_hsmmc_host *host, int vdd) ret = mmc_slot(host).set_power(host->dev, host->slot_id, 1, vdd); pm_runtime_get_sync(host->dev); - if (host->got_dbclk) + if (host->dbclk) clk_enable(host->dbclk); if (ret != 0) @@ -1902,21 +1901,17 @@ static int __devinit omap_hsmmc_probe(struct platform_device *pdev) omap_hsmmc_context_save(host); - if (cpu_is_omap2430()) { - host->dbclk = clk_get(&pdev->dev, "mmchsdb_fck"); - /* - * MMC can still work without debounce clock. - */ - if (IS_ERR(host->dbclk)) - dev_warn(mmc_dev(host->mmc), - "Failed to get debounce clock\n"); - else - host->got_dbclk = 1; - - if (host->got_dbclk) - if (clk_enable(host->dbclk) != 0) - dev_dbg(mmc_dev(host->mmc), "Enabling debounce" - " clk failed\n"); + host->dbclk = clk_get(&pdev->dev, "mmchsdb_fck"); + /* + * MMC can still work without debounce clock. + */ + if (IS_ERR(host->dbclk)) { + dev_warn(mmc_dev(host->mmc), "Failed to get debounce clk\n"); + host->dbclk = NULL; + } else if (clk_enable(host->dbclk) != 0) { + dev_warn(mmc_dev(host->mmc), "Failed to enable debounce clk\n"); + clk_put(host->dbclk); + host->dbclk = NULL; } /* Since we do only SG emulation, we can have as many segs @@ -2036,7 +2031,7 @@ err_irq: pm_runtime_put_sync(host->dev); pm_runtime_disable(host->dev); clk_put(host->fclk); - if (host->got_dbclk) { + if (host->dbclk) { clk_disable(host->dbclk); clk_put(host->dbclk); } @@ -2069,7 +2064,7 @@ static int __devexit omap_hsmmc_remove(struct platform_device *pdev) pm_runtime_put_sync(host->dev); pm_runtime_disable(host->dev); clk_put(host->fclk); - if (host->got_dbclk) { + if (host->dbclk) { clk_disable(host->dbclk); clk_put(host->dbclk); } @@ -2127,7 +2122,7 @@ static int omap_hsmmc_suspend(struct device *dev) OMAP_HSMMC_READ(host->base, HCTL) & ~SDBP); } - if (host->got_dbclk) + if (host->dbclk) clk_disable(host->dbclk); err: pm_runtime_put_sync(host->dev); @@ -2148,7 +2143,7 @@ static int omap_hsmmc_resume(struct device *dev) pm_runtime_get_sync(host->dev); - if (host->got_dbclk) + if (host->dbclk) clk_enable(host->dbclk); if (!(host->mmc->pm_flags & MMC_PM_KEEP_POWER)) -- cgit v0.10.2 From 7f8bea7f75618165d015f083c77f1db3e4584f88 Mon Sep 17 00:00:00 2001 From: Jan Luebbe Date: Thu, 5 Apr 2012 10:05:08 +0200 Subject: mmc: davinci_mmc: set MODULE_ALIAS to allow autoloading Davinci MMC platform devices (as in mach-davinci/devices-da8xx.c) use "davinci_mmc" as their name. To allow autoloading of the relevant driver, the module needs to set the MODULE_ALIAS. Signed-off-by: Jan Luebbe Signed-off-by: Chris Ball diff --git a/drivers/mmc/host/davinci_mmc.c b/drivers/mmc/host/davinci_mmc.c index c1f3673..7cf6c62 100644 --- a/drivers/mmc/host/davinci_mmc.c +++ b/drivers/mmc/host/davinci_mmc.c @@ -1533,4 +1533,5 @@ module_exit(davinci_mmcsd_exit); MODULE_AUTHOR("Texas Instruments India"); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("MMC/SD driver for Davinci MMC controller"); +MODULE_ALIAS("platform:davinci_mmc"); -- cgit v0.10.2 From 021459773d9b93e8d7388086db7b17107bdfc51d Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 10 Apr 2012 11:36:03 +0100 Subject: mmc: sdhci: Log what timeout was set if the timeout is too large Rather than just logging that we came up with an excessively large timeout say what the timeout was, this may provide some clues as to what the issue is. Signed-off-by: Mark Brown Signed-off-by: Chris Ball diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c index ccefdeb..e626732 100644 --- a/drivers/mmc/host/sdhci.c +++ b/drivers/mmc/host/sdhci.c @@ -680,8 +680,8 @@ static u8 sdhci_calc_timeout(struct sdhci_host *host, struct mmc_command *cmd) } if (count >= 0xF) { - pr_warning("%s: Too large timeout requested for CMD%d!\n", - mmc_hostname(host->mmc), cmd->opcode); + pr_warning("%s: Too large timeout 0x%x requested for CMD%d!\n", + mmc_hostname(host->mmc), count, cmd->opcode); count = 0xE; } -- cgit v0.10.2 From 6187fee46f4bc7f18f2caefdc75a073c6a25adab Mon Sep 17 00:00:00 2001 From: Sascha Hauer Date: Wed, 11 Apr 2012 22:24:49 +0200 Subject: mmc: remove imxmmc driver This driver is broken since 2.6.31 when the traditional i.MX1 support was removed. In theory the i.MX1 mmc controller can be supported by the mxcmmc driver which basically is the same hardware. However, the i.MX1 controller has severe bugs which made several workarounds necessary which resulted in a different driver structure. At that time it seemed easier to write a second driver to support hardware without bugs. As noone cared for the i.MX1 driver for a long time and it does not compile, remove it. Signed-off-by: Sascha Hauer Signed-off-by: Chris Ball diff --git a/drivers/mmc/host/Kconfig b/drivers/mmc/host/Kconfig index 2bc06e7..9f1f5e2 100644 --- a/drivers/mmc/host/Kconfig +++ b/drivers/mmc/host/Kconfig @@ -307,16 +307,6 @@ config MMC_ATMELMCI_DMA If unsure, say N. -config MMC_IMX - tristate "Motorola i.MX Multimedia Card Interface support" - depends on ARCH_MX1 - help - This selects the Motorola i.MX Multimedia card Interface. - If you have a i.MX platform with a Multimedia Card slot, - say Y or M here. - - If unsure, say N. - config MMC_MSM tristate "Qualcomm SDCC Controller Support" depends on MMC && ARCH_MSM diff --git a/drivers/mmc/host/Makefile b/drivers/mmc/host/Makefile index 3e7e26d..8922b06 100644 --- a/drivers/mmc/host/Makefile +++ b/drivers/mmc/host/Makefile @@ -4,7 +4,6 @@ obj-$(CONFIG_MMC_ARMMMCI) += mmci.o obj-$(CONFIG_MMC_PXA) += pxamci.o -obj-$(CONFIG_MMC_IMX) += imxmmc.o obj-$(CONFIG_MMC_MXC) += mxcmmc.o obj-$(CONFIG_MMC_MXS) += mxs-mmc.o obj-$(CONFIG_MMC_SDHCI) += sdhci.o diff --git a/drivers/mmc/host/imxmmc.c b/drivers/mmc/host/imxmmc.c deleted file mode 100644 index ea0f3ce..0000000 --- a/drivers/mmc/host/imxmmc.c +++ /dev/null @@ -1,1169 +0,0 @@ -/* - * linux/drivers/mmc/host/imxmmc.c - Motorola i.MX MMCI driver - * - * Copyright (C) 2004 Sascha Hauer, Pengutronix - * Copyright (C) 2006 Pavel Pisa, PiKRON - * - * derived from pxamci.c by Russell King - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include - -#include "imxmmc.h" - -#define DRIVER_NAME "imx-mmc" - -#define IMXMCI_INT_MASK_DEFAULT (INT_MASK_BUF_READY | INT_MASK_DATA_TRAN | \ - INT_MASK_WRITE_OP_DONE | INT_MASK_END_CMD_RES | \ - INT_MASK_AUTO_CARD_DETECT | INT_MASK_DAT0_EN | INT_MASK_SDIO) - -struct imxmci_host { - struct mmc_host *mmc; - spinlock_t lock; - struct resource *res; - void __iomem *base; - int irq; - imx_dmach_t dma; - volatile unsigned int imask; - unsigned int power_mode; - unsigned int present; - struct imxmmc_platform_data *pdata; - - struct mmc_request *req; - struct mmc_command *cmd; - struct mmc_data *data; - - struct timer_list timer; - struct tasklet_struct tasklet; - unsigned int status_reg; - unsigned long pending_events; - /* Next two fields are there for CPU driven transfers to overcome SDHC deficiencies */ - u16 *data_ptr; - unsigned int data_cnt; - atomic_t stuck_timeout; - - unsigned int dma_nents; - unsigned int dma_size; - unsigned int dma_dir; - int dma_allocated; - - unsigned char actual_bus_width; - - int prev_cmd_code; - - struct clk *clk; -}; - -#define IMXMCI_PEND_IRQ_b 0 -#define IMXMCI_PEND_DMA_END_b 1 -#define IMXMCI_PEND_DMA_ERR_b 2 -#define IMXMCI_PEND_WAIT_RESP_b 3 -#define IMXMCI_PEND_DMA_DATA_b 4 -#define IMXMCI_PEND_CPU_DATA_b 5 -#define IMXMCI_PEND_CARD_XCHG_b 6 -#define IMXMCI_PEND_SET_INIT_b 7 -#define IMXMCI_PEND_STARTED_b 8 - -#define IMXMCI_PEND_IRQ_m (1 << IMXMCI_PEND_IRQ_b) -#define IMXMCI_PEND_DMA_END_m (1 << IMXMCI_PEND_DMA_END_b) -#define IMXMCI_PEND_DMA_ERR_m (1 << IMXMCI_PEND_DMA_ERR_b) -#define IMXMCI_PEND_WAIT_RESP_m (1 << IMXMCI_PEND_WAIT_RESP_b) -#define IMXMCI_PEND_DMA_DATA_m (1 << IMXMCI_PEND_DMA_DATA_b) -#define IMXMCI_PEND_CPU_DATA_m (1 << IMXMCI_PEND_CPU_DATA_b) -#define IMXMCI_PEND_CARD_XCHG_m (1 << IMXMCI_PEND_CARD_XCHG_b) -#define IMXMCI_PEND_SET_INIT_m (1 << IMXMCI_PEND_SET_INIT_b) -#define IMXMCI_PEND_STARTED_m (1 << IMXMCI_PEND_STARTED_b) - -static void imxmci_stop_clock(struct imxmci_host *host) -{ - int i = 0; - u16 reg; - - reg = readw(host->base + MMC_REG_STR_STP_CLK); - writew(reg & ~STR_STP_CLK_START_CLK, host->base + MMC_REG_STR_STP_CLK); - while (i < 0x1000) { - if (!(i & 0x7f)) { - reg = readw(host->base + MMC_REG_STR_STP_CLK); - writew(reg | STR_STP_CLK_STOP_CLK, - host->base + MMC_REG_STR_STP_CLK); - } - - reg = readw(host->base + MMC_REG_STATUS); - if (!(reg & STATUS_CARD_BUS_CLK_RUN)) { - /* Check twice before cut */ - reg = readw(host->base + MMC_REG_STATUS); - if (!(reg & STATUS_CARD_BUS_CLK_RUN)) - return; - } - - i++; - } - dev_dbg(mmc_dev(host->mmc), "imxmci_stop_clock blocked, no luck\n"); -} - -static int imxmci_start_clock(struct imxmci_host *host) -{ - unsigned int trials = 0; - unsigned int delay_limit = 128; - unsigned long flags; - u16 reg; - - reg = readw(host->base + MMC_REG_STR_STP_CLK); - writew(reg & ~STR_STP_CLK_STOP_CLK, host->base + MMC_REG_STR_STP_CLK); - - clear_bit(IMXMCI_PEND_STARTED_b, &host->pending_events); - - /* - * Command start of the clock, this usually succeeds in less - * then 6 delay loops, but during card detection (low clockrate) - * it takes up to 5000 delay loops and sometimes fails for the first time - */ - reg = readw(host->base + MMC_REG_STR_STP_CLK); - writew(reg | STR_STP_CLK_START_CLK, host->base + MMC_REG_STR_STP_CLK); - - do { - unsigned int delay = delay_limit; - - while (delay--) { - reg = readw(host->base + MMC_REG_STATUS); - if (reg & STATUS_CARD_BUS_CLK_RUN) { - /* Check twice before cut */ - reg = readw(host->base + MMC_REG_STATUS); - if (reg & STATUS_CARD_BUS_CLK_RUN) - return 0; - } - - if (test_bit(IMXMCI_PEND_STARTED_b, &host->pending_events)) - return 0; - } - - local_irq_save(flags); - /* - * Ensure, that request is not doubled under all possible circumstances. - * It is possible, that cock running state is missed, because some other - * IRQ or schedule delays this function execution and the clocks has - * been already stopped by other means (response processing, SDHC HW) - */ - if (!test_bit(IMXMCI_PEND_STARTED_b, &host->pending_events)) { - reg = readw(host->base + MMC_REG_STR_STP_CLK); - writew(reg | STR_STP_CLK_START_CLK, - host->base + MMC_REG_STR_STP_CLK); - } - local_irq_restore(flags); - - } while (++trials < 256); - - dev_err(mmc_dev(host->mmc), "imxmci_start_clock blocked, no luck\n"); - - return -1; -} - -static void imxmci_softreset(struct imxmci_host *host) -{ - int i; - - /* reset sequence */ - writew(0x08, host->base + MMC_REG_STR_STP_CLK); - writew(0x0D, host->base + MMC_REG_STR_STP_CLK); - - for (i = 0; i < 8; i++) - writew(0x05, host->base + MMC_REG_STR_STP_CLK); - - writew(0xff, host->base + MMC_REG_RES_TO); - writew(512, host->base + MMC_REG_BLK_LEN); - writew(1, host->base + MMC_REG_NOB); -} - -static int imxmci_busy_wait_for_status(struct imxmci_host *host, - unsigned int *pstat, unsigned int stat_mask, - int timeout, const char *where) -{ - int loops = 0; - - while (!(*pstat & stat_mask)) { - loops += 2; - if (loops >= timeout) { - dev_dbg(mmc_dev(host->mmc), "busy wait timeout in %s, STATUS = 0x%x (0x%x)\n", - where, *pstat, stat_mask); - return -1; - } - udelay(2); - *pstat |= readw(host->base + MMC_REG_STATUS); - } - if (!loops) - return 0; - - /* The busy-wait is expected there for clock <8MHz due to SDHC hardware flaws */ - if (!(stat_mask & STATUS_END_CMD_RESP) || (host->mmc->ios.clock >= 8000000)) - dev_info(mmc_dev(host->mmc), "busy wait for %d usec in %s, STATUS = 0x%x (0x%x)\n", - loops, where, *pstat, stat_mask); - return loops; -} - -static void imxmci_setup_data(struct imxmci_host *host, struct mmc_data *data) -{ - unsigned int nob = data->blocks; - unsigned int blksz = data->blksz; - unsigned int datasz = nob * blksz; - int i; - - if (data->flags & MMC_DATA_STREAM) - nob = 0xffff; - - host->data = data; - data->bytes_xfered = 0; - - writew(nob, host->base + MMC_REG_NOB); - writew(blksz, host->base + MMC_REG_BLK_LEN); - - /* - * DMA cannot be used for small block sizes, we have to use CPU driven transfers otherwise. - * We are in big troubles for non-512 byte transfers according to note in the paragraph - * 20.6.7 of User Manual anyway, but we need to be able to transfer SCR at least. - * The situation is even more complex in reality. The SDHC in not able to handle wll - * partial FIFO fills and reads. The length has to be rounded up to burst size multiple. - * This is required for SCR read at least. - */ - if (datasz < 512) { - host->dma_size = datasz; - if (data->flags & MMC_DATA_READ) { - host->dma_dir = DMA_FROM_DEVICE; - - /* Hack to enable read SCR */ - writew(1, host->base + MMC_REG_NOB); - writew(512, host->base + MMC_REG_BLK_LEN); - } else { - host->dma_dir = DMA_TO_DEVICE; - } - - /* Convert back to virtual address */ - host->data_ptr = (u16 *)sg_virt(data->sg); - host->data_cnt = 0; - - clear_bit(IMXMCI_PEND_DMA_DATA_b, &host->pending_events); - set_bit(IMXMCI_PEND_CPU_DATA_b, &host->pending_events); - - return; - } - - if (data->flags & MMC_DATA_READ) { - host->dma_dir = DMA_FROM_DEVICE; - host->dma_nents = dma_map_sg(mmc_dev(host->mmc), data->sg, - data->sg_len, host->dma_dir); - - imx_dma_setup_sg(host->dma, data->sg, data->sg_len, datasz, - host->res->start + MMC_REG_BUFFER_ACCESS, - DMA_MODE_READ); - - /*imx_dma_setup_mem2dev_ccr(host->dma, DMA_MODE_READ, IMX_DMA_WIDTH_16, CCR_REN);*/ - CCR(host->dma) = CCR_DMOD_LINEAR | CCR_DSIZ_32 | CCR_SMOD_FIFO | CCR_SSIZ_16 | CCR_REN; - } else { - host->dma_dir = DMA_TO_DEVICE; - - host->dma_nents = dma_map_sg(mmc_dev(host->mmc), data->sg, - data->sg_len, host->dma_dir); - - imx_dma_setup_sg(host->dma, data->sg, data->sg_len, datasz, - host->res->start + MMC_REG_BUFFER_ACCESS, - DMA_MODE_WRITE); - - /*imx_dma_setup_mem2dev_ccr(host->dma, DMA_MODE_WRITE, IMX_DMA_WIDTH_16, CCR_REN);*/ - CCR(host->dma) = CCR_SMOD_LINEAR | CCR_SSIZ_32 | CCR_DMOD_FIFO | CCR_DSIZ_16 | CCR_REN; - } - -#if 1 /* This code is there only for consistency checking and can be disabled in future */ - host->dma_size = 0; - for (i = 0; i < host->dma_nents; i++) - host->dma_size += data->sg[i].length; - - if (datasz > host->dma_size) { - dev_err(mmc_dev(host->mmc), "imxmci_setup_data datasz 0x%x > 0x%x dm_size\n", - datasz, host->dma_size); - } -#endif - - host->dma_size = datasz; - - wmb(); - - set_bit(IMXMCI_PEND_DMA_DATA_b, &host->pending_events); - clear_bit(IMXMCI_PEND_CPU_DATA_b, &host->pending_events); - - /* start DMA engine for read, write is delayed after initial response */ - if (host->dma_dir == DMA_FROM_DEVICE) - imx_dma_enable(host->dma); -} - -static void imxmci_start_cmd(struct imxmci_host *host, struct mmc_command *cmd, unsigned int cmdat) -{ - unsigned long flags; - u32 imask; - - WARN_ON(host->cmd != NULL); - host->cmd = cmd; - - /* Ensure, that clock are stopped else command programming and start fails */ - imxmci_stop_clock(host); - - if (cmd->flags & MMC_RSP_BUSY) - cmdat |= CMD_DAT_CONT_BUSY; - - switch (mmc_resp_type(cmd)) { - case MMC_RSP_R1: /* short CRC, OPCODE */ - case MMC_RSP_R1B:/* short CRC, OPCODE, BUSY */ - cmdat |= CMD_DAT_CONT_RESPONSE_FORMAT_R1; - break; - case MMC_RSP_R2: /* long 136 bit + CRC */ - cmdat |= CMD_DAT_CONT_RESPONSE_FORMAT_R2; - break; - case MMC_RSP_R3: /* short */ - cmdat |= CMD_DAT_CONT_RESPONSE_FORMAT_R3; - break; - default: - break; - } - - if (test_and_clear_bit(IMXMCI_PEND_SET_INIT_b, &host->pending_events)) - cmdat |= CMD_DAT_CONT_INIT; /* This command needs init */ - - if (host->actual_bus_width == MMC_BUS_WIDTH_4) - cmdat |= CMD_DAT_CONT_BUS_WIDTH_4; - - writew(cmd->opcode, host->base + MMC_REG_CMD); - writew(cmd->arg >> 16, host->base + MMC_REG_ARGH); - writew(cmd->arg & 0xffff, host->base + MMC_REG_ARGL); - writew(cmdat, host->base + MMC_REG_CMD_DAT_CONT); - - atomic_set(&host->stuck_timeout, 0); - set_bit(IMXMCI_PEND_WAIT_RESP_b, &host->pending_events); - - - imask = IMXMCI_INT_MASK_DEFAULT; - imask &= ~INT_MASK_END_CMD_RES; - if (cmdat & CMD_DAT_CONT_DATA_ENABLE) { - /* imask &= ~INT_MASK_BUF_READY; */ - imask &= ~INT_MASK_DATA_TRAN; - if (cmdat & CMD_DAT_CONT_WRITE) - imask &= ~INT_MASK_WRITE_OP_DONE; - if (test_bit(IMXMCI_PEND_CPU_DATA_b, &host->pending_events)) - imask &= ~INT_MASK_BUF_READY; - } - - spin_lock_irqsave(&host->lock, flags); - host->imask = imask; - writew(host->imask, host->base + MMC_REG_INT_MASK); - spin_unlock_irqrestore(&host->lock, flags); - - dev_dbg(mmc_dev(host->mmc), "CMD%02d (0x%02x) mask set to 0x%04x\n", - cmd->opcode, cmd->opcode, imask); - - imxmci_start_clock(host); -} - -static void imxmci_finish_request(struct imxmci_host *host, struct mmc_request *req) -{ - unsigned long flags; - - spin_lock_irqsave(&host->lock, flags); - - host->pending_events &= ~(IMXMCI_PEND_WAIT_RESP_m | IMXMCI_PEND_DMA_END_m | - IMXMCI_PEND_DMA_DATA_m | IMXMCI_PEND_CPU_DATA_m); - - host->imask = IMXMCI_INT_MASK_DEFAULT; - writew(host->imask, host->base + MMC_REG_INT_MASK); - - spin_unlock_irqrestore(&host->lock, flags); - - if (req && req->cmd) - host->prev_cmd_code = req->cmd->opcode; - - host->req = NULL; - host->cmd = NULL; - host->data = NULL; - mmc_request_done(host->mmc, req); -} - -static int imxmci_finish_data(struct imxmci_host *host, unsigned int stat) -{ - struct mmc_data *data = host->data; - int data_error; - - if (test_and_clear_bit(IMXMCI_PEND_DMA_DATA_b, &host->pending_events)) { - imx_dma_disable(host->dma); - dma_unmap_sg(mmc_dev(host->mmc), data->sg, host->dma_nents, - host->dma_dir); - } - - if (stat & STATUS_ERR_MASK) { - dev_dbg(mmc_dev(host->mmc), "request failed. status: 0x%08x\n", stat); - if (stat & (STATUS_CRC_READ_ERR | STATUS_CRC_WRITE_ERR)) - data->error = -EILSEQ; - else if (stat & STATUS_TIME_OUT_READ) - data->error = -ETIMEDOUT; - else - data->error = -EIO; - } else { - data->bytes_xfered = host->dma_size; - } - - data_error = data->error; - - host->data = NULL; - - return data_error; -} - -static int imxmci_cmd_done(struct imxmci_host *host, unsigned int stat) -{ - struct mmc_command *cmd = host->cmd; - int i; - u32 a, b, c; - struct mmc_data *data = host->data; - - if (!cmd) - return 0; - - host->cmd = NULL; - - if (stat & STATUS_TIME_OUT_RESP) { - dev_dbg(mmc_dev(host->mmc), "CMD TIMEOUT\n"); - cmd->error = -ETIMEDOUT; - } else if (stat & STATUS_RESP_CRC_ERR && cmd->flags & MMC_RSP_CRC) { - dev_dbg(mmc_dev(host->mmc), "cmd crc error\n"); - cmd->error = -EILSEQ; - } - - if (cmd->flags & MMC_RSP_PRESENT) { - if (cmd->flags & MMC_RSP_136) { - for (i = 0; i < 4; i++) { - a = readw(host->base + MMC_REG_RES_FIFO); - b = readw(host->base + MMC_REG_RES_FIFO); - cmd->resp[i] = a << 16 | b; - } - } else { - a = readw(host->base + MMC_REG_RES_FIFO); - b = readw(host->base + MMC_REG_RES_FIFO); - c = readw(host->base + MMC_REG_RES_FIFO); - cmd->resp[0] = a << 24 | b << 8 | c >> 8; - } - } - - dev_dbg(mmc_dev(host->mmc), "RESP 0x%08x, 0x%08x, 0x%08x, 0x%08x, error %d\n", - cmd->resp[0], cmd->resp[1], cmd->resp[2], cmd->resp[3], cmd->error); - - if (data && !cmd->error && !(stat & STATUS_ERR_MASK)) { - if (host->req->data->flags & MMC_DATA_WRITE) { - - /* Wait for FIFO to be empty before starting DMA write */ - - stat = readw(host->base + MMC_REG_STATUS); - if (imxmci_busy_wait_for_status(host, &stat, - STATUS_APPL_BUFF_FE, - 40, "imxmci_cmd_done DMA WR") < 0) { - cmd->error = -EIO; - imxmci_finish_data(host, stat); - if (host->req) - imxmci_finish_request(host, host->req); - dev_warn(mmc_dev(host->mmc), "STATUS = 0x%04x\n", - stat); - return 0; - } - - if (test_bit(IMXMCI_PEND_DMA_DATA_b, &host->pending_events)) - imx_dma_enable(host->dma); - } - } else { - struct mmc_request *req; - imxmci_stop_clock(host); - req = host->req; - - if (data) - imxmci_finish_data(host, stat); - - if (req) - imxmci_finish_request(host, req); - else - dev_warn(mmc_dev(host->mmc), "imxmci_cmd_done: no request to finish\n"); - } - - return 1; -} - -static int imxmci_data_done(struct imxmci_host *host, unsigned int stat) -{ - struct mmc_data *data = host->data; - int data_error; - - if (!data) - return 0; - - data_error = imxmci_finish_data(host, stat); - - if (host->req->stop) { - imxmci_stop_clock(host); - imxmci_start_cmd(host, host->req->stop, 0); - } else { - struct mmc_request *req; - req = host->req; - if (req) - imxmci_finish_request(host, req); - else - dev_warn(mmc_dev(host->mmc), "imxmci_data_done: no request to finish\n"); - } - - return 1; -} - -static int imxmci_cpu_driven_data(struct imxmci_host *host, unsigned int *pstat) -{ - int i; - int burst_len; - int trans_done = 0; - unsigned int stat = *pstat; - - if (host->actual_bus_width != MMC_BUS_WIDTH_4) - burst_len = 16; - else - burst_len = 64; - - /* This is unfortunately required */ - dev_dbg(mmc_dev(host->mmc), "imxmci_cpu_driven_data running STATUS = 0x%x\n", - stat); - - udelay(20); /* required for clocks < 8MHz*/ - - if (host->dma_dir == DMA_FROM_DEVICE) { - imxmci_busy_wait_for_status(host, &stat, - STATUS_APPL_BUFF_FF | STATUS_DATA_TRANS_DONE | - STATUS_TIME_OUT_READ, - 50, "imxmci_cpu_driven_data read"); - - while ((stat & (STATUS_APPL_BUFF_FF | STATUS_DATA_TRANS_DONE)) && - !(stat & STATUS_TIME_OUT_READ) && - (host->data_cnt < 512)) { - - udelay(20); /* required for clocks < 8MHz*/ - - for (i = burst_len; i >= 2 ; i -= 2) { - u16 data; - data = readw(host->base + MMC_REG_BUFFER_ACCESS); - udelay(10); /* required for clocks < 8MHz*/ - if (host->data_cnt+2 <= host->dma_size) { - *(host->data_ptr++) = data; - } else { - if (host->data_cnt < host->dma_size) - *(u8 *)(host->data_ptr) = data; - } - host->data_cnt += 2; - } - - stat = readw(host->base + MMC_REG_STATUS); - - dev_dbg(mmc_dev(host->mmc), "imxmci_cpu_driven_data read %d burst %d STATUS = 0x%x\n", - host->data_cnt, burst_len, stat); - } - - if ((stat & STATUS_DATA_TRANS_DONE) && (host->data_cnt >= 512)) - trans_done = 1; - - if (host->dma_size & 0x1ff) - stat &= ~STATUS_CRC_READ_ERR; - - if (stat & STATUS_TIME_OUT_READ) { - dev_dbg(mmc_dev(host->mmc), "imxmci_cpu_driven_data read timeout STATUS = 0x%x\n", - stat); - trans_done = -1; - } - - } else { - imxmci_busy_wait_for_status(host, &stat, - STATUS_APPL_BUFF_FE, - 20, "imxmci_cpu_driven_data write"); - - while ((stat & STATUS_APPL_BUFF_FE) && - (host->data_cnt < host->dma_size)) { - if (burst_len >= host->dma_size - host->data_cnt) { - burst_len = host->dma_size - host->data_cnt; - host->data_cnt = host->dma_size; - trans_done = 1; - } else { - host->data_cnt += burst_len; - } - - for (i = burst_len; i > 0 ; i -= 2) - writew(*(host->data_ptr++), host->base + MMC_REG_BUFFER_ACCESS); - - stat = readw(host->base + MMC_REG_STATUS); - - dev_dbg(mmc_dev(host->mmc), "imxmci_cpu_driven_data write burst %d STATUS = 0x%x\n", - burst_len, stat); - } - } - - *pstat = stat; - - return trans_done; -} - -static void imxmci_dma_irq(int dma, void *devid) -{ - struct imxmci_host *host = devid; - u32 stat = readw(host->base + MMC_REG_STATUS); - - atomic_set(&host->stuck_timeout, 0); - host->status_reg = stat; - set_bit(IMXMCI_PEND_DMA_END_b, &host->pending_events); - tasklet_schedule(&host->tasklet); -} - -static irqreturn_t imxmci_irq(int irq, void *devid) -{ - struct imxmci_host *host = devid; - u32 stat = readw(host->base + MMC_REG_STATUS); - int handled = 1; - - writew(host->imask | INT_MASK_SDIO | INT_MASK_AUTO_CARD_DETECT, - host->base + MMC_REG_INT_MASK); - - atomic_set(&host->stuck_timeout, 0); - host->status_reg = stat; - set_bit(IMXMCI_PEND_IRQ_b, &host->pending_events); - set_bit(IMXMCI_PEND_STARTED_b, &host->pending_events); - tasklet_schedule(&host->tasklet); - - return IRQ_RETVAL(handled); -} - -static void imxmci_tasklet_fnc(unsigned long data) -{ - struct imxmci_host *host = (struct imxmci_host *)data; - u32 stat; - unsigned int data_dir_mask = 0; /* STATUS_WR_CRC_ERROR_CODE_MASK */ - int timeout = 0; - - if (atomic_read(&host->stuck_timeout) > 4) { - char *what; - timeout = 1; - stat = readw(host->base + MMC_REG_STATUS); - host->status_reg = stat; - if (test_bit(IMXMCI_PEND_WAIT_RESP_b, &host->pending_events)) - if (test_bit(IMXMCI_PEND_DMA_DATA_b, &host->pending_events)) - what = "RESP+DMA"; - else - what = "RESP"; - else - if (test_bit(IMXMCI_PEND_DMA_DATA_b, &host->pending_events)) - if (test_bit(IMXMCI_PEND_DMA_END_b, &host->pending_events)) - what = "DATA"; - else - what = "DMA"; - else - what = "???"; - - dev_err(mmc_dev(host->mmc), - "%s TIMEOUT, hardware stucked STATUS = 0x%04x IMASK = 0x%04x\n", - what, stat, - readw(host->base + MMC_REG_INT_MASK)); - dev_err(mmc_dev(host->mmc), - "CMD_DAT_CONT = 0x%04x, MMC_BLK_LEN = 0x%04x, MMC_NOB = 0x%04x, DMA_CCR = 0x%08x\n", - readw(host->base + MMC_REG_CMD_DAT_CONT), - readw(host->base + MMC_REG_BLK_LEN), - readw(host->base + MMC_REG_NOB), - CCR(host->dma)); - dev_err(mmc_dev(host->mmc), "CMD%d, prevCMD%d, bus %d-bit, dma_size = 0x%x\n", - host->cmd ? host->cmd->opcode : 0, - host->prev_cmd_code, - 1 << host->actual_bus_width, host->dma_size); - } - - if (!host->present || timeout) - host->status_reg = STATUS_TIME_OUT_RESP | STATUS_TIME_OUT_READ | - STATUS_CRC_READ_ERR | STATUS_CRC_WRITE_ERR; - - if (test_bit(IMXMCI_PEND_IRQ_b, &host->pending_events) || timeout) { - clear_bit(IMXMCI_PEND_IRQ_b, &host->pending_events); - - stat = readw(host->base + MMC_REG_STATUS); - /* - * This is not required in theory, but there is chance to miss some flag - * which clears automatically by mask write, FreeScale original code keeps - * stat from IRQ time so do I - */ - stat |= host->status_reg; - - if (test_bit(IMXMCI_PEND_CPU_DATA_b, &host->pending_events)) - stat &= ~STATUS_CRC_READ_ERR; - - if (test_bit(IMXMCI_PEND_WAIT_RESP_b, &host->pending_events)) { - imxmci_busy_wait_for_status(host, &stat, - STATUS_END_CMD_RESP | STATUS_ERR_MASK, - 20, "imxmci_tasklet_fnc resp (ERRATUM #4)"); - } - - if (stat & (STATUS_END_CMD_RESP | STATUS_ERR_MASK)) { - if (test_and_clear_bit(IMXMCI_PEND_WAIT_RESP_b, &host->pending_events)) - imxmci_cmd_done(host, stat); - if (host->data && (stat & STATUS_ERR_MASK)) - imxmci_data_done(host, stat); - } - - if (test_bit(IMXMCI_PEND_CPU_DATA_b, &host->pending_events)) { - stat |= readw(host->base + MMC_REG_STATUS); - if (imxmci_cpu_driven_data(host, &stat)) { - if (test_and_clear_bit(IMXMCI_PEND_WAIT_RESP_b, &host->pending_events)) - imxmci_cmd_done(host, stat); - atomic_clear_mask(IMXMCI_PEND_IRQ_m|IMXMCI_PEND_CPU_DATA_m, - &host->pending_events); - imxmci_data_done(host, stat); - } - } - } - - if (test_bit(IMXMCI_PEND_DMA_END_b, &host->pending_events) && - !test_bit(IMXMCI_PEND_WAIT_RESP_b, &host->pending_events)) { - - stat = readw(host->base + MMC_REG_STATUS); - /* Same as above */ - stat |= host->status_reg; - - if (host->dma_dir == DMA_TO_DEVICE) - data_dir_mask = STATUS_WRITE_OP_DONE; - else - data_dir_mask = STATUS_DATA_TRANS_DONE; - - if (stat & data_dir_mask) { - clear_bit(IMXMCI_PEND_DMA_END_b, &host->pending_events); - imxmci_data_done(host, stat); - } - } - - if (test_and_clear_bit(IMXMCI_PEND_CARD_XCHG_b, &host->pending_events)) { - - if (host->cmd) - imxmci_cmd_done(host, STATUS_TIME_OUT_RESP); - - if (host->data) - imxmci_data_done(host, STATUS_TIME_OUT_READ | - STATUS_CRC_READ_ERR | STATUS_CRC_WRITE_ERR); - - if (host->req) - imxmci_finish_request(host, host->req); - - mmc_detect_change(host->mmc, msecs_to_jiffies(100)); - - } -} - -static void imxmci_request(struct mmc_host *mmc, struct mmc_request *req) -{ - struct imxmci_host *host = mmc_priv(mmc); - unsigned int cmdat; - - WARN_ON(host->req != NULL); - - host->req = req; - - cmdat = 0; - - if (req->data) { - imxmci_setup_data(host, req->data); - - cmdat |= CMD_DAT_CONT_DATA_ENABLE; - - if (req->data->flags & MMC_DATA_WRITE) - cmdat |= CMD_DAT_CONT_WRITE; - - if (req->data->flags & MMC_DATA_STREAM) - cmdat |= CMD_DAT_CONT_STREAM_BLOCK; - } - - imxmci_start_cmd(host, req->cmd, cmdat); -} - -#define CLK_RATE 19200000 - -static void imxmci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) -{ - struct imxmci_host *host = mmc_priv(mmc); - int prescaler; - - if (ios->bus_width == MMC_BUS_WIDTH_4) { - host->actual_bus_width = MMC_BUS_WIDTH_4; - imx_gpio_mode(PB11_PF_SD_DAT3); - BLR(host->dma) = 0; /* burst 64 byte read/write */ - } else { - host->actual_bus_width = MMC_BUS_WIDTH_1; - imx_gpio_mode(GPIO_PORTB | GPIO_IN | GPIO_PUEN | 11); - BLR(host->dma) = 16; /* burst 16 byte read/write */ - } - - if (host->power_mode != ios->power_mode) { - switch (ios->power_mode) { - case MMC_POWER_OFF: - break; - case MMC_POWER_UP: - set_bit(IMXMCI_PEND_SET_INIT_b, &host->pending_events); - break; - case MMC_POWER_ON: - break; - } - host->power_mode = ios->power_mode; - } - - if (ios->clock) { - unsigned int clk; - u16 reg; - - /* The prescaler is 5 for PERCLK2 equal to 96MHz - * then 96MHz / 5 = 19.2 MHz - */ - clk = clk_get_rate(host->clk); - prescaler = (clk + (CLK_RATE * 7) / 8) / CLK_RATE; - switch (prescaler) { - case 0: - case 1: prescaler = 0; - break; - case 2: prescaler = 1; - break; - case 3: prescaler = 2; - break; - case 4: prescaler = 4; - break; - default: - case 5: prescaler = 5; - break; - } - - dev_dbg(mmc_dev(host->mmc), "PERCLK2 %d MHz -> prescaler %d\n", - clk, prescaler); - - for (clk = 0; clk < 8; clk++) { - int x; - x = CLK_RATE / (1 << clk); - if (x <= ios->clock) - break; - } - - /* enable controller */ - reg = readw(host->base + MMC_REG_STR_STP_CLK); - writew(reg | STR_STP_CLK_ENABLE, - host->base + MMC_REG_STR_STP_CLK); - - imxmci_stop_clock(host); - writew((prescaler << 3) | clk, host->base + MMC_REG_CLK_RATE); - /* - * Under my understanding, clock should not be started there, because it would - * initiate SDHC sequencer and send last or random command into card - */ - /* imxmci_start_clock(host); */ - - dev_dbg(mmc_dev(host->mmc), - "MMC_CLK_RATE: 0x%08x\n", - readw(host->base + MMC_REG_CLK_RATE)); - } else { - imxmci_stop_clock(host); - } -} - -static int imxmci_get_ro(struct mmc_host *mmc) -{ - struct imxmci_host *host = mmc_priv(mmc); - - if (host->pdata && host->pdata->get_ro) - return !!host->pdata->get_ro(mmc_dev(mmc)); - /* - * Board doesn't support read only detection; let the mmc core - * decide what to do. - */ - return -ENOSYS; -} - - -static const struct mmc_host_ops imxmci_ops = { - .request = imxmci_request, - .set_ios = imxmci_set_ios, - .get_ro = imxmci_get_ro, -}; - -static void imxmci_check_status(unsigned long data) -{ - struct imxmci_host *host = (struct imxmci_host *)data; - - if (host->pdata && host->pdata->card_present && - host->pdata->card_present(mmc_dev(host->mmc)) != host->present) { - host->present ^= 1; - dev_info(mmc_dev(host->mmc), "card %s\n", - host->present ? "inserted" : "removed"); - - set_bit(IMXMCI_PEND_CARD_XCHG_b, &host->pending_events); - tasklet_schedule(&host->tasklet); - } - - if (test_bit(IMXMCI_PEND_WAIT_RESP_b, &host->pending_events) || - test_bit(IMXMCI_PEND_DMA_DATA_b, &host->pending_events)) { - atomic_inc(&host->stuck_timeout); - if (atomic_read(&host->stuck_timeout) > 4) - tasklet_schedule(&host->tasklet); - } else { - atomic_set(&host->stuck_timeout, 0); - - } - - mod_timer(&host->timer, jiffies + (HZ>>1)); -} - -static int __init imxmci_probe(struct platform_device *pdev) -{ - struct mmc_host *mmc; - struct imxmci_host *host = NULL; - struct resource *r; - int ret = 0, irq; - u16 rev_no; - - pr_info("i.MX mmc driver\n"); - - r = platform_get_resource(pdev, IORESOURCE_MEM, 0); - irq = platform_get_irq(pdev, 0); - if (!r || irq < 0) - return -ENXIO; - - r = request_mem_region(r->start, resource_size(r), pdev->name); - if (!r) - return -EBUSY; - - mmc = mmc_alloc_host(sizeof(struct imxmci_host), &pdev->dev); - if (!mmc) { - ret = -ENOMEM; - goto out; - } - - mmc->ops = &imxmci_ops; - mmc->f_min = 150000; - mmc->f_max = CLK_RATE/2; - mmc->ocr_avail = MMC_VDD_32_33; - mmc->caps = MMC_CAP_4_BIT_DATA; - - /* MMC core transfer sizes tunable parameters */ - mmc->max_segs = 64; - mmc->max_seg_size = 64*512; /* default PAGE_CACHE_SIZE */ - mmc->max_req_size = 64*512; /* default PAGE_CACHE_SIZE */ - mmc->max_blk_size = 2048; - mmc->max_blk_count = 65535; - - host = mmc_priv(mmc); - host->base = ioremap(r->start, resource_size(r)); - if (!host->base) { - ret = -ENOMEM; - goto out; - } - - host->mmc = mmc; - host->dma_allocated = 0; - host->pdata = pdev->dev.platform_data; - if (!host->pdata) - dev_warn(&pdev->dev, "No platform data provided!\n"); - - spin_lock_init(&host->lock); - host->res = r; - host->irq = irq; - - host->clk = clk_get(&pdev->dev, "perclk2"); - if (IS_ERR(host->clk)) { - ret = PTR_ERR(host->clk); - goto out; - } - clk_enable(host->clk); - - imx_gpio_mode(PB8_PF_SD_DAT0); - imx_gpio_mode(PB9_PF_SD_DAT1); - imx_gpio_mode(PB10_PF_SD_DAT2); - /* Configured as GPIO with pull-up to ensure right MCC card mode */ - /* Switched to PB11_PF_SD_DAT3 if 4 bit bus is configured */ - imx_gpio_mode(GPIO_PORTB | GPIO_IN | GPIO_PUEN | 11); - /* imx_gpio_mode(PB11_PF_SD_DAT3); */ - imx_gpio_mode(PB12_PF_SD_CLK); - imx_gpio_mode(PB13_PF_SD_CMD); - - imxmci_softreset(host); - - rev_no = readw(host->base + MMC_REG_REV_NO); - if (rev_no != 0x390) { - dev_err(mmc_dev(host->mmc), "wrong rev.no. 0x%08x. aborting.\n", - readw(host->base + MMC_REG_REV_NO)); - goto out; - } - - /* recommended in data sheet */ - writew(0x2db4, host->base + MMC_REG_READ_TO); - - host->imask = IMXMCI_INT_MASK_DEFAULT; - writew(host->imask, host->base + MMC_REG_INT_MASK); - - host->dma = imx_dma_request_by_prio(DRIVER_NAME, DMA_PRIO_LOW); - if(host->dma < 0) { - dev_err(mmc_dev(host->mmc), "imx_dma_request_by_prio failed\n"); - ret = -EBUSY; - goto out; - } - host->dma_allocated = 1; - imx_dma_setup_handlers(host->dma, imxmci_dma_irq, NULL, host); - RSSR(host->dma) = DMA_REQ_SDHC; - - tasklet_init(&host->tasklet, imxmci_tasklet_fnc, (unsigned long)host); - host->status_reg=0; - host->pending_events=0; - - ret = request_irq(host->irq, imxmci_irq, 0, DRIVER_NAME, host); - if (ret) - goto out; - - if (host->pdata && host->pdata->card_present) - host->present = host->pdata->card_present(mmc_dev(mmc)); - else /* if there is no way to detect assume that card is present */ - host->present = 1; - - init_timer(&host->timer); - host->timer.data = (unsigned long)host; - host->timer.function = imxmci_check_status; - add_timer(&host->timer); - mod_timer(&host->timer, jiffies + (HZ >> 1)); - - platform_set_drvdata(pdev, mmc); - - mmc_add_host(mmc); - - return 0; - -out: - if (host) { - if (host->dma_allocated) { - imx_dma_free(host->dma); - host->dma_allocated = 0; - } - if (host->clk) { - clk_disable(host->clk); - clk_put(host->clk); - } - if (host->base) - iounmap(host->base); - } - if (mmc) - mmc_free_host(mmc); - release_mem_region(r->start, resource_size(r)); - return ret; -} - -static int __exit imxmci_remove(struct platform_device *pdev) -{ - struct mmc_host *mmc = platform_get_drvdata(pdev); - - platform_set_drvdata(pdev, NULL); - - if (mmc) { - struct imxmci_host *host = mmc_priv(mmc); - - tasklet_disable(&host->tasklet); - - del_timer_sync(&host->timer); - mmc_remove_host(mmc); - - free_irq(host->irq, host); - iounmap(host->base); - if (host->dma_allocated) { - imx_dma_free(host->dma); - host->dma_allocated = 0; - } - - tasklet_kill(&host->tasklet); - - clk_disable(host->clk); - clk_put(host->clk); - - release_mem_region(host->res->start, resource_size(host->res)); - - mmc_free_host(mmc); - } - return 0; -} - -#ifdef CONFIG_PM -static int imxmci_suspend(struct platform_device *dev, pm_message_t state) -{ - struct mmc_host *mmc = platform_get_drvdata(dev); - int ret = 0; - - if (mmc) - ret = mmc_suspend_host(mmc); - - return ret; -} - -static int imxmci_resume(struct platform_device *dev) -{ - struct mmc_host *mmc = platform_get_drvdata(dev); - struct imxmci_host *host; - int ret = 0; - - if (mmc) { - host = mmc_priv(mmc); - if (host) - set_bit(IMXMCI_PEND_SET_INIT_b, &host->pending_events); - ret = mmc_resume_host(mmc); - } - - return ret; -} -#else -#define imxmci_suspend NULL -#define imxmci_resume NULL -#endif /* CONFIG_PM */ - -static struct platform_driver imxmci_driver = { - .remove = __exit_p(imxmci_remove), - .suspend = imxmci_suspend, - .resume = imxmci_resume, - .driver = { - .name = DRIVER_NAME, - .owner = THIS_MODULE, - } -}; - -static int __init imxmci_init(void) -{ - return platform_driver_probe(&imxmci_driver, imxmci_probe); -} - -static void __exit imxmci_exit(void) -{ - platform_driver_unregister(&imxmci_driver); -} - -module_init(imxmci_init); -module_exit(imxmci_exit); - -MODULE_DESCRIPTION("i.MX Multimedia Card Interface Driver"); -MODULE_AUTHOR("Sascha Hauer, Pengutronix"); -MODULE_LICENSE("GPL"); -MODULE_ALIAS("platform:imx-mmc"); diff --git a/drivers/mmc/host/imxmmc.h b/drivers/mmc/host/imxmmc.h deleted file mode 100644 index 09d5d4e..0000000 --- a/drivers/mmc/host/imxmmc.h +++ /dev/null @@ -1,64 +0,0 @@ -#define MMC_REG_STR_STP_CLK 0x00 -#define MMC_REG_STATUS 0x04 -#define MMC_REG_CLK_RATE 0x08 -#define MMC_REG_CMD_DAT_CONT 0x0C -#define MMC_REG_RES_TO 0x10 -#define MMC_REG_READ_TO 0x14 -#define MMC_REG_BLK_LEN 0x18 -#define MMC_REG_NOB 0x1C -#define MMC_REG_REV_NO 0x20 -#define MMC_REG_INT_MASK 0x24 -#define MMC_REG_CMD 0x28 -#define MMC_REG_ARGH 0x2C -#define MMC_REG_ARGL 0x30 -#define MMC_REG_RES_FIFO 0x34 -#define MMC_REG_BUFFER_ACCESS 0x38 - -#define STR_STP_CLK_IPG_CLK_GATE_DIS (1<<15) -#define STR_STP_CLK_IPG_PERCLK_GATE_DIS (1<<14) -#define STR_STP_CLK_ENDIAN (1<<5) -#define STR_STP_CLK_RESET (1<<3) -#define STR_STP_CLK_ENABLE (1<<2) -#define STR_STP_CLK_START_CLK (1<<1) -#define STR_STP_CLK_STOP_CLK (1<<0) -#define STATUS_CARD_PRESENCE (1<<15) -#define STATUS_SDIO_INT_ACTIVE (1<<14) -#define STATUS_END_CMD_RESP (1<<13) -#define STATUS_WRITE_OP_DONE (1<<12) -#define STATUS_DATA_TRANS_DONE (1<<11) -#define STATUS_WR_CRC_ERROR_CODE_MASK (3<<10) -#define STATUS_CARD_BUS_CLK_RUN (1<<8) -#define STATUS_APPL_BUFF_FF (1<<7) -#define STATUS_APPL_BUFF_FE (1<<6) -#define STATUS_RESP_CRC_ERR (1<<5) -#define STATUS_CRC_READ_ERR (1<<3) -#define STATUS_CRC_WRITE_ERR (1<<2) -#define STATUS_TIME_OUT_RESP (1<<1) -#define STATUS_TIME_OUT_READ (1<<0) -#define STATUS_ERR_MASK 0x2f -#define CLK_RATE_PRESCALER(x) ((x) & 0x7) -#define CLK_RATE_CLK_RATE(x) (((x) & 0x7) << 3) -#define CMD_DAT_CONT_CMD_RESP_LONG_OFF (1<<12) -#define CMD_DAT_CONT_STOP_READWAIT (1<<11) -#define CMD_DAT_CONT_START_READWAIT (1<<10) -#define CMD_DAT_CONT_BUS_WIDTH_1 (0<<8) -#define CMD_DAT_CONT_BUS_WIDTH_4 (2<<8) -#define CMD_DAT_CONT_INIT (1<<7) -#define CMD_DAT_CONT_BUSY (1<<6) -#define CMD_DAT_CONT_STREAM_BLOCK (1<<5) -#define CMD_DAT_CONT_WRITE (1<<4) -#define CMD_DAT_CONT_DATA_ENABLE (1<<3) -#define CMD_DAT_CONT_RESPONSE_FORMAT_R1 (1) -#define CMD_DAT_CONT_RESPONSE_FORMAT_R2 (2) -#define CMD_DAT_CONT_RESPONSE_FORMAT_R3 (3) -#define CMD_DAT_CONT_RESPONSE_FORMAT_R4 (4) -#define CMD_DAT_CONT_RESPONSE_FORMAT_R5 (5) -#define CMD_DAT_CONT_RESPONSE_FORMAT_R6 (6) -#define INT_MASK_AUTO_CARD_DETECT (1<<6) -#define INT_MASK_DAT0_EN (1<<5) -#define INT_MASK_SDIO (1<<4) -#define INT_MASK_BUF_READY (1<<3) -#define INT_MASK_END_CMD_RES (1<<2) -#define INT_MASK_WRITE_OP_DONE (1<<1) -#define INT_MASK_DATA_TRAN (1<<0) -#define INT_ALL (0x7f) -- cgit v0.10.2 From bbbc4c4d8c5face097d695f9bf3a39647ba6b7e7 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Mon, 16 Apr 2012 19:16:54 -0400 Subject: mmc: sdio: avoid spurious calls to interrupt handlers Commit 06e8935feb ("optimized SDIO IRQ handling for single irq") introduced some spurious calls to SDIO function interrupt handlers, such as when the SDIO IRQ thread is started, or the safety check performed upon a system resume. Let's add a flag to perform the optimization only when a real interrupt is signaled by the host driver and we know there is no point confirming it. Reported-by: Sujit Reddy Thumma Signed-off-by: Nicolas Pitre Cc: stable Signed-off-by: Chris Ball diff --git a/drivers/mmc/core/sdio.c b/drivers/mmc/core/sdio.c index 2c7c83f..13d0e95 100644 --- a/drivers/mmc/core/sdio.c +++ b/drivers/mmc/core/sdio.c @@ -947,7 +947,7 @@ static int mmc_sdio_resume(struct mmc_host *host) } if (!err && host->sdio_irqs) - mmc_signal_sdio_irq(host); + wake_up_process(host->sdio_irq_thread); mmc_release_host(host); /* diff --git a/drivers/mmc/core/sdio_irq.c b/drivers/mmc/core/sdio_irq.c index f573e7f..3d8ceb4 100644 --- a/drivers/mmc/core/sdio_irq.c +++ b/drivers/mmc/core/sdio_irq.c @@ -28,18 +28,20 @@ #include "sdio_ops.h" -static int process_sdio_pending_irqs(struct mmc_card *card) +static int process_sdio_pending_irqs(struct mmc_host *host) { + struct mmc_card *card = host->card; int i, ret, count; unsigned char pending; struct sdio_func *func; /* * Optimization, if there is only 1 function interrupt registered - * call irq handler directly + * and we know an IRQ was signaled then call irq handler directly. + * Otherwise do the full probe. */ func = card->sdio_single_irq; - if (func) { + if (func && host->sdio_irq_pending) { func->irq_handler(func); return 1; } @@ -116,7 +118,8 @@ static int sdio_irq_thread(void *_host) ret = __mmc_claim_host(host, &host->sdio_irq_thread_abort); if (ret) break; - ret = process_sdio_pending_irqs(host->card); + ret = process_sdio_pending_irqs(host); + host->sdio_irq_pending = false; mmc_release_host(host); /* diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index cbde4b7..0707d22 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -297,6 +297,7 @@ struct mmc_host { unsigned int sdio_irqs; struct task_struct *sdio_irq_thread; + bool sdio_irq_pending; atomic_t sdio_irq_thread_abort; mmc_pm_flag_t pm_flags; /* requested pm features */ @@ -352,6 +353,7 @@ extern int mmc_cache_ctrl(struct mmc_host *, u8); static inline void mmc_signal_sdio_irq(struct mmc_host *host) { host->ops->enable_sdio_irq(host, 0); + host->sdio_irq_pending = true; wake_up_process(host->sdio_irq_thread); } -- cgit v0.10.2 From ca5879d3ffebd967e94b2dc3b1a3dc089709206f Mon Sep 17 00:00:00 2001 From: Pavan Kunapuli Date: Wed, 18 Apr 2012 18:48:02 +0530 Subject: mmc: tegra: support SDHCI SPEC 300 Tegra3 SDHOST controller doesn't advertise v3.00 support by default. This support has to be enabled by configuring a vendor register in the tegra3 sd host address space. Signed-off-by: Pavan Kunapuli Acked-by: Stephen Warren Signed-off-by: Chris Ball diff --git a/drivers/mmc/host/sdhci-tegra.c b/drivers/mmc/host/sdhci-tegra.c index 53b2650..cff0c52 100644 --- a/drivers/mmc/host/sdhci-tegra.c +++ b/drivers/mmc/host/sdhci-tegra.c @@ -32,8 +32,13 @@ #include "sdhci-pltfm.h" +/* Tegra SDHOST controller vendor register definitions */ +#define SDHCI_TEGRA_VENDOR_MISC_CTRL 0x120 +#define SDHCI_MISC_CTRL_ENABLE_SDHCI_SPEC_300 0x20 + #define NVQUIRK_FORCE_SDHCI_SPEC_200 BIT(0) #define NVQUIRK_ENABLE_BLOCK_GAP_DET BIT(1) +#define NVQUIRK_ENABLE_SDHCI_SPEC_300 BIT(2) struct sdhci_tegra_soc_data { struct sdhci_pltfm_data *pdata; @@ -120,6 +125,25 @@ static irqreturn_t carddetect_irq(int irq, void *data) return IRQ_HANDLED; }; +static void tegra_sdhci_reset_exit(struct sdhci_host *host, u8 mask) +{ + struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host); + struct sdhci_tegra *tegra_host = pltfm_host->priv; + const struct sdhci_tegra_soc_data *soc_data = tegra_host->soc_data; + + if (!(mask & SDHCI_RESET_ALL)) + return; + + /* Erratum: Enable SDHCI spec v3.00 support */ + if (soc_data->nvquirks & NVQUIRK_ENABLE_SDHCI_SPEC_300) { + u32 misc_ctrl; + + misc_ctrl = sdhci_readb(host, SDHCI_TEGRA_VENDOR_MISC_CTRL); + misc_ctrl |= SDHCI_MISC_CTRL_ENABLE_SDHCI_SPEC_300; + sdhci_writeb(host, misc_ctrl, SDHCI_TEGRA_VENDOR_MISC_CTRL); + } +} + static int tegra_sdhci_8bit(struct sdhci_host *host, int bus_width) { struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host); @@ -148,6 +172,7 @@ static struct sdhci_ops tegra_sdhci_ops = { .read_w = tegra_sdhci_readw, .write_l = tegra_sdhci_writel, .platform_8bit_width = tegra_sdhci_8bit, + .platform_reset_exit = tegra_sdhci_reset_exit, }; #ifdef CONFIG_ARCH_TEGRA_2x_SOC @@ -178,6 +203,7 @@ static struct sdhci_pltfm_data sdhci_tegra30_pdata = { static struct sdhci_tegra_soc_data soc_data_tegra30 = { .pdata = &sdhci_tegra30_pdata, + .nvquirks = NVQUIRK_ENABLE_SDHCI_SPEC_300, }; #endif -- cgit v0.10.2 From 48b332f9916f33ba0001b78e5cea49ef17f3c81e Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 18 Apr 2012 11:11:57 +0100 Subject: mmc: omap_hsmmc: release correct resource res can be one of several resources, as this variable is re-used several times during probe. This can cause the wrong resource parameters to be passed to release_mem_region(). Get the original memory resource before calling release_mem_region(). Signed-off-by: Russell King Acked-by: Tony Lindgren Signed-off-by: Chris Ball diff --git a/drivers/mmc/host/omap_hsmmc.c b/drivers/mmc/host/omap_hsmmc.c index 4254b6f..d15b149 100644 --- a/drivers/mmc/host/omap_hsmmc.c +++ b/drivers/mmc/host/omap_hsmmc.c @@ -2042,7 +2042,9 @@ err1: err_alloc: omap_hsmmc_gpio_free(pdata); err: - release_mem_region(res->start, resource_size(res)); + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (res) + release_mem_region(res->start, resource_size(res)); return ret; } -- cgit v0.10.2 From 0dd1bfeb6cc89467261cc62ca81e0ac5da78ec0a Mon Sep 17 00:00:00 2001 From: Chuanxiao Dong Date: Mon, 23 Apr 2012 15:18:02 +0800 Subject: mmc: core: check PM_SLEEP for mmc_bus_suspend/resume callbacks If PM_SLEEP is not enabled, mmc.c will give warnning since mmc_bus_suspend/ mmc_bus_resume functions are defined but not used. This patch can fix this warnning. Reported-by: Andrew Lunn Signed-off-by: Chuanxiao Dong Signed-off-by: Chris Ball diff --git a/drivers/mmc/core/bus.c b/drivers/mmc/core/bus.c index c60cee9..9b68933 100644 --- a/drivers/mmc/core/bus.c +++ b/drivers/mmc/core/bus.c @@ -122,6 +122,7 @@ static int mmc_bus_remove(struct device *dev) return 0; } +#ifdef CONFIG_PM_SLEEP static int mmc_bus_suspend(struct device *dev) { struct mmc_driver *drv = to_mmc_driver(dev->driver); @@ -143,6 +144,7 @@ static int mmc_bus_resume(struct device *dev) ret = drv->resume(card); return ret; } +#endif #ifdef CONFIG_PM_RUNTIME -- cgit v0.10.2 From d29bf01941795891828bf671f74c3a4f6fc3517f Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Mon, 9 Apr 2012 22:53:21 +0200 Subject: dma/amba-pl08x: check for terminal count status only For some reason I can't figure out we're reading the PL080_INT_STATUS register instead of PL080_TC_STATUS when checking for the terminal count. The PL080_INT_STATUS is a logical OR between the error and terminal count status register and may not report what we want it to, especially if there is an error and a terminal count at the same time and the former is not lowered in time for the check in the TC register. Make sure we read what we're actually interested in. Cc: Russell King Cc: Viresh Kumar Cc: Alim Akhtar Signed-off-by: Linus Walleij Acked-by: Viresh Kumar Signed-off-by: Vinod Koul diff --git a/drivers/dma/amba-pl08x.c b/drivers/dma/amba-pl08x.c index c301a8e..08589c6 100644 --- a/drivers/dma/amba-pl08x.c +++ b/drivers/dma/amba-pl08x.c @@ -1615,7 +1615,7 @@ static irqreturn_t pl08x_irq(int irq, void *dev) __func__, err); writel(err, pl08x->base + PL080_ERR_CLEAR); } - tc = readl(pl08x->base + PL080_INT_STATUS); + tc = readl(pl08x->base + PL080_TC_STATUS); if (tc) writel(tc, pl08x->base + PL080_TC_CLEAR); -- cgit v0.10.2 From 07975ad3b30579ca27d880491ad992326b930c63 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Thu, 29 Mar 2012 21:14:12 +0200 Subject: KVM: Introduce direct MSI message injection for in-kernel irqchips Currently, MSI messages can only be injected to in-kernel irqchips by defining a corresponding IRQ route for each message. This is not only unhandy if the MSI messages are generated "on the fly" by user space, IRQ routes are a limited resource that user space has to manage carefully. By providing a direct injection path, we can both avoid using up limited resources and simplify the necessary steps for user land. Signed-off-by: Jan Kiszka Signed-off-by: Avi Kivity diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 81ff39f..a155221 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -1689,6 +1689,27 @@ where the guest will clear the flag: when the soft lockup watchdog timer resets itself or when a soft lockup is detected. This ioctl can be called any time after pausing the vcpu, but before it is resumed. +4.71 KVM_SIGNAL_MSI + +Capability: KVM_CAP_SIGNAL_MSI +Architectures: x86 +Type: vm ioctl +Parameters: struct kvm_msi (in) +Returns: >0 on delivery, 0 if guest blocked the MSI, and -1 on error + +Directly inject a MSI message. Only valid with in-kernel irqchip that handles +MSI messages. + +struct kvm_msi { + __u32 address_lo; + __u32 address_hi; + __u32 data; + __u32 flags; + __u8 pad[16]; +}; + +No flags are defined so far. The corresponding field must be 0. + 5. The kvm_run structure Application code obtains a pointer to the kvm_run structure by diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig index 1a7fe86..a28f338 100644 --- a/arch/x86/kvm/Kconfig +++ b/arch/x86/kvm/Kconfig @@ -36,6 +36,7 @@ config KVM select TASKSTATS select TASK_DELAY_ACCT select PERF_EVENTS + select HAVE_KVM_MSI ---help--- Support hosting fully virtualized guest machines using hardware virtualization extensions. You will need a fairly recent diff --git a/include/linux/kvm.h b/include/linux/kvm.h index 7a9dd4b..225b452 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -590,6 +590,7 @@ struct kvm_ppc_pvinfo { #define KVM_CAP_SYNC_REGS 74 #define KVM_CAP_PCI_2_3 75 #define KVM_CAP_KVMCLOCK_CTRL 76 +#define KVM_CAP_SIGNAL_MSI 77 #ifdef KVM_CAP_IRQ_ROUTING @@ -715,6 +716,14 @@ struct kvm_one_reg { __u64 addr; }; +struct kvm_msi { + __u32 address_lo; + __u32 address_hi; + __u32 data; + __u32 flags; + __u8 pad[16]; +}; + /* * ioctls for VM fds */ @@ -789,6 +798,8 @@ struct kvm_s390_ucas_mapping { /* Available with KVM_CAP_PCI_2_3 */ #define KVM_ASSIGN_SET_INTX_MASK _IOW(KVMIO, 0xa4, \ struct kvm_assigned_pci_dev) +/* Available with KVM_CAP_SIGNAL_MSI */ +#define KVM_SIGNAL_MSI _IOW(KVMIO, 0xa5, struct kvm_msi) /* * ioctls for vcpu fds diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 186ffab..6f34330 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -802,6 +802,8 @@ int kvm_set_irq_routing(struct kvm *kvm, unsigned flags); void kvm_free_irq_routing(struct kvm *kvm); +int kvm_send_userspace_msi(struct kvm *kvm, struct kvm_msi *msi); + #else static inline void kvm_free_irq_routing(struct kvm *kvm) {} diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig index f63ccb0..28694f4 100644 --- a/virt/kvm/Kconfig +++ b/virt/kvm/Kconfig @@ -18,3 +18,6 @@ config KVM_MMIO config KVM_ASYNC_PF bool + +config HAVE_KVM_MSI + bool diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c index 9f614b4..a6a0365 100644 --- a/virt/kvm/irq_comm.c +++ b/virt/kvm/irq_comm.c @@ -138,6 +138,20 @@ int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e, return kvm_irq_delivery_to_apic(kvm, NULL, &irq); } +int kvm_send_userspace_msi(struct kvm *kvm, struct kvm_msi *msi) +{ + struct kvm_kernel_irq_routing_entry route; + + if (!irqchip_in_kernel(kvm) || msi->flags != 0) + return -EINVAL; + + route.msi.address_lo = msi->address_lo; + route.msi.address_hi = msi->address_hi; + route.msi.data = msi->data; + + return kvm_set_msi(&route, kvm, KVM_USERSPACE_IRQ_SOURCE_ID, 1); +} + /* * Return value: * < 0 Interrupt was ignored (masked or not delivered for other reasons) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 9eb7936..1847c76 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2060,6 +2060,17 @@ static long kvm_vm_ioctl(struct file *filp, mutex_unlock(&kvm->lock); break; #endif +#ifdef CONFIG_HAVE_KVM_MSI + case KVM_SIGNAL_MSI: { + struct kvm_msi msi; + + r = -EFAULT; + if (copy_from_user(&msi, argp, sizeof msi)) + goto out; + r = kvm_send_userspace_msi(kvm, &msi); + break; + } +#endif default: r = kvm_arch_vm_ioctl(filp, ioctl, arg); if (r == -ENOTTY) @@ -2188,6 +2199,9 @@ static long kvm_dev_ioctl_check_extension_generic(long arg) case KVM_CAP_SET_BOOT_CPU_ID: #endif case KVM_CAP_INTERNAL_ERROR_DATA: +#ifdef CONFIG_HAVE_KVM_MSI + case KVM_CAP_SIGNAL_MSI: +#endif return 1; #ifdef CONFIG_HAVE_KVM_IRQCHIP case KVM_CAP_IRQ_ROUTING: -- cgit v0.10.2 From 413837714232b3a4c0705e915d8af75ad521d083 Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Thu, 19 Apr 2012 14:06:29 +0300 Subject: KVM: Introduce bitmask for apic attention reasons The patch introduces a bitmap that will hold reasons apic should be checked during vmexit. This is in a preparation for vp eoi patch that will add one more check on vmexit. With the bitmap we can do if(apic_attention) to check everything simultaneously which will add zero overhead on the fast path. Signed-off-by: Gleb Natapov Signed-off-by: Avi Kivity diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index f624ca7..69e39bc 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -172,6 +172,9 @@ enum { #define DR7_FIXED_1 0x00000400 #define DR7_VOLATILE 0xffff23ff +/* apic attention bits */ +#define KVM_APIC_CHECK_VAPIC 0 + /* * We don't want allocation failures within the mmu code, so we preallocate * enough memory for a single page fault in a cache. @@ -337,6 +340,7 @@ struct kvm_vcpu_arch { u64 efer; u64 apic_base; struct kvm_lapic *apic; /* kernel irqchip context */ + unsigned long apic_attention; int32_t apic_arb_prio; int mp_state; int sipi_vector; diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 992b4ea..93c1574 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -1088,6 +1088,7 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu) apic_update_ppr(apic); vcpu->arch.apic_arb_prio = 0; + vcpu->arch.apic_attention = 0; apic_debug(KERN_INFO "%s: vcpu=%p, id=%d, base_msr=" "0x%016" PRIx64 ", base_address=0x%0lx.\n", __func__, @@ -1287,7 +1288,7 @@ void kvm_lapic_sync_from_vapic(struct kvm_vcpu *vcpu) u32 data; void *vapic; - if (!irqchip_in_kernel(vcpu->kvm) || !vcpu->arch.apic->vapic_addr) + if (!test_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention)) return; vapic = kmap_atomic(vcpu->arch.apic->vapic_page); @@ -1304,7 +1305,7 @@ void kvm_lapic_sync_to_vapic(struct kvm_vcpu *vcpu) struct kvm_lapic *apic; void *vapic; - if (!irqchip_in_kernel(vcpu->kvm) || !vcpu->arch.apic->vapic_addr) + if (!test_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention)) return; apic = vcpu->arch.apic; @@ -1324,10 +1325,11 @@ void kvm_lapic_sync_to_vapic(struct kvm_vcpu *vcpu) void kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr) { - if (!irqchip_in_kernel(vcpu->kvm)) - return; - vcpu->arch.apic->vapic_addr = vapic_addr; + if (vapic_addr) + __set_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention); + else + __clear_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention); } int kvm_x2apic_msr_write(struct kvm_vcpu *vcpu, u32 msr, u64 data) -- cgit v0.10.2 From 38e8a2ddc9ada5dd1f2def95bebb733bf619bbef Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Sun, 22 Apr 2012 15:12:50 +0300 Subject: KVM: x86 emulator: fix asm constraint in flush_pending_x87_faults 'bool' wants 8-bit registers. Reported-by: Takuya Yoshikawa Signed-off-by: Avi Kivity diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index d5729a9..0d151e2 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -4123,7 +4123,7 @@ static int flush_pending_x87_faults(struct x86_emulate_ctxt *ctxt) "jmp 2b \n\t" ".popsection \n\t" _ASM_EXTABLE(1b, 3b) - : [fault]"+rm"(fault)); + : [fault]"+qm"(fault)); ctxt->ops->put_fpu(ctxt); if (unlikely(fault)) -- cgit v0.10.2 From 0e9f480bb553d39ee06ccd45639ba7a5446a7b81 Mon Sep 17 00:00:00 2001 From: Guennadi Liakhovetski Date: Tue, 24 Apr 2012 17:56:29 +0200 Subject: mmc: cd-gpio: protect against NULL context in mmc_cd_gpio_free() Do not oops, even if mmc_cd_gpio_free() is mistakenly called on a driver cleanup path, even though a previous call to mmc_cd_gpio_request() failed. Signed-off-by: Guennadi Liakhovetski [stable@: please apply to 3.3-stable] Cc: stable Signed-off-by: Chris Ball diff --git a/drivers/mmc/core/cd-gpio.c b/drivers/mmc/core/cd-gpio.c index 2c14be7..f13e38d 100644 --- a/drivers/mmc/core/cd-gpio.c +++ b/drivers/mmc/core/cd-gpio.c @@ -73,6 +73,9 @@ void mmc_cd_gpio_free(struct mmc_host *host) { struct mmc_cd_gpio *cd = host->hotplug.handler_priv; + if (!cd) + return; + free_irq(host->hotplug.irq, host); gpio_free(cd->gpio); kfree(cd); -- cgit v0.10.2 From 5654dc94f872f823aa13941a8fdba69a3feca39c Mon Sep 17 00:00:00 2001 From: Mike Turquette Date: Mon, 26 Mar 2012 11:51:34 -0700 Subject: clk: core: correct clk_set_rate kerneldoc Remove old and misleading documentation from the previous clk_set_rate implementaion. Reported-by: Shawn Guo Signed-off-by: Mike Turquette diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c index 9cf6f59e..3ed36d3 100644 --- a/drivers/clk/clk.c +++ b/drivers/clk/clk.c @@ -859,38 +859,19 @@ static void clk_change_rate(struct clk *clk) * @clk: the clk whose rate is being changed * @rate: the new rate for clk * - * In the simplest case clk_set_rate will only change the rate of clk. + * In the simplest case clk_set_rate will only adjust the rate of clk. * - * If clk has the CLK_SET_RATE_GATE flag set and it is enabled this call - * will fail; only when the clk is disabled will it be able to change - * its rate. + * Setting the CLK_SET_RATE_PARENT flag allows the rate change operation to + * propagate up to clk's parent; whether or not this happens depends on the + * outcome of clk's .round_rate implementation. If *parent_rate is unchanged + * after calling .round_rate then upstream parent propagation is ignored. If + * *parent_rate comes back with a new rate for clk's parent then we propagate + * up to clk's parent and set it's rate. Upward propagation will continue + * until either a clk does not support the CLK_SET_RATE_PARENT flag or + * .round_rate stops requesting changes to clk's parent_rate. * - * Setting the CLK_SET_RATE_PARENT flag allows clk_set_rate to - * recursively propagate up to clk's parent; whether or not this happens - * depends on the outcome of clk's .round_rate implementation. If - * *parent_rate is 0 after calling .round_rate then upstream parent - * propagation is ignored. If *parent_rate comes back with a new rate - * for clk's parent then we propagate up to clk's parent and set it's - * rate. Upward propagation will continue until either a clk does not - * support the CLK_SET_RATE_PARENT flag or .round_rate stops requesting - * changes to clk's parent_rate. If there is a failure during upstream - * propagation then clk_set_rate will unwind and restore each clk's rate - * that had been successfully changed. Afterwards a rate change abort - * notification will be propagated downstream, starting from the clk - * that failed. - * - * At the end of all of the rate setting, clk_set_rate internally calls - * __clk_recalc_rates and propagates the rate changes downstream, - * starting from the highest clk whose rate was changed. This has the - * added benefit of propagating post-rate change notifiers. - * - * Note that while post-rate change and rate change abort notifications - * are guaranteed to be sent to a clk only once per call to - * clk_set_rate, pre-change notifications will be sent for every clk - * whose rate is changed. Stacking pre-change notifications is noisy - * for the drivers subscribed to them, but this allows drivers to react - * to intermediate clk rate changes up until the point where the final - * rate is achieved at the end of upstream propagation. + * Rate changes are accomplished via tree traversal that also recalculates the + * rates for the clocks and fires off POST_RATE_CHANGE notifiers. * * Returns 0 on success, -EERROR otherwise. */ -- cgit v0.10.2 From 70d347e6cd0d2a7ecc023b44ef721bc2c2a38f22 Mon Sep 17 00:00:00 2001 From: Mike Turquette Date: Mon, 26 Mar 2012 11:53:47 -0700 Subject: clk: core: remove dead code paths Some static inline dummy functions were left over from before the clock core was consolidated from several C files down to one. Remove them. Reported-by: Shawn Guo Signed-off-by: Mike Turquette diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c index 3ed36d3..4daacf5 100644 --- a/drivers/clk/clk.c +++ b/drivers/clk/clk.c @@ -194,7 +194,7 @@ static int __init clk_debug_init(void) late_initcall(clk_debug_init); #else static inline int clk_debug_register(struct clk *clk) { return 0; } -#endif /* CONFIG_COMMON_CLK_DEBUG */ +#endif #ifdef CONFIG_COMMON_CLK_DISABLE_UNUSED /* caller must hold prepare_lock */ @@ -246,9 +246,7 @@ static int clk_disable_unused(void) return 0; } late_initcall(clk_disable_unused); -#else -static inline int clk_disable_unused(struct clk *clk) { return 0; } -#endif /* CONFIG_COMMON_CLK_DISABLE_UNUSED */ +#endif /*** helper functions ***/ -- cgit v0.10.2 From 7452b2191cd55fb3fd6ad65344466ddcdbe4676e Mon Sep 17 00:00:00 2001 From: Mike Turquette Date: Mon, 26 Mar 2012 14:45:36 -0700 Subject: clk: core: clk_calc_new_rates handles NULL parents It is possible to call clk_set_rate on a clock with a NULL parent. One such example is an adjustable-rate root clock. Ensure that clk_calc_new_rates does not dereference parent without checking first and also handle the corner cases gracefully. Reported-by: Rajendra Nayak Signed-off-by: Mike Turquette diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c index 4daacf5..d83a9e0 100644 --- a/drivers/clk/clk.c +++ b/drivers/clk/clk.c @@ -763,25 +763,38 @@ static void clk_calc_subtree(struct clk *clk, unsigned long new_rate) static struct clk *clk_calc_new_rates(struct clk *clk, unsigned long rate) { struct clk *top = clk; - unsigned long best_parent_rate = clk->parent->rate; + unsigned long best_parent_rate; unsigned long new_rate; - if (!clk->ops->round_rate && !(clk->flags & CLK_SET_RATE_PARENT)) { - clk->new_rate = clk->rate; + /* sanity */ + if (IS_ERR_OR_NULL(clk)) + return NULL; + + /* never propagate up to the parent */ + if (!(clk->flags & CLK_SET_RATE_PARENT)) { + if (!clk->ops->round_rate) { + clk->new_rate = clk->rate; + return NULL; + } else { + new_rate = clk->ops->round_rate(clk->hw, rate, NULL); + goto out; + } + } + + /* need clk->parent from here on out */ + if (!clk->parent) { + pr_debug("%s: %s has NULL parent\n", __func__, clk->name); return NULL; } - if (!clk->ops->round_rate && (clk->flags & CLK_SET_RATE_PARENT)) { + if (!clk->ops->round_rate) { top = clk_calc_new_rates(clk->parent, rate); new_rate = clk->new_rate = clk->parent->new_rate; goto out; } - if (clk->flags & CLK_SET_RATE_PARENT) - new_rate = clk->ops->round_rate(clk->hw, rate, &best_parent_rate); - else - new_rate = clk->ops->round_rate(clk->hw, rate, NULL); + new_rate = clk->ops->round_rate(clk->hw, rate, &best_parent_rate); if (best_parent_rate != clk->parent->rate) { top = clk_calc_new_rates(clk->parent, best_parent_rate); -- cgit v0.10.2 From d4d7e3ddc76c5ae3b4fbd15cb6f30aa78c28d788 Mon Sep 17 00:00:00 2001 From: Mike Turquette Date: Mon, 26 Mar 2012 16:15:52 -0700 Subject: clk: core: enforce clk_ops consistency Documentation/clk.txt has some handsome ASCII art outlining which clk_ops are mandatory for a given clock, given the capability of the hardware. Enforce those mandates with sanity checks in __clk_init. Signed-off-by: Mike Turquette diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c index d83a9e0..9924aec 100644 --- a/drivers/clk/clk.c +++ b/drivers/clk/clk.c @@ -1202,6 +1202,20 @@ void __clk_init(struct device *dev, struct clk *clk) if (__clk_lookup(clk->name)) goto out; + /* check that clk_ops are sane. See Documentation/clk.txt */ + if (clk->ops->set_rate && + !(clk->ops->round_rate && clk->ops->recalc_rate)) { + pr_warning("%s: %s must implement .round_rate & .recalc_rate\n", + __func__, clk->name); + goto out; + } + + if (clk->ops->set_parent && !clk->ops->get_parent) { + pr_warning("%s: %s must implement .get_parent & .set_parent\n", + __func__, clk->name); + goto out; + } + /* throw a WARN if any entries in parent_names are NULL */ for (i = 0; i < clk->num_parents; i++) WARN(!clk->parent_names[i], -- cgit v0.10.2 From 10363b5838b4d5dcbf01db219f35e91aa94f24c6 Mon Sep 17 00:00:00 2001 From: Shawn Guo Date: Tue, 27 Mar 2012 15:23:20 +0800 Subject: clk: use kzalloc in clk_register_mux Change clk_register_mux to use kzalloc, just like what all other basic clk registration functions do. Signed-off-by: Shawn Guo Signed-off-by: Mike Turquette diff --git a/drivers/clk/clk-mux.c b/drivers/clk/clk-mux.c index c71ad1f..50e0595 100644 --- a/drivers/clk/clk-mux.c +++ b/drivers/clk/clk-mux.c @@ -97,7 +97,7 @@ struct clk *clk_register_mux(struct device *dev, const char *name, { struct clk_mux *mux; - mux = kmalloc(sizeof(struct clk_mux), GFP_KERNEL); + mux = kzalloc(sizeof(struct clk_mux), GFP_KERNEL); if (!mux) { pr_err("%s: could not allocate mux clk\n", __func__); -- cgit v0.10.2 From c0d2530c03cbf3741cb7a0f8ebae93e7a563fc58 Mon Sep 17 00:00:00 2001 From: Shawn Guo Date: Tue, 27 Mar 2012 15:23:21 +0800 Subject: clk: remove unnecessary EXPORT_SYMBOL_GPL It makes no sense to have EXPORT_SYMBOL_GPL on static functions. Signed-off-by: Shawn Guo Signed-off-by: Mike Turquette diff --git a/drivers/clk/clk-divider.c b/drivers/clk/clk-divider.c index d5ac6a7..231cd6e 100644 --- a/drivers/clk/clk-divider.c +++ b/drivers/clk/clk-divider.c @@ -45,7 +45,6 @@ static unsigned long clk_divider_recalc_rate(struct clk_hw *hw, return parent_rate / div; } -EXPORT_SYMBOL_GPL(clk_divider_recalc_rate); /* * The reverse of DIV_ROUND_UP: The maximum number which @@ -117,7 +116,6 @@ static long clk_divider_round_rate(struct clk_hw *hw, unsigned long rate, return r / div; } } -EXPORT_SYMBOL_GPL(clk_divider_round_rate); static int clk_divider_set_rate(struct clk_hw *hw, unsigned long rate) { @@ -147,7 +145,6 @@ static int clk_divider_set_rate(struct clk_hw *hw, unsigned long rate) return 0; } -EXPORT_SYMBOL_GPL(clk_divider_set_rate); struct clk_ops clk_divider_ops = { .recalc_rate = clk_divider_recalc_rate, diff --git a/drivers/clk/clk-fixed-rate.c b/drivers/clk/clk-fixed-rate.c index 90c79fb..651b06f 100644 --- a/drivers/clk/clk-fixed-rate.c +++ b/drivers/clk/clk-fixed-rate.c @@ -32,7 +32,6 @@ static unsigned long clk_fixed_rate_recalc_rate(struct clk_hw *hw, { return to_clk_fixed_rate(hw)->fixed_rate; } -EXPORT_SYMBOL_GPL(clk_fixed_rate_recalc_rate); struct clk_ops clk_fixed_rate_ops = { .recalc_rate = clk_fixed_rate_recalc_rate, diff --git a/drivers/clk/clk-gate.c b/drivers/clk/clk-gate.c index b5902e2..b688f47 100644 --- a/drivers/clk/clk-gate.c +++ b/drivers/clk/clk-gate.c @@ -71,7 +71,6 @@ static int clk_gate_enable(struct clk_hw *hw) return 0; } -EXPORT_SYMBOL_GPL(clk_gate_enable); static void clk_gate_disable(struct clk_hw *hw) { @@ -82,7 +81,6 @@ static void clk_gate_disable(struct clk_hw *hw) else clk_gate_clear_bit(gate); } -EXPORT_SYMBOL_GPL(clk_gate_disable); static int clk_gate_is_enabled(struct clk_hw *hw) { @@ -99,7 +97,6 @@ static int clk_gate_is_enabled(struct clk_hw *hw) return reg ? 1 : 0; } -EXPORT_SYMBOL_GPL(clk_gate_is_enabled); struct clk_ops clk_gate_ops = { .enable = clk_gate_enable, diff --git a/drivers/clk/clk-mux.c b/drivers/clk/clk-mux.c index 50e0595..45cad61 100644 --- a/drivers/clk/clk-mux.c +++ b/drivers/clk/clk-mux.c @@ -55,7 +55,6 @@ static u8 clk_mux_get_parent(struct clk_hw *hw) return val; } -EXPORT_SYMBOL_GPL(clk_mux_get_parent); static int clk_mux_set_parent(struct clk_hw *hw, u8 index) { @@ -82,7 +81,6 @@ static int clk_mux_set_parent(struct clk_hw *hw, u8 index) return 0; } -EXPORT_SYMBOL_GPL(clk_mux_set_parent); struct clk_ops clk_mux_ops = { .get_parent = clk_mux_get_parent, -- cgit v0.10.2 From 822c250e154cd44cf60a4f0d647aa70abea09520 Mon Sep 17 00:00:00 2001 From: Shawn Guo Date: Tue, 27 Mar 2012 15:23:22 +0800 Subject: clk: add "const" for clk_ops of basic clks The clk_ops of basic clks should have "const" to match the definition in "struct clk" and clk_register prototype. Signed-off-by: Shawn Guo Signed-off-by: Mike Turquette diff --git a/drivers/clk/clk-divider.c b/drivers/clk/clk-divider.c index 231cd6e..b1c4b02 100644 --- a/drivers/clk/clk-divider.c +++ b/drivers/clk/clk-divider.c @@ -146,7 +146,7 @@ static int clk_divider_set_rate(struct clk_hw *hw, unsigned long rate) return 0; } -struct clk_ops clk_divider_ops = { +const struct clk_ops clk_divider_ops = { .recalc_rate = clk_divider_recalc_rate, .round_rate = clk_divider_round_rate, .set_rate = clk_divider_set_rate, diff --git a/drivers/clk/clk-fixed-rate.c b/drivers/clk/clk-fixed-rate.c index 651b06f..027e477 100644 --- a/drivers/clk/clk-fixed-rate.c +++ b/drivers/clk/clk-fixed-rate.c @@ -33,7 +33,7 @@ static unsigned long clk_fixed_rate_recalc_rate(struct clk_hw *hw, return to_clk_fixed_rate(hw)->fixed_rate; } -struct clk_ops clk_fixed_rate_ops = { +const struct clk_ops clk_fixed_rate_ops = { .recalc_rate = clk_fixed_rate_recalc_rate, }; EXPORT_SYMBOL_GPL(clk_fixed_rate_ops); diff --git a/drivers/clk/clk-gate.c b/drivers/clk/clk-gate.c index b688f47..fe2ff9e 100644 --- a/drivers/clk/clk-gate.c +++ b/drivers/clk/clk-gate.c @@ -98,7 +98,7 @@ static int clk_gate_is_enabled(struct clk_hw *hw) return reg ? 1 : 0; } -struct clk_ops clk_gate_ops = { +const struct clk_ops clk_gate_ops = { .enable = clk_gate_enable, .disable = clk_gate_disable, .is_enabled = clk_gate_is_enabled, diff --git a/drivers/clk/clk-mux.c b/drivers/clk/clk-mux.c index 45cad61..5424488 100644 --- a/drivers/clk/clk-mux.c +++ b/drivers/clk/clk-mux.c @@ -82,7 +82,7 @@ static int clk_mux_set_parent(struct clk_hw *hw, u8 index) return 0; } -struct clk_ops clk_mux_ops = { +const struct clk_ops clk_mux_ops = { .get_parent = clk_mux_get_parent, .set_parent = clk_mux_set_parent, }; diff --git a/include/linux/clk-private.h b/include/linux/clk-private.h index 5e4312b..5f4ccd7 100644 --- a/include/linux/clk-private.h +++ b/include/linux/clk-private.h @@ -55,7 +55,7 @@ struct clk { * alternative macro for static initialization */ -extern struct clk_ops clk_fixed_rate_ops; +extern const struct clk_ops clk_fixed_rate_ops; #define DEFINE_CLK_FIXED_RATE(_name, _flags, _rate, \ _fixed_rate_flags) \ @@ -78,7 +78,7 @@ extern struct clk_ops clk_fixed_rate_ops; .flags = _flags, \ }; -extern struct clk_ops clk_gate_ops; +extern const struct clk_ops clk_gate_ops; #define DEFINE_CLK_GATE(_name, _parent_name, _parent_ptr, \ _flags, _reg, _bit_idx, \ @@ -110,7 +110,7 @@ extern struct clk_ops clk_gate_ops; .flags = _flags, \ }; -extern struct clk_ops clk_divider_ops; +extern const struct clk_ops clk_divider_ops; #define DEFINE_CLK_DIVIDER(_name, _parent_name, _parent_ptr, \ _flags, _reg, _shift, _width, \ @@ -143,7 +143,7 @@ extern struct clk_ops clk_divider_ops; .flags = _flags, \ }; -extern struct clk_ops clk_mux_ops; +extern const struct clk_ops clk_mux_ops; #define DEFINE_CLK_MUX(_name, _parent_names, _parents, _flags, \ _reg, _shift, _width, \ -- cgit v0.10.2 From bffad66e31fe9d94cd096f2e4de7c683e1ae32ef Mon Sep 17 00:00:00 2001 From: Shawn Guo Date: Tue, 27 Mar 2012 15:23:23 +0800 Subject: clk: declare clk_ops of basic clks in clk-provider.h Besides the static initialization, the clk_ops of basic clks could also be used by particular clk type being subclass of the basic clks. For example, clk_busy_divider has the same clk_ops as clk_divider, except it has to wait for a busy bit before return success with .set_rate. clk_busy_divider will somehow reuse clk_ops of clk_divider. Since clk-provider.h is included by clk-private.h, it's safe to move those clk_ops declaration of basic clks form clk-private.h into clk-provider.h, so that implementation of clks like clk_busy_divider above do not need to include clk-private.h to access those clk_ops. Signed-off-by: Shawn Guo Signed-off-by: Mike Turquette diff --git a/include/linux/clk-private.h b/include/linux/clk-private.h index 5f4ccd7..f19fee0 100644 --- a/include/linux/clk-private.h +++ b/include/linux/clk-private.h @@ -55,8 +55,6 @@ struct clk { * alternative macro for static initialization */ -extern const struct clk_ops clk_fixed_rate_ops; - #define DEFINE_CLK_FIXED_RATE(_name, _flags, _rate, \ _fixed_rate_flags) \ static struct clk _name; \ @@ -78,8 +76,6 @@ extern const struct clk_ops clk_fixed_rate_ops; .flags = _flags, \ }; -extern const struct clk_ops clk_gate_ops; - #define DEFINE_CLK_GATE(_name, _parent_name, _parent_ptr, \ _flags, _reg, _bit_idx, \ _gate_flags, _lock) \ @@ -110,8 +106,6 @@ extern const struct clk_ops clk_gate_ops; .flags = _flags, \ }; -extern const struct clk_ops clk_divider_ops; - #define DEFINE_CLK_DIVIDER(_name, _parent_name, _parent_ptr, \ _flags, _reg, _shift, _width, \ _divider_flags, _lock) \ @@ -143,8 +137,6 @@ extern const struct clk_ops clk_divider_ops; .flags = _flags, \ }; -extern const struct clk_ops clk_mux_ops; - #define DEFINE_CLK_MUX(_name, _parent_names, _parents, _flags, \ _reg, _shift, _width, \ _mux_flags, _lock) \ diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h index 5508897..6eb8e5d 100644 --- a/include/linux/clk-provider.h +++ b/include/linux/clk-provider.h @@ -149,6 +149,7 @@ struct clk_fixed_rate { u8 flags; }; +extern const struct clk_ops clk_fixed_rate_ops; struct clk *clk_register_fixed_rate(struct device *dev, const char *name, const char *parent_name, unsigned long flags, unsigned long fixed_rate); @@ -180,6 +181,7 @@ struct clk_gate { #define CLK_GATE_SET_TO_DISABLE BIT(0) +extern const struct clk_ops clk_gate_ops; struct clk *clk_register_gate(struct device *dev, const char *name, const char *parent_name, unsigned long flags, void __iomem *reg, u8 bit_idx, @@ -218,6 +220,7 @@ struct clk_divider { #define CLK_DIVIDER_ONE_BASED BIT(0) #define CLK_DIVIDER_POWER_OF_TWO BIT(1) +extern const struct clk_ops clk_divider_ops; struct clk *clk_register_divider(struct device *dev, const char *name, const char *parent_name, unsigned long flags, void __iomem *reg, u8 shift, u8 width, @@ -252,6 +255,7 @@ struct clk_mux { #define CLK_MUX_INDEX_ONE BIT(0) #define CLK_MUX_INDEX_BIT BIT(1) +extern const struct clk_ops clk_mux_ops; struct clk *clk_register_mux(struct device *dev, const char *name, char **parent_names, u8 num_parents, unsigned long flags, void __iomem *reg, u8 shift, u8 width, -- cgit v0.10.2 From 34e44fe87437b6a5aad856f15f7a849e5fc137aa Mon Sep 17 00:00:00 2001 From: Rajendra Nayak Date: Mon, 26 Mar 2012 19:01:48 +0530 Subject: clk: Make clk_get_rate() return 0 on error Most users of clk_get_rate() actually assume a non zero return value as a valid rate returned. Returing -EINVAL might confuse such users, so make it instead return zero on error. Besides the return value of clk_get_rate seems to be 'unsigned long'. Signed-off-by: Rajendra nayak Signed-off-by: Mike Turquette diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c index 9924aec..a24b121 100644 --- a/drivers/clk/clk.c +++ b/drivers/clk/clk.c @@ -285,7 +285,7 @@ unsigned long __clk_get_rate(struct clk *clk) unsigned long ret; if (!clk) { - ret = -EINVAL; + ret = 0; goto out; } @@ -295,7 +295,7 @@ unsigned long __clk_get_rate(struct clk *clk) goto out; if (!clk->parent) - ret = -ENODEV; + ret = 0; out: return ret; @@ -560,7 +560,7 @@ EXPORT_SYMBOL_GPL(clk_enable); * @clk: the clk whose rate is being returned * * Simply returns the cached rate of the clk. Does not query the hardware. If - * clk is NULL then returns -EINVAL. + * clk is NULL then returns 0. */ unsigned long clk_get_rate(struct clk *clk) { -- cgit v0.10.2 From 7e87aed965fa7a642fc299af96d370dad7b5b814 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Sun, 1 Apr 2012 15:31:23 +0100 Subject: clk: Remove comment for end of CONFIG_COMMON_CLK section The comment is inaccurate (it actually ends the CONFIG_COMMON_CLK section, there's no else) and given that we've just got a single level of ifdef isn't really needed anyway. Signed-off-by: Mark Brown Signed-off-by: Mike Turquette diff --git a/include/linux/clk.h b/include/linux/clk.h index b025272..c9547d9 100644 --- a/include/linux/clk.h +++ b/include/linux/clk.h @@ -81,7 +81,7 @@ int clk_notifier_register(struct clk *clk, struct notifier_block *nb); int clk_notifier_unregister(struct clk *clk, struct notifier_block *nb); -#endif /* !CONFIG_COMMON_CLK */ +#endif /** * clk_get - lookup and obtain a reference to a clock producer. -- cgit v0.10.2 From d305fb78f31209596c9135d396a0d3af7ac86947 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 21 Mar 2012 20:01:20 +0000 Subject: clk: Constify parent name arrays Drivers should be able to declare their arrays of parent names as const so the APIs need to accept const arguments. Signed-off-by: Mark Brown [mturquette@linaro.org: constified gate] Signed-off-by: Mike Turquette diff --git a/drivers/clk/clk-mux.c b/drivers/clk/clk-mux.c index 5424488..bd5e598 100644 --- a/drivers/clk/clk-mux.c +++ b/drivers/clk/clk-mux.c @@ -89,7 +89,7 @@ const struct clk_ops clk_mux_ops = { EXPORT_SYMBOL_GPL(clk_mux_ops); struct clk *clk_register_mux(struct device *dev, const char *name, - char **parent_names, u8 num_parents, unsigned long flags, + const char **parent_names, u8 num_parents, unsigned long flags, void __iomem *reg, u8 shift, u8 width, u8 clk_mux_flags, spinlock_t *lock) { diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c index a24b121..ddade87 100644 --- a/drivers/clk/clk.c +++ b/drivers/clk/clk.c @@ -1328,7 +1328,7 @@ out: */ struct clk *clk_register(struct device *dev, const char *name, const struct clk_ops *ops, struct clk_hw *hw, - char **parent_names, u8 num_parents, unsigned long flags) + const char **parent_names, u8 num_parents, unsigned long flags) { struct clk *clk; diff --git a/include/linux/clk-private.h b/include/linux/clk-private.h index f19fee0..e9c8b98 100644 --- a/include/linux/clk-private.h +++ b/include/linux/clk-private.h @@ -30,7 +30,7 @@ struct clk { const struct clk_ops *ops; struct clk_hw *hw; struct clk *parent; - char **parent_names; + const char **parent_names; struct clk **parents; u8 num_parents; unsigned long rate; diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h index 6eb8e5d..8981435 100644 --- a/include/linux/clk-provider.h +++ b/include/linux/clk-provider.h @@ -176,7 +176,7 @@ struct clk_gate { u8 bit_idx; u8 flags; spinlock_t *lock; - char *parent[1]; + const char *parent[1]; }; #define CLK_GATE_SET_TO_DISABLE BIT(0) @@ -214,7 +214,7 @@ struct clk_divider { u8 width; u8 flags; spinlock_t *lock; - char *parent[1]; + const char *parent[1]; }; #define CLK_DIVIDER_ONE_BASED BIT(0) @@ -257,7 +257,7 @@ struct clk_mux { extern const struct clk_ops clk_mux_ops; struct clk *clk_register_mux(struct device *dev, const char *name, - char **parent_names, u8 num_parents, unsigned long flags, + const char **parent_names, u8 num_parents, unsigned long flags, void __iomem *reg, u8 shift, u8 width, u8 clk_mux_flags, spinlock_t *lock); @@ -278,7 +278,7 @@ struct clk *clk_register_mux(struct device *dev, const char *name, */ struct clk *clk_register(struct device *dev, const char *name, const struct clk_ops *ops, struct clk_hw *hw, - char **parent_names, u8 num_parents, unsigned long flags); + const char **parent_names, u8 num_parents, unsigned long flags); /* helper functions */ const char *__clk_get_name(struct clk *clk); -- cgit v0.10.2 From d1302a36a7f1c33d1a8babc6a510e1401a5e5aed Mon Sep 17 00:00:00 2001 From: Mike Turquette Date: Thu, 29 Mar 2012 14:30:40 -0700 Subject: clk: core: copy parent_names & return error codes This patch cleans up clk_register and solves a few bugs by teaching clk_register and __clk_init to return error codes (instead of just NULL) to better align with the existing clk.h api. Along with that change this patch also introduces a new behavior whereby clk_register copies the parent_names array, thus allowing platforms to declare their parent_names arrays as __initdata. Signed-off-by: Mike Turquette diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c index ddade87..8f7c384 100644 --- a/drivers/clk/clk.c +++ b/drivers/clk/clk.c @@ -1185,34 +1185,41 @@ EXPORT_SYMBOL_GPL(clk_set_parent); * very large numbers of clocks that need to be statically initialized. It is * a layering violation to include clk-private.h from any code which implements * a clock's .ops; as such any statically initialized clock data MUST be in a - * separate C file from the logic that implements it's operations. + * separate C file from the logic that implements it's operations. Returns 0 + * on success, otherwise an error code. */ -void __clk_init(struct device *dev, struct clk *clk) +int __clk_init(struct device *dev, struct clk *clk) { - int i; + int i, ret = 0; struct clk *orphan; struct hlist_node *tmp, *tmp2; if (!clk) - return; + return -EINVAL; mutex_lock(&prepare_lock); /* check to see if a clock with this name is already registered */ - if (__clk_lookup(clk->name)) + if (__clk_lookup(clk->name)) { + pr_debug("%s: clk %s already initialized\n", + __func__, clk->name); + ret = -EEXIST; goto out; + } /* check that clk_ops are sane. See Documentation/clk.txt */ if (clk->ops->set_rate && !(clk->ops->round_rate && clk->ops->recalc_rate)) { pr_warning("%s: %s must implement .round_rate & .recalc_rate\n", __func__, clk->name); + ret = -EINVAL; goto out; } if (clk->ops->set_parent && !clk->ops->get_parent) { pr_warning("%s: %s must implement .get_parent & .set_parent\n", __func__, clk->name); + ret = -EINVAL; goto out; } @@ -1308,7 +1315,7 @@ void __clk_init(struct device *dev, struct clk *clk) out: mutex_unlock(&prepare_lock); - return; + return ret; } /** @@ -1324,29 +1331,63 @@ out: * clk_register is the primary interface for populating the clock tree with new * clock nodes. It returns a pointer to the newly allocated struct clk which * cannot be dereferenced by driver code but may be used in conjuction with the - * rest of the clock API. + * rest of the clock API. In the event of an error clk_register will return an + * error code; drivers must test for an error code after calling clk_register. */ struct clk *clk_register(struct device *dev, const char *name, const struct clk_ops *ops, struct clk_hw *hw, const char **parent_names, u8 num_parents, unsigned long flags) { + int i, ret; struct clk *clk; clk = kzalloc(sizeof(*clk), GFP_KERNEL); - if (!clk) - return NULL; + if (!clk) { + pr_err("%s: could not allocate clk\n", __func__); + ret = -ENOMEM; + goto fail_out; + } clk->name = name; clk->ops = ops; clk->hw = hw; clk->flags = flags; - clk->parent_names = parent_names; clk->num_parents = num_parents; hw->clk = clk; - __clk_init(dev, clk); + /* allocate local copy in case parent_names is __initdata */ + clk->parent_names = kzalloc((sizeof(char*) * num_parents), + GFP_KERNEL); + + if (!clk->parent_names) { + pr_err("%s: could not allocate clk->parent_names\n", __func__); + ret = -ENOMEM; + goto fail_parent_names; + } + + + /* copy each string name in case parent_names is __initdata */ + for (i = 0; i < num_parents; i++) { + clk->parent_names[i] = kstrdup(parent_names[i], GFP_KERNEL); + if (!clk->parent_names[i]) { + pr_err("%s: could not copy parent_names\n", __func__); + ret = -ENOMEM; + goto fail_parent_names_copy; + } + } + + ret = __clk_init(dev, clk); + if (!ret) + return clk; - return clk; +fail_parent_names_copy: + while (--i >= 0) + kfree(clk->parent_names[i]); + kfree(clk->parent_names); +fail_parent_names: + kfree(clk); +fail_out: + return ERR_PTR(ret); } EXPORT_SYMBOL_GPL(clk_register); diff --git a/include/linux/clk-private.h b/include/linux/clk-private.h index e9c8b98..e7032fd 100644 --- a/include/linux/clk-private.h +++ b/include/linux/clk-private.h @@ -181,8 +181,10 @@ struct clk { * * It is not necessary to call clk_register if __clk_init is used directly with * statically initialized clock data. + * + * Returns 0 on success, otherwise an error code. */ -void __clk_init(struct device *dev, struct clk *clk); +int __clk_init(struct device *dev, struct clk *clk); #endif /* CONFIG_COMMON_CLK */ #endif /* CLK_PRIVATE_H */ diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h index 8981435..97f9fab 100644 --- a/include/linux/clk-provider.h +++ b/include/linux/clk-provider.h @@ -274,7 +274,8 @@ struct clk *clk_register_mux(struct device *dev, const char *name, * clk_register is the primary interface for populating the clock tree with new * clock nodes. It returns a pointer to the newly allocated struct clk which * cannot be dereferenced by driver code but may be used in conjuction with the - * rest of the clock API. + * rest of the clock API. In the event of an error clk_register will return an + * error code; drivers must test for an error code after calling clk_register. */ struct clk *clk_register(struct device *dev, const char *name, const struct clk_ops *ops, struct clk_hw *hw, -- cgit v0.10.2 From 27d545915fd49cbe18a3877d82359896e9851efb Mon Sep 17 00:00:00 2001 From: Mike Turquette Date: Mon, 26 Mar 2012 17:51:03 -0700 Subject: clk: basic: improve parent_names & return errors This patch is the basic clk version of 'clk: core: copy parent_names & return error codes'. The registration functions are changed to allow the core code to copy the array of strings and allow platforms to declare those arrays as __initdata. This patch also converts all of the basic clk registration functions to return error codes which better aligns them with the existing clk.h api. Signed-off-by: Mike Turquette diff --git a/drivers/clk/clk-divider.c b/drivers/clk/clk-divider.c index b1c4b02..5fc541d 100644 --- a/drivers/clk/clk-divider.c +++ b/drivers/clk/clk-divider.c @@ -153,6 +153,18 @@ const struct clk_ops clk_divider_ops = { }; EXPORT_SYMBOL_GPL(clk_divider_ops); +/** + * clk_register_divider - register a divider clock with the clock framework + * @dev: device registering this clock + * @name: name of this clock + * @parent_name: name of clock's parent + * @flags: framework-specific flags + * @reg: register address to adjust divider + * @shift: number of bits to shift the bitfield + * @width: width of the bitfield + * @clk_divider_flags: divider-specific flags for this clock + * @lock: shared register lock for this clock + */ struct clk *clk_register_divider(struct device *dev, const char *name, const char *parent_name, unsigned long flags, void __iomem *reg, u8 shift, u8 width, @@ -161,11 +173,11 @@ struct clk *clk_register_divider(struct device *dev, const char *name, struct clk_divider *div; struct clk *clk; + /* allocate the divider */ div = kzalloc(sizeof(struct clk_divider), GFP_KERNEL); - if (!div) { pr_err("%s: could not allocate divider clk\n", __func__); - return NULL; + return ERR_PTR(-ENOMEM); } /* struct clk_divider assignments */ @@ -175,23 +187,15 @@ struct clk *clk_register_divider(struct device *dev, const char *name, div->flags = clk_divider_flags; div->lock = lock; - if (parent_name) { - div->parent[0] = kstrdup(parent_name, GFP_KERNEL); - if (!div->parent[0]) - goto out; - } - + /* register the clock */ clk = clk_register(dev, name, &clk_divider_ops, &div->hw, - div->parent, + (parent_name ? &parent_name: NULL), (parent_name ? 1 : 0), flags); - if (clk) - return clk; -out: - kfree(div->parent[0]); - kfree(div); + if (IS_ERR(clk)) + kfree(div); - return NULL; + return clk; } diff --git a/drivers/clk/clk-fixed-rate.c b/drivers/clk/clk-fixed-rate.c index 027e477..b555a04 100644 --- a/drivers/clk/clk-fixed-rate.c +++ b/drivers/clk/clk-fixed-rate.c @@ -38,16 +38,23 @@ const struct clk_ops clk_fixed_rate_ops = { }; EXPORT_SYMBOL_GPL(clk_fixed_rate_ops); +/** + * clk_register_fixed_rate - register fixed-rate clock with the clock framework + * @dev: device that is registering this clock + * @name: name of this clock + * @parent_name: name of clock's parent + * @flags: framework-specific flags + * @fixed_rate: non-adjustable clock rate + */ struct clk *clk_register_fixed_rate(struct device *dev, const char *name, const char *parent_name, unsigned long flags, unsigned long fixed_rate) { struct clk_fixed_rate *fixed; - char **parent_names = NULL; - u8 len; + struct clk *clk; + /* allocate fixed-rate clock */ fixed = kzalloc(sizeof(struct clk_fixed_rate), GFP_KERNEL); - if (!fixed) { pr_err("%s: could not allocate fixed clk\n", __func__); return ERR_PTR(-ENOMEM); @@ -56,26 +63,15 @@ struct clk *clk_register_fixed_rate(struct device *dev, const char *name, /* struct clk_fixed_rate assignments */ fixed->fixed_rate = fixed_rate; - if (parent_name) { - parent_names = kmalloc(sizeof(char *), GFP_KERNEL); - - if (! parent_names) - goto out; - - len = sizeof(char) * strlen(parent_name); - - parent_names[0] = kmalloc(len, GFP_KERNEL); - - if (!parent_names[0]) - goto out; - - strncpy(parent_names[0], parent_name, len); - } - -out: - return clk_register(dev, name, + /* register the clock */ + clk = clk_register(dev, name, &clk_fixed_rate_ops, &fixed->hw, - parent_names, + (parent_name ? &parent_name : NULL), (parent_name ? 1 : 0), flags); + + if (IS_ERR(clk)) + kfree(fixed); + + return clk; } diff --git a/drivers/clk/clk-gate.c b/drivers/clk/clk-gate.c index fe2ff9e..42a4b94 100644 --- a/drivers/clk/clk-gate.c +++ b/drivers/clk/clk-gate.c @@ -105,6 +105,17 @@ const struct clk_ops clk_gate_ops = { }; EXPORT_SYMBOL_GPL(clk_gate_ops); +/** + * clk_register_gate - register a gate clock with the clock framework + * @dev: device that is registering this clock + * @name: name of this clock + * @parent_name: name of this clock's parent + * @flags: framework-specific flags for this clock + * @reg: register address to control gating of this clock + * @bit_idx: which bit in the register controls gating of this clock + * @clk_gate_flags: gate-specific flags for this clock + * @lock: shared register lock for this clock + */ struct clk *clk_register_gate(struct device *dev, const char *name, const char *parent_name, unsigned long flags, void __iomem *reg, u8 bit_idx, @@ -113,11 +124,11 @@ struct clk *clk_register_gate(struct device *dev, const char *name, struct clk_gate *gate; struct clk *clk; + /* allocate the gate */ gate = kzalloc(sizeof(struct clk_gate), GFP_KERNEL); - if (!gate) { pr_err("%s: could not allocate gated clk\n", __func__); - return NULL; + return ERR_PTR(-ENOMEM); } /* struct clk_gate assignments */ @@ -126,22 +137,15 @@ struct clk *clk_register_gate(struct device *dev, const char *name, gate->flags = clk_gate_flags; gate->lock = lock; - if (parent_name) { - gate->parent[0] = kstrdup(parent_name, GFP_KERNEL); - if (!gate->parent[0]) - goto out; - } - + /* register the clock */ clk = clk_register(dev, name, &clk_gate_ops, &gate->hw, - gate->parent, + (parent_name ? &parent_name : NULL), (parent_name ? 1 : 0), flags); - if (clk) - return clk; -out: - kfree(gate->parent[0]); - kfree(gate); - return NULL; + if (IS_ERR(clk)) + kfree(gate); + + return clk; } diff --git a/drivers/clk/clk-mux.c b/drivers/clk/clk-mux.c index bd5e598..6e58f11 100644 --- a/drivers/clk/clk-mux.c +++ b/drivers/clk/clk-mux.c @@ -94,9 +94,10 @@ struct clk *clk_register_mux(struct device *dev, const char *name, u8 clk_mux_flags, spinlock_t *lock) { struct clk_mux *mux; + struct clk *clk; + /* allocate the mux */ mux = kzalloc(sizeof(struct clk_mux), GFP_KERNEL); - if (!mux) { pr_err("%s: could not allocate mux clk\n", __func__); return ERR_PTR(-ENOMEM); @@ -109,6 +110,11 @@ struct clk *clk_register_mux(struct device *dev, const char *name, mux->flags = clk_mux_flags; mux->lock = lock; - return clk_register(dev, name, &clk_mux_ops, &mux->hw, + clk = clk_register(dev, name, &clk_mux_ops, &mux->hw, parent_names, num_parents, flags); + + if (IS_ERR(clk)) + kfree(mux); + + return clk; } diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h index 97f9fab..3323d24 100644 --- a/include/linux/clk-provider.h +++ b/include/linux/clk-provider.h @@ -176,7 +176,6 @@ struct clk_gate { u8 bit_idx; u8 flags; spinlock_t *lock; - const char *parent[1]; }; #define CLK_GATE_SET_TO_DISABLE BIT(0) @@ -214,7 +213,6 @@ struct clk_divider { u8 width; u8 flags; spinlock_t *lock; - const char *parent[1]; }; #define CLK_DIVIDER_ONE_BASED BIT(0) -- cgit v0.10.2 From 81536e072b54e30bbfd1a9a6b8094f7b3dd5321c Mon Sep 17 00:00:00 2001 From: Shawn Guo Date: Thu, 12 Apr 2012 20:50:17 +0800 Subject: clk: always pass parent_rate into .round_rate The parent_rate will likely be used by most .round_rate implementation no matter whether flag CLK_SET_RATE_PARENT is set or not, so let's always pass parent_rate into .round_rate. Signed-off-by: Shawn Guo Signed-off-by: Mike Turquette diff --git a/drivers/clk/clk-divider.c b/drivers/clk/clk-divider.c index 5fc541d..03b127c 100644 --- a/drivers/clk/clk-divider.c +++ b/drivers/clk/clk-divider.c @@ -67,8 +67,8 @@ static int clk_divider_bestdiv(struct clk_hw *hw, unsigned long rate, if (divider->flags & CLK_DIVIDER_ONE_BASED) maxdiv--; - if (!best_parent_rate) { - parent_rate = __clk_get_rate(__clk_get_parent(hw->clk)); + if (!(__clk_get_flags(hw->clk) & CLK_SET_RATE_PARENT)) { + parent_rate = *best_parent_rate; bestdiv = DIV_ROUND_UP(parent_rate, rate); bestdiv = bestdiv == 0 ? 1 : bestdiv; bestdiv = bestdiv > maxdiv ? maxdiv : bestdiv; @@ -108,13 +108,7 @@ static long clk_divider_round_rate(struct clk_hw *hw, unsigned long rate, int div; div = clk_divider_bestdiv(hw, rate, prate); - if (prate) - return *prate / div; - else { - unsigned long r; - r = __clk_get_rate(__clk_get_parent(hw->clk)); - return r / div; - } + return *prate / div; } static int clk_divider_set_rate(struct clk_hw *hw, unsigned long rate) diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c index 8f7c384..1ab4f7e 100644 --- a/drivers/clk/clk.c +++ b/drivers/clk/clk.c @@ -582,7 +582,7 @@ EXPORT_SYMBOL_GPL(clk_get_rate); */ unsigned long __clk_round_rate(struct clk *clk, unsigned long rate) { - unsigned long unused; + unsigned long parent_rate = 0; if (!clk) return -EINVAL; @@ -590,10 +590,10 @@ unsigned long __clk_round_rate(struct clk *clk, unsigned long rate) if (!clk->ops->round_rate) return clk->rate; - if (clk->flags & CLK_SET_RATE_PARENT) - return clk->ops->round_rate(clk->hw, rate, &unused); - else - return clk->ops->round_rate(clk->hw, rate, NULL); + if (clk->parent) + parent_rate = clk->parent->rate; + + return clk->ops->round_rate(clk->hw, rate, &parent_rate); } /** @@ -763,7 +763,7 @@ static void clk_calc_subtree(struct clk *clk, unsigned long new_rate) static struct clk *clk_calc_new_rates(struct clk *clk, unsigned long rate) { struct clk *top = clk; - unsigned long best_parent_rate; + unsigned long best_parent_rate = 0; unsigned long new_rate; /* sanity */ @@ -775,9 +775,6 @@ static struct clk *clk_calc_new_rates(struct clk *clk, unsigned long rate) if (!clk->ops->round_rate) { clk->new_rate = clk->rate; return NULL; - } else { - new_rate = clk->ops->round_rate(clk->hw, rate, NULL); - goto out; } } @@ -794,6 +791,7 @@ static struct clk *clk_calc_new_rates(struct clk *clk, unsigned long rate) goto out; } + best_parent_rate = clk->parent->rate; new_rate = clk->ops->round_rate(clk->hw, rate, &best_parent_rate); if (best_parent_rate != clk->parent->rate) { -- cgit v0.10.2 From 1c0035d710dd3bfa86d58f851b8737c7f11a9bbc Mon Sep 17 00:00:00 2001 From: Shawn Guo Date: Thu, 12 Apr 2012 20:50:18 +0800 Subject: clk: pass parent_rate into .set_rate For most of .set_rate implementation, parent_rate will be used, so just like passing parent_rate into .recalc_rate, let's pass parent_rate into .set_rate too. It also updates the kernel doc for .set_rate ops. Signed-off-by: Shawn Guo Signed-off-by: Mike Turquette diff --git a/drivers/clk/clk-divider.c b/drivers/clk/clk-divider.c index 03b127c..90627e4 100644 --- a/drivers/clk/clk-divider.c +++ b/drivers/clk/clk-divider.c @@ -111,14 +111,15 @@ static long clk_divider_round_rate(struct clk_hw *hw, unsigned long rate, return *prate / div; } -static int clk_divider_set_rate(struct clk_hw *hw, unsigned long rate) +static int clk_divider_set_rate(struct clk_hw *hw, unsigned long rate, + unsigned long parent_rate) { struct clk_divider *divider = to_clk_divider(hw); unsigned int div; unsigned long flags = 0; u32 val; - div = __clk_get_rate(__clk_get_parent(hw->clk)) / rate; + div = parent_rate / rate; if (!(divider->flags & CLK_DIVIDER_ONE_BASED)) div--; diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c index 1ab4f7e..62ecac5 100644 --- a/drivers/clk/clk.c +++ b/drivers/clk/clk.c @@ -848,7 +848,7 @@ static void clk_change_rate(struct clk *clk) old_rate = clk->rate; if (clk->ops->set_rate) - clk->ops->set_rate(clk->hw, clk->new_rate); + clk->ops->set_rate(clk->hw, clk->new_rate, clk->parent->rate); if (clk->ops->recalc_rate) clk->rate = clk->ops->recalc_rate(clk->hw, diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h index 3323d24..cb82918 100644 --- a/include/linux/clk-provider.h +++ b/include/linux/clk-provider.h @@ -88,19 +88,11 @@ struct clk_hw { * array index into the value programmed into the hardware. * Returns 0 on success, -EERROR otherwise. * - * @set_rate: Change the rate of this clock. If this callback returns - * CLK_SET_RATE_PARENT, the rate change will be propagated to the - * parent clock (which may propagate again if the parent clock - * also sets this flag). The requested rate of the parent is - * passed back from the callback in the second 'unsigned long *' - * argument. Note that it is up to the hardware clock's set_rate - * implementation to insure that clocks do not run out of spec - * when propgating the call to set_rate up to the parent. One way - * to do this is to gate the clock (via clk_disable and/or - * clk_unprepare) before calling clk_set_rate, then ungating it - * afterward. If your clock also has the CLK_GATE_SET_RATE flag - * set then this will insure safety. Returns 0 on success, - * -EERROR otherwise. + * @set_rate: Change the rate of this clock. The requested rate is specified + * by the second argument, which should typically be the return + * of .round_rate call. The third argument gives the parent rate + * which is likely helpful for most .set_rate implementation. + * Returns 0 on success, -EERROR otherwise. * * The clk_enable/clk_disable and clk_prepare/clk_unprepare pairs allow * implementations to split any work between atomic (enable) and sleepable @@ -125,7 +117,8 @@ struct clk_ops { unsigned long *); int (*set_parent)(struct clk_hw *hw, u8 index); u8 (*get_parent)(struct clk_hw *hw); - int (*set_rate)(struct clk_hw *hw, unsigned long); + int (*set_rate)(struct clk_hw *hw, unsigned long, + unsigned long); void (*init)(struct clk_hw *hw); }; -- cgit v0.10.2 From f4d8af2e5ae6294d5e2220d3963def6f7ffc0873 Mon Sep 17 00:00:00 2001 From: Shawn Guo Date: Thu, 12 Apr 2012 20:50:19 +0800 Subject: clk: propagate round_rate for CLK_SET_RATE_PARENT case Need to propagate round_rate call for the clk that has no .round_rate operation but with flag CLK_SET_RATE_PARENT set. For example, clk_mux is a clk with no .round_rate operation. However, it could likely be in a clk_set_rate propagation path, saying it has parent clk who has .round_rate and .set_rate operations. Signed-off-by: Shawn Guo Signed-off-by: Mike Turquette diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c index 62ecac5..c6e8866 100644 --- a/drivers/clk/clk.c +++ b/drivers/clk/clk.c @@ -587,8 +587,12 @@ unsigned long __clk_round_rate(struct clk *clk, unsigned long rate) if (!clk) return -EINVAL; - if (!clk->ops->round_rate) - return clk->rate; + if (!clk->ops->round_rate) { + if (clk->flags & CLK_SET_RATE_PARENT) + return __clk_round_rate(clk->parent, rate); + else + return clk->rate; + } if (clk->parent) parent_rate = clk->parent->rate; -- cgit v0.10.2 From 1f73f31ad6e37df0679f6842b7405d96515ec8b1 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 17 Apr 2012 16:45:35 +0530 Subject: clk: Fix typo in comment CLK_MUX_INDEX_BIT is mistakenly written as CLK_MUX_INDEX_BITWISE in comment. Fix it. CLK_GATE_SET_TO_DISABLE is mistakenly written as CLK_GATE_SET_DISABLE in comment. Fix it. Signed-off-by: Viresh Kumar Signed-off-by: Mike Turquette diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h index cb82918..8f21489 100644 --- a/include/linux/clk-provider.h +++ b/include/linux/clk-provider.h @@ -159,7 +159,7 @@ struct clk *clk_register_fixed_rate(struct device *dev, const char *name, * Clock which can gate its output. Implements .enable & .disable * * Flags: - * CLK_GATE_SET_DISABLE - by default this clock sets the bit at bit_idx to + * CLK_GATE_SET_TO_DISABLE - by default this clock sets the bit at bit_idx to * enable the clock. Setting this flag does the opposite: setting the bit * disable the clock and clearing it enables the clock */ @@ -232,7 +232,7 @@ struct clk *clk_register_divider(struct device *dev, const char *name, * * Flags: * CLK_MUX_INDEX_ONE - register index starts at 1, not 0 - * CLK_MUX_INDEX_BITWISE - register index is a single bit (power of two) + * CLK_MUX_INDEX_BIT - register index is a single bit (power of two) */ struct clk_mux { struct clk_hw hw; -- cgit v0.10.2 From fbc42aab543307e9bfc1dfb029db929f3fafcacd Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 17 Apr 2012 16:45:37 +0530 Subject: clk: clk-gate: Create clk_gate_endisable() This patch tries to remove duplicate code for clk_gate clocks. This creates another routine clk_gate_endisable() which will take care of enable/disable clock with knowledge of CLK_GATE_SET_TO_DISABLE flag. It works on following logic: For enabling clock, enable = 1 set2dis = 1 -> clear bit -> set = 0 set2dis = 0 -> set bit -> set = 1 For disabling clock, enable = 0 set2dis = 1 -> set bit -> set = 1 set2dis = 0 -> clear bit -> set = 0 So, result is always: enable xor set2dis. Signed-off-by: Viresh Kumar Signed-off-by: Mike Turquette diff --git a/drivers/clk/clk-gate.c b/drivers/clk/clk-gate.c index 42a4b94..0021616 100644 --- a/drivers/clk/clk-gate.c +++ b/drivers/clk/clk-gate.c @@ -28,32 +28,38 @@ #define to_clk_gate(_hw) container_of(_hw, struct clk_gate, hw) -static void clk_gate_set_bit(struct clk_gate *gate) +/* + * It works on following logic: + * + * For enabling clock, enable = 1 + * set2dis = 1 -> clear bit -> set = 0 + * set2dis = 0 -> set bit -> set = 1 + * + * For disabling clock, enable = 0 + * set2dis = 1 -> set bit -> set = 1 + * set2dis = 0 -> clear bit -> set = 0 + * + * So, result is always: enable xor set2dis. + */ +static void clk_gate_endisable(struct clk_hw *hw, int enable) { - u32 reg; + struct clk_gate *gate = to_clk_gate(hw); + int set = gate->flags & CLK_GATE_SET_TO_DISABLE ? 1 : 0; unsigned long flags = 0; + u32 reg; + + set ^= enable; if (gate->lock) spin_lock_irqsave(gate->lock, flags); reg = readl(gate->reg); - reg |= BIT(gate->bit_idx); - writel(reg, gate->reg); - - if (gate->lock) - spin_unlock_irqrestore(gate->lock, flags); -} - -static void clk_gate_clear_bit(struct clk_gate *gate) -{ - u32 reg; - unsigned long flags = 0; - if (gate->lock) - spin_lock_irqsave(gate->lock, flags); + if (set) + reg |= BIT(gate->bit_idx); + else + reg &= ~BIT(gate->bit_idx); - reg = readl(gate->reg); - reg &= ~BIT(gate->bit_idx); writel(reg, gate->reg); if (gate->lock) @@ -62,24 +68,14 @@ static void clk_gate_clear_bit(struct clk_gate *gate) static int clk_gate_enable(struct clk_hw *hw) { - struct clk_gate *gate = to_clk_gate(hw); - - if (gate->flags & CLK_GATE_SET_TO_DISABLE) - clk_gate_clear_bit(gate); - else - clk_gate_set_bit(gate); + clk_gate_endisable(hw, 1); return 0; } static void clk_gate_disable(struct clk_hw *hw) { - struct clk_gate *gate = to_clk_gate(hw); - - if (gate->flags & CLK_GATE_SET_TO_DISABLE) - clk_gate_set_bit(gate); - else - clk_gate_clear_bit(gate); + clk_gate_endisable(hw, 0); } static int clk_gate_is_enabled(struct clk_hw *hw) -- cgit v0.10.2 From 182f9e8cd5e451911a37f121f942409205ede0d6 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 17 Apr 2012 16:45:36 +0530 Subject: clk: clk-private: Add DEFINE_CLK macro All macros used for creating different kind of clocks have similar code for initializing struct clk. This patch removes those redundant lines and create another macro DEFINE_CLK. Signed-off-by: Viresh Kumar Signed-off-by: Mike Turquette diff --git a/include/linux/clk-private.h b/include/linux/clk-private.h index e7032fd..eeae7a3 100644 --- a/include/linux/clk-private.h +++ b/include/linux/clk-private.h @@ -55,6 +55,18 @@ struct clk { * alternative macro for static initialization */ +#define DEFINE_CLK(_name, _ops, _flags, _parent_names, \ + _parents) \ + static struct clk _name = { \ + .name = #_name, \ + .ops = &_ops, \ + .hw = &_name##_hw.hw, \ + .parent_names = _parent_names, \ + .num_parents = ARRAY_SIZE(_parent_names), \ + .parents = _parents, \ + .flags = _flags, \ + } + #define DEFINE_CLK_FIXED_RATE(_name, _flags, _rate, \ _fixed_rate_flags) \ static struct clk _name; \ @@ -66,15 +78,8 @@ struct clk { .fixed_rate = _rate, \ .flags = _fixed_rate_flags, \ }; \ - static struct clk _name = { \ - .name = #_name, \ - .ops = &clk_fixed_rate_ops, \ - .hw = &_name##_hw.hw, \ - .parent_names = _name##_parent_names, \ - .num_parents = \ - ARRAY_SIZE(_name##_parent_names), \ - .flags = _flags, \ - }; + DEFINE_CLK(_name, clk_fixed_rate_ops, _flags, \ + _name##_parent_names, NULL); #define DEFINE_CLK_GATE(_name, _parent_name, _parent_ptr, \ _flags, _reg, _bit_idx, \ @@ -95,16 +100,8 @@ struct clk { .flags = _gate_flags, \ .lock = _lock, \ }; \ - static struct clk _name = { \ - .name = #_name, \ - .ops = &clk_gate_ops, \ - .hw = &_name##_hw.hw, \ - .parent_names = _name##_parent_names, \ - .num_parents = \ - ARRAY_SIZE(_name##_parent_names), \ - .parents = _name##_parents, \ - .flags = _flags, \ - }; + DEFINE_CLK(_name, clk_gate_ops, _flags, \ + _name##_parent_names, _name##_parents); #define DEFINE_CLK_DIVIDER(_name, _parent_name, _parent_ptr, \ _flags, _reg, _shift, _width, \ @@ -126,16 +123,8 @@ struct clk { .flags = _divider_flags, \ .lock = _lock, \ }; \ - static struct clk _name = { \ - .name = #_name, \ - .ops = &clk_divider_ops, \ - .hw = &_name##_hw.hw, \ - .parent_names = _name##_parent_names, \ - .num_parents = \ - ARRAY_SIZE(_name##_parent_names), \ - .parents = _name##_parents, \ - .flags = _flags, \ - }; + DEFINE_CLK(_name, clk_divider_ops, _flags, \ + _name##_parent_names, _name##_parents); #define DEFINE_CLK_MUX(_name, _parent_names, _parents, _flags, \ _reg, _shift, _width, \ @@ -151,16 +140,8 @@ struct clk { .flags = _mux_flags, \ .lock = _lock, \ }; \ - static struct clk _name = { \ - .name = #_name, \ - .ops = &clk_mux_ops, \ - .hw = &_name##_hw.hw, \ - .parent_names = _parent_names, \ - .num_parents = \ - ARRAY_SIZE(_parent_names), \ - .parents = _parents, \ - .flags = _flags, \ - }; + DEFINE_CLK(_name, clk_mux_ops, _flags, _parent_names, \ + _parents); /** * __clk_init - initialize the data structures in a struct clk -- cgit v0.10.2 From 1b2f99037a29d48d03ddd2fd0dc117888ec737f4 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 17 Apr 2012 16:45:38 +0530 Subject: clk: Don't set clk->new_rate twice if (!clk->ops->round_rate && (clk->flags & CLK_SET_RATE_PARENT)) is true, then we don't need to set clk->new_rate here, as we will call clk_calc_subtree() afterwards and it also sets clk->new_rate. Signed-off-by: Viresh Kumar Signed-off-by: Mike Turquette diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c index c6e8866..2dd20c0 100644 --- a/drivers/clk/clk.c +++ b/drivers/clk/clk.c @@ -790,7 +790,7 @@ static struct clk *clk_calc_new_rates(struct clk *clk, unsigned long rate) if (!clk->ops->round_rate) { top = clk_calc_new_rates(clk->parent, rate); - new_rate = clk->new_rate = clk->parent->new_rate; + new_rate = clk->parent->new_rate; goto out; } -- cgit v0.10.2 From 01033be1742abfa4359a40d21e8e8ecca39974e5 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Mon, 9 Apr 2012 15:24:58 -0500 Subject: clk: select CLKDEV_LOOKUP for COMMON_CLK Using the common clock infrastructure without the common clkdev code makes little sense, so select CLKDEV_LOOKUP for COMMON_CLK. Signed-off-by: Rob Herring diff --git a/drivers/clk/Kconfig b/drivers/clk/Kconfig index 165e1fe..f05a60d 100644 --- a/drivers/clk/Kconfig +++ b/drivers/clk/Kconfig @@ -12,6 +12,7 @@ config HAVE_MACH_CLKDEV config COMMON_CLK bool select HAVE_CLK_PREPARE + select CLKDEV_LOOKUP ---help--- The common clock framework is a single definition of struct clk, useful across many platforms, as well as an -- cgit v0.10.2 From 8b7730ddff5affd623bed2affa0d0fa47ebbad3b Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Mon, 9 Apr 2012 15:24:59 -0500 Subject: clk: remove trailing whitespace from clk.h Remove trailing whitespace from 2 lines. Signed-off-by: Rob Herring diff --git a/include/linux/clk.h b/include/linux/clk.h index c9547d9..0e078bd 100644 --- a/include/linux/clk.h +++ b/include/linux/clk.h @@ -220,7 +220,7 @@ void clk_put(struct clk *clk); * Returns rounded clock rate in Hz, or negative errno. */ long clk_round_rate(struct clk *clk, unsigned long rate); - + /** * clk_set_rate - set the clock rate for a clock source * @clk: clock source @@ -229,7 +229,7 @@ long clk_round_rate(struct clk *clk, unsigned long rate); * Returns success (0) or negative errno. */ int clk_set_rate(struct clk *clk, unsigned long rate); - + /** * clk_set_parent - set the parent clock source for this clock * @clk: clock source -- cgit v0.10.2 From d25a8f940c477dc8ed5893d7366a63dba53b0b19 Mon Sep 17 00:00:00 2001 From: Heiko Stuebner Date: Tue, 24 Apr 2012 18:06:49 -0700 Subject: ARM: S3C24XX: Add forgotten clock lookup table to S3C2443 When the hsmmc clock lookup was changed to be lookup tables based, it seems the S3C2443 was forgotten. As subsequent patches will want to add more lookups to it, this patch adds the base table with the missing hsmmc lookup. Signed-off-by: Heiko Stuebner Signed-off-by: Kukjin Kim diff --git a/arch/arm/mach-s3c24xx/clock-s3c2443.c b/arch/arm/mach-s3c24xx/clock-s3c2443.c index efb3ac3..bce1cd3 100644 --- a/arch/arm/mach-s3c24xx/clock-s3c2443.c +++ b/arch/arm/mach-s3c24xx/clock-s3c2443.c @@ -179,6 +179,10 @@ static struct clk *clks[] __initdata = { &clk_hsmmc, }; +static struct clk_lookup s3c2443_clk_lookup[] = { + CLKDEV_INIT("s3c-sdhci.1", "mmc_busclk.2", &clk_hsmmc), +}; + void __init s3c2443_init_clocks(int xtal) { unsigned long epllcon = __raw_readl(S3C2443_EPLLCON); @@ -210,6 +214,7 @@ void __init s3c2443_init_clocks(int xtal) s3c_register_clocks(init_clocks_off, ARRAY_SIZE(init_clocks_off)); s3c_disable_clocks(init_clocks_off, ARRAY_SIZE(init_clocks_off)); + clkdev_add_table(s3c2443_clk_lookup, ARRAY_SIZE(s3c2443_clk_lookup)); s3c_pwmclk_init(); } -- cgit v0.10.2 From ad6c1d43964dcba57bfe20b7185af5a71f94a1b7 Mon Sep 17 00:00:00 2001 From: Heiko Stuebner Date: Tue, 24 Apr 2012 18:06:53 -0700 Subject: ARM: S3C24XX: claim spi channels for hsspi in dma-s3c2443 SoCs starting with the S3C2443 contain SPI controllers compatible with the spi-s3c64xx driver and therefore need separate dma channels for rx and tx. This patch introduces dma channel declarations for these and changes the dma-s3c2443.c accordingly. None of the older SoCs use the spi-dma at all. Most boards bitbang their spi use and the spi-s3c24xx driver also does not use the dma system. Signed-off-by: Heiko Stuebner Signed-off-by: Kukjin Kim diff --git a/arch/arm/mach-s3c24xx/dma-s3c2443.c b/arch/arm/mach-s3c24xx/dma-s3c2443.c index e227c47..2d94228 100644 --- a/arch/arm/mach-s3c24xx/dma-s3c2443.c +++ b/arch/arm/mach-s3c24xx/dma-s3c2443.c @@ -55,12 +55,20 @@ static struct s3c24xx_dma_map __initdata s3c2443_dma_mappings[] = { .name = "sdi", .channels = MAP(S3C2443_DMAREQSEL_SDI), }, - [DMACH_SPI0] = { - .name = "spi0", + [DMACH_SPI0_RX] = { + .name = "spi0-rx", + .channels = MAP(S3C2443_DMAREQSEL_SPI0RX), + }, + [DMACH_SPI0_TX] = { + .name = "spi0-tx", .channels = MAP(S3C2443_DMAREQSEL_SPI0TX), }, - [DMACH_SPI1] = { /* only on S3C2443/S3C2450 */ - .name = "spi1", + [DMACH_SPI1_RX] = { /* only on S3C2443/S3C2450 */ + .name = "spi1-rx", + .channels = MAP(S3C2443_DMAREQSEL_SPI1RX), + }, + [DMACH_SPI1_TX] = { /* only on S3C2443/S3C2450 */ + .name = "spi1-tx", .channels = MAP(S3C2443_DMAREQSEL_SPI1TX), }, [DMACH_UART0] = { diff --git a/arch/arm/mach-s3c24xx/include/mach/dma.h b/arch/arm/mach-s3c24xx/include/mach/dma.h index acbdfec..454831b 100644 --- a/arch/arm/mach-s3c24xx/include/mach/dma.h +++ b/arch/arm/mach-s3c24xx/include/mach/dma.h @@ -47,6 +47,10 @@ enum dma_ch { DMACH_UART2_SRC2, DMACH_UART3, /* s3c2443 has extra uart */ DMACH_UART3_SRC2, + DMACH_SPI0_TX, /* s3c2443/2416/2450 hsspi0 */ + DMACH_SPI0_RX, /* s3c2443/2416/2450 hsspi0 */ + DMACH_SPI1_TX, /* s3c2443/2450 hsspi1 */ + DMACH_SPI1_RX, /* s3c2443/2450 hsspi1 */ DMACH_MAX, /* the end entry */ }; -- cgit v0.10.2 From 97e1ed7960bc3b86c51aef8d3d014286032d476e Mon Sep 17 00:00:00 2001 From: Heiko Stuebner Date: Tue, 24 Apr 2012 18:06:58 -0700 Subject: ARM: S3C24XX: Add map entries needed by the s3c64xx-spi devices The S3C_PA_SPIx constants are only used by the s3c64xx-spi driver and don't conflict with the older SoCs as they don't support hsspi. Signed-off-by: Heiko Stuebner Signed-off-by: Kukjin Kim diff --git a/arch/arm/mach-s3c24xx/include/mach/map.h b/arch/arm/mach-s3c24xx/include/mach/map.h index 78ae807..8ba381f 100644 --- a/arch/arm/mach-s3c24xx/include/mach/map.h +++ b/arch/arm/mach-s3c24xx/include/mach/map.h @@ -98,6 +98,8 @@ /* SPI */ #define S3C2410_PA_SPI (0x59000000) +#define S3C2443_PA_SPI0 (0x52000000) +#define S3C2443_PA_SPI1 S3C2410_PA_SPI /* SDI */ #define S3C2410_PA_SDI (0x5A000000) @@ -162,4 +164,7 @@ #define S3C_PA_WDT S3C2410_PA_WATCHDOG #define S3C_PA_NAND S3C24XX_PA_NAND +#define S3C_PA_SPI0 S3C2443_PA_SPI0 +#define S3C_PA_SPI1 S3C2443_PA_SPI1 + #endif /* __ASM_ARCH_MAP_H */ -- cgit v0.10.2 From 5c2f2917168e7a36c0fda0e7c2b0246c83eb7fe0 Mon Sep 17 00:00:00 2001 From: Heiko Stuebner Date: Tue, 24 Apr 2012 18:07:03 -0700 Subject: ARM: S3C24XX: Add clock-lookup entries required by s3c64xx-spi Currently usable are busclk0 and busclk2. Signed-off-by: Heiko Stuebner Signed-off-by: Kukjin Kim diff --git a/arch/arm/mach-s3c24xx/clock-s3c2416.c b/arch/arm/mach-s3c24xx/clock-s3c2416.c index dbc9ab4..8702ecf 100644 --- a/arch/arm/mach-s3c24xx/clock-s3c2416.c +++ b/arch/arm/mach-s3c24xx/clock-s3c2416.c @@ -144,6 +144,7 @@ static struct clk_lookup s3c2416_clk_lookup[] = { CLKDEV_INIT("s3c-sdhci.0", "mmc_busclk.0", &hsmmc0_clk), CLKDEV_INIT("s3c-sdhci.0", "mmc_busclk.2", &hsmmc_mux0.clk), CLKDEV_INIT("s3c-sdhci.1", "mmc_busclk.2", &hsmmc_mux1.clk), + CLKDEV_INIT("s3c64xx-spi.0", "spi_busclk2", &hsspi_mux.clk), }; void __init s3c2416_init_clocks(int xtal) diff --git a/arch/arm/mach-s3c24xx/clock-s3c2443.c b/arch/arm/mach-s3c24xx/clock-s3c2443.c index bce1cd3..a4c5a52 100644 --- a/arch/arm/mach-s3c24xx/clock-s3c2443.c +++ b/arch/arm/mach-s3c24xx/clock-s3c2443.c @@ -181,6 +181,7 @@ static struct clk *clks[] __initdata = { static struct clk_lookup s3c2443_clk_lookup[] = { CLKDEV_INIT("s3c-sdhci.1", "mmc_busclk.2", &clk_hsmmc), + CLKDEV_INIT("s3c64xx-spi.0", "spi_busclk2", &clk_hsspi.clk), }; void __init s3c2443_init_clocks(int xtal) diff --git a/arch/arm/mach-s3c24xx/common-s3c2443.c b/arch/arm/mach-s3c24xx/common-s3c2443.c index 4604315..aeeb2be 100644 --- a/arch/arm/mach-s3c24xx/common-s3c2443.c +++ b/arch/arm/mach-s3c24xx/common-s3c2443.c @@ -424,11 +424,6 @@ static struct clk init_clocks_off[] = { .enable = s3c2443_clkcon_enable_p, .ctrlbit = S3C2443_PCLKCON_IIS, }, { - .name = "hsspi", - .parent = &clk_p, - .enable = s3c2443_clkcon_enable_p, - .ctrlbit = S3C2443_PCLKCON_HSSPI, - }, { .name = "adc", .parent = &clk_p, .enable = s3c2443_clkcon_enable_p, @@ -562,6 +557,14 @@ static struct clk hsmmc1_clk = { .ctrlbit = S3C2443_HCLKCON_HSMMC, }; +static struct clk hsspi_clk = { + .name = "spi", + .devname = "s3c64xx-spi.0", + .parent = &clk_p, + .enable = s3c2443_clkcon_enable_p, + .ctrlbit = S3C2443_PCLKCON_HSSPI, +}; + /* EPLLCON compatible enough to get on/off information */ void __init_or_cpufreq s3c2443_common_setup_clocks(pll_fn get_mpll) @@ -612,6 +615,7 @@ static struct clk *clks[] __initdata = { &clk_usb_bus, &clk_armdiv, &hsmmc1_clk, + &hsspi_clk, }; static struct clksrc_clk *clksrcs[] __initdata = { @@ -629,6 +633,7 @@ static struct clk_lookup s3c2443_clk_lookup[] = { CLKDEV_INIT(NULL, "clk_uart_baud2", &clk_p), CLKDEV_INIT(NULL, "clk_uart_baud3", &clk_esys_uart.clk), CLKDEV_INIT("s3c-sdhci.1", "mmc_busclk.0", &hsmmc1_clk), + CLKDEV_INIT("s3c64xx-spi.0", "spi_busclk0", &hsspi_clk), }; void __init s3c2443_common_init_clocks(int xtal, pll_fn get_mpll, -- cgit v0.10.2 From f03eb25e223cf1fc9e807a479b776e8e2f1cc9e1 Mon Sep 17 00:00:00 2001 From: Heiko Stuebner Date: Tue, 24 Apr 2012 18:07:10 -0700 Subject: ARM: S3C24XX: Add HSSPI setup callback for s3c64xx-spi driver This lets the s3c64xx-spi driver know the specifics of the controller- variant and also setups the gpios and the misccr bit. This setup is valid for all S3C24XX SoCs containing a HSSPI controller (i.e. S3C2416/2450 and S3C2443) Signed-off-by: Heiko Stuebner Signed-off-by: Kukjin Kim diff --git a/arch/arm/mach-s3c24xx/Kconfig b/arch/arm/mach-s3c24xx/Kconfig index 0f3a327..38e6b30 100644 --- a/arch/arm/mach-s3c24xx/Kconfig +++ b/arch/arm/mach-s3c24xx/Kconfig @@ -518,6 +518,11 @@ config S3C2443_DMA help Internal config node for S3C2443 DMA support +config S3C2443_SETUP_SPI + bool + help + Common setup code for SPI GPIO configurations + endif # CPU_S3C2443 || CPU_S3C2416 if CPU_S3C2443 diff --git a/arch/arm/mach-s3c24xx/Makefile b/arch/arm/mach-s3c24xx/Makefile index 3518fe8..d0f3a92 100644 --- a/arch/arm/mach-s3c24xx/Makefile +++ b/arch/arm/mach-s3c24xx/Makefile @@ -91,5 +91,6 @@ obj-$(CONFIG_MACH_OSIRIS_DVS) += mach-osiris-dvs.o # device setup obj-$(CONFIG_S3C2416_SETUP_SDHCI_GPIO) += setup-sdhci-gpio.o +obj-$(CONFIG_S3C2443_SETUP_SPI) += setup-spi.o obj-$(CONFIG_ARCH_S3C24XX) += setup-i2c.o obj-$(CONFIG_S3C24XX_SETUP_TS) += setup-ts.o diff --git a/arch/arm/mach-s3c24xx/setup-spi.c b/arch/arm/mach-s3c24xx/setup-spi.c new file mode 100644 index 0000000..5712c85 --- /dev/null +++ b/arch/arm/mach-s3c24xx/setup-spi.c @@ -0,0 +1,39 @@ +/* + * HS-SPI device setup for S3C2443/S3C2416 + * + * Copyright (C) 2011 Samsung Electronics Ltd. + * http://www.samsung.com/ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include + +#include +#include + +#include +#include + +#ifdef CONFIG_S3C64XX_DEV_SPI0 +struct s3c64xx_spi_info s3c64xx_spi0_pdata __initdata = { + .fifo_lvl_mask = 0x7f, + .rx_lvl_offset = 13, + .tx_st_done = 21, + .high_speed = 1, +}; + +int s3c64xx_spi0_cfg_gpio(struct platform_device *pdev) +{ + /* enable hsspi bit in misccr */ + s3c2410_modify_misccr(S3C2416_MISCCR_HSSPI_EN2, 1); + + s3c_gpio_cfgall_range(S3C2410_GPE(11), 3, + S3C_GPIO_SFN(2), S3C_GPIO_PULL_UP); + + return 0; +} +#endif -- cgit v0.10.2 From c4bec603831d2f792a73c2422118f07000c15d5d Mon Sep 17 00:00:00 2001 From: Heiko Stuebner Date: Tue, 24 Apr 2012 18:07:16 -0700 Subject: spi/s3c64xx: Allow usage for ARCH_S3C24XX Newer SoCs from the S3C24XX line, namely S3C2416/2443/2450 contain hsspi-controllers compatible with the s3c64xx type. The previous patches enabled platform support for it, so make the driver also usable for the S3C24xx architecture. Signed-off-by: Heiko Stuebner Acked-by: Grant Likely Signed-off-by: Kukjin Kim diff --git a/drivers/spi/Kconfig b/drivers/spi/Kconfig index 3ed7483..f899def 100644 --- a/drivers/spi/Kconfig +++ b/drivers/spi/Kconfig @@ -311,7 +311,7 @@ config SPI_S3C24XX_FIQ config SPI_S3C64XX tristate "Samsung S3C64XX series type SPI" - depends on (ARCH_S3C64XX || ARCH_S5P64X0 || ARCH_EXYNOS) + depends on (ARCH_S3C24XX || ARCH_S3C64XX || ARCH_S5P64X0 || ARCH_EXYNOS) select S3C64XX_DMA if ARCH_S3C64XX help SPI driver for Samsung S3C64XX and newer SoCs. -- cgit v0.10.2 From affa115ed365d646ad1a8cc7d2d063b8181cce37 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Thu, 12 Apr 2012 09:01:49 +0200 Subject: dma/amba-pl08x: add support for the Nomadik variant The Nomadik PL080 variant has some extra protection bits that may be set, so we need to check these bits to see if the channels are actually available for the DMAengine to use. Cc: Russell King Cc: Alim Akhtar Cc: Alessandro Rubini Reviewed-by: Viresh Kumar Signed-off-by: Linus Walleij Signed-off-by: Vinod Koul diff --git a/arch/arm/include/asm/hardware/pl080.h b/arch/arm/include/asm/hardware/pl080.h index 33c78d7..4eea210 100644 --- a/arch/arm/include/asm/hardware/pl080.h +++ b/arch/arm/include/asm/hardware/pl080.h @@ -102,6 +102,8 @@ #define PL080_WIDTH_16BIT (0x1) #define PL080_WIDTH_32BIT (0x2) +#define PL080N_CONFIG_ITPROT (1 << 20) +#define PL080N_CONFIG_SECPROT (1 << 19) #define PL080_CONFIG_HALT (1 << 18) #define PL080_CONFIG_ACTIVE (1 << 17) /* RO */ #define PL080_CONFIG_LOCK (1 << 16) diff --git a/drivers/dma/amba-pl08x.c b/drivers/dma/amba-pl08x.c index 08589c6..629250e 100644 --- a/drivers/dma/amba-pl08x.c +++ b/drivers/dma/amba-pl08x.c @@ -95,10 +95,14 @@ static struct amba_driver pl08x_amba_driver; * struct vendor_data - vendor-specific config parameters for PL08x derivatives * @channels: the number of channels available in this variant * @dualmaster: whether this version supports dual AHB masters or not. + * @nomadik: whether the channels have Nomadik security extension bits + * that need to be checked for permission before use and some registers are + * missing */ struct vendor_data { u8 channels; bool dualmaster; + bool nomadik; }; /* @@ -385,7 +389,7 @@ pl08x_get_phy_channel(struct pl08x_driver_data *pl08x, spin_lock_irqsave(&ch->lock, flags); - if (!ch->serving) { + if (!ch->locked && !ch->serving) { ch->serving = virt_chan; ch->signal = -1; spin_unlock_irqrestore(&ch->lock, flags); @@ -1483,6 +1487,9 @@ bool pl08x_filter_id(struct dma_chan *chan, void *chan_id) */ static void pl08x_ensure_on(struct pl08x_driver_data *pl08x) { + /* The Nomadik variant does not have the config register */ + if (pl08x->vd->nomadik) + return; writel(PL080_CONFIG_ENABLE, pl08x->base + PL080_CONFIG); } @@ -1772,8 +1779,10 @@ static int pl08x_debugfs_show(struct seq_file *s, void *data) spin_lock_irqsave(&ch->lock, flags); virt_chan = ch->serving; - seq_printf(s, "%d\t\t%s\n", - ch->id, virt_chan ? virt_chan->name : "(none)"); + seq_printf(s, "%d\t\t%s%s\n", + ch->id, + virt_chan ? virt_chan->name : "(none)", + ch->locked ? " LOCKED" : ""); spin_unlock_irqrestore(&ch->lock, flags); } @@ -1917,7 +1926,7 @@ static int pl08x_probe(struct amba_device *adev, const struct amba_id *id) } /* Initialize physical channels */ - pl08x->phy_chans = kmalloc((vd->channels * sizeof(*pl08x->phy_chans)), + pl08x->phy_chans = kzalloc((vd->channels * sizeof(*pl08x->phy_chans)), GFP_KERNEL); if (!pl08x->phy_chans) { dev_err(&adev->dev, "%s failed to allocate " @@ -1932,8 +1941,23 @@ static int pl08x_probe(struct amba_device *adev, const struct amba_id *id) ch->id = i; ch->base = pl08x->base + PL080_Cx_BASE(i); spin_lock_init(&ch->lock); - ch->serving = NULL; ch->signal = -1; + + /* + * Nomadik variants can have channels that are locked + * down for the secure world only. Lock up these channels + * by perpetually serving a dummy virtual channel. + */ + if (vd->nomadik) { + u32 val; + + val = readl(ch->base + PL080_CH_CONFIG); + if (val & (PL080N_CONFIG_ITPROT | PL080N_CONFIG_SECPROT)) { + dev_info(&adev->dev, "physical channel %d reserved for secure access only\n", i); + ch->locked = true; + } + } + dev_dbg(&adev->dev, "physical channel %d is %s\n", i, pl08x_phy_channel_busy(ch) ? "BUSY" : "FREE"); } @@ -2016,6 +2040,12 @@ static struct vendor_data vendor_pl080 = { .dualmaster = true, }; +static struct vendor_data vendor_nomadik = { + .channels = 8, + .dualmaster = true, + .nomadik = true, +}; + static struct vendor_data vendor_pl081 = { .channels = 2, .dualmaster = false, @@ -2036,9 +2066,9 @@ static struct amba_id pl08x_ids[] = { }, /* Nomadik 8815 PL080 variant */ { - .id = 0x00280880, + .id = 0x00280080, .mask = 0x00ffffff, - .data = &vendor_pl080, + .data = &vendor_nomadik, }, { 0, 0 }, }; diff --git a/include/linux/amba/pl08x.h b/include/linux/amba/pl08x.h index e64ce2c..0254901 100644 --- a/include/linux/amba/pl08x.h +++ b/include/linux/amba/pl08x.h @@ -92,6 +92,8 @@ struct pl08x_bus_data { * right now * @serving: the virtual channel currently being served by this physical * channel + * @locked: channel unavailable for the system, e.g. dedicated to secure + * world */ struct pl08x_phy_chan { unsigned int id; @@ -99,6 +101,7 @@ struct pl08x_phy_chan { spinlock_t lock; int signal; struct pl08x_dma_chan *serving; + bool locked; }; /** -- cgit v0.10.2 From 4cd9069a0a0e5fb8b007425c937642682ac96c76 Mon Sep 17 00:00:00 2001 From: Richard Kennedy Date: Wed, 25 Apr 2012 14:53:05 +0100 Subject: fs: remove 8 bytes of padding from struct writeback_control on 64 bit builds Reorder structure writeback_control to remove 8 bytes of padding on 64 bit builds, this shrinks its size from 48 to 40 bytes. This structure is always on the stack and uses C99 named initialisation, so should be safe and have a small impact on stack usage. Signed-off-by: Richard Kennedy Signed-off-by: Fengguang Wu diff --git a/include/linux/writeback.h b/include/linux/writeback.h index a2b84f5..3309736 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -58,7 +58,6 @@ extern const char *wb_reason_name[]; * in a manner such that unspecified fields are set to zero. */ struct writeback_control { - enum writeback_sync_modes sync_mode; long nr_to_write; /* Write this many pages, and decrement this for each page written */ long pages_skipped; /* Pages which were not written */ @@ -71,6 +70,8 @@ struct writeback_control { loff_t range_start; loff_t range_end; + enum writeback_sync_modes sync_mode; + unsigned for_kupdate:1; /* A kupdate writeback */ unsigned for_background:1; /* A background writeback */ unsigned tagged_writepages:1; /* tag-and-write to avoid livelock */ -- cgit v0.10.2 From 7560e3f3581ed415828d3f431b8622fa38c2d133 Mon Sep 17 00:00:00 2001 From: Sascha Hauer Date: Wed, 7 Mar 2012 09:30:06 +0100 Subject: dmaengine i.MX SDMA: do not depend on grouped clocks the current i.MX clock support groups together unrelated clocks to a single clock which is then used by the driver. This can't be accomplished with the generic clock framework so we instead request the individual clocks in the driver. For i.MX there are generally three different clocks: ipg: bus clock (needed to access registers) ahb: dma relevant clock, sometimes referred to as hclk in the datasheet per: bit clock, pixel clock This patch changes the driver to request the individual clocks. Currently all clk_get will get the same clock until the SoCs are converted to the generic clock framework Signed-off-by: Sascha Hauer diff --git a/drivers/dma/imx-sdma.c b/drivers/dma/imx-sdma.c index d3e38e2..fddccae 100644 --- a/drivers/dma/imx-sdma.c +++ b/drivers/dma/imx-sdma.c @@ -322,7 +322,8 @@ struct sdma_engine { struct sdma_context_data *context; dma_addr_t context_phys; struct dma_device dma_device; - struct clk *clk; + struct clk *clk_ipg; + struct clk *clk_ahb; struct mutex channel_0_lock; struct sdma_script_start_addrs *script_addrs; }; @@ -859,7 +860,8 @@ static int sdma_alloc_chan_resources(struct dma_chan *chan) sdmac->peripheral_type = data->peripheral_type; sdmac->event_id0 = data->dma_request; - clk_enable(sdmac->sdma->clk); + clk_enable(sdmac->sdma->clk_ipg); + clk_enable(sdmac->sdma->clk_ahb); ret = sdma_request_channel(sdmac); if (ret) @@ -896,7 +898,8 @@ static void sdma_free_chan_resources(struct dma_chan *chan) dma_free_coherent(NULL, PAGE_SIZE, sdmac->bd, sdmac->bd_phys); - clk_disable(sdma->clk); + clk_disable(sdma->clk_ipg); + clk_disable(sdma->clk_ahb); } static struct dma_async_tx_descriptor *sdma_prep_slave_sg( @@ -1169,12 +1172,14 @@ static void sdma_load_firmware(const struct firmware *fw, void *context) addr = (void *)header + header->script_addrs_start; ram_code = (void *)header + header->ram_code_start; - clk_enable(sdma->clk); + clk_enable(sdma->clk_ipg); + clk_enable(sdma->clk_ahb); /* download the RAM image for SDMA */ sdma_load_script(sdma, ram_code, header->ram_code_size, addr->ram_code_start_addr); - clk_disable(sdma->clk); + clk_disable(sdma->clk_ipg); + clk_disable(sdma->clk_ahb); sdma_add_scripts(sdma, addr); @@ -1216,7 +1221,8 @@ static int __init sdma_init(struct sdma_engine *sdma) return -ENODEV; } - clk_enable(sdma->clk); + clk_enable(sdma->clk_ipg); + clk_enable(sdma->clk_ahb); /* Be sure SDMA has not started yet */ writel_relaxed(0, sdma->regs + SDMA_H_C0PTR); @@ -1269,12 +1275,14 @@ static int __init sdma_init(struct sdma_engine *sdma) /* Initializes channel's priorities */ sdma_set_channel_priority(&sdma->channel[0], 7); - clk_disable(sdma->clk); + clk_disable(sdma->clk_ipg); + clk_disable(sdma->clk_ahb); return 0; err_dma_alloc: - clk_disable(sdma->clk); + clk_disable(sdma->clk_ipg); + clk_disable(sdma->clk_ahb); dev_err(sdma->dev, "initialisation failed with %d\n", ret); return ret; } @@ -1313,12 +1321,21 @@ static int __init sdma_probe(struct platform_device *pdev) goto err_request_region; } - sdma->clk = clk_get(&pdev->dev, NULL); - if (IS_ERR(sdma->clk)) { - ret = PTR_ERR(sdma->clk); + sdma->clk_ipg = devm_clk_get(&pdev->dev, "ipg"); + if (IS_ERR(sdma->clk_ipg)) { + ret = PTR_ERR(sdma->clk_ipg); goto err_clk; } + sdma->clk_ahb = devm_clk_get(&pdev->dev, "ahb"); + if (IS_ERR(sdma->clk_ahb)) { + ret = PTR_ERR(sdma->clk_ahb); + goto err_clk; + } + + clk_prepare(sdma->clk_ipg); + clk_prepare(sdma->clk_ahb); + sdma->regs = ioremap(iores->start, resource_size(iores)); if (!sdma->regs) { ret = -ENOMEM; @@ -1426,7 +1443,6 @@ err_alloc: err_request_irq: iounmap(sdma->regs); err_ioremap: - clk_put(sdma->clk); err_clk: release_mem_region(iores->start, resource_size(iores)); err_request_region: -- cgit v0.10.2 From aa29d840e3138fdf9459cc1e0101d6f25f8a48f4 Mon Sep 17 00:00:00 2001 From: Sascha Hauer Date: Wed, 7 Mar 2012 09:30:22 +0100 Subject: spi i.MX: do not depend on grouped clocks the current i.MX clock support groups together unrelated clocks to a single clock which is then used by the driver. This can't be accomplished with the generic clock framework so we instead request the individual clocks in the driver. For i.MX there are generally three different clocks: ipg: bus clock (needed to access registers) ahb: dma relevant clock, sometimes referred to as hclk in the datasheet per: bit clock, pixel clock This patch changes the driver to request the individual clocks. Currently all clk_get will get the same clock until the SoCs are converted to the generic clock framework Signed-off-by: Sascha Hauer diff --git a/drivers/spi/spi-imx.c b/drivers/spi/spi-imx.c index 570f220..4b66886 100644 --- a/drivers/spi/spi-imx.c +++ b/drivers/spi/spi-imx.c @@ -85,7 +85,8 @@ struct spi_imx_data { struct completion xfer_done; void __iomem *base; int irq; - struct clk *clk; + struct clk *clk_per; + struct clk *clk_ipg; unsigned long spi_clk; unsigned int count; @@ -845,15 +846,22 @@ static int __devinit spi_imx_probe(struct platform_device *pdev) goto out_iounmap; } - spi_imx->clk = clk_get(&pdev->dev, NULL); - if (IS_ERR(spi_imx->clk)) { - dev_err(&pdev->dev, "unable to get clock\n"); - ret = PTR_ERR(spi_imx->clk); + spi_imx->clk_ipg = devm_clk_get(&pdev->dev, "ipg"); + if (IS_ERR(spi_imx->clk_ipg)) { + ret = PTR_ERR(spi_imx->clk_ipg); goto out_free_irq; } - clk_enable(spi_imx->clk); - spi_imx->spi_clk = clk_get_rate(spi_imx->clk); + spi_imx->clk_per = devm_clk_get(&pdev->dev, "per"); + if (IS_ERR(spi_imx->clk_per)) { + ret = PTR_ERR(spi_imx->clk_per); + goto out_free_irq; + } + + clk_prepare_enable(spi_imx->clk_per); + clk_prepare_enable(spi_imx->clk_ipg); + + spi_imx->spi_clk = clk_get_rate(spi_imx->clk_per); spi_imx->devtype_data->reset(spi_imx); @@ -871,8 +879,8 @@ static int __devinit spi_imx_probe(struct platform_device *pdev) return ret; out_clk_put: - clk_disable(spi_imx->clk); - clk_put(spi_imx->clk); + clk_disable_unprepare(spi_imx->clk_per); + clk_disable_unprepare(spi_imx->clk_ipg); out_free_irq: free_irq(spi_imx->irq, spi_imx); out_iounmap: @@ -900,8 +908,8 @@ static int __devexit spi_imx_remove(struct platform_device *pdev) spi_bitbang_stop(&spi_imx->bitbang); writel(0, spi_imx->base + MXC_CSPICTRL); - clk_disable(spi_imx->clk); - clk_put(spi_imx->clk); + clk_disable_unprepare(spi_imx->clk_per); + clk_disable_unprepare(spi_imx->clk_ipg); free_irq(spi_imx->irq, spi_imx); iounmap(spi_imx->base); -- cgit v0.10.2 From 13aaea03b9e33af420a327b7ab800332d6fbabf5 Mon Sep 17 00:00:00 2001 From: Sascha Hauer Date: Wed, 7 Mar 2012 09:30:36 +0100 Subject: video imxfb: do not depend on grouped clocks the current i.MX clock support groups together unrelated clocks to a single clock which is then used by the driver. This can't be accomplished with the generic clock framework so we instead request the individual clocks in the driver. For i.MX there are generally three different clocks: ipg: bus clock (needed to access registers) ahb: dma relevant clock, sometimes referred to as hclk in the datasheet per: bit clock, pixel clock This patch changes the driver to request the individual clocks. Currently all clk_get will get the same clock until the SoCs are converted to the generic clock framework Signed-off-by: Sascha Hauer diff --git a/drivers/video/imxfb.c b/drivers/video/imxfb.c index f135dbe..caad368 100644 --- a/drivers/video/imxfb.c +++ b/drivers/video/imxfb.c @@ -131,7 +131,9 @@ struct imxfb_rgb { struct imxfb_info { struct platform_device *pdev; void __iomem *regs; - struct clk *clk; + struct clk *clk_ipg; + struct clk *clk_ahb; + struct clk *clk_per; /* * These are the addresses we mapped @@ -340,7 +342,7 @@ static int imxfb_check_var(struct fb_var_screeninfo *var, struct fb_info *info) pr_debug("var->bits_per_pixel=%d\n", var->bits_per_pixel); - lcd_clk = clk_get_rate(fbi->clk); + lcd_clk = clk_get_rate(fbi->clk_per); tmp = var->pixclock * (unsigned long long)lcd_clk; @@ -455,11 +457,17 @@ static int imxfb_bl_update_status(struct backlight_device *bl) fbi->pwmr = (fbi->pwmr & ~0xFF) | brightness; - if (bl->props.fb_blank != FB_BLANK_UNBLANK) - clk_enable(fbi->clk); + if (bl->props.fb_blank != FB_BLANK_UNBLANK) { + clk_prepare_enable(fbi->clk_ipg); + clk_prepare_enable(fbi->clk_ahb); + clk_prepare_enable(fbi->clk_per); + } writel(fbi->pwmr, fbi->regs + LCDC_PWMR); - if (bl->props.fb_blank != FB_BLANK_UNBLANK) - clk_disable(fbi->clk); + if (bl->props.fb_blank != FB_BLANK_UNBLANK) { + clk_disable_unprepare(fbi->clk_per); + clk_disable_unprepare(fbi->clk_ahb); + clk_disable_unprepare(fbi->clk_ipg); + } return 0; } @@ -522,7 +530,9 @@ static void imxfb_enable_controller(struct imxfb_info *fbi) */ writel(RMCR_LCDC_EN_MX1, fbi->regs + LCDC_RMCR); - clk_enable(fbi->clk); + clk_prepare_enable(fbi->clk_ipg); + clk_prepare_enable(fbi->clk_ahb); + clk_prepare_enable(fbi->clk_per); if (fbi->backlight_power) fbi->backlight_power(1); @@ -539,7 +549,9 @@ static void imxfb_disable_controller(struct imxfb_info *fbi) if (fbi->lcd_power) fbi->lcd_power(0); - clk_disable(fbi->clk); + clk_disable_unprepare(fbi->clk_per); + clk_disable_unprepare(fbi->clk_ipg); + clk_disable_unprepare(fbi->clk_ahb); writel(0, fbi->regs + LCDC_RMCR); } @@ -770,10 +782,21 @@ static int __init imxfb_probe(struct platform_device *pdev) goto failed_req; } - fbi->clk = clk_get(&pdev->dev, NULL); - if (IS_ERR(fbi->clk)) { - ret = PTR_ERR(fbi->clk); - dev_err(&pdev->dev, "unable to get clock: %d\n", ret); + fbi->clk_ipg = devm_clk_get(&pdev->dev, "ipg"); + if (IS_ERR(fbi->clk_ipg)) { + ret = PTR_ERR(fbi->clk_ipg); + goto failed_getclock; + } + + fbi->clk_ahb = devm_clk_get(&pdev->dev, "ahb"); + if (IS_ERR(fbi->clk_ahb)) { + ret = PTR_ERR(fbi->clk_ahb); + goto failed_getclock; + } + + fbi->clk_per = devm_clk_get(&pdev->dev, "per"); + if (IS_ERR(fbi->clk_per)) { + ret = PTR_ERR(fbi->clk_per); goto failed_getclock; } @@ -858,7 +881,6 @@ failed_platform_init: failed_map: iounmap(fbi->regs); failed_ioremap: - clk_put(fbi->clk); failed_getclock: release_mem_region(res->start, resource_size(res)); failed_req: @@ -895,8 +917,6 @@ static int __devexit imxfb_remove(struct platform_device *pdev) iounmap(fbi->regs); release_mem_region(res->start, resource_size(res)); - clk_disable(fbi->clk); - clk_put(fbi->clk); platform_set_drvdata(pdev, NULL); -- cgit v0.10.2 From f4d40de39a23f0c39cca55ac63e1175c69c3d2f7 Mon Sep 17 00:00:00 2001 From: Sascha Hauer Date: Wed, 7 Mar 2012 09:30:49 +0100 Subject: net fec: do not depend on grouped clocks the current i.MX clock support groups together unrelated clocks to a single clock which is then used by the driver. This can't be accomplished with the generic clock framework so we instead request the individual clocks in the driver. For i.MX there are generally three different clocks: ipg: bus clock (needed to access registers) ahb: dma relevant clock, sometimes referred to as hclk in the datasheet per: bit clock, pixel clock This patch changes the driver to request the individual clocks. Currently all clk_get will get the same clock until the SoCs are converted to the generic clock framework Signed-off-by: Sascha Hauer diff --git a/drivers/net/ethernet/freescale/fec.c b/drivers/net/ethernet/freescale/fec.c index a12b3f5..b249463 100644 --- a/drivers/net/ethernet/freescale/fec.c +++ b/drivers/net/ethernet/freescale/fec.c @@ -206,7 +206,8 @@ struct fec_enet_private { struct net_device *netdev; - struct clk *clk; + struct clk *clk_ipg; + struct clk *clk_ahb; /* The saved address of a sent-in-place packet/buffer, for skfree(). */ unsigned char *tx_bounce[TX_RING_SIZE]; @@ -1064,7 +1065,7 @@ static int fec_enet_mii_init(struct platform_device *pdev) * Reference Manual has an error on this, and gets fixed on i.MX6Q * document. */ - fep->phy_speed = DIV_ROUND_UP(clk_get_rate(fep->clk), 5000000); + fep->phy_speed = DIV_ROUND_UP(clk_get_rate(fep->clk_ahb), 5000000); if (id_entry->driver_data & FEC_QUIRK_ENET_MAC) fep->phy_speed--; fep->phy_speed <<= 1; @@ -1609,12 +1610,20 @@ fec_probe(struct platform_device *pdev) } } - fep->clk = clk_get(&pdev->dev, NULL); - if (IS_ERR(fep->clk)) { - ret = PTR_ERR(fep->clk); + fep->clk_ipg = devm_clk_get(&pdev->dev, "ipg"); + if (IS_ERR(fep->clk_ipg)) { + ret = PTR_ERR(fep->clk_ipg); goto failed_clk; } - clk_prepare_enable(fep->clk); + + fep->clk_ahb = devm_clk_get(&pdev->dev, "ahb"); + if (IS_ERR(fep->clk_ahb)) { + ret = PTR_ERR(fep->clk_ahb); + goto failed_clk; + } + + clk_prepare_enable(fep->clk_ahb); + clk_prepare_enable(fep->clk_ipg); ret = fec_enet_init(ndev); if (ret) @@ -1637,8 +1646,8 @@ failed_register: fec_enet_mii_remove(fep); failed_mii_init: failed_init: - clk_disable_unprepare(fep->clk); - clk_put(fep->clk); + clk_disable_unprepare(fep->clk_ahb); + clk_disable_unprepare(fep->clk_ipg); failed_clk: for (i = 0; i < FEC_IRQ_NUM; i++) { irq = platform_get_irq(pdev, i); @@ -1670,8 +1679,8 @@ fec_drv_remove(struct platform_device *pdev) if (irq > 0) free_irq(irq, ndev); } - clk_disable_unprepare(fep->clk); - clk_put(fep->clk); + clk_disable_unprepare(fep->clk_ahb); + clk_disable_unprepare(fep->clk_ipg); iounmap(fep->hwp); free_netdev(ndev); @@ -1695,7 +1704,8 @@ fec_suspend(struct device *dev) fec_stop(ndev); netif_device_detach(ndev); } - clk_disable_unprepare(fep->clk); + clk_disable_unprepare(fep->clk_ahb); + clk_disable_unprepare(fep->clk_ipg); return 0; } @@ -1706,7 +1716,8 @@ fec_resume(struct device *dev) struct net_device *ndev = dev_get_drvdata(dev); struct fec_enet_private *fep = netdev_priv(ndev); - clk_prepare_enable(fep->clk); + clk_prepare_enable(fep->clk_ahb); + clk_prepare_enable(fep->clk_ipg); if (netif_running(ndev)) { fec_restart(ndev, fep->full_duplex); netif_device_attach(ndev); -- cgit v0.10.2 From 529aa29e033f3bcd3346de1532e4bd5ff969fd0d Mon Sep 17 00:00:00 2001 From: Sascha Hauer Date: Wed, 7 Mar 2012 09:31:14 +0100 Subject: mmc mxcmmc: do not depend on grouped clocks the current i.MX clock support groups together unrelated clocks to a single clock which is then used by the driver. This can't be accomplished with the generic clock framework so we instead request the individual clocks in the driver. For i.MX there are generally three different clocks: ipg: bus clock (needed to access registers) ahb: dma relevant clock, sometimes referred to as hclk in the datasheet per: bit clock, pixel clock This patch changes the driver to request the individual clocks. Currently all clk_get will get the same clock until the SoCs are converted to the generic clock framework Signed-off-by: Sascha Hauer diff --git a/drivers/mmc/host/mxcmmc.c b/drivers/mmc/host/mxcmmc.c index b2058b4..28ed52d 100644 --- a/drivers/mmc/host/mxcmmc.c +++ b/drivers/mmc/host/mxcmmc.c @@ -136,7 +136,8 @@ struct mxcmci_host { u16 rev_no; unsigned int cmdat; - struct clk *clk; + struct clk *clk_ipg; + struct clk *clk_per; int clock; @@ -672,7 +673,7 @@ static void mxcmci_set_clk_rate(struct mxcmci_host *host, unsigned int clk_ios) { unsigned int divider; int prescaler = 0; - unsigned int clk_in = clk_get_rate(host->clk); + unsigned int clk_in = clk_get_rate(host->clk_per); while (prescaler <= 0x800) { for (divider = 1; divider <= 0xF; divider++) { @@ -900,12 +901,20 @@ static int mxcmci_probe(struct platform_device *pdev) host->res = r; host->irq = irq; - host->clk = clk_get(&pdev->dev, NULL); - if (IS_ERR(host->clk)) { - ret = PTR_ERR(host->clk); + host->clk_ipg = devm_clk_get(&pdev->dev, "ipg"); + if (IS_ERR(host->clk_ipg)) { + ret = PTR_ERR(host->clk_ipg); goto out_iounmap; } - clk_enable(host->clk); + + host->clk_per = devm_clk_get(&pdev->dev, "per"); + if (IS_ERR(host->clk_per)) { + ret = PTR_ERR(host->clk_per); + goto out_iounmap; + } + + clk_prepare_enable(host->clk_per); + clk_prepare_enable(host->clk_ipg); mxcmci_softreset(host); @@ -917,8 +926,8 @@ static int mxcmci_probe(struct platform_device *pdev) goto out_clk_put; } - mmc->f_min = clk_get_rate(host->clk) >> 16; - mmc->f_max = clk_get_rate(host->clk) >> 1; + mmc->f_min = clk_get_rate(host->clk_per) >> 16; + mmc->f_max = clk_get_rate(host->clk_per) >> 1; /* recommended in data sheet */ writew(0x2db4, host->base + MMC_REG_READ_TO); @@ -967,8 +976,8 @@ out_free_dma: if (host->dma) dma_release_channel(host->dma); out_clk_put: - clk_disable(host->clk); - clk_put(host->clk); + clk_disable_unprepare(host->clk_per); + clk_disable_unprepare(host->clk_ipg); out_iounmap: iounmap(host->base); out_free: @@ -999,8 +1008,8 @@ static int mxcmci_remove(struct platform_device *pdev) if (host->dma) dma_release_channel(host->dma); - clk_disable(host->clk); - clk_put(host->clk); + clk_disable_unprepare(host->clk_per); + clk_disable_unprepare(host->clk_ipg); release_mem_region(host->res->start, resource_size(host->res)); @@ -1018,7 +1027,8 @@ static int mxcmci_suspend(struct device *dev) if (mmc) ret = mmc_suspend_host(mmc); - clk_disable(host->clk); + clk_disable_unprepare(host->clk_per); + clk_disable_unprepare(host->clk_ipg); return ret; } @@ -1029,7 +1039,8 @@ static int mxcmci_resume(struct device *dev) struct mxcmci_host *host = mmc_priv(mmc); int ret = 0; - clk_enable(host->clk); + clk_prepare_enable(host->clk_per); + clk_prepare_enable(host->clk_ipg); if (mmc) ret = mmc_resume_host(mmc); -- cgit v0.10.2 From 52dac6150580f356a96b9a49715f6234fbf00d3a Mon Sep 17 00:00:00 2001 From: Sascha Hauer Date: Wed, 7 Mar 2012 09:31:34 +0100 Subject: mmc sdhc i.MX: do not depend on grouped clocks the current i.MX clock support groups together unrelated clocks to a single clock which is then used by the driver. This can't be accomplished with the generic clock framework so we instead request the individual clocks in the driver. For i.MX there are generally three different clocks: ipg: bus clock (needed to access registers) ahb: dma relevant clock, sometimes referred to as hclk in the datasheet per: bit clock, pixel clock This patch changes the driver to request the individual clocks. Currently all clk_get will get the same clock until the SoCs are converted to the generic clock framework Signed-off-by: Sascha Hauer diff --git a/drivers/mmc/host/sdhci-esdhc-imx.c b/drivers/mmc/host/sdhci-esdhc-imx.c index 8abdaf6..ce83d61 100644 --- a/drivers/mmc/host/sdhci-esdhc-imx.c +++ b/drivers/mmc/host/sdhci-esdhc-imx.c @@ -69,6 +69,9 @@ struct pltfm_imx_data { u32 scratchpad; enum imx_esdhc_type devtype; struct esdhc_platform_data boarddata; + struct clk *clk_ipg; + struct clk *clk_ahb; + struct clk *clk_per; }; static struct platform_device_id imx_esdhc_devtype[] = { @@ -437,7 +440,6 @@ static int __devinit sdhci_esdhc_imx_probe(struct platform_device *pdev) struct sdhci_pltfm_host *pltfm_host; struct sdhci_host *host; struct esdhc_platform_data *boarddata; - struct clk *clk; int err; struct pltfm_imx_data *imx_data; @@ -458,14 +460,29 @@ static int __devinit sdhci_esdhc_imx_probe(struct platform_device *pdev) imx_data->devtype = pdev->id_entry->driver_data; pltfm_host->priv = imx_data; - clk = clk_get(mmc_dev(host->mmc), NULL); - if (IS_ERR(clk)) { - dev_err(mmc_dev(host->mmc), "clk err\n"); - err = PTR_ERR(clk); + imx_data->clk_ipg = devm_clk_get(&pdev->dev, "ipg"); + if (IS_ERR(imx_data->clk_ipg)) { + err = PTR_ERR(imx_data->clk_ipg); goto err_clk_get; } - clk_prepare_enable(clk); - pltfm_host->clk = clk; + + imx_data->clk_ahb = devm_clk_get(&pdev->dev, "ahb"); + if (IS_ERR(imx_data->clk_ahb)) { + err = PTR_ERR(imx_data->clk_ahb); + goto err_clk_get; + } + + imx_data->clk_per = devm_clk_get(&pdev->dev, "per"); + if (IS_ERR(imx_data->clk_per)) { + err = PTR_ERR(imx_data->clk_per); + goto err_clk_get; + } + + pltfm_host->clk = imx_data->clk_per; + + clk_prepare_enable(imx_data->clk_per); + clk_prepare_enable(imx_data->clk_ipg); + clk_prepare_enable(imx_data->clk_ahb); host->quirks |= SDHCI_QUIRK_BROKEN_TIMEOUT_VAL; @@ -558,8 +575,9 @@ no_card_detect_irq: gpio_free(boarddata->wp_gpio); no_card_detect_pin: no_board_data: - clk_disable_unprepare(pltfm_host->clk); - clk_put(pltfm_host->clk); + clk_disable_unprepare(imx_data->clk_per); + clk_disable_unprepare(imx_data->clk_ipg); + clk_disable_unprepare(imx_data->clk_ahb); err_clk_get: kfree(imx_data); err_imx_data: @@ -585,8 +603,10 @@ static int __devexit sdhci_esdhc_imx_remove(struct platform_device *pdev) gpio_free(boarddata->cd_gpio); } - clk_disable_unprepare(pltfm_host->clk); - clk_put(pltfm_host->clk); + clk_disable_unprepare(imx_data->clk_per); + clk_disable_unprepare(imx_data->clk_ipg); + clk_disable_unprepare(imx_data->clk_ahb); + kfree(imx_data); sdhci_pltfm_free(pdev); -- cgit v0.10.2 From 3a9465fa2dc42a8ebc2fe9144f4dfa23d5899f85 Mon Sep 17 00:00:00 2001 From: Sascha Hauer Date: Wed, 7 Mar 2012 09:31:43 +0100 Subject: serial i.MX: do not depend on grouped clocks the current i.MX clock support groups together unrelated clocks to a single clock which is then used by the driver. This can't be accomplished with the generic clock framework so we instead request the individual clocks in the driver. For i.MX there are generally three different clocks: ipg: bus clock (needed to access registers) ahb: dma relevant clock, sometimes referred to as hclk in the datasheet per: bit clock, pixel clock This patch changes the driver to request the individual clocks. Currently all clk_get will get the same clock until the SoCs are converted to the generic clock framework Signed-off-by: Sascha Hauer diff --git a/drivers/tty/serial/imx.c b/drivers/tty/serial/imx.c index e7fecee..267ec6d 100644 --- a/drivers/tty/serial/imx.c +++ b/drivers/tty/serial/imx.c @@ -204,7 +204,8 @@ struct imx_port { unsigned int irda_inv_rx:1; unsigned int irda_inv_tx:1; unsigned short trcv_delay; /* transceiver delay */ - struct clk *clk; + struct clk *clk_ipg; + struct clk *clk_per; struct imx_uart_data *devdata; }; @@ -672,7 +673,7 @@ static int imx_setup_ufcr(struct imx_port *sport, unsigned int mode) * RFDIV is set such way to satisfy requested uartclk value */ val = TXTL << 10 | RXTL; - ufcr_rfdiv = (clk_get_rate(sport->clk) + sport->port.uartclk / 2) + ufcr_rfdiv = (clk_get_rate(sport->clk_per) + sport->port.uartclk / 2) / sport->port.uartclk; if(!ufcr_rfdiv) @@ -1285,7 +1286,7 @@ imx_console_get_options(struct imx_port *sport, int *baud, else ucfr_rfdiv = 6 - ucfr_rfdiv; - uartclk = clk_get_rate(sport->clk); + uartclk = clk_get_rate(sport->clk_per); uartclk /= ucfr_rfdiv; { /* @@ -1503,14 +1504,22 @@ static int serial_imx_probe(struct platform_device *pdev) sport->timer.function = imx_timeout; sport->timer.data = (unsigned long)sport; - sport->clk = clk_get(&pdev->dev, "uart"); - if (IS_ERR(sport->clk)) { - ret = PTR_ERR(sport->clk); + sport->clk_ipg = devm_clk_get(&pdev->dev, "ipg"); + if (IS_ERR(sport->clk_ipg)) { + ret = PTR_ERR(sport->clk_ipg); goto unmap; } - clk_prepare_enable(sport->clk); - sport->port.uartclk = clk_get_rate(sport->clk); + sport->clk_per = devm_clk_get(&pdev->dev, "per"); + if (IS_ERR(sport->clk_per)) { + ret = PTR_ERR(sport->clk_per); + goto unmap; + } + + clk_prepare_enable(sport->clk_per); + clk_prepare_enable(sport->clk_ipg); + + sport->port.uartclk = clk_get_rate(sport->clk_per); imx_ports[sport->port.line] = sport; @@ -1531,8 +1540,8 @@ deinit: if (pdata && pdata->exit) pdata->exit(pdev); clkput: - clk_disable_unprepare(sport->clk); - clk_put(sport->clk); + clk_disable_unprepare(sport->clk_per); + clk_disable_unprepare(sport->clk_ipg); unmap: iounmap(sport->port.membase); free: @@ -1550,11 +1559,10 @@ static int serial_imx_remove(struct platform_device *pdev) platform_set_drvdata(pdev, NULL); - if (sport) { - uart_remove_one_port(&imx_reg, &sport->port); - clk_disable_unprepare(sport->clk); - clk_put(sport->clk); - } + uart_remove_one_port(&imx_reg, &sport->port); + + clk_disable_unprepare(sport->clk_per); + clk_disable_unprepare(sport->clk_ipg); if (pdata && pdata->exit) pdata->exit(pdev); -- cgit v0.10.2 From 97c3213fd9fc28c0e86b69df09f4228424cafecc Mon Sep 17 00:00:00 2001 From: Sascha Hauer Date: Wed, 7 Mar 2012 20:56:35 +0100 Subject: mtd mxc_nand: prepare/unprepare clock Signed-off-by: Sascha Hauer diff --git a/drivers/mtd/nand/mxc_nand.c b/drivers/mtd/nand/mxc_nand.c index cc0678a..9e374e9 100644 --- a/drivers/mtd/nand/mxc_nand.c +++ b/drivers/mtd/nand/mxc_nand.c @@ -690,7 +690,7 @@ static void mxc_nand_select_chip(struct mtd_info *mtd, int chip) if (chip == -1) { /* Disable the NFC clock */ if (host->clk_act) { - clk_disable(host->clk); + clk_disable_unprepare(host->clk); host->clk_act = 0; } return; @@ -698,7 +698,7 @@ static void mxc_nand_select_chip(struct mtd_info *mtd, int chip) if (!host->clk_act) { /* Enable the NFC clock */ - clk_enable(host->clk); + clk_prepare_enable(host->clk); host->clk_act = 1; } @@ -1078,7 +1078,7 @@ static int __init mxcnd_probe(struct platform_device *pdev) goto eclk; } - clk_enable(host->clk); + clk_prepare_enable(host->clk); host->clk_act = 1; res = platform_get_resource(pdev, IORESOURCE_MEM, 0); -- cgit v0.10.2 From 198ad2cecde16ce309a65f2fddd5f6d3442f8250 Mon Sep 17 00:00:00 2001 From: Sascha Hauer Date: Wed, 7 Mar 2012 20:58:21 +0100 Subject: USB ehci mxc: prepare/unprepare clock Signed-off-by: Sascha Hauer diff --git a/drivers/usb/host/ehci-mxc.c b/drivers/usb/host/ehci-mxc.c index a797d51..ae16c34 100644 --- a/drivers/usb/host/ehci-mxc.c +++ b/drivers/usb/host/ehci-mxc.c @@ -171,7 +171,7 @@ static int ehci_mxc_drv_probe(struct platform_device *pdev) ret = PTR_ERR(priv->usbclk); goto err_clk; } - clk_enable(priv->usbclk); + clk_prepare_enable(priv->usbclk); if (!cpu_is_mx35() && !cpu_is_mx25()) { priv->ahbclk = clk_get(dev, "usb_ahb"); @@ -179,7 +179,7 @@ static int ehci_mxc_drv_probe(struct platform_device *pdev) ret = PTR_ERR(priv->ahbclk); goto err_clk_ahb; } - clk_enable(priv->ahbclk); + clk_prepare_enable(priv->ahbclk); } /* "dr" device has its own clock on i.MX51 */ @@ -189,7 +189,7 @@ static int ehci_mxc_drv_probe(struct platform_device *pdev) ret = PTR_ERR(priv->phy1clk); goto err_clk_phy; } - clk_enable(priv->phy1clk); + clk_prepare_enable(priv->phy1clk); } @@ -266,16 +266,16 @@ err_add: pdata->exit(pdev); err_init: if (priv->phy1clk) { - clk_disable(priv->phy1clk); + clk_disable_unprepare(priv->phy1clk); clk_put(priv->phy1clk); } err_clk_phy: if (priv->ahbclk) { - clk_disable(priv->ahbclk); + clk_disable_unprepare(priv->ahbclk); clk_put(priv->ahbclk); } err_clk_ahb: - clk_disable(priv->usbclk); + clk_disable_unprepare(priv->usbclk); clk_put(priv->usbclk); err_clk: iounmap(hcd->regs); @@ -307,14 +307,14 @@ static int __exit ehci_mxc_drv_remove(struct platform_device *pdev) usb_put_hcd(hcd); platform_set_drvdata(pdev, NULL); - clk_disable(priv->usbclk); + clk_disable_unprepare(priv->usbclk); clk_put(priv->usbclk); if (priv->ahbclk) { - clk_disable(priv->ahbclk); + clk_disable_unprepare(priv->ahbclk); clk_put(priv->ahbclk); } if (priv->phy1clk) { - clk_disable(priv->phy1clk); + clk_disable_unprepare(priv->phy1clk); clk_put(priv->phy1clk); } -- cgit v0.10.2 From c943740ccd7ccfc7e92c80d194d0a8a80ab7b55c Mon Sep 17 00:00:00 2001 From: Sascha Hauer Date: Wed, 25 Apr 2012 16:39:06 +0200 Subject: USB ehci mxc: sanitize clock handling Every i.MX ehci controller has a ahb and a ipg clock, so request it on every SoC. Do not make a special case for the usb phy clock of the i.MX51. Just request it but make it optional. Signed-off-by: Sascha Hauer diff --git a/drivers/usb/host/ehci-mxc.c b/drivers/usb/host/ehci-mxc.c index ae16c34..c778ffe 100644 --- a/drivers/usb/host/ehci-mxc.c +++ b/drivers/usb/host/ehci-mxc.c @@ -32,7 +32,7 @@ #define ULPI_VIEWPORT_OFFSET 0x170 struct ehci_mxc_priv { - struct clk *usbclk, *ahbclk, *phy1clk; + struct clk *usbclk, *ahbclk, *phyclk; struct usb_hcd *hcd; }; @@ -166,31 +166,26 @@ static int ehci_mxc_drv_probe(struct platform_device *pdev) } /* enable clocks */ - priv->usbclk = clk_get(dev, "usb"); + priv->usbclk = clk_get(dev, "ipg"); if (IS_ERR(priv->usbclk)) { ret = PTR_ERR(priv->usbclk); goto err_clk; } clk_prepare_enable(priv->usbclk); - if (!cpu_is_mx35() && !cpu_is_mx25()) { - priv->ahbclk = clk_get(dev, "usb_ahb"); - if (IS_ERR(priv->ahbclk)) { - ret = PTR_ERR(priv->ahbclk); - goto err_clk_ahb; - } - clk_prepare_enable(priv->ahbclk); + priv->ahbclk = clk_get(dev, "ahb"); + if (IS_ERR(priv->ahbclk)) { + ret = PTR_ERR(priv->ahbclk); + goto err_clk_ahb; } + clk_prepare_enable(priv->ahbclk); /* "dr" device has its own clock on i.MX51 */ - if (cpu_is_mx51() && (pdev->id == 0)) { - priv->phy1clk = clk_get(dev, "usb_phy1"); - if (IS_ERR(priv->phy1clk)) { - ret = PTR_ERR(priv->phy1clk); - goto err_clk_phy; - } - clk_prepare_enable(priv->phy1clk); - } + priv->phyclk = clk_get(dev, "phy"); + if (IS_ERR(priv->phyclk)) + priv->phyclk = NULL; + if (priv->phyclk) + clk_prepare_enable(priv->phyclk); /* call platform specific init function */ @@ -265,15 +260,13 @@ err_add: if (pdata && pdata->exit) pdata->exit(pdev); err_init: - if (priv->phy1clk) { - clk_disable_unprepare(priv->phy1clk); - clk_put(priv->phy1clk); - } -err_clk_phy: - if (priv->ahbclk) { - clk_disable_unprepare(priv->ahbclk); - clk_put(priv->ahbclk); + if (priv->phyclk) { + clk_disable_unprepare(priv->phyclk); + clk_put(priv->phyclk); } + + clk_disable_unprepare(priv->ahbclk); + clk_put(priv->ahbclk); err_clk_ahb: clk_disable_unprepare(priv->usbclk); clk_put(priv->usbclk); @@ -309,13 +302,12 @@ static int __exit ehci_mxc_drv_remove(struct platform_device *pdev) clk_disable_unprepare(priv->usbclk); clk_put(priv->usbclk); - if (priv->ahbclk) { - clk_disable_unprepare(priv->ahbclk); - clk_put(priv->ahbclk); - } - if (priv->phy1clk) { - clk_disable_unprepare(priv->phy1clk); - clk_put(priv->phy1clk); + clk_disable_unprepare(priv->ahbclk); + clk_put(priv->ahbclk); + + if (priv->phyclk) { + clk_disable_unprepare(priv->phyclk); + clk_put(priv->phyclk); } kfree(priv); -- cgit v0.10.2 From 60178b6329259457013d0e39cbee22aec0230dfb Mon Sep 17 00:00:00 2001 From: Sascha Hauer Date: Wed, 7 Mar 2012 20:59:36 +0100 Subject: w1 i.MX: prepare/unprepare clock Signed-off-by: Sascha Hauer diff --git a/drivers/w1/masters/mxc_w1.c b/drivers/w1/masters/mxc_w1.c index a3b6a74..1cc61a7 100644 --- a/drivers/w1/masters/mxc_w1.c +++ b/drivers/w1/masters/mxc_w1.c @@ -138,7 +138,7 @@ static int __devinit mxc_w1_probe(struct platform_device *pdev) goto failed_ioremap; } - clk_enable(mdev->clk); + clk_prepare_enable(mdev->clk); __raw_writeb(mdev->clkdiv, mdev->regs + MXC_W1_TIME_DIVIDER); mdev->bus_master.data = mdev; @@ -178,7 +178,7 @@ static int __devexit mxc_w1_remove(struct platform_device *pdev) iounmap(mdev->regs); release_mem_region(res->start, resource_size(res)); - clk_disable(mdev->clk); + clk_disable_unprepare(mdev->clk); clk_put(mdev->clk); platform_set_drvdata(pdev, NULL); -- cgit v0.10.2 From 4e7b6c9a6b4700cf121a0d5924f193db83cbd008 Mon Sep 17 00:00:00 2001 From: Sascha Hauer Date: Tue, 3 Apr 2012 12:34:57 +0200 Subject: watchdog imx2: prepare clk before enabling it Signed-off-by: Sascha Hauer diff --git a/drivers/watchdog/imx2_wdt.c b/drivers/watchdog/imx2_wdt.c index 7a2b734..bcfab2b 100644 --- a/drivers/watchdog/imx2_wdt.c +++ b/drivers/watchdog/imx2_wdt.c @@ -121,7 +121,7 @@ static void imx2_wdt_start(void) { if (!test_and_set_bit(IMX2_WDT_STATUS_STARTED, &imx2_wdt.status)) { /* at our first start we enable clock and do initialisations */ - clk_enable(imx2_wdt.clk); + clk_prepare_enable(imx2_wdt.clk); imx2_wdt_setup(); } else /* delete the timer that pings the watchdog after close */ -- cgit v0.10.2 From fdf7748b9f8d392a086560616bf112f0ba0c1f71 Mon Sep 17 00:00:00 2001 From: Sascha Hauer Date: Tue, 3 Apr 2012 12:35:16 +0200 Subject: media mx3 camera: prepare clk before enabling it Signed-off-by: Sascha Hauer diff --git a/drivers/media/video/mx3_camera.c b/drivers/media/video/mx3_camera.c index 93c35ef..e2e33df 100644 --- a/drivers/media/video/mx3_camera.c +++ b/drivers/media/video/mx3_camera.c @@ -508,7 +508,7 @@ static void mx3_camera_activate(struct mx3_camera_dev *mx3_cam, /* ipu_csi_init_interface() */ csi_reg_write(mx3_cam, conf, CSI_SENS_CONF); - clk_enable(mx3_cam->clk); + clk_prepare_enable(mx3_cam->clk); rate = clk_round_rate(mx3_cam->clk, mx3_cam->mclk); dev_dbg(icd->parent, "Set SENS_CONF to %x, rate %ld\n", conf, rate); if (rate) @@ -549,7 +549,7 @@ static void mx3_camera_remove_device(struct soc_camera_device *icd) *ichan = NULL; } - clk_disable(mx3_cam->clk); + clk_disable_unprepare(mx3_cam->clk); mx3_cam->icd = NULL; -- cgit v0.10.2 From 4fa030a43ddb0d8fe3f2530d6162c11a3b3d31de Mon Sep 17 00:00:00 2001 From: Sascha Hauer Date: Sun, 18 Mar 2012 23:48:13 +0100 Subject: dmaengine i.MX ipu: clk_prepare/unprepare clock Signed-off-by: Sascha Hauer diff --git a/drivers/dma/ipu/ipu_idmac.c b/drivers/dma/ipu/ipu_idmac.c index 62e3f8e..5ec7204 100644 --- a/drivers/dma/ipu/ipu_idmac.c +++ b/drivers/dma/ipu/ipu_idmac.c @@ -1715,7 +1715,7 @@ static int __init ipu_probe(struct platform_device *pdev) } /* Make sure IPU HSP clock is running */ - clk_enable(ipu_data.ipu_clk); + clk_prepare_enable(ipu_data.ipu_clk); /* Disable all interrupts */ idmac_write_ipureg(&ipu_data, 0, IPU_INT_CTRL_1); @@ -1747,7 +1747,7 @@ static int __init ipu_probe(struct platform_device *pdev) err_idmac_init: err_attach_irq: ipu_irq_detach_irq(&ipu_data, pdev); - clk_disable(ipu_data.ipu_clk); + clk_disable_unprepare(ipu_data.ipu_clk); clk_put(ipu_data.ipu_clk); err_clk_get: iounmap(ipu_data.reg_ic); @@ -1765,7 +1765,7 @@ static int __exit ipu_remove(struct platform_device *pdev) ipu_idmac_exit(ipu); ipu_irq_detach_irq(ipu, pdev); - clk_disable(ipu->ipu_clk); + clk_disable_unprepare(ipu->ipu_clk); clk_put(ipu->ipu_clk); iounmap(ipu->reg_ic); iounmap(ipu->reg_ipu); -- cgit v0.10.2 From 4ec8c7f52654cdbb13770d04dc76f9a4615cd4e1 Mon Sep 17 00:00:00 2001 From: Sascha Hauer Date: Wed, 25 Apr 2012 16:35:16 +0200 Subject: rtc: imx dryice: Add missing clk_prepare prepare the clock before enabling it. Signed-off-by: Sascha Hauer diff --git a/drivers/rtc/rtc-imxdi.c b/drivers/rtc/rtc-imxdi.c index d93a960..891cd6c 100644 --- a/drivers/rtc/rtc-imxdi.c +++ b/drivers/rtc/rtc-imxdi.c @@ -405,7 +405,7 @@ static int dryice_rtc_probe(struct platform_device *pdev) imxdi->clk = clk_get(&pdev->dev, NULL); if (IS_ERR(imxdi->clk)) return PTR_ERR(imxdi->clk); - clk_enable(imxdi->clk); + clk_prepare_enable(imxdi->clk); /* * Initialize dryice hardware @@ -470,7 +470,7 @@ static int dryice_rtc_probe(struct platform_device *pdev) return 0; err: - clk_disable(imxdi->clk); + clk_disable_unprepare(imxdi->clk); clk_put(imxdi->clk); return rc; @@ -487,7 +487,7 @@ static int __devexit dryice_rtc_remove(struct platform_device *pdev) rtc_device_unregister(imxdi->rtc); - clk_disable(imxdi->clk); + clk_disable_unprepare(imxdi->clk); clk_put(imxdi->clk); return 0; -- cgit v0.10.2 From 096c19c37b43ee8ce04d4f27022f899ef4133fbb Mon Sep 17 00:00:00 2001 From: Sascha Hauer Date: Thu, 5 Apr 2012 15:05:09 +0200 Subject: ARM i.MX5: prepare gpc_dvfs_clk Signed-off-by: Sascha Hauer diff --git a/arch/arm/mach-imx/mm-imx5.c b/arch/arm/mach-imx/mm-imx5.c index e10f391..68341cf 100644 --- a/arch/arm/mach-imx/mm-imx5.c +++ b/arch/arm/mach-imx/mm-imx5.c @@ -32,6 +32,7 @@ static void imx5_idle(void) gpc_dvfs_clk = clk_get(NULL, "gpc_dvfs"); if (IS_ERR(gpc_dvfs_clk)) return; + clk_prepare(gpc_dvfs_clk); } clk_enable(gpc_dvfs_clk); mx5_cpu_lp_set(WAIT_UNCLOCKED_POWER_OFF); -- cgit v0.10.2 From 821dc4dfd955da0679872088025542a0795c6b3e Mon Sep 17 00:00:00 2001 From: Sascha Hauer Date: Fri, 9 Mar 2012 09:29:27 +0100 Subject: ARM i.MX timer: request correct clock We used to pass the timer clock directly to mxc_timer_init. We should instead request the correct clock. This is an intermediate step: For now we request the clock in the timer code when NULL is passed as clock. Also, the gpt on some i.MX have an additional ipg clock which can be gated. Request and enable this. Signed-off-by: Sascha Hauer diff --git a/arch/arm/plat-mxc/time.c b/arch/arm/plat-mxc/time.c index 7daf7c9..99f958c 100644 --- a/arch/arm/plat-mxc/time.c +++ b/arch/arm/plat-mxc/time.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include @@ -282,6 +283,19 @@ static int __init mxc_clockevent_init(struct clk *timer_clk) void __init mxc_timer_init(struct clk *timer_clk, void __iomem *base, int irq) { uint32_t tctl_val; + struct clk *timer_ipg_clk; + + if (!timer_clk) { + timer_clk = clk_get_sys("imx-gpt.0", "per"); + if (IS_ERR(timer_clk)) { + pr_err("i.MX timer: unable to get clk\n"); + return; + } + + timer_ipg_clk = clk_get_sys("imx-gpt.0", "ipg"); + if (!IS_ERR(timer_ipg_clk)) + clk_prepare_enable(timer_ipg_clk); + } clk_prepare_enable(timer_clk); -- cgit v0.10.2 From eb92044eb3d59d29c9812e85e3a4bf41f6f38e3a Mon Sep 17 00:00:00 2001 From: Sascha Hauer Date: Tue, 3 Apr 2012 12:42:27 +0200 Subject: ARM i.MX3: Make ccm base address a variable Instead of having a cpu_is_* in each ccm register access it is more efficient to make it a variable. Signed-off-by: Sascha Hauer diff --git a/arch/arm/mach-imx/crmregs-imx3.h b/arch/arm/mach-imx/crmregs-imx3.h index 5314127..a1dfde5 100644 --- a/arch/arm/mach-imx/crmregs-imx3.h +++ b/arch/arm/mach-imx/crmregs-imx3.h @@ -24,48 +24,47 @@ #define CKIH_CLK_FREQ_27MHZ 27000000 #define CKIL_CLK_FREQ 32768 -#define MXC_CCM_BASE (cpu_is_mx31() ? \ -MX31_IO_ADDRESS(MX31_CCM_BASE_ADDR) : MX35_IO_ADDRESS(MX35_CCM_BASE_ADDR)) +extern void __iomem *mx3_ccm_base; /* Register addresses */ -#define MXC_CCM_CCMR (MXC_CCM_BASE + 0x00) -#define MXC_CCM_PDR0 (MXC_CCM_BASE + 0x04) -#define MXC_CCM_PDR1 (MXC_CCM_BASE + 0x08) -#define MX35_CCM_PDR2 (MXC_CCM_BASE + 0x0C) -#define MXC_CCM_RCSR (MXC_CCM_BASE + 0x0C) -#define MX35_CCM_PDR3 (MXC_CCM_BASE + 0x10) -#define MXC_CCM_MPCTL (MXC_CCM_BASE + 0x10) -#define MX35_CCM_PDR4 (MXC_CCM_BASE + 0x14) -#define MXC_CCM_UPCTL (MXC_CCM_BASE + 0x14) -#define MX35_CCM_RCSR (MXC_CCM_BASE + 0x18) -#define MXC_CCM_SRPCTL (MXC_CCM_BASE + 0x18) -#define MX35_CCM_MPCTL (MXC_CCM_BASE + 0x1C) -#define MXC_CCM_COSR (MXC_CCM_BASE + 0x1C) -#define MX35_CCM_PPCTL (MXC_CCM_BASE + 0x20) -#define MXC_CCM_CGR0 (MXC_CCM_BASE + 0x20) -#define MX35_CCM_ACMR (MXC_CCM_BASE + 0x24) -#define MXC_CCM_CGR1 (MXC_CCM_BASE + 0x24) -#define MX35_CCM_COSR (MXC_CCM_BASE + 0x28) -#define MXC_CCM_CGR2 (MXC_CCM_BASE + 0x28) -#define MX35_CCM_CGR0 (MXC_CCM_BASE + 0x2C) -#define MXC_CCM_WIMR (MXC_CCM_BASE + 0x2C) -#define MX35_CCM_CGR1 (MXC_CCM_BASE + 0x30) -#define MXC_CCM_LDC (MXC_CCM_BASE + 0x30) -#define MX35_CCM_CGR2 (MXC_CCM_BASE + 0x34) -#define MXC_CCM_DCVR0 (MXC_CCM_BASE + 0x34) -#define MX35_CCM_CGR3 (MXC_CCM_BASE + 0x38) -#define MXC_CCM_DCVR1 (MXC_CCM_BASE + 0x38) -#define MXC_CCM_DCVR2 (MXC_CCM_BASE + 0x3C) -#define MXC_CCM_DCVR3 (MXC_CCM_BASE + 0x40) -#define MXC_CCM_LTR0 (MXC_CCM_BASE + 0x44) -#define MXC_CCM_LTR1 (MXC_CCM_BASE + 0x48) -#define MXC_CCM_LTR2 (MXC_CCM_BASE + 0x4C) -#define MXC_CCM_LTR3 (MXC_CCM_BASE + 0x50) -#define MXC_CCM_LTBR0 (MXC_CCM_BASE + 0x54) -#define MXC_CCM_LTBR1 (MXC_CCM_BASE + 0x58) -#define MXC_CCM_PMCR0 (MXC_CCM_BASE + 0x5C) -#define MXC_CCM_PMCR1 (MXC_CCM_BASE + 0x60) -#define MXC_CCM_PDR2 (MXC_CCM_BASE + 0x64) +#define MXC_CCM_CCMR 0x00 +#define MXC_CCM_PDR0 0x04 +#define MXC_CCM_PDR1 0x08 +#define MX35_CCM_PDR2 0x0C +#define MXC_CCM_RCSR 0x0C +#define MX35_CCM_PDR3 0x10 +#define MXC_CCM_MPCTL 0x10 +#define MX35_CCM_PDR4 0x14 +#define MXC_CCM_UPCTL 0x14 +#define MX35_CCM_RCSR 0x18 +#define MXC_CCM_SRPCTL 0x18 +#define MX35_CCM_MPCTL 0x1C +#define MXC_CCM_COSR 0x1C +#define MX35_CCM_PPCTL 0x20 +#define MXC_CCM_CGR0 0x20 +#define MX35_CCM_ACMR 0x24 +#define MXC_CCM_CGR1 0x24 +#define MX35_CCM_COSR 0x28 +#define MXC_CCM_CGR2 0x28 +#define MX35_CCM_CGR0 0x2C +#define MXC_CCM_WIMR 0x2C +#define MX35_CCM_CGR1 0x30 +#define MXC_CCM_LDC 0x30 +#define MX35_CCM_CGR2 0x34 +#define MXC_CCM_DCVR0 0x34 +#define MX35_CCM_CGR3 0x38 +#define MXC_CCM_DCVR1 0x38 +#define MXC_CCM_DCVR2 0x3C +#define MXC_CCM_DCVR3 0x40 +#define MXC_CCM_LTR0 0x44 +#define MXC_CCM_LTR1 0x48 +#define MXC_CCM_LTR2 0x4C +#define MXC_CCM_LTR3 0x50 +#define MXC_CCM_LTBR0 0x54 +#define MXC_CCM_LTBR1 0x58 +#define MXC_CCM_PMCR0 0x5C +#define MXC_CCM_PMCR1 0x60 +#define MXC_CCM_PDR2 0x64 /* Register bit definitions */ #define MXC_CCM_CCMR_WBEN (1 << 27) diff --git a/arch/arm/mach-imx/mm-imx3.c b/arch/arm/mach-imx/mm-imx3.c index 7412738..57b39f8 100644 --- a/arch/arm/mach-imx/mm-imx3.c +++ b/arch/arm/mach-imx/mm-imx3.c @@ -31,6 +31,10 @@ #include #include +#include "crmregs-imx3.h" + +void __iomem *mx3_ccm_base; + static void imx3_idle(void) { unsigned long reg = 0; @@ -137,6 +141,7 @@ void __init imx31_init_early(void) mxc_arch_reset_init(MX31_IO_ADDRESS(MX31_WDOG_BASE_ADDR)); arch_ioremap_caller = imx3_ioremap_caller; arm_pm_idle = imx3_idle; + mx3_ccm_base = MX31_IO_ADDRESS(MX31_CCM_BASE_ADDR); } void __init mx31_init_irq(void) @@ -210,6 +215,7 @@ void __init imx35_init_early(void) mxc_arch_reset_init(MX35_IO_ADDRESS(MX35_WDOG_BASE_ADDR)); arm_pm_idle = imx3_idle; arch_ioremap_caller = imx3_ioremap_caller; + mx3_ccm_base = MX35_IO_ADDRESS(MX35_CCM_BASE_ADDR); } void __init mx35_init_irq(void) diff --git a/arch/arm/mach-imx/pm-imx3.c b/arch/arm/mach-imx/pm-imx3.c index b375243..822103b 100644 --- a/arch/arm/mach-imx/pm-imx3.c +++ b/arch/arm/mach-imx/pm-imx3.c @@ -21,14 +21,14 @@ */ void mx3_cpu_lp_set(enum mx3_cpu_pwr_mode mode) { - int reg = __raw_readl(MXC_CCM_CCMR); + int reg = __raw_readl(mx3_ccm_base + MXC_CCM_CCMR); reg &= ~MXC_CCM_CCMR_LPM_MASK; switch (mode) { case MX3_WAIT: if (cpu_is_mx35()) reg |= MXC_CCM_CCMR_LPM_WAIT_MX35; - __raw_writel(reg, MXC_CCM_CCMR); + __raw_writel(reg, mx3_ccm_base + MXC_CCM_CCMR); break; default: pr_err("Unknown cpu power mode: %d\n", mode); -- cgit v0.10.2 From 6c7b06850c5a1615cc9e660e0d24ce2025bb9bcf Mon Sep 17 00:00:00 2001 From: Sascha Hauer Date: Wed, 7 Mar 2012 21:01:28 +0100 Subject: ARM i.MX: prepare for common clock framework - Add necessary #ifdefs for CONFIG_COMMON_CLOCK - Add a global spinlock to protect the CCM registers Signed-off-by: Sascha Hauer diff --git a/arch/arm/mach-imx/clk.h b/arch/arm/mach-imx/clk.h new file mode 100644 index 0000000..00f2590 --- /dev/null +++ b/arch/arm/mach-imx/clk.h @@ -0,0 +1,44 @@ +#ifndef __MACH_IMX_CLK_H +#define __MACH_IMX_CLK_H + +#include +#include +#include + +struct clk *imx_clk_pllv1(const char *name, char *parent, + void __iomem *base); + +static inline struct clk *imx_clk_fixed(const char *name, int rate) +{ + return clk_register_fixed_rate(NULL, name, NULL, CLK_IS_ROOT, rate); +} + +static inline struct clk *imx_clk_divider(const char *name, const char *parent, + void __iomem *reg, u8 shift, u8 width) +{ + return clk_register_divider(NULL, name, parent, CLK_SET_RATE_PARENT, + reg, shift, width, 0, &imx_ccm_lock); +} + +static inline struct clk *imx_clk_gate(const char *name, const char *parent, + void __iomem *reg, u8 shift) +{ + return clk_register_gate(NULL, name, parent, CLK_SET_RATE_PARENT, reg, + shift, 0, &imx_ccm_lock); +} + +static inline struct clk *imx_clk_mux(const char *name, void __iomem *reg, + u8 shift, u8 width, const char **parents, int num_parents) +{ + return clk_register_mux(NULL, name, parents, num_parents, 0, reg, shift, + width, 0, &imx_ccm_lock); +} + +static inline struct clk *imx_clk_fixed_factor(const char *name, + const char *parent, unsigned int mult, unsigned int div) +{ + return clk_register_fixed_factor(NULL, name, parent, + CLK_SET_RATE_PARENT, mult, div); +} + +#endif diff --git a/arch/arm/plat-mxc/clock.c b/arch/arm/plat-mxc/clock.c index 2ed3ab1..5079787 100644 --- a/arch/arm/plat-mxc/clock.c +++ b/arch/arm/plat-mxc/clock.c @@ -41,6 +41,7 @@ #include #include +#ifndef CONFIG_COMMON_CLK static LIST_HEAD(clocks); static DEFINE_MUTEX(clocks_mutex); @@ -200,6 +201,16 @@ struct clk *clk_get_parent(struct clk *clk) } EXPORT_SYMBOL(clk_get_parent); +#else + +/* + * Lock to protect the clock module (ccm) registers. Used + * on all i.MXs + */ +DEFINE_SPINLOCK(imx_ccm_lock); + +#endif /* CONFIG_COMMON_CLK */ + /* * Get the resulting clock rate from a PLL register value and the input * frequency. PLLs with this register layout can at least be found on diff --git a/arch/arm/plat-mxc/include/mach/clock.h b/arch/arm/plat-mxc/include/mach/clock.h index 753a598..bd940c7 100644 --- a/arch/arm/plat-mxc/include/mach/clock.h +++ b/arch/arm/plat-mxc/include/mach/clock.h @@ -23,6 +23,7 @@ #ifndef __ASSEMBLY__ #include +#ifndef CONFIG_COMMON_CLK struct module; struct clk { @@ -59,6 +60,9 @@ struct clk { int clk_register(struct clk *clk); void clk_unregister(struct clk *clk); +#endif /* CONFIG_COMMON_CLK */ + +extern spinlock_t imx_ccm_lock; unsigned long mxc_decode_pll(unsigned int pll, u32 f_ref); -- cgit v0.10.2 From 8703612b0abb33e6daacc0f6b709a006ac85b285 Mon Sep 17 00:00:00 2001 From: Stephen Warren Date: Fri, 30 Mar 2012 15:50:44 -0600 Subject: ARM: tegra: provide clock aliases for AHUB configlink The Tegra30 AHUB driver must call tegra_periph_reset_deassert() for all devices on the AHUB's configlink bus. The AHUB driver must be able to call clk_get_sys() to retrieve the clock parameter for this function. Add the necessary clock aliases to allow this. Signed-off-by: Stephen Warren Acked-by: Olof Johansson diff --git a/arch/arm/mach-tegra/tegra30_clocks.c b/arch/arm/mach-tegra/tegra30_clocks.c index 6d08b53..e33fe4b 100644 --- a/arch/arm/mach-tegra/tegra30_clocks.c +++ b/arch/arm/mach-tegra/tegra30_clocks.c @@ -3015,6 +3015,15 @@ struct clk_duplicate tegra_clk_duplicates[] = { CLK_DUPLICATE("sbc6", "spi_slave_tegra.5", NULL), CLK_DUPLICATE("twd", "smp_twd", NULL), CLK_DUPLICATE("vcp", "nvavp", "vcp"), + CLK_DUPLICATE("i2s0", NULL, "i2s0"), + CLK_DUPLICATE("i2s1", NULL, "i2s1"), + CLK_DUPLICATE("i2s2", NULL, "i2s2"), + CLK_DUPLICATE("i2s3", NULL, "i2s3"), + CLK_DUPLICATE("i2s4", NULL, "i2s4"), + CLK_DUPLICATE("dam0", NULL, "dam0"), + CLK_DUPLICATE("dam1", NULL, "dam1"), + CLK_DUPLICATE("dam2", NULL, "dam2"), + CLK_DUPLICATE("spdif_in", NULL, "spdif_in"), }; struct clk *tegra_ptr_clks[] = { -- cgit v0.10.2 From 6437626928467e81aa4a3087d88cd3f443b3e9ec Mon Sep 17 00:00:00 2001 From: Peter De Schrijver Date: Mon, 23 Apr 2012 01:31:49 -0700 Subject: ARM: tegra: Initialize pll_p_out1 pll_a uses pll_p_out1 as its parent. Therefore this clock needs to be initialized to make sure pll_a has a known input clock. Failure to do so will cause the system to crash early in the bootup. Signed-off-by: Peter De Schrijver Signed-off-by: Stephen Warren diff --git a/arch/arm/mach-tegra/common.c b/arch/arm/mach-tegra/common.c index 22df10f..1f76233 100644 --- a/arch/arm/mach-tegra/common.c +++ b/arch/arm/mach-tegra/common.c @@ -93,6 +93,17 @@ static __initdata struct tegra_clk_init_table tegra20_clk_init_table[] = { }; #endif +#ifdef CONFIG_ARCH_TEGRA_3x_SOC +static __initdata struct tegra_clk_init_table tegra30_clk_init_table[] = { + /* name parent rate enabled */ + { "clk_m", NULL, 0, true }, + { "pll_p", "clk_m", 408000000, true }, + { "pll_p_out1", "pll_p", 9600000, true }, + { NULL, NULL, 0, 0}, +}; +#endif + + static void __init tegra_init_cache(u32 tag_latency, u32 data_latency) { #ifdef CONFIG_CACHE_L2X0 @@ -127,6 +138,7 @@ void __init tegra30_init_early(void) { tegra_init_fuse(); tegra30_init_clocks(); + tegra_clk_init_from_table(tegra30_clk_init_table); tegra_init_cache(0x441, 0x551); tegra_pmc_init(); tegra_powergate_init(); -- cgit v0.10.2 From 18b81fb733356025a6ad48b85092a0456e348ff0 Mon Sep 17 00:00:00 2001 From: Stephen Warren Date: Mon, 26 Mar 2012 16:49:39 -0600 Subject: ARM: tegra: set up audio clocks for tegra30 dt Set up the audio clock tree for Tegra30 in an equivalent fashion to the existing setup for Tegra20. Signed-off-by: Stephen Warren Acked-by: Olof Johansson diff --git a/arch/arm/mach-tegra/board-dt-tegra30.c b/arch/arm/mach-tegra/board-dt-tegra30.c index 5f7c03e..3de21c0 100644 --- a/arch/arm/mach-tegra/board-dt-tegra30.c +++ b/arch/arm/mach-tegra/board-dt-tegra30.c @@ -57,6 +57,15 @@ struct of_dev_auxdata tegra30_auxdata_lookup[] __initdata = { static __initdata struct tegra_clk_init_table tegra_dt_clk_init_table[] = { /* name parent rate enabled */ { "uarta", "pll_p", 408000000, true }, + { "pll_a", "pll_p_out1", 564480000, true }, + { "pll_a_out0", "pll_a", 11289600, true }, + { "extern1", "pll_a_out0", 0, true }, + { "clk_out_1", "extern1", 0, true }, + { "i2s0", "pll_a_out0", 11289600, false}, + { "i2s1", "pll_a_out0", 11289600, false}, + { "i2s2", "pll_a_out0", 11289600, false}, + { "i2s3", "pll_a_out0", 11289600, false}, + { "i2s4", "pll_a_out0", 11289600, false}, { NULL, NULL, 0, 0}, }; -- cgit v0.10.2 From 5657d98deadb5e245bf5608cfb06e86898032125 Mon Sep 17 00:00:00 2001 From: Stephen Warren Date: Tue, 27 Mar 2012 11:37:47 -0600 Subject: ARM: tegra: add AUXDATA required for audio Both the Tegra30 I2S and AHUB modules used clocks, and hence currently require AUXDATA in order to get specific device names so that clock lookups work. Signed-off-by: Stephen Warren Acked-by: Olof Johansson diff --git a/arch/arm/mach-tegra/board-dt-tegra30.c b/arch/arm/mach-tegra/board-dt-tegra30.c index 3de21c0..d96dae0 100644 --- a/arch/arm/mach-tegra/board-dt-tegra30.c +++ b/arch/arm/mach-tegra/board-dt-tegra30.c @@ -51,6 +51,7 @@ struct of_dev_auxdata tegra30_auxdata_lookup[] __initdata = { OF_DEV_AUXDATA("nvidia,tegra20-i2c", 0x7000C500, "tegra-i2c.2", NULL), OF_DEV_AUXDATA("nvidia,tegra20-i2c", 0x7000C700, "tegra-i2c.3", NULL), OF_DEV_AUXDATA("nvidia,tegra20-i2c", 0x7000D000, "tegra-i2c.4", NULL), + OF_DEV_AUXDATA("nvidia,tegra30-ahub", 0x70080000, "tegra30-ahub", NULL), {} }; -- cgit v0.10.2 From 9ee6a5c4f4cd2b0a732502c5f11b3ee8f13fac76 Mon Sep 17 00:00:00 2001 From: Stephen Warren Date: Tue, 27 Mar 2012 12:40:53 -0600 Subject: ARM: dt: tegra30.dtsi: Add audio-related nodes Add nodes for the Tegra30 AHUB and I2S controllers. Signed-off-by: Stephen Warren Acked-by: Olof Johansson diff --git a/arch/arm/boot/dts/tegra30.dtsi b/arch/arm/boot/dts/tegra30.dtsi index 62a7b39..15200a9 100644 --- a/arch/arm/boot/dts/tegra30.dtsi +++ b/arch/arm/boot/dts/tegra30.dtsi @@ -183,4 +183,45 @@ reg = < 0x70000868 0xd0 /* Pad control registers */ 0x70003000 0x3e0 >; /* Mux registers */ }; + + ahub { + compatible = "nvidia,tegra30-ahub"; + reg = <0x70080000 0x200 0x70080200 0x100>; + interrupts = < 0 103 0x04 >; + nvidia,dma-request-selector = <&apbdma 1>; + + ranges; + #address-cells = <1>; + #size-cells = <1>; + + tegra_i2s0: i2s@70080300 { + compatible = "nvidia,tegra30-i2s"; + reg = <0x70080300 0x100>; + nvidia,ahub-cif-ids = <4 4>; + }; + + tegra_i2s1: i2s@70080400 { + compatible = "nvidia,tegra30-i2s"; + reg = <0x70080400 0x100>; + nvidia,ahub-cif-ids = <5 5>; + }; + + tegra_i2s2: i2s@70080500 { + compatible = "nvidia,tegra30-i2s"; + reg = <0x70080500 0x100>; + nvidia,ahub-cif-ids = <6 6>; + }; + + tegra_i2s3: i2s@70080600 { + compatible = "nvidia,tegra30-i2s"; + reg = <0x70080600 0x100>; + nvidia,ahub-cif-ids = <7 7>; + }; + + tegra_i2s4: i2s@70080700 { + compatible = "nvidia,tegra30-i2s"; + reg = <0x70080700 0x100>; + nvidia,ahub-cif-ids = <8 8>; + }; + }; }; -- cgit v0.10.2 From 8c6a3852f639736ca7cb0f5e7b3fd0f314b0fc6a Mon Sep 17 00:00:00 2001 From: Stephen Warren Date: Tue, 27 Mar 2012 12:41:37 -0600 Subject: ARM: dt: tegra cardhu: basic audio support Add WM8903 codec nodes, and top-level sound complex node for basic analog audio over headset jack and internal speakers. Signed-off-by: Stephen Warren Acked-by: Olof Johansson diff --git a/arch/arm/boot/dts/tegra-cardhu.dts b/arch/arm/boot/dts/tegra-cardhu.dts index 0a9f34a..ab8d901 100644 --- a/arch/arm/boot/dts/tegra-cardhu.dts +++ b/arch/arm/boot/dts/tegra-cardhu.dts @@ -51,6 +51,15 @@ nvidia,pull = <2>; nvidia,tristate = <0>; }; + dap2_fs_pa2 { + nvidia,pins = "dap2_fs_pa2", + "dap2_sclk_pa3", + "dap2_din_pa4", + "dap2_dout_pa5"; + nvidia,function = "i2s1"; + nvidia,pull = <0>; + nvidia,tristate = <0>; + }; }; }; @@ -92,6 +101,20 @@ i2c@7000d000 { clock-frequency = <100000>; + + wm8903: wm8903@1a { + compatible = "wlf,wm8903"; + reg = <0x1a>; + interrupt-parent = <&gpio>; + interrupts = <179 0x04>; /* gpio PW3 */ + + gpio-controller; + #gpio-cells = <2>; + + micdet-cfg = <0>; + micdet-delay = <100>; + gpio-cfg = <0xffffffff 0xffffffff 0 0xffffffff 0xffffffff>; + }; }; sdhci@78000000 { @@ -111,4 +134,44 @@ sdhci@78000400 { support-8bit; }; + + ahub@70080000 { + i2s@70080300 { + status = "disable"; + }; + + i2s@70080500 { + status = "disable"; + }; + + i2s@70080600 { + status = "disable"; + }; + + i2s@70080700 { + status = "disable"; + }; + }; + + sound { + compatible = "nvidia,tegra-audio-wm8903-cardhu", + "nvidia,tegra-audio-wm8903"; + nvidia,model = "NVIDIA Tegra Cardhu"; + + nvidia,audio-routing = + "Headphone Jack", "HPOUTR", + "Headphone Jack", "HPOUTL", + "Int Spk", "ROP", + "Int Spk", "RON", + "Int Spk", "LOP", + "Int Spk", "LON", + "Mic Jack", "MICBIAS", + "IN1L", "Mic Jack"; + + nvidia,i2s-controller = <&tegra_i2s1>; + nvidia,audio-codec = <&wm8903>; + + nvidia,spkr-en-gpios = <&wm8903 2 0>; + nvidia,hp-det-gpios = <&gpio 178 0>; /* gpio PW2 */ + }; }; -- cgit v0.10.2 From c8b62ab41f76218efca5e4baa5c22ef52a9fe3a5 Mon Sep 17 00:00:00 2001 From: Allen Martin Date: Fri, 10 Sep 2010 09:17:33 -0500 Subject: ARM: tegra: Add pllc clock init table pll_c will be used as a clock source. Fill in tegra_pll_c_freq_table[] so that it's possible to explicitly initialize the PLL. NVIDIA's downstream nv-3.1 kernel and the ChromeOS kernel have different pll_c tables. nv-3.1 contains entries for 522MHz and 598MHz output, whereas the ChromeOS kernel contains entries for 600MHz output. I chose to upstream the ChromeOS values for now, since the 600MHz rate appears to match the default rate of this PLL when the HW boots, and it's not clear to me why 522 or 598MHz are more useful. Signed-off-by: Allen Martin Signed-off-by: Olof Johansson Signed-off-by: Stephen Warren [swarren: wrote commit description] diff --git a/arch/arm/mach-tegra/tegra2_clocks.c b/arch/arm/mach-tegra/tegra2_clocks.c index 592a4ee..a357ad2 100644 --- a/arch/arm/mach-tegra/tegra2_clocks.c +++ b/arch/arm/mach-tegra/tegra2_clocks.c @@ -1486,6 +1486,10 @@ static struct clk tegra_clk_m = { }; static struct clk_pll_freq_table tegra_pll_c_freq_table[] = { + { 12000000, 600000000, 600, 12, 1, 8 }, + { 13000000, 600000000, 600, 13, 1, 8 }, + { 19200000, 600000000, 500, 16, 1, 6 }, + { 26000000, 600000000, 600, 26, 1, 8 }, { 0, 0, 0, 0, 0, 0 }, }; -- cgit v0.10.2 From 60f975b98cf41476ba0e156f7523b197b046cf2b Mon Sep 17 00:00:00 2001 From: Stephen Warren Date: Thu, 12 Apr 2012 14:09:39 -0600 Subject: ARM: tegra: reparent sclk to pll_c_out1 pll_p_out4 needs to be used for other purposes. Reparent sclk so that it runs from pll_c. Change sclk's rate to 120MHz from 108MHz since this is the lowest precise rate that can be achieved by dividing the pll_c rate without reducing the sclk rate. (600/5=120, 600/5.5=109.0909..., 600/6=100). Signed-off-by: Stephen Warren diff --git a/arch/arm/mach-tegra/common.c b/arch/arm/mach-tegra/common.c index 22df10f..e969004 100644 --- a/arch/arm/mach-tegra/common.c +++ b/arch/arm/mach-tegra/common.c @@ -83,8 +83,10 @@ static __initdata struct tegra_clk_init_table tegra20_clk_init_table[] = { { "pll_p_out2", "pll_p", 48000000, true }, { "pll_p_out3", "pll_p", 72000000, true }, { "pll_p_out4", "pll_p", 108000000, true }, - { "sclk", "pll_p_out4", 108000000, true }, - { "hclk", "sclk", 108000000, true }, + { "pll_c", "clk_m", 600000000, true }, + { "pll_c_out1", "pll_c", 120000000, true }, + { "sclk", "pll_c_out1", 120000000, true }, + { "hclk", "sclk", 120000000, true }, { "pclk", "hclk", 54000000, true }, { "csite", NULL, 0, true }, { "emc", NULL, 0, true }, -- cgit v0.10.2 From 7ff4db0967bd7d617c77dc5a66c0d95166277817 Mon Sep 17 00:00:00 2001 From: Stephen Warren Date: Fri, 20 Apr 2012 16:58:18 -0600 Subject: ARM: tegra: fix pclk rate Commit 40f9cf0 "ARM: tegra: reparent sclk to pll_c_out1" changed the rate of hclk. Since pclk is derived from that, and only has integer dividers, the pclk rate needs to change in the same fashion, from 54MHz to 60MHz. Signed-off-by: Stephen Warren diff --git a/arch/arm/mach-tegra/common.c b/arch/arm/mach-tegra/common.c index e969004..a4fba88 100644 --- a/arch/arm/mach-tegra/common.c +++ b/arch/arm/mach-tegra/common.c @@ -87,7 +87,7 @@ static __initdata struct tegra_clk_init_table tegra20_clk_init_table[] = { { "pll_c_out1", "pll_c", 120000000, true }, { "sclk", "pll_c_out1", 120000000, true }, { "hclk", "sclk", 120000000, true }, - { "pclk", "hclk", 54000000, true }, + { "pclk", "hclk", 60000000, true }, { "csite", NULL, 0, true }, { "emc", NULL, 0, true }, { "cpu", NULL, 0, true }, -- cgit v0.10.2 From 9abafa021e223f04d6589ee2b977bbaf2e1f1367 Mon Sep 17 00:00:00 2001 From: Stephen Warren Date: Thu, 12 Apr 2012 14:13:05 -0600 Subject: ARM: tegra: change pll_p_out4's rate to 24MHz pll_p_out4 is used on all/most Tegra boards to drive the cdev2 output pin to provide a reference clock to a ULPI USB PHY. This reference clock must run at 24MHz, and the cdev2 output has no additional dividers. Remove board-paz00.c's now-duplicate initialization of this clock. Reported-by: Marc Dietrich Signed-off-by: Stephen Warren diff --git a/arch/arm/mach-tegra/board-paz00.c b/arch/arm/mach-tegra/board-paz00.c index 330afdf..aebfa40 100644 --- a/arch/arm/mach-tegra/board-paz00.c +++ b/arch/arm/mach-tegra/board-paz00.c @@ -176,7 +176,6 @@ static __initdata struct tegra_clk_init_table paz00_clk_init_table[] = { { "uarta", "pll_p", 216000000, true }, { "uartc", "pll_p", 216000000, true }, - { "pll_p_out4", "pll_p", 24000000, true }, { "usbd", "clk_m", 12000000, false }, { "usb2", "clk_m", 12000000, false }, { "usb3", "clk_m", 12000000, false }, diff --git a/arch/arm/mach-tegra/common.c b/arch/arm/mach-tegra/common.c index a4fba88..f18f615 100644 --- a/arch/arm/mach-tegra/common.c +++ b/arch/arm/mach-tegra/common.c @@ -82,7 +82,7 @@ static __initdata struct tegra_clk_init_table tegra20_clk_init_table[] = { { "pll_p_out1", "pll_p", 28800000, true }, { "pll_p_out2", "pll_p", 48000000, true }, { "pll_p_out3", "pll_p", 72000000, true }, - { "pll_p_out4", "pll_p", 108000000, true }, + { "pll_p_out4", "pll_p", 24000000, true }, { "pll_c", "clk_m", 600000000, true }, { "pll_c_out1", "pll_c", 120000000, true }, { "sclk", "pll_c_out1", 120000000, true }, -- cgit v0.10.2 From 60d148b9f838b718e4808061ee6d5833de77fd9c Mon Sep 17 00:00:00 2001 From: Stephen Warren Date: Thu, 12 Apr 2012 15:03:52 -0600 Subject: ARM: tegra: don't hard-code USB ULPI PHY reset_gpio Not all boards use GPIO_PV0 as the ULPI PHY reset signal. Instead of hard-coding this GPIO into devices.c, make the board files set it explicitly. This will allow the PHY code to differentiate between set and unset values, and hence know when to read the value from device tree. Signed-off-by: Stephen Warren diff --git a/arch/arm/mach-tegra/board-paz00.c b/arch/arm/mach-tegra/board-paz00.c index aebfa40..e31317d 100644 --- a/arch/arm/mach-tegra/board-paz00.c +++ b/arch/arm/mach-tegra/board-paz00.c @@ -159,6 +159,8 @@ static void paz00_i2c_init(void) static void paz00_usb_init(void) { + tegra_ehci2_ulpi_phy_config.reset_gpio = TEGRA_ULPI_RST; + platform_device_register(&tegra_ehci2_device); platform_device_register(&tegra_ehci3_device); } diff --git a/arch/arm/mach-tegra/board-trimslice.c b/arch/arm/mach-tegra/board-trimslice.c index f6f5b6a..24f1678 100644 --- a/arch/arm/mach-tegra/board-trimslice.c +++ b/arch/arm/mach-tegra/board-trimslice.c @@ -117,6 +117,8 @@ static void trimslice_usb_init(void) pdata = tegra_ehci1_device.dev.platform_data; pdata->vbus_gpio = TRIMSLICE_GPIO_USB1_MODE; + tegra_ehci2_ulpi_phy_config.reset_gpio = TEGRA_GPIO_PV0; + platform_device_register(&tegra_ehci3_device); platform_device_register(&tegra_ehci2_device); platform_device_register(&tegra_ehci1_device); diff --git a/arch/arm/mach-tegra/devices.c b/arch/arm/mach-tegra/devices.c index bd3035e..9fcb9a5 100644 --- a/arch/arm/mach-tegra/devices.c +++ b/arch/arm/mach-tegra/devices.c @@ -439,9 +439,8 @@ static struct resource tegra_usb3_resources[] = { }, }; -static struct tegra_ulpi_config tegra_ehci2_ulpi_phy_config = { - /* All existing boards use GPIO PV0 for phy reset */ - .reset_gpio = TEGRA_GPIO_PV0, +struct tegra_ulpi_config tegra_ehci2_ulpi_phy_config = { + .reset_gpio = -1, .clk = "cdev2", }; diff --git a/arch/arm/mach-tegra/devices.h b/arch/arm/mach-tegra/devices.h index ec45567..4290ea0 100644 --- a/arch/arm/mach-tegra/devices.h +++ b/arch/arm/mach-tegra/devices.h @@ -22,6 +22,10 @@ #include #include +#include + +extern struct tegra_ulpi_config tegra_ehci2_ulpi_phy_config; + extern struct tegra_ehci_platform_data tegra_ehci1_pdata; extern struct tegra_ehci_platform_data tegra_ehci2_pdata; extern struct tegra_ehci_platform_data tegra_ehci3_pdata; -- cgit v0.10.2 From aa607ebf93a5fc26275a575781399df971dd1b91 Mon Sep 17 00:00:00 2001 From: Stephen Warren Date: Thu, 12 Apr 2012 15:46:49 -0600 Subject: ARM: tegra: add USB ULPI PHY reset GPIO to device tree ULPI PHYs have a reset signal, and different boards use a different GPIO for this task. Add a property to device tree to represent this. I'm not sure if adding this property to the EHCI controller node is entirely correct; perhaps eventually we should have explicit separate nodes for the various PHYs. However, we don't have that right now, so this binding seems like a reasonable choice. Cc: Cc: Greg Kroah-Hartman Cc: Signed-off-by: Stephen Warren diff --git a/Documentation/devicetree/bindings/usb/tegra-usb.txt b/Documentation/devicetree/bindings/usb/tegra-usb.txt index 007005d..e9b005d 100644 --- a/Documentation/devicetree/bindings/usb/tegra-usb.txt +++ b/Documentation/devicetree/bindings/usb/tegra-usb.txt @@ -12,6 +12,9 @@ Required properties : - nvidia,vbus-gpio : If present, specifies a gpio that needs to be activated for the bus to be powered. +Required properties for phy_type == ulpi: + - nvidia,phy-reset-gpio : The GPIO used to reset the PHY. + Optional properties: - dr_mode : dual role mode. Indicates the working mode for nvidia,tegra20-ehci compatible controllers. Can be "host", "peripheral", diff --git a/arch/arm/boot/dts/tegra-harmony.dts b/arch/arm/boot/dts/tegra-harmony.dts index 1a0b1f1..59bf1cf 100644 --- a/arch/arm/boot/dts/tegra-harmony.dts +++ b/arch/arm/boot/dts/tegra-harmony.dts @@ -336,4 +336,8 @@ power-gpios = <&gpio 70 0>; /* gpio PI6 */ support-8bit; }; + + usb@c5004000 { + nvidia,phy-reset-gpio = <&gpio 169 0>; /* gpio PV1 */ + }; }; diff --git a/arch/arm/boot/dts/tegra-paz00.dts b/arch/arm/boot/dts/tegra-paz00.dts index 10943fb..fad92f2 100644 --- a/arch/arm/boot/dts/tegra-paz00.dts +++ b/arch/arm/boot/dts/tegra-paz00.dts @@ -351,4 +351,8 @@ linux,default-trigger = "rfkill0"; }; }; + + usb@c5004000 { + nvidia,phy-reset-gpio = <&gpio 168 0>; /* gpio PV0 */ + }; }; diff --git a/arch/arm/boot/dts/tegra-seaboard.dts b/arch/arm/boot/dts/tegra-seaboard.dts index ec33116..ed0a2f5 100644 --- a/arch/arm/boot/dts/tegra-seaboard.dts +++ b/arch/arm/boot/dts/tegra-seaboard.dts @@ -415,4 +415,8 @@ 0x00000000 0x00000000 0x00000000 0x00000000 >; }; }; + + usb@c5004000 { + nvidia,phy-reset-gpio = <&gpio 169 0>; /* gpio PV1 */ + }; }; diff --git a/arch/arm/boot/dts/tegra-trimslice.dts b/arch/arm/boot/dts/tegra-trimslice.dts index 98efd5b..71b73aa 100644 --- a/arch/arm/boot/dts/tegra-trimslice.dts +++ b/arch/arm/boot/dts/tegra-trimslice.dts @@ -304,4 +304,8 @@ cd-gpios = <&gpio 121 0>; wp-gpios = <&gpio 122 0>; }; + + usb@c5004000 { + nvidia,phy-reset-gpio = <&gpio 168 0>; /* gpio PV0 */ + }; }; diff --git a/arch/arm/boot/dts/tegra-ventana.dts b/arch/arm/boot/dts/tegra-ventana.dts index 71eb2e5..bd074cf 100644 --- a/arch/arm/boot/dts/tegra-ventana.dts +++ b/arch/arm/boot/dts/tegra-ventana.dts @@ -335,4 +335,8 @@ sdhci@c8000600 { support-8bit; }; + + usb@c5004000 { + nvidia,phy-reset-gpio = <&gpio 169 0>; /* gpio PV1 */ + }; }; diff --git a/arch/arm/mach-tegra/include/mach/usb_phy.h b/arch/arm/mach-tegra/include/mach/usb_phy.h index de1a0f6..935ce9f 100644 --- a/arch/arm/mach-tegra/include/mach/usb_phy.h +++ b/arch/arm/mach-tegra/include/mach/usb_phy.h @@ -61,8 +61,8 @@ struct tegra_usb_phy { struct usb_phy *ulpi; }; -struct tegra_usb_phy *tegra_usb_phy_open(int instance, void __iomem *regs, - void *config, enum tegra_usb_phy_mode phy_mode); +struct tegra_usb_phy *tegra_usb_phy_open(struct device *dev, int instance, + void __iomem *regs, void *config, enum tegra_usb_phy_mode phy_mode); int tegra_usb_phy_power_on(struct tegra_usb_phy *phy); diff --git a/arch/arm/mach-tegra/usb_phy.c b/arch/arm/mach-tegra/usb_phy.c index d71d2fe..54e353c 100644 --- a/arch/arm/mach-tegra/usb_phy.c +++ b/arch/arm/mach-tegra/usb_phy.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include @@ -654,8 +655,8 @@ static void ulpi_phy_power_off(struct tegra_usb_phy *phy) clk_disable(phy->clk); } -struct tegra_usb_phy *tegra_usb_phy_open(int instance, void __iomem *regs, - void *config, enum tegra_usb_phy_mode phy_mode) +struct tegra_usb_phy *tegra_usb_phy_open(struct device *dev, int instance, + void __iomem *regs, void *config, enum tegra_usb_phy_mode phy_mode) { struct tegra_usb_phy *phy; struct tegra_ulpi_config *ulpi_config; @@ -711,6 +712,16 @@ struct tegra_usb_phy *tegra_usb_phy_open(int instance, void __iomem *regs, err = -ENXIO; goto err1; } + if (!gpio_is_valid(ulpi_config->reset_gpio)) + ulpi_config->reset_gpio = + of_get_named_gpio(dev->of_node, + "nvidia,phy-reset-gpio", 0); + if (!gpio_is_valid(ulpi_config->reset_gpio)) { + pr_err("%s: invalid reset gpio: %d\n", __func__, + ulpi_config->reset_gpio); + err = -EINVAL; + goto err1; + } gpio_request(ulpi_config->reset_gpio, "ulpi_phy_reset_b"); gpio_direction_output(ulpi_config->reset_gpio, 0); phy->ulpi = otg_ulpi_create(&ulpi_viewport_access_ops, 0); diff --git a/drivers/usb/host/ehci-tegra.c b/drivers/usb/host/ehci-tegra.c index 9692bef..14532fe 100644 --- a/drivers/usb/host/ehci-tegra.c +++ b/drivers/usb/host/ehci-tegra.c @@ -708,8 +708,9 @@ static int tegra_ehci_probe(struct platform_device *pdev) } } - tegra->phy = tegra_usb_phy_open(instance, hcd->regs, pdata->phy_config, - TEGRA_USB_PHY_MODE_HOST); + tegra->phy = tegra_usb_phy_open(&pdev->dev, instance, hcd->regs, + pdata->phy_config, + TEGRA_USB_PHY_MODE_HOST); if (IS_ERR(tegra->phy)) { dev_err(&pdev->dev, "Failed to open USB phy\n"); err = -ENXIO; -- cgit v0.10.2 From 563da21b1d1878736905bbff0f096fcd960d57c5 Mon Sep 17 00:00:00 2001 From: Stephen Warren Date: Fri, 13 Apr 2012 16:35:20 -0600 Subject: ARM: dt: tegra: pinmux changes for USB ULPI Ensure that the USB ULPI signals are not tri-stated, and have no pull- up or pull-down. Ensure that the pingroup hosting the USB ULPI reset signal (GPIO PV0 or PV1 depending on the board, so UAC) is not tri-stated, and has no pull- up or pull-down. This change appears larger than it is due to the grouping and sorting of the pin configuration data. Signed-off-by: Stephen Warren diff --git a/arch/arm/boot/dts/tegra-harmony.dts b/arch/arm/boot/dts/tegra-harmony.dts index 59bf1cf..6857cec 100644 --- a/arch/arm/boot/dts/tegra-harmony.dts +++ b/arch/arm/boot/dts/tegra-harmony.dts @@ -167,28 +167,28 @@ }; conf_ata { nvidia,pins = "ata", "atb", "atc", "atd", "ate", - "cdev1", "dap1", "dtb", "gma", "gmb", - "gmc", "gmd", "gme", "gpu7", "gpv", - "i2cp", "pta", "rm", "slxa", "slxk", - "spia", "spib"; + "cdev1", "cdev2", "dap1", "dtb", "gma", + "gmb", "gmc", "gmd", "gme", "gpu7", + "gpv", "i2cp", "pta", "rm", "slxa", + "slxk", "spia", "spib", "uac"; nvidia,pull = <0>; nvidia,tristate = <0>; }; - conf_cdev2 { - nvidia,pins = "cdev2", "csus", "spid", "spif"; - nvidia,pull = <1>; - nvidia,tristate = <1>; - }; conf_ck32 { nvidia,pins = "ck32", "ddrc", "pmca", "pmcb", "pmcc", "pmcd", "pmce", "xm2c", "xm2d"; nvidia,pull = <0>; }; + conf_csus { + nvidia,pins = "csus", "spid", "spif"; + nvidia,pull = <1>; + nvidia,tristate = <1>; + }; conf_crtp { nvidia,pins = "crtp", "dap2", "dap3", "dap4", "dtc", "dte", "dtf", "gpu", "sdio1", "slxc", "slxd", "spdi", "spdo", "spig", - "uac", "uda"; + "uda"; nvidia,pull = <0>; nvidia,tristate = <1>; }; diff --git a/arch/arm/boot/dts/tegra-paz00.dts b/arch/arm/boot/dts/tegra-paz00.dts index fad92f2..34a9177 100644 --- a/arch/arm/boot/dts/tegra-paz00.dts +++ b/arch/arm/boot/dts/tegra-paz00.dts @@ -159,18 +159,14 @@ }; conf_ata { nvidia,pins = "ata", "atb", "atc", "atd", "ate", - "cdev1", "dap1", "dap2", "dtf", "gma", - "gmb", "gmc", "gmd", "gme", "gpu", - "gpu7", "gpv", "i2cp", "pta", "rm", - "sdio1", "slxk", "spdo", "uac", "uda"; + "cdev1", "cdev2", "dap1", "dap2", "dtf", + "gma", "gmb", "gmc", "gmd", "gme", + "gpu", "gpu7", "gpv", "i2cp", "pta", + "rm", "sdio1", "slxk", "spdo", "uac", + "uda"; nvidia,pull = <0>; nvidia,tristate = <0>; }; - conf_cdev2 { - nvidia,pins = "cdev2"; - nvidia,pull = <1>; - nvidia,tristate = <0>; - }; conf_ck32 { nvidia,pins = "ck32", "ddrc", "pmca", "pmcb", "pmcc", "pmcd", "pmce", "xm2c", "xm2d"; diff --git a/arch/arm/boot/dts/tegra-trimslice.dts b/arch/arm/boot/dts/tegra-trimslice.dts index 71b73aa..e4fcf9a 100644 --- a/arch/arm/boot/dts/tegra-trimslice.dts +++ b/arch/arm/boot/dts/tegra-trimslice.dts @@ -182,23 +182,23 @@ nvidia,tristate = <1>; }; conf_atb { - nvidia,pins = "atb", "cdev1", "dap1", "gma", - "gmc", "gmd", "gpu", "gpu7", "gpv", - "sdio1", "slxa", "slxk", "uac"; + nvidia,pins = "atb", "cdev1", "cdev2", "dap1", + "gma", "gmc", "gmd", "gpu", "gpu7", + "gpv", "sdio1", "slxa", "slxk", "uac"; nvidia,pull = <0>; nvidia,tristate = <0>; }; - conf_cdev2 { - nvidia,pins = "cdev2", "csus", "spia", "spib", - "spid", "spif"; - nvidia,pull = <1>; - nvidia,tristate = <1>; - }; conf_ck32 { nvidia,pins = "ck32", "ddrc", "pmca", "pmcb", "pmcc", "pmcd", "pmce", "xm2c", "xm2d"; nvidia,pull = <0>; }; + conf_csus { + nvidia,pins = "csus", "spia", "spib", + "spid", "spif"; + nvidia,pull = <1>; + nvidia,tristate = <1>; + }; conf_ddc { nvidia,pins = "ddc", "dtf", "rm", "sdc", "sdd"; nvidia,pull = <2>; -- cgit v0.10.2 From 6ce34a5fb4955fac1eebe080e1c2784bc8710449 Mon Sep 17 00:00:00 2001 From: Chao Xie Date: Sun, 1 Apr 2012 10:08:01 +0800 Subject: Input: pxa27x_keypad keep clock on as wakeup source When the keypad is used as wake up source, the clock can not be disabled. Or it can not detect key pressing. If the keypad is used as wake up source, when resume back, do not enable the clock and configure it again because the register content is retained. Signed-off-by: Chao Xie Signed-off-by: Haojian Zhuang diff --git a/drivers/input/keyboard/pxa27x_keypad.c b/drivers/input/keyboard/pxa27x_keypad.c index 29fe1b2..b07771e 100644 --- a/drivers/input/keyboard/pxa27x_keypad.c +++ b/drivers/input/keyboard/pxa27x_keypad.c @@ -399,7 +399,7 @@ static int pxa27x_keypad_open(struct input_dev *dev) struct pxa27x_keypad *keypad = input_get_drvdata(dev); /* Enable unit clock */ - clk_enable(keypad->clk); + clk_prepare_enable(keypad->clk); pxa27x_keypad_config(keypad); return 0; @@ -410,7 +410,7 @@ static void pxa27x_keypad_close(struct input_dev *dev) struct pxa27x_keypad *keypad = input_get_drvdata(dev); /* Disable clock unit */ - clk_disable(keypad->clk); + clk_disable_unprepare(keypad->clk); } #ifdef CONFIG_PM @@ -419,10 +419,14 @@ static int pxa27x_keypad_suspend(struct device *dev) struct platform_device *pdev = to_platform_device(dev); struct pxa27x_keypad *keypad = platform_get_drvdata(pdev); - clk_disable(keypad->clk); - + /* + * If the keypad is used a wake up source, clock can not be disabled. + * Or it can not detect the key pressing. + */ if (device_may_wakeup(&pdev->dev)) enable_irq_wake(keypad->irq); + else + clk_disable_unprepare(keypad->clk); return 0; } @@ -433,19 +437,24 @@ static int pxa27x_keypad_resume(struct device *dev) struct pxa27x_keypad *keypad = platform_get_drvdata(pdev); struct input_dev *input_dev = keypad->input_dev; - if (device_may_wakeup(&pdev->dev)) + /* + * If the keypad is used as wake up source, the clock is not turned + * off. So do not need configure it again. + */ + if (device_may_wakeup(&pdev->dev)) { disable_irq_wake(keypad->irq); + } else { + mutex_lock(&input_dev->mutex); - mutex_lock(&input_dev->mutex); + if (input_dev->users) { + /* Enable unit clock */ + clk_prepare_enable(keypad->clk); + pxa27x_keypad_config(keypad); + } - if (input_dev->users) { - /* Enable unit clock */ - clk_enable(keypad->clk); - pxa27x_keypad_config(keypad); + mutex_unlock(&input_dev->mutex); } - mutex_unlock(&input_dev->mutex); - return 0; } -- cgit v0.10.2 From 5545fa897a0a1a34249e98ea4244b256cbe58df3 Mon Sep 17 00:00:00 2001 From: Chao Xie Date: Sun, 1 Apr 2012 10:08:02 +0800 Subject: Input: pxa27x_keypad bug fix for direct_key_mask When direcct_key_num is 0, the mask should be 0. When direcct_key_num is 1, the mask should be 0b1. Signed-off-by: Chao Xie Signed-off-by: Haojian Zhuang diff --git a/drivers/input/keyboard/pxa27x_keypad.c b/drivers/input/keyboard/pxa27x_keypad.c index b07771e..5d71720 100644 --- a/drivers/input/keyboard/pxa27x_keypad.c +++ b/drivers/input/keyboard/pxa27x_keypad.c @@ -383,7 +383,7 @@ static void pxa27x_keypad_config(struct pxa27x_keypad *keypad) if (pdata->direct_key_num > direct_key_num) direct_key_num = pdata->direct_key_num; - keypad->direct_key_mask = ((2 << direct_key_num) - 1) & ~mask; + keypad->direct_key_mask = ((1 << direct_key_num) - 1) & ~mask; /* enable direct key */ if (direct_key_num) -- cgit v0.10.2 From ee1d8040a6b0a8106c7d9e80b9193077224493ba Mon Sep 17 00:00:00 2001 From: Chao Xie Date: Sun, 1 Apr 2012 10:08:03 +0800 Subject: Input: pxa27x_keypad direct key may be low active KPDK_DK only indicates the pin level of direct key. So it is related to board, and low level may be active which indicates that a key is pressed. Signed-off-by: Chao Xie Signed-off-by: Haojian Zhuang diff --git a/arch/arm/plat-pxa/include/plat/pxa27x_keypad.h b/arch/arm/plat-pxa/include/plat/pxa27x_keypad.h index abcc36e..7ffb16b 100644 --- a/arch/arm/plat-pxa/include/plat/pxa27x_keypad.h +++ b/arch/arm/plat-pxa/include/plat/pxa27x_keypad.h @@ -44,6 +44,8 @@ struct pxa27x_keypad_platform_data { /* direct keys */ int direct_key_num; unsigned int direct_key_map[MAX_DIRECT_KEY_NUM]; + /* the key output may be low active */ + int direct_key_low_active; /* rotary encoders 0 */ int enable_rotary0; diff --git a/drivers/input/keyboard/pxa27x_keypad.c b/drivers/input/keyboard/pxa27x_keypad.c index 5d71720..a60f14e 100644 --- a/drivers/input/keyboard/pxa27x_keypad.c +++ b/drivers/input/keyboard/pxa27x_keypad.c @@ -311,7 +311,15 @@ static void pxa27x_keypad_scan_direct(struct pxa27x_keypad *keypad) if (pdata->enable_rotary0 || pdata->enable_rotary1) pxa27x_keypad_scan_rotary(keypad); - new_state = KPDK_DK(kpdk) & keypad->direct_key_mask; + /* + * The KPDR_DK only output the key pin level, so it relates to board, + * and low level may be active. + */ + if (pdata->direct_key_low_active) + new_state = ~KPDK_DK(kpdk) & keypad->direct_key_mask; + else + new_state = KPDK_DK(kpdk) & keypad->direct_key_mask; + bits_changed = keypad->direct_key_state ^ new_state; if (bits_changed == 0) -- cgit v0.10.2 From fb054bf26914ee4c55cf149bc5b2a8e2c89fb81f Mon Sep 17 00:00:00 2001 From: Chao Xie Date: Sun, 1 Apr 2012 10:08:04 +0800 Subject: Input: pxa27x_keypad add choice to set direct_key_mask Direct keys usage may not start from KP_DKIN0, add a msk option to configure the specifics for platforms that can skip some keys. Signed-off-by: Chao Xie Signed-off-by: Haojian Zhuang diff --git a/arch/arm/plat-pxa/include/plat/pxa27x_keypad.h b/arch/arm/plat-pxa/include/plat/pxa27x_keypad.h index 7ffb16b..5ce8d5e6 100644 --- a/arch/arm/plat-pxa/include/plat/pxa27x_keypad.h +++ b/arch/arm/plat-pxa/include/plat/pxa27x_keypad.h @@ -46,6 +46,8 @@ struct pxa27x_keypad_platform_data { unsigned int direct_key_map[MAX_DIRECT_KEY_NUM]; /* the key output may be low active */ int direct_key_low_active; + /* give board a chance to choose the start direct key */ + unsigned int direct_key_mask; /* rotary encoders 0 */ int enable_rotary0; diff --git a/drivers/input/keyboard/pxa27x_keypad.c b/drivers/input/keyboard/pxa27x_keypad.c index a60f14e..7f7b724 100644 --- a/drivers/input/keyboard/pxa27x_keypad.c +++ b/drivers/input/keyboard/pxa27x_keypad.c @@ -391,7 +391,14 @@ static void pxa27x_keypad_config(struct pxa27x_keypad *keypad) if (pdata->direct_key_num > direct_key_num) direct_key_num = pdata->direct_key_num; - keypad->direct_key_mask = ((1 << direct_key_num) - 1) & ~mask; + /* + * Direct keys usage may not start from KP_DKIN0, check the platfrom + * mask data to config the specific. + */ + if (pdata->direct_key_mask) + keypad->direct_key_mask = pdata->direct_key_mask; + else + keypad->direct_key_mask = ((1 << direct_key_num) - 1) & ~mask; /* enable direct key */ if (direct_key_num) -- cgit v0.10.2 From 4e42ff9b1a1727cde9548cacf0bdd790cac3f903 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Wed, 11 Apr 2012 22:12:12 -0300 Subject: ARM: imx_v4_v5_defconfig: Let CONFIG_MACH_IMX27_DT be built by default Let CONFIG_MACH_IMX27_DT be built by default. Signed-off-by: Fabio Estevam Signed-off-by: Sascha Hauer diff --git a/arch/arm/configs/imx_v4_v5_defconfig b/arch/arm/configs/imx_v4_v5_defconfig index 6b31cb6..72364e2 100644 --- a/arch/arm/configs/imx_v4_v5_defconfig +++ b/arch/arm/configs/imx_v4_v5_defconfig @@ -33,6 +33,7 @@ CONFIG_MACH_IMX27LITE=y CONFIG_MACH_PCA100=y CONFIG_MACH_MXT_TD60=y CONFIG_MACH_IMX27IPCAM=y +CONFIG_MACH_IMX27_DT=y CONFIG_MXC_IRQ_PRIOR=y CONFIG_MXC_PWM=y CONFIG_NO_HZ=y -- cgit v0.10.2 From d86a3480543941344625d1c71aa70510d01f7b11 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Fri, 27 Apr 2012 01:05:02 -0300 Subject: ARM: imx_v6_v7_defconfig: Add SPI NOR support Add SPI NOR support in defconfig. Signed-off-by: Fabio Estevam Signed-off-by: Sascha Hauer diff --git a/arch/arm/configs/imx_v6_v7_defconfig b/arch/arm/configs/imx_v6_v7_defconfig index dc6f641..12617f7 100644 --- a/arch/arm/configs/imx_v6_v7_defconfig +++ b/arch/arm/configs/imx_v6_v7_defconfig @@ -64,6 +64,12 @@ CONFIG_IPV6=y # CONFIG_WIRELESS is not set CONFIG_DEVTMPFS=y CONFIG_DEVTMPFS_MOUNT=y +CONFIG_MTD=y +CONFIG_MTD_OF_PARTS=y +CONFIG_MTD_CHAR=y +CONFIG_MTD_DATAFLASH=y +CONFIG_MTD_M25P80=y +CONFIG_MTD_SST25L=y # CONFIG_STANDALONE is not set CONFIG_CONNECTOR=y CONFIG_BLK_DEV_LOOP=y -- cgit v0.10.2 From a20c6bec0b8ae775e2e8f350819cef98eea9a832 Mon Sep 17 00:00:00 2001 From: Fred Isaman Date: Fri, 20 Apr 2012 14:47:36 -0400 Subject: NFS: grab open context in direct read Signed-off-by: Fred Isaman Signed-off-by: Trond Myklebust diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 481be7f..8a89423 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -260,7 +260,7 @@ static void nfs_direct_read_release(void *calldata) if (put_dreq(dreq)) nfs_direct_complete(dreq); - nfs_readdata_free(data); + nfs_readdata_release(data); } static const struct rpc_call_ops nfs_read_direct_ops = { @@ -337,7 +337,7 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_direct_req *dreq, data->inode = inode; data->cred = msg.rpc_cred; data->args.fh = NFS_FH(inode); - data->args.context = ctx; + data->args.context = get_nfs_open_context(ctx); data->args.lock_context = dreq->l_ctx; data->args.offset = pos; data->args.pgbase = pgbase; -- cgit v0.10.2 From 1acbbb4e16209e85c35ff6cacad61d802c07289b Mon Sep 17 00:00:00 2001 From: Fred Isaman Date: Fri, 20 Apr 2012 14:47:37 -0400 Subject: NFS4.1: make pnfs_ld_[read|write]_done consistent The two functions had diverged quite a bit, with the write function being a bit more robust than the read. However, these still break badly in the desc->pg_bsize < PAGE_CACHE_SIZE case, as then there is nothing hanging on the data->pages list, and the resend ends up doing nothing. This will be fixed in a patch later in the series. Signed-off-by: Fred Isaman Signed-off-by: Trond Myklebust diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 38512bc..9c4d14a 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1189,6 +1189,17 @@ static int pnfs_write_done_resend_to_mds(struct inode *inode, struct list_head * return 0; } +static void pnfs_ld_handle_write_error(struct nfs_write_data *data) +{ + dprintk("pnfs write error = %d\n", data->pnfs_error); + if (NFS_SERVER(data->inode)->pnfs_curr_ld->flags & + PNFS_LAYOUTRET_ON_ERROR) { + clear_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(data->inode)->flags); + pnfs_return_layout(data->inode); + } + data->task.tk_status = pnfs_write_done_resend_to_mds(data->inode, &data->pages); +} + /* * Called by non rpc-based layout drivers */ @@ -1197,19 +1208,8 @@ void pnfs_ld_write_done(struct nfs_write_data *data) if (likely(!data->pnfs_error)) { pnfs_set_layoutcommit(data); data->mds_ops->rpc_call_done(&data->task, data); - } else { - dprintk("pnfs write error = %d\n", data->pnfs_error); - if (NFS_SERVER(data->inode)->pnfs_curr_ld->flags & - PNFS_LAYOUTRET_ON_ERROR) { - /* Don't lo_commit on error, Server will needs to - * preform a file recovery. - */ - clear_bit(NFS_INO_LAYOUTCOMMIT, - &NFS_I(data->inode)->flags); - pnfs_return_layout(data->inode); - } - data->task.tk_status = pnfs_write_done_resend_to_mds(data->inode, &data->pages); - } + } else + pnfs_ld_handle_write_error(data); put_lseg(data->lseg); data->mds_ops->rpc_release(data); } @@ -1293,26 +1293,38 @@ pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc) } EXPORT_SYMBOL_GPL(pnfs_generic_pg_writepages); -static void pnfs_ld_handle_read_error(struct nfs_read_data *data) +static int pnfs_read_done_resend_to_mds(struct inode *inode, struct list_head *head) { struct nfs_pageio_descriptor pgio; + LIST_HEAD(failed); - put_lseg(data->lseg); - data->lseg = NULL; - dprintk("pnfs write error = %d\n", data->pnfs_error); - if (NFS_SERVER(data->inode)->pnfs_curr_ld->flags & - PNFS_LAYOUTRET_ON_ERROR) - pnfs_return_layout(data->inode); - - nfs_pageio_init_read_mds(&pgio, data->inode); - - while (!list_empty(&data->pages)) { - struct nfs_page *req = nfs_list_entry(data->pages.next); + /* Resend all requests through the MDS */ + nfs_pageio_init_read_mds(&pgio, inode); + while (!list_empty(head)) { + struct nfs_page *req = nfs_list_entry(head->next); nfs_list_remove_request(req); - nfs_pageio_add_request(&pgio, req); + if (!nfs_pageio_add_request(&pgio, req)) + nfs_list_add_request(req, &failed); } nfs_pageio_complete(&pgio); + + if (!list_empty(&failed)) { + list_move(&failed, head); + return -EIO; + } + return 0; +} + +static void pnfs_ld_handle_read_error(struct nfs_read_data *data) +{ + dprintk("pnfs read error = %d\n", data->pnfs_error); + if (NFS_SERVER(data->inode)->pnfs_curr_ld->flags & + PNFS_LAYOUTRET_ON_ERROR) { + clear_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(data->inode)->flags); + pnfs_return_layout(data->inode); + } + data->task.tk_status = pnfs_read_done_resend_to_mds(data->inode, &data->pages); } /* -- cgit v0.10.2 From 799ba8d53d32c84bd2a867ca2689538a48176140 Mon Sep 17 00:00:00 2001 From: Fred Isaman Date: Fri, 20 Apr 2012 14:47:38 -0400 Subject: NFS4.1: Add lseg to struct nfs4_fl_commit_bucket Also create a commit_info structure to hold the bucket array and push it up from the lseg to the layout where it really belongs. While we are at it, fix a refcounting bug due to an (incorrect) implicit assumption that filelayout_scan_ds_commit_list always completely emptied the src list. This clarifies refcounting, removes the ugly find_only_write_lseg functions, and pushes the file layout commit code along on the path to supporting multiple lsegs. Signed-off-by: Fred Isaman Signed-off-by: Trond Myklebust diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index 5acfd9e..15aeba2 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c @@ -650,10 +650,66 @@ filelayout_free_lseg(struct pnfs_layout_segment *lseg) dprintk("--> %s\n", __func__); nfs4_fl_put_deviceid(fl->dsaddr); - kfree(fl->commit_buckets); + /* This assumes a single RW lseg */ + if (lseg->pls_range.iomode == IOMODE_RW) { + struct nfs4_filelayout *flo; + + flo = FILELAYOUT_FROM_HDR(lseg->pls_layout); + flo->commit_info.nbuckets = 0; + kfree(flo->commit_info.buckets); + flo->commit_info.buckets = NULL; + } _filelayout_free_lseg(fl); } +static int +filelayout_alloc_commit_info(struct pnfs_layout_segment *lseg, + gfp_t gfp_flags) +{ + struct nfs4_filelayout_segment *fl = FILELAYOUT_LSEG(lseg); + struct nfs4_filelayout *flo = FILELAYOUT_FROM_HDR(lseg->pls_layout); + + struct nfs4_fl_commit_bucket *buckets; + int size; + + if (fl->commit_through_mds) + return 0; + if (flo->commit_info.nbuckets != 0) { + /* This assumes there is only one IOMODE_RW lseg. What + * we really want to do is have a layout_hdr level + * dictionary of keys, each + * associated with a struct list_head, populated by calls + * to filelayout_write_pagelist(). + * */ + return 0; + } + + size = (fl->stripe_type == STRIPE_SPARSE) ? + fl->dsaddr->ds_num : fl->dsaddr->stripe_count; + + buckets = kcalloc(size, sizeof(struct nfs4_fl_commit_bucket), + gfp_flags); + if (!buckets) + return -ENOMEM; + else { + int i; + + spin_lock(&lseg->pls_layout->plh_inode->i_lock); + if (flo->commit_info.nbuckets != 0) + kfree(buckets); + else { + flo->commit_info.buckets = buckets; + flo->commit_info.nbuckets = size; + for (i = 0; i < size; i++) { + INIT_LIST_HEAD(&buckets[i].written); + INIT_LIST_HEAD(&buckets[i].committing); + } + } + spin_unlock(&lseg->pls_layout->plh_inode->i_lock); + return 0; + } +} + static struct pnfs_layout_segment * filelayout_alloc_lseg(struct pnfs_layout_hdr *layoutid, struct nfs4_layoutget_res *lgr, @@ -673,29 +729,6 @@ filelayout_alloc_lseg(struct pnfs_layout_hdr *layoutid, _filelayout_free_lseg(fl); return NULL; } - - /* This assumes there is only one IOMODE_RW lseg. What - * we really want to do is have a layout_hdr level - * dictionary of keys, each - * associated with a struct list_head, populated by calls - * to filelayout_write_pagelist(). - * */ - if ((!fl->commit_through_mds) && (lgr->range.iomode == IOMODE_RW)) { - int i; - int size = (fl->stripe_type == STRIPE_SPARSE) ? - fl->dsaddr->ds_num : fl->dsaddr->stripe_count; - - fl->commit_buckets = kcalloc(size, sizeof(struct nfs4_fl_commit_bucket), gfp_flags); - if (!fl->commit_buckets) { - filelayout_free_lseg(&fl->generic_hdr); - return NULL; - } - fl->number_of_buckets = size; - for (i = 0; i < size; i++) { - INIT_LIST_HEAD(&fl->commit_buckets[i].written); - INIT_LIST_HEAD(&fl->commit_buckets[i].committing); - } - } return &fl->generic_hdr; } @@ -747,6 +780,8 @@ static void filelayout_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) { + int status; + BUG_ON(pgio->pg_lseg != NULL); pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, @@ -757,7 +792,16 @@ filelayout_pg_init_write(struct nfs_pageio_descriptor *pgio, GFP_NOFS); /* If no lseg, fall back to write through mds */ if (pgio->pg_lseg == NULL) - nfs_pageio_reset_write_mds(pgio); + goto out_mds; + status = filelayout_alloc_commit_info(pgio->pg_lseg, GFP_NOFS); + if (status < 0) { + put_lseg(pgio->pg_lseg); + pgio->pg_lseg = NULL; + goto out_mds; + } + return; +out_mds: + nfs_pageio_reset_write_mds(pgio); } static const struct nfs_pageio_ops filelayout_pg_read_ops = { @@ -793,17 +837,13 @@ filelayout_clear_request_commit(struct nfs_page *req) if (!test_and_clear_bit(PG_COMMIT_TO_DS, &req->wb_flags)) goto out; if (list_is_singular(&req->wb_list)) { - struct pnfs_layout_segment *lseg; + struct nfs4_fl_commit_bucket *bucket; - /* From here we can find the bucket, but for the moment, - * since there is only one relevant lseg... - */ - list_for_each_entry(lseg, &NFS_I(inode)->layout->plh_segs, pls_list) { - if (lseg->pls_range.iomode == IOMODE_RW) { - freeme = lseg; - break; - } - } + bucket = list_first_entry(&req->wb_list, + struct nfs4_fl_commit_bucket, + written); + freeme = bucket->wlseg; + bucket->wlseg = NULL; } out: nfs_request_remove_commit_list(req); @@ -818,6 +858,7 @@ filelayout_choose_commit_list(struct nfs_page *req, struct nfs4_filelayout_segment *fl = FILELAYOUT_LSEG(lseg); u32 i, j; struct list_head *list; + struct nfs4_fl_commit_bucket *buckets; if (fl->commit_through_mds) return &NFS_I(req->wb_context->dentry->d_inode)->commit_list; @@ -831,15 +872,16 @@ filelayout_choose_commit_list(struct nfs_page *req, j = nfs4_fl_calc_j_index(lseg, (loff_t)req->wb_index << PAGE_CACHE_SHIFT); i = select_bucket_index(fl, j); - list = &fl->commit_buckets[i].written; + buckets = FILELAYOUT_FROM_HDR(lseg->pls_layout)->commit_info.buckets; + list = &buckets[i].written; if (list_empty(list)) { /* Non-empty buckets hold a reference on the lseg. That ref * is normally transferred to the COMMIT call and released * there. It could also be released if the last req is pulled * off due to a rewrite, in which case it will be done in - * filelayout_remove_commit_req + * filelayout_clear_request_commit */ - get_lseg(lseg); + buckets[i].wlseg = get_lseg(lseg); } set_bit(PG_COMMIT_TO_DS, &req->wb_flags); return list; @@ -908,32 +950,6 @@ static int filelayout_initiate_commit(struct nfs_write_data *data, int how) &filelayout_commit_call_ops, how); } -/* - * This is only useful while we are using whole file layouts. - */ -static struct pnfs_layout_segment * -find_only_write_lseg_locked(struct inode *inode) -{ - struct pnfs_layout_segment *lseg; - - list_for_each_entry(lseg, &NFS_I(inode)->layout->plh_segs, pls_list) - if (lseg->pls_range.iomode == IOMODE_RW) - return lseg; - return NULL; -} - -static struct pnfs_layout_segment *find_only_write_lseg(struct inode *inode) -{ - struct pnfs_layout_segment *rv; - - spin_lock(&inode->i_lock); - rv = find_only_write_lseg_locked(inode); - if (rv) - get_lseg(rv); - spin_unlock(&inode->i_lock); - return rv; -} - static int filelayout_scan_ds_commit_list(struct nfs4_fl_commit_bucket *bucket, int max, spinlock_t *lock) @@ -955,6 +971,13 @@ filelayout_scan_ds_commit_list(struct nfs4_fl_commit_bucket *bucket, int max, if (ret == max) break; } + if (ret) { + bucket->clseg = bucket->wlseg; + if (list_empty(src)) + bucket->wlseg = NULL; + else + get_lseg(bucket->clseg); + } return ret; } @@ -964,18 +987,14 @@ filelayout_scan_ds_commit_list(struct nfs4_fl_commit_bucket *bucket, int max, static int filelayout_scan_commit_lists(struct inode *inode, int max, spinlock_t *lock) { - struct pnfs_layout_segment *lseg; - struct nfs4_filelayout_segment *fl; + struct nfs4_fl_commit_info *fl_cinfo; int i, rv = 0, cnt; - lseg = find_only_write_lseg_locked(inode); - if (!lseg) - goto out_done; - fl = FILELAYOUT_LSEG(lseg); - if (fl->commit_through_mds) + fl_cinfo = &FILELAYOUT_FROM_HDR(NFS_I(inode)->layout)->commit_info; + if (fl_cinfo->nbuckets == 0) goto out_done; - for (i = 0; i < fl->number_of_buckets && max != 0; i++) { - cnt = filelayout_scan_ds_commit_list(&fl->commit_buckets[i], + for (i = 0; i < fl_cinfo->nbuckets && max != 0; i++) { + cnt = filelayout_scan_ds_commit_list(&fl_cinfo->buckets[i], max, lock); max -= cnt; rv += cnt; @@ -987,38 +1006,35 @@ out_done: static unsigned int alloc_ds_commits(struct inode *inode, struct list_head *list) { - struct pnfs_layout_segment *lseg; - struct nfs4_filelayout_segment *fl; + struct nfs4_fl_commit_info *fl_cinfo; + struct nfs4_fl_commit_bucket *bucket; struct nfs_write_data *data; int i, j; unsigned int nreq = 0; - /* Won't need this when non-whole file layout segments are supported - * instead we will use a pnfs_layout_hdr structure */ - lseg = find_only_write_lseg(inode); - if (!lseg) - return 0; - fl = FILELAYOUT_LSEG(lseg); - for (i = 0; i < fl->number_of_buckets; i++) { - if (list_empty(&fl->commit_buckets[i].committing)) + fl_cinfo = &FILELAYOUT_FROM_HDR(NFS_I(inode)->layout)->commit_info; + bucket = fl_cinfo->buckets; + for (i = 0; i < fl_cinfo->nbuckets; i++, bucket++) { + if (list_empty(&bucket->committing)) continue; data = nfs_commitdata_alloc(); if (!data) break; data->ds_commit_index = i; - data->lseg = lseg; + data->lseg = bucket->clseg; + bucket->clseg = NULL; list_add(&data->pages, list); nreq++; } /* Clean up on error */ - for (j = i; j < fl->number_of_buckets; j++) { - if (list_empty(&fl->commit_buckets[i].committing)) + for (j = i; j < fl_cinfo->nbuckets; j++, bucket++) { + if (list_empty(&bucket->committing)) continue; - nfs_retry_commit(&fl->commit_buckets[i].committing, lseg); - put_lseg(lseg); /* associated with emptying bucket */ + nfs_retry_commit(&bucket->committing, bucket->clseg); + put_lseg(bucket->clseg); + bucket->clseg = NULL; } - put_lseg(lseg); /* Caller will clean up entries put on list */ return nreq; } @@ -1058,7 +1074,10 @@ filelayout_commit_pagelist(struct inode *inode, struct list_head *mds_pages, nfs_initiate_commit(data, NFS_CLIENT(inode), data->mds_ops, how); } else { - nfs_init_commit(data, &FILELAYOUT_LSEG(data->lseg)->commit_buckets[data->ds_commit_index].committing, data->lseg); + struct nfs4_fl_commit_info *fl_cinfo; + + fl_cinfo = &FILELAYOUT_FROM_HDR(data->lseg->pls_layout)->commit_info; + nfs_init_commit(data, &fl_cinfo->buckets[data->ds_commit_index].committing, data->lseg); filelayout_initiate_commit(data, how); } } @@ -1072,10 +1091,27 @@ filelayout_free_deveiceid_node(struct nfs4_deviceid_node *d) nfs4_fl_free_deviceid(container_of(d, struct nfs4_file_layout_dsaddr, id_node)); } +static struct pnfs_layout_hdr * +filelayout_alloc_layout_hdr(struct inode *inode, gfp_t gfp_flags) +{ + struct nfs4_filelayout *flo; + + flo = kzalloc(sizeof(*flo), gfp_flags); + return &flo->generic_hdr; +} + +static void +filelayout_free_layout_hdr(struct pnfs_layout_hdr *lo) +{ + kfree(FILELAYOUT_FROM_HDR(lo)); +} + static struct pnfs_layoutdriver_type filelayout_type = { .id = LAYOUT_NFSV4_1_FILES, .name = "LAYOUT_NFSV4_1_FILES", .owner = THIS_MODULE, + .alloc_layout_hdr = filelayout_alloc_layout_hdr, + .free_layout_hdr = filelayout_free_layout_hdr, .alloc_lseg = filelayout_alloc_lseg, .free_lseg = filelayout_free_lseg, .pg_read_ops = &filelayout_pg_read_ops, diff --git a/fs/nfs/nfs4filelayout.h b/fs/nfs/nfs4filelayout.h index 21190bb..333a3ac 100644 --- a/fs/nfs/nfs4filelayout.h +++ b/fs/nfs/nfs4filelayout.h @@ -77,6 +77,13 @@ struct nfs4_file_layout_dsaddr { struct nfs4_fl_commit_bucket { struct list_head written; struct list_head committing; + struct pnfs_layout_segment *wlseg; + struct pnfs_layout_segment *clseg; +}; + +struct nfs4_fl_commit_info { + int nbuckets; + struct nfs4_fl_commit_bucket *buckets; }; struct nfs4_filelayout_segment { @@ -89,10 +96,19 @@ struct nfs4_filelayout_segment { struct nfs4_file_layout_dsaddr *dsaddr; /* Point to GETDEVINFO data */ unsigned int num_fh; struct nfs_fh **fh_array; - struct nfs4_fl_commit_bucket *commit_buckets; /* Sort commits to ds */ - int number_of_buckets; }; +struct nfs4_filelayout { + struct pnfs_layout_hdr generic_hdr; + struct nfs4_fl_commit_info commit_info; +}; + +static inline struct nfs4_filelayout * +FILELAYOUT_FROM_HDR(struct pnfs_layout_hdr *lo) +{ + return container_of(lo, struct nfs4_filelayout, generic_hdr); +} + static inline struct nfs4_filelayout_segment * FILELAYOUT_LSEG(struct pnfs_layout_segment *lseg) { -- cgit v0.10.2 From 0b7c01533aa9f4a228d07d2768d084acb3a387bc Mon Sep 17 00:00:00 2001 From: Fred Isaman Date: Fri, 20 Apr 2012 14:47:39 -0400 Subject: NFS: add a struct nfs_commit_data to replace nfs_write_data in commits Commits don't need the vectors of pages, etc. that writes do. Split out a separate structure for the commit operation. Signed-off-by: Fred Isaman Signed-off-by: Trond Myklebust diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 8a89423..5897dfe 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -82,7 +82,7 @@ struct nfs_direct_req { /* commit state */ struct list_head rewrite_list; /* saved nfs_write_data structs */ - struct nfs_write_data * commit_data; /* special write_data for commits */ + struct nfs_commit_data *commit_data; /* special write_data for commits */ int flags; #define NFS_ODIRECT_DO_COMMIT (1) /* an unstable reply was received */ #define NFS_ODIRECT_RESCHED_WRITES (2) /* write verification failed */ @@ -524,7 +524,7 @@ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq) static void nfs_direct_commit_result(struct rpc_task *task, void *calldata) { - struct nfs_write_data *data = calldata; + struct nfs_commit_data *data = calldata; /* Call the NFS version-specific code */ NFS_PROTO(data->inode)->commit_done(task, data); @@ -532,8 +532,8 @@ static void nfs_direct_commit_result(struct rpc_task *task, void *calldata) static void nfs_direct_commit_release(void *calldata) { - struct nfs_write_data *data = calldata; - struct nfs_direct_req *dreq = (struct nfs_direct_req *) data->req; + struct nfs_commit_data *data = calldata; + struct nfs_direct_req *dreq = data->dreq; int status = data->task.tk_status; if (status < 0) { @@ -551,14 +551,14 @@ static void nfs_direct_commit_release(void *calldata) } static const struct rpc_call_ops nfs_commit_direct_ops = { - .rpc_call_prepare = nfs_write_prepare, + .rpc_call_prepare = nfs_commit_prepare, .rpc_call_done = nfs_direct_commit_result, .rpc_release = nfs_direct_commit_release, }; static void nfs_direct_commit_schedule(struct nfs_direct_req *dreq) { - struct nfs_write_data *data = dreq->commit_data; + struct nfs_commit_data *data = dreq->commit_data; struct rpc_task *task; struct rpc_message msg = { .rpc_argp = &data->args, @@ -581,9 +581,6 @@ static void nfs_direct_commit_schedule(struct nfs_direct_req *dreq) data->args.fh = NFS_FH(data->inode); data->args.offset = 0; data->args.count = 0; - data->args.context = dreq->ctx; - data->args.lock_context = dreq->l_ctx; - data->res.count = 0; data->res.fattr = &data->fattr; data->res.verf = &data->verf; nfs_fattr_init(&data->fattr); @@ -625,7 +622,7 @@ static void nfs_alloc_commit_data(struct nfs_direct_req *dreq) { dreq->commit_data = nfs_commitdata_alloc(); if (dreq->commit_data != NULL) - dreq->commit_data->req = (struct nfs_page *) dreq; + dreq->commit_data->dreq = dreq; } #else static inline void nfs_alloc_commit_data(struct nfs_direct_req *dreq) diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index b777bda..29ab441 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -314,24 +314,25 @@ extern void nfs_pageio_init_write_mds(struct nfs_pageio_descriptor *pgio, struct inode *inode, int ioflags); extern void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio); extern void nfs_writedata_release(struct nfs_write_data *wdata); -extern void nfs_commit_free(struct nfs_write_data *p); +extern void nfs_commit_free(struct nfs_commit_data *p); extern int nfs_initiate_write(struct nfs_write_data *data, struct rpc_clnt *clnt, const struct rpc_call_ops *call_ops, int how); extern void nfs_write_prepare(struct rpc_task *task, void *calldata); -extern int nfs_initiate_commit(struct nfs_write_data *data, - struct rpc_clnt *clnt, +extern void nfs_commit_prepare(struct rpc_task *task, void *calldata); +extern int nfs_initiate_commit(struct rpc_clnt *clnt, + struct nfs_commit_data *data, const struct rpc_call_ops *call_ops, int how); -extern void nfs_init_commit(struct nfs_write_data *data, +extern void nfs_init_commit(struct nfs_commit_data *data, struct list_head *head, struct pnfs_layout_segment *lseg); void nfs_retry_commit(struct list_head *page_list, struct pnfs_layout_segment *lseg); void nfs_commit_clear_lock(struct nfs_inode *nfsi); -void nfs_commitdata_release(void *data); -void nfs_commit_release_pages(struct nfs_write_data *data); +void nfs_commitdata_release(struct nfs_commit_data *data); +void nfs_commit_release_pages(struct nfs_commit_data *data); void nfs_request_add_commit_list(struct nfs_page *req, struct list_head *head); void nfs_request_remove_commit_list(struct nfs_page *req); diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index 5242eae..b1daca7 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -848,7 +848,12 @@ static void nfs3_proc_write_rpc_prepare(struct rpc_task *task, struct nfs_write_ rpc_call_start(task); } -static int nfs3_commit_done(struct rpc_task *task, struct nfs_write_data *data) +static void nfs3_proc_commit_rpc_prepare(struct rpc_task *task, struct nfs_commit_data *data) +{ + rpc_call_start(task); +} + +static int nfs3_commit_done(struct rpc_task *task, struct nfs_commit_data *data) { if (nfs3_async_handle_jukebox(task, data->inode)) return -EAGAIN; @@ -856,7 +861,7 @@ static int nfs3_commit_done(struct rpc_task *task, struct nfs_write_data *data) return 0; } -static void nfs3_proc_commit_setup(struct nfs_write_data *data, struct rpc_message *msg) +static void nfs3_proc_commit_setup(struct nfs_commit_data *data, struct rpc_message *msg) { msg->rpc_proc = &nfs3_procedures[NFS3PROC_COMMIT]; } @@ -907,6 +912,7 @@ const struct nfs_rpc_ops nfs_v3_clientops = { .write_rpc_prepare = nfs3_proc_write_rpc_prepare, .write_done = nfs3_write_done, .commit_setup = nfs3_proc_commit_setup, + .commit_rpc_prepare = nfs3_proc_commit_rpc_prepare, .commit_done = nfs3_commit_done, .lock = nfs3_proc_lock, .clear_acl_cache = nfs3_forget_cached_acls, diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c index a77cc9a..01e53e9 100644 --- a/fs/nfs/nfs3xdr.c +++ b/fs/nfs/nfs3xdr.c @@ -1287,7 +1287,7 @@ static void nfs3_xdr_enc_readdirplus3args(struct rpc_rqst *req, * }; */ static void encode_commit3args(struct xdr_stream *xdr, - const struct nfs_writeargs *args) + const struct nfs_commitargs *args) { __be32 *p; @@ -1300,7 +1300,7 @@ static void encode_commit3args(struct xdr_stream *xdr, static void nfs3_xdr_enc_commit3args(struct rpc_rqst *req, struct xdr_stream *xdr, - const struct nfs_writeargs *args) + const struct nfs_commitargs *args) { encode_commit3args(xdr, args); } @@ -2319,7 +2319,7 @@ out_status: */ static int nfs3_xdr_dec_commit3res(struct rpc_rqst *req, struct xdr_stream *xdr, - struct nfs_writeres *result) + struct nfs_commitres *result) { enum nfs_stat status; int error; diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index 15aeba2..675ce3b 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c @@ -250,7 +250,7 @@ static int filelayout_write_done_cb(struct rpc_task *task, } /* Fake up some data that will cause nfs_commit_release to retry the writes. */ -static void prepare_to_resend_writes(struct nfs_write_data *data) +static void prepare_to_resend_writes(struct nfs_commit_data *data) { struct nfs_page *first = nfs_list_entry(data->pages.next); @@ -261,11 +261,11 @@ static void prepare_to_resend_writes(struct nfs_write_data *data) } static int filelayout_commit_done_cb(struct rpc_task *task, - struct nfs_write_data *data) + struct nfs_commit_data *data) { int reset = 0; - if (filelayout_async_handle_error(task, data->args.context->state, + if (filelayout_async_handle_error(task, data->context->state, data->ds_clp, &reset) == -EAGAIN) { dprintk("%s calling restart ds_clp %p ds_clp->cl_session %p\n", __func__, data->ds_clp, data->ds_clp->cl_session); @@ -315,15 +315,42 @@ static void filelayout_write_release(void *data) wdata->mds_ops->rpc_release(data); } -static void filelayout_commit_release(void *data) +static void filelayout_commit_prepare(struct rpc_task *task, void *data) { - struct nfs_write_data *wdata = (struct nfs_write_data *)data; + struct nfs_commit_data *wdata = data; - nfs_commit_release_pages(wdata); - if (atomic_dec_and_test(&NFS_I(wdata->inode)->commits_outstanding)) - nfs_commit_clear_lock(NFS_I(wdata->inode)); - put_lseg(wdata->lseg); - nfs_commitdata_release(wdata); + if (nfs41_setup_sequence(wdata->ds_clp->cl_session, + &wdata->args.seq_args, &wdata->res.seq_res, + task)) + return; + + rpc_call_start(task); +} + +static void filelayout_write_commit_done(struct rpc_task *task, void *data) +{ + struct nfs_commit_data *wdata = data; + + /* Note this may cause RPC to be resent */ + wdata->mds_ops->rpc_call_done(task, data); +} + +static void filelayout_commit_count_stats(struct rpc_task *task, void *data) +{ + struct nfs_commit_data *cdata = data; + + rpc_count_iostats(task, NFS_SERVER(cdata->inode)->client->cl_metrics); +} + +static void filelayout_commit_release(void *calldata) +{ + struct nfs_commit_data *data = calldata; + + nfs_commit_release_pages(data); + if (atomic_dec_and_test(&NFS_I(data->inode)->commits_outstanding)) + nfs_commit_clear_lock(NFS_I(data->inode)); + put_lseg(data->lseg); + nfs_commitdata_release(data); } static const struct rpc_call_ops filelayout_read_call_ops = { @@ -341,9 +368,9 @@ static const struct rpc_call_ops filelayout_write_call_ops = { }; static const struct rpc_call_ops filelayout_commit_call_ops = { - .rpc_call_prepare = filelayout_write_prepare, - .rpc_call_done = filelayout_write_call_done, - .rpc_count_stats = filelayout_write_count_stats, + .rpc_call_prepare = filelayout_commit_prepare, + .rpc_call_done = filelayout_write_commit_done, + .rpc_count_stats = filelayout_commit_count_stats, .rpc_release = filelayout_commit_release, }; @@ -922,7 +949,7 @@ select_ds_fh_from_commit(struct pnfs_layout_segment *lseg, u32 i) return flseg->fh_array[i]; } -static int filelayout_initiate_commit(struct nfs_write_data *data, int how) +static int filelayout_initiate_commit(struct nfs_commit_data *data, int how) { struct pnfs_layout_segment *lseg = data->lseg; struct nfs4_pnfs_ds *ds; @@ -941,12 +968,12 @@ static int filelayout_initiate_commit(struct nfs_write_data *data, int how) return -EAGAIN; } dprintk("%s ino %lu, how %d\n", __func__, data->inode->i_ino, how); - data->write_done_cb = filelayout_commit_done_cb; + data->commit_done_cb = filelayout_commit_done_cb; data->ds_clp = ds->ds_clp; fh = select_ds_fh_from_commit(lseg, data->ds_commit_index); if (fh) data->args.fh = fh; - return nfs_initiate_commit(data, ds->ds_clp->cl_rpcclient, + return nfs_initiate_commit(ds->ds_clp->cl_rpcclient, data, &filelayout_commit_call_ops, how); } @@ -1008,7 +1035,7 @@ alloc_ds_commits(struct inode *inode, struct list_head *list) { struct nfs4_fl_commit_info *fl_cinfo; struct nfs4_fl_commit_bucket *bucket; - struct nfs_write_data *data; + struct nfs_commit_data *data; int i, j; unsigned int nreq = 0; @@ -1044,7 +1071,7 @@ static int filelayout_commit_pagelist(struct inode *inode, struct list_head *mds_pages, int how) { - struct nfs_write_data *data, *tmp; + struct nfs_commit_data *data, *tmp; LIST_HEAD(list); unsigned int nreq = 0; @@ -1071,7 +1098,7 @@ filelayout_commit_pagelist(struct inode *inode, struct list_head *mds_pages, list_del_init(&data->pages); if (!data->lseg) { nfs_init_commit(data, mds_pages, NULL); - nfs_initiate_commit(data, NFS_CLIENT(inode), + nfs_initiate_commit(NFS_CLIENT(inode), data, data->mds_ops, how); } else { struct nfs4_fl_commit_info *fl_cinfo; diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 75eb883..cc04b6e 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3468,7 +3468,17 @@ static void nfs4_proc_write_rpc_prepare(struct rpc_task *task, struct nfs_write_ rpc_call_start(task); } -static int nfs4_commit_done_cb(struct rpc_task *task, struct nfs_write_data *data) +static void nfs4_proc_commit_rpc_prepare(struct rpc_task *task, struct nfs_commit_data *data) +{ + if (nfs4_setup_sequence(NFS_SERVER(data->inode), + &data->args.seq_args, + &data->res.seq_res, + task)) + return; + rpc_call_start(task); +} + +static int nfs4_commit_done_cb(struct rpc_task *task, struct nfs_commit_data *data) { struct inode *inode = data->inode; @@ -3480,14 +3490,14 @@ static int nfs4_commit_done_cb(struct rpc_task *task, struct nfs_write_data *dat return 0; } -static int nfs4_commit_done(struct rpc_task *task, struct nfs_write_data *data) +static int nfs4_commit_done(struct rpc_task *task, struct nfs_commit_data *data) { if (!nfs4_sequence_done(task, &data->res.seq_res)) return -EAGAIN; - return data->write_done_cb(task, data); + return data->commit_done_cb(task, data); } -static void nfs4_proc_commit_setup(struct nfs_write_data *data, struct rpc_message *msg) +static void nfs4_proc_commit_setup(struct nfs_commit_data *data, struct rpc_message *msg) { struct nfs_server *server = NFS_SERVER(data->inode); @@ -3496,8 +3506,8 @@ static void nfs4_proc_commit_setup(struct nfs_write_data *data, struct rpc_messa data->res.fattr = NULL; } else data->args.bitmask = server->cache_consistency_bitmask; - if (!data->write_done_cb) - data->write_done_cb = nfs4_commit_done_cb; + if (data->commit_done_cb == NULL) + data->commit_done_cb = nfs4_commit_done_cb; data->res.server = server; msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_COMMIT]; nfs41_init_sequence(&data->args.seq_args, &data->res.seq_res, 1); @@ -6591,6 +6601,7 @@ const struct nfs_rpc_ops nfs_v4_clientops = { .write_rpc_prepare = nfs4_proc_write_rpc_prepare, .write_done = nfs4_write_done, .commit_setup = nfs4_proc_commit_setup, + .commit_rpc_prepare = nfs4_proc_commit_rpc_prepare, .commit_done = nfs4_commit_done, .lock = nfs4_proc_lock, .clear_acl_cache = nfs4_zap_acl_attr, diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index c54aae3..4c3cc0e 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -1103,7 +1103,7 @@ static void encode_close(struct xdr_stream *xdr, const struct nfs_closeargs *arg encode_nfs4_stateid(xdr, arg->stateid); } -static void encode_commit(struct xdr_stream *xdr, const struct nfs_writeargs *args, struct compound_hdr *hdr) +static void encode_commit(struct xdr_stream *xdr, const struct nfs_commitargs *args, struct compound_hdr *hdr) { __be32 *p; @@ -2448,7 +2448,7 @@ static void nfs4_xdr_enc_write(struct rpc_rqst *req, struct xdr_stream *xdr, * a COMMIT request */ static void nfs4_xdr_enc_commit(struct rpc_rqst *req, struct xdr_stream *xdr, - struct nfs_writeargs *args) + struct nfs_commitargs *args) { struct compound_hdr hdr = { .minorversion = nfs4_xdr_minorversion(&args->seq_args), @@ -4102,7 +4102,7 @@ static int decode_verifier(struct xdr_stream *xdr, void *verifier) return decode_opaque_fixed(xdr, verifier, NFS4_VERIFIER_SIZE); } -static int decode_commit(struct xdr_stream *xdr, struct nfs_writeres *res) +static int decode_commit(struct xdr_stream *xdr, struct nfs_commitres *res) { int status; @@ -6353,7 +6353,7 @@ out: * Decode COMMIT response */ static int nfs4_xdr_dec_commit(struct rpc_rqst *rqstp, struct xdr_stream *xdr, - struct nfs_writeres *res) + struct nfs_commitres *res) { struct compound_hdr hdr; int status; diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index b63b6f4..bf80503 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c @@ -688,8 +688,13 @@ static void nfs_proc_write_rpc_prepare(struct rpc_task *task, struct nfs_write_d rpc_call_start(task); } +static void nfs_proc_commit_rpc_prepare(struct rpc_task *task, struct nfs_commit_data *data) +{ + BUG(); +} + static void -nfs_proc_commit_setup(struct nfs_write_data *data, struct rpc_message *msg) +nfs_proc_commit_setup(struct nfs_commit_data *data, struct rpc_message *msg) { BUG(); } @@ -764,6 +769,7 @@ const struct nfs_rpc_ops nfs_v2_clientops = { .write_rpc_prepare = nfs_proc_write_rpc_prepare, .write_done = nfs_write_done, .commit_setup = nfs_proc_commit_setup, + .commit_rpc_prepare = nfs_proc_commit_rpc_prepare, .lock = nfs_proc_lock, .lock_check_bounds = nfs_lock_check_bounds, .close_context = nfs_close_context, diff --git a/fs/nfs/write.c b/fs/nfs/write.c index c074623..54f7c0f 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -48,11 +48,12 @@ static const struct rpc_call_ops nfs_commit_ops; static struct kmem_cache *nfs_wdata_cachep; static mempool_t *nfs_wdata_mempool; +static struct kmem_cache *nfs_cdata_cachep; static mempool_t *nfs_commit_mempool; -struct nfs_write_data *nfs_commitdata_alloc(void) +struct nfs_commit_data *nfs_commitdata_alloc(void) { - struct nfs_write_data *p = mempool_alloc(nfs_commit_mempool, GFP_NOFS); + struct nfs_commit_data *p = mempool_alloc(nfs_commit_mempool, GFP_NOFS); if (p) { memset(p, 0, sizeof(*p)); @@ -62,10 +63,8 @@ struct nfs_write_data *nfs_commitdata_alloc(void) } EXPORT_SYMBOL_GPL(nfs_commitdata_alloc); -void nfs_commit_free(struct nfs_write_data *p) +void nfs_commit_free(struct nfs_commit_data *p) { - if (p && (p->pagevec != &p->page_array[0])) - kfree(p->pagevec); mempool_free(p, nfs_commit_mempool); } EXPORT_SYMBOL_GPL(nfs_commit_free); @@ -1179,6 +1178,13 @@ void nfs_write_prepare(struct rpc_task *task, void *calldata) NFS_PROTO(data->inode)->write_rpc_prepare(task, data); } +void nfs_commit_prepare(struct rpc_task *task, void *calldata) +{ + struct nfs_commit_data *data = calldata; + + NFS_PROTO(data->inode)->commit_rpc_prepare(task, data); +} + static const struct rpc_call_ops nfs_write_partial_ops = { .rpc_call_prepare = nfs_write_prepare, .rpc_call_done = nfs_writeback_done_partial, @@ -1355,16 +1361,14 @@ void nfs_commit_clear_lock(struct nfs_inode *nfsi) } EXPORT_SYMBOL_GPL(nfs_commit_clear_lock); -void nfs_commitdata_release(void *data) +void nfs_commitdata_release(struct nfs_commit_data *data) { - struct nfs_write_data *wdata = data; - - put_nfs_open_context(wdata->args.context); - nfs_commit_free(wdata); + put_nfs_open_context(data->context); + nfs_commit_free(data); } EXPORT_SYMBOL_GPL(nfs_commitdata_release); -int nfs_initiate_commit(struct nfs_write_data *data, struct rpc_clnt *clnt, +int nfs_initiate_commit(struct rpc_clnt *clnt, struct nfs_commit_data *data, const struct rpc_call_ops *call_ops, int how) { @@ -1403,7 +1407,7 @@ EXPORT_SYMBOL_GPL(nfs_initiate_commit); /* * Set up the argument/result storage required for the RPC call. */ -void nfs_init_commit(struct nfs_write_data *data, +void nfs_init_commit(struct nfs_commit_data *data, struct list_head *head, struct pnfs_layout_segment *lseg) { @@ -1424,8 +1428,7 @@ void nfs_init_commit(struct nfs_write_data *data, /* Note: we always request a commit of the entire inode */ data->args.offset = 0; data->args.count = 0; - data->args.context = get_nfs_open_context(first->wb_context); - data->res.count = 0; + data->context = get_nfs_open_context(first->wb_context); data->res.fattr = &data->fattr; data->res.verf = &data->verf; nfs_fattr_init(&data->fattr); @@ -1455,7 +1458,7 @@ EXPORT_SYMBOL_GPL(nfs_retry_commit); static int nfs_commit_list(struct inode *inode, struct list_head *head, int how) { - struct nfs_write_data *data; + struct nfs_commit_data *data; data = nfs_commitdata_alloc(); @@ -1464,7 +1467,7 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how) /* Set up the argument struct */ nfs_init_commit(data, head, NULL); - return nfs_initiate_commit(data, NFS_CLIENT(inode), data->mds_ops, how); + return nfs_initiate_commit(NFS_CLIENT(inode), data, data->mds_ops, how); out_bad: nfs_retry_commit(head, NULL); nfs_commit_clear_lock(NFS_I(inode)); @@ -1476,7 +1479,7 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how) */ static void nfs_commit_done(struct rpc_task *task, void *calldata) { - struct nfs_write_data *data = calldata; + struct nfs_commit_data *data = calldata; dprintk("NFS: %5u nfs_commit_done (status %d)\n", task->tk_pid, task->tk_status); @@ -1485,7 +1488,7 @@ static void nfs_commit_done(struct rpc_task *task, void *calldata) NFS_PROTO(data->inode)->commit_done(task, data); } -void nfs_commit_release_pages(struct nfs_write_data *data) +void nfs_commit_release_pages(struct nfs_commit_data *data) { struct nfs_page *req; int status = data->task.tk_status; @@ -1526,7 +1529,7 @@ EXPORT_SYMBOL_GPL(nfs_commit_release_pages); static void nfs_commit_release(void *calldata) { - struct nfs_write_data *data = calldata; + struct nfs_commit_data *data = calldata; nfs_commit_release_pages(data); nfs_commit_clear_lock(NFS_I(data->inode)); @@ -1534,7 +1537,7 @@ static void nfs_commit_release(void *calldata) } static const struct rpc_call_ops nfs_commit_ops = { - .rpc_call_prepare = nfs_write_prepare, + .rpc_call_prepare = nfs_commit_prepare, .rpc_call_done = nfs_commit_done, .rpc_release = nfs_commit_release, }; @@ -1753,6 +1756,13 @@ int __init nfs_init_writepagecache(void) if (nfs_wdata_mempool == NULL) return -ENOMEM; + nfs_cdata_cachep = kmem_cache_create("nfs_commit_data", + sizeof(struct nfs_commit_data), + 0, SLAB_HWCACHE_ALIGN, + NULL); + if (nfs_cdata_cachep == NULL) + return -ENOMEM; + nfs_commit_mempool = mempool_create_slab_pool(MIN_POOL_COMMIT, nfs_wdata_cachep); if (nfs_commit_mempool == NULL) diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 52a1bdb..d5d68f3 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -552,8 +552,8 @@ extern int nfs_wb_page(struct inode *inode, struct page* page); extern int nfs_wb_page_cancel(struct inode *inode, struct page* page); #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) extern int nfs_commit_inode(struct inode *, int); -extern struct nfs_write_data *nfs_commitdata_alloc(void); -extern void nfs_commit_free(struct nfs_write_data *wdata); +extern struct nfs_commit_data *nfs_commitdata_alloc(void); +extern void nfs_commit_free(struct nfs_commit_data *data); #else static inline int nfs_commit_inode(struct inode *inode, int how) diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 7ba3551..8fb036a 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -519,6 +519,24 @@ struct nfs_writeres { }; /* + * Arguments to the commit call. + */ +struct nfs_commitargs { + struct nfs_fh *fh; + __u64 offset; + __u32 count; + const u32 *bitmask; + struct nfs4_sequence_args seq_args; +}; + +struct nfs_commitres { + struct nfs_fattr *fattr; + struct nfs_writeverf *verf; + const struct nfs_server *server; + struct nfs4_sequence_res seq_res; +}; + +/* * Common arguments to the unlink call */ struct nfs_removeargs { @@ -1171,6 +1189,8 @@ struct nfs_read_data { struct page *page_array[NFS_PAGEVEC_SIZE]; }; +struct nfs_direct_req; + struct nfs_write_data { struct rpc_task task; struct inode *inode; @@ -1186,7 +1206,6 @@ struct nfs_write_data { struct nfs_writeres res; /* result struct */ struct pnfs_layout_segment *lseg; struct nfs_client *ds_clp; /* pNFS data server */ - int ds_commit_index; const struct rpc_call_ops *mds_ops; int (*write_done_cb) (struct rpc_task *task, struct nfs_write_data *data); #ifdef CONFIG_NFS_V4 @@ -1197,6 +1216,25 @@ struct nfs_write_data { struct page *page_array[NFS_PAGEVEC_SIZE]; }; +struct nfs_commit_data { + struct rpc_task task; + struct inode *inode; + struct rpc_cred *cred; + struct nfs_fattr fattr; + struct nfs_writeverf verf; + struct list_head pages; /* Coalesced requests we wish to flush */ + struct list_head list; /* lists of struct nfs_write_data */ + struct nfs_direct_req *dreq; /* O_DIRECT request */ + struct nfs_commitargs args; /* argument struct */ + struct nfs_commitres res; /* result struct */ + struct nfs_open_context *context; + struct pnfs_layout_segment *lseg; + struct nfs_client *ds_clp; /* pNFS data server */ + int ds_commit_index; + const struct rpc_call_ops *mds_ops; + int (*commit_done_cb) (struct rpc_task *task, struct nfs_commit_data *data); +}; + struct nfs_unlinkdata { struct hlist_node list; struct nfs_removeargs args; @@ -1277,8 +1315,9 @@ struct nfs_rpc_ops { void (*write_setup) (struct nfs_write_data *, struct rpc_message *); void (*write_rpc_prepare)(struct rpc_task *, struct nfs_write_data *); int (*write_done) (struct rpc_task *, struct nfs_write_data *); - void (*commit_setup) (struct nfs_write_data *, struct rpc_message *); - int (*commit_done) (struct rpc_task *, struct nfs_write_data *); + void (*commit_setup) (struct nfs_commit_data *, struct rpc_message *); + void (*commit_rpc_prepare)(struct rpc_task *, struct nfs_commit_data *); + int (*commit_done) (struct rpc_task *, struct nfs_commit_data *); int (*lock)(struct file *, int, struct file_lock *); int (*lock_check_bounds)(const struct file_lock *); void (*clear_acl_cache)(struct inode *); -- cgit v0.10.2 From 31f6852a4c187c031456581b35e146c0d5bbdecd Mon Sep 17 00:00:00 2001 From: Fred Isaman Date: Fri, 20 Apr 2012 14:47:40 -0400 Subject: NFS: dprintks in directio code were referencing task after put Signed-off-by: Fred Isaman Signed-off-by: Trond Myklebust diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 5897dfe..fb7fbaa 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -357,15 +357,15 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_direct_req *dreq, task = rpc_run_task(&task_setup_data); if (IS_ERR(task)) break; - rpc_put_task(task); dprintk("NFS: %5u initiated direct read call " "(req %s/%Ld, %zu bytes @ offset %Lu)\n", - data->task.tk_pid, + task->tk_pid, inode->i_sb->s_id, (long long)NFS_FILEID(inode), bytes, (unsigned long long)data->args.offset); + rpc_put_task(task); started += bytes; user_addr += bytes; @@ -784,15 +784,15 @@ static ssize_t nfs_direct_write_schedule_segment(struct nfs_direct_req *dreq, task = rpc_run_task(&task_setup_data); if (IS_ERR(task)) break; - rpc_put_task(task); dprintk("NFS: %5u initiated direct write call " "(req %s/%Ld, %zu bytes @ offset %Lu)\n", - data->task.tk_pid, + task->tk_pid, inode->i_sb->s_id, (long long)NFS_FILEID(inode), bytes, (unsigned long long)data->args.offset); + rpc_put_task(task); started += bytes; user_addr += bytes; -- cgit v0.10.2 From c5996c4efb95bbb80a25acc890357c9eae998eeb Mon Sep 17 00:00:00 2001 From: Fred Isaman Date: Fri, 20 Apr 2012 14:47:41 -0400 Subject: NFS: reverse arg order in nfs_initiate_[read|write] Make it consistent with nfs_initiate_commit. Signed-off-by: Fred Isaman Signed-off-by: Trond Myklebust diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 29ab441..650127f 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -296,7 +296,8 @@ extern int nfs4_get_rootfh(struct nfs_server *server, struct nfs_fh *mntfh); struct nfs_pageio_descriptor; /* read.c */ -extern int nfs_initiate_read(struct nfs_read_data *data, struct rpc_clnt *clnt, +extern int nfs_initiate_read(struct rpc_clnt *clnt, + struct nfs_read_data *data, const struct rpc_call_ops *call_ops); extern void nfs_read_prepare(struct rpc_task *task, void *calldata); extern int nfs_generic_pagein(struct nfs_pageio_descriptor *desc, @@ -315,8 +316,8 @@ extern void nfs_pageio_init_write_mds(struct nfs_pageio_descriptor *pgio, extern void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio); extern void nfs_writedata_release(struct nfs_write_data *wdata); extern void nfs_commit_free(struct nfs_commit_data *p); -extern int nfs_initiate_write(struct nfs_write_data *data, - struct rpc_clnt *clnt, +extern int nfs_initiate_write(struct rpc_clnt *clnt, + struct nfs_write_data *data, const struct rpc_call_ops *call_ops, int how); extern void nfs_write_prepare(struct rpc_task *task, void *calldata); diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index 675ce3b..adbadcb 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c @@ -413,7 +413,7 @@ filelayout_read_pagelist(struct nfs_read_data *data) data->mds_offset = offset; /* Perform an asynchronous read to ds */ - status = nfs_initiate_read(data, ds->ds_clp->cl_rpcclient, + status = nfs_initiate_read(ds->ds_clp->cl_rpcclient, data, &filelayout_read_call_ops); BUG_ON(status != 0); return PNFS_ATTEMPTED; @@ -460,7 +460,7 @@ filelayout_write_pagelist(struct nfs_write_data *data, int sync) data->args.offset = filelayout_get_dserver_offset(lseg, offset); /* Perform an asynchronous write */ - status = nfs_initiate_write(data, ds->ds_clp->cl_rpcclient, + status = nfs_initiate_write(ds->ds_clp->cl_rpcclient, data, &filelayout_write_call_ops, sync); BUG_ON(status != 0); return PNFS_ATTEMPTED; diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 0a4be28..4ddba67 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -169,7 +169,8 @@ static void nfs_readpage_release(struct nfs_page *req) nfs_release_request(req); } -int nfs_initiate_read(struct nfs_read_data *data, struct rpc_clnt *clnt, +int nfs_initiate_read(struct rpc_clnt *clnt, + struct nfs_read_data *data, const struct rpc_call_ops *call_ops) { struct inode *inode = data->inode; @@ -240,7 +241,7 @@ static int nfs_do_read(struct nfs_read_data *data, { struct inode *inode = data->args.context->dentry->d_inode; - return nfs_initiate_read(data, NFS_CLIENT(inode), call_ops); + return nfs_initiate_read(NFS_CLIENT(inode), data, call_ops); } static int diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 54f7c0f..76735dd 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -836,8 +836,8 @@ static int flush_task_priority(int how) return RPC_PRIORITY_NORMAL; } -int nfs_initiate_write(struct nfs_write_data *data, - struct rpc_clnt *clnt, +int nfs_initiate_write(struct rpc_clnt *clnt, + struct nfs_write_data *data, const struct rpc_call_ops *call_ops, int how) { @@ -937,7 +937,7 @@ static int nfs_do_write(struct nfs_write_data *data, { struct inode *inode = data->args.context->dentry->d_inode; - return nfs_initiate_write(data, NFS_CLIENT(inode), call_ops, how); + return nfs_initiate_write(NFS_CLIENT(inode), data, call_ops, how); } static int nfs_do_multiple_writes(struct list_head *head, -- cgit v0.10.2 From cd12ae326f5c040f61d64233514609adabe84ab8 Mon Sep 17 00:00:00 2001 From: Fred Isaman Date: Fri, 20 Apr 2012 14:47:42 -0400 Subject: NFS: remove unnecessary casts of void pointers in nfs4filelayout.c Signed-off-by: Fred Isaman Signed-off-by: Trond Myklebust diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index adbadcb..31afd81 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c @@ -191,7 +191,7 @@ filelayout_set_layoutcommit(struct nfs_write_data *wdata) */ static void filelayout_read_prepare(struct rpc_task *task, void *data) { - struct nfs_read_data *rdata = (struct nfs_read_data *)data; + struct nfs_read_data *rdata = data; rdata->read_done_cb = filelayout_read_done_cb; @@ -205,7 +205,7 @@ static void filelayout_read_prepare(struct rpc_task *task, void *data) static void filelayout_read_call_done(struct rpc_task *task, void *data) { - struct nfs_read_data *rdata = (struct nfs_read_data *)data; + struct nfs_read_data *rdata = data; dprintk("--> %s task->tk_status %d\n", __func__, task->tk_status); @@ -215,14 +215,14 @@ static void filelayout_read_call_done(struct rpc_task *task, void *data) static void filelayout_read_count_stats(struct rpc_task *task, void *data) { - struct nfs_read_data *rdata = (struct nfs_read_data *)data; + struct nfs_read_data *rdata = data; rpc_count_iostats(task, NFS_SERVER(rdata->inode)->client->cl_metrics); } static void filelayout_read_release(void *data) { - struct nfs_read_data *rdata = (struct nfs_read_data *)data; + struct nfs_read_data *rdata = data; put_lseg(rdata->lseg); rdata->mds_ops->rpc_release(data); @@ -282,7 +282,7 @@ static int filelayout_commit_done_cb(struct rpc_task *task, static void filelayout_write_prepare(struct rpc_task *task, void *data) { - struct nfs_write_data *wdata = (struct nfs_write_data *)data; + struct nfs_write_data *wdata = data; if (nfs41_setup_sequence(wdata->ds_clp->cl_session, &wdata->args.seq_args, &wdata->res.seq_res, @@ -294,7 +294,7 @@ static void filelayout_write_prepare(struct rpc_task *task, void *data) static void filelayout_write_call_done(struct rpc_task *task, void *data) { - struct nfs_write_data *wdata = (struct nfs_write_data *)data; + struct nfs_write_data *wdata = data; /* Note this may cause RPC to be resent */ wdata->mds_ops->rpc_call_done(task, data); @@ -302,14 +302,14 @@ static void filelayout_write_call_done(struct rpc_task *task, void *data) static void filelayout_write_count_stats(struct rpc_task *task, void *data) { - struct nfs_write_data *wdata = (struct nfs_write_data *)data; + struct nfs_write_data *wdata = data; rpc_count_iostats(task, NFS_SERVER(wdata->inode)->client->cl_metrics); } static void filelayout_write_release(void *data) { - struct nfs_write_data *wdata = (struct nfs_write_data *)data; + struct nfs_write_data *wdata = data; put_lseg(wdata->lseg); wdata->mds_ops->rpc_release(data); -- cgit v0.10.2 From b5542849764aa56fd3f05c0041195b637b9d2ac2 Mon Sep 17 00:00:00 2001 From: Fred Isaman Date: Fri, 20 Apr 2012 14:47:43 -0400 Subject: NFS: use req_offset where appropriate Signed-off-by: Fred Isaman Signed-off-by: Trond Myklebust diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index 31afd81..c536328 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c @@ -776,8 +776,8 @@ filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, !nfs_generic_pg_test(pgio, prev, req)) return false; - p_stripe = (u64)prev->wb_index << PAGE_CACHE_SHIFT; - r_stripe = (u64)req->wb_index << PAGE_CACHE_SHIFT; + p_stripe = (u64)req_offset(prev); + r_stripe = (u64)req_offset(req); stripe_unit = FILELAYOUT_LSEG(pgio->pg_lseg)->stripe_unit; do_div(p_stripe, stripe_unit); @@ -896,8 +896,7 @@ filelayout_choose_commit_list(struct nfs_page *req, * to store the value calculated in filelayout_write_pagelist * and just use that here. */ - j = nfs4_fl_calc_j_index(lseg, - (loff_t)req->wb_index << PAGE_CACHE_SHIFT); + j = nfs4_fl_calc_j_index(lseg, req_offset(req)); i = select_bucket_index(fl, j); buckets = FILELAYOUT_FROM_HDR(lseg->pls_layout)->commit_info.buckets; list = &buckets[i].written; -- cgit v0.10.2 From cd841605f7a721878d8a2d1362484723d8abf569 Mon Sep 17 00:00:00 2001 From: Fred Isaman Date: Fri, 20 Apr 2012 14:47:44 -0400 Subject: NFS: create common nfs_pgio_header for both read and write In order to avoid duplicating all the data in nfs_read_data whenever we split it up into multiple RPC calls (either due to a short read result or due to rsize < PAGE_SIZE), we split out the bits that are the same per RPC call into a separate "header" structure. The goal this patch moves towards is to have a single header refcounted by several rpc_data structures. Thus, want to always refer from rpc_data to the header, and not the other way. This patch comes close to that ideal, but the directio code currently needs some special casing, isolated in the nfs_direct_[read_write]hdr_release() functions. This will be dealt with in a future patch. Signed-off-by: Fred Isaman Signed-off-by: Trond Myklebust diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c index 7f6a23f..7a48251 100644 --- a/fs/nfs/blocklayout/blocklayout.c +++ b/fs/nfs/blocklayout/blocklayout.c @@ -187,7 +187,6 @@ static void bl_end_io_read(struct bio *bio, int err) struct parallel_io *par = bio->bi_private; const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; - struct nfs_read_data *rdata = (struct nfs_read_data *)par->data; do { struct page *page = bvec->bv_page; @@ -198,9 +197,12 @@ static void bl_end_io_read(struct bio *bio, int err) SetPageUptodate(page); } while (bvec >= bio->bi_io_vec); if (!uptodate) { - if (!rdata->pnfs_error) - rdata->pnfs_error = -EIO; - pnfs_set_lo_fail(rdata->lseg); + struct nfs_read_data *rdata = par->data; + struct nfs_pgio_header *header = rdata->header; + + if (!header->pnfs_error) + header->pnfs_error = -EIO; + pnfs_set_lo_fail(header->lseg); } bio_put(bio); put_parallel(par); @@ -221,7 +223,7 @@ bl_end_par_io_read(void *data, int unused) { struct nfs_read_data *rdata = data; - rdata->task.tk_status = rdata->pnfs_error; + rdata->task.tk_status = rdata->header->pnfs_error; INIT_WORK(&rdata->task.u.tk_work, bl_read_cleanup); schedule_work(&rdata->task.u.tk_work); } @@ -229,6 +231,7 @@ bl_end_par_io_read(void *data, int unused) static enum pnfs_try_status bl_read_pagelist(struct nfs_read_data *rdata) { + struct nfs_pgio_header *header = rdata->header; int i, hole; struct bio *bio = NULL; struct pnfs_block_extent *be = NULL, *cow_read = NULL; @@ -256,10 +259,10 @@ bl_read_pagelist(struct nfs_read_data *rdata) bl_put_extent(cow_read); bio = bl_submit_bio(READ, bio); /* Get the next one */ - be = bl_find_get_extent(BLK_LSEG2EXT(rdata->lseg), + be = bl_find_get_extent(BLK_LSEG2EXT(header->lseg), isect, &cow_read); if (!be) { - rdata->pnfs_error = -EIO; + header->pnfs_error = -EIO; goto out; } extent_length = be->be_length - @@ -286,7 +289,7 @@ bl_read_pagelist(struct nfs_read_data *rdata) isect, pages[i], be_read, bl_end_io_read, par); if (IS_ERR(bio)) { - rdata->pnfs_error = PTR_ERR(bio); + header->pnfs_error = PTR_ERR(bio); bio = NULL; goto out; } @@ -294,9 +297,9 @@ bl_read_pagelist(struct nfs_read_data *rdata) isect += PAGE_CACHE_SECTORS; extent_length -= PAGE_CACHE_SECTORS; } - if ((isect << SECTOR_SHIFT) >= rdata->inode->i_size) { + if ((isect << SECTOR_SHIFT) >= header->inode->i_size) { rdata->res.eof = 1; - rdata->res.count = rdata->inode->i_size - f_offset; + rdata->res.count = header->inode->i_size - f_offset; } else { rdata->res.count = (isect << SECTOR_SHIFT) - f_offset; } @@ -345,7 +348,6 @@ static void bl_end_io_write_zero(struct bio *bio, int err) struct parallel_io *par = bio->bi_private; const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; - struct nfs_write_data *wdata = (struct nfs_write_data *)par->data; do { struct page *page = bvec->bv_page; @@ -358,9 +360,12 @@ static void bl_end_io_write_zero(struct bio *bio, int err) } while (bvec >= bio->bi_io_vec); if (unlikely(!uptodate)) { - if (!wdata->pnfs_error) - wdata->pnfs_error = -EIO; - pnfs_set_lo_fail(wdata->lseg); + struct nfs_write_data *data = par->data; + struct nfs_pgio_header *header = data->header; + + if (!header->pnfs_error) + header->pnfs_error = -EIO; + pnfs_set_lo_fail(header->lseg); } bio_put(bio); put_parallel(par); @@ -370,12 +375,13 @@ static void bl_end_io_write(struct bio *bio, int err) { struct parallel_io *par = bio->bi_private; const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); - struct nfs_write_data *wdata = (struct nfs_write_data *)par->data; + struct nfs_write_data *data = par->data; + struct nfs_pgio_header *header = data->header; if (!uptodate) { - if (!wdata->pnfs_error) - wdata->pnfs_error = -EIO; - pnfs_set_lo_fail(wdata->lseg); + if (!header->pnfs_error) + header->pnfs_error = -EIO; + pnfs_set_lo_fail(header->lseg); } bio_put(bio); put_parallel(par); @@ -391,9 +397,9 @@ static void bl_write_cleanup(struct work_struct *work) dprintk("%s enter\n", __func__); task = container_of(work, struct rpc_task, u.tk_work); wdata = container_of(task, struct nfs_write_data, task); - if (likely(!wdata->pnfs_error)) { + if (likely(!wdata->header->pnfs_error)) { /* Marks for LAYOUTCOMMIT */ - mark_extents_written(BLK_LSEG2EXT(wdata->lseg), + mark_extents_written(BLK_LSEG2EXT(wdata->header->lseg), wdata->args.offset, wdata->args.count); } pnfs_ld_write_done(wdata); @@ -404,12 +410,12 @@ static void bl_end_par_io_write(void *data, int num_se) { struct nfs_write_data *wdata = data; - if (unlikely(wdata->pnfs_error)) { - bl_free_short_extents(&BLK_LSEG2EXT(wdata->lseg)->bl_inval, + if (unlikely(wdata->header->pnfs_error)) { + bl_free_short_extents(&BLK_LSEG2EXT(wdata->header->lseg)->bl_inval, num_se); } - wdata->task.tk_status = wdata->pnfs_error; + wdata->task.tk_status = wdata->header->pnfs_error; wdata->verf.committed = NFS_FILE_SYNC; INIT_WORK(&wdata->task.u.tk_work, bl_write_cleanup); schedule_work(&wdata->task.u.tk_work); @@ -540,6 +546,7 @@ check_page: static enum pnfs_try_status bl_write_pagelist(struct nfs_write_data *wdata, int sync) { + struct nfs_pgio_header *header = wdata->header; int i, ret, npg_zero, pg_index, last = 0; struct bio *bio = NULL; struct pnfs_block_extent *be = NULL, *cow_read = NULL; @@ -552,7 +559,7 @@ bl_write_pagelist(struct nfs_write_data *wdata, int sync) pgoff_t index; u64 temp; int npg_per_block = - NFS_SERVER(wdata->inode)->pnfs_blksize >> PAGE_CACHE_SHIFT; + NFS_SERVER(header->inode)->pnfs_blksize >> PAGE_CACHE_SHIFT; dprintk("%s enter, %Zu@%lld\n", __func__, count, offset); /* At this point, wdata->pages is a (sequential) list of nfs_pages. @@ -566,7 +573,7 @@ bl_write_pagelist(struct nfs_write_data *wdata, int sync) /* At this point, have to be more careful with error handling */ isect = (sector_t) ((offset & (long)PAGE_CACHE_MASK) >> SECTOR_SHIFT); - be = bl_find_get_extent(BLK_LSEG2EXT(wdata->lseg), isect, &cow_read); + be = bl_find_get_extent(BLK_LSEG2EXT(header->lseg), isect, &cow_read); if (!be || !is_writable(be, isect)) { dprintk("%s no matching extents!\n", __func__); goto out_mds; @@ -597,10 +604,10 @@ fill_invalid_ext: dprintk("%s zero %dth page: index %lu isect %llu\n", __func__, npg_zero, index, (unsigned long long)isect); - page = bl_find_get_zeroing_page(wdata->inode, index, + page = bl_find_get_zeroing_page(header->inode, index, cow_read); if (unlikely(IS_ERR(page))) { - wdata->pnfs_error = PTR_ERR(page); + header->pnfs_error = PTR_ERR(page); goto out; } else if (page == NULL) goto next_page; @@ -612,7 +619,7 @@ fill_invalid_ext: __func__, ret); end_page_writeback(page); page_cache_release(page); - wdata->pnfs_error = ret; + header->pnfs_error = ret; goto out; } if (likely(!bl_push_one_short_extent(be->be_inval))) @@ -620,11 +627,11 @@ fill_invalid_ext: else { end_page_writeback(page); page_cache_release(page); - wdata->pnfs_error = -ENOMEM; + header->pnfs_error = -ENOMEM; goto out; } /* FIXME: This should be done in bi_end_io */ - mark_extents_written(BLK_LSEG2EXT(wdata->lseg), + mark_extents_written(BLK_LSEG2EXT(header->lseg), page->index << PAGE_CACHE_SHIFT, PAGE_CACHE_SIZE); @@ -632,7 +639,7 @@ fill_invalid_ext: isect, page, be, bl_end_io_write_zero, par); if (IS_ERR(bio)) { - wdata->pnfs_error = PTR_ERR(bio); + header->pnfs_error = PTR_ERR(bio); bio = NULL; goto out; } @@ -653,10 +660,10 @@ next_page: bl_put_extent(be); bio = bl_submit_bio(WRITE, bio); /* Get the next one */ - be = bl_find_get_extent(BLK_LSEG2EXT(wdata->lseg), + be = bl_find_get_extent(BLK_LSEG2EXT(header->lseg), isect, NULL); if (!be || !is_writable(be, isect)) { - wdata->pnfs_error = -EINVAL; + header->pnfs_error = -EINVAL; goto out; } if (be->be_state == PNFS_BLOCK_INVALID_DATA) { @@ -664,7 +671,7 @@ next_page: be->be_inval))) par->bse_count++; else { - wdata->pnfs_error = -ENOMEM; + header->pnfs_error = -ENOMEM; goto out; } } @@ -677,7 +684,7 @@ next_page: if (unlikely(ret)) { dprintk("%s bl_mark_sectors_init fail %d\n", __func__, ret); - wdata->pnfs_error = ret; + header->pnfs_error = ret; goto out; } } @@ -685,7 +692,7 @@ next_page: isect, pages[i], be, bl_end_io_write, par); if (IS_ERR(bio)) { - wdata->pnfs_error = PTR_ERR(bio); + header->pnfs_error = PTR_ERR(bio); bio = NULL; goto out; } diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index fb7fbaa..56176af 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -242,7 +242,7 @@ static void nfs_direct_read_release(void *calldata) { struct nfs_read_data *data = calldata; - struct nfs_direct_req *dreq = (struct nfs_direct_req *) data->req; + struct nfs_direct_req *dreq = (struct nfs_direct_req *)data->header->req; int status = data->task.tk_status; spin_lock(&dreq->lock); @@ -269,6 +269,15 @@ static const struct rpc_call_ops nfs_read_direct_ops = { .rpc_release = nfs_direct_read_release, }; +static void nfs_direct_readhdr_release(struct nfs_read_header *rhdr) +{ + struct nfs_read_data *data = &rhdr->rpc_data; + + if (data->pagevec != data->page_array) + kfree(data->pagevec); + nfs_readhdr_free(&rhdr->header); +} + /* * For each rsize'd chunk of the user's buffer, dispatch an NFS READ * operation. If nfs_readdata_alloc() or get_user_pages() fails, @@ -301,6 +310,7 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_direct_req *dreq, ssize_t started = 0; do { + struct nfs_read_header *rhdr; struct nfs_read_data *data; size_t bytes; @@ -308,23 +318,24 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_direct_req *dreq, bytes = min(rsize,count); result = -ENOMEM; - data = nfs_readdata_alloc(nfs_page_array_len(pgbase, bytes)); - if (unlikely(!data)) + rhdr = nfs_readhdr_alloc(nfs_page_array_len(pgbase, bytes)); + if (unlikely(!rhdr)) break; + data = &rhdr->rpc_data; down_read(¤t->mm->mmap_sem); result = get_user_pages(current, current->mm, user_addr, data->npages, 1, 0, data->pagevec, NULL); up_read(¤t->mm->mmap_sem); if (result < 0) { - nfs_readdata_free(data); + nfs_direct_readhdr_release(rhdr); break; } if ((unsigned)result < data->npages) { bytes = result * PAGE_SIZE; if (bytes <= pgbase) { nfs_direct_release_pages(data->pagevec, result); - nfs_readdata_free(data); + nfs_direct_readhdr_release(rhdr); break; } bytes -= pgbase; @@ -333,9 +344,9 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_direct_req *dreq, get_dreq(dreq); - data->req = (struct nfs_page *) dreq; - data->inode = inode; - data->cred = msg.rpc_cred; + rhdr->header.req = (struct nfs_page *) dreq; + rhdr->header.inode = inode; + rhdr->header.cred = msg.rpc_cred; data->args.fh = NFS_FH(inode); data->args.context = get_nfs_open_context(ctx); data->args.lock_context = dreq->l_ctx; @@ -447,13 +458,23 @@ out: return result; } +static void nfs_direct_writehdr_release(struct nfs_write_header *whdr) +{ + struct nfs_write_data *data = &whdr->rpc_data; + + if (data->pagevec != data->page_array) + kfree(data->pagevec); + nfs_writehdr_free(&whdr->header); +} + static void nfs_direct_free_writedata(struct nfs_direct_req *dreq) { while (!list_empty(&dreq->rewrite_list)) { - struct nfs_write_data *data = list_entry(dreq->rewrite_list.next, struct nfs_write_data, pages); - list_del(&data->pages); - nfs_direct_release_pages(data->pagevec, data->npages); - nfs_writedata_free(data); + struct nfs_pgio_header *hdr = list_entry(dreq->rewrite_list.next, struct nfs_pgio_header, pages); + struct nfs_write_header *whdr = container_of(hdr, struct nfs_write_header, header); + list_del(&hdr->pages); + nfs_direct_release_pages(whdr->rpc_data.pagevec, whdr->rpc_data.npages); + nfs_direct_writehdr_release(whdr); } } @@ -463,6 +484,7 @@ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq) struct inode *inode = dreq->inode; struct list_head *p; struct nfs_write_data *data; + struct nfs_pgio_header *hdr; struct rpc_task *task; struct rpc_message msg = { .rpc_cred = dreq->ctx->cred, @@ -479,7 +501,8 @@ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq) get_dreq(dreq); list_for_each(p, &dreq->rewrite_list) { - data = list_entry(p, struct nfs_write_data, pages); + hdr = list_entry(p, struct nfs_pgio_header, pages); + data = &(container_of(hdr, struct nfs_write_header, header))->rpc_data; get_dreq(dreq); @@ -652,7 +675,8 @@ static void nfs_direct_write_result(struct rpc_task *task, void *calldata) static void nfs_direct_write_release(void *calldata) { struct nfs_write_data *data = calldata; - struct nfs_direct_req *dreq = (struct nfs_direct_req *) data->req; + struct nfs_pgio_header *hdr = data->header; + struct nfs_direct_req *dreq = (struct nfs_direct_req *) hdr->req; int status = data->task.tk_status; spin_lock(&dreq->lock); @@ -684,7 +708,7 @@ out_unlock: spin_unlock(&dreq->lock); if (put_dreq(dreq)) - nfs_direct_write_complete(dreq, data->inode); + nfs_direct_write_complete(dreq, hdr->inode); } static const struct rpc_call_ops nfs_write_direct_ops = { @@ -725,6 +749,7 @@ static ssize_t nfs_direct_write_schedule_segment(struct nfs_direct_req *dreq, ssize_t started = 0; do { + struct nfs_write_header *whdr; struct nfs_write_data *data; size_t bytes; @@ -732,23 +757,25 @@ static ssize_t nfs_direct_write_schedule_segment(struct nfs_direct_req *dreq, bytes = min(wsize,count); result = -ENOMEM; - data = nfs_writedata_alloc(nfs_page_array_len(pgbase, bytes)); - if (unlikely(!data)) + whdr = nfs_writehdr_alloc(nfs_page_array_len(pgbase, bytes)); + if (unlikely(!whdr)) break; + data = &whdr->rpc_data; + down_read(¤t->mm->mmap_sem); result = get_user_pages(current, current->mm, user_addr, data->npages, 0, 0, data->pagevec, NULL); up_read(¤t->mm->mmap_sem); if (result < 0) { - nfs_writedata_free(data); + nfs_direct_writehdr_release(whdr); break; } if ((unsigned)result < data->npages) { bytes = result * PAGE_SIZE; if (bytes <= pgbase) { nfs_direct_release_pages(data->pagevec, result); - nfs_writedata_free(data); + nfs_direct_writehdr_release(whdr); break; } bytes -= pgbase; @@ -757,11 +784,11 @@ static ssize_t nfs_direct_write_schedule_segment(struct nfs_direct_req *dreq, get_dreq(dreq); - list_move_tail(&data->pages, &dreq->rewrite_list); + list_move_tail(&whdr->header.pages, &dreq->rewrite_list); - data->req = (struct nfs_page *) dreq; - data->inode = inode; - data->cred = msg.rpc_cred; + whdr->header.req = (struct nfs_page *) dreq; + whdr->header.inode = inode; + whdr->header.cred = msg.rpc_cred; data->args.fh = NFS_FH(inode); data->args.context = ctx; data->args.lock_context = dreq->l_ctx; diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 650127f..7dc9be1 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -296,6 +296,8 @@ extern int nfs4_get_rootfh(struct nfs_server *server, struct nfs_fh *mntfh); struct nfs_pageio_descriptor; /* read.c */ +extern struct nfs_read_header *nfs_readhdr_alloc(unsigned int npages); +extern void nfs_readhdr_free(struct nfs_pgio_header *hdr); extern int nfs_initiate_read(struct rpc_clnt *clnt, struct nfs_read_data *data, const struct rpc_call_ops *call_ops); @@ -309,6 +311,8 @@ extern void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio); extern void nfs_readdata_release(struct nfs_read_data *rdata); /* write.c */ +extern struct nfs_write_header *nfs_writehdr_alloc(unsigned int npages); +extern void nfs_writehdr_free(struct nfs_pgio_header *hdr); extern int nfs_generic_flush(struct nfs_pageio_descriptor *desc, struct list_head *head); extern void nfs_pageio_init_write_mds(struct nfs_pageio_descriptor *pgio, diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index b1daca7..56dcefc 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -811,11 +811,13 @@ nfs3_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle, static int nfs3_read_done(struct rpc_task *task, struct nfs_read_data *data) { - if (nfs3_async_handle_jukebox(task, data->inode)) + struct inode *inode = data->header->inode; + + if (nfs3_async_handle_jukebox(task, inode)) return -EAGAIN; - nfs_invalidate_atime(data->inode); - nfs_refresh_inode(data->inode, &data->fattr); + nfs_invalidate_atime(inode); + nfs_refresh_inode(inode, &data->fattr); return 0; } @@ -831,10 +833,12 @@ static void nfs3_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_read_da static int nfs3_write_done(struct rpc_task *task, struct nfs_write_data *data) { - if (nfs3_async_handle_jukebox(task, data->inode)) + struct inode *inode = data->header->inode; + + if (nfs3_async_handle_jukebox(task, inode)) return -EAGAIN; if (task->tk_status >= 0) - nfs_post_op_update_inode_force_wcc(data->inode, data->res.fattr); + nfs_post_op_update_inode_force_wcc(inode, data->res.fattr); return 0; } diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index c536328..ad1d680 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c @@ -148,6 +148,7 @@ wait_on_recovery: static int filelayout_read_done_cb(struct rpc_task *task, struct nfs_read_data *data) { + struct nfs_pgio_header *hdr = data->header; int reset = 0; dprintk("%s DS read\n", __func__); @@ -157,7 +158,7 @@ static int filelayout_read_done_cb(struct rpc_task *task, dprintk("%s calling restart ds_clp %p ds_clp->cl_session %p\n", __func__, data->ds_clp, data->ds_clp->cl_session); if (reset) { - pnfs_set_lo_fail(data->lseg); + pnfs_set_lo_fail(hdr->lseg); nfs4_reset_read(task, data); } rpc_restart_call_prepare(task); @@ -175,13 +176,15 @@ static int filelayout_read_done_cb(struct rpc_task *task, static void filelayout_set_layoutcommit(struct nfs_write_data *wdata) { - if (FILELAYOUT_LSEG(wdata->lseg)->commit_through_mds || + struct nfs_pgio_header *hdr = wdata->header; + + if (FILELAYOUT_LSEG(hdr->lseg)->commit_through_mds || wdata->res.verf->committed == NFS_FILE_SYNC) return; pnfs_set_layoutcommit(wdata); - dprintk("%s ionde %lu pls_end_pos %lu\n", __func__, wdata->inode->i_ino, - (unsigned long) NFS_I(wdata->inode)->layout->plh_lwb); + dprintk("%s ionde %lu pls_end_pos %lu\n", __func__, hdr->inode->i_ino, + (unsigned long) NFS_I(hdr->inode)->layout->plh_lwb); } /* @@ -210,27 +213,28 @@ static void filelayout_read_call_done(struct rpc_task *task, void *data) dprintk("--> %s task->tk_status %d\n", __func__, task->tk_status); /* Note this may cause RPC to be resent */ - rdata->mds_ops->rpc_call_done(task, data); + rdata->header->mds_ops->rpc_call_done(task, data); } static void filelayout_read_count_stats(struct rpc_task *task, void *data) { struct nfs_read_data *rdata = data; - rpc_count_iostats(task, NFS_SERVER(rdata->inode)->client->cl_metrics); + rpc_count_iostats(task, NFS_SERVER(rdata->header->inode)->client->cl_metrics); } static void filelayout_read_release(void *data) { struct nfs_read_data *rdata = data; - put_lseg(rdata->lseg); - rdata->mds_ops->rpc_release(data); + put_lseg(rdata->header->lseg); + rdata->header->mds_ops->rpc_release(data); } static int filelayout_write_done_cb(struct rpc_task *task, struct nfs_write_data *data) { + struct nfs_pgio_header *hdr = data->header; int reset = 0; if (filelayout_async_handle_error(task, data->args.context->state, @@ -238,7 +242,7 @@ static int filelayout_write_done_cb(struct rpc_task *task, dprintk("%s calling restart ds_clp %p ds_clp->cl_session %p\n", __func__, data->ds_clp, data->ds_clp->cl_session); if (reset) { - pnfs_set_lo_fail(data->lseg); + pnfs_set_lo_fail(hdr->lseg); nfs4_reset_write(task, data); } rpc_restart_call_prepare(task); @@ -297,22 +301,22 @@ static void filelayout_write_call_done(struct rpc_task *task, void *data) struct nfs_write_data *wdata = data; /* Note this may cause RPC to be resent */ - wdata->mds_ops->rpc_call_done(task, data); + wdata->header->mds_ops->rpc_call_done(task, data); } static void filelayout_write_count_stats(struct rpc_task *task, void *data) { struct nfs_write_data *wdata = data; - rpc_count_iostats(task, NFS_SERVER(wdata->inode)->client->cl_metrics); + rpc_count_iostats(task, NFS_SERVER(wdata->header->inode)->client->cl_metrics); } static void filelayout_write_release(void *data) { struct nfs_write_data *wdata = data; - put_lseg(wdata->lseg); - wdata->mds_ops->rpc_release(data); + put_lseg(wdata->header->lseg); + wdata->header->mds_ops->rpc_release(data); } static void filelayout_commit_prepare(struct rpc_task *task, void *data) @@ -377,7 +381,8 @@ static const struct rpc_call_ops filelayout_commit_call_ops = { static enum pnfs_try_status filelayout_read_pagelist(struct nfs_read_data *data) { - struct pnfs_layout_segment *lseg = data->lseg; + struct nfs_pgio_header *hdr = data->header; + struct pnfs_layout_segment *lseg = hdr->lseg; struct nfs4_pnfs_ds *ds; loff_t offset = data->args.offset; u32 j, idx; @@ -385,7 +390,7 @@ filelayout_read_pagelist(struct nfs_read_data *data) int status; dprintk("--> %s ino %lu pgbase %u req %Zu@%llu\n", - __func__, data->inode->i_ino, + __func__, hdr->inode->i_ino, data->args.pgbase, (size_t)data->args.count, offset); if (test_bit(NFS_DEVICEID_INVALID, &FILELAYOUT_DEVID_NODE(lseg)->flags)) @@ -423,7 +428,8 @@ filelayout_read_pagelist(struct nfs_read_data *data) static enum pnfs_try_status filelayout_write_pagelist(struct nfs_write_data *data, int sync) { - struct pnfs_layout_segment *lseg = data->lseg; + struct nfs_pgio_header *hdr = data->header; + struct pnfs_layout_segment *lseg = hdr->lseg; struct nfs4_pnfs_ds *ds; loff_t offset = data->args.offset; u32 j, idx; @@ -445,7 +451,7 @@ filelayout_write_pagelist(struct nfs_write_data *data, int sync) return PNFS_NOT_ATTEMPTED; } dprintk("%s ino %lu sync %d req %Zu@%llu DS: %s\n", __func__, - data->inode->i_ino, sync, (size_t) data->args.count, offset, + hdr->inode->i_ino, sync, (size_t) data->args.count, offset, ds->ds_remotestr); data->write_done_cb = filelayout_write_done_cb; diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index cc04b6e..5375862 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3336,12 +3336,12 @@ static int nfs4_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle, void __nfs4_read_done_cb(struct nfs_read_data *data) { - nfs_invalidate_atime(data->inode); + nfs_invalidate_atime(data->header->inode); } static int nfs4_read_done_cb(struct rpc_task *task, struct nfs_read_data *data) { - struct nfs_server *server = NFS_SERVER(data->inode); + struct nfs_server *server = NFS_SERVER(data->header->inode); if (nfs4_async_handle_error(task, server, data->args.context->state) == -EAGAIN) { rpc_restart_call_prepare(task); @@ -3376,7 +3376,7 @@ static void nfs4_proc_read_setup(struct nfs_read_data *data, struct rpc_message static void nfs4_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_read_data *data) { - if (nfs4_setup_sequence(NFS_SERVER(data->inode), + if (nfs4_setup_sequence(NFS_SERVER(data->header->inode), &data->args.seq_args, &data->res.seq_res, task)) @@ -3387,22 +3387,25 @@ static void nfs4_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_read_da /* Reset the the nfs_read_data to send the read to the MDS. */ void nfs4_reset_read(struct rpc_task *task, struct nfs_read_data *data) { + struct nfs_pgio_header *hdr = data->header; + struct inode *inode = hdr->inode; + dprintk("%s Reset task for i/o through\n", __func__); - put_lseg(data->lseg); - data->lseg = NULL; + put_lseg(hdr->lseg); + hdr->lseg = NULL; + data->ds_clp = NULL; /* offsets will differ in the dense stripe case */ data->args.offset = data->mds_offset; - data->ds_clp = NULL; - data->args.fh = NFS_FH(data->inode); + data->args.fh = NFS_FH(inode); data->read_done_cb = nfs4_read_done_cb; - task->tk_ops = data->mds_ops; - rpc_task_reset_client(task, NFS_CLIENT(data->inode)); + task->tk_ops = hdr->mds_ops; + rpc_task_reset_client(task, NFS_CLIENT(inode)); } EXPORT_SYMBOL_GPL(nfs4_reset_read); static int nfs4_write_done_cb(struct rpc_task *task, struct nfs_write_data *data) { - struct inode *inode = data->inode; + struct inode *inode = data->header->inode; if (nfs4_async_handle_error(task, NFS_SERVER(inode), data->args.context->state) == -EAGAIN) { rpc_restart_call_prepare(task); @@ -3426,25 +3429,28 @@ static int nfs4_write_done(struct rpc_task *task, struct nfs_write_data *data) /* Reset the the nfs_write_data to send the write to the MDS. */ void nfs4_reset_write(struct rpc_task *task, struct nfs_write_data *data) { + struct nfs_pgio_header *hdr = data->header; + struct inode *inode = hdr->inode; + dprintk("%s Reset task for i/o through\n", __func__); - put_lseg(data->lseg); - data->lseg = NULL; - data->ds_clp = NULL; + put_lseg(hdr->lseg); + hdr->lseg = NULL; + data->ds_clp = NULL; data->write_done_cb = nfs4_write_done_cb; - data->args.fh = NFS_FH(data->inode); + data->args.fh = NFS_FH(inode); data->args.bitmask = data->res.server->cache_consistency_bitmask; data->args.offset = data->mds_offset; data->res.fattr = &data->fattr; - task->tk_ops = data->mds_ops; - rpc_task_reset_client(task, NFS_CLIENT(data->inode)); + task->tk_ops = hdr->mds_ops; + rpc_task_reset_client(task, NFS_CLIENT(inode)); } EXPORT_SYMBOL_GPL(nfs4_reset_write); static void nfs4_proc_write_setup(struct nfs_write_data *data, struct rpc_message *msg) { - struct nfs_server *server = NFS_SERVER(data->inode); + struct nfs_server *server = NFS_SERVER(data->header->inode); - if (data->lseg) { + if (data->header->lseg) { data->args.bitmask = NULL; data->res.fattr = NULL; } else @@ -3460,7 +3466,7 @@ static void nfs4_proc_write_setup(struct nfs_write_data *data, struct rpc_messag static void nfs4_proc_write_rpc_prepare(struct rpc_task *task, struct nfs_write_data *data) { - if (nfs4_setup_sequence(NFS_SERVER(data->inode), + if (nfs4_setup_sequence(NFS_SERVER(data->header->inode), &data->args.seq_args, &data->res.seq_res, task)) diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c index 4bff4a3..fbf4874 100644 --- a/fs/nfs/objlayout/objio_osd.c +++ b/fs/nfs/objlayout/objio_osd.c @@ -440,11 +440,12 @@ static void _read_done(struct ore_io_state *ios, void *private) int objio_read_pagelist(struct nfs_read_data *rdata) { + struct nfs_pgio_header *hdr = rdata->header; struct objio_state *objios; int ret; - ret = objio_alloc_io_state(NFS_I(rdata->inode)->layout, true, - rdata->lseg, rdata->args.pages, rdata->args.pgbase, + ret = objio_alloc_io_state(NFS_I(hdr->inode)->layout, true, + hdr->lseg, rdata->args.pages, rdata->args.pgbase, rdata->args.offset, rdata->args.count, rdata, GFP_KERNEL, &objios); if (unlikely(ret)) @@ -483,12 +484,12 @@ static struct page *__r4w_get_page(void *priv, u64 offset, bool *uptodate) { struct objio_state *objios = priv; struct nfs_write_data *wdata = objios->oir.rpcdata; + struct address_space *mapping = wdata->header->inode->i_mapping; pgoff_t index = offset / PAGE_SIZE; - struct page *page = find_get_page(wdata->inode->i_mapping, index); + struct page *page = find_get_page(mapping, index); if (!page) { - page = find_or_create_page(wdata->inode->i_mapping, - index, GFP_NOFS); + page = find_or_create_page(mapping, index, GFP_NOFS); if (unlikely(!page)) { dprintk("%s: grab_cache_page Failed index=0x%lx\n", __func__, index); @@ -518,11 +519,12 @@ static const struct _ore_r4w_op _r4w_op = { int objio_write_pagelist(struct nfs_write_data *wdata, int how) { + struct nfs_pgio_header *hdr = wdata->header; struct objio_state *objios; int ret; - ret = objio_alloc_io_state(NFS_I(wdata->inode)->layout, false, - wdata->lseg, wdata->args.pages, wdata->args.pgbase, + ret = objio_alloc_io_state(NFS_I(hdr->inode)->layout, false, + hdr->lseg, wdata->args.pages, wdata->args.pgbase, wdata->args.offset, wdata->args.count, wdata, GFP_NOFS, &objios); if (unlikely(ret)) diff --git a/fs/nfs/objlayout/objlayout.c b/fs/nfs/objlayout/objlayout.c index 595c5fc..8746135 100644 --- a/fs/nfs/objlayout/objlayout.c +++ b/fs/nfs/objlayout/objlayout.c @@ -258,7 +258,7 @@ objlayout_read_done(struct objlayout_io_res *oir, ssize_t status, bool sync) if (status >= 0) rdata->res.count = status; else - rdata->pnfs_error = status; + rdata->header->pnfs_error = status; objlayout_iodone(oir); /* must not use oir after this point */ @@ -279,12 +279,14 @@ objlayout_read_done(struct objlayout_io_res *oir, ssize_t status, bool sync) enum pnfs_try_status objlayout_read_pagelist(struct nfs_read_data *rdata) { + struct nfs_pgio_header *hdr = rdata->header; + struct inode *inode = hdr->inode; loff_t offset = rdata->args.offset; size_t count = rdata->args.count; int err; loff_t eof; - eof = i_size_read(rdata->inode); + eof = i_size_read(inode); if (unlikely(offset + count > eof)) { if (offset >= eof) { err = 0; @@ -297,17 +299,17 @@ objlayout_read_pagelist(struct nfs_read_data *rdata) } rdata->res.eof = (offset + count) >= eof; - _fix_verify_io_params(rdata->lseg, &rdata->args.pages, + _fix_verify_io_params(hdr->lseg, &rdata->args.pages, &rdata->args.pgbase, rdata->args.offset, rdata->args.count); dprintk("%s: inode(%lx) offset 0x%llx count 0x%Zx eof=%d\n", - __func__, rdata->inode->i_ino, offset, count, rdata->res.eof); + __func__, inode->i_ino, offset, count, rdata->res.eof); err = objio_read_pagelist(rdata); out: if (unlikely(err)) { - rdata->pnfs_error = err; + hdr->pnfs_error = err; dprintk("%s: Returned Error %d\n", __func__, err); return PNFS_NOT_ATTEMPTED; } @@ -340,7 +342,7 @@ objlayout_write_done(struct objlayout_io_res *oir, ssize_t status, bool sync) wdata->res.count = status; wdata->verf.committed = oir->committed; } else { - wdata->pnfs_error = status; + wdata->header->pnfs_error = status; } objlayout_iodone(oir); /* must not use oir after this point */ @@ -363,15 +365,16 @@ enum pnfs_try_status objlayout_write_pagelist(struct nfs_write_data *wdata, int how) { + struct nfs_pgio_header *hdr = wdata->header; int err; - _fix_verify_io_params(wdata->lseg, &wdata->args.pages, + _fix_verify_io_params(hdr->lseg, &wdata->args.pages, &wdata->args.pgbase, wdata->args.offset, wdata->args.count); err = objio_write_pagelist(wdata, how); if (unlikely(err)) { - wdata->pnfs_error = err; + hdr->pnfs_error = err; dprintk("%s: Returned Error %d\n", __func__, err); return PNFS_NOT_ATTEMPTED; } diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 9c4d14a..d705da4 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1191,13 +1191,15 @@ static int pnfs_write_done_resend_to_mds(struct inode *inode, struct list_head * static void pnfs_ld_handle_write_error(struct nfs_write_data *data) { - dprintk("pnfs write error = %d\n", data->pnfs_error); - if (NFS_SERVER(data->inode)->pnfs_curr_ld->flags & + struct nfs_pgio_header *hdr = data->header; + + dprintk("pnfs write error = %d\n", hdr->pnfs_error); + if (NFS_SERVER(hdr->inode)->pnfs_curr_ld->flags & PNFS_LAYOUTRET_ON_ERROR) { - clear_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(data->inode)->flags); - pnfs_return_layout(data->inode); + clear_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(hdr->inode)->flags); + pnfs_return_layout(hdr->inode); } - data->task.tk_status = pnfs_write_done_resend_to_mds(data->inode, &data->pages); + data->task.tk_status = pnfs_write_done_resend_to_mds(hdr->inode, &hdr->pages); } /* @@ -1205,13 +1207,15 @@ static void pnfs_ld_handle_write_error(struct nfs_write_data *data) */ void pnfs_ld_write_done(struct nfs_write_data *data) { - if (likely(!data->pnfs_error)) { + struct nfs_pgio_header *hdr = data->header; + + if (!hdr->pnfs_error) { pnfs_set_layoutcommit(data); - data->mds_ops->rpc_call_done(&data->task, data); + hdr->mds_ops->rpc_call_done(&data->task, data); } else pnfs_ld_handle_write_error(data); - put_lseg(data->lseg); - data->mds_ops->rpc_release(data); + put_lseg(hdr->lseg); + hdr->mds_ops->rpc_release(data); } EXPORT_SYMBOL_GPL(pnfs_ld_write_done); @@ -1219,12 +1223,14 @@ static void pnfs_write_through_mds(struct nfs_pageio_descriptor *desc, struct nfs_write_data *data) { - list_splice_tail_init(&data->pages, &desc->pg_list); - if (data->req && list_empty(&data->req->wb_list)) - nfs_list_add_request(data->req, &desc->pg_list); + struct nfs_pgio_header *hdr = data->header; + + list_splice_tail_init(&hdr->pages, &desc->pg_list); + if (hdr->req && list_empty(&hdr->req->wb_list)) + nfs_list_add_request(hdr->req, &desc->pg_list); nfs_pageio_reset_write_mds(desc); desc->pg_recoalesce = 1; - put_lseg(data->lseg); + put_lseg(hdr->lseg); nfs_writedata_release(data); } @@ -1234,20 +1240,21 @@ pnfs_try_to_write_data(struct nfs_write_data *wdata, struct pnfs_layout_segment *lseg, int how) { - struct inode *inode = wdata->inode; + struct nfs_pgio_header *hdr = wdata->header; + struct inode *inode = hdr->inode; enum pnfs_try_status trypnfs; struct nfs_server *nfss = NFS_SERVER(inode); - wdata->mds_ops = call_ops; - wdata->lseg = get_lseg(lseg); + hdr->mds_ops = call_ops; + hdr->lseg = get_lseg(lseg); dprintk("%s: Writing ino:%lu %u@%llu (how %d)\n", __func__, inode->i_ino, wdata->args.count, wdata->args.offset, how); trypnfs = nfss->pnfs_curr_ld->write_pagelist(wdata, how); if (trypnfs == PNFS_NOT_ATTEMPTED) { - put_lseg(wdata->lseg); - wdata->lseg = NULL; + put_lseg(hdr->lseg); + hdr->lseg = NULL; } else nfs_inc_stats(inode, NFSIOS_PNFS_WRITE); @@ -1318,13 +1325,15 @@ static int pnfs_read_done_resend_to_mds(struct inode *inode, struct list_head *h static void pnfs_ld_handle_read_error(struct nfs_read_data *data) { - dprintk("pnfs read error = %d\n", data->pnfs_error); - if (NFS_SERVER(data->inode)->pnfs_curr_ld->flags & + struct nfs_pgio_header *hdr = data->header; + + dprintk("pnfs read error = %d\n", hdr->pnfs_error); + if (NFS_SERVER(hdr->inode)->pnfs_curr_ld->flags & PNFS_LAYOUTRET_ON_ERROR) { - clear_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(data->inode)->flags); - pnfs_return_layout(data->inode); + clear_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(hdr->inode)->flags); + pnfs_return_layout(hdr->inode); } - data->task.tk_status = pnfs_read_done_resend_to_mds(data->inode, &data->pages); + data->task.tk_status = pnfs_read_done_resend_to_mds(hdr->inode, &hdr->pages); } /* @@ -1332,13 +1341,15 @@ static void pnfs_ld_handle_read_error(struct nfs_read_data *data) */ void pnfs_ld_read_done(struct nfs_read_data *data) { - if (likely(!data->pnfs_error)) { + struct nfs_pgio_header *hdr = data->header; + + if (likely(!hdr->pnfs_error)) { __nfs4_read_done_cb(data); - data->mds_ops->rpc_call_done(&data->task, data); + hdr->mds_ops->rpc_call_done(&data->task, data); } else pnfs_ld_handle_read_error(data); - put_lseg(data->lseg); - data->mds_ops->rpc_release(data); + put_lseg(hdr->lseg); + hdr->mds_ops->rpc_release(data); } EXPORT_SYMBOL_GPL(pnfs_ld_read_done); @@ -1346,9 +1357,11 @@ static void pnfs_read_through_mds(struct nfs_pageio_descriptor *desc, struct nfs_read_data *data) { - list_splice_tail_init(&data->pages, &desc->pg_list); - if (data->req && list_empty(&data->req->wb_list)) - nfs_list_add_request(data->req, &desc->pg_list); + struct nfs_pgio_header *hdr = data->header; + + list_splice_tail_init(&hdr->pages, &desc->pg_list); + if (hdr->req && list_empty(&hdr->req->wb_list)) + nfs_list_add_request(hdr->req, &desc->pg_list); nfs_pageio_reset_read_mds(desc); desc->pg_recoalesce = 1; nfs_readdata_release(data); @@ -1362,20 +1375,21 @@ pnfs_try_to_read_data(struct nfs_read_data *rdata, const struct rpc_call_ops *call_ops, struct pnfs_layout_segment *lseg) { - struct inode *inode = rdata->inode; + struct nfs_pgio_header *hdr = rdata->header; + struct inode *inode = hdr->inode; struct nfs_server *nfss = NFS_SERVER(inode); enum pnfs_try_status trypnfs; - rdata->mds_ops = call_ops; - rdata->lseg = get_lseg(lseg); + hdr->mds_ops = call_ops; + hdr->lseg = get_lseg(lseg); dprintk("%s: Reading ino:%lu %u@%llu\n", __func__, inode->i_ino, rdata->args.count, rdata->args.offset); trypnfs = nfss->pnfs_curr_ld->read_pagelist(rdata); if (trypnfs == PNFS_NOT_ATTEMPTED) { - put_lseg(rdata->lseg); - rdata->lseg = NULL; + put_lseg(hdr->lseg); + hdr->lseg = NULL; } else { nfs_inc_stats(inode, NFSIOS_PNFS_READ); } @@ -1450,30 +1464,32 @@ EXPORT_SYMBOL_GPL(pnfs_set_lo_fail); void pnfs_set_layoutcommit(struct nfs_write_data *wdata) { - struct nfs_inode *nfsi = NFS_I(wdata->inode); + struct nfs_pgio_header *hdr = wdata->header; + struct inode *inode = hdr->inode; + struct nfs_inode *nfsi = NFS_I(inode); loff_t end_pos = wdata->mds_offset + wdata->res.count; bool mark_as_dirty = false; - spin_lock(&nfsi->vfs_inode.i_lock); + spin_lock(&inode->i_lock); if (!test_and_set_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags)) { mark_as_dirty = true; dprintk("%s: Set layoutcommit for inode %lu ", - __func__, wdata->inode->i_ino); + __func__, inode->i_ino); } - if (!test_and_set_bit(NFS_LSEG_LAYOUTCOMMIT, &wdata->lseg->pls_flags)) { + if (!test_and_set_bit(NFS_LSEG_LAYOUTCOMMIT, &hdr->lseg->pls_flags)) { /* references matched in nfs4_layoutcommit_release */ - get_lseg(wdata->lseg); + get_lseg(hdr->lseg); } if (end_pos > nfsi->layout->plh_lwb) nfsi->layout->plh_lwb = end_pos; - spin_unlock(&nfsi->vfs_inode.i_lock); + spin_unlock(&inode->i_lock); dprintk("%s: lseg %p end_pos %llu\n", - __func__, wdata->lseg, nfsi->layout->plh_lwb); + __func__, hdr->lseg, nfsi->layout->plh_lwb); /* if pnfs_layoutcommit_inode() runs between inode locks, the next one * will be a noop because NFS_INO_LAYOUTCOMMIT will not be set */ if (mark_as_dirty) - mark_inode_dirty_sync(wdata->inode); + mark_inode_dirty_sync(inode); } EXPORT_SYMBOL_GPL(pnfs_set_layoutcommit); diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index bf80503..22ee705 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c @@ -641,12 +641,14 @@ nfs_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle, static int nfs_read_done(struct rpc_task *task, struct nfs_read_data *data) { + struct inode *inode = data->header->inode; + if (nfs_async_handle_expired_key(task)) return -EAGAIN; - nfs_invalidate_atime(data->inode); + nfs_invalidate_atime(inode); if (task->tk_status >= 0) { - nfs_refresh_inode(data->inode, data->res.fattr); + nfs_refresh_inode(inode, data->res.fattr); /* Emulate the eof flag, which isn't normally needed in NFSv2 * as it is guaranteed to always return the file attributes */ @@ -668,11 +670,13 @@ static void nfs_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_read_dat static int nfs_write_done(struct rpc_task *task, struct nfs_write_data *data) { + struct inode *inode = data->header->inode; + if (nfs_async_handle_expired_key(task)) return -EAGAIN; if (task->tk_status >= 0) - nfs_post_op_update_inode_force_wcc(data->inode, data->res.fattr); + nfs_post_op_update_inode_force_wcc(inode, data->res.fattr); return 0; } diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 4ddba67..d6d4682 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -35,19 +35,24 @@ static const struct rpc_call_ops nfs_read_full_ops; static struct kmem_cache *nfs_rdata_cachep; -struct nfs_read_data *nfs_readdata_alloc(unsigned int pagecount) +struct nfs_read_header *nfs_readhdr_alloc(unsigned int pagecount) { - struct nfs_read_data *p; + struct nfs_read_header *p; p = kmem_cache_zalloc(nfs_rdata_cachep, GFP_KERNEL); if (p) { - INIT_LIST_HEAD(&p->pages); - p->npages = pagecount; - if (pagecount <= ARRAY_SIZE(p->page_array)) - p->pagevec = p->page_array; + struct nfs_pgio_header *hdr = &p->header; + struct nfs_read_data *data = &p->rpc_data; + + INIT_LIST_HEAD(&hdr->pages); + INIT_LIST_HEAD(&data->list); + data->npages = pagecount; + data->header = hdr; + if (pagecount <= ARRAY_SIZE(data->page_array)) + data->pagevec = data->page_array; else { - p->pagevec = kcalloc(pagecount, sizeof(struct page *), GFP_KERNEL); - if (!p->pagevec) { + data->pagevec = kcalloc(pagecount, sizeof(struct page *), GFP_KERNEL); + if (!data->pagevec) { kmem_cache_free(nfs_rdata_cachep, p); p = NULL; } @@ -56,17 +61,19 @@ struct nfs_read_data *nfs_readdata_alloc(unsigned int pagecount) return p; } -void nfs_readdata_free(struct nfs_read_data *p) +void nfs_readhdr_free(struct nfs_pgio_header *hdr) { - if (p && (p->pagevec != &p->page_array[0])) - kfree(p->pagevec); - kmem_cache_free(nfs_rdata_cachep, p); + struct nfs_read_header *rhdr = container_of(hdr, struct nfs_read_header, header); + + kmem_cache_free(nfs_rdata_cachep, rhdr); } void nfs_readdata_release(struct nfs_read_data *rdata) { put_nfs_open_context(rdata->args.context); - nfs_readdata_free(rdata); + if (rdata->pagevec != rdata->page_array) + kfree(rdata->pagevec); + nfs_readhdr_free(rdata->header); } static @@ -173,13 +180,13 @@ int nfs_initiate_read(struct rpc_clnt *clnt, struct nfs_read_data *data, const struct rpc_call_ops *call_ops) { - struct inode *inode = data->inode; + struct inode *inode = data->header->inode; int swap_flags = IS_SWAPFILE(inode) ? NFS_RPC_SWAPFLAGS : 0; struct rpc_task *task; struct rpc_message msg = { .rpc_argp = &data->args, .rpc_resp = &data->res, - .rpc_cred = data->cred, + .rpc_cred = data->header->cred, }; struct rpc_task_setup task_setup_data = { .task = &data->task, @@ -216,11 +223,11 @@ EXPORT_SYMBOL_GPL(nfs_initiate_read); static void nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data, unsigned int count, unsigned int offset) { - struct inode *inode = req->wb_context->dentry->d_inode; + struct inode *inode = data->header->inode; - data->req = req; - data->inode = inode; - data->cred = req->wb_context->cred; + data->header->req = req; + data->header->inode = inode; + data->header->cred = req->wb_context->cred; data->args.fh = NFS_FH(inode); data->args.offset = req_offset(req) + offset; @@ -239,7 +246,7 @@ static void nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data, static int nfs_do_read(struct nfs_read_data *data, const struct rpc_call_ops *call_ops) { - struct inode *inode = data->args.context->dentry->d_inode; + struct inode *inode = data->header->inode; return nfs_initiate_read(NFS_CLIENT(inode), data, call_ops); } @@ -293,6 +300,7 @@ static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc, struct list_head { struct nfs_page *req = nfs_list_entry(desc->pg_list.next); struct page *page = req->wb_page; + struct nfs_read_header *rhdr; struct nfs_read_data *data; size_t rsize = desc->pg_bsize, nbytes; unsigned int offset; @@ -306,9 +314,10 @@ static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc, struct list_head do { size_t len = min(nbytes,rsize); - data = nfs_readdata_alloc(1); - if (!data) + rhdr = nfs_readhdr_alloc(1); + if (!rhdr) goto out_bad; + data = &rhdr->rpc_data; data->pagevec[0] = page; nfs_read_rpcsetup(req, data, len, offset); list_add(&data->list, res); @@ -333,26 +342,28 @@ static int nfs_pagein_one(struct nfs_pageio_descriptor *desc, struct list_head * { struct nfs_page *req; struct page **pages; + struct nfs_read_header *rhdr; struct nfs_read_data *data; struct list_head *head = &desc->pg_list; int ret = 0; - data = nfs_readdata_alloc(nfs_page_array_len(desc->pg_base, - desc->pg_count)); - if (!data) { + rhdr = nfs_readhdr_alloc(nfs_page_array_len(desc->pg_base, + desc->pg_count)); + if (!rhdr) { nfs_async_read_error(head); ret = -ENOMEM; goto out; } + data = &rhdr->rpc_data; pages = data->pagevec; while (!list_empty(head)) { req = nfs_list_entry(head->next); nfs_list_remove_request(req); - nfs_list_add_request(req, &data->pages); + nfs_list_add_request(req, &rhdr->header.pages); *pages++ = req->wb_page; } - req = nfs_list_entry(data->pages.next); + req = nfs_list_entry(rhdr->header.pages.next); nfs_read_rpcsetup(req, data, desc->pg_count, 0); list_add(&data->list, res); @@ -390,20 +401,21 @@ static const struct nfs_pageio_ops nfs_pageio_read_ops = { */ int nfs_readpage_result(struct rpc_task *task, struct nfs_read_data *data) { + struct inode *inode = data->header->inode; int status; dprintk("NFS: %s: %5u, (status %d)\n", __func__, task->tk_pid, task->tk_status); - status = NFS_PROTO(data->inode)->read_done(task, data); + status = NFS_PROTO(inode)->read_done(task, data); if (status != 0) return status; - nfs_add_stats(data->inode, NFSIOS_SERVERREADBYTES, data->res.count); + nfs_add_stats(inode, NFSIOS_SERVERREADBYTES, data->res.count); if (task->tk_status == -ESTALE) { - set_bit(NFS_INO_STALE, &NFS_I(data->inode)->flags); - nfs_mark_for_revalidate(data->inode); + set_bit(NFS_INO_STALE, &NFS_I(inode)->flags); + nfs_mark_for_revalidate(inode); } return 0; } @@ -417,7 +429,7 @@ static void nfs_readpage_retry(struct rpc_task *task, struct nfs_read_data *data return; /* This is a short read! */ - nfs_inc_stats(data->inode, NFSIOS_SHORTREAD); + nfs_inc_stats(data->header->inode, NFSIOS_SHORTREAD); /* Has the server at least made some progress? */ if (resp->count == 0) return; @@ -449,7 +461,7 @@ static void nfs_readpage_result_partial(struct rpc_task *task, void *calldata) static void nfs_readpage_release_partial(void *calldata) { struct nfs_read_data *data = calldata; - struct nfs_page *req = data->req; + struct nfs_page *req = data->header->req; struct page *page = req->wb_page; int status = data->task.tk_status; @@ -461,13 +473,13 @@ static void nfs_readpage_release_partial(void *calldata) SetPageUptodate(page); nfs_readpage_release(req); } - nfs_readdata_release(calldata); + nfs_readdata_release(data); } void nfs_read_prepare(struct rpc_task *task, void *calldata) { struct nfs_read_data *data = calldata; - NFS_PROTO(data->inode)->read_rpc_prepare(task, data); + NFS_PROTO(data->header->inode)->read_rpc_prepare(task, data); } static const struct rpc_call_ops nfs_read_partial_ops = { @@ -524,9 +536,10 @@ static void nfs_readpage_result_full(struct rpc_task *task, void *calldata) static void nfs_readpage_release_full(void *calldata) { struct nfs_read_data *data = calldata; + struct nfs_pgio_header *hdr = data->header; - while (!list_empty(&data->pages)) { - struct nfs_page *req = nfs_list_entry(data->pages.next); + while (!list_empty(&hdr->pages)) { + struct nfs_page *req = nfs_list_entry(hdr->pages.next); nfs_list_remove_request(req); nfs_readpage_release(req); @@ -685,7 +698,7 @@ out: int __init nfs_init_readpagecache(void) { nfs_rdata_cachep = kmem_cache_create("nfs_read_data", - sizeof(struct nfs_read_data), + sizeof(struct nfs_read_header), 0, SLAB_HWCACHE_ALIGN, NULL); if (nfs_rdata_cachep == NULL) diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 76735dd..dbb5c0a 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -69,19 +69,24 @@ void nfs_commit_free(struct nfs_commit_data *p) } EXPORT_SYMBOL_GPL(nfs_commit_free); -struct nfs_write_data *nfs_writedata_alloc(unsigned int pagecount) +struct nfs_write_header *nfs_writehdr_alloc(unsigned int pagecount) { - struct nfs_write_data *p = mempool_alloc(nfs_wdata_mempool, GFP_NOFS); + struct nfs_write_header *p = mempool_alloc(nfs_wdata_mempool, GFP_NOFS); if (p) { + struct nfs_pgio_header *hdr = &p->header; + struct nfs_write_data *data = &p->rpc_data; + memset(p, 0, sizeof(*p)); - INIT_LIST_HEAD(&p->pages); - p->npages = pagecount; - if (pagecount <= ARRAY_SIZE(p->page_array)) - p->pagevec = p->page_array; + INIT_LIST_HEAD(&hdr->pages); + INIT_LIST_HEAD(&data->list); + data->npages = pagecount; + data->header = hdr; + if (pagecount <= ARRAY_SIZE(data->page_array)) + data->pagevec = data->page_array; else { - p->pagevec = kcalloc(pagecount, sizeof(struct page *), GFP_NOFS); - if (!p->pagevec) { + data->pagevec = kcalloc(pagecount, sizeof(struct page *), GFP_NOFS); + if (!data->pagevec) { mempool_free(p, nfs_wdata_mempool); p = NULL; } @@ -90,17 +95,18 @@ struct nfs_write_data *nfs_writedata_alloc(unsigned int pagecount) return p; } -void nfs_writedata_free(struct nfs_write_data *p) +void nfs_writehdr_free(struct nfs_pgio_header *hdr) { - if (p && (p->pagevec != &p->page_array[0])) - kfree(p->pagevec); - mempool_free(p, nfs_wdata_mempool); + struct nfs_write_header *whdr = container_of(hdr, struct nfs_write_header, header); + mempool_free(whdr, nfs_wdata_mempool); } void nfs_writedata_release(struct nfs_write_data *wdata) { put_nfs_open_context(wdata->args.context); - nfs_writedata_free(wdata); + if (wdata->pagevec != wdata->page_array) + kfree(wdata->pagevec); + nfs_writehdr_free(wdata->header); } static void nfs_context_set_write_error(struct nfs_open_context *ctx, int error) @@ -507,9 +513,8 @@ static inline int nfs_write_need_commit(struct nfs_write_data *data) { if (data->verf.committed == NFS_DATA_SYNC) - return data->lseg == NULL; - else - return data->verf.committed != NFS_FILE_SYNC; + return data->header->lseg == NULL; + return data->verf.committed != NFS_FILE_SYNC; } static inline @@ -517,7 +522,7 @@ int nfs_reschedule_unstable_write(struct nfs_page *req, struct nfs_write_data *data) { if (test_and_clear_bit(PG_NEED_COMMIT, &req->wb_flags)) { - nfs_mark_request_commit(req, data->lseg); + nfs_mark_request_commit(req, data->header->lseg); return 1; } if (test_and_clear_bit(PG_NEED_RESCHED, &req->wb_flags)) { @@ -841,13 +846,13 @@ int nfs_initiate_write(struct rpc_clnt *clnt, const struct rpc_call_ops *call_ops, int how) { - struct inode *inode = data->inode; + struct inode *inode = data->header->inode; int priority = flush_task_priority(how); struct rpc_task *task; struct rpc_message msg = { .rpc_argp = &data->args, .rpc_resp = &data->res, - .rpc_cred = data->cred, + .rpc_cred = data->header->cred, }; struct rpc_task_setup task_setup_data = { .rpc_client = clnt, @@ -896,14 +901,15 @@ static void nfs_write_rpcsetup(struct nfs_page *req, unsigned int count, unsigned int offset, int how) { + struct nfs_pgio_header *hdr = data->header; struct inode *inode = req->wb_context->dentry->d_inode; /* Set up the RPC argument and reply structs * NB: take care not to mess about with data->commit et al. */ - data->req = req; - data->inode = inode = req->wb_context->dentry->d_inode; - data->cred = req->wb_context->cred; + hdr->req = req; + hdr->inode = inode = req->wb_context->dentry->d_inode; + hdr->cred = req->wb_context->cred; data->args.fh = NFS_FH(inode); data->args.offset = req_offset(req) + offset; @@ -935,7 +941,7 @@ static int nfs_do_write(struct nfs_write_data *data, const struct rpc_call_ops *call_ops, int how) { - struct inode *inode = data->args.context->dentry->d_inode; + struct inode *inode = data->header->inode; return nfs_initiate_write(NFS_CLIENT(inode), data, call_ops, how); } @@ -981,6 +987,7 @@ static int nfs_flush_multi(struct nfs_pageio_descriptor *desc, struct list_head { struct nfs_page *req = nfs_list_entry(desc->pg_list.next); struct page *page = req->wb_page; + struct nfs_write_header *whdr; struct nfs_write_data *data; size_t wsize = desc->pg_bsize, nbytes; unsigned int offset; @@ -1000,9 +1007,10 @@ static int nfs_flush_multi(struct nfs_pageio_descriptor *desc, struct list_head do { size_t len = min(nbytes, wsize); - data = nfs_writedata_alloc(1); - if (!data) + whdr = nfs_writehdr_alloc(1); + if (!whdr) goto out_bad; + data = &whdr->rpc_data; data->pagevec[0] = page; nfs_write_rpcsetup(req, data, len, offset, desc->pg_ioflags); list_add(&data->list, res); @@ -1036,13 +1044,14 @@ static int nfs_flush_one(struct nfs_pageio_descriptor *desc, struct list_head *r { struct nfs_page *req; struct page **pages; + struct nfs_write_header *whdr; struct nfs_write_data *data; struct list_head *head = &desc->pg_list; int ret = 0; - data = nfs_writedata_alloc(nfs_page_array_len(desc->pg_base, - desc->pg_count)); - if (!data) { + whdr = nfs_writehdr_alloc(nfs_page_array_len(desc->pg_base, + desc->pg_count)); + if (!whdr) { while (!list_empty(head)) { req = nfs_list_entry(head->next); nfs_list_remove_request(req); @@ -1051,14 +1060,15 @@ static int nfs_flush_one(struct nfs_pageio_descriptor *desc, struct list_head *r ret = -ENOMEM; goto out; } + data = &whdr->rpc_data; pages = data->pagevec; while (!list_empty(head)) { req = nfs_list_entry(head->next); nfs_list_remove_request(req); - nfs_list_add_request(req, &data->pages); + nfs_list_add_request(req, &whdr->header.pages); *pages++ = req->wb_page; } - req = nfs_list_entry(data->pages.next); + req = nfs_list_entry(whdr->header.pages.next); if ((desc->pg_ioflags & FLUSH_COND_STABLE) && (desc->pg_moreio || NFS_I(desc->pg_inode)->ncommit)) @@ -1126,10 +1136,11 @@ static void nfs_writeback_done_partial(struct rpc_task *task, void *calldata) dprintk("NFS: %5u write(%s/%lld %d@%lld)", task->tk_pid, - data->req->wb_context->dentry->d_inode->i_sb->s_id, + data->header->inode->i_sb->s_id, (long long) - NFS_FILEID(data->req->wb_context->dentry->d_inode), - data->req->wb_bytes, (long long)req_offset(data->req)); + NFS_FILEID(data->header->inode), + data->header->req->wb_bytes, + (long long)req_offset(data->header->req)); nfs_writeback_done(task, data); } @@ -1137,7 +1148,7 @@ static void nfs_writeback_done_partial(struct rpc_task *task, void *calldata) static void nfs_writeback_release_partial(void *calldata) { struct nfs_write_data *data = calldata; - struct nfs_page *req = data->req; + struct nfs_page *req = data->header->req; struct page *page = req->wb_page; int status = data->task.tk_status; @@ -1169,13 +1180,13 @@ static void nfs_writeback_release_partial(void *calldata) out: if (atomic_dec_and_test(&req->wb_complete)) nfs_writepage_release(req, data); - nfs_writedata_release(calldata); + nfs_writedata_release(data); } void nfs_write_prepare(struct rpc_task *task, void *calldata) { struct nfs_write_data *data = calldata; - NFS_PROTO(data->inode)->write_rpc_prepare(task, data); + NFS_PROTO(data->header->inode)->write_rpc_prepare(task, data); } void nfs_commit_prepare(struct rpc_task *task, void *calldata) @@ -1208,11 +1219,12 @@ static void nfs_writeback_done_full(struct rpc_task *task, void *calldata) static void nfs_writeback_release_full(void *calldata) { struct nfs_write_data *data = calldata; + struct nfs_pgio_header *hdr = data->header; int status = data->task.tk_status; /* Update attributes as result of writeback. */ - while (!list_empty(&data->pages)) { - struct nfs_page *req = nfs_list_entry(data->pages.next); + while (!list_empty(&hdr->pages)) { + struct nfs_page *req = nfs_list_entry(hdr->pages.next); struct page *page = req->wb_page; nfs_list_remove_request(req); @@ -1233,7 +1245,7 @@ static void nfs_writeback_release_full(void *calldata) if (nfs_write_need_commit(data)) { memcpy(&req->wb_verf, &data->verf, sizeof(req->wb_verf)); - nfs_mark_request_commit(req, data->lseg); + nfs_mark_request_commit(req, hdr->lseg); dprintk(" marked for commit\n"); goto next; } @@ -1244,7 +1256,7 @@ remove_request: nfs_unlock_request(req); nfs_end_page_writeback(page); } - nfs_writedata_release(calldata); + nfs_writedata_release(data); } static const struct rpc_call_ops nfs_write_full_ops = { @@ -1261,6 +1273,7 @@ void nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data) { struct nfs_writeargs *argp = &data->args; struct nfs_writeres *resp = &data->res; + struct inode *inode = data->header->inode; int status; dprintk("NFS: %5u nfs_writeback_done (status %d)\n", @@ -1273,10 +1286,10 @@ void nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data) * another writer had changed the file, but some applications * depend on tighter cache coherency when writing. */ - status = NFS_PROTO(data->inode)->write_done(task, data); + status = NFS_PROTO(inode)->write_done(task, data); if (status != 0) return; - nfs_add_stats(data->inode, NFSIOS_SERVERWRITTENBYTES, resp->count); + nfs_add_stats(inode, NFSIOS_SERVERWRITTENBYTES, resp->count); #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) if (resp->verf->committed < argp->stable && task->tk_status >= 0) { @@ -1294,7 +1307,7 @@ void nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data) if (time_before(complain, jiffies)) { dprintk("NFS: faulty NFS server %s:" " (committed = %d) != (stable = %d)\n", - NFS_SERVER(data->inode)->nfs_client->cl_hostname, + NFS_SERVER(inode)->nfs_client->cl_hostname, resp->verf->committed, argp->stable); complain = jiffies + 300 * HZ; } @@ -1304,7 +1317,7 @@ void nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data) if (task->tk_status >= 0 && resp->count < argp->count) { static unsigned long complain; - nfs_inc_stats(data->inode, NFSIOS_SHORTWRITE); + nfs_inc_stats(inode, NFSIOS_SHORTWRITE); /* Has the server at least made some progress? */ if (resp->count != 0) { @@ -1333,7 +1346,6 @@ void nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data) /* Can't do anything about it except throw an error. */ task->tk_status = -EIO; } - return; } @@ -1745,7 +1757,7 @@ int nfs_migrate_page(struct address_space *mapping, struct page *newpage, int __init nfs_init_writepagecache(void) { nfs_wdata_cachep = kmem_cache_create("nfs_write_data", - sizeof(struct nfs_write_data), + sizeof(struct nfs_write_header), 0, SLAB_HWCACHE_ALIGN, NULL); if (nfs_wdata_cachep == NULL) diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index d5d68f3..8d3a2b8 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -569,12 +569,6 @@ nfs_have_writebacks(struct inode *inode) } /* - * Allocate nfs_write_data structures - */ -extern struct nfs_write_data *nfs_writedata_alloc(unsigned int npages); -extern void nfs_writedata_free(struct nfs_write_data *); - -/* * linux/fs/nfs/read.c */ extern int nfs_readpage(struct file *, struct page *); @@ -585,12 +579,6 @@ extern int nfs_readpage_async(struct nfs_open_context *, struct inode *, struct page *); /* - * Allocate nfs_read_data structures - */ -extern struct nfs_read_data *nfs_readdata_alloc(unsigned int npages); -extern void nfs_readdata_free(struct nfs_read_data *); - -/* * linux/fs/nfs3proc.c */ #ifdef CONFIG_NFS_V3_ACL diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 8fb036a..fee3241 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1168,52 +1168,58 @@ struct nfs_page; #define NFS_PAGEVEC_SIZE (8U) struct nfs_read_data { + struct nfs_pgio_header *header; + struct list_head list; struct rpc_task task; - struct inode *inode; - struct rpc_cred *cred; struct nfs_fattr fattr; /* fattr storage */ - struct list_head pages; /* Coalesced read requests */ - struct list_head list; /* lists of struct nfs_read_data */ - struct nfs_page *req; /* multi ops per nfs_page */ struct page **pagevec; unsigned int npages; /* Max length of pagevec */ struct nfs_readargs args; struct nfs_readres res; unsigned long timestamp; /* For lease renewal */ - struct pnfs_layout_segment *lseg; - struct nfs_client *ds_clp; /* pNFS data server */ - const struct rpc_call_ops *mds_ops; int (*read_done_cb) (struct rpc_task *task, struct nfs_read_data *data); __u64 mds_offset; - int pnfs_error; struct page *page_array[NFS_PAGEVEC_SIZE]; + struct nfs_client *ds_clp; /* pNFS data server */ +}; + +struct nfs_pgio_header { + struct inode *inode; + struct rpc_cred *cred; + struct list_head pages; + struct nfs_page *req; + struct pnfs_layout_segment *lseg; + const struct rpc_call_ops *mds_ops; + int pnfs_error; +}; + +struct nfs_read_header { + struct nfs_pgio_header header; + struct nfs_read_data rpc_data; }; struct nfs_direct_req; struct nfs_write_data { + struct nfs_pgio_header *header; + struct list_head list; struct rpc_task task; - struct inode *inode; - struct rpc_cred *cred; struct nfs_fattr fattr; struct nfs_writeverf verf; - struct list_head pages; /* Coalesced requests we wish to flush */ - struct list_head list; /* lists of struct nfs_write_data */ - struct nfs_page *req; /* multi ops per nfs_page */ struct page **pagevec; unsigned int npages; /* Max length of pagevec */ struct nfs_writeargs args; /* argument struct */ struct nfs_writeres res; /* result struct */ - struct pnfs_layout_segment *lseg; - struct nfs_client *ds_clp; /* pNFS data server */ - const struct rpc_call_ops *mds_ops; - int (*write_done_cb) (struct rpc_task *task, struct nfs_write_data *data); -#ifdef CONFIG_NFS_V4 unsigned long timestamp; /* For lease renewal */ -#endif + int (*write_done_cb) (struct rpc_task *task, struct nfs_write_data *data); __u64 mds_offset; /* Filelayout dense stripe */ - int pnfs_error; struct page *page_array[NFS_PAGEVEC_SIZE]; + struct nfs_client *ds_clp; /* pNFS data server */ +}; + +struct nfs_write_header { + struct nfs_pgio_header header; + struct nfs_write_data rpc_data; }; struct nfs_commit_data { -- cgit v0.10.2 From 30dd374f6fc1b202db3a1b57b61afff1326bad92 Mon Sep 17 00:00:00 2001 From: Fred Isaman Date: Fri, 20 Apr 2012 14:47:45 -0400 Subject: NFS: create struct nfs_page_array Both nfs_read_data and nfs_write_data devote several fields which can be combined into a single shared struct. Signed-off-by: Fred Isaman Signed-off-by: Trond Myklebust diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c index 7a48251..7ae8a60 100644 --- a/fs/nfs/blocklayout/blocklayout.c +++ b/fs/nfs/blocklayout/blocklayout.c @@ -242,7 +242,7 @@ bl_read_pagelist(struct nfs_read_data *rdata) int pg_index = rdata->args.pgbase >> PAGE_CACHE_SHIFT; dprintk("%s enter nr_pages %u offset %lld count %u\n", __func__, - rdata->npages, f_offset, (unsigned int)rdata->args.count); + rdata->pages.npages, f_offset, (unsigned int)rdata->args.count); par = alloc_parallel(rdata); if (!par) @@ -252,7 +252,7 @@ bl_read_pagelist(struct nfs_read_data *rdata) isect = (sector_t) (f_offset >> SECTOR_SHIFT); /* Code assumes extents are page-aligned */ - for (i = pg_index; i < rdata->npages; i++) { + for (i = pg_index; i < rdata->pages.npages; i++) { if (!extent_length) { /* We've used up the previous extent */ bl_put_extent(be); @@ -285,7 +285,8 @@ bl_read_pagelist(struct nfs_read_data *rdata) struct pnfs_block_extent *be_read; be_read = (hole && cow_read) ? cow_read : be; - bio = bl_add_page_to_bio(bio, rdata->npages - i, READ, + bio = bl_add_page_to_bio(bio, rdata->pages.npages - i, + READ, isect, pages[i], be_read, bl_end_io_read, par); if (IS_ERR(bio)) { @@ -654,7 +655,7 @@ next_page: /* Middle pages */ pg_index = wdata->args.pgbase >> PAGE_CACHE_SHIFT; - for (i = pg_index; i < wdata->npages; i++) { + for (i = pg_index; i < wdata->pages.npages; i++) { if (!extent_length) { /* We've used up the previous extent */ bl_put_extent(be); @@ -688,7 +689,7 @@ next_page: goto out; } } - bio = bl_add_page_to_bio(bio, wdata->npages - i, WRITE, + bio = bl_add_page_to_bio(bio, wdata->pages.npages - i, WRITE, isect, pages[i], be, bl_end_io_write, par); if (IS_ERR(bio)) { diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 56176af..0faba4c 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -252,11 +252,11 @@ static void nfs_direct_read_release(void *calldata) } else { dreq->count += data->res.count; spin_unlock(&dreq->lock); - nfs_direct_dirty_pages(data->pagevec, + nfs_direct_dirty_pages(data->pages.pagevec, data->args.pgbase, data->res.count); } - nfs_direct_release_pages(data->pagevec, data->npages); + nfs_direct_release_pages(data->pages.pagevec, data->pages.npages); if (put_dreq(dreq)) nfs_direct_complete(dreq); @@ -273,8 +273,8 @@ static void nfs_direct_readhdr_release(struct nfs_read_header *rhdr) { struct nfs_read_data *data = &rhdr->rpc_data; - if (data->pagevec != data->page_array) - kfree(data->pagevec); + if (data->pages.pagevec != data->pages.page_array) + kfree(data->pages.pagevec); nfs_readhdr_free(&rhdr->header); } @@ -312,6 +312,7 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_direct_req *dreq, do { struct nfs_read_header *rhdr; struct nfs_read_data *data; + struct nfs_page_array *pages; size_t bytes; pgbase = user_addr & ~PAGE_MASK; @@ -322,24 +323,25 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_direct_req *dreq, if (unlikely(!rhdr)) break; data = &rhdr->rpc_data; + pages = &data->pages; down_read(¤t->mm->mmap_sem); result = get_user_pages(current, current->mm, user_addr, - data->npages, 1, 0, data->pagevec, NULL); + pages->npages, 1, 0, pages->pagevec, NULL); up_read(¤t->mm->mmap_sem); if (result < 0) { nfs_direct_readhdr_release(rhdr); break; } - if ((unsigned)result < data->npages) { + if ((unsigned)result < pages->npages) { bytes = result * PAGE_SIZE; if (bytes <= pgbase) { - nfs_direct_release_pages(data->pagevec, result); + nfs_direct_release_pages(pages->pagevec, result); nfs_direct_readhdr_release(rhdr); break; } bytes -= pgbase; - data->npages = result; + pages->npages = result; } get_dreq(dreq); @@ -352,7 +354,7 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_direct_req *dreq, data->args.lock_context = dreq->l_ctx; data->args.offset = pos; data->args.pgbase = pgbase; - data->args.pages = data->pagevec; + data->args.pages = pages->pagevec; data->args.count = bytes; data->res.fattr = &data->fattr; data->res.eof = 0; @@ -462,8 +464,8 @@ static void nfs_direct_writehdr_release(struct nfs_write_header *whdr) { struct nfs_write_data *data = &whdr->rpc_data; - if (data->pagevec != data->page_array) - kfree(data->pagevec); + if (data->pages.pagevec != data->pages.page_array) + kfree(data->pages.pagevec); nfs_writehdr_free(&whdr->header); } @@ -472,8 +474,10 @@ static void nfs_direct_free_writedata(struct nfs_direct_req *dreq) while (!list_empty(&dreq->rewrite_list)) { struct nfs_pgio_header *hdr = list_entry(dreq->rewrite_list.next, struct nfs_pgio_header, pages); struct nfs_write_header *whdr = container_of(hdr, struct nfs_write_header, header); + struct nfs_page_array *p = &whdr->rpc_data.pages; + list_del(&hdr->pages); - nfs_direct_release_pages(whdr->rpc_data.pagevec, whdr->rpc_data.npages); + nfs_direct_release_pages(p->pagevec, p->npages); nfs_direct_writehdr_release(whdr); } } @@ -751,6 +755,7 @@ static ssize_t nfs_direct_write_schedule_segment(struct nfs_direct_req *dreq, do { struct nfs_write_header *whdr; struct nfs_write_data *data; + struct nfs_page_array *pages; size_t bytes; pgbase = user_addr & ~PAGE_MASK; @@ -762,24 +767,25 @@ static ssize_t nfs_direct_write_schedule_segment(struct nfs_direct_req *dreq, break; data = &whdr->rpc_data; + pages = &data->pages; down_read(¤t->mm->mmap_sem); result = get_user_pages(current, current->mm, user_addr, - data->npages, 0, 0, data->pagevec, NULL); + pages->npages, 0, 0, pages->pagevec, NULL); up_read(¤t->mm->mmap_sem); if (result < 0) { nfs_direct_writehdr_release(whdr); break; } - if ((unsigned)result < data->npages) { + if ((unsigned)result < pages->npages) { bytes = result * PAGE_SIZE; if (bytes <= pgbase) { - nfs_direct_release_pages(data->pagevec, result); + nfs_direct_release_pages(pages->pagevec, result); nfs_direct_writehdr_release(whdr); break; } bytes -= pgbase; - data->npages = result; + pages->npages = result; } get_dreq(dreq); @@ -794,7 +800,7 @@ static ssize_t nfs_direct_write_schedule_segment(struct nfs_direct_req *dreq, data->args.lock_context = dreq->l_ctx; data->args.offset = pos; data->args.pgbase = pgbase; - data->args.pages = data->pagevec; + data->args.pages = pages->pagevec; data->args.count = bytes; data->args.stable = sync; data->res.fattr = &data->fattr; diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 7dc9be1..5c3d77f 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -210,6 +210,7 @@ extern void nfs_destroy_writepagecache(void); extern int __init nfs_init_directcache(void); extern void nfs_destroy_directcache(void); +extern bool nfs_pgarray_set(struct nfs_page_array *p, unsigned int pagecount); /* nfs2xdr.c */ extern int nfs_stat_to_errno(enum nfs_stat); diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index d21fcea..d349bd4 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -26,6 +26,19 @@ static struct kmem_cache *nfs_page_cachep; +bool nfs_pgarray_set(struct nfs_page_array *p, unsigned int pagecount) +{ + p->npages = pagecount; + if (pagecount <= ARRAY_SIZE(p->page_array)) + p->pagevec = p->page_array; + else { + p->pagevec = kcalloc(pagecount, sizeof(struct page *), GFP_KERNEL); + if (!p->pagevec) + p->npages = 0; + } + return p->pagevec != NULL; +} + static inline struct nfs_page * nfs_page_alloc(void) { diff --git a/fs/nfs/read.c b/fs/nfs/read.c index d6d4682..f6ab30b 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -46,16 +46,10 @@ struct nfs_read_header *nfs_readhdr_alloc(unsigned int pagecount) INIT_LIST_HEAD(&hdr->pages); INIT_LIST_HEAD(&data->list); - data->npages = pagecount; data->header = hdr; - if (pagecount <= ARRAY_SIZE(data->page_array)) - data->pagevec = data->page_array; - else { - data->pagevec = kcalloc(pagecount, sizeof(struct page *), GFP_KERNEL); - if (!data->pagevec) { - kmem_cache_free(nfs_rdata_cachep, p); - p = NULL; - } + if (!nfs_pgarray_set(&data->pages, pagecount)) { + kmem_cache_free(nfs_rdata_cachep, p); + p = NULL; } } return p; @@ -71,8 +65,8 @@ void nfs_readhdr_free(struct nfs_pgio_header *hdr) void nfs_readdata_release(struct nfs_read_data *rdata) { put_nfs_open_context(rdata->args.context); - if (rdata->pagevec != rdata->page_array) - kfree(rdata->pagevec); + if (rdata->pages.pagevec != rdata->pages.page_array) + kfree(rdata->pages.pagevec); nfs_readhdr_free(rdata->header); } @@ -232,7 +226,7 @@ static void nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data, data->args.fh = NFS_FH(inode); data->args.offset = req_offset(req) + offset; data->args.pgbase = req->wb_pgbase + offset; - data->args.pages = data->pagevec; + data->args.pages = data->pages.pagevec; data->args.count = count; data->args.context = get_nfs_open_context(req->wb_context); data->args.lock_context = req->wb_lock_context; @@ -318,7 +312,7 @@ static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc, struct list_head if (!rhdr) goto out_bad; data = &rhdr->rpc_data; - data->pagevec[0] = page; + data->pages.pagevec[0] = page; nfs_read_rpcsetup(req, data, len, offset); list_add(&data->list, res); requests++; @@ -356,7 +350,7 @@ static int nfs_pagein_one(struct nfs_pageio_descriptor *desc, struct list_head * } data = &rhdr->rpc_data; - pages = data->pagevec; + pages = data->pages.pagevec; while (!list_empty(head)) { req = nfs_list_entry(head->next); nfs_list_remove_request(req); diff --git a/fs/nfs/write.c b/fs/nfs/write.c index dbb5c0a..2efae04 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -80,16 +80,10 @@ struct nfs_write_header *nfs_writehdr_alloc(unsigned int pagecount) memset(p, 0, sizeof(*p)); INIT_LIST_HEAD(&hdr->pages); INIT_LIST_HEAD(&data->list); - data->npages = pagecount; data->header = hdr; - if (pagecount <= ARRAY_SIZE(data->page_array)) - data->pagevec = data->page_array; - else { - data->pagevec = kcalloc(pagecount, sizeof(struct page *), GFP_NOFS); - if (!data->pagevec) { - mempool_free(p, nfs_wdata_mempool); - p = NULL; - } + if (!nfs_pgarray_set(&data->pages, pagecount)) { + mempool_free(p, nfs_wdata_mempool); + p = NULL; } } return p; @@ -104,8 +98,8 @@ void nfs_writehdr_free(struct nfs_pgio_header *hdr) void nfs_writedata_release(struct nfs_write_data *wdata) { put_nfs_open_context(wdata->args.context); - if (wdata->pagevec != wdata->page_array) - kfree(wdata->pagevec); + if (wdata->pages.pagevec != wdata->pages.page_array) + kfree(wdata->pages.pagevec); nfs_writehdr_free(wdata->header); } @@ -916,7 +910,7 @@ static void nfs_write_rpcsetup(struct nfs_page *req, /* pnfs_set_layoutcommit needs this */ data->mds_offset = data->args.offset; data->args.pgbase = req->wb_pgbase + offset; - data->args.pages = data->pagevec; + data->args.pages = data->pages.pagevec; data->args.count = count; data->args.context = get_nfs_open_context(req->wb_context); data->args.lock_context = req->wb_lock_context; @@ -1011,7 +1005,7 @@ static int nfs_flush_multi(struct nfs_pageio_descriptor *desc, struct list_head if (!whdr) goto out_bad; data = &whdr->rpc_data; - data->pagevec[0] = page; + data->pages.pagevec[0] = page; nfs_write_rpcsetup(req, data, len, offset, desc->pg_ioflags); list_add(&data->list, res); requests++; @@ -1061,7 +1055,7 @@ static int nfs_flush_one(struct nfs_pageio_descriptor *desc, struct list_head *r goto out; } data = &whdr->rpc_data; - pages = data->pagevec; + pages = data->pages.pagevec; while (!list_empty(head)) { req = nfs_list_entry(head->next); nfs_list_remove_request(req); diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index fee3241..e34beaf 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1167,19 +1167,23 @@ struct nfs_page; #define NFS_PAGEVEC_SIZE (8U) +struct nfs_page_array { + struct page **pagevec; + unsigned int npages; /* Max length of pagevec */ + struct page *page_array[NFS_PAGEVEC_SIZE]; +}; + struct nfs_read_data { struct nfs_pgio_header *header; struct list_head list; struct rpc_task task; struct nfs_fattr fattr; /* fattr storage */ - struct page **pagevec; - unsigned int npages; /* Max length of pagevec */ struct nfs_readargs args; struct nfs_readres res; unsigned long timestamp; /* For lease renewal */ int (*read_done_cb) (struct rpc_task *task, struct nfs_read_data *data); __u64 mds_offset; - struct page *page_array[NFS_PAGEVEC_SIZE]; + struct nfs_page_array pages; struct nfs_client *ds_clp; /* pNFS data server */ }; @@ -1206,14 +1210,12 @@ struct nfs_write_data { struct rpc_task task; struct nfs_fattr fattr; struct nfs_writeverf verf; - struct page **pagevec; - unsigned int npages; /* Max length of pagevec */ struct nfs_writeargs args; /* argument struct */ struct nfs_writeres res; /* result struct */ unsigned long timestamp; /* For lease renewal */ int (*write_done_cb) (struct rpc_task *task, struct nfs_write_data *data); __u64 mds_offset; /* Filelayout dense stripe */ - struct page *page_array[NFS_PAGEVEC_SIZE]; + struct nfs_page_array pages; struct nfs_client *ds_clp; /* pNFS data server */ }; -- cgit v0.10.2 From 4db6e0b74c0f6dfc2f9c0690e8df512e3b635983 Mon Sep 17 00:00:00 2001 From: Fred Isaman Date: Fri, 20 Apr 2012 14:47:46 -0400 Subject: NFS: merge _full and _partial read rpc_ops Decouple nfs_pgio_header and nfs_read_data, and have (possibly multiple) nfs_read_datas each take a refcount on nfs_pgio_header. For the moment keeps nfs_read_header as a way to preallocate a single nfs_read_data with the nfs_pgio_header. The code doesn't need this, and would be prettier without, but given the amount of churn I am already introducing I didn't want to play with tuning new mempools. This also fixes bug in pnfs_ld_handle_read_error. In the case of desc->pg_bsize < PAGE_CACHE_SIZE, the pages list was empty, causing replay attempt to do nothing. Signed-off-by: Fred Isaman Signed-off-by: Trond Myklebust diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 0faba4c..90b00ce 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -319,10 +319,16 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_direct_req *dreq, bytes = min(rsize,count); result = -ENOMEM; - rhdr = nfs_readhdr_alloc(nfs_page_array_len(pgbase, bytes)); + rhdr = nfs_readhdr_alloc(); if (unlikely(!rhdr)) break; - data = &rhdr->rpc_data; + data = nfs_readdata_alloc(&rhdr->header, nfs_page_array_len(pgbase, bytes)); + if (!data) { + nfs_readhdr_free(&rhdr->header); + break; + } + data->header = &rhdr->header; + atomic_inc(&data->header->refcnt); pages = &data->pages; down_read(¤t->mm->mmap_sem); diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 5c3d77f..33af5e5 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -200,6 +200,7 @@ struct vfsmount *nfs_do_refmount(struct rpc_clnt *client, struct dentry *dentry) extern struct svc_version nfs4_callback_version1; extern struct svc_version nfs4_callback_version4; +struct nfs_pageio_descriptor; /* pagelist.c */ extern int __init nfs_init_nfspagecache(void); extern void nfs_destroy_nfspagecache(void); @@ -211,6 +212,10 @@ extern void nfs_destroy_writepagecache(void); extern int __init nfs_init_directcache(void); extern void nfs_destroy_directcache(void); extern bool nfs_pgarray_set(struct nfs_page_array *p, unsigned int pagecount); +extern void nfs_pgheader_init(struct nfs_pageio_descriptor *desc, + struct nfs_pgio_header *hdr, + void (*release)(struct nfs_pgio_header *hdr)); +void nfs_set_pgio_error(struct nfs_pgio_header *hdr, int error, loff_t pos); /* nfs2xdr.c */ extern int nfs_stat_to_errno(enum nfs_stat); @@ -295,17 +300,19 @@ extern struct dentry *nfs4_get_root(struct super_block *, struct nfs_fh *, extern int nfs4_get_rootfh(struct nfs_server *server, struct nfs_fh *mntfh); #endif -struct nfs_pageio_descriptor; /* read.c */ -extern struct nfs_read_header *nfs_readhdr_alloc(unsigned int npages); +extern void nfs_async_read_error(struct list_head *head); +extern struct nfs_read_header *nfs_readhdr_alloc(void); extern void nfs_readhdr_free(struct nfs_pgio_header *hdr); +extern void nfs_read_completion(struct nfs_pgio_header *hdr); +extern struct nfs_read_data *nfs_readdata_alloc(struct nfs_pgio_header *hdr, + unsigned int pagecount); extern int nfs_initiate_read(struct rpc_clnt *clnt, struct nfs_read_data *data, const struct rpc_call_ops *call_ops); extern void nfs_read_prepare(struct rpc_task *task, void *calldata); extern int nfs_generic_pagein(struct nfs_pageio_descriptor *desc, - struct list_head *head); - + struct nfs_pgio_header *hdr); extern void nfs_pageio_init_read_mds(struct nfs_pageio_descriptor *pgio, struct inode *inode); extern void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio); diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index ad1d680..333e765 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c @@ -227,7 +227,6 @@ static void filelayout_read_release(void *data) { struct nfs_read_data *rdata = data; - put_lseg(rdata->header->lseg); rdata->header->mds_ops->rpc_release(data); } diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 5375862..ce31ab2 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3391,8 +3391,6 @@ void nfs4_reset_read(struct rpc_task *task, struct nfs_read_data *data) struct inode *inode = hdr->inode; dprintk("%s Reset task for i/o through\n", __func__); - put_lseg(hdr->lseg); - hdr->lseg = NULL; data->ds_clp = NULL; /* offsets will differ in the dense stripe case */ data->args.offset = data->mds_offset; diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index d349bd4..cd4c038 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -39,6 +39,30 @@ bool nfs_pgarray_set(struct nfs_page_array *p, unsigned int pagecount) return p->pagevec != NULL; } +void nfs_pgheader_init(struct nfs_pageio_descriptor *desc, + struct nfs_pgio_header *hdr, + void (*release)(struct nfs_pgio_header *hdr)) +{ + hdr->req = nfs_list_entry(desc->pg_list.next); + hdr->inode = desc->pg_inode; + hdr->cred = hdr->req->wb_context->cred; + hdr->io_start = req_offset(hdr->req); + hdr->good_bytes = desc->pg_count; + hdr->release = release; +} + +void nfs_set_pgio_error(struct nfs_pgio_header *hdr, int error, loff_t pos) +{ + spin_lock(&hdr->lock); + if (pos < hdr->io_start + hdr->good_bytes) { + set_bit(NFS_IOHDR_ERROR, &hdr->flags); + clear_bit(NFS_IOHDR_EOF, &hdr->flags); + hdr->good_bytes = pos - hdr->io_start; + hdr->error = error; + } + spin_unlock(&hdr->lock); +} + static inline struct nfs_page * nfs_page_alloc(void) { diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index d705da4..d1a91db 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1333,7 +1333,9 @@ static void pnfs_ld_handle_read_error(struct nfs_read_data *data) clear_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(hdr->inode)->flags); pnfs_return_layout(hdr->inode); } - data->task.tk_status = pnfs_read_done_resend_to_mds(hdr->inode, &hdr->pages); + if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) + data->task.tk_status = pnfs_read_done_resend_to_mds(hdr->inode, + &hdr->pages); } /* @@ -1348,7 +1350,6 @@ void pnfs_ld_read_done(struct nfs_read_data *data) hdr->mds_ops->rpc_call_done(&data->task, data); } else pnfs_ld_handle_read_error(data); - put_lseg(hdr->lseg); hdr->mds_ops->rpc_release(data); } EXPORT_SYMBOL_GPL(pnfs_ld_read_done); @@ -1359,11 +1360,11 @@ pnfs_read_through_mds(struct nfs_pageio_descriptor *desc, { struct nfs_pgio_header *hdr = data->header; - list_splice_tail_init(&hdr->pages, &desc->pg_list); - if (hdr->req && list_empty(&hdr->req->wb_list)) - nfs_list_add_request(hdr->req, &desc->pg_list); - nfs_pageio_reset_read_mds(desc); - desc->pg_recoalesce = 1; + if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) { + list_splice_tail_init(&hdr->pages, &desc->pg_list); + nfs_pageio_reset_read_mds(desc); + desc->pg_recoalesce = 1; + } nfs_readdata_release(data); } @@ -1381,18 +1382,13 @@ pnfs_try_to_read_data(struct nfs_read_data *rdata, enum pnfs_try_status trypnfs; hdr->mds_ops = call_ops; - hdr->lseg = get_lseg(lseg); dprintk("%s: Reading ino:%lu %u@%llu\n", __func__, inode->i_ino, rdata->args.count, rdata->args.offset); trypnfs = nfss->pnfs_curr_ld->read_pagelist(rdata); - if (trypnfs == PNFS_NOT_ATTEMPTED) { - put_lseg(hdr->lseg); - hdr->lseg = NULL; - } else { + if (trypnfs != PNFS_NOT_ATTEMPTED) nfs_inc_stats(inode, NFSIOS_PNFS_READ); - } dprintk("%s End (trypnfs:%d)\n", __func__, trypnfs); return trypnfs; } @@ -1408,7 +1404,7 @@ pnfs_do_multiple_reads(struct nfs_pageio_descriptor *desc, struct list_head *hea while (!list_empty(head)) { enum pnfs_try_status trypnfs; - data = list_entry(head->next, struct nfs_read_data, list); + data = list_first_entry(head, struct nfs_read_data, list); list_del_init(&data->list); trypnfs = pnfs_try_to_read_data(data, call_ops, lseg); @@ -1418,20 +1414,41 @@ pnfs_do_multiple_reads(struct nfs_pageio_descriptor *desc, struct list_head *hea put_lseg(lseg); } +static void pnfs_readhdr_free(struct nfs_pgio_header *hdr) +{ + put_lseg(hdr->lseg); + nfs_readhdr_free(hdr); +} + int pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc) { - LIST_HEAD(head); + struct nfs_read_header *rhdr; + struct nfs_pgio_header *hdr; int ret; - ret = nfs_generic_pagein(desc, &head); - if (ret != 0) { + rhdr = nfs_readhdr_alloc(); + if (!rhdr) { + nfs_async_read_error(&desc->pg_list); + ret = -ENOMEM; put_lseg(desc->pg_lseg); desc->pg_lseg = NULL; return ret; } - pnfs_do_multiple_reads(desc, &head); - return 0; + hdr = &rhdr->header; + nfs_pgheader_init(desc, hdr, pnfs_readhdr_free); + hdr->lseg = get_lseg(desc->pg_lseg); + atomic_inc(&hdr->refcnt); + ret = nfs_generic_pagein(desc, hdr); + if (ret != 0) { + put_lseg(desc->pg_lseg); + desc->pg_lseg = NULL; + set_bit(NFS_IOHDR_REDO, &hdr->flags); + } else + pnfs_do_multiple_reads(desc, &hdr->rpc_list); + if (atomic_dec_and_test(&hdr->refcnt)) + nfs_read_completion(hdr); + return ret; } EXPORT_SYMBOL_GPL(pnfs_generic_pg_readpages); diff --git a/fs/nfs/read.c b/fs/nfs/read.c index f6ab30b..c9633b2 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -30,29 +30,49 @@ #define NFSDBG_FACILITY NFSDBG_PAGECACHE static const struct nfs_pageio_ops nfs_pageio_read_ops; -static const struct rpc_call_ops nfs_read_partial_ops; -static const struct rpc_call_ops nfs_read_full_ops; +static const struct rpc_call_ops nfs_read_common_ops; static struct kmem_cache *nfs_rdata_cachep; -struct nfs_read_header *nfs_readhdr_alloc(unsigned int pagecount) +struct nfs_read_header *nfs_readhdr_alloc() { - struct nfs_read_header *p; + struct nfs_read_header *rhdr; - p = kmem_cache_zalloc(nfs_rdata_cachep, GFP_KERNEL); - if (p) { - struct nfs_pgio_header *hdr = &p->header; - struct nfs_read_data *data = &p->rpc_data; + rhdr = kmem_cache_zalloc(nfs_rdata_cachep, GFP_KERNEL); + if (rhdr) { + struct nfs_pgio_header *hdr = &rhdr->header; INIT_LIST_HEAD(&hdr->pages); - INIT_LIST_HEAD(&data->list); + INIT_LIST_HEAD(&hdr->rpc_list); + spin_lock_init(&hdr->lock); + atomic_set(&hdr->refcnt, 0); + } + return rhdr; +} + +struct nfs_read_data *nfs_readdata_alloc(struct nfs_pgio_header *hdr, + unsigned int pagecount) +{ + struct nfs_read_data *data, *prealloc; + + prealloc = &container_of(hdr, struct nfs_read_header, header)->rpc_data; + if (prealloc->header == NULL) + data = prealloc; + else + data = kzalloc(sizeof(*data), GFP_KERNEL); + if (!data) + goto out; + + if (nfs_pgarray_set(&data->pages, pagecount)) { data->header = hdr; - if (!nfs_pgarray_set(&data->pages, pagecount)) { - kmem_cache_free(nfs_rdata_cachep, p); - p = NULL; - } + atomic_inc(&hdr->refcnt); + } else { + if (data != prealloc) + kfree(data); + data = NULL; } - return p; +out: + return data; } void nfs_readhdr_free(struct nfs_pgio_header *hdr) @@ -64,10 +84,18 @@ void nfs_readhdr_free(struct nfs_pgio_header *hdr) void nfs_readdata_release(struct nfs_read_data *rdata) { + struct nfs_pgio_header *hdr = rdata->header; + struct nfs_read_header *read_header = container_of(hdr, struct nfs_read_header, header); + put_nfs_open_context(rdata->args.context); if (rdata->pages.pagevec != rdata->pages.page_array) kfree(rdata->pages.pagevec); - nfs_readhdr_free(rdata->header); + if (rdata != &read_header->rpc_data) + kfree(rdata); + else + rdata->header = NULL; + if (atomic_dec_and_test(&hdr->refcnt)) + nfs_read_completion(hdr); } static @@ -79,35 +107,6 @@ int nfs_return_empty_page(struct page *page) return 0; } -static void nfs_readpage_truncate_uninitialised_page(struct nfs_read_data *data) -{ - unsigned int remainder = data->args.count - data->res.count; - unsigned int base = data->args.pgbase + data->res.count; - unsigned int pglen; - struct page **pages; - - if (data->res.eof == 0 || remainder == 0) - return; - /* - * Note: "remainder" can never be negative, since we check for - * this in the XDR code. - */ - pages = &data->args.pages[base >> PAGE_CACHE_SHIFT]; - base &= ~PAGE_CACHE_MASK; - pglen = PAGE_CACHE_SIZE - base; - for (;;) { - if (remainder <= pglen) { - zero_user(*pages, base, remainder); - break; - } - zero_user(*pages, base, pglen); - pages++; - remainder -= pglen; - pglen = PAGE_CACHE_SIZE; - base = 0; - } -} - void nfs_pageio_init_read_mds(struct nfs_pageio_descriptor *pgio, struct inode *inode) { @@ -170,6 +169,46 @@ static void nfs_readpage_release(struct nfs_page *req) nfs_release_request(req); } +/* Note io was page aligned */ +void nfs_read_completion(struct nfs_pgio_header *hdr) +{ + unsigned long bytes = 0; + + if (test_bit(NFS_IOHDR_REDO, &hdr->flags)) + goto out; + if (!test_bit(NFS_IOHDR_ERROR, &hdr->flags)) { + while (!list_empty(&hdr->pages)) { + struct nfs_page *req = nfs_list_entry(hdr->pages.next); + struct page *page = req->wb_page; + + if (test_bit(NFS_IOHDR_EOF, &hdr->flags)) { + if (bytes > hdr->good_bytes) + zero_user(page, 0, PAGE_SIZE); + else if (hdr->good_bytes - bytes < PAGE_SIZE) + zero_user_segment(page, + hdr->good_bytes & ~PAGE_MASK, + PAGE_SIZE); + } + SetPageUptodate(page); + nfs_list_remove_request(req); + nfs_readpage_release(req); + bytes += PAGE_SIZE; + } + } else { + while (!list_empty(&hdr->pages)) { + struct nfs_page *req = nfs_list_entry(hdr->pages.next); + + bytes += req->wb_bytes; + if (bytes <= hdr->good_bytes) + SetPageUptodate(req->wb_page); + nfs_list_remove_request(req); + nfs_readpage_release(req); + } + } +out: + hdr->release(hdr); +} + int nfs_initiate_read(struct rpc_clnt *clnt, struct nfs_read_data *data, const struct rpc_call_ops *call_ops) @@ -214,16 +253,12 @@ EXPORT_SYMBOL_GPL(nfs_initiate_read); /* * Set up the NFS read request struct */ -static void nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data, +static void nfs_read_rpcsetup(struct nfs_read_data *data, unsigned int count, unsigned int offset) { - struct inode *inode = data->header->inode; - - data->header->req = req; - data->header->inode = inode; - data->header->cred = req->wb_context->cred; + struct nfs_page *req = data->header->req; - data->args.fh = NFS_FH(inode); + data->args.fh = NFS_FH(data->header->inode); data->args.offset = req_offset(req) + offset; data->args.pgbase = req->wb_pgbase + offset; data->args.pages = data->pages.pagevec; @@ -255,7 +290,7 @@ nfs_do_multiple_reads(struct list_head *head, while (!list_empty(head)) { int ret2; - data = list_entry(head->next, struct nfs_read_data, list); + data = list_first_entry(head, struct nfs_read_data, list); list_del_init(&data->list); ret2 = nfs_do_read(data, call_ops); @@ -265,7 +300,7 @@ nfs_do_multiple_reads(struct list_head *head, return ret; } -static void +void nfs_async_read_error(struct list_head *head) { struct nfs_page *req; @@ -290,11 +325,11 @@ nfs_async_read_error(struct list_head *head) * won't see the new data until our attribute cache is updated. This is more * or less conventional NFS client behavior. */ -static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc, struct list_head *res) +static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc, + struct nfs_pgio_header *hdr) { - struct nfs_page *req = nfs_list_entry(desc->pg_list.next); + struct nfs_page *req = hdr->req; struct page *page = req->wb_page; - struct nfs_read_header *rhdr; struct nfs_read_data *data; size_t rsize = desc->pg_bsize, nbytes; unsigned int offset; @@ -302,85 +337,97 @@ static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc, struct list_head int ret = 0; nfs_list_remove_request(req); + nfs_list_add_request(req, &hdr->pages); offset = 0; nbytes = desc->pg_count; do { size_t len = min(nbytes,rsize); - rhdr = nfs_readhdr_alloc(1); - if (!rhdr) + data = nfs_readdata_alloc(hdr, 1); + if (!data) goto out_bad; - data = &rhdr->rpc_data; data->pages.pagevec[0] = page; - nfs_read_rpcsetup(req, data, len, offset); - list_add(&data->list, res); + nfs_read_rpcsetup(data, len, offset); + list_add(&data->list, &hdr->rpc_list); requests++; nbytes -= len; offset += len; } while(nbytes != 0); - atomic_set(&req->wb_complete, requests); - desc->pg_rpc_callops = &nfs_read_partial_ops; + desc->pg_rpc_callops = &nfs_read_common_ops; return ret; out_bad: - while (!list_empty(res)) { - data = list_entry(res->next, struct nfs_read_data, list); + while (!list_empty(&hdr->rpc_list)) { + data = list_first_entry(&hdr->rpc_list, struct nfs_read_data, list); list_del(&data->list); nfs_readdata_release(data); } - nfs_readpage_release(req); + nfs_async_read_error(&hdr->pages); return -ENOMEM; } -static int nfs_pagein_one(struct nfs_pageio_descriptor *desc, struct list_head *res) +static int nfs_pagein_one(struct nfs_pageio_descriptor *desc, + struct nfs_pgio_header *hdr) { struct nfs_page *req; struct page **pages; - struct nfs_read_header *rhdr; - struct nfs_read_data *data; + struct nfs_read_data *data; struct list_head *head = &desc->pg_list; int ret = 0; - rhdr = nfs_readhdr_alloc(nfs_page_array_len(desc->pg_base, - desc->pg_count)); - if (!rhdr) { + data = nfs_readdata_alloc(hdr, nfs_page_array_len(desc->pg_base, + desc->pg_count)); + if (!data) { nfs_async_read_error(head); ret = -ENOMEM; goto out; } - data = &rhdr->rpc_data; pages = data->pages.pagevec; while (!list_empty(head)) { req = nfs_list_entry(head->next); nfs_list_remove_request(req); - nfs_list_add_request(req, &rhdr->header.pages); + nfs_list_add_request(req, &hdr->pages); *pages++ = req->wb_page; } - req = nfs_list_entry(rhdr->header.pages.next); - nfs_read_rpcsetup(req, data, desc->pg_count, 0); - list_add(&data->list, res); - desc->pg_rpc_callops = &nfs_read_full_ops; + nfs_read_rpcsetup(data, desc->pg_count, 0); + list_add(&data->list, &hdr->rpc_list); + desc->pg_rpc_callops = &nfs_read_common_ops; out: return ret; } -int nfs_generic_pagein(struct nfs_pageio_descriptor *desc, struct list_head *head) +int nfs_generic_pagein(struct nfs_pageio_descriptor *desc, + struct nfs_pgio_header *hdr) { if (desc->pg_bsize < PAGE_CACHE_SIZE) - return nfs_pagein_multi(desc, head); - return nfs_pagein_one(desc, head); + return nfs_pagein_multi(desc, hdr); + return nfs_pagein_one(desc, hdr); } static int nfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc) { - LIST_HEAD(head); + struct nfs_read_header *rhdr; + struct nfs_pgio_header *hdr; int ret; - ret = nfs_generic_pagein(desc, &head); + rhdr = nfs_readhdr_alloc(); + if (!rhdr) { + nfs_async_read_error(&desc->pg_list); + return -ENOMEM; + } + hdr = &rhdr->header; + nfs_pgheader_init(desc, hdr, nfs_readhdr_free); + atomic_inc(&hdr->refcnt); + ret = nfs_generic_pagein(desc, hdr); if (ret == 0) - ret = nfs_do_multiple_reads(&head, desc->pg_rpc_callops); + ret = nfs_do_multiple_reads(&hdr->rpc_list, + desc->pg_rpc_callops); + else + set_bit(NFS_IOHDR_REDO, &hdr->flags); + if (atomic_dec_and_test(&hdr->refcnt)) + nfs_read_completion(hdr); return ret; } @@ -419,15 +466,13 @@ static void nfs_readpage_retry(struct rpc_task *task, struct nfs_read_data *data struct nfs_readargs *argp = &data->args; struct nfs_readres *resp = &data->res; - if (resp->eof || resp->count == argp->count) - return; - /* This is a short read! */ nfs_inc_stats(data->header->inode, NFSIOS_SHORTREAD); /* Has the server at least made some progress? */ - if (resp->count == 0) + if (resp->count == 0) { + nfs_set_pgio_error(data->header, -EIO, argp->offset); return; - + } /* Yes, so retry the read at the end of the data */ data->mds_offset += resp->count; argp->offset += resp->count; @@ -436,38 +481,34 @@ static void nfs_readpage_retry(struct rpc_task *task, struct nfs_read_data *data rpc_restart_call_prepare(task); } -/* - * Handle a read reply that fills part of a page. - */ -static void nfs_readpage_result_partial(struct rpc_task *task, void *calldata) +static void nfs_readpage_result_common(struct rpc_task *task, void *calldata) { struct nfs_read_data *data = calldata; - + struct nfs_pgio_header *hdr = data->header; + + /* Note the only returns of nfs_readpage_result are 0 and -EAGAIN */ if (nfs_readpage_result(task, data) != 0) return; if (task->tk_status < 0) - return; - - nfs_readpage_truncate_uninitialised_page(data); - nfs_readpage_retry(task, data); + nfs_set_pgio_error(hdr, task->tk_status, data->args.offset); + else if (data->res.eof) { + loff_t bound; + + bound = data->args.offset + data->res.count; + spin_lock(&hdr->lock); + if (bound < hdr->io_start + hdr->good_bytes) { + set_bit(NFS_IOHDR_EOF, &hdr->flags); + clear_bit(NFS_IOHDR_ERROR, &hdr->flags); + hdr->good_bytes = bound - hdr->io_start; + } + spin_unlock(&hdr->lock); + } else if (data->res.count != data->args.count) + nfs_readpage_retry(task, data); } -static void nfs_readpage_release_partial(void *calldata) +static void nfs_readpage_release_common(void *calldata) { - struct nfs_read_data *data = calldata; - struct nfs_page *req = data->header->req; - struct page *page = req->wb_page; - int status = data->task.tk_status; - - if (status < 0) - set_bit(PG_PARTIAL_READ_FAILED, &req->wb_flags); - - if (atomic_dec_and_test(&req->wb_complete)) { - if (!test_bit(PG_PARTIAL_READ_FAILED, &req->wb_flags)) - SetPageUptodate(page); - nfs_readpage_release(req); - } - nfs_readdata_release(data); + nfs_readdata_release(calldata); } void nfs_read_prepare(struct rpc_task *task, void *calldata) @@ -476,75 +517,10 @@ void nfs_read_prepare(struct rpc_task *task, void *calldata) NFS_PROTO(data->header->inode)->read_rpc_prepare(task, data); } -static const struct rpc_call_ops nfs_read_partial_ops = { - .rpc_call_prepare = nfs_read_prepare, - .rpc_call_done = nfs_readpage_result_partial, - .rpc_release = nfs_readpage_release_partial, -}; - -static void nfs_readpage_set_pages_uptodate(struct nfs_read_data *data) -{ - unsigned int count = data->res.count; - unsigned int base = data->args.pgbase; - struct page **pages; - - if (data->res.eof) - count = data->args.count; - if (unlikely(count == 0)) - return; - pages = &data->args.pages[base >> PAGE_CACHE_SHIFT]; - base &= ~PAGE_CACHE_MASK; - count += base; - for (;count >= PAGE_CACHE_SIZE; count -= PAGE_CACHE_SIZE, pages++) - SetPageUptodate(*pages); - if (count == 0) - return; - /* Was this a short read? */ - if (data->res.eof || data->res.count == data->args.count) - SetPageUptodate(*pages); -} - -/* - * This is the callback from RPC telling us whether a reply was - * received or some error occurred (timeout or socket shutdown). - */ -static void nfs_readpage_result_full(struct rpc_task *task, void *calldata) -{ - struct nfs_read_data *data = calldata; - - if (nfs_readpage_result(task, data) != 0) - return; - if (task->tk_status < 0) - return; - /* - * Note: nfs_readpage_retry may change the values of - * data->args. In the multi-page case, we therefore need - * to ensure that we call nfs_readpage_set_pages_uptodate() - * first. - */ - nfs_readpage_truncate_uninitialised_page(data); - nfs_readpage_set_pages_uptodate(data); - nfs_readpage_retry(task, data); -} - -static void nfs_readpage_release_full(void *calldata) -{ - struct nfs_read_data *data = calldata; - struct nfs_pgio_header *hdr = data->header; - - while (!list_empty(&hdr->pages)) { - struct nfs_page *req = nfs_list_entry(hdr->pages.next); - - nfs_list_remove_request(req); - nfs_readpage_release(req); - } - nfs_readdata_release(calldata); -} - -static const struct rpc_call_ops nfs_read_full_ops = { +static const struct rpc_call_ops nfs_read_common_ops = { .rpc_call_prepare = nfs_read_prepare, - .rpc_call_done = nfs_readpage_result_full, - .rpc_release = nfs_readpage_release_full, + .rpc_call_done = nfs_readpage_result_common, + .rpc_release = nfs_readpage_release_common, }; /* diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index eac30d6..5c52034 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -27,7 +27,6 @@ enum { PG_CLEAN, PG_NEED_COMMIT, PG_NEED_RESCHED, - PG_PARTIAL_READ_FAILED, PG_COMMIT_TO_DS, }; diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index e34beaf..1648621 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1187,14 +1187,30 @@ struct nfs_read_data { struct nfs_client *ds_clp; /* pNFS data server */ }; +/* used as flag bits in nfs_pgio_header */ +enum { + NFS_IOHDR_ERROR = 0, + NFS_IOHDR_EOF, + NFS_IOHDR_REDO, +}; + struct nfs_pgio_header { struct inode *inode; struct rpc_cred *cred; struct list_head pages; + struct list_head rpc_list; + atomic_t refcnt; struct nfs_page *req; struct pnfs_layout_segment *lseg; + loff_t io_start; const struct rpc_call_ops *mds_ops; + void (*release) (struct nfs_pgio_header *hdr); + spinlock_t lock; + /* fields protected by lock */ int pnfs_error; + int error; /* merge with pnfs_error */ + unsigned long good_bytes; /* boundary of good data */ + unsigned long flags; }; struct nfs_read_header { -- cgit v0.10.2 From 6c75dc0d498caa402fb17b1bf769835a9db875c8 Mon Sep 17 00:00:00 2001 From: Fred Isaman Date: Fri, 20 Apr 2012 14:47:47 -0400 Subject: NFS: merge _full and _partial write rpc_ops Decouple nfs_pgio_header and nfs_write_data, and have (possibly multiple) nfs_write_datas each take a refcount on nfs_pgio_header. For the moment keeps nfs_write_header as a way to preallocate a single nfs_write_data with the nfs_pgio_header. The code doesn't need this, and would be prettier without, but given the amount of churn I am already introducing I didn't want to play with tuning new mempools. This also fixes bug in pnfs_ld_handle_write_error. In the case of desc->pg_bsize < PAGE_CACHE_SIZE, the pages list was empty, causing replay attempt to do nothing. Signed-off-by: Fred Isaman Signed-off-by: Trond Myklebust diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 90b00ce..22a40c4 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -768,11 +768,17 @@ static ssize_t nfs_direct_write_schedule_segment(struct nfs_direct_req *dreq, bytes = min(wsize,count); result = -ENOMEM; - whdr = nfs_writehdr_alloc(nfs_page_array_len(pgbase, bytes)); + whdr = nfs_writehdr_alloc(); if (unlikely(!whdr)) break; - data = &whdr->rpc_data; + data = nfs_writedata_alloc(&whdr->header, nfs_page_array_len(pgbase, bytes)); + if (!data) { + nfs_writehdr_free(&whdr->header); + break; + } + data->header = &whdr->header; + atomic_inc(&data->header->refcnt); pages = &data->pages; down_read(¤t->mm->mmap_sem); diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 33af5e5..16bc9c4 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -319,10 +319,14 @@ extern void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio); extern void nfs_readdata_release(struct nfs_read_data *rdata); /* write.c */ -extern struct nfs_write_header *nfs_writehdr_alloc(unsigned int npages); +extern void nfs_async_write_error(struct list_head *head); +extern struct nfs_write_header *nfs_writehdr_alloc(void); extern void nfs_writehdr_free(struct nfs_pgio_header *hdr); +extern struct nfs_write_data *nfs_writedata_alloc(struct nfs_pgio_header *hdr, + unsigned int pagecount); +extern void nfs_write_completion(struct nfs_pgio_header *hdr); extern int nfs_generic_flush(struct nfs_pageio_descriptor *desc, - struct list_head *head); + struct nfs_pgio_header *hdr); extern void nfs_pageio_init_write_mds(struct nfs_pageio_descriptor *pgio, struct inode *inode, int ioflags); extern void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio); diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index 333e765..02d8170 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c @@ -314,7 +314,6 @@ static void filelayout_write_release(void *data) { struct nfs_write_data *wdata = data; - put_lseg(wdata->header->lseg); wdata->header->mds_ops->rpc_release(data); } diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index ce31ab2..87af80d 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3431,8 +3431,6 @@ void nfs4_reset_write(struct rpc_task *task, struct nfs_write_data *data) struct inode *inode = hdr->inode; dprintk("%s Reset task for i/o through\n", __func__); - put_lseg(hdr->lseg); - hdr->lseg = NULL; data->ds_clp = NULL; data->write_done_cb = nfs4_write_done_cb; data->args.fh = NFS_FH(inode); @@ -3448,7 +3446,7 @@ static void nfs4_proc_write_setup(struct nfs_write_data *data, struct rpc_messag { struct nfs_server *server = NFS_SERVER(data->header->inode); - if (data->header->lseg) { + if (data->ds_clp) { data->args.bitmask = NULL; data->res.fattr = NULL; } else diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index d1a91db..d515f00 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1199,7 +1199,9 @@ static void pnfs_ld_handle_write_error(struct nfs_write_data *data) clear_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(hdr->inode)->flags); pnfs_return_layout(hdr->inode); } - data->task.tk_status = pnfs_write_done_resend_to_mds(hdr->inode, &hdr->pages); + if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) + data->task.tk_status = pnfs_write_done_resend_to_mds(hdr->inode, + &hdr->pages); } /* @@ -1214,7 +1216,6 @@ void pnfs_ld_write_done(struct nfs_write_data *data) hdr->mds_ops->rpc_call_done(&data->task, data); } else pnfs_ld_handle_write_error(data); - put_lseg(hdr->lseg); hdr->mds_ops->rpc_release(data); } EXPORT_SYMBOL_GPL(pnfs_ld_write_done); @@ -1225,12 +1226,11 @@ pnfs_write_through_mds(struct nfs_pageio_descriptor *desc, { struct nfs_pgio_header *hdr = data->header; - list_splice_tail_init(&hdr->pages, &desc->pg_list); - if (hdr->req && list_empty(&hdr->req->wb_list)) - nfs_list_add_request(hdr->req, &desc->pg_list); - nfs_pageio_reset_write_mds(desc); - desc->pg_recoalesce = 1; - put_lseg(hdr->lseg); + if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) { + list_splice_tail_init(&hdr->pages, &desc->pg_list); + nfs_pageio_reset_write_mds(desc); + desc->pg_recoalesce = 1; + } nfs_writedata_release(data); } @@ -1246,18 +1246,12 @@ pnfs_try_to_write_data(struct nfs_write_data *wdata, struct nfs_server *nfss = NFS_SERVER(inode); hdr->mds_ops = call_ops; - hdr->lseg = get_lseg(lseg); dprintk("%s: Writing ino:%lu %u@%llu (how %d)\n", __func__, inode->i_ino, wdata->args.count, wdata->args.offset, how); - trypnfs = nfss->pnfs_curr_ld->write_pagelist(wdata, how); - if (trypnfs == PNFS_NOT_ATTEMPTED) { - put_lseg(hdr->lseg); - hdr->lseg = NULL; - } else + if (trypnfs != PNFS_NOT_ATTEMPTED) nfs_inc_stats(inode, NFSIOS_PNFS_WRITE); - dprintk("%s End (trypnfs:%d)\n", __func__, trypnfs); return trypnfs; } @@ -1273,7 +1267,7 @@ pnfs_do_multiple_writes(struct nfs_pageio_descriptor *desc, struct list_head *he while (!list_empty(head)) { enum pnfs_try_status trypnfs; - data = list_entry(head->next, struct nfs_write_data, list); + data = list_first_entry(head, struct nfs_write_data, list); list_del_init(&data->list); trypnfs = pnfs_try_to_write_data(data, call_ops, lseg, how); @@ -1283,20 +1277,40 @@ pnfs_do_multiple_writes(struct nfs_pageio_descriptor *desc, struct list_head *he put_lseg(lseg); } +static void pnfs_writehdr_free(struct nfs_pgio_header *hdr) +{ + put_lseg(hdr->lseg); + nfs_writehdr_free(hdr); +} + int pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc) { - LIST_HEAD(head); + struct nfs_write_header *whdr; + struct nfs_pgio_header *hdr; int ret; - ret = nfs_generic_flush(desc, &head); - if (ret != 0) { + whdr = nfs_writehdr_alloc(); + if (!whdr) { + nfs_async_write_error(&desc->pg_list); put_lseg(desc->pg_lseg); desc->pg_lseg = NULL; - return ret; + return -ENOMEM; } - pnfs_do_multiple_writes(desc, &head, desc->pg_ioflags); - return 0; + hdr = &whdr->header; + nfs_pgheader_init(desc, hdr, pnfs_writehdr_free); + hdr->lseg = get_lseg(desc->pg_lseg); + atomic_inc(&hdr->refcnt); + ret = nfs_generic_flush(desc, hdr); + if (ret != 0) { + put_lseg(desc->pg_lseg); + desc->pg_lseg = NULL; + set_bit(NFS_IOHDR_REDO, &hdr->flags); + } else + pnfs_do_multiple_writes(desc, &hdr->rpc_list, desc->pg_ioflags); + if (atomic_dec_and_test(&hdr->refcnt)) + nfs_write_completion(hdr); + return ret; } EXPORT_SYMBOL_GPL(pnfs_generic_pg_writepages); diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 2efae04..076075e 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -42,8 +42,7 @@ static void nfs_pageio_init_write(struct nfs_pageio_descriptor *desc, struct inode *inode, int ioflags); static void nfs_redirty_request(struct nfs_page *req); -static const struct rpc_call_ops nfs_write_partial_ops; -static const struct rpc_call_ops nfs_write_full_ops; +static const struct rpc_call_ops nfs_write_common_ops; static const struct rpc_call_ops nfs_commit_ops; static struct kmem_cache *nfs_wdata_cachep; @@ -69,26 +68,47 @@ void nfs_commit_free(struct nfs_commit_data *p) } EXPORT_SYMBOL_GPL(nfs_commit_free); -struct nfs_write_header *nfs_writehdr_alloc(unsigned int pagecount) +struct nfs_write_header *nfs_writehdr_alloc(void) { struct nfs_write_header *p = mempool_alloc(nfs_wdata_mempool, GFP_NOFS); if (p) { struct nfs_pgio_header *hdr = &p->header; - struct nfs_write_data *data = &p->rpc_data; memset(p, 0, sizeof(*p)); INIT_LIST_HEAD(&hdr->pages); - INIT_LIST_HEAD(&data->list); - data->header = hdr; - if (!nfs_pgarray_set(&data->pages, pagecount)) { - mempool_free(p, nfs_wdata_mempool); - p = NULL; - } + INIT_LIST_HEAD(&hdr->rpc_list); + spin_lock_init(&hdr->lock); + atomic_set(&hdr->refcnt, 0); } return p; } +struct nfs_write_data *nfs_writedata_alloc(struct nfs_pgio_header *hdr, + unsigned int pagecount) +{ + struct nfs_write_data *data, *prealloc; + + prealloc = &container_of(hdr, struct nfs_write_header, header)->rpc_data; + if (prealloc->header == NULL) + data = prealloc; + else + data = kzalloc(sizeof(*data), GFP_KERNEL); + if (!data) + goto out; + + if (nfs_pgarray_set(&data->pages, pagecount)) { + data->header = hdr; + atomic_inc(&hdr->refcnt); + } else { + if (data != prealloc) + kfree(data); + data = NULL; + } +out: + return data; +} + void nfs_writehdr_free(struct nfs_pgio_header *hdr) { struct nfs_write_header *whdr = container_of(hdr, struct nfs_write_header, header); @@ -97,10 +117,18 @@ void nfs_writehdr_free(struct nfs_pgio_header *hdr) void nfs_writedata_release(struct nfs_write_data *wdata) { + struct nfs_pgio_header *hdr = wdata->header; + struct nfs_write_header *write_header = container_of(hdr, struct nfs_write_header, header); + put_nfs_open_context(wdata->args.context); if (wdata->pages.pagevec != wdata->pages.page_array) kfree(wdata->pages.pagevec); - nfs_writehdr_free(wdata->header); + if (wdata != &write_header->rpc_data) + kfree(wdata); + else + wdata->header = NULL; + if (atomic_dec_and_test(&hdr->refcnt)) + nfs_write_completion(hdr); } static void nfs_context_set_write_error(struct nfs_open_context *ctx, int error) @@ -511,20 +539,6 @@ int nfs_write_need_commit(struct nfs_write_data *data) return data->verf.committed != NFS_FILE_SYNC; } -static inline -int nfs_reschedule_unstable_write(struct nfs_page *req, - struct nfs_write_data *data) -{ - if (test_and_clear_bit(PG_NEED_COMMIT, &req->wb_flags)) { - nfs_mark_request_commit(req, data->header->lseg); - return 1; - } - if (test_and_clear_bit(PG_NEED_RESCHED, &req->wb_flags)) { - nfs_mark_request_dirty(req); - return 1; - } - return 0; -} #else static void nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg) @@ -542,13 +556,43 @@ int nfs_write_need_commit(struct nfs_write_data *data) return 0; } -static inline -int nfs_reschedule_unstable_write(struct nfs_page *req, - struct nfs_write_data *data) +#endif + +void nfs_write_completion(struct nfs_pgio_header *hdr) { - return 0; + unsigned long bytes = 0; + + if (test_bit(NFS_IOHDR_REDO, &hdr->flags)) + goto out; + while (!list_empty(&hdr->pages)) { + struct nfs_page *req = nfs_list_entry(hdr->pages.next); + struct page *page = req->wb_page; + + bytes += req->wb_bytes; + nfs_list_remove_request(req); + if (test_bit(NFS_IOHDR_ERROR, &hdr->flags) && + (hdr->good_bytes < bytes)) { + nfs_set_pageerror(page); + nfs_context_set_write_error(req->wb_context, hdr->error); + goto remove_req; + } + if (test_bit(NFS_IOHDR_NEED_RESCHED, &hdr->flags)) { + nfs_mark_request_dirty(req); + goto next; + } + if (test_bit(NFS_IOHDR_NEED_COMMIT, &hdr->flags)) { + nfs_mark_request_commit(req, hdr->lseg); + goto next; + } +remove_req: + nfs_inode_remove_request(req); +next: + nfs_unlock_request(req); + nfs_end_page_writeback(page); + } +out: + hdr->release(hdr); } -#endif #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) static int @@ -813,17 +857,6 @@ int nfs_updatepage(struct file *file, struct page *page, return status; } -static void nfs_writepage_release(struct nfs_page *req, - struct nfs_write_data *data) -{ - struct page *page = req->wb_page; - - if (PageError(req->wb_page) || !nfs_reschedule_unstable_write(req, data)) - nfs_inode_remove_request(req); - nfs_unlock_request(req); - nfs_end_page_writeback(page); -} - static int flush_task_priority(int how) { switch (how & (FLUSH_HIGHPRI|FLUSH_LOWPRI)) { @@ -890,22 +923,16 @@ EXPORT_SYMBOL_GPL(nfs_initiate_write); /* * Set up the argument/result storage required for the RPC call. */ -static void nfs_write_rpcsetup(struct nfs_page *req, - struct nfs_write_data *data, +static void nfs_write_rpcsetup(struct nfs_write_data *data, unsigned int count, unsigned int offset, int how) { - struct nfs_pgio_header *hdr = data->header; - struct inode *inode = req->wb_context->dentry->d_inode; + struct nfs_page *req = data->header->req; /* Set up the RPC argument and reply structs * NB: take care not to mess about with data->commit et al. */ - hdr->req = req; - hdr->inode = inode = req->wb_context->dentry->d_inode; - hdr->cred = req->wb_context->cred; - - data->args.fh = NFS_FH(inode); + data->args.fh = NFS_FH(data->header->inode); data->args.offset = req_offset(req) + offset; /* pnfs_set_layoutcommit needs this */ data->mds_offset = data->args.offset; @@ -919,7 +946,7 @@ static void nfs_write_rpcsetup(struct nfs_page *req, case 0: break; case FLUSH_COND_STABLE: - if (nfs_need_commit(NFS_I(inode))) + if (nfs_need_commit(NFS_I(data->header->inode))) break; default: data->args.stable = NFS_FILE_SYNC; @@ -950,7 +977,7 @@ static int nfs_do_multiple_writes(struct list_head *head, while (!list_empty(head)) { int ret2; - data = list_entry(head->next, struct nfs_write_data, list); + data = list_first_entry(head, struct nfs_write_data, list); list_del_init(&data->list); ret2 = nfs_do_write(data, call_ops, how); @@ -973,15 +1000,26 @@ static void nfs_redirty_request(struct nfs_page *req) nfs_end_page_writeback(page); } +void nfs_async_write_error(struct list_head *head) +{ + struct nfs_page *req; + + while (!list_empty(head)) { + req = nfs_list_entry(head->next); + nfs_list_remove_request(req); + nfs_redirty_request(req); + } +} + /* * Generate multiple small requests to write out a single * contiguous dirty area on one page. */ -static int nfs_flush_multi(struct nfs_pageio_descriptor *desc, struct list_head *res) +static int nfs_flush_multi(struct nfs_pageio_descriptor *desc, + struct nfs_pgio_header *hdr) { - struct nfs_page *req = nfs_list_entry(desc->pg_list.next); + struct nfs_page *req = hdr->req; struct page *page = req->wb_page; - struct nfs_write_header *whdr; struct nfs_write_data *data; size_t wsize = desc->pg_bsize, nbytes; unsigned int offset; @@ -989,6 +1027,7 @@ static int nfs_flush_multi(struct nfs_pageio_descriptor *desc, struct list_head int ret = 0; nfs_list_remove_request(req); + nfs_list_add_request(req, &hdr->pages); if ((desc->pg_ioflags & FLUSH_COND_STABLE) && (desc->pg_moreio || NFS_I(desc->pg_inode)->ncommit || @@ -1001,28 +1040,27 @@ static int nfs_flush_multi(struct nfs_pageio_descriptor *desc, struct list_head do { size_t len = min(nbytes, wsize); - whdr = nfs_writehdr_alloc(1); - if (!whdr) + data = nfs_writedata_alloc(hdr, 1); + if (!data) goto out_bad; - data = &whdr->rpc_data; data->pages.pagevec[0] = page; - nfs_write_rpcsetup(req, data, len, offset, desc->pg_ioflags); - list_add(&data->list, res); + nfs_write_rpcsetup(data, len, offset, desc->pg_ioflags); + list_add(&data->list, &hdr->rpc_list); requests++; nbytes -= len; offset += len; } while (nbytes != 0); atomic_set(&req->wb_complete, requests); - desc->pg_rpc_callops = &nfs_write_partial_ops; + desc->pg_rpc_callops = &nfs_write_common_ops; return ret; out_bad: - while (!list_empty(res)) { - data = list_entry(res->next, struct nfs_write_data, list); + while (!list_empty(&hdr->rpc_list)) { + data = list_first_entry(&hdr->rpc_list, struct nfs_write_data, list); list_del(&data->list); nfs_writedata_release(data); } - nfs_redirty_request(req); + nfs_async_write_error(&hdr->pages); return -ENOMEM; } @@ -1034,64 +1072,74 @@ out_bad: * This is the case if nfs_updatepage detects a conflicting request * that has been written but not committed. */ -static int nfs_flush_one(struct nfs_pageio_descriptor *desc, struct list_head *res) +static int nfs_flush_one(struct nfs_pageio_descriptor *desc, + struct nfs_pgio_header *hdr) { struct nfs_page *req; struct page **pages; - struct nfs_write_header *whdr; struct nfs_write_data *data; struct list_head *head = &desc->pg_list; int ret = 0; - whdr = nfs_writehdr_alloc(nfs_page_array_len(desc->pg_base, - desc->pg_count)); - if (!whdr) { - while (!list_empty(head)) { - req = nfs_list_entry(head->next); - nfs_list_remove_request(req); - nfs_redirty_request(req); - } + data = nfs_writedata_alloc(hdr, nfs_page_array_len(desc->pg_base, + desc->pg_count)); + if (!data) { + nfs_async_write_error(head); ret = -ENOMEM; goto out; } - data = &whdr->rpc_data; + pages = data->pages.pagevec; while (!list_empty(head)) { req = nfs_list_entry(head->next); nfs_list_remove_request(req); - nfs_list_add_request(req, &whdr->header.pages); + nfs_list_add_request(req, &hdr->pages); *pages++ = req->wb_page; } - req = nfs_list_entry(whdr->header.pages.next); if ((desc->pg_ioflags & FLUSH_COND_STABLE) && (desc->pg_moreio || NFS_I(desc->pg_inode)->ncommit)) desc->pg_ioflags &= ~FLUSH_COND_STABLE; /* Set up the argument struct */ - nfs_write_rpcsetup(req, data, desc->pg_count, 0, desc->pg_ioflags); - list_add(&data->list, res); - desc->pg_rpc_callops = &nfs_write_full_ops; + nfs_write_rpcsetup(data, desc->pg_count, 0, desc->pg_ioflags); + list_add(&data->list, &hdr->rpc_list); + desc->pg_rpc_callops = &nfs_write_common_ops; out: return ret; } -int nfs_generic_flush(struct nfs_pageio_descriptor *desc, struct list_head *head) +int nfs_generic_flush(struct nfs_pageio_descriptor *desc, + struct nfs_pgio_header *hdr) { if (desc->pg_bsize < PAGE_CACHE_SIZE) - return nfs_flush_multi(desc, head); - return nfs_flush_one(desc, head); + return nfs_flush_multi(desc, hdr); + return nfs_flush_one(desc, hdr); } static int nfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc) { - LIST_HEAD(head); + struct nfs_write_header *whdr; + struct nfs_pgio_header *hdr; int ret; - ret = nfs_generic_flush(desc, &head); + whdr = nfs_writehdr_alloc(); + if (!whdr) { + nfs_async_write_error(&desc->pg_list); + return -ENOMEM; + } + hdr = &whdr->header; + nfs_pgheader_init(desc, hdr, nfs_writehdr_free); + atomic_inc(&hdr->refcnt); + ret = nfs_generic_flush(desc, hdr); if (ret == 0) - ret = nfs_do_multiple_writes(&head, desc->pg_rpc_callops, - desc->pg_ioflags); + ret = nfs_do_multiple_writes(&hdr->rpc_list, + desc->pg_rpc_callops, + desc->pg_ioflags); + else + set_bit(NFS_IOHDR_REDO, &hdr->flags); + if (atomic_dec_and_test(&hdr->refcnt)) + nfs_write_completion(hdr); return ret; } @@ -1121,62 +1169,6 @@ static void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, nfs_pageio_init_write_mds(pgio, inode, ioflags); } -/* - * Handle a write reply that flushed part of a page. - */ -static void nfs_writeback_done_partial(struct rpc_task *task, void *calldata) -{ - struct nfs_write_data *data = calldata; - - dprintk("NFS: %5u write(%s/%lld %d@%lld)", - task->tk_pid, - data->header->inode->i_sb->s_id, - (long long) - NFS_FILEID(data->header->inode), - data->header->req->wb_bytes, - (long long)req_offset(data->header->req)); - - nfs_writeback_done(task, data); -} - -static void nfs_writeback_release_partial(void *calldata) -{ - struct nfs_write_data *data = calldata; - struct nfs_page *req = data->header->req; - struct page *page = req->wb_page; - int status = data->task.tk_status; - - if (status < 0) { - nfs_set_pageerror(page); - nfs_context_set_write_error(req->wb_context, status); - dprintk(", error = %d\n", status); - goto out; - } - - if (nfs_write_need_commit(data)) { - struct inode *inode = page->mapping->host; - - spin_lock(&inode->i_lock); - if (test_bit(PG_NEED_RESCHED, &req->wb_flags)) { - /* Do nothing we need to resend the writes */ - } else if (!test_and_set_bit(PG_NEED_COMMIT, &req->wb_flags)) { - memcpy(&req->wb_verf, &data->verf, sizeof(req->wb_verf)); - dprintk(" defer commit\n"); - } else if (memcmp(&req->wb_verf, &data->verf, sizeof(req->wb_verf))) { - set_bit(PG_NEED_RESCHED, &req->wb_flags); - clear_bit(PG_NEED_COMMIT, &req->wb_flags); - dprintk(" server reboot detected\n"); - } - spin_unlock(&inode->i_lock); - } else - dprintk(" OK\n"); - -out: - if (atomic_dec_and_test(&req->wb_complete)) - nfs_writepage_release(req, data); - nfs_writedata_release(data); -} - void nfs_write_prepare(struct rpc_task *task, void *calldata) { struct nfs_write_data *data = calldata; @@ -1190,12 +1182,6 @@ void nfs_commit_prepare(struct rpc_task *task, void *calldata) NFS_PROTO(data->inode)->commit_rpc_prepare(task, data); } -static const struct rpc_call_ops nfs_write_partial_ops = { - .rpc_call_prepare = nfs_write_prepare, - .rpc_call_done = nfs_writeback_done_partial, - .rpc_release = nfs_writeback_release_partial, -}; - /* * Handle a write reply that flushes a whole page. * @@ -1203,60 +1189,37 @@ static const struct rpc_call_ops nfs_write_partial_ops = { * writebacks since the page->count is kept > 1 for as long * as the page has a write request pending. */ -static void nfs_writeback_done_full(struct rpc_task *task, void *calldata) +static void nfs_writeback_done_common(struct rpc_task *task, void *calldata) { struct nfs_write_data *data = calldata; nfs_writeback_done(task, data); } -static void nfs_writeback_release_full(void *calldata) +static void nfs_writeback_release_common(void *calldata) { struct nfs_write_data *data = calldata; struct nfs_pgio_header *hdr = data->header; int status = data->task.tk_status; + struct nfs_page *req = hdr->req; - /* Update attributes as result of writeback. */ - while (!list_empty(&hdr->pages)) { - struct nfs_page *req = nfs_list_entry(hdr->pages.next); - struct page *page = req->wb_page; - - nfs_list_remove_request(req); - - dprintk("NFS: %5u write (%s/%lld %d@%lld)", - data->task.tk_pid, - req->wb_context->dentry->d_inode->i_sb->s_id, - (long long)NFS_FILEID(req->wb_context->dentry->d_inode), - req->wb_bytes, - (long long)req_offset(req)); - - if (status < 0) { - nfs_set_pageerror(page); - nfs_context_set_write_error(req->wb_context, status); - dprintk(", error = %d\n", status); - goto remove_request; - } - - if (nfs_write_need_commit(data)) { + if ((status >= 0) && nfs_write_need_commit(data)) { + spin_lock(&hdr->lock); + if (test_bit(NFS_IOHDR_NEED_RESCHED, &hdr->flags)) + ; /* Do nothing */ + else if (!test_and_set_bit(NFS_IOHDR_NEED_COMMIT, &hdr->flags)) memcpy(&req->wb_verf, &data->verf, sizeof(req->wb_verf)); - nfs_mark_request_commit(req, hdr->lseg); - dprintk(" marked for commit\n"); - goto next; - } - dprintk(" OK\n"); -remove_request: - nfs_inode_remove_request(req); - next: - nfs_unlock_request(req); - nfs_end_page_writeback(page); + else if (memcmp(&req->wb_verf, &data->verf, sizeof(req->wb_verf))) + set_bit(NFS_IOHDR_NEED_RESCHED, &hdr->flags); + spin_unlock(&hdr->lock); } nfs_writedata_release(data); } -static const struct rpc_call_ops nfs_write_full_ops = { +static const struct rpc_call_ops nfs_write_common_ops = { .rpc_call_prepare = nfs_write_prepare, - .rpc_call_done = nfs_writeback_done_full, - .rpc_release = nfs_writeback_release_full, + .rpc_call_done = nfs_writeback_done_common, + .rpc_release = nfs_writeback_release_common, }; @@ -1307,38 +1270,40 @@ void nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data) } } #endif - /* Is this a short write? */ - if (task->tk_status >= 0 && resp->count < argp->count) { + if (task->tk_status < 0) + nfs_set_pgio_error(data->header, task->tk_status, argp->offset); + else if (resp->count < argp->count) { static unsigned long complain; + /* This a short write! */ nfs_inc_stats(inode, NFSIOS_SHORTWRITE); /* Has the server at least made some progress? */ - if (resp->count != 0) { - /* Was this an NFSv2 write or an NFSv3 stable write? */ - if (resp->verf->committed != NFS_UNSTABLE) { - /* Resend from where the server left off */ - data->mds_offset += resp->count; - argp->offset += resp->count; - argp->pgbase += resp->count; - argp->count -= resp->count; - } else { - /* Resend as a stable write in order to avoid - * headaches in the case of a server crash. - */ - argp->stable = NFS_FILE_SYNC; + if (resp->count == 0) { + if (time_before(complain, jiffies)) { + printk(KERN_WARNING + "NFS: Server wrote zero bytes, expected %u.\n", + argp->count); + complain = jiffies + 300 * HZ; } - rpc_restart_call_prepare(task); + nfs_set_pgio_error(data->header, -EIO, argp->offset); + task->tk_status = -EIO; return; } - if (time_before(complain, jiffies)) { - printk(KERN_WARNING - "NFS: Server wrote zero bytes, expected %u.\n", - argp->count); - complain = jiffies + 300 * HZ; + /* Was this an NFSv2 write or an NFSv3 stable write? */ + if (resp->verf->committed != NFS_UNSTABLE) { + /* Resend from where the server left off */ + data->mds_offset += resp->count; + argp->offset += resp->count; + argp->pgbase += resp->count; + argp->count -= resp->count; + } else { + /* Resend as a stable write in order to avoid + * headaches in the case of a server crash. + */ + argp->stable = NFS_FILE_SYNC; } - /* Can't do anything about it except throw an error. */ - task->tk_status = -EIO; + rpc_restart_call_prepare(task); } } diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 1648621..0d17db7 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1192,6 +1192,8 @@ enum { NFS_IOHDR_ERROR = 0, NFS_IOHDR_EOF, NFS_IOHDR_REDO, + NFS_IOHDR_NEED_COMMIT, + NFS_IOHDR_NEED_RESCHED, }; struct nfs_pgio_header { -- cgit v0.10.2 From 061ae2edb7375ab6776468b075da71008a098b55 Mon Sep 17 00:00:00 2001 From: Fred Isaman Date: Fri, 20 Apr 2012 14:47:48 -0400 Subject: NFS: create completion structure to pass into page_init functions Factors out the code that will need to change when directio starts using these code paths. This will allow directio to use the generic pagein and flush routines Signed-off-by: Fred Isaman Signed-off-by: Trond Myklebust diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 16bc9c4..3ef8fcd 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -300,11 +300,10 @@ extern struct dentry *nfs4_get_root(struct super_block *, struct nfs_fh *, extern int nfs4_get_rootfh(struct nfs_server *server, struct nfs_fh *mntfh); #endif +struct nfs_pgio_completion_ops; /* read.c */ -extern void nfs_async_read_error(struct list_head *head); extern struct nfs_read_header *nfs_readhdr_alloc(void); extern void nfs_readhdr_free(struct nfs_pgio_header *hdr); -extern void nfs_read_completion(struct nfs_pgio_header *hdr); extern struct nfs_read_data *nfs_readdata_alloc(struct nfs_pgio_header *hdr, unsigned int pagecount); extern int nfs_initiate_read(struct rpc_clnt *clnt, @@ -314,21 +313,21 @@ extern void nfs_read_prepare(struct rpc_task *task, void *calldata); extern int nfs_generic_pagein(struct nfs_pageio_descriptor *desc, struct nfs_pgio_header *hdr); extern void nfs_pageio_init_read_mds(struct nfs_pageio_descriptor *pgio, - struct inode *inode); + struct inode *inode, + const struct nfs_pgio_completion_ops *compl_ops); extern void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio); extern void nfs_readdata_release(struct nfs_read_data *rdata); /* write.c */ -extern void nfs_async_write_error(struct list_head *head); extern struct nfs_write_header *nfs_writehdr_alloc(void); extern void nfs_writehdr_free(struct nfs_pgio_header *hdr); extern struct nfs_write_data *nfs_writedata_alloc(struct nfs_pgio_header *hdr, unsigned int pagecount); -extern void nfs_write_completion(struct nfs_pgio_header *hdr); extern int nfs_generic_flush(struct nfs_pageio_descriptor *desc, struct nfs_pgio_header *hdr); extern void nfs_pageio_init_write_mds(struct nfs_pageio_descriptor *pgio, - struct inode *inode, int ioflags); + struct inode *inode, int ioflags, + const struct nfs_pgio_completion_ops *compl_ops); extern void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio); extern void nfs_writedata_release(struct nfs_write_data *wdata); extern void nfs_commit_free(struct nfs_commit_data *p); diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index cd4c038..4cf2a68 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -49,6 +49,7 @@ void nfs_pgheader_init(struct nfs_pageio_descriptor *desc, hdr->io_start = req_offset(hdr->req); hdr->good_bytes = desc->pg_count; hdr->release = release; + hdr->completion_ops = desc->pg_completion_ops; } void nfs_set_pgio_error(struct nfs_pgio_header *hdr, int error, loff_t pos) @@ -240,6 +241,7 @@ EXPORT_SYMBOL_GPL(nfs_generic_pg_test); void nfs_pageio_init(struct nfs_pageio_descriptor *desc, struct inode *inode, const struct nfs_pageio_ops *pg_ops, + const struct nfs_pgio_completion_ops *compl_ops, size_t bsize, int io_flags) { @@ -252,6 +254,7 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc, desc->pg_recoalesce = 0; desc->pg_inode = inode; desc->pg_ops = pg_ops; + desc->pg_completion_ops = compl_ops; desc->pg_ioflags = io_flags; desc->pg_error = 0; desc->pg_lseg = NULL; diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index d515f00..b3a0c01 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1113,26 +1113,31 @@ pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page * EXPORT_SYMBOL_GPL(pnfs_generic_pg_init_write); bool -pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode) +pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode, + const struct nfs_pgio_completion_ops *compl_ops) { struct nfs_server *server = NFS_SERVER(inode); struct pnfs_layoutdriver_type *ld = server->pnfs_curr_ld; if (ld == NULL) return false; - nfs_pageio_init(pgio, inode, ld->pg_read_ops, server->rsize, 0); + nfs_pageio_init(pgio, inode, ld->pg_read_ops, compl_ops, + server->rsize, 0); return true; } bool -pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode, int ioflags) +pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode, + int ioflags, + const struct nfs_pgio_completion_ops *compl_ops) { struct nfs_server *server = NFS_SERVER(inode); struct pnfs_layoutdriver_type *ld = server->pnfs_curr_ld; if (ld == NULL) return false; - nfs_pageio_init(pgio, inode, ld->pg_write_ops, server->wsize, ioflags); + nfs_pageio_init(pgio, inode, ld->pg_write_ops, compl_ops, + server->wsize, ioflags); return true; } @@ -1162,13 +1167,15 @@ pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, } EXPORT_SYMBOL_GPL(pnfs_generic_pg_test); -static int pnfs_write_done_resend_to_mds(struct inode *inode, struct list_head *head) +static int pnfs_write_done_resend_to_mds(struct inode *inode, + struct list_head *head, + const struct nfs_pgio_completion_ops *compl_ops) { struct nfs_pageio_descriptor pgio; LIST_HEAD(failed); /* Resend all requests through the MDS */ - nfs_pageio_init_write_mds(&pgio, inode, FLUSH_STABLE); + nfs_pageio_init_write_mds(&pgio, inode, FLUSH_STABLE, compl_ops); while (!list_empty(head)) { struct nfs_page *req = nfs_list_entry(head->next); @@ -1201,7 +1208,8 @@ static void pnfs_ld_handle_write_error(struct nfs_write_data *data) } if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) data->task.tk_status = pnfs_write_done_resend_to_mds(hdr->inode, - &hdr->pages); + &hdr->pages, + hdr->completion_ops); } /* @@ -1292,7 +1300,7 @@ pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc) whdr = nfs_writehdr_alloc(); if (!whdr) { - nfs_async_write_error(&desc->pg_list); + desc->pg_completion_ops->error_cleanup(&hdr->pages); put_lseg(desc->pg_lseg); desc->pg_lseg = NULL; return -ENOMEM; @@ -1309,18 +1317,20 @@ pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc) } else pnfs_do_multiple_writes(desc, &hdr->rpc_list, desc->pg_ioflags); if (atomic_dec_and_test(&hdr->refcnt)) - nfs_write_completion(hdr); + hdr->completion_ops->completion(hdr); return ret; } EXPORT_SYMBOL_GPL(pnfs_generic_pg_writepages); -static int pnfs_read_done_resend_to_mds(struct inode *inode, struct list_head *head) +static int pnfs_read_done_resend_to_mds(struct inode *inode, + struct list_head *head, + const struct nfs_pgio_completion_ops *compl_ops) { struct nfs_pageio_descriptor pgio; LIST_HEAD(failed); /* Resend all requests through the MDS */ - nfs_pageio_init_read_mds(&pgio, inode); + nfs_pageio_init_read_mds(&pgio, inode, compl_ops); while (!list_empty(head)) { struct nfs_page *req = nfs_list_entry(head->next); @@ -1349,7 +1359,8 @@ static void pnfs_ld_handle_read_error(struct nfs_read_data *data) } if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) data->task.tk_status = pnfs_read_done_resend_to_mds(hdr->inode, - &hdr->pages); + &hdr->pages, + hdr->completion_ops); } /* @@ -1443,7 +1454,7 @@ pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc) rhdr = nfs_readhdr_alloc(); if (!rhdr) { - nfs_async_read_error(&desc->pg_list); + desc->pg_completion_ops->error_cleanup(&desc->pg_list); ret = -ENOMEM; put_lseg(desc->pg_lseg); desc->pg_lseg = NULL; @@ -1461,7 +1472,7 @@ pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc) } else pnfs_do_multiple_reads(desc, &hdr->rpc_list); if (atomic_dec_and_test(&hdr->refcnt)) - nfs_read_completion(hdr); + hdr->completion_ops->completion(hdr); return ret; } EXPORT_SYMBOL_GPL(pnfs_generic_pg_readpages); diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 442ebf6..734e4ef 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -168,8 +168,10 @@ extern int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp); void get_layout_hdr(struct pnfs_layout_hdr *lo); void put_lseg(struct pnfs_layout_segment *lseg); -bool pnfs_pageio_init_read(struct nfs_pageio_descriptor *, struct inode *); -bool pnfs_pageio_init_write(struct nfs_pageio_descriptor *, struct inode *, int); +bool pnfs_pageio_init_read(struct nfs_pageio_descriptor *, struct inode *, + const struct nfs_pgio_completion_ops *); +bool pnfs_pageio_init_write(struct nfs_pageio_descriptor *, struct inode *, + int, const struct nfs_pgio_completion_ops *); void set_pnfs_layoutdriver(struct nfs_server *, const struct nfs_fh *, u32); void unset_pnfs_layoutdriver(struct nfs_server *); diff --git a/fs/nfs/read.c b/fs/nfs/read.c index c9633b2..5e78af1 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -31,6 +31,7 @@ static const struct nfs_pageio_ops nfs_pageio_read_ops; static const struct rpc_call_ops nfs_read_common_ops; +static const struct nfs_pgio_completion_ops nfs_async_read_completion_ops; static struct kmem_cache *nfs_rdata_cachep; @@ -95,7 +96,7 @@ void nfs_readdata_release(struct nfs_read_data *rdata) else rdata->header = NULL; if (atomic_dec_and_test(&hdr->refcnt)) - nfs_read_completion(hdr); + hdr->completion_ops->completion(hdr); } static @@ -108,9 +109,10 @@ int nfs_return_empty_page(struct page *page) } void nfs_pageio_init_read_mds(struct nfs_pageio_descriptor *pgio, - struct inode *inode) + struct inode *inode, + const struct nfs_pgio_completion_ops *compl_ops) { - nfs_pageio_init(pgio, inode, &nfs_pageio_read_ops, + nfs_pageio_init(pgio, inode, &nfs_pageio_read_ops, compl_ops, NFS_SERVER(inode)->rsize, 0); } @@ -122,10 +124,11 @@ void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio) EXPORT_SYMBOL_GPL(nfs_pageio_reset_read_mds); static void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, - struct inode *inode) + struct inode *inode, + const struct nfs_pgio_completion_ops *compl_ops) { - if (!pnfs_pageio_init_read(pgio, inode)) - nfs_pageio_init_read_mds(pgio, inode); + if (!pnfs_pageio_init_read(pgio, inode, compl_ops)) + nfs_pageio_init_read_mds(pgio, inode, compl_ops); } int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode, @@ -146,7 +149,7 @@ int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode, if (len < PAGE_CACHE_SIZE) zero_user_segment(page, len, PAGE_CACHE_SIZE); - nfs_pageio_init_read(&pgio, inode); + nfs_pageio_init_read(&pgio, inode, &nfs_async_read_completion_ops); nfs_pageio_add_request(&pgio, new); nfs_pageio_complete(&pgio); return 0; @@ -170,7 +173,7 @@ static void nfs_readpage_release(struct nfs_page *req) } /* Note io was page aligned */ -void nfs_read_completion(struct nfs_pgio_header *hdr) +static void nfs_read_completion(struct nfs_pgio_header *hdr) { unsigned long bytes = 0; @@ -300,7 +303,7 @@ nfs_do_multiple_reads(struct list_head *head, return ret; } -void +static void nfs_async_read_error(struct list_head *head) { struct nfs_page *req; @@ -312,6 +315,11 @@ nfs_async_read_error(struct list_head *head) } } +static const struct nfs_pgio_completion_ops nfs_async_read_completion_ops = { + .error_cleanup = nfs_async_read_error, + .completion = nfs_read_completion, +}; + /* * Generate multiple requests to fill a single page. * @@ -362,7 +370,7 @@ out_bad: list_del(&data->list); nfs_readdata_release(data); } - nfs_async_read_error(&hdr->pages); + desc->pg_completion_ops->error_cleanup(&hdr->pages); return -ENOMEM; } @@ -378,7 +386,7 @@ static int nfs_pagein_one(struct nfs_pageio_descriptor *desc, data = nfs_readdata_alloc(hdr, nfs_page_array_len(desc->pg_base, desc->pg_count)); if (!data) { - nfs_async_read_error(head); + desc->pg_completion_ops->error_cleanup(head); ret = -ENOMEM; goto out; } @@ -414,7 +422,7 @@ static int nfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc) rhdr = nfs_readhdr_alloc(); if (!rhdr) { - nfs_async_read_error(&desc->pg_list); + desc->pg_completion_ops->error_cleanup(&desc->pg_list); return -ENOMEM; } hdr = &rhdr->header; @@ -427,7 +435,7 @@ static int nfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc) else set_bit(NFS_IOHDR_REDO, &hdr->flags); if (atomic_dec_and_test(&hdr->refcnt)) - nfs_read_completion(hdr); + hdr->completion_ops->completion(hdr); return ret; } @@ -652,7 +660,7 @@ int nfs_readpages(struct file *filp, struct address_space *mapping, if (ret == 0) goto read_complete; /* all pages were read */ - nfs_pageio_init_read(&pgio, inode); + nfs_pageio_init_read(&pgio, inode, &nfs_async_read_completion_ops); ret = read_cache_pages(mapping, pages, readpage_async_filler, &desc); diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 076075e..1503972 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -40,10 +40,12 @@ * Local function declarations */ static void nfs_pageio_init_write(struct nfs_pageio_descriptor *desc, - struct inode *inode, int ioflags); + struct inode *inode, int ioflags, + const struct nfs_pgio_completion_ops *compl_ops); static void nfs_redirty_request(struct nfs_page *req); static const struct rpc_call_ops nfs_write_common_ops; static const struct rpc_call_ops nfs_commit_ops; +static const struct nfs_pgio_completion_ops nfs_async_write_completion_ops; static struct kmem_cache *nfs_wdata_cachep; static mempool_t *nfs_wdata_mempool; @@ -128,7 +130,7 @@ void nfs_writedata_release(struct nfs_write_data *wdata) else wdata->header = NULL; if (atomic_dec_and_test(&hdr->refcnt)) - nfs_write_completion(hdr); + hdr->completion_ops->completion(hdr); } static void nfs_context_set_write_error(struct nfs_open_context *ctx, int error) @@ -337,7 +339,8 @@ static int nfs_writepage_locked(struct page *page, struct writeback_control *wbc struct nfs_pageio_descriptor pgio; int err; - nfs_pageio_init_write(&pgio, page->mapping->host, wb_priority(wbc)); + nfs_pageio_init_write(&pgio, page->mapping->host, wb_priority(wbc), + &nfs_async_write_completion_ops); err = nfs_do_writepage(page, wbc, &pgio); nfs_pageio_complete(&pgio); if (err < 0) @@ -380,7 +383,8 @@ int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc) nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGES); - nfs_pageio_init_write(&pgio, inode, wb_priority(wbc)); + nfs_pageio_init_write(&pgio, inode, wb_priority(wbc), + &nfs_async_write_completion_ops); err = write_cache_pages(mapping, wbc, nfs_writepages_callback, &pgio); nfs_pageio_complete(&pgio); @@ -558,7 +562,7 @@ int nfs_write_need_commit(struct nfs_write_data *data) #endif -void nfs_write_completion(struct nfs_pgio_header *hdr) +static void nfs_write_completion(struct nfs_pgio_header *hdr) { unsigned long bytes = 0; @@ -1000,7 +1004,7 @@ static void nfs_redirty_request(struct nfs_page *req) nfs_end_page_writeback(page); } -void nfs_async_write_error(struct list_head *head) +static void nfs_async_write_error(struct list_head *head) { struct nfs_page *req; @@ -1011,6 +1015,11 @@ void nfs_async_write_error(struct list_head *head) } } +static const struct nfs_pgio_completion_ops nfs_async_write_completion_ops = { + .error_cleanup = nfs_async_write_error, + .completion = nfs_write_completion, +}; + /* * Generate multiple small requests to write out a single * contiguous dirty area on one page. @@ -1060,7 +1069,7 @@ out_bad: list_del(&data->list); nfs_writedata_release(data); } - nfs_async_write_error(&hdr->pages); + desc->pg_completion_ops->error_cleanup(&hdr->pages); return -ENOMEM; } @@ -1084,7 +1093,7 @@ static int nfs_flush_one(struct nfs_pageio_descriptor *desc, data = nfs_writedata_alloc(hdr, nfs_page_array_len(desc->pg_base, desc->pg_count)); if (!data) { - nfs_async_write_error(head); + desc->pg_completion_ops->error_cleanup(head); ret = -ENOMEM; goto out; } @@ -1125,7 +1134,7 @@ static int nfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc) whdr = nfs_writehdr_alloc(); if (!whdr) { - nfs_async_write_error(&desc->pg_list); + desc->pg_completion_ops->error_cleanup(&hdr->pages); return -ENOMEM; } hdr = &whdr->header; @@ -1139,7 +1148,7 @@ static int nfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc) else set_bit(NFS_IOHDR_REDO, &hdr->flags); if (atomic_dec_and_test(&hdr->refcnt)) - nfs_write_completion(hdr); + hdr->completion_ops->completion(hdr); return ret; } @@ -1149,9 +1158,10 @@ static const struct nfs_pageio_ops nfs_pageio_write_ops = { }; void nfs_pageio_init_write_mds(struct nfs_pageio_descriptor *pgio, - struct inode *inode, int ioflags) + struct inode *inode, int ioflags, + const struct nfs_pgio_completion_ops *compl_ops) { - nfs_pageio_init(pgio, inode, &nfs_pageio_write_ops, + nfs_pageio_init(pgio, inode, &nfs_pageio_write_ops, compl_ops, NFS_SERVER(inode)->wsize, ioflags); } @@ -1163,10 +1173,11 @@ void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio) EXPORT_SYMBOL_GPL(nfs_pageio_reset_write_mds); static void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, - struct inode *inode, int ioflags) + struct inode *inode, int ioflags, + const struct nfs_pgio_completion_ops *compl_ops) { - if (!pnfs_pageio_init_write(pgio, inode, ioflags)) - nfs_pageio_init_write_mds(pgio, inode, ioflags); + if (!pnfs_pageio_init_write(pgio, inode, ioflags, compl_ops)) + nfs_pageio_init_write_mds(pgio, inode, ioflags, compl_ops); } void nfs_write_prepare(struct rpc_task *task, void *calldata) diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index 5c52034..bc5b7a5 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -67,6 +67,7 @@ struct nfs_pageio_descriptor { int pg_ioflags; int pg_error; const struct rpc_call_ops *pg_rpc_callops; + const struct nfs_pgio_completion_ops *pg_completion_ops; struct pnfs_layout_segment *pg_lseg; }; @@ -83,6 +84,7 @@ extern void nfs_release_request(struct nfs_page *req); extern void nfs_pageio_init(struct nfs_pageio_descriptor *desc, struct inode *inode, const struct nfs_pageio_ops *pg_ops, + const struct nfs_pgio_completion_ops *compl_ops, size_t bsize, int how); extern int nfs_pageio_add_request(struct nfs_pageio_descriptor *, diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 0d17db7..6fa1d22 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1207,6 +1207,7 @@ struct nfs_pgio_header { loff_t io_start; const struct rpc_call_ops *mds_ops; void (*release) (struct nfs_pgio_header *hdr); + const struct nfs_pgio_completion_ops *completion_ops; spinlock_t lock; /* fields protected by lock */ int pnfs_error; @@ -1261,6 +1262,11 @@ struct nfs_commit_data { int (*commit_done_cb) (struct rpc_task *task, struct nfs_commit_data *data); }; +struct nfs_pgio_completion_ops { + void (*error_cleanup)(struct list_head *head); + void (*completion)(struct nfs_pgio_header *hdr); +}; + struct nfs_unlinkdata { struct hlist_node list; struct nfs_removeargs args; -- cgit v0.10.2 From 9533da2979757258d3fd5429d830a297013d69ed Mon Sep 17 00:00:00 2001 From: Fred Isaman Date: Fri, 20 Apr 2012 14:47:49 -0400 Subject: NFS: remove unused wb_complete field from struct nfs_page Signed-off-by: Fred Isaman Signed-off-by: Trond Myklebust diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 4cf2a68..5d01a16 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -114,7 +114,6 @@ nfs_create_request(struct nfs_open_context *ctx, struct inode *inode, * long write-back delay. This will be adjusted in * update_nfs_request below if the region is not locked. */ req->wb_page = page; - atomic_set(&req->wb_complete, 0); req->wb_index = page->index; page_cache_get(page); BUG_ON(PagePrivate(page)); diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 1503972..705bf01 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1059,7 +1059,6 @@ static int nfs_flush_multi(struct nfs_pageio_descriptor *desc, nbytes -= len; offset += len; } while (nbytes != 0); - atomic_set(&req->wb_complete, requests); desc->pg_rpc_callops = &nfs_write_common_ops; return ret; diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index bc5b7a5..0a5b63f 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -36,7 +36,6 @@ struct nfs_page { struct page *wb_page; /* page to read in/write out */ struct nfs_open_context *wb_context; /* File state context info */ struct nfs_lock_context *wb_lock_context; /* lock context info */ - atomic_t wb_complete; /* i/os we're waiting for */ pgoff_t wb_index; /* Offset >> PAGE_CACHE_SHIFT */ unsigned int wb_offset, /* Offset & ~PAGE_CACHE_MASK */ wb_pgbase, /* Start of page data */ -- cgit v0.10.2 From 1825a0d08f22463e5a8f4b1636473efd057a3479 Mon Sep 17 00:00:00 2001 From: Fred Isaman Date: Fri, 20 Apr 2012 19:55:31 -0400 Subject: NFS: prepare coalesce testing for directio The coalesce code made assumptions that will no longer be true once non-page aligned io occurs. This introduces no change in current behavior, but allows for more general situations to come. Signed-off-by: Fred Isaman Signed-off-by: Trond Myklebust diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index 02d8170..e40523f 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c @@ -796,6 +796,16 @@ filelayout_pg_init_read(struct nfs_pageio_descriptor *pgio, { BUG_ON(pgio->pg_lseg != NULL); + if (req->wb_offset != req->wb_pgbase) { + /* + * Handling unaligned pages is difficult, because have to + * somehow split a req in two in certain cases in the + * pg.test code. Avoid this by just not using pnfs + * in this case. + */ + nfs_pageio_reset_read_mds(pgio); + return; + } pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, req->wb_context, 0, @@ -815,6 +825,8 @@ filelayout_pg_init_write(struct nfs_pageio_descriptor *pgio, BUG_ON(pgio->pg_lseg != NULL); + if (req->wb_offset != req->wb_pgbase) + goto out_mds; pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, req->wb_context, 0, diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 5d01a16..638ca7f 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -280,12 +280,12 @@ static bool nfs_can_coalesce_requests(struct nfs_page *prev, return false; if (req->wb_context->state != prev->wb_context->state) return false; - if (req->wb_index != (prev->wb_index + 1)) - return false; if (req->wb_pgbase != 0) return false; if (prev->wb_pgbase + prev->wb_bytes != PAGE_CACHE_SIZE) return false; + if (req_offset(req) != req_offset(prev) + prev->wb_bytes) + return false; return pgio->pg_ops->pg_test(pgio, prev, req); } diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index b3a0c01..4da05e4 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1082,6 +1082,10 @@ pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *r { BUG_ON(pgio->pg_lseg != NULL); + if (req->wb_offset != req->wb_pgbase) { + nfs_pageio_reset_read_mds(pgio); + return; + } pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, req->wb_context, req_offset(req), @@ -1100,6 +1104,10 @@ pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page * { BUG_ON(pgio->pg_lseg != NULL); + if (req->wb_offset != req->wb_pgbase) { + nfs_pageio_reset_write_mds(pgio); + return; + } pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, req->wb_context, req_offset(req), -- cgit v0.10.2 From 584aa810b6240d88c28113a90c5029449814a3b5 Mon Sep 17 00:00:00 2001 From: Fred Isaman Date: Fri, 20 Apr 2012 14:47:51 -0400 Subject: NFS: rewrite directio read to use async coalesce code This also has the advantage that it allows directio to use pnfs. Signed-off-by: Fred Isaman Signed-off-by: Trond Myklebust diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 22a40c4..4ba9a2c 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -124,22 +124,6 @@ ssize_t nfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, loff_ return -EINVAL; } -static void nfs_direct_dirty_pages(struct page **pages, unsigned int pgbase, size_t count) -{ - unsigned int npages; - unsigned int i; - - if (count == 0) - return; - pages += (pgbase >> PAGE_SHIFT); - npages = (count + (pgbase & ~PAGE_MASK) + PAGE_SIZE - 1) >> PAGE_SHIFT; - for (i = 0; i < npages; i++) { - struct page *page = pages[i]; - if (!PageCompound(page)) - set_page_dirty(page); - } -} - static void nfs_direct_release_pages(struct page **pages, unsigned int npages) { unsigned int i; @@ -226,58 +210,92 @@ static void nfs_direct_complete(struct nfs_direct_req *dreq) nfs_direct_req_release(dreq); } -/* - * We must hold a reference to all the pages in this direct read request - * until the RPCs complete. This could be long *after* we are woken up in - * nfs_direct_wait (for instance, if someone hits ^C on a slow server). - */ -static void nfs_direct_read_result(struct rpc_task *task, void *calldata) +void nfs_direct_readpage_release(struct nfs_page *req) { - struct nfs_read_data *data = calldata; - - nfs_readpage_result(task, data); + dprintk("NFS: direct read done (%s/%lld %d@%lld)\n", + req->wb_context->dentry->d_inode->i_sb->s_id, + (long long)NFS_FILEID(req->wb_context->dentry->d_inode), + req->wb_bytes, + (long long)req_offset(req)); + nfs_release_request(req); } -static void nfs_direct_read_release(void *calldata) +static void nfs_direct_read_completion(struct nfs_pgio_header *hdr) { + unsigned long bytes = 0; + struct nfs_direct_req *dreq = hdr->dreq; - struct nfs_read_data *data = calldata; - struct nfs_direct_req *dreq = (struct nfs_direct_req *)data->header->req; - int status = data->task.tk_status; + if (test_bit(NFS_IOHDR_REDO, &hdr->flags)) + goto out_put; spin_lock(&dreq->lock); - if (unlikely(status < 0)) { - dreq->error = status; - spin_unlock(&dreq->lock); + if (test_bit(NFS_IOHDR_ERROR, &hdr->flags) && (hdr->good_bytes == 0)) + dreq->error = hdr->error; + else + dreq->count += hdr->good_bytes; + spin_unlock(&dreq->lock); + + if (!test_bit(NFS_IOHDR_ERROR, &hdr->flags)) { + while (!list_empty(&hdr->pages)) { + struct nfs_page *req = nfs_list_entry(hdr->pages.next); + struct page *page = req->wb_page; + + if (test_bit(NFS_IOHDR_EOF, &hdr->flags)) { + if (bytes > hdr->good_bytes) + zero_user(page, 0, PAGE_SIZE); + else if (hdr->good_bytes - bytes < PAGE_SIZE) + zero_user_segment(page, + hdr->good_bytes & ~PAGE_MASK, + PAGE_SIZE); + } + bytes += req->wb_bytes; + nfs_list_remove_request(req); + nfs_direct_readpage_release(req); + if (!PageCompound(page)) + set_page_dirty(page); + page_cache_release(page); + } } else { - dreq->count += data->res.count; - spin_unlock(&dreq->lock); - nfs_direct_dirty_pages(data->pages.pagevec, - data->args.pgbase, - data->res.count); + while (!list_empty(&hdr->pages)) { + struct nfs_page *req = nfs_list_entry(hdr->pages.next); + + if (bytes < hdr->good_bytes) + if (!PageCompound(req->wb_page)) + set_page_dirty(req->wb_page); + bytes += req->wb_bytes; + page_cache_release(req->wb_page); + nfs_list_remove_request(req); + nfs_direct_readpage_release(req); + } } - nfs_direct_release_pages(data->pages.pagevec, data->pages.npages); - +out_put: if (put_dreq(dreq)) nfs_direct_complete(dreq); - nfs_readdata_release(data); + hdr->release(hdr); } -static const struct rpc_call_ops nfs_read_direct_ops = { - .rpc_call_prepare = nfs_read_prepare, - .rpc_call_done = nfs_direct_read_result, - .rpc_release = nfs_direct_read_release, -}; - -static void nfs_direct_readhdr_release(struct nfs_read_header *rhdr) +static void nfs_sync_pgio_error(struct list_head *head) { - struct nfs_read_data *data = &rhdr->rpc_data; + struct nfs_page *req; - if (data->pages.pagevec != data->pages.page_array) - kfree(data->pages.pagevec); - nfs_readhdr_free(&rhdr->header); + while (!list_empty(head)) { + req = nfs_list_entry(head->next); + nfs_list_remove_request(req); + nfs_release_request(req); + } } +static void nfs_direct_pgio_init(struct nfs_pgio_header *hdr) +{ + get_dreq(hdr->dreq); +} + +static const struct nfs_pgio_completion_ops nfs_direct_read_completion_ops = { + .error_cleanup = nfs_sync_pgio_error, + .init_hdr = nfs_direct_pgio_init, + .completion = nfs_direct_read_completion, +}; + /* * For each rsize'd chunk of the user's buffer, dispatch an NFS READ * operation. If nfs_readdata_alloc() or get_user_pages() fails, @@ -285,118 +303,85 @@ static void nfs_direct_readhdr_release(struct nfs_read_header *rhdr) * handled automatically by nfs_direct_read_result(). Otherwise, if * no requests have been sent, just return an error. */ -static ssize_t nfs_direct_read_schedule_segment(struct nfs_direct_req *dreq, +static ssize_t nfs_direct_read_schedule_segment(struct nfs_pageio_descriptor *desc, const struct iovec *iov, loff_t pos) { + struct nfs_direct_req *dreq = desc->pg_dreq; struct nfs_open_context *ctx = dreq->ctx; struct inode *inode = ctx->dentry->d_inode; unsigned long user_addr = (unsigned long)iov->iov_base; size_t count = iov->iov_len; size_t rsize = NFS_SERVER(inode)->rsize; - struct rpc_task *task; - struct rpc_message msg = { - .rpc_cred = ctx->cred, - }; - struct rpc_task_setup task_setup_data = { - .rpc_client = NFS_CLIENT(inode), - .rpc_message = &msg, - .callback_ops = &nfs_read_direct_ops, - .workqueue = nfsiod_workqueue, - .flags = RPC_TASK_ASYNC, - }; unsigned int pgbase; int result; ssize_t started = 0; + struct page **pagevec = NULL; + unsigned int npages; do { - struct nfs_read_header *rhdr; - struct nfs_read_data *data; - struct nfs_page_array *pages; size_t bytes; + int i; pgbase = user_addr & ~PAGE_MASK; - bytes = min(rsize,count); + bytes = min(max(rsize, PAGE_SIZE), count); result = -ENOMEM; - rhdr = nfs_readhdr_alloc(); - if (unlikely(!rhdr)) - break; - data = nfs_readdata_alloc(&rhdr->header, nfs_page_array_len(pgbase, bytes)); - if (!data) { - nfs_readhdr_free(&rhdr->header); + npages = nfs_page_array_len(pgbase, bytes); + if (!pagevec) + pagevec = kmalloc(npages * sizeof(struct page *), + GFP_KERNEL); + if (!pagevec) break; - } - data->header = &rhdr->header; - atomic_inc(&data->header->refcnt); - pages = &data->pages; - down_read(¤t->mm->mmap_sem); result = get_user_pages(current, current->mm, user_addr, - pages->npages, 1, 0, pages->pagevec, NULL); + npages, 1, 0, pagevec, NULL); up_read(¤t->mm->mmap_sem); - if (result < 0) { - nfs_direct_readhdr_release(rhdr); + if (result < 0) break; - } - if ((unsigned)result < pages->npages) { + if ((unsigned)result < npages) { bytes = result * PAGE_SIZE; if (bytes <= pgbase) { - nfs_direct_release_pages(pages->pagevec, result); - nfs_direct_readhdr_release(rhdr); + nfs_direct_release_pages(pagevec, result); break; } bytes -= pgbase; - pages->npages = result; + npages = result; } - get_dreq(dreq); - - rhdr->header.req = (struct nfs_page *) dreq; - rhdr->header.inode = inode; - rhdr->header.cred = msg.rpc_cred; - data->args.fh = NFS_FH(inode); - data->args.context = get_nfs_open_context(ctx); - data->args.lock_context = dreq->l_ctx; - data->args.offset = pos; - data->args.pgbase = pgbase; - data->args.pages = pages->pagevec; - data->args.count = bytes; - data->res.fattr = &data->fattr; - data->res.eof = 0; - data->res.count = bytes; - nfs_fattr_init(&data->fattr); - msg.rpc_argp = &data->args; - msg.rpc_resp = &data->res; - - task_setup_data.task = &data->task; - task_setup_data.callback_data = data; - NFS_PROTO(inode)->read_setup(data, &msg); - - task = rpc_run_task(&task_setup_data); - if (IS_ERR(task)) - break; - - dprintk("NFS: %5u initiated direct read call " - "(req %s/%Ld, %zu bytes @ offset %Lu)\n", - task->tk_pid, - inode->i_sb->s_id, - (long long)NFS_FILEID(inode), - bytes, - (unsigned long long)data->args.offset); - rpc_put_task(task); - - started += bytes; - user_addr += bytes; - pos += bytes; - /* FIXME: Remove this unnecessary math from final patch */ - pgbase += bytes; - pgbase &= ~PAGE_MASK; - BUG_ON(pgbase != (user_addr & ~PAGE_MASK)); - - count -= bytes; + for (i = 0; i < npages; i++) { + struct nfs_page *req; + unsigned int req_len = min(bytes, PAGE_SIZE - pgbase); + /* XXX do we need to do the eof zeroing found in async_filler? */ + req = nfs_create_request(dreq->ctx, dreq->inode, + pagevec[i], + pgbase, req_len); + if (IS_ERR(req)) { + nfs_direct_release_pages(pagevec + i, + npages - i); + result = PTR_ERR(req); + break; + } + req->wb_index = pos >> PAGE_SHIFT; + req->wb_offset = pos & ~PAGE_MASK; + if (!nfs_pageio_add_request(desc, req)) { + result = desc->pg_error; + nfs_release_request(req); + nfs_direct_release_pages(pagevec + i, + npages - i); + break; + } + pgbase = 0; + bytes -= req_len; + started += req_len; + user_addr += req_len; + pos += req_len; + count -= req_len; + } } while (count != 0); + kfree(pagevec); + if (started) return started; return result < 0 ? (ssize_t) result : -EFAULT; @@ -407,15 +392,19 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq, unsigned long nr_segs, loff_t pos) { + struct nfs_pageio_descriptor desc; ssize_t result = -EINVAL; size_t requested_bytes = 0; unsigned long seg; + nfs_pageio_init_read(&desc, dreq->inode, + &nfs_direct_read_completion_ops); get_dreq(dreq); + desc.pg_dreq = dreq; for (seg = 0; seg < nr_segs; seg++) { const struct iovec *vec = &iov[seg]; - result = nfs_direct_read_schedule_segment(dreq, vec, pos); + result = nfs_direct_read_schedule_segment(&desc, vec, pos); if (result < 0) break; requested_bytes += result; @@ -424,6 +413,8 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq, pos += vec->iov_len; } + nfs_pageio_complete(&desc); + /* * If no bytes were started, return the error, and let the * generic layer handle the completion. diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 3ef8fcd..cd5d4a3 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -304,8 +304,9 @@ struct nfs_pgio_completion_ops; /* read.c */ extern struct nfs_read_header *nfs_readhdr_alloc(void); extern void nfs_readhdr_free(struct nfs_pgio_header *hdr); -extern struct nfs_read_data *nfs_readdata_alloc(struct nfs_pgio_header *hdr, - unsigned int pagecount); +extern void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, + struct inode *inode, + const struct nfs_pgio_completion_ops *compl_ops); extern int nfs_initiate_read(struct rpc_clnt *clnt, struct nfs_read_data *data, const struct rpc_call_ops *call_ops); diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 638ca7f..33a21ca 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -48,8 +48,11 @@ void nfs_pgheader_init(struct nfs_pageio_descriptor *desc, hdr->cred = hdr->req->wb_context->cred; hdr->io_start = req_offset(hdr->req); hdr->good_bytes = desc->pg_count; + hdr->dreq = desc->pg_dreq; hdr->release = release; hdr->completion_ops = desc->pg_completion_ops; + if (hdr->completion_ops->init_hdr) + hdr->completion_ops->init_hdr(hdr); } void nfs_set_pgio_error(struct nfs_pgio_header *hdr, int error, loff_t pos) @@ -116,9 +119,6 @@ nfs_create_request(struct nfs_open_context *ctx, struct inode *inode, req->wb_page = page; req->wb_index = page->index; page_cache_get(page); - BUG_ON(PagePrivate(page)); - BUG_ON(!PageLocked(page)); - BUG_ON(page->mapping->host != inode); req->wb_offset = offset; req->wb_pgbase = offset; req->wb_bytes = count; @@ -257,6 +257,7 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc, desc->pg_ioflags = io_flags; desc->pg_error = 0; desc->pg_lseg = NULL; + desc->pg_dreq = NULL; } /** diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 5e78af1..35e2dce 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -51,8 +51,8 @@ struct nfs_read_header *nfs_readhdr_alloc() return rhdr; } -struct nfs_read_data *nfs_readdata_alloc(struct nfs_pgio_header *hdr, - unsigned int pagecount) +static struct nfs_read_data *nfs_readdata_alloc(struct nfs_pgio_header *hdr, + unsigned int pagecount) { struct nfs_read_data *data, *prealloc; @@ -123,9 +123,9 @@ void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio) } EXPORT_SYMBOL_GPL(nfs_pageio_reset_read_mds); -static void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, - struct inode *inode, - const struct nfs_pgio_completion_ops *compl_ops) +void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, + struct inode *inode, + const struct nfs_pgio_completion_ops *compl_ops) { if (!pnfs_pageio_init_read(pgio, inode, compl_ops)) nfs_pageio_init_read_mds(pgio, inode, compl_ops); diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index 0a5b63f..f9ee9eb 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -68,6 +68,7 @@ struct nfs_pageio_descriptor { const struct rpc_call_ops *pg_rpc_callops; const struct nfs_pgio_completion_ops *pg_completion_ops; struct pnfs_layout_segment *pg_lseg; + struct nfs_direct_req *pg_dreq; }; #define NFS_WBACK_BUSY(req) (test_bit(PG_BUSY,&(req)->wb_flags)) diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 6fa1d22..38687b8 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1208,6 +1208,7 @@ struct nfs_pgio_header { const struct rpc_call_ops *mds_ops; void (*release) (struct nfs_pgio_header *hdr); const struct nfs_pgio_completion_ops *completion_ops; + struct nfs_direct_req *dreq; spinlock_t lock; /* fields protected by lock */ int pnfs_error; @@ -1221,8 +1222,6 @@ struct nfs_read_header { struct nfs_read_data rpc_data; }; -struct nfs_direct_req; - struct nfs_write_data { struct nfs_pgio_header *header; struct list_head list; @@ -1264,6 +1263,7 @@ struct nfs_commit_data { struct nfs_pgio_completion_ops { void (*error_cleanup)(struct list_head *head); + void (*init_hdr)(struct nfs_pgio_header *hdr); void (*completion)(struct nfs_pgio_header *hdr); }; -- cgit v0.10.2 From 84c53ab5c093058c756dcef1879d38be6de90a3c Mon Sep 17 00:00:00 2001 From: Fred Isaman Date: Fri, 20 Apr 2012 14:47:52 -0400 Subject: NFS: create nfs_generic_commit_list Simple refactoring. Signed-off-by: Fred Isaman Signed-off-by: Trond Myklebust diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 705bf01..2500f1c 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1523,6 +1523,17 @@ static const struct rpc_call_ops nfs_commit_ops = { .rpc_release = nfs_commit_release, }; +static int nfs_generic_commit_list(struct inode *inode, struct list_head *head, + int how) +{ + int status; + + status = pnfs_commit_list(inode, head, how); + if (status == PNFS_NOT_ATTEMPTED) + status = nfs_commit_list(inode, head, how); + return status; +} + int nfs_commit_inode(struct inode *inode, int how) { LIST_HEAD(head); @@ -1536,9 +1547,7 @@ int nfs_commit_inode(struct inode *inode, int how) if (res) { int error; - error = pnfs_commit_list(inode, &head, how); - if (error == PNFS_NOT_ATTEMPTED) - error = nfs_commit_list(inode, &head, how); + error = nfs_generic_commit_list(inode, &head, how); if (error < 0) return error; if (!may_wait) -- cgit v0.10.2 From ea2cf2282b4278461266013e9c002ee1c66700ff Mon Sep 17 00:00:00 2001 From: Fred Isaman Date: Fri, 20 Apr 2012 14:47:53 -0400 Subject: NFS: create struct nfs_commit_info It is COMMIT that is handled the most differently between the paged and direct paths. Create a structure that encapsulates everything either path needs to know about the commit state. We could use void to hide some of the layout driver stuff, but Trond suggests pulling it out to ensure type checking, given the huge changes being made, and the fact that it doesn't interfere with other drivers. Signed-off-by: Fred Isaman Signed-off-by: Trond Myklebust diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index e8bbfa5..59a12c6 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1547,7 +1547,7 @@ static inline void nfs4_init_once(struct nfs_inode *nfsi) nfsi->delegation_state = 0; init_rwsem(&nfsi->rwsem); nfsi->layout = NULL; - atomic_set(&nfsi->commits_outstanding, 0); + atomic_set(&nfsi->commit_info.rpcs_out, 0); #endif } @@ -1559,9 +1559,9 @@ static void init_once(void *foo) INIT_LIST_HEAD(&nfsi->open_files); INIT_LIST_HEAD(&nfsi->access_cache_entry_lru); INIT_LIST_HEAD(&nfsi->access_cache_inode_lru); - INIT_LIST_HEAD(&nfsi->commit_list); + INIT_LIST_HEAD(&nfsi->commit_info.list); nfsi->npages = 0; - nfsi->ncommit = 0; + nfsi->commit_info.ncommit = 0; atomic_set(&nfsi->silly_count, 1); INIT_HLIST_HEAD(&nfsi->silly_list); init_waitqueue_head(&nfsi->waitqueue); diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index cd5d4a3..145e9e7 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -346,12 +346,18 @@ extern void nfs_init_commit(struct nfs_commit_data *data, struct list_head *head, struct pnfs_layout_segment *lseg); void nfs_retry_commit(struct list_head *page_list, - struct pnfs_layout_segment *lseg); + struct pnfs_layout_segment *lseg, + struct nfs_commit_info *cinfo); void nfs_commit_clear_lock(struct nfs_inode *nfsi); void nfs_commitdata_release(struct nfs_commit_data *data); void nfs_commit_release_pages(struct nfs_commit_data *data); -void nfs_request_add_commit_list(struct nfs_page *req, struct list_head *head); -void nfs_request_remove_commit_list(struct nfs_page *req); +void nfs_request_add_commit_list(struct nfs_page *req, struct list_head *dst, + struct nfs_commit_info *cinfo); +void nfs_request_remove_commit_list(struct nfs_page *req, + struct nfs_commit_info *cinfo); +void nfs_init_cinfo(struct nfs_commit_info *cinfo, + struct inode *inode, + struct nfs_direct_req *dreq); #ifdef CONFIG_MIGRATION extern int nfs_migrate_page(struct address_space *, diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index e40523f..fe2cb55 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c @@ -347,9 +347,11 @@ static void filelayout_commit_count_stats(struct rpc_task *task, void *data) static void filelayout_commit_release(void *calldata) { struct nfs_commit_data *data = calldata; + struct nfs_commit_info cinfo; nfs_commit_release_pages(data); - if (atomic_dec_and_test(&NFS_I(data->inode)->commits_outstanding)) + nfs_init_cinfo(&cinfo, data->inode, data->dreq); + if (atomic_dec_and_test(&cinfo.mds->rpcs_out)) nfs_commit_clear_lock(NFS_I(data->inode)); put_lseg(data->lseg); nfs_commitdata_release(data); @@ -695,17 +697,16 @@ filelayout_free_lseg(struct pnfs_layout_segment *lseg) static int filelayout_alloc_commit_info(struct pnfs_layout_segment *lseg, + struct nfs_commit_info *cinfo, gfp_t gfp_flags) { struct nfs4_filelayout_segment *fl = FILELAYOUT_LSEG(lseg); - struct nfs4_filelayout *flo = FILELAYOUT_FROM_HDR(lseg->pls_layout); - - struct nfs4_fl_commit_bucket *buckets; + struct pnfs_commit_bucket *buckets; int size; if (fl->commit_through_mds) return 0; - if (flo->commit_info.nbuckets != 0) { + if (cinfo->ds->nbuckets != 0) { /* This assumes there is only one IOMODE_RW lseg. What * we really want to do is have a layout_hdr level * dictionary of keys, each @@ -718,25 +719,25 @@ filelayout_alloc_commit_info(struct pnfs_layout_segment *lseg, size = (fl->stripe_type == STRIPE_SPARSE) ? fl->dsaddr->ds_num : fl->dsaddr->stripe_count; - buckets = kcalloc(size, sizeof(struct nfs4_fl_commit_bucket), + buckets = kcalloc(size, sizeof(struct pnfs_commit_bucket), gfp_flags); if (!buckets) return -ENOMEM; else { int i; - spin_lock(&lseg->pls_layout->plh_inode->i_lock); - if (flo->commit_info.nbuckets != 0) + spin_lock(cinfo->lock); + if (cinfo->ds->nbuckets != 0) kfree(buckets); else { - flo->commit_info.buckets = buckets; - flo->commit_info.nbuckets = size; + cinfo->ds->buckets = buckets; + cinfo->ds->nbuckets = size; for (i = 0; i < size; i++) { INIT_LIST_HEAD(&buckets[i].written); INIT_LIST_HEAD(&buckets[i].committing); } } - spin_unlock(&lseg->pls_layout->plh_inode->i_lock); + spin_unlock(cinfo->lock); return 0; } } @@ -821,6 +822,7 @@ static void filelayout_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) { + struct nfs_commit_info cinfo; int status; BUG_ON(pgio->pg_lseg != NULL); @@ -836,7 +838,8 @@ filelayout_pg_init_write(struct nfs_pageio_descriptor *pgio, /* If no lseg, fall back to write through mds */ if (pgio->pg_lseg == NULL) goto out_mds; - status = filelayout_alloc_commit_info(pgio->pg_lseg, GFP_NOFS); + nfs_init_cinfo(&cinfo, pgio->pg_inode, pgio->pg_dreq); + status = filelayout_alloc_commit_info(pgio->pg_lseg, &cinfo, GFP_NOFS); if (status < 0) { put_lseg(pgio->pg_lseg); pgio->pg_lseg = NULL; @@ -871,40 +874,42 @@ static u32 select_bucket_index(struct nfs4_filelayout_segment *fl, u32 j) * If this will make the bucket empty, it will need to put the lseg reference. */ static void -filelayout_clear_request_commit(struct nfs_page *req) +filelayout_clear_request_commit(struct nfs_page *req, + struct nfs_commit_info *cinfo) { struct pnfs_layout_segment *freeme = NULL; - struct inode *inode = req->wb_context->dentry->d_inode; - spin_lock(&inode->i_lock); + spin_lock(cinfo->lock); if (!test_and_clear_bit(PG_COMMIT_TO_DS, &req->wb_flags)) goto out; + cinfo->ds->nwritten--; if (list_is_singular(&req->wb_list)) { - struct nfs4_fl_commit_bucket *bucket; + struct pnfs_commit_bucket *bucket; bucket = list_first_entry(&req->wb_list, - struct nfs4_fl_commit_bucket, + struct pnfs_commit_bucket, written); freeme = bucket->wlseg; bucket->wlseg = NULL; } out: - nfs_request_remove_commit_list(req); - spin_unlock(&inode->i_lock); + nfs_request_remove_commit_list(req, cinfo); + spin_unlock(cinfo->lock); put_lseg(freeme); } static struct list_head * filelayout_choose_commit_list(struct nfs_page *req, - struct pnfs_layout_segment *lseg) + struct pnfs_layout_segment *lseg, + struct nfs_commit_info *cinfo) { struct nfs4_filelayout_segment *fl = FILELAYOUT_LSEG(lseg); u32 i, j; struct list_head *list; - struct nfs4_fl_commit_bucket *buckets; + struct pnfs_commit_bucket *buckets; if (fl->commit_through_mds) - return &NFS_I(req->wb_context->dentry->d_inode)->commit_list; + return &cinfo->mds->list; /* Note that we are calling nfs4_fl_calc_j_index on each page * that ends up being committed to a data server. An attractive @@ -914,7 +919,7 @@ filelayout_choose_commit_list(struct nfs_page *req, */ j = nfs4_fl_calc_j_index(lseg, req_offset(req)); i = select_bucket_index(fl, j); - buckets = FILELAYOUT_FROM_HDR(lseg->pls_layout)->commit_info.buckets; + buckets = cinfo->ds->buckets; list = &buckets[i].written; if (list_empty(list)) { /* Non-empty buckets hold a reference on the lseg. That ref @@ -926,17 +931,19 @@ filelayout_choose_commit_list(struct nfs_page *req, buckets[i].wlseg = get_lseg(lseg); } set_bit(PG_COMMIT_TO_DS, &req->wb_flags); + cinfo->ds->nwritten++; return list; } static void filelayout_mark_request_commit(struct nfs_page *req, - struct pnfs_layout_segment *lseg) + struct pnfs_layout_segment *lseg, + struct nfs_commit_info *cinfo) { struct list_head *list; - list = filelayout_choose_commit_list(req, lseg); - nfs_request_add_commit_list(req, list); + list = filelayout_choose_commit_list(req, lseg, cinfo); + nfs_request_add_commit_list(req, list, cinfo); } static u32 calc_ds_index_from_commit(struct pnfs_layout_segment *lseg, u32 i) @@ -993,8 +1000,9 @@ static int filelayout_initiate_commit(struct nfs_commit_data *data, int how) } static int -filelayout_scan_ds_commit_list(struct nfs4_fl_commit_bucket *bucket, int max, - spinlock_t *lock) +filelayout_scan_ds_commit_list(struct pnfs_commit_bucket *bucket, + struct nfs_commit_info *cinfo, + int max) { struct list_head *src = &bucket->written; struct list_head *dst = &bucket->committing; @@ -1004,9 +1012,9 @@ filelayout_scan_ds_commit_list(struct nfs4_fl_commit_bucket *bucket, int max, list_for_each_entry_safe(req, tmp, src, wb_list) { if (!nfs_lock_request(req)) continue; - if (cond_resched_lock(lock)) + if (cond_resched_lock(cinfo->lock)) list_safe_reset_next(req, tmp, wb_list); - nfs_request_remove_commit_list(req); + nfs_request_remove_commit_list(req, cinfo); clear_bit(PG_COMMIT_TO_DS, &req->wb_flags); nfs_list_add_request(req, dst); ret++; @@ -1014,6 +1022,8 @@ filelayout_scan_ds_commit_list(struct nfs4_fl_commit_bucket *bucket, int max, break; } if (ret) { + cinfo->ds->nwritten -= ret; + cinfo->ds->ncommitting += ret; bucket->clseg = bucket->wlseg; if (list_empty(src)) bucket->wlseg = NULL; @@ -1024,37 +1034,32 @@ filelayout_scan_ds_commit_list(struct nfs4_fl_commit_bucket *bucket, int max, } /* Move reqs from written to committing lists, returning count of number moved. - * Note called with i_lock held. + * Note called with cinfo->lock held. */ -static int filelayout_scan_commit_lists(struct inode *inode, int max, - spinlock_t *lock) +static int filelayout_scan_commit_lists(struct nfs_commit_info *cinfo, + int max) { - struct nfs4_fl_commit_info *fl_cinfo; int i, rv = 0, cnt; - fl_cinfo = &FILELAYOUT_FROM_HDR(NFS_I(inode)->layout)->commit_info; - if (fl_cinfo->nbuckets == 0) - goto out_done; - for (i = 0; i < fl_cinfo->nbuckets && max != 0; i++) { - cnt = filelayout_scan_ds_commit_list(&fl_cinfo->buckets[i], - max, lock); + for (i = 0; i < cinfo->ds->nbuckets && max != 0; i++) { + cnt = filelayout_scan_ds_commit_list(&cinfo->ds->buckets[i], + cinfo, max); max -= cnt; rv += cnt; } -out_done: return rv; } static unsigned int -alloc_ds_commits(struct inode *inode, struct list_head *list) +alloc_ds_commits(struct nfs_commit_info *cinfo, struct list_head *list) { - struct nfs4_fl_commit_info *fl_cinfo; - struct nfs4_fl_commit_bucket *bucket; + struct pnfs_ds_commit_info *fl_cinfo; + struct pnfs_commit_bucket *bucket; struct nfs_commit_data *data; int i, j; unsigned int nreq = 0; - fl_cinfo = &FILELAYOUT_FROM_HDR(NFS_I(inode)->layout)->commit_info; + fl_cinfo = cinfo->ds; bucket = fl_cinfo->buckets; for (i = 0; i < fl_cinfo->nbuckets; i++, bucket++) { if (list_empty(&bucket->committing)) @@ -1073,7 +1078,7 @@ alloc_ds_commits(struct inode *inode, struct list_head *list) for (j = i; j < fl_cinfo->nbuckets; j++, bucket++) { if (list_empty(&bucket->committing)) continue; - nfs_retry_commit(&bucket->committing, bucket->clseg); + nfs_retry_commit(&bucket->committing, bucket->clseg, cinfo); put_lseg(bucket->clseg); bucket->clseg = NULL; } @@ -1084,7 +1089,7 @@ alloc_ds_commits(struct inode *inode, struct list_head *list) /* This follows nfs_commit_list pretty closely */ static int filelayout_commit_pagelist(struct inode *inode, struct list_head *mds_pages, - int how) + int how, struct nfs_commit_info *cinfo) { struct nfs_commit_data *data, *tmp; LIST_HEAD(list); @@ -1097,17 +1102,17 @@ filelayout_commit_pagelist(struct inode *inode, struct list_head *mds_pages, list_add(&data->pages, &list); nreq++; } else - nfs_retry_commit(mds_pages, NULL); + nfs_retry_commit(mds_pages, NULL, cinfo); } - nreq += alloc_ds_commits(inode, &list); + nreq += alloc_ds_commits(cinfo, &list); if (nreq == 0) { nfs_commit_clear_lock(NFS_I(inode)); goto out; } - atomic_add(nreq, &NFS_I(inode)->commits_outstanding); + atomic_add(nreq, &cinfo->mds->rpcs_out); list_for_each_entry_safe(data, tmp, &list, pages) { list_del_init(&data->pages); @@ -1116,14 +1121,15 @@ filelayout_commit_pagelist(struct inode *inode, struct list_head *mds_pages, nfs_initiate_commit(NFS_CLIENT(inode), data, data->mds_ops, how); } else { - struct nfs4_fl_commit_info *fl_cinfo; + struct pnfs_commit_bucket *buckets; - fl_cinfo = &FILELAYOUT_FROM_HDR(data->lseg->pls_layout)->commit_info; - nfs_init_commit(data, &fl_cinfo->buckets[data->ds_commit_index].committing, data->lseg); + buckets = cinfo->ds->buckets; + nfs_init_commit(data, &buckets[data->ds_commit_index].committing, data->lseg); filelayout_initiate_commit(data, how); } } out: + cinfo->ds->ncommitting = 0; return PNFS_ATTEMPTED; } @@ -1148,6 +1154,12 @@ filelayout_free_layout_hdr(struct pnfs_layout_hdr *lo) kfree(FILELAYOUT_FROM_HDR(lo)); } +static struct pnfs_ds_commit_info * +filelayout_get_ds_info(struct inode *inode) +{ + return &FILELAYOUT_FROM_HDR(NFS_I(inode)->layout)->commit_info; +} + static struct pnfs_layoutdriver_type filelayout_type = { .id = LAYOUT_NFSV4_1_FILES, .name = "LAYOUT_NFSV4_1_FILES", @@ -1158,6 +1170,7 @@ static struct pnfs_layoutdriver_type filelayout_type = { .free_lseg = filelayout_free_lseg, .pg_read_ops = &filelayout_pg_read_ops, .pg_write_ops = &filelayout_pg_write_ops, + .get_ds_info = &filelayout_get_ds_info, .mark_request_commit = filelayout_mark_request_commit, .clear_request_commit = filelayout_clear_request_commit, .scan_commit_lists = filelayout_scan_commit_lists, diff --git a/fs/nfs/nfs4filelayout.h b/fs/nfs/nfs4filelayout.h index 333a3ac..96b89bb 100644 --- a/fs/nfs/nfs4filelayout.h +++ b/fs/nfs/nfs4filelayout.h @@ -74,18 +74,6 @@ struct nfs4_file_layout_dsaddr { struct nfs4_pnfs_ds *ds_list[1]; }; -struct nfs4_fl_commit_bucket { - struct list_head written; - struct list_head committing; - struct pnfs_layout_segment *wlseg; - struct pnfs_layout_segment *clseg; -}; - -struct nfs4_fl_commit_info { - int nbuckets; - struct nfs4_fl_commit_bucket *buckets; -}; - struct nfs4_filelayout_segment { struct pnfs_layout_segment generic_hdr; u32 stripe_type; @@ -100,7 +88,7 @@ struct nfs4_filelayout_segment { struct nfs4_filelayout { struct pnfs_layout_hdr generic_hdr; - struct nfs4_fl_commit_info commit_info; + struct pnfs_ds_commit_info commit_info; }; static inline struct nfs4_filelayout * diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 734e4ef..4cd8760 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -94,11 +94,18 @@ struct pnfs_layoutdriver_type { const struct nfs_pageio_ops *pg_read_ops; const struct nfs_pageio_ops *pg_write_ops; + struct pnfs_ds_commit_info *(*get_ds_info) (struct inode *inode); void (*mark_request_commit) (struct nfs_page *req, - struct pnfs_layout_segment *lseg); - void (*clear_request_commit) (struct nfs_page *req); - int (*scan_commit_lists) (struct inode *inode, int max, spinlock_t *lock); - int (*commit_pagelist)(struct inode *inode, struct list_head *mds_pages, int how); + struct pnfs_layout_segment *lseg, + struct nfs_commit_info *cinfo); + void (*clear_request_commit) (struct nfs_page *req, + struct nfs_commit_info *cinfo); + int (*scan_commit_lists) (struct nfs_commit_info *cinfo, + int max); + int (*commit_pagelist)(struct inode *inode, + struct list_head *mds_pages, + int how, + struct nfs_commit_info *cinfo); /* * Return PNFS_ATTEMPTED to indicate the layout code has attempted @@ -263,49 +270,57 @@ static inline int pnfs_enabled_sb(struct nfs_server *nfss) } static inline int -pnfs_commit_list(struct inode *inode, struct list_head *mds_pages, int how) +pnfs_commit_list(struct inode *inode, struct list_head *mds_pages, int how, + struct nfs_commit_info *cinfo) { - if (!test_and_clear_bit(NFS_INO_PNFS_COMMIT, &NFS_I(inode)->flags)) + if (cinfo->ds == NULL || cinfo->ds->ncommitting == 0) return PNFS_NOT_ATTEMPTED; - return NFS_SERVER(inode)->pnfs_curr_ld->commit_pagelist(inode, mds_pages, how); + return NFS_SERVER(inode)->pnfs_curr_ld->commit_pagelist(inode, mds_pages, how, cinfo); +} + +static inline struct pnfs_ds_commit_info * +pnfs_get_ds_info(struct inode *inode) +{ + struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld; + + if (ld == NULL || ld->get_ds_info == NULL) + return NULL; + return ld->get_ds_info(inode); } static inline bool -pnfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg) +pnfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg, + struct nfs_commit_info *cinfo) { struct inode *inode = req->wb_context->dentry->d_inode; struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld; if (lseg == NULL || ld->mark_request_commit == NULL) return false; - ld->mark_request_commit(req, lseg); + ld->mark_request_commit(req, lseg, cinfo); return true; } static inline bool -pnfs_clear_request_commit(struct nfs_page *req) +pnfs_clear_request_commit(struct nfs_page *req, struct nfs_commit_info *cinfo) { struct inode *inode = req->wb_context->dentry->d_inode; struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld; if (ld == NULL || ld->clear_request_commit == NULL) return false; - ld->clear_request_commit(req); + ld->clear_request_commit(req, cinfo); return true; } static inline int -pnfs_scan_commit_lists(struct inode *inode, int max, spinlock_t *lock) +pnfs_scan_commit_lists(struct inode *inode, struct nfs_commit_info *cinfo, + int max) { - struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld; - int ret; - - if (ld == NULL || ld->scan_commit_lists == NULL) + if (cinfo->ds == NULL || cinfo->ds->nwritten == 0) return 0; - ret = ld->scan_commit_lists(inode, max, lock); - if (ret != 0) - set_bit(NFS_INO_PNFS_COMMIT, &NFS_I(inode)->flags); - return ret; + else + return NFS_SERVER(inode)->pnfs_curr_ld->scan_commit_lists(cinfo, max); } /* Should the pNFS client commit and return the layout upon a setattr */ @@ -409,25 +424,34 @@ static inline bool pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, st } static inline int -pnfs_commit_list(struct inode *inode, struct list_head *mds_pages, int how) +pnfs_commit_list(struct inode *inode, struct list_head *mds_pages, int how, + struct nfs_commit_info *cinfo) { return PNFS_NOT_ATTEMPTED; } +static inline struct pnfs_ds_commit_info * +pnfs_get_ds_info(struct inode *inode) +{ + return NULL; +} + static inline bool -pnfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg) +pnfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg, + struct nfs_commit_info *cinfo) { return false; } static inline bool -pnfs_clear_request_commit(struct nfs_page *req) +pnfs_clear_request_commit(struct nfs_page *req, struct nfs_commit_info *cinfo) { return false; } static inline int -pnfs_scan_commit_lists(struct inode *inode, int max, spinlock_t *lock) +pnfs_scan_commit_lists(struct inode *inode, struct nfs_commit_info *cinfo, + int max) { return 0; } diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 2500f1c..18bf700 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -452,65 +452,79 @@ nfs_mark_request_dirty(struct nfs_page *req) /** * nfs_request_add_commit_list - add request to a commit list * @req: pointer to a struct nfs_page - * @head: commit list head + * @dst: commit list head + * @cinfo: holds list lock and accounting info * - * This sets the PG_CLEAN bit, updates the inode global count of + * This sets the PG_CLEAN bit, updates the cinfo count of * number of outstanding requests requiring a commit as well as * the MM page stats. * - * The caller must _not_ hold the inode->i_lock, but must be + * The caller must _not_ hold the cinfo->lock, but must be * holding the nfs_page lock. */ void -nfs_request_add_commit_list(struct nfs_page *req, struct list_head *head) +nfs_request_add_commit_list(struct nfs_page *req, struct list_head *dst, + struct nfs_commit_info *cinfo) { - struct inode *inode = req->wb_context->dentry->d_inode; - set_bit(PG_CLEAN, &(req)->wb_flags); - spin_lock(&inode->i_lock); - nfs_list_add_request(req, head); - NFS_I(inode)->ncommit++; - spin_unlock(&inode->i_lock); + spin_lock(cinfo->lock); + nfs_list_add_request(req, dst); + cinfo->mds->ncommit++; + spin_unlock(cinfo->lock); inc_zone_page_state(req->wb_page, NR_UNSTABLE_NFS); inc_bdi_stat(req->wb_page->mapping->backing_dev_info, BDI_RECLAIMABLE); - __mark_inode_dirty(inode, I_DIRTY_DATASYNC); + __mark_inode_dirty(req->wb_context->dentry->d_inode, I_DIRTY_DATASYNC); } EXPORT_SYMBOL_GPL(nfs_request_add_commit_list); /** * nfs_request_remove_commit_list - Remove request from a commit list * @req: pointer to a nfs_page + * @cinfo: holds list lock and accounting info * - * This clears the PG_CLEAN bit, and updates the inode global count of + * This clears the PG_CLEAN bit, and updates the cinfo's count of * number of outstanding requests requiring a commit * It does not update the MM page stats. * - * The caller _must_ hold the inode->i_lock and the nfs_page lock. + * The caller _must_ hold the cinfo->lock and the nfs_page lock. */ void -nfs_request_remove_commit_list(struct nfs_page *req) +nfs_request_remove_commit_list(struct nfs_page *req, + struct nfs_commit_info *cinfo) { - struct inode *inode = req->wb_context->dentry->d_inode; - if (!test_and_clear_bit(PG_CLEAN, &(req)->wb_flags)) return; nfs_list_remove_request(req); - NFS_I(inode)->ncommit--; + cinfo->mds->ncommit--; } EXPORT_SYMBOL_GPL(nfs_request_remove_commit_list); +static void nfs_init_cinfo_from_inode(struct nfs_commit_info *cinfo, + struct inode *inode) +{ + cinfo->lock = &inode->i_lock; + cinfo->mds = &NFS_I(inode)->commit_info; + cinfo->ds = pnfs_get_ds_info(inode); +} + +void nfs_init_cinfo(struct nfs_commit_info *cinfo, + struct inode *inode, + struct nfs_direct_req *dreq) +{ + nfs_init_cinfo_from_inode(cinfo, inode); +} +EXPORT_SYMBOL_GPL(nfs_init_cinfo); /* * Add a request to the inode's commit list. */ static void -nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg) +nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg, + struct nfs_commit_info *cinfo) { - struct inode *inode = req->wb_context->dentry->d_inode; - - if (pnfs_mark_request_commit(req, lseg)) + if (pnfs_mark_request_commit(req, lseg, cinfo)) return; - nfs_request_add_commit_list(req, &NFS_I(inode)->commit_list); + nfs_request_add_commit_list(req, &cinfo->mds->list, cinfo); } static void @@ -525,11 +539,13 @@ nfs_clear_request_commit(struct nfs_page *req) { if (test_bit(PG_CLEAN, &req->wb_flags)) { struct inode *inode = req->wb_context->dentry->d_inode; + struct nfs_commit_info cinfo; - if (!pnfs_clear_request_commit(req)) { - spin_lock(&inode->i_lock); - nfs_request_remove_commit_list(req); - spin_unlock(&inode->i_lock); + nfs_init_cinfo_from_inode(&cinfo, inode); + if (!pnfs_clear_request_commit(req, &cinfo)) { + spin_lock(cinfo.lock); + nfs_request_remove_commit_list(req, &cinfo); + spin_unlock(cinfo.lock); } nfs_clear_page_commit(req->wb_page); } @@ -545,7 +561,8 @@ int nfs_write_need_commit(struct nfs_write_data *data) #else static void -nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg) +nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg, + struct nfs_commit_info *cinfo) { } @@ -564,10 +581,12 @@ int nfs_write_need_commit(struct nfs_write_data *data) static void nfs_write_completion(struct nfs_pgio_header *hdr) { + struct nfs_commit_info cinfo; unsigned long bytes = 0; if (test_bit(NFS_IOHDR_REDO, &hdr->flags)) goto out; + nfs_init_cinfo_from_inode(&cinfo, hdr->inode); while (!list_empty(&hdr->pages)) { struct nfs_page *req = nfs_list_entry(hdr->pages.next); struct page *page = req->wb_page; @@ -585,7 +604,7 @@ static void nfs_write_completion(struct nfs_pgio_header *hdr) goto next; } if (test_bit(NFS_IOHDR_NEED_COMMIT, &hdr->flags)) { - nfs_mark_request_commit(req, hdr->lseg); + nfs_mark_request_commit(req, hdr->lseg, &cinfo); goto next; } remove_req: @@ -599,16 +618,16 @@ out: } #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) -static int -nfs_need_commit(struct nfs_inode *nfsi) +static unsigned long +nfs_reqs_to_commit(struct nfs_commit_info *cinfo) { - return nfsi->ncommit > 0; + return cinfo->mds->ncommit; } -/* i_lock held by caller */ +/* cinfo->lock held by caller */ static int -nfs_scan_commit_list(struct list_head *src, struct list_head *dst, int max, - spinlock_t *lock) +nfs_scan_commit_list(struct list_head *src, struct list_head *dst, + struct nfs_commit_info *cinfo, int max) { struct nfs_page *req, *tmp; int ret = 0; @@ -616,9 +635,9 @@ nfs_scan_commit_list(struct list_head *src, struct list_head *dst, int max, list_for_each_entry_safe(req, tmp, src, wb_list) { if (!nfs_lock_request(req)) continue; - if (cond_resched_lock(lock)) + if (cond_resched_lock(cinfo->lock)) list_safe_reset_next(req, tmp, wb_list); - nfs_request_remove_commit_list(req); + nfs_request_remove_commit_list(req, cinfo); nfs_list_add_request(req, dst); ret++; if (ret == max) @@ -630,37 +649,38 @@ nfs_scan_commit_list(struct list_head *src, struct list_head *dst, int max, /* * nfs_scan_commit - Scan an inode for commit requests * @inode: NFS inode to scan - * @dst: destination list + * @dst: mds destination list + * @cinfo: mds and ds lists of reqs ready to commit * * Moves requests from the inode's 'commit' request list. * The requests are *not* checked to ensure that they form a contiguous set. */ static int -nfs_scan_commit(struct inode *inode, struct list_head *dst) +nfs_scan_commit(struct inode *inode, struct list_head *dst, + struct nfs_commit_info *cinfo) { - struct nfs_inode *nfsi = NFS_I(inode); int ret = 0; - spin_lock(&inode->i_lock); - if (nfsi->ncommit > 0) { + spin_lock(cinfo->lock); + if (cinfo->mds->ncommit > 0) { const int max = INT_MAX; - ret = nfs_scan_commit_list(&nfsi->commit_list, dst, max, - &inode->i_lock); - ret += pnfs_scan_commit_lists(inode, max - ret, - &inode->i_lock); + ret = nfs_scan_commit_list(&cinfo->mds->list, dst, + cinfo, max); + ret += pnfs_scan_commit_lists(inode, cinfo, max - ret); } - spin_unlock(&inode->i_lock); + spin_unlock(cinfo->lock); return ret; } #else -static inline int nfs_need_commit(struct nfs_inode *nfsi) +static unsigned long nfs_reqs_to_commit(struct nfs_commit_info *cinfo) { return 0; } -static inline int nfs_scan_commit(struct inode *inode, struct list_head *dst) +static inline int nfs_scan_commit(struct inode *inode, struct list_head *dst, + struct nfs_commit_info *cinfo) { return 0; } @@ -929,7 +949,7 @@ EXPORT_SYMBOL_GPL(nfs_initiate_write); */ static void nfs_write_rpcsetup(struct nfs_write_data *data, unsigned int count, unsigned int offset, - int how) + int how, struct nfs_commit_info *cinfo) { struct nfs_page *req = data->header->req; @@ -950,7 +970,7 @@ static void nfs_write_rpcsetup(struct nfs_write_data *data, case 0: break; case FLUSH_COND_STABLE: - if (nfs_need_commit(NFS_I(data->header->inode))) + if (nfs_reqs_to_commit(cinfo)) break; default: data->args.stable = NFS_FILE_SYNC; @@ -1034,12 +1054,14 @@ static int nfs_flush_multi(struct nfs_pageio_descriptor *desc, unsigned int offset; int requests = 0; int ret = 0; + struct nfs_commit_info cinfo; + nfs_init_cinfo(&cinfo, desc->pg_inode, desc->pg_dreq); nfs_list_remove_request(req); nfs_list_add_request(req, &hdr->pages); if ((desc->pg_ioflags & FLUSH_COND_STABLE) && - (desc->pg_moreio || NFS_I(desc->pg_inode)->ncommit || + (desc->pg_moreio || nfs_reqs_to_commit(&cinfo) || desc->pg_count > wsize)) desc->pg_ioflags &= ~FLUSH_COND_STABLE; @@ -1053,7 +1075,7 @@ static int nfs_flush_multi(struct nfs_pageio_descriptor *desc, if (!data) goto out_bad; data->pages.pagevec[0] = page; - nfs_write_rpcsetup(data, len, offset, desc->pg_ioflags); + nfs_write_rpcsetup(data, len, offset, desc->pg_ioflags, &cinfo); list_add(&data->list, &hdr->rpc_list); requests++; nbytes -= len; @@ -1088,6 +1110,7 @@ static int nfs_flush_one(struct nfs_pageio_descriptor *desc, struct nfs_write_data *data; struct list_head *head = &desc->pg_list; int ret = 0; + struct nfs_commit_info cinfo; data = nfs_writedata_alloc(hdr, nfs_page_array_len(desc->pg_base, desc->pg_count)); @@ -1097,6 +1120,7 @@ static int nfs_flush_one(struct nfs_pageio_descriptor *desc, goto out; } + nfs_init_cinfo(&cinfo, desc->pg_inode, desc->pg_dreq); pages = data->pages.pagevec; while (!list_empty(head)) { req = nfs_list_entry(head->next); @@ -1106,11 +1130,11 @@ static int nfs_flush_one(struct nfs_pageio_descriptor *desc, } if ((desc->pg_ioflags & FLUSH_COND_STABLE) && - (desc->pg_moreio || NFS_I(desc->pg_inode)->ncommit)) + (desc->pg_moreio || nfs_reqs_to_commit(&cinfo))) desc->pg_ioflags &= ~FLUSH_COND_STABLE; /* Set up the argument struct */ - nfs_write_rpcsetup(data, desc->pg_count, 0, desc->pg_ioflags); + nfs_write_rpcsetup(data, desc->pg_count, 0, desc->pg_ioflags, &cinfo); list_add(&data->list, &hdr->rpc_list); desc->pg_rpc_callops = &nfs_write_common_ops; out: @@ -1417,14 +1441,15 @@ void nfs_init_commit(struct nfs_commit_data *data, EXPORT_SYMBOL_GPL(nfs_init_commit); void nfs_retry_commit(struct list_head *page_list, - struct pnfs_layout_segment *lseg) + struct pnfs_layout_segment *lseg, + struct nfs_commit_info *cinfo) { struct nfs_page *req; while (!list_empty(page_list)) { req = nfs_list_entry(page_list->next); nfs_list_remove_request(req); - nfs_mark_request_commit(req, lseg); + nfs_mark_request_commit(req, lseg, cinfo); dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS); dec_bdi_stat(req->wb_page->mapping->backing_dev_info, BDI_RECLAIMABLE); @@ -1437,7 +1462,8 @@ EXPORT_SYMBOL_GPL(nfs_retry_commit); * Commit dirty pages */ static int -nfs_commit_list(struct inode *inode, struct list_head *head, int how) +nfs_commit_list(struct inode *inode, struct list_head *head, int how, + struct nfs_commit_info *cinfo) { struct nfs_commit_data *data; @@ -1450,7 +1476,7 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how) nfs_init_commit(data, head, NULL); return nfs_initiate_commit(NFS_CLIENT(inode), data, data->mds_ops, how); out_bad: - nfs_retry_commit(head, NULL); + nfs_retry_commit(head, NULL, cinfo); nfs_commit_clear_lock(NFS_I(inode)); return -ENOMEM; } @@ -1524,30 +1550,32 @@ static const struct rpc_call_ops nfs_commit_ops = { }; static int nfs_generic_commit_list(struct inode *inode, struct list_head *head, - int how) + int how, struct nfs_commit_info *cinfo) { int status; - status = pnfs_commit_list(inode, head, how); + status = pnfs_commit_list(inode, head, how, cinfo); if (status == PNFS_NOT_ATTEMPTED) - status = nfs_commit_list(inode, head, how); + status = nfs_commit_list(inode, head, how, cinfo); return status; } int nfs_commit_inode(struct inode *inode, int how) { LIST_HEAD(head); + struct nfs_commit_info cinfo; int may_wait = how & FLUSH_SYNC; int res; res = nfs_commit_set_lock(NFS_I(inode), may_wait); if (res <= 0) goto out_mark_dirty; - res = nfs_scan_commit(inode, &head); + nfs_init_cinfo_from_inode(&cinfo, inode); + res = nfs_scan_commit(inode, &head, &cinfo); if (res) { int error; - error = nfs_generic_commit_list(inode, &head, how); + error = nfs_generic_commit_list(inode, &head, how, &cinfo); if (error < 0) return error; if (!may_wait) @@ -1578,14 +1606,14 @@ static int nfs_commit_unstable_pages(struct inode *inode, struct writeback_contr int ret = 0; /* no commits means nothing needs to be done */ - if (!nfsi->ncommit) + if (!nfsi->commit_info.ncommit) return ret; if (wbc->sync_mode == WB_SYNC_NONE) { /* Don't commit yet if this is a non-blocking flush and there * are a lot of outstanding writes for this mapping. */ - if (nfsi->ncommit <= (nfsi->npages >> 1)) + if (nfsi->commit_info.ncommit <= (nfsi->npages >> 1)) goto out_mark_dirty; /* don't wait for the COMMIT response */ diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 8d3a2b8..8a88c16 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -179,8 +179,7 @@ struct nfs_inode { __be32 cookieverf[2]; unsigned long npages; - unsigned long ncommit; - struct list_head commit_list; + struct nfs_mds_commit_info commit_info; /* Open contexts for shared mmap writes */ struct list_head open_files; @@ -201,7 +200,6 @@ struct nfs_inode { /* pNFS layout information */ struct pnfs_layout_hdr *layout; - atomic_t commits_outstanding; #endif /* CONFIG_NFS_V4*/ #ifdef CONFIG_NFS_FSCACHE struct fscache_cookie *fscache; @@ -230,7 +228,6 @@ struct nfs_inode { #define NFS_INO_FSCACHE (5) /* inode can be cached by FS-Cache */ #define NFS_INO_FSCACHE_LOCK (6) /* FS-Cache cookie management lock */ #define NFS_INO_COMMIT (7) /* inode is committing unstable writes */ -#define NFS_INO_PNFS_COMMIT (8) /* use pnfs code for commit */ #define NFS_INO_LAYOUTCOMMIT (9) /* layoutcommit required */ #define NFS_INO_LAYOUTCOMMITTING (10) /* layoutcommit inflight */ diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 38687b8..224e1e8 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1079,6 +1079,21 @@ struct nfstime4 { }; #ifdef CONFIG_NFS_V4_1 + +struct pnfs_commit_bucket { + struct list_head written; + struct list_head committing; + struct pnfs_layout_segment *wlseg; + struct pnfs_layout_segment *clseg; +}; + +struct pnfs_ds_commit_info { + int nwritten; + int ncommitting; + int nbuckets; + struct pnfs_commit_bucket *buckets; +}; + #define NFS4_EXCHANGE_ID_LEN (48) struct nfs41_exchange_id_args { struct nfs_client *client; @@ -1242,6 +1257,18 @@ struct nfs_write_header { struct nfs_write_data rpc_data; }; +struct nfs_mds_commit_info { + atomic_t rpcs_out; + unsigned long ncommit; + struct list_head list; +}; + +struct nfs_commit_info { + spinlock_t *lock; + struct nfs_mds_commit_info *mds; + struct pnfs_ds_commit_info *ds; +}; + struct nfs_commit_data { struct rpc_task task; struct inode *inode; -- cgit v0.10.2 From f453a54a01c7c0453ad9550906e3d2663dd486ac Mon Sep 17 00:00:00 2001 From: Fred Isaman Date: Fri, 20 Apr 2012 14:47:54 -0400 Subject: NFS: create nfs_commit_completion_ops Factors out the code that needs to change when directio starts using these code paths. Signed-off-by: Fred Isaman Signed-off-by: Trond Myklebust diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 145e9e7..137f5cd 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -344,13 +344,12 @@ extern int nfs_initiate_commit(struct rpc_clnt *clnt, int how); extern void nfs_init_commit(struct nfs_commit_data *data, struct list_head *head, - struct pnfs_layout_segment *lseg); + struct pnfs_layout_segment *lseg, + struct nfs_commit_info *cinfo); void nfs_retry_commit(struct list_head *page_list, struct pnfs_layout_segment *lseg, struct nfs_commit_info *cinfo); -void nfs_commit_clear_lock(struct nfs_inode *nfsi); void nfs_commitdata_release(struct nfs_commit_data *data); -void nfs_commit_release_pages(struct nfs_commit_data *data); void nfs_request_add_commit_list(struct nfs_page *req, struct list_head *dst, struct nfs_commit_info *cinfo); void nfs_request_remove_commit_list(struct nfs_page *req, diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index fe2cb55..26d1da4 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c @@ -347,12 +347,8 @@ static void filelayout_commit_count_stats(struct rpc_task *task, void *data) static void filelayout_commit_release(void *calldata) { struct nfs_commit_data *data = calldata; - struct nfs_commit_info cinfo; - nfs_commit_release_pages(data); - nfs_init_cinfo(&cinfo, data->inode, data->dreq); - if (atomic_dec_and_test(&cinfo.mds->rpcs_out)) - nfs_commit_clear_lock(NFS_I(data->inode)); + data->completion_ops->completion(data); put_lseg(data->lseg); nfs_commitdata_release(data); } @@ -1108,7 +1104,7 @@ filelayout_commit_pagelist(struct inode *inode, struct list_head *mds_pages, nreq += alloc_ds_commits(cinfo, &list); if (nreq == 0) { - nfs_commit_clear_lock(NFS_I(inode)); + cinfo->completion_ops->error_cleanup(NFS_I(inode)); goto out; } @@ -1117,14 +1113,14 @@ filelayout_commit_pagelist(struct inode *inode, struct list_head *mds_pages, list_for_each_entry_safe(data, tmp, &list, pages) { list_del_init(&data->pages); if (!data->lseg) { - nfs_init_commit(data, mds_pages, NULL); + nfs_init_commit(data, mds_pages, NULL, cinfo); nfs_initiate_commit(NFS_CLIENT(inode), data, data->mds_ops, how); } else { struct pnfs_commit_bucket *buckets; buckets = cinfo->ds->buckets; - nfs_init_commit(data, &buckets[data->ds_commit_index].committing, data->lseg); + nfs_init_commit(data, &buckets[data->ds_commit_index].committing, data->lseg, cinfo); filelayout_initiate_commit(data, how); } } diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 18bf700..333d01d 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -46,6 +46,7 @@ static void nfs_redirty_request(struct nfs_page *req); static const struct rpc_call_ops nfs_write_common_ops; static const struct rpc_call_ops nfs_commit_ops; static const struct nfs_pgio_completion_ops nfs_async_write_completion_ops; +static const struct nfs_commit_completion_ops nfs_commit_completion_ops; static struct kmem_cache *nfs_wdata_cachep; static mempool_t *nfs_wdata_mempool; @@ -505,6 +506,7 @@ static void nfs_init_cinfo_from_inode(struct nfs_commit_info *cinfo, cinfo->lock = &inode->i_lock; cinfo->mds = &NFS_I(inode)->commit_info; cinfo->ds = pnfs_get_ds_info(inode); + cinfo->completion_ops = &nfs_commit_completion_ops; } void nfs_init_cinfo(struct nfs_commit_info *cinfo, @@ -1358,13 +1360,12 @@ static int nfs_commit_set_lock(struct nfs_inode *nfsi, int may_wait) return (ret < 0) ? ret : 1; } -void nfs_commit_clear_lock(struct nfs_inode *nfsi) +static void nfs_commit_clear_lock(struct nfs_inode *nfsi) { clear_bit(NFS_INO_COMMIT, &nfsi->flags); smp_mb__after_clear_bit(); wake_up_bit(&nfsi->flags, NFS_INO_COMMIT); } -EXPORT_SYMBOL_GPL(nfs_commit_clear_lock); void nfs_commitdata_release(struct nfs_commit_data *data) { @@ -1413,8 +1414,9 @@ EXPORT_SYMBOL_GPL(nfs_initiate_commit); * Set up the argument/result storage required for the RPC call. */ void nfs_init_commit(struct nfs_commit_data *data, - struct list_head *head, - struct pnfs_layout_segment *lseg) + struct list_head *head, + struct pnfs_layout_segment *lseg, + struct nfs_commit_info *cinfo) { struct nfs_page *first = nfs_list_entry(head->next); struct inode *inode = first->wb_context->dentry->d_inode; @@ -1428,6 +1430,7 @@ void nfs_init_commit(struct nfs_commit_data *data, data->cred = first->wb_context->cred; data->lseg = lseg; /* reference transferred */ data->mds_ops = &nfs_commit_ops; + data->completion_ops = cinfo->completion_ops; data->args.fh = NFS_FH(data->inode); /* Note: we always request a commit of the entire inode */ @@ -1473,11 +1476,12 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how, goto out_bad; /* Set up the argument struct */ - nfs_init_commit(data, head, NULL); + nfs_init_commit(data, head, NULL, cinfo); + atomic_inc(&cinfo->mds->rpcs_out); return nfs_initiate_commit(NFS_CLIENT(inode), data, data->mds_ops, how); out_bad: nfs_retry_commit(head, NULL, cinfo); - nfs_commit_clear_lock(NFS_I(inode)); + cinfo->completion_ops->error_cleanup(NFS_I(inode)); return -ENOMEM; } @@ -1495,10 +1499,11 @@ static void nfs_commit_done(struct rpc_task *task, void *calldata) NFS_PROTO(data->inode)->commit_done(task, data); } -void nfs_commit_release_pages(struct nfs_commit_data *data) +static void nfs_commit_release_pages(struct nfs_commit_data *data) { struct nfs_page *req; int status = data->task.tk_status; + struct nfs_commit_info cinfo; while (!list_empty(&data->pages)) { req = nfs_list_entry(data->pages.next); @@ -1531,15 +1536,16 @@ void nfs_commit_release_pages(struct nfs_commit_data *data) next: nfs_unlock_request(req); } + nfs_init_cinfo(&cinfo, data->inode, data->dreq); + if (atomic_dec_and_test(&cinfo.mds->rpcs_out)) + nfs_commit_clear_lock(NFS_I(data->inode)); } -EXPORT_SYMBOL_GPL(nfs_commit_release_pages); static void nfs_commit_release(void *calldata) { struct nfs_commit_data *data = calldata; - nfs_commit_release_pages(data); - nfs_commit_clear_lock(NFS_I(data->inode)); + data->completion_ops->completion(data); nfs_commitdata_release(calldata); } @@ -1549,6 +1555,11 @@ static const struct rpc_call_ops nfs_commit_ops = { .rpc_release = nfs_commit_release, }; +static const struct nfs_commit_completion_ops nfs_commit_completion_ops = { + .completion = nfs_commit_release_pages, + .error_cleanup = nfs_commit_clear_lock, +}; + static int nfs_generic_commit_list(struct inode *inode, struct list_head *head, int how, struct nfs_commit_info *cinfo) { diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 224e1e8..0e8b88a 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1263,10 +1263,18 @@ struct nfs_mds_commit_info { struct list_head list; }; +struct nfs_commit_data; +struct nfs_inode; +struct nfs_commit_completion_ops { + void (*error_cleanup) (struct nfs_inode *nfsi); + void (*completion) (struct nfs_commit_data *data); +}; + struct nfs_commit_info { spinlock_t *lock; struct nfs_mds_commit_info *mds; struct pnfs_ds_commit_info *ds; + const struct nfs_commit_completion_ops *completion_ops; }; struct nfs_commit_data { @@ -1285,6 +1293,7 @@ struct nfs_commit_data { struct nfs_client *ds_clp; /* pNFS data server */ int ds_commit_index; const struct rpc_call_ops *mds_ops; + const struct nfs_commit_completion_ops *completion_ops; int (*commit_done_cb) (struct rpc_task *task, struct nfs_commit_data *data); }; -- cgit v0.10.2 From b359f9d09bcbaede09243cfe844172ba055d89fd Mon Sep 17 00:00:00 2001 From: Fred Isaman Date: Fri, 20 Apr 2012 14:47:55 -0400 Subject: NFS: add dreq to nfs_commit_info Need this to pass into nfs_commitdata_init, in order to keep data->dreq accurate. Signed-off-by: Fred Isaman Signed-off-by: Trond Myklebust diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 333d01d..44a93d8 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -506,6 +506,7 @@ static void nfs_init_cinfo_from_inode(struct nfs_commit_info *cinfo, cinfo->lock = &inode->i_lock; cinfo->mds = &NFS_I(inode)->commit_info; cinfo->ds = pnfs_get_ds_info(inode); + cinfo->dreq = NULL; cinfo->completion_ops = &nfs_commit_completion_ops; } @@ -1431,6 +1432,7 @@ void nfs_init_commit(struct nfs_commit_data *data, data->lseg = lseg; /* reference transferred */ data->mds_ops = &nfs_commit_ops; data->completion_ops = cinfo->completion_ops; + data->dreq = cinfo->dreq; data->args.fh = NFS_FH(data->inode); /* Note: we always request a commit of the entire inode */ diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 0e8b88a..5f563bd 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1274,6 +1274,7 @@ struct nfs_commit_info { spinlock_t *lock; struct nfs_mds_commit_info *mds; struct pnfs_ds_commit_info *ds; + struct nfs_direct_req *dreq; /* O_DIRECT request */ const struct nfs_commit_completion_ops *completion_ops; }; -- cgit v0.10.2 From 56f9cd684d25f1bae901c5a872b8427f8b417c3f Mon Sep 17 00:00:00 2001 From: Fred Isaman Date: Fri, 20 Apr 2012 14:47:56 -0400 Subject: NFS: avoid some stat gathering for direct io Signed-off-by: Fred Isaman Signed-off-by: Trond Myklebust diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 44a93d8..56db9e7 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -472,9 +472,13 @@ nfs_request_add_commit_list(struct nfs_page *req, struct list_head *dst, nfs_list_add_request(req, dst); cinfo->mds->ncommit++; spin_unlock(cinfo->lock); - inc_zone_page_state(req->wb_page, NR_UNSTABLE_NFS); - inc_bdi_stat(req->wb_page->mapping->backing_dev_info, BDI_RECLAIMABLE); - __mark_inode_dirty(req->wb_context->dentry->d_inode, I_DIRTY_DATASYNC); + if (!cinfo->dreq) { + inc_zone_page_state(req->wb_page, NR_UNSTABLE_NFS); + inc_bdi_stat(req->wb_page->mapping->backing_dev_info, + BDI_RECLAIMABLE); + __mark_inode_dirty(req->wb_context->dentry->d_inode, + I_DIRTY_DATASYNC); + } } EXPORT_SYMBOL_GPL(nfs_request_add_commit_list); @@ -1455,9 +1459,11 @@ void nfs_retry_commit(struct list_head *page_list, req = nfs_list_entry(page_list->next); nfs_list_remove_request(req); nfs_mark_request_commit(req, lseg, cinfo); - dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS); - dec_bdi_stat(req->wb_page->mapping->backing_dev_info, - BDI_RECLAIMABLE); + if (!cinfo->dreq) { + dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS); + dec_bdi_stat(req->wb_page->mapping->backing_dev_info, + BDI_RECLAIMABLE); + } nfs_unlock_request(req); } } -- cgit v0.10.2 From 1763da1234cba663b849476d451bdccac5147859 Mon Sep 17 00:00:00 2001 From: Fred Isaman Date: Fri, 20 Apr 2012 14:47:57 -0400 Subject: NFS: rewrite directio write to use async coalesce code This also has the advantage that it allows directio to use pnfs. Signed-off-by: Fred Isaman Signed-off-by: Trond Myklebust diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 4ba9a2c..d44de2f 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -56,6 +56,7 @@ #include "internal.h" #include "iostat.h" +#include "pnfs.h" #define NFSDBG_FACILITY NFSDBG_VFS @@ -81,16 +82,19 @@ struct nfs_direct_req { struct completion completion; /* wait for i/o completion */ /* commit state */ - struct list_head rewrite_list; /* saved nfs_write_data structs */ - struct nfs_commit_data *commit_data; /* special write_data for commits */ + struct nfs_mds_commit_info mds_cinfo; /* Storage for cinfo */ + struct pnfs_ds_commit_info ds_cinfo; /* Storage for cinfo */ + struct work_struct work; int flags; #define NFS_ODIRECT_DO_COMMIT (1) /* an unstable reply was received */ #define NFS_ODIRECT_RESCHED_WRITES (2) /* write verification failed */ struct nfs_writeverf verf; /* unstable write verifier */ }; +static const struct nfs_pgio_completion_ops nfs_direct_write_completion_ops; +static const struct nfs_commit_completion_ops nfs_direct_commit_completion_ops; static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode *inode); -static const struct rpc_call_ops nfs_write_direct_ops; +static void nfs_direct_write_schedule_work(struct work_struct *work); static inline void get_dreq(struct nfs_direct_req *dreq) { @@ -131,6 +135,16 @@ static void nfs_direct_release_pages(struct page **pages, unsigned int npages) page_cache_release(pages[i]); } +void nfs_init_cinfo_from_dreq(struct nfs_commit_info *cinfo, + struct nfs_direct_req *dreq) +{ + cinfo->lock = &dreq->lock; + cinfo->mds = &dreq->mds_cinfo; + cinfo->ds = &dreq->ds_cinfo; + cinfo->dreq = dreq; + cinfo->completion_ops = &nfs_direct_commit_completion_ops; +} + static inline struct nfs_direct_req *nfs_direct_req_alloc(void) { struct nfs_direct_req *dreq; @@ -142,7 +156,11 @@ static inline struct nfs_direct_req *nfs_direct_req_alloc(void) kref_init(&dreq->kref); kref_get(&dreq->kref); init_completion(&dreq->completion); - INIT_LIST_HEAD(&dreq->rewrite_list); + dreq->mds_cinfo.ncommit = 0; + atomic_set(&dreq->mds_cinfo.rpcs_out, 0); + INIT_LIST_HEAD(&dreq->mds_cinfo.list); + INIT_WORK(&dreq->work, nfs_direct_write_schedule_work); + memset(&dreq->ds_cinfo, 0, sizeof(dreq->ds_cinfo)); dreq->iocb = NULL; dreq->ctx = NULL; dreq->l_ctx = NULL; @@ -457,112 +475,60 @@ out: return result; } -static void nfs_direct_writehdr_release(struct nfs_write_header *whdr) -{ - struct nfs_write_data *data = &whdr->rpc_data; - - if (data->pages.pagevec != data->pages.page_array) - kfree(data->pages.pagevec); - nfs_writehdr_free(&whdr->header); -} - -static void nfs_direct_free_writedata(struct nfs_direct_req *dreq) -{ - while (!list_empty(&dreq->rewrite_list)) { - struct nfs_pgio_header *hdr = list_entry(dreq->rewrite_list.next, struct nfs_pgio_header, pages); - struct nfs_write_header *whdr = container_of(hdr, struct nfs_write_header, header); - struct nfs_page_array *p = &whdr->rpc_data.pages; - - list_del(&hdr->pages); - nfs_direct_release_pages(p->pagevec, p->npages); - nfs_direct_writehdr_release(whdr); - } -} - #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq) { - struct inode *inode = dreq->inode; - struct list_head *p; - struct nfs_write_data *data; - struct nfs_pgio_header *hdr; - struct rpc_task *task; - struct rpc_message msg = { - .rpc_cred = dreq->ctx->cred, - }; - struct rpc_task_setup task_setup_data = { - .rpc_client = NFS_CLIENT(inode), - .rpc_message = &msg, - .callback_ops = &nfs_write_direct_ops, - .workqueue = nfsiod_workqueue, - .flags = RPC_TASK_ASYNC, - }; + struct nfs_pageio_descriptor desc; + struct nfs_page *req, *tmp; + LIST_HEAD(reqs); + struct nfs_commit_info cinfo; + LIST_HEAD(failed); + + nfs_init_cinfo_from_dreq(&cinfo, dreq); + pnfs_recover_commit_reqs(dreq->inode, &reqs, &cinfo); + spin_lock(cinfo.lock); + nfs_scan_commit_list(&cinfo.mds->list, &reqs, &cinfo, 0); + spin_unlock(cinfo.lock); dreq->count = 0; get_dreq(dreq); - list_for_each(p, &dreq->rewrite_list) { - hdr = list_entry(p, struct nfs_pgio_header, pages); - data = &(container_of(hdr, struct nfs_write_header, header))->rpc_data; - - get_dreq(dreq); - - /* Use stable writes */ - data->args.stable = NFS_FILE_SYNC; - - /* - * Reset data->res. - */ - nfs_fattr_init(&data->fattr); - data->res.count = data->args.count; - memset(&data->verf, 0, sizeof(data->verf)); - - /* - * Reuse data->task; data->args should not have changed - * since the original request was sent. - */ - task_setup_data.task = &data->task; - task_setup_data.callback_data = data; - msg.rpc_argp = &data->args; - msg.rpc_resp = &data->res; - NFS_PROTO(inode)->write_setup(data, &msg); - - /* - * We're called via an RPC callback, so BKL is already held. - */ - task = rpc_run_task(&task_setup_data); - if (!IS_ERR(task)) - rpc_put_task(task); - - dprintk("NFS: %5u rescheduled direct write call (req %s/%Ld, %u bytes @ offset %Lu)\n", - data->task.tk_pid, - inode->i_sb->s_id, - (long long)NFS_FILEID(inode), - data->args.count, - (unsigned long long)data->args.offset); - } + nfs_pageio_init_write(&desc, dreq->inode, FLUSH_STABLE, + &nfs_direct_write_completion_ops); + desc.pg_dreq = dreq; - if (put_dreq(dreq)) - nfs_direct_write_complete(dreq, inode); -} + list_for_each_entry_safe(req, tmp, &reqs, wb_list) { + if (!nfs_pageio_add_request(&desc, req)) { + nfs_list_add_request(req, &failed); + spin_lock(cinfo.lock); + dreq->flags = 0; + dreq->error = -EIO; + spin_unlock(cinfo.lock); + } + } + nfs_pageio_complete(&desc); -static void nfs_direct_commit_result(struct rpc_task *task, void *calldata) -{ - struct nfs_commit_data *data = calldata; + while (!list_empty(&failed)) { + page_cache_release(req->wb_page); + nfs_release_request(req); + nfs_unlock_request(req); + } - /* Call the NFS version-specific code */ - NFS_PROTO(data->inode)->commit_done(task, data); + if (put_dreq(dreq)) + nfs_direct_write_complete(dreq, dreq->inode); } -static void nfs_direct_commit_release(void *calldata) +static void nfs_direct_commit_complete(struct nfs_commit_data *data) { - struct nfs_commit_data *data = calldata; struct nfs_direct_req *dreq = data->dreq; + struct nfs_commit_info cinfo; + struct nfs_page *req; int status = data->task.tk_status; + nfs_init_cinfo_from_dreq(&cinfo, dreq); if (status < 0) { dprintk("NFS: %5u commit failed with error %d.\n", - data->task.tk_pid, status); + data->task.tk_pid, status); dreq->flags = NFS_ODIRECT_RESCHED_WRITES; } else if (memcmp(&dreq->verf, &data->verf, sizeof(data->verf))) { dprintk("NFS: %5u commit verify failed\n", data->task.tk_pid); @@ -570,59 +536,49 @@ static void nfs_direct_commit_release(void *calldata) } dprintk("NFS: %5u commit returned %d\n", data->task.tk_pid, status); - nfs_direct_write_complete(dreq, data->inode); - nfs_commit_free(data); + while (!list_empty(&data->pages)) { + req = nfs_list_entry(data->pages.next); + nfs_list_remove_request(req); + if (dreq->flags == NFS_ODIRECT_RESCHED_WRITES) { + /* Note the rewrite will go through mds */ + nfs_mark_request_commit(req, NULL, &cinfo); + } else { + page_cache_release(req->wb_page); + nfs_release_request(req); + } + nfs_unlock_request(req); + } + + if (atomic_dec_and_test(&cinfo.mds->rpcs_out)) + nfs_direct_write_complete(dreq, data->inode); } -static const struct rpc_call_ops nfs_commit_direct_ops = { - .rpc_call_prepare = nfs_commit_prepare, - .rpc_call_done = nfs_direct_commit_result, - .rpc_release = nfs_direct_commit_release, +static void nfs_direct_error_cleanup(struct nfs_inode *nfsi) +{ + /* There is no lock to clear */ +} + +static const struct nfs_commit_completion_ops nfs_direct_commit_completion_ops = { + .completion = nfs_direct_commit_complete, + .error_cleanup = nfs_direct_error_cleanup, }; static void nfs_direct_commit_schedule(struct nfs_direct_req *dreq) { - struct nfs_commit_data *data = dreq->commit_data; - struct rpc_task *task; - struct rpc_message msg = { - .rpc_argp = &data->args, - .rpc_resp = &data->res, - .rpc_cred = dreq->ctx->cred, - }; - struct rpc_task_setup task_setup_data = { - .task = &data->task, - .rpc_client = NFS_CLIENT(dreq->inode), - .rpc_message = &msg, - .callback_ops = &nfs_commit_direct_ops, - .callback_data = data, - .workqueue = nfsiod_workqueue, - .flags = RPC_TASK_ASYNC, - }; - - data->inode = dreq->inode; - data->cred = msg.rpc_cred; - - data->args.fh = NFS_FH(data->inode); - data->args.offset = 0; - data->args.count = 0; - data->res.fattr = &data->fattr; - data->res.verf = &data->verf; - nfs_fattr_init(&data->fattr); - - NFS_PROTO(data->inode)->commit_setup(data, &msg); - - /* Note: task.tk_ops->rpc_release will free dreq->commit_data */ - dreq->commit_data = NULL; - - dprintk("NFS: %5u initiated commit call\n", data->task.tk_pid); - - task = rpc_run_task(&task_setup_data); - if (!IS_ERR(task)) - rpc_put_task(task); + int res; + struct nfs_commit_info cinfo; + LIST_HEAD(mds_list); + + nfs_init_cinfo_from_dreq(&cinfo, dreq); + nfs_scan_commit(dreq->inode, &mds_list, &cinfo); + res = nfs_generic_commit_list(dreq->inode, &mds_list, 0, &cinfo); + if (res < 0) /* res == -ENOMEM */ + nfs_direct_write_reschedule(dreq); } -static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode *inode) +static void nfs_direct_write_schedule_work(struct work_struct *work) { + struct nfs_direct_req *dreq = container_of(work, struct nfs_direct_req, work); int flags = dreq->flags; dreq->flags = 0; @@ -634,90 +590,29 @@ static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode nfs_direct_write_reschedule(dreq); break; default: - if (dreq->commit_data != NULL) - nfs_commit_free(dreq->commit_data); - nfs_direct_free_writedata(dreq); - nfs_zap_mapping(inode, inode->i_mapping); + nfs_zap_mapping(dreq->inode, dreq->inode->i_mapping); nfs_direct_complete(dreq); } } -static void nfs_alloc_commit_data(struct nfs_direct_req *dreq) +static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode *inode) { - dreq->commit_data = nfs_commitdata_alloc(); - if (dreq->commit_data != NULL) - dreq->commit_data->dreq = dreq; + schedule_work(&dreq->work); /* Calls nfs_direct_write_schedule_work */ } + #else -static inline void nfs_alloc_commit_data(struct nfs_direct_req *dreq) -{ - dreq->commit_data = NULL; -} static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode *inode) { - nfs_direct_free_writedata(dreq); nfs_zap_mapping(inode, inode->i_mapping); nfs_direct_complete(dreq); } #endif -static void nfs_direct_write_result(struct rpc_task *task, void *calldata) -{ - struct nfs_write_data *data = calldata; - - nfs_writeback_done(task, data); -} - /* * NB: Return the value of the first error return code. Subsequent * errors after the first one are ignored. */ -static void nfs_direct_write_release(void *calldata) -{ - struct nfs_write_data *data = calldata; - struct nfs_pgio_header *hdr = data->header; - struct nfs_direct_req *dreq = (struct nfs_direct_req *) hdr->req; - int status = data->task.tk_status; - - spin_lock(&dreq->lock); - - if (unlikely(status < 0)) { - /* An error has occurred, so we should not commit */ - dreq->flags = 0; - dreq->error = status; - } - if (unlikely(dreq->error != 0)) - goto out_unlock; - - dreq->count += data->res.count; - - if (data->res.verf->committed != NFS_FILE_SYNC) { - switch (dreq->flags) { - case 0: - memcpy(&dreq->verf, &data->verf, sizeof(dreq->verf)); - dreq->flags = NFS_ODIRECT_DO_COMMIT; - break; - case NFS_ODIRECT_DO_COMMIT: - if (memcmp(&dreq->verf, &data->verf, sizeof(dreq->verf))) { - dprintk("NFS: %5u write verify failed\n", data->task.tk_pid); - dreq->flags = NFS_ODIRECT_RESCHED_WRITES; - } - } - } -out_unlock: - spin_unlock(&dreq->lock); - - if (put_dreq(dreq)) - nfs_direct_write_complete(dreq, hdr->inode); -} - -static const struct rpc_call_ops nfs_write_direct_ops = { - .rpc_call_prepare = nfs_write_prepare, - .rpc_call_done = nfs_direct_write_result, - .rpc_release = nfs_direct_write_release, -}; - /* * For each wsize'd chunk of the user's buffer, dispatch an NFS WRITE * operation. If nfs_writedata_alloc() or get_user_pages() fails, @@ -725,143 +620,181 @@ static const struct rpc_call_ops nfs_write_direct_ops = { * handled automatically by nfs_direct_write_result(). Otherwise, if * no requests have been sent, just return an error. */ -static ssize_t nfs_direct_write_schedule_segment(struct nfs_direct_req *dreq, +static ssize_t nfs_direct_write_schedule_segment(struct nfs_pageio_descriptor *desc, const struct iovec *iov, - loff_t pos, int sync) + loff_t pos) { + struct nfs_direct_req *dreq = desc->pg_dreq; struct nfs_open_context *ctx = dreq->ctx; struct inode *inode = ctx->dentry->d_inode; unsigned long user_addr = (unsigned long)iov->iov_base; size_t count = iov->iov_len; - struct rpc_task *task; - struct rpc_message msg = { - .rpc_cred = ctx->cred, - }; - struct rpc_task_setup task_setup_data = { - .rpc_client = NFS_CLIENT(inode), - .rpc_message = &msg, - .callback_ops = &nfs_write_direct_ops, - .workqueue = nfsiod_workqueue, - .flags = RPC_TASK_ASYNC, - }; size_t wsize = NFS_SERVER(inode)->wsize; unsigned int pgbase; int result; ssize_t started = 0; + struct page **pagevec = NULL; + unsigned int npages; do { - struct nfs_write_header *whdr; - struct nfs_write_data *data; - struct nfs_page_array *pages; size_t bytes; + int i; pgbase = user_addr & ~PAGE_MASK; - bytes = min(wsize,count); + bytes = min(max(wsize, PAGE_SIZE), count); result = -ENOMEM; - whdr = nfs_writehdr_alloc(); - if (unlikely(!whdr)) + npages = nfs_page_array_len(pgbase, bytes); + if (!pagevec) + pagevec = kmalloc(npages * sizeof(struct page *), GFP_KERNEL); + if (!pagevec) break; - data = nfs_writedata_alloc(&whdr->header, nfs_page_array_len(pgbase, bytes)); - if (!data) { - nfs_writehdr_free(&whdr->header); - break; - } - data->header = &whdr->header; - atomic_inc(&data->header->refcnt); - pages = &data->pages; - down_read(¤t->mm->mmap_sem); result = get_user_pages(current, current->mm, user_addr, - pages->npages, 0, 0, pages->pagevec, NULL); + npages, 0, 0, pagevec, NULL); up_read(¤t->mm->mmap_sem); - if (result < 0) { - nfs_direct_writehdr_release(whdr); + if (result < 0) break; - } - if ((unsigned)result < pages->npages) { + + if ((unsigned)result < npages) { bytes = result * PAGE_SIZE; if (bytes <= pgbase) { - nfs_direct_release_pages(pages->pagevec, result); - nfs_direct_writehdr_release(whdr); + nfs_direct_release_pages(pagevec, result); break; } bytes -= pgbase; - pages->npages = result; + npages = result; } - get_dreq(dreq); - - list_move_tail(&whdr->header.pages, &dreq->rewrite_list); - - whdr->header.req = (struct nfs_page *) dreq; - whdr->header.inode = inode; - whdr->header.cred = msg.rpc_cred; - data->args.fh = NFS_FH(inode); - data->args.context = ctx; - data->args.lock_context = dreq->l_ctx; - data->args.offset = pos; - data->args.pgbase = pgbase; - data->args.pages = pages->pagevec; - data->args.count = bytes; - data->args.stable = sync; - data->res.fattr = &data->fattr; - data->res.count = bytes; - data->res.verf = &data->verf; - nfs_fattr_init(&data->fattr); - - task_setup_data.task = &data->task; - task_setup_data.callback_data = data; - msg.rpc_argp = &data->args; - msg.rpc_resp = &data->res; - NFS_PROTO(inode)->write_setup(data, &msg); - - task = rpc_run_task(&task_setup_data); - if (IS_ERR(task)) - break; + for (i = 0; i < npages; i++) { + struct nfs_page *req; + unsigned int req_len = min(bytes, PAGE_SIZE - pgbase); - dprintk("NFS: %5u initiated direct write call " - "(req %s/%Ld, %zu bytes @ offset %Lu)\n", - task->tk_pid, - inode->i_sb->s_id, - (long long)NFS_FILEID(inode), - bytes, - (unsigned long long)data->args.offset); - rpc_put_task(task); - - started += bytes; - user_addr += bytes; - pos += bytes; - - /* FIXME: Remove this useless math from the final patch */ - pgbase += bytes; - pgbase &= ~PAGE_MASK; - BUG_ON(pgbase != (user_addr & ~PAGE_MASK)); - - count -= bytes; + req = nfs_create_request(dreq->ctx, dreq->inode, + pagevec[i], + pgbase, req_len); + if (IS_ERR(req)) { + nfs_direct_release_pages(pagevec + i, + npages - i); + result = PTR_ERR(req); + break; + } + nfs_lock_request(req); + req->wb_index = pos >> PAGE_SHIFT; + req->wb_offset = pos & ~PAGE_MASK; + if (!nfs_pageio_add_request(desc, req)) { + result = desc->pg_error; + nfs_unlock_request(req); + nfs_release_request(req); + nfs_direct_release_pages(pagevec + i, + npages - i); + } + pgbase = 0; + bytes -= req_len; + started += req_len; + user_addr += req_len; + pos += req_len; + count -= req_len; + } } while (count != 0); + kfree(pagevec); + if (started) return started; return result < 0 ? (ssize_t) result : -EFAULT; } +static void nfs_direct_write_completion(struct nfs_pgio_header *hdr) +{ + struct nfs_direct_req *dreq = hdr->dreq; + struct nfs_commit_info cinfo; + int bit = -1; + struct nfs_page *req = nfs_list_entry(hdr->pages.next); + + if (test_bit(NFS_IOHDR_REDO, &hdr->flags)) + goto out_put; + + nfs_init_cinfo_from_dreq(&cinfo, dreq); + + spin_lock(&dreq->lock); + + if (test_bit(NFS_IOHDR_ERROR, &hdr->flags)) { + dreq->flags = 0; + dreq->error = hdr->error; + } + if (dreq->error != 0) + bit = NFS_IOHDR_ERROR; + else { + dreq->count += hdr->good_bytes; + if (test_bit(NFS_IOHDR_NEED_RESCHED, &hdr->flags)) { + dreq->flags = NFS_ODIRECT_RESCHED_WRITES; + bit = NFS_IOHDR_NEED_RESCHED; + } else if (test_bit(NFS_IOHDR_NEED_COMMIT, &hdr->flags)) { + if (dreq->flags == NFS_ODIRECT_RESCHED_WRITES) + bit = NFS_IOHDR_NEED_RESCHED; + else if (dreq->flags == 0) { + memcpy(&dreq->verf, &req->wb_verf, + sizeof(dreq->verf)); + bit = NFS_IOHDR_NEED_COMMIT; + dreq->flags = NFS_ODIRECT_DO_COMMIT; + } else if (dreq->flags == NFS_ODIRECT_DO_COMMIT) { + if (memcmp(&dreq->verf, &req->wb_verf, sizeof(dreq->verf))) { + dreq->flags = NFS_ODIRECT_RESCHED_WRITES; + bit = NFS_IOHDR_NEED_RESCHED; + } else + bit = NFS_IOHDR_NEED_COMMIT; + } + } + } + spin_unlock(&dreq->lock); + + while (!list_empty(&hdr->pages)) { + req = nfs_list_entry(hdr->pages.next); + nfs_list_remove_request(req); + switch (bit) { + case NFS_IOHDR_NEED_RESCHED: + case NFS_IOHDR_NEED_COMMIT: + nfs_mark_request_commit(req, hdr->lseg, &cinfo); + break; + default: + page_cache_release(req->wb_page); + nfs_release_request(req); + } + nfs_unlock_request(req); + } + +out_put: + if (put_dreq(dreq)) + nfs_direct_write_complete(dreq, hdr->inode); + hdr->release(hdr); +} + +static const struct nfs_pgio_completion_ops nfs_direct_write_completion_ops = { + .error_cleanup = nfs_sync_pgio_error, + .init_hdr = nfs_direct_pgio_init, + .completion = nfs_direct_write_completion, +}; + static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq, const struct iovec *iov, unsigned long nr_segs, - loff_t pos, int sync) + loff_t pos) { + struct nfs_pageio_descriptor desc; ssize_t result = 0; size_t requested_bytes = 0; unsigned long seg; + nfs_pageio_init_write(&desc, dreq->inode, FLUSH_COND_STABLE, + &nfs_direct_write_completion_ops); + desc.pg_dreq = dreq; get_dreq(dreq); for (seg = 0; seg < nr_segs; seg++) { const struct iovec *vec = &iov[seg]; - result = nfs_direct_write_schedule_segment(dreq, vec, - pos, sync); + result = nfs_direct_write_schedule_segment(&desc, vec, pos); if (result < 0) break; requested_bytes += result; @@ -869,6 +802,7 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq, break; pos += vec->iov_len; } + nfs_pageio_complete(&desc); /* * If no bytes were started, return the error, and let the @@ -891,16 +825,10 @@ static ssize_t nfs_direct_write(struct kiocb *iocb, const struct iovec *iov, ssize_t result = -ENOMEM; struct inode *inode = iocb->ki_filp->f_mapping->host; struct nfs_direct_req *dreq; - size_t wsize = NFS_SERVER(inode)->wsize; - int sync = NFS_UNSTABLE; dreq = nfs_direct_req_alloc(); if (!dreq) goto out; - nfs_alloc_commit_data(dreq); - - if (dreq->commit_data == NULL || count <= wsize) - sync = NFS_FILE_SYNC; dreq->inode = inode; dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp)); @@ -910,7 +838,7 @@ static ssize_t nfs_direct_write(struct kiocb *iocb, const struct iovec *iov, if (!is_sync_kiocb(iocb)) dreq->iocb = iocb; - result = nfs_direct_write_schedule_iovec(dreq, iov, nr_segs, pos, sync); + result = nfs_direct_write_schedule_iovec(dreq, iov, nr_segs, pos); if (!result) result = nfs_direct_wait(dreq); out_release: @@ -1030,10 +958,15 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, const struct iovec *iov, task_io_account_write(count); retval = nfs_direct_write(iocb, iov, nr_segs, pos, count); + if (retval > 0) { + struct inode *inode = mapping->host; - if (retval > 0) iocb->ki_pos = pos + retval; - + spin_lock(&inode->i_lock); + if (i_size_read(inode) < iocb->ki_pos) + i_size_write(inode, iocb->ki_pos); + spin_unlock(&inode->i_lock); + } out: return retval; } diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 137f5cd..d68810f 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -320,10 +320,11 @@ extern void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio); extern void nfs_readdata_release(struct nfs_read_data *rdata); /* write.c */ +extern void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, + struct inode *inode, int ioflags, + const struct nfs_pgio_completion_ops *compl_ops); extern struct nfs_write_header *nfs_writehdr_alloc(void); extern void nfs_writehdr_free(struct nfs_pgio_header *hdr); -extern struct nfs_write_data *nfs_writedata_alloc(struct nfs_pgio_header *hdr, - unsigned int pagecount); extern int nfs_generic_flush(struct nfs_pageio_descriptor *desc, struct nfs_pgio_header *hdr); extern void nfs_pageio_init_write_mds(struct nfs_pageio_descriptor *pgio, @@ -346,6 +347,15 @@ extern void nfs_init_commit(struct nfs_commit_data *data, struct list_head *head, struct pnfs_layout_segment *lseg, struct nfs_commit_info *cinfo); +int nfs_scan_commit_list(struct list_head *src, struct list_head *dst, + struct nfs_commit_info *cinfo, int max); +int nfs_scan_commit(struct inode *inode, struct list_head *dst, + struct nfs_commit_info *cinfo); +void nfs_mark_request_commit(struct nfs_page *req, + struct pnfs_layout_segment *lseg, + struct nfs_commit_info *cinfo); +int nfs_generic_commit_list(struct inode *inode, struct list_head *head, + int how, struct nfs_commit_info *cinfo); void nfs_retry_commit(struct list_head *page_list, struct pnfs_layout_segment *lseg, struct nfs_commit_info *cinfo); @@ -365,6 +375,10 @@ extern int nfs_migrate_page(struct address_space *, #define nfs_migrate_page NULL #endif +/* direct.c */ +void nfs_init_cinfo_from_dreq(struct nfs_commit_info *cinfo, + struct nfs_direct_req *dreq); + /* nfs4proc.c */ extern void __nfs4_read_done_cb(struct nfs_read_data *); extern void nfs4_reset_read(struct rpc_task *task, struct nfs_read_data *data); diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index 26d1da4..806a55f 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c @@ -996,12 +996,9 @@ static int filelayout_initiate_commit(struct nfs_commit_data *data, int how) } static int -filelayout_scan_ds_commit_list(struct pnfs_commit_bucket *bucket, - struct nfs_commit_info *cinfo, - int max) +transfer_commit_list(struct list_head *src, struct list_head *dst, + struct nfs_commit_info *cinfo, int max) { - struct list_head *src = &bucket->written; - struct list_head *dst = &bucket->committing; struct nfs_page *req, *tmp; int ret = 0; @@ -1014,9 +1011,22 @@ filelayout_scan_ds_commit_list(struct pnfs_commit_bucket *bucket, clear_bit(PG_COMMIT_TO_DS, &req->wb_flags); nfs_list_add_request(req, dst); ret++; - if (ret == max) + if ((ret == max) && !cinfo->dreq) break; } + return ret; +} + +static int +filelayout_scan_ds_commit_list(struct pnfs_commit_bucket *bucket, + struct nfs_commit_info *cinfo, + int max) +{ + struct list_head *src = &bucket->written; + struct list_head *dst = &bucket->committing; + int ret; + + ret = transfer_commit_list(src, dst, cinfo, max); if (ret) { cinfo->ds->nwritten -= ret; cinfo->ds->ncommitting += ret; @@ -1046,6 +1056,27 @@ static int filelayout_scan_commit_lists(struct nfs_commit_info *cinfo, return rv; } +/* Pull everything off the committing lists and dump into @dst */ +static void filelayout_recover_commit_reqs(struct list_head *dst, + struct nfs_commit_info *cinfo) +{ + struct pnfs_commit_bucket *b; + int i; + + /* NOTE cinfo->lock is NOT held, relying on fact that this is + * only called on single thread per dreq. + * Can't take the lock because need to do put_lseg + */ + for (i = 0, b = cinfo->ds->buckets; i < cinfo->ds->nbuckets; i++, b++) { + if (transfer_commit_list(&b->written, dst, cinfo, 0)) { + BUG_ON(!list_empty(&b->written)); + put_lseg(b->wlseg); + b->wlseg = NULL; + } + } + cinfo->ds->nwritten = 0; +} + static unsigned int alloc_ds_commits(struct nfs_commit_info *cinfo, struct list_head *list) { @@ -1170,6 +1201,7 @@ static struct pnfs_layoutdriver_type filelayout_type = { .mark_request_commit = filelayout_mark_request_commit, .clear_request_commit = filelayout_clear_request_commit, .scan_commit_lists = filelayout_scan_commit_lists, + .recover_commit_reqs = filelayout_recover_commit_reqs, .commit_pagelist = filelayout_commit_pagelist, .read_pagelist = filelayout_read_pagelist, .write_pagelist = filelayout_write_pagelist, diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 4cd8760..8efbee7 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -102,6 +102,8 @@ struct pnfs_layoutdriver_type { struct nfs_commit_info *cinfo); int (*scan_commit_lists) (struct nfs_commit_info *cinfo, int max); + void (*recover_commit_reqs) (struct list_head *list, + struct nfs_commit_info *cinfo); int (*commit_pagelist)(struct inode *inode, struct list_head *mds_pages, int how, @@ -323,6 +325,15 @@ pnfs_scan_commit_lists(struct inode *inode, struct nfs_commit_info *cinfo, return NFS_SERVER(inode)->pnfs_curr_ld->scan_commit_lists(cinfo, max); } +static inline void +pnfs_recover_commit_reqs(struct inode *inode, struct list_head *list, + struct nfs_commit_info *cinfo) +{ + if (cinfo->ds == NULL || cinfo->ds->nwritten == 0) + return; + NFS_SERVER(inode)->pnfs_curr_ld->recover_commit_reqs(list, cinfo); +} + /* Should the pNFS client commit and return the layout upon a setattr */ static inline bool pnfs_ld_layoutret_on_setattr(struct inode *inode) @@ -456,6 +467,12 @@ pnfs_scan_commit_lists(struct inode *inode, struct nfs_commit_info *cinfo, return 0; } +static inline void +pnfs_recover_commit_reqs(struct inode *inode, struct list_head *list, + struct nfs_commit_info *cinfo) +{ +} + static inline int pnfs_layoutcommit_inode(struct inode *inode, bool sync) { return 0; diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 56db9e7..fec214b 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -39,9 +39,6 @@ /* * Local function declarations */ -static void nfs_pageio_init_write(struct nfs_pageio_descriptor *desc, - struct inode *inode, int ioflags, - const struct nfs_pgio_completion_ops *compl_ops); static void nfs_redirty_request(struct nfs_page *req); static const struct rpc_call_ops nfs_write_common_ops; static const struct rpc_call_ops nfs_commit_ops; @@ -87,8 +84,8 @@ struct nfs_write_header *nfs_writehdr_alloc(void) return p; } -struct nfs_write_data *nfs_writedata_alloc(struct nfs_pgio_header *hdr, - unsigned int pagecount) +static struct nfs_write_data *nfs_writedata_alloc(struct nfs_pgio_header *hdr, + unsigned int pagecount) { struct nfs_write_data *data, *prealloc; @@ -518,14 +515,17 @@ void nfs_init_cinfo(struct nfs_commit_info *cinfo, struct inode *inode, struct nfs_direct_req *dreq) { - nfs_init_cinfo_from_inode(cinfo, inode); + if (dreq) + nfs_init_cinfo_from_dreq(cinfo, dreq); + else + nfs_init_cinfo_from_inode(cinfo, inode); } EXPORT_SYMBOL_GPL(nfs_init_cinfo); /* * Add a request to the inode's commit list. */ -static void +void nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg, struct nfs_commit_info *cinfo) { @@ -567,7 +567,7 @@ int nfs_write_need_commit(struct nfs_write_data *data) } #else -static void +void nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg, struct nfs_commit_info *cinfo) { @@ -632,7 +632,7 @@ nfs_reqs_to_commit(struct nfs_commit_info *cinfo) } /* cinfo->lock held by caller */ -static int +int nfs_scan_commit_list(struct list_head *src, struct list_head *dst, struct nfs_commit_info *cinfo, int max) { @@ -647,7 +647,7 @@ nfs_scan_commit_list(struct list_head *src, struct list_head *dst, nfs_request_remove_commit_list(req, cinfo); nfs_list_add_request(req, dst); ret++; - if (ret == max) + if ((ret == max) && !cinfo->dreq) break; } return ret; @@ -662,7 +662,7 @@ nfs_scan_commit_list(struct list_head *src, struct list_head *dst, * Moves requests from the inode's 'commit' request list. * The requests are *not* checked to ensure that they form a contiguous set. */ -static int +int nfs_scan_commit(struct inode *inode, struct list_head *dst, struct nfs_commit_info *cinfo) { @@ -686,8 +686,8 @@ static unsigned long nfs_reqs_to_commit(struct nfs_commit_info *cinfo) return 0; } -static inline int nfs_scan_commit(struct inode *inode, struct list_head *dst, - struct nfs_commit_info *cinfo) +int nfs_scan_commit(struct inode *inode, struct list_head *dst, + struct nfs_commit_info *cinfo) { return 0; } @@ -1202,9 +1202,9 @@ void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio) } EXPORT_SYMBOL_GPL(nfs_pageio_reset_write_mds); -static void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, - struct inode *inode, int ioflags, - const struct nfs_pgio_completion_ops *compl_ops) +void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, + struct inode *inode, int ioflags, + const struct nfs_pgio_completion_ops *compl_ops) { if (!pnfs_pageio_init_write(pgio, inode, ioflags, compl_ops)) nfs_pageio_init_write_mds(pgio, inode, ioflags, compl_ops); @@ -1568,8 +1568,8 @@ static const struct nfs_commit_completion_ops nfs_commit_completion_ops = { .error_cleanup = nfs_commit_clear_lock, }; -static int nfs_generic_commit_list(struct inode *inode, struct list_head *head, - int how, struct nfs_commit_info *cinfo) +int nfs_generic_commit_list(struct inode *inode, struct list_head *head, + int how, struct nfs_commit_info *cinfo) { int status; -- cgit v0.10.2 From df0117481cd94dbb8970f4be9d05b0568fa09ab1 Mon Sep 17 00:00:00 2001 From: Fred Isaman Date: Tue, 24 Apr 2012 14:50:34 -0400 Subject: NFS: Prevent garbage cinfo->ds from leaking out This is a bugfix that applies on top of the previous directio patches, that fixes a bug introduced in "NFS: create struct nfs_commit_info". Signed-off-by: Fred Isaman Signed-off-by: Trond Myklebust diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index 806a55f..80a63f6 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c @@ -1184,7 +1184,12 @@ filelayout_free_layout_hdr(struct pnfs_layout_hdr *lo) static struct pnfs_ds_commit_info * filelayout_get_ds_info(struct inode *inode) { - return &FILELAYOUT_FROM_HDR(NFS_I(inode)->layout)->commit_info; + struct pnfs_layout_hdr *layout = NFS_I(inode)->layout; + + if (layout == NULL) + return NULL; + else + return &FILELAYOUT_FROM_HDR(layout)->commit_info; } static struct pnfs_layoutdriver_type filelayout_type = { -- cgit v0.10.2 From 2671bfc3beb44e70636bd0208274426db57f73b5 Mon Sep 17 00:00:00 2001 From: Bryan Schumaker Date: Fri, 27 Apr 2012 13:27:44 -0400 Subject: NFS: Remove secinfo knowledge out of the generic client And also remove the unneeded rpc_op. Signed-off-by: Bryan Schumaker Signed-off-by: Trond Myklebust diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index d68810f..d699444 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -286,9 +286,6 @@ extern void nfs_sb_deactive(struct super_block *sb); extern char *nfs_path(char **p, struct dentry *dentry, char *buffer, ssize_t buflen); extern struct vfsmount *nfs_d_automount(struct path *path); -#ifdef CONFIG_NFS_V4 -rpc_authflavor_t nfs_find_best_sec(struct nfs4_secinfo_flavors *); -#endif /* getroot.c */ extern struct dentry *nfs_get_root(struct super_block *, struct nfs_fh *, diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c index d51868e..2a9591b 100644 --- a/fs/nfs/namespace.c +++ b/fs/nfs/namespace.c @@ -119,35 +119,6 @@ Elong: } #ifdef CONFIG_NFS_V4 -rpc_authflavor_t nfs_find_best_sec(struct nfs4_secinfo_flavors *flavors) -{ - struct gss_api_mech *mech; - struct xdr_netobj oid; - int i; - rpc_authflavor_t pseudoflavor = RPC_AUTH_UNIX; - - for (i = 0; i < flavors->num_flavors; i++) { - struct nfs4_secinfo_flavor *flavor; - flavor = &flavors->flavors[i]; - - if (flavor->flavor == RPC_AUTH_NULL || flavor->flavor == RPC_AUTH_UNIX) { - pseudoflavor = flavor->flavor; - break; - } else if (flavor->flavor == RPC_AUTH_GSS) { - oid.len = flavor->gss.sec_oid4.len; - oid.data = flavor->gss.sec_oid4.data; - mech = gss_mech_get_by_OID(&oid); - if (!mech) - continue; - pseudoflavor = gss_svc_to_pseudoflavor(mech, flavor->gss.service); - gss_mech_put(mech); - break; - } - } - - return pseudoflavor; -} - static struct rpc_clnt *nfs_lookup_mountpoint(struct inode *dir, struct qstr *name, struct nfs_fh *fh, diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 8d75021..53a487e 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -206,6 +206,7 @@ extern const struct dentry_operations nfs4_dentry_operations; extern const struct inode_operations nfs4_dir_inode_operations; /* nfs4namespace.c */ +rpc_authflavor_t nfs_find_best_sec(struct nfs4_secinfo_flavors *); struct rpc_clnt *nfs4_create_sec_client(struct rpc_clnt *, struct inode *, struct qstr *); /* nfs4proc.c */ diff --git a/fs/nfs/nfs4namespace.c b/fs/nfs/nfs4namespace.c index a7f3ded..a69ee39 100644 --- a/fs/nfs/nfs4namespace.c +++ b/fs/nfs/nfs4namespace.c @@ -132,6 +132,35 @@ static size_t nfs_parse_server_name(char *string, size_t len, return ret; } +rpc_authflavor_t nfs_find_best_sec(struct nfs4_secinfo_flavors *flavors) +{ + struct gss_api_mech *mech; + struct xdr_netobj oid; + int i; + rpc_authflavor_t pseudoflavor = RPC_AUTH_UNIX; + + for (i = 0; i < flavors->num_flavors; i++) { + struct nfs4_secinfo_flavor *flavor; + flavor = &flavors->flavors[i]; + + if (flavor->flavor == RPC_AUTH_NULL || flavor->flavor == RPC_AUTH_UNIX) { + pseudoflavor = flavor->flavor; + break; + } else if (flavor->flavor == RPC_AUTH_GSS) { + oid.len = flavor->gss.sec_oid4.len; + oid.data = flavor->gss.sec_oid4.data; + mech = gss_mech_get_by_OID(&oid); + if (!mech) + continue; + pseudoflavor = gss_svc_to_pseudoflavor(mech, flavor->gss.service); + gss_mech_put(mech); + break; + } + } + + return pseudoflavor; +} + static rpc_authflavor_t nfs4_negotiate_security(struct inode *inode, struct qstr *name) { struct page *page; diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 87af80d..fa661b9 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -6610,7 +6610,6 @@ const struct nfs_rpc_ops nfs_v4_clientops = { .close_context = nfs4_close_context, .open_context = nfs4_atomic_open, .init_client = nfs4_init_client, - .secinfo = nfs4_proc_secinfo, }; static const struct xattr_handler nfs4_xattr_nfs4_acl_handler = { diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 5f563bd..eb1f143 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1397,7 +1397,6 @@ struct nfs_rpc_ops { struct iattr *iattr); int (*init_client) (struct nfs_client *, const struct rpc_timeout *, const char *, rpc_authflavor_t, int); - int (*secinfo)(struct inode *, const struct qstr *, struct nfs4_secinfo_flavors *); }; /* -- cgit v0.10.2 From 281cad46b34db4dbb1d1e603f7b9cfe25d1ae7c9 Mon Sep 17 00:00:00 2001 From: Bryan Schumaker Date: Fri, 27 Apr 2012 13:27:45 -0400 Subject: NFS: Create a submount rpc_op This simplifies the code for v2 and v3 and gives v4 a chance to decide on referrals without needing to modify the generic client. Signed-off-by: Bryan Schumaker Signed-off-by: Trond Myklebust diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index d699444..0fd1efa 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -185,17 +185,6 @@ static inline void nfs_fs_proc_exit(void) } #endif -/* nfs4namespace.c */ -#ifdef CONFIG_NFS_V4 -extern struct vfsmount *nfs_do_refmount(struct rpc_clnt *client, struct dentry *dentry); -#else -static inline -struct vfsmount *nfs_do_refmount(struct rpc_clnt *client, struct dentry *dentry) -{ - return ERR_PTR(-ENOENT); -} -#endif - /* callback_xdr.c */ extern struct svc_version nfs4_callback_version1; extern struct svc_version nfs4_callback_version4; @@ -286,6 +275,10 @@ extern void nfs_sb_deactive(struct super_block *sb); extern char *nfs_path(char **p, struct dentry *dentry, char *buffer, ssize_t buflen); extern struct vfsmount *nfs_d_automount(struct path *path); +struct vfsmount *nfs_submount(struct nfs_server *, struct dentry *, + struct nfs_fh *, struct nfs_fattr *); +struct vfsmount *nfs_do_submount(struct dentry *, struct nfs_fh *, + struct nfs_fattr *, rpc_authflavor_t); /* getroot.c */ extern struct dentry *nfs_get_root(struct super_block *, struct nfs_fh *, diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c index 2a9591b..e36fd8a 100644 --- a/fs/nfs/namespace.c +++ b/fs/nfs/namespace.c @@ -26,11 +26,6 @@ static LIST_HEAD(nfs_automount_list); static DECLARE_DELAYED_WORK(nfs_automount_task, nfs_expire_automounts); int nfs_mountpoint_expiry_timeout = 500 * HZ; -static struct vfsmount *nfs_do_submount(struct dentry *dentry, - struct nfs_fh *fh, - struct nfs_fattr *fattr, - rpc_authflavor_t authflavor); - /* * nfs_path - reconstruct the path given an arbitrary dentry * @base - used to return pointer to the end of devname part of path @@ -118,35 +113,6 @@ Elong: return ERR_PTR(-ENAMETOOLONG); } -#ifdef CONFIG_NFS_V4 -static struct rpc_clnt *nfs_lookup_mountpoint(struct inode *dir, - struct qstr *name, - struct nfs_fh *fh, - struct nfs_fattr *fattr) -{ - int err; - - if (NFS_PROTO(dir)->version == 4) - return nfs4_proc_lookup_mountpoint(dir, name, fh, fattr); - - err = NFS_PROTO(dir)->lookup(NFS_SERVER(dir)->client, dir, name, fh, fattr); - if (err) - return ERR_PTR(err); - return rpc_clone_client(NFS_SERVER(dir)->client); -} -#else /* CONFIG_NFS_V4 */ -static inline struct rpc_clnt *nfs_lookup_mountpoint(struct inode *dir, - struct qstr *name, - struct nfs_fh *fh, - struct nfs_fattr *fattr) -{ - int err = NFS_PROTO(dir)->lookup(NFS_SERVER(dir)->client, dir, name, fh, fattr); - if (err) - return ERR_PTR(err); - return rpc_clone_client(NFS_SERVER(dir)->client); -} -#endif /* CONFIG_NFS_V4 */ - /* * nfs_d_automount - Handle crossing a mountpoint on the server * @path - The mountpoint @@ -162,10 +128,9 @@ static inline struct rpc_clnt *nfs_lookup_mountpoint(struct inode *dir, struct vfsmount *nfs_d_automount(struct path *path) { struct vfsmount *mnt; - struct dentry *parent; + struct nfs_server *server = NFS_SERVER(path->dentry->d_inode); struct nfs_fh *fh = NULL; struct nfs_fattr *fattr = NULL; - struct rpc_clnt *client; dprintk("--> nfs_d_automount()\n"); @@ -181,21 +146,7 @@ struct vfsmount *nfs_d_automount(struct path *path) dprintk("%s: enter\n", __func__); - /* Look it up again to get its attributes */ - parent = dget_parent(path->dentry); - client = nfs_lookup_mountpoint(parent->d_inode, &path->dentry->d_name, fh, fattr); - dput(parent); - if (IS_ERR(client)) { - mnt = ERR_CAST(client); - goto out; - } - - if (fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL) - mnt = nfs_do_refmount(client, path->dentry); - else - mnt = nfs_do_submount(path->dentry, fh, fattr, client->cl_auth->au_flavor); - rpc_shutdown_client(client); - + mnt = server->nfs_client->rpc_ops->submount(server, path->dentry, fh, fattr); if (IS_ERR(mnt)) goto out; @@ -268,10 +219,8 @@ static struct vfsmount *nfs_do_clone_mount(struct nfs_server *server, * @authflavor - security flavor to use when performing the mount * */ -static struct vfsmount *nfs_do_submount(struct dentry *dentry, - struct nfs_fh *fh, - struct nfs_fattr *fattr, - rpc_authflavor_t authflavor) +struct vfsmount *nfs_do_submount(struct dentry *dentry, struct nfs_fh *fh, + struct nfs_fattr *fattr, rpc_authflavor_t authflavor) { struct nfs_clone_mount mountdata = { .sb = dentry->d_sb, @@ -304,3 +253,19 @@ out: dprintk("<-- nfs_do_submount() = %p\n", mnt); return mnt; } + +struct vfsmount *nfs_submount(struct nfs_server *server, struct dentry *dentry, + struct nfs_fh *fh, struct nfs_fattr *fattr) +{ + int err; + struct dentry *parent = dget_parent(dentry); + + /* Look it up again to get its attributes */ + err = server->nfs_client->rpc_ops->lookup(server->client, parent->d_inode, + &dentry->d_name, fh, fattr); + dput(parent); + if (err != 0) + return ERR_PTR(err); + + return nfs_do_submount(dentry, fh, fattr, server->client->cl_auth->au_flavor); +} diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index 56dcefc..c23214d 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -885,6 +885,7 @@ const struct nfs_rpc_ops nfs_v3_clientops = { .file_inode_ops = &nfs3_file_inode_operations, .file_ops = &nfs_file_operations, .getroot = nfs3_proc_get_root, + .submount = nfs_submount, .getattr = nfs3_proc_getattr, .setattr = nfs3_proc_setattr, .lookup = nfs3_proc_lookup, diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 53a487e..97365b0 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -208,6 +208,8 @@ extern const struct inode_operations nfs4_dir_inode_operations; /* nfs4namespace.c */ rpc_authflavor_t nfs_find_best_sec(struct nfs4_secinfo_flavors *); struct rpc_clnt *nfs4_create_sec_client(struct rpc_clnt *, struct inode *, struct qstr *); +struct vfsmount *nfs4_submount(struct nfs_server *, struct dentry *, + struct nfs_fh *, struct nfs_fattr *); /* nfs4proc.c */ extern int nfs4_proc_setclientid(struct nfs_client *, u32, unsigned short, struct rpc_cred *, struct nfs4_setclientid_res *); diff --git a/fs/nfs/nfs4namespace.c b/fs/nfs/nfs4namespace.c index a69ee39..80fc0fe 100644 --- a/fs/nfs/nfs4namespace.c +++ b/fs/nfs/nfs4namespace.c @@ -329,7 +329,7 @@ out: * @dentry - dentry of referral * */ -struct vfsmount *nfs_do_refmount(struct rpc_clnt *client, struct dentry *dentry) +static struct vfsmount *nfs_do_refmount(struct rpc_clnt *client, struct dentry *dentry) { struct vfsmount *mnt = ERR_PTR(-ENOMEM); struct dentry *parent; @@ -370,3 +370,25 @@ out: dprintk("%s: done\n", __func__); return mnt; } + +struct vfsmount *nfs4_submount(struct nfs_server *server, struct dentry *dentry, + struct nfs_fh *fh, struct nfs_fattr *fattr) +{ + struct dentry *parent = dget_parent(dentry); + struct rpc_clnt *client; + struct vfsmount *mnt; + + /* Look it up again to get its attributes and sec flavor */ + client = nfs4_proc_lookup_mountpoint(parent->d_inode, &dentry->d_name, fh, fattr); + dput(parent); + if (IS_ERR(client)) + return ERR_CAST(client); + + if (fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL) + mnt = nfs_do_refmount(client, dentry); + else + mnt = nfs_do_submount(dentry, fh, fattr, client->cl_auth->au_flavor); + + rpc_shutdown_client(client); + return mnt; +} diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index fa661b9..2091af2 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -6571,6 +6571,7 @@ const struct nfs_rpc_ops nfs_v4_clientops = { .file_inode_ops = &nfs4_file_inode_operations, .file_ops = &nfs4_file_operations, .getroot = nfs4_proc_get_root, + .submount = nfs4_submount, .getattr = nfs4_proc_getattr, .setattr = nfs4_proc_setattr, .lookup = nfs4_proc_lookup, diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index 22ee705..76b3229 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c @@ -742,6 +742,7 @@ const struct nfs_rpc_ops nfs_v2_clientops = { .file_inode_ops = &nfs_file_inode_operations, .file_ops = &nfs_file_operations, .getroot = nfs_proc_get_root, + .submount = nfs_submount, .getattr = nfs_proc_getattr, .setattr = nfs_proc_setattr, .lookup = nfs_proc_lookup, diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index eb1f143..4dada94 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1341,6 +1341,8 @@ struct nfs_rpc_ops { int (*getroot) (struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *); + struct vfsmount *(*submount) (struct nfs_server *, struct dentry *, + struct nfs_fh *, struct nfs_fattr *); int (*getattr) (struct nfs_server *, struct nfs_fh *, struct nfs_fattr *); int (*setattr) (struct dentry *, struct nfs_fattr *, -- cgit v0.10.2 From 80a16b21a81eb639f0b726549f4c46c0e9aff92e Mon Sep 17 00:00:00 2001 From: Bryan Schumaker Date: Fri, 27 Apr 2012 13:27:46 -0400 Subject: NFS: Remove extra rpc_clnt argument to proc_lookup Now that I'm doing secinfo automatically in the v4 code this extra argument isn't needed. Signed-off-by: Bryan Schumaker Signed-off-by: Trond Myklebust diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 8789210..82b42e2 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1143,7 +1143,7 @@ static int nfs_lookup_revalidate(struct dentry *dentry, struct nameidata *nd) if (fhandle == NULL || fattr == NULL) goto out_error; - error = NFS_PROTO(dir)->lookup(NFS_SERVER(dir)->client, dir, &dentry->d_name, fhandle, fattr); + error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr); if (error) goto out_bad; if (nfs_compare_fh(NFS_FH(inode), fhandle)) @@ -1299,7 +1299,7 @@ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, stru parent = dentry->d_parent; /* Protect against concurrent sillydeletes */ nfs_block_sillyrename(parent); - error = NFS_PROTO(dir)->lookup(NFS_SERVER(dir)->client, dir, &dentry->d_name, fhandle, fattr); + error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr); if (error == -ENOENT) goto no_entry; if (error < 0) { @@ -1646,7 +1646,7 @@ int nfs_instantiate(struct dentry *dentry, struct nfs_fh *fhandle, if (dentry->d_inode) goto out; if (fhandle->size == 0) { - error = NFS_PROTO(dir)->lookup(NFS_SERVER(dir)->client, dir, &dentry->d_name, fhandle, fattr); + error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr); if (error) goto out_error; } diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c index e36fd8a..08b9c93 100644 --- a/fs/nfs/namespace.c +++ b/fs/nfs/namespace.c @@ -261,8 +261,7 @@ struct vfsmount *nfs_submount(struct nfs_server *server, struct dentry *dentry, struct dentry *parent = dget_parent(dentry); /* Look it up again to get its attributes */ - err = server->nfs_client->rpc_ops->lookup(server->client, parent->d_inode, - &dentry->d_name, fh, fattr); + err = server->nfs_client->rpc_ops->lookup(parent->d_inode, &dentry->d_name, fh, fattr); dput(parent); if (err != 0) return ERR_PTR(err); diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index c23214d..48bcad2 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -142,7 +142,7 @@ nfs3_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr, } static int -nfs3_proc_lookup(struct rpc_clnt *clnt, struct inode *dir, struct qstr *name, +nfs3_proc_lookup(struct inode *dir, struct qstr *name, struct nfs_fh *fhandle, struct nfs_fattr *fattr) { struct nfs3_diropargs arg = { diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 2091af2..1780391 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2578,7 +2578,7 @@ out: return err; } -static int nfs4_proc_lookup(struct rpc_clnt *clnt, struct inode *dir, struct qstr *name, +static int nfs4_proc_lookup(struct inode *dir, struct qstr *name, struct nfs_fh *fhandle, struct nfs_fattr *fattr) { int status; diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index 76b3229..fea9163 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c @@ -178,7 +178,7 @@ nfs_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr, } static int -nfs_proc_lookup(struct rpc_clnt *clnt, struct inode *dir, struct qstr *name, +nfs_proc_lookup(struct inode *dir, struct qstr *name, struct nfs_fh *fhandle, struct nfs_fattr *fattr) { struct nfs_diropargs arg = { diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 4dada94..c940d46 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1347,7 +1347,7 @@ struct nfs_rpc_ops { struct nfs_fattr *); int (*setattr) (struct dentry *, struct nfs_fattr *, struct iattr *); - int (*lookup) (struct rpc_clnt *clnt, struct inode *, struct qstr *, + int (*lookup) (struct inode *, struct qstr *, struct nfs_fh *, struct nfs_fattr *); int (*access) (struct inode *, struct nfs_access_entry *); int (*readlink)(struct inode *, struct page *, unsigned int, -- cgit v0.10.2 From c6b556a53970c262571b2cf60661a33ddeb1a651 Mon Sep 17 00:00:00 2001 From: Stephen Warren Date: Wed, 11 Apr 2012 10:37:02 -0600 Subject: ARM: tegra: update defconfig New options enabled: * RUNTIME_PM * ISL29028 (light and proximity) * INPUT_MPU3050 (gyro) * BATTERY_SBS * EM3027 RTC * INPUT_MISC (dependency) * POWER_SUPPLY (dependency) The IIO option seems to have been moved recently too. Signed-off-by: Stephen Warren diff --git a/arch/arm/configs/tegra_defconfig b/arch/arm/configs/tegra_defconfig index 351d670..7a90abb 100644 --- a/arch/arm/configs/tegra_defconfig +++ b/arch/arm/configs/tegra_defconfig @@ -45,6 +45,7 @@ CONFIG_CPU_FREQ=y CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND=y CONFIG_CPU_IDLE=y CONFIG_VFP=y +CONFIG_PM_RUNTIME=y CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y @@ -91,6 +92,8 @@ CONFIG_USB_NET_SMSC75XX=y CONFIG_USB_NET_SMSC95XX=y # CONFIG_WLAN is not set CONFIG_INPUT_EVDEV=y +CONFIG_INPUT_MISC=y +CONFIG_INPUT_MPU3050=y # CONFIG_VT is not set # CONFIG_LEGACY_PTYS is not set # CONFIG_DEVKMEM is not set @@ -103,6 +106,8 @@ CONFIG_I2C=y CONFIG_I2C_TEGRA=y CONFIG_SPI=y CONFIG_SPI_TEGRA=y +CONFIG_POWER_SUPPLY=y +CONFIG_BATTERY_SBS=y CONFIG_SENSORS_LM90=y CONFIG_MFD_TPS6586X=y CONFIG_REGULATOR=y @@ -133,16 +138,18 @@ CONFIG_MMC_SDHCI=y CONFIG_MMC_SDHCI_PLTFM=y CONFIG_MMC_SDHCI_TEGRA=y CONFIG_RTC_CLASS=y +CONFIG_RTC_DRV_EM3027=y CONFIG_RTC_DRV_TEGRA=y CONFIG_STAGING=y -CONFIG_IIO=y CONFIG_SENSORS_ISL29018=y +CONFIG_SENSORS_ISL29028=y CONFIG_SENSORS_AK8975=y CONFIG_MFD_NVEC=y CONFIG_KEYBOARD_NVEC=y CONFIG_SERIO_NVEC_PS2=y CONFIG_TEGRA_IOMMU_GART=y CONFIG_TEGRA_IOMMU_SMMU=y +CONFIG_IIO=y CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_POSIX_ACL=y -- cgit v0.10.2 From 9b5415b536cc3193e9608a7fced1372df8ce4dcf Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 27 Apr 2012 14:31:47 -0400 Subject: NFS: Fix a use-before-initialised warning in fs/nfs/write.c and fs/nfs/pnfs.c If the allocation of nfs_write_header fails, the list of nfs_pages that needs to be cleaned up is still on desc->pg_list... Reported-by: Bryan Schumaker Signed-off-by: Trond Myklebust Acked-by: Fred Isaman diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 4da05e4..39cbac5 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1308,7 +1308,7 @@ pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc) whdr = nfs_writehdr_alloc(); if (!whdr) { - desc->pg_completion_ops->error_cleanup(&hdr->pages); + desc->pg_completion_ops->error_cleanup(&desc->pg_list); put_lseg(desc->pg_lseg); desc->pg_lseg = NULL; return -ENOMEM; diff --git a/fs/nfs/write.c b/fs/nfs/write.c index fec214b..3636191 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1164,7 +1164,7 @@ static int nfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc) whdr = nfs_writehdr_alloc(); if (!whdr) { - desc->pg_completion_ops->error_cleanup(&hdr->pages); + desc->pg_completion_ops->error_cleanup(&desc->pg_list); return -ENOMEM; } hdr = &whdr->header; -- cgit v0.10.2 From 414fa985f91bdf61fe2baf8111c0ddbdb94808ea Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Tue, 24 Apr 2012 16:40:15 +0200 Subject: KVM: Improve readability of KVM API doc This helps to identify sections and it also fixes the numbering from 4.54 to 4.61. Signed-off-by: Jan Kiszka Signed-off-by: Marcelo Tosatti diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index a155221..b48f927 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -2,6 +2,7 @@ The Definitive KVM (Kernel-based Virtual Machine) API Documentation =================================================================== 1. General description +---------------------- The kvm API is a set of ioctls that are issued to control various aspects of a virtual machine. The ioctls belong to three classes @@ -23,7 +24,9 @@ of a virtual machine. The ioctls belong to three classes Only run vcpu ioctls from the same thread that was used to create the vcpu. + 2. File descriptors +------------------- The kvm API is centered around file descriptors. An initial open("/dev/kvm") obtains a handle to the kvm subsystem; this handle @@ -41,7 +44,9 @@ not cause harm to the host, their actual behavior is not guaranteed by the API. The only supported use is one virtual machine per process, and one vcpu per thread. + 3. Extensions +------------- As of Linux 2.6.22, the KVM ABI has been stabilized: no backward incompatible change are allowed. However, there is an extension @@ -53,7 +58,9 @@ Instead, kvm defines extension identifiers and a facility to query whether a particular extension identifier is available. If it is, a set of ioctls is available for application use. + 4. API description +------------------ This section describes ioctls that can be used to control kvm guests. For each ioctl, the following information is provided along with a @@ -75,6 +82,7 @@ description: Returns: the return value. General error numbers (EBADF, ENOMEM, EINVAL) are not detailed, but errors with specific meanings are. + 4.1 KVM_GET_API_VERSION Capability: basic @@ -90,6 +98,7 @@ supported. Applications should refuse to run if KVM_GET_API_VERSION returns a value other than 12. If this check passes, all ioctls described as 'basic' will be available. + 4.2 KVM_CREATE_VM Capability: basic @@ -109,6 +118,7 @@ In order to create user controlled virtual machines on S390, check KVM_CAP_S390_UCONTROL and use the flag KVM_VM_S390_UCONTROL as privileged user (CAP_SYS_ADMIN). + 4.3 KVM_GET_MSR_INDEX_LIST Capability: basic @@ -135,6 +145,7 @@ Note: if kvm indicates supports MCE (KVM_CAP_MCE), then the MCE bank MSRs are not returned in the MSR list, as different vcpus can have a different number of banks, as set via the KVM_X86_SETUP_MCE ioctl. + 4.4 KVM_CHECK_EXTENSION Capability: basic @@ -149,6 +160,7 @@ receives an integer that describes the extension availability. Generally 0 means no and 1 means yes, but some extensions may report additional information in the integer return value. + 4.5 KVM_GET_VCPU_MMAP_SIZE Capability: basic @@ -161,6 +173,7 @@ The KVM_RUN ioctl (cf.) communicates with userspace via a shared memory region. This ioctl returns the size of that region. See the KVM_RUN documentation for details. + 4.6 KVM_SET_MEMORY_REGION Capability: basic @@ -171,6 +184,7 @@ Returns: 0 on success, -1 on error This ioctl is obsolete and has been removed. + 4.7 KVM_CREATE_VCPU Capability: basic @@ -223,6 +237,7 @@ machines, the resulting vcpu fd can be memory mapped at page offset KVM_S390_SIE_PAGE_OFFSET in order to obtain a memory map of the virtual cpu's hardware control block. + 4.8 KVM_GET_DIRTY_LOG (vm ioctl) Capability: basic @@ -246,6 +261,7 @@ since the last call to this ioctl. Bit 0 is the first page in the memory slot. Ensure the entire structure is cleared to avoid padding issues. + 4.9 KVM_SET_MEMORY_ALIAS Capability: basic @@ -256,6 +272,7 @@ Returns: 0 (success), -1 (error) This ioctl is obsolete and has been removed. + 4.10 KVM_RUN Capability: basic @@ -272,6 +289,7 @@ obtained by mmap()ing the vcpu fd at offset 0, with the size given by KVM_GET_VCPU_MMAP_SIZE. The parameter block is formatted as a 'struct kvm_run' (see below). + 4.11 KVM_GET_REGS Capability: basic @@ -292,6 +310,7 @@ struct kvm_regs { __u64 rip, rflags; }; + 4.12 KVM_SET_REGS Capability: basic @@ -304,6 +323,7 @@ Writes the general purpose registers into the vcpu. See KVM_GET_REGS for the data structure. + 4.13 KVM_GET_SREGS Capability: basic @@ -331,6 +351,7 @@ interrupt_bitmap is a bitmap of pending external interrupts. At most one bit may be set. This interrupt has been acknowledged by the APIC but not yet injected into the cpu core. + 4.14 KVM_SET_SREGS Capability: basic @@ -342,6 +363,7 @@ Returns: 0 on success, -1 on error Writes special registers into the vcpu. See KVM_GET_SREGS for the data structures. + 4.15 KVM_TRANSLATE Capability: basic @@ -365,6 +387,7 @@ struct kvm_translation { __u8 pad[5]; }; + 4.16 KVM_INTERRUPT Capability: basic @@ -413,6 +436,7 @@ c) KVM_INTERRUPT_SET_LEVEL Note that any value for 'irq' other than the ones stated above is invalid and incurs unexpected behavior. + 4.17 KVM_DEBUG_GUEST Capability: basic @@ -423,6 +447,7 @@ Returns: -1 on error Support for this has been removed. Use KVM_SET_GUEST_DEBUG instead. + 4.18 KVM_GET_MSRS Capability: basic @@ -451,6 +476,7 @@ Application code should set the 'nmsrs' member (which indicates the size of the entries array) and the 'index' member of each array entry. kvm will fill in the 'data' member. + 4.19 KVM_SET_MSRS Capability: basic @@ -466,6 +492,7 @@ Application code should set the 'nmsrs' member (which indicates the size of the entries array), and the 'index' and 'data' members of each array entry. + 4.20 KVM_SET_CPUID Capability: basic @@ -494,6 +521,7 @@ struct kvm_cpuid { struct kvm_cpuid_entry entries[0]; }; + 4.21 KVM_SET_SIGNAL_MASK Capability: basic @@ -516,6 +544,7 @@ struct kvm_signal_mask { __u8 sigset[0]; }; + 4.22 KVM_GET_FPU Capability: basic @@ -541,6 +570,7 @@ struct kvm_fpu { __u32 pad2; }; + 4.23 KVM_SET_FPU Capability: basic @@ -566,6 +596,7 @@ struct kvm_fpu { __u32 pad2; }; + 4.24 KVM_CREATE_IRQCHIP Capability: KVM_CAP_IRQCHIP @@ -579,6 +610,7 @@ ioapic, a virtual PIC (two PICs, nested), and sets up future vcpus to have a local APIC. IRQ routing for GSIs 0-15 is set to both PIC and IOAPIC; GSI 16-23 only go to the IOAPIC. On ia64, a IOSAPIC is created. + 4.25 KVM_IRQ_LINE Capability: KVM_CAP_IRQCHIP @@ -600,6 +632,7 @@ struct kvm_irq_level { __u32 level; /* 0 or 1 */ }; + 4.26 KVM_GET_IRQCHIP Capability: KVM_CAP_IRQCHIP @@ -621,6 +654,7 @@ struct kvm_irqchip { } chip; }; + 4.27 KVM_SET_IRQCHIP Capability: KVM_CAP_IRQCHIP @@ -642,6 +676,7 @@ struct kvm_irqchip { } chip; }; + 4.28 KVM_XEN_HVM_CONFIG Capability: KVM_CAP_XEN_HVM @@ -666,6 +701,7 @@ struct kvm_xen_hvm_config { __u8 pad2[30]; }; + 4.29 KVM_GET_CLOCK Capability: KVM_CAP_ADJUST_CLOCK @@ -684,6 +720,7 @@ struct kvm_clock_data { __u32 pad[9]; }; + 4.30 KVM_SET_CLOCK Capability: KVM_CAP_ADJUST_CLOCK @@ -702,6 +739,7 @@ struct kvm_clock_data { __u32 pad[9]; }; + 4.31 KVM_GET_VCPU_EVENTS Capability: KVM_CAP_VCPU_EVENTS @@ -741,6 +779,7 @@ struct kvm_vcpu_events { KVM_VCPUEVENT_VALID_SHADOW may be set in the flags field to signal that interrupt.shadow contains a valid state. Otherwise, this field is undefined. + 4.32 KVM_SET_VCPU_EVENTS Capability: KVM_CAP_VCPU_EVENTS @@ -767,6 +806,7 @@ If KVM_CAP_INTR_SHADOW is available, KVM_VCPUEVENT_VALID_SHADOW can be set in the flags field to signal that interrupt.shadow contains a valid state and shall be written into the VCPU. + 4.33 KVM_GET_DEBUGREGS Capability: KVM_CAP_DEBUGREGS @@ -785,6 +825,7 @@ struct kvm_debugregs { __u64 reserved[9]; }; + 4.34 KVM_SET_DEBUGREGS Capability: KVM_CAP_DEBUGREGS @@ -798,6 +839,7 @@ Writes debug registers into the vcpu. See KVM_GET_DEBUGREGS for the data structure. The flags field is unused yet and must be cleared on entry. + 4.35 KVM_SET_USER_MEMORY_REGION Capability: KVM_CAP_USER_MEM @@ -844,6 +886,7 @@ It is recommended to use this API instead of the KVM_SET_MEMORY_REGION ioctl. The KVM_SET_MEMORY_REGION does not allow fine grained control over memory allocation and is deprecated. + 4.36 KVM_SET_TSS_ADDR Capability: KVM_CAP_SET_TSS_ADDR @@ -862,6 +905,7 @@ This ioctl is required on Intel-based hosts. This is needed on Intel hardware because of a quirk in the virtualization implementation (see the internals documentation when it pops into existence). + 4.37 KVM_ENABLE_CAP Capability: KVM_CAP_ENABLE_CAP @@ -897,6 +941,7 @@ function properly, this is the place to put them. __u8 pad[64]; }; + 4.38 KVM_GET_MP_STATE Capability: KVM_CAP_MP_STATE @@ -927,6 +972,7 @@ Possible values are: This ioctl is only useful after KVM_CREATE_IRQCHIP. Without an in-kernel irqchip, the multiprocessing state must be maintained by userspace. + 4.39 KVM_SET_MP_STATE Capability: KVM_CAP_MP_STATE @@ -941,6 +987,7 @@ arguments. This ioctl is only useful after KVM_CREATE_IRQCHIP. Without an in-kernel irqchip, the multiprocessing state must be maintained by userspace. + 4.40 KVM_SET_IDENTITY_MAP_ADDR Capability: KVM_CAP_SET_IDENTITY_MAP_ADDR @@ -959,6 +1006,7 @@ This ioctl is required on Intel-based hosts. This is needed on Intel hardware because of a quirk in the virtualization implementation (see the internals documentation when it pops into existence). + 4.41 KVM_SET_BOOT_CPU_ID Capability: KVM_CAP_SET_BOOT_CPU_ID @@ -971,6 +1019,7 @@ Define which vcpu is the Bootstrap Processor (BSP). Values are the same as the vcpu id in KVM_CREATE_VCPU. If this ioctl is not called, the default is vcpu 0. + 4.42 KVM_GET_XSAVE Capability: KVM_CAP_XSAVE @@ -985,6 +1034,7 @@ struct kvm_xsave { This ioctl would copy current vcpu's xsave struct to the userspace. + 4.43 KVM_SET_XSAVE Capability: KVM_CAP_XSAVE @@ -999,6 +1049,7 @@ struct kvm_xsave { This ioctl would copy userspace's xsave struct to the kernel. + 4.44 KVM_GET_XCRS Capability: KVM_CAP_XCRS @@ -1022,6 +1073,7 @@ struct kvm_xcrs { This ioctl would copy current vcpu's xcrs to the userspace. + 4.45 KVM_SET_XCRS Capability: KVM_CAP_XCRS @@ -1045,6 +1097,7 @@ struct kvm_xcrs { This ioctl would set vcpu's xcr to the value userspace specified. + 4.46 KVM_GET_SUPPORTED_CPUID Capability: KVM_CAP_EXT_CPUID @@ -1119,6 +1172,7 @@ support. Instead it is reported via if that returns true and you use KVM_CREATE_IRQCHIP, or if you emulate the feature in userspace, then you can enable the feature for KVM_SET_CPUID2. + 4.47 KVM_PPC_GET_PVINFO Capability: KVM_CAP_PPC_GET_PVINFO @@ -1142,6 +1196,7 @@ of 4 instructions that make up a hypercall. If any additional field gets added to this structure later on, a bit for that additional piece of information will be set in the flags bitmap. + 4.48 KVM_ASSIGN_PCI_DEVICE Capability: KVM_CAP_DEVICE_ASSIGNMENT @@ -1185,6 +1240,7 @@ Only PCI header type 0 devices with PCI BAR resources are supported by device assignment. The user requesting this ioctl must have read/write access to the PCI sysfs resource files associated with the device. + 4.49 KVM_DEASSIGN_PCI_DEVICE Capability: KVM_CAP_DEVICE_DEASSIGNMENT @@ -1198,6 +1254,7 @@ Ends PCI device assignment, releasing all associated resources. See KVM_CAP_DEVICE_ASSIGNMENT for the data structure. Only assigned_dev_id is used in kvm_assigned_pci_dev to identify the device. + 4.50 KVM_ASSIGN_DEV_IRQ Capability: KVM_CAP_ASSIGN_DEV_IRQ @@ -1231,6 +1288,7 @@ The following flags are defined: It is not valid to specify multiple types per host or guest IRQ. However, the IRQ type of host and guest can differ or can even be null. + 4.51 KVM_DEASSIGN_DEV_IRQ Capability: KVM_CAP_ASSIGN_DEV_IRQ @@ -1245,6 +1303,7 @@ See KVM_ASSIGN_DEV_IRQ for the data structure. The target device is specified by assigned_dev_id, flags must correspond to the IRQ type specified on KVM_ASSIGN_DEV_IRQ. Partial deassignment of host or guest IRQ is allowed. + 4.52 KVM_SET_GSI_ROUTING Capability: KVM_CAP_IRQ_ROUTING @@ -1293,6 +1352,7 @@ struct kvm_irq_routing_msi { __u32 pad; }; + 4.53 KVM_ASSIGN_SET_MSIX_NR Capability: KVM_CAP_DEVICE_MSIX @@ -1314,6 +1374,7 @@ struct kvm_assigned_msix_nr { #define KVM_MAX_MSIX_PER_DEV 256 + 4.54 KVM_ASSIGN_SET_MSIX_ENTRY Capability: KVM_CAP_DEVICE_MSIX @@ -1332,7 +1393,8 @@ struct kvm_assigned_msix_entry { __u16 padding[3]; }; -4.54 KVM_SET_TSC_KHZ + +4.55 KVM_SET_TSC_KHZ Capability: KVM_CAP_TSC_CONTROL Architectures: x86 @@ -1343,7 +1405,8 @@ Returns: 0 on success, -1 on error Specifies the tsc frequency for the virtual machine. The unit of the frequency is KHz. -4.55 KVM_GET_TSC_KHZ + +4.56 KVM_GET_TSC_KHZ Capability: KVM_CAP_GET_TSC_KHZ Architectures: x86 @@ -1355,7 +1418,8 @@ Returns the tsc frequency of the guest. The unit of the return value is KHz. If the host has unstable tsc this ioctl returns -EIO instead as an error. -4.56 KVM_GET_LAPIC + +4.57 KVM_GET_LAPIC Capability: KVM_CAP_IRQCHIP Architectures: x86 @@ -1371,7 +1435,8 @@ struct kvm_lapic_state { Reads the Local APIC registers and copies them into the input argument. The data format and layout are the same as documented in the architecture manual. -4.57 KVM_SET_LAPIC + +4.58 KVM_SET_LAPIC Capability: KVM_CAP_IRQCHIP Architectures: x86 @@ -1387,7 +1452,8 @@ struct kvm_lapic_state { Copies the input argument into the the Local APIC registers. The data format and layout are the same as documented in the architecture manual. -4.58 KVM_IOEVENTFD + +4.59 KVM_IOEVENTFD Capability: KVM_CAP_IOEVENTFD Architectures: all @@ -1417,7 +1483,8 @@ The following flags are defined: If datamatch flag is set, the event will be signaled only if the written value to the registered address is equal to datamatch in struct kvm_ioeventfd. -4.59 KVM_DIRTY_TLB + +4.60 KVM_DIRTY_TLB Capability: KVM_CAP_SW_TLB Architectures: ppc @@ -1449,7 +1516,8 @@ The "num_dirty" field is a performance hint for KVM to determine whether it should skip processing the bitmap and just invalidate everything. It must be set to the number of set bits in the bitmap. -4.60 KVM_ASSIGN_SET_INTX_MASK + +4.61 KVM_ASSIGN_SET_INTX_MASK Capability: KVM_CAP_PCI_2_3 Architectures: x86 @@ -1482,6 +1550,7 @@ See KVM_ASSIGN_DEV_IRQ for the data structure. The target device is specified by assigned_dev_id. In the flags field, only KVM_DEV_ASSIGN_MASK_INTX is evaluated. + 4.62 KVM_CREATE_SPAPR_TCE Capability: KVM_CAP_SPAPR_TCE @@ -1517,6 +1586,7 @@ the entries written by kernel-handled H_PUT_TCE calls, and also lets userspace update the TCE table directly which is useful in some circumstances. + 4.63 KVM_ALLOCATE_RMA Capability: KVM_CAP_PPC_RMA @@ -1549,6 +1619,7 @@ is supported; 2 if the processor requires all virtual machines to have an RMA, or 1 if the processor can use an RMA but doesn't require it, because it supports the Virtual RMA (VRMA) facility. + 4.64 KVM_NMI Capability: KVM_CAP_USER_NMI @@ -1574,6 +1645,7 @@ following algorithm: Some guests configure the LINT1 NMI input to cause a panic, aiding in debugging. + 4.65 KVM_S390_UCAS_MAP Capability: KVM_CAP_S390_UCONTROL @@ -1593,6 +1665,7 @@ This ioctl maps the memory at "user_addr" with the length "length" to the vcpu's address space starting at "vcpu_addr". All parameters need to be alligned by 1 megabyte. + 4.66 KVM_S390_UCAS_UNMAP Capability: KVM_CAP_S390_UCONTROL @@ -1612,6 +1685,7 @@ This ioctl unmaps the memory in the vcpu's address space starting at "vcpu_addr" with the length "length". The field "user_addr" is ignored. All parameters need to be alligned by 1 megabyte. + 4.67 KVM_S390_VCPU_FAULT Capability: KVM_CAP_S390_UCONTROL @@ -1628,6 +1702,7 @@ table upfront. This is useful to handle validity intercepts for user controlled virtual machines to fault in the virtual cpu's lowcore pages prior to calling the KVM_RUN ioctl. + 4.68 KVM_SET_ONE_REG Capability: KVM_CAP_ONE_REG @@ -1653,6 +1728,7 @@ registers, find a list below: | | PPC | KVM_REG_PPC_HIOR | 64 + 4.69 KVM_GET_ONE_REG Capability: KVM_CAP_ONE_REG @@ -1669,6 +1745,7 @@ at the memory location pointed to by "addr". The list of registers accessible using this interface is identical to the list in 4.64. + 4.70 KVM_KVMCLOCK_CTRL Capability: KVM_CAP_KVMCLOCK_CTRL @@ -1689,6 +1766,7 @@ where the guest will clear the flag: when the soft lockup watchdog timer resets itself or when a soft lockup is detected. This ioctl can be called any time after pausing the vcpu, but before it is resumed. + 4.71 KVM_SIGNAL_MSI Capability: KVM_CAP_SIGNAL_MSI @@ -1710,7 +1788,9 @@ struct kvm_msi { No flags are defined so far. The corresponding field must be 0. + 5. The kvm_run structure +------------------------ Application code obtains a pointer to the kvm_run structure by mmap()ing a vcpu fd. From that point, application code can control @@ -1951,7 +2031,9 @@ and usually define the validity of a groups of registers. (e.g. one bit }; + 6. Capabilities that can be enabled +----------------------------------- There are certain capabilities that change the behavior of the virtual CPU when enabled. To enable them, please see section 4.37. Below you can find a list of @@ -1967,6 +2049,7 @@ The following information is provided along with the description: Returns: the return value. General error numbers (EBADF, ENOMEM, EINVAL) are not detailed, but errors with specific meanings are. + 6.1 KVM_CAP_PPC_OSI Architectures: ppc @@ -1980,6 +2063,7 @@ between the guest and the host. When this capability is enabled, KVM_EXIT_OSI can occur. + 6.2 KVM_CAP_PPC_PAPR Architectures: ppc @@ -1998,6 +2082,7 @@ HTAB invisible to the guest. When this capability is enabled, KVM_EXIT_PAPR_HCALL can occur. + 6.3 KVM_CAP_SW_TLB Architectures: ppc -- cgit v0.10.2 From 0589ff6c11d8128cf053c3ddc75b0f6d8b71c62b Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Tue, 24 Apr 2012 16:40:16 +0200 Subject: KVM: x86: Document in-kernel PIT API Add descriptions for KVM_CREATE_PIT2 and KVM_GET/SET_PIT2. Signed-off-by: Jan Kiszka Signed-off-by: Marcelo Tosatti diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index b48f927..a7cb93c 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -1789,6 +1789,69 @@ struct kvm_msi { No flags are defined so far. The corresponding field must be 0. +4.71 KVM_CREATE_PIT2 + +Capability: KVM_CAP_PIT2 +Architectures: x86 +Type: vm ioctl +Parameters: struct kvm_pit_config (in) +Returns: 0 on success, -1 on error + +Creates an in-kernel device model for the i8254 PIT. This call is only valid +after enabling in-kernel irqchip support via KVM_CREATE_IRQCHIP. The following +parameters have to be passed: + +struct kvm_pit_config { + __u32 flags; + __u32 pad[15]; +}; + +Valid flags are: + +#define KVM_PIT_SPEAKER_DUMMY 1 /* emulate speaker port stub */ + +This IOCTL replaces the obsolete KVM_CREATE_PIT. + + +4.72 KVM_GET_PIT2 + +Capability: KVM_CAP_PIT_STATE2 +Architectures: x86 +Type: vm ioctl +Parameters: struct kvm_pit_state2 (out) +Returns: 0 on success, -1 on error + +Retrieves the state of the in-kernel PIT model. Only valid after +KVM_CREATE_PIT2. The state is returned in the following structure: + +struct kvm_pit_state2 { + struct kvm_pit_channel_state channels[3]; + __u32 flags; + __u32 reserved[9]; +}; + +Valid flags are: + +/* disable PIT in HPET legacy mode */ +#define KVM_PIT_FLAGS_HPET_LEGACY 0x00000001 + +This IOCTL replaces the obsolete KVM_GET_PIT. + + +4.73 KVM_SET_PIT2 + +Capability: KVM_CAP_PIT_STATE2 +Architectures: x86 +Type: vm ioctl +Parameters: struct kvm_pit_state2 (in) +Returns: 0 on success, -1 on error + +Sets the state of the in-kernel PIT model. Only valid after KVM_CREATE_PIT2. +See KVM_GET_PIT2 for details on struct kvm_pit_state2. + +This IOCTL replaces the obsolete KVM_SET_PIT. + + 5. The kvm_run structure ------------------------ -- cgit v0.10.2 From b6ddf05ff68d81a7c1736717faf492b70e9bf4f9 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Tue, 24 Apr 2012 16:40:17 +0200 Subject: KVM: x86: Run PIT work in own kthread We can't run PIT IRQ injection work in the interrupt context of the host timer. This would allow the user to influence the handler complexity by asking for a broadcast to a large number of VCPUs. Therefore, this work was pushed into workqueue context in 9d244caf2e. However, this prevents prioritizing the PIT injection over other task as workqueues share kernel threads. This replaces the workqueue with a kthread worker and gives that thread a name in the format "kvm-pit/". That allows to identify and adjust the kthread priority according to the VM process parameters. Signed-off-by: Jan Kiszka Signed-off-by: Marcelo Tosatti diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index a7cb93c..eb62761 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -1810,6 +1810,14 @@ Valid flags are: #define KVM_PIT_SPEAKER_DUMMY 1 /* emulate speaker port stub */ +PIT timer interrupts may use a per-VM kernel thread for injection. If it +exists, this thread will have a name of the following pattern: + +kvm-pit/ + +When running a guest with elevated priorities, the scheduling parameters of +this thread may have to be adjusted accordingly. + This IOCTL replaces the obsolete KVM_CREATE_PIT. diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c index d68f99d..adba28f 100644 --- a/arch/x86/kvm/i8254.c +++ b/arch/x86/kvm/i8254.c @@ -34,7 +34,6 @@ #include #include -#include #include "irq.h" #include "i8254.h" @@ -249,7 +248,7 @@ static void kvm_pit_ack_irq(struct kvm_irq_ack_notifier *kian) /* in this case, we had multiple outstanding pit interrupts * that we needed to inject. Reinject */ - queue_work(ps->pit->wq, &ps->pit->expired); + queue_kthread_work(&ps->pit->worker, &ps->pit->expired); ps->irq_ack = 1; spin_unlock(&ps->inject_lock); } @@ -270,7 +269,7 @@ void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu) static void destroy_pit_timer(struct kvm_pit *pit) { hrtimer_cancel(&pit->pit_state.pit_timer.timer); - cancel_work_sync(&pit->expired); + flush_kthread_work(&pit->expired); } static bool kpit_is_periodic(struct kvm_timer *ktimer) @@ -284,7 +283,7 @@ static struct kvm_timer_ops kpit_ops = { .is_periodic = kpit_is_periodic, }; -static void pit_do_work(struct work_struct *work) +static void pit_do_work(struct kthread_work *work) { struct kvm_pit *pit = container_of(work, struct kvm_pit, expired); struct kvm *kvm = pit->kvm; @@ -328,7 +327,7 @@ static enum hrtimer_restart pit_timer_fn(struct hrtimer *data) if (ktimer->reinject || !atomic_read(&ktimer->pending)) { atomic_inc(&ktimer->pending); - queue_work(pt->wq, &pt->expired); + queue_kthread_work(&pt->worker, &pt->expired); } if (ktimer->t_ops->is_periodic(ktimer)) { @@ -353,7 +352,7 @@ static void create_pit_timer(struct kvm *kvm, u32 val, int is_period) /* TODO The new value only affected after the retriggered */ hrtimer_cancel(&pt->timer); - cancel_work_sync(&ps->pit->expired); + flush_kthread_work(&ps->pit->expired); pt->period = interval; ps->is_periodic = is_period; @@ -669,6 +668,8 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags) { struct kvm_pit *pit; struct kvm_kpit_state *pit_state; + struct pid *pid; + pid_t pid_nr; int ret; pit = kzalloc(sizeof(struct kvm_pit), GFP_KERNEL); @@ -685,14 +686,20 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags) mutex_lock(&pit->pit_state.lock); spin_lock_init(&pit->pit_state.inject_lock); - pit->wq = create_singlethread_workqueue("kvm-pit-wq"); - if (!pit->wq) { + pid = get_pid(task_tgid(current)); + pid_nr = pid_vnr(pid); + put_pid(pid); + + init_kthread_worker(&pit->worker); + pit->worker_task = kthread_run(kthread_worker_fn, &pit->worker, + "kvm-pit/%d", pid_nr); + if (IS_ERR(pit->worker_task)) { mutex_unlock(&pit->pit_state.lock); kvm_free_irq_source_id(kvm, pit->irq_source_id); kfree(pit); return NULL; } - INIT_WORK(&pit->expired, pit_do_work); + init_kthread_work(&pit->expired, pit_do_work); kvm->arch.vpit = pit; pit->kvm = kvm; @@ -736,7 +743,7 @@ fail: kvm_unregister_irq_mask_notifier(kvm, 0, &pit->mask_notifier); kvm_unregister_irq_ack_notifier(kvm, &pit_state->irq_ack_notifier); kvm_free_irq_source_id(kvm, pit->irq_source_id); - destroy_workqueue(pit->wq); + kthread_stop(pit->worker_task); kfree(pit); return NULL; } @@ -756,10 +763,10 @@ void kvm_free_pit(struct kvm *kvm) mutex_lock(&kvm->arch.vpit->pit_state.lock); timer = &kvm->arch.vpit->pit_state.pit_timer.timer; hrtimer_cancel(timer); - cancel_work_sync(&kvm->arch.vpit->expired); + flush_kthread_work(&kvm->arch.vpit->expired); + kthread_stop(kvm->arch.vpit->worker_task); kvm_free_irq_source_id(kvm, kvm->arch.vpit->irq_source_id); mutex_unlock(&kvm->arch.vpit->pit_state.lock); - destroy_workqueue(kvm->arch.vpit->wq); kfree(kvm->arch.vpit); } } diff --git a/arch/x86/kvm/i8254.h b/arch/x86/kvm/i8254.h index 51a9742..fdf4042 100644 --- a/arch/x86/kvm/i8254.h +++ b/arch/x86/kvm/i8254.h @@ -1,6 +1,8 @@ #ifndef __I8254_H #define __I8254_H +#include + #include "iodev.h" struct kvm_kpit_channel_state { @@ -39,8 +41,9 @@ struct kvm_pit { struct kvm_kpit_state pit_state; int irq_source_id; struct kvm_irq_mask_notifier mask_notifier; - struct workqueue_struct *wq; - struct work_struct expired; + struct kthread_worker worker; + struct task_struct *worker_task; + struct kthread_work expired; }; #define KVM_PIT_BASE_ADDRESS 0x40 -- cgit v0.10.2 From b58fee2189b17719c846f65ffe9483c2814e6605 Mon Sep 17 00:00:00 2001 From: Bryan Schumaker Date: Mon, 30 Apr 2012 13:06:53 -0400 Subject: NFS: pnfs_pageio_init_read() and init_write() need an extra argument This is only when CONFIG_NFS_V4_1 isn't enabled. Signed-off-by: Bryan Schumaker Acked-by: Fred Isaman Signed-off-by: Trond Myklebust diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 8efbee7..f20054b 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -424,12 +424,14 @@ static inline void unset_pnfs_layoutdriver(struct nfs_server *s) { } -static inline bool pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode) +static inline bool pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode, + const struct nfs_pgio_completion_ops *compl_ops) { return false; } -static inline bool pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode, int ioflags) +static inline bool pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode, int ioflags, + const struct nfs_pgio_completion_ops *compl_ops) { return false; } -- cgit v0.10.2 From 24fc9211f4d48c04882a52e42b21c9b4abc4f9bf Mon Sep 17 00:00:00 2001 From: Bryan Schumaker Date: Mon, 30 Apr 2012 13:27:11 -0400 Subject: NFS: Define nfs_direct_write_schedule_work() when v3 and v4 are disabled v2 doesn't have commits, so this function can be a no-op. Signed-off-by: Bryan Schumaker Signed-off-by: Trond Myklebust diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index d44de2f..e83545c 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -601,6 +601,9 @@ static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode } #else +static void nfs_direct_write_schedule_work(struct work_struct *work) +{ +} static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode *inode) { -- cgit v0.10.2 From 68cd6fa4f3be07ba648e22617dfa16a40d671d19 Mon Sep 17 00:00:00 2001 From: Bryan Schumaker Date: Mon, 30 Apr 2012 14:30:22 -0400 Subject: NFS: Define dummy nfs_init_cinfo() and nfs_init_cinfo_from_inode() These are needed when v3 and v4 are not enabled. Signed-off-by: Bryan Schumaker Signed-off-by: Trond Myklebust diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 3636191..2f80aa5 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -567,6 +567,17 @@ int nfs_write_need_commit(struct nfs_write_data *data) } #else +static void nfs_init_cinfo_from_inode(struct nfs_commit_info *cinfo, + struct inode *inode) +{ +} + +void nfs_init_cinfo(struct nfs_commit_info *cinfo, + struct inode *inode, + struct nfs_direct_req *dreq) +{ +} + void nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg, struct nfs_commit_info *cinfo) -- cgit v0.10.2 From 71e8cc00c63e8518ce86b4079355fc9086a4869d Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 30 Apr 2012 13:22:54 -0400 Subject: NFS: Ensure that we break out of read/write_schedule_segment on error Currently we do break out of the for() loop, but we also need to break out of the enclosing do {} while()... Signed-off-by: Trond Myklebust Cc: Fred Isaman diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index e83545c..f30d5c2 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -396,7 +396,7 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_pageio_descriptor *de pos += req_len; count -= req_len; } - } while (count != 0); + } while (count != 0 && result >= 0); kfree(pagevec); @@ -692,6 +692,7 @@ static ssize_t nfs_direct_write_schedule_segment(struct nfs_pageio_descriptor *d nfs_release_request(req); nfs_direct_release_pages(pagevec + i, npages - i); + break; } pgbase = 0; bytes -= req_len; @@ -700,7 +701,7 @@ static ssize_t nfs_direct_write_schedule_segment(struct nfs_pageio_descriptor *d pos += req_len; count -= req_len; } - } while (count != 0); + } while (count != 0 && result >= 0); kfree(pagevec); -- cgit v0.10.2 From 3e9e0ca3f19e911ce13c2e6c9858fcb41a37496c Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 30 Apr 2012 13:40:06 -0400 Subject: NFS: O_DIRECT pgio_completion_ops error_cleanup must unlock the request Signed-off-by: Trond Myklebust Cc: Fred Isaman diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index f30d5c2..af02bde 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -292,7 +292,7 @@ out_put: hdr->release(hdr); } -static void nfs_sync_pgio_error(struct list_head *head) +static void nfs_read_sync_pgio_error(struct list_head *head) { struct nfs_page *req; @@ -309,7 +309,7 @@ static void nfs_direct_pgio_init(struct nfs_pgio_header *hdr) } static const struct nfs_pgio_completion_ops nfs_direct_read_completion_ops = { - .error_cleanup = nfs_sync_pgio_error, + .error_cleanup = nfs_read_sync_pgio_error, .init_hdr = nfs_direct_pgio_init, .completion = nfs_direct_read_completion, }; @@ -775,8 +775,20 @@ out_put: hdr->release(hdr); } +static void nfs_write_sync_pgio_error(struct list_head *head) +{ + struct nfs_page *req; + + while (!list_empty(head)) { + req = nfs_list_entry(head->next); + nfs_list_remove_request(req); + nfs_release_request(req); + nfs_unlock_request(req); + } +} + static const struct nfs_pgio_completion_ops nfs_direct_write_completion_ops = { - .error_cleanup = nfs_sync_pgio_error, + .error_cleanup = nfs_write_sync_pgio_error, .init_hdr = nfs_direct_pgio_init, .completion = nfs_direct_write_completion, }; -- cgit v0.10.2 From 6d74743b088d116e31fe1b73f47e782ee2016b94 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 30 Apr 2012 13:27:31 -0400 Subject: NFS: Simplify O_DIRECT page referencing The O_DIRECT code shouldn't need to hold 2 references to each page. The reference held by the struct nfs_page should suffice. Signed-off-by: Trond Myklebust Cc: Fred Isaman diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index af02bde..78d1ead 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -268,10 +268,9 @@ static void nfs_direct_read_completion(struct nfs_pgio_header *hdr) } bytes += req->wb_bytes; nfs_list_remove_request(req); - nfs_direct_readpage_release(req); if (!PageCompound(page)) set_page_dirty(page); - page_cache_release(page); + nfs_direct_readpage_release(req); } } else { while (!list_empty(&hdr->pages)) { @@ -281,7 +280,6 @@ static void nfs_direct_read_completion(struct nfs_pgio_header *hdr) if (!PageCompound(req->wb_page)) set_page_dirty(req->wb_page); bytes += req->wb_bytes; - page_cache_release(req->wb_page); nfs_list_remove_request(req); nfs_direct_readpage_release(req); } @@ -375,8 +373,6 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_pageio_descriptor *de pagevec[i], pgbase, req_len); if (IS_ERR(req)) { - nfs_direct_release_pages(pagevec + i, - npages - i); result = PTR_ERR(req); break; } @@ -385,8 +381,6 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_pageio_descriptor *de if (!nfs_pageio_add_request(desc, req)) { result = desc->pg_error; nfs_release_request(req); - nfs_direct_release_pages(pagevec + i, - npages - i); break; } pgbase = 0; @@ -396,6 +390,8 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_pageio_descriptor *de pos += req_len; count -= req_len; } + /* The nfs_page now hold references to these pages */ + nfs_direct_release_pages(pagevec, npages); } while (count != 0 && result >= 0); kfree(pagevec); @@ -509,7 +505,6 @@ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq) nfs_pageio_complete(&desc); while (!list_empty(&failed)) { - page_cache_release(req->wb_page); nfs_release_request(req); nfs_unlock_request(req); } @@ -542,10 +537,8 @@ static void nfs_direct_commit_complete(struct nfs_commit_data *data) if (dreq->flags == NFS_ODIRECT_RESCHED_WRITES) { /* Note the rewrite will go through mds */ nfs_mark_request_commit(req, NULL, &cinfo); - } else { - page_cache_release(req->wb_page); + } else nfs_release_request(req); - } nfs_unlock_request(req); } @@ -678,8 +671,6 @@ static ssize_t nfs_direct_write_schedule_segment(struct nfs_pageio_descriptor *d pagevec[i], pgbase, req_len); if (IS_ERR(req)) { - nfs_direct_release_pages(pagevec + i, - npages - i); result = PTR_ERR(req); break; } @@ -690,8 +681,6 @@ static ssize_t nfs_direct_write_schedule_segment(struct nfs_pageio_descriptor *d result = desc->pg_error; nfs_unlock_request(req); nfs_release_request(req); - nfs_direct_release_pages(pagevec + i, - npages - i); break; } pgbase = 0; @@ -701,6 +690,8 @@ static ssize_t nfs_direct_write_schedule_segment(struct nfs_pageio_descriptor *d pos += req_len; count -= req_len; } + /* The nfs_page now hold references to these pages */ + nfs_direct_release_pages(pagevec, npages); } while (count != 0 && result >= 0); kfree(pagevec); @@ -763,7 +754,6 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr) nfs_mark_request_commit(req, hdr->lseg, &cinfo); break; default: - page_cache_release(req->wb_page); nfs_release_request(req); } nfs_unlock_request(req); -- cgit v0.10.2 From 292f3eeef00a20fa0ef4feec62792ad0065760a0 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 30 Apr 2012 18:31:49 -0400 Subject: NFS: Use kmem_cache_zalloc() in nfs_direct_req_alloc Simplify the initialisation of O_DIRECT requests. Signed-off-by: Trond Myklebust Cc: Fred Isaman diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 78d1ead..f17e469 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -149,26 +149,16 @@ static inline struct nfs_direct_req *nfs_direct_req_alloc(void) { struct nfs_direct_req *dreq; - dreq = kmem_cache_alloc(nfs_direct_cachep, GFP_KERNEL); + dreq = kmem_cache_zalloc(nfs_direct_cachep, GFP_KERNEL); if (!dreq) return NULL; kref_init(&dreq->kref); kref_get(&dreq->kref); init_completion(&dreq->completion); - dreq->mds_cinfo.ncommit = 0; - atomic_set(&dreq->mds_cinfo.rpcs_out, 0); INIT_LIST_HEAD(&dreq->mds_cinfo.list); INIT_WORK(&dreq->work, nfs_direct_write_schedule_work); - memset(&dreq->ds_cinfo, 0, sizeof(dreq->ds_cinfo)); - dreq->iocb = NULL; - dreq->ctx = NULL; - dreq->l_ctx = NULL; spin_lock_init(&dreq->lock); - atomic_set(&dreq->io_count, 0); - dreq->count = 0; - dreq->error = 0; - dreq->flags = 0; return dreq; } -- cgit v0.10.2 From 4f97615d19c370d1d907ef37f8bcd9c3672851ca Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 30 Apr 2012 18:39:20 -0400 Subject: NFS: Fix a compile issue when CONFIG_NFS_V4_1 is undefined struct nfs_direct_req can't compile when struct pnfs_ds_commit_info is undefined. Reported-by: Bryan Schumaker Signed-off-by: Trond Myklebust Cc: Fred Isaman diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index c940d46..6deb8f0 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1176,6 +1176,11 @@ struct nfs41_free_stateid_res { struct nfs4_sequence_res seq_res; }; +#else + +struct pnfs_ds_commit_info { +}; + #endif /* CONFIG_NFS_V4_1 */ struct nfs_page; -- cgit v0.10.2 From 41628d334361670d825fb03c04568f5ef9f084dc Mon Sep 17 00:00:00 2001 From: Konstantin Weitz Date: Wed, 25 Apr 2012 15:30:38 +0200 Subject: KVM: s390: Implement the directed yield (diag 9c) hypervisor call for KVM This patch implements the directed yield hypercall found on other System z hypervisors. It delegates execution time to the virtual cpu specified in the instruction's parameter. Useful to avoid long spinlock waits in the guest. Christian Borntraeger: moved common code in virt/kvm/ Signed-off-by: Konstantin Weitz Signed-off-by: Christian Borntraeger Signed-off-by: Marcelo Tosatti diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 7343872..dd17537 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -148,6 +148,7 @@ struct kvm_vcpu_stat { u32 instruction_sigp_restart; u32 diagnose_10; u32 diagnose_44; + u32 diagnose_9c; }; struct kvm_s390_io_info { diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c index a353f0e..2d2ae32 100644 --- a/arch/s390/kvm/diag.c +++ b/arch/s390/kvm/diag.c @@ -53,6 +53,29 @@ static int __diag_time_slice_end(struct kvm_vcpu *vcpu) return 0; } +static int __diag_time_slice_end_directed(struct kvm_vcpu *vcpu) +{ + struct kvm *kvm = vcpu->kvm; + struct kvm_vcpu *tcpu; + int tid; + int i; + + tid = vcpu->run->s.regs.gprs[(vcpu->arch.sie_block->ipa & 0xf0) >> 4]; + vcpu->stat.diagnose_9c++; + VCPU_EVENT(vcpu, 5, "diag time slice end directed to %d", tid); + + if (tid == vcpu->vcpu_id) + return 0; + + kvm_for_each_vcpu(i, tcpu, kvm) + if (tcpu->vcpu_id == tid) { + kvm_vcpu_yield_to(tcpu); + break; + } + + return 0; +} + static int __diag_ipl_functions(struct kvm_vcpu *vcpu) { unsigned int reg = vcpu->arch.sie_block->ipa & 0xf; @@ -89,6 +112,8 @@ int kvm_s390_handle_diag(struct kvm_vcpu *vcpu) return diag_release_pages(vcpu); case 0x44: return __diag_time_slice_end(vcpu); + case 0x9c: + return __diag_time_slice_end_directed(vcpu); case 0x308: return __diag_ipl_functions(vcpu); default: diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index d30c835..fd98914 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -74,6 +74,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { { "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) }, { "diagnose_10", VCPU_STAT(diagnose_10) }, { "diagnose_44", VCPU_STAT(diagnose_44) }, + { "diagnose_9c", VCPU_STAT(diagnose_9c) }, { NULL } }; diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 6f34330..cae342d 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -461,6 +461,7 @@ void mark_page_dirty_in_slot(struct kvm *kvm, struct kvm_memory_slot *memslot, void kvm_vcpu_block(struct kvm_vcpu *vcpu); void kvm_vcpu_kick(struct kvm_vcpu *vcpu); +bool kvm_vcpu_yield_to(struct kvm_vcpu *target); void kvm_vcpu_on_spin(struct kvm_vcpu *vcpu); void kvm_resched(struct kvm_vcpu *vcpu); void kvm_load_guest_fpu(struct kvm_vcpu *vcpu); diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 1847c76..7e14068 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1543,6 +1543,31 @@ void kvm_resched(struct kvm_vcpu *vcpu) } EXPORT_SYMBOL_GPL(kvm_resched); +bool kvm_vcpu_yield_to(struct kvm_vcpu *target) +{ + struct pid *pid; + struct task_struct *task = NULL; + + rcu_read_lock(); + pid = rcu_dereference(target->pid); + if (pid) + task = get_pid_task(target->pid, PIDTYPE_PID); + rcu_read_unlock(); + if (!task) + return false; + if (task->flags & PF_VCPU) { + put_task_struct(task); + return false; + } + if (yield_to(task, 1)) { + put_task_struct(task); + return true; + } + put_task_struct(task); + return false; +} +EXPORT_SYMBOL_GPL(kvm_vcpu_yield_to); + void kvm_vcpu_on_spin(struct kvm_vcpu *me) { struct kvm *kvm = me->kvm; @@ -1561,8 +1586,6 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me) */ for (pass = 0; pass < 2 && !yielded; pass++) { kvm_for_each_vcpu(i, vcpu, kvm) { - struct task_struct *task = NULL; - struct pid *pid; if (!pass && i < last_boosted_vcpu) { i = last_boosted_vcpu; continue; @@ -1572,24 +1595,11 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me) continue; if (waitqueue_active(&vcpu->wq)) continue; - rcu_read_lock(); - pid = rcu_dereference(vcpu->pid); - if (pid) - task = get_pid_task(vcpu->pid, PIDTYPE_PID); - rcu_read_unlock(); - if (!task) - continue; - if (task->flags & PF_VCPU) { - put_task_struct(task); - continue; - } - if (yield_to(task, 1)) { - put_task_struct(task); + if (kvm_vcpu_yield_to(vcpu)) { kvm->last_boosted_vcpu = i; yielded = 1; break; } - put_task_struct(task); } } } -- cgit v0.10.2 From 8733ac36fc37fe055a7d7daadf5451b4231f0214 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Wed, 25 Apr 2012 15:30:39 +0200 Subject: KVM: s390: use kvm_vcpu_on_spin for diag 0x44 Lets replace the old open coded version of diag 0x44 (which relied on compat_sched_yield) with kvm_vcpu_on_spin. Signed-off-by: Christian Borntraeger Signed-off-by: Marcelo Tosatti diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c index 2d2ae32..b23d9ac 100644 --- a/arch/s390/kvm/diag.c +++ b/arch/s390/kvm/diag.c @@ -47,9 +47,7 @@ static int __diag_time_slice_end(struct kvm_vcpu *vcpu) { VCPU_EVENT(vcpu, 5, "%s", "diag time slice end"); vcpu->stat.diagnose_44++; - vcpu_put(vcpu); - yield(); - vcpu_load(vcpu); + kvm_vcpu_on_spin(vcpu); return 0; } -- cgit v0.10.2 From 8c3f61e2ddb6c1845704fd8ef1b999ecd2e4725c Mon Sep 17 00:00:00 2001 From: Cornelia Huck Date: Tue, 24 Apr 2012 09:24:44 +0200 Subject: KVM: s390: Handle sckpf instruction Handle the mandatory intercept SET CLOCK PROGRAMMABLE FIELD instruction. Signed-off-by: Cornelia Huck Signed-off-by: Marcelo Tosatti diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c index 3614565..979cbe5 100644 --- a/arch/s390/kvm/intercept.c +++ b/arch/s390/kvm/intercept.c @@ -101,6 +101,7 @@ static int handle_lctl(struct kvm_vcpu *vcpu) } static intercept_handler_t instruction_handlers[256] = { + [0x01] = kvm_s390_handle_01, [0x83] = kvm_s390_handle_diag, [0xae] = kvm_s390_handle_sigp, [0xb2] = kvm_s390_handle_b2, diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index ff28f9d..2294377 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h @@ -79,6 +79,7 @@ int kvm_s390_inject_sigp_stop(struct kvm_vcpu *vcpu, int action); /* implemented in priv.c */ int kvm_s390_handle_b2(struct kvm_vcpu *vcpu); int kvm_s390_handle_e5(struct kvm_vcpu *vcpu); +int kvm_s390_handle_01(struct kvm_vcpu *vcpu); /* implemented in sigp.c */ int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu); diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index e5a45db..68a6b2e 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -380,3 +380,34 @@ int kvm_s390_handle_e5(struct kvm_vcpu *vcpu) return -EOPNOTSUPP; } +static int handle_sckpf(struct kvm_vcpu *vcpu) +{ + u32 value; + + if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) + return kvm_s390_inject_program_int(vcpu, + PGM_PRIVILEGED_OPERATION); + + if (vcpu->run->s.regs.gprs[0] & 0x00000000ffff0000) + return kvm_s390_inject_program_int(vcpu, + PGM_SPECIFICATION); + + value = vcpu->run->s.regs.gprs[0] & 0x000000000000ffff; + vcpu->arch.sie_block->todpr = value; + + return 0; +} + +static intercept_handler_t x01_handlers[256] = { + [0x07] = handle_sckpf, +}; + +int kvm_s390_handle_01(struct kvm_vcpu *vcpu) +{ + intercept_handler_t handler; + + handler = x01_handlers[vcpu->arch.sie_block->ipa & 0x00ff]; + if (handler) + return handler(vcpu); + return -EOPNOTSUPP; +} -- cgit v0.10.2 From f4bf7cf4cab90c98fe68a7afa12fb72790fd04bf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 30 Mar 2012 22:04:56 +0200 Subject: mfd: Mark const init data with __initconst instead of __initdata for ab5500 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As long as there is no other non-const variable marked __initdata in the same compilation unit it doesn't hurt. If there were one however compilation would fail with error: $variablename causes a section type conflict because a section containing const variables is marked read only and so cannot contain non-const variables. Signed-off-by: Uwe Kleine-König Signed-off-by: Samuel Ortiz diff --git a/drivers/mfd/ab5500-core.c b/drivers/mfd/ab5500-core.c index 54d0fe4..3765c76 100644 --- a/drivers/mfd/ab5500-core.c +++ b/drivers/mfd/ab5500-core.c @@ -1291,7 +1291,7 @@ struct ab_family_id { char *name; }; -static const struct ab_family_id ids[] __initdata = { +static const struct ab_family_id ids[] __initconst = { /* AB5500 */ { .id = AB5500_1_0, -- cgit v0.10.2 From 4630b130b30be6420394ba31121e111c8771ca08 Mon Sep 17 00:00:00 2001 From: Aaron Sierra Date: Wed, 28 Mar 2012 09:43:10 -0500 Subject: mfd: Add LPC driver for Intel ICH chipsets This driver currently creates resources for use by a forthcoming ICH chipset GPIO driver. It could be expanded to create the resources for converting the esb2rom (mtd) and iTCO_wdt (wdt), and potentially more, drivers to use the mfd model. Signed-off-by: Aaron Sierra Signed-off-by: Guenter Roeck Signed-off-by: Samuel Ortiz diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig index 11e4438..c6edba6 100644 --- a/drivers/mfd/Kconfig +++ b/drivers/mfd/Kconfig @@ -748,6 +748,15 @@ config LPC_SCH LPC bridge function of the Intel SCH provides support for System Management Bus and General Purpose I/O. +config LPC_ICH + tristate "Intel ICH LPC" + depends on PCI + select MFD_CORE + help + The LPC bridge function of the Intel ICH provides support for + many functional units. This driver provides needed support for + other drivers to control these functions, currently GPIO. + config MFD_RDC321X tristate "Support for RDC-R321x southbridge" select MFD_CORE diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile index 05fa538..c4500c3 100644 --- a/drivers/mfd/Makefile +++ b/drivers/mfd/Makefile @@ -99,6 +99,7 @@ obj-$(CONFIG_MFD_DB5500_PRCMU) += db5500-prcmu.o obj-$(CONFIG_MFD_TIMBERDALE) += timberdale.o obj-$(CONFIG_PMIC_ADP5520) += adp5520.o obj-$(CONFIG_LPC_SCH) += lpc_sch.o +obj-$(CONFIG_LPC_ICH) += lpc_ich.o obj-$(CONFIG_MFD_RDC321X) += rdc321x-southbridge.o obj-$(CONFIG_MFD_JANZ_CMODIO) += janz-cmodio.o obj-$(CONFIG_MFD_JZ4740_ADC) += jz4740-adc.o diff --git a/drivers/mfd/lpc_ich.c b/drivers/mfd/lpc_ich.c new file mode 100644 index 0000000..7e3a7b6 --- /dev/null +++ b/drivers/mfd/lpc_ich.c @@ -0,0 +1,719 @@ +/* + * lpc_ich.c - LPC interface for Intel ICH + * + * LPC bridge function of the Intel ICH contains many other + * functional units, such as Interrupt controllers, Timers, + * Power Management, System Management, GPIO, RTC, and LPC + * Configuration Registers. + * + * This driver is derived from lpc_sch. + + * Copyright (c) 2011 Extreme Engineering Solution, Inc. + * Author: Aaron Sierra + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License 2 as published + * by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; see the file COPYING. If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + * + * This driver supports the following I/O Controller hubs: + * (See the intel documentation on http://developer.intel.com.) + * document number 290655-003, 290677-014: 82801AA (ICH), 82801AB (ICHO) + * document number 290687-002, 298242-027: 82801BA (ICH2) + * document number 290733-003, 290739-013: 82801CA (ICH3-S) + * document number 290716-001, 290718-007: 82801CAM (ICH3-M) + * document number 290744-001, 290745-025: 82801DB (ICH4) + * document number 252337-001, 252663-008: 82801DBM (ICH4-M) + * document number 273599-001, 273645-002: 82801E (C-ICH) + * document number 252516-001, 252517-028: 82801EB (ICH5), 82801ER (ICH5R) + * document number 300641-004, 300884-013: 6300ESB + * document number 301473-002, 301474-026: 82801F (ICH6) + * document number 313082-001, 313075-006: 631xESB, 632xESB + * document number 307013-003, 307014-024: 82801G (ICH7) + * document number 322896-001, 322897-001: NM10 + * document number 313056-003, 313057-017: 82801H (ICH8) + * document number 316972-004, 316973-012: 82801I (ICH9) + * document number 319973-002, 319974-002: 82801J (ICH10) + * document number 322169-001, 322170-003: 5 Series, 3400 Series (PCH) + * document number 320066-003, 320257-008: EP80597 (IICH) + * document number 324645-001, 324646-001: Cougar Point (CPT) + * document number TBD : Patsburg (PBG) + * document number TBD : DH89xxCC + * document number TBD : Panther Point + * document number TBD : Lynx Point + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include +#include +#include +#include +#include + +#define ACPIBASE 0x40 +#define ACPIBASE_GPE_OFF 0x28 +#define ACPIBASE_GPE_END 0x2f +#define ACPICTRL 0x44 + +#define GPIOBASE 0x48 +#define GPIOCTRL 0x4C + +static int lpc_ich_acpi_save = -1; +static int lpc_ich_gpio_save = -1; + +static struct resource gpio_ich_res[] = { + /* GPIO */ + { + .flags = IORESOURCE_IO, + }, + /* ACPI - GPE0 */ + { + .flags = IORESOURCE_IO, + }, +}; + +enum lpc_cells { + LPC_GPIO = 0, +}; + +static struct mfd_cell lpc_ich_cells[] = { + [LPC_GPIO] = { + .name = "gpio_ich", + .num_resources = ARRAY_SIZE(gpio_ich_res), + .resources = gpio_ich_res, + .ignore_resource_conflicts = true, + }, +}; + +/* chipset related info */ +enum lpc_chipsets { + LPC_ICH = 0, /* ICH */ + LPC_ICH0, /* ICH0 */ + LPC_ICH2, /* ICH2 */ + LPC_ICH2M, /* ICH2-M */ + LPC_ICH3, /* ICH3-S */ + LPC_ICH3M, /* ICH3-M */ + LPC_ICH4, /* ICH4 */ + LPC_ICH4M, /* ICH4-M */ + LPC_CICH, /* C-ICH */ + LPC_ICH5, /* ICH5 & ICH5R */ + LPC_6300ESB, /* 6300ESB */ + LPC_ICH6, /* ICH6 & ICH6R */ + LPC_ICH6M, /* ICH6-M */ + LPC_ICH6W, /* ICH6W & ICH6RW */ + LPC_631XESB, /* 631xESB/632xESB */ + LPC_ICH7, /* ICH7 & ICH7R */ + LPC_ICH7DH, /* ICH7DH */ + LPC_ICH7M, /* ICH7-M & ICH7-U */ + LPC_ICH7MDH, /* ICH7-M DH */ + LPC_NM10, /* NM10 */ + LPC_ICH8, /* ICH8 & ICH8R */ + LPC_ICH8DH, /* ICH8DH */ + LPC_ICH8DO, /* ICH8DO */ + LPC_ICH8M, /* ICH8M */ + LPC_ICH8ME, /* ICH8M-E */ + LPC_ICH9, /* ICH9 */ + LPC_ICH9R, /* ICH9R */ + LPC_ICH9DH, /* ICH9DH */ + LPC_ICH9DO, /* ICH9DO */ + LPC_ICH9M, /* ICH9M */ + LPC_ICH9ME, /* ICH9M-E */ + LPC_ICH10, /* ICH10 */ + LPC_ICH10R, /* ICH10R */ + LPC_ICH10D, /* ICH10D */ + LPC_ICH10DO, /* ICH10DO */ + LPC_PCH, /* PCH Desktop Full Featured */ + LPC_PCHM, /* PCH Mobile Full Featured */ + LPC_P55, /* P55 */ + LPC_PM55, /* PM55 */ + LPC_H55, /* H55 */ + LPC_QM57, /* QM57 */ + LPC_H57, /* H57 */ + LPC_HM55, /* HM55 */ + LPC_Q57, /* Q57 */ + LPC_HM57, /* HM57 */ + LPC_PCHMSFF, /* PCH Mobile SFF Full Featured */ + LPC_QS57, /* QS57 */ + LPC_3400, /* 3400 */ + LPC_3420, /* 3420 */ + LPC_3450, /* 3450 */ + LPC_EP80579, /* EP80579 */ + LPC_CPT, /* Cougar Point */ + LPC_CPTD, /* Cougar Point Desktop */ + LPC_CPTM, /* Cougar Point Mobile */ + LPC_PBG, /* Patsburg */ + LPC_DH89XXCC, /* DH89xxCC */ + LPC_PPT, /* Panther Point */ + LPC_LPT, /* Lynx Point */ +}; + +struct lpc_ich_info lpc_chipset_info[] __devinitdata = { + [LPC_ICH] = { + .name = "ICH", + }, + [LPC_ICH0] = { + .name = "ICH0", + }, + [LPC_ICH2] = { + .name = "ICH2", + }, + [LPC_ICH2M] = { + .name = "ICH2-M", + }, + [LPC_ICH3] = { + .name = "ICH3-S", + }, + [LPC_ICH3M] = { + .name = "ICH3-M", + }, + [LPC_ICH4] = { + .name = "ICH4", + }, + [LPC_ICH4M] = { + .name = "ICH4-M", + }, + [LPC_CICH] = { + .name = "C-ICH", + }, + [LPC_ICH5] = { + .name = "ICH5 or ICH5R", + }, + [LPC_6300ESB] = { + .name = "6300ESB", + }, + [LPC_ICH6] = { + .name = "ICH6 or ICH6R", + .gpio_version = ICH_V6_GPIO, + }, + [LPC_ICH6M] = { + .name = "ICH6-M", + .gpio_version = ICH_V6_GPIO, + }, + [LPC_ICH6W] = { + .name = "ICH6W or ICH6RW", + .gpio_version = ICH_V6_GPIO, + }, + [LPC_631XESB] = { + .name = "631xESB/632xESB", + .gpio_version = ICH_V6_GPIO, + }, + [LPC_ICH7] = { + .name = "ICH7 or ICH7R", + .gpio_version = ICH_V7_GPIO, + }, + [LPC_ICH7DH] = { + .name = "ICH7DH", + .gpio_version = ICH_V7_GPIO, + }, + [LPC_ICH7M] = { + .name = "ICH7-M or ICH7-U", + .gpio_version = ICH_V7_GPIO, + }, + [LPC_ICH7MDH] = { + .name = "ICH7-M DH", + .gpio_version = ICH_V7_GPIO, + }, + [LPC_NM10] = { + .name = "NM10", + }, + [LPC_ICH8] = { + .name = "ICH8 or ICH8R", + .gpio_version = ICH_V7_GPIO, + }, + [LPC_ICH8DH] = { + .name = "ICH8DH", + .gpio_version = ICH_V7_GPIO, + }, + [LPC_ICH8DO] = { + .name = "ICH8DO", + .gpio_version = ICH_V7_GPIO, + }, + [LPC_ICH8M] = { + .name = "ICH8M", + .gpio_version = ICH_V7_GPIO, + }, + [LPC_ICH8ME] = { + .name = "ICH8M-E", + .gpio_version = ICH_V7_GPIO, + }, + [LPC_ICH9] = { + .name = "ICH9", + .gpio_version = ICH_V9_GPIO, + }, + [LPC_ICH9R] = { + .name = "ICH9R", + .gpio_version = ICH_V9_GPIO, + }, + [LPC_ICH9DH] = { + .name = "ICH9DH", + .gpio_version = ICH_V9_GPIO, + }, + [LPC_ICH9DO] = { + .name = "ICH9DO", + .gpio_version = ICH_V9_GPIO, + }, + [LPC_ICH9M] = { + .name = "ICH9M", + .gpio_version = ICH_V9_GPIO, + }, + [LPC_ICH9ME] = { + .name = "ICH9M-E", + .gpio_version = ICH_V9_GPIO, + }, + [LPC_ICH10] = { + .name = "ICH10", + .gpio_version = ICH_V10CONS_GPIO, + }, + [LPC_ICH10R] = { + .name = "ICH10R", + .gpio_version = ICH_V10CONS_GPIO, + }, + [LPC_ICH10D] = { + .name = "ICH10D", + .gpio_version = ICH_V10CORP_GPIO, + }, + [LPC_ICH10DO] = { + .name = "ICH10DO", + .gpio_version = ICH_V10CORP_GPIO, + }, + [LPC_PCH] = { + .name = "PCH Desktop Full Featured", + .gpio_version = ICH_V5_GPIO, + }, + [LPC_PCHM] = { + .name = "PCH Mobile Full Featured", + .gpio_version = ICH_V5_GPIO, + }, + [LPC_P55] = { + .name = "P55", + .gpio_version = ICH_V5_GPIO, + }, + [LPC_PM55] = { + .name = "PM55", + .gpio_version = ICH_V5_GPIO, + }, + [LPC_H55] = { + .name = "H55", + .gpio_version = ICH_V5_GPIO, + }, + [LPC_QM57] = { + .name = "QM57", + .gpio_version = ICH_V5_GPIO, + }, + [LPC_H57] = { + .name = "H57", + .gpio_version = ICH_V5_GPIO, + }, + [LPC_HM55] = { + .name = "HM55", + .gpio_version = ICH_V5_GPIO, + }, + [LPC_Q57] = { + .name = "Q57", + .gpio_version = ICH_V5_GPIO, + }, + [LPC_HM57] = { + .name = "HM57", + .gpio_version = ICH_V5_GPIO, + }, + [LPC_PCHMSFF] = { + .name = "PCH Mobile SFF Full Featured", + .gpio_version = ICH_V5_GPIO, + }, + [LPC_QS57] = { + .name = "QS57", + .gpio_version = ICH_V5_GPIO, + }, + [LPC_3400] = { + .name = "3400", + .gpio_version = ICH_V5_GPIO, + }, + [LPC_3420] = { + .name = "3420", + .gpio_version = ICH_V5_GPIO, + }, + [LPC_3450] = { + .name = "3450", + .gpio_version = ICH_V5_GPIO, + }, + [LPC_EP80579] = { + .name = "EP80579", + }, + [LPC_CPT] = { + .name = "Cougar Point", + .gpio_version = ICH_V5_GPIO, + }, + [LPC_CPTD] = { + .name = "Cougar Point Desktop", + .gpio_version = ICH_V5_GPIO, + }, + [LPC_CPTM] = { + .name = "Cougar Point Mobile", + .gpio_version = ICH_V5_GPIO, + }, + [LPC_PBG] = { + .name = "Patsburg", + }, + [LPC_DH89XXCC] = { + .name = "DH89xxCC", + }, + [LPC_PPT] = { + .name = "Panther Point", + }, + [LPC_LPT] = { + .name = "Lynx Point", + }, +}; + +/* + * This data only exists for exporting the supported PCI ids + * via MODULE_DEVICE_TABLE. We do not actually register a + * pci_driver, because the I/O Controller Hub has also other + * functions that probably will be registered by other drivers. + */ +static DEFINE_PCI_DEVICE_TABLE(lpc_ich_ids) = { + { PCI_VDEVICE(INTEL, 0x2410), LPC_ICH}, + { PCI_VDEVICE(INTEL, 0x2420), LPC_ICH0}, + { PCI_VDEVICE(INTEL, 0x2440), LPC_ICH2}, + { PCI_VDEVICE(INTEL, 0x244c), LPC_ICH2M}, + { PCI_VDEVICE(INTEL, 0x2480), LPC_ICH3}, + { PCI_VDEVICE(INTEL, 0x248c), LPC_ICH3M}, + { PCI_VDEVICE(INTEL, 0x24c0), LPC_ICH4}, + { PCI_VDEVICE(INTEL, 0x24cc), LPC_ICH4M}, + { PCI_VDEVICE(INTEL, 0x2450), LPC_CICH}, + { PCI_VDEVICE(INTEL, 0x24d0), LPC_ICH5}, + { PCI_VDEVICE(INTEL, 0x25a1), LPC_6300ESB}, + { PCI_VDEVICE(INTEL, 0x2640), LPC_ICH6}, + { PCI_VDEVICE(INTEL, 0x2641), LPC_ICH6M}, + { PCI_VDEVICE(INTEL, 0x2642), LPC_ICH6W}, + { PCI_VDEVICE(INTEL, 0x2670), LPC_631XESB}, + { PCI_VDEVICE(INTEL, 0x2671), LPC_631XESB}, + { PCI_VDEVICE(INTEL, 0x2672), LPC_631XESB}, + { PCI_VDEVICE(INTEL, 0x2673), LPC_631XESB}, + { PCI_VDEVICE(INTEL, 0x2674), LPC_631XESB}, + { PCI_VDEVICE(INTEL, 0x2675), LPC_631XESB}, + { PCI_VDEVICE(INTEL, 0x2676), LPC_631XESB}, + { PCI_VDEVICE(INTEL, 0x2677), LPC_631XESB}, + { PCI_VDEVICE(INTEL, 0x2678), LPC_631XESB}, + { PCI_VDEVICE(INTEL, 0x2679), LPC_631XESB}, + { PCI_VDEVICE(INTEL, 0x267a), LPC_631XESB}, + { PCI_VDEVICE(INTEL, 0x267b), LPC_631XESB}, + { PCI_VDEVICE(INTEL, 0x267c), LPC_631XESB}, + { PCI_VDEVICE(INTEL, 0x267d), LPC_631XESB}, + { PCI_VDEVICE(INTEL, 0x267e), LPC_631XESB}, + { PCI_VDEVICE(INTEL, 0x267f), LPC_631XESB}, + { PCI_VDEVICE(INTEL, 0x27b8), LPC_ICH7}, + { PCI_VDEVICE(INTEL, 0x27b0), LPC_ICH7DH}, + { PCI_VDEVICE(INTEL, 0x27b9), LPC_ICH7M}, + { PCI_VDEVICE(INTEL, 0x27bd), LPC_ICH7MDH}, + { PCI_VDEVICE(INTEL, 0x27bc), LPC_NM10}, + { PCI_VDEVICE(INTEL, 0x2810), LPC_ICH8}, + { PCI_VDEVICE(INTEL, 0x2812), LPC_ICH8DH}, + { PCI_VDEVICE(INTEL, 0x2814), LPC_ICH8DO}, + { PCI_VDEVICE(INTEL, 0x2815), LPC_ICH8M}, + { PCI_VDEVICE(INTEL, 0x2811), LPC_ICH8ME}, + { PCI_VDEVICE(INTEL, 0x2918), LPC_ICH9}, + { PCI_VDEVICE(INTEL, 0x2916), LPC_ICH9R}, + { PCI_VDEVICE(INTEL, 0x2912), LPC_ICH9DH}, + { PCI_VDEVICE(INTEL, 0x2914), LPC_ICH9DO}, + { PCI_VDEVICE(INTEL, 0x2919), LPC_ICH9M}, + { PCI_VDEVICE(INTEL, 0x2917), LPC_ICH9ME}, + { PCI_VDEVICE(INTEL, 0x3a18), LPC_ICH10}, + { PCI_VDEVICE(INTEL, 0x3a16), LPC_ICH10R}, + { PCI_VDEVICE(INTEL, 0x3a1a), LPC_ICH10D}, + { PCI_VDEVICE(INTEL, 0x3a14), LPC_ICH10DO}, + { PCI_VDEVICE(INTEL, 0x3b00), LPC_PCH}, + { PCI_VDEVICE(INTEL, 0x3b01), LPC_PCHM}, + { PCI_VDEVICE(INTEL, 0x3b02), LPC_P55}, + { PCI_VDEVICE(INTEL, 0x3b03), LPC_PM55}, + { PCI_VDEVICE(INTEL, 0x3b06), LPC_H55}, + { PCI_VDEVICE(INTEL, 0x3b07), LPC_QM57}, + { PCI_VDEVICE(INTEL, 0x3b08), LPC_H57}, + { PCI_VDEVICE(INTEL, 0x3b09), LPC_HM55}, + { PCI_VDEVICE(INTEL, 0x3b0a), LPC_Q57}, + { PCI_VDEVICE(INTEL, 0x3b0b), LPC_HM57}, + { PCI_VDEVICE(INTEL, 0x3b0d), LPC_PCHMSFF}, + { PCI_VDEVICE(INTEL, 0x3b0f), LPC_QS57}, + { PCI_VDEVICE(INTEL, 0x3b12), LPC_3400}, + { PCI_VDEVICE(INTEL, 0x3b14), LPC_3420}, + { PCI_VDEVICE(INTEL, 0x3b16), LPC_3450}, + { PCI_VDEVICE(INTEL, 0x5031), LPC_EP80579}, + { PCI_VDEVICE(INTEL, 0x1c41), LPC_CPT}, + { PCI_VDEVICE(INTEL, 0x1c42), LPC_CPTD}, + { PCI_VDEVICE(INTEL, 0x1c43), LPC_CPTM}, + { PCI_VDEVICE(INTEL, 0x1c44), LPC_CPT}, + { PCI_VDEVICE(INTEL, 0x1c45), LPC_CPT}, + { PCI_VDEVICE(INTEL, 0x1c46), LPC_CPT}, + { PCI_VDEVICE(INTEL, 0x1c47), LPC_CPT}, + { PCI_VDEVICE(INTEL, 0x1c48), LPC_CPT}, + { PCI_VDEVICE(INTEL, 0x1c49), LPC_CPT}, + { PCI_VDEVICE(INTEL, 0x1c4a), LPC_CPT}, + { PCI_VDEVICE(INTEL, 0x1c4b), LPC_CPT}, + { PCI_VDEVICE(INTEL, 0x1c4c), LPC_CPT}, + { PCI_VDEVICE(INTEL, 0x1c4d), LPC_CPT}, + { PCI_VDEVICE(INTEL, 0x1c4e), LPC_CPT}, + { PCI_VDEVICE(INTEL, 0x1c4f), LPC_CPT}, + { PCI_VDEVICE(INTEL, 0x1c50), LPC_CPT}, + { PCI_VDEVICE(INTEL, 0x1c51), LPC_CPT}, + { PCI_VDEVICE(INTEL, 0x1c52), LPC_CPT}, + { PCI_VDEVICE(INTEL, 0x1c53), LPC_CPT}, + { PCI_VDEVICE(INTEL, 0x1c54), LPC_CPT}, + { PCI_VDEVICE(INTEL, 0x1c55), LPC_CPT}, + { PCI_VDEVICE(INTEL, 0x1c56), LPC_CPT}, + { PCI_VDEVICE(INTEL, 0x1c57), LPC_CPT}, + { PCI_VDEVICE(INTEL, 0x1c58), LPC_CPT}, + { PCI_VDEVICE(INTEL, 0x1c59), LPC_CPT}, + { PCI_VDEVICE(INTEL, 0x1c5a), LPC_CPT}, + { PCI_VDEVICE(INTEL, 0x1c5b), LPC_CPT}, + { PCI_VDEVICE(INTEL, 0x1c5c), LPC_CPT}, + { PCI_VDEVICE(INTEL, 0x1c5d), LPC_CPT}, + { PCI_VDEVICE(INTEL, 0x1c5e), LPC_CPT}, + { PCI_VDEVICE(INTEL, 0x1c5f), LPC_CPT}, + { PCI_VDEVICE(INTEL, 0x1d40), LPC_PBG}, + { PCI_VDEVICE(INTEL, 0x1d41), LPC_PBG}, + { PCI_VDEVICE(INTEL, 0x2310), LPC_DH89XXCC}, + { PCI_VDEVICE(INTEL, 0x1e40), LPC_PPT}, + { PCI_VDEVICE(INTEL, 0x1e41), LPC_PPT}, + { PCI_VDEVICE(INTEL, 0x1e42), LPC_PPT}, + { PCI_VDEVICE(INTEL, 0x1e43), LPC_PPT}, + { PCI_VDEVICE(INTEL, 0x1e44), LPC_PPT}, + { PCI_VDEVICE(INTEL, 0x1e45), LPC_PPT}, + { PCI_VDEVICE(INTEL, 0x1e46), LPC_PPT}, + { PCI_VDEVICE(INTEL, 0x1e47), LPC_PPT}, + { PCI_VDEVICE(INTEL, 0x1e48), LPC_PPT}, + { PCI_VDEVICE(INTEL, 0x1e49), LPC_PPT}, + { PCI_VDEVICE(INTEL, 0x1e4a), LPC_PPT}, + { PCI_VDEVICE(INTEL, 0x1e4b), LPC_PPT}, + { PCI_VDEVICE(INTEL, 0x1e4c), LPC_PPT}, + { PCI_VDEVICE(INTEL, 0x1e4d), LPC_PPT}, + { PCI_VDEVICE(INTEL, 0x1e4e), LPC_PPT}, + { PCI_VDEVICE(INTEL, 0x1e4f), LPC_PPT}, + { PCI_VDEVICE(INTEL, 0x1e50), LPC_PPT}, + { PCI_VDEVICE(INTEL, 0x1e51), LPC_PPT}, + { PCI_VDEVICE(INTEL, 0x1e52), LPC_PPT}, + { PCI_VDEVICE(INTEL, 0x1e53), LPC_PPT}, + { PCI_VDEVICE(INTEL, 0x1e54), LPC_PPT}, + { PCI_VDEVICE(INTEL, 0x1e55), LPC_PPT}, + { PCI_VDEVICE(INTEL, 0x1e56), LPC_PPT}, + { PCI_VDEVICE(INTEL, 0x1e57), LPC_PPT}, + { PCI_VDEVICE(INTEL, 0x1e58), LPC_PPT}, + { PCI_VDEVICE(INTEL, 0x1e59), LPC_PPT}, + { PCI_VDEVICE(INTEL, 0x1e5a), LPC_PPT}, + { PCI_VDEVICE(INTEL, 0x1e5b), LPC_PPT}, + { PCI_VDEVICE(INTEL, 0x1e5c), LPC_PPT}, + { PCI_VDEVICE(INTEL, 0x1e5d), LPC_PPT}, + { PCI_VDEVICE(INTEL, 0x1e5e), LPC_PPT}, + { PCI_VDEVICE(INTEL, 0x1e5f), LPC_PPT}, + { PCI_VDEVICE(INTEL, 0x8c40), LPC_LPT}, + { PCI_VDEVICE(INTEL, 0x8c41), LPC_LPT}, + { PCI_VDEVICE(INTEL, 0x8c42), LPC_LPT}, + { PCI_VDEVICE(INTEL, 0x8c43), LPC_LPT}, + { PCI_VDEVICE(INTEL, 0x8c44), LPC_LPT}, + { PCI_VDEVICE(INTEL, 0x8c45), LPC_LPT}, + { PCI_VDEVICE(INTEL, 0x8c46), LPC_LPT}, + { PCI_VDEVICE(INTEL, 0x8c47), LPC_LPT}, + { PCI_VDEVICE(INTEL, 0x8c48), LPC_LPT}, + { PCI_VDEVICE(INTEL, 0x8c49), LPC_LPT}, + { PCI_VDEVICE(INTEL, 0x8c4a), LPC_LPT}, + { PCI_VDEVICE(INTEL, 0x8c4b), LPC_LPT}, + { PCI_VDEVICE(INTEL, 0x8c4c), LPC_LPT}, + { PCI_VDEVICE(INTEL, 0x8c4d), LPC_LPT}, + { PCI_VDEVICE(INTEL, 0x8c4e), LPC_LPT}, + { PCI_VDEVICE(INTEL, 0x8c4f), LPC_LPT}, + { PCI_VDEVICE(INTEL, 0x8c50), LPC_LPT}, + { PCI_VDEVICE(INTEL, 0x8c51), LPC_LPT}, + { PCI_VDEVICE(INTEL, 0x8c52), LPC_LPT}, + { PCI_VDEVICE(INTEL, 0x8c53), LPC_LPT}, + { PCI_VDEVICE(INTEL, 0x8c54), LPC_LPT}, + { PCI_VDEVICE(INTEL, 0x8c55), LPC_LPT}, + { PCI_VDEVICE(INTEL, 0x8c56), LPC_LPT}, + { PCI_VDEVICE(INTEL, 0x8c57), LPC_LPT}, + { PCI_VDEVICE(INTEL, 0x8c58), LPC_LPT}, + { PCI_VDEVICE(INTEL, 0x8c59), LPC_LPT}, + { PCI_VDEVICE(INTEL, 0x8c5a), LPC_LPT}, + { PCI_VDEVICE(INTEL, 0x8c5b), LPC_LPT}, + { PCI_VDEVICE(INTEL, 0x8c5c), LPC_LPT}, + { PCI_VDEVICE(INTEL, 0x8c5d), LPC_LPT}, + { PCI_VDEVICE(INTEL, 0x8c5e), LPC_LPT}, + { PCI_VDEVICE(INTEL, 0x8c5f), LPC_LPT}, + { 0, }, /* End of list */ +}; +MODULE_DEVICE_TABLE(pci, lpc_ich_ids); + +static void lpc_ich_restore_config_space(struct pci_dev *dev) +{ + if (lpc_ich_acpi_save >= 0) { + pci_write_config_byte(dev, ACPICTRL, lpc_ich_acpi_save); + lpc_ich_acpi_save = -1; + } + + if (lpc_ich_gpio_save >= 0) { + pci_write_config_byte(dev, GPIOCTRL, lpc_ich_gpio_save); + lpc_ich_gpio_save = -1; + } +} + +static void __devinit lpc_ich_enable_acpi_space(struct pci_dev *dev) +{ + u8 reg_save; + + pci_read_config_byte(dev, ACPICTRL, ®_save); + pci_write_config_byte(dev, ACPICTRL, reg_save | 0x10); + lpc_ich_acpi_save = reg_save; +} + +static void __devinit lpc_ich_enable_gpio_space(struct pci_dev *dev) +{ + u8 reg_save; + + pci_read_config_byte(dev, GPIOCTRL, ®_save); + pci_write_config_byte(dev, GPIOCTRL, reg_save | 0x10); + lpc_ich_gpio_save = reg_save; +} + +static void __devinit lpc_ich_finalize_cell(struct mfd_cell *cell, + const struct pci_device_id *id) +{ + cell->platform_data = &lpc_chipset_info[id->driver_data]; + cell->pdata_size = sizeof(struct lpc_ich_info); +} + +static int __devinit lpc_ich_init_gpio(struct pci_dev *dev, + const struct pci_device_id *id) +{ + u32 base_addr_cfg; + u32 base_addr; + int ret; + bool acpi_conflict = false; + struct resource *res; + + /* Setup power management base register */ + pci_read_config_dword(dev, ACPIBASE, &base_addr_cfg); + base_addr = base_addr_cfg & 0x0000ff80; + if (!base_addr) { + dev_err(&dev->dev, "I/O space for ACPI uninitialized\n"); + lpc_ich_cells[LPC_GPIO].num_resources--; + goto gpe0_done; + } + + res = &gpio_ich_res[ICH_RES_GPE0]; + res->start = base_addr + ACPIBASE_GPE_OFF; + res->end = base_addr + ACPIBASE_GPE_END; + ret = acpi_check_resource_conflict(res); + if (ret) { + /* + * This isn't fatal for the GPIO, but we have to make sure that + * the platform_device subsystem doesn't see this resource + * or it will register an invalid region. + */ + lpc_ich_cells[LPC_GPIO].num_resources--; + acpi_conflict = true; + } else { + lpc_ich_enable_acpi_space(dev); + } + +gpe0_done: + /* Setup GPIO base register */ + pci_read_config_dword(dev, GPIOBASE, &base_addr_cfg); + base_addr = base_addr_cfg & 0x0000ff80; + if (!base_addr) { + dev_err(&dev->dev, "I/O space for GPIO uninitialized\n"); + ret = -ENODEV; + goto gpio_done; + } + + /* Older devices provide fewer GPIO and have a smaller resource size. */ + res = &gpio_ich_res[ICH_RES_GPIO]; + res->start = base_addr; + switch (lpc_chipset_info[id->driver_data].gpio_version) { + case ICH_V5_GPIO: + case ICH_V10CORP_GPIO: + res->end = res->start + 128 - 1; + break; + default: + res->end = res->start + 64 - 1; + break; + } + + ret = acpi_check_resource_conflict(res); + if (ret) { + /* this isn't necessarily fatal for the GPIO */ + acpi_conflict = true; + goto gpio_done; + } + lpc_ich_enable_gpio_space(dev); + + lpc_ich_finalize_cell(&lpc_ich_cells[LPC_GPIO], id); + ret = mfd_add_devices(&dev->dev, -1, &lpc_ich_cells[LPC_GPIO], + 1, NULL, 0); + +gpio_done: + if (acpi_conflict) + pr_warn("Resource conflict(s) found affecting %s\n", + lpc_ich_cells[LPC_GPIO].name); + return ret; +} + +static int __devinit lpc_ich_probe(struct pci_dev *dev, + const struct pci_device_id *id) +{ + int ret; + bool cell_added = false; + + ret = lpc_ich_init_gpio(dev, id); + if (!ret) + cell_added = true; + + /* + * We only care if at least one or none of the cells registered + * successfully. + */ + if (!cell_added) { + lpc_ich_restore_config_space(dev); + return -ENODEV; + } + + return 0; +} + +static void __devexit lpc_ich_remove(struct pci_dev *dev) +{ + mfd_remove_devices(&dev->dev); + lpc_ich_restore_config_space(dev); +} + +static struct pci_driver lpc_ich_driver = { + .name = "lpc_ich", + .id_table = lpc_ich_ids, + .probe = lpc_ich_probe, + .remove = __devexit_p(lpc_ich_remove), +}; + +static int __init lpc_ich_init(void) +{ + return pci_register_driver(&lpc_ich_driver); +} + +static void __exit lpc_ich_exit(void) +{ + pci_unregister_driver(&lpc_ich_driver); +} + +module_init(lpc_ich_init); +module_exit(lpc_ich_exit); + +MODULE_AUTHOR("Aaron Sierra "); +MODULE_DESCRIPTION("LPC interface for Intel ICH"); +MODULE_LICENSE("GPL"); diff --git a/include/linux/mfd/lpc_ich.h b/include/linux/mfd/lpc_ich.h new file mode 100644 index 0000000..91300b1 --- /dev/null +++ b/include/linux/mfd/lpc_ich.h @@ -0,0 +1,41 @@ +/* + * linux/drivers/mfd/lpc_ich.h + * + * Copyright (c) 2012 Extreme Engineering Solution, Inc. + * Author: Aaron Sierra + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License 2 as published + * by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; see the file COPYING. If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + */ +#ifndef LPC_ICH_H +#define LPC_ICH_H + +/* GPIO resources */ +#define ICH_RES_GPIO 0 +#define ICH_RES_GPE0 1 + +/* GPIO compatibility */ +#define ICH_I3100_GPIO 0x401 +#define ICH_V5_GPIO 0x501 +#define ICH_V6_GPIO 0x601 +#define ICH_V7_GPIO 0x701 +#define ICH_V9_GPIO 0x801 +#define ICH_V10CORP_GPIO 0xa01 +#define ICH_V10CONS_GPIO 0xa11 + +struct lpc_ich_info { + char name[32]; + unsigned int gpio_version; +}; + +#endif -- cgit v0.10.2 From 6ed9f9c405f97cb7cda485f589cfa6c2bb3fb78e Mon Sep 17 00:00:00 2001 From: Peter Tyser Date: Wed, 18 Apr 2012 09:48:24 -0500 Subject: gpio: Add support for Intel ICHx/3100/Series[56] GPIO This driver works on many Intel chipsets, including the ICH6, ICH7, ICH8, ICH9, ICH10, 3100, Series 5/3400 (Ibex Peak), Series 6/C200 (Cougar Point), and NM10 (Tiger Point). Additional Intel chipsets should be easily supported if needed, eg the ICH1-5, EP80579, etc. Tested on QM67 (Cougar Point), QM57 (Ibex Peak), 3100 (Whitmore Lake), and NM10 (Tiger Point). Includes work from Jean Delvare: - Resource leak removal during module load/unload - GPIO API bit value enforcement Also includes code cleanup from Guenter Roeck and Grant Likely. Signed-off-by: Peter Tyser Signed-off-by: Aaron Sierra Acked-by: Grant Likely Signed-off-by: Samuel Ortiz diff --git a/MAINTAINERS b/MAINTAINERS index bb76fc4..28ebbb5 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3340,6 +3340,12 @@ W: http://www.developer.ibm.com/welcome/netfinity/serveraid.html S: Supported F: drivers/scsi/ips.* +ICH LPC AND GPIO DRIVER +M: Peter Tyser +S: Maintained +F: drivers/mfd/lpc_ich.c +F: drivers/gpio/gpio-ich.c + IDE SUBSYSTEM M: "David S. Miller" L: linux-ide@vger.kernel.org diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig index e03653d..a0d5796 100644 --- a/drivers/gpio/Kconfig +++ b/drivers/gpio/Kconfig @@ -178,6 +178,19 @@ config GPIO_SCH The Intel Tunnel Creek processor has 5 GPIOs powered by the core power rail and 9 from suspend power supply. +config GPIO_ICH + tristate "Intel ICH GPIO" + depends on PCI && X86 + select MFD_CORE + select LPC_ICH + help + Say yes here to support the GPIO functionality of a number of Intel + ICH-based chipsets. Currently supported devices: ICH6, ICH7, ICH8 + ICH9, ICH10, Series 5/3400 (eg Ibex Peak), Series 6/C200 (eg + Cougar Point), NM10 (Tiger Point), and 3100 (Whitmore Lake). + + If unsure, say N. + config GPIO_VX855 tristate "VIA VX855/VX875 GPIO" depends on PCI diff --git a/drivers/gpio/Makefile b/drivers/gpio/Makefile index 007f54b..183c42b 100644 --- a/drivers/gpio/Makefile +++ b/drivers/gpio/Makefile @@ -17,6 +17,7 @@ obj-$(CONFIG_GPIO_DA9052) += gpio-da9052.o obj-$(CONFIG_ARCH_DAVINCI) += gpio-davinci.o obj-$(CONFIG_GPIO_EP93XX) += gpio-ep93xx.o obj-$(CONFIG_GPIO_GE_FPGA) += gpio-ge.o +obj-$(CONFIG_GPIO_ICH) += gpio-ich.o obj-$(CONFIG_GPIO_IT8761E) += gpio-it8761e.o obj-$(CONFIG_GPIO_JANZ_TTL) += gpio-janz-ttl.o obj-$(CONFIG_ARCH_KS8695) += gpio-ks8695.o diff --git a/drivers/gpio/gpio-ich.c b/drivers/gpio/gpio-ich.c new file mode 100644 index 0000000..b7c0651 --- /dev/null +++ b/drivers/gpio/gpio-ich.c @@ -0,0 +1,419 @@ +/* + * Intel ICH6-10, Series 5 and 6 GPIO driver + * + * Copyright (C) 2010 Extreme Engineering Solutions. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include +#include + +#define DRV_NAME "gpio_ich" + +/* + * GPIO register offsets in GPIO I/O space. + * Each chunk of 32 GPIOs is manipulated via its own USE_SELx, IO_SELx, and + * LVLx registers. Logic in the read/write functions takes a register and + * an absolute bit number and determines the proper register offset and bit + * number in that register. For example, to read the value of GPIO bit 50 + * the code would access offset ichx_regs[2(=GPIO_LVL)][1(=50/32)], + * bit 18 (50%32). + */ +enum GPIO_REG { + GPIO_USE_SEL = 0, + GPIO_IO_SEL, + GPIO_LVL, +}; + +static const u8 ichx_regs[3][3] = { + {0x00, 0x30, 0x40}, /* USE_SEL[1-3] offsets */ + {0x04, 0x34, 0x44}, /* IO_SEL[1-3] offsets */ + {0x0c, 0x38, 0x48}, /* LVL[1-3] offsets */ +}; + +#define ICHX_WRITE(val, reg, base_res) outl(val, (reg) + (base_res)->start) +#define ICHX_READ(reg, base_res) inl((reg) + (base_res)->start) + +struct ichx_desc { + /* Max GPIO pins the chipset can have */ + uint ngpio; + + /* Whether the chipset has GPIO in GPE0_STS in the PM IO region */ + bool uses_gpe0; + + /* USE_SEL is bogus on some chipsets, eg 3100 */ + u32 use_sel_ignore[3]; + + /* Some chipsets have quirks, let these use their own request/get */ + int (*request)(struct gpio_chip *chip, unsigned offset); + int (*get)(struct gpio_chip *chip, unsigned offset); +}; + +static struct { + spinlock_t lock; + struct platform_device *dev; + struct gpio_chip chip; + struct resource *gpio_base; /* GPIO IO base */ + struct resource *pm_base; /* Power Mangagment IO base */ + struct ichx_desc *desc; /* Pointer to chipset-specific description */ + u32 orig_gpio_ctrl; /* Orig CTRL value, used to restore on exit */ +} ichx_priv; + +static int modparam_gpiobase = -1; /* dynamic */ +module_param_named(gpiobase, modparam_gpiobase, int, 0444); +MODULE_PARM_DESC(gpiobase, "The GPIO number base. -1 means dynamic, " + "which is the default."); + +static int ichx_write_bit(int reg, unsigned nr, int val, int verify) +{ + unsigned long flags; + u32 data, tmp; + int reg_nr = nr / 32; + int bit = nr & 0x1f; + int ret = 0; + + spin_lock_irqsave(&ichx_priv.lock, flags); + + data = ICHX_READ(ichx_regs[reg][reg_nr], ichx_priv.gpio_base); + if (val) + data |= 1 << bit; + else + data &= ~(1 << bit); + ICHX_WRITE(data, ichx_regs[reg][reg_nr], ichx_priv.gpio_base); + tmp = ICHX_READ(ichx_regs[reg][reg_nr], ichx_priv.gpio_base); + if (verify && data != tmp) + ret = -EPERM; + + spin_unlock_irqrestore(&ichx_priv.lock, flags); + + return ret; +} + +static int ichx_read_bit(int reg, unsigned nr) +{ + unsigned long flags; + u32 data; + int reg_nr = nr / 32; + int bit = nr & 0x1f; + + spin_lock_irqsave(&ichx_priv.lock, flags); + + data = ICHX_READ(ichx_regs[reg][reg_nr], ichx_priv.gpio_base); + + spin_unlock_irqrestore(&ichx_priv.lock, flags); + + return data & (1 << bit) ? 1 : 0; +} + +static int ichx_gpio_direction_input(struct gpio_chip *gpio, unsigned nr) +{ + /* + * Try setting pin as an input and verify it worked since many pins + * are output-only. + */ + if (ichx_write_bit(GPIO_IO_SEL, nr, 1, 1)) + return -EINVAL; + + return 0; +} + +static int ichx_gpio_direction_output(struct gpio_chip *gpio, unsigned nr, + int val) +{ + /* Set GPIO output value. */ + ichx_write_bit(GPIO_LVL, nr, val, 0); + + /* + * Try setting pin as an output and verify it worked since many pins + * are input-only. + */ + if (ichx_write_bit(GPIO_IO_SEL, nr, 0, 1)) + return -EINVAL; + + return 0; +} + +static int ichx_gpio_get(struct gpio_chip *chip, unsigned nr) +{ + return ichx_read_bit(GPIO_LVL, nr); +} + +static int ich6_gpio_get(struct gpio_chip *chip, unsigned nr) +{ + unsigned long flags; + u32 data; + + /* + * GPI 0 - 15 need to be read from the power management registers on + * a ICH6/3100 bridge. + */ + if (nr < 16) { + if (!ichx_priv.pm_base) + return -ENXIO; + + spin_lock_irqsave(&ichx_priv.lock, flags); + + /* GPI 0 - 15 are latched, write 1 to clear*/ + ICHX_WRITE(1 << (16 + nr), 0, ichx_priv.pm_base); + data = ICHX_READ(0, ichx_priv.pm_base); + + spin_unlock_irqrestore(&ichx_priv.lock, flags); + + return (data >> 16) & (1 << nr) ? 1 : 0; + } else { + return ichx_gpio_get(chip, nr); + } +} + +static int ichx_gpio_request(struct gpio_chip *chip, unsigned nr) +{ + /* + * Note we assume the BIOS properly set a bridge's USE value. Some + * chips (eg Intel 3100) have bogus USE values though, so first see if + * the chipset's USE value can be trusted for this specific bit. + * If it can't be trusted, assume that the pin can be used as a GPIO. + */ + if (ichx_priv.desc->use_sel_ignore[nr / 32] & (1 << (nr & 0x1f))) + return 1; + + return ichx_read_bit(GPIO_USE_SEL, nr) ? 0 : -ENODEV; +} + +static int ich6_gpio_request(struct gpio_chip *chip, unsigned nr) +{ + /* + * Fixups for bits 16 and 17 are necessary on the Intel ICH6/3100 + * bridge as they are controlled by USE register bits 0 and 1. See + * "Table 704 GPIO_USE_SEL1 register" in the i3100 datasheet for + * additional info. + */ + if (nr == 16 || nr == 17) + nr -= 16; + + return ichx_gpio_request(chip, nr); +} + +static void ichx_gpio_set(struct gpio_chip *chip, unsigned nr, int val) +{ + ichx_write_bit(GPIO_LVL, nr, val, 0); +} + +static void __devinit ichx_gpiolib_setup(struct gpio_chip *chip) +{ + chip->owner = THIS_MODULE; + chip->label = DRV_NAME; + chip->dev = &ichx_priv.dev->dev; + + /* Allow chip-specific overrides of request()/get() */ + chip->request = ichx_priv.desc->request ? + ichx_priv.desc->request : ichx_gpio_request; + chip->get = ichx_priv.desc->get ? + ichx_priv.desc->get : ichx_gpio_get; + + chip->set = ichx_gpio_set; + chip->direction_input = ichx_gpio_direction_input; + chip->direction_output = ichx_gpio_direction_output; + chip->base = modparam_gpiobase; + chip->ngpio = ichx_priv.desc->ngpio; + chip->can_sleep = 0; + chip->dbg_show = NULL; +} + +/* ICH6-based, 631xesb-based */ +static struct ichx_desc ich6_desc = { + /* Bridges using the ICH6 controller need fixups for GPIO 0 - 17 */ + .request = ich6_gpio_request, + .get = ich6_gpio_get, + + /* GPIO 0-15 are read in the GPE0_STS PM register */ + .uses_gpe0 = true, + + .ngpio = 50, +}; + +/* Intel 3100 */ +static struct ichx_desc i3100_desc = { + /* + * Bits 16,17, 20 of USE_SEL and bit 16 of USE_SEL2 always read 0 on + * the Intel 3100. See "Table 712. GPIO Summary Table" of 3100 + * Datasheet for more info. + */ + .use_sel_ignore = {0x00130000, 0x00010000, 0x0}, + + /* The 3100 needs fixups for GPIO 0 - 17 */ + .request = ich6_gpio_request, + .get = ich6_gpio_get, + + /* GPIO 0-15 are read in the GPE0_STS PM register */ + .uses_gpe0 = true, + + .ngpio = 50, +}; + +/* ICH7 and ICH8-based */ +static struct ichx_desc ich7_desc = { + .ngpio = 50, +}; + +/* ICH9-based */ +static struct ichx_desc ich9_desc = { + .ngpio = 61, +}; + +/* ICH10-based - Consumer/corporate versions have different amount of GPIO */ +static struct ichx_desc ich10_cons_desc = { + .ngpio = 61, +}; +static struct ichx_desc ich10_corp_desc = { + .ngpio = 72, +}; + +/* Intel 5 series, 6 series, 3400 series, and C200 series */ +static struct ichx_desc intel5_desc = { + .ngpio = 76, +}; + +static int __devinit ichx_gpio_probe(struct platform_device *pdev) +{ + struct resource *res_base, *res_pm; + int err; + struct lpc_ich_info *ich_info = pdev->dev.platform_data; + + if (!ich_info) + return -ENODEV; + + ichx_priv.dev = pdev; + + switch (ich_info->gpio_version) { + case ICH_I3100_GPIO: + ichx_priv.desc = &i3100_desc; + break; + case ICH_V5_GPIO: + ichx_priv.desc = &intel5_desc; + break; + case ICH_V6_GPIO: + ichx_priv.desc = &ich6_desc; + break; + case ICH_V7_GPIO: + ichx_priv.desc = &ich7_desc; + break; + case ICH_V9_GPIO: + ichx_priv.desc = &ich9_desc; + break; + case ICH_V10CORP_GPIO: + ichx_priv.desc = &ich10_corp_desc; + break; + case ICH_V10CONS_GPIO: + ichx_priv.desc = &ich10_cons_desc; + break; + default: + return -ENODEV; + } + + res_base = platform_get_resource(pdev, IORESOURCE_IO, ICH_RES_GPIO); + if (!res_base || !res_base->start || !res_base->end) + return -ENODEV; + + if (!request_region(res_base->start, resource_size(res_base), + pdev->name)) + return -EBUSY; + + ichx_priv.gpio_base = res_base; + + /* + * If necessary, determine the I/O address of ACPI/power management + * registers which are needed to read the the GPE0 register for GPI pins + * 0 - 15 on some chipsets. + */ + if (!ichx_priv.desc->uses_gpe0) + goto init; + + res_pm = platform_get_resource(pdev, IORESOURCE_IO, ICH_RES_GPE0); + if (!res_pm) { + pr_warn("ACPI BAR is unavailable, GPI 0 - 15 unavailable\n"); + goto init; + } + + if (!request_region(res_pm->start, resource_size(res_pm), + pdev->name)) { + pr_warn("ACPI BAR is busy, GPI 0 - 15 unavailable\n"); + goto init; + } + + ichx_priv.pm_base = res_pm; + +init: + ichx_gpiolib_setup(&ichx_priv.chip); + err = gpiochip_add(&ichx_priv.chip); + if (err) { + pr_err("Failed to register GPIOs\n"); + goto add_err; + } + + pr_info("GPIO from %d to %d on %s\n", ichx_priv.chip.base, + ichx_priv.chip.base + ichx_priv.chip.ngpio - 1, DRV_NAME); + + return 0; + +add_err: + release_region(ichx_priv.gpio_base->start, + resource_size(ichx_priv.gpio_base)); + if (ichx_priv.pm_base) + release_region(ichx_priv.pm_base->start, + resource_size(ichx_priv.pm_base)); + return err; +} + +static int __devexit ichx_gpio_remove(struct platform_device *pdev) +{ + int err; + + err = gpiochip_remove(&ichx_priv.chip); + if (err) { + dev_err(&pdev->dev, "%s failed, %d\n", + "gpiochip_remove()", err); + return err; + } + + release_region(ichx_priv.gpio_base->start, + resource_size(ichx_priv.gpio_base)); + if (ichx_priv.pm_base) + release_region(ichx_priv.pm_base->start, + resource_size(ichx_priv.pm_base)); + + return 0; +} + +static struct platform_driver ichx_gpio_driver = { + .driver = { + .owner = THIS_MODULE, + .name = DRV_NAME, + }, + .probe = ichx_gpio_probe, + .remove = __devexit_p(ichx_gpio_remove), +}; + +module_platform_driver(ichx_gpio_driver); + +MODULE_AUTHOR("Peter Tyser "); +MODULE_DESCRIPTION("GPIO interface for Intel ICH series"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("platform:"DRV_NAME); -- cgit v0.10.2 From 38a36f5a6ab893fac87ffd1b1c92a491dfd71ea1 Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Tue, 3 Apr 2012 09:09:19 +0800 Subject: mfd: Use module_pci_driver This patch converts the drivers in drivers/mfd/* to use module_pci_driver() macro which makes the code smaller and a bit simpler. Signed-off-by: Axel Lin Cc: Andres Salomon Cc: Ira W. Snyder Cc: Florian Fainelli Cc: Denis Turischev Cc: Harald Welte Signed-off-by: Samuel Ortiz diff --git a/drivers/mfd/cs5535-mfd.c b/drivers/mfd/cs5535-mfd.c index 315fef5..3419e72 100644 --- a/drivers/mfd/cs5535-mfd.c +++ b/drivers/mfd/cs5535-mfd.c @@ -186,18 +186,7 @@ static struct pci_driver cs5535_mfd_driver = { .remove = __devexit_p(cs5535_mfd_remove), }; -static int __init cs5535_mfd_init(void) -{ - return pci_register_driver(&cs5535_mfd_driver); -} - -static void __exit cs5535_mfd_exit(void) -{ - pci_unregister_driver(&cs5535_mfd_driver); -} - -module_init(cs5535_mfd_init); -module_exit(cs5535_mfd_exit); +module_pci_driver(cs5535_mfd_driver); MODULE_AUTHOR("Andres Salomon "); MODULE_DESCRIPTION("MFD driver for CS5535/CS5536 southbridge's ISA PCI device"); diff --git a/drivers/mfd/janz-cmodio.c b/drivers/mfd/janz-cmodio.c index a9223ed..2ea9998 100644 --- a/drivers/mfd/janz-cmodio.c +++ b/drivers/mfd/janz-cmodio.c @@ -283,23 +283,8 @@ static struct pci_driver cmodio_pci_driver = { .remove = __devexit_p(cmodio_pci_remove), }; -/* - * Module Init / Exit - */ - -static int __init cmodio_init(void) -{ - return pci_register_driver(&cmodio_pci_driver); -} - -static void __exit cmodio_exit(void) -{ - pci_unregister_driver(&cmodio_pci_driver); -} +module_pci_driver(cmodio_pci_driver); MODULE_AUTHOR("Ira W. Snyder "); MODULE_DESCRIPTION("Janz CMOD-IO PCI MODULbus Carrier Board Driver"); MODULE_LICENSE("GPL"); - -module_init(cmodio_init); -module_exit(cmodio_exit); diff --git a/drivers/mfd/lpc_sch.c b/drivers/mfd/lpc_sch.c index abc4213..27026eb 100644 --- a/drivers/mfd/lpc_sch.c +++ b/drivers/mfd/lpc_sch.c @@ -167,18 +167,7 @@ static struct pci_driver lpc_sch_driver = { .remove = __devexit_p(lpc_sch_remove), }; -static int __init lpc_sch_init(void) -{ - return pci_register_driver(&lpc_sch_driver); -} - -static void __exit lpc_sch_exit(void) -{ - pci_unregister_driver(&lpc_sch_driver); -} - -module_init(lpc_sch_init); -module_exit(lpc_sch_exit); +module_pci_driver(lpc_sch_driver); MODULE_AUTHOR("Denis Turischev "); MODULE_DESCRIPTION("LPC interface for Intel Poulsbo SCH"); diff --git a/drivers/mfd/rdc321x-southbridge.c b/drivers/mfd/rdc321x-southbridge.c index 809bd4a..685d61e 100644 --- a/drivers/mfd/rdc321x-southbridge.c +++ b/drivers/mfd/rdc321x-southbridge.c @@ -108,18 +108,7 @@ static struct pci_driver rdc321x_sb_driver = { .remove = __devexit_p(rdc321x_sb_remove), }; -static int __init rdc321x_sb_init(void) -{ - return pci_register_driver(&rdc321x_sb_driver); -} - -static void __exit rdc321x_sb_exit(void) -{ - pci_unregister_driver(&rdc321x_sb_driver); -} - -module_init(rdc321x_sb_init); -module_exit(rdc321x_sb_exit); +module_pci_driver(rdc321x_sb_driver); MODULE_AUTHOR("Florian Fainelli "); MODULE_LICENSE("GPL"); diff --git a/drivers/mfd/vx855.c b/drivers/mfd/vx855.c index b73cc15..872aff2 100644 --- a/drivers/mfd/vx855.c +++ b/drivers/mfd/vx855.c @@ -131,17 +131,7 @@ static struct pci_driver vx855_pci_driver = { .remove = __devexit_p(vx855_remove), }; -static int vx855_init(void) -{ - return pci_register_driver(&vx855_pci_driver); -} -module_init(vx855_init); - -static void vx855_exit(void) -{ - pci_unregister_driver(&vx855_pci_driver); -} -module_exit(vx855_exit); +module_pci_driver(vx855_pci_driver); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Harald Welte "); -- cgit v0.10.2 From 18996db262d0f1e79357315e033ace24488f92ad Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Thu, 5 Apr 2012 15:16:56 +0100 Subject: mfd: Cache wm8994 chip revision There's no need to mark the chip revision registers as volatile, it won't change at runtime so we can cache it from the device at startup. Signed-off-by: Mark Brown Signed-off-by: Samuel Ortiz diff --git a/drivers/mfd/wm8994-regmap.c b/drivers/mfd/wm8994-regmap.c index bfd25af..52e9e29 100644 --- a/drivers/mfd/wm8994-regmap.c +++ b/drivers/mfd/wm8994-regmap.c @@ -1122,7 +1122,6 @@ static bool wm8994_volatile_register(struct device *dev, unsigned int reg) case WM8994_RATE_STATUS: case WM8958_MIC_DETECT_3: case WM8994_DC_SERVO_4E: - case WM8994_CHIP_REVISION: case WM8994_INTERRUPT_STATUS_1: case WM8994_INTERRUPT_STATUS_2: return true; -- cgit v0.10.2 From f22a9c6fd56f0a7b24ee43466e524ad4576c6dfd Mon Sep 17 00:00:00 2001 From: Paul Parsons Date: Thu, 5 Apr 2012 17:45:04 +0100 Subject: mfd: Add PCMCIA/CF support to asic3 This patch is part of a set which adds PCMCIA/CF support for the hx4700. This patch adds asic3_set_register() calls to: 1. Enable the PCMCIA/CF in asic3_probe(). 2. Disable the PCMCIA/CF in asic3_remove(). Signed-off-by: Paul Parsons Acked-by: Philipp Zabel Signed-off-by: Samuel Ortiz diff --git a/drivers/mfd/asic3.c b/drivers/mfd/asic3.c index 1582c3d..f75dc67 100644 --- a/drivers/mfd/asic3.c +++ b/drivers/mfd/asic3.c @@ -1000,6 +1000,9 @@ static int __init asic3_probe(struct platform_device *pdev) asic3_mfd_probe(pdev, pdata, mem); + asic3_set_register(asic, ASIC3_OFFSET(EXTCF, SELECT), + (ASIC3_EXTCF_CF0_BUF_EN|ASIC3_EXTCF_CF0_PWAIT_EN), 1); + dev_info(asic->dev, "ASIC3 Core driver\n"); return 0; @@ -1021,6 +1024,9 @@ static int __devexit asic3_remove(struct platform_device *pdev) int ret; struct asic3 *asic = platform_get_drvdata(pdev); + asic3_set_register(asic, ASIC3_OFFSET(EXTCF, SELECT), + (ASIC3_EXTCF_CF0_BUF_EN|ASIC3_EXTCF_CF0_PWAIT_EN), 0); + asic3_mfd_remove(pdev); ret = asic3_gpio_remove(pdev); -- cgit v0.10.2 From e1277f45d8748ff59608b140780f75390cb5400c Mon Sep 17 00:00:00 2001 From: Laxman Dewangan Date: Mon, 9 Apr 2012 13:55:55 +0530 Subject: mfd: Add rc5t583's gpio in mfd device list Adding the gpio of RC583 in the list of rc583 mfd devices to register the gpio driver of RC5T583. Signed-off-by: Laxman Dewangan Signed-off-by: Samuel Ortiz diff --git a/drivers/mfd/rc5t583.c b/drivers/mfd/rc5t583.c index 44afae0..1cb4c35 100644 --- a/drivers/mfd/rc5t583.c +++ b/drivers/mfd/rc5t583.c @@ -75,6 +75,7 @@ static struct deepsleep_control_data deepsleep_data[] = { (RC5T583_EXT_PWRREQ1_CONTROL | RC5T583_EXT_PWRREQ2_CONTROL) static struct mfd_cell rc5t583_subdevs[] = { + {.name = "rc5t583-gpio",}, {.name = "rc5t583-regulator",}, {.name = "rc5t583-rtc", }, {.name = "rc5t583-key", } -- cgit v0.10.2 From 4f304245bb6cfa665ff21b12c059499eafa8b725 Mon Sep 17 00:00:00 2001 From: Paul Parsons Date: Mon, 9 Apr 2012 13:18:31 +0100 Subject: mfd: Set asic3 DS1WM clock_rate The mfd/asic3 driver does not set the ds1wm_driver_data clock_rate field before passing the structure to the DS1WM w1 busmaster driver. This was not noticed before commit 26a6afb, because ds1wm_find_divisor() unintentionally returned the correct divisor when a zero clock_rate was passed in. However after that commit DS1WM fails a zero clock_rate: ds1wm ds1wm: no suitable divisor for 0Hz clock This patch sets the ds1wm_driver_data clock_rate field. Signed-off-by: Paul Parsons Acked-by: Philipp Zabel Signed-off-by: Samuel Ortiz diff --git a/drivers/mfd/asic3.c b/drivers/mfd/asic3.c index f75dc67..4c3ec81 100644 --- a/drivers/mfd/asic3.c +++ b/drivers/mfd/asic3.c @@ -894,10 +894,13 @@ static int __init asic3_mfd_probe(struct platform_device *pdev, asic3_mmc_resources[0].start >>= asic->bus_shift; asic3_mmc_resources[0].end >>= asic->bus_shift; - ret = mfd_add_devices(&pdev->dev, pdev->id, + if (pdata->clock_rate) { + ds1wm_pdata.clock_rate = pdata->clock_rate; + ret = mfd_add_devices(&pdev->dev, pdev->id, &asic3_cell_ds1wm, 1, mem, asic->irq_base); - if (ret < 0) - goto out; + if (ret < 0) + goto out; + } if (mem_sdio && (irq >= 0)) { ret = mfd_add_devices(&pdev->dev, pdev->id, diff --git a/include/linux/mfd/asic3.h b/include/linux/mfd/asic3.h index ed793b7..3fda7e5 100644 --- a/include/linux/mfd/asic3.h +++ b/include/linux/mfd/asic3.h @@ -31,6 +31,8 @@ struct asic3_platform_data { unsigned int gpio_base; + unsigned int clock_rate; + struct asic3_led *leds; }; -- cgit v0.10.2 From 1baf665b8167c0ab1240e76b1eae647d5ab60b23 Mon Sep 17 00:00:00 2001 From: Lars-Peter Clausen Date: Tue, 10 Apr 2012 22:51:28 +0200 Subject: mfd: Remove redundant spi driver bus initialization In ancient times it was necessary to manually initialize the bus field of an spi_driver to spi_bus_type. These days this is done in spi_driver_register() so we can drop the manual assignment. The patch was generated using the following coccinelle semantic patch: // @@ identifier _driver; @@ struct spi_driver _driver = { .driver = { - .bus = &spi_bus_type, }, }; // Signed-off-by: Lars-Peter Clausen Cc: Srinidhi Kasagar Acked-by: Linus Walleij Signed-off-by: Samuel Ortiz diff --git a/drivers/mfd/da9052-spi.c b/drivers/mfd/da9052-spi.c index 6faf149..6e09498 100644 --- a/drivers/mfd/da9052-spi.c +++ b/drivers/mfd/da9052-spi.c @@ -88,7 +88,6 @@ static struct spi_driver da9052_spi_driver = { .id_table = da9052_spi_id, .driver = { .name = "da9052", - .bus = &spi_bus_type, .owner = THIS_MODULE, }, }; diff --git a/drivers/mfd/stmpe-spi.c b/drivers/mfd/stmpe-spi.c index b58c43c..afd4590 100644 --- a/drivers/mfd/stmpe-spi.c +++ b/drivers/mfd/stmpe-spi.c @@ -122,7 +122,6 @@ MODULE_DEVICE_TABLE(spi, stmpe_id); static struct spi_driver stmpe_spi_driver = { .driver = { .name = "stmpe-spi", - .bus = &spi_bus_type, .owner = THIS_MODULE, #ifdef CONFIG_PM .pm = &stmpe_dev_pm_ops, -- cgit v0.10.2 From 2fe372fc2a037c8de0c721b45cd0e4e9c8d8c25e Mon Sep 17 00:00:00 2001 From: Paul Parsons Date: Wed, 11 Apr 2012 00:35:34 +0100 Subject: mfd: Avoid unbalanced asic3 irq wakeup enables/disables The mfd/asic3 driver does not currently define a irq_set_wake() handler. Consequently any attempt to configure the 3 ASIC3 GPIO buttons - RECORD, CALENDAR, HOME - as wakeup sources results in Unbalanced IRQ warnings when the system is woken from sleep mode: WARNING: at kernel/irq/manage.c:520 irq_set_irq_wake+0xc4/0xf8() Unbalanced IRQ 342 wake disable ... WARNING: at kernel/irq/manage.c:520 irq_set_irq_wake+0xc4/0xf8() Unbalanced IRQ 337 wake disable ... WARNING: at kernel/irq/manage.c:520 irq_set_irq_wake+0xc4/0xf8() Unbalanced IRQ 339 wake disable ... This patch adds a irq_set_wake() handler to the mfd/asic3 driver. Signed-off-by: Paul Parsons Cc: Philipp Zabel Signed-off-by: Samuel Ortiz diff --git a/drivers/mfd/asic3.c b/drivers/mfd/asic3.c index 4c3ec81..9d4a492 100644 --- a/drivers/mfd/asic3.c +++ b/drivers/mfd/asic3.c @@ -353,12 +353,28 @@ static int asic3_gpio_irq_type(struct irq_data *data, unsigned int type) return 0; } +static int asic3_gpio_irq_set_wake(struct irq_data *data, unsigned int on) +{ + struct asic3 *asic = irq_data_get_irq_chip_data(data); + u32 bank, index; + u16 bit; + + bank = asic3_irq_to_bank(asic, data->irq); + index = asic3_irq_to_index(asic, data->irq); + bit = 1< Date: Thu, 12 Apr 2012 12:40:37 +0300 Subject: mfd: Convert Intel MSIC driver to use devm_* interfaces. The devm_* functions eliminate the need for manual resource releasing and simplify error handling. Resources allocated by devm_* are freed automatically on driver detach. Signed-off-by: Pasi Savanainen Acked-by: Mika Westerberg Signed-off-by: Samuel Ortiz diff --git a/drivers/mfd/intel_msic.c b/drivers/mfd/intel_msic.c index b76657e..59df558 100644 --- a/drivers/mfd/intel_msic.c +++ b/drivers/mfd/intel_msic.c @@ -406,7 +406,7 @@ static int __devinit intel_msic_probe(struct platform_device *pdev) return -ENXIO; } - msic = kzalloc(sizeof(*msic), GFP_KERNEL); + msic = devm_kzalloc(&pdev->dev, sizeof(*msic), GFP_KERNEL); if (!msic) return -ENOMEM; @@ -421,21 +421,13 @@ static int __devinit intel_msic_probe(struct platform_device *pdev) res = platform_get_resource(pdev, IORESOURCE_MEM, 0); if (!res) { dev_err(&pdev->dev, "failed to get SRAM iomem resource\n"); - ret = -ENODEV; - goto fail_free_msic; + return -ENODEV; } - res = request_mem_region(res->start, resource_size(res), pdev->name); - if (!res) { - ret = -EBUSY; - goto fail_free_msic; - } - - msic->irq_base = ioremap_nocache(res->start, resource_size(res)); + msic->irq_base = devm_request_and_ioremap(&pdev->dev, res); if (!msic->irq_base) { dev_err(&pdev->dev, "failed to map SRAM memory\n"); - ret = -ENOMEM; - goto fail_release_region; + return -ENOMEM; } platform_set_drvdata(pdev, msic); @@ -443,7 +435,7 @@ static int __devinit intel_msic_probe(struct platform_device *pdev) ret = intel_msic_init_devices(msic); if (ret) { dev_err(&pdev->dev, "failed to initialize MSIC devices\n"); - goto fail_unmap_mem; + return ret; } dev_info(&pdev->dev, "Intel MSIC version %c%d (vendor %#x)\n", @@ -451,27 +443,14 @@ static int __devinit intel_msic_probe(struct platform_device *pdev) msic->vendor); return 0; - -fail_unmap_mem: - iounmap(msic->irq_base); -fail_release_region: - release_mem_region(res->start, resource_size(res)); -fail_free_msic: - kfree(msic); - - return ret; } static int __devexit intel_msic_remove(struct platform_device *pdev) { struct intel_msic *msic = platform_get_drvdata(pdev); - struct resource *res = platform_get_resource(pdev, IORESOURCE_MEM, 0); intel_msic_remove_devices(msic); platform_set_drvdata(pdev, NULL); - iounmap(msic->irq_base); - release_mem_region(res->start, resource_size(res)); - kfree(msic); return 0; } -- cgit v0.10.2 From 58d114b669d2b86aa79eac6688590c808072579b Mon Sep 17 00:00:00 2001 From: "Ying-Chun Liu (PaulLiu)" Date: Mon, 16 Apr 2012 00:01:50 +0800 Subject: mfd: Add device-tree support for da9502 i2c driver This patch adds device-tree support for dialog MFD and the binding documentations. Signed-off-by: Ying-Chun Liu (PaulLiu) Cc: Samuel Ortiz Cc: Mark Brown Cc: Shawn Guo Cc: Ashish Jangam Signed-off-by: Samuel Ortiz diff --git a/Documentation/devicetree/bindings/mfd/da9052-i2c.txt b/Documentation/devicetree/bindings/mfd/da9052-i2c.txt new file mode 100644 index 0000000..1857f4a --- /dev/null +++ b/Documentation/devicetree/bindings/mfd/da9052-i2c.txt @@ -0,0 +1,60 @@ +* Dialog DA9052/53 Power Management Integrated Circuit (PMIC) + +Required properties: +- compatible : Should be "dlg,da9052", "dlg,da9053-aa", + "dlg,da9053-ab", or "dlg,da9053-bb" + +Sub-nodes: +- regulators : Contain the regulator nodes. The DA9052/53 regulators are + bound using their names as listed below: + + buck0 : regulator BUCK0 + buck1 : regulator BUCK1 + buck2 : regulator BUCK2 + buck3 : regulator BUCK3 + ldo4 : regulator LDO4 + ldo5 : regulator LDO5 + ldo6 : regulator LDO6 + ldo7 : regulator LDO7 + ldo8 : regulator LDO8 + ldo9 : regulator LDO9 + ldo10 : regulator LDO10 + ldo11 : regulator LDO11 + ldo12 : regulator LDO12 + ldo13 : regulator LDO13 + + The bindings details of individual regulator device can be found in: + Documentation/devicetree/bindings/regulator/regulator.txt + +Examples: + +i2c@63fc8000 { /* I2C1 */ + status = "okay"; + + pmic: dialog@48 { + compatible = "dlg,da9053-aa"; + reg = <0x48>; + + regulators { + buck0 { + regulator-min-microvolt = <500000>; + regulator-max-microvolt = <2075000>; + }; + + buck1 { + regulator-min-microvolt = <500000>; + regulator-max-microvolt = <2075000>; + }; + + buck2 { + regulator-min-microvolt = <925000>; + regulator-max-microvolt = <2500000>; + }; + + buck3 { + regulator-min-microvolt = <925000>; + regulator-max-microvolt = <2500000>; + }; + }; + }; +}; diff --git a/drivers/mfd/da9052-i2c.c b/drivers/mfd/da9052-i2c.c index 36b88e3..d8abdb3 100644 --- a/drivers/mfd/da9052-i2c.c +++ b/drivers/mfd/da9052-i2c.c @@ -22,6 +22,11 @@ #include #include +#ifdef CONFIG_OF +#include +#include +#endif + static int da9052_i2c_enable_multiwrite(struct da9052 *da9052) { int reg_val, ret; @@ -41,6 +46,24 @@ static int da9052_i2c_enable_multiwrite(struct da9052 *da9052) return 0; } +static struct i2c_device_id da9052_i2c_id[] = { + {"da9052", DA9052}, + {"da9053-aa", DA9053_AA}, + {"da9053-ba", DA9053_BA}, + {"da9053-bb", DA9053_BB}, + {} +}; + +#ifdef CONFIG_OF +static const struct of_device_id dialog_dt_ids[] = { + { .compatible = "dlg,da9052", .data = &da9052_i2c_id[0] }, + { .compatible = "dlg,da9053-aa", .data = &da9052_i2c_id[1] }, + { .compatible = "dlg,da9053-ab", .data = &da9052_i2c_id[2] }, + { .compatible = "dlg,da9053-bb", .data = &da9052_i2c_id[3] }, + { /* sentinel */ } +}; +#endif + static int __devinit da9052_i2c_probe(struct i2c_client *client, const struct i2c_device_id *id) { @@ -76,6 +99,22 @@ static int __devinit da9052_i2c_probe(struct i2c_client *client, if (ret < 0) goto err_regmap; +#ifdef CONFIG_OF + if (!id) { + struct device_node *np = client->dev.of_node; + const struct of_device_id *deviceid; + + deviceid = of_match_node(np, dialog_dt_ids); + id = (const struct i2c_device_id *)deviceid->data; + } +#endif + + if (!id) { + ret = -ENODEV; + dev_err(&client->dev, "id is null.\n"); + goto err_regmap; + } + ret = da9052_device_init(da9052, id->driver_data); if (ret != 0) goto err_regmap; @@ -100,14 +139,6 @@ static int __devexit da9052_i2c_remove(struct i2c_client *client) return 0; } -static struct i2c_device_id da9052_i2c_id[] = { - {"da9052", DA9052}, - {"da9053-aa", DA9053_AA}, - {"da9053-ba", DA9053_BA}, - {"da9053-bb", DA9053_BB}, - {} -}; - static struct i2c_driver da9052_i2c_driver = { .probe = da9052_i2c_probe, .remove = __devexit_p(da9052_i2c_remove), @@ -115,6 +146,9 @@ static struct i2c_driver da9052_i2c_driver = { .driver = { .name = "da9052", .owner = THIS_MODULE, +#ifdef CONFIG_OF + .of_match_table = dialog_dt_ids, +#endif }, }; -- cgit v0.10.2 From 201cf052810d20814a77ca0e0045a2c1a3508a1f Mon Sep 17 00:00:00 2001 From: Laxman Dewangan Date: Wed, 18 Apr 2012 12:13:51 +0200 Subject: mfd: Add support for tps65910 device sleep Adding support for device sleep through the external input control signal "SLEEP". Changing the SLEEP signal state can switch the device into SLEEP and ACTIVE state. Also adding sleep configuration for different resources so that they should be keep on during sleep state of device. Signed-off-by: Laxman Dewangan Signed-off-by: Samuel Ortiz diff --git a/drivers/mfd/tps65910.c b/drivers/mfd/tps65910.c index bf2b25e..ae7f47b 100644 --- a/drivers/mfd/tps65910.c +++ b/drivers/mfd/tps65910.c @@ -90,6 +90,66 @@ static const struct regmap_config tps65910_regmap_config = { .cache_type = REGCACHE_RBTREE, }; +static int __init tps65910_sleepinit(struct tps65910 *tps65910, + struct tps65910_board *pmic_pdata) +{ + struct device *dev = NULL; + int ret = 0; + + dev = tps65910->dev; + + if (!pmic_pdata->en_dev_slp) + return 0; + + /* enabling SLEEP device state */ + ret = tps65910_set_bits(tps65910, TPS65910_DEVCTRL, + DEVCTRL_DEV_SLP_MASK); + if (ret < 0) { + dev_err(dev, "set dev_slp failed: %d\n", ret); + goto err_sleep_init; + } + + /* Return if there is no sleep keepon data. */ + if (!pmic_pdata->slp_keepon) + return 0; + + if (pmic_pdata->slp_keepon->therm_keepon) { + ret = tps65910_set_bits(tps65910, TPS65910_SLEEP_KEEP_RES_ON, + SLEEP_KEEP_RES_ON_THERM_KEEPON_MASK); + if (ret < 0) { + dev_err(dev, "set therm_keepon failed: %d\n", ret); + goto disable_dev_slp; + } + } + + if (pmic_pdata->slp_keepon->clkout32k_keepon) { + ret = tps65910_set_bits(tps65910, TPS65910_SLEEP_KEEP_RES_ON, + SLEEP_KEEP_RES_ON_CLKOUT32K_KEEPON_MASK); + if (ret < 0) { + dev_err(dev, "set clkout32k_keepon failed: %d\n", ret); + goto disable_dev_slp; + } + } + + if (pmic_pdata->slp_keepon->i2chs_keepon) { + ret = tps65910_set_bits(tps65910, TPS65910_SLEEP_KEEP_RES_ON, + SLEEP_KEEP_RES_ON_I2CHS_KEEPON_MASK); + if (ret < 0) { + dev_err(dev, "set i2chs_keepon failed: %d\n", ret); + goto disable_dev_slp; + } + } + + return 0; + +disable_dev_slp: + tps65910_clear_bits(tps65910, TPS65910_DEVCTRL, DEVCTRL_DEV_SLP_MASK); + +err_sleep_init: + return ret; +} + + static int tps65910_i2c_probe(struct i2c_client *i2c, const struct i2c_device_id *id) { @@ -140,6 +200,8 @@ static int tps65910_i2c_probe(struct i2c_client *i2c, tps65910_irq_init(tps65910, init_data->irq, init_data); + tps65910_sleepinit(tps65910, pmic_plat_data); + kfree(init_data); return ret; diff --git a/include/linux/mfd/tps65910.h b/include/linux/mfd/tps65910.h index 1c6c286..56903ad 100644 --- a/include/linux/mfd/tps65910.h +++ b/include/linux/mfd/tps65910.h @@ -783,6 +783,18 @@ #define TPS65910_SLEEP_CONTROL_EXT_INPUT_EN3 0x4 #define TPS65911_SLEEP_CONTROL_EXT_INPUT_SLEEP 0x8 +/* + * Sleep keepon data: Maintains the state in sleep mode + * @therm_keepon: Keep on the thermal monitoring in sleep state. + * @clkout32k_keepon: Keep on the 32KHz clock output in sleep state. + * @i2chs_keepon: Keep on high speed internal clock in sleep state. + */ +struct tps65910_sleep_keepon_data { + unsigned therm_keepon:1; + unsigned clkout32k_keepon:1; + unsigned i2chs_keepon:1; +}; + /** * struct tps65910_board * Board platform data may be used to initialize regulators. @@ -794,6 +806,8 @@ struct tps65910_board { int irq_base; int vmbch_threshold; int vmbch2_threshold; + bool en_dev_slp; + struct tps65910_sleep_keepon_data *slp_keepon; bool en_gpio_sleep[TPS6591X_MAX_NUM_GPIO]; unsigned long regulator_ext_sleep_control[TPS65910_NUM_REGS]; struct regulator_init_data *tps65910_pmic_init_data[TPS65910_NUM_REGS]; -- cgit v0.10.2 From 12693f6c1ff6f43f34a0d105307976337f64dcdd Mon Sep 17 00:00:00 2001 From: Samuel Ortiz Date: Mon, 16 Apr 2012 21:28:29 +0200 Subject: mfd: No need to check for the GPIO offset from asic3_gpio_to_irq The gpiolib code will only call our gpio_to_irq ops for our registered GPIO range. Signed-off-by: Samuel Ortiz diff --git a/drivers/mfd/asic3.c b/drivers/mfd/asic3.c index 9d4a492..383421b 100644 --- a/drivers/mfd/asic3.c +++ b/drivers/mfd/asic3.c @@ -545,7 +545,7 @@ static int asic3_gpio_to_irq(struct gpio_chip *chip, unsigned offset) { struct asic3 *asic = container_of(chip, struct asic3, gpio); - return (offset < ASIC3_NUM_GPIOS) ? asic->irq_base + offset : -ENXIO; + return asic->irq_base + offset; } static __init int asic3_gpio_probe(struct platform_device *pdev, -- cgit v0.10.2 From 4c394bb1683dbd110314ab37da7bfa6c9a77073d Mon Sep 17 00:00:00 2001 From: Russ Dill Date: Sun, 22 Apr 2012 01:48:18 -0700 Subject: mfd: Fix build breakage in omap-usb-host.c 'ARM: OMAP3: USB: Fix the EHCI ULPI PHY reset issue' removes the include for linux/gpio.h from omap-usb-host.c. This include indirectly includes plat/cpu.h which is required by omap-usb-host.c. Fix the build breakage by including it directly. Acked-by: Keshava Munegowda Acked-by: Kevin Hilman Signed-off-by: Russ Dill Signed-off-by: Samuel Ortiz diff --git a/drivers/mfd/omap-usb-host.c b/drivers/mfd/omap-usb-host.c index c8aae66..7e96bb2 100644 --- a/drivers/mfd/omap-usb-host.c +++ b/drivers/mfd/omap-usb-host.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include -- cgit v0.10.2 From 5006fe546a4eb0393782b818d4e0e2a6b4fa3803 Mon Sep 17 00:00:00 2001 From: Marc Reilly Date: Tue, 1 May 2012 12:26:46 +0200 Subject: mfd: Prepare for separating spi and i2c mc13xxx-core backends This patch abstracts the bus specific operations from the driver core. Generic init and cleanup is consolidated into mc13xxx_common_*. spi specific functions are renamed to reflect such. (The irq member of the mc13xxx struct is no longer redundant, it's used to store the irq for cleanup time). Signed-off-by: Marc Reilly Signed-off-by: Samuel Ortiz diff --git a/drivers/mfd/mc13xxx-core.c b/drivers/mfd/mc13xxx-core.c index 9fd4f63..c8b7a28 100644 --- a/drivers/mfd/mc13xxx-core.c +++ b/drivers/mfd/mc13xxx-core.c @@ -22,8 +22,18 @@ #include #include +enum mc13xxx_id { + MC13XXX_ID_MC13783, + MC13XXX_ID_MC13892, + MC13XXX_ID_INVALID, +}; + struct mc13xxx { struct spi_device *spidev; + + struct device *dev; + enum mc13xxx_id ictype; + struct mutex lock; int irq; int flags; @@ -144,36 +154,32 @@ struct mc13xxx { void mc13xxx_lock(struct mc13xxx *mc13xxx) { if (!mutex_trylock(&mc13xxx->lock)) { - dev_dbg(&mc13xxx->spidev->dev, "wait for %s from %pf\n", + dev_dbg(mc13xxx->dev, "wait for %s from %pf\n", __func__, __builtin_return_address(0)); mutex_lock(&mc13xxx->lock); } - dev_dbg(&mc13xxx->spidev->dev, "%s from %pf\n", + dev_dbg(mc13xxx->dev, "%s from %pf\n", __func__, __builtin_return_address(0)); } EXPORT_SYMBOL(mc13xxx_lock); void mc13xxx_unlock(struct mc13xxx *mc13xxx) { - dev_dbg(&mc13xxx->spidev->dev, "%s from %pf\n", + dev_dbg(mc13xxx->dev, "%s from %pf\n", __func__, __builtin_return_address(0)); mutex_unlock(&mc13xxx->lock); } EXPORT_SYMBOL(mc13xxx_unlock); #define MC13XXX_REGOFFSET_SHIFT 25 -int mc13xxx_reg_read(struct mc13xxx *mc13xxx, unsigned int offset, u32 *val) +static int mc13xxx_spi_reg_read(struct mc13xxx *mc13xxx, + unsigned int offset, u32 *val) { struct spi_transfer t; struct spi_message m; int ret; - BUG_ON(!mutex_is_locked(&mc13xxx->lock)); - - if (offset > MC13XXX_NUMREGS) - return -EINVAL; - *val = offset << MC13XXX_REGOFFSET_SHIFT; memset(&t, 0, sizeof(t)); @@ -195,26 +201,17 @@ int mc13xxx_reg_read(struct mc13xxx *mc13xxx, unsigned int offset, u32 *val) *val &= 0xffffff; - dev_vdbg(&mc13xxx->spidev->dev, "[0x%02x] -> 0x%06x\n", offset, *val); - return 0; } -EXPORT_SYMBOL(mc13xxx_reg_read); -int mc13xxx_reg_write(struct mc13xxx *mc13xxx, unsigned int offset, u32 val) +static int mc13xxx_spi_reg_write(struct mc13xxx *mc13xxx, unsigned int offset, + u32 val) { u32 buf; struct spi_transfer t; struct spi_message m; int ret; - BUG_ON(!mutex_is_locked(&mc13xxx->lock)); - - dev_vdbg(&mc13xxx->spidev->dev, "[0x%02x] <- 0x%06x\n", offset, val); - - if (offset > MC13XXX_NUMREGS || val > 0xffffff) - return -EINVAL; - buf = 1 << 31 | offset << MC13XXX_REGOFFSET_SHIFT | val; memset(&t, 0, sizeof(t)); @@ -235,6 +232,34 @@ int mc13xxx_reg_write(struct mc13xxx *mc13xxx, unsigned int offset, u32 val) return 0; } + +int mc13xxx_reg_read(struct mc13xxx *mc13xxx, unsigned int offset, u32 *val) +{ + int ret; + + BUG_ON(!mutex_is_locked(&mc13xxx->lock)); + + if (offset > MC13XXX_NUMREGS) + return -EINVAL; + + ret = mc13xxx_spi_reg_read(mc13xxx, offset, val); + dev_vdbg(mc13xxx->dev, "[0x%02x] -> 0x%06x\n", offset, *val); + + return ret; +} +EXPORT_SYMBOL(mc13xxx_reg_read); + +int mc13xxx_reg_write(struct mc13xxx *mc13xxx, unsigned int offset, u32 val) +{ + BUG_ON(!mutex_is_locked(&mc13xxx->lock)); + + dev_vdbg(mc13xxx->dev, "[0x%02x] <- 0x%06x\n", offset, val); + + if (offset > MC13XXX_NUMREGS || val > 0xffffff) + return -EINVAL; + + return mc13xxx_spi_reg_write(mc13xxx, offset, val); +} EXPORT_SYMBOL(mc13xxx_reg_write); int mc13xxx_reg_rmw(struct mc13xxx *mc13xxx, unsigned int offset, @@ -439,7 +464,7 @@ static int mc13xxx_irq_handle(struct mc13xxx *mc13xxx, if (handled == IRQ_HANDLED) num_handled++; } else { - dev_err(&mc13xxx->spidev->dev, + dev_err(mc13xxx->dev, "BUG: irq %u but no handler\n", baseirq + irq); @@ -475,25 +500,23 @@ static irqreturn_t mc13xxx_irq_thread(int irq, void *data) return IRQ_RETVAL(handled); } -enum mc13xxx_id { - MC13XXX_ID_MC13783, - MC13XXX_ID_MC13892, - MC13XXX_ID_INVALID, -}; - static const char *mc13xxx_chipname[] = { [MC13XXX_ID_MC13783] = "mc13783", [MC13XXX_ID_MC13892] = "mc13892", }; #define maskval(reg, mask) (((reg) & (mask)) >> __ffs(mask)) -static int mc13xxx_identify(struct mc13xxx *mc13xxx, enum mc13xxx_id *id) +static int mc13xxx_identify(struct mc13xxx *mc13xxx) { u32 icid; u32 revision; - const char *name; int ret; + /* + * Get the generation ID from register 46, as apparently some older + * IC revisions only have this info at this location. Newer ICs seem to + * have both. + */ ret = mc13xxx_reg_read(mc13xxx, 46, &icid); if (ret) return ret; @@ -502,26 +525,23 @@ static int mc13xxx_identify(struct mc13xxx *mc13xxx, enum mc13xxx_id *id) switch (icid) { case 2: - *id = MC13XXX_ID_MC13783; - name = "mc13783"; + mc13xxx->ictype = MC13XXX_ID_MC13783; break; case 7: - *id = MC13XXX_ID_MC13892; - name = "mc13892"; + mc13xxx->ictype = MC13XXX_ID_MC13892; break; default: - *id = MC13XXX_ID_INVALID; + mc13xxx->ictype = MC13XXX_ID_INVALID; break; } - if (*id == MC13XXX_ID_MC13783 || *id == MC13XXX_ID_MC13892) { + if (mc13xxx->ictype == MC13XXX_ID_MC13783 || + mc13xxx->ictype == MC13XXX_ID_MC13892) { ret = mc13xxx_reg_read(mc13xxx, MC13XXX_REVISION, &revision); - if (ret) - return ret; - dev_info(&mc13xxx->spidev->dev, "%s: rev: %d.%d, " + dev_info(mc13xxx->dev, "%s: rev: %d.%d, " "fin: %d, fab: %d, icid: %d/%d\n", - mc13xxx_chipname[*id], + mc13xxx_chipname[mc13xxx->ictype], maskval(revision, MC13XXX_REVISION_REVFULL), maskval(revision, MC13XXX_REVISION_REVMETAL), maskval(revision, MC13XXX_REVISION_FIN), @@ -530,26 +550,12 @@ static int mc13xxx_identify(struct mc13xxx *mc13xxx, enum mc13xxx_id *id) maskval(revision, MC13XXX_REVISION_ICIDCODE)); } - if (*id != MC13XXX_ID_INVALID) { - const struct spi_device_id *devid = - spi_get_device_id(mc13xxx->spidev); - if (!devid || devid->driver_data != *id) - dev_warn(&mc13xxx->spidev->dev, "device id doesn't " - "match auto detection!\n"); - } - - return 0; + return (mc13xxx->ictype == MC13XXX_ID_INVALID) ? -ENODEV : 0; } static const char *mc13xxx_get_chipname(struct mc13xxx *mc13xxx) { - const struct spi_device_id *devid = - spi_get_device_id(mc13xxx->spidev); - - if (!devid) - return NULL; - - return mc13xxx_chipname[devid->driver_data]; + return mc13xxx_chipname[mc13xxx->ictype]; } int mc13xxx_get_flags(struct mc13xxx *mc13xxx) @@ -592,7 +598,7 @@ int mc13xxx_adc_do_conversion(struct mc13xxx *mc13xxx, unsigned int mode, }; init_completion(&adcdone_data.done); - dev_dbg(&mc13xxx->spidev->dev, "%s\n", __func__); + dev_dbg(mc13xxx->dev, "%s\n", __func__); mc13xxx_lock(mc13xxx); @@ -637,7 +643,8 @@ int mc13xxx_adc_do_conversion(struct mc13xxx *mc13xxx, unsigned int mode, adc1 |= ato << MC13783_ADC1_ATO_SHIFT; if (atox) adc1 |= MC13783_ADC1_ATOX; - dev_dbg(&mc13xxx->spidev->dev, "%s: request irq\n", __func__); + + dev_dbg(mc13xxx->dev, "%s: request irq\n", __func__); mc13xxx_irq_request(mc13xxx, MC13XXX_IRQ_ADCDONE, mc13xxx_handler_adcdone, __func__, &adcdone_data); mc13xxx_irq_ack(mc13xxx, MC13XXX_IRQ_ADCDONE); @@ -695,7 +702,7 @@ static int mc13xxx_add_subdevice_pdata(struct mc13xxx *mc13xxx, if (!cell.name) return -ENOMEM; - return mfd_add_devices(&mc13xxx->spidev->dev, -1, &cell, 1, NULL, 0); + return mfd_add_devices(mc13xxx->dev, -1, &cell, 1, NULL, 0); } static int mc13xxx_add_subdevice(struct mc13xxx *mc13xxx, const char *format) @@ -706,7 +713,7 @@ static int mc13xxx_add_subdevice(struct mc13xxx *mc13xxx, const char *format) #ifdef CONFIG_OF static int mc13xxx_probe_flags_dt(struct mc13xxx *mc13xxx) { - struct device_node *np = mc13xxx->spidev->dev.of_node; + struct device_node *np = mc13xxx->dev.of_node; if (!np) return -ENODEV; @@ -752,13 +759,17 @@ static const struct of_device_id mc13xxx_dt_ids[] = { }; MODULE_DEVICE_TABLE(of, mc13xxx_dt_ids); -static int mc13xxx_probe(struct spi_device *spi) +static int mc13xxx_common_init(struct mc13xxx *mc13xxx, + struct mc13xxx_platform_data *pdata, int irq); + +static void mc13xxx_common_cleanup(struct mc13xxx *mc13xxx); + +static int mc13xxx_spi_probe(struct spi_device *spi) { const struct of_device_id *of_id; struct spi_driver *sdrv = to_spi_driver(spi->dev.driver); struct mc13xxx *mc13xxx; struct mc13xxx_platform_data *pdata = dev_get_platdata(&spi->dev); - enum mc13xxx_id id; int ret; of_id = of_match_device(mc13xxx_dt_ids, &spi->dev); @@ -774,13 +785,34 @@ static int mc13xxx_probe(struct spi_device *spi) spi->bits_per_word = 32; spi_setup(spi); + mc13xxx->dev = &spi->dev; mc13xxx->spidev = spi; + ret = mc13xxx_common_init(mc13xxx, pdata, spi->irq); + + if (ret) { + dev_set_drvdata(&spi->dev, NULL); + } else { + const struct spi_device_id *devid = + spi_get_device_id(mc13xxx->spidev); + if (!devid || devid->driver_data != mc13xxx->ictype) + dev_warn(mc13xxx->dev, + "device id doesn't match auto detection!\n"); + } + + return ret; +} + +static int mc13xxx_common_init(struct mc13xxx *mc13xxx, + struct mc13xxx_platform_data *pdata, int irq) +{ + int ret; + mutex_init(&mc13xxx->lock); mc13xxx_lock(mc13xxx); - ret = mc13xxx_identify(mc13xxx, &id); - if (ret || id == MC13XXX_ID_INVALID) + ret = mc13xxx_identify(mc13xxx); + if (ret) goto err_revision; /* mask all irqs */ @@ -792,18 +824,19 @@ static int mc13xxx_probe(struct spi_device *spi) if (ret) goto err_mask; - ret = request_threaded_irq(spi->irq, NULL, mc13xxx_irq_thread, + ret = request_threaded_irq(irq, NULL, mc13xxx_irq_thread, IRQF_ONESHOT | IRQF_TRIGGER_HIGH, "mc13xxx", mc13xxx); if (ret) { err_mask: err_revision: mc13xxx_unlock(mc13xxx); - dev_set_drvdata(&spi->dev, NULL); kfree(mc13xxx); return ret; } + mc13xxx->irq = irq; + mc13xxx_unlock(mc13xxx); if (mc13xxx_probe_flags_dt(mc13xxx) < 0 && pdata) @@ -838,39 +871,44 @@ err_revision: return 0; } -static int __devexit mc13xxx_remove(struct spi_device *spi) +static int __devexit mc13xxx_spi_remove(struct spi_device *spi) { struct mc13xxx *mc13xxx = dev_get_drvdata(&spi->dev); - free_irq(mc13xxx->spidev->irq, mc13xxx); + mc13xxx_common_cleanup(mc13xxx); - mfd_remove_devices(&spi->dev); + return 0; +} - kfree(mc13xxx); +static void mc13xxx_common_cleanup(struct mc13xxx *mc13xxx) +{ + free_irq(mc13xxx->irq, mc13xxx); - return 0; + mfd_remove_devices(mc13xxx->dev); + + kfree(mc13xxx); } -static struct spi_driver mc13xxx_driver = { +static struct spi_driver mc13xxx_spi_driver = { .id_table = mc13xxx_device_id, .driver = { .name = "mc13xxx", .owner = THIS_MODULE, .of_match_table = mc13xxx_dt_ids, }, - .probe = mc13xxx_probe, - .remove = __devexit_p(mc13xxx_remove), + .probe = mc13xxx_spi_probe, + .remove = __devexit_p(mc13xxx_spi_remove), }; static int __init mc13xxx_init(void) { - return spi_register_driver(&mc13xxx_driver); + return spi_register_driver(&mc13xxx_spi_driver); } subsys_initcall(mc13xxx_init); static void __exit mc13xxx_exit(void) { - spi_unregister_driver(&mc13xxx_driver); + spi_unregister_driver(&mc13xxx_spi_driver); } module_exit(mc13xxx_exit); -- cgit v0.10.2 From 91b5e741184ea9836cd7d7509e4f9b6eefa27df2 Mon Sep 17 00:00:00 2001 From: Marc Reilly Date: Sun, 1 Apr 2012 16:41:37 +1000 Subject: mfd: Use regmap for the mc13xxx-core register access This change converts the mc13xxx core to use regmap rather than direct spi r/w. The spidev member of mc13xxx struct becomes redundant and is removed. Extra debugging aids are added to mc13xxx_reg_rmw. Mutex init is moved to before regmap init. Signed-off-by: Marc Reilly Reviewed-by: Mark Brown Signed-off-by: Samuel Ortiz diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig index c6edba6..411a61a 100644 --- a/drivers/mfd/Kconfig +++ b/drivers/mfd/Kconfig @@ -600,6 +600,7 @@ config MFD_MC13XXX depends on SPI_MASTER select MFD_CORE select MFD_MC13783 + select REGMAP_SPI help Support for the Freescale (Atlas) PMIC and audio CODECs MC13783 and MC13892. diff --git a/drivers/mfd/mc13xxx-core.c b/drivers/mfd/mc13xxx-core.c index c8b7a28..d761a2e 100644 --- a/drivers/mfd/mc13xxx-core.c +++ b/drivers/mfd/mc13xxx-core.c @@ -21,6 +21,8 @@ #include #include #include +#include +#include enum mc13xxx_id { MC13XXX_ID_MC13783, @@ -29,7 +31,7 @@ enum mc13xxx_id { }; struct mc13xxx { - struct spi_device *spidev; + struct regmap *regmap; struct device *dev; enum mc13xxx_id ictype; @@ -172,67 +174,6 @@ void mc13xxx_unlock(struct mc13xxx *mc13xxx) } EXPORT_SYMBOL(mc13xxx_unlock); -#define MC13XXX_REGOFFSET_SHIFT 25 -static int mc13xxx_spi_reg_read(struct mc13xxx *mc13xxx, - unsigned int offset, u32 *val) -{ - struct spi_transfer t; - struct spi_message m; - int ret; - - *val = offset << MC13XXX_REGOFFSET_SHIFT; - - memset(&t, 0, sizeof(t)); - - t.tx_buf = val; - t.rx_buf = val; - t.len = sizeof(u32); - - spi_message_init(&m); - spi_message_add_tail(&t, &m); - - ret = spi_sync(mc13xxx->spidev, &m); - - /* error in message.status implies error return from spi_sync */ - BUG_ON(!ret && m.status); - - if (ret) - return ret; - - *val &= 0xffffff; - - return 0; -} - -static int mc13xxx_spi_reg_write(struct mc13xxx *mc13xxx, unsigned int offset, - u32 val) -{ - u32 buf; - struct spi_transfer t; - struct spi_message m; - int ret; - - buf = 1 << 31 | offset << MC13XXX_REGOFFSET_SHIFT | val; - - memset(&t, 0, sizeof(t)); - - t.tx_buf = &buf; - t.rx_buf = &buf; - t.len = sizeof(u32); - - spi_message_init(&m); - spi_message_add_tail(&t, &m); - - ret = spi_sync(mc13xxx->spidev, &m); - - BUG_ON(!ret && m.status); - - if (ret) - return ret; - - return 0; -} - int mc13xxx_reg_read(struct mc13xxx *mc13xxx, unsigned int offset, u32 *val) { int ret; @@ -242,7 +183,7 @@ int mc13xxx_reg_read(struct mc13xxx *mc13xxx, unsigned int offset, u32 *val) if (offset > MC13XXX_NUMREGS) return -EINVAL; - ret = mc13xxx_spi_reg_read(mc13xxx, offset, val); + ret = regmap_read(mc13xxx->regmap, offset, val); dev_vdbg(mc13xxx->dev, "[0x%02x] -> 0x%06x\n", offset, *val); return ret; @@ -258,25 +199,19 @@ int mc13xxx_reg_write(struct mc13xxx *mc13xxx, unsigned int offset, u32 val) if (offset > MC13XXX_NUMREGS || val > 0xffffff) return -EINVAL; - return mc13xxx_spi_reg_write(mc13xxx, offset, val); + return regmap_write(mc13xxx->regmap, offset, val); } EXPORT_SYMBOL(mc13xxx_reg_write); int mc13xxx_reg_rmw(struct mc13xxx *mc13xxx, unsigned int offset, u32 mask, u32 val) { - int ret; - u32 valread; - + BUG_ON(!mutex_is_locked(&mc13xxx->lock)); BUG_ON(val & ~mask); + dev_vdbg(mc13xxx->dev, "[0x%02x] <- 0x%06x (mask: 0x%06x)\n", + offset, val, mask); - ret = mc13xxx_reg_read(mc13xxx, offset, &valread); - if (ret) - return ret; - - valread = (valread & ~mask) | val; - - return mc13xxx_reg_write(mc13xxx, offset, valread); + return regmap_update_bits(mc13xxx->regmap, offset, mask, val); } EXPORT_SYMBOL(mc13xxx_reg_rmw); @@ -713,7 +648,7 @@ static int mc13xxx_add_subdevice(struct mc13xxx *mc13xxx, const char *format) #ifdef CONFIG_OF static int mc13xxx_probe_flags_dt(struct mc13xxx *mc13xxx) { - struct device_node *np = mc13xxx->dev.of_node; + struct device_node *np = mc13xxx->dev->of_node; if (!np) return -ENODEV; @@ -759,6 +694,16 @@ static const struct of_device_id mc13xxx_dt_ids[] = { }; MODULE_DEVICE_TABLE(of, mc13xxx_dt_ids); +static struct regmap_config mc13xxx_regmap_spi_config = { + .reg_bits = 7, + .pad_bits = 1, + .val_bits = 24, + + .max_register = MC13XXX_NUMREGS, + + .cache_type = REGCACHE_NONE, +}; + static int mc13xxx_common_init(struct mc13xxx *mc13xxx, struct mc13xxx_platform_data *pdata, int irq); @@ -783,10 +728,19 @@ static int mc13xxx_spi_probe(struct spi_device *spi) dev_set_drvdata(&spi->dev, mc13xxx); spi->mode = SPI_MODE_0 | SPI_CS_HIGH; spi->bits_per_word = 32; - spi_setup(spi); mc13xxx->dev = &spi->dev; - mc13xxx->spidev = spi; + mutex_init(&mc13xxx->lock); + + mc13xxx->regmap = regmap_init_spi(spi, &mc13xxx_regmap_spi_config); + if (IS_ERR(mc13xxx->regmap)) { + ret = PTR_ERR(mc13xxx->regmap); + dev_err(mc13xxx->dev, "Failed to initialize register map: %d\n", + ret); + dev_set_drvdata(&spi->dev, NULL); + kfree(mc13xxx); + return ret; + } ret = mc13xxx_common_init(mc13xxx, pdata, spi->irq); @@ -794,7 +748,7 @@ static int mc13xxx_spi_probe(struct spi_device *spi) dev_set_drvdata(&spi->dev, NULL); } else { const struct spi_device_id *devid = - spi_get_device_id(mc13xxx->spidev); + spi_get_device_id(spi); if (!devid || devid->driver_data != mc13xxx->ictype) dev_warn(mc13xxx->dev, "device id doesn't match auto detection!\n"); @@ -808,7 +762,6 @@ static int mc13xxx_common_init(struct mc13xxx *mc13xxx, { int ret; - mutex_init(&mc13xxx->lock); mc13xxx_lock(mc13xxx); ret = mc13xxx_identify(mc13xxx); @@ -886,6 +839,8 @@ static void mc13xxx_common_cleanup(struct mc13xxx *mc13xxx) mfd_remove_devices(mc13xxx->dev); + regmap_exit(mc13xxx->regmap); + kfree(mc13xxx); } -- cgit v0.10.2 From a0c7c1d48ea9f53c67c79eda498bb8eda1422748 Mon Sep 17 00:00:00 2001 From: Marc Reilly Date: Sun, 1 Apr 2012 16:41:38 +1000 Subject: mfd: Move the mc13xxx-core spi specific code into a separate module All spi specific code is moved into a new module. The mc13xxx struct moves to a new local include file by necessity. A new config choice selects the SPI bus type support and by default is value of SPI_MASTER to remain compatible with existing configs. Signed-off-by: Marc Reilly Signed-off-by: Samuel Ortiz diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig index 411a61a..9c8294c 100644 --- a/drivers/mfd/Kconfig +++ b/drivers/mfd/Kconfig @@ -600,14 +600,23 @@ config MFD_MC13XXX depends on SPI_MASTER select MFD_CORE select MFD_MC13783 - select REGMAP_SPI help - Support for the Freescale (Atlas) PMIC and audio CODECs - MC13783 and MC13892. - This driver provides common support for accessing the device, + Enable support for the Freescale MC13783 and MC13892 PMICs. + This driver provides common support for accessing the device, additional drivers must be enabled in order to use the functionality of the device. +if MFD_MC13XXX + +config MFD_MC13XXX_SPI + tristate "MC13xxx SPI interface" if SPI_MASTER + default SPI_MASTER + select REGMAP_SPI + help + Select this if your MC13xxx is connected via an SPI bus. + +endif + config ABX500_CORE bool "ST-Ericsson ABX500 Mixed Signal Circuit register functions" default y if ARCH_U300 || ARCH_U8500 diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile index c4500c3..7ce5b5c 100644 --- a/drivers/mfd/Makefile +++ b/drivers/mfd/Makefile @@ -54,6 +54,7 @@ obj-$(CONFIG_TWL6030_PWM) += twl6030-pwm.o obj-$(CONFIG_TWL6040_CORE) += twl6040-core.o twl6040-irq.o obj-$(CONFIG_MFD_MC13XXX) += mc13xxx-core.o +obj-$(CONFIG_MFD_MC13XXX_SPI) += mc13xxx-spi.o obj-$(CONFIG_MFD_CORE) += mfd-core.o diff --git a/drivers/mfd/mc13xxx-core.c b/drivers/mfd/mc13xxx-core.c index d761a2e..13a32b7 100644 --- a/drivers/mfd/mc13xxx-core.c +++ b/drivers/mfd/mc13xxx-core.c @@ -15,36 +15,13 @@ #include #include #include -#include #include #include #include #include #include -#include -#include -enum mc13xxx_id { - MC13XXX_ID_MC13783, - MC13XXX_ID_MC13892, - MC13XXX_ID_INVALID, -}; - -struct mc13xxx { - struct regmap *regmap; - - struct device *dev; - enum mc13xxx_id ictype; - - struct mutex lock; - int irq; - int flags; - - irq_handler_t irqhandler[MC13XXX_NUM_IRQ]; - void *irqdata[MC13XXX_NUM_IRQ]; - - int adcflags; -}; +#include "mc13xxx.h" #define MC13XXX_IRQSTAT0 0 #define MC13XXX_IRQSTAT0_ADCDONEI (1 << 0) @@ -151,8 +128,6 @@ struct mc13xxx { #define MC13XXX_ADC2 45 -#define MC13XXX_NUMREGS 0x3f - void mc13xxx_lock(struct mc13xxx *mc13xxx) { if (!mutex_trylock(&mc13xxx->lock)) { @@ -674,90 +649,7 @@ static inline int mc13xxx_probe_flags_dt(struct mc13xxx *mc13xxx) } #endif -static const struct spi_device_id mc13xxx_device_id[] = { - { - .name = "mc13783", - .driver_data = MC13XXX_ID_MC13783, - }, { - .name = "mc13892", - .driver_data = MC13XXX_ID_MC13892, - }, { - /* sentinel */ - } -}; -MODULE_DEVICE_TABLE(spi, mc13xxx_device_id); - -static const struct of_device_id mc13xxx_dt_ids[] = { - { .compatible = "fsl,mc13783", .data = (void *) MC13XXX_ID_MC13783, }, - { .compatible = "fsl,mc13892", .data = (void *) MC13XXX_ID_MC13892, }, - { /* sentinel */ } -}; -MODULE_DEVICE_TABLE(of, mc13xxx_dt_ids); - -static struct regmap_config mc13xxx_regmap_spi_config = { - .reg_bits = 7, - .pad_bits = 1, - .val_bits = 24, - - .max_register = MC13XXX_NUMREGS, - - .cache_type = REGCACHE_NONE, -}; - -static int mc13xxx_common_init(struct mc13xxx *mc13xxx, - struct mc13xxx_platform_data *pdata, int irq); - -static void mc13xxx_common_cleanup(struct mc13xxx *mc13xxx); - -static int mc13xxx_spi_probe(struct spi_device *spi) -{ - const struct of_device_id *of_id; - struct spi_driver *sdrv = to_spi_driver(spi->dev.driver); - struct mc13xxx *mc13xxx; - struct mc13xxx_platform_data *pdata = dev_get_platdata(&spi->dev); - int ret; - - of_id = of_match_device(mc13xxx_dt_ids, &spi->dev); - if (of_id) - sdrv->id_table = &mc13xxx_device_id[(enum mc13xxx_id) of_id->data]; - - mc13xxx = kzalloc(sizeof(*mc13xxx), GFP_KERNEL); - if (!mc13xxx) - return -ENOMEM; - - dev_set_drvdata(&spi->dev, mc13xxx); - spi->mode = SPI_MODE_0 | SPI_CS_HIGH; - spi->bits_per_word = 32; - - mc13xxx->dev = &spi->dev; - mutex_init(&mc13xxx->lock); - - mc13xxx->regmap = regmap_init_spi(spi, &mc13xxx_regmap_spi_config); - if (IS_ERR(mc13xxx->regmap)) { - ret = PTR_ERR(mc13xxx->regmap); - dev_err(mc13xxx->dev, "Failed to initialize register map: %d\n", - ret); - dev_set_drvdata(&spi->dev, NULL); - kfree(mc13xxx); - return ret; - } - - ret = mc13xxx_common_init(mc13xxx, pdata, spi->irq); - - if (ret) { - dev_set_drvdata(&spi->dev, NULL); - } else { - const struct spi_device_id *devid = - spi_get_device_id(spi); - if (!devid || devid->driver_data != mc13xxx->ictype) - dev_warn(mc13xxx->dev, - "device id doesn't match auto detection!\n"); - } - - return ret; -} - -static int mc13xxx_common_init(struct mc13xxx *mc13xxx, +int mc13xxx_common_init(struct mc13xxx *mc13xxx, struct mc13xxx_platform_data *pdata, int irq) { int ret; @@ -823,17 +715,9 @@ err_revision: return 0; } +EXPORT_SYMBOL_GPL(mc13xxx_common_init); -static int __devexit mc13xxx_spi_remove(struct spi_device *spi) -{ - struct mc13xxx *mc13xxx = dev_get_drvdata(&spi->dev); - - mc13xxx_common_cleanup(mc13xxx); - - return 0; -} - -static void mc13xxx_common_cleanup(struct mc13xxx *mc13xxx) +void mc13xxx_common_cleanup(struct mc13xxx *mc13xxx) { free_irq(mc13xxx->irq, mc13xxx); @@ -843,29 +727,7 @@ static void mc13xxx_common_cleanup(struct mc13xxx *mc13xxx) kfree(mc13xxx); } - -static struct spi_driver mc13xxx_spi_driver = { - .id_table = mc13xxx_device_id, - .driver = { - .name = "mc13xxx", - .owner = THIS_MODULE, - .of_match_table = mc13xxx_dt_ids, - }, - .probe = mc13xxx_spi_probe, - .remove = __devexit_p(mc13xxx_spi_remove), -}; - -static int __init mc13xxx_init(void) -{ - return spi_register_driver(&mc13xxx_spi_driver); -} -subsys_initcall(mc13xxx_init); - -static void __exit mc13xxx_exit(void) -{ - spi_unregister_driver(&mc13xxx_spi_driver); -} -module_exit(mc13xxx_exit); +EXPORT_SYMBOL_GPL(mc13xxx_common_cleanup); MODULE_DESCRIPTION("Core driver for Freescale MC13XXX PMIC"); MODULE_AUTHOR("Uwe Kleine-Koenig "); diff --git a/drivers/mfd/mc13xxx-spi.c b/drivers/mfd/mc13xxx-spi.c new file mode 100644 index 0000000..3fcdab3 --- /dev/null +++ b/drivers/mfd/mc13xxx-spi.c @@ -0,0 +1,140 @@ +/* + * Copyright 2009-2010 Pengutronix + * Uwe Kleine-Koenig + * + * loosely based on an earlier driver that has + * Copyright 2009 Pengutronix, Sascha Hauer + * + * This program is free software; you can redistribute it and/or modify it under + * the terms of the GNU General Public License version 2 as published by the + * Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "mc13xxx.h" + +static const struct spi_device_id mc13xxx_device_id[] = { + { + .name = "mc13783", + .driver_data = MC13XXX_ID_MC13783, + }, { + .name = "mc13892", + .driver_data = MC13XXX_ID_MC13892, + }, { + /* sentinel */ + } +}; +MODULE_DEVICE_TABLE(spi, mc13xxx_device_id); + +static const struct of_device_id mc13xxx_dt_ids[] = { + { .compatible = "fsl,mc13783", .data = (void *) MC13XXX_ID_MC13783, }, + { .compatible = "fsl,mc13892", .data = (void *) MC13XXX_ID_MC13892, }, + { /* sentinel */ } +}; +MODULE_DEVICE_TABLE(of, mc13xxx_dt_ids); + +static struct regmap_config mc13xxx_regmap_spi_config = { + .reg_bits = 7, + .pad_bits = 1, + .val_bits = 24, + + .max_register = MC13XXX_NUMREGS, + + .cache_type = REGCACHE_NONE, +}; + +static int mc13xxx_spi_probe(struct spi_device *spi) +{ + const struct of_device_id *of_id; + struct spi_driver *sdrv = to_spi_driver(spi->dev.driver); + struct mc13xxx *mc13xxx; + struct mc13xxx_platform_data *pdata = dev_get_platdata(&spi->dev); + int ret; + + of_id = of_match_device(mc13xxx_dt_ids, &spi->dev); + if (of_id) + sdrv->id_table = &mc13xxx_device_id[(enum mc13xxx_id) of_id->data]; + + mc13xxx = kzalloc(sizeof(*mc13xxx), GFP_KERNEL); + if (!mc13xxx) + return -ENOMEM; + + dev_set_drvdata(&spi->dev, mc13xxx); + spi->mode = SPI_MODE_0 | SPI_CS_HIGH; + spi->bits_per_word = 32; + + mc13xxx->dev = &spi->dev; + mutex_init(&mc13xxx->lock); + + mc13xxx->regmap = regmap_init_spi(spi, &mc13xxx_regmap_spi_config); + if (IS_ERR(mc13xxx->regmap)) { + ret = PTR_ERR(mc13xxx->regmap); + dev_err(mc13xxx->dev, "Failed to initialize register map: %d\n", + ret); + dev_set_drvdata(&spi->dev, NULL); + kfree(mc13xxx); + return ret; + } + + ret = mc13xxx_common_init(mc13xxx, pdata, spi->irq); + + if (ret) { + dev_set_drvdata(&spi->dev, NULL); + } else { + const struct spi_device_id *devid = + spi_get_device_id(spi); + if (!devid || devid->driver_data != mc13xxx->ictype) + dev_warn(mc13xxx->dev, + "device id doesn't match auto detection!\n"); + } + + return ret; +} + +static int __devexit mc13xxx_spi_remove(struct spi_device *spi) +{ + struct mc13xxx *mc13xxx = dev_get_drvdata(&spi->dev); + + mc13xxx_common_cleanup(mc13xxx); + + return 0; +} + +static struct spi_driver mc13xxx_spi_driver = { + .id_table = mc13xxx_device_id, + .driver = { + .name = "mc13xxx", + .owner = THIS_MODULE, + .of_match_table = mc13xxx_dt_ids, + }, + .probe = mc13xxx_spi_probe, + .remove = __devexit_p(mc13xxx_spi_remove), +}; + +static int __init mc13xxx_init(void) +{ + return spi_register_driver(&mc13xxx_spi_driver); +} +subsys_initcall(mc13xxx_init); + +static void __exit mc13xxx_exit(void) +{ + spi_unregister_driver(&mc13xxx_spi_driver); +} +module_exit(mc13xxx_exit); + +MODULE_DESCRIPTION("Core driver for Freescale MC13XXX PMIC"); +MODULE_AUTHOR("Uwe Kleine-Koenig "); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/mfd/mc13xxx.h b/drivers/mfd/mc13xxx.h new file mode 100644 index 0000000..bbba06f --- /dev/null +++ b/drivers/mfd/mc13xxx.h @@ -0,0 +1,45 @@ +/* + * Copyright 2012 Creative Product Design + * Marc Reilly + * + * This program is free software; you can redistribute it and/or modify it under + * the terms of the GNU General Public License version 2 as published by the + * Free Software Foundation. + */ +#ifndef __DRIVERS_MFD_MC13XXX_H +#define __DRIVERS_MFD_MC13XXX_H + +#include +#include +#include + +enum mc13xxx_id { + MC13XXX_ID_MC13783, + MC13XXX_ID_MC13892, + MC13XXX_ID_INVALID, +}; + +#define MC13XXX_NUMREGS 0x3f + +struct mc13xxx { + struct regmap *regmap; + + struct device *dev; + enum mc13xxx_id ictype; + + struct mutex lock; + int irq; + int flags; + + irq_handler_t irqhandler[MC13XXX_NUM_IRQ]; + void *irqdata[MC13XXX_NUM_IRQ]; + + int adcflags; +}; + +int mc13xxx_common_init(struct mc13xxx *mc13xxx, + struct mc13xxx_platform_data *pdata, int irq); + +void mc13xxx_common_cleanup(struct mc13xxx *mc13xxx); + +#endif /* __DRIVERS_MFD_MC13XXX_H */ -- cgit v0.10.2 From df3df6469fd1e59284d6b5d4dd9dbe1bd7861040 Mon Sep 17 00:00:00 2001 From: Marc Reilly Date: Sun, 1 Apr 2012 16:41:39 +1000 Subject: mfd: Add mc13xxx i2c driver Adds support for mc13xxx family ICs connected via i2c. Signed-off-by: Marc Reilly Acked-by: Oskar Schirmer Signed-off-by: Samuel Ortiz diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig index 9c8294c..ef86a74 100644 --- a/drivers/mfd/Kconfig +++ b/drivers/mfd/Kconfig @@ -597,7 +597,7 @@ config MFD_MC13783 config MFD_MC13XXX tristate "Support Freescale MC13783 and MC13892" - depends on SPI_MASTER + depends on SPI_MASTER || I2C select MFD_CORE select MFD_MC13783 help @@ -615,6 +615,13 @@ config MFD_MC13XXX_SPI help Select this if your MC13xxx is connected via an SPI bus. +config MFD_MC13XXX_I2C + tristate "MC13xxx I2C interface" if I2C + default I2C + select REGMAP_I2C + help + Select this if your MC13xxx is connected via an I2C bus. + endif config ABX500_CORE diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile index 7ce5b5c..5dd6be7 100644 --- a/drivers/mfd/Makefile +++ b/drivers/mfd/Makefile @@ -55,6 +55,7 @@ obj-$(CONFIG_TWL6040_CORE) += twl6040-core.o twl6040-irq.o obj-$(CONFIG_MFD_MC13XXX) += mc13xxx-core.o obj-$(CONFIG_MFD_MC13XXX_SPI) += mc13xxx-spi.o +obj-$(CONFIG_MFD_MC13XXX_I2C) += mc13xxx-i2c.o obj-$(CONFIG_MFD_CORE) += mfd-core.o diff --git a/drivers/mfd/mc13xxx-i2c.c b/drivers/mfd/mc13xxx-i2c.c new file mode 100644 index 0000000..d22501d --- /dev/null +++ b/drivers/mfd/mc13xxx-i2c.c @@ -0,0 +1,128 @@ +/* + * Copyright 2009-2010 Creative Product Design + * Marc Reilly marc@cpdesign.com.au + * + * This program is free software; you can redistribute it and/or modify it under + * the terms of the GNU General Public License version 2 as published by the + * Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "mc13xxx.h" + +static const struct i2c_device_id mc13xxx_i2c_device_id[] = { + { + .name = "mc13892", + .driver_data = MC13XXX_ID_MC13892, + }, { + /* sentinel */ + } +}; +MODULE_DEVICE_TABLE(i2c, mc13xxx_i2c_device_id); + +static const struct of_device_id mc13xxx_dt_ids[] = { + { + .compatible = "fsl,mc13892", + .data = (void *) &mc13xxx_i2c_device_id[0], + }, { + /* sentinel */ + } +}; +MODULE_DEVICE_TABLE(of, mc13xxx_dt_ids); + +static struct regmap_config mc13xxx_regmap_i2c_config = { + .reg_bits = 8, + .val_bits = 24, + + .max_register = MC13XXX_NUMREGS, + + .cache_type = REGCACHE_NONE, +}; + +static int mc13xxx_i2c_probe(struct i2c_client *client, + const struct i2c_device_id *id) +{ + const struct of_device_id *of_id; + struct i2c_driver *idrv = to_i2c_driver(client->dev.driver); + struct mc13xxx *mc13xxx; + struct mc13xxx_platform_data *pdata = dev_get_platdata(&client->dev); + int ret; + + of_id = of_match_device(mc13xxx_dt_ids, &client->dev); + if (of_id) + idrv->id_table = (const struct i2c_device_id*) of_id->data; + + mc13xxx = kzalloc(sizeof(*mc13xxx), GFP_KERNEL); + if (!mc13xxx) + return -ENOMEM; + + dev_set_drvdata(&client->dev, mc13xxx); + + mc13xxx->dev = &client->dev; + mutex_init(&mc13xxx->lock); + + mc13xxx->regmap = regmap_init_i2c(client, &mc13xxx_regmap_i2c_config); + if (IS_ERR(mc13xxx->regmap)) { + ret = PTR_ERR(mc13xxx->regmap); + dev_err(mc13xxx->dev, "Failed to initialize register map: %d\n", + ret); + dev_set_drvdata(&client->dev, NULL); + kfree(mc13xxx); + return ret; + } + + ret = mc13xxx_common_init(mc13xxx, pdata, client->irq); + + if (ret == 0 && (id->driver_data != mc13xxx->ictype)) + dev_warn(mc13xxx->dev, + "device id doesn't match auto detection!\n"); + + return ret; +} + +static int __devexit mc13xxx_i2c_remove(struct i2c_client *client) +{ + struct mc13xxx *mc13xxx = dev_get_drvdata(&client->dev); + + mc13xxx_common_cleanup(mc13xxx); + + return 0; +} + +static struct i2c_driver mc13xxx_i2c_driver = { + .id_table = mc13xxx_i2c_device_id, + .driver = { + .owner = THIS_MODULE, + .name = "mc13xxx", + .of_match_table = mc13xxx_dt_ids, + }, + .probe = mc13xxx_i2c_probe, + .remove = __devexit_p(mc13xxx_i2c_remove), +}; + +static int __init mc13xxx_i2c_init(void) +{ + return i2c_add_driver(&mc13xxx_i2c_driver); +} +subsys_initcall(mc13xxx_i2c_init); + +static void __exit mc13xxx_i2c_exit(void) +{ + i2c_del_driver(&mc13xxx_i2c_driver); +} +module_exit(mc13xxx_i2c_exit); + +MODULE_DESCRIPTION("i2c driver for Freescale MC13XXX PMIC"); +MODULE_AUTHOR("Marc Reilly Date: Tue, 17 Apr 2012 09:30:14 +0200 Subject: mfd: Add new resources on ab8500 AB8505 and AB9540 The AB8505 and AB9540 has extended support for micro USB resistance detection, used for detecting chargers. Let's register resources for this resource. Let's also split off the separate codec device for AB9540. Signed-off-by: Virupax Sadashivpetimath Signed-off-by: Linus Walleij Signed-off-by: Samuel Ortiz diff --git a/drivers/mfd/ab8500-core.c b/drivers/mfd/ab8500-core.c index 1f08704..ae67612 100644 --- a/drivers/mfd/ab8500-core.c +++ b/drivers/mfd/ab8500-core.c @@ -744,6 +744,39 @@ static struct resource __devinitdata ab8500_usb_resources[] = { }, }; +static struct resource __devinitdata ab8505_iddet_resources[] = { + { + .name = "KeyDeglitch", + .start = AB8505_INT_KEYDEGLITCH, + .end = AB8505_INT_KEYDEGLITCH, + .flags = IORESOURCE_IRQ, + }, + { + .name = "KP", + .start = AB8505_INT_KP, + .end = AB8505_INT_KP, + .flags = IORESOURCE_IRQ, + }, + { + .name = "IKP", + .start = AB8505_INT_IKP, + .end = AB8505_INT_IKP, + .flags = IORESOURCE_IRQ, + }, + { + .name = "IKR", + .start = AB8505_INT_IKR, + .end = AB8505_INT_IKR, + .flags = IORESOURCE_IRQ, + }, + { + .name = "KeyStuck", + .start = AB8505_INT_KEYSTUCK, + .end = AB8505_INT_KEYSTUCK, + .flags = IORESOURCE_IRQ, + }, +}; + static struct resource __devinitdata ab8500_temp_resources[] = { { .name = "AB8500_TEMP_WARM", @@ -803,10 +836,6 @@ static struct mfd_cell __devinitdata abx500_common_devs[] = { .resources = ab8500_av_acc_detect_resources, }, { - .name = "ab8500-codec", - }, - - { .name = "ab8500-poweron-key", .num_resources = ARRAY_SIZE(ab8500_poweronkey_db_resources), .resources = ab8500_poweronkey_db_resources, @@ -845,6 +874,9 @@ static struct mfd_cell __devinitdata ab8500_devs[] = { .num_resources = ARRAY_SIZE(ab8500_usb_resources), .resources = ab8500_usb_resources, }, + { + .name = "ab8500-codec", + }, }; static struct mfd_cell __devinitdata ab9540_devs[] = { @@ -858,6 +890,18 @@ static struct mfd_cell __devinitdata ab9540_devs[] = { .num_resources = ARRAY_SIZE(ab8500_usb_resources), .resources = ab8500_usb_resources, }, + { + .name = "ab9540-codec", + }, +}; + +/* Device list common to ab9540 and ab8505 */ +static struct mfd_cell __devinitdata ab9540_ab8505_devs[] = { + { + .name = "ab-iddet", + .num_resources = ARRAY_SIZE(ab8505_iddet_resources), + .resources = ab8505_iddet_resources, + }, }; static ssize_t show_chip_id(struct device *dev, @@ -1125,8 +1169,14 @@ int __devinit ab8500_init(struct ab8500 *ab8500, enum ab8500_version version) ab8500->irq_base); else ret = mfd_add_devices(ab8500->dev, 0, ab8500_devs, - ARRAY_SIZE(ab9540_devs), NULL, + ARRAY_SIZE(ab8500_devs), NULL, + ab8500->irq_base); + + if (is_ab9540(ab8500) || is_ab8505(ab8500)) + ret = mfd_add_devices(ab8500->dev, 0, ab9540_ab8505_devs, + ARRAY_SIZE(ab9540_ab8505_devs), NULL, ab8500->irq_base); + if (ret) goto out_freeirq; diff --git a/include/linux/mfd/abx500/ab8500.h b/include/linux/mfd/abx500/ab8500.h index fccc300..d798f5b 100644 --- a/include/linux/mfd/abx500/ab8500.h +++ b/include/linux/mfd/abx500/ab8500.h @@ -194,6 +194,14 @@ enum ab8500_version { #define AB9540_INT_GPIO52F 123 #define AB9540_INT_GPIO53F 124 #define AB9540_INT_GPIO54F 125 /* not 8505 */ +/* ab8500_irq_regoffset[16] -> IT[Source|Latch|Mask]25 */ +#define AB8505_INT_KEYSTUCK 128 +#define AB8505_INT_IKR 129 +#define AB8505_INT_IKP 130 +#define AB8505_INT_KP 131 +#define AB8505_INT_KEYDEGLITCH 132 +#define AB8505_INT_MODPWRSTATUSF 134 +#define AB8505_INT_MODPWRSTATUSR 135 /* * AB8500_AB9540_NR_IRQS is used when configuring the IRQ numbers for the @@ -203,8 +211,8 @@ enum ab8500_version { * which is larger. */ #define AB8500_NR_IRQS 112 -#define AB8505_NR_IRQS 128 -#define AB9540_NR_IRQS 128 +#define AB8505_NR_IRQS 136 +#define AB9540_NR_IRQS 136 /* This is set to the roof of any AB8500 chip variant IRQ counts */ #define AB8500_MAX_NR_IRQS AB9540_NR_IRQS -- cgit v0.10.2 From 7e82d6ff5d2c5e35d1fcb8c673287f7d780a13bb Mon Sep 17 00:00:00 2001 From: Rabin Vincent Date: Tue, 17 Apr 2012 09:30:24 +0200 Subject: mfd: Handle the ab8500 irq for suspend/resume Ensure that the AB interrupt is only handled at a time when all core drivers are resumed. Ensure that the AB interrupt is marked as a wakeup interrupt. Signed-off-by: Rabin Vincent Reviewed-by: Jonas Aberg Reviewed-by: Mattias Wallin Signed-off-by: Linus Walleij Signed-off-by: Samuel Ortiz diff --git a/drivers/mfd/ab8500-i2c.c b/drivers/mfd/ab8500-i2c.c index b83045f..89b31a3 100644 --- a/drivers/mfd/ab8500-i2c.c +++ b/drivers/mfd/ab8500-i2c.c @@ -101,10 +101,42 @@ static const struct platform_device_id ab8500_id[] = { { } }; +#ifdef CONFIG_PM +static int ab8500_i2c_suspend(struct device *dev) +{ + struct ab8500 *ab = dev_get_drvdata(dev); + + disable_irq(ab->irq); + enable_irq_wake(ab->irq); + + return 0; +} + +static int ab8500_i2c_resume(struct device *dev) +{ + struct ab8500 *ab = dev_get_drvdata(dev); + + disable_irq_wake(ab->irq); + enable_irq(ab->irq); + + return 0; +} + +static const struct dev_pm_ops ab8500_i2c_pm_ops = { + .suspend = ab8500_i2c_suspend, + .resume = ab8500_i2c_resume, +}; + +#define AB8500_I2C_PM_OPS (&ab8500_i2c_pm_ops) +#else +#define AB8500_I2C_PM_OPS NULL +#endif + static struct platform_driver ab8500_i2c_driver = { .driver = { .name = "ab8500-i2c", .owner = THIS_MODULE, + .pm = AB8500_I2C_PM_OPS, }, .probe = ab8500_i2c_probe, .remove = __devexit_p(ab8500_i2c_remove), -- cgit v0.10.2 From 112a80d29b529d4057777ac2cb4ec15ff5b6d210 Mon Sep 17 00:00:00 2001 From: Jonas Aaberg Date: Tue, 17 Apr 2012 09:30:33 +0200 Subject: mfd: Deny ab8500 suspend if i2c transfer is ongoing If we are in the middle of an I2C transfer we need to deny suspend of the AB8500 core. Implement an atomic reference counter for the I2C operations to make sure we don't do this. Signed-off-by: Jonas Aaberg Reviewed-by: Mattias Wallin Signed-off-by: Linus Walleij Signed-off-by: Samuel Ortiz diff --git a/drivers/mfd/ab8500-core.c b/drivers/mfd/ab8500-core.c index ae67612..eee9560 100644 --- a/drivers/mfd/ab8500-core.c +++ b/drivers/mfd/ab8500-core.c @@ -161,9 +161,13 @@ static int set_register_interruptible(struct ab8500 *ab8500, u8 bank, static int ab8500_set_register(struct device *dev, u8 bank, u8 reg, u8 value) { + int ret; struct ab8500 *ab8500 = dev_get_drvdata(dev->parent); - return set_register_interruptible(ab8500, bank, reg, value); + atomic_inc(&ab8500->transfer_ongoing); + ret = set_register_interruptible(ab8500, bank, reg, value); + atomic_dec(&ab8500->transfer_ongoing); + return ret; } static int get_register_interruptible(struct ab8500 *ab8500, u8 bank, @@ -192,9 +196,13 @@ static int get_register_interruptible(struct ab8500 *ab8500, u8 bank, static int ab8500_get_register(struct device *dev, u8 bank, u8 reg, u8 *value) { + int ret; struct ab8500 *ab8500 = dev_get_drvdata(dev->parent); - return get_register_interruptible(ab8500, bank, reg, value); + atomic_inc(&ab8500->transfer_ongoing); + ret = get_register_interruptible(ab8500, bank, reg, value); + atomic_dec(&ab8500->transfer_ongoing); + return ret; } static int mask_and_set_register_interruptible(struct ab8500 *ab8500, u8 bank, @@ -241,11 +249,14 @@ out: static int ab8500_mask_and_set_register(struct device *dev, u8 bank, u8 reg, u8 bitmask, u8 bitvalues) { + int ret; struct ab8500 *ab8500 = dev_get_drvdata(dev->parent); - return mask_and_set_register_interruptible(ab8500, bank, reg, - bitmask, bitvalues); - + atomic_inc(&ab8500->transfer_ongoing); + ret= mask_and_set_register_interruptible(ab8500, bank, reg, + bitmask, bitvalues); + atomic_dec(&ab8500->transfer_ongoing); + return ret; } static struct abx500_ops ab8500_ops = { @@ -264,6 +275,7 @@ static void ab8500_irq_lock(struct irq_data *data) struct ab8500 *ab8500 = irq_data_get_irq_chip_data(data); mutex_lock(&ab8500->irq_lock); + atomic_inc(&ab8500->transfer_ongoing); } static void ab8500_irq_sync_unlock(struct irq_data *data) @@ -292,7 +304,7 @@ static void ab8500_irq_sync_unlock(struct irq_data *data) reg = AB8500_IT_MASK1_REG + ab8500->irq_reg_offset[i]; set_register_interruptible(ab8500, AB8500_INTERRUPT, reg, new); } - + atomic_dec(&ab8500->transfer_ongoing); mutex_unlock(&ab8500->irq_lock); } @@ -332,6 +344,8 @@ static irqreturn_t ab8500_irq(int irq, void *dev) dev_vdbg(ab8500->dev, "interrupt\n"); + atomic_inc(&ab8500->transfer_ongoing); + for (i = 0; i < ab8500->mask_size; i++) { int regoffset = ab8500->irq_reg_offset[i]; int status; @@ -355,9 +369,10 @@ static irqreturn_t ab8500_irq(int irq, void *dev) handle_nested_irq(ab8500->irq_base + line); value &= ~(1 << bit); + } while (value); } - + atomic_dec(&ab8500->transfer_ongoing); return IRQ_HANDLED; } @@ -411,6 +426,14 @@ static void ab8500_irq_remove(struct ab8500 *ab8500) } } +int ab8500_suspend(struct ab8500 *ab8500) +{ + if (atomic_read(&ab8500->transfer_ongoing)) + return -EINVAL; + else + return 0; +} + /* AB8500 GPIO Resources */ static struct resource __devinitdata ab8500_gpio_resources[] = { { @@ -1059,6 +1082,7 @@ int __devinit ab8500_init(struct ab8500 *ab8500, enum ab8500_version version) mutex_init(&ab8500->lock); mutex_init(&ab8500->irq_lock); + atomic_set(&ab8500->transfer_ongoing, 0); if (version != AB8500_VERSION_UNDEFINED) ab8500->version = version; diff --git a/include/linux/mfd/abx500/ab8500.h b/include/linux/mfd/abx500/ab8500.h index d798f5b..91dd3ef 100644 --- a/include/linux/mfd/abx500/ab8500.h +++ b/include/linux/mfd/abx500/ab8500.h @@ -7,6 +7,7 @@ #ifndef MFD_AB8500_H #define MFD_AB8500_H +#include #include struct device; @@ -224,6 +225,7 @@ enum ab8500_version { * @dev: parent device * @lock: read/write operations lock * @irq_lock: genirq bus lock + * @transfer_ongoing: 0 if no transfer ongoing * @irq: irq line * @version: chip version id (e.g. ab8500 or ab9540) * @chip_id: chip revision id @@ -242,7 +244,7 @@ struct ab8500 { struct device *dev; struct mutex lock; struct mutex irq_lock; - + atomic_t transfer_ongoing; int irq_base; int irq; enum ab8500_version version; @@ -288,6 +290,8 @@ extern int __devinit ab8500_init(struct ab8500 *ab8500, enum ab8500_version version); extern int __devexit ab8500_exit(struct ab8500 *ab8500); +extern int ab8500_suspend(struct ab8500 *ab8500); + static inline int is_ab8500(struct ab8500 *ab) { return ab->version == AB8500_VERSION_AB8500; -- cgit v0.10.2 From 6ef9418c9e6fc41e6c1066b6423f3a1625e4b822 Mon Sep 17 00:00:00 2001 From: Rickard Andersson Date: Tue, 17 Apr 2012 09:30:57 +0200 Subject: mfd: Add parameter to disable ab8500 battery management This patch makes it possible to disable battery management via a module boot parameter. When 'ab8500-core.no_bm=1' then ab8500_btemp, ab8500_chargalg, ab8500_charger and ab8500_fg will not be probed. This boot parameter is used for scripted testing of the system. Signed-off-by: Rickard Andersson Reviewed-by: Jonas Aberg Signed-off-by: Linus Walleij Signed-off-by: Samuel Ortiz diff --git a/drivers/mfd/ab8500-core.c b/drivers/mfd/ab8500-core.c index eee9560..08850f0 100644 --- a/drivers/mfd/ab8500-core.c +++ b/drivers/mfd/ab8500-core.c @@ -97,6 +97,9 @@ #define AB8500_TURN_ON_STATUS 0x00 +static bool no_bm; /* No battery management */ +module_param(no_bm, bool, S_IRUGO); + #define AB9540_MODEM_CTRL2_REG 0x23 #define AB9540_MODEM_CTRL2_SWDBBRSTN_BIT BIT(2) @@ -834,26 +837,6 @@ static struct mfd_cell __devinitdata abx500_common_devs[] = { .resources = ab8500_rtc_resources, }, { - .name = "ab8500-charger", - .num_resources = ARRAY_SIZE(ab8500_charger_resources), - .resources = ab8500_charger_resources, - }, - { - .name = "ab8500-btemp", - .num_resources = ARRAY_SIZE(ab8500_btemp_resources), - .resources = ab8500_btemp_resources, - }, - { - .name = "ab8500-fg", - .num_resources = ARRAY_SIZE(ab8500_fg_resources), - .resources = ab8500_fg_resources, - }, - { - .name = "ab8500-chargalg", - .num_resources = ARRAY_SIZE(ab8500_chargalg_resources), - .resources = ab8500_chargalg_resources, - }, - { .name = "ab8500-acc-det", .num_resources = ARRAY_SIZE(ab8500_av_acc_detect_resources), .resources = ab8500_av_acc_detect_resources, @@ -886,6 +869,29 @@ static struct mfd_cell __devinitdata abx500_common_devs[] = { }, }; +static struct mfd_cell __devinitdata ab8500_bm_devs[] = { + { + .name = "ab8500-charger", + .num_resources = ARRAY_SIZE(ab8500_charger_resources), + .resources = ab8500_charger_resources, + }, + { + .name = "ab8500-btemp", + .num_resources = ARRAY_SIZE(ab8500_btemp_resources), + .resources = ab8500_btemp_resources, + }, + { + .name = "ab8500-fg", + .num_resources = ARRAY_SIZE(ab8500_fg_resources), + .resources = ab8500_fg_resources, + }, + { + .name = "ab8500-chargalg", + .num_resources = ARRAY_SIZE(ab8500_chargalg_resources), + .resources = ab8500_chargalg_resources, + }, +}; + static struct mfd_cell __devinitdata ab8500_devs[] = { { .name = "ab8500-gpio", @@ -1204,6 +1210,15 @@ int __devinit ab8500_init(struct ab8500 *ab8500, enum ab8500_version version) if (ret) goto out_freeirq; + if (!no_bm) { + /* Add battery management devices */ + ret = mfd_add_devices(ab8500->dev, 0, ab8500_bm_devs, + ARRAY_SIZE(ab8500_bm_devs), NULL, + ab8500->irq_base); + if (ret) + dev_err(ab8500->dev, "error adding bm devices\n"); + } + if (is_ab9540(ab8500)) ret = sysfs_create_group(&ab8500->dev->kobj, &ab9540_attr_group); -- cgit v0.10.2 From 3524b5d1edbcaf0aae9196ac942d8307624ff3f0 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 27 Mar 2012 08:40:34 +0530 Subject: mmc: sdhci-spear: No need to check 'pdev == NULL' in probe pdev is guaranteed to be valid in probe. And so check for non-NULL is not required. Remove it. Signed-off-by: Viresh Kumar Signed-off-by: Chris Ball diff --git a/drivers/mmc/host/sdhci-spear.c b/drivers/mmc/host/sdhci-spear.c index 6dfa82e..15191c1 100644 --- a/drivers/mmc/host/sdhci-spear.c +++ b/drivers/mmc/host/sdhci-spear.c @@ -75,8 +75,6 @@ static int __devinit sdhci_probe(struct platform_device *pdev) struct spear_sdhci *sdhci; int ret; - BUG_ON(pdev == NULL); - iomem = platform_get_resource(pdev, IORESOURCE_MEM, 0); if (!iomem) { ret = -ENOMEM; -- cgit v0.10.2 From 6ebaf8f2b0f9e67ac2e00ba7af04a58b39312b3c Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 27 Mar 2012 08:40:35 +0530 Subject: mmc: sdhci-spear: Use devm_* derivatives This patch replaces normal calls to resource allocation routines with devm_*() derivative of those routines. This removes the need to free those resources inside the driver. Signed-off-by: Viresh Kumar Signed-off-by: Chris Ball diff --git a/drivers/mmc/host/sdhci-spear.c b/drivers/mmc/host/sdhci-spear.c index 15191c1..1fe32df 100644 --- a/drivers/mmc/host/sdhci-spear.c +++ b/drivers/mmc/host/sdhci-spear.c @@ -82,18 +82,18 @@ static int __devinit sdhci_probe(struct platform_device *pdev) goto err; } - if (!request_mem_region(iomem->start, resource_size(iomem), - "spear-sdhci")) { + if (!devm_request_mem_region(&pdev->dev, iomem->start, + resource_size(iomem), "spear-sdhci")) { ret = -EBUSY; dev_dbg(&pdev->dev, "cannot request region\n"); goto err; } - sdhci = kzalloc(sizeof(*sdhci), GFP_KERNEL); + sdhci = devm_kzalloc(&pdev->dev, sizeof(*sdhci), GFP_KERNEL); if (!sdhci) { ret = -ENOMEM; dev_dbg(&pdev->dev, "cannot allocate memory for sdhci\n"); - goto err_kzalloc; + goto err; } /* clk enable */ @@ -101,13 +101,13 @@ static int __devinit sdhci_probe(struct platform_device *pdev) if (IS_ERR(sdhci->clk)) { ret = PTR_ERR(sdhci->clk); dev_dbg(&pdev->dev, "Error getting clock\n"); - goto err_clk_get; + goto err; } ret = clk_enable(sdhci->clk); if (ret) { dev_dbg(&pdev->dev, "Error enabling clock\n"); - goto err_clk_enb; + goto put_clk; } /* overwrite platform_data */ @@ -122,7 +122,7 @@ static int __devinit sdhci_probe(struct platform_device *pdev) if (IS_ERR(host)) { ret = PTR_ERR(host); dev_dbg(&pdev->dev, "error allocating host\n"); - goto err_alloc_host; + goto disable_clk; } host->hw_name = "sdhci"; @@ -130,17 +130,18 @@ static int __devinit sdhci_probe(struct platform_device *pdev) host->irq = platform_get_irq(pdev, 0); host->quirks = SDHCI_QUIRK_BROKEN_ADMA; - host->ioaddr = ioremap(iomem->start, resource_size(iomem)); + host->ioaddr = devm_ioremap(&pdev->dev, iomem->start, + resource_size(iomem)); if (!host->ioaddr) { ret = -ENOMEM; dev_dbg(&pdev->dev, "failed to remap registers\n"); - goto err_ioremap; + goto free_host; } ret = sdhci_add_host(host); if (ret) { dev_dbg(&pdev->dev, "error adding host\n"); - goto err_add_host; + goto free_host; } platform_set_drvdata(pdev, host); @@ -159,11 +160,12 @@ static int __devinit sdhci_probe(struct platform_device *pdev) if (sdhci->data->card_power_gpio >= 0) { int val = 0; - ret = gpio_request(sdhci->data->card_power_gpio, "sdhci"); + ret = devm_gpio_request(&pdev->dev, + sdhci->data->card_power_gpio, "sdhci"); if (ret < 0) { dev_dbg(&pdev->dev, "gpio request fail: %d\n", sdhci->data->card_power_gpio); - goto err_pgpio_request; + goto set_drvdata; } if (sdhci->data->power_always_enb) @@ -175,60 +177,48 @@ static int __devinit sdhci_probe(struct platform_device *pdev) if (ret) { dev_dbg(&pdev->dev, "gpio set direction fail: %d\n", sdhci->data->card_power_gpio); - goto err_pgpio_direction; + goto set_drvdata; } } if (sdhci->data->card_int_gpio >= 0) { - ret = gpio_request(sdhci->data->card_int_gpio, "sdhci"); + ret = devm_gpio_request(&pdev->dev, sdhci->data->card_int_gpio, + "sdhci"); if (ret < 0) { dev_dbg(&pdev->dev, "gpio request fail: %d\n", sdhci->data->card_int_gpio); - goto err_igpio_request; + goto set_drvdata; } ret = gpio_direction_input(sdhci->data->card_int_gpio); if (ret) { dev_dbg(&pdev->dev, "gpio set direction fail: %d\n", sdhci->data->card_int_gpio); - goto err_igpio_direction; + goto set_drvdata; } - ret = request_irq(gpio_to_irq(sdhci->data->card_int_gpio), + ret = devm_request_irq(&pdev->dev, + gpio_to_irq(sdhci->data->card_int_gpio), sdhci_gpio_irq, IRQF_TRIGGER_LOW, mmc_hostname(host->mmc), pdev); if (ret) { dev_dbg(&pdev->dev, "gpio request irq fail: %d\n", sdhci->data->card_int_gpio); - goto err_igpio_request_irq; + goto set_drvdata; } } return 0; -err_igpio_request_irq: -err_igpio_direction: - if (sdhci->data->card_int_gpio >= 0) - gpio_free(sdhci->data->card_int_gpio); -err_igpio_request: -err_pgpio_direction: - if (sdhci->data->card_power_gpio >= 0) - gpio_free(sdhci->data->card_power_gpio); -err_pgpio_request: +set_drvdata: platform_set_drvdata(pdev, NULL); sdhci_remove_host(host, 1); -err_add_host: - iounmap(host->ioaddr); -err_ioremap: +free_host: sdhci_free_host(host); -err_alloc_host: +disable_clk: clk_disable(sdhci->clk); -err_clk_enb: +put_clk: clk_put(sdhci->clk); -err_clk_get: - kfree(sdhci); -err_kzalloc: - release_mem_region(iomem->start, resource_size(iomem)); err: dev_err(&pdev->dev, "spear-sdhci probe failed: %d\n", ret); return ret; @@ -237,35 +227,19 @@ err: static int __devexit sdhci_remove(struct platform_device *pdev) { struct sdhci_host *host = platform_get_drvdata(pdev); - struct resource *iomem = platform_get_resource(pdev, IORESOURCE_MEM, 0); struct spear_sdhci *sdhci = dev_get_platdata(&pdev->dev); - int dead; + int dead = 0; u32 scratch; - if (sdhci->data) { - if (sdhci->data->card_int_gpio >= 0) { - free_irq(gpio_to_irq(sdhci->data->card_int_gpio), pdev); - gpio_free(sdhci->data->card_int_gpio); - } - - if (sdhci->data->card_power_gpio >= 0) - gpio_free(sdhci->data->card_power_gpio); - } - platform_set_drvdata(pdev, NULL); - dead = 0; scratch = readl(host->ioaddr + SDHCI_INT_STATUS); if (scratch == (u32)-1) dead = 1; sdhci_remove_host(host, dead); - iounmap(host->ioaddr); sdhci_free_host(host); clk_disable(sdhci->clk); clk_put(sdhci->clk); - kfree(sdhci); - if (iomem) - release_mem_region(iomem->start, resource_size(iomem)); return 0; } -- cgit v0.10.2 From 9146ab5055152bbacb5690c384df2fd610fb3c68 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 1 May 2012 11:21:43 -0400 Subject: NFS: Read cleanups Remove unused variables, and reformat some code. Signed-off-by: Trond Myklebust Cc: Fred Isaman diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 35e2dce..20a0293 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -341,8 +341,6 @@ static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc, struct nfs_read_data *data; size_t rsize = desc->pg_bsize, nbytes; unsigned int offset; - int requests = 0; - int ret = 0; nfs_list_remove_request(req); nfs_list_add_request(req, &hdr->pages); @@ -358,12 +356,11 @@ static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc, data->pages.pagevec[0] = page; nfs_read_rpcsetup(data, len, offset); list_add(&data->list, &hdr->rpc_list); - requests++; nbytes -= len; offset += len; - } while(nbytes != 0); + } while (nbytes != 0); desc->pg_rpc_callops = &nfs_read_common_ops; - return ret; + return 0; out_bad: while (!list_empty(&hdr->rpc_list)) { data = list_first_entry(&hdr->rpc_list, struct nfs_read_data, list); @@ -387,8 +384,7 @@ static int nfs_pagein_one(struct nfs_pageio_descriptor *desc, desc->pg_count)); if (!data) { desc->pg_completion_ops->error_cleanup(head); - ret = -ENOMEM; - goto out; + return -ENOMEM; } pages = data->pages.pagevec; @@ -402,8 +398,7 @@ static int nfs_pagein_one(struct nfs_pageio_descriptor *desc, nfs_read_rpcsetup(data, desc->pg_count, 0); list_add(&data->list, &hdr->rpc_list); desc->pg_rpc_callops = &nfs_read_common_ops; -out: - return ret; + return 0; } int nfs_generic_pagein(struct nfs_pageio_descriptor *desc, -- cgit v0.10.2 From 25b11dcdbfcad69a5ec03265e2dce19e5eca936b Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 1 May 2012 12:07:22 -0400 Subject: NFS: Clean up nfs read and write error paths Move the error handling for nfs_generic_pagein() into a single function. Ditto for nfs_generic_flush(). Signed-off-by: Trond Myklebust Cc: Fred Isaman diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 39cbac5..6fdeca2 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1321,7 +1321,6 @@ pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc) if (ret != 0) { put_lseg(desc->pg_lseg); desc->pg_lseg = NULL; - set_bit(NFS_IOHDR_REDO, &hdr->flags); } else pnfs_do_multiple_writes(desc, &hdr->rpc_list, desc->pg_ioflags); if (atomic_dec_and_test(&hdr->refcnt)) @@ -1476,7 +1475,6 @@ pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc) if (ret != 0) { put_lseg(desc->pg_lseg); desc->pg_lseg = NULL; - set_bit(NFS_IOHDR_REDO, &hdr->flags); } else pnfs_do_multiple_reads(desc, &hdr->rpc_list); if (atomic_dec_and_test(&hdr->refcnt)) diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 20a0293..1961a19 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -320,6 +320,19 @@ static const struct nfs_pgio_completion_ops nfs_async_read_completion_ops = { .completion = nfs_read_completion, }; +static void nfs_pagein_error(struct nfs_pageio_descriptor *desc, + struct nfs_pgio_header *hdr) +{ + set_bit(NFS_IOHDR_REDO, &hdr->flags); + while (!list_empty(&hdr->rpc_list)) { + struct nfs_read_data *data = list_first_entry(&hdr->rpc_list, + struct nfs_read_data, list); + list_del(&data->list); + nfs_readdata_release(data); + } + desc->pg_completion_ops->error_cleanup(&desc->pg_list); +} + /* * Generate multiple requests to fill a single page. * @@ -342,33 +355,27 @@ static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc, size_t rsize = desc->pg_bsize, nbytes; unsigned int offset; - nfs_list_remove_request(req); - nfs_list_add_request(req, &hdr->pages); - offset = 0; nbytes = desc->pg_count; do { size_t len = min(nbytes,rsize); data = nfs_readdata_alloc(hdr, 1); - if (!data) - goto out_bad; + if (!data) { + nfs_pagein_error(desc, hdr); + return -ENOMEM; + } data->pages.pagevec[0] = page; nfs_read_rpcsetup(data, len, offset); list_add(&data->list, &hdr->rpc_list); nbytes -= len; offset += len; } while (nbytes != 0); + + nfs_list_remove_request(req); + nfs_list_add_request(req, &hdr->pages); desc->pg_rpc_callops = &nfs_read_common_ops; return 0; -out_bad: - while (!list_empty(&hdr->rpc_list)) { - data = list_first_entry(&hdr->rpc_list, struct nfs_read_data, list); - list_del(&data->list); - nfs_readdata_release(data); - } - desc->pg_completion_ops->error_cleanup(&hdr->pages); - return -ENOMEM; } static int nfs_pagein_one(struct nfs_pageio_descriptor *desc, @@ -378,12 +385,11 @@ static int nfs_pagein_one(struct nfs_pageio_descriptor *desc, struct page **pages; struct nfs_read_data *data; struct list_head *head = &desc->pg_list; - int ret = 0; data = nfs_readdata_alloc(hdr, nfs_page_array_len(desc->pg_base, desc->pg_count)); if (!data) { - desc->pg_completion_ops->error_cleanup(head); + nfs_pagein_error(desc, hdr); return -ENOMEM; } @@ -427,8 +433,6 @@ static int nfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc) if (ret == 0) ret = nfs_do_multiple_reads(&hdr->rpc_list, desc->pg_rpc_callops); - else - set_bit(NFS_IOHDR_REDO, &hdr->flags); if (atomic_dec_and_test(&hdr->refcnt)) hdr->completion_ops->completion(hdr); return ret; diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 2f80aa5..d1e4f81 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1058,6 +1058,19 @@ static const struct nfs_pgio_completion_ops nfs_async_write_completion_ops = { .completion = nfs_write_completion, }; +static void nfs_flush_error(struct nfs_pageio_descriptor *desc, + struct nfs_pgio_header *hdr) +{ + set_bit(NFS_IOHDR_REDO, &hdr->flags); + while (!list_empty(&hdr->rpc_list)) { + struct nfs_write_data *data = list_first_entry(&hdr->rpc_list, + struct nfs_write_data, list); + list_del(&data->list); + nfs_writedata_release(data); + } + desc->pg_completion_ops->error_cleanup(&desc->pg_list); +} + /* * Generate multiple small requests to write out a single * contiguous dirty area on one page. @@ -1071,12 +1084,9 @@ static int nfs_flush_multi(struct nfs_pageio_descriptor *desc, size_t wsize = desc->pg_bsize, nbytes; unsigned int offset; int requests = 0; - int ret = 0; struct nfs_commit_info cinfo; nfs_init_cinfo(&cinfo, desc->pg_inode, desc->pg_dreq); - nfs_list_remove_request(req); - nfs_list_add_request(req, &hdr->pages); if ((desc->pg_ioflags & FLUSH_COND_STABLE) && (desc->pg_moreio || nfs_reqs_to_commit(&cinfo) || @@ -1090,8 +1100,10 @@ static int nfs_flush_multi(struct nfs_pageio_descriptor *desc, size_t len = min(nbytes, wsize); data = nfs_writedata_alloc(hdr, 1); - if (!data) - goto out_bad; + if (!data) { + nfs_flush_error(desc, hdr); + return -ENOMEM; + } data->pages.pagevec[0] = page; nfs_write_rpcsetup(data, len, offset, desc->pg_ioflags, &cinfo); list_add(&data->list, &hdr->rpc_list); @@ -1099,17 +1111,10 @@ static int nfs_flush_multi(struct nfs_pageio_descriptor *desc, nbytes -= len; offset += len; } while (nbytes != 0); + nfs_list_remove_request(req); + nfs_list_add_request(req, &hdr->pages); desc->pg_rpc_callops = &nfs_write_common_ops; - return ret; - -out_bad: - while (!list_empty(&hdr->rpc_list)) { - data = list_first_entry(&hdr->rpc_list, struct nfs_write_data, list); - list_del(&data->list); - nfs_writedata_release(data); - } - desc->pg_completion_ops->error_cleanup(&hdr->pages); - return -ENOMEM; + return 0; } /* @@ -1127,15 +1132,13 @@ static int nfs_flush_one(struct nfs_pageio_descriptor *desc, struct page **pages; struct nfs_write_data *data; struct list_head *head = &desc->pg_list; - int ret = 0; struct nfs_commit_info cinfo; data = nfs_writedata_alloc(hdr, nfs_page_array_len(desc->pg_base, desc->pg_count)); if (!data) { - desc->pg_completion_ops->error_cleanup(head); - ret = -ENOMEM; - goto out; + nfs_flush_error(desc, hdr); + return -ENOMEM; } nfs_init_cinfo(&cinfo, desc->pg_inode, desc->pg_dreq); @@ -1155,8 +1158,7 @@ static int nfs_flush_one(struct nfs_pageio_descriptor *desc, nfs_write_rpcsetup(data, desc->pg_count, 0, desc->pg_ioflags, &cinfo); list_add(&data->list, &hdr->rpc_list); desc->pg_rpc_callops = &nfs_write_common_ops; -out: - return ret; + return 0; } int nfs_generic_flush(struct nfs_pageio_descriptor *desc, @@ -1186,8 +1188,6 @@ static int nfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc) ret = nfs_do_multiple_writes(&hdr->rpc_list, desc->pg_rpc_callops, desc->pg_ioflags); - else - set_bit(NFS_IOHDR_REDO, &hdr->flags); if (atomic_dec_and_test(&hdr->refcnt)) hdr->completion_ops->completion(hdr); return ret; -- cgit v0.10.2 From 4bd8b010136afa0df9122a08bad361686bda0a1d Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 1 May 2012 12:49:58 -0400 Subject: NFS: Simplify the nfs_read_completion functions Signed-off-by: Trond Myklebust Cc: Fred Isaman diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index f17e469..aab3016 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -243,36 +243,28 @@ static void nfs_direct_read_completion(struct nfs_pgio_header *hdr) dreq->count += hdr->good_bytes; spin_unlock(&dreq->lock); - if (!test_bit(NFS_IOHDR_ERROR, &hdr->flags)) { - while (!list_empty(&hdr->pages)) { - struct nfs_page *req = nfs_list_entry(hdr->pages.next); - struct page *page = req->wb_page; - - if (test_bit(NFS_IOHDR_EOF, &hdr->flags)) { - if (bytes > hdr->good_bytes) - zero_user(page, 0, PAGE_SIZE); - else if (hdr->good_bytes - bytes < PAGE_SIZE) - zero_user_segment(page, - hdr->good_bytes & ~PAGE_MASK, - PAGE_SIZE); - } - bytes += req->wb_bytes; - nfs_list_remove_request(req); - if (!PageCompound(page)) - set_page_dirty(page); - nfs_direct_readpage_release(req); + while (!list_empty(&hdr->pages)) { + struct nfs_page *req = nfs_list_entry(hdr->pages.next); + struct page *page = req->wb_page; + + if (test_bit(NFS_IOHDR_EOF, &hdr->flags)) { + if (bytes > hdr->good_bytes) + zero_user(page, 0, PAGE_SIZE); + else if (hdr->good_bytes - bytes < PAGE_SIZE) + zero_user_segment(page, + hdr->good_bytes & ~PAGE_MASK, + PAGE_SIZE); } - } else { - while (!list_empty(&hdr->pages)) { - struct nfs_page *req = nfs_list_entry(hdr->pages.next); - - if (bytes < hdr->good_bytes) - if (!PageCompound(req->wb_page)) - set_page_dirty(req->wb_page); - bytes += req->wb_bytes; - nfs_list_remove_request(req); - nfs_direct_readpage_release(req); + if (!PageCompound(page)) { + if (test_bit(NFS_IOHDR_ERROR, &hdr->flags)) { + if (bytes < hdr->good_bytes) + set_page_dirty(page); + } else + set_page_dirty(page); } + bytes += req->wb_bytes; + nfs_list_remove_request(req); + nfs_direct_readpage_release(req); } out_put: if (put_dreq(dreq)) diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 1961a19..37c9eb2 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -179,34 +179,26 @@ static void nfs_read_completion(struct nfs_pgio_header *hdr) if (test_bit(NFS_IOHDR_REDO, &hdr->flags)) goto out; - if (!test_bit(NFS_IOHDR_ERROR, &hdr->flags)) { - while (!list_empty(&hdr->pages)) { - struct nfs_page *req = nfs_list_entry(hdr->pages.next); - struct page *page = req->wb_page; - - if (test_bit(NFS_IOHDR_EOF, &hdr->flags)) { - if (bytes > hdr->good_bytes) - zero_user(page, 0, PAGE_SIZE); - else if (hdr->good_bytes - bytes < PAGE_SIZE) - zero_user_segment(page, - hdr->good_bytes & ~PAGE_MASK, - PAGE_SIZE); - } - SetPageUptodate(page); - nfs_list_remove_request(req); - nfs_readpage_release(req); - bytes += PAGE_SIZE; + while (!list_empty(&hdr->pages)) { + struct nfs_page *req = nfs_list_entry(hdr->pages.next); + struct page *page = req->wb_page; + + if (test_bit(NFS_IOHDR_EOF, &hdr->flags)) { + if (bytes > hdr->good_bytes) + zero_user(page, 0, PAGE_SIZE); + else if (hdr->good_bytes - bytes < PAGE_SIZE) + zero_user_segment(page, + hdr->good_bytes & ~PAGE_MASK, + PAGE_SIZE); } - } else { - while (!list_empty(&hdr->pages)) { - struct nfs_page *req = nfs_list_entry(hdr->pages.next); - - bytes += req->wb_bytes; + bytes += req->wb_bytes; + if (test_bit(NFS_IOHDR_ERROR, &hdr->flags)) { if (bytes <= hdr->good_bytes) - SetPageUptodate(req->wb_page); - nfs_list_remove_request(req); - nfs_readpage_release(req); - } + SetPageUptodate(page); + } else + SetPageUptodate(page); + nfs_list_remove_request(req); + nfs_readpage_release(req); } out: hdr->release(hdr); -- cgit v0.10.2 From a9f6991b6cd3f55aa8482633337cd811d84d0dd8 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 27 Apr 2012 13:48:17 -0400 Subject: NFSv4: Fix a typo in NFS4_enc_link_sz Signed-off-by: Trond Myklebust diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 4c3cc0e..fe61424 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -626,9 +626,9 @@ static int nfs4_stat_to_errno(int); encode_savefh_maxsz + \ encode_putfh_maxsz + \ encode_link_maxsz + \ - decode_getattr_maxsz + \ + encode_getattr_maxsz + \ encode_restorefh_maxsz + \ - decode_getattr_maxsz) + encode_getattr_maxsz) #define NFS4_dec_link_sz (compound_decode_hdr_maxsz + \ decode_sequence_maxsz + \ decode_putfh_maxsz + \ -- cgit v0.10.2 From 9e907fec6ef7705ba07e22f034dacf102d29a538 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 27 Apr 2012 13:48:17 -0400 Subject: NFSv4: Delegreturn only needs the cache consistency bitmask In order to do close-to-open cache consistency checking after a delegreturn, we don't need to retrieve the full set of attributes. Signed-off-by: Trond Myklebust diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 1780391..111a3cc 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -4117,7 +4117,7 @@ static int _nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, co nfs41_init_sequence(&data->args.seq_args, &data->res.seq_res, 1); data->args.fhandle = &data->fh; data->args.stateid = &data->stateid; - data->args.bitmask = server->attr_bitmask; + data->args.bitmask = server->cache_consistency_bitmask; nfs_copy_fh(&data->fh, NFS_FH(inode)); nfs4_stateid_copy(&data->stateid, stateid); data->res.fattr = &data->fattr; -- cgit v0.10.2 From e144cbcc251f16c1a14b9256cda73ab4aebe933a Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 28 Apr 2012 16:05:03 -0400 Subject: NFSv4: Retrieve attributes _before_ calling delegreturn In order to retrieve cache consistency attributes before anyone else has a chance to change the inode, we need to put the GETATTR op _before_ the DELEGRETURN op. We can then use that as part of a 'nfs_post_op_update_inode_force_wcc()' call, to ensure that we update the attributes without clearing our cached data. Signed-off-by: Trond Myklebust diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 111a3cc..2e0fbff 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -4138,9 +4138,10 @@ static int _nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, co if (status != 0) goto out; status = data->rpc_status; - if (status != 0) - goto out; - nfs_refresh_inode(inode, &data->fattr); + if (status == 0) + nfs_post_op_update_inode_force_wcc(inode, &data->fattr); + else + nfs_refresh_inode(inode, &data->fattr); out: rpc_put_task(task); return status; diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index fe61424..ac7a3b0 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -2602,8 +2602,8 @@ static void nfs4_xdr_enc_delegreturn(struct rpc_rqst *req, encode_compound_hdr(xdr, req, &hdr); encode_sequence(xdr, &args->seq_args, &hdr); encode_putfh(xdr, args->fhandle, &hdr); - encode_delegreturn(xdr, args->stateid, &hdr); encode_getfattr(xdr, args->bitmask, &hdr); + encode_delegreturn(xdr, args->stateid, &hdr); encode_nops(&hdr); } @@ -6527,10 +6527,10 @@ static int nfs4_xdr_dec_delegreturn(struct rpc_rqst *rqstp, status = decode_putfh(xdr); if (status != 0) goto out; - status = decode_delegreturn(xdr); + status = decode_getfattr(xdr, res->fattr, res->server); if (status != 0) goto out; - decode_getfattr(xdr, res->fattr, res->server); + status = decode_delegreturn(xdr); out: return status; } -- cgit v0.10.2 From b4b1eadf7c5f00636500ad47f68edc0666e63ea5 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 29 Apr 2012 11:23:50 -0400 Subject: NFS: Don't force page cache revalidations when holding a delegation If we're holding a delegation, then we already know that our page cache is valid. Signed-off-by: Trond Myklebust diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 59a12c6..fed27c0 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -870,6 +870,15 @@ static int nfs_invalidate_mapping(struct inode *inode, struct address_space *map return 0; } +static bool nfs_mapping_need_revalidate_inode(struct inode *inode) +{ + if (nfs_have_delegated_attributes(inode)) + return false; + return (NFS_I(inode)->cache_validity & NFS_INO_REVAL_PAGECACHE) + || nfs_attribute_timeout(inode) + || NFS_STALE(inode); +} + /** * nfs_revalidate_mapping - Revalidate the pagecache * @inode - pointer to host inode @@ -880,9 +889,7 @@ int nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping) struct nfs_inode *nfsi = NFS_I(inode); int ret = 0; - if ((nfsi->cache_validity & NFS_INO_REVAL_PAGECACHE) - || nfs_attribute_cache_expired(inode) - || NFS_STALE(inode)) { + if (nfs_mapping_need_revalidate_inode(inode)) { ret = __nfs_revalidate_inode(NFS_SERVER(inode), inode); if (ret < 0) goto out; -- cgit v0.10.2 From 01da47bde78ff2149f6546a0f17e25983aaddd7b Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 29 Apr 2012 12:30:19 -0400 Subject: NFS: Optimise away nfs_check_inode_attributes() when holding a delegation We already know that the attribute cache is valid. Signed-off-by: Trond Myklebust diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index fed27c0..81946e7 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -955,6 +955,8 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat unsigned long invalid = 0; + if (nfs_have_delegated_attributes(inode)) + return 0; /* Has the inode gone and changed behind our back? */ if ((fattr->valid & NFS_ATTR_FATTR_FILEID) && nfsi->fileid != fattr->fileid) return -EIO; -- cgit v0.10.2 From 8d197a568fc337c66729b289c7fa0f28c14ba5ac Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 29 Apr 2012 12:50:01 -0400 Subject: NFS: Always trust the PageUptodate flag when we have a delegation We can always use the optimal full page write if we know that we hold a delegation. Signed-off-by: Trond Myklebust diff --git a/fs/nfs/write.c b/fs/nfs/write.c index d1e4f81..6f263da 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -850,10 +850,14 @@ int nfs_flush_incompatible(struct file *file, struct page *page) * the PageUptodate() flag. In this case, we will need to turn off * write optimisations that depend on the page contents being correct. */ -static int nfs_write_pageuptodate(struct page *page, struct inode *inode) +static bool nfs_write_pageuptodate(struct page *page, struct inode *inode) { - return PageUptodate(page) && - !(NFS_I(inode)->cache_validity & (NFS_INO_REVAL_PAGECACHE|NFS_INO_INVALID_DATA)); + if (nfs_have_delegated_attributes(inode)) + goto out; + if (NFS_I(inode)->cache_validity & NFS_INO_REVAL_PAGECACHE) + return false; +out: + return PageUptodate(page) != 0; } /* -- cgit v0.10.2 From 4124bbc52118e7da6f7ad41cc247fa16f4b3f051 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 27 Apr 2012 13:48:17 -0400 Subject: NFS: Simplify nfs_fhget() If the inode is being initialised, there is no point in setting flags such as NFS_INO_INVALID_ACCESS, NFS_INO_INVALID_ACL or NFS_INO_INVALID_DATA since there are no cached access calls, acls or data caches to invalidate. Signed-off-by: Trond Myklebust diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 81946e7..8d67e5e 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -285,9 +285,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr) inode->i_mode = fattr->mode; if ((fattr->valid & NFS_ATTR_FATTR_MODE) == 0 && nfs_server_capable(inode, NFS_CAP_MODE)) - nfsi->cache_validity |= NFS_INO_INVALID_ATTR - | NFS_INO_INVALID_ACCESS - | NFS_INO_INVALID_ACL; + nfsi->cache_validity |= NFS_INO_INVALID_ATTR; /* Why so? Because we want revalidate for devices/FIFOs, and * that's precisely what we have in nfs_file_inode_operations. */ @@ -337,24 +335,19 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr) if (fattr->valid & NFS_ATTR_FATTR_MTIME) inode->i_mtime = fattr->mtime; else if (nfs_server_capable(inode, NFS_CAP_MTIME)) - nfsi->cache_validity |= NFS_INO_INVALID_ATTR - | NFS_INO_INVALID_DATA; + nfsi->cache_validity |= NFS_INO_INVALID_ATTR; if (fattr->valid & NFS_ATTR_FATTR_CTIME) inode->i_ctime = fattr->ctime; else if (nfs_server_capable(inode, NFS_CAP_CTIME)) - nfsi->cache_validity |= NFS_INO_INVALID_ATTR - | NFS_INO_INVALID_ACCESS - | NFS_INO_INVALID_ACL; + nfsi->cache_validity |= NFS_INO_INVALID_ATTR; if (fattr->valid & NFS_ATTR_FATTR_CHANGE) inode->i_version = fattr->change_attr; else if (nfs_server_capable(inode, NFS_CAP_CHANGE_ATTR)) - nfsi->cache_validity |= NFS_INO_INVALID_ATTR - | NFS_INO_INVALID_DATA; + nfsi->cache_validity |= NFS_INO_INVALID_ATTR; if (fattr->valid & NFS_ATTR_FATTR_SIZE) inode->i_size = nfs_size_to_loff_t(fattr->size); else nfsi->cache_validity |= NFS_INO_INVALID_ATTR - | NFS_INO_INVALID_DATA | NFS_INO_REVAL_PAGECACHE; if (fattr->valid & NFS_ATTR_FATTR_NLINK) set_nlink(inode, fattr->nlink); @@ -363,15 +356,11 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr) if (fattr->valid & NFS_ATTR_FATTR_OWNER) inode->i_uid = fattr->uid; else if (nfs_server_capable(inode, NFS_CAP_OWNER)) - nfsi->cache_validity |= NFS_INO_INVALID_ATTR - | NFS_INO_INVALID_ACCESS - | NFS_INO_INVALID_ACL; + nfsi->cache_validity |= NFS_INO_INVALID_ATTR; if (fattr->valid & NFS_ATTR_FATTR_GROUP) inode->i_gid = fattr->gid; else if (nfs_server_capable(inode, NFS_CAP_OWNER_GROUP)) - nfsi->cache_validity |= NFS_INO_INVALID_ATTR - | NFS_INO_INVALID_ACCESS - | NFS_INO_INVALID_ACL; + nfsi->cache_validity |= NFS_INO_INVALID_ATTR; if (fattr->valid & NFS_ATTR_FATTR_BLOCKS_USED) inode->i_blocks = fattr->du.nfs2.blocks; if (fattr->valid & NFS_ATTR_FATTR_SPACE_USED) { -- cgit v0.10.2 From 6a4506c0b56889aaa15bcf50b3c75f46a8d0a3bd Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 27 Apr 2012 13:48:17 -0400 Subject: NFS: Change attribute updates should set NFS_INO_REVAL_PAGECACHE Signed-off-by: Trond Myklebust diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 8d67e5e..9d76c0b 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1312,7 +1312,11 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) if (inode->i_version != fattr->change_attr) { dprintk("NFS: change_attr change on server for file %s/%ld\n", inode->i_sb->s_id, inode->i_ino); - invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; + invalid |= NFS_INO_INVALID_ATTR + | NFS_INO_INVALID_DATA + | NFS_INO_INVALID_ACCESS + | NFS_INO_INVALID_ACL + | NFS_INO_REVAL_PAGECACHE; if (S_ISDIR(inode->i_mode)) nfs_force_lookup_revalidate(inode); inode->i_version = fattr->change_attr; -- cgit v0.10.2 From 3a1556e8662cc425c433b463fcdae138908ca467 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 27 Apr 2012 13:48:18 -0400 Subject: NFSv2/v3: Simulate the change attribute Use the ctime to simulate a change attribute for NFSv2 and NFSv3. Signed-off-by: Trond Myklebust diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 60f7e4e..a8f8de6 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -880,7 +880,7 @@ static int nfs_init_server(struct nfs_server *server, server->options = data->options; server->caps |= NFS_CAP_HARDLINKS|NFS_CAP_SYMLINKS|NFS_CAP_FILEID| NFS_CAP_MODE|NFS_CAP_NLINK|NFS_CAP_OWNER|NFS_CAP_OWNER_GROUP| - NFS_CAP_ATIME|NFS_CAP_CTIME|NFS_CAP_MTIME; + NFS_CAP_ATIME|NFS_CAP_CTIME|NFS_CAP_MTIME|NFS_CAP_CHANGE_ATTR; if (data->rsize) server->rsize = nfs_block_size(data->rsize, NULL); diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 0fd1efa..1855e8f 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -493,3 +493,15 @@ unsigned int nfs_page_array_len(unsigned int base, size_t len) PAGE_SIZE - 1) >> PAGE_SHIFT; } +/* + * Convert a struct timespec into a 64-bit change attribute + * + * This does approximately the same thing as timespec_to_ns(), + * but for calculation efficiency, we multiply the seconds by + * 1024*1024*1024. + */ +static inline +u64 nfs_timespec_to_change_attr(const struct timespec *ts) +{ + return ((u64)ts->tv_sec << 30) + ts->tv_nsec; +} diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c index 1f56000..c99008e 100644 --- a/fs/nfs/nfs2xdr.c +++ b/fs/nfs/nfs2xdr.c @@ -313,6 +313,8 @@ static int decode_fattr(struct xdr_stream *xdr, struct nfs_fattr *fattr) p = xdr_decode_time(p, &fattr->atime); p = xdr_decode_time(p, &fattr->mtime); xdr_decode_time(p, &fattr->ctime); + fattr->change_attr = nfs_timespec_to_change_attr(&fattr->ctime); + return 0; out_overflow: print_overflow_msg(__func__, xdr); diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c index 01e53e9..ee284c2 100644 --- a/fs/nfs/nfs3xdr.c +++ b/fs/nfs/nfs3xdr.c @@ -675,6 +675,7 @@ static int decode_fattr3(struct xdr_stream *xdr, struct nfs_fattr *fattr) p = xdr_decode_nfstime3(p, &fattr->atime); p = xdr_decode_nfstime3(p, &fattr->mtime); xdr_decode_nfstime3(p, &fattr->ctime); + fattr->change_attr = nfs_timespec_to_change_attr(&fattr->ctime); fattr->valid |= NFS_ATTR_FATTR_V3; return 0; @@ -725,12 +726,14 @@ static int decode_wcc_attr(struct xdr_stream *xdr, struct nfs_fattr *fattr) goto out_overflow; fattr->valid |= NFS_ATTR_FATTR_PRESIZE + | NFS_ATTR_FATTR_PRECHANGE | NFS_ATTR_FATTR_PREMTIME | NFS_ATTR_FATTR_PRECTIME; p = xdr_decode_size3(p, &fattr->pre_size); p = xdr_decode_nfstime3(p, &fattr->pre_mtime); xdr_decode_nfstime3(p, &fattr->pre_ctime); + fattr->pre_change_attr = nfs_timespec_to_change_attr(&fattr->pre_ctime); return 0; out_overflow: diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 6deb8f0..bc36808 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -106,14 +106,14 @@ struct nfs_fattr { | NFS_ATTR_FATTR_FILEID \ | NFS_ATTR_FATTR_ATIME \ | NFS_ATTR_FATTR_MTIME \ - | NFS_ATTR_FATTR_CTIME) + | NFS_ATTR_FATTR_CTIME \ + | NFS_ATTR_FATTR_CHANGE) #define NFS_ATTR_FATTR_V2 (NFS_ATTR_FATTR \ | NFS_ATTR_FATTR_BLOCKS_USED) #define NFS_ATTR_FATTR_V3 (NFS_ATTR_FATTR \ | NFS_ATTR_FATTR_SPACE_USED) #define NFS_ATTR_FATTR_V4 (NFS_ATTR_FATTR \ - | NFS_ATTR_FATTR_SPACE_USED \ - | NFS_ATTR_FATTR_CHANGE) + | NFS_ATTR_FATTR_SPACE_USED) /* * Info on the file system -- cgit v0.10.2 From fee7fe196c41847c135cde41b0ec790f53ee6fcf Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 27 Apr 2012 13:48:18 -0400 Subject: NFS: Simplify the cache invalidation code Now that NFSv2 and NFSv3 have simulated change attributes, instead of using all three of mtime, ctime and change attribute to manage data cache consistency, we can simplify the code to just use the change attribute. Signed-off-by: Trond Myklebust diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 9d76c0b..0d53113 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -958,7 +958,7 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat /* Verify a few of the more important attributes */ if ((fattr->valid & NFS_ATTR_FATTR_MTIME) && !timespec_equal(&inode->i_mtime, &fattr->mtime)) - invalid |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE; + invalid |= NFS_INO_INVALID_ATTR; if (fattr->valid & NFS_ATTR_FATTR_SIZE) { cur_size = i_size_read(inode); @@ -1325,38 +1325,15 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) invalid |= save_cache_validity; if (fattr->valid & NFS_ATTR_FATTR_MTIME) { - /* NFSv2/v3: Check if the mtime agrees */ - if (!timespec_equal(&inode->i_mtime, &fattr->mtime)) { - dprintk("NFS: mtime change on server for file %s/%ld\n", - inode->i_sb->s_id, inode->i_ino); - invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA; - if (S_ISDIR(inode->i_mode)) - nfs_force_lookup_revalidate(inode); - memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime)); - } + memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime)); } else if (server->caps & NFS_CAP_MTIME) invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR - | NFS_INO_INVALID_DATA - | NFS_INO_REVAL_PAGECACHE | NFS_INO_REVAL_FORCED); if (fattr->valid & NFS_ATTR_FATTR_CTIME) { - /* If ctime has changed we should definitely clear access+acl caches */ - if (!timespec_equal(&inode->i_ctime, &fattr->ctime)) { - invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; - /* and probably clear data for a directory too as utimes can cause - * havoc with our cache. - */ - if (S_ISDIR(inode->i_mode)) { - invalid |= NFS_INO_INVALID_DATA; - nfs_force_lookup_revalidate(inode); - } - memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime)); - } + memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime)); } else if (server->caps & NFS_CAP_CTIME) invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR - | NFS_INO_INVALID_ACCESS - | NFS_INO_INVALID_ACL | NFS_INO_REVAL_FORCED); /* Check if our cached file size is stale */ -- cgit v0.10.2 From 90ff0c548d1220d31f80e498b587393895705e6c Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 27 Apr 2012 13:48:18 -0400 Subject: NFSv4: Simplify the NFSv4 OPEN compound Get rid of the post-op GETATTR on the directory in order to reduce the amount of processing done on the server. The cost is that if we later need to stat() the directory, then we know that the ctime and mtime are likely to be invalid. Signed-off-by: Trond Myklebust diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 2e0fbff..f01c3d1 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -788,7 +788,6 @@ struct nfs4_opendata { struct nfs4_string owner_name; struct nfs4_string group_name; struct nfs_fattr f_attr; - struct nfs_fattr dir_attr; struct dentry *dir; struct dentry *dentry; struct nfs4_state_owner *owner; @@ -804,12 +803,10 @@ struct nfs4_opendata { static void nfs4_init_opendata_res(struct nfs4_opendata *p) { p->o_res.f_attr = &p->f_attr; - p->o_res.dir_attr = &p->dir_attr; p->o_res.seqid = p->o_arg.seqid; p->c_res.seqid = p->c_arg.seqid; p->o_res.server = p->o_arg.server; nfs_fattr_init(&p->f_attr); - nfs_fattr_init(&p->dir_attr); nfs_fattr_init_names(&p->f_attr, &p->owner_name, &p->group_name); } @@ -843,7 +840,6 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry, p->o_arg.name = &dentry->d_name; p->o_arg.server = server; p->o_arg.bitmask = server->attr_bitmask; - p->o_arg.dir_bitmask = server->cache_consistency_bitmask; p->o_arg.claim = NFS4_OPEN_CLAIM_NULL; if (attrs != NULL && attrs->ia_valid != 0) { __be32 verf[2]; @@ -1611,8 +1607,6 @@ static int _nfs4_recover_proc_open(struct nfs4_opendata *data) nfs_fattr_map_and_free_names(NFS_SERVER(dir), &data->f_attr); - nfs_refresh_inode(dir, o_res->dir_attr); - if (o_res->rflags & NFS4_OPEN_RESULT_CONFIRM) { status = _nfs4_proc_open_confirm(data); if (status != 0) @@ -1645,11 +1639,8 @@ static int _nfs4_proc_open(struct nfs4_opendata *data) nfs_fattr_map_and_free_names(server, &data->f_attr); - if (o_arg->open_flags & O_CREAT) { + if (o_arg->open_flags & O_CREAT) update_changeattr(dir, &o_res->cinfo); - nfs_post_op_update_inode(dir, o_res->dir_attr); - } else - nfs_refresh_inode(dir, o_res->dir_attr); if ((o_res->rflags & NFS4_OPEN_RESULT_LOCKTYPE_POSIX) == 0) server->caps &= ~NFS_CAP_POSIX_LOCK; if(o_res->rflags & NFS4_OPEN_RESULT_CONFIRM) { diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index ac7a3b0..6e878dc 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -431,20 +431,14 @@ static int nfs4_stat_to_errno(int); #define NFS4_enc_open_sz (compound_encode_hdr_maxsz + \ encode_sequence_maxsz + \ encode_putfh_maxsz + \ - encode_savefh_maxsz + \ encode_open_maxsz + \ encode_getfh_maxsz + \ - encode_getattr_maxsz + \ - encode_restorefh_maxsz + \ encode_getattr_maxsz) #define NFS4_dec_open_sz (compound_decode_hdr_maxsz + \ decode_sequence_maxsz + \ decode_putfh_maxsz + \ - decode_savefh_maxsz + \ decode_open_maxsz + \ decode_getfh_maxsz + \ - decode_getattr_maxsz + \ - decode_restorefh_maxsz + \ decode_getattr_maxsz) #define NFS4_enc_open_confirm_sz \ (compound_encode_hdr_maxsz + \ @@ -2191,12 +2185,9 @@ static void nfs4_xdr_enc_open(struct rpc_rqst *req, struct xdr_stream *xdr, encode_compound_hdr(xdr, req, &hdr); encode_sequence(xdr, &args->seq_args, &hdr); encode_putfh(xdr, args->fh, &hdr); - encode_savefh(xdr, &hdr); encode_open(xdr, args, &hdr); encode_getfh(xdr, &hdr); encode_getfattr(xdr, args->bitmask, &hdr); - encode_restorefh(xdr, &hdr); - encode_getfattr(xdr, args->dir_bitmask, &hdr); encode_nops(&hdr); } @@ -6075,19 +6066,12 @@ static int nfs4_xdr_dec_open(struct rpc_rqst *rqstp, struct xdr_stream *xdr, status = decode_putfh(xdr); if (status) goto out; - status = decode_savefh(xdr); - if (status) - goto out; status = decode_open(xdr, res); if (status) goto out; if (decode_getfh(xdr, &res->fh) != 0) goto out; - if (decode_getfattr(xdr, res->f_attr, res->server) != 0) - goto out; - if (decode_restorefh(xdr) != 0) - goto out; - decode_getfattr(xdr, res->dir_attr, res->server); + decode_getfattr(xdr, res->f_attr, res->server); out: return status; } diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index bc36808..92a929f 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -338,7 +338,6 @@ struct nfs_openargs { const struct qstr * name; const struct nfs_server *server; /* Needed for ID mapping */ const u32 * bitmask; - const u32 * dir_bitmask; __u32 claim; struct nfs4_sequence_args seq_args; }; @@ -349,7 +348,6 @@ struct nfs_openres { struct nfs4_change_info cinfo; __u32 rflags; struct nfs_fattr * f_attr; - struct nfs_fattr * dir_attr; struct nfs_seqid * seqid; const struct nfs_server *server; fmode_t delegation_type; -- cgit v0.10.2 From 7c317fcfbae773e493ecee1c53738db774b1d0ca Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 27 Apr 2012 13:48:18 -0400 Subject: NFSv4: Simplify the NFSv4 CREATE compound Get rid of the post-op GETATTR on the directory in order to reduce the amount of processing done on the server. Signed-off-by: Trond Myklebust diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index f01c3d1..619bc1e 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2994,7 +2994,6 @@ struct nfs4_createdata { struct nfs4_create_res res; struct nfs_fh fh; struct nfs_fattr fattr; - struct nfs_fattr dir_fattr; }; static struct nfs4_createdata *nfs4_alloc_createdata(struct inode *dir, @@ -3018,9 +3017,7 @@ static struct nfs4_createdata *nfs4_alloc_createdata(struct inode *dir, data->res.server = server; data->res.fh = &data->fh; data->res.fattr = &data->fattr; - data->res.dir_fattr = &data->dir_fattr; nfs_fattr_init(data->res.fattr); - nfs_fattr_init(data->res.dir_fattr); } return data; } @@ -3031,7 +3028,6 @@ static int nfs4_do_create(struct inode *dir, struct dentry *dentry, struct nfs4_ &data->arg.seq_args, &data->res.seq_res, 1); if (status == 0) { update_changeattr(dir, &data->res.dir_cinfo); - nfs_post_op_update_inode(dir, data->res.dir_fattr); status = nfs_instantiate(dentry, data->res.fh, data->res.fattr); } return status; diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 6e878dc..1a70097 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -647,20 +647,14 @@ static int nfs4_stat_to_errno(int); #define NFS4_enc_create_sz (compound_encode_hdr_maxsz + \ encode_sequence_maxsz + \ encode_putfh_maxsz + \ - encode_savefh_maxsz + \ encode_create_maxsz + \ encode_getfh_maxsz + \ - encode_getattr_maxsz + \ - encode_restorefh_maxsz + \ encode_getattr_maxsz) #define NFS4_dec_create_sz (compound_decode_hdr_maxsz + \ decode_sequence_maxsz + \ decode_putfh_maxsz + \ - decode_savefh_maxsz + \ decode_create_maxsz + \ decode_getfh_maxsz + \ - decode_getattr_maxsz + \ - decode_restorefh_maxsz + \ decode_getattr_maxsz) #define NFS4_enc_pathconf_sz (compound_encode_hdr_maxsz + \ encode_sequence_maxsz + \ @@ -2119,12 +2113,9 @@ static void nfs4_xdr_enc_create(struct rpc_rqst *req, struct xdr_stream *xdr, encode_compound_hdr(xdr, req, &hdr); encode_sequence(xdr, &args->seq_args, &hdr); encode_putfh(xdr, args->dir_fh, &hdr); - encode_savefh(xdr, &hdr); encode_create(xdr, args, &hdr); encode_getfh(xdr, &hdr); encode_getfattr(xdr, args->bitmask, &hdr); - encode_restorefh(xdr, &hdr); - encode_getfattr(xdr, args->bitmask, &hdr); encode_nops(&hdr); } @@ -5895,21 +5886,13 @@ static int nfs4_xdr_dec_create(struct rpc_rqst *rqstp, struct xdr_stream *xdr, status = decode_putfh(xdr); if (status) goto out; - status = decode_savefh(xdr); - if (status) - goto out; status = decode_create(xdr, &res->dir_cinfo); if (status) goto out; status = decode_getfh(xdr, res->fh); if (status) goto out; - if (decode_getfattr(xdr, res->fattr, res->server)) - goto out; - status = decode_restorefh(xdr); - if (status) - goto out; - decode_getfattr(xdr, res->dir_fattr, res->server); + decode_getfattr(xdr, res->fattr, res->server); out: return status; } diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 92a929f..696a17e 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -855,7 +855,6 @@ struct nfs4_create_res { struct nfs_fh * fh; struct nfs_fattr * fattr; struct nfs4_change_info dir_cinfo; - struct nfs_fattr * dir_fattr; struct nfs4_sequence_res seq_res; }; -- cgit v0.10.2 From 778d28172f710184855bcfeadcdd6b46997c4de2 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 27 Apr 2012 13:48:19 -0400 Subject: NFSv4: Simplify the NFSv4 REMOVE, LINK and RENAME compounds Get rid of the post-op GETATTR on the directory in order to reduce the amount of processing done on the server. Signed-off-by: Trond Myklebust diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 619bc1e..c746b0c 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2775,7 +2775,6 @@ static int _nfs4_proc_remove(struct inode *dir, struct qstr *name) .fh = NFS_FH(dir), .name.len = name->len, .name.name = name->name, - .bitmask = server->attr_bitmask, }; struct nfs_removeres res = { .server = server, @@ -2785,19 +2784,11 @@ static int _nfs4_proc_remove(struct inode *dir, struct qstr *name) .rpc_argp = &args, .rpc_resp = &res, }; - int status = -ENOMEM; - - res.dir_attr = nfs_alloc_fattr(); - if (res.dir_attr == NULL) - goto out; + int status; status = nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 1); - if (status == 0) { + if (status == 0) update_changeattr(dir, &res.cinfo); - nfs_post_op_update_inode(dir, res.dir_attr); - } - nfs_free_fattr(res.dir_attr); -out: return status; } @@ -2819,7 +2810,6 @@ static void nfs4_proc_unlink_setup(struct rpc_message *msg, struct inode *dir) struct nfs_removeargs *args = msg->rpc_argp; struct nfs_removeres *res = msg->rpc_resp; - args->bitmask = server->cache_consistency_bitmask; res->server = server; msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_REMOVE]; nfs41_init_sequence(&args->seq_args, &res->seq_res, 1); @@ -2844,7 +2834,6 @@ static int nfs4_proc_unlink_done(struct rpc_task *task, struct inode *dir) if (nfs4_async_handle_error(task, res->server, NULL) == -EAGAIN) return 0; update_changeattr(dir, &res->cinfo); - nfs_post_op_update_inode(dir, res->dir_attr); return 1; } @@ -2855,7 +2844,6 @@ static void nfs4_proc_rename_setup(struct rpc_message *msg, struct inode *dir) struct nfs_renameres *res = msg->rpc_resp; msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RENAME]; - arg->bitmask = server->attr_bitmask; res->server = server; nfs41_init_sequence(&arg->seq_args, &res->seq_res, 1); } @@ -2881,9 +2869,7 @@ static int nfs4_proc_rename_done(struct rpc_task *task, struct inode *old_dir, return 0; update_changeattr(old_dir, &res->old_cinfo); - nfs_post_op_update_inode(old_dir, res->old_fattr); update_changeattr(new_dir, &res->new_cinfo); - nfs_post_op_update_inode(new_dir, res->new_fattr); return 1; } @@ -2896,7 +2882,6 @@ static int _nfs4_proc_rename(struct inode *old_dir, struct qstr *old_name, .new_dir = NFS_FH(new_dir), .old_name = old_name, .new_name = new_name, - .bitmask = server->attr_bitmask, }; struct nfs_renameres res = { .server = server, @@ -2908,21 +2893,11 @@ static int _nfs4_proc_rename(struct inode *old_dir, struct qstr *old_name, }; int status = -ENOMEM; - res.old_fattr = nfs_alloc_fattr(); - res.new_fattr = nfs_alloc_fattr(); - if (res.old_fattr == NULL || res.new_fattr == NULL) - goto out; - status = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 1); if (!status) { update_changeattr(old_dir, &res.old_cinfo); - nfs_post_op_update_inode(old_dir, res.old_fattr); update_changeattr(new_dir, &res.new_cinfo); - nfs_post_op_update_inode(new_dir, res.new_fattr); } -out: - nfs_free_fattr(res.new_fattr); - nfs_free_fattr(res.old_fattr); return status; } @@ -2960,18 +2935,15 @@ static int _nfs4_proc_link(struct inode *inode, struct inode *dir, struct qstr * int status = -ENOMEM; res.fattr = nfs_alloc_fattr(); - res.dir_attr = nfs_alloc_fattr(); - if (res.fattr == NULL || res.dir_attr == NULL) + if (res.fattr == NULL) goto out; status = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 1); if (!status) { update_changeattr(dir, &res.cinfo); - nfs_post_op_update_inode(dir, res.dir_attr); nfs_post_op_update_inode(inode, res.fattr); } out: - nfs_free_fattr(res.dir_attr); nfs_free_fattr(res.fattr); return status; } diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 1a70097..49483f1 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -589,38 +589,29 @@ static int nfs4_stat_to_errno(int); #define NFS4_enc_remove_sz (compound_encode_hdr_maxsz + \ encode_sequence_maxsz + \ encode_putfh_maxsz + \ - encode_remove_maxsz + \ - encode_getattr_maxsz) + encode_remove_maxsz) #define NFS4_dec_remove_sz (compound_decode_hdr_maxsz + \ decode_sequence_maxsz + \ decode_putfh_maxsz + \ - decode_remove_maxsz + \ - decode_getattr_maxsz) + decode_remove_maxsz) #define NFS4_enc_rename_sz (compound_encode_hdr_maxsz + \ encode_sequence_maxsz + \ encode_putfh_maxsz + \ encode_savefh_maxsz + \ encode_putfh_maxsz + \ - encode_rename_maxsz + \ - encode_getattr_maxsz + \ - encode_restorefh_maxsz + \ - encode_getattr_maxsz) + encode_rename_maxsz) #define NFS4_dec_rename_sz (compound_decode_hdr_maxsz + \ decode_sequence_maxsz + \ decode_putfh_maxsz + \ decode_savefh_maxsz + \ decode_putfh_maxsz + \ - decode_rename_maxsz + \ - decode_getattr_maxsz + \ - decode_restorefh_maxsz + \ - decode_getattr_maxsz) + decode_rename_maxsz) #define NFS4_enc_link_sz (compound_encode_hdr_maxsz + \ encode_sequence_maxsz + \ encode_putfh_maxsz + \ encode_savefh_maxsz + \ encode_putfh_maxsz + \ encode_link_maxsz + \ - encode_getattr_maxsz + \ encode_restorefh_maxsz + \ encode_getattr_maxsz) #define NFS4_dec_link_sz (compound_decode_hdr_maxsz + \ @@ -629,7 +620,6 @@ static int nfs4_stat_to_errno(int); decode_savefh_maxsz + \ decode_putfh_maxsz + \ decode_link_maxsz + \ - decode_getattr_maxsz + \ decode_restorefh_maxsz + \ decode_getattr_maxsz) #define NFS4_enc_symlink_sz (compound_encode_hdr_maxsz + \ @@ -2052,7 +2042,6 @@ static void nfs4_xdr_enc_remove(struct rpc_rqst *req, struct xdr_stream *xdr, encode_sequence(xdr, &args->seq_args, &hdr); encode_putfh(xdr, args->fh, &hdr); encode_remove(xdr, &args->name, &hdr); - encode_getfattr(xdr, args->bitmask, &hdr); encode_nops(&hdr); } @@ -2072,9 +2061,6 @@ static void nfs4_xdr_enc_rename(struct rpc_rqst *req, struct xdr_stream *xdr, encode_savefh(xdr, &hdr); encode_putfh(xdr, args->new_dir, &hdr); encode_rename(xdr, args->old_name, args->new_name, &hdr); - encode_getfattr(xdr, args->bitmask, &hdr); - encode_restorefh(xdr, &hdr); - encode_getfattr(xdr, args->bitmask, &hdr); encode_nops(&hdr); } @@ -2094,7 +2080,6 @@ static void nfs4_xdr_enc_link(struct rpc_rqst *req, struct xdr_stream *xdr, encode_savefh(xdr, &hdr); encode_putfh(xdr, args->dir_fh, &hdr); encode_link(xdr, args->name, &hdr); - encode_getfattr(xdr, args->bitmask, &hdr); encode_restorefh(xdr, &hdr); encode_getfattr(xdr, args->bitmask, &hdr); encode_nops(&hdr); @@ -5782,9 +5767,6 @@ static int nfs4_xdr_dec_remove(struct rpc_rqst *rqstp, struct xdr_stream *xdr, if (status) goto out; status = decode_remove(xdr, &res->cinfo); - if (status) - goto out; - decode_getfattr(xdr, res->dir_attr, res->server); out: return status; } @@ -5814,15 +5796,6 @@ static int nfs4_xdr_dec_rename(struct rpc_rqst *rqstp, struct xdr_stream *xdr, if (status) goto out; status = decode_rename(xdr, &res->old_cinfo, &res->new_cinfo); - if (status) - goto out; - /* Current FH is target directory */ - if (decode_getfattr(xdr, res->new_fattr, res->server)) - goto out; - status = decode_restorefh(xdr); - if (status) - goto out; - decode_getfattr(xdr, res->old_fattr, res->server); out: return status; } @@ -5858,8 +5831,6 @@ static int nfs4_xdr_dec_link(struct rpc_rqst *rqstp, struct xdr_stream *xdr, * Note order: OP_LINK leaves the directory as the current * filehandle. */ - if (decode_getfattr(xdr, res->dir_attr, res->server)) - goto out; status = decode_restorefh(xdr); if (status) goto out; diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 696a17e..2e53a3f 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -540,7 +540,6 @@ struct nfs_commitres { struct nfs_removeargs { const struct nfs_fh *fh; struct qstr name; - const u32 * bitmask; struct nfs4_sequence_args seq_args; }; @@ -559,7 +558,6 @@ struct nfs_renameargs { const struct nfs_fh *new_dir; const struct qstr *old_name; const struct qstr *new_name; - const u32 *bitmask; struct nfs4_sequence_args seq_args; }; -- cgit v0.10.2 From 5a37f85131c526ed7a3991d4dc2845498f81c1de Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 28 Apr 2012 14:55:16 -0400 Subject: NFSv4: Don't request cache consistency attributes on some writes We don't need cache consistency information when we're doing O_DIRECT writes. Ditto for the case of delegated writes. Signed-off-by: Trond Myklebust diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index c746b0c..64b67f3 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3370,7 +3370,7 @@ static int nfs4_write_done_cb(struct rpc_task *task, struct nfs_write_data *data } if (task->tk_status >= 0) { renew_lease(NFS_SERVER(inode), data->timestamp); - nfs_post_op_update_inode_force_wcc(inode, data->res.fattr); + nfs_post_op_update_inode_force_wcc(inode, &data->fattr); } return 0; } @@ -3401,15 +3401,30 @@ void nfs4_reset_write(struct rpc_task *task, struct nfs_write_data *data) } EXPORT_SYMBOL_GPL(nfs4_reset_write); +static +bool nfs4_write_need_cache_consistency_data(const struct nfs_write_data *data) +{ + const struct nfs_pgio_header *hdr = data->header; + + /* Don't request attributes for pNFS or O_DIRECT writes */ + if (data->ds_clp != NULL || hdr->dreq != NULL) + return false; + /* Otherwise, request attributes if and only if we don't hold + * a delegation + */ + return nfs_have_delegation(hdr->inode, FMODE_READ) == 0; +} + static void nfs4_proc_write_setup(struct nfs_write_data *data, struct rpc_message *msg) { struct nfs_server *server = NFS_SERVER(data->header->inode); - if (data->ds_clp) { + if (!nfs4_write_need_cache_consistency_data(data)) { data->args.bitmask = NULL; data->res.fattr = NULL; } else data->args.bitmask = server->cache_consistency_bitmask; + if (!data->write_done_cb) data->write_done_cb = nfs4_write_done_cb; data->res.server = server; -- cgit v0.10.2 From 8582715e733d08bc98fe629db0601360d70de4dc Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 29 Apr 2012 10:44:42 -0400 Subject: NFSv4: COMMIT does not need post-op attributes No attributes are supposed to change during a COMMIT call, so there is no need to request post-op attributes. Signed-off-by: Trond Myklebust diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 64b67f3..98eb48d 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3462,7 +3462,6 @@ static int nfs4_commit_done_cb(struct rpc_task *task, struct nfs_commit_data *da rpc_restart_call_prepare(task); return -EAGAIN; } - nfs_refresh_inode(inode, data->res.fattr); return 0; } @@ -3477,11 +3476,6 @@ static void nfs4_proc_commit_setup(struct nfs_commit_data *data, struct rpc_mess { struct nfs_server *server = NFS_SERVER(data->inode); - if (data->lseg) { - data->args.bitmask = NULL; - data->res.fattr = NULL; - } else - data->args.bitmask = server->cache_consistency_bitmask; if (data->commit_done_cb == NULL) data->commit_done_cb = nfs4_commit_done_cb; data->res.server = server; diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 49483f1..db040e9 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -421,13 +421,11 @@ static int nfs4_stat_to_errno(int); #define NFS4_enc_commit_sz (compound_encode_hdr_maxsz + \ encode_sequence_maxsz + \ encode_putfh_maxsz + \ - encode_commit_maxsz + \ - encode_getattr_maxsz) + encode_commit_maxsz) #define NFS4_dec_commit_sz (compound_decode_hdr_maxsz + \ decode_sequence_maxsz + \ decode_putfh_maxsz + \ - decode_commit_maxsz + \ - decode_getattr_maxsz) + decode_commit_maxsz) #define NFS4_enc_open_sz (compound_encode_hdr_maxsz + \ encode_sequence_maxsz + \ encode_putfh_maxsz + \ @@ -2425,8 +2423,6 @@ static void nfs4_xdr_enc_commit(struct rpc_rqst *req, struct xdr_stream *xdr, encode_sequence(xdr, &args->seq_args, &hdr); encode_putfh(xdr, args->fh, &hdr); encode_commit(xdr, args, &hdr); - if (args->bitmask) - encode_getfattr(xdr, args->bitmask, &hdr); encode_nops(&hdr); } @@ -6306,10 +6302,6 @@ static int nfs4_xdr_dec_commit(struct rpc_rqst *rqstp, struct xdr_stream *xdr, if (status) goto out; status = decode_commit(xdr, res); - if (status) - goto out; - if (res->fattr) - decode_getfattr(xdr, res->fattr, res->server); out: return status; } -- cgit v0.10.2 From d69ee9b85541a69a1092f5da675bd23256dc62af Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 1 May 2012 17:37:59 -0400 Subject: NFS: Adapt readdirplus to application usage patterns While the use of READDIRPLUS is significantly more efficient than READDIR followed by many LOOKUP calls, it is still less efficient than just READDIR if the attributes are not required. This patch tracks when lookups are attempted on the directory, and uses that information to selectively disable READDIRPLUS on that directory. The first 'readdir' call is always served using READDIRPLUS. Subsequent calls only use READDIRPLUS if there was a successful lookup or revalidation on a child in the mean time. Credit for the original idea should go to Neil Brown. See: http://www.spinics.net/lists/linux-nfs/msg19996.html However, the implementation in this patch differs from Neil's in that it focuses on tracking lookups rather than calls to stat(). Signed-off-by: Trond Myklebust Cc: Neil Brown diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 82b42e2..d0884c0 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -475,6 +475,29 @@ different: } static +bool nfs_use_readdirplus(struct inode *dir, struct file *filp) +{ + if (!nfs_server_capable(dir, NFS_CAP_READDIRPLUS)) + return false; + if (test_and_clear_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(dir)->flags)) + return true; + if (filp->f_pos == 0) + return true; + return false; +} + +/* + * This function is called by the lookup code to request the use of + * readdirplus to accelerate any future lookups in the same + * directory. + */ +static +void nfs_advise_use_readdirplus(struct inode *dir) +{ + set_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(dir)->flags); +} + +static void nfs_prime_dcache(struct dentry *parent, struct nfs_entry *entry) { struct qstr filename = { @@ -874,7 +897,7 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir) desc->file = filp; desc->dir_cookie = &dir_ctx->dir_cookie; desc->decode = NFS_PROTO(inode)->decode_dirent; - desc->plus = NFS_USE_READDIRPLUS(inode); + desc->plus = nfs_use_readdirplus(inode, filp) ? 1 : 0; nfs_block_sillyrename(dentry); res = nfs_revalidate_mapping(inode, filp->f_mapping); @@ -1114,7 +1137,7 @@ static int nfs_lookup_revalidate(struct dentry *dentry, struct nameidata *nd) if (!inode) { if (nfs_neg_need_reval(dir, dentry, nd)) goto out_bad; - goto out_valid; + goto out_valid_noent; } if (is_bad_inode(inode)) { @@ -1156,6 +1179,9 @@ static int nfs_lookup_revalidate(struct dentry *dentry, struct nameidata *nd) out_set_verifier: nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); out_valid: + /* Success: notify readdir to use READDIRPLUS */ + nfs_advise_use_readdirplus(dir); + out_valid_noent: dput(parent); dfprintk(LOOKUPCACHE, "NFS: %s(%s/%s) is valid\n", __func__, dentry->d_parent->d_name.name, @@ -1311,6 +1337,9 @@ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, stru if (IS_ERR(res)) goto out_unblock_sillyrename; + /* Success: notify readdir to use READDIRPLUS */ + nfs_advise_use_readdirplus(dir); + no_entry: res = d_materialise_unique(dentry, inode); if (res != NULL) { diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 0d53113..9f17cd1 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -298,8 +298,6 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr) inode->i_op = NFS_SB(sb)->nfs_client->rpc_ops->dir_inode_ops; inode->i_fop = &nfs_dir_operations; inode->i_data.a_ops = &nfs_dir_aops; - if (nfs_server_capable(inode, NFS_CAP_READDIRPLUS)) - set_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(inode)->flags); /* Deal with crossing mountpoints */ if (fattr->valid & NFS_ATTR_FATTR_MOUNTPOINT || fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL) { diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 8a88c16..6cc7dba 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -314,11 +314,6 @@ static inline int nfs_server_capable(struct inode *inode, int cap) return NFS_SERVER(inode)->caps & cap; } -static inline int NFS_USE_READDIRPLUS(struct inode *inode) -{ - return test_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(inode)->flags); -} - static inline void nfs_set_verifier(struct dentry * dentry, unsigned long verf) { dentry->d_time = verf; -- cgit v0.10.2 From e447c50e3af5dcad3075c80bd1bdc4e2024b8186 Mon Sep 17 00:00:00 2001 From: Rajendra Nayak Date: Fri, 27 Apr 2012 17:58:13 +0530 Subject: clk: constify parent name arrays in macros parent name array is now expected to be const char *, make the relevent changes in the clk macros which define default clock types. Signed-off-by: Rajendra Nayak Signed-off-by: Mike Turquette diff --git a/include/linux/clk-private.h b/include/linux/clk-private.h index eeae7a3..6ebec83 100644 --- a/include/linux/clk-private.h +++ b/include/linux/clk-private.h @@ -70,7 +70,7 @@ struct clk { #define DEFINE_CLK_FIXED_RATE(_name, _flags, _rate, \ _fixed_rate_flags) \ static struct clk _name; \ - static char *_name##_parent_names[] = {}; \ + static const char *_name##_parent_names[] = {}; \ static struct clk_fixed_rate _name##_hw = { \ .hw = { \ .clk = &_name, \ @@ -85,7 +85,7 @@ struct clk { _flags, _reg, _bit_idx, \ _gate_flags, _lock) \ static struct clk _name; \ - static char *_name##_parent_names[] = { \ + static const char *_name##_parent_names[] = { \ _parent_name, \ }; \ static struct clk *_name##_parents[] = { \ @@ -107,7 +107,7 @@ struct clk { _flags, _reg, _shift, _width, \ _divider_flags, _lock) \ static struct clk _name; \ - static char *_name##_parent_names[] = { \ + static const char *_name##_parent_names[] = { \ _parent_name, \ }; \ static struct clk *_name##_parents[] = { \ -- cgit v0.10.2 From 0197b3ea0f66cd2a11417f58fe1812858ea77908 Mon Sep 17 00:00:00 2001 From: Saravana Kannan Date: Wed, 25 Apr 2012 22:58:56 -0700 Subject: clk: Use a separate struct for holding init data. Create a struct clk_init_data to hold all data that needs to be passed from the platfrom specific driver to the common clock framework during clock registration. Add a pointer to this struct inside clk_hw. This has several advantages: * Completely hides struct clk from many clock platform drivers and static clock initialization code that don't care for static initialization of the struct clks. * For platforms that want to do complete static initialization, it removed the need to directly mess with the struct clk's fields while still allowing to statically allocate struct clk. This keeps the code more future proof even if they include clk-private.h. * Simplifies the generic clk_register() function and allows adding optional fields in the future without modifying the function signature. * Simplifies the static initialization of clocks on all platforms by removing the need for forward delcarations or convoluted macros. Signed-off-by: Saravana Kannan [mturquette@linaro.org: kept DEFINE_CLK_* macros and __clk_init] Signed-off-by: Mike Turquette Cc: Andrew Lunn Cc: Rob Herring Cc: Russell King Cc: Jeremy Kerr Cc: Thomas Gleixner Cc: Arnd Bergman Cc: Paul Walmsley Cc: Shawn Guo Cc: Sascha Hauer Cc: Jamie Iles Cc: Richard Zhao Cc: Saravana Kannan Cc: Magnus Damm Cc: Mark Brown Cc: Linus Walleij Cc: Stephen Boyd Cc: Amit Kucheria Cc: Deepak Saxena Cc: Grant Likely diff --git a/drivers/clk/clk-divider.c b/drivers/clk/clk-divider.c index 90627e4..8ea11b44 100644 --- a/drivers/clk/clk-divider.c +++ b/drivers/clk/clk-divider.c @@ -167,6 +167,7 @@ struct clk *clk_register_divider(struct device *dev, const char *name, { struct clk_divider *div; struct clk *clk; + struct clk_init_data init; /* allocate the divider */ div = kzalloc(sizeof(struct clk_divider), GFP_KERNEL); @@ -175,19 +176,22 @@ struct clk *clk_register_divider(struct device *dev, const char *name, return ERR_PTR(-ENOMEM); } + init.name = name; + init.ops = &clk_divider_ops; + init.flags = flags; + init.parent_names = (parent_name ? &parent_name: NULL); + init.num_parents = (parent_name ? 1 : 0); + /* struct clk_divider assignments */ div->reg = reg; div->shift = shift; div->width = width; div->flags = clk_divider_flags; div->lock = lock; + div->hw.init = &init; /* register the clock */ - clk = clk_register(dev, name, - &clk_divider_ops, &div->hw, - (parent_name ? &parent_name: NULL), - (parent_name ? 1 : 0), - flags); + clk = clk_register(dev, &div->hw); if (IS_ERR(clk)) kfree(div); diff --git a/drivers/clk/clk-fixed-rate.c b/drivers/clk/clk-fixed-rate.c index b555a04..cbd2462 100644 --- a/drivers/clk/clk-fixed-rate.c +++ b/drivers/clk/clk-fixed-rate.c @@ -52,6 +52,7 @@ struct clk *clk_register_fixed_rate(struct device *dev, const char *name, { struct clk_fixed_rate *fixed; struct clk *clk; + struct clk_init_data init; /* allocate fixed-rate clock */ fixed = kzalloc(sizeof(struct clk_fixed_rate), GFP_KERNEL); @@ -60,15 +61,18 @@ struct clk *clk_register_fixed_rate(struct device *dev, const char *name, return ERR_PTR(-ENOMEM); } + init.name = name; + init.ops = &clk_fixed_rate_ops; + init.flags = flags; + init.parent_names = (parent_name ? &parent_name: NULL); + init.num_parents = (parent_name ? 1 : 0); + /* struct clk_fixed_rate assignments */ fixed->fixed_rate = fixed_rate; + fixed->hw.init = &init; /* register the clock */ - clk = clk_register(dev, name, - &clk_fixed_rate_ops, &fixed->hw, - (parent_name ? &parent_name : NULL), - (parent_name ? 1 : 0), - flags); + clk = clk_register(dev, &fixed->hw); if (IS_ERR(clk)) kfree(fixed); diff --git a/drivers/clk/clk-gate.c b/drivers/clk/clk-gate.c index 0021616..578465e 100644 --- a/drivers/clk/clk-gate.c +++ b/drivers/clk/clk-gate.c @@ -119,6 +119,7 @@ struct clk *clk_register_gate(struct device *dev, const char *name, { struct clk_gate *gate; struct clk *clk; + struct clk_init_data init; /* allocate the gate */ gate = kzalloc(sizeof(struct clk_gate), GFP_KERNEL); @@ -127,18 +128,20 @@ struct clk *clk_register_gate(struct device *dev, const char *name, return ERR_PTR(-ENOMEM); } + init.name = name; + init.ops = &clk_gate_ops; + init.flags = flags; + init.parent_names = (parent_name ? &parent_name: NULL); + init.num_parents = (parent_name ? 1 : 0); + /* struct clk_gate assignments */ gate->reg = reg; gate->bit_idx = bit_idx; gate->flags = clk_gate_flags; gate->lock = lock; + gate->hw.init = &init; - /* register the clock */ - clk = clk_register(dev, name, - &clk_gate_ops, &gate->hw, - (parent_name ? &parent_name : NULL), - (parent_name ? 1 : 0), - flags); + clk = clk_register(dev, &gate->hw); if (IS_ERR(clk)) kfree(gate); diff --git a/drivers/clk/clk-mux.c b/drivers/clk/clk-mux.c index 6e58f11..8e97491 100644 --- a/drivers/clk/clk-mux.c +++ b/drivers/clk/clk-mux.c @@ -95,6 +95,7 @@ struct clk *clk_register_mux(struct device *dev, const char *name, { struct clk_mux *mux; struct clk *clk; + struct clk_init_data init; /* allocate the mux */ mux = kzalloc(sizeof(struct clk_mux), GFP_KERNEL); @@ -103,6 +104,12 @@ struct clk *clk_register_mux(struct device *dev, const char *name, return ERR_PTR(-ENOMEM); } + init.name = name; + init.ops = &clk_mux_ops; + init.flags = flags; + init.parent_names = parent_names; + init.num_parents = num_parents; + /* struct clk_mux assignments */ mux->reg = reg; mux->shift = shift; @@ -110,8 +117,7 @@ struct clk *clk_register_mux(struct device *dev, const char *name, mux->flags = clk_mux_flags; mux->lock = lock; - clk = clk_register(dev, name, &clk_mux_ops, &mux->hw, - parent_names, num_parents, flags); + clk = clk_register(dev, &mux->hw); if (IS_ERR(clk)) kfree(mux); diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c index 2dd20c0..c81803b 100644 --- a/drivers/clk/clk.c +++ b/drivers/clk/clk.c @@ -1169,26 +1169,6 @@ EXPORT_SYMBOL_GPL(clk_set_parent); * * Initializes the lists in struct clk, queries the hardware for the * parent and rate and sets them both. - * - * Any struct clk passed into __clk_init must have the following members - * populated: - * .name - * .ops - * .hw - * .parent_names - * .num_parents - * .flags - * - * Essentially, everything that would normally be passed into clk_register is - * assumed to be initialized already in __clk_init. The other members may be - * populated, but are optional. - * - * __clk_init is only exposed via clk-private.h and is intended for use with - * very large numbers of clocks that need to be statically initialized. It is - * a layering violation to include clk-private.h from any code which implements - * a clock's .ops; as such any statically initialized clock data MUST be in a - * separate C file from the logic that implements it's operations. Returns 0 - * on success, otherwise an error code. */ int __clk_init(struct device *dev, struct clk *clk) { @@ -1321,14 +1301,47 @@ out: } /** + * __clk_register - register a clock and return a cookie. + * + * Same as clk_register, except that the .clk field inside hw shall point to a + * preallocated (generally statically allocated) struct clk. None of the fields + * of the struct clk need to be initialized. + * + * The data pointed to by .init and .clk field shall NOT be marked as init + * data. + * + * __clk_register is only exposed via clk-private.h and is intended for use with + * very large numbers of clocks that need to be statically initialized. It is + * a layering violation to include clk-private.h from any code which implements + * a clock's .ops; as such any statically initialized clock data MUST be in a + * separate C file from the logic that implements it's operations. Returns 0 + * on success, otherwise an error code. + */ +struct clk *__clk_register(struct device *dev, struct clk_hw *hw) +{ + int ret; + struct clk *clk; + + clk = hw->clk; + clk->name = hw->init->name; + clk->ops = hw->init->ops; + clk->hw = hw; + clk->flags = hw->init->flags; + clk->parent_names = hw->init->parent_names; + clk->num_parents = hw->init->num_parents; + + ret = __clk_init(dev, clk); + if (ret) + return ERR_PTR(ret); + + return clk; +} +EXPORT_SYMBOL_GPL(__clk_register); + +/** * clk_register - allocate a new clock, register it and return an opaque cookie * @dev: device that is registering this clock - * @name: clock name - * @ops: operations this clock supports * @hw: link to hardware-specific clock data - * @parent_names: array of string names for all possible parents - * @num_parents: number of possible parents - * @flags: framework-level hints and quirks * * clk_register is the primary interface for populating the clock tree with new * clock nodes. It returns a pointer to the newly allocated struct clk which @@ -1336,9 +1349,7 @@ out: * rest of the clock API. In the event of an error clk_register will return an * error code; drivers must test for an error code after calling clk_register. */ -struct clk *clk_register(struct device *dev, const char *name, - const struct clk_ops *ops, struct clk_hw *hw, - const char **parent_names, u8 num_parents, unsigned long flags) +struct clk *clk_register(struct device *dev, struct clk_hw *hw) { int i, ret; struct clk *clk; @@ -1350,15 +1361,20 @@ struct clk *clk_register(struct device *dev, const char *name, goto fail_out; } - clk->name = name; - clk->ops = ops; + clk->name = kstrdup(hw->init->name, GFP_KERNEL); + if (!clk->name) { + pr_err("%s: could not allocate clk->name\n", __func__); + ret = -ENOMEM; + goto fail_name; + } + clk->ops = hw->init->ops; clk->hw = hw; - clk->flags = flags; - clk->num_parents = num_parents; + clk->flags = hw->init->flags; + clk->num_parents = hw->init->num_parents; hw->clk = clk; /* allocate local copy in case parent_names is __initdata */ - clk->parent_names = kzalloc((sizeof(char*) * num_parents), + clk->parent_names = kzalloc((sizeof(char*) * clk->num_parents), GFP_KERNEL); if (!clk->parent_names) { @@ -1369,8 +1385,9 @@ struct clk *clk_register(struct device *dev, const char *name, /* copy each string name in case parent_names is __initdata */ - for (i = 0; i < num_parents; i++) { - clk->parent_names[i] = kstrdup(parent_names[i], GFP_KERNEL); + for (i = 0; i < clk->num_parents; i++) { + clk->parent_names[i] = kstrdup(hw->init->parent_names[i], + GFP_KERNEL); if (!clk->parent_names[i]) { pr_err("%s: could not copy parent_names\n", __func__); ret = -ENOMEM; @@ -1387,6 +1404,8 @@ fail_parent_names_copy: kfree(clk->parent_names[i]); kfree(clk->parent_names); fail_parent_names: + kfree(clk->name); +fail_name: kfree(clk); fail_out: return ERR_PTR(ret); diff --git a/include/linux/clk-private.h b/include/linux/clk-private.h index 6ebec83..b258532 100644 --- a/include/linux/clk-private.h +++ b/include/linux/clk-private.h @@ -167,5 +167,7 @@ struct clk { */ int __clk_init(struct device *dev, struct clk *clk); +struct clk *__clk_register(struct device *dev, struct clk_hw *hw); + #endif /* CONFIG_COMMON_CLK */ #endif /* CLK_PRIVATE_H */ diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h index 8f21489..5db3412 100644 --- a/include/linux/clk-provider.h +++ b/include/linux/clk-provider.h @@ -15,19 +15,6 @@ #ifdef CONFIG_COMMON_CLK -/** - * struct clk_hw - handle for traversing from a struct clk to its corresponding - * hardware-specific structure. struct clk_hw should be declared within struct - * clk_foo and then referenced by the struct clk instance that uses struct - * clk_foo's clk_ops - * - * clk: pointer to the struct clk instance that points back to this struct - * clk_hw instance - */ -struct clk_hw { - struct clk *clk; -}; - /* * flags used across common struct clk. these flags should only affect the * top-level framework. custom flags for dealing with hardware specifics @@ -39,6 +26,8 @@ struct clk_hw { #define CLK_IGNORE_UNUSED BIT(3) /* do not gate even if unused */ #define CLK_IS_ROOT BIT(4) /* root clk, has no parent */ +struct clk_hw; + /** * struct clk_ops - Callback operations for hardware clocks; these are to * be provided by the clock implementation, and will be called by drivers @@ -122,6 +111,41 @@ struct clk_ops { void (*init)(struct clk_hw *hw); }; +/** + * struct clk_init_data - holds init data that's common to all clocks and is + * shared between the clock provider and the common clock framework. + * + * @name: clock name + * @ops: operations this clock supports + * @parent_names: array of string names for all possible parents + * @num_parents: number of possible parents + * @flags: framework-level hints and quirks + */ +struct clk_init_data { + const char *name; + const struct clk_ops *ops; + const char **parent_names; + u8 num_parents; + unsigned long flags; +}; + +/** + * struct clk_hw - handle for traversing from a struct clk to its corresponding + * hardware-specific structure. struct clk_hw should be declared within struct + * clk_foo and then referenced by the struct clk instance that uses struct + * clk_foo's clk_ops + * + * @clk: pointer to the struct clk instance that points back to this struct + * clk_hw instance + * + * @init: pointer to struct clk_init_data that contains the init data shared + * with the common clock framework. + */ +struct clk_hw { + struct clk *clk; + struct clk_init_data *init; +}; + /* * DOC: Basic clock implementations common to many platforms * @@ -255,12 +279,7 @@ struct clk *clk_register_mux(struct device *dev, const char *name, /** * clk_register - allocate a new clock, register it and return an opaque cookie * @dev: device that is registering this clock - * @name: clock name - * @ops: operations this clock supports * @hw: link to hardware-specific clock data - * @parent_names: array of string names for all possible parents - * @num_parents: number of possible parents - * @flags: framework-level hints and quirks * * clk_register is the primary interface for populating the clock tree with new * clock nodes. It returns a pointer to the newly allocated struct clk which @@ -268,9 +287,7 @@ struct clk *clk_register_mux(struct device *dev, const char *name, * rest of the clock API. In the event of an error clk_register will return an * error code; drivers must test for an error code after calling clk_register. */ -struct clk *clk_register(struct device *dev, const char *name, - const struct clk_ops *ops, struct clk_hw *hw, - const char **parent_names, u8 num_parents, unsigned long flags); +struct clk *clk_register(struct device *dev, struct clk_hw *hw); /* helper functions */ const char *__clk_get_name(struct clk *clk); -- cgit v0.10.2 From 0e1c03017549a9df513622b3f15ff38eb8d35a62 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Wed, 11 Apr 2012 16:03:42 +0530 Subject: clk: clk_set_rate() must fail if CLK_SET_RATE_GATE is set and clk is enabled This is well documented but isn't implemented. clk_set_rate() must check if flags have CLK_SET_RATE_GATE bit set and is enabled too. Signed-off-by: Viresh Kumar Signed-off-by: Mike Turquette diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c index c81803b..8149764 100644 --- a/drivers/clk/clk.c +++ b/drivers/clk/clk.c @@ -900,6 +900,11 @@ int clk_set_rate(struct clk *clk, unsigned long rate) if (rate == clk->rate) goto out; + if ((clk->flags & CLK_SET_RATE_GATE) && __clk_is_enabled(clk)) { + ret = -EBUSY; + goto out; + } + /* calculate new rates and get the topmost changed clock */ top = clk_calc_new_rates(clk, rate); if (!top) { -- cgit v0.10.2 From 2af9e6db14db9a7a0a6510227352fb2e69f9d032 Mon Sep 17 00:00:00 2001 From: Sascha Hauer Date: Fri, 9 Mar 2012 09:11:55 +0100 Subject: ARM i.MX: Add common clock support for pllv1 The pllv1 is found on i.MX1, i.M25, i.MX27, i.MX31 and i.MX35. Currently only reading the rate is supported. Signed-off-by: Sascha Hauer diff --git a/arch/arm/mach-imx/Makefile b/arch/arm/mach-imx/Makefile index ab939c5..7e71b39 100644 --- a/arch/arm/mach-imx/Makefile +++ b/arch/arm/mach-imx/Makefile @@ -11,6 +11,8 @@ obj-$(CONFIG_SOC_IMX35) += mm-imx3.o cpu-imx35.o clock-imx35.o ehci-imx35.o pm-i obj-$(CONFIG_SOC_IMX5) += cpu-imx5.o mm-imx5.o clock-mx51-mx53.o ehci-imx5.o pm-imx5.o cpu_op-mx51.o +obj-$(CONFIG_COMMON_CLK) += clk-pllv1.o + # Support for CMOS sensor interface obj-$(CONFIG_MX1_VIDEO) += mx1-camera-fiq.o mx1-camera-fiq-ksym.o diff --git a/arch/arm/mach-imx/clk-pllv1.c b/arch/arm/mach-imx/clk-pllv1.c new file mode 100644 index 0000000..2d856f9 --- /dev/null +++ b/arch/arm/mach-imx/clk-pllv1.c @@ -0,0 +1,66 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "clk.h" + +/** + * pll v1 + * + * @clk_hw clock source + * @parent the parent clock name + * @base base address of pll registers + * + * PLL clock version 1, found on i.MX1/21/25/27/31/35 + */ +struct clk_pllv1 { + struct clk_hw hw; + void __iomem *base; +}; + +#define to_clk_pllv1(clk) (container_of(clk, struct clk_pllv1, clk)) + +static unsigned long clk_pllv1_recalc_rate(struct clk_hw *hw, + unsigned long parent_rate) +{ + struct clk_pllv1 *pll = to_clk_pllv1(hw); + + return mxc_decode_pll(readl(pll->base), parent_rate); +} + +struct clk_ops clk_pllv1_ops = { + .recalc_rate = clk_pllv1_recalc_rate, +}; + +struct clk *imx_clk_pllv1(const char *name, const char *parent, + void __iomem *base) +{ + struct clk_pllv1 *pll; + struct clk *clk; + struct clk_init_data init; + + pll = kmalloc(sizeof(*pll), GFP_KERNEL); + if (!pll) + return ERR_PTR(-ENOMEM); + + pll->base = base; + + init.name = name; + init.ops = &clk_pllv1_ops; + init.flags = 0; + init.parent_names = &parent; + init.num_parents = 1; + + pll->hw.init = &init; + + clk = clk_register(NULL, &pll->hw); + if (IS_ERR(clk)) + kfree(pll); + + return clk; +} diff --git a/arch/arm/mach-imx/clk.h b/arch/arm/mach-imx/clk.h index 00f2590..8a4aee6 100644 --- a/arch/arm/mach-imx/clk.h +++ b/arch/arm/mach-imx/clk.h @@ -5,7 +5,7 @@ #include #include -struct clk *imx_clk_pllv1(const char *name, char *parent, +struct clk *imx_clk_pllv1(const char *name, const char *parent, void __iomem *base); static inline struct clk *imx_clk_fixed(const char *name, int rate) -- cgit v0.10.2 From a547b816a879b235ae0f90ae32d74effbb20d6d1 Mon Sep 17 00:00:00 2001 From: Sascha Hauer Date: Mon, 19 Mar 2012 12:36:10 +0100 Subject: ARM i.MX: Add common clock support for pllv2 This PLL is found on i.MX51 and i.MX53 Signed-off-by: Sascha Hauer diff --git a/arch/arm/mach-imx/Makefile b/arch/arm/mach-imx/Makefile index 7e71b39..0c0e5c4 100644 --- a/arch/arm/mach-imx/Makefile +++ b/arch/arm/mach-imx/Makefile @@ -11,7 +11,7 @@ obj-$(CONFIG_SOC_IMX35) += mm-imx3.o cpu-imx35.o clock-imx35.o ehci-imx35.o pm-i obj-$(CONFIG_SOC_IMX5) += cpu-imx5.o mm-imx5.o clock-mx51-mx53.o ehci-imx5.o pm-imx5.o cpu_op-mx51.o -obj-$(CONFIG_COMMON_CLK) += clk-pllv1.o +obj-$(CONFIG_COMMON_CLK) += clk-pllv1.o clk-pllv2.o # Support for CMOS sensor interface obj-$(CONFIG_MX1_VIDEO) += mx1-camera-fiq.o mx1-camera-fiq-ksym.o diff --git a/arch/arm/mach-imx/clk-pllv2.c b/arch/arm/mach-imx/clk-pllv2.c new file mode 100644 index 0000000..4685919 --- /dev/null +++ b/arch/arm/mach-imx/clk-pllv2.c @@ -0,0 +1,249 @@ +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "clk.h" + +#define to_clk_pllv2(clk) (container_of(clk, struct clk_pllv2, clk)) + +/* PLL Register Offsets */ +#define MXC_PLL_DP_CTL 0x00 +#define MXC_PLL_DP_CONFIG 0x04 +#define MXC_PLL_DP_OP 0x08 +#define MXC_PLL_DP_MFD 0x0C +#define MXC_PLL_DP_MFN 0x10 +#define MXC_PLL_DP_MFNMINUS 0x14 +#define MXC_PLL_DP_MFNPLUS 0x18 +#define MXC_PLL_DP_HFS_OP 0x1C +#define MXC_PLL_DP_HFS_MFD 0x20 +#define MXC_PLL_DP_HFS_MFN 0x24 +#define MXC_PLL_DP_MFN_TOGC 0x28 +#define MXC_PLL_DP_DESTAT 0x2c + +/* PLL Register Bit definitions */ +#define MXC_PLL_DP_CTL_MUL_CTRL 0x2000 +#define MXC_PLL_DP_CTL_DPDCK0_2_EN 0x1000 +#define MXC_PLL_DP_CTL_DPDCK0_2_OFFSET 12 +#define MXC_PLL_DP_CTL_ADE 0x800 +#define MXC_PLL_DP_CTL_REF_CLK_DIV 0x400 +#define MXC_PLL_DP_CTL_REF_CLK_SEL_MASK (3 << 8) +#define MXC_PLL_DP_CTL_REF_CLK_SEL_OFFSET 8 +#define MXC_PLL_DP_CTL_HFSM 0x80 +#define MXC_PLL_DP_CTL_PRE 0x40 +#define MXC_PLL_DP_CTL_UPEN 0x20 +#define MXC_PLL_DP_CTL_RST 0x10 +#define MXC_PLL_DP_CTL_RCP 0x8 +#define MXC_PLL_DP_CTL_PLM 0x4 +#define MXC_PLL_DP_CTL_BRM0 0x2 +#define MXC_PLL_DP_CTL_LRF 0x1 + +#define MXC_PLL_DP_CONFIG_BIST 0x8 +#define MXC_PLL_DP_CONFIG_SJC_CE 0x4 +#define MXC_PLL_DP_CONFIG_AREN 0x2 +#define MXC_PLL_DP_CONFIG_LDREQ 0x1 + +#define MXC_PLL_DP_OP_MFI_OFFSET 4 +#define MXC_PLL_DP_OP_MFI_MASK (0xF << 4) +#define MXC_PLL_DP_OP_PDF_OFFSET 0 +#define MXC_PLL_DP_OP_PDF_MASK 0xF + +#define MXC_PLL_DP_MFD_OFFSET 0 +#define MXC_PLL_DP_MFD_MASK 0x07FFFFFF + +#define MXC_PLL_DP_MFN_OFFSET 0x0 +#define MXC_PLL_DP_MFN_MASK 0x07FFFFFF + +#define MXC_PLL_DP_MFN_TOGC_TOG_DIS (1 << 17) +#define MXC_PLL_DP_MFN_TOGC_TOG_EN (1 << 16) +#define MXC_PLL_DP_MFN_TOGC_CNT_OFFSET 0x0 +#define MXC_PLL_DP_MFN_TOGC_CNT_MASK 0xFFFF + +#define MXC_PLL_DP_DESTAT_TOG_SEL (1 << 31) +#define MXC_PLL_DP_DESTAT_MFN 0x07FFFFFF + +#define MAX_DPLL_WAIT_TRIES 1000 /* 1000 * udelay(1) = 1ms */ + +struct clk_pllv2 { + struct clk_hw hw; + void __iomem *base; +}; + +static unsigned long clk_pllv2_recalc_rate(struct clk_hw *hw, + unsigned long parent_rate) +{ + long mfi, mfn, mfd, pdf, ref_clk, mfn_abs; + unsigned long dp_op, dp_mfd, dp_mfn, dp_ctl, pll_hfsm, dbl; + void __iomem *pllbase; + s64 temp; + struct clk_pllv2 *pll = to_clk_pllv2(hw); + + pllbase = pll->base; + + dp_ctl = __raw_readl(pllbase + MXC_PLL_DP_CTL); + pll_hfsm = dp_ctl & MXC_PLL_DP_CTL_HFSM; + dbl = dp_ctl & MXC_PLL_DP_CTL_DPDCK0_2_EN; + + if (pll_hfsm == 0) { + dp_op = __raw_readl(pllbase + MXC_PLL_DP_OP); + dp_mfd = __raw_readl(pllbase + MXC_PLL_DP_MFD); + dp_mfn = __raw_readl(pllbase + MXC_PLL_DP_MFN); + } else { + dp_op = __raw_readl(pllbase + MXC_PLL_DP_HFS_OP); + dp_mfd = __raw_readl(pllbase + MXC_PLL_DP_HFS_MFD); + dp_mfn = __raw_readl(pllbase + MXC_PLL_DP_HFS_MFN); + } + pdf = dp_op & MXC_PLL_DP_OP_PDF_MASK; + mfi = (dp_op & MXC_PLL_DP_OP_MFI_MASK) >> MXC_PLL_DP_OP_MFI_OFFSET; + mfi = (mfi <= 5) ? 5 : mfi; + mfd = dp_mfd & MXC_PLL_DP_MFD_MASK; + mfn = mfn_abs = dp_mfn & MXC_PLL_DP_MFN_MASK; + /* Sign extend to 32-bits */ + if (mfn >= 0x04000000) { + mfn |= 0xFC000000; + mfn_abs = -mfn; + } + + ref_clk = 2 * parent_rate; + if (dbl != 0) + ref_clk *= 2; + + ref_clk /= (pdf + 1); + temp = (u64) ref_clk * mfn_abs; + do_div(temp, mfd + 1); + if (mfn < 0) + temp = -temp; + temp = (ref_clk * mfi) + temp; + + return temp; +} + +static int clk_pllv2_set_rate(struct clk_hw *hw, unsigned long rate, + unsigned long parent_rate) +{ + struct clk_pllv2 *pll = to_clk_pllv2(hw); + u32 reg; + void __iomem *pllbase; + long mfi, pdf, mfn, mfd = 999999; + s64 temp64; + unsigned long quad_parent_rate; + unsigned long pll_hfsm, dp_ctl; + + pllbase = pll->base; + + quad_parent_rate = 4 * parent_rate; + pdf = mfi = -1; + while (++pdf < 16 && mfi < 5) + mfi = rate * (pdf+1) / quad_parent_rate; + if (mfi > 15) + return -EINVAL; + pdf--; + + temp64 = rate * (pdf+1) - quad_parent_rate * mfi; + do_div(temp64, quad_parent_rate/1000000); + mfn = (long)temp64; + + dp_ctl = __raw_readl(pllbase + MXC_PLL_DP_CTL); + /* use dpdck0_2 */ + __raw_writel(dp_ctl | 0x1000L, pllbase + MXC_PLL_DP_CTL); + pll_hfsm = dp_ctl & MXC_PLL_DP_CTL_HFSM; + if (pll_hfsm == 0) { + reg = mfi << 4 | pdf; + __raw_writel(reg, pllbase + MXC_PLL_DP_OP); + __raw_writel(mfd, pllbase + MXC_PLL_DP_MFD); + __raw_writel(mfn, pllbase + MXC_PLL_DP_MFN); + } else { + reg = mfi << 4 | pdf; + __raw_writel(reg, pllbase + MXC_PLL_DP_HFS_OP); + __raw_writel(mfd, pllbase + MXC_PLL_DP_HFS_MFD); + __raw_writel(mfn, pllbase + MXC_PLL_DP_HFS_MFN); + } + + return 0; +} + +static long clk_pllv2_round_rate(struct clk_hw *hw, unsigned long rate, + unsigned long *prate) +{ + return rate; +} + +static int clk_pllv2_prepare(struct clk_hw *hw) +{ + struct clk_pllv2 *pll = to_clk_pllv2(hw); + u32 reg; + void __iomem *pllbase; + int i = 0; + + pllbase = pll->base; + reg = __raw_readl(pllbase + MXC_PLL_DP_CTL) | MXC_PLL_DP_CTL_UPEN; + __raw_writel(reg, pllbase + MXC_PLL_DP_CTL); + + /* Wait for lock */ + do { + reg = __raw_readl(pllbase + MXC_PLL_DP_CTL); + if (reg & MXC_PLL_DP_CTL_LRF) + break; + + udelay(1); + } while (++i < MAX_DPLL_WAIT_TRIES); + + if (i == MAX_DPLL_WAIT_TRIES) { + pr_err("MX5: pll locking failed\n"); + return -EINVAL; + } + + return 0; +} + +static void clk_pllv2_unprepare(struct clk_hw *hw) +{ + struct clk_pllv2 *pll = to_clk_pllv2(hw); + u32 reg; + void __iomem *pllbase; + + pllbase = pll->base; + reg = __raw_readl(pllbase + MXC_PLL_DP_CTL) & ~MXC_PLL_DP_CTL_UPEN; + __raw_writel(reg, pllbase + MXC_PLL_DP_CTL); +} + +struct clk_ops clk_pllv2_ops = { + .prepare = clk_pllv2_prepare, + .unprepare = clk_pllv2_unprepare, + .recalc_rate = clk_pllv2_recalc_rate, + .round_rate = clk_pllv2_round_rate, + .set_rate = clk_pllv2_set_rate, +}; + +struct clk *imx_clk_pllv2(const char *name, const char *parent, + void __iomem *base) +{ + struct clk_pllv2 *pll; + struct clk *clk; + struct clk_init_data init; + + pll = kzalloc(sizeof(*pll), GFP_KERNEL); + if (!pll) + return ERR_PTR(-ENOMEM); + + pll->base = base; + + init.name = name; + init.ops = &clk_pllv2_ops; + init.flags = 0; + init.parent_names = &parent; + init.num_parents = 1; + + pll->hw.init = &init; + + clk = clk_register(NULL, &pll->hw); + if (IS_ERR(clk)) + kfree(pll); + + return clk; +} diff --git a/arch/arm/mach-imx/clk.h b/arch/arm/mach-imx/clk.h index 8a4aee6..7f5da75 100644 --- a/arch/arm/mach-imx/clk.h +++ b/arch/arm/mach-imx/clk.h @@ -8,6 +8,9 @@ struct clk *imx_clk_pllv1(const char *name, const char *parent, void __iomem *base); +struct clk *imx_clk_pllv2(const char *name, const char *parent, + void __iomem *base); + static inline struct clk *imx_clk_fixed(const char *name, int rate) { return clk_register_fixed_rate(NULL, name, NULL, CLK_IS_ROOT, rate); -- cgit v0.10.2 From a3f6b9dbf2a964b85f0523e577807d6f63d6b29d Mon Sep 17 00:00:00 2001 From: Shawn Guo Date: Wed, 4 Apr 2012 16:02:28 +0800 Subject: ARM: imx: add common clock support for pllv3 This PLL is found on i.MX6 SoCs Signed-off-by: Shawn Guo diff --git a/arch/arm/mach-imx/Makefile b/arch/arm/mach-imx/Makefile index 0c0e5c4..66bc6be 100644 --- a/arch/arm/mach-imx/Makefile +++ b/arch/arm/mach-imx/Makefile @@ -11,7 +11,7 @@ obj-$(CONFIG_SOC_IMX35) += mm-imx3.o cpu-imx35.o clock-imx35.o ehci-imx35.o pm-i obj-$(CONFIG_SOC_IMX5) += cpu-imx5.o mm-imx5.o clock-mx51-mx53.o ehci-imx5.o pm-imx5.o cpu_op-mx51.o -obj-$(CONFIG_COMMON_CLK) += clk-pllv1.o clk-pllv2.o +obj-$(CONFIG_COMMON_CLK) += clk-pllv1.o clk-pllv2.o clk-pllv3.o # Support for CMOS sensor interface obj-$(CONFIG_MX1_VIDEO) += mx1-camera-fiq.o mx1-camera-fiq-ksym.o diff --git a/arch/arm/mach-imx/clk-pllv3.c b/arch/arm/mach-imx/clk-pllv3.c new file mode 100644 index 0000000..36aac94 --- /dev/null +++ b/arch/arm/mach-imx/clk-pllv3.c @@ -0,0 +1,419 @@ +/* + * Copyright 2012 Freescale Semiconductor, Inc. + * Copyright 2012 Linaro Ltd. + * + * The code contained herein is licensed under the GNU General Public + * License. You may obtain a copy of the GNU General Public License + * Version 2 or later at the following locations: + * + * http://www.opensource.org/licenses/gpl-license.html + * http://www.gnu.org/copyleft/gpl.html + */ + +#include +#include +#include +#include +#include +#include +#include "clk.h" + +#define PLL_NUM_OFFSET 0x10 +#define PLL_DENOM_OFFSET 0x20 + +#define BM_PLL_POWER (0x1 << 12) +#define BM_PLL_ENABLE (0x1 << 13) +#define BM_PLL_BYPASS (0x1 << 16) +#define BM_PLL_LOCK (0x1 << 31) + +/** + * struct clk_pllv3 - IMX PLL clock version 3 + * @clk_hw: clock source + * @base: base address of PLL registers + * @powerup_set: set POWER bit to power up the PLL + * @gate_mask: mask of gate bits + * @div_mask: mask of divider bits + * + * IMX PLL clock version 3, found on i.MX6 series. Divider for pllv3 + * is actually a multiplier, and always sits at bit 0. + */ +struct clk_pllv3 { + struct clk_hw hw; + void __iomem *base; + bool powerup_set; + u32 gate_mask; + u32 div_mask; +}; + +#define to_clk_pllv3(_hw) container_of(_hw, struct clk_pllv3, hw) + +static int clk_pllv3_prepare(struct clk_hw *hw) +{ + struct clk_pllv3 *pll = to_clk_pllv3(hw); + unsigned long timeout = jiffies + msecs_to_jiffies(10); + u32 val; + + val = readl_relaxed(pll->base); + val &= ~BM_PLL_BYPASS; + if (pll->powerup_set) + val |= BM_PLL_POWER; + else + val &= ~BM_PLL_POWER; + writel_relaxed(val, pll->base); + + /* Wait for PLL to lock */ + while (!(readl_relaxed(pll->base) & BM_PLL_LOCK)) + if (time_after(jiffies, timeout)) + return -ETIMEDOUT; + + return 0; +} + +static void clk_pllv3_unprepare(struct clk_hw *hw) +{ + struct clk_pllv3 *pll = to_clk_pllv3(hw); + u32 val; + + val = readl_relaxed(pll->base); + val |= BM_PLL_BYPASS; + if (pll->powerup_set) + val &= ~BM_PLL_POWER; + else + val |= BM_PLL_POWER; + writel_relaxed(val, pll->base); +} + +static int clk_pllv3_enable(struct clk_hw *hw) +{ + struct clk_pllv3 *pll = to_clk_pllv3(hw); + u32 val; + + val = readl_relaxed(pll->base); + val |= pll->gate_mask; + writel_relaxed(val, pll->base); + + return 0; +} + +static void clk_pllv3_disable(struct clk_hw *hw) +{ + struct clk_pllv3 *pll = to_clk_pllv3(hw); + u32 val; + + val = readl_relaxed(pll->base); + val &= ~pll->gate_mask; + writel_relaxed(val, pll->base); +} + +static unsigned long clk_pllv3_recalc_rate(struct clk_hw *hw, + unsigned long parent_rate) +{ + struct clk_pllv3 *pll = to_clk_pllv3(hw); + u32 div = readl_relaxed(pll->base) & pll->div_mask; + + return (div == 1) ? parent_rate * 22 : parent_rate * 20; +} + +static long clk_pllv3_round_rate(struct clk_hw *hw, unsigned long rate, + unsigned long *prate) +{ + unsigned long parent_rate = *prate; + + return (rate >= parent_rate * 22) ? parent_rate * 22 : + parent_rate * 20; +} + +static int clk_pllv3_set_rate(struct clk_hw *hw, unsigned long rate, + unsigned long parent_rate) +{ + struct clk_pllv3 *pll = to_clk_pllv3(hw); + u32 val, div; + + if (rate == parent_rate * 22) + div = 1; + else if (rate == parent_rate * 20) + div = 0; + else + return -EINVAL; + + val = readl_relaxed(pll->base); + val &= ~pll->div_mask; + val |= div; + writel_relaxed(val, pll->base); + + return 0; +} + +static const struct clk_ops clk_pllv3_ops = { + .prepare = clk_pllv3_prepare, + .unprepare = clk_pllv3_unprepare, + .enable = clk_pllv3_enable, + .disable = clk_pllv3_disable, + .recalc_rate = clk_pllv3_recalc_rate, + .round_rate = clk_pllv3_round_rate, + .set_rate = clk_pllv3_set_rate, +}; + +static unsigned long clk_pllv3_sys_recalc_rate(struct clk_hw *hw, + unsigned long parent_rate) +{ + struct clk_pllv3 *pll = to_clk_pllv3(hw); + u32 div = readl_relaxed(pll->base) & pll->div_mask; + + return parent_rate * div / 2; +} + +static long clk_pllv3_sys_round_rate(struct clk_hw *hw, unsigned long rate, + unsigned long *prate) +{ + unsigned long parent_rate = *prate; + unsigned long min_rate = parent_rate * 54 / 2; + unsigned long max_rate = parent_rate * 108 / 2; + u32 div; + + if (rate > max_rate) + rate = max_rate; + else if (rate < min_rate) + rate = min_rate; + div = rate * 2 / parent_rate; + + return parent_rate * div / 2; +} + +static int clk_pllv3_sys_set_rate(struct clk_hw *hw, unsigned long rate, + unsigned long parent_rate) +{ + struct clk_pllv3 *pll = to_clk_pllv3(hw); + unsigned long min_rate = parent_rate * 54 / 2; + unsigned long max_rate = parent_rate * 108 / 2; + u32 val, div; + + if (rate < min_rate || rate > max_rate) + return -EINVAL; + + div = rate * 2 / parent_rate; + val = readl_relaxed(pll->base); + val &= ~pll->div_mask; + val |= div; + writel_relaxed(val, pll->base); + + return 0; +} + +static const struct clk_ops clk_pllv3_sys_ops = { + .prepare = clk_pllv3_prepare, + .unprepare = clk_pllv3_unprepare, + .enable = clk_pllv3_enable, + .disable = clk_pllv3_disable, + .recalc_rate = clk_pllv3_sys_recalc_rate, + .round_rate = clk_pllv3_sys_round_rate, + .set_rate = clk_pllv3_sys_set_rate, +}; + +static unsigned long clk_pllv3_av_recalc_rate(struct clk_hw *hw, + unsigned long parent_rate) +{ + struct clk_pllv3 *pll = to_clk_pllv3(hw); + u32 mfn = readl_relaxed(pll->base + PLL_NUM_OFFSET); + u32 mfd = readl_relaxed(pll->base + PLL_DENOM_OFFSET); + u32 div = readl_relaxed(pll->base) & pll->div_mask; + + return (parent_rate * div) + ((parent_rate / mfd) * mfn); +} + +static long clk_pllv3_av_round_rate(struct clk_hw *hw, unsigned long rate, + unsigned long *prate) +{ + unsigned long parent_rate = *prate; + unsigned long min_rate = parent_rate * 27; + unsigned long max_rate = parent_rate * 54; + u32 div; + u32 mfn, mfd = 1000000; + s64 temp64; + + if (rate > max_rate) + rate = max_rate; + else if (rate < min_rate) + rate = min_rate; + + div = rate / parent_rate; + temp64 = (u64) (rate - div * parent_rate); + temp64 *= mfd; + do_div(temp64, parent_rate); + mfn = temp64; + + return parent_rate * div + parent_rate / mfd * mfn; +} + +static int clk_pllv3_av_set_rate(struct clk_hw *hw, unsigned long rate, + unsigned long parent_rate) +{ + struct clk_pllv3 *pll = to_clk_pllv3(hw); + unsigned long min_rate = parent_rate * 27; + unsigned long max_rate = parent_rate * 54; + u32 val, div; + u32 mfn, mfd = 1000000; + s64 temp64; + + if (rate < min_rate || rate > max_rate) + return -EINVAL; + + div = rate / parent_rate; + temp64 = (u64) (rate - div * parent_rate); + temp64 *= mfd; + do_div(temp64, parent_rate); + mfn = temp64; + + val = readl_relaxed(pll->base); + val &= ~pll->div_mask; + val |= div; + writel_relaxed(val, pll->base); + writel_relaxed(mfn, pll->base + PLL_NUM_OFFSET); + writel_relaxed(mfd, pll->base + PLL_DENOM_OFFSET); + + return 0; +} + +static const struct clk_ops clk_pllv3_av_ops = { + .prepare = clk_pllv3_prepare, + .unprepare = clk_pllv3_unprepare, + .enable = clk_pllv3_enable, + .disable = clk_pllv3_disable, + .recalc_rate = clk_pllv3_av_recalc_rate, + .round_rate = clk_pllv3_av_round_rate, + .set_rate = clk_pllv3_av_set_rate, +}; + +static unsigned long clk_pllv3_enet_recalc_rate(struct clk_hw *hw, + unsigned long parent_rate) +{ + struct clk_pllv3 *pll = to_clk_pllv3(hw); + u32 div = readl_relaxed(pll->base) & pll->div_mask; + + switch (div) { + case 0: + return 25000000; + case 1: + return 50000000; + case 2: + return 100000000; + case 3: + return 125000000; + } + + return 0; +} + +static long clk_pllv3_enet_round_rate(struct clk_hw *hw, unsigned long rate, + unsigned long *prate) +{ + if (rate >= 125000000) + rate = 125000000; + else if (rate >= 100000000) + rate = 100000000; + else if (rate >= 50000000) + rate = 50000000; + else + rate = 25000000; + return rate; +} + +static int clk_pllv3_enet_set_rate(struct clk_hw *hw, unsigned long rate, + unsigned long parent_rate) +{ + struct clk_pllv3 *pll = to_clk_pllv3(hw); + u32 val, div; + + switch (rate) { + case 25000000: + div = 0; + break; + case 50000000: + div = 1; + break; + case 100000000: + div = 2; + break; + case 125000000: + div = 3; + break; + default: + return -EINVAL; + } + + val = readl_relaxed(pll->base); + val &= ~pll->div_mask; + val |= div; + writel_relaxed(val, pll->base); + + return 0; +} + +static const struct clk_ops clk_pllv3_enet_ops = { + .prepare = clk_pllv3_prepare, + .unprepare = clk_pllv3_unprepare, + .enable = clk_pllv3_enable, + .disable = clk_pllv3_disable, + .recalc_rate = clk_pllv3_enet_recalc_rate, + .round_rate = clk_pllv3_enet_round_rate, + .set_rate = clk_pllv3_enet_set_rate, +}; + +static const struct clk_ops clk_pllv3_mlb_ops = { + .prepare = clk_pllv3_prepare, + .unprepare = clk_pllv3_unprepare, + .enable = clk_pllv3_enable, + .disable = clk_pllv3_disable, +}; + +struct clk *imx_clk_pllv3(enum imx_pllv3_type type, const char *name, + const char *parent_name, void __iomem *base, + u32 gate_mask, u32 div_mask) +{ + struct clk_pllv3 *pll; + const struct clk_ops *ops; + struct clk *clk; + struct clk_init_data init; + + pll = kzalloc(sizeof(*pll), GFP_KERNEL); + if (!pll) + return ERR_PTR(-ENOMEM); + + switch (type) { + case IMX_PLLV3_SYS: + ops = &clk_pllv3_sys_ops; + break; + case IMX_PLLV3_USB: + ops = &clk_pllv3_ops; + pll->powerup_set = true; + break; + case IMX_PLLV3_AV: + ops = &clk_pllv3_av_ops; + break; + case IMX_PLLV3_ENET: + ops = &clk_pllv3_enet_ops; + break; + case IMX_PLLV3_MLB: + ops = &clk_pllv3_mlb_ops; + break; + default: + ops = &clk_pllv3_ops; + } + pll->base = base; + pll->gate_mask = gate_mask; + pll->div_mask = div_mask; + + init.name = name; + init.ops = ops; + init.flags = 0; + init.parent_names = &parent_name; + init.num_parents = 1; + + pll->hw.init = &init; + + clk = clk_register(NULL, &pll->hw); + if (IS_ERR(clk)) + kfree(pll); + + return clk; +} diff --git a/arch/arm/mach-imx/clk.h b/arch/arm/mach-imx/clk.h index 7f5da75..331316d 100644 --- a/arch/arm/mach-imx/clk.h +++ b/arch/arm/mach-imx/clk.h @@ -11,6 +11,19 @@ struct clk *imx_clk_pllv1(const char *name, const char *parent, struct clk *imx_clk_pllv2(const char *name, const char *parent, void __iomem *base); +enum imx_pllv3_type { + IMX_PLLV3_GENERIC, + IMX_PLLV3_SYS, + IMX_PLLV3_USB, + IMX_PLLV3_AV, + IMX_PLLV3_ENET, + IMX_PLLV3_MLB, +}; + +struct clk *imx_clk_pllv3(enum imx_pllv3_type type, const char *name, + const char *parent_name, void __iomem *base, u32 gate_mask, + u32 div_mask); + static inline struct clk *imx_clk_fixed(const char *name, int rate) { return clk_register_fixed_rate(NULL, name, NULL, CLK_IS_ROOT, rate); -- cgit v0.10.2 From b75c015143a4a6021731ff3e36718896381be94f Mon Sep 17 00:00:00 2001 From: Sascha Hauer Date: Tue, 19 Apr 2011 08:33:45 +0200 Subject: ARM i.MX: Add common clock support for 2bit gate This gate consists of two bits: 0b00: clk disabled 0b01: clk enabled in run mode and disabled in sleep mode 0b11: clk enabled Currently only disabled and enabled are supported. As it's unlikely that we find something like this in another SoC create a i.MX specific clk helper for this. Signed-off-by: Sascha Hauer diff --git a/arch/arm/mach-imx/Makefile b/arch/arm/mach-imx/Makefile index 66bc6be..1b3f2ae 100644 --- a/arch/arm/mach-imx/Makefile +++ b/arch/arm/mach-imx/Makefile @@ -11,7 +11,7 @@ obj-$(CONFIG_SOC_IMX35) += mm-imx3.o cpu-imx35.o clock-imx35.o ehci-imx35.o pm-i obj-$(CONFIG_SOC_IMX5) += cpu-imx5.o mm-imx5.o clock-mx51-mx53.o ehci-imx5.o pm-imx5.o cpu_op-mx51.o -obj-$(CONFIG_COMMON_CLK) += clk-pllv1.o clk-pllv2.o clk-pllv3.o +obj-$(CONFIG_COMMON_CLK) += clk-pllv1.o clk-pllv2.o clk-pllv3.o clk-gate2.o # Support for CMOS sensor interface obj-$(CONFIG_MX1_VIDEO) += mx1-camera-fiq.o mx1-camera-fiq-ksym.o diff --git a/arch/arm/mach-imx/clk-gate2.c b/arch/arm/mach-imx/clk-gate2.c new file mode 100644 index 0000000..3c1b8ff --- /dev/null +++ b/arch/arm/mach-imx/clk-gate2.c @@ -0,0 +1,118 @@ +/* + * Copyright (C) 2010-2011 Canonical Ltd + * Copyright (C) 2011-2012 Mike Turquette, Linaro Ltd + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Gated clock implementation + */ + +#include +#include +#include +#include +#include +#include + +/** + * DOC: basic gatable clock which can gate and ungate it's ouput + * + * Traits of this clock: + * prepare - clk_(un)prepare only ensures parent is (un)prepared + * enable - clk_enable and clk_disable are functional & control gating + * rate - inherits rate from parent. No clk_set_rate support + * parent - fixed parent. No clk_set_parent support + */ + +#define to_clk_gate(_hw) container_of(_hw, struct clk_gate, hw) + +static int clk_gate2_enable(struct clk_hw *hw) +{ + struct clk_gate *gate = to_clk_gate(hw); + u32 reg; + unsigned long flags = 0; + + if (gate->lock) + spin_lock_irqsave(gate->lock, flags); + + reg = readl(gate->reg); + reg |= 3 << gate->bit_idx; + writel(reg, gate->reg); + + if (gate->lock) + spin_unlock_irqrestore(gate->lock, flags); + + return 0; +} + +static void clk_gate2_disable(struct clk_hw *hw) +{ + struct clk_gate *gate = to_clk_gate(hw); + u32 reg; + unsigned long flags = 0; + + if (gate->lock) + spin_lock_irqsave(gate->lock, flags); + + reg = readl(gate->reg); + reg &= ~(3 << gate->bit_idx); + writel(reg, gate->reg); + + if (gate->lock) + spin_unlock_irqrestore(gate->lock, flags); +} + +static int clk_gate2_is_enabled(struct clk_hw *hw) +{ + u32 reg; + struct clk_gate *gate = to_clk_gate(hw); + + reg = readl(gate->reg); + + if (((reg >> gate->bit_idx) & 3) == 3) + return 1; + + return 0; +} + +static struct clk_ops clk_gate2_ops = { + .enable = clk_gate2_enable, + .disable = clk_gate2_disable, + .is_enabled = clk_gate2_is_enabled, +}; + +struct clk *clk_register_gate2(struct device *dev, const char *name, + const char *parent_name, unsigned long flags, + void __iomem *reg, u8 bit_idx, + u8 clk_gate2_flags, spinlock_t *lock) +{ + struct clk_gate *gate; + struct clk *clk; + struct clk_init_data init; + + gate = kzalloc(sizeof(struct clk_gate), GFP_KERNEL); + if (!gate) + return ERR_PTR(-ENOMEM); + + /* struct clk_gate assignments */ + gate->reg = reg; + gate->bit_idx = bit_idx; + gate->flags = clk_gate2_flags; + gate->lock = lock; + + init.name = name; + init.ops = &clk_gate2_ops; + init.flags = flags; + init.parent_names = parent_name ? &parent_name : NULL; + init.num_parents = parent_name ? 1 : 0; + + gate->hw.init = &init; + + clk = clk_register(dev, &gate->hw); + if (IS_ERR(clk)) + kfree(clk); + + return clk; +} diff --git a/arch/arm/mach-imx/clk.h b/arch/arm/mach-imx/clk.h index 331316d..5f6e435 100644 --- a/arch/arm/mach-imx/clk.h +++ b/arch/arm/mach-imx/clk.h @@ -24,6 +24,18 @@ struct clk *imx_clk_pllv3(enum imx_pllv3_type type, const char *name, const char *parent_name, void __iomem *base, u32 gate_mask, u32 div_mask); +struct clk *clk_register_gate2(struct device *dev, const char *name, + const char *parent_name, unsigned long flags, + void __iomem *reg, u8 bit_idx, + u8 clk_gate_flags, spinlock_t *lock); + +static inline struct clk *imx_clk_gate2(const char *name, const char *parent, + void __iomem *reg, u8 shift) +{ + return clk_register_gate2(NULL, name, parent, CLK_SET_RATE_PARENT, reg, + shift, 0, &imx_ccm_lock); +} + static inline struct clk *imx_clk_fixed(const char *name, int rate) { return clk_register_fixed_rate(NULL, name, NULL, CLK_IS_ROOT, rate); -- cgit v0.10.2 From a10bd67f1905b394f5a9bd610dfc8b9b9befac0e Mon Sep 17 00:00:00 2001 From: Shawn Guo Date: Wed, 4 Apr 2012 16:07:53 +0800 Subject: ARM: imx: add common clock support for pfd Signed-off-by: Shawn Guo diff --git a/arch/arm/mach-imx/Makefile b/arch/arm/mach-imx/Makefile index 1b3f2ae..4d6be8d 100644 --- a/arch/arm/mach-imx/Makefile +++ b/arch/arm/mach-imx/Makefile @@ -11,7 +11,8 @@ obj-$(CONFIG_SOC_IMX35) += mm-imx3.o cpu-imx35.o clock-imx35.o ehci-imx35.o pm-i obj-$(CONFIG_SOC_IMX5) += cpu-imx5.o mm-imx5.o clock-mx51-mx53.o ehci-imx5.o pm-imx5.o cpu_op-mx51.o -obj-$(CONFIG_COMMON_CLK) += clk-pllv1.o clk-pllv2.o clk-pllv3.o clk-gate2.o +obj-$(CONFIG_COMMON_CLK) += clk-pllv1.o clk-pllv2.o clk-pllv3.o clk-gate2.o \ + clk-pfd.o # Support for CMOS sensor interface obj-$(CONFIG_MX1_VIDEO) += mx1-camera-fiq.o mx1-camera-fiq-ksym.o diff --git a/arch/arm/mach-imx/clk-pfd.c b/arch/arm/mach-imx/clk-pfd.c new file mode 100644 index 0000000..e2ed416 --- /dev/null +++ b/arch/arm/mach-imx/clk-pfd.c @@ -0,0 +1,147 @@ +/* + * Copyright 2012 Freescale Semiconductor, Inc. + * Copyright 2012 Linaro Ltd. + * + * The code contained herein is licensed under the GNU General Public + * License. You may obtain a copy of the GNU General Public License + * Version 2 or later at the following locations: + * + * http://www.opensource.org/licenses/gpl-license.html + * http://www.gnu.org/copyleft/gpl.html + */ + +#include +#include +#include +#include +#include +#include "clk.h" + +/** + * struct clk_pfd - IMX PFD clock + * @clk_hw: clock source + * @reg: PFD register address + * @idx: the index of PFD encoded in the register + * + * PFD clock found on i.MX6 series. Each register for PFD has 4 clk_pfd + * data encoded, and member idx is used to specify the one. And each + * register has SET, CLR and TOG registers at offset 0x4 0x8 and 0xc. + */ +struct clk_pfd { + struct clk_hw hw; + void __iomem *reg; + u8 idx; +}; + +#define to_clk_pfd(_hw) container_of(_hw, struct clk_pfd, hw) + +#define SET 0x4 +#define CLR 0x8 +#define OTG 0xc + +static int clk_pfd_enable(struct clk_hw *hw) +{ + struct clk_pfd *pfd = to_clk_pfd(hw); + + writel_relaxed(1 << ((pfd->idx + 1) * 8 - 1), pfd->reg + CLR); + + return 0; +} + +static void clk_pfd_disable(struct clk_hw *hw) +{ + struct clk_pfd *pfd = to_clk_pfd(hw); + + writel_relaxed(1 << ((pfd->idx + 1) * 8 - 1), pfd->reg + SET); +} + +static unsigned long clk_pfd_recalc_rate(struct clk_hw *hw, + unsigned long parent_rate) +{ + struct clk_pfd *pfd = to_clk_pfd(hw); + u64 tmp = parent_rate; + u8 frac = (readl_relaxed(pfd->reg) >> (pfd->idx * 8)) & 0x3f; + + tmp *= 18; + do_div(tmp, frac); + + return tmp; +} + +static long clk_pfd_round_rate(struct clk_hw *hw, unsigned long rate, + unsigned long *prate) +{ + u64 tmp = *prate; + u8 frac; + + tmp = tmp * 18 + rate / 2; + do_div(tmp, rate); + frac = tmp; + if (frac < 12) + frac = 12; + else if (frac > 35) + frac = 35; + tmp = *prate; + tmp *= 18; + do_div(tmp, frac); + + return tmp; +} + +static int clk_pfd_set_rate(struct clk_hw *hw, unsigned long rate, + unsigned long parent_rate) +{ + struct clk_pfd *pfd = to_clk_pfd(hw); + u64 tmp = parent_rate; + u8 frac; + + tmp = tmp * 18 + rate / 2; + do_div(tmp, rate); + frac = tmp; + if (frac < 12) + frac = 12; + else if (frac > 35) + frac = 35; + + writel_relaxed(0x3f << (pfd->idx * 8), pfd->reg + CLR); + writel_relaxed(frac << (pfd->idx * 8), pfd->reg + SET); + + return 0; +} + +static const struct clk_ops clk_pfd_ops = { + .enable = clk_pfd_enable, + .disable = clk_pfd_disable, + .recalc_rate = clk_pfd_recalc_rate, + .round_rate = clk_pfd_round_rate, + .set_rate = clk_pfd_set_rate, +}; + +struct clk *imx_clk_pfd(const char *name, const char *parent_name, + void __iomem *reg, u8 idx) +{ + struct clk_pfd *pfd; + struct clk *clk; + struct clk_init_data init; + + pfd = kzalloc(sizeof(*pfd), GFP_KERNEL); + if (!pfd) + return ERR_PTR(-ENOMEM); + + pfd->reg = reg; + pfd->idx = idx; + + init.name = name; + init.ops = &clk_pfd_ops; + init.flags = 0; + init.parent_names = &parent_name; + init.num_parents = 1; + + pfd->hw.init = &init; + + clk = clk_register(NULL, &pfd->hw); + if (IS_ERR(clk)) + kfree(pfd); + + return clk; +} diff --git a/arch/arm/mach-imx/clk.h b/arch/arm/mach-imx/clk.h index 5f6e435..5297099 100644 --- a/arch/arm/mach-imx/clk.h +++ b/arch/arm/mach-imx/clk.h @@ -36,6 +36,9 @@ static inline struct clk *imx_clk_gate2(const char *name, const char *parent, shift, 0, &imx_ccm_lock); } +struct clk *imx_clk_pfd(const char *name, const char *parent_name, + void __iomem *reg, u8 idx); + static inline struct clk *imx_clk_fixed(const char *name, int rate) { return clk_register_fixed_rate(NULL, name, NULL, CLK_IS_ROOT, rate); -- cgit v0.10.2 From 32af7a830fe1625fa93900606f82c541f3b3de94 Mon Sep 17 00:00:00 2001 From: Shawn Guo Date: Wed, 4 Apr 2012 16:20:56 +0800 Subject: ARM: imx: add common clock support for clk busy Signed-off-by: Shawn Guo Signed-off-by: Sascha Hauer diff --git a/arch/arm/mach-imx/Makefile b/arch/arm/mach-imx/Makefile index 4d6be8d..ae0a779 100644 --- a/arch/arm/mach-imx/Makefile +++ b/arch/arm/mach-imx/Makefile @@ -12,7 +12,7 @@ obj-$(CONFIG_SOC_IMX35) += mm-imx3.o cpu-imx35.o clock-imx35.o ehci-imx35.o pm-i obj-$(CONFIG_SOC_IMX5) += cpu-imx5.o mm-imx5.o clock-mx51-mx53.o ehci-imx5.o pm-imx5.o cpu_op-mx51.o obj-$(CONFIG_COMMON_CLK) += clk-pllv1.o clk-pllv2.o clk-pllv3.o clk-gate2.o \ - clk-pfd.o + clk-pfd.o clk-busy.o # Support for CMOS sensor interface obj-$(CONFIG_MX1_VIDEO) += mx1-camera-fiq.o mx1-camera-fiq-ksym.o diff --git a/arch/arm/mach-imx/clk-busy.c b/arch/arm/mach-imx/clk-busy.c new file mode 100644 index 0000000..1a7a8dd --- /dev/null +++ b/arch/arm/mach-imx/clk-busy.c @@ -0,0 +1,189 @@ +/* + * Copyright 2012 Freescale Semiconductor, Inc. + * Copyright 2012 Linaro Ltd. + * + * The code contained herein is licensed under the GNU General Public + * License. You may obtain a copy of the GNU General Public License + * Version 2 or later at the following locations: + * + * http://www.opensource.org/licenses/gpl-license.html + * http://www.gnu.org/copyleft/gpl.html + */ + +#include +#include +#include +#include +#include +#include +#include "clk.h" + +static int clk_busy_wait(void __iomem *reg, u8 shift) +{ + unsigned long timeout = jiffies + msecs_to_jiffies(10); + + while (readl_relaxed(reg) & (1 << shift)) + if (time_after(jiffies, timeout)) + return -ETIMEDOUT; + + return 0; +} + +struct clk_busy_divider { + struct clk_divider div; + const struct clk_ops *div_ops; + void __iomem *reg; + u8 shift; +}; + +static inline struct clk_busy_divider *to_clk_busy_divider(struct clk_hw *hw) +{ + struct clk_divider *div = container_of(hw, struct clk_divider, hw); + + return container_of(div, struct clk_busy_divider, div); +} + +static unsigned long clk_busy_divider_recalc_rate(struct clk_hw *hw, + unsigned long parent_rate) +{ + struct clk_busy_divider *busy = to_clk_busy_divider(hw); + + return busy->div_ops->recalc_rate(&busy->div.hw, parent_rate); +} + +static long clk_busy_divider_round_rate(struct clk_hw *hw, unsigned long rate, + unsigned long *prate) +{ + struct clk_busy_divider *busy = to_clk_busy_divider(hw); + + return busy->div_ops->round_rate(&busy->div.hw, rate, prate); +} + +static int clk_busy_divider_set_rate(struct clk_hw *hw, unsigned long rate, + unsigned long parent_rate) +{ + struct clk_busy_divider *busy = to_clk_busy_divider(hw); + int ret; + + ret = busy->div_ops->set_rate(&busy->div.hw, rate, parent_rate); + if (!ret) + ret = clk_busy_wait(busy->reg, busy->shift); + + return ret; +} + +static struct clk_ops clk_busy_divider_ops = { + .recalc_rate = clk_busy_divider_recalc_rate, + .round_rate = clk_busy_divider_round_rate, + .set_rate = clk_busy_divider_set_rate, +}; + +struct clk *imx_clk_busy_divider(const char *name, const char *parent_name, + void __iomem *reg, u8 shift, u8 width, + void __iomem *busy_reg, u8 busy_shift) +{ + struct clk_busy_divider *busy; + struct clk *clk; + struct clk_init_data init; + + busy = kzalloc(sizeof(*busy), GFP_KERNEL); + if (!busy) + return ERR_PTR(-ENOMEM); + + busy->reg = busy_reg; + busy->shift = busy_shift; + + busy->div.reg = reg; + busy->div.shift = shift; + busy->div.width = width; + busy->div.lock = &imx_ccm_lock; + busy->div_ops = &clk_divider_ops; + + init.name = name; + init.ops = &clk_busy_divider_ops; + init.flags = CLK_SET_RATE_PARENT; + init.parent_names = &parent_name; + init.num_parents = 1; + + busy->div.hw.init = &init; + + clk = clk_register(NULL, &busy->div.hw); + if (!clk) + kfree(busy); + + return clk; +} + +struct clk_busy_mux { + struct clk_mux mux; + const struct clk_ops *mux_ops; + void __iomem *reg; + u8 shift; +}; + +static inline struct clk_busy_mux *to_clk_busy_mux(struct clk_hw *hw) +{ + struct clk_mux *mux = container_of(hw, struct clk_mux, hw); + + return container_of(mux, struct clk_busy_mux, mux); +} + +static u8 clk_busy_mux_get_parent(struct clk_hw *hw) +{ + struct clk_busy_mux *busy = to_clk_busy_mux(hw); + + return busy->mux_ops->get_parent(&busy->mux.hw); +} + +static int clk_busy_mux_set_parent(struct clk_hw *hw, u8 index) +{ + struct clk_busy_mux *busy = to_clk_busy_mux(hw); + int ret; + + ret = busy->mux_ops->set_parent(&busy->mux.hw, index); + if (!ret) + ret = clk_busy_wait(busy->reg, busy->shift); + + return ret; +} + +struct clk_ops clk_busy_mux_ops = { + .get_parent = clk_busy_mux_get_parent, + .set_parent = clk_busy_mux_set_parent, +}; + +struct clk *imx_clk_busy_mux(const char *name, void __iomem *reg, u8 shift, + u8 width, void __iomem *busy_reg, u8 busy_shift, + const char **parent_names, int num_parents) +{ + struct clk_busy_mux *busy; + struct clk *clk; + struct clk_init_data init; + + busy = kzalloc(sizeof(*busy), GFP_KERNEL); + if (!busy) + return ERR_PTR(-ENOMEM); + + busy->reg = busy_reg; + busy->shift = busy_shift; + + busy->mux.reg = reg; + busy->mux.shift = shift; + busy->mux.width = width; + busy->mux.lock = &imx_ccm_lock; + busy->mux_ops = &clk_mux_ops; + + init.name = name; + init.ops = &clk_busy_mux_ops; + init.flags = 0; + init.parent_names = parent_names; + init.num_parents = num_parents; + + busy->mux.hw.init = &init; + + clk = clk_register(NULL, &busy->mux.hw); + if (IS_ERR(clk)) + kfree(busy); + + return clk; +} diff --git a/arch/arm/mach-imx/clk.h b/arch/arm/mach-imx/clk.h index 5297099..1bf64fe 100644 --- a/arch/arm/mach-imx/clk.h +++ b/arch/arm/mach-imx/clk.h @@ -39,6 +39,14 @@ static inline struct clk *imx_clk_gate2(const char *name, const char *parent, struct clk *imx_clk_pfd(const char *name, const char *parent_name, void __iomem *reg, u8 idx); +struct clk *imx_clk_busy_divider(const char *name, const char *parent_name, + void __iomem *reg, u8 shift, u8 width, + void __iomem *busy_reg, u8 busy_shift); + +struct clk *imx_clk_busy_mux(const char *name, void __iomem *reg, u8 shift, + u8 width, void __iomem *busy_reg, u8 busy_shift, + const char **parent_names, int num_parents); + static inline struct clk *imx_clk_fixed(const char *name, int rate) { return clk_register_fixed_rate(NULL, name, NULL, CLK_IS_ROOT, rate); -- cgit v0.10.2 From 6bbaec5676e4f475b0d78743cbd4c70a8804ce14 Mon Sep 17 00:00:00 2001 From: Sascha Hauer Date: Thu, 8 Mar 2012 22:24:12 +0100 Subject: ARM i.MX25: implement clocks using common clock framework Signed-off-by: Sascha Hauer Signed-off-by: Philipp Zabel diff --git a/arch/arm/mach-imx/Kconfig b/arch/arm/mach-imx/Kconfig index 7561eca..0b832b1 100644 --- a/arch/arm/mach-imx/Kconfig +++ b/arch/arm/mach-imx/Kconfig @@ -48,6 +48,7 @@ config SOC_IMX21 config SOC_IMX25 bool select ARCH_MX25 + select COMMON_CLK select CPU_ARM926T select ARCH_MXC_IOMUX_V3 select MXC_AVIC diff --git a/arch/arm/mach-imx/Makefile b/arch/arm/mach-imx/Makefile index ae0a779..173e869 100644 --- a/arch/arm/mach-imx/Makefile +++ b/arch/arm/mach-imx/Makefile @@ -1,7 +1,7 @@ obj-$(CONFIG_SOC_IMX1) += clock-imx1.o mm-imx1.o obj-$(CONFIG_SOC_IMX21) += clock-imx21.o mm-imx21.o -obj-$(CONFIG_SOC_IMX25) += clock-imx25.o mm-imx25.o ehci-imx25.o cpu-imx25.o +obj-$(CONFIG_SOC_IMX25) += clk-imx25.o mm-imx25.o ehci-imx25.o cpu-imx25.o obj-$(CONFIG_SOC_IMX27) += cpu-imx27.o pm-imx27.o obj-$(CONFIG_SOC_IMX27) += clock-imx27.o mm-imx27.o ehci-imx27.o diff --git a/arch/arm/mach-imx/clk-imx25.c b/arch/arm/mach-imx/clk-imx25.c new file mode 100644 index 0000000..d9833bb --- /dev/null +++ b/arch/arm/mach-imx/clk-imx25.c @@ -0,0 +1,248 @@ +/* + * Copyright (C) 2009 by Sascha Hauer, Pengutronix + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, + * MA 02110-1301, USA. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include "clk.h" + +#define CRM_BASE MX25_IO_ADDRESS(MX25_CRM_BASE_ADDR) + +#define CCM_MPCTL 0x00 +#define CCM_UPCTL 0x04 +#define CCM_CCTL 0x08 +#define CCM_CGCR0 0x0C +#define CCM_CGCR1 0x10 +#define CCM_CGCR2 0x14 +#define CCM_PCDR0 0x18 +#define CCM_PCDR1 0x1C +#define CCM_PCDR2 0x20 +#define CCM_PCDR3 0x24 +#define CCM_RCSR 0x28 +#define CCM_CRDR 0x2C +#define CCM_DCVR0 0x30 +#define CCM_DCVR1 0x34 +#define CCM_DCVR2 0x38 +#define CCM_DCVR3 0x3c +#define CCM_LTR0 0x40 +#define CCM_LTR1 0x44 +#define CCM_LTR2 0x48 +#define CCM_LTR3 0x4c +#define CCM_MCR 0x64 + +#define ccm(x) (CRM_BASE + (x)) + +static const char *cpu_sel_clks[] = { "mpll", "mpll_cpu_3_4", }; +static const char *per_sel_clks[] = { "ahb", "upll", }; + +enum mx25_clks { + dummy, osc, mpll, upll, mpll_cpu_3_4, cpu_sel, cpu, ahb, usb_div, ipg, + per0_sel, per1_sel, per2_sel, per3_sel, per4_sel, per5_sel, per6_sel, + per7_sel, per8_sel, per9_sel, per10_sel, per11_sel, per12_sel, + per13_sel, per14_sel, per15_sel, per0, per1, per2, per3, per4, per5, + per6, per7, per8, per9, per10, per11, per12, per13, per14, per15, + csi_ipg_per, esdhc1_ipg_per, esdhc2_ipg_per, gpt_ipg_per, i2c_ipg_per, + lcdc_ipg_per, nfc_ipg_per, ssi1_ipg_per, ssi2_ipg_per, uart_ipg_per, + csi_ahb, esdhc1_ahb, esdhc2_ahb, fec_ahb, lcdc_ahb, sdma_ahb, + usbotg_ahb, can1_ipg, can2_ipg, csi_ipg, cspi1_ipg, cspi2_ipg, + cspi3_ipg, dryice_ipg, esdhc1_ipg, esdhc2_ipg, fec_ipg, iim_ipg, + kpp_ipg, lcdc_ipg, pwm1_ipg, pwm2_ipg, pwm3_ipg, pwm4_ipg, sdma_ipg, + ssi1_ipg, ssi2_ipg, tsc_ipg, uart1_ipg, uart2_ipg, uart3_ipg, + uart4_ipg, uart5_ipg, wdt_ipg, clk_max +}; + +static struct clk *clk[clk_max]; + +int __init mx25_clocks_init(void) +{ + int i; + + clk[dummy] = imx_clk_fixed("dummy", 0); + clk[osc] = imx_clk_fixed("osc", 24000000); + clk[mpll] = imx_clk_pllv1("mpll", "osc", ccm(CCM_MPCTL)); + clk[upll] = imx_clk_pllv1("upll", "osc", ccm(CCM_UPCTL)); + clk[mpll_cpu_3_4] = imx_clk_fixed_factor("mpll_cpu_3_4", "mpll", 3, 4); + clk[cpu_sel] = imx_clk_mux("cpu_sel", ccm(CCM_CCTL), 14, 1, cpu_sel_clks, ARRAY_SIZE(cpu_sel_clks)); + clk[cpu] = imx_clk_divider("cpu", "cpu_sel", ccm(CCM_CCTL), 30, 2); + clk[ahb] = imx_clk_divider("ahb", "cpu", ccm(CCM_CCTL), 28, 2); + clk[usb_div] = imx_clk_divider("usb_div", "upll", ccm(CCM_CCTL), 16, 6); + clk[ipg] = imx_clk_fixed_factor("ipg", "ahb", 1, 2); + clk[per0_sel] = imx_clk_mux("per0_sel", ccm(CCM_MCR), 0, 1, per_sel_clks, ARRAY_SIZE(per_sel_clks)); + clk[per1_sel] = imx_clk_mux("per1_sel", ccm(CCM_MCR), 1, 1, per_sel_clks, ARRAY_SIZE(per_sel_clks)); + clk[per2_sel] = imx_clk_mux("per2_sel", ccm(CCM_MCR), 2, 1, per_sel_clks, ARRAY_SIZE(per_sel_clks)); + clk[per3_sel] = imx_clk_mux("per3_sel", ccm(CCM_MCR), 3, 1, per_sel_clks, ARRAY_SIZE(per_sel_clks)); + clk[per4_sel] = imx_clk_mux("per4_sel", ccm(CCM_MCR), 4, 1, per_sel_clks, ARRAY_SIZE(per_sel_clks)); + clk[per5_sel] = imx_clk_mux("per5_sel", ccm(CCM_MCR), 5, 1, per_sel_clks, ARRAY_SIZE(per_sel_clks)); + clk[per6_sel] = imx_clk_mux("per6_sel", ccm(CCM_MCR), 6, 1, per_sel_clks, ARRAY_SIZE(per_sel_clks)); + clk[per7_sel] = imx_clk_mux("per7_sel", ccm(CCM_MCR), 7, 1, per_sel_clks, ARRAY_SIZE(per_sel_clks)); + clk[per8_sel] = imx_clk_mux("per8_sel", ccm(CCM_MCR), 8, 1, per_sel_clks, ARRAY_SIZE(per_sel_clks)); + clk[per9_sel] = imx_clk_mux("per9_sel", ccm(CCM_MCR), 9, 1, per_sel_clks, ARRAY_SIZE(per_sel_clks)); + clk[per10_sel] = imx_clk_mux("per10_sel", ccm(CCM_MCR), 10, 1, per_sel_clks, ARRAY_SIZE(per_sel_clks)); + clk[per11_sel] = imx_clk_mux("per11_sel", ccm(CCM_MCR), 11, 1, per_sel_clks, ARRAY_SIZE(per_sel_clks)); + clk[per12_sel] = imx_clk_mux("per12_sel", ccm(CCM_MCR), 12, 1, per_sel_clks, ARRAY_SIZE(per_sel_clks)); + clk[per13_sel] = imx_clk_mux("per13_sel", ccm(CCM_MCR), 13, 1, per_sel_clks, ARRAY_SIZE(per_sel_clks)); + clk[per14_sel] = imx_clk_mux("per14_sel", ccm(CCM_MCR), 14, 1, per_sel_clks, ARRAY_SIZE(per_sel_clks)); + clk[per15_sel] = imx_clk_mux("per15_sel", ccm(CCM_MCR), 15, 1, per_sel_clks, ARRAY_SIZE(per_sel_clks)); + clk[per0] = imx_clk_divider("per0", "per0_sel", ccm(CCM_PCDR0), 0, 6); + clk[per1] = imx_clk_divider("per1", "per1_sel", ccm(CCM_PCDR0), 8, 6); + clk[per2] = imx_clk_divider("per2", "per2_sel", ccm(CCM_PCDR0), 16, 6); + clk[per3] = imx_clk_divider("per3", "per3_sel", ccm(CCM_PCDR0), 24, 6); + clk[per4] = imx_clk_divider("per4", "per4_sel", ccm(CCM_PCDR1), 0, 6); + clk[per5] = imx_clk_divider("per5", "per5_sel", ccm(CCM_PCDR1), 8, 6); + clk[per6] = imx_clk_divider("per6", "per6_sel", ccm(CCM_PCDR1), 16, 6); + clk[per7] = imx_clk_divider("per7", "per7_sel", ccm(CCM_PCDR1), 24, 6); + clk[per8] = imx_clk_divider("per8", "per8_sel", ccm(CCM_PCDR2), 0, 6); + clk[per9] = imx_clk_divider("per9", "per9_sel", ccm(CCM_PCDR2), 8, 6); + clk[per10] = imx_clk_divider("per10", "per10_sel", ccm(CCM_PCDR2), 16, 6); + clk[per11] = imx_clk_divider("per11", "per11_sel", ccm(CCM_PCDR2), 24, 6); + clk[per12] = imx_clk_divider("per12", "per12_sel", ccm(CCM_PCDR3), 0, 6); + clk[per13] = imx_clk_divider("per13", "per13_sel", ccm(CCM_PCDR3), 8, 6); + clk[per14] = imx_clk_divider("per14", "per14_sel", ccm(CCM_PCDR3), 16, 6); + clk[per15] = imx_clk_divider("per15", "per15_sel", ccm(CCM_PCDR3), 24, 6); + clk[csi_ipg_per] = imx_clk_gate("csi_ipg_per", "per0", ccm(CCM_CGCR0), 0); + clk[esdhc1_ipg_per] = imx_clk_gate("esdhc1_ipg_per", "per3", ccm(CCM_CGCR0), 3); + clk[esdhc2_ipg_per] = imx_clk_gate("esdhc2_ipg_per", "per4", ccm(CCM_CGCR0), 4); + clk[gpt_ipg_per] = imx_clk_gate("gpt_ipg_per", "per5", ccm(CCM_CGCR0), 5); + clk[i2c_ipg_per] = imx_clk_gate("i2c_ipg_per", "per6", ccm(CCM_CGCR0), 6); + clk[lcdc_ipg_per] = imx_clk_gate("lcdc_ipg_per", "per8", ccm(CCM_CGCR0), 7); + clk[nfc_ipg_per] = imx_clk_gate("nfc_ipg_per", "ipg_per", ccm(CCM_CGCR0), 8); + clk[ssi1_ipg_per] = imx_clk_gate("ssi1_ipg_per", "per13", ccm(CCM_CGCR0), 13); + clk[ssi2_ipg_per] = imx_clk_gate("ssi2_ipg_per", "per14", ccm(CCM_CGCR0), 14); + clk[uart_ipg_per] = imx_clk_gate("uart_ipg_per", "per15", ccm(CCM_CGCR0), 15); + clk[csi_ahb] = imx_clk_gate("csi_ahb", "ahb", ccm(CCM_CGCR0), 18); + clk[esdhc1_ahb] = imx_clk_gate("esdhc1_ahb", "ahb", ccm(CCM_CGCR0), 21); + clk[esdhc2_ahb] = imx_clk_gate("esdhc2_ahb", "ahb", ccm(CCM_CGCR0), 22); + clk[fec_ahb] = imx_clk_gate("fec_ahb", "ahb", ccm(CCM_CGCR0), 23); + clk[lcdc_ahb] = imx_clk_gate("lcdc_ahb", "ahb", ccm(CCM_CGCR0), 24); + clk[sdma_ahb] = imx_clk_gate("sdma_ahb", "ahb", ccm(CCM_CGCR0), 26); + clk[usbotg_ahb] = imx_clk_gate("usbotg_ahb", "ahb", ccm(CCM_CGCR0), 28); + clk[can1_ipg] = imx_clk_gate("can1_ipg", "ipg", ccm(CCM_CGCR1), 2); + clk[can2_ipg] = imx_clk_gate("can2_ipg", "ipg", ccm(CCM_CGCR1), 3); + clk[csi_ipg] = imx_clk_gate("csi_ipg", "ipg", ccm(CCM_CGCR1), 4); + clk[cspi1_ipg] = imx_clk_gate("cspi1_ipg", "ipg", ccm(CCM_CGCR1), 5); + clk[cspi2_ipg] = imx_clk_gate("cspi2_ipg", "ipg", ccm(CCM_CGCR1), 6); + clk[cspi3_ipg] = imx_clk_gate("cspi3_ipg", "ipg", ccm(CCM_CGCR1), 7); + clk[dryice_ipg] = imx_clk_gate("dryice_ipg", "ipg", ccm(CCM_CGCR1), 8); + clk[esdhc1_ipg] = imx_clk_gate("esdhc1_ipg", "ipg", ccm(CCM_CGCR1), 13); + clk[esdhc2_ipg] = imx_clk_gate("esdhc2_ipg", "ipg", ccm(CCM_CGCR1), 14); + clk[fec_ipg] = imx_clk_gate("fec_ipg", "ipg", ccm(CCM_CGCR1), 15); + clk[iim_ipg] = imx_clk_gate("iim_ipg", "ipg", ccm(CCM_CGCR1), 26); + clk[kpp_ipg] = imx_clk_gate("kpp_ipg", "ipg", ccm(CCM_CGCR1), 28); + clk[lcdc_ipg] = imx_clk_gate("lcdc_ipg", "ipg", ccm(CCM_CGCR1), 29); + clk[pwm1_ipg] = imx_clk_gate("pwm1_ipg", "ipg", ccm(CCM_CGCR1), 31); + clk[pwm2_ipg] = imx_clk_gate("pwm2_ipg", "ipg", ccm(CCM_CGCR2), 0); + clk[pwm3_ipg] = imx_clk_gate("pwm3_ipg", "ipg", ccm(CCM_CGCR2), 1); + clk[pwm4_ipg] = imx_clk_gate("pwm4_ipg", "ipg", ccm(CCM_CGCR2), 2); + clk[sdma_ipg] = imx_clk_gate("sdma_ipg", "ipg", ccm(CCM_CGCR2), 6); + clk[ssi1_ipg] = imx_clk_gate("ssi1_ipg", "ipg", ccm(CCM_CGCR2), 11); + clk[ssi2_ipg] = imx_clk_gate("ssi2_ipg", "ipg", ccm(CCM_CGCR2), 12); + clk[tsc_ipg] = imx_clk_gate("tsc_ipg", "ipg", ccm(CCM_CGCR2), 13); + clk[uart1_ipg] = imx_clk_gate("uart1_ipg", "ipg", ccm(CCM_CGCR2), 14); + clk[uart2_ipg] = imx_clk_gate("uart2_ipg", "ipg", ccm(CCM_CGCR2), 15); + clk[uart3_ipg] = imx_clk_gate("uart3_ipg", "ipg", ccm(CCM_CGCR2), 16); + clk[uart4_ipg] = imx_clk_gate("uart4_ipg", "ipg", ccm(CCM_CGCR2), 17); + clk[uart5_ipg] = imx_clk_gate("uart5_ipg", "ipg", ccm(CCM_CGCR2), 18); + clk[wdt_ipg] = imx_clk_gate("wdt_ipg", "ipg", ccm(CCM_CGCR2), 19); + + for (i = 0; i < ARRAY_SIZE(clk); i++) + if (IS_ERR(clk[i])) + pr_err("i.MX25 clk %d: register failed with %ld\n", + i, PTR_ERR(clk[i])); + + /* i.mx25 has the i.mx21 type uart */ + clk_register_clkdev(clk[uart1_ipg], "ipg", "imx21-uart.0"); + clk_register_clkdev(clk[uart_ipg_per], "per", "imx21-uart.0"); + clk_register_clkdev(clk[uart2_ipg], "ipg", "imx21-uart.1"); + clk_register_clkdev(clk[uart_ipg_per], "per", "imx21-uart.1"); + clk_register_clkdev(clk[uart3_ipg], "ipg", "imx21-uart.2"); + clk_register_clkdev(clk[uart_ipg_per], "per", "imx21-uart.2"); + clk_register_clkdev(clk[uart4_ipg], "ipg", "imx21-uart.3"); + clk_register_clkdev(clk[uart_ipg_per], "per", "imx21-uart.3"); + clk_register_clkdev(clk[uart5_ipg], "ipg", "imx21-uart.4"); + clk_register_clkdev(clk[uart_ipg_per], "per", "imx21-uart.4"); + clk_register_clkdev(clk[ipg], "ipg", "imx-gpt.0"); + clk_register_clkdev(clk[gpt_ipg_per], "per", "imx-gpt.0"); + clk_register_clkdev(clk[ipg], "ipg", "mxc-ehci.0"); + clk_register_clkdev(clk[usbotg_ahb], "ahb", "mxc-ehci.0"); + clk_register_clkdev(clk[usb_div], "per", "mxc-ehci.0"); + clk_register_clkdev(clk[ipg], "ipg", "mxc-ehci.1"); + clk_register_clkdev(clk[usbotg_ahb], "ahb", "mxc-ehci.1"); + clk_register_clkdev(clk[usb_div], "per", "mxc-ehci.1"); + clk_register_clkdev(clk[ipg], "ipg", "mxc-ehci.2"); + clk_register_clkdev(clk[usbotg_ahb], "ahb", "mxc-ehci.2"); + clk_register_clkdev(clk[usb_div], "per", "mxc-ehci.2"); + clk_register_clkdev(clk[ipg], "ipg", "fsl-usb2-udc"); + clk_register_clkdev(clk[usbotg_ahb], "ahb", "fsl-usb2-udc"); + clk_register_clkdev(clk[usb_div], "per", "fsl-usb2-udc"); + clk_register_clkdev(clk[nfc_ipg_per], NULL, "mxc_nand.0"); + /* i.mx25 has the i.mx35 type cspi */ + clk_register_clkdev(clk[cspi1_ipg], NULL, "imx35-cspi.0"); + clk_register_clkdev(clk[cspi2_ipg], NULL, "imx35-cspi.1"); + clk_register_clkdev(clk[cspi3_ipg], NULL, "imx35-cspi.2"); + clk_register_clkdev(clk[pwm1_ipg], "ipg", "mxc_pwm.0"); + clk_register_clkdev(clk[per10], "per", "mxc_pwm.0"); + clk_register_clkdev(clk[pwm1_ipg], "ipg", "mxc_pwm.1"); + clk_register_clkdev(clk[per10], "per", "mxc_pwm.1"); + clk_register_clkdev(clk[pwm1_ipg], "ipg", "mxc_pwm.2"); + clk_register_clkdev(clk[per10], "per", "mxc_pwm.2"); + clk_register_clkdev(clk[pwm1_ipg], "ipg", "mxc_pwm.3"); + clk_register_clkdev(clk[per10], "per", "mxc_pwm.3"); + clk_register_clkdev(clk[kpp_ipg], NULL, "imx-keypad"); + clk_register_clkdev(clk[tsc_ipg], NULL, "mx25-adc"); + clk_register_clkdev(clk[i2c_ipg_per], NULL, "imx-i2c.0"); + clk_register_clkdev(clk[i2c_ipg_per], NULL, "imx-i2c.1"); + clk_register_clkdev(clk[i2c_ipg_per], NULL, "imx-i2c.2"); + clk_register_clkdev(clk[fec_ipg], "ipg", "imx25-fec.0"); + clk_register_clkdev(clk[fec_ahb], "ahb", "imx25-fec.0"); + clk_register_clkdev(clk[dryice_ipg], NULL, "imxdi_rtc.0"); + clk_register_clkdev(clk[lcdc_ipg_per], "per", "imx-fb.0"); + clk_register_clkdev(clk[lcdc_ipg], "ipg", "imx-fb.0"); + clk_register_clkdev(clk[lcdc_ahb], "ahb", "imx-fb.0"); + clk_register_clkdev(clk[wdt_ipg], NULL, "imx2-wdt.0"); + clk_register_clkdev(clk[ssi1_ipg_per], "per", "imx-ssi.0"); + clk_register_clkdev(clk[ssi1_ipg], "ipg", "imx-ssi.0"); + clk_register_clkdev(clk[ssi2_ipg_per], "per", "imx-ssi.1"); + clk_register_clkdev(clk[ssi2_ipg], "ipg", "imx-ssi.1"); + clk_register_clkdev(clk[esdhc1_ipg_per], "per", "sdhci-esdhc-imx25.0"); + clk_register_clkdev(clk[esdhc1_ipg], "ipg", "sdhci-esdhc-imx25.0"); + clk_register_clkdev(clk[esdhc1_ahb], "ahb", "sdhci-esdhc-imx25.0"); + clk_register_clkdev(clk[esdhc2_ipg_per], "per", "sdhci-esdhc-imx25.1"); + clk_register_clkdev(clk[esdhc2_ipg], "ipg", "sdhci-esdhc-imx25.1"); + clk_register_clkdev(clk[esdhc2_ahb], "ahb", "sdhci-esdhc-imx25.1"); + clk_register_clkdev(clk[csi_ipg_per], "per", "mx2-camera.0"); + clk_register_clkdev(clk[csi_ipg], "ipg", "mx2-camera.0"); + clk_register_clkdev(clk[csi_ahb], "ahb", "mx2-camera.0"); + clk_register_clkdev(clk[dummy], "audmux", NULL); + clk_register_clkdev(clk[can1_ipg], NULL, "flexcan.0"); + clk_register_clkdev(clk[can2_ipg], NULL, "flexcan.1"); + /* i.mx25 has the i.mx35 type sdma */ + clk_register_clkdev(clk[sdma_ipg], "ipg", "imx35-sdma"); + clk_register_clkdev(clk[sdma_ahb], "ahb", "imx35-sdma"); + clk_register_clkdev(clk[iim_ipg], "iim", NULL); + + mxc_timer_init(NULL, MX25_IO_ADDRESS(MX25_GPT1_BASE_ADDR), 54); + return 0; +} -- cgit v0.10.2 From cd73785fb7cdd59f0edaaf9958b1bce60e0aab3a Mon Sep 17 00:00:00 2001 From: Sascha Hauer Date: Fri, 9 Mar 2012 09:11:32 +0100 Subject: ARM i.MX1: implement clocks using common clock framework This also changes the DMA clkdev lookup to use the imx-dma driver name and "ahb" as connection ID to request the hclk dma clock. Signed-off-by: Sascha Hauer Signed-off-by: Philipp Zabel diff --git a/arch/arm/mach-imx/Kconfig b/arch/arm/mach-imx/Kconfig index 0b832b1..c03920a 100644 --- a/arch/arm/mach-imx/Kconfig +++ b/arch/arm/mach-imx/Kconfig @@ -34,6 +34,7 @@ config ARCH_MX53 config SOC_IMX1 bool select ARCH_MX1 + select COMMON_CLK select CPU_ARM920T select IMX_HAVE_IOMUX_V1 select MXC_AVIC diff --git a/arch/arm/mach-imx/Makefile b/arch/arm/mach-imx/Makefile index 173e869..52b3af4 100644 --- a/arch/arm/mach-imx/Makefile +++ b/arch/arm/mach-imx/Makefile @@ -1,4 +1,4 @@ -obj-$(CONFIG_SOC_IMX1) += clock-imx1.o mm-imx1.o +obj-$(CONFIG_SOC_IMX1) += clk-imx1.o mm-imx1.o obj-$(CONFIG_SOC_IMX21) += clock-imx21.o mm-imx21.o obj-$(CONFIG_SOC_IMX25) += clk-imx25.o mm-imx25.o ehci-imx25.o cpu-imx25.o diff --git a/arch/arm/mach-imx/clk-imx1.c b/arch/arm/mach-imx/clk-imx1.c new file mode 100644 index 0000000..0f0beb5 --- /dev/null +++ b/arch/arm/mach-imx/clk-imx1.c @@ -0,0 +1,115 @@ +/* + * Copyright (C) 2008 Sascha Hauer , Pengutronix + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include "clk.h" + +/* CCM register addresses */ +#define IO_ADDR_CCM(off) (MX1_IO_ADDRESS(MX1_CCM_BASE_ADDR + (off))) + +#define CCM_CSCR IO_ADDR_CCM(0x0) +#define CCM_MPCTL0 IO_ADDR_CCM(0x4) +#define CCM_SPCTL0 IO_ADDR_CCM(0xc) +#define CCM_PCDR IO_ADDR_CCM(0x20) + +/* SCM register addresses */ +#define IO_ADDR_SCM(off) (MX1_IO_ADDRESS(MX1_SCM_BASE_ADDR + (off))) + +#define SCM_GCCR IO_ADDR_SCM(0xc) + +static const char *prem_sel_clks[] = { "clk32_premult", "clk16m", }; +static const char *clko_sel_clks[] = { "per1", "hclk", "clk48m", "clk16m", "prem", + "fclk", }; +enum imx1_clks { + dummy, clk32, clk16m_ext, clk16m, clk32_premult, prem, mpll, spll, mcu, + fclk, hclk, clk48m, per1, per2, per3, clko, dma_gate, csi_gate, + mma_gate, usbd_gate, clk_max +}; + +static struct clk *clk[clk_max]; + +int __init mx1_clocks_init(unsigned long fref) +{ + int i; + + clk[dummy] = imx_clk_fixed("dummy", 0); + clk[clk32] = imx_clk_fixed("clk32", fref); + clk[clk16m_ext] = imx_clk_fixed("clk16m_ext", 16000000); + clk[clk16m] = imx_clk_gate("clk16m", "clk16m_ext", CCM_CSCR, 17); + clk[clk32_premult] = imx_clk_fixed_factor("clk32_premult", "clk32", 512, 1); + clk[prem] = imx_clk_mux("prem", CCM_CSCR, 16, 1, prem_sel_clks, + ARRAY_SIZE(prem_sel_clks)); + clk[mpll] = imx_clk_pllv1("mpll", "clk32_premult", CCM_MPCTL0); + clk[spll] = imx_clk_pllv1("spll", "prem", CCM_SPCTL0); + clk[mcu] = imx_clk_divider("mcu", "clk32_premult", CCM_CSCR, 15, 1); + clk[fclk] = imx_clk_divider("fclk", "mpll", CCM_CSCR, 15, 1); + clk[hclk] = imx_clk_divider("hclk", "spll", CCM_CSCR, 10, 4); + clk[clk48m] = imx_clk_divider("clk48m", "spll", CCM_CSCR, 26, 3); + clk[per1] = imx_clk_divider("per1", "spll", CCM_PCDR, 0, 4); + clk[per2] = imx_clk_divider("per2", "spll", CCM_PCDR, 4, 4); + clk[per3] = imx_clk_divider("per3", "spll", CCM_PCDR, 16, 7); + clk[clko] = imx_clk_mux("clko", CCM_CSCR, 29, 3, clko_sel_clks, + ARRAY_SIZE(clko_sel_clks)); + clk[dma_gate] = imx_clk_gate("dma_gate", "hclk", SCM_GCCR, 4); + clk[csi_gate] = imx_clk_gate("csi_gate", "hclk", SCM_GCCR, 2); + clk[mma_gate] = imx_clk_gate("mma_gate", "hclk", SCM_GCCR, 1); + clk[usbd_gate] = imx_clk_gate("usbd_gate", "clk48m", SCM_GCCR, 0); + + for (i = 0; i < ARRAY_SIZE(clk); i++) + if (IS_ERR(clk[i])) + pr_err("imx1 clk %d: register failed with %ld\n", + i, PTR_ERR(clk[i])); + + clk_register_clkdev(clk[dma_gate], "ahb", "imx-dma"); + clk_register_clkdev(clk[csi_gate], NULL, "mx1-camera.0"); + clk_register_clkdev(clk[mma_gate], "mma", NULL); + clk_register_clkdev(clk[usbd_gate], NULL, "imx_udc.0"); + clk_register_clkdev(clk[per1], "per", "imx-gpt.0"); + clk_register_clkdev(clk[hclk], "ipg", "imx-gpt.0"); + clk_register_clkdev(clk[per1], "per", "imx1-uart.0"); + clk_register_clkdev(clk[hclk], "ipg", "imx1-uart.0"); + clk_register_clkdev(clk[per1], "per", "imx1-uart.1"); + clk_register_clkdev(clk[hclk], "ipg", "imx1-uart.1"); + clk_register_clkdev(clk[per1], "per", "imx1-uart.2"); + clk_register_clkdev(clk[hclk], "ipg", "imx1-uart.2"); + clk_register_clkdev(clk[hclk], NULL, "imx-i2c.0"); + clk_register_clkdev(clk[per2], "per", "imx1-cspi.0"); + clk_register_clkdev(clk[dummy], "ipg", "imx1-cspi.0"); + clk_register_clkdev(clk[per2], "per", "imx1-cspi.1"); + clk_register_clkdev(clk[dummy], "ipg", "imx1-cspi.1"); + clk_register_clkdev(clk[per2], NULL, "imx-mmc.0"); + clk_register_clkdev(clk[per2], "per", "imx-fb.0"); + clk_register_clkdev(clk[dummy], "ipg", "imx-fb.0"); + clk_register_clkdev(clk[dummy], "ahb", "imx-fb.0"); + clk_register_clkdev(clk[hclk], "mshc", NULL); + clk_register_clkdev(clk[per3], "ssi", NULL); + clk_register_clkdev(clk[clk32], NULL, "mxc_rtc.0"); + clk_register_clkdev(clk[clko], "clko", NULL); + + mxc_timer_init(NULL, MX1_IO_ADDRESS(MX1_TIM1_BASE_ADDR), + MX1_TIM1_INT); + + return 0; +} -- cgit v0.10.2 From 93421e42277009cbf23957e2ce02a7520cee7cb3 Mon Sep 17 00:00:00 2001 From: Sascha Hauer Date: Fri, 9 Mar 2012 09:11:39 +0100 Subject: ARM i.MX21: implement clocks using common clock framework Signed-off-by: Sascha Hauer Signed-off-by: Philipp Zabel diff --git a/arch/arm/mach-imx/Kconfig b/arch/arm/mach-imx/Kconfig index c03920a..d23cebd 100644 --- a/arch/arm/mach-imx/Kconfig +++ b/arch/arm/mach-imx/Kconfig @@ -43,6 +43,7 @@ config SOC_IMX21 bool select MACH_MX21 select CPU_ARM926T + select COMMON_CLK select IMX_HAVE_IOMUX_V1 select MXC_AVIC diff --git a/arch/arm/mach-imx/Makefile b/arch/arm/mach-imx/Makefile index 52b3af4..a2efb79 100644 --- a/arch/arm/mach-imx/Makefile +++ b/arch/arm/mach-imx/Makefile @@ -1,5 +1,5 @@ obj-$(CONFIG_SOC_IMX1) += clk-imx1.o mm-imx1.o -obj-$(CONFIG_SOC_IMX21) += clock-imx21.o mm-imx21.o +obj-$(CONFIG_SOC_IMX21) += clk-imx21.o mm-imx21.o obj-$(CONFIG_SOC_IMX25) += clk-imx25.o mm-imx25.o ehci-imx25.o cpu-imx25.o diff --git a/arch/arm/mach-imx/clk-imx21.c b/arch/arm/mach-imx/clk-imx21.c new file mode 100644 index 0000000..4e4f384 --- /dev/null +++ b/arch/arm/mach-imx/clk-imx21.c @@ -0,0 +1,186 @@ +/* + * Copyright 2004-2007 Freescale Semiconductor, Inc. All Rights Reserved. + * Copyright 2008 Juergen Beisert, kernel@pengutronix.de + * Copyright 2008 Martin Fuzzey, mfuzzey@gmail.com + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, + * MA 02110-1301, USA. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include "clk.h" + +#define IO_ADDR_CCM(off) (MX21_IO_ADDRESS(MX21_CCM_BASE_ADDR + (off))) + +/* Register offsets */ +#define CCM_CSCR IO_ADDR_CCM(0x0) +#define CCM_MPCTL0 IO_ADDR_CCM(0x4) +#define CCM_MPCTL1 IO_ADDR_CCM(0x8) +#define CCM_SPCTL0 IO_ADDR_CCM(0xc) +#define CCM_SPCTL1 IO_ADDR_CCM(0x10) +#define CCM_OSC26MCTL IO_ADDR_CCM(0x14) +#define CCM_PCDR0 IO_ADDR_CCM(0x18) +#define CCM_PCDR1 IO_ADDR_CCM(0x1c) +#define CCM_PCCR0 IO_ADDR_CCM(0x20) +#define CCM_PCCR1 IO_ADDR_CCM(0x24) +#define CCM_CCSR IO_ADDR_CCM(0x28) +#define CCM_PMCTL IO_ADDR_CCM(0x2c) +#define CCM_PMCOUNT IO_ADDR_CCM(0x30) +#define CCM_WKGDCTL IO_ADDR_CCM(0x34) + +static const char *mpll_sel_clks[] = { "fpm", "ckih", }; +static const char *spll_sel_clks[] = { "fpm", "ckih", }; + +enum imx21_clks { + ckil, ckih, fpm, mpll_sel, spll_sel, mpll, spll, fclk, hclk, ipg, per1, + per2, per3, per4, uart1_ipg_gate, uart2_ipg_gate, uart3_ipg_gate, + uart4_ipg_gate, gpt1_ipg_gate, gpt2_ipg_gate, gpt3_ipg_gate, + pwm_ipg_gate, sdhc1_ipg_gate, sdhc2_ipg_gate, lcdc_ipg_gate, + lcdc_hclk_gate, cspi3_ipg_gate, cspi2_ipg_gate, cspi1_ipg_gate, + per4_gate, csi_hclk_gate, usb_div, usb_gate, usb_hclk_gate, ssi1_gate, + ssi2_gate, nfc_div, nfc_gate, dma_gate, dma_hclk_gate, brom_gate, + emma_gate, emma_hclk_gate, slcdc_gate, slcdc_hclk_gate, wdog_gate, + gpio_gate, i2c_gate, kpp_gate, owire_gate, rtc_gate, clk_max +}; + +static struct clk *clk[clk_max]; + +/* + * must be called very early to get information about the + * available clock rate when the timer framework starts + */ +int __init mx21_clocks_init(unsigned long lref, unsigned long href) +{ + int i; + + clk[ckil] = imx_clk_fixed("ckil", lref); + clk[ckih] = imx_clk_fixed("ckih", href); + clk[fpm] = imx_clk_fixed_factor("fpm", "ckil", 512, 1); + clk[mpll_sel] = imx_clk_mux("mpll_sel", CCM_CSCR, 16, 1, mpll_sel_clks, + ARRAY_SIZE(mpll_sel_clks)); + clk[spll_sel] = imx_clk_mux("spll_sel", CCM_CSCR, 17, 1, spll_sel_clks, + ARRAY_SIZE(spll_sel_clks)); + clk[mpll] = imx_clk_pllv1("mpll", "mpll_sel", CCM_MPCTL0); + clk[spll] = imx_clk_pllv1("spll", "spll_sel", CCM_SPCTL0); + clk[fclk] = imx_clk_divider("fclk", "mpll", CCM_CSCR, 29, 3); + clk[hclk] = imx_clk_divider("hclk", "fclk", CCM_CSCR, 10, 4); + clk[ipg] = imx_clk_divider("ipg", "hclk", CCM_CSCR, 9, 1); + clk[per1] = imx_clk_divider("per1", "mpll", CCM_PCDR1, 0, 6); + clk[per2] = imx_clk_divider("per2", "mpll", CCM_PCDR1, 8, 6); + clk[per3] = imx_clk_divider("per3", "mpll", CCM_PCDR1, 16, 6); + clk[per4] = imx_clk_divider("per4", "mpll", CCM_PCDR1, 24, 6); + clk[uart1_ipg_gate] = imx_clk_gate("uart1_ipg_gate", "ipg", CCM_PCCR0, 0); + clk[uart2_ipg_gate] = imx_clk_gate("uart2_ipg_gate", "ipg", CCM_PCCR0, 1); + clk[uart3_ipg_gate] = imx_clk_gate("uart3_ipg_gate", "ipg", CCM_PCCR0, 2); + clk[uart4_ipg_gate] = imx_clk_gate("uart4_ipg_gate", "ipg", CCM_PCCR0, 3); + clk[gpt1_ipg_gate] = imx_clk_gate("gpt1_ipg_gate", "ipg", CCM_PCCR1, 25); + clk[gpt2_ipg_gate] = imx_clk_gate("gpt2_ipg_gate", "ipg", CCM_PCCR1, 26); + clk[gpt3_ipg_gate] = imx_clk_gate("gpt3_ipg_gate", "ipg", CCM_PCCR1, 27); + clk[pwm_ipg_gate] = imx_clk_gate("pwm_ipg_gate", "ipg", CCM_PCCR1, 28); + clk[sdhc1_ipg_gate] = imx_clk_gate("sdhc1_ipg_gate", "ipg", CCM_PCCR0, 9); + clk[sdhc2_ipg_gate] = imx_clk_gate("sdhc2_ipg_gate", "ipg", CCM_PCCR0, 10); + clk[lcdc_ipg_gate] = imx_clk_gate("lcdc_ipg_gate", "ipg", CCM_PCCR0, 18); + clk[lcdc_hclk_gate] = imx_clk_gate("lcdc_hclk_gate", "hclk", CCM_PCCR0, 26); + clk[cspi3_ipg_gate] = imx_clk_gate("cspi3_ipg_gate", "ipg", CCM_PCCR1, 23); + clk[cspi2_ipg_gate] = imx_clk_gate("cspi2_ipg_gate", "ipg", CCM_PCCR0, 5); + clk[cspi1_ipg_gate] = imx_clk_gate("cspi1_ipg_gate", "ipg", CCM_PCCR0, 4); + clk[per4_gate] = imx_clk_gate("per4_gate", "per4", CCM_PCCR0, 22); + clk[csi_hclk_gate] = imx_clk_gate("csi_hclk_gate", "hclk", CCM_PCCR0, 31); + clk[usb_div] = imx_clk_divider("usb_div", "spll", CCM_CSCR, 26, 3); + clk[usb_gate] = imx_clk_gate("usb_gate", "usb_div", CCM_PCCR0, 14); + clk[usb_hclk_gate] = imx_clk_gate("usb_hclk_gate", "hclk", CCM_PCCR0, 24); + clk[ssi1_gate] = imx_clk_gate("ssi1_gate", "ipg", CCM_PCCR0, 6); + clk[ssi2_gate] = imx_clk_gate("ssi2_gate", "ipg", CCM_PCCR0, 7); + clk[nfc_div] = imx_clk_divider("nfc_div", "ipg", CCM_PCDR0, 12, 4); + clk[nfc_gate] = imx_clk_gate("nfc_gate", "nfc_div", CCM_PCCR0, 19); + clk[dma_gate] = imx_clk_gate("dma_gate", "ipg", CCM_PCCR0, 13); + clk[dma_hclk_gate] = imx_clk_gate("dma_hclk_gate", "hclk", CCM_PCCR0, 30); + clk[brom_gate] = imx_clk_gate("brom_gate", "hclk", CCM_PCCR0, 28); + clk[emma_gate] = imx_clk_gate("emma_gate", "ipg", CCM_PCCR0, 15); + clk[emma_hclk_gate] = imx_clk_gate("emma_hclk_gate", "hclk", CCM_PCCR0, 27); + clk[slcdc_gate] = imx_clk_gate("slcdc_gate", "ipg", CCM_PCCR0, 25); + clk[slcdc_hclk_gate] = imx_clk_gate("slcdc_hclk_gate", "hclk", CCM_PCCR0, 21); + clk[wdog_gate] = imx_clk_gate("wdog_gate", "ipg", CCM_PCCR1, 24); + clk[gpio_gate] = imx_clk_gate("gpio_gate", "ipg", CCM_PCCR0, 11); + clk[i2c_gate] = imx_clk_gate("i2c_gate", "ipg", CCM_PCCR0, 12); + clk[kpp_gate] = imx_clk_gate("kpp_gate", "ipg", CCM_PCCR1, 30); + clk[owire_gate] = imx_clk_gate("owire_gate", "ipg", CCM_PCCR1, 31); + clk[rtc_gate] = imx_clk_gate("rtc_gate", "ipg", CCM_PCCR1, 29); + + for (i = 0; i < ARRAY_SIZE(clk); i++) + if (IS_ERR(clk[i])) + pr_err("i.MX21 clk %d: register failed with %ld\n", + i, PTR_ERR(clk[i])); + + clk_register_clkdev(clk[per1], "per1", NULL); + clk_register_clkdev(clk[per2], "per2", NULL); + clk_register_clkdev(clk[per3], "per3", NULL); + clk_register_clkdev(clk[per4], "per4", NULL); + clk_register_clkdev(clk[per1], "per", "imx21-uart.0"); + clk_register_clkdev(clk[uart1_ipg_gate], "ipg", "imx21-uart.0"); + clk_register_clkdev(clk[per1], "per", "imx21-uart.1"); + clk_register_clkdev(clk[uart2_ipg_gate], "ipg", "imx21-uart.1"); + clk_register_clkdev(clk[per1], "per", "imx21-uart.2"); + clk_register_clkdev(clk[uart3_ipg_gate], "ipg", "imx21-uart.2"); + clk_register_clkdev(clk[per1], "per", "imx21-uart.3"); + clk_register_clkdev(clk[uart4_ipg_gate], "ipg", "imx21-uart.3"); + clk_register_clkdev(clk[gpt1_ipg_gate], "ipg", "imx-gpt.0"); + clk_register_clkdev(clk[per1], "per", "imx-gpt.0"); + clk_register_clkdev(clk[gpt2_ipg_gate], "ipg", "imx-gpt.1"); + clk_register_clkdev(clk[per1], "per", "imx-gpt.1"); + clk_register_clkdev(clk[gpt3_ipg_gate], "ipg", "imx-gpt.2"); + clk_register_clkdev(clk[per1], "per", "imx-gpt.2"); + clk_register_clkdev(clk[pwm_ipg_gate], "pwm", "mxc_pwm.0"); + clk_register_clkdev(clk[per2], "per", "imx21-cspi.0"); + clk_register_clkdev(clk[cspi1_ipg_gate], "ipg", "imx21-cspi.0"); + clk_register_clkdev(clk[per2], "per", "imx21-cspi.1"); + clk_register_clkdev(clk[cspi2_ipg_gate], "ipg", "imx21-cspi.1"); + clk_register_clkdev(clk[per2], "per", "imx21-cspi.2"); + clk_register_clkdev(clk[cspi3_ipg_gate], "ipg", "imx21-cspi.2"); + clk_register_clkdev(clk[per3], "per", "imx-fb.0"); + clk_register_clkdev(clk[lcdc_ipg_gate], "ipg", "imx-fb.0"); + clk_register_clkdev(clk[lcdc_hclk_gate], "ahb", "imx-fb.0"); + clk_register_clkdev(clk[usb_gate], "per", "imx21-hcd.0"); + clk_register_clkdev(clk[usb_hclk_gate], "ahb", "imx21-hcd.0"); + clk_register_clkdev(clk[nfc_gate], NULL, "mxc_nand.0"); + clk_register_clkdev(clk[dma_hclk_gate], "ahb", "imx-dma"); + clk_register_clkdev(clk[dma_gate], "ipg", "imx-dma"); + clk_register_clkdev(clk[wdog_gate], NULL, "imx2-wdt.0"); + clk_register_clkdev(clk[i2c_gate], NULL, "imx-i2c.0"); + clk_register_clkdev(clk[kpp_gate], NULL, "mxc-keypad"); + clk_register_clkdev(clk[owire_gate], NULL, "mxc_w1.0"); + clk_register_clkdev(clk[brom_gate], "brom", NULL); + clk_register_clkdev(clk[emma_gate], "emma", NULL); + clk_register_clkdev(clk[slcdc_gate], "slcdc", NULL); + clk_register_clkdev(clk[gpio_gate], "gpio", NULL); + clk_register_clkdev(clk[rtc_gate], "rtc", NULL); + clk_register_clkdev(clk[csi_hclk_gate], "csi", NULL); + clk_register_clkdev(clk[ssi1_gate], "ssi1", NULL); + clk_register_clkdev(clk[ssi2_gate], "ssi2", NULL); + clk_register_clkdev(clk[sdhc1_ipg_gate], "sdhc1", NULL); + clk_register_clkdev(clk[sdhc2_ipg_gate], "sdhc2", NULL); + + mxc_timer_init(NULL, MX21_IO_ADDRESS(MX21_GPT1_BASE_ADDR), + MX21_INT_GPT1); + return 0; +} -- cgit v0.10.2 From e038ed50a4a767add205094c035b6943e7b30140 Mon Sep 17 00:00:00 2001 From: Sascha Hauer Date: Fri, 9 Mar 2012 09:11:46 +0100 Subject: ARM i.MX27: implement clocks using common clock framework Signed-off-by: Sascha Hauer Signed-off-by: Philipp Zabel diff --git a/arch/arm/mach-imx/Kconfig b/arch/arm/mach-imx/Kconfig index d23cebd..efed8f9 100644 --- a/arch/arm/mach-imx/Kconfig +++ b/arch/arm/mach-imx/Kconfig @@ -59,6 +59,7 @@ config SOC_IMX27 bool select MACH_MX27 select CPU_ARM926T + select COMMON_CLK select IMX_HAVE_IOMUX_V1 select MXC_AVIC diff --git a/arch/arm/mach-imx/Makefile b/arch/arm/mach-imx/Makefile index a2efb79..c4ab418 100644 --- a/arch/arm/mach-imx/Makefile +++ b/arch/arm/mach-imx/Makefile @@ -4,7 +4,7 @@ obj-$(CONFIG_SOC_IMX21) += clk-imx21.o mm-imx21.o obj-$(CONFIG_SOC_IMX25) += clk-imx25.o mm-imx25.o ehci-imx25.o cpu-imx25.o obj-$(CONFIG_SOC_IMX27) += cpu-imx27.o pm-imx27.o -obj-$(CONFIG_SOC_IMX27) += clock-imx27.o mm-imx27.o ehci-imx27.o +obj-$(CONFIG_SOC_IMX27) += clk-imx27.o mm-imx27.o ehci-imx27.o obj-$(CONFIG_SOC_IMX31) += mm-imx3.o cpu-imx31.o clock-imx31.o iomux-imx31.o ehci-imx31.o pm-imx3.o obj-$(CONFIG_SOC_IMX35) += mm-imx3.o cpu-imx35.o clock-imx35.o ehci-imx35.o pm-imx3.o diff --git a/arch/arm/mach-imx/clk-imx27.c b/arch/arm/mach-imx/clk-imx27.c new file mode 100644 index 0000000..50a7ebd --- /dev/null +++ b/arch/arm/mach-imx/clk-imx27.c @@ -0,0 +1,290 @@ +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include "clk.h" + +#define IO_ADDR_CCM(off) (MX27_IO_ADDRESS(MX27_CCM_BASE_ADDR + (off))) + +/* Register offsets */ +#define CCM_CSCR IO_ADDR_CCM(0x0) +#define CCM_MPCTL0 IO_ADDR_CCM(0x4) +#define CCM_MPCTL1 IO_ADDR_CCM(0x8) +#define CCM_SPCTL0 IO_ADDR_CCM(0xc) +#define CCM_SPCTL1 IO_ADDR_CCM(0x10) +#define CCM_OSC26MCTL IO_ADDR_CCM(0x14) +#define CCM_PCDR0 IO_ADDR_CCM(0x18) +#define CCM_PCDR1 IO_ADDR_CCM(0x1c) +#define CCM_PCCR0 IO_ADDR_CCM(0x20) +#define CCM_PCCR1 IO_ADDR_CCM(0x24) +#define CCM_CCSR IO_ADDR_CCM(0x28) +#define CCM_PMCTL IO_ADDR_CCM(0x2c) +#define CCM_PMCOUNT IO_ADDR_CCM(0x30) +#define CCM_WKGDCTL IO_ADDR_CCM(0x34) + +#define CCM_CSCR_UPDATE_DIS (1 << 31) +#define CCM_CSCR_SSI2 (1 << 23) +#define CCM_CSCR_SSI1 (1 << 22) +#define CCM_CSCR_VPU (1 << 21) +#define CCM_CSCR_MSHC (1 << 20) +#define CCM_CSCR_SPLLRES (1 << 19) +#define CCM_CSCR_MPLLRES (1 << 18) +#define CCM_CSCR_SP (1 << 17) +#define CCM_CSCR_MCU (1 << 16) +#define CCM_CSCR_OSC26MDIV (1 << 4) +#define CCM_CSCR_OSC26M (1 << 3) +#define CCM_CSCR_FPM (1 << 2) +#define CCM_CSCR_SPEN (1 << 1) +#define CCM_CSCR_MPEN (1 << 0) + +/* i.MX27 TO 2+ */ +#define CCM_CSCR_ARM_SRC (1 << 15) + +#define CCM_SPCTL1_LF (1 << 15) +#define CCM_SPCTL1_BRMO (1 << 6) + +static const char *vpu_sel_clks[] = { "spll", "mpll_main2", }; +static const char *cpu_sel_clks[] = { "mpll_main2", "mpll", }; +static const char *clko_sel_clks[] = { + "ckil", "prem", "ckih", "ckih", + "ckih", "mpll", "spll", "cpu_div", + "ahb", "ipg", "per1_div", "per2_div", + "per3_div", "per4_div", "ssi1_div", "ssi2_div", + "nfc_div", "mshc_div", "vpu_div", "60m", + "32k", "usb_div", "dptc", +}; + +static const char *ssi_sel_clks[] = { "spll", "mpll", }; + +enum mx27_clks { + dummy, ckih, ckil, mpll, spll, mpll_main2, ahb, ipg, nfc_div, per1_div, + per2_div, per3_div, per4_div, vpu_sel, vpu_div, usb_div, cpu_sel, + clko_sel, cpu_div, clko_div, ssi1_sel, ssi2_sel, ssi1_div, ssi2_div, + clko_en, ssi2_ipg_gate, ssi1_ipg_gate, slcdc_ipg_gate, sdhc3_ipg_gate, + sdhc2_ipg_gate, sdhc1_ipg_gate, scc_ipg_gate, sahara_ipg_gate, + rtc_ipg_gate, pwm_ipg_gate, owire_ipg_gate, lcdc_ipg_gate, + kpp_ipg_gate, iim_ipg_gate, i2c2_ipg_gate, i2c1_ipg_gate, + gpt6_ipg_gate, gpt5_ipg_gate, gpt4_ipg_gate, gpt3_ipg_gate, + gpt2_ipg_gate, gpt1_ipg_gate, gpio_ipg_gate, fec_ipg_gate, + emma_ipg_gate, dma_ipg_gate, cspi3_ipg_gate, cspi2_ipg_gate, + cspi1_ipg_gate, nfc_baud_gate, ssi2_baud_gate, ssi1_baud_gate, + vpu_baud_gate, per4_gate, per3_gate, per2_gate, per1_gate, + usb_ahb_gate, slcdc_ahb_gate, sahara_ahb_gate, lcdc_ahb_gate, + vpu_ahb_gate, fec_ahb_gate, emma_ahb_gate, emi_ahb_gate, dma_ahb_gate, + csi_ahb_gate, brom_ahb_gate, ata_ahb_gate, wdog_ipg_gate, usb_ipg_gate, + uart6_ipg_gate, uart5_ipg_gate, uart4_ipg_gate, uart3_ipg_gate, + uart2_ipg_gate, uart1_ipg_gate, clk_max +}; + +static struct clk *clk[clk_max]; + +int __init mx27_clocks_init(unsigned long fref) +{ + int i; + + clk[dummy] = imx_clk_fixed("dummy", 0); + clk[ckih] = imx_clk_fixed("ckih", fref); + clk[ckil] = imx_clk_fixed("ckil", 32768); + clk[mpll] = imx_clk_pllv1("mpll", "ckih", CCM_MPCTL0); + clk[spll] = imx_clk_pllv1("spll", "ckih", CCM_SPCTL0); + clk[mpll_main2] = imx_clk_fixed_factor("mpll_main2", "mpll", 2, 3); + + if (mx27_revision() >= IMX_CHIP_REVISION_2_0) { + clk[ahb] = imx_clk_divider("ahb", "mpll_main2", CCM_CSCR, 8, 2); + clk[ipg] = imx_clk_fixed_factor("ipg", "ahb", 1, 2); + } else { + clk[ahb] = imx_clk_divider("ahb", "mpll_main2", CCM_CSCR, 9, 4); + clk[ipg] = imx_clk_divider("ipg", "ahb", CCM_CSCR, 8, 1); + } + + clk[nfc_div] = imx_clk_divider("nfc_div", "ahb", CCM_PCDR0, 6, 4); + clk[per1_div] = imx_clk_divider("per1_div", "mpll_main2", CCM_PCDR1, 0, 6); + clk[per2_div] = imx_clk_divider("per2_div", "mpll_main2", CCM_PCDR1, 8, 6); + clk[per3_div] = imx_clk_divider("per3_div", "mpll_main2", CCM_PCDR1, 16, 6); + clk[per4_div] = imx_clk_divider("per4_div", "mpll_main2", CCM_PCDR1, 24, 6); + clk[vpu_sel] = imx_clk_mux("vpu_sel", CCM_CSCR, 21, 1, vpu_sel_clks, ARRAY_SIZE(vpu_sel_clks)); + clk[vpu_div] = imx_clk_divider("vpu_div", "vpu_sel", CCM_PCDR0, 10, 3); + clk[usb_div] = imx_clk_divider("usb_div", "spll", CCM_CSCR, 28, 3); + clk[cpu_sel] = imx_clk_mux("cpu_sel", CCM_CSCR, 15, 1, cpu_sel_clks, ARRAY_SIZE(cpu_sel_clks)); + clk[clko_sel] = imx_clk_mux("clko_sel", CCM_CCSR, 0, 5, clko_sel_clks, ARRAY_SIZE(clko_sel_clks)); + if (mx27_revision() >= IMX_CHIP_REVISION_2_0) + clk[cpu_div] = imx_clk_divider("cpu_div", "cpu_sel", CCM_CSCR, 12, 2); + else + clk[cpu_div] = imx_clk_divider("cpu_div", "cpu_sel", CCM_CSCR, 13, 3); + clk[clko_div] = imx_clk_divider("clko_div", "clko_sel", CCM_PCDR0, 22, 3); + clk[ssi1_sel] = imx_clk_mux("ssi1_sel", CCM_CSCR, 22, 1, ssi_sel_clks, ARRAY_SIZE(ssi_sel_clks)); + clk[ssi2_sel] = imx_clk_mux("ssi2_sel", CCM_CSCR, 23, 1, ssi_sel_clks, ARRAY_SIZE(ssi_sel_clks)); + clk[ssi1_div] = imx_clk_divider("ssi1_div", "ssi1_sel", CCM_PCDR0, 16, 6); + clk[ssi2_div] = imx_clk_divider("ssi2_div", "ssi2_sel", CCM_PCDR0, 26, 3); + clk[clko_en] = imx_clk_gate("clko_en", "clko_div", CCM_PCCR0, 0); + clk[ssi2_ipg_gate] = imx_clk_gate("ssi2_ipg_gate", "ipg", CCM_PCCR0, 0); + clk[ssi1_ipg_gate] = imx_clk_gate("ssi1_ipg_gate", "ipg", CCM_PCCR0, 1); + clk[slcdc_ipg_gate] = imx_clk_gate("slcdc_ipg_gate", "ipg", CCM_PCCR0, 2); + clk[sdhc3_ipg_gate] = imx_clk_gate("sdhc3_ipg_gate", "ipg", CCM_PCCR0, 3); + clk[sdhc2_ipg_gate] = imx_clk_gate("sdhc2_ipg_gate", "ipg", CCM_PCCR0, 4); + clk[sdhc1_ipg_gate] = imx_clk_gate("sdhc1_ipg_gate", "ipg", CCM_PCCR0, 5); + clk[scc_ipg_gate] = imx_clk_gate("scc_ipg_gate", "ipg", CCM_PCCR0, 6); + clk[sahara_ipg_gate] = imx_clk_gate("sahara_ipg_gate", "ipg", CCM_PCCR0, 7); + clk[rtc_ipg_gate] = imx_clk_gate("rtc_ipg_gate", "ipg", CCM_PCCR0, 9); + clk[pwm_ipg_gate] = imx_clk_gate("pwm_ipg_gate", "ipg", CCM_PCCR0, 11); + clk[owire_ipg_gate] = imx_clk_gate("owire_ipg_gate", "ipg", CCM_PCCR0, 12); + clk[lcdc_ipg_gate] = imx_clk_gate("lcdc_ipg_gate", "ipg", CCM_PCCR0, 14); + clk[kpp_ipg_gate] = imx_clk_gate("kpp_ipg_gate", "ipg", CCM_PCCR0, 15); + clk[iim_ipg_gate] = imx_clk_gate("iim_ipg_gate", "ipg", CCM_PCCR0, 16); + clk[i2c2_ipg_gate] = imx_clk_gate("i2c2_ipg_gate", "ipg", CCM_PCCR0, 17); + clk[i2c1_ipg_gate] = imx_clk_gate("i2c1_ipg_gate", "ipg", CCM_PCCR0, 18); + clk[gpt6_ipg_gate] = imx_clk_gate("gpt6_ipg_gate", "ipg", CCM_PCCR0, 19); + clk[gpt5_ipg_gate] = imx_clk_gate("gpt5_ipg_gate", "ipg", CCM_PCCR0, 20); + clk[gpt4_ipg_gate] = imx_clk_gate("gpt4_ipg_gate", "ipg", CCM_PCCR0, 21); + clk[gpt3_ipg_gate] = imx_clk_gate("gpt3_ipg_gate", "ipg", CCM_PCCR0, 22); + clk[gpt2_ipg_gate] = imx_clk_gate("gpt2_ipg_gate", "ipg", CCM_PCCR0, 23); + clk[gpt1_ipg_gate] = imx_clk_gate("gpt1_ipg_gate", "ipg", CCM_PCCR0, 24); + clk[gpio_ipg_gate] = imx_clk_gate("gpio_ipg_gate", "ipg", CCM_PCCR0, 25); + clk[fec_ipg_gate] = imx_clk_gate("fec_ipg_gate", "ipg", CCM_PCCR0, 26); + clk[emma_ipg_gate] = imx_clk_gate("emma_ipg_gate", "ipg", CCM_PCCR0, 27); + clk[dma_ipg_gate] = imx_clk_gate("dma_ipg_gate", "ipg", CCM_PCCR0, 28); + clk[cspi3_ipg_gate] = imx_clk_gate("cspi3_ipg_gate", "ipg", CCM_PCCR0, 29); + clk[cspi2_ipg_gate] = imx_clk_gate("cspi2_ipg_gate", "ipg", CCM_PCCR0, 30); + clk[cspi1_ipg_gate] = imx_clk_gate("cspi1_ipg_gate", "ipg", CCM_PCCR0, 31); + clk[nfc_baud_gate] = imx_clk_gate("nfc_baud_gate", "nfc_div", CCM_PCCR1, 3); + clk[ssi2_baud_gate] = imx_clk_gate("ssi2_baud_gate", "ssi2_div", CCM_PCCR1, 4); + clk[ssi1_baud_gate] = imx_clk_gate("ssi1_baud_gate", "ssi1_div", CCM_PCCR1, 5); + clk[vpu_baud_gate] = imx_clk_gate("vpu_baud_gate", "vpu_div", CCM_PCCR1, 6); + clk[per4_gate] = imx_clk_gate("per4_gate", "per4_div", CCM_PCCR1, 7); + clk[per3_gate] = imx_clk_gate("per3_gate", "per3_div", CCM_PCCR1, 8); + clk[per2_gate] = imx_clk_gate("per2_gate", "per2_div", CCM_PCCR1, 9); + clk[per1_gate] = imx_clk_gate("per1_gate", "per1_div", CCM_PCCR1, 10); + clk[usb_ahb_gate] = imx_clk_gate("usb_ahb_gate", "ahb", CCM_PCCR1, 11); + clk[slcdc_ahb_gate] = imx_clk_gate("slcdc_ahb_gate", "ahb", CCM_PCCR1, 12); + clk[sahara_ahb_gate] = imx_clk_gate("sahara_ahb_gate", "ahb", CCM_PCCR1, 13); + clk[lcdc_ahb_gate] = imx_clk_gate("lcdc_ahb_gate", "ahb", CCM_PCCR1, 15); + clk[vpu_ahb_gate] = imx_clk_gate("vpu_ahb_gate", "ahb", CCM_PCCR1, 16); + clk[fec_ahb_gate] = imx_clk_gate("fec_ahb_gate", "ahb", CCM_PCCR1, 17); + clk[emma_ahb_gate] = imx_clk_gate("emma_ahb_gate", "ahb", CCM_PCCR1, 18); + clk[emi_ahb_gate] = imx_clk_gate("emi_ahb_gate", "ahb", CCM_PCCR1, 19); + clk[dma_ahb_gate] = imx_clk_gate("dma_ahb_gate", "ahb", CCM_PCCR1, 20); + clk[csi_ahb_gate] = imx_clk_gate("csi_ahb_gate", "ahb", CCM_PCCR1, 21); + clk[brom_ahb_gate] = imx_clk_gate("brom_ahb_gate", "ahb", CCM_PCCR1, 22); + clk[ata_ahb_gate] = imx_clk_gate("ata_ahb_gate", "ahb", CCM_PCCR1, 23); + clk[wdog_ipg_gate] = imx_clk_gate("wdog_ipg_gate", "ipg", CCM_PCCR1, 24); + clk[usb_ipg_gate] = imx_clk_gate("usb_ipg_gate", "ipg", CCM_PCCR1, 25); + clk[uart6_ipg_gate] = imx_clk_gate("uart6_ipg_gate", "ipg", CCM_PCCR1, 26); + clk[uart5_ipg_gate] = imx_clk_gate("uart5_ipg_gate", "ipg", CCM_PCCR1, 27); + clk[uart4_ipg_gate] = imx_clk_gate("uart4_ipg_gate", "ipg", CCM_PCCR1, 28); + clk[uart3_ipg_gate] = imx_clk_gate("uart3_ipg_gate", "ipg", CCM_PCCR1, 29); + clk[uart2_ipg_gate] = imx_clk_gate("uart2_ipg_gate", "ipg", CCM_PCCR1, 30); + clk[uart1_ipg_gate] = imx_clk_gate("uart1_ipg_gate", "ipg", CCM_PCCR1, 31); + + for (i = 0; i < ARRAY_SIZE(clk); i++) + if (IS_ERR(clk[i])) + pr_err("i.MX27 clk %d: register failed with %ld\n", + i, PTR_ERR(clk[i])); + + clk_register_clkdev(clk[uart1_ipg_gate], "ipg", "imx21-uart.0"); + clk_register_clkdev(clk[per1_gate], "per", "imx21-uart.0"); + clk_register_clkdev(clk[uart2_ipg_gate], "ipg", "imx21-uart.1"); + clk_register_clkdev(clk[per1_gate], "per", "imx21-uart.1"); + clk_register_clkdev(clk[uart3_ipg_gate], "ipg", "imx21-uart.2"); + clk_register_clkdev(clk[per1_gate], "per", "imx21-uart.2"); + clk_register_clkdev(clk[uart4_ipg_gate], "ipg", "imx21-uart.3"); + clk_register_clkdev(clk[per1_gate], "per", "imx21-uart.3"); + clk_register_clkdev(clk[uart5_ipg_gate], "ipg", "imx21-uart.4"); + clk_register_clkdev(clk[per1_gate], "per", "imx21-uart.4"); + clk_register_clkdev(clk[uart6_ipg_gate], "ipg", "imx21-uart.5"); + clk_register_clkdev(clk[per1_gate], "per", "imx21-uart.5"); + clk_register_clkdev(clk[gpt1_ipg_gate], "ipg", "imx-gpt.0"); + clk_register_clkdev(clk[per1_gate], "per", "imx-gpt.0"); + clk_register_clkdev(clk[gpt2_ipg_gate], "ipg", "imx-gpt.1"); + clk_register_clkdev(clk[per1_gate], "per", "imx-gpt.1"); + clk_register_clkdev(clk[gpt3_ipg_gate], "ipg", "imx-gpt.2"); + clk_register_clkdev(clk[per1_gate], "per", "imx-gpt.2"); + clk_register_clkdev(clk[gpt4_ipg_gate], "ipg", "imx-gpt.3"); + clk_register_clkdev(clk[per1_gate], "per", "imx-gpt.3"); + clk_register_clkdev(clk[gpt5_ipg_gate], "ipg", "imx-gpt.4"); + clk_register_clkdev(clk[per1_gate], "per", "imx-gpt.4"); + clk_register_clkdev(clk[gpt6_ipg_gate], "ipg", "imx-gpt.5"); + clk_register_clkdev(clk[per1_gate], "per", "imx-gpt.5"); + clk_register_clkdev(clk[pwm_ipg_gate], NULL, "mxc_pwm.0"); + clk_register_clkdev(clk[per2_gate], "per", "mxc-mmc.0"); + clk_register_clkdev(clk[sdhc1_ipg_gate], "ipg", "mxc-mmc.0"); + clk_register_clkdev(clk[per2_gate], "per", "mxc-mmc.1"); + clk_register_clkdev(clk[sdhc2_ipg_gate], "ipg", "mxc-mmc.1"); + clk_register_clkdev(clk[per2_gate], "per", "mxc-mmc.2"); + clk_register_clkdev(clk[sdhc2_ipg_gate], "ipg", "mxc-mmc.2"); + clk_register_clkdev(clk[cspi1_ipg_gate], NULL, "imx27-cspi.0"); + clk_register_clkdev(clk[cspi2_ipg_gate], NULL, "imx27-cspi.1"); + clk_register_clkdev(clk[cspi3_ipg_gate], NULL, "imx27-cspi.2"); + clk_register_clkdev(clk[per3_gate], "per", "imx-fb.0"); + clk_register_clkdev(clk[lcdc_ipg_gate], "ipg", "imx-fb.0"); + clk_register_clkdev(clk[lcdc_ahb_gate], "ahb", "imx-fb.0"); + clk_register_clkdev(clk[csi_ahb_gate], NULL, "mx2-camera.0"); + clk_register_clkdev(clk[usb_div], "per", "fsl-usb2-udc"); + clk_register_clkdev(clk[usb_ipg_gate], "ipg", "fsl-usb2-udc"); + clk_register_clkdev(clk[usb_ahb_gate], "ahb", "fsl-usb2-udc"); + clk_register_clkdev(clk[usb_div], "per", "mxc-ehci.0"); + clk_register_clkdev(clk[usb_ipg_gate], "ipg", "mxc-ehci.0"); + clk_register_clkdev(clk[usb_ahb_gate], "ahb", "mxc-ehci.0"); + clk_register_clkdev(clk[usb_div], "per", "mxc-ehci.1"); + clk_register_clkdev(clk[usb_ipg_gate], "ipg", "mxc-ehci.1"); + clk_register_clkdev(clk[usb_ahb_gate], "ahb", "mxc-ehci.1"); + clk_register_clkdev(clk[usb_div], "per", "mxc-ehci.2"); + clk_register_clkdev(clk[usb_ipg_gate], "ipg", "mxc-ehci.2"); + clk_register_clkdev(clk[usb_ahb_gate], "ahb", "mxc-ehci.2"); + clk_register_clkdev(clk[ssi1_ipg_gate], NULL, "imx-ssi.0"); + clk_register_clkdev(clk[ssi2_ipg_gate], NULL, "imx-ssi.1"); + clk_register_clkdev(clk[nfc_baud_gate], NULL, "mxc_nand.0"); + clk_register_clkdev(clk[vpu_baud_gate], "per", "imx-vpu"); + clk_register_clkdev(clk[vpu_ahb_gate], "ahb", "imx-vpu"); + clk_register_clkdev(clk[dma_ahb_gate], "ahb", "imx-dma"); + clk_register_clkdev(clk[dma_ipg_gate], "ipg", "imx-dma"); + clk_register_clkdev(clk[fec_ipg_gate], "ipg", "imx27-fec.0"); + clk_register_clkdev(clk[fec_ahb_gate], "ahb", "imx27-fec.0"); + clk_register_clkdev(clk[wdog_ipg_gate], NULL, "imx2-wdt.0"); + clk_register_clkdev(clk[i2c1_ipg_gate], NULL, "imx-i2c.0"); + clk_register_clkdev(clk[i2c2_ipg_gate], NULL, "imx-i2c.1"); + clk_register_clkdev(clk[owire_ipg_gate], NULL, "mxc_w1.0"); + clk_register_clkdev(clk[kpp_ipg_gate], NULL, "imx-keypad"); + clk_register_clkdev(clk[emma_ahb_gate], "ahb", "imx-emma"); + clk_register_clkdev(clk[emma_ipg_gate], "ipg", "imx-emma"); + clk_register_clkdev(clk[iim_ipg_gate], "iim", NULL); + clk_register_clkdev(clk[gpio_ipg_gate], "gpio", NULL); + clk_register_clkdev(clk[brom_ahb_gate], "brom", NULL); + clk_register_clkdev(clk[ata_ahb_gate], "ata", NULL); + clk_register_clkdev(clk[rtc_ipg_gate], "rtc", NULL); + clk_register_clkdev(clk[scc_ipg_gate], "scc", NULL); + clk_register_clkdev(clk[cpu_div], "cpu", NULL); + clk_register_clkdev(clk[emi_ahb_gate], "emi_ahb" , NULL); + clk_register_clkdev(clk[ssi1_baud_gate], "bitrate" , "imx-ssi.0"); + clk_register_clkdev(clk[ssi2_baud_gate], "bitrate" , "imx-ssi.1"); + + mxc_timer_init(NULL, MX27_IO_ADDRESS(MX27_GPT1_BASE_ADDR), + MX27_INT_GPT1); + + clk_prepare_enable(clk[emi_ahb_gate]); + + return 0; +} + +#ifdef CONFIG_OF +int __init mx27_clocks_init_dt(void) +{ + struct device_node *np; + u32 fref = 26000000; /* default */ + + for_each_compatible_node(np, NULL, "fixed-clock") { + if (!of_device_is_compatible(np, "fsl,imx-osc26m")) + continue; + + if (!of_property_read_u32(np, "clock-frequency", &fref)) + break; + } + + return mx27_clocks_init(fref); +} +#endif -- cgit v0.10.2 From 95878cbed89504d21bc87b11e10eb46c5cc8443e Mon Sep 17 00:00:00 2001 From: Sascha Hauer Date: Sun, 18 Mar 2012 23:48:35 +0100 Subject: ARM i.MX31: implement clocks using common clock framework Signed-off-by: Sascha Hauer Signed-off-by: Philipp Zabel diff --git a/arch/arm/mach-imx/Kconfig b/arch/arm/mach-imx/Kconfig index efed8f9..692de7b 100644 --- a/arch/arm/mach-imx/Kconfig +++ b/arch/arm/mach-imx/Kconfig @@ -68,6 +68,7 @@ config SOC_IMX31 select CPU_V6 select IMX_HAVE_PLATFORM_MXC_RNGA select MXC_AVIC + select COMMON_CLK select SMP_ON_UP if SMP config SOC_IMX35 diff --git a/arch/arm/mach-imx/Makefile b/arch/arm/mach-imx/Makefile index c4ab418..fdad8f4 100644 --- a/arch/arm/mach-imx/Makefile +++ b/arch/arm/mach-imx/Makefile @@ -6,7 +6,7 @@ obj-$(CONFIG_SOC_IMX25) += clk-imx25.o mm-imx25.o ehci-imx25.o cpu-imx25.o obj-$(CONFIG_SOC_IMX27) += cpu-imx27.o pm-imx27.o obj-$(CONFIG_SOC_IMX27) += clk-imx27.o mm-imx27.o ehci-imx27.o -obj-$(CONFIG_SOC_IMX31) += mm-imx3.o cpu-imx31.o clock-imx31.o iomux-imx31.o ehci-imx31.o pm-imx3.o +obj-$(CONFIG_SOC_IMX31) += mm-imx3.o cpu-imx31.o clk-imx31.o iomux-imx31.o ehci-imx31.o pm-imx3.o obj-$(CONFIG_SOC_IMX35) += mm-imx3.o cpu-imx35.o clock-imx35.o ehci-imx35.o pm-imx3.o obj-$(CONFIG_SOC_IMX5) += cpu-imx5.o mm-imx5.o clock-mx51-mx53.o ehci-imx5.o pm-imx5.o cpu_op-mx51.o diff --git a/arch/arm/mach-imx/clk-imx31.c b/arch/arm/mach-imx/clk-imx31.c new file mode 100644 index 0000000..a854b9c --- /dev/null +++ b/arch/arm/mach-imx/clk-imx31.c @@ -0,0 +1,182 @@ +/* + * Copyright (C) 2012 Sascha Hauer + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation. + */ + +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "clk.h" +#include "crmregs-imx3.h" + +static const char *mcu_main_sel[] = { "spll", "mpll", }; +static const char *per_sel[] = { "per_div", "ipg", }; +static const char *csi_sel[] = { "upll", "spll", }; +static const char *fir_sel[] = { "mcu_main", "upll", "spll" }; + +enum mx31_clks { + ckih, ckil, mpll, spll, upll, mcu_main, hsp, ahb, nfc, ipg, per_div, + per, csi, fir, csi_div, usb_div_pre, usb_div_post, fir_div_pre, + fir_div_post, sdhc1_gate, sdhc2_gate, gpt_gate, epit1_gate, epit2_gate, + iim_gate, ata_gate, sdma_gate, cspi3_gate, rng_gate, uart1_gate, + uart2_gate, ssi1_gate, i2c1_gate, i2c2_gate, i2c3_gate, hantro_gate, + mstick1_gate, mstick2_gate, csi_gate, rtc_gate, wdog_gate, pwm_gate, + sim_gate, ect_gate, usb_gate, kpp_gate, ipu_gate, uart3_gate, + uart4_gate, uart5_gate, owire_gate, ssi2_gate, cspi1_gate, cspi2_gate, + gacc_gate, emi_gate, rtic_gate, firi_gate, clk_max +}; + +static struct clk *clk[clk_max]; + +int __init mx31_clocks_init(unsigned long fref) +{ + void __iomem *base = MX31_IO_ADDRESS(MX31_CCM_BASE_ADDR); + int i; + + clk[ckih] = imx_clk_fixed("ckih", fref); + clk[ckil] = imx_clk_fixed("ckil", 32768); + clk[mpll] = imx_clk_pllv1("mpll", "ckih", base + MXC_CCM_MPCTL); + clk[spll] = imx_clk_pllv1("spll", "ckih", base + MXC_CCM_SRPCTL); + clk[upll] = imx_clk_pllv1("upll", "ckih", base + MXC_CCM_UPCTL); + clk[mcu_main] = imx_clk_mux("mcu_main", base + MXC_CCM_PMCR0, 31, 1, mcu_main_sel, ARRAY_SIZE(mcu_main_sel)); + clk[hsp] = imx_clk_divider("hsp", "mcu_main", base + MXC_CCM_PDR0, 11, 3); + clk[ahb] = imx_clk_divider("ahb", "mcu_main", base + MXC_CCM_PDR0, 3, 3); + clk[nfc] = imx_clk_divider("nfc", "ahb", base + MXC_CCM_PDR0, 8, 3); + clk[ipg] = imx_clk_divider("ipg", "ahb", base + MXC_CCM_PDR0, 6, 2); + clk[per_div] = imx_clk_divider("per_div", "upll", base + MXC_CCM_PDR0, 16, 5); + clk[per] = imx_clk_mux("per", base + MXC_CCM_CCMR, 24, 1, per_sel, ARRAY_SIZE(per_sel)); + clk[csi] = imx_clk_mux("csi_sel", base + MXC_CCM_CCMR, 25, 1, csi_sel, ARRAY_SIZE(csi_sel)); + clk[fir] = imx_clk_mux("fir_sel", base + MXC_CCM_CCMR, 11, 2, fir_sel, ARRAY_SIZE(fir_sel)); + clk[csi_div] = imx_clk_divider("csi_div", "csi_sel", base + MXC_CCM_PDR0, 23, 9); + clk[usb_div_pre] = imx_clk_divider("usb_div_pre", "upll", base + MXC_CCM_PDR1, 30, 2); + clk[usb_div_post] = imx_clk_divider("usb_div_post", "usb_div_pre", base + MXC_CCM_PDR1, 27, 3); + clk[fir_div_pre] = imx_clk_divider("fir_div_pre", "fir_sel", base + MXC_CCM_PDR1, 24, 3); + clk[fir_div_post] = imx_clk_divider("fir_div_post", "fir_div_pre", base + MXC_CCM_PDR1, 23, 6); + clk[sdhc1_gate] = imx_clk_gate2("sdhc1_gate", "per", base + MXC_CCM_CGR0, 0); + clk[sdhc2_gate] = imx_clk_gate2("sdhc2_gate", "per", base + MXC_CCM_CGR0, 2); + clk[gpt_gate] = imx_clk_gate2("gpt_gate", "per", base + MXC_CCM_CGR0, 4); + clk[epit1_gate] = imx_clk_gate2("epit1_gate", "per", base + MXC_CCM_CGR0, 6); + clk[epit2_gate] = imx_clk_gate2("epit2_gate", "per", base + MXC_CCM_CGR0, 8); + clk[iim_gate] = imx_clk_gate2("iim_gate", "ipg", base + MXC_CCM_CGR0, 10); + clk[ata_gate] = imx_clk_gate2("ata_gate", "ipg", base + MXC_CCM_CGR0, 12); + clk[sdma_gate] = imx_clk_gate2("sdma_gate", "ahb", base + MXC_CCM_CGR0, 14); + clk[cspi3_gate] = imx_clk_gate2("cspi3_gate", "ipg", base + MXC_CCM_CGR0, 16); + clk[rng_gate] = imx_clk_gate2("rng_gate", "ipg", base + MXC_CCM_CGR0, 18); + clk[uart1_gate] = imx_clk_gate2("uart1_gate", "per", base + MXC_CCM_CGR0, 20); + clk[uart2_gate] = imx_clk_gate2("uart2_gate", "per", base + MXC_CCM_CGR0, 22); + clk[ssi1_gate] = imx_clk_gate2("ssi1_gate", "spll", base + MXC_CCM_CGR0, 24); + clk[i2c1_gate] = imx_clk_gate2("i2c1_gate", "per", base + MXC_CCM_CGR0, 26); + clk[i2c2_gate] = imx_clk_gate2("i2c2_gate", "per", base + MXC_CCM_CGR0, 28); + clk[i2c3_gate] = imx_clk_gate2("i2c3_gate", "per", base + MXC_CCM_CGR0, 30); + clk[hantro_gate] = imx_clk_gate2("hantro_gate", "per", base + MXC_CCM_CGR1, 0); + clk[mstick1_gate] = imx_clk_gate2("mstick1_gate", "per", base + MXC_CCM_CGR1, 2); + clk[mstick2_gate] = imx_clk_gate2("mstick2_gate", "per", base + MXC_CCM_CGR1, 4); + clk[csi_gate] = imx_clk_gate2("csi_gate", "csi_div", base + MXC_CCM_CGR1, 6); + clk[rtc_gate] = imx_clk_gate2("rtc_gate", "ipg", base + MXC_CCM_CGR1, 8); + clk[wdog_gate] = imx_clk_gate2("wdog_gate", "ipg", base + MXC_CCM_CGR1, 10); + clk[pwm_gate] = imx_clk_gate2("pwm_gate", "per", base + MXC_CCM_CGR1, 12); + clk[sim_gate] = imx_clk_gate2("sim_gate", "per", base + MXC_CCM_CGR1, 14); + clk[ect_gate] = imx_clk_gate2("ect_gate", "per", base + MXC_CCM_CGR1, 16); + clk[usb_gate] = imx_clk_gate2("usb_gate", "ahb", base + MXC_CCM_CGR1, 18); + clk[kpp_gate] = imx_clk_gate2("kpp_gate", "ipg", base + MXC_CCM_CGR1, 20); + clk[ipu_gate] = imx_clk_gate2("ipu_gate", "hsp", base + MXC_CCM_CGR1, 22); + clk[uart3_gate] = imx_clk_gate2("uart3_gate", "per", base + MXC_CCM_CGR1, 24); + clk[uart4_gate] = imx_clk_gate2("uart4_gate", "per", base + MXC_CCM_CGR1, 26); + clk[uart5_gate] = imx_clk_gate2("uart5_gate", "per", base + MXC_CCM_CGR1, 28); + clk[owire_gate] = imx_clk_gate2("owire_gate", "per", base + MXC_CCM_CGR1, 30); + clk[ssi2_gate] = imx_clk_gate2("ssi2_gate", "spll", base + MXC_CCM_CGR2, 0); + clk[cspi1_gate] = imx_clk_gate2("cspi1_gate", "ipg", base + MXC_CCM_CGR2, 2); + clk[cspi2_gate] = imx_clk_gate2("cspi2_gate", "ipg", base + MXC_CCM_CGR2, 4); + clk[gacc_gate] = imx_clk_gate2("gacc_gate", "per", base + MXC_CCM_CGR2, 6); + clk[emi_gate] = imx_clk_gate2("emi_gate", "ahb", base + MXC_CCM_CGR2, 8); + clk[rtic_gate] = imx_clk_gate2("rtic_gate", "ahb", base + MXC_CCM_CGR2, 10); + clk[firi_gate] = imx_clk_gate2("firi_gate", "upll", base+MXC_CCM_CGR2, 12); + + for (i = 0; i < ARRAY_SIZE(clk); i++) + if (IS_ERR(clk[i])) + pr_err("imx31 clk %d: register failed with %ld\n", + i, PTR_ERR(clk[i])); + + clk_register_clkdev(clk[gpt_gate], "per", "imx-gpt.0"); + clk_register_clkdev(clk[ipg], "ipg", "imx-gpt.0"); + clk_register_clkdev(clk[cspi1_gate], NULL, "imx31-cspi.0"); + clk_register_clkdev(clk[cspi2_gate], NULL, "imx31-cspi.1"); + clk_register_clkdev(clk[cspi3_gate], NULL, "imx31-cspi.2"); + clk_register_clkdev(clk[pwm_gate], "pwm", NULL); + clk_register_clkdev(clk[wdog_gate], NULL, "imx2-wdt.0"); + clk_register_clkdev(clk[rtc_gate], "rtc", NULL); + clk_register_clkdev(clk[epit1_gate], "epit", NULL); + clk_register_clkdev(clk[epit2_gate], "epit", NULL); + clk_register_clkdev(clk[nfc], NULL, "mxc_nand.0"); + clk_register_clkdev(clk[ipu_gate], NULL, "ipu-core"); + clk_register_clkdev(clk[ipu_gate], NULL, "mx3_sdc_fb"); + clk_register_clkdev(clk[kpp_gate], "kpp", NULL); + clk_register_clkdev(clk[usb_div_post], "per", "mxc-ehci.0"); + clk_register_clkdev(clk[usb_gate], "ahb", "mxc-ehci.0"); + clk_register_clkdev(clk[ipg], "ipg", "mxc-ehci.0"); + clk_register_clkdev(clk[usb_div_post], "per", "mxc-ehci.1"); + clk_register_clkdev(clk[usb_gate], "ahb", "mxc-ehci.1"); + clk_register_clkdev(clk[ipg], "ipg", "mxc-ehci.1"); + clk_register_clkdev(clk[usb_div_post], "per", "mxc-ehci.2"); + clk_register_clkdev(clk[usb_gate], "ahb", "mxc-ehci.2"); + clk_register_clkdev(clk[ipg], "ipg", "mxc-ehci.2"); + clk_register_clkdev(clk[usb_div_post], "per", "fsl-usb2-udc"); + clk_register_clkdev(clk[usb_gate], "ahb", "fsl-usb2-udc"); + clk_register_clkdev(clk[ipg], "ipg", "fsl-usb2-udc"); + clk_register_clkdev(clk[csi_gate], NULL, "mx3-camera.0"); + /* i.mx31 has the i.mx21 type uart */ + clk_register_clkdev(clk[uart1_gate], "per", "imx21-uart.0"); + clk_register_clkdev(clk[ipg], "ipg", "imx21-uart.0"); + clk_register_clkdev(clk[uart2_gate], "per", "imx21-uart.1"); + clk_register_clkdev(clk[ipg], "ipg", "imx21-uart.1"); + clk_register_clkdev(clk[uart3_gate], "per", "imx21-uart.2"); + clk_register_clkdev(clk[ipg], "ipg", "imx21-uart.2"); + clk_register_clkdev(clk[uart4_gate], "per", "imx21-uart.3"); + clk_register_clkdev(clk[ipg], "ipg", "imx21-uart.3"); + clk_register_clkdev(clk[uart5_gate], "per", "imx21-uart.4"); + clk_register_clkdev(clk[ipg], "ipg", "imx21-uart.4"); + clk_register_clkdev(clk[i2c1_gate], NULL, "imx-i2c.0"); + clk_register_clkdev(clk[i2c2_gate], NULL, "imx-i2c.1"); + clk_register_clkdev(clk[i2c3_gate], NULL, "imx-i2c.2"); + clk_register_clkdev(clk[owire_gate], NULL, "mxc_w1.0"); + clk_register_clkdev(clk[sdhc1_gate], NULL, "mxc-mmc.0"); + clk_register_clkdev(clk[sdhc2_gate], NULL, "mxc-mmc.1"); + clk_register_clkdev(clk[ssi1_gate], NULL, "imx-ssi.0"); + clk_register_clkdev(clk[ssi2_gate], NULL, "imx-ssi.1"); + clk_register_clkdev(clk[firi_gate], "firi", NULL); + clk_register_clkdev(clk[ata_gate], NULL, "pata_imx"); + clk_register_clkdev(clk[rtic_gate], "rtic", NULL); + clk_register_clkdev(clk[rng_gate], "rng", NULL); + clk_register_clkdev(clk[sdma_gate], NULL, "imx31-sdma"); + clk_register_clkdev(clk[iim_gate], "iim", NULL); + + clk_set_parent(clk[csi], clk[upll]); + clk_prepare_enable(clk[emi_gate]); + clk_prepare_enable(clk[iim_gate]); + mx31_revision(); + clk_disable_unprepare(clk[iim_gate]); + + mxc_timer_init(NULL, MX31_IO_ADDRESS(MX31_GPT1_BASE_ADDR), + MX31_INT_GPT); + + return 0; +} -- cgit v0.10.2 From e726b1bd64b0b8945c171d2d4bf749fba9fc0800 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Wed, 2 May 2012 10:50:38 +0200 Subject: KVM: s390: implement KVM_CAP_NR/MAX_VCPUS Let userspace know the number of max and supported cpus for kvm on s390. Return KVM_MAX_VCPUS (currently 64) for both values. Signed-off-by: Christian Borntraeger Signed-off-by: Marcelo Tosatti diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index fd98914..e5e3800 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -136,6 +136,10 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_SYNC_REGS: r = 1; break; + case KVM_CAP_NR_VCPUS: + case KVM_CAP_MAX_VCPUS: + r = KVM_MAX_VCPUS; + break; default: r = 0; } -- cgit v0.10.2 From 90de41375ccf8373c0a39d04547f3e3c65d90ec0 Mon Sep 17 00:00:00 2001 From: Shawn Guo Date: Wed, 25 Apr 2012 22:24:44 +0800 Subject: ARM: provide a late_initcall hook for platform initialization This allows platforms to set up things that need to be done at late_initcall time. Signed-off-by: Shawn Guo Tested-by: Robert Lee Tested-by: Stephen Warren Reviewed-by: H Hartley Sweeten Acked-by: Russell King diff --git a/arch/arm/include/asm/mach/arch.h b/arch/arm/include/asm/mach/arch.h index d7692ca..0b1c94b 100644 --- a/arch/arm/include/asm/mach/arch.h +++ b/arch/arm/include/asm/mach/arch.h @@ -43,6 +43,7 @@ struct machine_desc { void (*init_irq)(void); struct sys_timer *timer; /* system tick timer */ void (*init_machine)(void); + void (*init_late)(void); #ifdef CONFIG_MULTI_IRQ_HANDLER void (*handle_irq)(struct pt_regs *); #endif diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index ebfac78..549f036 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -800,6 +800,14 @@ static int __init customize_machine(void) } arch_initcall(customize_machine); +static int __init init_machine_late(void) +{ + if (machine_desc->init_late) + machine_desc->init_late(); + return 0; +} +late_initcall(init_machine_late); + #ifdef CONFIG_KEXEC static inline unsigned long long get_total_mem(void) { -- cgit v0.10.2 From b46b0b54dea200973ce380369beb192b136d8934 Mon Sep 17 00:00:00 2001 From: Laxman Dewangan Date: Mon, 23 Apr 2012 17:41:36 +0530 Subject: ARM: dt: tegra: enable als and proximity sensor Add the device info for ALS and proximity sensor for tegra boards cardhu, ventana and seaboard. Signed-off-by: Laxman Dewangan [swarren: s/PZ02/PZ2/ in .dts files, s/seabridge/seaboard/ in commit description] Signed-off-by: Stephen Warren diff --git a/arch/arm/boot/dts/tegra-cardhu.dts b/arch/arm/boot/dts/tegra-cardhu.dts index 0a9f34a..8c81b44 100644 --- a/arch/arm/boot/dts/tegra-cardhu.dts +++ b/arch/arm/boot/dts/tegra-cardhu.dts @@ -84,6 +84,14 @@ i2c@7000c500 { clock-frequency = <100000>; + + /* ALS and Proximity sensor */ + isl29028@44 { + compatible = "isil,isl29028"; + reg = <0x44>; + interrupt-parent = <&gpio>; + interrupts = <88 0x04>; /*gpio PL0 */ + }; }; i2c@7000c700 { diff --git a/arch/arm/boot/dts/tegra-seaboard.dts b/arch/arm/boot/dts/tegra-seaboard.dts index ec33116..0f30fc9 100644 --- a/arch/arm/boot/dts/tegra-seaboard.dts +++ b/arch/arm/boot/dts/tegra-seaboard.dts @@ -270,6 +270,14 @@ micdet-delay = <100>; gpio-cfg = < 0xffffffff 0xffffffff 0 0xffffffff 0xffffffff >; }; + + /* ALS and proximity sensor */ + isl29018@44 { + compatible = "isil,isl29018"; + reg = <0x44>; + interrupt-parent = <&gpio>; + interrupts = < 202 0x04 >; /* GPIO PZ2 */ + }; }; i2c@7000c400 { diff --git a/arch/arm/boot/dts/tegra-ventana.dts b/arch/arm/boot/dts/tegra-ventana.dts index 71eb2e5..4ef84f4 100644 --- a/arch/arm/boot/dts/tegra-ventana.dts +++ b/arch/arm/boot/dts/tegra-ventana.dts @@ -256,6 +256,14 @@ micdet-delay = <100>; gpio-cfg = < 0xffffffff 0xffffffff 0 0xffffffff 0xffffffff >; }; + + /* ALS and proximity sensor */ + isl29018@44 { + compatible = "isil,isl29018"; + reg = <0x44>; + interrupt-parent = <&gpio>; + interrupts = <202 0x04>; /*gpio PZ2 */ + }; }; i2c@7000c400 { -- cgit v0.10.2 From 22bd1f7ef40a1c0f2ba796ba7cd80013adcb835d Mon Sep 17 00:00:00 2001 From: Stephen Warren Date: Thu, 26 Apr 2012 11:19:03 -0600 Subject: ARM: dt: tegra seaboard: fix I2C2 SCL rate This I2C bus is used for EDID/DDC reads and other "slow" I2C devices. This requires a 100KHz SCL (clock) rate. Signed-off-by: Stephen Warren diff --git a/arch/arm/boot/dts/tegra-seaboard.dts b/arch/arm/boot/dts/tegra-seaboard.dts index 0f30fc9..11aea88 100644 --- a/arch/arm/boot/dts/tegra-seaboard.dts +++ b/arch/arm/boot/dts/tegra-seaboard.dts @@ -281,7 +281,7 @@ }; i2c@7000c400 { - clock-frequency = <400000>; + clock-frequency = <100000>; }; i2c@7000c500 { -- cgit v0.10.2 From 802a849948789b6059899d79a4c8e71db19a6029 Mon Sep 17 00:00:00 2001 From: Stephen Warren Date: Thu, 26 Apr 2012 11:21:54 -0600 Subject: ARM: dt: tegra seaboard: configure I2C2 pinmux The I2C2 controller can be routed to either pingroup DDC or PTA. Seaboard actually uses this as an I2C bus mux, and devices are connected to both pingroups. This change statically assigns the I2C2 controller to pingroup PTA, so that on-board devices can be accessed. The DDC pingroup is used for EDID/DDC accesses which are not yet required, given the absence of any Tegra graphics support. I2C muxing will be supported later. Signed-off-by: Stephen Warren diff --git a/arch/arm/boot/dts/tegra-seaboard.dts b/arch/arm/boot/dts/tegra-seaboard.dts index 11aea88..60c9429 100644 --- a/arch/arm/boot/dts/tegra-seaboard.dts +++ b/arch/arm/boot/dts/tegra-seaboard.dts @@ -100,7 +100,7 @@ }; hdint { nvidia,pins = "hdint", "lpw0", "lpw2", "lsc1", - "lsck", "lsda", "pta"; + "lsck", "lsda"; nvidia,function = "hdmi"; }; i2cp { @@ -134,6 +134,10 @@ nvidia,pins = "pmc"; nvidia,function = "pwr_on"; }; + pta { + nvidia,pins = "pta"; + nvidia,function = "i2c2"; + }; rm { nvidia,pins = "rm"; nvidia,function = "i2c1"; -- cgit v0.10.2 From 45dbe9dd2cea5be9fe6997442aa703800ea145c8 Mon Sep 17 00:00:00 2001 From: Olof Johansson Date: Thu, 22 Dec 2011 16:33:13 +0000 Subject: ARM: dt: tegra seaboard: add i2c devices Add the known i2c devices on seaboard to the i2c table. Also rename the temperature sensor device node, and mark it as a nct1008 instead of an adt7461 (which it is -- the chips are compatible though). Signed-off-by: Olof Johansson [swarren: Removed isl29018 from patch; it's already there now. Fixed interrupts properties now that Tegra GPIO is an interrupt controller. Moved smart-battery to the correct I2C bus.] Signed-off-by: Stephen Warren diff --git a/arch/arm/boot/dts/tegra-seaboard.dts b/arch/arm/boot/dts/tegra-seaboard.dts index 60c9429..4e19dd1 100644 --- a/arch/arm/boot/dts/tegra-seaboard.dts +++ b/arch/arm/boot/dts/tegra-seaboard.dts @@ -282,10 +282,24 @@ interrupt-parent = <&gpio>; interrupts = < 202 0x04 >; /* GPIO PZ2 */ }; + + gyrometer@68 { + compatible = "invn,mpu3050"; + reg = <0x68>; + interrupt-parent = <&gpio>; + interrupts = <204 0x04>; /* gpio PZ4 */ + }; }; i2c@7000c400 { clock-frequency = <100000>; + + smart-battery@b { + compatible = "ti,bq20z75", "smart-battery-1.1"; + reg = <0xb>; + ti,i2c-retry-count = <2>; + ti,poll-retry-count = <10>; + }; }; i2c@7000c500 { @@ -295,10 +309,17 @@ i2c@7000d000 { clock-frequency = <400000>; - adt7461@4c { - compatible = "adt7461"; + temperature-sensor@4c { + compatible = "nct1008"; reg = <0x4c>; }; + + magnetometer@c { + compatible = "ak8975"; + reg = <0xc>; + interrupt-parent = <&gpio>; + interrupts = <109 0x04>; /* gpio PN5 */ + }; }; i2s@70002a00 { -- cgit v0.10.2 From 081cc0a57c2f976359d4dcefec480bdd2f848513 Mon Sep 17 00:00:00 2001 From: Stephen Warren Date: Fri, 27 Apr 2012 09:22:44 -0600 Subject: ARM: dt: tegra trimslice: add RTC I2C device According to the device's datasheet, it can support an interrupt too. However, the existing board file doesn't specify an interrupt, and I don't have the schematics, so I can't add an interrupts property. The current Linux driver doesn't support anyway. Signed-off-by: Stephen Warren diff --git a/arch/arm/boot/dts/tegra-trimslice.dts b/arch/arm/boot/dts/tegra-trimslice.dts index 98efd5b..3fe91a7 100644 --- a/arch/arm/boot/dts/tegra-trimslice.dts +++ b/arch/arm/boot/dts/tegra-trimslice.dts @@ -250,6 +250,11 @@ i2c@7000c500 { clock-frequency = <400000>; + + rtc@56 { + compatible = "emmicro,em3027"; + reg = <0x56>; + }; }; i2c@7000d000 { -- cgit v0.10.2 From c7bd632e88a84538eb966a1740bdc344a003b81f Mon Sep 17 00:00:00 2001 From: Stephen Warren Date: Fri, 27 Apr 2012 13:41:31 -0600 Subject: ARM: dt: tegra trimslice: enable SDHCI1 controller This is the micro-SD card slot. Signed-off-by: Stephen Warren diff --git a/arch/arm/boot/dts/tegra-trimslice.dts b/arch/arm/boot/dts/tegra-trimslice.dts index 3fe91a7..23e6472 100644 --- a/arch/arm/boot/dts/tegra-trimslice.dts +++ b/arch/arm/boot/dts/tegra-trimslice.dts @@ -293,10 +293,6 @@ status = "disable"; }; - sdhci@c8000000 { - status = "disable"; - }; - sdhci@c8000200 { status = "disable"; }; -- cgit v0.10.2 From 22bfe102c0c39f0bac24950b875e7bfdeb329dd9 Mon Sep 17 00:00:00 2001 From: Stephen Warren Date: Fri, 27 Apr 2012 13:24:03 -0600 Subject: ARM: dt: tegra trimslice: add support for audio * Add node for the audio codec * Enable Tegra's I2S1 controller and DAS * Add node for top-level sound complex Signed-off-by: Stephen Warren diff --git a/arch/arm/boot/dts/tegra-trimslice.dts b/arch/arm/boot/dts/tegra-trimslice.dts index 23e6472..6f8e26d 100644 --- a/arch/arm/boot/dts/tegra-trimslice.dts +++ b/arch/arm/boot/dts/tegra-trimslice.dts @@ -251,6 +251,11 @@ i2c@7000c500 { clock-frequency = <400000>; + codec: codec@1a { + compatible = "ti,tlv320aic23"; + reg = <0x1a>; + }; + rtc@56 { compatible = "emmicro,em3027"; reg = <0x56>; @@ -261,16 +266,14 @@ status = "disable"; }; - i2s@70002800 { - status = "disable"; - }; - i2s@70002a00 { status = "disable"; }; - das@70000c00 { - status = "disable"; + sound { + compatible = "nvidia,tegra-audio-trimslice"; + nvidia,i2s-controller = <&tegra_i2s1>; + nvidia,audio-codec = <&codec>; }; serial@70006000 { -- cgit v0.10.2 From 000bc9d5ed296550e7009f56cbdb4b35459beb69 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Mon, 16 Apr 2012 10:18:43 +0100 Subject: mmc: mmci: Enable Device Tree support for ux500 variants Provide a means to collect attributes specific to ST-Ericsson's ux500 variant series. This patch registers itself as the AMBA driver to be called during the probe process. Once all attributes and ux500 specifics are are collected the normal mmci core probe is called. Signed-off-by: Lee Jones Acked-by: Linus Walleij Acked-by: Arnd Bergmann Signed-off-by: Chris Ball diff --git a/drivers/mmc/host/mmci.c b/drivers/mmc/host/mmci.c index 032b847..5a7da17 100644 --- a/drivers/mmc/host/mmci.c +++ b/drivers/mmc/host/mmci.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -1196,21 +1197,70 @@ static const struct mmc_host_ops mmci_ops = { .get_cd = mmci_get_cd, }; +#ifdef CONFIG_OF +static void mmci_dt_populate_generic_pdata(struct device_node *np, + struct mmci_platform_data *pdata) +{ + int bus_width = 0; + + of_property_read_u32(np, "wp-gpios", &pdata->gpio_wp); + if (!pdata->gpio_wp) + pdata->gpio_wp = -1; + + of_property_read_u32(np, "cd-gpios", &pdata->gpio_cd); + if (!pdata->gpio_cd) + pdata->gpio_cd = -1; + + if (of_get_property(np, "cd-inverted", NULL)) + pdata->cd_invert = true; + else + pdata->cd_invert = false; + + of_property_read_u32(np, "max-frequency", &pdata->f_max); + if (!pdata->f_max) + pr_warn("%s has no 'max-frequency' property\n", np->full_name); + + if (of_get_property(np, "mmc-cap-mmc-highspeed", NULL)) + pdata->capabilities |= MMC_CAP_MMC_HIGHSPEED; + if (of_get_property(np, "mmc-cap-sd-highspeed", NULL)) + pdata->capabilities |= MMC_CAP_SD_HIGHSPEED; + + of_property_read_u32(np, "bus-width", &bus_width); + switch (bus_width) { + case 0 : + /* No bus-width supplied. */ + break; + case 4 : + pdata->capabilities |= MMC_CAP_4_BIT_DATA; + break; + case 8 : + pdata->capabilities |= MMC_CAP_8_BIT_DATA; + break; + default : + pr_warn("%s: Unsupported bus width\n", np->full_name); + } +} +#endif + static int __devinit mmci_probe(struct amba_device *dev, const struct amba_id *id) { struct mmci_platform_data *plat = dev->dev.platform_data; + struct device_node *np = dev->dev.of_node; struct variant_data *variant = id->data; struct mmci_host *host; struct mmc_host *mmc; int ret; - /* must have platform data */ - if (!plat) { - ret = -EINVAL; - goto out; + /* Must have platform data or Device Tree. */ + if (!plat && !np) { + dev_err(&dev->dev, "No plat data or DT found\n"); + return -EINVAL; } + if (np) + mmci_dt_populate_generic_pdata(np, plat); + ret = amba_request_regions(dev, DRIVER_NAME); if (ret) goto out; -- cgit v0.10.2 From 8dde8c467441069144d2430d30fca697c8fae2d3 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Wed, 14 Mar 2012 13:58:34 +0000 Subject: mmc: mmci: Add required documentation for Device Tree bindings This provides full documentation detailing each compulsory, optional and device (variant) specific properties available for use within the MMCI Device Tree. Signed-off-by: Lee Jones Acked-by: Arnd Bergmann Signed-off-by: Chris Ball diff --git a/Documentation/devicetree/bindings/mmc/mmci.txt b/Documentation/devicetree/bindings/mmc/mmci.txt new file mode 100644 index 0000000..14a81d5 --- /dev/null +++ b/Documentation/devicetree/bindings/mmc/mmci.txt @@ -0,0 +1,19 @@ +* ARM PrimeCell MultiMedia Card Interface (MMCI) PL180/1 + +The ARM PrimeCell MMCI PL180 and PL181 provides and interface for +reading and writing to MultiMedia and SD cards alike. + +Required properties: +- compatible : contains "arm,pl18x", "arm,primecell". +- reg : contains pl18x registers and length. +- interrupts : contains the device IRQ(s). +- arm,primecell-periphid : contains the PrimeCell Peripheral ID. + +Optional properties: +- wp-gpios : contains any write protect (ro) gpios +- cd-gpios : contains any card detection gpios +- cd-inverted : indicates whether the cd gpio is inverted +- max-frequency : contains the maximum operating frequency +- bus-width : number of data lines, can be <1>, <4>, or <8> +- mmc-cap-mmc-highspeed : indicates whether MMC is high speed capable +- mmc-cap-sd-highspeed : indicates whether SD is high speed capable -- cgit v0.10.2 From 9a597016058520665452390df919428e4edd7770 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Thu, 12 Apr 2012 16:51:13 +0100 Subject: mmc: mmci: Use correct GPIO binding for IRQ requests Now there are irqdomains in place for Snowball, we can request GPIO IRQs directly by their binding. This replaces the previous method of hard-coding the hwirq using u32 values in the DT. Signed-off-by: Lee Jones Signed-off-by: Chris Ball diff --git a/drivers/mmc/host/mmci.c b/drivers/mmc/host/mmci.c index 5a7da17..2303a16 100644 --- a/drivers/mmc/host/mmci.c +++ b/drivers/mmc/host/mmci.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include @@ -1203,11 +1204,11 @@ static void mmci_dt_populate_generic_pdata(struct device_node *np, { int bus_width = 0; - of_property_read_u32(np, "wp-gpios", &pdata->gpio_wp); + pdata->gpio_wp = of_get_named_gpio(np, "wp-gpios", 0); if (!pdata->gpio_wp) pdata->gpio_wp = -1; - of_property_read_u32(np, "cd-gpios", &pdata->gpio_cd); + pdata->gpio_cd = of_get_named_gpio(np, "cd-gpios", 0); if (!pdata->gpio_cd) pdata->gpio_cd = -1; -- cgit v0.10.2 From bf5fc4028ef751904a114ffc4b5d2cd9f0233142 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 4 May 2012 13:47:16 -0400 Subject: NFS: Fix O_DIRECT compile warnings Fix the following compile warnings: fs/nfs/direct.c: In function 'nfs_direct_read_schedule_segment': fs/nfs/direct.c:325:11: warning: comparison of distinct pointer types lacks a cast [enabled by default] fs/nfs/direct.c:325:11: warning: comparison of distinct pointer types lacks a cast [enabled by default] fs/nfs/direct.c:325:11: warning: comparison of distinct pointer types lacks a cast [enabled by default] fs/nfs/direct.c:352:27: warning: comparison of distinct pointer types lacks a cast [enabled by default] fs/nfs/direct.c: In function 'nfs_direct_write_schedule_segment': fs/nfs/direct.c:622:11: warning: comparison of distinct pointer types lacks a cast [enabled by default] fs/nfs/direct.c:622:11: warning: comparison of distinct pointer types lacks a cast [enabled by default] fs/nfs/direct.c:622:11: warning: comparison of distinct pointer types lacks a cast [enabled by default] fs/nfs/direct.c:650:27: warning: comparison of distinct pointer types lacks a cast [enabled by default] Reported-by: Stephen Rothwell Signed-off-by: Trond Myklebust Cc: Fred Isaman Cc: Stephen Rothwell diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index aab3016..dca9c81 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -322,7 +322,7 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_pageio_descriptor *de int i; pgbase = user_addr & ~PAGE_MASK; - bytes = min(max(rsize, PAGE_SIZE), count); + bytes = min(max_t(size_t, rsize, PAGE_SIZE), count); result = -ENOMEM; npages = nfs_page_array_len(pgbase, bytes); @@ -349,7 +349,7 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_pageio_descriptor *de for (i = 0; i < npages; i++) { struct nfs_page *req; - unsigned int req_len = min(bytes, PAGE_SIZE - pgbase); + unsigned int req_len = min_t(size_t, bytes, PAGE_SIZE - pgbase); /* XXX do we need to do the eof zeroing found in async_filler? */ req = nfs_create_request(dreq->ctx, dreq->inode, pagevec[i], @@ -619,7 +619,7 @@ static ssize_t nfs_direct_write_schedule_segment(struct nfs_pageio_descriptor *d int i; pgbase = user_addr & ~PAGE_MASK; - bytes = min(max(wsize, PAGE_SIZE), count); + bytes = min(max_t(size_t, wsize, PAGE_SIZE), count); result = -ENOMEM; npages = nfs_page_array_len(pgbase, bytes); @@ -647,7 +647,7 @@ static ssize_t nfs_direct_write_schedule_segment(struct nfs_pageio_descriptor *d for (i = 0; i < npages; i++) { struct nfs_page *req; - unsigned int req_len = min(bytes, PAGE_SIZE - pgbase); + unsigned int req_len = min_t(size_t, bytes, PAGE_SIZE - pgbase); req = nfs_create_request(dreq->ctx, dreq->inode, pagevec[i], -- cgit v0.10.2 From 1385b8117325e79f74c1e7d1cbf45c789deb85c5 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 4 May 2012 13:54:24 -0400 Subject: NFS: Fix sparse warnings Fix the following sparse warnings: fs/nfs/direct.c:221:6: warning: symbol 'nfs_direct_readpage_release' was not declared. Should it be static? fs/nfs/read.c:38:43: warning: non-ANSI function declaration of function 'nfs_readhdr_alloc' fs/nfs/objlayout/objio_osd.c:214:5: warning: symbol '__alloc_objio_seg' was not declared. Should it be static? Reported-by: Dan Carpenter Signed-off-by: Trond Myklebust Cc: Fred Isaman Cc: Boaz Harrosh diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index dca9c81..257d009 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -218,7 +218,7 @@ static void nfs_direct_complete(struct nfs_direct_req *dreq) nfs_direct_req_release(dreq); } -void nfs_direct_readpage_release(struct nfs_page *req) +static void nfs_direct_readpage_release(struct nfs_page *req) { dprintk("NFS: direct read done (%s/%lld %d@%lld)\n", req->wb_context->dentry->d_inode->i_sb->s_id, diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c index fbf4874..b47277ba 100644 --- a/fs/nfs/objlayout/objio_osd.c +++ b/fs/nfs/objlayout/objio_osd.c @@ -211,7 +211,7 @@ static void copy_single_comp(struct ore_components *oc, unsigned c, memcpy(ocomp->cred, src_comp->oc_cap.cred, sizeof(ocomp->cred)); } -int __alloc_objio_seg(unsigned numdevs, gfp_t gfp_flags, +static int __alloc_objio_seg(unsigned numdevs, gfp_t gfp_flags, struct objio_segment **pseg) { /* This is the in memory structure of the objio_segment diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 37c9eb2..f23cf25 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -35,7 +35,7 @@ static const struct nfs_pgio_completion_ops nfs_async_read_completion_ops; static struct kmem_cache *nfs_rdata_cachep; -struct nfs_read_header *nfs_readhdr_alloc() +struct nfs_read_header *nfs_readhdr_alloc(void) { struct nfs_read_header *rhdr; -- cgit v0.10.2 From a2a8e1bf210aa1d9dc6f36f4d612e3d775b83694 Mon Sep 17 00:00:00 2001 From: Edward Shao Date: Fri, 27 Apr 2012 11:14:44 +0800 Subject: kbuild: Makefile: remove unnecessary check for m68knommu ARCH ARCH is never set to m68knomm. make ARCH=m68knomm is not supported anymore. Signed-off-by: Edward Shao Acked-by: Sam Ravnborg Acked-by: Geert Uytterhoeven Acked-by: Greg Ungerer Signed-off-by: Michal Marek diff --git a/Makefile b/Makefile index 5e637c2..b2b1a3f 100644 --- a/Makefile +++ b/Makefile @@ -231,10 +231,6 @@ endif # Where to locate arch specific headers hdr-arch := $(SRCARCH) -ifeq ($(ARCH),m68knommu) - hdr-arch := m68k -endif - KCONFIG_CONFIG ?= .config export KCONFIG_CONFIG -- cgit v0.10.2 From 5efe241eac80bb534fed0a965684c2d7527af5bf Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 26 Apr 2012 01:51:32 -0700 Subject: kconfig: Add error handling to KCONFIG_ALLCONFIG - Only try to read the file specified if KCONFIG_ALL_CONFIG is set to something other than the empty string or "1". - Don't use stat to check the name passed to conf_read_simple so that zconf_fopen can find the file in the current directory or in SRCTREE removing a extremely source of confusing failure, where KCONFIG_ALL_CONFIG was not interpreted with respect to the directory make was called in. - If conf_read_simple fails complain clearly and stop processing. Allowing the simple debugging of typos. - Clearly document the behavior so it is clear to users which values are treated as flags and which values are treated as filenames. Signed-off-by: Eric W. Biederman Signed-off-by: Michal Marek diff --git a/Documentation/kbuild/kconfig.txt b/Documentation/kbuild/kconfig.txt index 9d5f2a9..a09f1a6 100644 --- a/Documentation/kbuild/kconfig.txt +++ b/Documentation/kbuild/kconfig.txt @@ -53,15 +53,15 @@ KCONFIG_ALLCONFIG -------------------------------------------------- (partially based on lkml email from/by Rob Landley, re: miniconfig) -------------------------------------------------- -The allyesconfig/allmodconfig/allnoconfig/randconfig variants can -also use the environment variable KCONFIG_ALLCONFIG as a flag or a -filename that contains config symbols that the user requires to be -set to a specific value. If KCONFIG_ALLCONFIG is used without a -filename, "make *config" checks for a file named -"all{yes/mod/no/def/random}.config" (corresponding to the *config command -that was used) for symbol values that are to be forced. If this file -is not found, it checks for a file named "all.config" to contain forced -values. +The allyesconfig/allmodconfig/allnoconfig/randconfig variants can also +use the environment variable KCONFIG_ALLCONFIG as a flag or a filename +that contains config symbols that the user requires to be set to a +specific value. If KCONFIG_ALLCONFIG is used without a filename where +KCONFIG_ALLCONFIG == "" or KCONFIG_ALLCONFIG == "1", "make *config" +checks for a file named "all{yes/mod/no/def/random}.config" +(corresponding to the *config command that was used) for symbol values +that are to be forced. If this file is not found, it checks for a +file named "all.config" to contain forced values. This enables you to create "miniature" config (miniconfig) or custom config files containing just the config symbols that you are interested diff --git a/scripts/kconfig/conf.c b/scripts/kconfig/conf.c index f208f90..0fdda91 100644 --- a/scripts/kconfig/conf.c +++ b/scripts/kconfig/conf.c @@ -574,8 +574,13 @@ int main(int ac, char **av) case alldefconfig: case randconfig: name = getenv("KCONFIG_ALLCONFIG"); - if (name && !stat(name, &tmpstat)) { - conf_read_simple(name, S_DEF_USER); + if (name && (strcmp(name, "") != 0) && (strcmp(name, "1") != 0)) { + if (conf_read_simple(name, S_DEF_USER)) { + fprintf(stderr, + _("*** Can't read seed configuration \"%s\"!\n"), + name); + exit(1); + } break; } switch (input_mode) { @@ -586,10 +591,13 @@ int main(int ac, char **av) case randconfig: name = "allrandom.config"; break; default: break; } - if (!stat(name, &tmpstat)) - conf_read_simple(name, S_DEF_USER); - else if (!stat("all.config", &tmpstat)) - conf_read_simple("all.config", S_DEF_USER); + if (conf_read_simple(name, S_DEF_USER) && + conf_read_simple("all.config", S_DEF_USER)) { + fprintf(stderr, + _("*** KCONFIG_ALLCONFIG set, but no \"%s\" or \"all.config\" file found\n"), + name); + exit(1); + } break; default: break; -- cgit v0.10.2 From 3aa3e8407ae9023c5ff59bf5c81fc2553c31eb70 Mon Sep 17 00:00:00 2001 From: Shawn Guo Date: Thu, 26 Apr 2012 09:45:39 +0800 Subject: ARM: davinci: use machine specific hook for late init Cc: Kevin Hilman Signed-off-by: Shawn Guo Acked-by: Sekhar Nori diff --git a/arch/arm/mach-davinci/board-da830-evm.c b/arch/arm/mach-davinci/board-da830-evm.c index dc1afe5..0031864 100644 --- a/arch/arm/mach-davinci/board-da830-evm.c +++ b/arch/arm/mach-davinci/board-da830-evm.c @@ -681,6 +681,7 @@ MACHINE_START(DAVINCI_DA830_EVM, "DaVinci DA830/OMAP-L137/AM17x EVM") .init_irq = cp_intc_init, .timer = &davinci_timer, .init_machine = da830_evm_init, + .init_late = davinci_init_late, .dma_zone_size = SZ_128M, .restart = da8xx_restart, MACHINE_END diff --git a/arch/arm/mach-davinci/board-da850-evm.c b/arch/arm/mach-davinci/board-da850-evm.c index a70de24..280cbed 100644 --- a/arch/arm/mach-davinci/board-da850-evm.c +++ b/arch/arm/mach-davinci/board-da850-evm.c @@ -1411,6 +1411,7 @@ MACHINE_START(DAVINCI_DA850_EVM, "DaVinci DA850/OMAP-L138/AM18x EVM") .init_irq = cp_intc_init, .timer = &davinci_timer, .init_machine = da850_evm_init, + .init_late = davinci_init_late, .dma_zone_size = SZ_128M, .restart = da8xx_restart, MACHINE_END diff --git a/arch/arm/mach-davinci/board-dm355-evm.c b/arch/arm/mach-davinci/board-dm355-evm.c index 82ed753..1c7b1f4 100644 --- a/arch/arm/mach-davinci/board-dm355-evm.c +++ b/arch/arm/mach-davinci/board-dm355-evm.c @@ -357,6 +357,7 @@ MACHINE_START(DAVINCI_DM355_EVM, "DaVinci DM355 EVM") .init_irq = davinci_irq_init, .timer = &davinci_timer, .init_machine = dm355_evm_init, + .init_late = davinci_init_late, .dma_zone_size = SZ_128M, .restart = davinci_restart, MACHINE_END diff --git a/arch/arm/mach-davinci/board-dm355-leopard.c b/arch/arm/mach-davinci/board-dm355-leopard.c index d74a8b3..8e77032 100644 --- a/arch/arm/mach-davinci/board-dm355-leopard.c +++ b/arch/arm/mach-davinci/board-dm355-leopard.c @@ -276,6 +276,7 @@ MACHINE_START(DM355_LEOPARD, "DaVinci DM355 leopard") .init_irq = davinci_irq_init, .timer = &davinci_timer, .init_machine = dm355_leopard_init, + .init_late = davinci_init_late, .dma_zone_size = SZ_128M, .restart = davinci_restart, MACHINE_END diff --git a/arch/arm/mach-davinci/board-dm365-evm.c b/arch/arm/mach-davinci/board-dm365-evm.c index 5bce2b8..688a9c5 100644 --- a/arch/arm/mach-davinci/board-dm365-evm.c +++ b/arch/arm/mach-davinci/board-dm365-evm.c @@ -618,6 +618,7 @@ MACHINE_START(DAVINCI_DM365_EVM, "DaVinci DM365 EVM") .init_irq = davinci_irq_init, .timer = &davinci_timer, .init_machine = dm365_evm_init, + .init_late = davinci_init_late, .dma_zone_size = SZ_128M, .restart = davinci_restart, MACHINE_END diff --git a/arch/arm/mach-davinci/board-dm644x-evm.c b/arch/arm/mach-davinci/board-dm644x-evm.c index 3683306..d34ed55 100644 --- a/arch/arm/mach-davinci/board-dm644x-evm.c +++ b/arch/arm/mach-davinci/board-dm644x-evm.c @@ -825,6 +825,7 @@ MACHINE_START(DAVINCI_EVM, "DaVinci DM644x EVM") .init_irq = davinci_irq_init, .timer = &davinci_timer, .init_machine = davinci_evm_init, + .init_late = davinci_init_late, .dma_zone_size = SZ_128M, .restart = davinci_restart, MACHINE_END diff --git a/arch/arm/mach-davinci/board-dm646x-evm.c b/arch/arm/mach-davinci/board-dm646x-evm.c index d72ab94..958679a 100644 --- a/arch/arm/mach-davinci/board-dm646x-evm.c +++ b/arch/arm/mach-davinci/board-dm646x-evm.c @@ -788,6 +788,7 @@ MACHINE_START(DAVINCI_DM6467_EVM, "DaVinci DM646x EVM") .init_irq = davinci_irq_init, .timer = &davinci_timer, .init_machine = evm_init, + .init_late = davinci_init_late, .dma_zone_size = SZ_128M, .restart = davinci_restart, MACHINE_END @@ -798,6 +799,7 @@ MACHINE_START(DAVINCI_DM6467TEVM, "DaVinci DM6467T EVM") .init_irq = davinci_irq_init, .timer = &davinci_timer, .init_machine = evm_init, + .init_late = davinci_init_late, .dma_zone_size = SZ_128M, .restart = davinci_restart, MACHINE_END diff --git a/arch/arm/mach-davinci/board-mityomapl138.c b/arch/arm/mach-davinci/board-mityomapl138.c index 672d820..beecde3 100644 --- a/arch/arm/mach-davinci/board-mityomapl138.c +++ b/arch/arm/mach-davinci/board-mityomapl138.c @@ -572,6 +572,7 @@ MACHINE_START(MITYOMAPL138, "MityDSP-L138/MityARM-1808") .init_irq = cp_intc_init, .timer = &davinci_timer, .init_machine = mityomapl138_init, + .init_late = davinci_init_late, .dma_zone_size = SZ_128M, .restart = da8xx_restart, MACHINE_END diff --git a/arch/arm/mach-davinci/board-neuros-osd2.c b/arch/arm/mach-davinci/board-neuros-osd2.c index a772bb4..5de69f2 100644 --- a/arch/arm/mach-davinci/board-neuros-osd2.c +++ b/arch/arm/mach-davinci/board-neuros-osd2.c @@ -278,6 +278,7 @@ MACHINE_START(NEUROS_OSD2, "Neuros OSD2") .init_irq = davinci_irq_init, .timer = &davinci_timer, .init_machine = davinci_ntosd2_init, + .init_late = davinci_init_late, .dma_zone_size = SZ_128M, .restart = davinci_restart, MACHINE_END diff --git a/arch/arm/mach-davinci/board-omapl138-hawk.c b/arch/arm/mach-davinci/board-omapl138-hawk.c index 45e8157..dc1208e 100644 --- a/arch/arm/mach-davinci/board-omapl138-hawk.c +++ b/arch/arm/mach-davinci/board-omapl138-hawk.c @@ -343,6 +343,7 @@ MACHINE_START(OMAPL138_HAWKBOARD, "AM18x/OMAP-L138 Hawkboard") .init_irq = cp_intc_init, .timer = &davinci_timer, .init_machine = omapl138_hawk_init, + .init_late = davinci_init_late, .dma_zone_size = SZ_128M, .restart = da8xx_restart, MACHINE_END diff --git a/arch/arm/mach-davinci/board-sffsdr.c b/arch/arm/mach-davinci/board-sffsdr.c index 76e67509..9078acf 100644 --- a/arch/arm/mach-davinci/board-sffsdr.c +++ b/arch/arm/mach-davinci/board-sffsdr.c @@ -157,6 +157,7 @@ MACHINE_START(SFFSDR, "Lyrtech SFFSDR") .init_irq = davinci_irq_init, .timer = &davinci_timer, .init_machine = davinci_sffsdr_init, + .init_late = davinci_init_late, .dma_zone_size = SZ_128M, .restart = davinci_restart, MACHINE_END diff --git a/arch/arm/mach-davinci/board-tnetv107x-evm.c b/arch/arm/mach-davinci/board-tnetv107x-evm.c index 5f14e30..ac4e003 100644 --- a/arch/arm/mach-davinci/board-tnetv107x-evm.c +++ b/arch/arm/mach-davinci/board-tnetv107x-evm.c @@ -282,6 +282,7 @@ MACHINE_START(TNETV107X, "TNETV107X EVM") .init_irq = cp_intc_init, .timer = &davinci_timer, .init_machine = tnetv107x_evm_board_init, + .init_late = davinci_init_late, .dma_zone_size = SZ_128M, .restart = tnetv107x_restart, MACHINE_END diff --git a/arch/arm/mach-davinci/clock.c b/arch/arm/mach-davinci/clock.c index 008772e..34668ea 100644 --- a/arch/arm/mach-davinci/clock.c +++ b/arch/arm/mach-davinci/clock.c @@ -213,7 +213,7 @@ EXPORT_SYMBOL(clk_unregister); /* * Disable any unused clocks left on by the bootloader */ -static int __init clk_disable_unused(void) +int __init davinci_clk_disable_unused(void) { struct clk *ck; @@ -237,7 +237,6 @@ static int __init clk_disable_unused(void) return 0; } -late_initcall(clk_disable_unused); #endif static unsigned long clk_sysclk_recalc(struct clk *clk) diff --git a/arch/arm/mach-davinci/common.c b/arch/arm/mach-davinci/common.c index cb9b2e4..64b0f65 100644 --- a/arch/arm/mach-davinci/common.c +++ b/arch/arm/mach-davinci/common.c @@ -117,3 +117,10 @@ void __init davinci_common_init(struct davinci_soc_info *soc_info) err: panic("davinci_common_init: SoC Initialization failed\n"); } + +void __init davinci_init_late(void) +{ + davinci_cpufreq_init(); + davinci_pm_init(); + davinci_clk_disable_unused(); +} diff --git a/arch/arm/mach-davinci/cpufreq.c b/arch/arm/mach-davinci/cpufreq.c index 031048f..4729eaa 100644 --- a/arch/arm/mach-davinci/cpufreq.c +++ b/arch/arm/mach-davinci/cpufreq.c @@ -240,10 +240,9 @@ static struct platform_driver davinci_cpufreq_driver = { .remove = __exit_p(davinci_cpufreq_remove), }; -static int __init davinci_cpufreq_init(void) +int __init davinci_cpufreq_init(void) { return platform_driver_probe(&davinci_cpufreq_driver, davinci_cpufreq_probe); } -late_initcall(davinci_cpufreq_init); diff --git a/arch/arm/mach-davinci/include/mach/common.h b/arch/arm/mach-davinci/include/mach/common.h index 5cd39a4..bdc4aa8 100644 --- a/arch/arm/mach-davinci/include/mach/common.h +++ b/arch/arm/mach-davinci/include/mach/common.h @@ -84,6 +84,25 @@ extern struct davinci_soc_info davinci_soc_info; extern void davinci_common_init(struct davinci_soc_info *soc_info); extern void davinci_init_ide(void); void davinci_restart(char mode, const char *cmd); +void davinci_init_late(void); + +#ifdef CONFIG_DAVINCI_RESET_CLOCKS +int davinci_clk_disable_unused(void); +#else +static inline int davinci_clk_disable_unused(void) { return 0; } +#endif + +#ifdef CONFIG_CPU_FREQ +int davinci_cpufreq_init(void); +#else +static inline int davinci_cpufreq_init(void) { return 0; } +#endif + +#ifdef CONFIG_SUSPEND +int davinci_pm_init(void); +#else +static inline int davinci_pm_init(void) { return 0; } +#endif /* standard place to map on-chip SRAMs; they *may* support DMA */ #define SRAM_VIRT 0xfffe0000 diff --git a/arch/arm/mach-davinci/pm.c b/arch/arm/mach-davinci/pm.c index 04c49f7..eb8360b 100644 --- a/arch/arm/mach-davinci/pm.c +++ b/arch/arm/mach-davinci/pm.c @@ -152,8 +152,7 @@ static struct platform_driver davinci_pm_driver = { .remove = __exit_p(davinci_pm_remove), }; -static int __init davinci_pm_init(void) +int __init davinci_pm_init(void) { return platform_driver_probe(&davinci_pm_driver, davinci_pm_probe); } -late_initcall(davinci_pm_init); -- cgit v0.10.2 From c914283f5e5f55ddf770b8632a5e65e5aa24343e Mon Sep 17 00:00:00 2001 From: Shawn Guo Date: Thu, 26 Apr 2012 10:05:15 +0800 Subject: ARM: ep93xx: use machine specific hook for late init Cc: Ryan Mallon Signed-off-by: Shawn Guo Acked-by: H Hartley Sweeten diff --git a/arch/arm/mach-ep93xx/adssphere.c b/arch/arm/mach-ep93xx/adssphere.c index 2d45947..a472777 100644 --- a/arch/arm/mach-ep93xx/adssphere.c +++ b/arch/arm/mach-ep93xx/adssphere.c @@ -41,5 +41,6 @@ MACHINE_START(ADSSPHERE, "ADS Sphere board") .handle_irq = vic_handle_irq, .timer = &ep93xx_timer, .init_machine = adssphere_init_machine, + .init_late = ep93xx_init_late, .restart = ep93xx_restart, MACHINE_END diff --git a/arch/arm/mach-ep93xx/core.c b/arch/arm/mach-ep93xx/core.c index 8d25895..365a90b 100644 --- a/arch/arm/mach-ep93xx/core.c +++ b/arch/arm/mach-ep93xx/core.c @@ -904,3 +904,8 @@ void ep93xx_restart(char mode, const char *cmd) while (1) ; } + +void __init ep93xx_init_late(void) +{ + crunch_init(); +} diff --git a/arch/arm/mach-ep93xx/crunch.c b/arch/arm/mach-ep93xx/crunch.c index 74753e2..a4a2ab9 100644 --- a/arch/arm/mach-ep93xx/crunch.c +++ b/arch/arm/mach-ep93xx/crunch.c @@ -79,12 +79,10 @@ static struct notifier_block crunch_notifier_block = { .notifier_call = crunch_do, }; -static int __init crunch_init(void) +int __init crunch_init(void) { thread_register_notifier(&crunch_notifier_block); elf_hwcap |= HWCAP_CRUNCH; return 0; } - -late_initcall(crunch_init); diff --git a/arch/arm/mach-ep93xx/edb93xx.c b/arch/arm/mach-ep93xx/edb93xx.c index da9047d..d74c5cd 100644 --- a/arch/arm/mach-ep93xx/edb93xx.c +++ b/arch/arm/mach-ep93xx/edb93xx.c @@ -255,6 +255,7 @@ MACHINE_START(EDB9301, "Cirrus Logic EDB9301 Evaluation Board") .handle_irq = vic_handle_irq, .timer = &ep93xx_timer, .init_machine = edb93xx_init_machine, + .init_late = ep93xx_init_late, .restart = ep93xx_restart, MACHINE_END #endif @@ -268,6 +269,7 @@ MACHINE_START(EDB9302, "Cirrus Logic EDB9302 Evaluation Board") .handle_irq = vic_handle_irq, .timer = &ep93xx_timer, .init_machine = edb93xx_init_machine, + .init_late = ep93xx_init_late, .restart = ep93xx_restart, MACHINE_END #endif @@ -281,6 +283,7 @@ MACHINE_START(EDB9302A, "Cirrus Logic EDB9302A Evaluation Board") .handle_irq = vic_handle_irq, .timer = &ep93xx_timer, .init_machine = edb93xx_init_machine, + .init_late = ep93xx_init_late, .restart = ep93xx_restart, MACHINE_END #endif @@ -294,6 +297,7 @@ MACHINE_START(EDB9307, "Cirrus Logic EDB9307 Evaluation Board") .handle_irq = vic_handle_irq, .timer = &ep93xx_timer, .init_machine = edb93xx_init_machine, + .init_late = ep93xx_init_late, .restart = ep93xx_restart, MACHINE_END #endif @@ -307,6 +311,7 @@ MACHINE_START(EDB9307A, "Cirrus Logic EDB9307A Evaluation Board") .handle_irq = vic_handle_irq, .timer = &ep93xx_timer, .init_machine = edb93xx_init_machine, + .init_late = ep93xx_init_late, .restart = ep93xx_restart, MACHINE_END #endif @@ -320,6 +325,7 @@ MACHINE_START(EDB9312, "Cirrus Logic EDB9312 Evaluation Board") .handle_irq = vic_handle_irq, .timer = &ep93xx_timer, .init_machine = edb93xx_init_machine, + .init_late = ep93xx_init_late, .restart = ep93xx_restart, MACHINE_END #endif @@ -333,6 +339,7 @@ MACHINE_START(EDB9315, "Cirrus Logic EDB9315 Evaluation Board") .handle_irq = vic_handle_irq, .timer = &ep93xx_timer, .init_machine = edb93xx_init_machine, + .init_late = ep93xx_init_late, .restart = ep93xx_restart, MACHINE_END #endif @@ -346,6 +353,7 @@ MACHINE_START(EDB9315A, "Cirrus Logic EDB9315A Evaluation Board") .handle_irq = vic_handle_irq, .timer = &ep93xx_timer, .init_machine = edb93xx_init_machine, + .init_late = ep93xx_init_late, .restart = ep93xx_restart, MACHINE_END #endif diff --git a/arch/arm/mach-ep93xx/gesbc9312.c b/arch/arm/mach-ep93xx/gesbc9312.c index fcdffbe..437c341 100644 --- a/arch/arm/mach-ep93xx/gesbc9312.c +++ b/arch/arm/mach-ep93xx/gesbc9312.c @@ -41,5 +41,6 @@ MACHINE_START(GESBC9312, "Glomation GESBC-9312-sx") .handle_irq = vic_handle_irq, .timer = &ep93xx_timer, .init_machine = gesbc9312_init_machine, + .init_late = ep93xx_init_late, .restart = ep93xx_restart, MACHINE_END diff --git a/arch/arm/mach-ep93xx/include/mach/platform.h b/arch/arm/mach-ep93xx/include/mach/platform.h index 602bd87..1ecb040 100644 --- a/arch/arm/mach-ep93xx/include/mach/platform.h +++ b/arch/arm/mach-ep93xx/include/mach/platform.h @@ -53,5 +53,12 @@ void ep93xx_init_devices(void); extern struct sys_timer ep93xx_timer; void ep93xx_restart(char, const char *); +void ep93xx_init_late(void); + +#ifdef CONFIG_CRUNCH +int crunch_init(void); +#else +static inline int crunch_init(void) { return 0; } +#endif #endif diff --git a/arch/arm/mach-ep93xx/micro9.c b/arch/arm/mach-ep93xx/micro9.c index dc431c5..3d7cdab 100644 --- a/arch/arm/mach-ep93xx/micro9.c +++ b/arch/arm/mach-ep93xx/micro9.c @@ -85,6 +85,7 @@ MACHINE_START(MICRO9, "Contec Micro9-High") .handle_irq = vic_handle_irq, .timer = &ep93xx_timer, .init_machine = micro9_init_machine, + .init_late = ep93xx_init_late, .restart = ep93xx_restart, MACHINE_END #endif @@ -98,6 +99,7 @@ MACHINE_START(MICRO9M, "Contec Micro9-Mid") .handle_irq = vic_handle_irq, .timer = &ep93xx_timer, .init_machine = micro9_init_machine, + .init_late = ep93xx_init_late, .restart = ep93xx_restart, MACHINE_END #endif @@ -111,6 +113,7 @@ MACHINE_START(MICRO9L, "Contec Micro9-Lite") .handle_irq = vic_handle_irq, .timer = &ep93xx_timer, .init_machine = micro9_init_machine, + .init_late = ep93xx_init_late, .restart = ep93xx_restart, MACHINE_END #endif @@ -124,6 +127,7 @@ MACHINE_START(MICRO9S, "Contec Micro9-Slim") .handle_irq = vic_handle_irq, .timer = &ep93xx_timer, .init_machine = micro9_init_machine, + .init_late = ep93xx_init_late, .restart = ep93xx_restart, MACHINE_END #endif diff --git a/arch/arm/mach-ep93xx/simone.c b/arch/arm/mach-ep93xx/simone.c index f40c298..33dc079 100644 --- a/arch/arm/mach-ep93xx/simone.c +++ b/arch/arm/mach-ep93xx/simone.c @@ -86,5 +86,6 @@ MACHINE_START(SIM_ONE, "Simplemachines Sim.One Board") .handle_irq = vic_handle_irq, .timer = &ep93xx_timer, .init_machine = simone_init_machine, + .init_late = ep93xx_init_late, .restart = ep93xx_restart, MACHINE_END diff --git a/arch/arm/mach-ep93xx/snappercl15.c b/arch/arm/mach-ep93xx/snappercl15.c index 0c00852..eb28237 100644 --- a/arch/arm/mach-ep93xx/snappercl15.c +++ b/arch/arm/mach-ep93xx/snappercl15.c @@ -183,5 +183,6 @@ MACHINE_START(SNAPPER_CL15, "Bluewater Systems Snapper CL15") .handle_irq = vic_handle_irq, .timer = &ep93xx_timer, .init_machine = snappercl15_init_machine, + .init_late = ep93xx_init_late, .restart = ep93xx_restart, MACHINE_END diff --git a/arch/arm/mach-ep93xx/ts72xx.c b/arch/arm/mach-ep93xx/ts72xx.c index 5ea7909..d4ef339 100644 --- a/arch/arm/mach-ep93xx/ts72xx.c +++ b/arch/arm/mach-ep93xx/ts72xx.c @@ -252,5 +252,6 @@ MACHINE_START(TS72XX, "Technologic Systems TS-72xx SBC") .handle_irq = vic_handle_irq, .timer = &ep93xx_timer, .init_machine = ts72xx_init_machine, + .init_late = ep93xx_init_late, .restart = ep93xx_restart, MACHINE_END diff --git a/arch/arm/mach-ep93xx/vision_ep9307.c b/arch/arm/mach-ep93xx/vision_ep9307.c index ba156eb..2905a49 100644 --- a/arch/arm/mach-ep93xx/vision_ep9307.c +++ b/arch/arm/mach-ep93xx/vision_ep9307.c @@ -367,5 +367,6 @@ MACHINE_START(VISION_EP9307, "Vision Engraving Systems EP9307") .handle_irq = vic_handle_irq, .timer = &ep93xx_timer, .init_machine = vision_init_machine, + .init_late = ep93xx_init_late, .restart = ep93xx_restart, MACHINE_END -- cgit v0.10.2 From 275eb135cfa2980d4ed1ec24746e05ca42f8418b Mon Sep 17 00:00:00 2001 From: Sam Ravnborg Date: Sat, 5 May 2012 10:18:39 +0200 Subject: kbuild: drop unused KBUILD_VMLINUX_OBJS from top-level Makefile Signed-off-by: Sam Ravnborg Signed-off-by: Michal Marek diff --git a/Makefile b/Makefile index b2b1a3f..06aff74 100644 --- a/Makefile +++ b/Makefile @@ -754,7 +754,6 @@ vmlinux-init := $(head-y) $(init-y) vmlinux-main := $(core-y) $(libs-y) $(drivers-y) $(net-y) vmlinux-all := $(vmlinux-init) $(vmlinux-main) vmlinux-lds := arch/$(SRCARCH)/kernel/vmlinux.lds -export KBUILD_VMLINUX_OBJS := $(vmlinux-all) # Rule to link vmlinux - also used during CONFIG_KALLSYMS # May be overridden by arch/$(ARCH)/Makefile -- cgit v0.10.2 From 95698570510b7be9ab1542a4a908242c05a9b0ed Mon Sep 17 00:00:00 2001 From: Sam Ravnborg Date: Sat, 5 May 2012 10:18:40 +0200 Subject: kbuild: refactor final link of sparc32 sparc32 uses an additional final link to support btfix. Introduce a new set of exported variables in the top-level Makefile to make the extra linking step simpler. sparc32 has hardcoded knowledge of kallsyms support. This fix include support for EXTRA_KALLSYM_PASS=1. The ugly part is that it is hardcoded in the arch/sparc/boot Makefile. Signed-off-by: Sam Ravnborg Cc: "David S. Miller" Signed-off-by: Michal Marek diff --git a/Makefile b/Makefile index 06aff74..7ea0c07 100644 --- a/Makefile +++ b/Makefile @@ -723,6 +723,11 @@ libs-y1 := $(patsubst %/, %/lib.a, $(libs-y)) libs-y2 := $(patsubst %/, %/built-in.o, $(libs-y)) libs-y := $(libs-y1) $(libs-y2) +# externally visible symbols +export KBUILD_VMLINUX_INIT := $(head-y) $(init-y) +export KBUILD_VMLINUX_MAIN := $(core-y) $(libs-y) $(drivers-y) $(net-y) +export KBUILD_LDS := arch/$(SRCARCH)/kernel/vmlinux.lds + # Build vmlinux # --------------------------------------------------------------------------- # vmlinux is built from the objects selected by $(vmlinux-init) and diff --git a/arch/sparc/Makefile b/arch/sparc/Makefile index eddcfb3..3195f77 100644 --- a/arch/sparc/Makefile +++ b/arch/sparc/Makefile @@ -72,17 +72,6 @@ libs-y += arch/sparc/lib/ drivers-$(CONFIG_OPROFILE) += arch/sparc/oprofile/ -# Export what is needed by arch/sparc/boot/Makefile -export VMLINUX_INIT VMLINUX_MAIN -VMLINUX_INIT := $(head-y) $(init-y) -VMLINUX_MAIN := $(core-y) kernel/ mm/ fs/ ipc/ security/ crypto/ block/ -VMLINUX_MAIN += $(patsubst %/, %/lib.a, $(libs-y)) $(libs-y) -VMLINUX_MAIN += $(drivers-y) $(net-y) - -ifdef CONFIG_KALLSYMS -export kallsyms.o := .tmp_kallsyms2.o -endif - boot := arch/sparc/boot # Default target diff --git a/arch/sparc/boot/Makefile b/arch/sparc/boot/Makefile index d56d199..78f1994 100644 --- a/arch/sparc/boot/Makefile +++ b/arch/sparc/boot/Makefile @@ -39,11 +39,15 @@ define rule_image echo 'cmd_$@ := $(cmd_image)' > $(@D)/.$(@F).cmd endef -BTOBJS := $(patsubst %/, %/built-in.o, $(VMLINUX_INIT)) -BTLIBS := $(patsubst %/, %/built-in.o, $(VMLINUX_MAIN)) -LDFLAGS_image := -T arch/sparc/kernel/vmlinux.lds $(BTOBJS) \ - --start-group $(BTLIBS) --end-group \ - $(kallsyms.o) $(obj)/btfix.o +# Support for kallsyms +kallsyms-$(CONFIG_KALLSYMS) := .tmp_kallsyms2.o +ifdef KALLSYMS_EXTRA_PASS + kallsyms-$(CONFIG_KALLSYMS) := .tmp_kallsyms3.o +endif + +LDFLAGS_image := -T $(KBUILD_LDS) $(KBUILD_VMLINUX_INIT) \ + --start-group $(KBUILD_VMLINUX_MAIN) --end-group \ + $(kallsyms-y) $(obj)/btfix.o # Link the final image including btfixup'ed symbols. # This is a replacement for the link done in the top-level Makefile. -- cgit v0.10.2 From 1f2bfbd00e466ff3489b2ca5cc75b1cccd14c123 Mon Sep 17 00:00:00 2001 From: Sam Ravnborg Date: Sat, 5 May 2012 10:18:41 +0200 Subject: kbuild: link of vmlinux moved to a script Move the final link of vmlinux to a script to improve readability and maintainability of the code. The Makefile fragments used to link vmlinux has over the years seen far too many changes and the logic had become hard to follow. As the process by nature is serialized there was nothing gained including this in the Makefile. "um" has special link requirments - and the only way to handle this was to hard-code the linking of "um" in the script. This was better than trying to modularize it only for the benefit of "um" anyway. The shell script has been improved after input from: Arnaud Lacombe Nick Bowler Signed-off-by: Sam Ravnborg Cc: Arnaud Lacombe Cc: Nick Bowler Cc: Richard Weinberger Signed-off-by: Michal Marek diff --git a/Makefile b/Makefile index 7ea0c07..a7cf6a1 100644 --- a/Makefile +++ b/Makefile @@ -337,7 +337,6 @@ AWK = awk GENKSYMS = scripts/genksyms/genksyms INSTALLKERNEL := installkernel DEPMOD = /sbin/depmod -KALLSYMS = scripts/kallsyms PERL = perl CHECK = sparse @@ -723,191 +722,21 @@ libs-y1 := $(patsubst %/, %/lib.a, $(libs-y)) libs-y2 := $(patsubst %/, %/built-in.o, $(libs-y)) libs-y := $(libs-y1) $(libs-y2) -# externally visible symbols +# Externally visible symbols (used by link-vmlinux.sh) export KBUILD_VMLINUX_INIT := $(head-y) $(init-y) export KBUILD_VMLINUX_MAIN := $(core-y) $(libs-y) $(drivers-y) $(net-y) export KBUILD_LDS := arch/$(SRCARCH)/kernel/vmlinux.lds +export LDFLAGS_vmlinux -# Build vmlinux -# --------------------------------------------------------------------------- -# vmlinux is built from the objects selected by $(vmlinux-init) and -# $(vmlinux-main). Most are built-in.o files from top-level directories -# in the kernel tree, others are specified in arch/$(ARCH)/Makefile. -# Ordering when linking is important, and $(vmlinux-init) must be first. -# -# vmlinux -# ^ -# | -# +-< $(vmlinux-init) -# | +--< init/version.o + more -# | -# +--< $(vmlinux-main) -# | +--< driver/built-in.o mm/built-in.o + more -# | -# +-< kallsyms.o (see description in CONFIG_KALLSYMS section) -# -# vmlinux version (uname -v) cannot be updated during normal -# descending-into-subdirs phase since we do not yet know if we need to -# update vmlinux. -# Therefore this step is delayed until just before final link of vmlinux - -# except in the kallsyms case where it is done just before adding the -# symbols to the kernel. -# -# System.map is generated to document addresses of all kernel symbols - -vmlinux-init := $(head-y) $(init-y) -vmlinux-main := $(core-y) $(libs-y) $(drivers-y) $(net-y) -vmlinux-all := $(vmlinux-init) $(vmlinux-main) -vmlinux-lds := arch/$(SRCARCH)/kernel/vmlinux.lds - -# Rule to link vmlinux - also used during CONFIG_KALLSYMS -# May be overridden by arch/$(ARCH)/Makefile -quiet_cmd_vmlinux__ ?= LD $@ - cmd_vmlinux__ ?= $(LD) $(LDFLAGS) $(LDFLAGS_vmlinux) -o $@ \ - -T $(vmlinux-lds) $(vmlinux-init) \ - --start-group $(vmlinux-main) --end-group \ - $(filter-out $(vmlinux-lds) $(vmlinux-init) $(vmlinux-main) vmlinux.o FORCE ,$^) - -# Generate new vmlinux version -quiet_cmd_vmlinux_version = GEN .version - cmd_vmlinux_version = set -e; \ - if [ ! -r .version ]; then \ - rm -f .version; \ - echo 1 >.version; \ - else \ - mv .version .old_version; \ - expr 0$$(cat .old_version) + 1 >.version; \ - fi; \ - $(MAKE) $(build)=init - -# Generate System.map -quiet_cmd_sysmap = SYSMAP - cmd_sysmap = $(CONFIG_SHELL) $(srctree)/scripts/mksysmap - -# Link of vmlinux -# If CONFIG_KALLSYMS is set .version is already updated -# Generate System.map and verify that the content is consistent -# Use + in front of the vmlinux_version rule to silent warning with make -j2 -# First command is ':' to allow us to use + in front of the rule -define rule_vmlinux__ - : - $(if $(CONFIG_KALLSYMS),,+$(call cmd,vmlinux_version)) - - $(call cmd,vmlinux__) - $(Q)echo 'cmd_$@ := $(cmd_vmlinux__)' > $(@D)/.$(@F).cmd - - $(Q)$(if $($(quiet)cmd_sysmap), \ - echo ' $($(quiet)cmd_sysmap) System.map' &&) \ - $(cmd_sysmap) $@ System.map; \ - if [ $$? -ne 0 ]; then \ - rm -f $@; \ - /bin/false; \ - fi; - $(verify_kallsyms) -endef - - -ifdef CONFIG_KALLSYMS -# Generate section listing all symbols and add it into vmlinux $(kallsyms.o) -# It's a three stage process: -# o .tmp_vmlinux1 has all symbols and sections, but __kallsyms is -# empty -# Running kallsyms on that gives us .tmp_kallsyms1.o with -# the right size - vmlinux version (uname -v) is updated during this step -# o .tmp_vmlinux2 now has a __kallsyms section of the right size, -# but due to the added section, some addresses have shifted. -# From here, we generate a correct .tmp_kallsyms2.o -# o The correct .tmp_kallsyms2.o is linked into the final vmlinux. -# o Verify that the System.map from vmlinux matches the map from -# .tmp_vmlinux2, just in case we did not generate kallsyms correctly. -# o If 'make KALLSYMS_EXTRA_PASS=1" was used, do an extra pass using -# .tmp_vmlinux3 and .tmp_kallsyms3.o. This is only meant as a -# temporary bypass to allow the kernel to be built while the -# maintainers work out what went wrong with kallsyms. - -last_kallsyms := 2 - -ifdef KALLSYMS_EXTRA_PASS -ifneq ($(KALLSYMS_EXTRA_PASS),0) -last_kallsyms := 3 -endif -endif - -kallsyms.o := .tmp_kallsyms$(last_kallsyms).o - -define verify_kallsyms - $(Q)$(if $($(quiet)cmd_sysmap), \ - echo ' $($(quiet)cmd_sysmap) .tmp_System.map' &&) \ - $(cmd_sysmap) .tmp_vmlinux$(last_kallsyms) .tmp_System.map - $(Q)cmp -s System.map .tmp_System.map || \ - (echo Inconsistent kallsyms data; \ - echo This is a bug - please report about it; \ - echo Try "make KALLSYMS_EXTRA_PASS=1" as a workaround; \ - rm .tmp_kallsyms* ; /bin/false ) -endef - -# Update vmlinux version before link -# Use + in front of this rule to silent warning about make -j1 -# First command is ':' to allow us to use + in front of this rule -cmd_ksym_ld = $(cmd_vmlinux__) -define rule_ksym_ld - : - +$(call cmd,vmlinux_version) - $(call cmd,vmlinux__) - $(Q)echo 'cmd_$@ := $(cmd_vmlinux__)' > $(@D)/.$(@F).cmd -endef - -# Generate .S file with all kernel symbols -quiet_cmd_kallsyms = KSYM $@ - cmd_kallsyms = $(NM) -n $< | $(KALLSYMS) \ - $(if $(CONFIG_KALLSYMS_ALL),--all-symbols) > $@ - -.tmp_kallsyms1.o .tmp_kallsyms2.o .tmp_kallsyms3.o: %.o: %.S scripts FORCE - $(call if_changed_dep,as_o_S) +vmlinux-deps := $(KBUILD_LDS) $(KBUILD_VMLINUX_INIT) $(KBUILD_VMLINUX_MAIN) -.tmp_kallsyms%.S: .tmp_vmlinux% $(KALLSYMS) - $(call cmd,kallsyms) +# Final link of vmlinux + cmd_link-vmlinux = $(CONFIG_SHELL) $< $(LD) $(LDFLAGS) $(LDFLAGS_vmlinux) +quiet_cmd_link-vmlinux = LINK $@ -# .tmp_vmlinux1 must be complete except kallsyms, so update vmlinux version -.tmp_vmlinux1: $(vmlinux-lds) $(vmlinux-all) FORCE - $(call if_changed_rule,ksym_ld) - -.tmp_vmlinux2: $(vmlinux-lds) $(vmlinux-all) .tmp_kallsyms1.o FORCE - $(call if_changed,vmlinux__) - -.tmp_vmlinux3: $(vmlinux-lds) $(vmlinux-all) .tmp_kallsyms2.o FORCE - $(call if_changed,vmlinux__) - -# Needs to visit scripts/ before $(KALLSYMS) can be used. -$(KALLSYMS): scripts ; - -# Generate some data for debugging strange kallsyms problems -debug_kallsyms: .tmp_map$(last_kallsyms) - -.tmp_map%: .tmp_vmlinux% FORCE - ($(OBJDUMP) -h $< | $(AWK) '/^ +[0-9]/{print $$4 " 0 " $$2}'; $(NM) $<) | sort > $@ - -.tmp_map3: .tmp_map2 - -.tmp_map2: .tmp_map1 - -endif # ifdef CONFIG_KALLSYMS - -# Do modpost on a prelinked vmlinux. The finally linked vmlinux has -# relevant sections renamed as per the linker script. -quiet_cmd_vmlinux-modpost = LD $@ - cmd_vmlinux-modpost = $(LD) $(LDFLAGS) -r -o $@ \ - $(vmlinux-init) --start-group $(vmlinux-main) --end-group \ - $(filter-out $(vmlinux-init) $(vmlinux-main) FORCE ,$^) -define rule_vmlinux-modpost - : - +$(call cmd,vmlinux-modpost) - $(Q)$(MAKE) -f $(srctree)/scripts/Makefile.modpost $@ - $(Q)echo 'cmd_$@ := $(cmd_vmlinux-modpost)' > $(dot-target).cmd -endef - -# vmlinux image - including updated kernel symbols -vmlinux: $(vmlinux-lds) $(vmlinux-init) $(vmlinux-main) vmlinux.o $(kallsyms.o) FORCE +# Include targets which we want to +# execute if the rest of the kernel build went well. +vmlinux: scripts/link-vmlinux.sh $(vmlinux-deps) FORCE ifdef CONFIG_HEADERS_CHECK $(Q)$(MAKE) -f $(srctree)/Makefile headers_check endif @@ -917,22 +746,11 @@ endif ifdef CONFIG_BUILD_DOCSRC $(Q)$(MAKE) $(build)=Documentation endif - $(call vmlinux-modpost) - $(call if_changed_rule,vmlinux__) - $(Q)rm -f .old_version - -# build vmlinux.o first to catch section mismatch errors early -ifdef CONFIG_KALLSYMS -.tmp_vmlinux1: vmlinux.o -endif - -modpost-init := $(filter-out init/built-in.o, $(vmlinux-init)) -vmlinux.o: $(modpost-init) $(vmlinux-main) FORCE - $(call if_changed_rule,vmlinux-modpost) + +$(call if_changed,link-vmlinux) # The actual objects are generated when descending, # make sure no implicit rule kicks in -$(sort $(vmlinux-init) $(vmlinux-main)) $(vmlinux-lds): $(vmlinux-dirs) ; +$(sort $(vmlinux-deps)): $(vmlinux-dirs) ; # Handle descending into subdirectories listed in $(vmlinux-dirs) # Preset locale variables to speed up the build process. Limit locale @@ -1156,8 +974,6 @@ endif # CONFIG_MODULES # Directories & files removed with 'make clean' CLEAN_DIRS += $(MODVERDIR) -CLEAN_FILES += vmlinux System.map \ - .tmp_kallsyms* .tmp_version .tmp_vmlinux* .tmp_System.map # Directories & files removed with 'make mrproper' MRPROPER_DIRS += include/config usr/include include/generated \ @@ -1403,6 +1219,7 @@ scripts: ; endif # KBUILD_EXTMOD clean: $(clean-dirs) + $(Q)$(CONFIG_SHELL) $(srctree)/scripts/link-vmlinux.sh clean $(call cmd,rmdirs) $(call cmd,rmfiles) @find $(if $(KBUILD_EXTMOD), $(KBUILD_EXTMOD), .) $(RCS_FIND_IGNORE) \ @@ -1536,14 +1353,6 @@ quiet_cmd_depmod = DEPMOD $(KERNELRELEASE) cmd_crmodverdir = $(Q)mkdir -p $(MODVERDIR) \ $(if $(KBUILD_MODULES),; rm -f $(MODVERDIR)/*) -a_flags = -Wp,-MD,$(depfile) $(KBUILD_AFLAGS) $(AFLAGS_KERNEL) \ - $(KBUILD_AFLAGS_KERNEL) \ - $(NOSTDINC_FLAGS) $(LINUXINCLUDE) $(KBUILD_CPPFLAGS) \ - $(modkern_aflags) $(EXTRA_AFLAGS) $(AFLAGS_$(basetarget).o) - -quiet_cmd_as_o_S = AS $@ -cmd_as_o_S = $(CC) $(a_flags) -c -o $@ $< - # read all saved command lines targets := $(wildcard $(sort $(targets))) diff --git a/arch/um/Makefile b/arch/um/Makefile index 55c0661..0970910 100644 --- a/arch/um/Makefile +++ b/arch/um/Makefile @@ -121,15 +121,8 @@ LINK_WRAPS = -Wl,--wrap,malloc -Wl,--wrap,free -Wl,--wrap,calloc LD_FLAGS_CMDLINE = $(foreach opt,$(LDFLAGS),-Wl,$(opt)) -CFLAGS_vmlinux := $(LINK-y) $(LINK_WRAPS) $(LD_FLAGS_CMDLINE) -define cmd_vmlinux__ - $(CC) $(CFLAGS_vmlinux) -o $@ \ - -Wl,-T,$(vmlinux-lds) $(vmlinux-init) \ - -Wl,--start-group $(vmlinux-main) -Wl,--end-group \ - -lutil \ - $(filter-out $(vmlinux-lds) $(vmlinux-init) $(vmlinux-main) vmlinux.o \ - FORCE ,$^) ; rm -f linux -endef +# Used by link-vmlinux.sh which has special support for um link +export CFLAGS_vmlinux := $(LINK-y) $(LINK_WRAPS) $(LD_FLAGS_CMDLINE) # When cleaning we don't include .config, so we don't include # TT or skas makefiles and don't clean skas_ptregs.h. diff --git a/scripts/link-vmlinux.sh b/scripts/link-vmlinux.sh new file mode 100644 index 0000000..26c5b65 --- /dev/null +++ b/scripts/link-vmlinux.sh @@ -0,0 +1,211 @@ +#!/bin/sh +# +# link vmlinux +# +# vmlinux is linked from the objects selected by $(KBUILD_VMLINUX_INIT) and +# $(KBUILD_VMLINUX_MAIN). Most are built-in.o files from top-level directories +# in the kernel tree, others are specified in arch/$(ARCH)/Makefile. +# Ordering when linking is important, and $(KBUILD_VMLINUX_INIT) must be first. +# +# vmlinux +# ^ +# | +# +-< $(KBUILD_VMLINUX_INIT) +# | +--< init/version.o + more +# | +# +--< $(KBUILD_VMLINUX_MAIN) +# | +--< drivers/built-in.o mm/built-in.o + more +# | +# +-< ${kallsymso} (see description in KALLSYMS section) +# +# vmlinux version (uname -v) cannot be updated during normal +# descending-into-subdirs phase since we do not yet know if we need to +# update vmlinux. +# Therefore this step is delayed until just before final link of vmlinux. +# +# System.map is generated to document addresses of all kernel symbols + +# Error out on error +set -e + +# Nice output in kbuild format +# Will be supressed by "make -s" +info() +{ + if [ "${quiet}" != "silent_" ]; then + printf " %-7s %s\n" ${1} ${2} + fi +} + +# Link of vmlinux.o used for section mismatch analysis +# ${1} output file +modpost_link() +{ + ${LD} ${LDFLAGS} -r -o ${1} ${KBUILD_VMLINUX_INIT} \ + --start-group ${KBUILD_VMLINUX_MAIN} --end-group +} + +# Link of vmlinux +# ${1} - optional extra .o files +# ${2} - output file +vmlinux_link() +{ + local lds="${objtree}/${KBUILD_LDS}" + + if [ "${SRCARCH}" != "um" ]; then + ${LD} ${LDFLAGS} ${LDFLAGS_vmlinux} -o ${2} \ + -T ${lds} ${KBUILD_VMLINUX_INIT} \ + --start-group ${KBUILD_VMLINUX_MAIN} --end-group ${1} + else + ${CC} ${CFLAGS_vmlinux} -o ${2} \ + -Wl,-T,${lds} ${KBUILD_VMLINUX_INIT} \ + -Wl,--start-group \ + ${KBUILD_VMLINUX_MAIN} \ + -Wl,--end-group \ + -lutil ${1} + rm -f linux + fi +} + + +# Create ${2} .o file with all symbols from the ${1} object file +kallsyms() +{ + info KSYM ${2} + local kallsymopt; + + if [ -n "${CONFIG_KALLSYMS_ALL}" ]; then + kallsymopt=--all-symbols + fi + + local aflags="${KBUILD_AFLAGS} ${NOSTDINC_FLAGS} \ + ${LINUXINCLUDE} ${KBUILD_CPPFLAGS}" + + ${NM} -n ${1} | \ + scripts/kallsyms ${kallsymopt} | \ + ${CC} ${aflags} -c -o ${2} -x assembler-with-cpp - +} + +# Create map file with all symbols from ${1} +# See mksymap for additional details +mksysmap() +{ + ${CONFIG_SHELL} "${srctree}/scripts/mksysmap" ${1} ${2} +} + +# Delete output files in case of error +trap cleanup SIGHUP SIGINT SIGQUIT SIGTERM ERR +cleanup() +{ + rm -f .old_version + rm -f .tmp_System.map + rm -f .tmp_kallsyms* + rm -f .tmp_version + rm -f .tmp_vmlinux* + rm -f System.map + rm -f vmlinux + rm -f vmlinux.o +} + +# +# +# Use "make V=1" to debug this script +case "${KBUILD_VERBOSE}" in +*1*) + set -x + ;; +esac + +if [ "$1" = "clean" ]; then + cleanup + exit 0 +fi + +# We need access to CONFIG_ symbols +. ./.config + +#link vmlinux.o +info LD vmlinux.o +modpost_link vmlinux.o + +# modpost vmlinux.o to check for section mismatches +${MAKE} -f "${srctree}/scripts/Makefile.modpost" vmlinux.o + +# Update version +info GEN .version +if [ ! -r .version ]; then + rm -f .version; + echo 1 >.version; +else + mv .version .old_version; + expr 0$(cat .old_version) + 1 >.version; +fi; + +# final build of init/ +${MAKE} -f "${srctree}/scripts/Makefile.build" obj=init + +kallsymso="" +kallsyms_vmlinux="" +if [ -n "${CONFIG_KALLSYMS}" ]; then + + # kallsyms support + # Generate section listing all symbols and add it into vmlinux + # It's a three step process: + # 1) Link .tmp_vmlinux1 so it has all symbols and sections, + # but __kallsyms is empty. + # Running kallsyms on that gives us .tmp_kallsyms1.o with + # the right size + # 2) Link .tmp_vmlinux2 so it now has a __kallsyms section of + # the right size, but due to the added section, some + # addresses have shifted. + # From here, we generate a correct .tmp_kallsyms2.o + # 2a) We may use an extra pass as this has been necessary to + # woraround some alignment related bugs. + # KALLSYMS_EXTRA_PASS=1 is used to trigger this. + # 3) The correct ${kallsymso} is linked into the final vmlinux. + # + # a) Verify that the System.map from vmlinux matches the map from + # ${kallsymso}. + + kallsymso=.tmp_kallsyms2.o + kallsyms_vmlinux=.tmp_vmlinux2 + + # step 1 + vmlinux_link "" .tmp_vmlinux1 + kallsyms .tmp_vmlinux1 .tmp_kallsyms1.o + + # step 2 + vmlinux_link .tmp_kallsyms1.o .tmp_vmlinux2 + kallsyms .tmp_vmlinux2 .tmp_kallsyms2.o + + # step 2a + if [ -n "${KALLSYMS_EXTRA_PASS}" ]; then + kallsymso=.tmp_kallsyms3.o + kallsyms_vmlinux=.tmp_vmlinux3 + + vmlinux_link .tmp_kallsyms2.o .tmp_vmlinux3 + + kallsyms .tmp_vmlinux3 .tmp_kallsyms3.o + fi +fi + +info LD vmlinux +vmlinux_link "${kallsymso}" vmlinux + +info SYSMAP System.map +mksysmap vmlinux System.map + +# step a (see comment above) +if [ -n "${CONFIG_KALLSYMS}" ]; then + mksysmap ${kallsyms_vmlinux} .tmp_System.map + + if ! cmp -s System.map .tmp_System.map; then + echo Inconsistent kallsyms data + echo echo Try "make KALLSYMS_EXTRA_PASS=1" as a workaround + cleanup + exit 1 + fi +fi + +# We made a new kernel - delete old version file +rm -f .old_version -- cgit v0.10.2 From d27579a273d62cdb9436b7f1e5f7e247e987389c Mon Sep 17 00:00:00 2001 From: Sam Ravnborg Date: Sat, 5 May 2012 10:18:42 +0200 Subject: kbuild: document KBUILD_LDS, KBUILD_VMLINUX_{INIT,MAIN} and LDFLAGS_vmlinux Newly exported variables - used by link-vmlinux.sh Signed-off-by: Sam Ravnborg Signed-off-by: Michal Marek diff --git a/Documentation/kbuild/kbuild.txt b/Documentation/kbuild/kbuild.txt index 68e32bb..6466704 100644 --- a/Documentation/kbuild/kbuild.txt +++ b/Documentation/kbuild/kbuild.txt @@ -50,6 +50,10 @@ LDFLAGS_MODULE -------------------------------------------------- Additional options used for $(LD) when linking modules. +LDFLAGS_vmlinux +-------------------------------------------------- +Additional options passed to final link of vmlinux. + KBUILD_VERBOSE -------------------------------------------------- Set the kbuild verbosity. Can be assigned same values as "V=...". @@ -214,3 +218,18 @@ KBUILD_BUILD_USER, KBUILD_BUILD_HOST These two variables allow to override the user@host string displayed during boot and in /proc/version. The default value is the output of the commands whoami and host, respectively. + +KBUILD_LDS +-------------------------------------------------- +The linker script with full path. Assigned by the top-level Makefile. + +KBUILD_VMLINUX_INIT +-------------------------------------------------- +All object files for the init (first) part of vmlinux. +Files specified with KBUILD_VMLINUX_INIT are linked first. + +KBUILD_VMLINUX_MAIN +-------------------------------------------------- +All object files for the main part of vmlinux. +KBUILD_VMLINUX_INIT and KBUILD_VMLINUX_MAIN together specify +all the object files used to link vmlinux. -- cgit v0.10.2 From 68809c7108b9a75baf2a888b1c19ce1a4680f600 Mon Sep 17 00:00:00 2001 From: Fengguang Wu Date: Sun, 6 May 2012 13:21:42 +0800 Subject: writeback: initialize global_dirty_limit This prevents global_dirty_limit from remaining 0 (the initial value) for long time, since it's only updated in update_dirty_limit() when above the dirty freerun area. It will avoid unexpected consequences when some random code use it as a convenient approximation of the global dirty threshold. Signed-off-by: Fengguang Wu diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 9dec97f..93d8d2f 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -1568,6 +1568,7 @@ void writeback_set_ratelimit(void) unsigned long background_thresh; unsigned long dirty_thresh; global_dirty_limits(&background_thresh, &dirty_thresh); + global_dirty_limit = dirty_thresh; ratelimit_pages = dirty_thresh / (num_online_cpus() * 32); if (ratelimit_pages < 16) ratelimit_pages = 16; -- cgit v0.10.2 From 365b94ae67d2915d412b593d47449a6bffed9d37 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 3 May 2012 14:47:55 +0200 Subject: writeback: Move clearing of I_SYNC into inode_sync_complete() Move clearing of I_SYNC into inode_sync_complete(). It is more logical to have clearing of I_SYNC bit and waking of waiters in one place. Also later we will have two places needing to clear I_SYNC and wake up waiters so this allows them to use the common helper. Moving of I_SYNC clearing to a later stage of writeback_single_inode() is safe since we hold i_lock all the time. Reviewed-by: Christoph Hellwig Signed-off-by: Jan Kara Signed-off-by: Fengguang Wu diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 539f36c..dd41437 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -231,11 +231,8 @@ static void requeue_io(struct inode *inode, struct bdi_writeback *wb) static void inode_sync_complete(struct inode *inode) { - /* - * Prevent speculative execution through - * spin_unlock(&wb->list_lock); - */ - + inode->i_state &= ~I_SYNC; + /* Waiters must see I_SYNC cleared before being woken up */ smp_mb(); wake_up_bit(&inode->i_state, __I_SYNC); } @@ -436,7 +433,6 @@ writeback_single_inode(struct inode *inode, struct bdi_writeback *wb, spin_lock(&wb->list_lock); spin_lock(&inode->i_lock); - inode->i_state &= ~I_SYNC; if (!(inode->i_state & I_FREEING)) { /* * Sync livelock prevention. Each inode is tagged and synced in -- cgit v0.10.2 From cc1676d917f32504dbadc858fa790bc524c9f0da Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 3 May 2012 14:47:56 +0200 Subject: writeback: Move requeueing when I_SYNC set to writeback_sb_inodes() When writeback_single_inode() is called on inode which has I_SYNC already set while doing WB_SYNC_NONE, inode is moved to b_more_io list. However this makes sense only if the caller is flusher thread. For other callers of writeback_single_inode() it doesn't really make sense and may be even wrong - flusher thread may be doing WB_SYNC_ALL writeback in parallel. So we move requeueing from writeback_single_inode() to writeback_sb_inodes(). Reviewed-by: Christoph Hellwig Signed-off-by: Jan Kara Signed-off-by: Fengguang Wu diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index dd41437..65cd147 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -373,21 +373,8 @@ writeback_single_inode(struct inode *inode, struct bdi_writeback *wb, WARN_ON(inode->i_state & I_WILL_FREE); if (inode->i_state & I_SYNC) { - /* - * If this inode is locked for writeback and we are not doing - * writeback-for-data-integrity, move it to b_more_io so that - * writeback can proceed with the other inodes on s_io. - * - * We'll have another go at writing back this inode when we - * completed a full scan of b_io. - */ - if (wbc->sync_mode != WB_SYNC_ALL) { - requeue_io(inode, wb); - trace_writeback_single_inode_requeue(inode, wbc, - nr_to_write); + if (wbc->sync_mode != WB_SYNC_ALL) return 0; - } - /* * It's a data-integrity sync. We must wait. */ @@ -576,6 +563,21 @@ static long writeback_sb_inodes(struct super_block *sb, redirty_tail(inode, wb); continue; } + if ((inode->i_state & I_SYNC) && wbc.sync_mode != WB_SYNC_ALL) { + /* + * If this inode is locked for writeback and we are not + * doing writeback-for-data-integrity, move it to + * b_more_io so that writeback can proceed with the + * other inodes on s_io. + * + * We'll have another go at writing back this inode + * when we completed a full scan of b_io. + */ + spin_unlock(&inode->i_lock); + requeue_io(inode, wb); + trace_writeback_sb_inodes_requeue(inode); + continue; + } __iget(inode); write_chunk = writeback_chunk_size(wb->bdi, work); wbc.nr_to_write = write_chunk; diff --git a/include/trace/events/writeback.h b/include/trace/events/writeback.h index 7b81887..b453d92 100644 --- a/include/trace/events/writeback.h +++ b/include/trace/events/writeback.h @@ -372,6 +372,35 @@ TRACE_EVENT(balance_dirty_pages, ) ); +TRACE_EVENT(writeback_sb_inodes_requeue, + + TP_PROTO(struct inode *inode), + TP_ARGS(inode), + + TP_STRUCT__entry( + __array(char, name, 32) + __field(unsigned long, ino) + __field(unsigned long, state) + __field(unsigned long, dirtied_when) + ), + + TP_fast_assign( + strncpy(__entry->name, + dev_name(inode_to_bdi(inode)->dev), 32); + __entry->ino = inode->i_ino; + __entry->state = inode->i_state; + __entry->dirtied_when = inode->dirtied_when; + ), + + TP_printk("bdi %s: ino=%lu state=%s dirtied_when=%lu age=%lu", + __entry->name, + __entry->ino, + show_inode_state(__entry->state), + __entry->dirtied_when, + (jiffies - __entry->dirtied_when) / HZ + ) +); + DECLARE_EVENT_CLASS(writeback_congest_waited_template, TP_PROTO(unsigned int usec_timeout, unsigned int usec_delayed), @@ -450,13 +479,6 @@ DECLARE_EVENT_CLASS(writeback_single_inode_template, ) ); -DEFINE_EVENT(writeback_single_inode_template, writeback_single_inode_requeue, - TP_PROTO(struct inode *inode, - struct writeback_control *wbc, - unsigned long nr_to_write), - TP_ARGS(inode, wbc, nr_to_write) -); - DEFINE_EVENT(writeback_single_inode_template, writeback_single_inode, TP_PROTO(struct inode *inode, struct writeback_control *wbc, -- cgit v0.10.2 From 6290be1c1dc6589eeda213aa40946b27fa4faac8 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 3 May 2012 14:47:57 +0200 Subject: writeback: Move I_DIRTY_PAGES handling Instead of clearing I_DIRTY_PAGES and resetting it when we didn't succeed in writing them all, just clear the bit only when we succeeded writing all the pages. We also move the clearing of the bit close to other i_state handling to separate it from writeback list handling. This is desirable because list handling will differ for flusher thread and other writeback_single_inode() callers in future. No filesystem plays any tricks with I_DIRTY_PAGES (like checking it in ->writepages or ->write_inode implementation) so this movement is safe. Signed-off-by: Jan Kara Signed-off-by: Fengguang Wu diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 65cd147..3804a10 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -385,7 +385,6 @@ writeback_single_inode(struct inode *inode, struct bdi_writeback *wb, /* Set I_SYNC, reset I_DIRTY_PAGES */ inode->i_state |= I_SYNC; - inode->i_state &= ~I_DIRTY_PAGES; spin_unlock(&inode->i_lock); spin_unlock(&wb->list_lock); @@ -408,6 +407,9 @@ writeback_single_inode(struct inode *inode, struct bdi_writeback *wb, * write_inode() */ spin_lock(&inode->i_lock); + /* Clear I_DIRTY_PAGES if we've written out all dirty pages */ + if (!mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) + inode->i_state &= ~I_DIRTY_PAGES; dirty = inode->i_state & I_DIRTY; inode->i_state &= ~(I_DIRTY_SYNC | I_DIRTY_DATASYNC); spin_unlock(&inode->i_lock); @@ -435,7 +437,6 @@ writeback_single_inode(struct inode *inode, struct bdi_writeback *wb, * We didn't write back all the pages. nfs_writepages() * sometimes bales out without doing anything. */ - inode->i_state |= I_DIRTY_PAGES; if (wbc->nr_to_write <= 0) { /* * slice used up: queue for next turn -- cgit v0.10.2 From ccb26b5a65867839d95156e02ea4861f64a8cbf3 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 3 May 2012 14:47:58 +0200 Subject: writeback: Separate inode requeueing after writeback Move inode requeueing after inode has been written out into a separate function. Reviewed-by: Christoph Hellwig Signed-off-by: Jan Kara Signed-off-by: Fengguang Wu diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 3804a10..5d3de00 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -345,6 +345,60 @@ static void inode_wait_for_writeback(struct inode *inode, } /* + * Find proper writeback list for the inode depending on its current state and + * possibly also change of its state while we were doing writeback. Here we + * handle things such as livelock prevention or fairness of writeback among + * inodes. This function can be called only by flusher thread - noone else + * processes all inodes in writeback lists and requeueing inodes behind flusher + * thread's back can have unexpected consequences. + */ +static void requeue_inode(struct inode *inode, struct bdi_writeback *wb, + struct writeback_control *wbc) +{ + if (inode->i_state & I_FREEING) + return; + + /* + * Sync livelock prevention. Each inode is tagged and synced in one + * shot. If still dirty, it will be redirty_tail()'ed below. Update + * the dirty time to prevent enqueue and sync it again. + */ + if ((inode->i_state & I_DIRTY) && + (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)) + inode->dirtied_when = jiffies; + + if (mapping_tagged(inode->i_mapping, PAGECACHE_TAG_DIRTY)) { + /* + * We didn't write back all the pages. nfs_writepages() + * sometimes bales out without doing anything. + */ + if (wbc->nr_to_write <= 0) { + /* Slice used up. Queue for next turn. */ + requeue_io(inode, wb); + } else { + /* + * Writeback blocked by something other than + * congestion. Delay the inode for some time to + * avoid spinning on the CPU (100% iowait) + * retrying writeback of the dirty page/inode + * that cannot be performed immediately. + */ + redirty_tail(inode, wb); + } + } else if (inode->i_state & I_DIRTY) { + /* + * Filesystems can dirty the inode during writeback operations, + * such as delayed allocation during submission or metadata + * updates after data IO completion. + */ + redirty_tail(inode, wb); + } else { + /* The inode is clean. Remove from writeback lists. */ + list_del_init(&inode->i_wb_list); + } +} + +/* * Write out an inode's dirty pages. Called under wb->list_lock and * inode->i_lock. Either the caller has an active reference on the inode or * the inode has I_WILL_FREE set. @@ -422,53 +476,7 @@ writeback_single_inode(struct inode *inode, struct bdi_writeback *wb, spin_lock(&wb->list_lock); spin_lock(&inode->i_lock); - if (!(inode->i_state & I_FREEING)) { - /* - * Sync livelock prevention. Each inode is tagged and synced in - * one shot. If still dirty, it will be redirty_tail()'ed below. - * Update the dirty time to prevent enqueue and sync it again. - */ - if ((inode->i_state & I_DIRTY) && - (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)) - inode->dirtied_when = jiffies; - - if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) { - /* - * We didn't write back all the pages. nfs_writepages() - * sometimes bales out without doing anything. - */ - if (wbc->nr_to_write <= 0) { - /* - * slice used up: queue for next turn - */ - requeue_io(inode, wb); - } else { - /* - * Writeback blocked by something other than - * congestion. Delay the inode for some time to - * avoid spinning on the CPU (100% iowait) - * retrying writeback of the dirty page/inode - * that cannot be performed immediately. - */ - redirty_tail(inode, wb); - } - } else if (inode->i_state & I_DIRTY) { - /* - * Filesystems can dirty the inode during writeback - * operations, such as delayed allocation during - * submission or metadata updates after data IO - * completion. - */ - redirty_tail(inode, wb); - } else { - /* - * The inode is clean. At this point we either have - * a reference to the inode or it's on it's way out. - * No need to add it back to the LRU. - */ - list_del_init(&inode->i_wb_list); - } - } + requeue_inode(inode, wb, wbc); inode_sync_complete(inode); trace_writeback_single_inode(inode, wbc, nr_to_write); return ret; -- cgit v0.10.2 From f0d07b7ffde758a27a48509ceda9a9ef413e0ea0 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 3 May 2012 14:47:59 +0200 Subject: writeback: Remove wb->list_lock from writeback_single_inode() writeback_single_inode() doesn't need wb->list_lock for anything on entry now. So remove the requirement. This makes locking of writeback_single_inode() temporarily awkward (entering with i_lock, returning with i_lock and wb->list_lock) but it will be sanitized in the next patch. Also inode_wait_for_writeback() doesn't need wb->list_lock for anything. It was just taking it to make usage convenient for callers but with writeback_single_inode() changing it's not very convenient anymore. So remove the lock from that function. Reviewed-by: Christoph Hellwig Signed-off-by: Jan Kara Signed-off-by: Fengguang Wu diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 5d3de00..3b87dc8 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -328,8 +328,7 @@ static int write_inode(struct inode *inode, struct writeback_control *wbc) /* * Wait for writeback on an inode to complete. */ -static void inode_wait_for_writeback(struct inode *inode, - struct bdi_writeback *wb) +static void inode_wait_for_writeback(struct inode *inode) { DEFINE_WAIT_BIT(wq, &inode->i_state, __I_SYNC); wait_queue_head_t *wqh; @@ -337,9 +336,7 @@ static void inode_wait_for_writeback(struct inode *inode, wqh = bit_waitqueue(&inode->i_state, __I_SYNC); while (inode->i_state & I_SYNC) { spin_unlock(&inode->i_lock); - spin_unlock(&wb->list_lock); __wait_on_bit(wqh, &wq, inode_wait, TASK_UNINTERRUPTIBLE); - spin_lock(&wb->list_lock); spin_lock(&inode->i_lock); } } @@ -418,7 +415,6 @@ writeback_single_inode(struct inode *inode, struct bdi_writeback *wb, unsigned dirty; int ret; - assert_spin_locked(&wb->list_lock); assert_spin_locked(&inode->i_lock); if (!atomic_read(&inode->i_count)) @@ -432,7 +428,7 @@ writeback_single_inode(struct inode *inode, struct bdi_writeback *wb, /* * It's a data-integrity sync. We must wait. */ - inode_wait_for_writeback(inode, wb); + inode_wait_for_writeback(inode); } BUG_ON(inode->i_state & I_SYNC); @@ -440,7 +436,6 @@ writeback_single_inode(struct inode *inode, struct bdi_writeback *wb, /* Set I_SYNC, reset I_DIRTY_PAGES */ inode->i_state |= I_SYNC; spin_unlock(&inode->i_lock); - spin_unlock(&wb->list_lock); ret = do_writepages(mapping, wbc); @@ -587,6 +582,8 @@ static long writeback_sb_inodes(struct super_block *sb, trace_writeback_sb_inodes_requeue(inode); continue; } + spin_unlock(&wb->list_lock); + __iget(inode); write_chunk = writeback_chunk_size(wb->bdi, work); wbc.nr_to_write = write_chunk; @@ -803,8 +800,10 @@ static long wb_writeback(struct bdi_writeback *wb, trace_writeback_wait(wb->bdi, work); inode = wb_inode(wb->b_more_io.prev); spin_lock(&inode->i_lock); - inode_wait_for_writeback(inode, wb); + spin_unlock(&wb->list_lock); + inode_wait_for_writeback(inode); spin_unlock(&inode->i_lock); + spin_lock(&wb->list_lock); } } spin_unlock(&wb->list_lock); @@ -1350,7 +1349,6 @@ int write_inode_now(struct inode *inode, int sync) wbc.nr_to_write = 0; might_sleep(); - spin_lock(&wb->list_lock); spin_lock(&inode->i_lock); ret = writeback_single_inode(inode, wb, &wbc); spin_unlock(&inode->i_lock); @@ -1375,7 +1373,6 @@ int sync_inode(struct inode *inode, struct writeback_control *wbc) struct bdi_writeback *wb = &inode_to_bdi(inode)->wb; int ret; - spin_lock(&wb->list_lock); spin_lock(&inode->i_lock); ret = writeback_single_inode(inode, wb, wbc); spin_unlock(&inode->i_lock); -- cgit v0.10.2 From 4f8ad655dbc82cf05d2edc11e66b78a42d38bf93 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 3 May 2012 14:48:00 +0200 Subject: writeback: Refactor writeback_single_inode() The code in writeback_single_inode() is relatively complex. The list requeing logic makes sense only for flusher thread but not really for sync_inode() or write_inode_now() callers. Also when we want to get rid of inode references held by flusher thread, we will need a special I_SYNC handling there. So separate part of writeback_single_inode() which does the real writeback work into __writeback_single_inode() and make writeback_single_inode() do only stuff necessary for callers writing only one inode, moving the special list handling into writeback_sb_inodes(). As a sideeffect this fixes a possible race where we could skip some inode during sync(2) because other writer refiled it from b_io to b_dirty list. Also I_SYNC handling is moved into the callers of __writeback_single_inode() to make locking easier. Reviewed-by: Christoph Hellwig Signed-off-by: Jan Kara Signed-off-by: Fengguang Wu diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 3b87dc8..5f2c682 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -364,6 +364,15 @@ static void requeue_inode(struct inode *inode, struct bdi_writeback *wb, (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)) inode->dirtied_when = jiffies; + if (wbc->pages_skipped) { + /* + * writeback is not making progress due to locked + * buffers. Skip this inode for now. + */ + redirty_tail(inode, wb); + return; + } + if (mapping_tagged(inode->i_mapping, PAGECACHE_TAG_DIRTY)) { /* * We didn't write back all the pages. nfs_writepages() @@ -396,46 +405,20 @@ static void requeue_inode(struct inode *inode, struct bdi_writeback *wb, } /* - * Write out an inode's dirty pages. Called under wb->list_lock and - * inode->i_lock. Either the caller has an active reference on the inode or - * the inode has I_WILL_FREE set. - * - * If `wait' is set, wait on the writeout. - * - * The whole writeout design is quite complex and fragile. We want to avoid - * starvation of particular inodes when others are being redirtied, prevent - * livelocks, etc. + * Write out an inode and its dirty pages. Do not update the writeback list + * linkage. That is left to the caller. The caller is also responsible for + * setting I_SYNC flag and calling inode_sync_complete() to clear it. */ static int -writeback_single_inode(struct inode *inode, struct bdi_writeback *wb, - struct writeback_control *wbc) +__writeback_single_inode(struct inode *inode, struct bdi_writeback *wb, + struct writeback_control *wbc) { struct address_space *mapping = inode->i_mapping; long nr_to_write = wbc->nr_to_write; unsigned dirty; int ret; - assert_spin_locked(&inode->i_lock); - - if (!atomic_read(&inode->i_count)) - WARN_ON(!(inode->i_state & (I_WILL_FREE|I_FREEING))); - else - WARN_ON(inode->i_state & I_WILL_FREE); - - if (inode->i_state & I_SYNC) { - if (wbc->sync_mode != WB_SYNC_ALL) - return 0; - /* - * It's a data-integrity sync. We must wait. - */ - inode_wait_for_writeback(inode); - } - - BUG_ON(inode->i_state & I_SYNC); - - /* Set I_SYNC, reset I_DIRTY_PAGES */ - inode->i_state |= I_SYNC; - spin_unlock(&inode->i_lock); + WARN_ON(!(inode->i_state & I_SYNC)); ret = do_writepages(mapping, wbc); @@ -468,12 +451,65 @@ writeback_single_inode(struct inode *inode, struct bdi_writeback *wb, if (ret == 0) ret = err; } + trace_writeback_single_inode(inode, wbc, nr_to_write); + return ret; +} + +/* + * Write out an inode's dirty pages. Either the caller has an active reference + * on the inode or the inode has I_WILL_FREE set. + * + * This function is designed to be called for writing back one inode which + * we go e.g. from filesystem. Flusher thread uses __writeback_single_inode() + * and does more profound writeback list handling in writeback_sb_inodes(). + */ +static int +writeback_single_inode(struct inode *inode, struct bdi_writeback *wb, + struct writeback_control *wbc) +{ + int ret = 0; + + spin_lock(&inode->i_lock); + if (!atomic_read(&inode->i_count)) + WARN_ON(!(inode->i_state & (I_WILL_FREE|I_FREEING))); + else + WARN_ON(inode->i_state & I_WILL_FREE); + + if (inode->i_state & I_SYNC) { + if (wbc->sync_mode != WB_SYNC_ALL) + goto out; + /* + * It's a data-integrity sync. We must wait. + */ + inode_wait_for_writeback(inode); + } + WARN_ON(inode->i_state & I_SYNC); + /* + * Skip inode if it is clean. We don't want to mess with writeback + * lists in this function since flusher thread may be doing for example + * sync in parallel and if we move the inode, it could get skipped. So + * here we make sure inode is on some writeback list and leave it there + * unless we have completely cleaned the inode. + */ + if (!(inode->i_state & I_DIRTY)) + goto out; + inode->i_state |= I_SYNC; + spin_unlock(&inode->i_lock); + + ret = __writeback_single_inode(inode, wb, wbc); spin_lock(&wb->list_lock); spin_lock(&inode->i_lock); - requeue_inode(inode, wb, wbc); + /* + * If inode is clean, remove it from writeback lists. Otherwise don't + * touch it. See comment above for explanation. + */ + if (!(inode->i_state & I_DIRTY)) + list_del_init(&inode->i_wb_list); + spin_unlock(&wb->list_lock); inode_sync_complete(inode); - trace_writeback_single_inode(inode, wbc, nr_to_write); +out: + spin_unlock(&inode->i_lock); return ret; } @@ -585,23 +621,29 @@ static long writeback_sb_inodes(struct super_block *sb, spin_unlock(&wb->list_lock); __iget(inode); + /* + * We already requeued the inode if it had I_SYNC set and we + * are doing WB_SYNC_NONE writeback. So this catches only the + * WB_SYNC_ALL case. + */ + if (inode->i_state & I_SYNC) + inode_wait_for_writeback(inode); + inode->i_state |= I_SYNC; + spin_unlock(&inode->i_lock); write_chunk = writeback_chunk_size(wb->bdi, work); wbc.nr_to_write = write_chunk; wbc.pages_skipped = 0; - writeback_single_inode(inode, wb, &wbc); + __writeback_single_inode(inode, wb, &wbc); work->nr_pages -= write_chunk - wbc.nr_to_write; wrote += write_chunk - wbc.nr_to_write; + spin_lock(&wb->list_lock); + spin_lock(&inode->i_lock); if (!(inode->i_state & I_DIRTY)) wrote++; - if (wbc.pages_skipped) { - /* - * writeback is not making progress due to locked - * buffers. Skip this inode for now. - */ - redirty_tail(inode, wb); - } + requeue_inode(inode, wb, &wbc); + inode_sync_complete(inode); spin_unlock(&inode->i_lock); spin_unlock(&wb->list_lock); iput(inode); @@ -1337,7 +1379,6 @@ EXPORT_SYMBOL(sync_inodes_sb); int write_inode_now(struct inode *inode, int sync) { struct bdi_writeback *wb = &inode_to_bdi(inode)->wb; - int ret; struct writeback_control wbc = { .nr_to_write = LONG_MAX, .sync_mode = sync ? WB_SYNC_ALL : WB_SYNC_NONE, @@ -1349,11 +1390,7 @@ int write_inode_now(struct inode *inode, int sync) wbc.nr_to_write = 0; might_sleep(); - spin_lock(&inode->i_lock); - ret = writeback_single_inode(inode, wb, &wbc); - spin_unlock(&inode->i_lock); - spin_unlock(&wb->list_lock); - return ret; + return writeback_single_inode(inode, wb, &wbc); } EXPORT_SYMBOL(write_inode_now); @@ -1370,14 +1407,7 @@ EXPORT_SYMBOL(write_inode_now); */ int sync_inode(struct inode *inode, struct writeback_control *wbc) { - struct bdi_writeback *wb = &inode_to_bdi(inode)->wb; - int ret; - - spin_lock(&inode->i_lock); - ret = writeback_single_inode(inode, wb, wbc); - spin_unlock(&inode->i_lock); - spin_unlock(&wb->list_lock); - return ret; + return writeback_single_inode(inode, &inode_to_bdi(inode)->wb, wbc); } EXPORT_SYMBOL(sync_inode); -- cgit v0.10.2 From 7994e6f7254354e03028a11f98a27bd67dace9f1 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 3 May 2012 14:48:01 +0200 Subject: vfs: Move waiting for inode writeback from end_writeback() to evict_inode() Currently, I_SYNC can never be set when evict_inode() (and thus end_writeback()) is called because flusher thread holds inode reference while inode is under writeback. As a result inode_sync_wait() in those places currently does nothing. However that is going to change and unveils problems with calling inode_sync_wait() from end_writeback(). Several filesystems call end_writeback() after they have deleted the inode (btrfs, gfs2, ...) and other filesystems (ext3, ext4, reiserfs, ...) can deadlock when waiting for I_SYNC because they call end_writeback() from within a transaction. To avoid these issues, we move inode_sync_wait() into evict_inode() before calling ->evict_inode(). That way we preserve the current property that ->evict_inode() and writeback never run in parallel and all filesystems are safe. Signed-off-by: Jan Kara Signed-off-by: Fengguang Wu diff --git a/fs/inode.c b/fs/inode.c index 9f4f5fe..501fc5d 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -500,7 +500,6 @@ void end_writeback(struct inode *inode) BUG_ON(!list_empty(&inode->i_data.private_list)); BUG_ON(!(inode->i_state & I_FREEING)); BUG_ON(inode->i_state & I_CLEAR); - inode_sync_wait(inode); /* don't need i_lock here, no concurrent mods to i_state */ inode->i_state = I_FREEING | I_CLEAR; } @@ -531,6 +530,8 @@ static void evict(struct inode *inode) inode_sb_list_del(inode); + inode_sync_wait(inode); + if (op->evict_inode) { op->evict_inode(inode); } else { -- cgit v0.10.2 From dbd5768f87ff6fb0a4fe09c4d7b6c4a24de99430 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 3 May 2012 14:48:02 +0200 Subject: vfs: Rename end_writeback() to clear_inode() After we moved inode_sync_wait() from end_writeback() it doesn't make sense to call the function end_writeback() anymore. Rename it to clear_inode() which well says what the function really does - set I_CLEAR flag. Signed-off-by: Jan Kara Signed-off-by: Fengguang Wu diff --git a/Documentation/filesystems/porting b/Documentation/filesystems/porting index 74acd96..8c91d10 100644 --- a/Documentation/filesystems/porting +++ b/Documentation/filesystems/porting @@ -297,7 +297,8 @@ in the beginning of ->setattr unconditionally. be used instead. It gets called whenever the inode is evicted, whether it has remaining links or not. Caller does *not* evict the pagecache or inode-associated metadata buffers; getting rid of those is responsibility of method, as it had -been for ->delete_inode(). +been for ->delete_inode(). Caller makes sure async writeback cannot be running +for the inode while (or after) ->evict_inode() is called. ->drop_inode() returns int now; it's called on final iput() with inode->i_lock held and it returns true if filesystems wants the inode to be @@ -306,14 +307,11 @@ updated appropriately. generic_delete_inode() is also alive and it consists simply of return 1. Note that all actual eviction work is done by caller after ->drop_inode() returns. - clear_inode() is gone; use end_writeback() instead. As before, it must -be called exactly once on each call of ->evict_inode() (as it used to be for -each call of ->delete_inode()). Unlike before, if you are using inode-associated -metadata buffers (i.e. mark_buffer_dirty_inode()), it's your responsibility to -call invalidate_inode_buffers() before end_writeback(). - No async writeback (and thus no calls of ->write_inode()) will happen -after end_writeback() returns, so actions that should not overlap with ->write_inode() -(e.g. freeing on-disk inode if i_nlink is 0) ought to be done after that call. + As before, clear_inode() must be called exactly once on each call of +->evict_inode() (as it used to be for each call of ->delete_inode()). Unlike +before, if you are using inode-associated metadata buffers (i.e. +mark_buffer_dirty_inode()), it's your responsibility to call +invalidate_inode_buffers() before clear_inode(). NOTE: checking i_nlink in the beginning of ->write_inode() and bailing out if it's zero is not *and* *never* *had* *been* enough. Final unlink() and iput() diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c index 1d75c92..66519d2 100644 --- a/arch/powerpc/platforms/cell/spufs/inode.c +++ b/arch/powerpc/platforms/cell/spufs/inode.c @@ -151,7 +151,7 @@ static void spufs_evict_inode(struct inode *inode) { struct spufs_inode_info *ei = SPUFS_I(inode); - end_writeback(inode); + clear_inode(inode); if (ei->i_ctx) put_spu_context(ei->i_ctx); if (ei->i_gang) diff --git a/arch/s390/hypfs/inode.c b/arch/s390/hypfs/inode.c index 6a2cb56..73dae8b 100644 --- a/arch/s390/hypfs/inode.c +++ b/arch/s390/hypfs/inode.c @@ -115,7 +115,7 @@ static struct inode *hypfs_make_inode(struct super_block *sb, umode_t mode) static void hypfs_evict_inode(struct inode *inode) { - end_writeback(inode); + clear_inode(inode); kfree(inode->i_private); } diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index 014c8dd..57ccb75 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c @@ -448,7 +448,7 @@ void v9fs_evict_inode(struct inode *inode) struct v9fs_inode *v9inode = V9FS_I(inode); truncate_inode_pages(inode->i_mapping, 0); - end_writeback(inode); + clear_inode(inode); filemap_fdatawrite(inode->i_mapping); #ifdef CONFIG_9P_FSCACHE diff --git a/fs/affs/inode.c b/fs/affs/inode.c index 88a4b0b..8bc4a59 100644 --- a/fs/affs/inode.c +++ b/fs/affs/inode.c @@ -264,7 +264,7 @@ affs_evict_inode(struct inode *inode) } invalidate_inode_buffers(inode); - end_writeback(inode); + clear_inode(inode); affs_free_prealloc(inode); cache_page = (unsigned long)AFFS_I(inode)->i_lc; if (cache_page) { diff --git a/fs/afs/inode.c b/fs/afs/inode.c index d890ae3..95cffd3 100644 --- a/fs/afs/inode.c +++ b/fs/afs/inode.c @@ -423,7 +423,7 @@ void afs_evict_inode(struct inode *inode) ASSERTCMP(inode->i_ino, ==, vnode->fid.vnode); truncate_inode_pages(&inode->i_data, 0); - end_writeback(inode); + clear_inode(inode); afs_give_up_callback(vnode); diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c index d8dc002..df31ddb 100644 --- a/fs/autofs4/inode.c +++ b/fs/autofs4/inode.c @@ -101,7 +101,7 @@ static int autofs4_show_options(struct seq_file *m, struct dentry *root) static void autofs4_evict_inode(struct inode *inode) { - end_writeback(inode); + clear_inode(inode); kfree(inode->i_private); } diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c index e23dc7c..9870417 100644 --- a/fs/bfs/inode.c +++ b/fs/bfs/inode.c @@ -174,7 +174,7 @@ static void bfs_evict_inode(struct inode *inode) truncate_inode_pages(&inode->i_data, 0); invalidate_inode_buffers(inode); - end_writeback(inode); + clear_inode(inode); if (inode->i_nlink) return; diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c index 613aa06..790b3cd 100644 --- a/fs/binfmt_misc.c +++ b/fs/binfmt_misc.c @@ -505,7 +505,7 @@ static struct inode *bm_get_inode(struct super_block *sb, int mode) static void bm_evict_inode(struct inode *inode) { - end_writeback(inode); + clear_inode(inode); kfree(inode->i_private); } diff --git a/fs/block_dev.c b/fs/block_dev.c index e08f6a20..d8a7959 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -487,7 +487,7 @@ static void bdev_evict_inode(struct inode *inode) struct list_head *p; truncate_inode_pages(&inode->i_data, 0); invalidate_inode_buffers(inode); /* is it needed here? */ - end_writeback(inode); + clear_inode(inode); spin_lock(&bdev_lock); while ( (p = bdev->bd_inodes.next) != &bdev->bd_inodes ) { __bd_forget(list_entry(p, struct inode, i_devices)); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 115bc05..5c058c4 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -3756,7 +3756,7 @@ void btrfs_evict_inode(struct inode *inode) btrfs_end_transaction(trans, root); btrfs_btree_balance_dirty(root, nr); no_delete: - end_writeback(inode); + clear_inode(inode); return; } diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index d342128..acb138f 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -272,7 +272,7 @@ static void cifs_evict_inode(struct inode *inode) { truncate_inode_pages(&inode->i_data, 0); - end_writeback(inode); + clear_inode(inode); cifs_fscache_release_inode_cookie(inode); } diff --git a/fs/coda/inode.c b/fs/coda/inode.c index 2870597..f181312 100644 --- a/fs/coda/inode.c +++ b/fs/coda/inode.c @@ -244,7 +244,7 @@ static void coda_put_super(struct super_block *sb) static void coda_evict_inode(struct inode *inode) { truncate_inode_pages(&inode->i_data, 0); - end_writeback(inode); + clear_inode(inode); coda_cache_clear_inode(inode); } diff --git a/fs/ecryptfs/super.c b/fs/ecryptfs/super.c index 2dd946b..e879cf8 100644 --- a/fs/ecryptfs/super.c +++ b/fs/ecryptfs/super.c @@ -133,7 +133,7 @@ static int ecryptfs_statfs(struct dentry *dentry, struct kstatfs *buf) static void ecryptfs_evict_inode(struct inode *inode) { truncate_inode_pages(&inode->i_data, 0); - end_writeback(inode); + clear_inode(inode); iput(ecryptfs_inode_to_lower(inode)); } diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c index ea5e1f9..5badb0c 100644 --- a/fs/exofs/inode.c +++ b/fs/exofs/inode.c @@ -1473,7 +1473,7 @@ void exofs_evict_inode(struct inode *inode) goto no_delete; inode->i_size = 0; - end_writeback(inode); + clear_inode(inode); /* if we are deleting an obj that hasn't been created yet, wait. * This also makes sure that create_done cannot be called with an @@ -1503,5 +1503,5 @@ void exofs_evict_inode(struct inode *inode) return; no_delete: - end_writeback(inode); + clear_inode(inode); } diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index 740cad8..37b8bf6 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c @@ -90,7 +90,7 @@ void ext2_evict_inode(struct inode * inode) } invalidate_inode_buffers(inode); - end_writeback(inode); + clear_inode(inode); ext2_discard_reservation(inode); rsv = EXT2_I(inode)->i_block_alloc_info; diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index 10d7812..ca5eb61 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c @@ -272,18 +272,18 @@ void ext3_evict_inode (struct inode *inode) if (ext3_mark_inode_dirty(handle, inode)) { /* If that failed, just dquot_drop() and be done with that */ dquot_drop(inode); - end_writeback(inode); + clear_inode(inode); } else { ext3_xattr_delete_inode(handle, inode); dquot_free_inode(inode); dquot_drop(inode); - end_writeback(inode); + clear_inode(inode); ext3_free_inode(handle, inode); } ext3_journal_stop(handle); return; no_delete: - end_writeback(inode); + clear_inode(inode); dquot_drop(inode); } diff --git a/fs/ext4/super.c b/fs/ext4/super.c index ceebaf8..2484f56 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -1007,7 +1007,7 @@ static void destroy_inodecache(void) void ext4_clear_inode(struct inode *inode) { invalidate_inode_buffers(inode); - end_writeback(inode); + clear_inode(inode); dquot_drop(inode); ext4_discard_preallocations(inode); if (EXT4_I(inode)->jinode) { diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 21687e3..b3d290c 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -454,7 +454,7 @@ static void fat_evict_inode(struct inode *inode) fat_truncate_blocks(inode, 0); } invalidate_inode_buffers(inode); - end_writeback(inode); + clear_inode(inode); fat_cache_inval_inode(inode); fat_detach(inode); } diff --git a/fs/freevxfs/vxfs_inode.c b/fs/freevxfs/vxfs_inode.c index cf9ef91..ef67c95 100644 --- a/fs/freevxfs/vxfs_inode.c +++ b/fs/freevxfs/vxfs_inode.c @@ -355,6 +355,6 @@ void vxfs_evict_inode(struct inode *ip) { truncate_inode_pages(&ip->i_data, 0); - end_writeback(ip); + clear_inode(ip); call_rcu(&ip->i_rcu, vxfs_i_callback); } diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 4aec599..87e6115 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -122,7 +122,7 @@ static void fuse_destroy_inode(struct inode *inode) static void fuse_evict_inode(struct inode *inode) { truncate_inode_pages(&inode->i_data, 0); - end_writeback(inode); + clear_inode(inode); if (inode->i_sb->s_flags & MS_ACTIVE) { struct fuse_conn *fc = get_fuse_conn(inode); struct fuse_inode *fi = get_fuse_inode(inode); diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 6172fa7..713e621 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -1554,7 +1554,7 @@ out_unlock: out: /* Case 3 starts here */ truncate_inode_pages(&inode->i_data, 0); - end_writeback(inode); + clear_inode(inode); gfs2_dir_hash_inval(ip); ip->i_gl->gl_object = NULL; flush_delayed_work_sync(&ip->i_gl->gl_work); diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c index 737dbeb..761ec06 100644 --- a/fs/hfs/inode.c +++ b/fs/hfs/inode.c @@ -532,7 +532,7 @@ out: void hfs_evict_inode(struct inode *inode) { truncate_inode_pages(&inode->i_data, 0); - end_writeback(inode); + clear_inode(inode); if (HFS_IS_RSRC(inode) && HFS_I(inode)->rsrc_inode) { HFS_I(HFS_I(inode)->rsrc_inode)->rsrc_inode = NULL; iput(HFS_I(inode)->rsrc_inode); diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c index ceb1c28..a9bca4b 100644 --- a/fs/hfsplus/super.c +++ b/fs/hfsplus/super.c @@ -154,7 +154,7 @@ static void hfsplus_evict_inode(struct inode *inode) { dprint(DBG_INODE, "hfsplus_evict_inode: %lu\n", inode->i_ino); truncate_inode_pages(&inode->i_data, 0); - end_writeback(inode); + clear_inode(inode); if (HFSPLUS_IS_RSRC(inode)) { HFSPLUS_I(HFSPLUS_I(inode)->rsrc_inode)->rsrc_inode = NULL; iput(HFSPLUS_I(inode)->rsrc_inode); diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c index 07c516b..2afa5bb 100644 --- a/fs/hostfs/hostfs_kern.c +++ b/fs/hostfs/hostfs_kern.c @@ -240,7 +240,7 @@ static struct inode *hostfs_alloc_inode(struct super_block *sb) static void hostfs_evict_inode(struct inode *inode) { truncate_inode_pages(&inode->i_data, 0); - end_writeback(inode); + clear_inode(inode); if (HOSTFS_I(inode)->fd != -1) { close_file(&HOSTFS_I(inode)->fd); HOSTFS_I(inode)->fd = -1; diff --git a/fs/hpfs/inode.c b/fs/hpfs/inode.c index 3b2cec2..b43066c 100644 --- a/fs/hpfs/inode.c +++ b/fs/hpfs/inode.c @@ -299,7 +299,7 @@ void hpfs_write_if_changed(struct inode *inode) void hpfs_evict_inode(struct inode *inode) { truncate_inode_pages(&inode->i_data, 0); - end_writeback(inode); + clear_inode(inode); if (!inode->i_nlink) { hpfs_lock(inode->i_sb); hpfs_remove_fnode(inode->i_sb, inode->i_ino); diff --git a/fs/hppfs/hppfs.c b/fs/hppfs/hppfs.c index a80e45a..d4f93b5 100644 --- a/fs/hppfs/hppfs.c +++ b/fs/hppfs/hppfs.c @@ -614,7 +614,7 @@ static struct inode *hppfs_alloc_inode(struct super_block *sb) void hppfs_evict_inode(struct inode *ino) { - end_writeback(ino); + clear_inode(ino); dput(HPPFS_I(ino)->proc_dentry); mntput(ino->i_sb->s_fs_info); } diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 28cf06e..568193d 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -393,7 +393,7 @@ static void truncate_hugepages(struct inode *inode, loff_t lstart) static void hugetlbfs_evict_inode(struct inode *inode) { truncate_hugepages(inode, 0); - end_writeback(inode); + clear_inode(inode); } static inline void diff --git a/fs/inode.c b/fs/inode.c index 501fc5d..02c0fa5 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -486,7 +486,7 @@ void __remove_inode_hash(struct inode *inode) } EXPORT_SYMBOL(__remove_inode_hash); -void end_writeback(struct inode *inode) +void clear_inode(struct inode *inode) { might_sleep(); /* @@ -503,7 +503,7 @@ void end_writeback(struct inode *inode) /* don't need i_lock here, no concurrent mods to i_state */ inode->i_state = I_FREEING | I_CLEAR; } -EXPORT_SYMBOL(end_writeback); +EXPORT_SYMBOL(clear_inode); /* * Free the inode passed in, removing it from the lists it is still connected @@ -537,7 +537,7 @@ static void evict(struct inode *inode) } else { if (inode->i_data.nrpages) truncate_inode_pages(&inode->i_data, 0); - end_writeback(inode); + clear_inode(inode); } if (S_ISBLK(inode->i_mode) && inode->i_bdev) bd_forget(inode); diff --git a/fs/jffs2/fs.c b/fs/jffs2/fs.c index bb6f993..3d3092e 100644 --- a/fs/jffs2/fs.c +++ b/fs/jffs2/fs.c @@ -240,7 +240,7 @@ void jffs2_evict_inode (struct inode *inode) jffs2_dbg(1, "%s(): ino #%lu mode %o\n", __func__, inode->i_ino, inode->i_mode); truncate_inode_pages(&inode->i_data, 0); - end_writeback(inode); + clear_inode(inode); jffs2_do_clear_inode(c, f); } diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c index 77b69b2..4692bf3 100644 --- a/fs/jfs/inode.c +++ b/fs/jfs/inode.c @@ -169,7 +169,7 @@ void jfs_evict_inode(struct inode *inode) } else { truncate_inode_pages(&inode->i_data, 0); } - end_writeback(inode); + clear_inode(inode); dquot_drop(inode); } diff --git a/fs/logfs/readwrite.c b/fs/logfs/readwrite.c index e3ab5e5..f1cb512 100644 --- a/fs/logfs/readwrite.c +++ b/fs/logfs/readwrite.c @@ -2175,7 +2175,7 @@ void logfs_evict_inode(struct inode *inode) } } truncate_inode_pages(&inode->i_data, 0); - end_writeback(inode); + clear_inode(inode); /* Cheaper version of write_inode. All changes are concealed in * aliases, which are moved back. No write to the medium happens. diff --git a/fs/minix/inode.c b/fs/minix/inode.c index fcb05d2..2a503ad 100644 --- a/fs/minix/inode.c +++ b/fs/minix/inode.c @@ -32,7 +32,7 @@ static void minix_evict_inode(struct inode *inode) minix_truncate(inode); } invalidate_inode_buffers(inode); - end_writeback(inode); + clear_inode(inode); if (!inode->i_nlink) minix_free_inode(inode); } diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c index 87484fb..333df07 100644 --- a/fs/ncpfs/inode.c +++ b/fs/ncpfs/inode.c @@ -292,7 +292,7 @@ static void ncp_evict_inode(struct inode *inode) { truncate_inode_pages(&inode->i_data, 0); - end_writeback(inode); + clear_inode(inode); if (S_ISDIR(inode->i_mode)) { DDPRINTK("ncp_evict_inode: put directory %ld\n", inode->i_ino); diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index e8bbfa5..c607313 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -121,7 +121,7 @@ static void nfs_clear_inode(struct inode *inode) void nfs_evict_inode(struct inode *inode) { truncate_inode_pages(&inode->i_data, 0); - end_writeback(inode); + clear_inode(inode); nfs_clear_inode(inode); } @@ -1500,7 +1500,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) void nfs4_evict_inode(struct inode *inode) { truncate_inode_pages(&inode->i_data, 0); - end_writeback(inode); + clear_inode(inode); pnfs_return_layout(inode); pnfs_destroy_layout(NFS_I(inode)); /* If we are holding a delegation, return it! */ diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c index 8f7b95a..7cc6446 100644 --- a/fs/nilfs2/inode.c +++ b/fs/nilfs2/inode.c @@ -734,7 +734,7 @@ void nilfs_evict_inode(struct inode *inode) if (inode->i_nlink || !ii->i_root || unlikely(is_bad_inode(inode))) { if (inode->i_data.nrpages) truncate_inode_pages(&inode->i_data, 0); - end_writeback(inode); + clear_inode(inode); nilfs_clear_inode(inode); return; } @@ -746,7 +746,7 @@ void nilfs_evict_inode(struct inode *inode) /* TODO: some of the following operations may fail. */ nilfs_truncate_bmap(ii, 0); nilfs_mark_inode_dirty(inode); - end_writeback(inode); + clear_inode(inode); ret = nilfs_ifile_delete_inode(ii->i_root->ifile, inode->i_ino); if (!ret) diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c index 2eaa666..c6dbd3d 100644 --- a/fs/ntfs/inode.c +++ b/fs/ntfs/inode.c @@ -2258,7 +2258,7 @@ void ntfs_evict_big_inode(struct inode *vi) ntfs_inode *ni = NTFS_I(vi); truncate_inode_pages(&vi->i_data, 0); - end_writeback(vi); + clear_inode(vi); #ifdef NTFS_RW if (NInoDirty(ni)) { diff --git a/fs/ocfs2/dlmfs/dlmfs.c b/fs/ocfs2/dlmfs/dlmfs.c index 3b5825e..e31d6ae 100644 --- a/fs/ocfs2/dlmfs/dlmfs.c +++ b/fs/ocfs2/dlmfs/dlmfs.c @@ -367,7 +367,7 @@ static void dlmfs_evict_inode(struct inode *inode) int status; struct dlmfs_inode_private *ip; - end_writeback(inode); + clear_inode(inode); mlog(0, "inode %lu\n", inode->i_ino); diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c index 17454a9..735514c 100644 --- a/fs/ocfs2/inode.c +++ b/fs/ocfs2/inode.c @@ -1069,7 +1069,7 @@ static void ocfs2_clear_inode(struct inode *inode) int status; struct ocfs2_inode_info *oi = OCFS2_I(inode); - end_writeback(inode); + clear_inode(inode); trace_ocfs2_clear_inode((unsigned long long)oi->ip_blkno, inode->i_nlink); diff --git a/fs/omfs/inode.c b/fs/omfs/inode.c index dbc8422..e6213b3 100644 --- a/fs/omfs/inode.c +++ b/fs/omfs/inode.c @@ -184,7 +184,7 @@ int omfs_sync_inode(struct inode *inode) static void omfs_evict_inode(struct inode *inode) { truncate_inode_pages(&inode->i_data, 0); - end_writeback(inode); + clear_inode(inode); if (inode->i_nlink) return; diff --git a/fs/proc/inode.c b/fs/proc/inode.c index 205c922..29ab406 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c @@ -33,7 +33,7 @@ static void proc_evict_inode(struct inode *inode) const struct proc_ns_operations *ns_ops; truncate_inode_pages(&inode->i_data, 0); - end_writeback(inode); + clear_inode(inode); /* Stop tracking associated processes */ put_pid(PROC_I(inode)->pid); diff --git a/fs/pstore/inode.c b/fs/pstore/inode.c index 1950788..aeb19e6 100644 --- a/fs/pstore/inode.c +++ b/fs/pstore/inode.c @@ -85,7 +85,7 @@ static void pstore_evict_inode(struct inode *inode) struct pstore_private *p = inode->i_private; unsigned long flags; - end_writeback(inode); + clear_inode(inode); if (p) { spin_lock_irqsave(&allpstore_lock, flags); list_del(&p->list); diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c index 494c315..59d0687 100644 --- a/fs/reiserfs/inode.c +++ b/fs/reiserfs/inode.c @@ -76,14 +76,14 @@ void reiserfs_evict_inode(struct inode *inode) ; } out: - end_writeback(inode); /* note this must go after the journal_end to prevent deadlock */ + clear_inode(inode); /* note this must go after the journal_end to prevent deadlock */ dquot_drop(inode); inode->i_blocks = 0; reiserfs_write_unlock_once(inode->i_sb, depth); return; no_delete: - end_writeback(inode); + clear_inode(inode); dquot_drop(inode); } diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c index feb2d69..b8ce6a9 100644 --- a/fs/sysfs/inode.c +++ b/fs/sysfs/inode.c @@ -310,7 +310,7 @@ void sysfs_evict_inode(struct inode *inode) struct sysfs_dirent *sd = inode->i_private; truncate_inode_pages(&inode->i_data, 0); - end_writeback(inode); + clear_inode(inode); sysfs_put(sd); } diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c index 3da5ce2..08d0b25 100644 --- a/fs/sysv/inode.c +++ b/fs/sysv/inode.c @@ -316,7 +316,7 @@ static void sysv_evict_inode(struct inode *inode) sysv_truncate(inode); } invalidate_inode_buffers(inode); - end_writeback(inode); + clear_inode(inode); if (!inode->i_nlink) sysv_free_inode(inode); } diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index 76e4e05..7bf60ae 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -378,7 +378,7 @@ out: smp_wmb(); } done: - end_writeback(inode); + clear_inode(inode); } static void ubifs_dirty_inode(struct inode *inode, int flags) diff --git a/fs/udf/inode.c b/fs/udf/inode.c index 7d75280..873e1ba 100644 --- a/fs/udf/inode.c +++ b/fs/udf/inode.c @@ -80,7 +80,7 @@ void udf_evict_inode(struct inode *inode) } else truncate_inode_pages(&inode->i_data, 0); invalidate_inode_buffers(inode); - end_writeback(inode); + clear_inode(inode); if (iinfo->i_alloc_type != ICBTAG_FLAG_AD_IN_ICB && inode->i_size != iinfo->i_lenExtents) { udf_warn(inode->i_sb, "Inode %lu (mode %o) has inode size %llu different from extent length %llu. Filesystem need not be standards compliant.\n", diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c index 7cdd395..dd7c89d 100644 --- a/fs/ufs/inode.c +++ b/fs/ufs/inode.c @@ -895,7 +895,7 @@ void ufs_evict_inode(struct inode * inode) } invalidate_inode_buffers(inode); - end_writeback(inode); + clear_inode(inode); if (want_delete) { lock_ufs(inode->i_sb); diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index dab9a5f..5b806f2 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -926,7 +926,7 @@ xfs_fs_evict_inode( trace_xfs_evict_inode(ip); truncate_inode_pages(&inode->i_data, 0); - end_writeback(inode); + clear_inode(inode); XFS_STATS_INC(vn_rele); XFS_STATS_INC(vn_remove); XFS_STATS_DEC(vn_active); diff --git a/include/linux/fs.h b/include/linux/fs.h index 8de6755..c79316c 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1744,8 +1744,8 @@ struct super_operations { * I_FREEING Set when inode is about to be freed but still has dirty * pages or buffers attached or the inode itself is still * dirty. - * I_CLEAR Added by end_writeback(). In this state the inode is clean - * and can be destroyed. Inode keeps I_FREEING. + * I_CLEAR Added by clear_inode(). In this state the inode is + * clean and can be destroyed. Inode keeps I_FREEING. * * Inodes that are I_WILL_FREE, I_FREEING or I_CLEAR are * prohibited for many purposes. iget() must wait for @@ -2328,7 +2328,7 @@ extern unsigned int get_next_ino(void); extern void __iget(struct inode * inode); extern void iget_failed(struct inode *); -extern void end_writeback(struct inode *); +extern void clear_inode(struct inode *); extern void __destroy_inode(struct inode *); extern struct inode *new_inode_pseudo(struct super_block *sb); extern struct inode *new_inode(struct super_block *sb); diff --git a/ipc/mqueue.c b/ipc/mqueue.c index 28bd64d..0032d9c 100644 --- a/ipc/mqueue.c +++ b/ipc/mqueue.c @@ -249,7 +249,7 @@ static void mqueue_evict_inode(struct inode *inode) int i; struct ipc_namespace *ipc_ns; - end_writeback(inode); + clear_inode(inode); if (S_ISDIR(inode->i_mode)) return; diff --git a/mm/shmem.c b/mm/shmem.c index f99ff3e..68412fa 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -597,7 +597,7 @@ static void shmem_evict_inode(struct inode *inode) } BUG_ON(inode->i_blocks); shmem_free_inode(inode->i_sb); - end_writeback(inode); + clear_inode(inode); } /* -- cgit v0.10.2 From 169ebd90131b2ffca74bb2dbe7eeacd39fb83714 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 3 May 2012 14:48:03 +0200 Subject: writeback: Avoid iput() from flusher thread Doing iput() from flusher thread (writeback_sb_inodes()) can create problems because iput() can do a lot of work - for example truncate the inode if it's the last iput on unlinked file. Some filesystems depend on flusher thread progressing (e.g. because they need to flush delay allocated blocks to reduce allocation uncertainty) and so flusher thread doing truncate creates interesting dependencies and possibilities for deadlocks. We get rid of iput() in flusher thread by using the fact that I_SYNC inode flag effectively pins the inode in memory. So if we take care to either hold i_lock or have I_SYNC set, we can get away without taking inode reference in writeback_sb_inodes(). As a side effect of these changes, we also fix possible use-after-free in wb_writeback() because inode_wait_for_writeback() call could try to reacquire i_lock on the inode that was already free. Signed-off-by: Jan Kara Signed-off-by: Fengguang Wu diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 5f2c682..8d2fb8c 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -326,9 +326,12 @@ static int write_inode(struct inode *inode, struct writeback_control *wbc) } /* - * Wait for writeback on an inode to complete. + * Wait for writeback on an inode to complete. Called with i_lock held. + * Caller must make sure inode cannot go away when we drop i_lock. */ -static void inode_wait_for_writeback(struct inode *inode) +static void __inode_wait_for_writeback(struct inode *inode) + __releases(inode->i_lock) + __acquires(inode->i_lock) { DEFINE_WAIT_BIT(wq, &inode->i_state, __I_SYNC); wait_queue_head_t *wqh; @@ -342,6 +345,36 @@ static void inode_wait_for_writeback(struct inode *inode) } /* + * Wait for writeback on an inode to complete. Caller must have inode pinned. + */ +void inode_wait_for_writeback(struct inode *inode) +{ + spin_lock(&inode->i_lock); + __inode_wait_for_writeback(inode); + spin_unlock(&inode->i_lock); +} + +/* + * Sleep until I_SYNC is cleared. This function must be called with i_lock + * held and drops it. It is aimed for callers not holding any inode reference + * so once i_lock is dropped, inode can go away. + */ +static void inode_sleep_on_writeback(struct inode *inode) + __releases(inode->i_lock) +{ + DEFINE_WAIT(wait); + wait_queue_head_t *wqh = bit_waitqueue(&inode->i_state, __I_SYNC); + int sleep; + + prepare_to_wait(wqh, &wait, TASK_UNINTERRUPTIBLE); + sleep = inode->i_state & I_SYNC; + spin_unlock(&inode->i_lock); + if (sleep) + schedule(); + finish_wait(wqh, &wait); +} + +/* * Find proper writeback list for the inode depending on its current state and * possibly also change of its state while we were doing writeback. Here we * handle things such as livelock prevention or fairness of writeback among @@ -479,9 +512,11 @@ writeback_single_inode(struct inode *inode, struct bdi_writeback *wb, if (wbc->sync_mode != WB_SYNC_ALL) goto out; /* - * It's a data-integrity sync. We must wait. + * It's a data-integrity sync. We must wait. Since callers hold + * inode reference or inode has I_WILL_FREE set, it cannot go + * away under us. */ - inode_wait_for_writeback(inode); + __inode_wait_for_writeback(inode); } WARN_ON(inode->i_state & I_SYNC); /* @@ -620,20 +655,28 @@ static long writeback_sb_inodes(struct super_block *sb, } spin_unlock(&wb->list_lock); - __iget(inode); /* * We already requeued the inode if it had I_SYNC set and we * are doing WB_SYNC_NONE writeback. So this catches only the * WB_SYNC_ALL case. */ - if (inode->i_state & I_SYNC) - inode_wait_for_writeback(inode); + if (inode->i_state & I_SYNC) { + /* Wait for I_SYNC. This function drops i_lock... */ + inode_sleep_on_writeback(inode); + /* Inode may be gone, start again */ + continue; + } inode->i_state |= I_SYNC; spin_unlock(&inode->i_lock); + write_chunk = writeback_chunk_size(wb->bdi, work); wbc.nr_to_write = write_chunk; wbc.pages_skipped = 0; + /* + * We use I_SYNC to pin the inode in memory. While it is set + * evict_inode() will wait so the inode cannot be freed. + */ __writeback_single_inode(inode, wb, &wbc); work->nr_pages -= write_chunk - wbc.nr_to_write; @@ -645,10 +688,7 @@ static long writeback_sb_inodes(struct super_block *sb, requeue_inode(inode, wb, &wbc); inode_sync_complete(inode); spin_unlock(&inode->i_lock); - spin_unlock(&wb->list_lock); - iput(inode); - cond_resched(); - spin_lock(&wb->list_lock); + cond_resched_lock(&wb->list_lock); /* * bail out to wb_writeback() often enough to check * background threshold and other termination conditions. @@ -843,8 +883,8 @@ static long wb_writeback(struct bdi_writeback *wb, inode = wb_inode(wb->b_more_io.prev); spin_lock(&inode->i_lock); spin_unlock(&wb->list_lock); - inode_wait_for_writeback(inode); - spin_unlock(&inode->i_lock); + /* This function drops i_lock... */ + inode_sleep_on_writeback(inode); spin_lock(&wb->list_lock); } } diff --git a/fs/inode.c b/fs/inode.c index 02c0fa5..f4e1450 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -530,7 +530,13 @@ static void evict(struct inode *inode) inode_sb_list_del(inode); - inode_sync_wait(inode); + /* + * Wait for flusher thread to be done with the inode so that filesystem + * does not start destroying it while writeback is still running. Since + * the inode has I_FREEING set, flusher thread won't start new work on + * the inode. We just have to wait for running writeback to finish. + */ + inode_wait_for_writeback(inode); if (op->evict_inode) { op->evict_inode(inode); diff --git a/include/linux/fs.h b/include/linux/fs.h index c79316c..1c71e7f 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1753,9 +1753,10 @@ struct super_operations { * anew. Other functions will just ignore such inodes, * if appropriate. I_NEW is used for waiting. * - * I_SYNC Synchonized write of dirty inode data. The bits is - * set during data writeback, and cleared with a wakeup - * on the bit address once it is done. + * I_SYNC Writeback of inode is running. The bit is set during + * data writeback, and cleared with a wakeup on the bit + * address once it is done. The bit is also used to pin + * the inode in memory for flusher thread. * * I_REFERENCED Marks the inode as recently references on the LRU list. * diff --git a/include/linux/writeback.h b/include/linux/writeback.h index 3309736..6d0a0fc 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -95,6 +95,7 @@ long writeback_inodes_wb(struct bdi_writeback *wb, long nr_pages, enum wb_reason reason); long wb_do_writeback(struct bdi_writeback *wb, int force_wait); void wakeup_flusher_threads(long nr_pages, enum wb_reason reason); +void inode_wait_for_writeback(struct inode *inode); /* writeback.h requires fs.h; it, too, is not included from here. */ static inline void wait_on_inode(struct inode *inode) @@ -102,12 +103,6 @@ static inline void wait_on_inode(struct inode *inode) might_sleep(); wait_on_bit(&inode->i_state, __I_NEW, inode_wait, TASK_UNINTERRUPTIBLE); } -static inline void inode_sync_wait(struct inode *inode) -{ - might_sleep(); - wait_on_bit(&inode->i_state, __I_SYNC, inode_wait, - TASK_UNINTERRUPTIBLE); -} /* -- cgit v0.10.2 From 57c22e5f35aa4b9b2fe11f73f3e62bbf9ef36190 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Wed, 2 May 2012 17:55:56 +0300 Subject: KVM: fix cpuid eax for KVM leaf cpuid eax should return the max leaf so that guests can find out the valid range. This matches Xen et al. Update documentation to match. Tested with -cpu host. Signed-off-by: Michael S. Tsirkin Signed-off-by: Avi Kivity diff --git a/Documentation/virtual/kvm/cpuid.txt b/Documentation/virtual/kvm/cpuid.txt index 8820685..83afe65 100644 --- a/Documentation/virtual/kvm/cpuid.txt +++ b/Documentation/virtual/kvm/cpuid.txt @@ -10,11 +10,15 @@ a guest. KVM cpuid functions are: function: KVM_CPUID_SIGNATURE (0x40000000) -returns : eax = 0, +returns : eax = 0x40000001, ebx = 0x4b4d564b, ecx = 0x564b4d56, edx = 0x4d. Note that this value in ebx, ecx and edx corresponds to the string "KVMKVMKVM". +The value in eax corresponds to the maximum cpuid function present in this leaf, +and will be updated if more functions are added in the future. +Note also that old hosts set eax value to 0x0. This should +be interpreted as if the value was 0x40000001. This function queries the presence of KVM cpuid leafs. diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index c2134b8..7df1c6d 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -398,7 +398,7 @@ static int do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, case KVM_CPUID_SIGNATURE: { char signature[12] = "KVMKVMKVM\0\0"; u32 *sigptr = (u32 *)signature; - entry->eax = 0; + entry->eax = KVM_CPUID_FEATURES; entry->ebx = sigptr[0]; entry->ecx = sigptr[1]; entry->edx = sigptr[2]; -- cgit v0.10.2 From 9b72d3b07dd99ac8ab2b84de5004a295af460536 Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Mon, 30 Apr 2012 14:45:49 +0300 Subject: KVM guest: make kvm_para_available() check hypervisor bit reading cpuid leaf This cpuid range does not exist on real HW and Intel spec says that "Information returned for highest basic information leaf" will be returned. Not very well defined. Signed-off-by: Gleb Natapov Signed-off-by: Avi Kivity diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h index 99c4bbe..a7a7a94 100644 --- a/arch/x86/include/asm/kvm_para.h +++ b/arch/x86/include/asm/kvm_para.h @@ -178,14 +178,16 @@ static inline int kvm_para_available(void) unsigned int eax, ebx, ecx, edx; char signature[13]; - cpuid(KVM_CPUID_SIGNATURE, &eax, &ebx, &ecx, &edx); - memcpy(signature + 0, &ebx, 4); - memcpy(signature + 4, &ecx, 4); - memcpy(signature + 8, &edx, 4); - signature[12] = 0; - - if (strcmp(signature, "KVMKVMKVM") == 0) - return 1; + if (cpu_has_hypervisor) { + cpuid(KVM_CPUID_SIGNATURE, &eax, &ebx, &ecx, &edx); + memcpy(signature + 0, &ebx, 4); + memcpy(signature + 4, &ecx, 4); + memcpy(signature + 8, &edx, 4); + signature[12] = 0; + + if (strcmp(signature, "KVMKVMKVM") == 0) + return 1; + } return 0; } -- cgit v0.10.2 From 1c2545be05f436523cabc54087c6a60ea10110d3 Mon Sep 17 00:00:00 2001 From: Takuya Yoshikawa Date: Mon, 30 Apr 2012 17:46:31 +0900 Subject: KVM: x86 emulator: Move ModRM flags for groups to top level opcode tables Needed for the following patch which simplifies ModRM fetching code. Signed-off-by: Takuya Yoshikawa Signed-off-by: Avi Kivity diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 0d151e2..8d2c3d0 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -3359,8 +3359,8 @@ static int check_perm_out(struct x86_emulate_ctxt *ctxt) .check_perm = (_p) } #define N D(0) #define EXT(_f, _e) { .flags = ((_f) | RMExt), .u.group = (_e) } -#define G(_f, _g) { .flags = ((_f) | Group), .u.group = (_g) } -#define GD(_f, _g) { .flags = ((_f) | GroupDual), .u.gdual = (_g) } +#define G(_f, _g) { .flags = ((_f) | Group | ModRM), .u.group = (_g) } +#define GD(_f, _g) { .flags = ((_f) | GroupDual | ModRM), .u.gdual = (_g) } #define I(_f, _e) { .flags = (_f), .u.execute = (_e) } #define II(_f, _e, _i) \ { .flags = (_f), .u.execute = (_e), .intercept = x86_intercept_##_i } @@ -3380,25 +3380,25 @@ static int check_perm_out(struct x86_emulate_ctxt *ctxt) I2bv(((_f) & ~Lock) | DstAcc | SrcImm, _e) static struct opcode group7_rm1[] = { - DI(SrcNone | ModRM | Priv, monitor), - DI(SrcNone | ModRM | Priv, mwait), + DI(SrcNone | Priv, monitor), + DI(SrcNone | Priv, mwait), N, N, N, N, N, N, }; static struct opcode group7_rm3[] = { - DIP(SrcNone | ModRM | Prot | Priv, vmrun, check_svme_pa), - II(SrcNone | ModRM | Prot | VendorSpecific, em_vmmcall, vmmcall), - DIP(SrcNone | ModRM | Prot | Priv, vmload, check_svme_pa), - DIP(SrcNone | ModRM | Prot | Priv, vmsave, check_svme_pa), - DIP(SrcNone | ModRM | Prot | Priv, stgi, check_svme), - DIP(SrcNone | ModRM | Prot | Priv, clgi, check_svme), - DIP(SrcNone | ModRM | Prot | Priv, skinit, check_svme), - DIP(SrcNone | ModRM | Prot | Priv, invlpga, check_svme), + DIP(SrcNone | Prot | Priv, vmrun, check_svme_pa), + II(SrcNone | Prot | VendorSpecific, em_vmmcall, vmmcall), + DIP(SrcNone | Prot | Priv, vmload, check_svme_pa), + DIP(SrcNone | Prot | Priv, vmsave, check_svme_pa), + DIP(SrcNone | Prot | Priv, stgi, check_svme), + DIP(SrcNone | Prot | Priv, clgi, check_svme), + DIP(SrcNone | Prot | Priv, skinit, check_svme), + DIP(SrcNone | Prot | Priv, invlpga, check_svme), }; static struct opcode group7_rm7[] = { N, - DIP(SrcNone | ModRM, rdtscp, check_rdtsc), + DIP(SrcNone, rdtscp, check_rdtsc), N, N, N, N, N, N, }; @@ -3414,76 +3414,77 @@ static struct opcode group1[] = { }; static struct opcode group1A[] = { - I(DstMem | SrcNone | ModRM | Mov | Stack, em_pop), N, N, N, N, N, N, N, + I(DstMem | SrcNone | Mov | Stack, em_pop), N, N, N, N, N, N, N, }; static struct opcode group3[] = { - I(DstMem | SrcImm | ModRM, em_test), - I(DstMem | SrcImm | ModRM, em_test), - I(DstMem | SrcNone | ModRM | Lock, em_not), - I(DstMem | SrcNone | ModRM | Lock, em_neg), - I(SrcMem | ModRM, em_mul_ex), - I(SrcMem | ModRM, em_imul_ex), - I(SrcMem | ModRM, em_div_ex), - I(SrcMem | ModRM, em_idiv_ex), + I(DstMem | SrcImm, em_test), + I(DstMem | SrcImm, em_test), + I(DstMem | SrcNone | Lock, em_not), + I(DstMem | SrcNone | Lock, em_neg), + I(SrcMem, em_mul_ex), + I(SrcMem, em_imul_ex), + I(SrcMem, em_div_ex), + I(SrcMem, em_idiv_ex), }; static struct opcode group4[] = { - I(ByteOp | DstMem | SrcNone | ModRM | Lock, em_grp45), - I(ByteOp | DstMem | SrcNone | ModRM | Lock, em_grp45), + I(ByteOp | DstMem | SrcNone | Lock, em_grp45), + I(ByteOp | DstMem | SrcNone | Lock, em_grp45), N, N, N, N, N, N, }; static struct opcode group5[] = { - I(DstMem | SrcNone | ModRM | Lock, em_grp45), - I(DstMem | SrcNone | ModRM | Lock, em_grp45), - I(SrcMem | ModRM | Stack, em_grp45), - I(SrcMemFAddr | ModRM | ImplicitOps | Stack, em_call_far), - I(SrcMem | ModRM | Stack, em_grp45), - I(SrcMemFAddr | ModRM | ImplicitOps, em_grp45), - I(SrcMem | ModRM | Stack, em_grp45), N, + I(DstMem | SrcNone | Lock, em_grp45), + I(DstMem | SrcNone | Lock, em_grp45), + I(SrcMem | Stack, em_grp45), + I(SrcMemFAddr | ImplicitOps | Stack, em_call_far), + I(SrcMem | Stack, em_grp45), + I(SrcMemFAddr | ImplicitOps, em_grp45), + I(SrcMem | Stack, em_grp45), N, }; static struct opcode group6[] = { - DI(ModRM | Prot, sldt), - DI(ModRM | Prot, str), - DI(ModRM | Prot | Priv, lldt), - DI(ModRM | Prot | Priv, ltr), + DI(Prot, sldt), + DI(Prot, str), + DI(Prot | Priv, lldt), + DI(Prot | Priv, ltr), N, N, N, N, }; static struct group_dual group7 = { { - DI(ModRM | Mov | DstMem | Priv, sgdt), - DI(ModRM | Mov | DstMem | Priv, sidt), - II(ModRM | SrcMem | Priv, em_lgdt, lgdt), - II(ModRM | SrcMem | Priv, em_lidt, lidt), - II(SrcNone | ModRM | DstMem | Mov, em_smsw, smsw), N, - II(SrcMem16 | ModRM | Mov | Priv, em_lmsw, lmsw), - II(SrcMem | ModRM | ByteOp | Priv | NoAccess, em_invlpg, invlpg), + DI(Mov | DstMem | Priv, sgdt), + DI(Mov | DstMem | Priv, sidt), + II(SrcMem | Priv, em_lgdt, lgdt), + II(SrcMem | Priv, em_lidt, lidt), + II(SrcNone | DstMem | Mov, em_smsw, smsw), N, + II(SrcMem16 | Mov | Priv, em_lmsw, lmsw), + II(SrcMem | ByteOp | Priv | NoAccess, em_invlpg, invlpg), }, { - I(SrcNone | ModRM | Priv | VendorSpecific, em_vmcall), + I(SrcNone | Priv | VendorSpecific, em_vmcall), EXT(0, group7_rm1), N, EXT(0, group7_rm3), - II(SrcNone | ModRM | DstMem | Mov, em_smsw, smsw), N, - II(SrcMem16 | ModRM | Mov | Priv, em_lmsw, lmsw), EXT(0, group7_rm7), + II(SrcNone | DstMem | Mov, em_smsw, smsw), N, + II(SrcMem16 | Mov | Priv, em_lmsw, lmsw), + EXT(0, group7_rm7), } }; static struct opcode group8[] = { N, N, N, N, - I(DstMem | SrcImmByte | ModRM, em_bt), - I(DstMem | SrcImmByte | ModRM | Lock | PageTable, em_bts), - I(DstMem | SrcImmByte | ModRM | Lock, em_btr), - I(DstMem | SrcImmByte | ModRM | Lock | PageTable, em_btc), + I(DstMem | SrcImmByte, em_bt), + I(DstMem | SrcImmByte | Lock | PageTable, em_bts), + I(DstMem | SrcImmByte | Lock, em_btr), + I(DstMem | SrcImmByte | Lock | PageTable, em_btc), }; static struct group_dual group9 = { { - N, I(DstMem64 | ModRM | Lock | PageTable, em_cmpxchg8b), N, N, N, N, N, N, + N, I(DstMem64 | Lock | PageTable, em_cmpxchg8b), N, N, N, N, N, N, }, { N, N, N, N, N, N, N, N, } }; static struct opcode group11[] = { - I(DstMem | SrcImm | ModRM | Mov | PageTable, em_mov), + I(DstMem | SrcImm | Mov | PageTable, em_mov), X7(D(Undefined)), }; @@ -3541,10 +3542,10 @@ static struct opcode opcode_table[256] = { /* 0x70 - 0x7F */ X16(D(SrcImmByte)), /* 0x80 - 0x87 */ - G(ByteOp | DstMem | SrcImm | ModRM | Group, group1), - G(DstMem | SrcImm | ModRM | Group, group1), - G(ByteOp | DstMem | SrcImm | ModRM | No64 | Group, group1), - G(DstMem | SrcImmByte | ModRM | Group, group1), + G(ByteOp | DstMem | SrcImm, group1), + G(DstMem | SrcImm, group1), + G(ByteOp | DstMem | SrcImm | No64, group1), + G(DstMem | SrcImmByte, group1), I2bv(DstMem | SrcReg | ModRM, em_test), I2bv(DstMem | SrcReg | ModRM | Lock | PageTable, em_xchg), /* 0x88 - 0x8F */ -- cgit v0.10.2 From 9f4260e73ac43aaa91eb5de95950e1de7002f467 Mon Sep 17 00:00:00 2001 From: Takuya Yoshikawa Date: Mon, 30 Apr 2012 17:48:25 +0900 Subject: KVM: x86 emulator: Avoid pushing back ModRM byte fetched for group decoding Although ModRM byte is fetched for group decoding, it is soon pushed back to make decode_modrm() fetch it later again. Now that ModRM flag can be found in the top level opcode tables, fetch ModRM byte before group decoding to make the code simpler. Signed-off-by: Takuya Yoshikawa Signed-off-by: Avi Kivity diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 8d2c3d0..7fd2576 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -972,7 +972,6 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt, ctxt->modrm_rm = base_reg = (ctxt->rex_prefix & 1) << 3; /* REG.B */ } - ctxt->modrm = insn_fetch(u8, ctxt); ctxt->modrm_mod |= (ctxt->modrm & 0xc0) >> 6; ctxt->modrm_reg |= (ctxt->modrm & 0x38) >> 3; ctxt->modrm_rm |= (ctxt->modrm & 0x07); @@ -3976,17 +3975,16 @@ done_prefixes: } ctxt->d = opcode.flags; + if (ctxt->d & ModRM) + ctxt->modrm = insn_fetch(u8, ctxt); + while (ctxt->d & GroupMask) { switch (ctxt->d & GroupMask) { case Group: - ctxt->modrm = insn_fetch(u8, ctxt); - --ctxt->_eip; goffset = (ctxt->modrm >> 3) & 7; opcode = opcode.u.group[goffset]; break; case GroupDual: - ctxt->modrm = insn_fetch(u8, ctxt); - --ctxt->_eip; goffset = (ctxt->modrm >> 3) & 7; if ((ctxt->modrm >> 6) == 3) opcode = opcode.u.gdual->mod3[goffset]; -- cgit v0.10.2 From cc902ad4f2b7cd3dd2cc268c63f6fb99fb1abf0f Mon Sep 17 00:00:00 2001 From: Bharat Bhushan Date: Thu, 22 Mar 2012 18:39:11 +0000 Subject: KVM: Use minimum and maximum address mapped by TLB1 Keep track of minimum and maximum address mapped by tlb1. This helps in TLBMISS handling in KVM to quick check whether the address lies in mapped range. If address does not lies in this range then no need to look in each tlb1 entry of tlb1 array. Signed-off-by: Bharat Bhushan Signed-off-by: Alexander Graf diff --git a/arch/powerpc/kvm/e500.h b/arch/powerpc/kvm/e500.h index 7967f3f..aa8b814 100644 --- a/arch/powerpc/kvm/e500.h +++ b/arch/powerpc/kvm/e500.h @@ -89,6 +89,10 @@ struct kvmppc_vcpu_e500 { u64 *g2h_tlb1_map; unsigned int *h2g_tlb1_rmap; + /* Minimum and maximum address mapped my TLB1 */ + unsigned long tlb1_min_eaddr; + unsigned long tlb1_max_eaddr; + #ifdef CONFIG_KVM_E500V2 u32 pid[E500_PID_NUM]; diff --git a/arch/powerpc/kvm/e500_tlb.c b/arch/powerpc/kvm/e500_tlb.c index e05232b..c510fc9 100644 --- a/arch/powerpc/kvm/e500_tlb.c +++ b/arch/powerpc/kvm/e500_tlb.c @@ -261,6 +261,9 @@ static int kvmppc_e500_tlb_index(struct kvmppc_vcpu_e500 *vcpu_e500, set_base = gtlb0_set_base(vcpu_e500, eaddr); size = vcpu_e500->gtlb_params[0].ways; } else { + if (eaddr < vcpu_e500->tlb1_min_eaddr || + eaddr > vcpu_e500->tlb1_max_eaddr) + return -1; set_base = 0; } @@ -583,6 +586,65 @@ static int kvmppc_e500_tlb1_map(struct kvmppc_vcpu_e500 *vcpu_e500, return victim; } +static void kvmppc_recalc_tlb1map_range(struct kvmppc_vcpu_e500 *vcpu_e500) +{ + int size = vcpu_e500->gtlb_params[1].entries; + unsigned int offset; + gva_t eaddr; + int i; + + vcpu_e500->tlb1_min_eaddr = ~0UL; + vcpu_e500->tlb1_max_eaddr = 0; + offset = vcpu_e500->gtlb_offset[1]; + + for (i = 0; i < size; i++) { + struct kvm_book3e_206_tlb_entry *tlbe = + &vcpu_e500->gtlb_arch[offset + i]; + + if (!get_tlb_v(tlbe)) + continue; + + eaddr = get_tlb_eaddr(tlbe); + vcpu_e500->tlb1_min_eaddr = + min(vcpu_e500->tlb1_min_eaddr, eaddr); + + eaddr = get_tlb_end(tlbe); + vcpu_e500->tlb1_max_eaddr = + max(vcpu_e500->tlb1_max_eaddr, eaddr); + } +} + +static int kvmppc_need_recalc_tlb1map_range(struct kvmppc_vcpu_e500 *vcpu_e500, + struct kvm_book3e_206_tlb_entry *gtlbe) +{ + unsigned long start, end, size; + + size = get_tlb_bytes(gtlbe); + start = get_tlb_eaddr(gtlbe) & ~(size - 1); + end = start + size - 1; + + return vcpu_e500->tlb1_min_eaddr == start || + vcpu_e500->tlb1_max_eaddr == end; +} + +/* This function is supposed to be called for a adding a new valid tlb entry */ +static void kvmppc_set_tlb1map_range(struct kvm_vcpu *vcpu, + struct kvm_book3e_206_tlb_entry *gtlbe) +{ + unsigned long start, end, size; + struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); + + if (!get_tlb_v(gtlbe)) + return; + + size = get_tlb_bytes(gtlbe); + start = get_tlb_eaddr(gtlbe) & ~(size - 1); + end = start + size - 1; + + vcpu_e500->tlb1_min_eaddr = min(vcpu_e500->tlb1_min_eaddr, start); + vcpu_e500->tlb1_max_eaddr = max(vcpu_e500->tlb1_max_eaddr, end); +} + static inline int kvmppc_e500_gtlbe_invalidate( struct kvmppc_vcpu_e500 *vcpu_e500, int tlbsel, int esel) @@ -593,6 +655,9 @@ static inline int kvmppc_e500_gtlbe_invalidate( if (unlikely(get_tlb_iprot(gtlbe))) return -1; + if (tlbsel == 1 && kvmppc_need_recalc_tlb1map_range(vcpu_e500, gtlbe)) + kvmppc_recalc_tlb1map_range(vcpu_e500); + gtlbe->mas1 = 0; return 0; @@ -792,14 +857,19 @@ int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *vcpu) struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); struct kvm_book3e_206_tlb_entry *gtlbe, stlbe; int tlbsel, esel, stlbsel, sesel; + int recal = 0; tlbsel = get_tlb_tlbsel(vcpu); esel = get_tlb_esel(vcpu, tlbsel); gtlbe = get_entry(vcpu_e500, tlbsel, esel); - if (get_tlb_v(gtlbe)) + if (get_tlb_v(gtlbe)) { inval_gtlbe_on_host(vcpu_e500, tlbsel, esel); + if ((tlbsel == 1) && + kvmppc_need_recalc_tlb1map_range(vcpu_e500, gtlbe)) + recal = 1; + } gtlbe->mas1 = vcpu->arch.shared->mas1; gtlbe->mas2 = vcpu->arch.shared->mas2; @@ -808,6 +878,18 @@ int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *vcpu) trace_kvm_booke206_gtlb_write(vcpu->arch.shared->mas0, gtlbe->mas1, gtlbe->mas2, gtlbe->mas7_3); + if (tlbsel == 1) { + /* + * If a valid tlb1 entry is overwritten then recalculate the + * min/max TLB1 map address range otherwise no need to look + * in tlb1 array. + */ + if (recal) + kvmppc_recalc_tlb1map_range(vcpu_e500); + else + kvmppc_set_tlb1map_range(vcpu, gtlbe); + } + /* Invalidate shadow mappings for the about-to-be-clobbered TLBE. */ if (tlbe_is_host_safe(vcpu, gtlbe)) { u64 eaddr; @@ -1145,6 +1227,7 @@ int kvm_vcpu_ioctl_config_tlb(struct kvm_vcpu *vcpu, vcpu_e500->gtlb_params[1].ways = params.tlb_sizes[1]; vcpu_e500->gtlb_params[1].sets = 1; + kvmppc_recalc_tlb1map_range(vcpu_e500); return 0; err_put_page: @@ -1163,7 +1246,7 @@ int kvm_vcpu_ioctl_dirty_tlb(struct kvm_vcpu *vcpu, struct kvm_dirty_tlb *dirty) { struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); - + kvmppc_recalc_tlb1map_range(vcpu_e500); clear_tlb_refs(vcpu_e500); return 0; } @@ -1272,6 +1355,7 @@ int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *vcpu_e500) vcpu->arch.tlbcfg[1] |= vcpu_e500->gtlb_params[1].ways << TLBnCFG_ASSOC_SHIFT; + kvmppc_recalc_tlb1map_range(vcpu_e500); return 0; err: -- cgit v0.10.2 From 6e35994d1f6831af1e5577e28c363c9137d7d597 Mon Sep 17 00:00:00 2001 From: Bharat Bhushan Date: Wed, 18 Apr 2012 06:01:19 +0000 Subject: KVM: PPC: Use clockevent multiplier and shifter for decrementer Time for which the hrtimer is started for decrementer emulation is calculated using tb_ticks_per_usec. While hrtimer uses the clockevent for DEC reprogramming (if needed) and which calculate timebase ticks using the multiplier and shifter mechanism implemented within clockevent layer. It was observed that this conversion (timebase->time->timebase) are not correct because the mechanism are not consistent. In our setup it adds 2% jitter. With this patch clockevent multiplier and shifter mechanism are used when starting hrtimer for decrementer emulation. Now the jitter is < 0.5%. Signed-off-by: Bharat Bhushan Signed-off-by: Alexander Graf diff --git a/arch/powerpc/include/asm/time.h b/arch/powerpc/include/asm/time.h index 2136f58..3b4b4a8 100644 --- a/arch/powerpc/include/asm/time.h +++ b/arch/powerpc/include/asm/time.h @@ -23,6 +23,7 @@ extern unsigned long tb_ticks_per_jiffy; extern unsigned long tb_ticks_per_usec; extern unsigned long tb_ticks_per_sec; +extern struct clock_event_device decrementer_clockevent; struct rtc_time; extern void to_tm(int tim, struct rtc_time * tm); diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 2c42cd7..99a995c 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -100,7 +100,7 @@ static int decrementer_set_next_event(unsigned long evt, static void decrementer_set_mode(enum clock_event_mode mode, struct clock_event_device *dev); -static struct clock_event_device decrementer_clockevent = { +struct clock_event_device decrementer_clockevent = { .name = "decrementer", .rating = 200, .irq = 0, @@ -108,6 +108,7 @@ static struct clock_event_device decrementer_clockevent = { .set_mode = decrementer_set_mode, .features = CLOCK_EVT_FEAT_ONESHOT, }; +EXPORT_SYMBOL(decrementer_clockevent); DEFINE_PER_CPU(u64, decrementers_next_tb); static DEFINE_PER_CPU(struct clock_event_device, decrementers); diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c index afc9154..b5872f6 100644 --- a/arch/powerpc/kvm/emulate.c +++ b/arch/powerpc/kvm/emulate.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include @@ -104,8 +105,12 @@ void kvmppc_emulate_dec(struct kvm_vcpu *vcpu) */ dec_time = vcpu->arch.dec; - dec_time *= 1000; - do_div(dec_time, tb_ticks_per_usec); + /* + * Guest timebase ticks at the same frequency as host decrementer. + * So use the host decrementer calculations for decrementer emulation. + */ + dec_time = dec_time << decrementer_clockevent.shift; + do_div(dec_time, decrementer_clockevent.mult); dec_nsec = do_div(dec_time, NSEC_PER_SEC); hrtimer_start(&vcpu->arch.dec_timer, ktime_set(dec_time, dec_nsec), HRTIMER_MODE_REL); -- cgit v0.10.2 From 185e4188dab6456409cad66c579501dd89487188 Mon Sep 17 00:00:00 2001 From: Varun Sethi Date: Wed, 25 Apr 2012 01:26:43 +0000 Subject: KVM: PPC: bookehv: Use a Macro for saving/restoring guest registers to/from their 64 bit copies. Introduced PPC_STD/PPC_LD macros for saving/restoring guest registers to/from their 64 bit copies. Signed-off-by: Varun Sethi Signed-off-by: Alexander Graf diff --git a/arch/powerpc/include/asm/kvm_asm.h b/arch/powerpc/include/asm/kvm_asm.h index 0978152..7d4018d 100644 --- a/arch/powerpc/include/asm/kvm_asm.h +++ b/arch/powerpc/include/asm/kvm_asm.h @@ -20,6 +20,14 @@ #ifndef __POWERPC_KVM_ASM_H__ #define __POWERPC_KVM_ASM_H__ +#ifdef CONFIG_64BIT +#define PPC_STD(sreg, offset, areg) std sreg, (offset)(areg) +#define PPC_LD(treg, offset, areg) ld treg, (offset)(areg) +#else +#define PPC_STD(sreg, offset, areg) stw sreg, (offset+4)(areg) +#define PPC_LD(treg, offset, areg) lwz treg, (offset+4)(areg) +#endif + /* IVPR must be 64KiB-aligned. */ #define VCPU_SIZE_ORDER 4 #define VCPU_SIZE_LOG (VCPU_SIZE_ORDER + 12) diff --git a/arch/powerpc/kvm/bookehv_interrupts.S b/arch/powerpc/kvm/bookehv_interrupts.S index 909e96e..41d3485 100644 --- a/arch/powerpc/kvm/bookehv_interrupts.S +++ b/arch/powerpc/kvm/bookehv_interrupts.S @@ -93,11 +93,7 @@ #endif oris r8, r6, MSR_CE@h -#ifdef CONFIG_64BIT - std r6, (VCPU_SHARED_MSR)(r11) -#else - stw r6, (VCPU_SHARED_MSR + 4)(r11) -#endif + PPC_STD(r6, VCPU_SHARED_MSR, r11) ori r8, r8, MSR_ME | MSR_RI PPC_STL r5, VCPU_PC(r4) @@ -335,11 +331,7 @@ _GLOBAL(kvmppc_resume_host) stw r5, VCPU_SHARED_MAS0(r11) mfspr r7, SPRN_MAS2 stw r6, VCPU_SHARED_MAS1(r11) -#ifdef CONFIG_64BIT - std r7, (VCPU_SHARED_MAS2)(r11) -#else - stw r7, (VCPU_SHARED_MAS2 + 4)(r11) -#endif + PPC_STD(r7, VCPU_SHARED_MAS2, r11) mfspr r5, SPRN_MAS3 mfspr r6, SPRN_MAS4 stw r5, VCPU_SHARED_MAS7_3+4(r11) @@ -527,11 +519,7 @@ lightweight_exit: stw r3, VCPU_HOST_MAS6(r4) lwz r3, VCPU_SHARED_MAS0(r11) lwz r5, VCPU_SHARED_MAS1(r11) -#ifdef CONFIG_64BIT - ld r6, (VCPU_SHARED_MAS2)(r11) -#else - lwz r6, (VCPU_SHARED_MAS2 + 4)(r11) -#endif + PPC_LD(r6, VCPU_SHARED_MAS2, r11) lwz r7, VCPU_SHARED_MAS7_3+4(r11) lwz r8, VCPU_SHARED_MAS4(r11) mtspr SPRN_MAS0, r3 @@ -565,11 +553,7 @@ lightweight_exit: PPC_LL r6, VCPU_CTR(r4) PPC_LL r7, VCPU_CR(r4) PPC_LL r8, VCPU_PC(r4) -#ifdef CONFIG_64BIT - ld r9, (VCPU_SHARED_MSR)(r11) -#else - lwz r9, (VCPU_SHARED_MSR + 4)(r11) -#endif + PPC_LD(r9, VCPU_SHARED_MSR, r11) PPC_LL r0, VCPU_GPR(r0)(r4) PPC_LL r1, VCPU_GPR(r1)(r4) PPC_LL r2, VCPU_GPR(r2)(r4) -- cgit v0.10.2 From 3d4c6826ed2a28e69e8ee14f1d58c4c8622f04b3 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Wed, 25 Apr 2012 13:48:54 +0200 Subject: KVM: PPC: Restrict PPC_[L|ST]D macro to asm code We only want asm code macros to be accessible from asm code, so #ifdef it depending on it. Signed-off-by: Alexander Graf diff --git a/arch/powerpc/include/asm/kvm_asm.h b/arch/powerpc/include/asm/kvm_asm.h index 7d4018d..76fdcfe 100644 --- a/arch/powerpc/include/asm/kvm_asm.h +++ b/arch/powerpc/include/asm/kvm_asm.h @@ -20,6 +20,7 @@ #ifndef __POWERPC_KVM_ASM_H__ #define __POWERPC_KVM_ASM_H__ +#ifdef __ASSEMBLY__ #ifdef CONFIG_64BIT #define PPC_STD(sreg, offset, areg) std sreg, (offset)(areg) #define PPC_LD(treg, offset, areg) ld treg, (offset)(areg) @@ -27,6 +28,7 @@ #define PPC_STD(sreg, offset, areg) stw sreg, (offset+4)(areg) #define PPC_LD(treg, offset, areg) lwz treg, (offset+4)(areg) #endif +#endif /* IVPR must be 64KiB-aligned. */ #define VCPU_SIZE_ORDER 4 -- cgit v0.10.2 From 30124906db8598255fba32c8bf0adb7e8f1503ab Mon Sep 17 00:00:00 2001 From: Varun Sethi Date: Wed, 25 Apr 2012 01:27:34 +0000 Subject: KVM: PPC: booke(hv): Fix save/restore of guest accessible SPRGs. For Guest accessible SPRGs 4-7, save/restore must be handled differently for 64bit and non-64 bit case. Use the PPC_STD/PPC_LD macros for saving/restoring to/from these registers. Signed-off-by: Varun Sethi Signed-off-by: Alexander Graf diff --git a/arch/powerpc/kvm/booke_interrupts.S b/arch/powerpc/kvm/booke_interrupts.S index c8c4b87..8feec2f 100644 --- a/arch/powerpc/kvm/booke_interrupts.S +++ b/arch/powerpc/kvm/booke_interrupts.S @@ -419,13 +419,13 @@ lightweight_exit: * written directly to the shared area, so we * need to reload them here with the guest's values. */ - lwz r3, VCPU_SHARED_SPRG4(r5) + PPC_LD(r3, VCPU_SHARED_SPRG4, r5) mtspr SPRN_SPRG4W, r3 - lwz r3, VCPU_SHARED_SPRG5(r5) + PPC_LD(r3, VCPU_SHARED_SPRG5, r5) mtspr SPRN_SPRG5W, r3 - lwz r3, VCPU_SHARED_SPRG6(r5) + PPC_LD(r3, VCPU_SHARED_SPRG6, r5) mtspr SPRN_SPRG6W, r3 - lwz r3, VCPU_SHARED_SPRG7(r5) + PPC_LD(r3, VCPU_SHARED_SPRG7, r5) mtspr SPRN_SPRG7W, r3 #ifdef CONFIG_KVM_EXIT_TIMING diff --git a/arch/powerpc/kvm/bookehv_interrupts.S b/arch/powerpc/kvm/bookehv_interrupts.S index 41d3485..b7608ac 100644 --- a/arch/powerpc/kvm/bookehv_interrupts.S +++ b/arch/powerpc/kvm/bookehv_interrupts.S @@ -316,13 +316,13 @@ _GLOBAL(kvmppc_resume_host) PPC_STL r5, VCPU_LR(r4) mfspr r7, SPRN_SPRG5 PPC_STL r3, VCPU_VRSAVE(r4) - PPC_STL r6, VCPU_SHARED_SPRG4(r11) + PPC_STD(r6, VCPU_SHARED_SPRG4, r11) mfspr r8, SPRN_SPRG6 - PPC_STL r7, VCPU_SHARED_SPRG5(r11) + PPC_STD(r7, VCPU_SHARED_SPRG5, r11) mfspr r9, SPRN_SPRG7 - PPC_STL r8, VCPU_SHARED_SPRG6(r11) + PPC_STD(r8, VCPU_SHARED_SPRG6, r11) mfxer r3 - PPC_STL r9, VCPU_SHARED_SPRG7(r11) + PPC_STD(r9, VCPU_SHARED_SPRG7, r11) /* save guest MAS registers and restore host mas4 & mas6 */ mfspr r5, SPRN_MAS0 @@ -537,13 +537,13 @@ lightweight_exit: * SPRGs, so we need to reload them here with the guest's values. */ lwz r3, VCPU_VRSAVE(r4) - lwz r5, VCPU_SHARED_SPRG4(r11) + PPC_LD(r5, VCPU_SHARED_SPRG4, r11) mtspr SPRN_VRSAVE, r3 - lwz r6, VCPU_SHARED_SPRG5(r11) + PPC_LD(r6, VCPU_SHARED_SPRG5, r11) mtspr SPRN_SPRG4W, r5 - lwz r7, VCPU_SHARED_SPRG6(r11) + PPC_LD(r7, VCPU_SHARED_SPRG6, r11) mtspr SPRN_SPRG5W, r6 - lwz r8, VCPU_SHARED_SPRG7(r11) + PPC_LD(r8, VCPU_SHARED_SPRG7, r11) mtspr SPRN_SPRG6W, r7 mtspr SPRN_SPRG7W, r8 -- cgit v0.10.2 From 8c2d0be7efb0b92b5e4f89ea4363f3cdc11e2459 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Wed, 25 Apr 2012 14:28:23 +0200 Subject: KVM: PPC: Book3S: PR: Optimize entry path By shuffling a few instructions around we can execute more memory loads in parallel, giving us a small performance boost. With this patch and a simple priviledged SPR access loop guest, I get a speed bump from 2013052 to 2035607 exits per second. Signed-off-by: Alexander Graf diff --git a/arch/powerpc/kvm/book3s_segment.S b/arch/powerpc/kvm/book3s_segment.S index 0676ae2..6bae0a9 100644 --- a/arch/powerpc/kvm/book3s_segment.S +++ b/arch/powerpc/kvm/book3s_segment.S @@ -128,24 +128,25 @@ no_dcbz32_on: /* First clear RI in our current MSR value */ li r0, MSR_RI andc r6, r6, r0 - MTMSR_EERI(r6) - mtsrr0 r9 - mtsrr1 r4 PPC_LL r0, SVCPU_R0(r3) PPC_LL r1, SVCPU_R1(r3) PPC_LL r2, SVCPU_R2(r3) - PPC_LL r4, SVCPU_R4(r3) PPC_LL r5, SVCPU_R5(r3) - PPC_LL r6, SVCPU_R6(r3) PPC_LL r7, SVCPU_R7(r3) PPC_LL r8, SVCPU_R8(r3) - PPC_LL r9, SVCPU_R9(r3) PPC_LL r10, SVCPU_R10(r3) PPC_LL r11, SVCPU_R11(r3) PPC_LL r12, SVCPU_R12(r3) PPC_LL r13, SVCPU_R13(r3) + MTMSR_EERI(r6) + mtsrr0 r9 + mtsrr1 r4 + + PPC_LL r4, SVCPU_R4(r3) + PPC_LL r6, SVCPU_R6(r3) + PPC_LL r9, SVCPU_R9(r3) PPC_LL r3, (SVCPU_R3)(r3) RFI -- cgit v0.10.2 From af415087d2bbbef3cc25cdf371bfb0460cf66b3b Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Wed, 25 Apr 2012 14:29:57 +0200 Subject: KVM: PPC: Book3S: PR: No isync in slbie path While messing around with the SLBs we're running in real mode. The entry to guest space goes through rfid, which is context synchronizing, so there's no need to manually synchronize anything through isync. With this patch and a simple priviledged SPR access loop guest, I get a speed bump from 2035607 to 2181301 exits per second. Signed-off-by: Alexander Graf diff --git a/arch/powerpc/kvm/book3s_64_slb.S b/arch/powerpc/kvm/book3s_64_slb.S index f2e6e48..56b983e 100644 --- a/arch/powerpc/kvm/book3s_64_slb.S +++ b/arch/powerpc/kvm/book3s_64_slb.S @@ -90,8 +90,6 @@ slb_exit_skip_ ## num: or r10, r10, r12 slbie r10 - isync - /* Fill SLB with our shadow */ lbz r12, SVCPU_SLB_MAX(r3) -- cgit v0.10.2 From 518f040c826d569daf260153d4f75c21b6d9979b Mon Sep 17 00:00:00 2001 From: Mihai Caraman Date: Mon, 16 Apr 2012 04:08:54 +0000 Subject: KVM: PPC: bookehv: Use lwz/stw instead of PPC_LL/PPC_STL for 32-bit fields Interrupt code used PPC_LL/PPC_STL macros to load/store some of u32 fields which led to memory overflow on 64-bit. Use lwz/stw instead. Signed-off-by: Mihai Caraman Signed-off-by: Alexander Graf diff --git a/arch/powerpc/kvm/bookehv_interrupts.S b/arch/powerpc/kvm/bookehv_interrupts.S index b7608ac..06750cc 100644 --- a/arch/powerpc/kvm/bookehv_interrupts.S +++ b/arch/powerpc/kvm/bookehv_interrupts.S @@ -87,9 +87,9 @@ mfspr r8, SPRN_TBRL mfspr r9, SPRN_TBRU cmpw r9, r7 - PPC_STL r8, VCPU_TIMING_EXIT_TBL(r4) + stw r8, VCPU_TIMING_EXIT_TBL(r4) bne- 1b - PPC_STL r9, VCPU_TIMING_EXIT_TBU(r4) + stw r9, VCPU_TIMING_EXIT_TBU(r4) #endif oris r8, r6, MSR_CE@h @@ -216,7 +216,7 @@ _GLOBAL(kvmppc_handler_\intno\()_\srr1) PPC_STL r4, VCPU_GPR(r4)(r11) PPC_LL r4, THREAD_NORMSAVE(0)(r10) PPC_STL r5, VCPU_GPR(r5)(r11) - PPC_STL r13, VCPU_CR(r11) + stw r13, VCPU_CR(r11) mfspr r5, \srr0 PPC_STL r3, VCPU_GPR(r10)(r11) PPC_LL r3, THREAD_NORMSAVE(2)(r10) @@ -243,7 +243,7 @@ _GLOBAL(kvmppc_handler_\intno\()_\srr1) PPC_STL r4, VCPU_GPR(r4)(r11) PPC_LL r4, GPR9(r8) PPC_STL r5, VCPU_GPR(r5)(r11) - PPC_STL r9, VCPU_CR(r11) + stw r9, VCPU_CR(r11) mfspr r5, \srr0 PPC_STL r3, VCPU_GPR(r8)(r11) PPC_LL r3, GPR10(r8) @@ -315,7 +315,7 @@ _GLOBAL(kvmppc_resume_host) mfspr r6, SPRN_SPRG4 PPC_STL r5, VCPU_LR(r4) mfspr r7, SPRN_SPRG5 - PPC_STL r3, VCPU_VRSAVE(r4) + stw r3, VCPU_VRSAVE(r4) PPC_STD(r6, VCPU_SHARED_SPRG4, r11) mfspr r8, SPRN_SPRG6 PPC_STD(r7, VCPU_SHARED_SPRG5, r11) @@ -551,7 +551,7 @@ lightweight_exit: PPC_LL r3, VCPU_LR(r4) PPC_LL r5, VCPU_XER(r4) PPC_LL r6, VCPU_CTR(r4) - PPC_LL r7, VCPU_CR(r4) + lwz r7, VCPU_CR(r4) PPC_LL r8, VCPU_PC(r4) PPC_LD(r9, VCPU_SHARED_MSR, r11) PPC_LL r0, VCPU_GPR(r0)(r4) @@ -574,9 +574,9 @@ lightweight_exit: mfspr r9, SPRN_TBRL mfspr r8, SPRN_TBRU cmpw r8, r6 - PPC_STL r9, VCPU_TIMING_LAST_ENTER_TBL(r4) + stw r9, VCPU_TIMING_LAST_ENTER_TBL(r4) bne 1b - PPC_STL r8, VCPU_TIMING_LAST_ENTER_TBU(r4) + stw r8, VCPU_TIMING_LAST_ENTER_TBU(r4) #endif /* -- cgit v0.10.2 From 978b4fae45b3fae803a9f56e2262f01f71b7dbc9 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Fri, 27 Apr 2012 01:00:17 +0200 Subject: KVM: PPC: Fix stbux emulation Stbux writes the address it's operating on to the register specified in ra, not into the data source register. Signed-off-by: Alexander Graf diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c index b5872f6..a27d4dc 100644 --- a/arch/powerpc/kvm/emulate.c +++ b/arch/powerpc/kvm/emulate.c @@ -229,7 +229,7 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) emulated = kvmppc_handle_store(run, vcpu, kvmppc_get_gpr(vcpu, rs), 1, 1); - kvmppc_set_gpr(vcpu, rs, vcpu->arch.vaddr_accessed); + kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); break; case OP_31_XOP_LHAX: -- cgit v0.10.2 From 11f7d6c2d1b17abf7b91e0f2d43bfe9de0b9e5cf Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Fri, 27 Apr 2012 16:33:35 +0200 Subject: KVM: PPC: Fix PR KVM on POWER7 bare metal When running on a system that is HV capable, some interrupts use HSRR SPRs instead of the normal SRR SPRs. These are also used in the Linux handlers to jump back to code after an interrupt got processed. Unfortunately, in our "jump back to the real host handler after we've done the context switch" code, we were only setting the SRR SPRs, rendering Linux to jump back to some invalid IP after it's processed the interrupt. This fixes random crashes on p7 opal mode with PR KVM for me. Signed-off-by: Alexander Graf diff --git a/arch/powerpc/kvm/book3s_segment.S b/arch/powerpc/kvm/book3s_segment.S index 6bae0a9..8b2fc66 100644 --- a/arch/powerpc/kvm/book3s_segment.S +++ b/arch/powerpc/kvm/book3s_segment.S @@ -198,6 +198,7 @@ kvmppc_interrupt: /* Save guest PC and MSR */ #ifdef CONFIG_PPC64 BEGIN_FTR_SECTION + mr r10, r12 andi. r0,r12,0x2 beq 1f mfspr r3,SPRN_HSRR0 @@ -317,23 +318,17 @@ no_dcbz32_off: * Having set up SRR0/1 with the address where we want * to continue with relocation on (potentially in module * space), we either just go straight there with rfi[d], - * or we jump to an interrupt handler with bctr if there - * is an interrupt to be handled first. In the latter - * case, the rfi[d] at the end of the interrupt handler - * will get us back to where we want to continue. + * or we jump to an interrupt handler if there is an + * interrupt to be handled first. In the latter case, + * the rfi[d] at the end of the interrupt handler will + * get us back to where we want to continue. */ - cmpwi r12, BOOK3S_INTERRUPT_EXTERNAL - beq 1f - cmpwi r12, BOOK3S_INTERRUPT_DECREMENTER - beq 1f - cmpwi r12, BOOK3S_INTERRUPT_PERFMON -1: mtctr r12 - /* Register usage at this point: * * R1 = host R1 * R2 = host R2 + * R10 = raw exit handler id * R12 = exit handler id * R13 = shadow vcpu (32-bit) or PACA (64-bit) * SVCPU.* = guest * @@ -343,12 +338,26 @@ no_dcbz32_off: PPC_LL r6, HSTATE_HOST_MSR(r13) PPC_LL r8, HSTATE_VMHANDLER(r13) - /* Restore host msr -> SRR1 */ +#ifdef CONFIG_PPC64 +BEGIN_FTR_SECTION + andi. r0,r10,0x2 + beq 1f + mtspr SPRN_HSRR1, r6 + mtspr SPRN_HSRR0, r8 +END_FTR_SECTION_IFSET(CPU_FTR_HVMODE) +#endif +1: /* Restore host msr -> SRR1 */ mtsrr1 r6 /* Load highmem handler address */ mtsrr0 r8 /* RFI into the highmem handler, or jump to interrupt handler */ - beqctr + cmpwi r12, BOOK3S_INTERRUPT_EXTERNAL + beqa BOOK3S_INTERRUPT_EXTERNAL + cmpwi r12, BOOK3S_INTERRUPT_DECREMENTER + beqa BOOK3S_INTERRUPT_DECREMENTER + cmpwi r12, BOOK3S_INTERRUPT_PERFMON + beqa BOOK3S_INTERRUPT_PERFMON + RFI kvmppc_handler_trampoline_exit_end: -- cgit v0.10.2 From 3b1d9d7d95e7c62518160edebd92450b58c6d55f Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Mon, 30 Apr 2012 10:56:12 +0200 Subject: KVM: PPC: Book3S: Enable IRQs during exit handling While handling an exit, we should listen for interrupts and make sure to receive them when they arrive, to keep our latencies low. Signed-off-by: Alexander Graf diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index dba282e..d169a0a 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -548,6 +548,9 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, run->exit_reason = KVM_EXIT_UNKNOWN; run->ready_for_interrupt_injection = 1; + /* We get here with MSR.EE=0, so enable it to be a nice citizen */ + __hard_irq_enable(); + trace_kvm_book3s_exit(exit_nr, vcpu); preempt_enable(); kvm_resched(vcpu); -- cgit v0.10.2 From 4444aa5f78eff73a353c8c4784cda2de74dea54b Mon Sep 17 00:00:00 2001 From: Mihai Caraman Date: Mon, 16 Apr 2012 04:08:53 +0000 Subject: KVM: PPC: bookehv: Fix r8/r13 storing in level exception handler Guest r8 register is held in the scratch register and stored correctly, so remove the instruction that clobbers it. Guest r13 was missing from vcpu, store it there. Signed-off-by: Mihai Caraman Signed-off-by: Alexander Graf diff --git a/arch/powerpc/kvm/bookehv_interrupts.S b/arch/powerpc/kvm/bookehv_interrupts.S index 06750cc..6048a00 100644 --- a/arch/powerpc/kvm/bookehv_interrupts.S +++ b/arch/powerpc/kvm/bookehv_interrupts.S @@ -252,10 +252,10 @@ _GLOBAL(kvmppc_handler_\intno\()_\srr1) mfspr r6, \srr1 PPC_LL r4, GPR11(r8) PPC_STL r7, VCPU_GPR(r7)(r11) - PPC_STL r8, VCPU_GPR(r8)(r11) PPC_STL r3, VCPU_GPR(r10)(r11) mfctr r7 PPC_STL r12, VCPU_GPR(r12)(r11) + PPC_STL r13, VCPU_GPR(r13)(r11) PPC_STL r4, VCPU_GPR(r11)(r11) PPC_STL r7, VCPU_CTR(r11) mr r4, r11 -- cgit v0.10.2 From f31e65e1170edba4a86bd8cba0318e251d3746d0 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Thu, 15 Mar 2012 21:58:34 +0000 Subject: kvm/book3s: Make kernel emulated H_PUT_TCE available for "PR" KVM There is nothing in the code for emulating TCE tables in the kernel that prevents it from working on "PR" KVM... other than ifdef's and location of the code. This and moves the bulk of the code there to a new file called book3s_64_vio.c. This speeds things up a bit on my G5. Signed-off-by: Benjamin Herrenschmidt [agraf: fix for hv kvm, 32bit, whitespace] Signed-off-by: Alexander Graf diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 42a527e..d848cdc 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -237,7 +237,6 @@ struct kvm_arch { unsigned long vrma_slb_v; int rma_setup_done; int using_mmu_notifiers; - struct list_head spapr_tce_tables; spinlock_t slot_phys_lock; unsigned long *slot_phys[KVM_MEM_SLOTS_NUM]; int slot_npages[KVM_MEM_SLOTS_NUM]; @@ -245,6 +244,9 @@ struct kvm_arch { struct kvmppc_vcore *vcores[KVM_MAX_VCORES]; struct kvmppc_linear_info *hpt_li; #endif /* CONFIG_KVM_BOOK3S_64_HV */ +#ifdef CONFIG_PPC_BOOK3S_64 + struct list_head spapr_tce_tables; +#endif }; /* diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 7f0a3da..c1069f6 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -126,6 +126,8 @@ extern void kvmppc_map_vrma(struct kvm_vcpu *vcpu, extern int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu); extern long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm, struct kvm_create_spapr_tce *args); +extern long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn, + unsigned long ioba, unsigned long tce); extern long kvm_vm_ioctl_allocate_rma(struct kvm *kvm, struct kvm_allocate_rma *rma); extern struct kvmppc_linear_info *kvm_alloc_rma(void); diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile index 25225ae..c2a0863 100644 --- a/arch/powerpc/kvm/Makefile +++ b/arch/powerpc/kvm/Makefile @@ -54,6 +54,7 @@ kvm-book3s_64-objs-$(CONFIG_KVM_BOOK3S_64_PR) := \ book3s_paired_singles.o \ book3s_pr.o \ book3s_pr_papr.o \ + book3s_64_vio_hv.o \ book3s_emulate.o \ book3s_interrupts.o \ book3s_mmu_hpte.o \ @@ -78,6 +79,7 @@ kvm-book3s_64-module-objs := \ powerpc.o \ emulate.o \ book3s.o \ + book3s_64_vio.o \ $(kvm-book3s_64-objs-y) kvm-objs-$(CONFIG_KVM_BOOK3S_64) := $(kvm-book3s_64-module-objs) diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c new file mode 100644 index 0000000..72ffc89 --- /dev/null +++ b/arch/powerpc/kvm/book3s_64_vio.c @@ -0,0 +1,150 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * Copyright 2010 Paul Mackerras, IBM Corp. + * Copyright 2011 David Gibson, IBM Corporation + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define TCES_PER_PAGE (PAGE_SIZE / sizeof(u64)) + +static long kvmppc_stt_npages(unsigned long window_size) +{ + return ALIGN((window_size >> SPAPR_TCE_SHIFT) + * sizeof(u64), PAGE_SIZE) / PAGE_SIZE; +} + +static void release_spapr_tce_table(struct kvmppc_spapr_tce_table *stt) +{ + struct kvm *kvm = stt->kvm; + int i; + + mutex_lock(&kvm->lock); + list_del(&stt->list); + for (i = 0; i < kvmppc_stt_npages(stt->window_size); i++) + __free_page(stt->pages[i]); + kfree(stt); + mutex_unlock(&kvm->lock); + + kvm_put_kvm(kvm); +} + +static int kvm_spapr_tce_fault(struct vm_area_struct *vma, struct vm_fault *vmf) +{ + struct kvmppc_spapr_tce_table *stt = vma->vm_file->private_data; + struct page *page; + + if (vmf->pgoff >= kvmppc_stt_npages(stt->window_size)) + return VM_FAULT_SIGBUS; + + page = stt->pages[vmf->pgoff]; + get_page(page); + vmf->page = page; + return 0; +} + +static const struct vm_operations_struct kvm_spapr_tce_vm_ops = { + .fault = kvm_spapr_tce_fault, +}; + +static int kvm_spapr_tce_mmap(struct file *file, struct vm_area_struct *vma) +{ + vma->vm_ops = &kvm_spapr_tce_vm_ops; + return 0; +} + +static int kvm_spapr_tce_release(struct inode *inode, struct file *filp) +{ + struct kvmppc_spapr_tce_table *stt = filp->private_data; + + release_spapr_tce_table(stt); + return 0; +} + +static struct file_operations kvm_spapr_tce_fops = { + .mmap = kvm_spapr_tce_mmap, + .release = kvm_spapr_tce_release, +}; + +long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm, + struct kvm_create_spapr_tce *args) +{ + struct kvmppc_spapr_tce_table *stt = NULL; + long npages; + int ret = -ENOMEM; + int i; + + /* Check this LIOBN hasn't been previously allocated */ + list_for_each_entry(stt, &kvm->arch.spapr_tce_tables, list) { + if (stt->liobn == args->liobn) + return -EBUSY; + } + + npages = kvmppc_stt_npages(args->window_size); + + stt = kzalloc(sizeof(*stt) + npages * sizeof(struct page *), + GFP_KERNEL); + if (!stt) + goto fail; + + stt->liobn = args->liobn; + stt->window_size = args->window_size; + stt->kvm = kvm; + + for (i = 0; i < npages; i++) { + stt->pages[i] = alloc_page(GFP_KERNEL | __GFP_ZERO); + if (!stt->pages[i]) + goto fail; + } + + kvm_get_kvm(kvm); + + mutex_lock(&kvm->lock); + list_add(&stt->list, &kvm->arch.spapr_tce_tables); + + mutex_unlock(&kvm->lock); + + return anon_inode_getfd("kvm-spapr-tce", &kvm_spapr_tce_fops, + stt, O_RDWR); + +fail: + if (stt) { + for (i = 0; i < npages; i++) + if (stt->pages[i]) + __free_page(stt->pages[i]); + + kfree(stt); + } + return ret; +} diff --git a/arch/powerpc/kvm/book3s_64_vio_hv.c b/arch/powerpc/kvm/book3s_64_vio_hv.c index ea0f8c5..30c2f3b 100644 --- a/arch/powerpc/kvm/book3s_64_vio_hv.c +++ b/arch/powerpc/kvm/book3s_64_vio_hv.c @@ -38,6 +38,9 @@ #define TCES_PER_PAGE (PAGE_SIZE / sizeof(u64)) +/* WARNING: This will be called in real-mode on HV KVM and virtual + * mode on PR KVM + */ long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn, unsigned long ioba, unsigned long tce) { diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 9079357..59c2967 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -1093,115 +1093,6 @@ int kvmppc_vcpu_run(struct kvm_run *run, struct kvm_vcpu *vcpu) return r; } -static long kvmppc_stt_npages(unsigned long window_size) -{ - return ALIGN((window_size >> SPAPR_TCE_SHIFT) - * sizeof(u64), PAGE_SIZE) / PAGE_SIZE; -} - -static void release_spapr_tce_table(struct kvmppc_spapr_tce_table *stt) -{ - struct kvm *kvm = stt->kvm; - int i; - - mutex_lock(&kvm->lock); - list_del(&stt->list); - for (i = 0; i < kvmppc_stt_npages(stt->window_size); i++) - __free_page(stt->pages[i]); - kfree(stt); - mutex_unlock(&kvm->lock); - - kvm_put_kvm(kvm); -} - -static int kvm_spapr_tce_fault(struct vm_area_struct *vma, struct vm_fault *vmf) -{ - struct kvmppc_spapr_tce_table *stt = vma->vm_file->private_data; - struct page *page; - - if (vmf->pgoff >= kvmppc_stt_npages(stt->window_size)) - return VM_FAULT_SIGBUS; - - page = stt->pages[vmf->pgoff]; - get_page(page); - vmf->page = page; - return 0; -} - -static const struct vm_operations_struct kvm_spapr_tce_vm_ops = { - .fault = kvm_spapr_tce_fault, -}; - -static int kvm_spapr_tce_mmap(struct file *file, struct vm_area_struct *vma) -{ - vma->vm_ops = &kvm_spapr_tce_vm_ops; - return 0; -} - -static int kvm_spapr_tce_release(struct inode *inode, struct file *filp) -{ - struct kvmppc_spapr_tce_table *stt = filp->private_data; - - release_spapr_tce_table(stt); - return 0; -} - -static struct file_operations kvm_spapr_tce_fops = { - .mmap = kvm_spapr_tce_mmap, - .release = kvm_spapr_tce_release, -}; - -long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm, - struct kvm_create_spapr_tce *args) -{ - struct kvmppc_spapr_tce_table *stt = NULL; - long npages; - int ret = -ENOMEM; - int i; - - /* Check this LIOBN hasn't been previously allocated */ - list_for_each_entry(stt, &kvm->arch.spapr_tce_tables, list) { - if (stt->liobn == args->liobn) - return -EBUSY; - } - - npages = kvmppc_stt_npages(args->window_size); - - stt = kzalloc(sizeof(*stt) + npages* sizeof(struct page *), - GFP_KERNEL); - if (!stt) - goto fail; - - stt->liobn = args->liobn; - stt->window_size = args->window_size; - stt->kvm = kvm; - - for (i = 0; i < npages; i++) { - stt->pages[i] = alloc_page(GFP_KERNEL | __GFP_ZERO); - if (!stt->pages[i]) - goto fail; - } - - kvm_get_kvm(kvm); - - mutex_lock(&kvm->lock); - list_add(&stt->list, &kvm->arch.spapr_tce_tables); - - mutex_unlock(&kvm->lock); - - return anon_inode_getfd("kvm-spapr-tce", &kvm_spapr_tce_fops, - stt, O_RDWR); - -fail: - if (stt) { - for (i = 0; i < npages; i++) - if (stt->pages[i]) - __free_page(stt->pages[i]); - - kfree(stt); - } - return ret; -} /* Work out RMLS (real mode limit selector) field value for a given RMA size. Assumes POWER7 or PPC970. */ diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index d169a0a..815ac59 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -1171,11 +1171,18 @@ void kvmppc_core_commit_memory_region(struct kvm *kvm, int kvmppc_core_init_vm(struct kvm *kvm) { +#ifdef CONFIG_PPC64 + INIT_LIST_HEAD(&kvm->arch.spapr_tce_tables); +#endif + return 0; } void kvmppc_core_destroy_vm(struct kvm *kvm) { +#ifdef CONFIG_PPC64 + WARN_ON(!list_empty(&kvm->arch.spapr_tce_tables)); +#endif } static int kvmppc_book3s_init(void) diff --git a/arch/powerpc/kvm/book3s_pr_papr.c b/arch/powerpc/kvm/book3s_pr_papr.c index 60ac0e7..3ff9013 100644 --- a/arch/powerpc/kvm/book3s_pr_papr.c +++ b/arch/powerpc/kvm/book3s_pr_papr.c @@ -15,6 +15,8 @@ * published by the Free Software Foundation. */ +#include + #include #include #include @@ -211,6 +213,20 @@ static int kvmppc_h_pr_protect(struct kvm_vcpu *vcpu) return EMULATE_DONE; } +static int kvmppc_h_pr_put_tce(struct kvm_vcpu *vcpu) +{ + unsigned long liobn = kvmppc_get_gpr(vcpu, 4); + unsigned long ioba = kvmppc_get_gpr(vcpu, 5); + unsigned long tce = kvmppc_get_gpr(vcpu, 6); + long rc; + + rc = kvmppc_h_put_tce(vcpu, liobn, ioba, tce); + if (rc == H_TOO_HARD) + return EMULATE_FAIL; + kvmppc_set_gpr(vcpu, 3, rc); + return EMULATE_DONE; +} + int kvmppc_h_pr(struct kvm_vcpu *vcpu, unsigned long cmd) { switch (cmd) { @@ -222,6 +238,8 @@ int kvmppc_h_pr(struct kvm_vcpu *vcpu, unsigned long cmd) return kvmppc_h_pr_protect(vcpu); case H_BULK_REMOVE: return kvmppc_h_pr_bulk_remove(vcpu); + case H_PUT_TCE: + return kvmppc_h_pr_put_tce(vcpu); case H_CEDE: kvm_vcpu_block(vcpu); clear_bit(KVM_REQ_UNHALT, &vcpu->requests); diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 58ad860..6ac3115 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -244,10 +244,12 @@ int kvm_dev_ioctl_check_extension(long ext) r = KVM_COALESCED_MMIO_PAGE_OFFSET; break; #endif -#ifdef CONFIG_KVM_BOOK3S_64_HV +#ifdef CONFIG_PPC_BOOK3S_64 case KVM_CAP_SPAPR_TCE: r = 1; break; +#endif /* CONFIG_PPC_BOOK3S_64 */ +#ifdef CONFIG_KVM_BOOK3S_64_HV case KVM_CAP_PPC_SMT: r = threads_per_core; break; @@ -773,7 +775,7 @@ long kvm_arch_vm_ioctl(struct file *filp, break; } -#ifdef CONFIG_KVM_BOOK3S_64_HV +#ifdef CONFIG_PPC_BOOK3S_64 case KVM_CREATE_SPAPR_TCE: { struct kvm_create_spapr_tce create_tce; struct kvm *kvm = filp->private_data; @@ -784,7 +786,9 @@ long kvm_arch_vm_ioctl(struct file *filp, r = kvm_vm_ioctl_create_spapr_tce(kvm, &create_tce); goto out; } +#endif /* CONFIG_PPC_BOOK3S_64 */ +#ifdef CONFIG_KVM_BOOK3S_64_HV case KVM_ALLOCATE_RMA: { struct kvm *kvm = filp->private_data; struct kvm_allocate_rma rma; -- cgit v0.10.2 From 5b74716ebab10e7bce960d148fe6d8f6920451e5 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Thu, 26 Apr 2012 19:43:42 +0000 Subject: kvm/powerpc: Add new ioctl to retreive server MMU infos This is necessary for qemu to be able to pass the right information to the guest, such as the supported page sizes and corresponding encodings in the SLB and hash table, which can vary depending on the processor type, the type of KVM used (PR vs HV) and the version of KVM Signed-off-by: Benjamin Herrenschmidt [agraf: fix compilation on hv, adjust for newer ioctl numbers] Signed-off-by: Alexander Graf diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index eb62761..9301266 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -1860,6 +1860,76 @@ See KVM_GET_PIT2 for details on struct kvm_pit_state2. This IOCTL replaces the obsolete KVM_SET_PIT. +4.74 KVM_PPC_GET_SMMU_INFO + +Capability: KVM_CAP_PPC_GET_SMMU_INFO +Architectures: powerpc +Type: vm ioctl +Parameters: None +Returns: 0 on success, -1 on error + +This populates and returns a structure describing the features of +the "Server" class MMU emulation supported by KVM. +This can in turn be used by userspace to generate the appropariate +device-tree properties for the guest operating system. + +The structure contains some global informations, followed by an +array of supported segment page sizes: + + struct kvm_ppc_smmu_info { + __u64 flags; + __u32 slb_size; + __u32 pad; + struct kvm_ppc_one_seg_page_size sps[KVM_PPC_PAGE_SIZES_MAX_SZ]; + }; + +The supported flags are: + + - KVM_PPC_PAGE_SIZES_REAL: + When that flag is set, guest page sizes must "fit" the backing + store page sizes. When not set, any page size in the list can + be used regardless of how they are backed by userspace. + + - KVM_PPC_1T_SEGMENTS + The emulated MMU supports 1T segments in addition to the + standard 256M ones. + +The "slb_size" field indicates how many SLB entries are supported + +The "sps" array contains 8 entries indicating the supported base +page sizes for a segment in increasing order. Each entry is defined +as follow: + + struct kvm_ppc_one_seg_page_size { + __u32 page_shift; /* Base page shift of segment (or 0) */ + __u32 slb_enc; /* SLB encoding for BookS */ + struct kvm_ppc_one_page_size enc[KVM_PPC_PAGE_SIZES_MAX_SZ]; + }; + +An entry with a "page_shift" of 0 is unused. Because the array is +organized in increasing order, a lookup can stop when encoutering +such an entry. + +The "slb_enc" field provides the encoding to use in the SLB for the +page size. The bits are in positions such as the value can directly +be OR'ed into the "vsid" argument of the slbmte instruction. + +The "enc" array is a list which for each of those segment base page +size provides the list of supported actual page sizes (which can be +only larger or equal to the base page size), along with the +corresponding encoding in the hash PTE. Similarily, the array is +8 entries sorted by increasing sizes and an entry with a "0" shift +is an empty entry and a terminator: + + struct kvm_ppc_one_page_size { + __u32 page_shift; /* Page shift (or 0) */ + __u32 pte_enc; /* Encoding in the HPTE (>>12) */ + }; + +The "pte_enc" field provides a value that can OR'ed into the hash +PTE's RPN field (ie, it needs to be shifted left by 12 to OR it +into the hash PTE second double word). + 5. The kvm_run structure ------------------------ diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index c1069f6..c87e3b5 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -140,6 +140,8 @@ extern int kvmppc_core_prepare_memory_region(struct kvm *kvm, struct kvm_userspace_memory_region *mem); extern void kvmppc_core_commit_memory_region(struct kvm *kvm, struct kvm_userspace_memory_region *mem); +extern int kvm_vm_ioctl_get_smmu_info(struct kvm *kvm, + struct kvm_ppc_smmu_info *info); extern int kvmppc_bookehv_init(void); extern void kvmppc_bookehv_exit(void); diff --git a/arch/powerpc/kernel/ppc_ksyms.c b/arch/powerpc/kernel/ppc_ksyms.c index 786a270..d1f2aaf 100644 --- a/arch/powerpc/kernel/ppc_ksyms.c +++ b/arch/powerpc/kernel/ppc_ksyms.c @@ -190,3 +190,7 @@ EXPORT_SYMBOL(__arch_hweight16); EXPORT_SYMBOL(__arch_hweight32); EXPORT_SYMBOL(__arch_hweight64); #endif + +#ifdef CONFIG_PPC_BOOK3S_64 +EXPORT_SYMBOL_GPL(mmu_psize_defs); +#endif diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 59c2967..bb5a0f4 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -1175,6 +1175,38 @@ long kvm_vm_ioctl_allocate_rma(struct kvm *kvm, struct kvm_allocate_rma *ret) return fd; } +static void kvmppc_add_seg_page_size(struct kvm_ppc_one_seg_page_size **sps, + int linux_psize) +{ + struct mmu_psize_def *def = &mmu_psize_defs[linux_psize]; + + if (!def->shift) + return; + (*sps)->page_shift = def->shift; + (*sps)->slb_enc = def->sllp; + (*sps)->enc[0].page_shift = def->shift; + (*sps)->enc[0].pte_enc = def->penc; + (*sps)++; +} + +int kvm_vm_ioctl_get_smmu_info(struct kvm *kvm, struct kvm_ppc_smmu_info *info) +{ + struct kvm_ppc_one_seg_page_size *sps; + + info->flags = KVM_PPC_PAGE_SIZES_REAL; + if (mmu_has_feature(MMU_FTR_1T_SEGMENT)) + info->flags |= KVM_PPC_1T_SEGMENTS; + info->slb_size = mmu_slb_size; + + /* We only support these sizes for now, and no muti-size segments */ + sps = &info->sps[0]; + kvmppc_add_seg_page_size(&sps, MMU_PAGE_4K); + kvmppc_add_seg_page_size(&sps, MMU_PAGE_64K); + kvmppc_add_seg_page_size(&sps, MMU_PAGE_16M); + + return 0; +} + /* * Get (and clear) the dirty memory log for a memory slot. */ diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index 815ac59..a1baec3 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -1158,6 +1158,31 @@ out: return r; } +#ifdef CONFIG_PPC64 +int kvm_vm_ioctl_get_smmu_info(struct kvm *kvm, struct kvm_ppc_smmu_info *info) +{ + /* No flags */ + info->flags = 0; + + /* SLB is always 64 entries */ + info->slb_size = 64; + + /* Standard 4k base page size segment */ + info->sps[0].page_shift = 12; + info->sps[0].slb_enc = 0; + info->sps[0].enc[0].page_shift = 12; + info->sps[0].enc[0].pte_enc = 0; + + /* Standard 16M large page size segment */ + info->sps[1].page_shift = 24; + info->sps[1].slb_enc = SLB_VSID_L; + info->sps[1].enc[0].page_shift = 24; + info->sps[1].enc[0].pte_enc = 0; + + return 0; +} +#endif /* CONFIG_PPC64 */ + int kvmppc_core_prepare_memory_region(struct kvm *kvm, struct kvm_userspace_memory_region *mem) { diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 6ac3115..1493c8d 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -279,6 +279,11 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_MAX_VCPUS: r = KVM_MAX_VCPUS; break; +#ifdef CONFIG_PPC_BOOK3S_64 + case KVM_CAP_PPC_GET_SMMU_INFO: + r = 1; + break; +#endif default: r = 0; break; @@ -718,7 +723,6 @@ long kvm_arch_vcpu_ioctl(struct file *filp, break; } #endif - default: r = -EINVAL; } @@ -800,6 +804,18 @@ long kvm_arch_vm_ioctl(struct file *filp, } #endif /* CONFIG_KVM_BOOK3S_64_HV */ +#ifdef CONFIG_PPC_BOOK3S_64 + case KVM_PPC_GET_SMMU_INFO: { + struct kvm *kvm = filp->private_data; + struct kvm_ppc_smmu_info info; + + memset(&info, 0, sizeof(info)); + r = kvm_vm_ioctl_get_smmu_info(kvm, &info); + if (r >= 0 && copy_to_user(argp, &info, sizeof(info))) + r = -EFAULT; + break; + } +#endif /* CONFIG_PPC_BOOK3S_64 */ default: r = -ENOTTY; } diff --git a/include/linux/kvm.h b/include/linux/kvm.h index 225b452..8d696cf 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -449,6 +449,30 @@ struct kvm_ppc_pvinfo { __u8 pad[108]; }; +/* for KVM_PPC_GET_SMMU_INFO */ +#define KVM_PPC_PAGE_SIZES_MAX_SZ 8 + +struct kvm_ppc_one_page_size { + __u32 page_shift; /* Page shift (or 0) */ + __u32 pte_enc; /* Encoding in the HPTE (>>12) */ +}; + +struct kvm_ppc_one_seg_page_size { + __u32 page_shift; /* Base page shift of segment (or 0) */ + __u32 slb_enc; /* SLB encoding for BookS */ + struct kvm_ppc_one_page_size enc[KVM_PPC_PAGE_SIZES_MAX_SZ]; +}; + +#define KVM_PPC_PAGE_SIZES_REAL 0x00000001 +#define KVM_PPC_1T_SEGMENTS 0x00000002 + +struct kvm_ppc_smmu_info { + __u64 flags; + __u32 slb_size; + __u32 pad; + struct kvm_ppc_one_seg_page_size sps[KVM_PPC_PAGE_SIZES_MAX_SZ]; +}; + #define KVMIO 0xAE /* machine type bits, to be used as argument to KVM_CREATE_VM */ @@ -591,6 +615,7 @@ struct kvm_ppc_pvinfo { #define KVM_CAP_PCI_2_3 75 #define KVM_CAP_KVMCLOCK_CTRL 76 #define KVM_CAP_SIGNAL_MSI 77 +#define KVM_CAP_PPC_GET_SMMU_INFO 78 #ifdef KVM_CAP_IRQ_ROUTING @@ -800,6 +825,8 @@ struct kvm_s390_ucas_mapping { struct kvm_assigned_pci_dev) /* Available with KVM_CAP_SIGNAL_MSI */ #define KVM_SIGNAL_MSI _IOW(KVMIO, 0xa5, struct kvm_msi) +/* Available with KVM_CAP_PPC_GET_SMMU_INFO */ +#define KVM_PPC_GET_SMMU_INFO _IOR(KVMIO, 0xa6, struct kvm_ppc_smmu_info) /* * ioctls for vcpu fds -- cgit v0.10.2 From c46dc9a86148bc37c31d67a22a3887144ba7aa81 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Fri, 4 May 2012 14:01:33 +0200 Subject: KVM: PPC: Emulator: clean up instruction parsing Instructions on PPC are pretty similarly encoded. So instead of every instruction emulation code decoding the instruction fields itself, we can move that code to more generic places and rely on the compiler to optimize the unused bits away. This has 2 advantages. It makes the code smaller and it makes the code less error prone, as the instruction fields are always available, so accidental misusage is reduced. Functionally, this patch doesn't change anything. Signed-off-by: Alexander Graf diff --git a/arch/powerpc/kvm/44x_emulate.c b/arch/powerpc/kvm/44x_emulate.c index 549bb2c..da81a2d 100644 --- a/arch/powerpc/kvm/44x_emulate.c +++ b/arch/powerpc/kvm/44x_emulate.c @@ -37,22 +37,19 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, unsigned int inst, int *advance) { int emulated = EMULATE_DONE; - int dcrn; - int ra; - int rb; - int rc; - int rs; - int rt; - int ws; + int dcrn = get_dcrn(inst); + int ra = get_ra(inst); + int rb = get_rb(inst); + int rc = get_rc(inst); + int rs = get_rs(inst); + int rt = get_rt(inst); + int ws = get_ws(inst); switch (get_op(inst)) { case 31: switch (get_xop(inst)) { case XOP_MFDCR: - dcrn = get_dcrn(inst); - rt = get_rt(inst); - /* The guest may access CPR0 registers to determine the timebase * frequency, and it must know the real host frequency because it * can directly access the timebase registers. @@ -88,9 +85,6 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, break; case XOP_MTDCR: - dcrn = get_dcrn(inst); - rs = get_rs(inst); - /* emulate some access in kernel */ switch (dcrn) { case DCRN_CPR0_CONFIG_ADDR: @@ -108,17 +102,10 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, break; case XOP_TLBWE: - ra = get_ra(inst); - rs = get_rs(inst); - ws = get_ws(inst); emulated = kvmppc_44x_emul_tlbwe(vcpu, ra, rs, ws); break; case XOP_TLBSX: - rt = get_rt(inst); - ra = get_ra(inst); - rb = get_rb(inst); - rc = get_rc(inst); emulated = kvmppc_44x_emul_tlbsx(vcpu, rt, ra, rb, rc); break; diff --git a/arch/powerpc/kvm/book3s_emulate.c b/arch/powerpc/kvm/book3s_emulate.c index 135663a..c023bcd 100644 --- a/arch/powerpc/kvm/book3s_emulate.c +++ b/arch/powerpc/kvm/book3s_emulate.c @@ -87,6 +87,10 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, unsigned int inst, int *advance) { int emulated = EMULATE_DONE; + int rt = get_rt(inst); + int rs = get_rs(inst); + int ra = get_ra(inst); + int rb = get_rb(inst); switch (get_op(inst)) { case 19: @@ -106,21 +110,22 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, case 31: switch (get_xop(inst)) { case OP_31_XOP_MFMSR: - kvmppc_set_gpr(vcpu, get_rt(inst), - vcpu->arch.shared->msr); + kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->msr); break; case OP_31_XOP_MTMSRD: { - ulong rs = kvmppc_get_gpr(vcpu, get_rs(inst)); + ulong rs_val = kvmppc_get_gpr(vcpu, rs); if (inst & 0x10000) { - vcpu->arch.shared->msr &= ~(MSR_RI | MSR_EE); - vcpu->arch.shared->msr |= rs & (MSR_RI | MSR_EE); + ulong new_msr = vcpu->arch.shared->msr; + new_msr &= ~(MSR_RI | MSR_EE); + new_msr |= rs_val & (MSR_RI | MSR_EE); + vcpu->arch.shared->msr = new_msr; } else - kvmppc_set_msr(vcpu, rs); + kvmppc_set_msr(vcpu, rs_val); break; } case OP_31_XOP_MTMSR: - kvmppc_set_msr(vcpu, kvmppc_get_gpr(vcpu, get_rs(inst))); + kvmppc_set_msr(vcpu, kvmppc_get_gpr(vcpu, rs)); break; case OP_31_XOP_MFSR: { @@ -130,7 +135,7 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, if (vcpu->arch.mmu.mfsrin) { u32 sr; sr = vcpu->arch.mmu.mfsrin(vcpu, srnum); - kvmppc_set_gpr(vcpu, get_rt(inst), sr); + kvmppc_set_gpr(vcpu, rt, sr); } break; } @@ -138,29 +143,29 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, { int srnum; - srnum = (kvmppc_get_gpr(vcpu, get_rb(inst)) >> 28) & 0xf; + srnum = (kvmppc_get_gpr(vcpu, rb) >> 28) & 0xf; if (vcpu->arch.mmu.mfsrin) { u32 sr; sr = vcpu->arch.mmu.mfsrin(vcpu, srnum); - kvmppc_set_gpr(vcpu, get_rt(inst), sr); + kvmppc_set_gpr(vcpu, rt, sr); } break; } case OP_31_XOP_MTSR: vcpu->arch.mmu.mtsrin(vcpu, (inst >> 16) & 0xf, - kvmppc_get_gpr(vcpu, get_rs(inst))); + kvmppc_get_gpr(vcpu, rs)); break; case OP_31_XOP_MTSRIN: vcpu->arch.mmu.mtsrin(vcpu, - (kvmppc_get_gpr(vcpu, get_rb(inst)) >> 28) & 0xf, - kvmppc_get_gpr(vcpu, get_rs(inst))); + (kvmppc_get_gpr(vcpu, rb) >> 28) & 0xf, + kvmppc_get_gpr(vcpu, rs)); break; case OP_31_XOP_TLBIE: case OP_31_XOP_TLBIEL: { bool large = (inst & 0x00200000) ? true : false; - ulong addr = kvmppc_get_gpr(vcpu, get_rb(inst)); + ulong addr = kvmppc_get_gpr(vcpu, rb); vcpu->arch.mmu.tlbie(vcpu, addr, large); break; } @@ -171,15 +176,15 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, return EMULATE_FAIL; vcpu->arch.mmu.slbmte(vcpu, - kvmppc_get_gpr(vcpu, get_rs(inst)), - kvmppc_get_gpr(vcpu, get_rb(inst))); + kvmppc_get_gpr(vcpu, rs), + kvmppc_get_gpr(vcpu, rb)); break; case OP_31_XOP_SLBIE: if (!vcpu->arch.mmu.slbie) return EMULATE_FAIL; vcpu->arch.mmu.slbie(vcpu, - kvmppc_get_gpr(vcpu, get_rb(inst))); + kvmppc_get_gpr(vcpu, rb)); break; case OP_31_XOP_SLBIA: if (!vcpu->arch.mmu.slbia) @@ -191,22 +196,22 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, if (!vcpu->arch.mmu.slbmfee) { emulated = EMULATE_FAIL; } else { - ulong t, rb; + ulong t, rb_val; - rb = kvmppc_get_gpr(vcpu, get_rb(inst)); - t = vcpu->arch.mmu.slbmfee(vcpu, rb); - kvmppc_set_gpr(vcpu, get_rt(inst), t); + rb_val = kvmppc_get_gpr(vcpu, rb); + t = vcpu->arch.mmu.slbmfee(vcpu, rb_val); + kvmppc_set_gpr(vcpu, rt, t); } break; case OP_31_XOP_SLBMFEV: if (!vcpu->arch.mmu.slbmfev) { emulated = EMULATE_FAIL; } else { - ulong t, rb; + ulong t, rb_val; - rb = kvmppc_get_gpr(vcpu, get_rb(inst)); - t = vcpu->arch.mmu.slbmfev(vcpu, rb); - kvmppc_set_gpr(vcpu, get_rt(inst), t); + rb_val = kvmppc_get_gpr(vcpu, rb); + t = vcpu->arch.mmu.slbmfev(vcpu, rb_val); + kvmppc_set_gpr(vcpu, rt, t); } break; case OP_31_XOP_DCBA: @@ -214,17 +219,17 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, break; case OP_31_XOP_DCBZ: { - ulong rb = kvmppc_get_gpr(vcpu, get_rb(inst)); - ulong ra = 0; + ulong rb_val = kvmppc_get_gpr(vcpu, rb); + ulong ra_val = 0; ulong addr, vaddr; u32 zeros[8] = { 0, 0, 0, 0, 0, 0, 0, 0 }; u32 dsisr; int r; - if (get_ra(inst)) - ra = kvmppc_get_gpr(vcpu, get_ra(inst)); + if (ra) + ra_val = kvmppc_get_gpr(vcpu, ra); - addr = (ra + rb) & ~31ULL; + addr = (ra_val + rb_val) & ~31ULL; if (!(vcpu->arch.shared->msr & MSR_SF)) addr &= 0xffffffff; vaddr = addr; @@ -565,23 +570,22 @@ u32 kvmppc_alignment_dsisr(struct kvm_vcpu *vcpu, unsigned int inst) ulong kvmppc_alignment_dar(struct kvm_vcpu *vcpu, unsigned int inst) { ulong dar = 0; - ulong ra; + ulong ra = get_ra(inst); + ulong rb = get_rb(inst); switch (get_op(inst)) { case OP_LFS: case OP_LFD: case OP_STFD: case OP_STFS: - ra = get_ra(inst); if (ra) dar = kvmppc_get_gpr(vcpu, ra); dar += (s32)((s16)inst); break; case 31: - ra = get_ra(inst); if (ra) dar = kvmppc_get_gpr(vcpu, ra); - dar += kvmppc_get_gpr(vcpu, get_rb(inst)); + dar += kvmppc_get_gpr(vcpu, rb); break; default: printk(KERN_INFO "KVM: Unaligned instruction 0x%x\n", inst); diff --git a/arch/powerpc/kvm/booke_emulate.c b/arch/powerpc/kvm/booke_emulate.c index 904412b..e14f7b2 100644 --- a/arch/powerpc/kvm/booke_emulate.c +++ b/arch/powerpc/kvm/booke_emulate.c @@ -40,8 +40,8 @@ int kvmppc_booke_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, unsigned int inst, int *advance) { int emulated = EMULATE_DONE; - int rs; - int rt; + int rs = get_rs(inst); + int rt = get_rt(inst); switch (get_op(inst)) { case 19: @@ -62,19 +62,16 @@ int kvmppc_booke_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, switch (get_xop(inst)) { case OP_31_XOP_MFMSR: - rt = get_rt(inst); kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->msr); kvmppc_set_exit_type(vcpu, EMULATED_MFMSR_EXITS); break; case OP_31_XOP_MTMSR: - rs = get_rs(inst); kvmppc_set_exit_type(vcpu, EMULATED_MTMSR_EXITS); kvmppc_set_msr(vcpu, kvmppc_get_gpr(vcpu, rs)); break; case OP_31_XOP_WRTEE: - rs = get_rs(inst); vcpu->arch.shared->msr = (vcpu->arch.shared->msr & ~MSR_EE) | (kvmppc_get_gpr(vcpu, rs) & MSR_EE); kvmppc_set_exit_type(vcpu, EMULATED_WRTEE_EXITS); diff --git a/arch/powerpc/kvm/e500_emulate.c b/arch/powerpc/kvm/e500_emulate.c index 99155f8..9b2dcda 100644 --- a/arch/powerpc/kvm/e500_emulate.c +++ b/arch/powerpc/kvm/e500_emulate.c @@ -86,9 +86,9 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, unsigned int inst, int *advance) { int emulated = EMULATE_DONE; - int ra; - int rb; - int rt; + int ra = get_ra(inst); + int rb = get_rb(inst); + int rt = get_rt(inst); switch (get_op(inst)) { case 31: @@ -96,11 +96,11 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, #ifdef CONFIG_KVM_E500MC case XOP_MSGSND: - emulated = kvmppc_e500_emul_msgsnd(vcpu, get_rb(inst)); + emulated = kvmppc_e500_emul_msgsnd(vcpu, rb); break; case XOP_MSGCLR: - emulated = kvmppc_e500_emul_msgclr(vcpu, get_rb(inst)); + emulated = kvmppc_e500_emul_msgclr(vcpu, rb); break; #endif @@ -113,20 +113,14 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, break; case XOP_TLBSX: - rb = get_rb(inst); emulated = kvmppc_e500_emul_tlbsx(vcpu,rb); break; case XOP_TLBILX: - ra = get_ra(inst); - rb = get_rb(inst); - rt = get_rt(inst); emulated = kvmppc_e500_emul_tlbilx(vcpu, rt, ra, rb); break; case XOP_TLBIVAX: - ra = get_ra(inst); - rb = get_rb(inst); emulated = kvmppc_e500_emul_tlbivax(vcpu, ra, rb); break; diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c index a27d4dc..f63b5cb 100644 --- a/arch/powerpc/kvm/emulate.c +++ b/arch/powerpc/kvm/emulate.c @@ -148,11 +148,10 @@ u32 kvmppc_get_dec(struct kvm_vcpu *vcpu, u64 tb) int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) { u32 inst = kvmppc_get_last_inst(vcpu); - int ra; - int rb; - int rs; - int rt; - int sprn; + int ra = get_ra(inst); + int rs = get_rs(inst); + int rt = get_rt(inst); + int sprn = get_sprn(inst); enum emulation_result emulated = EMULATE_DONE; int advance = 1; @@ -189,43 +188,31 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) advance = 0; break; case OP_31_XOP_LWZX: - rt = get_rt(inst); emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1); break; case OP_31_XOP_LBZX: - rt = get_rt(inst); emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1); break; case OP_31_XOP_LBZUX: - rt = get_rt(inst); - ra = get_ra(inst); - rb = get_rb(inst); - emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1); kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); break; case OP_31_XOP_STWX: - rs = get_rs(inst); emulated = kvmppc_handle_store(run, vcpu, kvmppc_get_gpr(vcpu, rs), 4, 1); break; case OP_31_XOP_STBX: - rs = get_rs(inst); emulated = kvmppc_handle_store(run, vcpu, kvmppc_get_gpr(vcpu, rs), 1, 1); break; case OP_31_XOP_STBUX: - rs = get_rs(inst); - ra = get_ra(inst); - rb = get_rb(inst); - emulated = kvmppc_handle_store(run, vcpu, kvmppc_get_gpr(vcpu, rs), 1, 1); @@ -233,28 +220,19 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) break; case OP_31_XOP_LHAX: - rt = get_rt(inst); emulated = kvmppc_handle_loads(run, vcpu, rt, 2, 1); break; case OP_31_XOP_LHZX: - rt = get_rt(inst); emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1); break; case OP_31_XOP_LHZUX: - rt = get_rt(inst); - ra = get_ra(inst); - rb = get_rb(inst); - emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1); kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); break; case OP_31_XOP_MFSPR: - sprn = get_sprn(inst); - rt = get_rt(inst); - switch (sprn) { case SPRN_SRR0: kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->srr0); @@ -310,20 +288,12 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) break; case OP_31_XOP_STHX: - rs = get_rs(inst); - ra = get_ra(inst); - rb = get_rb(inst); - emulated = kvmppc_handle_store(run, vcpu, kvmppc_get_gpr(vcpu, rs), 2, 1); break; case OP_31_XOP_STHUX: - rs = get_rs(inst); - ra = get_ra(inst); - rb = get_rb(inst); - emulated = kvmppc_handle_store(run, vcpu, kvmppc_get_gpr(vcpu, rs), 2, 1); @@ -331,8 +301,6 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) break; case OP_31_XOP_MTSPR: - sprn = get_sprn(inst); - rs = get_rs(inst); switch (sprn) { case SPRN_SRR0: vcpu->arch.shared->srr0 = kvmppc_get_gpr(vcpu, rs); @@ -384,7 +352,6 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) break; case OP_31_XOP_LWBRX: - rt = get_rt(inst); emulated = kvmppc_handle_load(run, vcpu, rt, 4, 0); break; @@ -392,25 +359,16 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) break; case OP_31_XOP_STWBRX: - rs = get_rs(inst); - ra = get_ra(inst); - rb = get_rb(inst); - emulated = kvmppc_handle_store(run, vcpu, kvmppc_get_gpr(vcpu, rs), 4, 0); break; case OP_31_XOP_LHBRX: - rt = get_rt(inst); emulated = kvmppc_handle_load(run, vcpu, rt, 2, 0); break; case OP_31_XOP_STHBRX: - rs = get_rs(inst); - ra = get_ra(inst); - rb = get_rb(inst); - emulated = kvmppc_handle_store(run, vcpu, kvmppc_get_gpr(vcpu, rs), 2, 0); @@ -423,39 +381,30 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) break; case OP_LWZ: - rt = get_rt(inst); emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1); break; case OP_LWZU: - ra = get_ra(inst); - rt = get_rt(inst); emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1); kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); break; case OP_LBZ: - rt = get_rt(inst); emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1); break; case OP_LBZU: - ra = get_ra(inst); - rt = get_rt(inst); emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1); kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); break; case OP_STW: - rs = get_rs(inst); emulated = kvmppc_handle_store(run, vcpu, kvmppc_get_gpr(vcpu, rs), 4, 1); break; case OP_STWU: - ra = get_ra(inst); - rs = get_rs(inst); emulated = kvmppc_handle_store(run, vcpu, kvmppc_get_gpr(vcpu, rs), 4, 1); @@ -463,15 +412,12 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) break; case OP_STB: - rs = get_rs(inst); emulated = kvmppc_handle_store(run, vcpu, kvmppc_get_gpr(vcpu, rs), 1, 1); break; case OP_STBU: - ra = get_ra(inst); - rs = get_rs(inst); emulated = kvmppc_handle_store(run, vcpu, kvmppc_get_gpr(vcpu, rs), 1, 1); @@ -479,39 +425,30 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) break; case OP_LHZ: - rt = get_rt(inst); emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1); break; case OP_LHZU: - ra = get_ra(inst); - rt = get_rt(inst); emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1); kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); break; case OP_LHA: - rt = get_rt(inst); emulated = kvmppc_handle_loads(run, vcpu, rt, 2, 1); break; case OP_LHAU: - ra = get_ra(inst); - rt = get_rt(inst); emulated = kvmppc_handle_loads(run, vcpu, rt, 2, 1); kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); break; case OP_STH: - rs = get_rs(inst); emulated = kvmppc_handle_store(run, vcpu, kvmppc_get_gpr(vcpu, rs), 2, 1); break; case OP_STHU: - ra = get_ra(inst); - rs = get_rs(inst); emulated = kvmppc_handle_store(run, vcpu, kvmppc_get_gpr(vcpu, rs), 2, 1); -- cgit v0.10.2 From 54771e6217ce05a474827d9b23ff03de9d2ef2a0 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Fri, 4 May 2012 14:55:12 +0200 Subject: KVM: PPC: Emulator: clean up SPR reads and writes When reading and writing SPRs, every SPR emulation piece had to read or write the respective GPR the value was read from or stored in itself. This approach is pretty prone to failure. What if we accidentally implement mfspr emulation where we just do "break" and nothing else? Suddenly we would get a random value in the return register - which is always a bad idea. So let's consolidate the generic code paths and only give the core specific SPR handling code readily made variables to read/write from/to. Functionally, this patch doesn't change anything, but it increases the readability of the code and makes is less prone to bugs. Signed-off-by: Alexander Graf diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index c87e3b5..f68c22f 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -107,8 +107,10 @@ extern void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu, extern int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, unsigned int op, int *advance); -extern int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs); -extern int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt); +extern int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, + ulong val); +extern int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, + ulong *val); extern int kvmppc_booke_init(void); extern void kvmppc_booke_exit(void); diff --git a/arch/powerpc/kvm/44x_emulate.c b/arch/powerpc/kvm/44x_emulate.c index da81a2d..c8c6157 100644 --- a/arch/powerpc/kvm/44x_emulate.c +++ b/arch/powerpc/kvm/44x_emulate.c @@ -128,41 +128,41 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, return emulated; } -int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs) +int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val) { int emulated = EMULATE_DONE; switch (sprn) { case SPRN_PID: - kvmppc_set_pid(vcpu, kvmppc_get_gpr(vcpu, rs)); break; + kvmppc_set_pid(vcpu, spr_val); break; case SPRN_MMUCR: - vcpu->arch.mmucr = kvmppc_get_gpr(vcpu, rs); break; + vcpu->arch.mmucr = spr_val; break; case SPRN_CCR0: - vcpu->arch.ccr0 = kvmppc_get_gpr(vcpu, rs); break; + vcpu->arch.ccr0 = spr_val; break; case SPRN_CCR1: - vcpu->arch.ccr1 = kvmppc_get_gpr(vcpu, rs); break; + vcpu->arch.ccr1 = spr_val; break; default: - emulated = kvmppc_booke_emulate_mtspr(vcpu, sprn, rs); + emulated = kvmppc_booke_emulate_mtspr(vcpu, sprn, spr_val); } return emulated; } -int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt) +int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val) { int emulated = EMULATE_DONE; switch (sprn) { case SPRN_PID: - kvmppc_set_gpr(vcpu, rt, vcpu->arch.pid); break; + *spr_val = vcpu->arch.pid; break; case SPRN_MMUCR: - kvmppc_set_gpr(vcpu, rt, vcpu->arch.mmucr); break; + *spr_val = vcpu->arch.mmucr; break; case SPRN_CCR0: - kvmppc_set_gpr(vcpu, rt, vcpu->arch.ccr0); break; + *spr_val = vcpu->arch.ccr0; break; case SPRN_CCR1: - kvmppc_set_gpr(vcpu, rt, vcpu->arch.ccr1); break; + *spr_val = vcpu->arch.ccr1; break; default: - emulated = kvmppc_booke_emulate_mfspr(vcpu, sprn, rt); + emulated = kvmppc_booke_emulate_mfspr(vcpu, sprn, spr_val); } return emulated; diff --git a/arch/powerpc/kvm/book3s_emulate.c b/arch/powerpc/kvm/book3s_emulate.c index c023bcd..b9a989d 100644 --- a/arch/powerpc/kvm/book3s_emulate.c +++ b/arch/powerpc/kvm/book3s_emulate.c @@ -318,10 +318,9 @@ static struct kvmppc_bat *kvmppc_find_bat(struct kvm_vcpu *vcpu, int sprn) return bat; } -int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs) +int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val) { int emulated = EMULATE_DONE; - ulong spr_val = kvmppc_get_gpr(vcpu, rs); switch (sprn) { case SPRN_SDR1: @@ -433,7 +432,7 @@ unprivileged: return emulated; } -int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt) +int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val) { int emulated = EMULATE_DONE; @@ -446,46 +445,46 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt) struct kvmppc_bat *bat = kvmppc_find_bat(vcpu, sprn); if (sprn % 2) - kvmppc_set_gpr(vcpu, rt, bat->raw >> 32); + *spr_val = bat->raw >> 32; else - kvmppc_set_gpr(vcpu, rt, bat->raw); + *spr_val = bat->raw; break; } case SPRN_SDR1: if (!spr_allowed(vcpu, PRIV_HYPER)) goto unprivileged; - kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->sdr1); + *spr_val = to_book3s(vcpu)->sdr1; break; case SPRN_DSISR: - kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->dsisr); + *spr_val = vcpu->arch.shared->dsisr; break; case SPRN_DAR: - kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->dar); + *spr_val = vcpu->arch.shared->dar; break; case SPRN_HIOR: - kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->hior); + *spr_val = to_book3s(vcpu)->hior; break; case SPRN_HID0: - kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->hid[0]); + *spr_val = to_book3s(vcpu)->hid[0]; break; case SPRN_HID1: - kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->hid[1]); + *spr_val = to_book3s(vcpu)->hid[1]; break; case SPRN_HID2: case SPRN_HID2_GEKKO: - kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->hid[2]); + *spr_val = to_book3s(vcpu)->hid[2]; break; case SPRN_HID4: case SPRN_HID4_GEKKO: - kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->hid[4]); + *spr_val = to_book3s(vcpu)->hid[4]; break; case SPRN_HID5: - kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->hid[5]); + *spr_val = to_book3s(vcpu)->hid[5]; break; case SPRN_CFAR: case SPRN_PURR: - kvmppc_set_gpr(vcpu, rt, 0); + *spr_val = 0; break; case SPRN_GQR0: case SPRN_GQR1: @@ -495,8 +494,7 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt) case SPRN_GQR5: case SPRN_GQR6: case SPRN_GQR7: - kvmppc_set_gpr(vcpu, rt, - to_book3s(vcpu)->gqr[sprn - SPRN_GQR0]); + *spr_val = to_book3s(vcpu)->gqr[sprn - SPRN_GQR0]; break; case SPRN_THRM1: case SPRN_THRM2: @@ -511,7 +509,7 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt) case SPRN_PMC3_GEKKO: case SPRN_PMC4_GEKKO: case SPRN_WPAR_GEKKO: - kvmppc_set_gpr(vcpu, rt, 0); + *spr_val = 0; break; default: unprivileged: diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index bb5a0f4..db36598 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -1505,12 +1505,12 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, return EMULATE_FAIL; } -int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs) +int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val) { return EMULATE_FAIL; } -int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt) +int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val) { return EMULATE_FAIL; } diff --git a/arch/powerpc/kvm/booke.h b/arch/powerpc/kvm/booke.h index 62c4fe5..ba61974 100644 --- a/arch/powerpc/kvm/booke.h +++ b/arch/powerpc/kvm/booke.h @@ -75,8 +75,8 @@ void kvmppc_clr_tsr_bits(struct kvm_vcpu *vcpu, u32 tsr_bits); int kvmppc_booke_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, unsigned int inst, int *advance); -int kvmppc_booke_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt); -int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs); +int kvmppc_booke_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val); +int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val); /* low-level asm code to transfer guest state */ void kvmppc_load_guest_spe(struct kvm_vcpu *vcpu); diff --git a/arch/powerpc/kvm/booke_emulate.c b/arch/powerpc/kvm/booke_emulate.c index e14f7b2..6c76397 100644 --- a/arch/powerpc/kvm/booke_emulate.c +++ b/arch/powerpc/kvm/booke_emulate.c @@ -102,22 +102,26 @@ int kvmppc_booke_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, * will return the wrong result if called for them in another context * (such as debugging). */ -int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs) +int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val) { int emulated = EMULATE_DONE; - ulong spr_val = kvmppc_get_gpr(vcpu, rs); switch (sprn) { case SPRN_DEAR: - vcpu->arch.shared->dar = spr_val; break; + vcpu->arch.shared->dar = spr_val; + break; case SPRN_ESR: - vcpu->arch.shared->esr = spr_val; break; + vcpu->arch.shared->esr = spr_val; + break; case SPRN_DBCR0: - vcpu->arch.dbcr0 = spr_val; break; + vcpu->arch.dbcr0 = spr_val; + break; case SPRN_DBCR1: - vcpu->arch.dbcr1 = spr_val; break; + vcpu->arch.dbcr1 = spr_val; + break; case SPRN_DBSR: - vcpu->arch.dbsr &= ~spr_val; break; + vcpu->arch.dbsr &= ~spr_val; + break; case SPRN_TSR: kvmppc_clr_tsr_bits(vcpu, spr_val); break; @@ -131,13 +135,17 @@ int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs) * guest (PR-mode only). */ case SPRN_SPRG4: - vcpu->arch.shared->sprg4 = spr_val; break; + vcpu->arch.shared->sprg4 = spr_val; + break; case SPRN_SPRG5: - vcpu->arch.shared->sprg5 = spr_val; break; + vcpu->arch.shared->sprg5 = spr_val; + break; case SPRN_SPRG6: - vcpu->arch.shared->sprg6 = spr_val; break; + vcpu->arch.shared->sprg6 = spr_val; + break; case SPRN_SPRG7: - vcpu->arch.shared->sprg7 = spr_val; break; + vcpu->arch.shared->sprg7 = spr_val; + break; case SPRN_IVPR: vcpu->arch.ivpr = spr_val; @@ -207,75 +215,83 @@ int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs) return emulated; } -int kvmppc_booke_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt) +int kvmppc_booke_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val) { int emulated = EMULATE_DONE; switch (sprn) { case SPRN_IVPR: - kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivpr); break; + *spr_val = vcpu->arch.ivpr; + break; case SPRN_DEAR: - kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->dar); break; + *spr_val = vcpu->arch.shared->dar; + break; case SPRN_ESR: - kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->esr); break; + *spr_val = vcpu->arch.shared->esr; + break; case SPRN_DBCR0: - kvmppc_set_gpr(vcpu, rt, vcpu->arch.dbcr0); break; + *spr_val = vcpu->arch.dbcr0; + break; case SPRN_DBCR1: - kvmppc_set_gpr(vcpu, rt, vcpu->arch.dbcr1); break; + *spr_val = vcpu->arch.dbcr1; + break; case SPRN_DBSR: - kvmppc_set_gpr(vcpu, rt, vcpu->arch.dbsr); break; + *spr_val = vcpu->arch.dbsr; + break; case SPRN_TSR: - kvmppc_set_gpr(vcpu, rt, vcpu->arch.tsr); break; + *spr_val = vcpu->arch.tsr; + break; case SPRN_TCR: - kvmppc_set_gpr(vcpu, rt, vcpu->arch.tcr); break; + *spr_val = vcpu->arch.tcr; + break; case SPRN_IVOR0: - kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_CRITICAL]); + *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_CRITICAL]; break; case SPRN_IVOR1: - kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_MACHINE_CHECK]); + *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_MACHINE_CHECK]; break; case SPRN_IVOR2: - kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_DATA_STORAGE]); + *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_DATA_STORAGE]; break; case SPRN_IVOR3: - kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_INST_STORAGE]); + *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_INST_STORAGE]; break; case SPRN_IVOR4: - kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_EXTERNAL]); + *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_EXTERNAL]; break; case SPRN_IVOR5: - kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_ALIGNMENT]); + *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_ALIGNMENT]; break; case SPRN_IVOR6: - kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_PROGRAM]); + *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_PROGRAM]; break; case SPRN_IVOR7: - kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_FP_UNAVAIL]); + *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_FP_UNAVAIL]; break; case SPRN_IVOR8: - kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_SYSCALL]); + *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_SYSCALL]; break; case SPRN_IVOR9: - kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_AP_UNAVAIL]); + *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_AP_UNAVAIL]; break; case SPRN_IVOR10: - kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_DECREMENTER]); + *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_DECREMENTER]; break; case SPRN_IVOR11: - kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_FIT]); + *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_FIT]; break; case SPRN_IVOR12: - kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_WATCHDOG]); + *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_WATCHDOG]; break; case SPRN_IVOR13: - kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_DTLB_MISS]); + *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_DTLB_MISS]; break; case SPRN_IVOR14: - kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_ITLB_MISS]); + *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_ITLB_MISS]; break; case SPRN_IVOR15: - kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_DEBUG]); + *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_DEBUG]; break; default: diff --git a/arch/powerpc/kvm/e500_emulate.c b/arch/powerpc/kvm/e500_emulate.c index 9b2dcda..8b99e07 100644 --- a/arch/powerpc/kvm/e500_emulate.c +++ b/arch/powerpc/kvm/e500_emulate.c @@ -140,11 +140,10 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, return emulated; } -int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs) +int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val) { struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); int emulated = EMULATE_DONE; - ulong spr_val = kvmppc_get_gpr(vcpu, rs); switch (sprn) { #ifndef CONFIG_KVM_BOOKE_HV @@ -154,25 +153,32 @@ int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs) case SPRN_PID1: if (spr_val != 0) return EMULATE_FAIL; - vcpu_e500->pid[1] = spr_val; break; + vcpu_e500->pid[1] = spr_val; + break; case SPRN_PID2: if (spr_val != 0) return EMULATE_FAIL; - vcpu_e500->pid[2] = spr_val; break; + vcpu_e500->pid[2] = spr_val; + break; case SPRN_MAS0: - vcpu->arch.shared->mas0 = spr_val; break; + vcpu->arch.shared->mas0 = spr_val; + break; case SPRN_MAS1: - vcpu->arch.shared->mas1 = spr_val; break; + vcpu->arch.shared->mas1 = spr_val; + break; case SPRN_MAS2: - vcpu->arch.shared->mas2 = spr_val; break; + vcpu->arch.shared->mas2 = spr_val; + break; case SPRN_MAS3: vcpu->arch.shared->mas7_3 &= ~(u64)0xffffffff; vcpu->arch.shared->mas7_3 |= spr_val; break; case SPRN_MAS4: - vcpu->arch.shared->mas4 = spr_val; break; + vcpu->arch.shared->mas4 = spr_val; + break; case SPRN_MAS6: - vcpu->arch.shared->mas6 = spr_val; break; + vcpu->arch.shared->mas6 = spr_val; + break; case SPRN_MAS7: vcpu->arch.shared->mas7_3 &= (u64)0xffffffff; vcpu->arch.shared->mas7_3 |= (u64)spr_val << 32; @@ -183,11 +189,14 @@ int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs) vcpu_e500->l1csr0 &= ~(L1CSR0_DCFI | L1CSR0_CLFC); break; case SPRN_L1CSR1: - vcpu_e500->l1csr1 = spr_val; break; + vcpu_e500->l1csr1 = spr_val; + break; case SPRN_HID0: - vcpu_e500->hid0 = spr_val; break; + vcpu_e500->hid0 = spr_val; + break; case SPRN_HID1: - vcpu_e500->hid1 = spr_val; break; + vcpu_e500->hid1 = spr_val; + break; case SPRN_MMUCSR0: emulated = kvmppc_e500_emul_mt_mmucsr0(vcpu_e500, @@ -216,90 +225,103 @@ int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs) break; #endif default: - emulated = kvmppc_booke_emulate_mtspr(vcpu, sprn, rs); + emulated = kvmppc_booke_emulate_mtspr(vcpu, sprn, spr_val); } return emulated; } -int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt) +int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val) { struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); int emulated = EMULATE_DONE; switch (sprn) { #ifndef CONFIG_KVM_BOOKE_HV - unsigned long val; - case SPRN_PID: - kvmppc_set_gpr(vcpu, rt, vcpu_e500->pid[0]); break; + *spr_val = vcpu_e500->pid[0]; + break; case SPRN_PID1: - kvmppc_set_gpr(vcpu, rt, vcpu_e500->pid[1]); break; + *spr_val = vcpu_e500->pid[1]; + break; case SPRN_PID2: - kvmppc_set_gpr(vcpu, rt, vcpu_e500->pid[2]); break; + *spr_val = vcpu_e500->pid[2]; + break; case SPRN_MAS0: - kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->mas0); break; + *spr_val = vcpu->arch.shared->mas0; + break; case SPRN_MAS1: - kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->mas1); break; + *spr_val = vcpu->arch.shared->mas1; + break; case SPRN_MAS2: - kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->mas2); break; + *spr_val = vcpu->arch.shared->mas2; + break; case SPRN_MAS3: - val = (u32)vcpu->arch.shared->mas7_3; - kvmppc_set_gpr(vcpu, rt, val); + *spr_val = (u32)vcpu->arch.shared->mas7_3; break; case SPRN_MAS4: - kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->mas4); break; + *spr_val = vcpu->arch.shared->mas4; + break; case SPRN_MAS6: - kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->mas6); break; + *spr_val = vcpu->arch.shared->mas6; + break; case SPRN_MAS7: - val = vcpu->arch.shared->mas7_3 >> 32; - kvmppc_set_gpr(vcpu, rt, val); + *spr_val = vcpu->arch.shared->mas7_3 >> 32; break; #endif case SPRN_TLB0CFG: - kvmppc_set_gpr(vcpu, rt, vcpu->arch.tlbcfg[0]); break; + *spr_val = vcpu->arch.tlbcfg[0]; + break; case SPRN_TLB1CFG: - kvmppc_set_gpr(vcpu, rt, vcpu->arch.tlbcfg[1]); break; + *spr_val = vcpu->arch.tlbcfg[1]; + break; case SPRN_L1CSR0: - kvmppc_set_gpr(vcpu, rt, vcpu_e500->l1csr0); break; + *spr_val = vcpu_e500->l1csr0; + break; case SPRN_L1CSR1: - kvmppc_set_gpr(vcpu, rt, vcpu_e500->l1csr1); break; + *spr_val = vcpu_e500->l1csr1; + break; case SPRN_HID0: - kvmppc_set_gpr(vcpu, rt, vcpu_e500->hid0); break; + *spr_val = vcpu_e500->hid0; + break; case SPRN_HID1: - kvmppc_set_gpr(vcpu, rt, vcpu_e500->hid1); break; + *spr_val = vcpu_e500->hid1; + break; case SPRN_SVR: - kvmppc_set_gpr(vcpu, rt, vcpu_e500->svr); break; + *spr_val = vcpu_e500->svr; + break; case SPRN_MMUCSR0: - kvmppc_set_gpr(vcpu, rt, 0); break; + *spr_val = 0; + break; case SPRN_MMUCFG: - kvmppc_set_gpr(vcpu, rt, vcpu->arch.mmucfg); break; + *spr_val = vcpu->arch.mmucfg; + break; /* extra exceptions */ case SPRN_IVOR32: - kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_UNAVAIL]); + *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_UNAVAIL]; break; case SPRN_IVOR33: - kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_DATA]); + *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_DATA]; break; case SPRN_IVOR34: - kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_ROUND]); + *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_ROUND]; break; case SPRN_IVOR35: - kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_PERFORMANCE_MONITOR]); + *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_PERFORMANCE_MONITOR]; break; #ifdef CONFIG_KVM_BOOKE_HV case SPRN_IVOR36: - kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_DBELL]); + *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_DBELL]; break; case SPRN_IVOR37: - kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_DBELL_CRIT]); + *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_DBELL_CRIT]; break; #endif default: - emulated = kvmppc_booke_emulate_mfspr(vcpu, sprn, rt); + emulated = kvmppc_booke_emulate_mfspr(vcpu, sprn, spr_val); } return emulated; diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c index f63b5cb..f90e86d 100644 --- a/arch/powerpc/kvm/emulate.c +++ b/arch/powerpc/kvm/emulate.c @@ -154,6 +154,7 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) int sprn = get_sprn(inst); enum emulation_result emulated = EMULATE_DONE; int advance = 1; + ulong spr_val = 0; /* this default type might be overwritten by subcategories */ kvmppc_set_exit_type(vcpu, EMULATED_INST_EXITS); @@ -235,55 +236,59 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) case OP_31_XOP_MFSPR: switch (sprn) { case SPRN_SRR0: - kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->srr0); + spr_val = vcpu->arch.shared->srr0; break; case SPRN_SRR1: - kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->srr1); + spr_val = vcpu->arch.shared->srr1; break; case SPRN_PVR: - kvmppc_set_gpr(vcpu, rt, vcpu->arch.pvr); break; + spr_val = vcpu->arch.pvr; + break; case SPRN_PIR: - kvmppc_set_gpr(vcpu, rt, vcpu->vcpu_id); break; + spr_val = vcpu->vcpu_id; + break; case SPRN_MSSSR0: - kvmppc_set_gpr(vcpu, rt, 0); break; + spr_val = 0; + break; /* Note: mftb and TBRL/TBWL are user-accessible, so * the guest can always access the real TB anyways. * In fact, we probably will never see these traps. */ case SPRN_TBWL: - kvmppc_set_gpr(vcpu, rt, get_tb() >> 32); break; + spr_val = get_tb() >> 32; + break; case SPRN_TBWU: - kvmppc_set_gpr(vcpu, rt, get_tb()); break; + spr_val = get_tb(); + break; case SPRN_SPRG0: - kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->sprg0); + spr_val = vcpu->arch.shared->sprg0; break; case SPRN_SPRG1: - kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->sprg1); + spr_val = vcpu->arch.shared->sprg1; break; case SPRN_SPRG2: - kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->sprg2); + spr_val = vcpu->arch.shared->sprg2; break; case SPRN_SPRG3: - kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->sprg3); + spr_val = vcpu->arch.shared->sprg3; break; /* Note: SPRG4-7 are user-readable, so we don't get * a trap. */ case SPRN_DEC: - { - kvmppc_set_gpr(vcpu, rt, - kvmppc_get_dec(vcpu, get_tb())); + spr_val = kvmppc_get_dec(vcpu, get_tb()); break; - } default: - emulated = kvmppc_core_emulate_mfspr(vcpu, sprn, rt); - if (emulated == EMULATE_FAIL) { - printk("mfspr: unknown spr %x\n", sprn); - kvmppc_set_gpr(vcpu, rt, 0); + emulated = kvmppc_core_emulate_mfspr(vcpu, sprn, + &spr_val); + if (unlikely(emulated == EMULATE_FAIL)) { + printk(KERN_INFO "mfspr: unknown spr " + "0x%x\n", sprn); } break; } + kvmppc_set_gpr(vcpu, rt, spr_val); kvmppc_set_exit_type(vcpu, EMULATED_MFSPR_EXITS); break; @@ -301,12 +306,13 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) break; case OP_31_XOP_MTSPR: + spr_val = kvmppc_get_gpr(vcpu, rs); switch (sprn) { case SPRN_SRR0: - vcpu->arch.shared->srr0 = kvmppc_get_gpr(vcpu, rs); + vcpu->arch.shared->srr0 = spr_val; break; case SPRN_SRR1: - vcpu->arch.shared->srr1 = kvmppc_get_gpr(vcpu, rs); + vcpu->arch.shared->srr1 = spr_val; break; /* XXX We need to context-switch the timebase for @@ -317,27 +323,29 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) case SPRN_MSSSR0: break; case SPRN_DEC: - vcpu->arch.dec = kvmppc_get_gpr(vcpu, rs); + vcpu->arch.dec = spr_val; kvmppc_emulate_dec(vcpu); break; case SPRN_SPRG0: - vcpu->arch.shared->sprg0 = kvmppc_get_gpr(vcpu, rs); + vcpu->arch.shared->sprg0 = spr_val; break; case SPRN_SPRG1: - vcpu->arch.shared->sprg1 = kvmppc_get_gpr(vcpu, rs); + vcpu->arch.shared->sprg1 = spr_val; break; case SPRN_SPRG2: - vcpu->arch.shared->sprg2 = kvmppc_get_gpr(vcpu, rs); + vcpu->arch.shared->sprg2 = spr_val; break; case SPRN_SPRG3: - vcpu->arch.shared->sprg3 = kvmppc_get_gpr(vcpu, rs); + vcpu->arch.shared->sprg3 = spr_val; break; default: - emulated = kvmppc_core_emulate_mtspr(vcpu, sprn, rs); + emulated = kvmppc_core_emulate_mtspr(vcpu, sprn, + spr_val); if (emulated == EMULATE_FAIL) - printk("mtspr: unknown spr %x\n", sprn); + printk(KERN_INFO "mtspr: unknown spr " + "0x%x\n", sprn); break; } kvmppc_set_exit_type(vcpu, EMULATED_MTSPR_EXITS); -- cgit v0.10.2 From 7704addb60e274d4e98c69a02f7ebe3f77c6c3a4 Mon Sep 17 00:00:00 2001 From: Mike Turquette Date: Wed, 2 May 2012 18:37:45 -0700 Subject: MAINTAINERS: add entry for common clk framework Signed-off-by: Mike Turquette diff --git a/MAINTAINERS b/MAINTAINERS index 1a2f8f5..164e9a1 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1882,6 +1882,16 @@ F: Documentation/filesystems/coda.txt F: fs/coda/ F: include/linux/coda*.h +COMMON CLK FRAMEWORK +M: Mike Turquette +M: Mike Turquette +L: linux-arm-kernel@lists.infradead.org (same as CLK API & CLKDEV) +T: git git://git.linaro.org/people/mturquette/linux.git +S: Maintained +F: drivers/clk/clk.c +F: drivers/clk/clk-* +F: include/linux/clk-pr* + COMMON INTERNET FILE SYSTEM (CIFS) M: Steve French L: linux-cifs@vger.kernel.org -- cgit v0.10.2 From 1ca5513af77307eccea7efd4d12ef5c14f1b12ab Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 7 May 2012 10:03:17 +0100 Subject: mfd: Fix tps65090 ifdefs for suspend mode CONFIG_PM also covers runtime only PM. Signed-off-by: Mark Brown Acked-by: Venu Byravarasu Signed-off-by: Samuel Ortiz diff --git a/drivers/mfd/tps65090.c b/drivers/mfd/tps65090.c index a66d4df..ac7e8b1 100644 --- a/drivers/mfd/tps65090.c +++ b/drivers/mfd/tps65090.c @@ -334,7 +334,7 @@ static int __devexit tps65090_i2c_remove(struct i2c_client *client) return 0; } -#ifdef CONFIG_PM +#ifdef CONFIG_PM_SLEEP static int tps65090_i2c_suspend(struct i2c_client *client, pm_message_t state) { if (client->irq) @@ -363,7 +363,7 @@ static struct i2c_driver tps65090_driver = { }, .probe = tps65090_i2c_probe, .remove = __devexit_p(tps65090_i2c_remove), -#ifdef CONFIG_PM +#ifdef CONFIG_PM_SLEEP .suspend = tps65090_i2c_suspend, .resume = tps65090_i2c_resume, #endif -- cgit v0.10.2 From b6c9eeef4e775e1fff76f4395d11638dc198271d Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 7 May 2012 10:03:18 +0100 Subject: mfd: Don't use I2C-specific suspend and resume operations for tps65090 The legacy suspend operations have been deprecated and printing warnings on boot for over a year now. Signed-off-by: Mark Brown Acked-by: Venu Byravarasu Signed-off-by: Samuel Ortiz diff --git a/drivers/mfd/tps65090.c b/drivers/mfd/tps65090.c index ac7e8b1..6dc4c34 100644 --- a/drivers/mfd/tps65090.c +++ b/drivers/mfd/tps65090.c @@ -335,21 +335,27 @@ static int __devexit tps65090_i2c_remove(struct i2c_client *client) } #ifdef CONFIG_PM_SLEEP -static int tps65090_i2c_suspend(struct i2c_client *client, pm_message_t state) +static int tps65090_suspend(struct device *dev) { + struct i2c_client *client = to_i2c_client(dev); if (client->irq) disable_irq(client->irq); return 0; } -static int tps65090_i2c_resume(struct i2c_client *client) +static int tps65090_resume(struct device *dev) { + struct i2c_client *client = to_i2c_client(dev); if (client->irq) enable_irq(client->irq); return 0; } #endif +static const struct dev_pm_ops tps65090_pm_ops = { + SET_SYSTEM_SLEEP_PM_OPS(tps65090_suspend, tps65090_resume) +}; + static const struct i2c_device_id tps65090_id_table[] = { { "tps65090", 0 }, { }, @@ -360,13 +366,10 @@ static struct i2c_driver tps65090_driver = { .driver = { .name = "tps65090", .owner = THIS_MODULE, + .pm = &tps65090_pm_ops, }, .probe = tps65090_i2c_probe, .remove = __devexit_p(tps65090_i2c_remove), -#ifdef CONFIG_PM_SLEEP - .suspend = tps65090_i2c_suspend, - .resume = tps65090_i2c_resume, -#endif .id_table = tps65090_id_table, }; -- cgit v0.10.2 From 63745d4068de8ccea3580214c6dbfdca0ec37859 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 7 May 2012 10:03:19 +0100 Subject: mfd: Fix tps65910 section annotations A warning was being generated by the reference from tps65910_i2c_probe() to tps65910_sleepinit() since the latter was annotated as __init but the former was unannotated. Since these functions can only be called during device init make them both __devinit, and while we're at it also annotate tps65910_i2c_remove() __devexit for symmetry. Signed-off-by: Mark Brown Signed-off-by: Samuel Ortiz diff --git a/drivers/mfd/tps65910.c b/drivers/mfd/tps65910.c index ae7f47b..7a55af9 100644 --- a/drivers/mfd/tps65910.c +++ b/drivers/mfd/tps65910.c @@ -90,7 +90,7 @@ static const struct regmap_config tps65910_regmap_config = { .cache_type = REGCACHE_RBTREE, }; -static int __init tps65910_sleepinit(struct tps65910 *tps65910, +static int __devinit tps65910_sleepinit(struct tps65910 *tps65910, struct tps65910_board *pmic_pdata) { struct device *dev = NULL; @@ -150,8 +150,8 @@ err_sleep_init: } -static int tps65910_i2c_probe(struct i2c_client *i2c, - const struct i2c_device_id *id) +static __devinit int tps65910_i2c_probe(struct i2c_client *i2c, + const struct i2c_device_id *id) { struct tps65910 *tps65910; struct tps65910_board *pmic_plat_data; @@ -213,7 +213,7 @@ regmap_err: return ret; } -static int tps65910_i2c_remove(struct i2c_client *i2c) +static __devexit int tps65910_i2c_remove(struct i2c_client *i2c) { struct tps65910 *tps65910 = i2c_get_clientdata(i2c); @@ -239,7 +239,7 @@ static struct i2c_driver tps65910_i2c_driver = { .owner = THIS_MODULE, }, .probe = tps65910_i2c_probe, - .remove = tps65910_i2c_remove, + .remove = __devexit_p(tps65910_i2c_remove), .id_table = tps65910_i2c_id, }; -- cgit v0.10.2 From ce7e4e11221dd7fbe82c8ad28d1875b0dfa20de4 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 7 May 2012 10:03:20 +0100 Subject: mfd: Fix wm831x register range passing for recent ARM updates The removal of mach/io.h from most ARM platforms also set the range of valid IO ports to be empty for most platforms when previously any 32 bit integer had been valid. This makes it impossible to add IO resources as the added range is smaller than that of the root resource for IO ports. Since we're not really using IO memory at all fix this by defining our own root resource outside the normal tree and make that the parent of all IO resources. This also ensures we won't conflict with read IO ports if we ever run on a platform which happens to use them. Signed-off-by: Mark Brown Signed-off-by: Samuel Ortiz diff --git a/drivers/mfd/wm831x-core.c b/drivers/mfd/wm831x-core.c index 838056c..476e4d3 100644 --- a/drivers/mfd/wm831x-core.c +++ b/drivers/mfd/wm831x-core.c @@ -614,8 +614,15 @@ int wm831x_set_bits(struct wm831x *wm831x, unsigned short reg, } EXPORT_SYMBOL_GPL(wm831x_set_bits); +static struct resource wm831x_io_parent = { + .start = 0, + .end = 0xffffffff, + .flags = IORESOURCE_IO, +}; + static struct resource wm831x_dcdc1_resources[] = { { + .parent = &wm831x_io_parent, .start = WM831X_DC1_CONTROL_1, .end = WM831X_DC1_DVS_CONTROL, .flags = IORESOURCE_IO, @@ -637,6 +644,7 @@ static struct resource wm831x_dcdc1_resources[] = { static struct resource wm831x_dcdc2_resources[] = { { + .parent = &wm831x_io_parent, .start = WM831X_DC2_CONTROL_1, .end = WM831X_DC2_DVS_CONTROL, .flags = IORESOURCE_IO, @@ -657,6 +665,7 @@ static struct resource wm831x_dcdc2_resources[] = { static struct resource wm831x_dcdc3_resources[] = { { + .parent = &wm831x_io_parent, .start = WM831X_DC3_CONTROL_1, .end = WM831X_DC3_SLEEP_CONTROL, .flags = IORESOURCE_IO, @@ -671,6 +680,7 @@ static struct resource wm831x_dcdc3_resources[] = { static struct resource wm831x_dcdc4_resources[] = { { + .parent = &wm831x_io_parent, .start = WM831X_DC4_CONTROL, .end = WM831X_DC4_SLEEP_CONTROL, .flags = IORESOURCE_IO, @@ -685,6 +695,7 @@ static struct resource wm831x_dcdc4_resources[] = { static struct resource wm8320_dcdc4_buck_resources[] = { { + .parent = &wm831x_io_parent, .start = WM831X_DC4_CONTROL, .end = WM832X_DC4_SLEEP_CONTROL, .flags = IORESOURCE_IO, @@ -707,6 +718,7 @@ static struct resource wm831x_gpio_resources[] = { static struct resource wm831x_isink1_resources[] = { { + .parent = &wm831x_io_parent, .start = WM831X_CURRENT_SINK_1, .end = WM831X_CURRENT_SINK_1, .flags = IORESOURCE_IO, @@ -720,6 +732,7 @@ static struct resource wm831x_isink1_resources[] = { static struct resource wm831x_isink2_resources[] = { { + .parent = &wm831x_io_parent, .start = WM831X_CURRENT_SINK_2, .end = WM831X_CURRENT_SINK_2, .flags = IORESOURCE_IO, @@ -733,6 +746,7 @@ static struct resource wm831x_isink2_resources[] = { static struct resource wm831x_ldo1_resources[] = { { + .parent = &wm831x_io_parent, .start = WM831X_LDO1_CONTROL, .end = WM831X_LDO1_SLEEP_CONTROL, .flags = IORESOURCE_IO, @@ -747,6 +761,7 @@ static struct resource wm831x_ldo1_resources[] = { static struct resource wm831x_ldo2_resources[] = { { + .parent = &wm831x_io_parent, .start = WM831X_LDO2_CONTROL, .end = WM831X_LDO2_SLEEP_CONTROL, .flags = IORESOURCE_IO, @@ -761,6 +776,7 @@ static struct resource wm831x_ldo2_resources[] = { static struct resource wm831x_ldo3_resources[] = { { + .parent = &wm831x_io_parent, .start = WM831X_LDO3_CONTROL, .end = WM831X_LDO3_SLEEP_CONTROL, .flags = IORESOURCE_IO, @@ -775,6 +791,7 @@ static struct resource wm831x_ldo3_resources[] = { static struct resource wm831x_ldo4_resources[] = { { + .parent = &wm831x_io_parent, .start = WM831X_LDO4_CONTROL, .end = WM831X_LDO4_SLEEP_CONTROL, .flags = IORESOURCE_IO, @@ -789,6 +806,7 @@ static struct resource wm831x_ldo4_resources[] = { static struct resource wm831x_ldo5_resources[] = { { + .parent = &wm831x_io_parent, .start = WM831X_LDO5_CONTROL, .end = WM831X_LDO5_SLEEP_CONTROL, .flags = IORESOURCE_IO, @@ -803,6 +821,7 @@ static struct resource wm831x_ldo5_resources[] = { static struct resource wm831x_ldo6_resources[] = { { + .parent = &wm831x_io_parent, .start = WM831X_LDO6_CONTROL, .end = WM831X_LDO6_SLEEP_CONTROL, .flags = IORESOURCE_IO, @@ -817,6 +836,7 @@ static struct resource wm831x_ldo6_resources[] = { static struct resource wm831x_ldo7_resources[] = { { + .parent = &wm831x_io_parent, .start = WM831X_LDO7_CONTROL, .end = WM831X_LDO7_SLEEP_CONTROL, .flags = IORESOURCE_IO, @@ -831,6 +851,7 @@ static struct resource wm831x_ldo7_resources[] = { static struct resource wm831x_ldo8_resources[] = { { + .parent = &wm831x_io_parent, .start = WM831X_LDO8_CONTROL, .end = WM831X_LDO8_SLEEP_CONTROL, .flags = IORESOURCE_IO, @@ -845,6 +866,7 @@ static struct resource wm831x_ldo8_resources[] = { static struct resource wm831x_ldo9_resources[] = { { + .parent = &wm831x_io_parent, .start = WM831X_LDO9_CONTROL, .end = WM831X_LDO9_SLEEP_CONTROL, .flags = IORESOURCE_IO, @@ -859,6 +881,7 @@ static struct resource wm831x_ldo9_resources[] = { static struct resource wm831x_ldo10_resources[] = { { + .parent = &wm831x_io_parent, .start = WM831X_LDO10_CONTROL, .end = WM831X_LDO10_SLEEP_CONTROL, .flags = IORESOURCE_IO, @@ -873,6 +896,7 @@ static struct resource wm831x_ldo10_resources[] = { static struct resource wm831x_ldo11_resources[] = { { + .parent = &wm831x_io_parent, .start = WM831X_LDO11_ON_CONTROL, .end = WM831X_LDO11_SLEEP_CONTROL, .flags = IORESOURCE_IO, @@ -974,6 +998,7 @@ static struct resource wm831x_rtc_resources[] = { static struct resource wm831x_status1_resources[] = { { + .parent = &wm831x_io_parent, .start = WM831X_STATUS_LED_1, .end = WM831X_STATUS_LED_1, .flags = IORESOURCE_IO, @@ -982,6 +1007,7 @@ static struct resource wm831x_status1_resources[] = { static struct resource wm831x_status2_resources[] = { { + .parent = &wm831x_io_parent, .start = WM831X_STATUS_LED_2, .end = WM831X_STATUS_LED_2, .flags = IORESOURCE_IO, -- cgit v0.10.2 From b7b142d9fc056e98e6fdef82dca3e87067517340 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 7 May 2012 10:03:21 +0100 Subject: mfd: Convert wm8350 physical I/O to regmap API The driver still uses a custom cache implementation but the underlying physical I/O is now done using the regmap API, saving some code and avoiding allocating enormous scratch arrays on the stack. Signed-off-by: Mark Brown Signed-off-by: Samuel Ortiz diff --git a/drivers/mfd/wm8350-core.c b/drivers/mfd/wm8350-core.c index dd1caaa..8a9b11c 100644 --- a/drivers/mfd/wm8350-core.c +++ b/drivers/mfd/wm8350-core.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include @@ -74,7 +75,7 @@ static int wm8350_phys_read(struct wm8350 *wm8350, u8 reg, int num_regs, int bytes = num_regs * 2; dev_dbg(wm8350->dev, "volatile read\n"); - ret = wm8350->read_dev(wm8350, reg, bytes, (char *)dest); + ret = regmap_raw_read(wm8350->regmap, reg, dest, bytes); for (i = reg; i < reg + num_regs; i++) { /* Cache is CPU endian */ @@ -96,9 +97,6 @@ static int wm8350_read(struct wm8350 *wm8350, u8 reg, int num_regs, u16 *dest) int ret = 0; int bytes = num_regs * 2; - if (wm8350->read_dev == NULL) - return -ENODEV; - if ((reg + num_regs - 1) > WM8350_MAX_REGISTER) { dev_err(wm8350->dev, "invalid reg %x\n", reg + num_regs - 1); @@ -149,9 +147,6 @@ static int wm8350_write(struct wm8350 *wm8350, u8 reg, int num_regs, u16 *src) int end = reg + num_regs; int bytes = num_regs * 2; - if (wm8350->write_dev == NULL) - return -ENODEV; - if ((reg + num_regs - 1) > WM8350_MAX_REGISTER) { dev_err(wm8350->dev, "invalid reg %x\n", reg + num_regs - 1); @@ -182,7 +177,7 @@ static int wm8350_write(struct wm8350 *wm8350, u8 reg, int num_regs, u16 *src) } /* Actually write it out */ - return wm8350->write_dev(wm8350, reg, bytes, (char *)src); + return regmap_raw_write(wm8350->regmap, reg, src, bytes); } /* @@ -515,9 +510,8 @@ static int wm8350_create_cache(struct wm8350 *wm8350, int type, int mode) * a PMIC so the device many not be in a virgin state and we * can't rely on the silicon values. */ - ret = wm8350->read_dev(wm8350, 0, - sizeof(u16) * (WM8350_MAX_REGISTER + 1), - wm8350->reg_cache); + ret = regmap_raw_read(wm8350->regmap, 0, wm8350->reg_cache, + sizeof(u16) * (WM8350_MAX_REGISTER + 1)); if (ret < 0) { dev_err(wm8350->dev, "failed to read initial cache values\n"); @@ -570,35 +564,30 @@ int wm8350_device_init(struct wm8350 *wm8350, int irq, struct wm8350_platform_data *pdata) { int ret; - u16 id1, id2, mask_rev; - u16 cust_id, mode, chip_rev; + unsigned int id1, id2, mask_rev; + unsigned int cust_id, mode, chip_rev; dev_set_drvdata(wm8350->dev, wm8350); /* get WM8350 revision and config mode */ - ret = wm8350->read_dev(wm8350, WM8350_RESET_ID, sizeof(id1), &id1); + ret = regmap_read(wm8350->regmap, WM8350_RESET_ID, &id1); if (ret != 0) { dev_err(wm8350->dev, "Failed to read ID: %d\n", ret); goto err; } - ret = wm8350->read_dev(wm8350, WM8350_ID, sizeof(id2), &id2); + ret = regmap_read(wm8350->regmap, WM8350_ID, &id2); if (ret != 0) { dev_err(wm8350->dev, "Failed to read ID: %d\n", ret); goto err; } - ret = wm8350->read_dev(wm8350, WM8350_REVISION, sizeof(mask_rev), - &mask_rev); + ret = regmap_read(wm8350->regmap, WM8350_REVISION, &mask_rev); if (ret != 0) { dev_err(wm8350->dev, "Failed to read revision: %d\n", ret); goto err; } - id1 = be16_to_cpu(id1); - id2 = be16_to_cpu(id2); - mask_rev = be16_to_cpu(mask_rev); - if (id1 != 0x6143) { dev_err(wm8350->dev, "Device with ID %x is not a WM8350\n", id1); diff --git a/drivers/mfd/wm8350-i2c.c b/drivers/mfd/wm8350-i2c.c index d955faa..271589f 100644 --- a/drivers/mfd/wm8350-i2c.c +++ b/drivers/mfd/wm8350-i2c.c @@ -15,47 +15,18 @@ #include #include +#include #include #include #include #include +#include #include -static int wm8350_i2c_read_device(struct wm8350 *wm8350, char reg, - int bytes, void *dest) -{ - int ret; - - ret = i2c_master_send(wm8350->i2c_client, ®, 1); - if (ret < 0) - return ret; - ret = i2c_master_recv(wm8350->i2c_client, dest, bytes); - if (ret < 0) - return ret; - if (ret != bytes) - return -EIO; - return 0; -} - -static int wm8350_i2c_write_device(struct wm8350 *wm8350, char reg, - int bytes, void *src) -{ - /* we add 1 byte for device register */ - u8 msg[(WM8350_MAX_REGISTER << 1) + 1]; - int ret; - - if (bytes > ((WM8350_MAX_REGISTER << 1) + 1)) - return -EINVAL; - - msg[0] = reg; - memcpy(&msg[1], src, bytes); - ret = i2c_master_send(wm8350->i2c_client, msg, bytes + 1); - if (ret < 0) - return ret; - if (ret != bytes + 1) - return -EIO; - return 0; -} +static const struct regmap_config wm8350_regmap = { + .reg_bits = 8, + .val_bits = 16, +}; static int wm8350_i2c_probe(struct i2c_client *i2c, const struct i2c_device_id *id) @@ -67,11 +38,16 @@ static int wm8350_i2c_probe(struct i2c_client *i2c, if (wm8350 == NULL) return -ENOMEM; + wm8350->regmap = devm_regmap_init_i2c(i2c, &wm8350_regmap); + if (IS_ERR(wm8350->regmap)) { + ret = PTR_ERR(wm8350->regmap); + dev_err(&i2c->dev, "Failed to allocate register map: %d\n", + ret); + return ret; + } + i2c_set_clientdata(i2c, wm8350); wm8350->dev = &i2c->dev; - wm8350->i2c_client = i2c; - wm8350->read_dev = wm8350_i2c_read_device; - wm8350->write_dev = wm8350_i2c_write_device; ret = wm8350_device_init(wm8350, i2c->irq, i2c->dev.platform_data); if (ret < 0) @@ -80,6 +56,7 @@ static int wm8350_i2c_probe(struct i2c_client *i2c, return ret; err: + regmap_exit(wm8350->regmap); return ret; } diff --git a/include/linux/mfd/wm8350/core.h b/include/linux/mfd/wm8350/core.h index 98fcc97..9192b64 100644 --- a/include/linux/mfd/wm8350/core.h +++ b/include/linux/mfd/wm8350/core.h @@ -602,6 +602,7 @@ extern const u16 wm8352_mode2_defaults[]; extern const u16 wm8352_mode3_defaults[]; struct wm8350; +struct regmap; struct wm8350_hwmon { struct platform_device *pdev; @@ -612,13 +613,7 @@ struct wm8350 { struct device *dev; /* device IO */ - union { - struct i2c_client *i2c_client; - struct spi_device *spi_device; - }; - int (*read_dev)(struct wm8350 *wm8350, char reg, int size, void *dest); - int (*write_dev)(struct wm8350 *wm8350, char reg, int size, - void *src); + struct regmap *regmap; u16 *reg_cache; struct mutex auxadc_mutex; -- cgit v0.10.2 From cc7a727941193e3e59be2e9f6522eb78bc7ee909 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 7 May 2012 10:03:22 +0100 Subject: mfd: Read CUST_ID from the wm8994 device Read CUST_ID from the device and log it for diagnostics. Signed-off-by: Mark Brown Signed-off-by: Samuel Ortiz diff --git a/drivers/mfd/wm8994-core.c b/drivers/mfd/wm8994-core.c index 9d7ca1e..60e6175 100644 --- a/drivers/mfd/wm8994-core.c +++ b/drivers/mfd/wm8994-core.c @@ -500,7 +500,8 @@ static __devinit int wm8994_device_init(struct wm8994 *wm8994, int irq) ret); goto err_enable; } - wm8994->revision = ret; + wm8994->revision = ret & WM8994_CHIP_REV_MASK; + wm8994->cust_id = (ret & WM8994_CUST_ID_MASK) >> WM8994_CUST_ID_SHIFT; switch (wm8994->type) { case WM8994: @@ -553,8 +554,8 @@ static __devinit int wm8994_device_init(struct wm8994 *wm8994, int irq) break; } - dev_info(wm8994->dev, "%s revision %c\n", devname, - 'A' + wm8994->revision); + dev_info(wm8994->dev, "%s revision %c CUST_ID %02x\n", devname, + 'A' + wm8994->revision, wm8994->cust_id); switch (wm8994->type) { case WM1811: diff --git a/include/linux/mfd/wm8994/core.h b/include/linux/mfd/wm8994/core.h index 9eff2a3..d41bc7b 100644 --- a/include/linux/mfd/wm8994/core.h +++ b/include/linux/mfd/wm8994/core.h @@ -57,6 +57,7 @@ struct wm8994 { enum wm8994_type type; int revision; + int cust_id; struct device *dev; struct regmap *regmap; diff --git a/include/linux/mfd/wm8994/registers.h b/include/linux/mfd/wm8994/registers.h index 86e6a03..0535489 100644 --- a/include/linux/mfd/wm8994/registers.h +++ b/include/linux/mfd/wm8994/registers.h @@ -2212,6 +2212,9 @@ /* * R256 (0x100) - Chip Revision */ +#define WM8994_CUST_ID_MASK 0xFF00 /* CUST_ID - [15:8] */ +#define WM8994_CUST_ID_SHIFT 8 /* CUST_ID - [15:8] */ +#define WM8994_CUST_ID_WIDTH 8 /* CUST_ID - [15:8] */ #define WM8994_CHIP_REV_MASK 0x000F /* CHIP_REV - [3:0] */ #define WM8994_CHIP_REV_SHIFT 0 /* CHIP_REV - [3:0] */ #define WM8994_CHIP_REV_WIDTH 4 /* CHIP_REV - [3:0] */ -- cgit v0.10.2 From ceb57d27e28a8f979cbfd6391b7da6da51484059 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 7 May 2012 10:03:23 +0100 Subject: mfd: Convert wm8994 to module_i2c_driver() Signed-off-by: Mark Brown Signed-off-by: Samuel Ortiz diff --git a/drivers/mfd/wm8994-core.c b/drivers/mfd/wm8994-core.c index 60e6175..1e321d3 100644 --- a/drivers/mfd/wm8994-core.c +++ b/drivers/mfd/wm8994-core.c @@ -733,23 +733,7 @@ static struct i2c_driver wm8994_i2c_driver = { .id_table = wm8994_i2c_id, }; -static int __init wm8994_i2c_init(void) -{ - int ret; - - ret = i2c_add_driver(&wm8994_i2c_driver); - if (ret != 0) - pr_err("Failed to register wm8994 I2C driver: %d\n", ret); - - return ret; -} -module_init(wm8994_i2c_init); - -static void __exit wm8994_i2c_exit(void) -{ - i2c_del_driver(&wm8994_i2c_driver); -} -module_exit(wm8994_i2c_exit); +module_i2c_driver(wm8994_i2c_driver); MODULE_DESCRIPTION("Core support for the WM8994 audio CODEC"); MODULE_LICENSE("GPL"); -- cgit v0.10.2 From 9f420bf0f4a74e404b73b42b7fc3c85c20c64ea7 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Mon, 7 May 2012 05:37:45 -0700 Subject: kbuild: all{no,yes,mod,def,rand}config only read files when instructed to. Prevent subtle surprises to both people working on the kconfig code and people using make allnoconfig allyesconfig allmoconfig and randconfig by only attempting to read a config file if KCONFIG_ALLCONFIG is set. Common sense suggests attempting to read the extra config files does not make sense unless requested. The documentation says the code won't attempt to read the extra config files unless requested. Current usage does not appear to include people depending on the code reading the config files without the variable being set So do the simple thing and stop reading config files when passed all{no,yes,mod,def,rand}config unless KCONFIG_ALLCONFIG environment variable is set. Signed-off-by: Eric W. Biederman Reported-by: Stephen Rothwell Signed-off-by: Michal Marek diff --git a/scripts/kconfig/conf.c b/scripts/kconfig/conf.c index 0fdda91..0dc4a2c 100644 --- a/scripts/kconfig/conf.c +++ b/scripts/kconfig/conf.c @@ -574,7 +574,9 @@ int main(int ac, char **av) case alldefconfig: case randconfig: name = getenv("KCONFIG_ALLCONFIG"); - if (name && (strcmp(name, "") != 0) && (strcmp(name, "1") != 0)) { + if (!name) + break; + if ((strcmp(name, "") != 0) && (strcmp(name, "1") != 0)) { if (conf_read_simple(name, S_DEF_USER)) { fprintf(stderr, _("*** Can't read seed configuration \"%s\"!\n"), -- cgit v0.10.2 From ca1182387e57470460294ce1e39e2d5518809811 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Fri, 30 Mar 2012 15:37:07 -0400 Subject: xen/setup: Only print "Freeing XXX-YYY pfn range: Z pages freed" if Z > 0 Otherwise we can get these meaningless: Freeing bad80-badf4 pfn range: 0 pages freed We also can do this for the summary ones - no point of printing "Set 0 page(s) to 1-1 mapping" Acked-by: David Vrabel [v1: Extended to the summary printks] Signed-off-by: Konrad Rzeszutek Wilk diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index 1ba8dff..7b0ab77 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@ -114,8 +114,9 @@ static unsigned long __init xen_release_chunk(unsigned long start, len++; } } - printk(KERN_INFO "Freeing %lx-%lx pfn range: %lu pages freed\n", - start, end, len); + if (len) + printk(KERN_INFO "Freeing %lx-%lx pfn range: %lu pages freed\n", + start, end, len); return len; } @@ -162,8 +163,10 @@ static unsigned long __init xen_set_identity_and_release( } } - printk(KERN_INFO "Released %lu pages of unused memory\n", released); - printk(KERN_INFO "Set %ld page(s) to 1-1 mapping\n", identity); + if (released) + printk(KERN_INFO "Released %lu pages of unused memory\n", released); + if (identity) + printk(KERN_INFO "Set %ld page(s) to 1-1 mapping\n", identity); return released; } -- cgit v0.10.2 From 2e2fb75475c2fc74c98100f1468c8195fee49f3b Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Fri, 6 Apr 2012 10:07:11 -0400 Subject: xen/setup: Populate freed MFNs from non-RAM E820 entries and gaps to E820 RAM When the Xen hypervisor boots a PV kernel it hands it two pieces of information: nr_pages and a made up E820 entry. The nr_pages value defines the range from zero to nr_pages of PFNs which have a valid Machine Frame Number (MFN) underneath it. The E820 mirrors that (with the VGA hole): BIOS-provided physical RAM map: Xen: 0000000000000000 - 00000000000a0000 (usable) Xen: 00000000000a0000 - 0000000000100000 (reserved) Xen: 0000000000100000 - 0000000080800000 (usable) The fun comes when a PV guest that is run with a machine E820 - that can either be the initial domain or a PCI PV guest, where the E820 looks like the normal thing: BIOS-provided physical RAM map: Xen: 0000000000000000 - 000000000009e000 (usable) Xen: 000000000009ec00 - 0000000000100000 (reserved) Xen: 0000000000100000 - 0000000020000000 (usable) Xen: 0000000020000000 - 0000000020200000 (reserved) Xen: 0000000020200000 - 0000000040000000 (usable) Xen: 0000000040000000 - 0000000040200000 (reserved) Xen: 0000000040200000 - 00000000bad80000 (usable) Xen: 00000000bad80000 - 00000000badc9000 (ACPI NVS) .. With that overlaying the nr_pages directly on the E820 does not work as there are gaps and non-RAM regions that won't be used by the memory allocator. The 'xen_release_chunk' helps with that by punching holes in the P2M (PFN to MFN lookup tree) for those regions and tells us that: Freeing 20000-20200 pfn range: 512 pages freed Freeing 40000-40200 pfn range: 512 pages freed Freeing bad80-badf4 pfn range: 116 pages freed Freeing badf6-bae7f pfn range: 137 pages freed Freeing bb000-100000 pfn range: 282624 pages freed Released 283999 pages of unused memory Those 283999 pages are subtracted from the nr_pages and are returned to the hypervisor. The end result is that the initial domain boots with 1GB less memory as the nr_pages has been subtracted by the amount of pages residing within the PCI hole. It can balloon up to that if desired using 'xl mem-set 0 8092', but the balloon driver is not always compiled in for the initial domain. This patch, implements the populate hypercall (XENMEM_populate_physmap) which increases the the domain with the same amount of pages that were released. The other solution (that did not work) was to transplant the MFN in the P2M tree - the ones that were going to be freed were put in the E820_RAM regions past the nr_pages. But the modifications to the M2P array (the other side of creating PTEs) were not carried away. As the hypervisor is the only one capable of modifying that and the only two hypercalls that would do this are: the update_va_mapping (which won't work, as during initial bootup only PFNs up to nr_pages are mapped in the guest) or via the populate hypercall. The end result is that the kernel can now boot with the nr_pages without having to subtract the 283999 pages. On a 8GB machine, with various dom0_mem= parameters this is what we get: no dom0_mem -Memory: 6485264k/9435136k available (5817k kernel code, 1136060k absent, 1813812k reserved, 2899k data, 696k init) +Memory: 7619036k/9435136k available (5817k kernel code, 1136060k absent, 680040k reserved, 2899k data, 696k init) dom0_mem=3G -Memory: 2616536k/9435136k available (5817k kernel code, 1136060k absent, 5682540k reserved, 2899k data, 696k init) +Memory: 2703776k/9435136k available (5817k kernel code, 1136060k absent, 5595300k reserved, 2899k data, 696k init) dom0_mem=max:3G -Memory: 2696732k/4281724k available (5817k kernel code, 1136060k absent, 448932k reserved, 2899k data, 696k init) +Memory: 2702204k/4281724k available (5817k kernel code, 1136060k absent, 443460k reserved, 2899k data, 696k init) And the 'xm list' or 'xl list' now reflect what the dom0_mem= argument is. Acked-by: David Vrabel [v2: Use populate hypercall] [v3: Remove debug printks] [v4: Simplify code] Signed-off-by: Konrad Rzeszutek Wilk diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index 7b0ab77..710af36 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@ -26,7 +26,6 @@ #include #include #include - #include "xen-ops.h" #include "vdso.h" @@ -120,7 +119,105 @@ static unsigned long __init xen_release_chunk(unsigned long start, return len; } +static unsigned long __init xen_populate_physmap(unsigned long start, + unsigned long end) +{ + struct xen_memory_reservation reservation = { + .address_bits = 0, + .extent_order = 0, + .domid = DOMID_SELF + }; + unsigned long len = 0; + int ret; + + for (pfn = start; pfn < end; pfn++) { + unsigned long frame; + + /* Make sure pfn does not exists to start with */ + if (pfn_to_mfn(pfn) != INVALID_P2M_ENTRY) + continue; + frame = pfn; + set_xen_guest_handle(reservation.extent_start, &frame); + reservation.nr_extents = 1; + + ret = HYPERVISOR_memory_op(XENMEM_populate_physmap, &reservation); + WARN(ret != 1, "Failed to populate pfn %lx err=%d\n", pfn, ret); + if (ret == 1) { + if (!early_set_phys_to_machine(pfn, frame)) { + set_xen_guest_handle(reservation.extent_start, &frame); + reservation.nr_extents = 1; + ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, + &reservation); + break; + } + len++; + } else + break; + } + if (len) + printk(KERN_INFO "Populated %lx-%lx pfn range: %lu pages added\n", + start, end, len); + return len; +} +static unsigned long __init xen_populate_chunk( + const struct e820entry *list, size_t map_size, + unsigned long max_pfn, unsigned long *last_pfn, + unsigned long credits_left) +{ + const struct e820entry *entry; + unsigned int i; + unsigned long done = 0; + unsigned long dest_pfn; + + for (i = 0, entry = list; i < map_size; i++, entry++) { + unsigned long credits = credits_left; + unsigned long s_pfn; + unsigned long e_pfn; + unsigned long pfns; + long capacity; + + if (credits <= 0) + break; + + if (entry->type != E820_RAM) + continue; + + e_pfn = PFN_UP(entry->addr + entry->size); + + /* We only care about E820 after the xen_start_info->nr_pages */ + if (e_pfn <= max_pfn) + continue; + + s_pfn = PFN_DOWN(entry->addr); + /* If the E820 falls within the nr_pages, we want to start + * at the nr_pages PFN. + * If that would mean going past the E820 entry, skip it + */ + if (s_pfn <= max_pfn) { + capacity = e_pfn - max_pfn; + dest_pfn = max_pfn; + } else { + /* last_pfn MUST be within E820_RAM regions */ + if (*last_pfn && e_pfn >= *last_pfn) + s_pfn = *last_pfn; + capacity = e_pfn - s_pfn; + dest_pfn = s_pfn; + } + /* If we had filled this E820_RAM entry, go to the next one. */ + if (capacity <= 0) + continue; + + if (credits > capacity) + credits = capacity; + + pfns = xen_populate_physmap(dest_pfn, dest_pfn + credits); + done += pfns; + credits_left -= pfns; + *last_pfn = (dest_pfn + pfns); + } + return done; +} static unsigned long __init xen_set_identity_and_release( const struct e820entry *list, size_t map_size, unsigned long nr_pages) { @@ -143,7 +240,6 @@ static unsigned long __init xen_set_identity_and_release( */ for (i = 0, entry = list; i < map_size; i++, entry++) { phys_addr_t end = entry->addr + entry->size; - if (entry->type == E820_RAM || i == map_size - 1) { unsigned long start_pfn = PFN_DOWN(start); unsigned long end_pfn = PFN_UP(end); @@ -220,7 +316,9 @@ char * __init xen_memory_setup(void) int rc; struct xen_memory_map memmap; unsigned long max_pages; + unsigned long last_pfn = 0; unsigned long extra_pages = 0; + unsigned long populated; int i; int op; @@ -260,9 +358,20 @@ char * __init xen_memory_setup(void) */ xen_released_pages = xen_set_identity_and_release( map, memmap.nr_entries, max_pfn); - extra_pages += xen_released_pages; /* + * Populate back the non-RAM pages and E820 gaps that had been + * released. */ + populated = xen_populate_chunk(map, memmap.nr_entries, + max_pfn, &last_pfn, xen_released_pages); + + extra_pages += (xen_released_pages - populated); + + if (last_pfn > max_pfn) { + max_pfn = min(MAX_DOMAIN_PAGES, last_pfn); + mem_end = PFN_PHYS(max_pfn); + } + /* * Clamp the amount of extra memory to a EXTRA_MEM_RATIO * factor the base size. On non-highmem systems, the base * size is the full initial memory allocation; on highmem it @@ -275,7 +384,6 @@ char * __init xen_memory_setup(void) */ extra_pages = min(EXTRA_MEM_RATIO * min(max_pfn, PFN_DOWN(MAXMEM)), extra_pages); - i = 0; while (i < memmap.nr_entries) { u64 addr = map[i].addr; -- cgit v0.10.2 From 96dc08b35c4af8cb5810450602590706f2593a5f Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Fri, 6 Apr 2012 16:10:20 -0400 Subject: xen/setup: Combine the two hypercall functions - since they are quite similar. They use the same set of arguments, so it is just the matter of using the proper hypercall. Acked-by: David Vrabel Signed-off-by: Konrad Rzeszutek Wilk diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index 710af36..30ac05a 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@ -83,8 +83,8 @@ static void __init xen_add_extra_mem(u64 start, u64 size) __set_phys_to_machine(pfn, INVALID_P2M_ENTRY); } -static unsigned long __init xen_release_chunk(unsigned long start, - unsigned long end) +static unsigned long __init xen_do_chunk(unsigned long start, + unsigned long end, bool release) { struct xen_memory_reservation reservation = { .address_bits = 0, @@ -95,60 +95,36 @@ static unsigned long __init xen_release_chunk(unsigned long start, unsigned long pfn; int ret; - for(pfn = start; pfn < end; pfn++) { - unsigned long mfn = pfn_to_mfn(pfn); - - /* Make sure pfn exists to start with */ - if (mfn == INVALID_P2M_ENTRY || mfn_to_pfn(mfn) != pfn) - continue; - - set_xen_guest_handle(reservation.extent_start, &mfn); - reservation.nr_extents = 1; - - ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, - &reservation); - WARN(ret != 1, "Failed to release pfn %lx err=%d\n", pfn, ret); - if (ret == 1) { - __set_phys_to_machine(pfn, INVALID_P2M_ENTRY); - len++; - } - } - if (len) - printk(KERN_INFO "Freeing %lx-%lx pfn range: %lu pages freed\n", - start, end, len); - - return len; -} -static unsigned long __init xen_populate_physmap(unsigned long start, - unsigned long end) -{ - struct xen_memory_reservation reservation = { - .address_bits = 0, - .extent_order = 0, - .domid = DOMID_SELF - }; - unsigned long len = 0; - int ret; - for (pfn = start; pfn < end; pfn++) { unsigned long frame; + unsigned long mfn = pfn_to_mfn(pfn); - /* Make sure pfn does not exists to start with */ - if (pfn_to_mfn(pfn) != INVALID_P2M_ENTRY) - continue; - - frame = pfn; + if (release) { + /* Make sure pfn exists to start with */ + if (mfn == INVALID_P2M_ENTRY || mfn_to_pfn(mfn) != pfn) + continue; + frame = mfn; + } else { + if (mfn != INVALID_P2M_ENTRY) + continue; + frame = pfn; + } set_xen_guest_handle(reservation.extent_start, &frame); reservation.nr_extents = 1; - ret = HYPERVISOR_memory_op(XENMEM_populate_physmap, &reservation); - WARN(ret != 1, "Failed to populate pfn %lx err=%d\n", pfn, ret); + ret = HYPERVISOR_memory_op(release ? XENMEM_decrease_reservation : XENMEM_populate_physmap, + &reservation); + WARN(ret != 1, "Failed to %s pfn %lx err=%d\n", + release ? "release" : "populate", pfn, ret); + if (ret == 1) { - if (!early_set_phys_to_machine(pfn, frame)) { + if (!early_set_phys_to_machine(pfn, release ? INVALID_P2M_ENTRY : frame)) { + if (release) + break; set_xen_guest_handle(reservation.extent_start, &frame); reservation.nr_extents = 1; ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, - &reservation); + &reservation); break; } len++; @@ -156,8 +132,11 @@ static unsigned long __init xen_populate_physmap(unsigned long start, break; } if (len) - printk(KERN_INFO "Populated %lx-%lx pfn range: %lu pages added\n", - start, end, len); + printk(KERN_INFO "%s %lx-%lx pfn range: %lu pages %s\n", + release ? "Freeing" : "Populating", + start, end, len, + release ? "freed" : "added"); + return len; } static unsigned long __init xen_populate_chunk( @@ -211,7 +190,7 @@ static unsigned long __init xen_populate_chunk( if (credits > capacity) credits = capacity; - pfns = xen_populate_physmap(dest_pfn, dest_pfn + credits); + pfns = xen_do_chunk(dest_pfn, dest_pfn + credits, false); done += pfns; credits_left -= pfns; *last_pfn = (dest_pfn + pfns); @@ -249,8 +228,8 @@ static unsigned long __init xen_set_identity_and_release( if (start_pfn < end_pfn) { if (start_pfn < nr_pages) - released += xen_release_chunk( - start_pfn, min(end_pfn, nr_pages)); + released += xen_do_chunk( + start_pfn, min(end_pfn, nr_pages), true); identity += set_phys_range_identity( start_pfn, end_pfn); -- cgit v0.10.2 From 83d51ab473dddde7df858015070ed22b84ebe9a9 Mon Sep 17 00:00:00 2001 From: David Vrabel Date: Thu, 3 May 2012 16:15:42 +0100 Subject: xen/setup: update VA mapping when releasing memory during setup In xen_memory_setup(), if a page that is being released has a VA mapping this must also be updated. Otherwise, the page will be not released completely -- it will still be referenced in Xen and won't be freed util the mapping is removed and this prevents it from being reallocated at a different PFN. This was already being done for the ISA memory region in xen_ident_map_ISA() but on many systems this was omitting a few pages as many systems marked a few pages below the ISA memory region as reserved in the e820 map. This fixes errors such as: (XEN) page_alloc.c:1148:d0 Over-allocation for domain 0: 2097153 > 2097152 (XEN) memory.c:133:d0 Could not allocate order=0 extent: id=0 memflags=0 (0 of 17) Signed-off-by: David Vrabel Signed-off-by: Konrad Rzeszutek Wilk diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index fe06bf4..ac90e56 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -1308,7 +1308,6 @@ asmlinkage void __init xen_start_kernel(void) xen_raw_console_write("mapping kernel into physical memory\n"); pgd = xen_setup_kernel_pagetable(pgd, xen_start_info->nr_pages); - xen_ident_map_ISA(); /* Allocate and initialize top and mid mfn levels for p2m structure */ xen_build_mfn_list_list(); diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 91dc287..c9a3519 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -1929,29 +1929,6 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot) #endif } -void __init xen_ident_map_ISA(void) -{ - unsigned long pa; - - /* - * If we're dom0, then linear map the ISA machine addresses into - * the kernel's address space. - */ - if (!xen_initial_domain()) - return; - - xen_raw_printk("Xen: setup ISA identity maps\n"); - - for (pa = ISA_START_ADDRESS; pa < ISA_END_ADDRESS; pa += PAGE_SIZE) { - pte_t pte = mfn_pte(PFN_DOWN(pa), PAGE_KERNEL_IO); - - if (HYPERVISOR_update_va_mapping(PAGE_OFFSET + pa, pte, 0)) - BUG(); - } - - xen_flush_tlb(); -} - static void __init xen_post_allocator_init(void) { pv_mmu_ops.set_pte = xen_set_pte; diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index 30ac05a..3ebba07 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@ -139,6 +139,13 @@ static unsigned long __init xen_do_chunk(unsigned long start, return len; } + +static unsigned long __init xen_release_chunk(unsigned long start, + unsigned long end) +{ + return xen_do_chunk(start, end, true); +} + static unsigned long __init xen_populate_chunk( const struct e820entry *list, size_t map_size, unsigned long max_pfn, unsigned long *last_pfn, @@ -197,6 +204,29 @@ static unsigned long __init xen_populate_chunk( } return done; } + +static void __init xen_set_identity_and_release_chunk( + unsigned long start_pfn, unsigned long end_pfn, unsigned long nr_pages, + unsigned long *released, unsigned long *identity) +{ + unsigned long pfn; + + /* + * If the PFNs are currently mapped, the VA mapping also needs + * to be updated to be 1:1. + */ + for (pfn = start_pfn; pfn <= max_pfn_mapped && pfn < end_pfn; pfn++) + (void)HYPERVISOR_update_va_mapping( + (unsigned long)__va(pfn << PAGE_SHIFT), + mfn_pte(pfn, PAGE_KERNEL_IO), 0); + + if (start_pfn < nr_pages) + *released += xen_release_chunk( + start_pfn, min(end_pfn, nr_pages)); + + *identity += set_phys_range_identity(start_pfn, end_pfn); +} + static unsigned long __init xen_set_identity_and_release( const struct e820entry *list, size_t map_size, unsigned long nr_pages) { @@ -226,14 +256,11 @@ static unsigned long __init xen_set_identity_and_release( if (entry->type == E820_RAM) end_pfn = PFN_UP(entry->addr); - if (start_pfn < end_pfn) { - if (start_pfn < nr_pages) - released += xen_do_chunk( - start_pfn, min(end_pfn, nr_pages), true); + if (start_pfn < end_pfn) + xen_set_identity_and_release_chunk( + start_pfn, end_pfn, nr_pages, + &released, &identity); - identity += set_phys_range_identity( - start_pfn, end_pfn); - } start = end; } } diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index b095739..506fa08 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h @@ -28,7 +28,6 @@ void xen_setup_shared_info(void); void xen_build_mfn_list_list(void); void xen_setup_machphys_mapping(void); pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn); -void xen_ident_map_ISA(void); void xen_reserve_top(void); extern unsigned long xen_max_p2m_pfn; -- cgit v0.10.2 From f447d56d36af18c5104ff29dcb1327c0c0ac3634 Mon Sep 17 00:00:00 2001 From: Ben Guthro Date: Sat, 21 Apr 2012 00:11:04 +0800 Subject: xen: implement apic ipi interface Map native ipi vector to xen vector. Implement apic ipi interface with xen_send_IPI_one. Tested-by: Steven Noonan Signed-off-by: Ben Guthro Signed-off-by: Lin Ming Signed-off-by: Konrad Rzeszutek Wilk diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 4f51beb..1ed61c2 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -74,6 +74,7 @@ #include "xen-ops.h" #include "mmu.h" +#include "smp.h" #include "multicalls.h" EXPORT_SYMBOL_GPL(hypercall_page); @@ -849,6 +850,14 @@ static void set_xen_basic_apic_ops(void) apic->icr_write = xen_apic_icr_write; apic->wait_icr_idle = xen_apic_wait_icr_idle; apic->safe_wait_icr_idle = xen_safe_apic_wait_icr_idle; + +#ifdef CONFIG_SMP + apic->send_IPI_allbutself = xen_send_IPI_allbutself; + apic->send_IPI_mask_allbutself = xen_send_IPI_mask_allbutself; + apic->send_IPI_mask = xen_send_IPI_mask; + apic->send_IPI_all = xen_send_IPI_all; + apic->send_IPI_self = xen_send_IPI_self; +#endif } #endif diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index 5fac691..2dc6628 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c @@ -465,8 +465,8 @@ static void xen_smp_send_reschedule(int cpu) xen_send_IPI_one(cpu, XEN_RESCHEDULE_VECTOR); } -static void xen_send_IPI_mask(const struct cpumask *mask, - enum ipi_vector vector) +static void __xen_send_IPI_mask(const struct cpumask *mask, + int vector) { unsigned cpu; @@ -478,7 +478,7 @@ static void xen_smp_send_call_function_ipi(const struct cpumask *mask) { int cpu; - xen_send_IPI_mask(mask, XEN_CALL_FUNCTION_VECTOR); + __xen_send_IPI_mask(mask, XEN_CALL_FUNCTION_VECTOR); /* Make sure other vcpus get a chance to run if they need to. */ for_each_cpu(cpu, mask) { @@ -491,10 +491,83 @@ static void xen_smp_send_call_function_ipi(const struct cpumask *mask) static void xen_smp_send_call_function_single_ipi(int cpu) { - xen_send_IPI_mask(cpumask_of(cpu), + __xen_send_IPI_mask(cpumask_of(cpu), XEN_CALL_FUNCTION_SINGLE_VECTOR); } +static inline int xen_map_vector(int vector) +{ + int xen_vector; + + switch (vector) { + case RESCHEDULE_VECTOR: + xen_vector = XEN_RESCHEDULE_VECTOR; + break; + case CALL_FUNCTION_VECTOR: + xen_vector = XEN_CALL_FUNCTION_VECTOR; + break; + case CALL_FUNCTION_SINGLE_VECTOR: + xen_vector = XEN_CALL_FUNCTION_SINGLE_VECTOR; + break; + default: + xen_vector = -1; + printk(KERN_ERR "xen: vector 0x%x is not implemented\n", + vector); + } + + return xen_vector; +} + +void xen_send_IPI_mask(const struct cpumask *mask, + int vector) +{ + int xen_vector = xen_map_vector(vector); + + if (xen_vector >= 0) + __xen_send_IPI_mask(mask, xen_vector); +} + +void xen_send_IPI_all(int vector) +{ + int xen_vector = xen_map_vector(vector); + + if (xen_vector >= 0) + __xen_send_IPI_mask(cpu_online_mask, xen_vector); +} + +void xen_send_IPI_self(int vector) +{ + int xen_vector = xen_map_vector(vector); + + if (xen_vector >= 0) + xen_send_IPI_one(smp_processor_id(), xen_vector); +} + +void xen_send_IPI_mask_allbutself(const struct cpumask *mask, + int vector) +{ + unsigned cpu; + unsigned int this_cpu = smp_processor_id(); + + if (!(num_online_cpus() > 1)) + return; + + for_each_cpu_and(cpu, mask, cpu_online_mask) { + if (this_cpu == cpu) + continue; + + xen_smp_send_call_function_single_ipi(cpu); + } +} + +void xen_send_IPI_allbutself(int vector) +{ + int xen_vector = xen_map_vector(vector); + + if (xen_vector >= 0) + xen_send_IPI_mask_allbutself(cpu_online_mask, xen_vector); +} + static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id) { irq_enter(); diff --git a/arch/x86/xen/smp.h b/arch/x86/xen/smp.h new file mode 100644 index 0000000..8981a76 --- /dev/null +++ b/arch/x86/xen/smp.h @@ -0,0 +1,12 @@ +#ifndef _XEN_SMP_H + +extern void xen_send_IPI_mask(const struct cpumask *mask, + int vector); +extern void xen_send_IPI_mask_allbutself(const struct cpumask *mask, + int vector); +extern void xen_send_IPI_allbutself(int vector); +extern void physflat_send_IPI_allbutself(int vector); +extern void xen_send_IPI_all(int vector); +extern void xen_send_IPI_self(int vector); + +#endif -- cgit v0.10.2 From 1ff2b0c303698e486f1e0886b4d9876200ef8ca5 Mon Sep 17 00:00:00 2001 From: Lin Ming Date: Sat, 21 Apr 2012 00:11:05 +0800 Subject: xen: implement IRQ_WORK_VECTOR handler Signed-off-by: Lin Ming Signed-off-by: Konrad Rzeszutek Wilk diff --git a/arch/x86/include/asm/xen/events.h b/arch/x86/include/asm/xen/events.h index 1df3541..cc146d5 100644 --- a/arch/x86/include/asm/xen/events.h +++ b/arch/x86/include/asm/xen/events.h @@ -6,6 +6,7 @@ enum ipi_vector { XEN_CALL_FUNCTION_VECTOR, XEN_CALL_FUNCTION_SINGLE_VECTOR, XEN_SPIN_UNLOCK_VECTOR, + XEN_IRQ_WORK_VECTOR, XEN_NR_IPIS, }; diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index 2dc6628..3ec3f8e 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include @@ -41,10 +42,12 @@ cpumask_var_t xen_cpu_initialized_map; static DEFINE_PER_CPU(int, xen_resched_irq); static DEFINE_PER_CPU(int, xen_callfunc_irq); static DEFINE_PER_CPU(int, xen_callfuncsingle_irq); +static DEFINE_PER_CPU(int, xen_irq_work); static DEFINE_PER_CPU(int, xen_debug_irq) = -1; static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id); static irqreturn_t xen_call_function_single_interrupt(int irq, void *dev_id); +static irqreturn_t xen_irq_work_interrupt(int irq, void *dev_id); /* * Reschedule call back. @@ -143,6 +146,17 @@ static int xen_smp_intr_init(unsigned int cpu) goto fail; per_cpu(xen_callfuncsingle_irq, cpu) = rc; + callfunc_name = kasprintf(GFP_KERNEL, "irqwork%d", cpu); + rc = bind_ipi_to_irqhandler(XEN_IRQ_WORK_VECTOR, + cpu, + xen_irq_work_interrupt, + IRQF_DISABLED|IRQF_PERCPU|IRQF_NOBALANCING, + callfunc_name, + NULL); + if (rc < 0) + goto fail; + per_cpu(xen_irq_work, cpu) = rc; + return 0; fail: @@ -155,6 +169,8 @@ static int xen_smp_intr_init(unsigned int cpu) if (per_cpu(xen_callfuncsingle_irq, cpu) >= 0) unbind_from_irqhandler(per_cpu(xen_callfuncsingle_irq, cpu), NULL); + if (per_cpu(xen_irq_work, cpu) >= 0) + unbind_from_irqhandler(per_cpu(xen_irq_work, cpu), NULL); return rc; } @@ -509,6 +525,9 @@ static inline int xen_map_vector(int vector) case CALL_FUNCTION_SINGLE_VECTOR: xen_vector = XEN_CALL_FUNCTION_SINGLE_VECTOR; break; + case IRQ_WORK_VECTOR: + xen_vector = XEN_IRQ_WORK_VECTOR; + break; default: xen_vector = -1; printk(KERN_ERR "xen: vector 0x%x is not implemented\n", @@ -588,6 +607,16 @@ static irqreturn_t xen_call_function_single_interrupt(int irq, void *dev_id) return IRQ_HANDLED; } +static irqreturn_t xen_irq_work_interrupt(int irq, void *dev_id) +{ + irq_enter(); + irq_work_run(); + inc_irq_stat(apic_irq_work_irqs); + irq_exit(); + + return IRQ_HANDLED; +} + static const struct smp_ops xen_smp_ops __initconst = { .smp_prepare_boot_cpu = xen_smp_prepare_boot_cpu, .smp_prepare_cpus = xen_smp_prepare_cpus, @@ -634,6 +663,7 @@ static void xen_hvm_cpu_die(unsigned int cpu) unbind_from_irqhandler(per_cpu(xen_callfunc_irq, cpu), NULL); unbind_from_irqhandler(per_cpu(xen_debug_irq, cpu), NULL); unbind_from_irqhandler(per_cpu(xen_callfuncsingle_irq, cpu), NULL); + unbind_from_irqhandler(per_cpu(xen_irq_work, cpu), NULL); native_cpu_die(cpu); } -- cgit v0.10.2 From 211063dc159695bd6072c5393e9bc729481c6ede Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Thu, 8 Dec 2011 17:32:23 +0800 Subject: xen/acpi/sleep: Enable ACPI sleep via the __acpi_os_prepare_sleep Provide the registration callback to call in the Xen's ACPI sleep functionality. This means that during S3/S5 we make a hypercall XENPF_enter_acpi_sleep with the proper PM1A/PM1B registers. Based of Ke Yu's initial idea. [ From http://xenbits.xensource.com/linux-2.6.18-xen.hg change c68699484a65 ] [v1: Added Copyright and license] [v2: Added check if PM1A/B the 16-bits MSB contain something. The spec only uses 16-bits but might have more in future] Signed-off-by: Liang Tang Signed-off-by: Konrad Rzeszutek Wilk diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 1ed61c2..eca90e5 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -42,6 +42,7 @@ #include #include #include +#include #include #include @@ -1373,6 +1374,8 @@ asmlinkage void __init xen_start_kernel(void) /* Make sure ACS will be enabled */ pci_request_acs(); + + xen_acpi_sleep_register(); } diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile index 9adc5be..fc34886 100644 --- a/drivers/xen/Makefile +++ b/drivers/xen/Makefile @@ -17,7 +17,7 @@ obj-$(CONFIG_XEN_SYS_HYPERVISOR) += sys-hypervisor.o obj-$(CONFIG_XEN_PVHVM) += platform-pci.o obj-$(CONFIG_XEN_TMEM) += tmem.o obj-$(CONFIG_SWIOTLB_XEN) += swiotlb-xen.o -obj-$(CONFIG_XEN_DOM0) += pci.o +obj-$(CONFIG_XEN_DOM0) += pci.o acpi.o obj-$(CONFIG_XEN_PCIDEV_BACKEND) += xen-pciback/ obj-$(CONFIG_XEN_PRIVCMD) += xen-privcmd.o obj-$(CONFIG_XEN_ACPI_PROCESSOR) += xen-acpi-processor.o diff --git a/drivers/xen/acpi.c b/drivers/xen/acpi.c new file mode 100644 index 0000000..119d42a --- /dev/null +++ b/drivers/xen/acpi.c @@ -0,0 +1,62 @@ +/****************************************************************************** + * acpi.c + * acpi file for domain 0 kernel + * + * Copyright (c) 2011 Konrad Rzeszutek Wilk + * Copyright (c) 2011 Yu Ke ke.yu@intel.com + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation; or, when distributed + * separately from the Linux kernel or incorporated into other + * software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include +#include +#include +#include + +int xen_acpi_notify_hypervisor_state(u8 sleep_state, + u32 pm1a_cnt, u32 pm1b_cnt) +{ + struct xen_platform_op op = { + .cmd = XENPF_enter_acpi_sleep, + .interface_version = XENPF_INTERFACE_VERSION, + .u = { + .enter_acpi_sleep = { + .pm1a_cnt_val = (u16)pm1a_cnt, + .pm1b_cnt_val = (u16)pm1b_cnt, + .sleep_state = sleep_state, + }, + }, + }; + + if ((pm1a_cnt & 0xffff0000) || (pm1b_cnt & 0xffff0000)) { + WARN(1, "Using more than 16bits of PM1A/B 0x%x/0x%x!" + "Email xen-devel@lists.xensource.com Thank you.\n", \ + pm1a_cnt, pm1b_cnt); + return -1; + } + + HYPERVISOR_dom0_op(&op); + return 1; +} diff --git a/include/xen/acpi.h b/include/xen/acpi.h new file mode 100644 index 0000000..48a9c01 --- /dev/null +++ b/include/xen/acpi.h @@ -0,0 +1,58 @@ +/****************************************************************************** + * acpi.h + * acpi file for domain 0 kernel + * + * Copyright (c) 2011 Konrad Rzeszutek Wilk + * Copyright (c) 2011 Yu Ke + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation; or, when distributed + * separately from the Linux kernel or incorporated into other + * software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef _XEN_ACPI_H +#define _XEN_ACPI_H + +#include + +#ifdef CONFIG_XEN_DOM0 +#include +#include +#include + +int xen_acpi_notify_hypervisor_state(u8 sleep_state, + u32 pm1a_cnt, u32 pm1b_cnd); + +static inline void xen_acpi_sleep_register(void) +{ + if (xen_initial_domain()) + acpi_os_set_prepare_sleep( + &xen_acpi_notify_hypervisor_state); +} +#else +static inline void xen_acpi_sleep_register(void) +{ +} +#endif + +#endif /* _XEN_ACPI_H */ -- cgit v0.10.2 From f62805f1f30a40e354bd036b4cb799863a39be4b Mon Sep 17 00:00:00 2001 From: Stefano Stabellini Date: Tue, 24 Apr 2012 11:55:43 +0100 Subject: xen: enter/exit lazy_mmu_mode around m2p_override calls This patch is a significant performance improvement for the m2p_override: about 6% using the gntdev device. Each m2p_add/remove_override call issues a MULTI_grant_table_op and a __flush_tlb_single if kmap_op != NULL. Batching all the calls together is a great performance benefit because it means issuing one hypercall total rather than two hypercall per page. If paravirt_lazy_mode is set PARAVIRT_LAZY_MMU, all these calls are going to be batched together, otherwise they are issued one at a time. Adding arch_enter_lazy_mmu_mode/arch_leave_lazy_mmu_mode around the m2p_add/remove_override calls forces paravirt_lazy_mode to PARAVIRT_LAZY_MMU, therefore makes sure that they are always batched. However it is not safe to call arch_enter_lazy_mmu_mode if we are in interrupt context or if we are already in PARAVIRT_LAZY_MMU mode, so check for both conditions before doing so. Changes in v4: - rebased on 3.4-rc4: all the m2p_override users call gnttab_unmap_refs and gnttab_map_refs; - check whether we are in interrupt context and the lazy_mode we are in before calling arch_enter/leave_lazy_mmu_mode. Changes in v3: - do not call arch_enter/leave_lazy_mmu_mode in xen_blkbk_unmap, that can be called in interrupt context. Signed-off-by: Stefano Stabellini [v5: s/int lazy/bool lazy/] Signed-off-by: Konrad Rzeszutek Wilk diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c index 9f514bb..487e468 100644 --- a/drivers/xen/grant-table.c +++ b/drivers/xen/grant-table.c @@ -38,6 +38,7 @@ #include #include #include +#include #include #include @@ -827,6 +828,7 @@ int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops, struct page **pages, unsigned int count) { int i, ret; + bool lazy = false; pte_t *pte; unsigned long mfn; @@ -837,6 +839,11 @@ int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops, if (xen_feature(XENFEAT_auto_translated_physmap)) return ret; + if (!in_interrupt() && paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE) { + arch_enter_lazy_mmu_mode(); + lazy = true; + } + for (i = 0; i < count; i++) { /* Do not add to override if the map failed. */ if (map_ops[i].status) @@ -855,6 +862,9 @@ int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops, return ret; } + if (lazy) + arch_leave_lazy_mmu_mode(); + return ret; } EXPORT_SYMBOL_GPL(gnttab_map_refs); @@ -863,6 +873,7 @@ int gnttab_unmap_refs(struct gnttab_unmap_grant_ref *unmap_ops, struct page **pages, unsigned int count, bool clear_pte) { int i, ret; + bool lazy = false; ret = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, unmap_ops, count); if (ret) @@ -871,12 +882,20 @@ int gnttab_unmap_refs(struct gnttab_unmap_grant_ref *unmap_ops, if (xen_feature(XENFEAT_auto_translated_physmap)) return ret; + if (!in_interrupt() && paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE) { + arch_enter_lazy_mmu_mode(); + lazy = true; + } + for (i = 0; i < count; i++) { ret = m2p_remove_override(pages[i], clear_pte); if (ret) return ret; } + if (lazy) + arch_leave_lazy_mmu_mode(); + return ret; } EXPORT_SYMBOL_GPL(gnttab_unmap_refs); -- cgit v0.10.2 From 0ae28542a87e1c5e3364bd5da226a2a9fbf1577d Mon Sep 17 00:00:00 2001 From: Santosh Shilimkar Date: Sat, 28 Apr 2012 19:21:06 +0530 Subject: ARM: OMAP4: Don't compile cm2xxx_3xxx.c for OMAP4 only builds. Since OMAP4 code base now makes use of OMAP4 specific PRCM functions, cm2xxx_3xxx.c need not be compiled for OMAP4 only builds. Signed-off-by: Santosh Shilimkar Acked-by: Paul Walmsley diff --git a/arch/arm/mach-omap2/Makefile b/arch/arm/mach-omap2/Makefile index 49f92bc..56ed62e 100644 --- a/arch/arm/mach-omap2/Makefile +++ b/arch/arm/mach-omap2/Makefile @@ -86,10 +86,7 @@ obj-y += prm_common.o obj-$(CONFIG_ARCH_OMAP2) += prcm.o cm2xxx_3xxx.o prm2xxx_3xxx.o obj-$(CONFIG_ARCH_OMAP3) += prcm.o cm2xxx_3xxx.o prm2xxx_3xxx.o \ vc3xxx_data.o vp3xxx_data.o -# XXX The presence of cm2xxx_3xxx.o on the line below is temporary and -# will be removed once the OMAP4 part of the codebase is converted to -# use OMAP4-specific PRCM functions. -obj-$(CONFIG_ARCH_OMAP4) += prcm.o cm2xxx_3xxx.o cminst44xx.o \ +obj-$(CONFIG_ARCH_OMAP4) += prcm.o cminst44xx.o \ cm44xx.o prcm_mpu44xx.o \ prminst44xx.o vc44xx_data.o \ vp44xx_data.o prm44xx.o -- cgit v0.10.2 From eb401553106db50ed36b2ab72e957efff7c46c9b Mon Sep 17 00:00:00 2001 From: Santosh Shilimkar Date: Sat, 28 Apr 2012 19:57:32 +0530 Subject: ARM: OMAP2+: Clean up wrapping multiple objects in Makefile Signed-off-by: Santosh Shilimkar diff --git a/arch/arm/mach-omap2/Makefile b/arch/arm/mach-omap2/Makefile index 56ed62e..669e2b1 100644 --- a/arch/arm/mach-omap2/Makefile +++ b/arch/arm/mach-omap2/Makefile @@ -24,10 +24,11 @@ endif obj-$(CONFIG_TWL4030_CORE) += omap_twl.o # SMP support ONLY available for OMAP4 + obj-$(CONFIG_SMP) += omap-smp.o omap-headsmp.o obj-$(CONFIG_HOTPLUG_CPU) += omap-hotplug.o -obj-$(CONFIG_ARCH_OMAP4) += omap4-common.o omap-wakeupgen.o \ - sleep44xx.o +obj-$(CONFIG_ARCH_OMAP4) += omap4-common.o omap-wakeupgen.o +obj-$(CONFIG_ARCH_OMAP4) += sleep44xx.o plus_sec := $(call as-instr,.arch_extension sec,+sec) AFLAGS_omap-headsmp.o :=-Wa,-march=armv7-a$(plus_sec) @@ -64,10 +65,10 @@ endif ifeq ($(CONFIG_PM),y) obj-$(CONFIG_ARCH_OMAP2) += pm24xx.o obj-$(CONFIG_ARCH_OMAP2) += sleep24xx.o -obj-$(CONFIG_ARCH_OMAP3) += pm34xx.o sleep34xx.o \ - cpuidle34xx.o -obj-$(CONFIG_ARCH_OMAP4) += pm44xx.o omap-mpuss-lowpower.o \ - cpuidle44xx.o +obj-$(CONFIG_ARCH_OMAP3) += pm34xx.o sleep34xx.o +obj-$(CONFIG_ARCH_OMAP3) += cpuidle34xx.o +obj-$(CONFIG_ARCH_OMAP4) += pm44xx.o omap-mpuss-lowpower.o +obj-$(CONFIG_ARCH_OMAP4) += cpuidle44xx.o obj-$(CONFIG_PM_DEBUG) += pm-debug.o obj-$(CONFIG_OMAP_SMARTREFLEX) += sr_device.o smartreflex.o obj-$(CONFIG_OMAP_SMARTREFLEX_CLASS3) += smartreflex-class3.o @@ -84,85 +85,84 @@ endif # PRCM obj-y += prm_common.o obj-$(CONFIG_ARCH_OMAP2) += prcm.o cm2xxx_3xxx.o prm2xxx_3xxx.o -obj-$(CONFIG_ARCH_OMAP3) += prcm.o cm2xxx_3xxx.o prm2xxx_3xxx.o \ - vc3xxx_data.o vp3xxx_data.o -obj-$(CONFIG_ARCH_OMAP4) += prcm.o cminst44xx.o \ - cm44xx.o prcm_mpu44xx.o \ - prminst44xx.o vc44xx_data.o \ - vp44xx_data.o prm44xx.o +obj-$(CONFIG_ARCH_OMAP3) += prcm.o cm2xxx_3xxx.o prm2xxx_3xxx.o +obj-$(CONFIG_ARCH_OMAP3) += vc3xxx_data.o vp3xxx_data.o +obj-$(CONFIG_ARCH_OMAP4) += prcm.o cminst44xx.o cm44xx.o +obj-$(CONFIG_ARCH_OMAP4) += prcm_mpu44xx.o prminst44xx.o +obj-$(CONFIG_ARCH_OMAP4) += vc44xx_data.o vp44xx_data.o prm44xx.o # OMAP voltage domains voltagedomain-common := voltage.o vc.o vp.o -obj-$(CONFIG_ARCH_OMAP2) += $(voltagedomain-common) \ - voltagedomains2xxx_data.o -obj-$(CONFIG_ARCH_OMAP3) += $(voltagedomain-common) \ - voltagedomains3xxx_data.o -obj-$(CONFIG_ARCH_OMAP4) += $(voltagedomain-common) \ - voltagedomains44xx_data.o +obj-$(CONFIG_ARCH_OMAP2) += $(voltagedomain-common) +obj-$(CONFIG_ARCH_OMAP2) += voltagedomains2xxx_data.o +obj-$(CONFIG_ARCH_OMAP3) += $(voltagedomain-common) +obj-$(CONFIG_ARCH_OMAP3) += voltagedomains3xxx_data.o +obj-$(CONFIG_ARCH_OMAP4) += $(voltagedomain-common) +obj-$(CONFIG_ARCH_OMAP4) += voltagedomains44xx_data.o # OMAP powerdomain framework powerdomain-common += powerdomain.o powerdomain-common.o -obj-$(CONFIG_ARCH_OMAP2) += $(powerdomain-common) \ - powerdomain2xxx_3xxx.o \ - powerdomains2xxx_data.o \ - powerdomains2xxx_3xxx_data.o -obj-$(CONFIG_ARCH_OMAP3) += $(powerdomain-common) \ - powerdomain2xxx_3xxx.o \ - powerdomains3xxx_data.o \ - powerdomains2xxx_3xxx_data.o -obj-$(CONFIG_ARCH_OMAP4) += $(powerdomain-common) \ - powerdomain44xx.o \ - powerdomains44xx_data.o +obj-$(CONFIG_ARCH_OMAP2) += $(powerdomain-common) +obj-$(CONFIG_ARCH_OMAP2) += powerdomains2xxx_data.o +obj-$(CONFIG_ARCH_OMAP2) += powerdomain2xxx_3xxx.o +obj-$(CONFIG_ARCH_OMAP2) += powerdomains2xxx_3xxx_data.o +obj-$(CONFIG_ARCH_OMAP3) += $(powerdomain-common) +obj-$(CONFIG_ARCH_OMAP3) += powerdomain2xxx_3xxx.o +obj-$(CONFIG_ARCH_OMAP3) += powerdomains3xxx_data.o +obj-$(CONFIG_ARCH_OMAP3) += powerdomains2xxx_3xxx_data.o +obj-$(CONFIG_ARCH_OMAP4) += $(powerdomain-common) +obj-$(CONFIG_ARCH_OMAP4) += powerdomain44xx.o +obj-$(CONFIG_ARCH_OMAP4) += powerdomains44xx_data.o # PRCM clockdomain control -obj-$(CONFIG_ARCH_OMAP2) += clockdomain.o \ - clockdomain2xxx_3xxx.o \ - clockdomains2xxx_3xxx_data.o +obj-$(CONFIG_ARCH_OMAP2) += clockdomain.o +obj-$(CONFIG_ARCH_OMAP2) += clockdomain2xxx_3xxx.o +obj-$(CONFIG_ARCH_OMAP2) += clockdomains2xxx_3xxx_data.o obj-$(CONFIG_SOC_OMAP2420) += clockdomains2420_data.o obj-$(CONFIG_SOC_OMAP2430) += clockdomains2430_data.o -obj-$(CONFIG_ARCH_OMAP3) += clockdomain.o \ - clockdomain2xxx_3xxx.o \ - clockdomains2xxx_3xxx_data.o \ - clockdomains3xxx_data.o -obj-$(CONFIG_ARCH_OMAP4) += clockdomain.o \ - clockdomain44xx.o \ - clockdomains44xx_data.o +obj-$(CONFIG_ARCH_OMAP3) += clockdomain.o +obj-$(CONFIG_ARCH_OMAP3) += clockdomain2xxx_3xxx.o +obj-$(CONFIG_ARCH_OMAP3) += clockdomains2xxx_3xxx_data.o +obj-$(CONFIG_ARCH_OMAP3) += clockdomains3xxx_data.o +obj-$(CONFIG_ARCH_OMAP4) += clockdomain.o +obj-$(CONFIG_ARCH_OMAP4) += clockdomain44xx.o +obj-$(CONFIG_ARCH_OMAP4) += clockdomains44xx_data.o # Clock framework -obj-$(CONFIG_ARCH_OMAP2) += $(clock-common) clock2xxx.o \ - clkt2xxx_sys.o \ - clkt2xxx_dpllcore.o \ - clkt2xxx_virt_prcm_set.o \ - clkt2xxx_apll.o clkt2xxx_osc.o \ - clkt2xxx_dpll.o clkt_iclk.o +obj-$(CONFIG_ARCH_OMAP2) += $(clock-common) clock2xxx.o +obj-$(CONFIG_ARCH_OMAP2) += clkt2xxx_sys.o +obj-$(CONFIG_ARCH_OMAP2) += clkt2xxx_dpllcore.o +obj-$(CONFIG_ARCH_OMAP2) += clkt2xxx_virt_prcm_set.o +obj-$(CONFIG_ARCH_OMAP2) += clkt2xxx_apll.o clkt2xxx_osc.o +obj-$(CONFIG_ARCH_OMAP2) += clkt2xxx_dpll.o clkt_iclk.o obj-$(CONFIG_SOC_OMAP2420) += clock2420_data.o obj-$(CONFIG_SOC_OMAP2430) += clock2430.o clock2430_data.o -obj-$(CONFIG_ARCH_OMAP3) += $(clock-common) clock3xxx.o \ - clock34xx.o clkt34xx_dpll3m2.o \ - clock3517.o clock36xx.o \ - dpll3xxx.o clock3xxx_data.o \ - clkt_iclk.o -obj-$(CONFIG_ARCH_OMAP4) += $(clock-common) clock44xx_data.o \ - dpll3xxx.o dpll44xx.o +obj-$(CONFIG_ARCH_OMAP3) += $(clock-common) clock3xxx.o +obj-$(CONFIG_ARCH_OMAP3) += clock34xx.o clkt34xx_dpll3m2.o +obj-$(CONFIG_ARCH_OMAP3) += clock3517.o clock36xx.o +obj-$(CONFIG_ARCH_OMAP3) += dpll3xxx.o clock3xxx_data.o +obj-$(CONFIG_ARCH_OMAP3) += clkt_iclk.o +obj-$(CONFIG_ARCH_OMAP4) += $(clock-common) clock44xx_data.o +obj-$(CONFIG_ARCH_OMAP4) += dpll3xxx.o dpll44xx.o # OMAP2 clock rate set data (old "OPP" data) obj-$(CONFIG_SOC_OMAP2420) += opp2420_data.o obj-$(CONFIG_SOC_OMAP2430) += opp2430_data.o # hwmod data -obj-$(CONFIG_SOC_OMAP2420) += omap_hwmod_2xxx_ipblock_data.o \ - omap_hwmod_2xxx_3xxx_ipblock_data.o \ - omap_hwmod_2xxx_interconnect_data.o \ - omap_hwmod_2xxx_3xxx_interconnect_data.o \ - omap_hwmod_2420_data.o -obj-$(CONFIG_SOC_OMAP2430) += omap_hwmod_2xxx_ipblock_data.o \ - omap_hwmod_2xxx_3xxx_ipblock_data.o \ - omap_hwmod_2xxx_interconnect_data.o \ - omap_hwmod_2xxx_3xxx_interconnect_data.o \ - omap_hwmod_2430_data.o -obj-$(CONFIG_ARCH_OMAP3) += omap_hwmod_2xxx_3xxx_ipblock_data.o \ - omap_hwmod_2xxx_3xxx_interconnect_data.o \ - omap_hwmod_3xxx_data.o +obj-$(CONFIG_SOC_OMAP2420) += omap_hwmod_2xxx_ipblock_data.o +obj-$(CONFIG_SOC_OMAP2420) += omap_hwmod_2xxx_3xxx_ipblock_data.o +obj-$(CONFIG_SOC_OMAP2420) += omap_hwmod_2xxx_interconnect_data.o +obj-$(CONFIG_SOC_OMAP2420) += omap_hwmod_2xxx_3xxx_interconnect_data.o +obj-$(CONFIG_SOC_OMAP2420) += omap_hwmod_2420_data.o +obj-$(CONFIG_SOC_OMAP2430) += omap_hwmod_2xxx_ipblock_data.o +obj-$(CONFIG_SOC_OMAP2430) += omap_hwmod_2xxx_3xxx_ipblock_data.o +obj-$(CONFIG_SOC_OMAP2430) += omap_hwmod_2xxx_interconnect_data.o +obj-$(CONFIG_SOC_OMAP2430) += omap_hwmod_2xxx_3xxx_interconnect_data.o +obj-$(CONFIG_SOC_OMAP2430) += omap_hwmod_2430_data.o +obj-$(CONFIG_ARCH_OMAP3) += omap_hwmod_2xxx_3xxx_ipblock_data.o +obj-$(CONFIG_ARCH_OMAP3) += omap_hwmod_2xxx_3xxx_interconnect_data.o +obj-$(CONFIG_ARCH_OMAP3) += omap_hwmod_3xxx_data.o obj-$(CONFIG_ARCH_OMAP4) += omap_hwmod_44xx_data.o # EMU peripherals @@ -200,23 +200,19 @@ obj-$(CONFIG_MACH_OMAP3EVM) += board-omap3evm.o obj-$(CONFIG_MACH_OMAP3_PANDORA) += board-omap3pandora.o obj-$(CONFIG_MACH_OMAP_3430SDP) += board-3430sdp.o obj-$(CONFIG_MACH_NOKIA_N8X0) += board-n8x0.o -obj-$(CONFIG_MACH_NOKIA_RM680) += board-rm680.o \ - sdram-nokia.o -obj-$(CONFIG_MACH_NOKIA_RX51) += board-rx51.o \ - sdram-nokia.o \ - board-rx51-peripherals.o \ - board-rx51-video.o -obj-$(CONFIG_MACH_OMAP_ZOOM2) += board-zoom.o \ - board-zoom-peripherals.o \ - board-zoom-display.o \ - board-zoom-debugboard.o -obj-$(CONFIG_MACH_OMAP_ZOOM3) += board-zoom.o \ - board-zoom-peripherals.o \ - board-zoom-display.o \ - board-zoom-debugboard.o -obj-$(CONFIG_MACH_OMAP_3630SDP) += board-3630sdp.o \ - board-zoom-peripherals.o \ - board-zoom-display.o +obj-$(CONFIG_MACH_NOKIA_RM680) += board-rm680.o sdram-nokia.o +obj-$(CONFIG_MACH_NOKIA_RX51) += board-rx51.o sdram-nokia.o +obj-$(CONFIG_MACH_NOKIA_RX51) += board-rx51-peripherals.o +obj-$(CONFIG_MACH_NOKIA_RX51) += board-rx51-video.o +obj-$(CONFIG_MACH_OMAP_ZOOM2) += board-zoom.o board-zoom-peripherals.o +obj-$(CONFIG_MACH_OMAP_ZOOM2) += board-zoom-display.o +obj-$(CONFIG_MACH_OMAP_ZOOM2) += board-zoom-debugboard.o +obj-$(CONFIG_MACH_OMAP_ZOOM3) += board-zoom.o board-zoom-peripherals.o +obj-$(CONFIG_MACH_OMAP_ZOOM3) += board-zoom-display.o +obj-$(CONFIG_MACH_OMAP_ZOOM3) += board-zoom-debugboard.o +obj-$(CONFIG_MACH_OMAP_3630SDP) += board-3630sdp.o +obj-$(CONFIG_MACH_OMAP_3630SDP) += board-zoom-peripherals.o +obj-$(CONFIG_MACH_OMAP_3630SDP) += board-zoom-display.o obj-$(CONFIG_MACH_CM_T35) += board-cm-t35.o obj-$(CONFIG_MACH_CM_T3517) += board-cm-t3517.o obj-$(CONFIG_MACH_IGEP0020) += board-igep0020.o -- cgit v0.10.2 From 084753d18e6d2d95db21cc89514f225c8a7635b3 Mon Sep 17 00:00:00 2001 From: Santosh Shilimkar Date: Sat, 28 Apr 2012 20:06:10 +0530 Subject: ARM: OMAP4: Remove un-used WakeupGen register defines. Current OMAP code doesn't use any of the OMAP_WKG_ENB_SECURE_* registers. So remove those defines. Signed-off-by: Santosh Shilimkar diff --git a/arch/arm/mach-omap2/include/mach/omap-wakeupgen.h b/arch/arm/mach-omap2/include/mach/omap-wakeupgen.h index d79321b..548de90b 100644 --- a/arch/arm/mach-omap2/include/mach/omap-wakeupgen.h +++ b/arch/arm/mach-omap2/include/mach/omap-wakeupgen.h @@ -16,18 +16,10 @@ #define OMAP_WKG_ENB_B_0 0x14 #define OMAP_WKG_ENB_C_0 0x18 #define OMAP_WKG_ENB_D_0 0x1c -#define OMAP_WKG_ENB_SECURE_A_0 0x20 -#define OMAP_WKG_ENB_SECURE_B_0 0x24 -#define OMAP_WKG_ENB_SECURE_C_0 0x28 -#define OMAP_WKG_ENB_SECURE_D_0 0x2c #define OMAP_WKG_ENB_A_1 0x410 #define OMAP_WKG_ENB_B_1 0x414 #define OMAP_WKG_ENB_C_1 0x418 #define OMAP_WKG_ENB_D_1 0x41c -#define OMAP_WKG_ENB_SECURE_A_1 0x420 -#define OMAP_WKG_ENB_SECURE_B_1 0x424 -#define OMAP_WKG_ENB_SECURE_C_1 0x428 -#define OMAP_WKG_ENB_SECURE_D_1 0x42c #define OMAP_AUX_CORE_BOOT_0 0x800 #define OMAP_AUX_CORE_BOOT_1 0x804 #define OMAP_PTMSYNCREQ_MASK 0xc00 -- cgit v0.10.2 From d07c3df8019bf01306065c4271421d70ad23e5e8 Mon Sep 17 00:00:00 2001 From: Santosh Shilimkar Date: Sat, 28 Apr 2012 20:19:10 +0530 Subject: ARM: OMAP: dma: Make use of cpu_class_is_omap2() to avoid future patching. cpu_class_is_omap2() contains all OMAP2+ devices. So update the DMA code cpu checks accordingly so that there is no need to patch the file for any future OMAP2+ devices. In long run, all these attributes should come from hwmod dev_attr based on DMA IP version. Signed-off-by: Santosh Shilimkar diff --git a/arch/arm/plat-omap/dma.c b/arch/arm/plat-omap/dma.c index ecdb3da..c046a19 100644 --- a/arch/arm/plat-omap/dma.c +++ b/arch/arm/plat-omap/dma.c @@ -843,7 +843,7 @@ omap_dma_set_prio_lch(int lch, unsigned char read_prio, } l = p->dma_read(CCR, lch); l &= ~((1 << 6) | (1 << 26)); - if (cpu_is_omap2430() || cpu_is_omap34xx() || cpu_is_omap44xx()) + if (cpu_class_is_omap2() && !cpu_is_omap242x()) l |= ((read_prio & 0x1) << 6) | ((write_prio & 0x1) << 26); else l |= ((read_prio & 0x1) << 6); @@ -2057,7 +2057,7 @@ static int __devinit omap_system_dma_probe(struct platform_device *pdev) } } - if (cpu_is_omap2430() || cpu_is_omap34xx() || cpu_is_omap44xx()) + if (cpu_class_is_omap2() && !cpu_is_omap242x()) omap_dma_set_global_params(DMA_DEFAULT_ARB_RATE, DMA_DEFAULT_FIFO_DEPTH, 0); -- cgit v0.10.2 From 256aa5fc239d7c1986dd47b88c6297d582e7763c Mon Sep 17 00:00:00 2001 From: Santosh Shilimkar Date: Sat, 28 Apr 2012 20:25:31 +0530 Subject: ARM: All OMAP2PLUS machines use omap2 directory so just add one entry All OMAP2PLUS arch based machines makes use of mach-omap2 directory. So just add one entry so that there is no need to patch this file for any OMAP2+ devices. Signed-off-by: Santosh Shilimkar diff --git a/arch/arm/Makefile b/arch/arm/Makefile index 047a207..7a6bde0 100644 --- a/arch/arm/Makefile +++ b/arch/arm/Makefile @@ -164,9 +164,7 @@ machine-$(CONFIG_ARCH_MXS) := mxs machine-$(CONFIG_ARCH_NETX) := netx machine-$(CONFIG_ARCH_NOMADIK) := nomadik machine-$(CONFIG_ARCH_OMAP1) := omap1 -machine-$(CONFIG_ARCH_OMAP2) := omap2 -machine-$(CONFIG_ARCH_OMAP3) := omap2 -machine-$(CONFIG_ARCH_OMAP4) := omap2 +machine-$(CONFIG_ARCH_OMAP2PLUS) := omap2 machine-$(CONFIG_ARCH_ORION5X) := orion5x machine-$(CONFIG_ARCH_PICOXCELL) := picoxcell machine-$(CONFIG_ARCH_PNX4008) := pnx4008 -- cgit v0.10.2 From 7d6e11ef30ea61007e6059748e80bb27606281ea Mon Sep 17 00:00:00 2001 From: Santosh Shilimkar Date: Mon, 30 Apr 2012 15:47:42 +0530 Subject: ARM: OMAP4: Reduce the static IO mapping EMIF, GMPC and DMM driver can ioremap() the address space as part of driver intialisation and there is no need to have static IO mapping for them. Hence remove the un-used static IP space and let the respective drivers manage it as part if driver init. Signed-off-by: Santosh Shilimkar diff --git a/arch/arm/mach-omap2/io.c b/arch/arm/mach-omap2/io.c index 065bd76..595a5dd 100644 --- a/arch/arm/mach-omap2/io.c +++ b/arch/arm/mach-omap2/io.c @@ -215,41 +215,11 @@ static struct map_desc omap44xx_io_desc[] __initdata = { .type = MT_DEVICE, }, { - .virtual = OMAP44XX_GPMC_VIRT, - .pfn = __phys_to_pfn(OMAP44XX_GPMC_PHYS), - .length = OMAP44XX_GPMC_SIZE, - .type = MT_DEVICE, - }, - { - .virtual = OMAP44XX_EMIF1_VIRT, - .pfn = __phys_to_pfn(OMAP44XX_EMIF1_PHYS), - .length = OMAP44XX_EMIF1_SIZE, - .type = MT_DEVICE, - }, - { - .virtual = OMAP44XX_EMIF2_VIRT, - .pfn = __phys_to_pfn(OMAP44XX_EMIF2_PHYS), - .length = OMAP44XX_EMIF2_SIZE, - .type = MT_DEVICE, - }, - { - .virtual = OMAP44XX_DMM_VIRT, - .pfn = __phys_to_pfn(OMAP44XX_DMM_PHYS), - .length = OMAP44XX_DMM_SIZE, - .type = MT_DEVICE, - }, - { .virtual = L4_PER_44XX_VIRT, .pfn = __phys_to_pfn(L4_PER_44XX_PHYS), .length = L4_PER_44XX_SIZE, .type = MT_DEVICE, }, - { - .virtual = L4_EMU_44XX_VIRT, - .pfn = __phys_to_pfn(L4_EMU_44XX_PHYS), - .length = L4_EMU_44XX_SIZE, - .type = MT_DEVICE, - }, #ifdef CONFIG_OMAP4_ERRATA_I688 { .virtual = OMAP4_SRAM_VA, diff --git a/arch/arm/mach-omap2/iomap.h b/arch/arm/mach-omap2/iomap.h index 0812b15..80b8892 100644 --- a/arch/arm/mach-omap2/iomap.h +++ b/arch/arm/mach-omap2/iomap.h @@ -37,9 +37,6 @@ #define OMAP4_L3_PER_IO_OFFSET 0xb1100000 #define OMAP4_L3_PER_IO_ADDRESS(pa) IOMEM((pa) + OMAP4_L3_PER_IO_OFFSET) -#define OMAP4_GPMC_IO_OFFSET 0xa9000000 -#define OMAP4_GPMC_IO_ADDRESS(pa) IOMEM((pa) + OMAP4_GPMC_IO_OFFSET) - #define OMAP2_EMU_IO_OFFSET 0xaa800000 /* Emulation */ #define OMAP2_EMU_IO_ADDRESS(pa) IOMEM((pa) + OMAP2_EMU_IO_OFFSET) @@ -170,28 +167,3 @@ #define L4_ABE_44XX_VIRT (L4_ABE_44XX_PHYS + OMAP2_L4_IO_OFFSET) #define L4_ABE_44XX_SIZE SZ_1M -#define L4_EMU_44XX_PHYS L4_EMU_44XX_BASE - /* 0x54000000 --> 0xfe800000 */ -#define L4_EMU_44XX_VIRT (L4_EMU_44XX_PHYS + OMAP2_EMU_IO_OFFSET) -#define L4_EMU_44XX_SIZE SZ_8M - -#define OMAP44XX_GPMC_PHYS OMAP44XX_GPMC_BASE - /* 0x50000000 --> 0xf9000000 */ -#define OMAP44XX_GPMC_VIRT (OMAP44XX_GPMC_PHYS + OMAP4_GPMC_IO_OFFSET) -#define OMAP44XX_GPMC_SIZE SZ_1M - - -#define OMAP44XX_EMIF1_PHYS OMAP44XX_EMIF1_BASE - /* 0x4c000000 --> 0xfd100000 */ -#define OMAP44XX_EMIF1_VIRT (OMAP44XX_EMIF1_PHYS + OMAP4_L3_PER_IO_OFFSET) -#define OMAP44XX_EMIF1_SIZE SZ_1M - -#define OMAP44XX_EMIF2_PHYS OMAP44XX_EMIF2_BASE - /* 0x4d000000 --> 0xfd200000 */ -#define OMAP44XX_EMIF2_SIZE SZ_1M -#define OMAP44XX_EMIF2_VIRT (OMAP44XX_EMIF1_VIRT + OMAP44XX_EMIF1_SIZE) - -#define OMAP44XX_DMM_PHYS OMAP44XX_DMM_BASE - /* 0x4e000000 --> 0xfd300000 */ -#define OMAP44XX_DMM_SIZE SZ_1M -#define OMAP44XX_DMM_VIRT (OMAP44XX_EMIF2_VIRT + OMAP44XX_EMIF2_SIZE) -- cgit v0.10.2 From f6d5e079a03a33b4cb747285e61098d3d85010ce Mon Sep 17 00:00:00 2001 From: R Sricharan Date: Mon, 7 May 2012 14:02:25 +0530 Subject: ARM: OMAP2+: dma: Define dma capabilities register bitfields and use them. The system dma module has capabiities register indicating the support for descriptor loading, constant fill, etc. Use this instead of OMAP revision check to identify the features supported runtime. This avoids patching the code for feature SOCs which has those capabilities. Signed-off-by: R Sricharan Signed-off-by: Santosh Shilimkar diff --git a/arch/arm/mach-omap2/dma.c b/arch/arm/mach-omap2/dma.c index b19d849..ff75abe 100644 --- a/arch/arm/mach-omap2/dma.c +++ b/arch/arm/mach-omap2/dma.c @@ -227,10 +227,6 @@ static int __init omap2_system_dma_init_dev(struct omap_hwmod *oh, void *unused) dma_stride = OMAP2_DMA_STRIDE; dma_common_ch_start = CSDP; - if (cpu_is_omap3630() || cpu_is_omap44xx()) - dma_common_ch_end = CCDN; - else - dma_common_ch_end = CCFN; p = kzalloc(sizeof(struct omap_system_dma_plat_info), GFP_KERNEL); if (!p) { @@ -277,6 +273,13 @@ static int __init omap2_system_dma_init_dev(struct omap_hwmod *oh, void *unused) dev_err(&pdev->dev, "%s: kzalloc fail\n", __func__); return -ENOMEM; } + + /* Check the capabilities register for descriptor loading feature */ + if (dma_read(CAPS_0, 0) & DMA_HAS_DESCRIPTOR_CAPS) + dma_common_ch_end = CCDN; + else + dma_common_ch_end = CCFN; + return 0; } diff --git a/arch/arm/plat-omap/include/plat/dma.h b/arch/arm/plat-omap/include/plat/dma.h index dc562a5..7742204 100644 --- a/arch/arm/plat-omap/include/plat/dma.h +++ b/arch/arm/plat-omap/include/plat/dma.h @@ -312,6 +312,11 @@ #define CLEAR_CSR_ON_READ BIT(0xC) #define IS_WORD_16 BIT(0xD) +/* Defines for DMA Capabilities */ +#define DMA_HAS_TRANSPARENT_CAPS (0x1 << 18) +#define DMA_HAS_CONSTANT_FILL_CAPS (0x1 << 19) +#define DMA_HAS_DESCRIPTOR_CAPS (0x3 << 20) + enum omap_reg_offsets { GCR, GSCR, GRST1, HW_ID, -- cgit v0.10.2 From bb13fabcca35fbce73f8cfbf238dacfa2a223006 Mon Sep 17 00:00:00 2001 From: Shawn Guo Date: Thu, 26 Apr 2012 10:35:40 +0800 Subject: ARM: exynos: use machine specific hook for late init Signed-off-by: Shawn Guo Acked-by: Kukjin Kim diff --git a/arch/arm/mach-exynos/common.c b/arch/arm/mach-exynos/common.c index 8614aab..16da001 100644 --- a/arch/arm/mach-exynos/common.c +++ b/arch/arm/mach-exynos/common.c @@ -285,6 +285,11 @@ void exynos5_restart(char mode, const char *cmd) __raw_writel(0x1, EXYNOS_SWRESET); } +void __init exynos_init_late(void) +{ + exynos_pm_late_initcall(); +} + /* * exynos_map_io * diff --git a/arch/arm/mach-exynos/common.h b/arch/arm/mach-exynos/common.h index 677b546..aed2eeb 100644 --- a/arch/arm/mach-exynos/common.h +++ b/arch/arm/mach-exynos/common.h @@ -19,6 +19,13 @@ void exynos4_init_irq(void); void exynos5_init_irq(void); void exynos4_restart(char mode, const char *cmd); void exynos5_restart(char mode, const char *cmd); +void exynos_init_late(void); + +#ifdef CONFIG_PM_GENERIC_DOMAINS +int exynos_pm_late_initcall(void); +#else +static int exynos_pm_late_initcall(void) { return 0; } +#endif #ifdef CONFIG_ARCH_EXYNOS4 void exynos4_register_clocks(void); diff --git a/arch/arm/mach-exynos/mach-armlex4210.c b/arch/arm/mach-exynos/mach-armlex4210.c index d726fcd..40eaa8c 100644 --- a/arch/arm/mach-exynos/mach-armlex4210.c +++ b/arch/arm/mach-exynos/mach-armlex4210.c @@ -214,6 +214,7 @@ MACHINE_START(ARMLEX4210, "ARMLEX4210") .map_io = armlex4210_map_io, .handle_irq = gic_handle_irq, .init_machine = armlex4210_machine_init, + .init_late = exynos_init_late, .timer = &exynos4_timer, .restart = exynos4_restart, MACHINE_END diff --git a/arch/arm/mach-exynos/mach-exynos4-dt.c b/arch/arm/mach-exynos/mach-exynos4-dt.c index 8245f1c..e7e9743 100644 --- a/arch/arm/mach-exynos/mach-exynos4-dt.c +++ b/arch/arm/mach-exynos/mach-exynos4-dt.c @@ -83,6 +83,7 @@ DT_MACHINE_START(EXYNOS4210_DT, "Samsung Exynos4 (Flattened Device Tree)") .map_io = exynos4210_dt_map_io, .handle_irq = gic_handle_irq, .init_machine = exynos4210_dt_machine_init, + .init_late = exynos_init_late, .timer = &exynos4_timer, .dt_compat = exynos4210_dt_compat, .restart = exynos4_restart, diff --git a/arch/arm/mach-exynos/mach-exynos5-dt.c b/arch/arm/mach-exynos/mach-exynos5-dt.c index 4711c89..5b1ab97 100644 --- a/arch/arm/mach-exynos/mach-exynos5-dt.c +++ b/arch/arm/mach-exynos/mach-exynos5-dt.c @@ -72,6 +72,7 @@ DT_MACHINE_START(EXYNOS5_DT, "SAMSUNG EXYNOS5 (Flattened Device Tree)") .map_io = exynos5250_dt_map_io, .handle_irq = gic_handle_irq, .init_machine = exynos5250_dt_machine_init, + .init_late = exynos_init_late, .timer = &exynos4_timer, .dt_compat = exynos5250_dt_compat, .restart = exynos5_restart, diff --git a/arch/arm/mach-exynos/mach-nuri.c b/arch/arm/mach-exynos/mach-nuri.c index b4f1f90..5d68532 100644 --- a/arch/arm/mach-exynos/mach-nuri.c +++ b/arch/arm/mach-exynos/mach-nuri.c @@ -1350,6 +1350,7 @@ MACHINE_START(NURI, "NURI") .map_io = nuri_map_io, .handle_irq = gic_handle_irq, .init_machine = nuri_machine_init, + .init_late = exynos_init_late, .timer = &exynos4_timer, .reserve = &nuri_reserve, .restart = exynos4_restart, diff --git a/arch/arm/mach-exynos/mach-origen.c b/arch/arm/mach-exynos/mach-origen.c index 878d4c9..263ba44 100644 --- a/arch/arm/mach-exynos/mach-origen.c +++ b/arch/arm/mach-exynos/mach-origen.c @@ -735,6 +735,7 @@ MACHINE_START(ORIGEN, "ORIGEN") .map_io = origen_map_io, .handle_irq = gic_handle_irq, .init_machine = origen_machine_init, + .init_late = exynos_init_late, .timer = &exynos4_timer, .reserve = &origen_reserve, .restart = exynos4_restart, diff --git a/arch/arm/mach-exynos/mach-smdk4x12.c b/arch/arm/mach-exynos/mach-smdk4x12.c index d00e4f0..763967d 100644 --- a/arch/arm/mach-exynos/mach-smdk4x12.c +++ b/arch/arm/mach-exynos/mach-smdk4x12.c @@ -303,6 +303,7 @@ MACHINE_START(SMDK4412, "SMDK4412") .map_io = smdk4x12_map_io, .handle_irq = gic_handle_irq, .init_machine = smdk4x12_machine_init, + .init_late = exynos_init_late, .timer = &exynos4_timer, .restart = exynos4_restart, MACHINE_END diff --git a/arch/arm/mach-exynos/mach-smdkv310.c b/arch/arm/mach-exynos/mach-smdkv310.c index 83b91fa..f9d2b31 100644 --- a/arch/arm/mach-exynos/mach-smdkv310.c +++ b/arch/arm/mach-exynos/mach-smdkv310.c @@ -393,6 +393,7 @@ MACHINE_START(SMDKC210, "SMDKC210") .map_io = smdkv310_map_io, .handle_irq = gic_handle_irq, .init_machine = smdkv310_machine_init, + .init_late = exynos_init_late, .timer = &exynos4_timer, .restart = exynos4_restart, MACHINE_END diff --git a/arch/arm/mach-exynos/mach-universal_c210.c b/arch/arm/mach-exynos/mach-universal_c210.c index 7ebf79c..7c34886 100644 --- a/arch/arm/mach-exynos/mach-universal_c210.c +++ b/arch/arm/mach-exynos/mach-universal_c210.c @@ -1112,6 +1112,7 @@ MACHINE_START(UNIVERSAL_C210, "UNIVERSAL_C210") .map_io = universal_map_io, .handle_irq = gic_handle_irq, .init_machine = universal_machine_init, + .init_late = exynos_init_late, .timer = &exynos4_timer, .reserve = &universal_reserve, .restart = exynos4_restart, diff --git a/arch/arm/mach-exynos/pm_domains.c b/arch/arm/mach-exynos/pm_domains.c index 13b3068..e9fafcf 100644 --- a/arch/arm/mach-exynos/pm_domains.c +++ b/arch/arm/mach-exynos/pm_domains.c @@ -193,9 +193,8 @@ static __init int exynos4_pm_init_power_domain(void) } arch_initcall(exynos4_pm_init_power_domain); -static __init int exynos_pm_late_initcall(void) +int __init exynos_pm_late_initcall(void) { pm_genpd_poweroff_unused(); return 0; } -late_initcall(exynos_pm_late_initcall); -- cgit v0.10.2 From 8321b758e08cae7fb02663f26efee4ba985c2ae5 Mon Sep 17 00:00:00 2001 From: Shawn Guo Date: Thu, 26 Apr 2012 11:42:34 +0800 Subject: ARM: imx: use machine specific hook for late init Signed-off-by: Shawn Guo Acked-by: Sascha Hauer diff --git a/arch/arm/mach-imx/cpu-imx5.c b/arch/arm/mach-imx/cpu-imx5.c index aa15c51..8eb15a2 100644 --- a/arch/arm/mach-imx/cpu-imx5.c +++ b/arch/arm/mach-imx/cpu-imx5.c @@ -62,11 +62,8 @@ EXPORT_SYMBOL(mx51_revision); * Dependent on link order - so the assumption is that vfp_init is called * before us. */ -static int __init mx51_neon_fixup(void) +int __init mx51_neon_fixup(void) { - if (!cpu_is_mx51()) - return 0; - if (mx51_revision() < IMX_CHIP_REVISION_3_0 && (elf_hwcap & HWCAP_NEON)) { elf_hwcap &= ~HWCAP_NEON; @@ -75,7 +72,6 @@ static int __init mx51_neon_fixup(void) return 0; } -late_initcall(mx51_neon_fixup); #endif static int get_mx53_srev(void) diff --git a/arch/arm/mach-imx/imx51-dt.c b/arch/arm/mach-imx/imx51-dt.c index 5cca573..da4f4f5 100644 --- a/arch/arm/mach-imx/imx51-dt.c +++ b/arch/arm/mach-imx/imx51-dt.c @@ -115,6 +115,7 @@ DT_MACHINE_START(IMX51_DT, "Freescale i.MX51 (Device Tree Support)") .handle_irq = imx51_handle_irq, .timer = &imx51_timer, .init_machine = imx51_dt_init, + .init_late = imx51_init_late, .dt_compat = imx51_dt_board_compat, .restart = mxc_restart, MACHINE_END diff --git a/arch/arm/mach-imx/mach-cpuimx51.c b/arch/arm/mach-imx/mach-cpuimx51.c index 944025d..4bd7c76 100644 --- a/arch/arm/mach-imx/mach-cpuimx51.c +++ b/arch/arm/mach-imx/mach-cpuimx51.c @@ -297,5 +297,6 @@ MACHINE_START(EUKREA_CPUIMX51, "Eukrea CPUIMX51 Module") .handle_irq = imx51_handle_irq, .timer = &mxc_timer, .init_machine = eukrea_cpuimx51_init, + .init_late = imx51_init_late, .restart = mxc_restart, MACHINE_END diff --git a/arch/arm/mach-imx/mach-cpuimx51sd.c b/arch/arm/mach-imx/mach-cpuimx51sd.c index 9fbe923..0b8770f 100644 --- a/arch/arm/mach-imx/mach-cpuimx51sd.c +++ b/arch/arm/mach-imx/mach-cpuimx51sd.c @@ -335,5 +335,6 @@ MACHINE_START(EUKREA_CPUIMX51SD, "Eukrea CPUIMX51SD") .handle_irq = imx51_handle_irq, .timer = &mxc_timer, .init_machine = eukrea_cpuimx51sd_init, + .init_late = imx51_init_late, .restart = mxc_restart, MACHINE_END diff --git a/arch/arm/mach-imx/mach-mx51_3ds.c b/arch/arm/mach-imx/mach-mx51_3ds.c index 83eab41..3c5b163 100644 --- a/arch/arm/mach-imx/mach-mx51_3ds.c +++ b/arch/arm/mach-imx/mach-mx51_3ds.c @@ -175,5 +175,6 @@ MACHINE_START(MX51_3DS, "Freescale MX51 3-Stack Board") .handle_irq = imx51_handle_irq, .timer = &mx51_3ds_timer, .init_machine = mx51_3ds_init, + .init_late = imx51_init_late, .restart = mxc_restart, MACHINE_END diff --git a/arch/arm/mach-imx/mach-mx51_babbage.c b/arch/arm/mach-imx/mach-mx51_babbage.c index e4b822e..f3b0115 100644 --- a/arch/arm/mach-imx/mach-mx51_babbage.c +++ b/arch/arm/mach-imx/mach-mx51_babbage.c @@ -426,5 +426,6 @@ MACHINE_START(MX51_BABBAGE, "Freescale MX51 Babbage Board") .handle_irq = imx51_handle_irq, .timer = &mx51_babbage_timer, .init_machine = mx51_babbage_init, + .init_late = imx51_init_late, .restart = mxc_restart, MACHINE_END diff --git a/arch/arm/mach-imx/mach-mx51_efikamx.c b/arch/arm/mach-imx/mach-mx51_efikamx.c index 586e9f8..a9f3c7c 100644 --- a/arch/arm/mach-imx/mach-mx51_efikamx.c +++ b/arch/arm/mach-imx/mach-mx51_efikamx.c @@ -207,29 +207,32 @@ static void mx51_efikamx_power_off(void) static int __init mx51_efikamx_power_init(void) { - if (machine_is_mx51_efikamx()) { - pwgt1 = regulator_get(NULL, "pwgt1"); - pwgt2 = regulator_get(NULL, "pwgt2"); - if (!IS_ERR(pwgt1) && !IS_ERR(pwgt2)) { - regulator_enable(pwgt1); - regulator_enable(pwgt2); - } - gpio_request(EFIKAMX_POWEROFF, "poweroff"); - pm_power_off = mx51_efikamx_power_off; - - /* enable coincell charger. maybe need a small power driver ? */ - coincell = regulator_get(NULL, "coincell"); - if (!IS_ERR(coincell)) { - regulator_set_voltage(coincell, 3000000, 3000000); - regulator_enable(coincell); - } - - regulator_has_full_constraints(); + pwgt1 = regulator_get(NULL, "pwgt1"); + pwgt2 = regulator_get(NULL, "pwgt2"); + if (!IS_ERR(pwgt1) && !IS_ERR(pwgt2)) { + regulator_enable(pwgt1); + regulator_enable(pwgt2); + } + gpio_request(EFIKAMX_POWEROFF, "poweroff"); + pm_power_off = mx51_efikamx_power_off; + + /* enable coincell charger. maybe need a small power driver ? */ + coincell = regulator_get(NULL, "coincell"); + if (!IS_ERR(coincell)) { + regulator_set_voltage(coincell, 3000000, 3000000); + regulator_enable(coincell); } + regulator_has_full_constraints(); + return 0; } -late_initcall(mx51_efikamx_power_init); + +static void __init mx51_efikamx_init_late(void) +{ + imx51_init_late(); + mx51_efikamx_power_init(); +} static void __init mx51_efikamx_init(void) { @@ -293,5 +296,6 @@ MACHINE_START(MX51_EFIKAMX, "Genesi EfikaMX nettop") .handle_irq = imx51_handle_irq, .timer = &mx51_efikamx_timer, .init_machine = mx51_efikamx_init, + .init_late = mx51_efikamx_init_late, .restart = mx51_efikamx_restart, MACHINE_END diff --git a/arch/arm/mach-imx/mach-mx51_efikasb.c b/arch/arm/mach-imx/mach-mx51_efikasb.c index 24aded9..e73db38 100644 --- a/arch/arm/mach-imx/mach-mx51_efikasb.c +++ b/arch/arm/mach-imx/mach-mx51_efikasb.c @@ -211,22 +211,25 @@ static void mx51_efikasb_power_off(void) static int __init mx51_efikasb_power_init(void) { - if (machine_is_mx51_efikasb()) { - pwgt1 = regulator_get(NULL, "pwgt1"); - pwgt2 = regulator_get(NULL, "pwgt2"); - if (!IS_ERR(pwgt1) && !IS_ERR(pwgt2)) { - regulator_enable(pwgt1); - regulator_enable(pwgt2); - } - gpio_request(EFIKASB_POWEROFF, "poweroff"); - pm_power_off = mx51_efikasb_power_off; - - regulator_has_full_constraints(); + pwgt1 = regulator_get(NULL, "pwgt1"); + pwgt2 = regulator_get(NULL, "pwgt2"); + if (!IS_ERR(pwgt1) && !IS_ERR(pwgt2)) { + regulator_enable(pwgt1); + regulator_enable(pwgt2); } + gpio_request(EFIKASB_POWEROFF, "poweroff"); + pm_power_off = mx51_efikasb_power_off; + + regulator_has_full_constraints(); return 0; } -late_initcall(mx51_efikasb_power_init); + +static void __init mx51_efikasb_init_late(void) +{ + imx51_init_late(); + mx51_efikasb_power_init(); +} /* 01 R1.3 board 10 R2.0 board */ @@ -287,6 +290,7 @@ MACHINE_START(MX51_EFIKASB, "Genesi Efika Smartbook") .init_irq = mx51_init_irq, .handle_irq = imx51_handle_irq, .init_machine = efikasb_board_init, + .init_late = mx51_efikasb_init_late, .timer = &mx51_efikasb_timer, .restart = mxc_restart, MACHINE_END diff --git a/arch/arm/mach-imx/mach-pcm037.c b/arch/arm/mach-imx/mach-pcm037.c index 5fddf94..3aba3a9 100644 --- a/arch/arm/mach-imx/mach-pcm037.c +++ b/arch/arm/mach-imx/mach-pcm037.c @@ -694,6 +694,11 @@ static void __init pcm037_reserve(void) MX3_CAMERA_BUF_SIZE); } +static void __init pcm037_init_late(void) +{ + pcm037_eet_init_devices(); +} + MACHINE_START(PCM037, "Phytec Phycore pcm037") /* Maintainer: Pengutronix */ .atag_offset = 0x100, @@ -704,5 +709,6 @@ MACHINE_START(PCM037, "Phytec Phycore pcm037") .handle_irq = imx31_handle_irq, .timer = &pcm037_timer, .init_machine = pcm037_init, + .init_late = pcm037_init_late, .restart = mxc_restart, MACHINE_END diff --git a/arch/arm/mach-imx/mach-pcm037_eet.c b/arch/arm/mach-imx/mach-pcm037_eet.c index 1b7606b..11ffa81 100644 --- a/arch/arm/mach-imx/mach-pcm037_eet.c +++ b/arch/arm/mach-imx/mach-pcm037_eet.c @@ -160,9 +160,9 @@ static const struct gpio_keys_platform_data .rep = 0, /* No auto-repeat */ }; -static int __init eet_init_devices(void) +int __init pcm037_eet_init_devices(void) { - if (!machine_is_pcm037() || pcm037_variant() != PCM037_EET) + if (pcm037_variant() != PCM037_EET) return 0; mxc_iomux_setup_multiple_pins(pcm037_eet_pins, @@ -176,4 +176,3 @@ static int __init eet_init_devices(void) return 0; } -late_initcall(eet_init_devices); diff --git a/arch/arm/mach-imx/mm-imx5.c b/arch/arm/mach-imx/mm-imx5.c index e10f391..d6b7e9f 100644 --- a/arch/arm/mach-imx/mm-imx5.c +++ b/arch/arm/mach-imx/mm-imx5.c @@ -234,3 +234,8 @@ void __init imx53_soc_init(void) platform_device_register_simple("imx31-audmux", 0, imx53_audmux_res, ARRAY_SIZE(imx53_audmux_res)); } + +void __init imx51_init_late(void) +{ + mx51_neon_fixup(); +} diff --git a/arch/arm/mach-imx/pcm037.h b/arch/arm/mach-imx/pcm037.h index d692972..7d16769 100644 --- a/arch/arm/mach-imx/pcm037.h +++ b/arch/arm/mach-imx/pcm037.h @@ -8,4 +8,10 @@ enum pcm037_board_variant { extern enum pcm037_board_variant pcm037_variant(void); +#ifdef CONFIG_MACH_PCM037_EET +int pcm037_eet_init_devices(void); +#else +static inline int pcm037_eet_init_devices(void) { return 0; } +#endif + #endif diff --git a/arch/arm/plat-mxc/include/mach/common.h b/arch/arm/plat-mxc/include/mach/common.h index 0319c4a..cf663d8 100644 --- a/arch/arm/plat-mxc/include/mach/common.h +++ b/arch/arm/plat-mxc/include/mach/common.h @@ -53,6 +53,7 @@ extern void imx35_soc_init(void); extern void imx50_soc_init(void); extern void imx51_soc_init(void); extern void imx53_soc_init(void); +extern void imx51_init_late(void); extern void epit_timer_init(struct clk *timer_clk, void __iomem *base, int irq); extern void mxc_timer_init(struct clk *timer_clk, void __iomem *, int); extern int mx1_clocks_init(unsigned long fref); @@ -149,4 +150,10 @@ extern void imx6q_pm_init(void); static inline void imx6q_pm_init(void) {} #endif +#ifdef CONFIG_NEON +extern int mx51_neon_fixup(void); +#else +static inline int mx51_neon_fixup(void) { return 0; } +#endif + #endif -- cgit v0.10.2 From c633c531f8afbcfd422409c3350b8dc55baa485e Mon Sep 17 00:00:00 2001 From: Shawn Guo Date: Wed, 2 May 2012 15:53:20 +0800 Subject: ARM: msm: use machine specific hook for late init Cc: Daniel Walker Cc: Bryan Huntsman Signed-off-by: Shawn Guo Acked-by: David Brown diff --git a/arch/arm/mach-msm/board-halibut.c b/arch/arm/mach-msm/board-halibut.c index 26aac36..4fa3e99 100644 --- a/arch/arm/mach-msm/board-halibut.c +++ b/arch/arm/mach-msm/board-halibut.c @@ -94,6 +94,11 @@ static void __init halibut_map_io(void) msm_clock_init(msm_clocks_7x01a, msm_num_clocks_7x01a); } +static void __init halibut_init_late(void) +{ + smd_debugfs_init(); +} + MACHINE_START(HALIBUT, "Halibut Board (QCT SURF7200A)") .atag_offset = 0x100, .fixup = halibut_fixup, @@ -101,5 +106,6 @@ MACHINE_START(HALIBUT, "Halibut Board (QCT SURF7200A)") .init_early = halibut_init_early, .init_irq = halibut_init_irq, .init_machine = halibut_init, + .init_late = halibut_init_late, .timer = &msm_timer, MACHINE_END diff --git a/arch/arm/mach-msm/board-mahimahi.c b/arch/arm/mach-msm/board-mahimahi.c index 5a4882f..cf1f89a 100644 --- a/arch/arm/mach-msm/board-mahimahi.c +++ b/arch/arm/mach-msm/board-mahimahi.c @@ -71,6 +71,11 @@ static void __init mahimahi_map_io(void) msm_clock_init(); } +static void __init mahimahi_init_late(void) +{ + smd_debugfs_init(); +} + extern struct sys_timer msm_timer; MACHINE_START(MAHIMAHI, "mahimahi") @@ -79,5 +84,6 @@ MACHINE_START(MAHIMAHI, "mahimahi") .map_io = mahimahi_map_io, .init_irq = msm_init_irq, .init_machine = mahimahi_init, + .init_late = mahimahi_init_late, .timer = &msm_timer, MACHINE_END diff --git a/arch/arm/mach-msm/board-msm7x27.c b/arch/arm/mach-msm/board-msm7x27.c index 6d84ee7..451ab1d 100644 --- a/arch/arm/mach-msm/board-msm7x27.c +++ b/arch/arm/mach-msm/board-msm7x27.c @@ -128,11 +128,17 @@ static void __init msm7x2x_map_io(void) #endif } +static void __init msm7x2x_init_late(void) +{ + smd_debugfs_init(); +} + MACHINE_START(MSM7X27_SURF, "QCT MSM7x27 SURF") .atag_offset = 0x100, .map_io = msm7x2x_map_io, .init_irq = msm7x2x_init_irq, .init_machine = msm7x2x_init, + .init_late = msm7x2x_init_late, .timer = &msm_timer, MACHINE_END @@ -141,6 +147,7 @@ MACHINE_START(MSM7X27_FFA, "QCT MSM7x27 FFA") .map_io = msm7x2x_map_io, .init_irq = msm7x2x_init_irq, .init_machine = msm7x2x_init, + .init_late = msm7x2x_init_late, .timer = &msm_timer, MACHINE_END @@ -149,6 +156,7 @@ MACHINE_START(MSM7X25_SURF, "QCT MSM7x25 SURF") .map_io = msm7x2x_map_io, .init_irq = msm7x2x_init_irq, .init_machine = msm7x2x_init, + .init_late = msm7x2x_init_late, .timer = &msm_timer, MACHINE_END @@ -157,5 +165,6 @@ MACHINE_START(MSM7X25_FFA, "QCT MSM7x25 FFA") .map_io = msm7x2x_map_io, .init_irq = msm7x2x_init_irq, .init_machine = msm7x2x_init, + .init_late = msm7x2x_init_late, .timer = &msm_timer, MACHINE_END diff --git a/arch/arm/mach-msm/board-msm7x30.c b/arch/arm/mach-msm/board-msm7x30.c index db81ed5..6095d35 100644 --- a/arch/arm/mach-msm/board-msm7x30.c +++ b/arch/arm/mach-msm/board-msm7x30.c @@ -120,6 +120,11 @@ static void __init msm7x30_map_io(void) msm_clock_init(msm_clocks_7x30, msm_num_clocks_7x30); } +static void __init msm7x30_init_late(void) +{ + smd_debugfs_init(); +} + MACHINE_START(MSM7X30_SURF, "QCT MSM7X30 SURF") .atag_offset = 0x100, .fixup = msm7x30_fixup, @@ -127,6 +132,7 @@ MACHINE_START(MSM7X30_SURF, "QCT MSM7X30 SURF") .map_io = msm7x30_map_io, .init_irq = msm7x30_init_irq, .init_machine = msm7x30_init, + .init_late = msm7x30_init_late, .timer = &msm_timer, MACHINE_END @@ -137,6 +143,7 @@ MACHINE_START(MSM7X30_FFA, "QCT MSM7X30 FFA") .map_io = msm7x30_map_io, .init_irq = msm7x30_init_irq, .init_machine = msm7x30_init, + .init_late = msm7x30_init_late, .timer = &msm_timer, MACHINE_END @@ -147,5 +154,6 @@ MACHINE_START(MSM7X30_FLUID, "QCT MSM7X30 FLUID") .map_io = msm7x30_map_io, .init_irq = msm7x30_init_irq, .init_machine = msm7x30_init, + .init_late = msm7x30_init_late, .timer = &msm_timer, MACHINE_END diff --git a/arch/arm/mach-msm/board-msm8960.c b/arch/arm/mach-msm/board-msm8960.c index ed35981..65f4a1d 100644 --- a/arch/arm/mach-msm/board-msm8960.c +++ b/arch/arm/mach-msm/board-msm8960.c @@ -93,6 +93,11 @@ static void __init msm8960_rumi3_init(void) platform_add_devices(rumi3_devices, ARRAY_SIZE(rumi3_devices)); } +static void __init msm8960_init_late(void) +{ + smd_debugfs_init(); +} + MACHINE_START(MSM8960_SIM, "QCT MSM8960 SIMULATOR") .fixup = msm8960_fixup, .reserve = msm8960_reserve, @@ -101,6 +106,7 @@ MACHINE_START(MSM8960_SIM, "QCT MSM8960 SIMULATOR") .timer = &msm_timer, .handle_irq = gic_handle_irq, .init_machine = msm8960_sim_init, + .init_late = msm8960_init_late, MACHINE_END MACHINE_START(MSM8960_RUMI3, "QCT MSM8960 RUMI3") @@ -111,5 +117,6 @@ MACHINE_START(MSM8960_RUMI3, "QCT MSM8960 RUMI3") .timer = &msm_timer, .handle_irq = gic_handle_irq, .init_machine = msm8960_rumi3_init, + .init_late = msm8960_init_late, MACHINE_END diff --git a/arch/arm/mach-msm/board-msm8x60.c b/arch/arm/mach-msm/board-msm8x60.c index 962e711..62f2c93 100644 --- a/arch/arm/mach-msm/board-msm8x60.c +++ b/arch/arm/mach-msm/board-msm8x60.c @@ -68,6 +68,11 @@ static void __init msm8x60_init(void) { } +static void __init msm8x60_init_late(void) +{ + smd_debugfs_init(); +} + #ifdef CONFIG_OF static struct of_dev_auxdata msm_auxdata_lookup[] __initdata = { {} @@ -106,6 +111,7 @@ MACHINE_START(MSM8X60_RUMI3, "QCT MSM8X60 RUMI3") .init_irq = msm8x60_init_irq, .handle_irq = gic_handle_irq, .init_machine = msm8x60_init, + .init_late = msm8x60_init_late, .timer = &msm_timer, MACHINE_END @@ -116,6 +122,7 @@ MACHINE_START(MSM8X60_SURF, "QCT MSM8X60 SURF") .init_irq = msm8x60_init_irq, .handle_irq = gic_handle_irq, .init_machine = msm8x60_init, + .init_late = msm8x60_init_late, .timer = &msm_timer, MACHINE_END @@ -126,6 +133,7 @@ MACHINE_START(MSM8X60_SIM, "QCT MSM8X60 SIMULATOR") .init_irq = msm8x60_init_irq, .handle_irq = gic_handle_irq, .init_machine = msm8x60_init, + .init_late = msm8x60_init_late, .timer = &msm_timer, MACHINE_END @@ -136,6 +144,7 @@ MACHINE_START(MSM8X60_FFA, "QCT MSM8X60 FFA") .init_irq = msm8x60_init_irq, .handle_irq = gic_handle_irq, .init_machine = msm8x60_init, + .init_late = msm8x60_init_late, .timer = &msm_timer, MACHINE_END @@ -145,6 +154,7 @@ DT_MACHINE_START(MSM_DT, "Qualcomm MSM (Flattened Device Tree)") .map_io = msm8x60_map_io, .init_irq = msm8x60_init_irq, .init_machine = msm8x60_dt_init, + .init_late = msm8x60_init_late, .timer = &msm_timer, .dt_compat = msm8x60_fluid_match, MACHINE_END diff --git a/arch/arm/mach-msm/board-qsd8x50.c b/arch/arm/mach-msm/board-qsd8x50.c index 7e8909c..dd625aa 100644 --- a/arch/arm/mach-msm/board-qsd8x50.c +++ b/arch/arm/mach-msm/board-qsd8x50.c @@ -191,11 +191,17 @@ static void __init qsd8x50_init(void) qsd8x50_init_mmc(); } +static void __init qsd8x50_init_late(void) +{ + smd_debugfs_init(); +} + MACHINE_START(QSD8X50_SURF, "QCT QSD8X50 SURF") .atag_offset = 0x100, .map_io = qsd8x50_map_io, .init_irq = qsd8x50_init_irq, .init_machine = qsd8x50_init, + .init_late = qsd8x50_init_late, .timer = &msm_timer, MACHINE_END @@ -204,5 +210,6 @@ MACHINE_START(QSD8X50A_ST1_5, "QCT QSD8X50A ST1.5") .map_io = qsd8x50_map_io, .init_irq = qsd8x50_init_irq, .init_machine = qsd8x50_init, + .init_late = qsd8x50_init_late, .timer = &msm_timer, MACHINE_END diff --git a/arch/arm/mach-msm/board-sapphire.c b/arch/arm/mach-msm/board-sapphire.c index 4a8ea0d..2e569ab 100644 --- a/arch/arm/mach-msm/board-sapphire.c +++ b/arch/arm/mach-msm/board-sapphire.c @@ -101,6 +101,11 @@ static void __init sapphire_map_io(void) msm_clock_init(); } +static void __init sapphire_init_late(void) +{ + smd_debugfs_init(); +} + MACHINE_START(SAPPHIRE, "sapphire") /* Maintainer: Brian Swetland */ .atag_offset = 0x100, @@ -108,5 +113,6 @@ MACHINE_START(SAPPHIRE, "sapphire") .map_io = sapphire_map_io, .init_irq = sapphire_init_irq, .init_machine = sapphire_init, + .init_late = sapphire_init_late, .timer = &msm_timer, MACHINE_END diff --git a/arch/arm/mach-msm/board-trout.c b/arch/arm/mach-msm/board-trout.c index d4060a3..bbe13f1 100644 --- a/arch/arm/mach-msm/board-trout.c +++ b/arch/arm/mach-msm/board-trout.c @@ -98,6 +98,11 @@ static void __init trout_map_io(void) msm_clock_init(msm_clocks_7x01a, msm_num_clocks_7x01a); } +static void __init trout_init_late(void) +{ + smd_debugfs_init(); +} + MACHINE_START(TROUT, "HTC Dream") .atag_offset = 0x100, .fixup = trout_fixup, @@ -105,5 +110,6 @@ MACHINE_START(TROUT, "HTC Dream") .init_early = trout_init_early, .init_irq = trout_init_irq, .init_machine = trout_init, + .init_late = trout_init_late, .timer = &msm_timer, MACHINE_END diff --git a/arch/arm/mach-msm/include/mach/board.h b/arch/arm/mach-msm/include/mach/board.h index 2ce8f1f..435f8ed 100644 --- a/arch/arm/mach-msm/include/mach/board.h +++ b/arch/arm/mach-msm/include/mach/board.h @@ -47,4 +47,10 @@ int __init msm_add_sdcc(unsigned int controller, struct msm_mmc_platform_data *plat, unsigned int stat_irq, unsigned long stat_irq_flags); +#if defined(CONFIG_MSM_SMD) && defined(CONFIG_DEBUG_FS) +int smd_debugfs_init(void); +#else +static inline int smd_debugfs_init(void) { return 0; } +#endif + #endif diff --git a/arch/arm/mach-msm/smd_debug.c b/arch/arm/mach-msm/smd_debug.c index c56df9e..8056b3e 100644 --- a/arch/arm/mach-msm/smd_debug.c +++ b/arch/arm/mach-msm/smd_debug.c @@ -216,7 +216,7 @@ static void debug_create(const char *name, umode_t mode, debugfs_create_file(name, mode, dent, fill, &debug_ops); } -static int smd_debugfs_init(void) +int __init smd_debugfs_init(void) { struct dentry *dent; @@ -234,7 +234,6 @@ static int smd_debugfs_init(void) return 0; } -late_initcall(smd_debugfs_init); #endif -- cgit v0.10.2 From 82c3bd03535f1571426fdd19b7d832f76b7ac85e Mon Sep 17 00:00:00 2001 From: Shawn Guo Date: Thu, 26 Apr 2012 13:49:29 +0800 Subject: ARM: omap1: use machine specific hook for late init Signed-off-by: Shawn Guo Acked-by: Tony Lindgren diff --git a/arch/arm/mach-omap1/board-ams-delta.c b/arch/arm/mach-omap1/board-ams-delta.c index c1b681e..f2f8a58 100644 --- a/arch/arm/mach-omap1/board-ams-delta.c +++ b/arch/arm/mach-omap1/board-ams-delta.c @@ -595,7 +595,12 @@ gpio_free: gpio_free(AMS_DELTA_GPIO_PIN_MODEM_IRQ); return err; } -late_initcall(late_init); + +static void __init ams_delta_init_late(void) +{ + omap1_init_late(); + late_init(); +} static void __init ams_delta_map_io(void) { @@ -611,6 +616,7 @@ MACHINE_START(AMS_DELTA, "Amstrad E3 (Delta)") .reserve = omap_reserve, .init_irq = omap1_init_irq, .init_machine = ams_delta_init, + .init_late = ams_delta_init_late, .timer = &omap1_timer, .restart = omap1_restart, MACHINE_END diff --git a/arch/arm/mach-omap1/board-fsample.c b/arch/arm/mach-omap1/board-fsample.c index 80bd43c..a77ee57 100644 --- a/arch/arm/mach-omap1/board-fsample.c +++ b/arch/arm/mach-omap1/board-fsample.c @@ -383,6 +383,7 @@ MACHINE_START(OMAP_FSAMPLE, "OMAP730 F-Sample") .reserve = omap_reserve, .init_irq = omap1_init_irq, .init_machine = omap_fsample_init, + .init_late = omap1_init_late, .timer = &omap1_timer, .restart = omap1_restart, MACHINE_END diff --git a/arch/arm/mach-omap1/board-generic.c b/arch/arm/mach-omap1/board-generic.c index 9a5fe58..e75e2d5 100644 --- a/arch/arm/mach-omap1/board-generic.c +++ b/arch/arm/mach-omap1/board-generic.c @@ -88,6 +88,7 @@ MACHINE_START(OMAP_GENERIC, "Generic OMAP1510/1610/1710") .reserve = omap_reserve, .init_irq = omap1_init_irq, .init_machine = omap_generic_init, + .init_late = omap1_init_late, .timer = &omap1_timer, .restart = omap1_restart, MACHINE_END diff --git a/arch/arm/mach-omap1/board-h2.c b/arch/arm/mach-omap1/board-h2.c index 553a2e5..04cf994 100644 --- a/arch/arm/mach-omap1/board-h2.c +++ b/arch/arm/mach-omap1/board-h2.c @@ -446,6 +446,7 @@ MACHINE_START(OMAP_H2, "TI-H2") .reserve = omap_reserve, .init_irq = omap1_init_irq, .init_machine = h2_init, + .init_late = omap1_init_late, .timer = &omap1_timer, .restart = omap1_restart, MACHINE_END diff --git a/arch/arm/mach-omap1/board-h3.c b/arch/arm/mach-omap1/board-h3.c index 4c19f4c..a11ef0a 100644 --- a/arch/arm/mach-omap1/board-h3.c +++ b/arch/arm/mach-omap1/board-h3.c @@ -439,6 +439,7 @@ MACHINE_START(OMAP_H3, "TI OMAP1710 H3 board") .reserve = omap_reserve, .init_irq = omap1_init_irq, .init_machine = h3_init, + .init_late = omap1_init_late, .timer = &omap1_timer, .restart = omap1_restart, MACHINE_END diff --git a/arch/arm/mach-omap1/board-htcherald.c b/arch/arm/mach-omap1/board-htcherald.c index 60c06ee..118a9d4 100644 --- a/arch/arm/mach-omap1/board-htcherald.c +++ b/arch/arm/mach-omap1/board-htcherald.c @@ -605,6 +605,7 @@ MACHINE_START(HERALD, "HTC Herald") .reserve = omap_reserve, .init_irq = omap1_init_irq, .init_machine = htcherald_init, + .init_late = omap1_init_late, .timer = &omap1_timer, .restart = omap1_restart, MACHINE_END diff --git a/arch/arm/mach-omap1/board-innovator.c b/arch/arm/mach-omap1/board-innovator.c index 67d7fd5..7970223 100644 --- a/arch/arm/mach-omap1/board-innovator.c +++ b/arch/arm/mach-omap1/board-innovator.c @@ -457,6 +457,7 @@ MACHINE_START(OMAP_INNOVATOR, "TI-Innovator") .reserve = omap_reserve, .init_irq = omap1_init_irq, .init_machine = innovator_init, + .init_late = omap1_init_late, .timer = &omap1_timer, .restart = omap1_restart, MACHINE_END diff --git a/arch/arm/mach-omap1/board-nokia770.c b/arch/arm/mach-omap1/board-nokia770.c index d21dcc2..7212ae9 100644 --- a/arch/arm/mach-omap1/board-nokia770.c +++ b/arch/arm/mach-omap1/board-nokia770.c @@ -255,6 +255,7 @@ MACHINE_START(NOKIA770, "Nokia 770") .reserve = omap_reserve, .init_irq = omap1_init_irq, .init_machine = omap_nokia770_init, + .init_late = omap1_init_late, .timer = &omap1_timer, .restart = omap1_restart, MACHINE_END diff --git a/arch/arm/mach-omap1/board-osk.c b/arch/arm/mach-omap1/board-osk.c index a5f85dd..da8d872 100644 --- a/arch/arm/mach-omap1/board-osk.c +++ b/arch/arm/mach-omap1/board-osk.c @@ -574,6 +574,7 @@ MACHINE_START(OMAP_OSK, "TI-OSK") .reserve = omap_reserve, .init_irq = omap1_init_irq, .init_machine = osk_init, + .init_late = omap1_init_late, .timer = &omap1_timer, .restart = omap1_restart, MACHINE_END diff --git a/arch/arm/mach-omap1/board-palmte.c b/arch/arm/mach-omap1/board-palmte.c index a60e6c2..949b62a 100644 --- a/arch/arm/mach-omap1/board-palmte.c +++ b/arch/arm/mach-omap1/board-palmte.c @@ -267,6 +267,7 @@ MACHINE_START(OMAP_PALMTE, "OMAP310 based Palm Tungsten E") .reserve = omap_reserve, .init_irq = omap1_init_irq, .init_machine = omap_palmte_init, + .init_late = omap1_init_late, .timer = &omap1_timer, .restart = omap1_restart, MACHINE_END diff --git a/arch/arm/mach-omap1/board-palmtt.c b/arch/arm/mach-omap1/board-palmtt.c index 8d85487..7f1e1cf 100644 --- a/arch/arm/mach-omap1/board-palmtt.c +++ b/arch/arm/mach-omap1/board-palmtt.c @@ -313,6 +313,7 @@ MACHINE_START(OMAP_PALMTT, "OMAP1510 based Palm Tungsten|T") .reserve = omap_reserve, .init_irq = omap1_init_irq, .init_machine = omap_palmtt_init, + .init_late = omap1_init_late, .timer = &omap1_timer, .restart = omap1_restart, MACHINE_END diff --git a/arch/arm/mach-omap1/board-palmz71.c b/arch/arm/mach-omap1/board-palmz71.c index a2c5abc..0711685 100644 --- a/arch/arm/mach-omap1/board-palmz71.c +++ b/arch/arm/mach-omap1/board-palmz71.c @@ -330,6 +330,7 @@ MACHINE_START(OMAP_PALMZ71, "OMAP310 based Palm Zire71") .reserve = omap_reserve, .init_irq = omap1_init_irq, .init_machine = omap_palmz71_init, + .init_late = omap1_init_late, .timer = &omap1_timer, .restart = omap1_restart, MACHINE_END diff --git a/arch/arm/mach-omap1/board-perseus2.c b/arch/arm/mach-omap1/board-perseus2.c index 76d4ee0..512989b 100644 --- a/arch/arm/mach-omap1/board-perseus2.c +++ b/arch/arm/mach-omap1/board-perseus2.c @@ -345,6 +345,7 @@ MACHINE_START(OMAP_PERSEUS2, "OMAP730 Perseus2") .reserve = omap_reserve, .init_irq = omap1_init_irq, .init_machine = omap_perseus2_init, + .init_late = omap1_init_late, .timer = &omap1_timer, .restart = omap1_restart, MACHINE_END diff --git a/arch/arm/mach-omap1/board-sx1.c b/arch/arm/mach-omap1/board-sx1.c index f34cb74..3b7b82b 100644 --- a/arch/arm/mach-omap1/board-sx1.c +++ b/arch/arm/mach-omap1/board-sx1.c @@ -407,6 +407,7 @@ MACHINE_START(SX1, "OMAP310 based Siemens SX1") .reserve = omap_reserve, .init_irq = omap1_init_irq, .init_machine = omap_sx1_init, + .init_late = omap1_init_late, .timer = &omap1_timer, .restart = omap1_restart, MACHINE_END diff --git a/arch/arm/mach-omap1/board-voiceblue.c b/arch/arm/mach-omap1/board-voiceblue.c index 37232d0..afd67f0 100644 --- a/arch/arm/mach-omap1/board-voiceblue.c +++ b/arch/arm/mach-omap1/board-voiceblue.c @@ -294,6 +294,7 @@ MACHINE_START(VOICEBLUE, "VoiceBlue OMAP5910") .reserve = omap_reserve, .init_irq = omap1_init_irq, .init_machine = voiceblue_init, + .init_late = omap1_init_late, .timer = &omap1_timer, .restart = voiceblue_restart, MACHINE_END diff --git a/arch/arm/mach-omap1/common.h b/arch/arm/mach-omap1/common.h index af658ad..0b77e6d 100644 --- a/arch/arm/mach-omap1/common.h +++ b/arch/arm/mach-omap1/common.h @@ -52,8 +52,18 @@ static inline void omap16xx_map_io(void) } #endif +#ifdef CONFIG_OMAP_SERIAL_WAKE +int omap_serial_wakeup_init(void); +#else +static inline int omap_serial_wakeup_init(void) +{ + return 0; +} +#endif + void omap1_init_early(void); void omap1_init_irq(void); +void omap1_init_late(void); void omap1_restart(char, const char *); extern struct sys_timer omap1_timer; diff --git a/arch/arm/mach-omap1/io.c b/arch/arm/mach-omap1/io.c index d969a72..cf811f2 100644 --- a/arch/arm/mach-omap1/io.c +++ b/arch/arm/mach-omap1/io.c @@ -138,6 +138,11 @@ void __init omap1_init_early(void) omap_init_consistent_dma_size(); } +void __init omap1_init_late(void) +{ + omap_serial_wakeup_init(); +} + /* * NOTE: Please use ioremap + __raw_read/write where possible instead of these */ diff --git a/arch/arm/mach-omap1/serial.c b/arch/arm/mach-omap1/serial.c index 93ae8f2..6809c9e 100644 --- a/arch/arm/mach-omap1/serial.c +++ b/arch/arm/mach-omap1/serial.c @@ -237,7 +237,7 @@ static void __init omap_serial_set_port_wakeup(int gpio_nr) enable_irq_wake(gpio_to_irq(gpio_nr)); } -static int __init omap_serial_wakeup_init(void) +int __init omap_serial_wakeup_init(void) { if (!cpu_is_omap16xx()) return 0; @@ -251,7 +251,6 @@ static int __init omap_serial_wakeup_init(void) return 0; } -late_initcall(omap_serial_wakeup_init); #endif /* CONFIG_OMAP_SERIAL_WAKE */ -- cgit v0.10.2 From bbd707acee279a61177a604822db92e8164d00db Mon Sep 17 00:00:00 2001 From: Shawn Guo Date: Thu, 26 Apr 2012 16:06:50 +0800 Subject: ARM: omap2: use machine specific hook for late init Signed-off-by: Shawn Guo Acked-by: Tony Lindgren diff --git a/arch/arm/mach-omap2/board-2430sdp.c b/arch/arm/mach-omap2/board-2430sdp.c index e658f83..99ca6ba 100644 --- a/arch/arm/mach-omap2/board-2430sdp.c +++ b/arch/arm/mach-omap2/board-2430sdp.c @@ -303,6 +303,7 @@ MACHINE_START(OMAP_2430SDP, "OMAP2430 sdp2430 board") .init_irq = omap2_init_irq, .handle_irq = omap2_intc_handle_irq, .init_machine = omap_2430sdp_init, + .init_late = omap2430_init_late, .timer = &omap2_timer, .restart = omap_prcm_restart, MACHINE_END diff --git a/arch/arm/mach-omap2/board-3430sdp.c b/arch/arm/mach-omap2/board-3430sdp.c index da75f23..8ceb480 100644 --- a/arch/arm/mach-omap2/board-3430sdp.c +++ b/arch/arm/mach-omap2/board-3430sdp.c @@ -635,6 +635,7 @@ MACHINE_START(OMAP_3430SDP, "OMAP3430 3430SDP board") .init_irq = omap3_init_irq, .handle_irq = omap3_intc_handle_irq, .init_machine = omap_3430sdp_init, + .init_late = omap3430_init_late, .timer = &omap3_timer, .restart = omap_prcm_restart, MACHINE_END diff --git a/arch/arm/mach-omap2/board-3630sdp.c b/arch/arm/mach-omap2/board-3630sdp.c index 6ef350d..2dc9ba5 100644 --- a/arch/arm/mach-omap2/board-3630sdp.c +++ b/arch/arm/mach-omap2/board-3630sdp.c @@ -217,6 +217,7 @@ MACHINE_START(OMAP_3630SDP, "OMAP 3630SDP board") .init_irq = omap3_init_irq, .handle_irq = omap3_intc_handle_irq, .init_machine = omap_sdp_init, + .init_late = omap3630_init_late, .timer = &omap3_timer, .restart = omap_prcm_restart, MACHINE_END diff --git a/arch/arm/mach-omap2/board-4430sdp.c b/arch/arm/mach-omap2/board-4430sdp.c index 130ab00..3f6cf22 100644 --- a/arch/arm/mach-omap2/board-4430sdp.c +++ b/arch/arm/mach-omap2/board-4430sdp.c @@ -969,6 +969,7 @@ MACHINE_START(OMAP_4430SDP, "OMAP4430 4430SDP board") .init_irq = gic_init_irq, .handle_irq = gic_handle_irq, .init_machine = omap_4430sdp_init, + .init_late = omap4430_init_late, .timer = &omap4_timer, .restart = omap_prcm_restart, MACHINE_END diff --git a/arch/arm/mach-omap2/board-am3517crane.c b/arch/arm/mach-omap2/board-am3517crane.c index c3851e8..7231020 100644 --- a/arch/arm/mach-omap2/board-am3517crane.c +++ b/arch/arm/mach-omap2/board-am3517crane.c @@ -100,6 +100,7 @@ MACHINE_START(CRANEBOARD, "AM3517/05 CRANEBOARD") .init_irq = omap3_init_irq, .handle_irq = omap3_intc_handle_irq, .init_machine = am3517_crane_init, + .init_late = am35xx_init_late, .timer = &omap3_timer, .restart = omap_prcm_restart, MACHINE_END diff --git a/arch/arm/mach-omap2/board-am3517evm.c b/arch/arm/mach-omap2/board-am3517evm.c index 3645285..ae50e11 100644 --- a/arch/arm/mach-omap2/board-am3517evm.c +++ b/arch/arm/mach-omap2/board-am3517evm.c @@ -402,6 +402,7 @@ MACHINE_START(OMAP3517EVM, "OMAP3517/AM3517 EVM") .init_irq = omap3_init_irq, .handle_irq = omap3_intc_handle_irq, .init_machine = am3517_evm_init, + .init_late = am35xx_init_late, .timer = &omap3_timer, .restart = omap_prcm_restart, MACHINE_END diff --git a/arch/arm/mach-omap2/board-apollon.c b/arch/arm/mach-omap2/board-apollon.c index 768ece2..502c31e 100644 --- a/arch/arm/mach-omap2/board-apollon.c +++ b/arch/arm/mach-omap2/board-apollon.c @@ -356,6 +356,7 @@ MACHINE_START(OMAP_APOLLON, "OMAP24xx Apollon") .init_irq = omap2_init_irq, .handle_irq = omap2_intc_handle_irq, .init_machine = omap_apollon_init, + .init_late = omap2420_init_late, .timer = &omap2_timer, .restart = omap_prcm_restart, MACHINE_END diff --git a/arch/arm/mach-omap2/board-cm-t35.c b/arch/arm/mach-omap2/board-cm-t35.c index 909a8b9..f975061 100644 --- a/arch/arm/mach-omap2/board-cm-t35.c +++ b/arch/arm/mach-omap2/board-cm-t35.c @@ -686,6 +686,7 @@ MACHINE_START(CM_T35, "Compulab CM-T35") .init_irq = omap3_init_irq, .handle_irq = omap3_intc_handle_irq, .init_machine = cm_t35_init, + .init_late = omap35xx_init_late, .timer = &omap3_timer, .restart = omap_prcm_restart, MACHINE_END @@ -698,6 +699,7 @@ MACHINE_START(CM_T3730, "Compulab CM-T3730") .init_irq = omap3_init_irq, .handle_irq = omap3_intc_handle_irq, .init_machine = cm_t3730_init, + .init_late = omap3630_init_late, .timer = &omap3_timer, .restart = omap_prcm_restart, MACHINE_END diff --git a/arch/arm/mach-omap2/board-cm-t3517.c b/arch/arm/mach-omap2/board-cm-t3517.c index 9e66e16..a33ad46 100644 --- a/arch/arm/mach-omap2/board-cm-t3517.c +++ b/arch/arm/mach-omap2/board-cm-t3517.c @@ -303,6 +303,7 @@ MACHINE_START(CM_T3517, "Compulab CM-T3517") .init_irq = omap3_init_irq, .handle_irq = omap3_intc_handle_irq, .init_machine = cm_t3517_init, + .init_late = am35xx_init_late, .timer = &omap3_timer, .restart = omap_prcm_restart, MACHINE_END diff --git a/arch/arm/mach-omap2/board-devkit8000.c b/arch/arm/mach-omap2/board-devkit8000.c index a2010f0..f616c8c 100644 --- a/arch/arm/mach-omap2/board-devkit8000.c +++ b/arch/arm/mach-omap2/board-devkit8000.c @@ -664,6 +664,7 @@ MACHINE_START(DEVKIT8000, "OMAP3 Devkit8000") .init_irq = omap3_init_irq, .handle_irq = omap3_intc_handle_irq, .init_machine = devkit8000_init, + .init_late = omap35xx_init_late, .timer = &omap3_secure_timer, .restart = omap_prcm_restart, MACHINE_END diff --git a/arch/arm/mach-omap2/board-generic.c b/arch/arm/mach-omap2/board-generic.c index 098d183..56ffcc9 100644 --- a/arch/arm/mach-omap2/board-generic.c +++ b/arch/arm/mach-omap2/board-generic.c @@ -158,6 +158,7 @@ DT_MACHINE_START(OMAP4_DT, "Generic OMAP4 (Flattened Device Tree)") .init_irq = omap_init_irq, .handle_irq = gic_handle_irq, .init_machine = omap4_init, + .init_late = omap4430_init_late, .timer = &omap4_timer, .dt_compat = omap4_boards_compat, .restart = omap_prcm_restart, diff --git a/arch/arm/mach-omap2/board-h4.c b/arch/arm/mach-omap2/board-h4.c index 0bbbabe..876becf 100644 --- a/arch/arm/mach-omap2/board-h4.c +++ b/arch/arm/mach-omap2/board-h4.c @@ -398,6 +398,7 @@ MACHINE_START(OMAP_H4, "OMAP2420 H4 board") .init_irq = omap2_init_irq, .handle_irq = omap2_intc_handle_irq, .init_machine = omap_h4_init, + .init_late = omap2420_init_late, .timer = &omap2_timer, .restart = omap_prcm_restart, MACHINE_END diff --git a/arch/arm/mach-omap2/board-igep0020.c b/arch/arm/mach-omap2/board-igep0020.c index 930c0d3..21df74e 100644 --- a/arch/arm/mach-omap2/board-igep0020.c +++ b/arch/arm/mach-omap2/board-igep0020.c @@ -684,6 +684,7 @@ MACHINE_START(IGEP0020, "IGEP v2 board") .init_irq = omap3_init_irq, .handle_irq = omap3_intc_handle_irq, .init_machine = igep_init, + .init_late = omap35xx_init_late, .timer = &omap3_timer, .restart = omap_prcm_restart, MACHINE_END @@ -696,6 +697,7 @@ MACHINE_START(IGEP0030, "IGEP OMAP3 module") .init_irq = omap3_init_irq, .handle_irq = omap3_intc_handle_irq, .init_machine = igep_init, + .init_late = omap35xx_init_late, .timer = &omap3_timer, .restart = omap_prcm_restart, MACHINE_END diff --git a/arch/arm/mach-omap2/board-ldp.c b/arch/arm/mach-omap2/board-ldp.c index 1b60495..ef9e829 100644 --- a/arch/arm/mach-omap2/board-ldp.c +++ b/arch/arm/mach-omap2/board-ldp.c @@ -442,6 +442,7 @@ MACHINE_START(OMAP_LDP, "OMAP LDP board") .init_irq = omap3_init_irq, .handle_irq = omap3_intc_handle_irq, .init_machine = omap_ldp_init, + .init_late = omap3430_init_late, .timer = &omap3_timer, .restart = omap_prcm_restart, MACHINE_END diff --git a/arch/arm/mach-omap2/board-n8x0.c b/arch/arm/mach-omap2/board-n8x0.c index 518091c..8ca14e8 100644 --- a/arch/arm/mach-omap2/board-n8x0.c +++ b/arch/arm/mach-omap2/board-n8x0.c @@ -694,6 +694,7 @@ MACHINE_START(NOKIA_N800, "Nokia N800") .init_irq = omap2_init_irq, .handle_irq = omap2_intc_handle_irq, .init_machine = n8x0_init_machine, + .init_late = omap2420_init_late, .timer = &omap2_timer, .restart = omap_prcm_restart, MACHINE_END @@ -706,6 +707,7 @@ MACHINE_START(NOKIA_N810, "Nokia N810") .init_irq = omap2_init_irq, .handle_irq = omap2_intc_handle_irq, .init_machine = n8x0_init_machine, + .init_late = omap2420_init_late, .timer = &omap2_timer, .restart = omap_prcm_restart, MACHINE_END @@ -718,6 +720,7 @@ MACHINE_START(NOKIA_N810_WIMAX, "Nokia N810 WiMAX") .init_irq = omap2_init_irq, .handle_irq = omap2_intc_handle_irq, .init_machine = n8x0_init_machine, + .init_late = omap2420_init_late, .timer = &omap2_timer, .restart = omap_prcm_restart, MACHINE_END diff --git a/arch/arm/mach-omap2/board-omap3beagle.c b/arch/arm/mach-omap2/board-omap3beagle.c index 7be8d65..b6cd804 100644 --- a/arch/arm/mach-omap2/board-omap3beagle.c +++ b/arch/arm/mach-omap2/board-omap3beagle.c @@ -565,6 +565,7 @@ MACHINE_START(OMAP3_BEAGLE, "OMAP3 Beagle Board") .init_irq = omap3_init_irq, .handle_irq = omap3_intc_handle_irq, .init_machine = omap3_beagle_init, + .init_late = omap3_init_late, .timer = &omap3_secure_timer, .restart = omap_prcm_restart, MACHINE_END diff --git a/arch/arm/mach-omap2/board-omap3evm.c b/arch/arm/mach-omap2/board-omap3evm.c index 49df127..e98c230 100644 --- a/arch/arm/mach-omap2/board-omap3evm.c +++ b/arch/arm/mach-omap2/board-omap3evm.c @@ -692,6 +692,7 @@ MACHINE_START(OMAP3EVM, "OMAP3 EVM") .init_irq = omap3_init_irq, .handle_irq = omap3_intc_handle_irq, .init_machine = omap3_evm_init, + .init_late = omap35xx_init_late, .timer = &omap3_timer, .restart = omap_prcm_restart, MACHINE_END diff --git a/arch/arm/mach-omap2/board-omap3logic.c b/arch/arm/mach-omap2/board-omap3logic.c index 9b3c141..8927927 100644 --- a/arch/arm/mach-omap2/board-omap3logic.c +++ b/arch/arm/mach-omap2/board-omap3logic.c @@ -218,6 +218,7 @@ MACHINE_START(OMAP3_TORPEDO, "Logic OMAP3 Torpedo board") .init_irq = omap3_init_irq, .handle_irq = omap3_intc_handle_irq, .init_machine = omap3logic_init, + .init_late = omap35xx_init_late, .timer = &omap3_timer, .restart = omap_prcm_restart, MACHINE_END @@ -230,6 +231,7 @@ MACHINE_START(OMAP3530_LV_SOM, "OMAP Logic 3530 LV SOM board") .init_irq = omap3_init_irq, .handle_irq = omap3_intc_handle_irq, .init_machine = omap3logic_init, + .init_late = omap35xx_init_late, .timer = &omap3_timer, .restart = omap_prcm_restart, MACHINE_END diff --git a/arch/arm/mach-omap2/board-omap3pandora.c b/arch/arm/mach-omap2/board-omap3pandora.c index 33d995d..57aebee 100644 --- a/arch/arm/mach-omap2/board-omap3pandora.c +++ b/arch/arm/mach-omap2/board-omap3pandora.c @@ -622,6 +622,7 @@ MACHINE_START(OMAP3_PANDORA, "Pandora Handheld Console") .init_irq = omap3_init_irq, .handle_irq = omap3_intc_handle_irq, .init_machine = omap3pandora_init, + .init_late = omap35xx_init_late, .timer = &omap3_timer, .restart = omap_prcm_restart, MACHINE_END diff --git a/arch/arm/mach-omap2/board-omap3stalker.c b/arch/arm/mach-omap2/board-omap3stalker.c index 4dffc95..c05fe3a 100644 --- a/arch/arm/mach-omap2/board-omap3stalker.c +++ b/arch/arm/mach-omap2/board-omap3stalker.c @@ -457,6 +457,7 @@ MACHINE_START(SBC3530, "OMAP3 STALKER") .init_irq = omap3_init_irq, .handle_irq = omap3_intc_handle_irq, .init_machine = omap3_stalker_init, + .init_late = omap35xx_init_late, .timer = &omap3_secure_timer, .restart = omap_prcm_restart, MACHINE_END diff --git a/arch/arm/mach-omap2/board-omap3touchbook.c b/arch/arm/mach-omap2/board-omap3touchbook.c index ae2251f..485d14d 100644 --- a/arch/arm/mach-omap2/board-omap3touchbook.c +++ b/arch/arm/mach-omap2/board-omap3touchbook.c @@ -387,6 +387,7 @@ MACHINE_START(TOUCHBOOK, "OMAP3 touchbook Board") .init_irq = omap3_init_irq, .handle_irq = omap3_intc_handle_irq, .init_machine = omap3_touchbook_init, + .init_late = omap3430_init_late, .timer = &omap3_secure_timer, .restart = omap_prcm_restart, MACHINE_END diff --git a/arch/arm/mach-omap2/board-omap4panda.c b/arch/arm/mach-omap2/board-omap4panda.c index 1b782ba..7ca817f 100644 --- a/arch/arm/mach-omap2/board-omap4panda.c +++ b/arch/arm/mach-omap2/board-omap4panda.c @@ -582,6 +582,7 @@ MACHINE_START(OMAP4_PANDA, "OMAP4 Panda board") .init_irq = gic_init_irq, .handle_irq = gic_handle_irq, .init_machine = omap4_panda_init, + .init_late = omap4430_init_late, .timer = &omap4_timer, .restart = omap_prcm_restart, MACHINE_END diff --git a/arch/arm/mach-omap2/board-overo.c b/arch/arm/mach-omap2/board-overo.c index 33aa391..af7ac0b 100644 --- a/arch/arm/mach-omap2/board-overo.c +++ b/arch/arm/mach-omap2/board-overo.c @@ -571,6 +571,7 @@ MACHINE_START(OVERO, "Gumstix Overo") .init_irq = omap3_init_irq, .handle_irq = omap3_intc_handle_irq, .init_machine = overo_init, + .init_late = omap35xx_init_late, .timer = &omap3_timer, .restart = omap_prcm_restart, MACHINE_END diff --git a/arch/arm/mach-omap2/board-rm680.c b/arch/arm/mach-omap2/board-rm680.c index ae53d71..0ad1bb3b 100644 --- a/arch/arm/mach-omap2/board-rm680.c +++ b/arch/arm/mach-omap2/board-rm680.c @@ -151,6 +151,7 @@ MACHINE_START(NOKIA_RM680, "Nokia RM-680 board") .init_irq = omap3_init_irq, .handle_irq = omap3_intc_handle_irq, .init_machine = rm680_init, + .init_late = omap3630_init_late, .timer = &omap3_timer, .restart = omap_prcm_restart, MACHINE_END @@ -163,6 +164,7 @@ MACHINE_START(NOKIA_RM696, "Nokia RM-696 board") .init_irq = omap3_init_irq, .handle_irq = omap3_intc_handle_irq, .init_machine = rm680_init, + .init_late = omap3630_init_late, .timer = &omap3_timer, .restart = omap_prcm_restart, MACHINE_END diff --git a/arch/arm/mach-omap2/board-rx51.c b/arch/arm/mach-omap2/board-rx51.c index 27f01f0..5010f6d 100644 --- a/arch/arm/mach-omap2/board-rx51.c +++ b/arch/arm/mach-omap2/board-rx51.c @@ -129,6 +129,7 @@ MACHINE_START(NOKIA_RX51, "Nokia RX-51 board") .init_irq = omap3_init_irq, .handle_irq = omap3_intc_handle_irq, .init_machine = rx51_init, + .init_late = omap3430_init_late, .timer = &omap3_timer, .restart = omap_prcm_restart, MACHINE_END diff --git a/arch/arm/mach-omap2/board-ti8168evm.c b/arch/arm/mach-omap2/board-ti8168evm.c index ab9a7a9..d4c8392 100644 --- a/arch/arm/mach-omap2/board-ti8168evm.c +++ b/arch/arm/mach-omap2/board-ti8168evm.c @@ -52,6 +52,7 @@ MACHINE_START(TI8168EVM, "ti8168evm") .init_irq = ti81xx_init_irq, .timer = &omap3_timer, .init_machine = ti81xx_evm_init, + .init_late = ti81xx_init_late, .restart = omap_prcm_restart, MACHINE_END @@ -63,5 +64,6 @@ MACHINE_START(TI8148EVM, "ti8148evm") .init_irq = ti81xx_init_irq, .timer = &omap3_timer, .init_machine = ti81xx_evm_init, + .init_late = ti81xx_init_late, .restart = omap_prcm_restart, MACHINE_END diff --git a/arch/arm/mach-omap2/board-zoom.c b/arch/arm/mach-omap2/board-zoom.c index 5c20bcc..4e7e561 100644 --- a/arch/arm/mach-omap2/board-zoom.c +++ b/arch/arm/mach-omap2/board-zoom.c @@ -137,6 +137,7 @@ MACHINE_START(OMAP_ZOOM2, "OMAP Zoom2 board") .init_irq = omap3_init_irq, .handle_irq = omap3_intc_handle_irq, .init_machine = omap_zoom_init, + .init_late = omap3430_init_late, .timer = &omap3_timer, .restart = omap_prcm_restart, MACHINE_END @@ -149,6 +150,7 @@ MACHINE_START(OMAP_ZOOM3, "OMAP Zoom3 board") .init_irq = omap3_init_irq, .handle_irq = omap3_intc_handle_irq, .init_machine = omap_zoom_init, + .init_late = omap3630_init_late, .timer = &omap3_timer, .restart = omap_prcm_restart, MACHINE_END diff --git a/arch/arm/mach-omap2/common.h b/arch/arm/mach-omap2/common.h index 57da7f4..58ef29b 100644 --- a/arch/arm/mach-omap2/common.h +++ b/arch/arm/mach-omap2/common.h @@ -78,6 +78,42 @@ static inline void omap44xx_map_common_io(void) } #endif +#if defined(CONFIG_PM) && defined(CONFIG_ARCH_OMAP2) +int omap2_pm_init(void); +#else +static inline int omap2_pm_init(void) +{ + return 0; +} +#endif + +#if defined(CONFIG_PM) && defined(CONFIG_ARCH_OMAP3) +int omap3_pm_init(void); +#else +static inline int omap3_pm_init(void) +{ + return 0; +} +#endif + +#if defined(CONFIG_PM) && defined(CONFIG_ARCH_OMAP4) +int omap4_pm_init(void); +#else +static inline int omap4_pm_init(void) +{ + return 0; +} +#endif + +#ifdef CONFIG_OMAP_MUX +int omap_mux_late_init(void); +#else +static inline int omap_mux_late_init(void) +{ + return 0; +} +#endif + extern void omap2_init_common_infrastructure(void); extern struct sys_timer omap2_timer; @@ -94,6 +130,17 @@ void omap3_init_early(void); /* Do not use this one */ void am35xx_init_early(void); void ti81xx_init_early(void); void omap4430_init_early(void); +void omap3_init_late(void); /* Do not use this one */ +void omap4430_init_late(void); +void omap2420_init_late(void); +void omap2430_init_late(void); +void omap3430_init_late(void); +void omap35xx_init_late(void); +void omap3630_init_late(void); +void am35xx_init_late(void); +void ti81xx_init_late(void); +void omap4430_init_late(void); +int omap2_common_pm_late_init(void); void omap_prcm_restart(char, const char *); /* diff --git a/arch/arm/mach-omap2/io.c b/arch/arm/mach-omap2/io.c index 065bd76..cc86e71 100644 --- a/arch/arm/mach-omap2/io.c +++ b/arch/arm/mach-omap2/io.c @@ -397,6 +397,13 @@ void __init omap2420_init_early(void) omap_hwmod_init_postsetup(); omap2420_clk_init(); } + +void __init omap2420_init_late(void) +{ + omap_mux_late_init(); + omap2_common_pm_late_init(); + omap2_pm_init(); +} #endif #ifdef CONFIG_SOC_OMAP2430 @@ -412,6 +419,13 @@ void __init omap2430_init_early(void) omap_hwmod_init_postsetup(); omap2430_clk_init(); } + +void __init omap2430_init_late(void) +{ + omap_mux_late_init(); + omap2_common_pm_late_init(); + omap2_pm_init(); +} #endif /* @@ -466,6 +480,48 @@ void __init ti81xx_init_early(void) omap_hwmod_init_postsetup(); omap3xxx_clk_init(); } + +void __init omap3_init_late(void) +{ + omap_mux_late_init(); + omap2_common_pm_late_init(); + omap3_pm_init(); +} + +void __init omap3430_init_late(void) +{ + omap_mux_late_init(); + omap2_common_pm_late_init(); + omap3_pm_init(); +} + +void __init omap35xx_init_late(void) +{ + omap_mux_late_init(); + omap2_common_pm_late_init(); + omap3_pm_init(); +} + +void __init omap3630_init_late(void) +{ + omap_mux_late_init(); + omap2_common_pm_late_init(); + omap3_pm_init(); +} + +void __init am35xx_init_late(void) +{ + omap_mux_late_init(); + omap2_common_pm_late_init(); + omap3_pm_init(); +} + +void __init ti81xx_init_late(void) +{ + omap_mux_late_init(); + omap2_common_pm_late_init(); + omap3_pm_init(); +} #endif #ifdef CONFIG_ARCH_OMAP4 @@ -482,6 +538,13 @@ void __init omap4430_init_early(void) omap_hwmod_init_postsetup(); omap4xxx_clk_init(); } + +void __init omap4430_init_late(void) +{ + omap_mux_late_init(); + omap2_common_pm_late_init(); + omap4_pm_init(); +} #endif void __init omap_sdrc_init(struct omap_sdrc_params *sdrc_cs0, diff --git a/arch/arm/mach-omap2/mux.c b/arch/arm/mach-omap2/mux.c index 65c3391..0ede4d6 100644 --- a/arch/arm/mach-omap2/mux.c +++ b/arch/arm/mach-omap2/mux.c @@ -788,7 +788,7 @@ static void __init omap_mux_free_names(struct omap_mux *m) } /* Free all data except for GPIO pins unless CONFIG_DEBUG_FS is set */ -static int __init omap_mux_late_init(void) +int __init omap_mux_late_init(void) { struct omap_mux_partition *partition; int ret; @@ -823,7 +823,6 @@ static int __init omap_mux_late_init(void) return 0; } -late_initcall(omap_mux_late_init); static void __init omap_mux_package_fixup(struct omap_mux *p, struct omap_mux *superset) diff --git a/arch/arm/mach-omap2/pm.c b/arch/arm/mach-omap2/pm.c index d0c1c96..9cb5ced 100644 --- a/arch/arm/mach-omap2/pm.c +++ b/arch/arm/mach-omap2/pm.c @@ -295,7 +295,7 @@ static int __init omap2_common_pm_init(void) } postcore_initcall(omap2_common_pm_init); -static int __init omap2_common_pm_late_init(void) +int __init omap2_common_pm_late_init(void) { /* * In the case of DT, the PMIC and SR initialization will be done using @@ -322,4 +322,3 @@ static int __init omap2_common_pm_late_init(void) return 0; } -late_initcall(omap2_common_pm_late_init); diff --git a/arch/arm/mach-omap2/pm24xx.c b/arch/arm/mach-omap2/pm24xx.c index 95442b6..b2758b8 100644 --- a/arch/arm/mach-omap2/pm24xx.c +++ b/arch/arm/mach-omap2/pm24xx.c @@ -301,13 +301,10 @@ static void __init prcm_setup_regs(void) WKUP_MOD, PM_WKEN); } -static int __init omap2_pm_init(void) +int __init omap2_pm_init(void) { u32 l; - if (!cpu_is_omap24xx()) - return -ENODEV; - printk(KERN_INFO "Power Management for OMAP2 initializing\n"); l = omap2_prm_read_mod_reg(OCP_MOD, OMAP2_PRCM_REVISION_OFFSET); printk(KERN_INFO "PRCM revision %d.%d\n", (l >> 4) & 0x0f, l & 0x0f); @@ -373,17 +370,13 @@ static int __init omap2_pm_init(void) * These routines need to be in SRAM as that's the only * memory the MPU can see when it wakes up. */ - if (cpu_is_omap24xx()) { - omap2_sram_idle = omap_sram_push(omap24xx_idle_loop_suspend, - omap24xx_idle_loop_suspend_sz); + omap2_sram_idle = omap_sram_push(omap24xx_idle_loop_suspend, + omap24xx_idle_loop_suspend_sz); - omap2_sram_suspend = omap_sram_push(omap24xx_cpu_suspend, - omap24xx_cpu_suspend_sz); - } + omap2_sram_suspend = omap_sram_push(omap24xx_cpu_suspend, + omap24xx_cpu_suspend_sz); arm_pm_idle = omap2_pm_idle; return 0; } - -late_initcall(omap2_pm_init); diff --git a/arch/arm/mach-omap2/pm34xx.c b/arch/arm/mach-omap2/pm34xx.c index 703bd10..c769b88 100644 --- a/arch/arm/mach-omap2/pm34xx.c +++ b/arch/arm/mach-omap2/pm34xx.c @@ -699,15 +699,12 @@ static void __init pm_errata_configure(void) } } -static int __init omap3_pm_init(void) +int __init omap3_pm_init(void) { struct power_state *pwrst, *tmp; struct clockdomain *neon_clkdm, *per_clkdm, *mpu_clkdm, *core_clkdm; int ret; - if (!cpu_is_omap34xx()) - return -ENODEV; - if (!omap3_has_io_chain_ctrl()) pr_warning("PM: no software I/O chain control; some wakeups may be lost\n"); @@ -808,5 +805,3 @@ err2: err1: return ret; } - -late_initcall(omap3_pm_init); diff --git a/arch/arm/mach-omap2/pm44xx.c b/arch/arm/mach-omap2/pm44xx.c index 8856253..ea24174 100644 --- a/arch/arm/mach-omap2/pm44xx.c +++ b/arch/arm/mach-omap2/pm44xx.c @@ -141,15 +141,12 @@ static void omap_default_idle(void) * Initializes all powerdomain and clockdomain target states * and all PRCM settings. */ -static int __init omap4_pm_init(void) +int __init omap4_pm_init(void) { int ret; struct clockdomain *emif_clkdm, *mpuss_clkdm, *l3_1_clkdm, *l4wkup; struct clockdomain *ducati_clkdm, *l3_2_clkdm, *l4_per_clkdm; - if (!cpu_is_omap44xx()) - return -ENODEV; - if (omap_rev() == OMAP4430_REV_ES1_0) { WARN(1, "Power Management not supported on OMAP4430 ES1.0\n"); return -ENODEV; @@ -217,4 +214,3 @@ static int __init omap4_pm_init(void) err2: return ret; } -late_initcall(omap4_pm_init); -- cgit v0.10.2 From cafa61907cb0aabbedf3c248f197130dc5332147 Mon Sep 17 00:00:00 2001 From: Shawn Guo Date: Thu, 26 Apr 2012 20:40:32 +0800 Subject: ARM: pnx4008: use machine specific hook for late init Cc: Russell King Signed-off-by: Shawn Guo diff --git a/arch/arm/mach-pnx4008/core.c b/arch/arm/mach-pnx4008/core.c index be4c928..a00d2f1 100644 --- a/arch/arm/mach-pnx4008/core.c +++ b/arch/arm/mach-pnx4008/core.c @@ -265,6 +265,17 @@ static void pnx4008_restart(char mode, const char *cmd) soft_restart(0); } +#ifdef CONFIG_PM +extern int pnx4008_pm_init(void); +#else +static inline int pnx4008_pm_init(void) { return 0; } +#endif + +void __init pnx4008_init_late(void) +{ + pnx4008_pm_init(); +} + extern struct sys_timer pnx4008_timer; MACHINE_START(PNX4008, "Philips PNX4008") @@ -273,6 +284,7 @@ MACHINE_START(PNX4008, "Philips PNX4008") .map_io = pnx4008_map_io, .init_irq = pnx4008_init_irq, .init_machine = pnx4008_init, + .init_late = pnx4008_init_late, .timer = &pnx4008_timer, .restart = pnx4008_restart, MACHINE_END diff --git a/arch/arm/mach-pnx4008/pm.c b/arch/arm/mach-pnx4008/pm.c index f3e60a0..26f8d06 100644 --- a/arch/arm/mach-pnx4008/pm.c +++ b/arch/arm/mach-pnx4008/pm.c @@ -124,7 +124,7 @@ static const struct platform_suspend_ops pnx4008_pm_ops = { .valid = pnx4008_pm_valid, }; -static int __init pnx4008_pm_init(void) +int __init pnx4008_pm_init(void) { u32 sram_size_to_allocate; @@ -151,5 +151,3 @@ static int __init pnx4008_pm_init(void) suspend_set_ops(&pnx4008_pm_ops); return 0; } - -late_initcall(pnx4008_pm_init); -- cgit v0.10.2 From a4b4674e26da6b2c40f5b6485e165beb8f68d335 Mon Sep 17 00:00:00 2001 From: Shawn Guo Date: Thu, 26 Apr 2012 20:51:36 +0800 Subject: ARM: prima2: use machine specific hook for late init Signed-off-by: Shawn Guo Reviewed-by: Barry Song diff --git a/arch/arm/mach-prima2/common.h b/arch/arm/mach-prima2/common.h index b28a930..60d826f 100644 --- a/arch/arm/mach-prima2/common.h +++ b/arch/arm/mach-prima2/common.h @@ -24,4 +24,10 @@ static inline void sirfsoc_map_lluart(void) {} extern void __init sirfsoc_map_lluart(void); #endif +#ifdef CONFIG_SUSPEND +extern int sirfsoc_pm_init(void); +#else +static inline int sirfsoc_pm_init(void) { return 0; } +#endif + #endif diff --git a/arch/arm/mach-prima2/pm.c b/arch/arm/mach-prima2/pm.c index 26ebb57..fb5a791 100644 --- a/arch/arm/mach-prima2/pm.c +++ b/arch/arm/mach-prima2/pm.c @@ -85,12 +85,11 @@ static const struct platform_suspend_ops sirfsoc_pm_ops = { .valid = suspend_valid_only_mem, }; -static int __init sirfsoc_pm_init(void) +int __init sirfsoc_pm_init(void) { suspend_set_ops(&sirfsoc_pm_ops); return 0; } -late_initcall(sirfsoc_pm_init); static const struct of_device_id pwrc_ids[] = { { .compatible = "sirf,prima2-pwrc" }, diff --git a/arch/arm/mach-prima2/prima2.c b/arch/arm/mach-prima2/prima2.c index 02b9c05..8f0429d 100644 --- a/arch/arm/mach-prima2/prima2.c +++ b/arch/arm/mach-prima2/prima2.c @@ -25,6 +25,11 @@ void __init sirfsoc_mach_init(void) of_platform_bus_probe(NULL, sirfsoc_of_bus_ids, NULL); } +void __init sirfsoc_init_late(void) +{ + sirfsoc_pm_init(); +} + static const char *prima2cb_dt_match[] __initdata = { "sirf,prima2-cb", NULL @@ -39,6 +44,7 @@ MACHINE_START(PRIMA2_EVB, "prima2cb") .timer = &sirfsoc_timer, .dma_zone_size = SZ_256M, .init_machine = sirfsoc_mach_init, + .init_late = sirfsoc_init_late, .dt_compat = prima2cb_dt_match, .restart = sirfsoc_restart, MACHINE_END -- cgit v0.10.2 From cc8f252b713e9c7170bcf6346638dbf6d489d775 Mon Sep 17 00:00:00 2001 From: Shawn Guo Date: Thu, 26 Apr 2012 21:08:52 +0800 Subject: ARM: s3c64xx: use machine specific hook for late init Cc: Ben Dooks Signed-off-by: Shawn Guo Acked-by: Kukjin Kim diff --git a/arch/arm/mach-s3c64xx/common.c b/arch/arm/mach-s3c64xx/common.c index b313380..be746e3 100644 --- a/arch/arm/mach-s3c64xx/common.c +++ b/arch/arm/mach-s3c64xx/common.c @@ -384,3 +384,8 @@ void s3c64xx_restart(char mode, const char *cmd) /* if all else fails, or mode was for soft, jump to 0 */ soft_restart(0); } + +void __init s3c64xx_init_late(void) +{ + s3c64xx_pm_late_initcall(); +} diff --git a/arch/arm/mach-s3c64xx/common.h b/arch/arm/mach-s3c64xx/common.h index 7a10be6..6cfc99b 100644 --- a/arch/arm/mach-s3c64xx/common.h +++ b/arch/arm/mach-s3c64xx/common.h @@ -24,6 +24,7 @@ void s3c64xx_register_clocks(unsigned long xtal, unsigned armclk_limit); void s3c64xx_setup_clocks(void); void s3c64xx_restart(char mode, const char *cmd); +void s3c64xx_init_late(void); #ifdef CONFIG_CPU_S3C6400 @@ -51,4 +52,10 @@ extern void s3c6410_init_clocks(int xtal); #define s3c6410_init NULL #endif +#ifdef CONFIG_PM +int __init s3c64xx_pm_late_initcall(void); +#else +static inline int s3c64xx_pm_late_initcall(void) { return 0; } +#endif + #endif /* __ARCH_ARM_MACH_S3C64XX_COMMON_H */ diff --git a/arch/arm/mach-s3c64xx/mach-anw6410.c b/arch/arm/mach-s3c64xx/mach-anw6410.c index b86f277..8808aa5 100644 --- a/arch/arm/mach-s3c64xx/mach-anw6410.c +++ b/arch/arm/mach-s3c64xx/mach-anw6410.c @@ -241,6 +241,7 @@ MACHINE_START(ANW6410, "A&W6410") .handle_irq = vic_handle_irq, .map_io = anw6410_map_io, .init_machine = anw6410_machine_init, + .init_late = s3c64xx_init_late, .timer = &s3c24xx_timer, .restart = s3c64xx_restart, MACHINE_END diff --git a/arch/arm/mach-s3c64xx/mach-crag6410.c b/arch/arm/mach-s3c64xx/mach-crag6410.c index e20bf58..607d3f1 100644 --- a/arch/arm/mach-s3c64xx/mach-crag6410.c +++ b/arch/arm/mach-s3c64xx/mach-crag6410.c @@ -811,6 +811,7 @@ MACHINE_START(WLF_CRAGG_6410, "Wolfson Cragganmore 6410") .handle_irq = vic_handle_irq, .map_io = crag6410_map_io, .init_machine = crag6410_machine_init, + .init_late = s3c64xx_init_late, .timer = &s3c24xx_timer, .restart = s3c64xx_restart, MACHINE_END diff --git a/arch/arm/mach-s3c64xx/mach-hmt.c b/arch/arm/mach-s3c64xx/mach-hmt.c index 521e07b..1bf6b9d 100644 --- a/arch/arm/mach-s3c64xx/mach-hmt.c +++ b/arch/arm/mach-s3c64xx/mach-hmt.c @@ -272,6 +272,7 @@ MACHINE_START(HMT, "Airgoo-HMT") .handle_irq = vic_handle_irq, .map_io = hmt_map_io, .init_machine = hmt_machine_init, + .init_late = s3c64xx_init_late, .timer = &s3c24xx_timer, .restart = s3c64xx_restart, MACHINE_END diff --git a/arch/arm/mach-s3c64xx/mach-mini6410.c b/arch/arm/mach-s3c64xx/mach-mini6410.c index c34c2ab..5aef599 100644 --- a/arch/arm/mach-s3c64xx/mach-mini6410.c +++ b/arch/arm/mach-s3c64xx/mach-mini6410.c @@ -350,6 +350,7 @@ MACHINE_START(MINI6410, "MINI6410") .handle_irq = vic_handle_irq, .map_io = mini6410_map_io, .init_machine = mini6410_machine_init, + .init_late = s3c64xx_init_late, .timer = &s3c24xx_timer, .restart = s3c64xx_restart, MACHINE_END diff --git a/arch/arm/mach-s3c64xx/mach-ncp.c b/arch/arm/mach-s3c64xx/mach-ncp.c index 0efa2ba..cad2e05 100644 --- a/arch/arm/mach-s3c64xx/mach-ncp.c +++ b/arch/arm/mach-s3c64xx/mach-ncp.c @@ -104,6 +104,7 @@ MACHINE_START(NCP, "NCP") .handle_irq = vic_handle_irq, .map_io = ncp_map_io, .init_machine = ncp_machine_init, + .init_late = s3c64xx_init_late, .timer = &s3c24xx_timer, .restart = s3c64xx_restart, MACHINE_END diff --git a/arch/arm/mach-s3c64xx/mach-real6410.c b/arch/arm/mach-s3c64xx/mach-real6410.c index be2a9a2..a88b60f 100644 --- a/arch/arm/mach-s3c64xx/mach-real6410.c +++ b/arch/arm/mach-s3c64xx/mach-real6410.c @@ -331,6 +331,7 @@ MACHINE_START(REAL6410, "REAL6410") .handle_irq = vic_handle_irq, .map_io = real6410_map_io, .init_machine = real6410_machine_init, + .init_late = s3c64xx_init_late, .timer = &s3c24xx_timer, .restart = s3c64xx_restart, MACHINE_END diff --git a/arch/arm/mach-s3c64xx/mach-smartq5.c b/arch/arm/mach-s3c64xx/mach-smartq5.c index 3f42431..c5021d0 100644 --- a/arch/arm/mach-s3c64xx/mach-smartq5.c +++ b/arch/arm/mach-s3c64xx/mach-smartq5.c @@ -152,6 +152,7 @@ MACHINE_START(SMARTQ5, "SmartQ 5") .handle_irq = vic_handle_irq, .map_io = smartq_map_io, .init_machine = smartq5_machine_init, + .init_late = s3c64xx_init_late, .timer = &s3c24xx_timer, .restart = s3c64xx_restart, MACHINE_END diff --git a/arch/arm/mach-s3c64xx/mach-smartq7.c b/arch/arm/mach-s3c64xx/mach-smartq7.c index e5c09b6..aa9072a 100644 --- a/arch/arm/mach-s3c64xx/mach-smartq7.c +++ b/arch/arm/mach-s3c64xx/mach-smartq7.c @@ -168,6 +168,7 @@ MACHINE_START(SMARTQ7, "SmartQ 7") .handle_irq = vic_handle_irq, .map_io = smartq_map_io, .init_machine = smartq7_machine_init, + .init_late = s3c64xx_init_late, .timer = &s3c24xx_timer, .restart = s3c64xx_restart, MACHINE_END diff --git a/arch/arm/mach-s3c64xx/mach-smdk6400.c b/arch/arm/mach-s3c64xx/mach-smdk6400.c index 5f09653..b0f4525 100644 --- a/arch/arm/mach-s3c64xx/mach-smdk6400.c +++ b/arch/arm/mach-s3c64xx/mach-smdk6400.c @@ -93,6 +93,7 @@ MACHINE_START(SMDK6400, "SMDK6400") .handle_irq = vic_handle_irq, .map_io = smdk6400_map_io, .init_machine = smdk6400_machine_init, + .init_late = s3c64xx_init_late, .timer = &s3c24xx_timer, .restart = s3c64xx_restart, MACHINE_END diff --git a/arch/arm/mach-s3c64xx/mach-smdk6410.c b/arch/arm/mach-s3c64xx/mach-smdk6410.c index d55bc96..315a8b4 100644 --- a/arch/arm/mach-s3c64xx/mach-smdk6410.c +++ b/arch/arm/mach-s3c64xx/mach-smdk6410.c @@ -709,6 +709,7 @@ MACHINE_START(SMDK6410, "SMDK6410") .handle_irq = vic_handle_irq, .map_io = smdk6410_map_io, .init_machine = smdk6410_machine_init, + .init_late = s3c64xx_init_late, .timer = &s3c24xx_timer, .restart = s3c64xx_restart, MACHINE_END diff --git a/arch/arm/mach-s3c64xx/pm.c b/arch/arm/mach-s3c64xx/pm.c index 7d3e81b..7feb426 100644 --- a/arch/arm/mach-s3c64xx/pm.c +++ b/arch/arm/mach-s3c64xx/pm.c @@ -365,10 +365,9 @@ static __init int s3c64xx_pm_initcall(void) } arch_initcall(s3c64xx_pm_initcall); -static __init int s3c64xx_pm_late_initcall(void) +int __init s3c64xx_pm_late_initcall(void) { pm_genpd_poweroff_unused(); return 0; } -late_initcall(s3c64xx_pm_late_initcall); -- cgit v0.10.2 From 7fea1ba58e61c17fb59dfc50c408945f307addc6 Mon Sep 17 00:00:00 2001 From: Shawn Guo Date: Thu, 26 Apr 2012 21:22:45 +0800 Subject: ARM: sa1100: use machine specific hook for late init Cc: Russell King Signed-off-by: Shawn Guo diff --git a/arch/arm/mach-sa1100/assabet.c b/arch/arm/mach-sa1100/assabet.c index 375d3f7..d1dc7f1 100644 --- a/arch/arm/mach-sa1100/assabet.c +++ b/arch/arm/mach-sa1100/assabet.c @@ -538,6 +538,7 @@ MACHINE_START(ASSABET, "Intel-Assabet") .init_irq = sa1100_init_irq, .timer = &sa1100_timer, .init_machine = assabet_init, + .init_late = sa11x0_init_late, #ifdef CONFIG_SA1111 .dma_zone_size = SZ_1M, #endif diff --git a/arch/arm/mach-sa1100/badge4.c b/arch/arm/mach-sa1100/badge4.c index e0f0c03..b30fb99 100644 --- a/arch/arm/mach-sa1100/badge4.c +++ b/arch/arm/mach-sa1100/badge4.c @@ -305,6 +305,7 @@ MACHINE_START(BADGE4, "Hewlett-Packard Laboratories BadgePAD 4") .map_io = badge4_map_io, .nr_irqs = SA1100_NR_IRQS, .init_irq = sa1100_init_irq, + .init_late = sa11x0_init_late, .timer = &sa1100_timer, #ifdef CONFIG_SA1111 .dma_zone_size = SZ_1M, diff --git a/arch/arm/mach-sa1100/cerf.c b/arch/arm/mach-sa1100/cerf.c index 4a61f60..09d7f4b 100644 --- a/arch/arm/mach-sa1100/cerf.c +++ b/arch/arm/mach-sa1100/cerf.c @@ -134,5 +134,6 @@ MACHINE_START(CERF, "Intrinsyc CerfBoard/CerfCube") .init_irq = cerf_init_irq, .timer = &sa1100_timer, .init_machine = cerf_init, + .init_late = sa11x0_init_late, .restart = sa11x0_restart, MACHINE_END diff --git a/arch/arm/mach-sa1100/collie.c b/arch/arm/mach-sa1100/collie.c index c7f418b..ea5cff3 100644 --- a/arch/arm/mach-sa1100/collie.c +++ b/arch/arm/mach-sa1100/collie.c @@ -401,5 +401,6 @@ MACHINE_START(COLLIE, "Sharp-Collie") .init_irq = sa1100_init_irq, .timer = &sa1100_timer, .init_machine = collie_init, + .init_late = sa11x0_init_late, .restart = sa11x0_restart, MACHINE_END diff --git a/arch/arm/mach-sa1100/generic.c b/arch/arm/mach-sa1100/generic.c index 7c524b4..49468e6 100644 --- a/arch/arm/mach-sa1100/generic.c +++ b/arch/arm/mach-sa1100/generic.c @@ -359,6 +359,10 @@ static int __init sa1100_init(void) arch_initcall(sa1100_init); +void __init sa11x0_init_late(void) +{ + sa11x0_pm_init(); +} /* * Common I/O mapping: diff --git a/arch/arm/mach-sa1100/generic.h b/arch/arm/mach-sa1100/generic.h index 9eb3b3c..a5b7c13 100644 --- a/arch/arm/mach-sa1100/generic.h +++ b/arch/arm/mach-sa1100/generic.h @@ -11,6 +11,7 @@ extern void __init sa1100_map_io(void); extern void __init sa1100_init_irq(void); extern void __init sa1100_init_gpio(void); extern void sa11x0_restart(char, const char *); +extern void sa11x0_init_late(void); #define SET_BANK(__nr,__start,__size) \ mi->bank[__nr].start = (__start), \ @@ -41,3 +42,9 @@ void sa11x0_register_mcp(struct mcp_plat_data *data); struct sa1100fb_mach_info; void sa11x0_register_lcd(struct sa1100fb_mach_info *inf); + +#ifdef CONFIG_PM +int sa11x0_pm_init(void); +#else +static inline int sa11x0_pm_init(void) { return 0; } +#endif diff --git a/arch/arm/mach-sa1100/h3100.c b/arch/arm/mach-sa1100/h3100.c index b2e8d0f..e1571ea 100644 --- a/arch/arm/mach-sa1100/h3100.c +++ b/arch/arm/mach-sa1100/h3100.c @@ -110,6 +110,7 @@ MACHINE_START(H3100, "Compaq iPAQ H3100") .init_irq = sa1100_init_irq, .timer = &sa1100_timer, .init_machine = h3100_mach_init, + .init_late = sa11x0_init_late, .restart = sa11x0_restart, MACHINE_END diff --git a/arch/arm/mach-sa1100/h3600.c b/arch/arm/mach-sa1100/h3600.c index cb6659f..ba7a290 100644 --- a/arch/arm/mach-sa1100/h3600.c +++ b/arch/arm/mach-sa1100/h3600.c @@ -160,6 +160,7 @@ MACHINE_START(H3600, "Compaq iPAQ H3600") .init_irq = sa1100_init_irq, .timer = &sa1100_timer, .init_machine = h3600_mach_init, + .init_late = sa11x0_init_late, .restart = sa11x0_restart, MACHINE_END diff --git a/arch/arm/mach-sa1100/hackkit.c b/arch/arm/mach-sa1100/hackkit.c index 5535475..7f86bd9 100644 --- a/arch/arm/mach-sa1100/hackkit.c +++ b/arch/arm/mach-sa1100/hackkit.c @@ -199,5 +199,6 @@ MACHINE_START(HACKKIT, "HackKit Cpu Board") .init_irq = sa1100_init_irq, .timer = &sa1100_timer, .init_machine = hackkit_init, + .init_late = sa11x0_init_late, .restart = sa11x0_restart, MACHINE_END diff --git a/arch/arm/mach-sa1100/jornada720.c b/arch/arm/mach-sa1100/jornada720.c index ca7a7e8..e3084f4 100644 --- a/arch/arm/mach-sa1100/jornada720.c +++ b/arch/arm/mach-sa1100/jornada720.c @@ -348,6 +348,7 @@ MACHINE_START(JORNADA720, "HP Jornada 720") .init_irq = sa1100_init_irq, .timer = &sa1100_timer, .init_machine = jornada720_mach_init, + .init_late = sa11x0_init_late, #ifdef CONFIG_SA1111 .dma_zone_size = SZ_1M, #endif diff --git a/arch/arm/mach-sa1100/lart.c b/arch/arm/mach-sa1100/lart.c index eb6534e..b775a0a 100644 --- a/arch/arm/mach-sa1100/lart.c +++ b/arch/arm/mach-sa1100/lart.c @@ -147,6 +147,7 @@ MACHINE_START(LART, "LART") .nr_irqs = SA1100_NR_IRQS, .init_irq = sa1100_init_irq, .init_machine = lart_init, + .init_late = sa11x0_init_late, .timer = &sa1100_timer, .restart = sa11x0_restart, MACHINE_END diff --git a/arch/arm/mach-sa1100/nanoengine.c b/arch/arm/mach-sa1100/nanoengine.c index 8f6446b..41f69d9 100644 --- a/arch/arm/mach-sa1100/nanoengine.c +++ b/arch/arm/mach-sa1100/nanoengine.c @@ -112,5 +112,6 @@ MACHINE_START(NANOENGINE, "BSE nanoEngine") .init_irq = sa1100_init_irq, .timer = &sa1100_timer, .init_machine = nanoengine_init, + .init_late = sa11x0_init_late, .restart = sa11x0_restart, MACHINE_END diff --git a/arch/arm/mach-sa1100/pleb.c b/arch/arm/mach-sa1100/pleb.c index 1602575..37fe0a0 100644 --- a/arch/arm/mach-sa1100/pleb.c +++ b/arch/arm/mach-sa1100/pleb.c @@ -135,5 +135,6 @@ MACHINE_START(PLEB, "PLEB") .init_irq = sa1100_init_irq, .timer = &sa1100_timer, .init_machine = pleb_init, + .init_late = sa11x0_init_late, .restart = sa11x0_restart, MACHINE_END diff --git a/arch/arm/mach-sa1100/pm.c b/arch/arm/mach-sa1100/pm.c index 2fa499e..690cf0c 100644 --- a/arch/arm/mach-sa1100/pm.c +++ b/arch/arm/mach-sa1100/pm.c @@ -117,10 +117,8 @@ static const struct platform_suspend_ops sa11x0_pm_ops = { .valid = suspend_valid_only_mem, }; -static int __init sa11x0_pm_init(void) +int __init sa11x0_pm_init(void) { suspend_set_ops(&sa11x0_pm_ops); return 0; } - -late_initcall(sa11x0_pm_init); diff --git a/arch/arm/mach-sa1100/shannon.c b/arch/arm/mach-sa1100/shannon.c index ca8bf59..5d33fc3 100644 --- a/arch/arm/mach-sa1100/shannon.c +++ b/arch/arm/mach-sa1100/shannon.c @@ -104,5 +104,6 @@ MACHINE_START(SHANNON, "Shannon (AKA: Tuxscreen)") .init_irq = sa1100_init_irq, .timer = &sa1100_timer, .init_machine = shannon_init, + .init_late = sa11x0_init_late, .restart = sa11x0_restart, MACHINE_END diff --git a/arch/arm/mach-sa1100/simpad.c b/arch/arm/mach-sa1100/simpad.c index 3efae03..fbd5359 100644 --- a/arch/arm/mach-sa1100/simpad.c +++ b/arch/arm/mach-sa1100/simpad.c @@ -395,6 +395,7 @@ MACHINE_START(SIMPAD, "Simpad") .map_io = simpad_map_io, .nr_irqs = SA1100_NR_IRQS, .init_irq = sa1100_init_irq, + .init_late = sa11x0_init_late, .timer = &sa1100_timer, .restart = sa11x0_restart, MACHINE_END -- cgit v0.10.2 From 21cc1b7ede3cf456cf1d51f8a906093261f7c111 Mon Sep 17 00:00:00 2001 From: Shawn Guo Date: Thu, 26 Apr 2012 21:58:41 +0800 Subject: ARM: shmobile: use machine specific hook for late init Cc: Paul Mundt Signed-off-by: Shawn Guo Acked-by: Magnus Damm Acked-by: "Rafael J. Wysocki" diff --git a/arch/arm/mach-shmobile/Makefile b/arch/arm/mach-shmobile/Makefile index e7c2590..93d62fb 100644 --- a/arch/arm/mach-shmobile/Makefile +++ b/arch/arm/mach-shmobile/Makefile @@ -3,7 +3,7 @@ # # Common objects -obj-y := timer.o console.o clock.o +obj-y := timer.o console.o clock.o common.o # CPU objects obj-$(CONFIG_ARCH_SH7367) += setup-sh7367.o clock-sh7367.o intc-sh7367.o diff --git a/arch/arm/mach-shmobile/board-ag5evm.c b/arch/arm/mach-shmobile/board-ag5evm.c index cb224a3..a99e7e6 100644 --- a/arch/arm/mach-shmobile/board-ag5evm.c +++ b/arch/arm/mach-shmobile/board-ag5evm.c @@ -598,5 +598,6 @@ MACHINE_START(AG5EVM, "ag5evm") .init_irq = sh73a0_init_irq, .handle_irq = gic_handle_irq, .init_machine = ag5evm_init, + .init_late = shmobile_init_late, .timer = &shmobile_timer, MACHINE_END diff --git a/arch/arm/mach-shmobile/board-ap4evb.c b/arch/arm/mach-shmobile/board-ap4evb.c index b56dde2..522866d 100644 --- a/arch/arm/mach-shmobile/board-ap4evb.c +++ b/arch/arm/mach-shmobile/board-ap4evb.c @@ -1440,5 +1440,6 @@ MACHINE_START(AP4EVB, "ap4evb") .init_irq = sh7372_init_irq, .handle_irq = shmobile_handle_irq_intc, .init_machine = ap4evb_init, + .init_late = shmobile_init_late, .timer = &shmobile_timer, MACHINE_END diff --git a/arch/arm/mach-shmobile/board-bonito.c b/arch/arm/mach-shmobile/board-bonito.c index 81fd95f..e6b1fd3 100644 --- a/arch/arm/mach-shmobile/board-bonito.c +++ b/arch/arm/mach-shmobile/board-bonito.c @@ -500,5 +500,6 @@ MACHINE_START(BONITO, "bonito") .init_irq = r8a7740_init_irq, .handle_irq = shmobile_handle_irq_intc, .init_machine = bonito_init, + .init_late = shmobile_init_late, .timer = &shmobile_timer, MACHINE_END diff --git a/arch/arm/mach-shmobile/board-g3evm.c b/arch/arm/mach-shmobile/board-g3evm.c index 39b6cf8..796fa00 100644 --- a/arch/arm/mach-shmobile/board-g3evm.c +++ b/arch/arm/mach-shmobile/board-g3evm.c @@ -338,5 +338,6 @@ MACHINE_START(G3EVM, "g3evm") .init_irq = sh7367_init_irq, .handle_irq = shmobile_handle_irq_intc, .init_machine = g3evm_init, + .init_late = shmobile_init_late, .timer = &shmobile_timer, MACHINE_END diff --git a/arch/arm/mach-shmobile/board-g4evm.c b/arch/arm/mach-shmobile/board-g4evm.c index 0e5a39c..f125732 100644 --- a/arch/arm/mach-shmobile/board-g4evm.c +++ b/arch/arm/mach-shmobile/board-g4evm.c @@ -381,5 +381,6 @@ MACHINE_START(G4EVM, "g4evm") .init_irq = sh7377_init_irq, .handle_irq = shmobile_handle_irq_intc, .init_machine = g4evm_init, + .init_late = shmobile_init_late, .timer = &shmobile_timer, MACHINE_END diff --git a/arch/arm/mach-shmobile/board-kota2.c b/arch/arm/mach-shmobile/board-kota2.c index 200dcd4..f60f1b2 100644 --- a/arch/arm/mach-shmobile/board-kota2.c +++ b/arch/arm/mach-shmobile/board-kota2.c @@ -521,5 +521,6 @@ MACHINE_START(KOTA2, "kota2") .init_irq = sh73a0_init_irq, .handle_irq = gic_handle_irq, .init_machine = kota2_init, + .init_late = shmobile_init_late, .timer = &shmobile_timer, MACHINE_END diff --git a/arch/arm/mach-shmobile/board-mackerel.c b/arch/arm/mach-shmobile/board-mackerel.c index f49e28a..4cd438f 100644 --- a/arch/arm/mach-shmobile/board-mackerel.c +++ b/arch/arm/mach-shmobile/board-mackerel.c @@ -1623,5 +1623,6 @@ MACHINE_START(MACKEREL, "mackerel") .init_irq = sh7372_init_irq, .handle_irq = shmobile_handle_irq_intc, .init_machine = mackerel_init, + .init_late = shmobile_init_late, .timer = &shmobile_timer, MACHINE_END diff --git a/arch/arm/mach-shmobile/board-marzen.c b/arch/arm/mach-shmobile/board-marzen.c index ef0e13b..14de378 100644 --- a/arch/arm/mach-shmobile/board-marzen.c +++ b/arch/arm/mach-shmobile/board-marzen.c @@ -98,5 +98,6 @@ MACHINE_START(MARZEN, "marzen") .init_irq = r8a7779_init_irq, .handle_irq = gic_handle_irq, .init_machine = marzen_init, + .init_late = shmobile_init_late, .timer = &shmobile_timer, MACHINE_END diff --git a/arch/arm/mach-shmobile/common.c b/arch/arm/mach-shmobile/common.c new file mode 100644 index 0000000..608aba9 --- /dev/null +++ b/arch/arm/mach-shmobile/common.c @@ -0,0 +1,24 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ +#include +#include +#include + +void __init shmobile_init_late(void) +{ + shmobile_suspend_init(); + shmobile_cpuidle_init(); +} diff --git a/arch/arm/mach-shmobile/cpuidle.c b/arch/arm/mach-shmobile/cpuidle.c index 7e65591..7b541e9 100644 --- a/arch/arm/mach-shmobile/cpuidle.c +++ b/arch/arm/mach-shmobile/cpuidle.c @@ -46,7 +46,7 @@ static struct cpuidle_driver shmobile_cpuidle_driver = { void (*shmobile_cpuidle_setup)(struct cpuidle_driver *drv); -static int shmobile_cpuidle_init(void) +int shmobile_cpuidle_init(void) { struct cpuidle_device *dev = &shmobile_cpuidle_dev; struct cpuidle_driver *drv = &shmobile_cpuidle_driver; @@ -65,4 +65,3 @@ static int shmobile_cpuidle_init(void) return 0; } -late_initcall(shmobile_cpuidle_init); diff --git a/arch/arm/mach-shmobile/include/mach/common.h b/arch/arm/mach-shmobile/include/mach/common.h index 83ad3fe..2a527c4 100644 --- a/arch/arm/mach-shmobile/include/mach/common.h +++ b/arch/arm/mach-shmobile/include/mach/common.h @@ -83,4 +83,18 @@ extern void r8a7779_secondary_init(unsigned int cpu); extern int r8a7779_boot_secondary(unsigned int cpu); extern void r8a7779_smp_prepare_cpus(void); +extern void shmobile_init_late(void); + +#ifdef CONFIG_SUSPEND +int shmobile_suspend_init(void); +#else +static inline int shmobile_suspend_init(void) { return 0; } +#endif + +#ifdef CONFIG_CPU_IDLE +int shmobile_cpuidle_init(void); +#else +static inline int shmobile_cpuidle_init(void) { return 0; } +#endif + #endif /* __ARCH_MACH_COMMON_H */ diff --git a/arch/arm/mach-shmobile/suspend.c b/arch/arm/mach-shmobile/suspend.c index 4d1b86a..47d83f7 100644 --- a/arch/arm/mach-shmobile/suspend.c +++ b/arch/arm/mach-shmobile/suspend.c @@ -39,9 +39,8 @@ struct platform_suspend_ops shmobile_suspend_ops = { .valid = suspend_valid_only_mem, }; -static int __init shmobile_suspend_init(void) +int __init shmobile_suspend_init(void) { suspend_set_ops(&shmobile_suspend_ops); return 0; } -late_initcall(shmobile_suspend_init); -- cgit v0.10.2 From 390e0cfd320b28d6964b92d83d59168a33c28866 Mon Sep 17 00:00:00 2001 From: Shawn Guo Date: Wed, 2 May 2012 17:08:06 +0800 Subject: ARM: tegra: use machine specific hook for late init Cc: Colin Cross Cc: Olof Johansson Signed-off-by: Shawn Guo Acked-by: Stephen Warren diff --git a/arch/arm/mach-tegra/board-dt-tegra20.c b/arch/arm/mach-tegra/board-dt-tegra20.c index 0952494..0f0da6c 100644 --- a/arch/arm/mach-tegra/board-dt-tegra20.c +++ b/arch/arm/mach-tegra/board-dt-tegra20.c @@ -142,6 +142,7 @@ DT_MACHINE_START(TEGRA_DT, "nVidia Tegra20 (Flattened Device Tree)") .handle_irq = gic_handle_irq, .timer = &tegra_timer, .init_machine = tegra_dt_init, + .init_late = tegra_init_late, .restart = tegra_assert_system_reset, .dt_compat = tegra20_dt_board_compat, MACHINE_END diff --git a/arch/arm/mach-tegra/board-dt-tegra30.c b/arch/arm/mach-tegra/board-dt-tegra30.c index 5f7c03e..09d21b2 100644 --- a/arch/arm/mach-tegra/board-dt-tegra30.c +++ b/arch/arm/mach-tegra/board-dt-tegra30.c @@ -80,6 +80,7 @@ DT_MACHINE_START(TEGRA30_DT, "NVIDIA Tegra30 (Flattened Device Tree)") .handle_irq = gic_handle_irq, .timer = &tegra_timer, .init_machine = tegra30_dt_init, + .init_late = tegra_init_late, .restart = tegra_assert_system_reset, .dt_compat = tegra30_dt_board_compat, MACHINE_END diff --git a/arch/arm/mach-tegra/board-harmony.c b/arch/arm/mach-tegra/board-harmony.c index c00aadb..987115a 100644 --- a/arch/arm/mach-tegra/board-harmony.c +++ b/arch/arm/mach-tegra/board-harmony.c @@ -191,5 +191,6 @@ MACHINE_START(HARMONY, "harmony") .handle_irq = gic_handle_irq, .timer = &tegra_timer, .init_machine = tegra_harmony_init, + .init_late = tegra_init_late, .restart = tegra_assert_system_reset, MACHINE_END diff --git a/arch/arm/mach-tegra/board-paz00.c b/arch/arm/mach-tegra/board-paz00.c index 330afdf..2e8a4c6 100644 --- a/arch/arm/mach-tegra/board-paz00.c +++ b/arch/arm/mach-tegra/board-paz00.c @@ -221,5 +221,6 @@ MACHINE_START(PAZ00, "Toshiba AC100 / Dynabook AZ") .handle_irq = gic_handle_irq, .timer = &tegra_timer, .init_machine = tegra_paz00_init, + .init_late = tegra_init_late, .restart = tegra_assert_system_reset, MACHINE_END diff --git a/arch/arm/mach-tegra/board-seaboard.c b/arch/arm/mach-tegra/board-seaboard.c index d669847..273eaaa 100644 --- a/arch/arm/mach-tegra/board-seaboard.c +++ b/arch/arm/mach-tegra/board-seaboard.c @@ -289,6 +289,7 @@ MACHINE_START(SEABOARD, "seaboard") .handle_irq = gic_handle_irq, .timer = &tegra_timer, .init_machine = tegra_seaboard_init, + .init_late = tegra_init_late, .restart = tegra_assert_system_reset, MACHINE_END @@ -300,6 +301,7 @@ MACHINE_START(KAEN, "kaen") .handle_irq = gic_handle_irq, .timer = &tegra_timer, .init_machine = tegra_kaen_init, + .init_late = tegra_init_late, .restart = tegra_assert_system_reset, MACHINE_END @@ -311,5 +313,6 @@ MACHINE_START(WARIO, "wario") .handle_irq = gic_handle_irq, .timer = &tegra_timer, .init_machine = tegra_wario_init, + .init_late = tegra_init_late, .restart = tegra_assert_system_reset, MACHINE_END diff --git a/arch/arm/mach-tegra/board-trimslice.c b/arch/arm/mach-tegra/board-trimslice.c index cd52820a..0a2a0e5 100644 --- a/arch/arm/mach-tegra/board-trimslice.c +++ b/arch/arm/mach-tegra/board-trimslice.c @@ -180,5 +180,6 @@ MACHINE_START(TRIMSLICE, "trimslice") .handle_irq = gic_handle_irq, .timer = &tegra_timer, .init_machine = tegra_trimslice_init, + .init_late = tegra_init_late, .restart = tegra_assert_system_reset, MACHINE_END diff --git a/arch/arm/mach-tegra/board.h b/arch/arm/mach-tegra/board.h index 75d1543..6501496 100644 --- a/arch/arm/mach-tegra/board.h +++ b/arch/arm/mach-tegra/board.h @@ -32,5 +32,19 @@ void __init tegra_init_irq(void); void __init tegra_dt_init_irq(void); int __init tegra_pcie_init(bool init_port0, bool init_port1); +void tegra_init_late(void); + +#ifdef CONFIG_DEBUG_FS +int tegra_clk_debugfs_init(void); +#else +static inline int tegra_clk_debugfs_init(void) { return 0; } +#endif + +#if defined(CONFIG_ARCH_TEGRA_2x_SOC) && defined(CONFIG_DEBUG_FS) +int __init tegra_powergate_debugfs_init(void); +#else +static inline int tegra_powergate_debugfs_init(void) { return 0; } +#endif + extern struct sys_timer tegra_timer; #endif diff --git a/arch/arm/mach-tegra/clock.c b/arch/arm/mach-tegra/clock.c index 8dad8d1..58f981c 100644 --- a/arch/arm/mach-tegra/clock.c +++ b/arch/arm/mach-tegra/clock.c @@ -642,7 +642,7 @@ static int clk_debugfs_register(struct clk *c) return 0; } -static int __init clk_debugfs_init(void) +int __init tegra_clk_debugfs_init(void) { struct clk *c; struct dentry *d; @@ -669,5 +669,4 @@ err_out: return err; } -late_initcall(clk_debugfs_init); #endif diff --git a/arch/arm/mach-tegra/common.c b/arch/arm/mach-tegra/common.c index 22df10f..c90ec93 100644 --- a/arch/arm/mach-tegra/common.c +++ b/arch/arm/mach-tegra/common.c @@ -132,3 +132,9 @@ void __init tegra30_init_early(void) tegra_powergate_init(); } #endif + +void __init tegra_init_late(void) +{ + tegra_clk_debugfs_init(); + tegra_powergate_debugfs_init(); +} diff --git a/arch/arm/mach-tegra/powergate.c b/arch/arm/mach-tegra/powergate.c index c238699..f5b12fb 100644 --- a/arch/arm/mach-tegra/powergate.c +++ b/arch/arm/mach-tegra/powergate.c @@ -234,7 +234,7 @@ static const struct file_operations powergate_fops = { .release = single_release, }; -static int __init powergate_debugfs_init(void) +int __init tegra_powergate_debugfs_init(void) { struct dentry *d; int err = -ENOMEM; @@ -247,6 +247,4 @@ static int __init powergate_debugfs_init(void) return err; } -late_initcall(powergate_debugfs_init); - #endif -- cgit v0.10.2 From a010bc2b9e2ccc0b59057d4cc136d48a1f7cebcb Mon Sep 17 00:00:00 2001 From: Shawn Guo Date: Wed, 2 May 2012 17:10:07 +0800 Subject: ARM: ux500: use machine specific hook for late init Signed-off-by: Shawn Guo Acked-by: srinidhi kasagar Acked-by: Linus Walleij diff --git a/arch/arm/mach-ux500/board-mop500.c b/arch/arm/mach-ux500/board-mop500.c index 77d03c1..348ebd9 100644 --- a/arch/arm/mach-ux500/board-mop500.c +++ b/arch/arm/mach-ux500/board-mop500.c @@ -722,6 +722,7 @@ MACHINE_START(U8500, "ST-Ericsson MOP500 platform") .timer = &ux500_timer, .handle_irq = gic_handle_irq, .init_machine = mop500_init_machine, + .init_late = ux500_init_late, MACHINE_END MACHINE_START(HREFV60, "ST-Ericsson U8500 Platform HREFv60+") @@ -731,6 +732,7 @@ MACHINE_START(HREFV60, "ST-Ericsson U8500 Platform HREFv60+") .timer = &ux500_timer, .handle_irq = gic_handle_irq, .init_machine = hrefv60_init_machine, + .init_late = ux500_init_late, MACHINE_END MACHINE_START(SNOWBALL, "Calao Systems Snowball platform") @@ -741,6 +743,7 @@ MACHINE_START(SNOWBALL, "Calao Systems Snowball platform") .timer = &ux500_timer, .handle_irq = gic_handle_irq, .init_machine = snowball_init_machine, + .init_late = ux500_init_late, MACHINE_END #ifdef CONFIG_MACH_UX500_DT @@ -830,6 +833,7 @@ DT_MACHINE_START(U8500_DT, "ST-Ericsson U8500 platform (Device Tree Support)") .timer = &ux500_timer, .handle_irq = gic_handle_irq, .init_machine = u8500_init_machine, + .init_late = ux500_init_late, .dt_compat = u8500_dt_board_compat, MACHINE_END #endif diff --git a/arch/arm/mach-ux500/clock.c b/arch/arm/mach-ux500/clock.c index ec35f0a..ce00f5e 100644 --- a/arch/arm/mach-ux500/clock.c +++ b/arch/arm/mach-ux500/clock.c @@ -633,7 +633,7 @@ static int clk_debugfs_register(struct clk *c) return 0; } -static int __init clk_debugfs_init(void) +int __init clk_debugfs_init(void) { struct clk *c; struct dentry *d; @@ -655,7 +655,6 @@ err_out: return err; } -late_initcall(clk_debugfs_init); #endif /* defined(CONFIG_DEBUG_FS) */ unsigned long clk_smp_twd_rate = 500000000; @@ -694,12 +693,11 @@ static struct notifier_block clk_twd_cpufreq_nb = { .notifier_call = clk_twd_cpufreq_transition, }; -static int clk_init_smp_twd_cpufreq(void) +int clk_init_smp_twd_cpufreq(void) { return cpufreq_register_notifier(&clk_twd_cpufreq_nb, CPUFREQ_TRANSITION_NOTIFIER); } -late_initcall(clk_init_smp_twd_cpufreq); #endif diff --git a/arch/arm/mach-ux500/clock.h b/arch/arm/mach-ux500/clock.h index d776ada..65d27a1 100644 --- a/arch/arm/mach-ux500/clock.h +++ b/arch/arm/mach-ux500/clock.h @@ -150,3 +150,15 @@ struct clk clk_##_name = { \ int __init clk_db8500_ed_fixup(void); int __init clk_init(void); + +#ifdef CONFIG_DEBUG_FS +int clk_debugfs_init(void); +#else +static inline int clk_debugfs_init(void) { return 0; } +#endif + +#ifdef CONFIG_CPU_FREQ +int clk_init_smp_twd_cpufreq(void); +#else +static inline int clk_init_smp_twd_cpufreq(void) { return 0; } +#endif diff --git a/arch/arm/mach-ux500/cpu.c b/arch/arm/mach-ux500/cpu.c index d11f389..9856c57 100644 --- a/arch/arm/mach-ux500/cpu.c +++ b/arch/arm/mach-ux500/cpu.c @@ -67,6 +67,12 @@ void __init ux500_init_irq(void) clk_init(); } +void __init ux500_init_late(void) +{ + clk_debugfs_init(); + clk_init_smp_twd_cpufreq(); +} + static const char * __init ux500_get_machine(void) { return kasprintf(GFP_KERNEL, "DB%4x", dbx500_partnumber()); diff --git a/arch/arm/mach-ux500/include/mach/setup.h b/arch/arm/mach-ux500/include/mach/setup.h index 3dc00ff..7b5d865 100644 --- a/arch/arm/mach-ux500/include/mach/setup.h +++ b/arch/arm/mach-ux500/include/mach/setup.h @@ -22,6 +22,7 @@ extern struct device * __init u5500_init_devices(void); extern struct device * __init u8500_init_devices(void); extern void __init ux500_init_irq(void); +extern void __init ux500_init_late(void); extern void __init u5500_sdi_init(struct device *parent); -- cgit v0.10.2 From c0a120a4bc68f5dc5f5238e76013cc18bac0ffed Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Tue, 8 May 2012 13:59:38 +0100 Subject: mmc: mmci: Fix compiler error when CONFIG_OF is not set error: implicit declaration of function 'mmci_dt_populate_generic_pdata' This is due to the '#if CONFIG_OF' guards placed around mmci_dt_populate_generic_pdata(), but not around the call to it. We repair this by inserting a stub which elegantly returns when CONFIG_OF is not set. Reported-by: Paul Gortmaker Signed-off-by: Lee Jones Signed-off-by: Chris Ball diff --git a/drivers/mmc/host/mmci.c b/drivers/mmc/host/mmci.c index 2303a16..efc822d 100644 --- a/drivers/mmc/host/mmci.c +++ b/drivers/mmc/host/mmci.c @@ -1241,6 +1241,12 @@ static void mmci_dt_populate_generic_pdata(struct device_node *np, pr_warn("%s: Unsupported bus width\n", np->full_name); } } +#else +static void mmci_dt_populate_generic_pdata(struct device_node *np, + struct mmci_platform_data *pdata) +{ + return; +} #endif static int __devinit mmci_probe(struct amba_device *dev, -- cgit v0.10.2 From 23b5e15a2994fb0c1444f92b76f09a482f32843c Mon Sep 17 00:00:00 2001 From: Shawn Guo Date: Sun, 29 Apr 2012 00:02:34 +0800 Subject: clk: mxs: add mxs specific clocks Add mxs specific clocks, pll, reference clock (PFD), integer divider and fractional divider. Signed-off-by: Shawn Guo diff --git a/drivers/clk/mxs/Makefile b/drivers/clk/mxs/Makefile new file mode 100644 index 0000000..d9472a8 --- /dev/null +++ b/drivers/clk/mxs/Makefile @@ -0,0 +1,5 @@ +# +# Makefile for mxs specific clk +# + +obj-y += clk.o clk-pll.o clk-ref.o clk-div.o clk-frac.o diff --git a/drivers/clk/mxs/clk-div.c b/drivers/clk/mxs/clk-div.c new file mode 100644 index 0000000..90e1da9 --- /dev/null +++ b/drivers/clk/mxs/clk-div.c @@ -0,0 +1,110 @@ +/* + * Copyright 2012 Freescale Semiconductor, Inc. + * + * The code contained herein is licensed under the GNU General Public + * License. You may obtain a copy of the GNU General Public License + * Version 2 or later at the following locations: + * + * http://www.opensource.org/licenses/gpl-license.html + * http://www.gnu.org/copyleft/gpl.html + */ + +#include +#include +#include +#include +#include "clk.h" + +/** + * struct clk_div - mxs integer divider clock + * @divider: the parent class + * @ops: pointer to clk_ops of parent class + * @reg: register address + * @busy: busy bit shift + * + * The mxs divider clock is a subclass of basic clk_divider with an + * addtional busy bit. + */ +struct clk_div { + struct clk_divider divider; + const struct clk_ops *ops; + void __iomem *reg; + u8 busy; +}; + +static inline struct clk_div *to_clk_div(struct clk_hw *hw) +{ + struct clk_divider *divider = container_of(hw, struct clk_divider, hw); + + return container_of(divider, struct clk_div, divider); +} + +static unsigned long clk_div_recalc_rate(struct clk_hw *hw, + unsigned long parent_rate) +{ + struct clk_div *div = to_clk_div(hw); + + return div->ops->recalc_rate(&div->divider.hw, parent_rate); +} + +static long clk_div_round_rate(struct clk_hw *hw, unsigned long rate, + unsigned long *prate) +{ + struct clk_div *div = to_clk_div(hw); + + return div->ops->round_rate(&div->divider.hw, rate, prate); +} + +static int clk_div_set_rate(struct clk_hw *hw, unsigned long rate, + unsigned long parent_rate) +{ + struct clk_div *div = to_clk_div(hw); + int ret; + + ret = div->ops->set_rate(&div->divider.hw, rate, parent_rate); + if (!ret) + ret = mxs_clk_wait(div->reg, div->busy); + + return ret; +} + +static struct clk_ops clk_div_ops = { + .recalc_rate = clk_div_recalc_rate, + .round_rate = clk_div_round_rate, + .set_rate = clk_div_set_rate, +}; + +struct clk *mxs_clk_div(const char *name, const char *parent_name, + void __iomem *reg, u8 shift, u8 width, u8 busy) +{ + struct clk_div *div; + struct clk *clk; + struct clk_init_data init; + + div = kzalloc(sizeof(*div), GFP_KERNEL); + if (!div) + return ERR_PTR(-ENOMEM); + + init.name = name; + init.ops = &clk_div_ops; + init.flags = CLK_SET_RATE_PARENT; + init.parent_names = (parent_name ? &parent_name: NULL); + init.num_parents = (parent_name ? 1 : 0); + + div->reg = reg; + div->busy = busy; + + div->divider.reg = reg; + div->divider.shift = shift; + div->divider.width = width; + div->divider.flags = CLK_DIVIDER_ONE_BASED; + div->divider.lock = &mxs_lock; + div->divider.hw.init = &init; + div->ops = &clk_divider_ops; + + clk = clk_register(NULL, &div->divider.hw); + if (IS_ERR(clk)) + kfree(div); + + return clk; +} diff --git a/drivers/clk/mxs/clk-frac.c b/drivers/clk/mxs/clk-frac.c new file mode 100644 index 0000000..e6aa6b5 --- /dev/null +++ b/drivers/clk/mxs/clk-frac.c @@ -0,0 +1,139 @@ +/* + * Copyright 2012 Freescale Semiconductor, Inc. + * + * The code contained herein is licensed under the GNU General Public + * License. You may obtain a copy of the GNU General Public License + * Version 2 or later at the following locations: + * + * http://www.opensource.org/licenses/gpl-license.html + * http://www.gnu.org/copyleft/gpl.html + */ + +#include +#include +#include +#include +#include +#include "clk.h" + +/** + * struct clk_frac - mxs fractional divider clock + * @hw: clk_hw for the fractional divider clock + * @reg: register address + * @shift: the divider bit shift + * @width: the divider bit width + * @busy: busy bit shift + * + * The clock is an adjustable fractional divider with a busy bit to wait + * when the divider is adjusted. + */ +struct clk_frac { + struct clk_hw hw; + void __iomem *reg; + u8 shift; + u8 width; + u8 busy; +}; + +#define to_clk_frac(_hw) container_of(_hw, struct clk_frac, hw) + +static unsigned long clk_frac_recalc_rate(struct clk_hw *hw, + unsigned long parent_rate) +{ + struct clk_frac *frac = to_clk_frac(hw); + u32 div; + + div = readl_relaxed(frac->reg) >> frac->shift; + div &= (1 << frac->width) - 1; + + return (parent_rate >> frac->width) * div; +} + +static long clk_frac_round_rate(struct clk_hw *hw, unsigned long rate, + unsigned long *prate) +{ + struct clk_frac *frac = to_clk_frac(hw); + unsigned long parent_rate = *prate; + u32 div; + u64 tmp; + + if (rate > parent_rate) + return -EINVAL; + + tmp = rate; + tmp <<= frac->width; + do_div(tmp, parent_rate); + div = tmp; + + if (!div) + return -EINVAL; + + return (parent_rate >> frac->width) * div; +} + +static int clk_frac_set_rate(struct clk_hw *hw, unsigned long rate, + unsigned long parent_rate) +{ + struct clk_frac *frac = to_clk_frac(hw); + unsigned long flags; + u32 div, val; + u64 tmp; + + if (rate > parent_rate) + return -EINVAL; + + tmp = rate; + tmp <<= frac->width; + do_div(tmp, parent_rate); + div = tmp; + + if (!div) + return -EINVAL; + + spin_lock_irqsave(&mxs_lock, flags); + + val = readl_relaxed(frac->reg); + val &= ~(((1 << frac->width) - 1) << frac->shift); + val |= div << frac->shift; + writel_relaxed(val, frac->reg); + + spin_unlock_irqrestore(&mxs_lock, flags); + + return mxs_clk_wait(frac->reg, frac->busy); +} + +static struct clk_ops clk_frac_ops = { + .recalc_rate = clk_frac_recalc_rate, + .round_rate = clk_frac_round_rate, + .set_rate = clk_frac_set_rate, +}; + +struct clk *mxs_clk_frac(const char *name, const char *parent_name, + void __iomem *reg, u8 shift, u8 width, u8 busy) +{ + struct clk_frac *frac; + struct clk *clk; + struct clk_init_data init; + + frac = kzalloc(sizeof(*frac), GFP_KERNEL); + if (!frac) + return ERR_PTR(-ENOMEM); + + init.name = name; + init.ops = &clk_frac_ops; + init.flags = CLK_SET_RATE_PARENT; + init.parent_names = (parent_name ? &parent_name: NULL); + init.num_parents = (parent_name ? 1 : 0); + + frac->reg = reg; + frac->shift = shift; + frac->width = width; + frac->busy = busy; + frac->hw.init = &init; + + clk = clk_register(NULL, &frac->hw); + if (IS_ERR(clk)) + kfree(frac); + + return clk; +} diff --git a/drivers/clk/mxs/clk-pll.c b/drivers/clk/mxs/clk-pll.c new file mode 100644 index 0000000..fadae41 --- /dev/null +++ b/drivers/clk/mxs/clk-pll.c @@ -0,0 +1,116 @@ +/* + * Copyright 2012 Freescale Semiconductor, Inc. + * + * The code contained herein is licensed under the GNU General Public + * License. You may obtain a copy of the GNU General Public License + * Version 2 or later at the following locations: + * + * http://www.opensource.org/licenses/gpl-license.html + * http://www.gnu.org/copyleft/gpl.html + */ + +#include +#include +#include +#include +#include +#include +#include "clk.h" + +/** + * struct clk_pll - mxs pll clock + * @hw: clk_hw for the pll + * @base: base address of the pll + * @power: the shift of power bit + * @rate: the clock rate of the pll + * + * The mxs pll is a fixed rate clock with power and gate control, + * and the shift of gate bit is always 31. + */ +struct clk_pll { + struct clk_hw hw; + void __iomem *base; + u8 power; + unsigned long rate; +}; + +#define to_clk_pll(_hw) container_of(_hw, struct clk_pll, hw) + +static int clk_pll_prepare(struct clk_hw *hw) +{ + struct clk_pll *pll = to_clk_pll(hw); + + writel_relaxed(1 << pll->power, pll->base + SET); + + udelay(10); + + return 0; +} + +static void clk_pll_unprepare(struct clk_hw *hw) +{ + struct clk_pll *pll = to_clk_pll(hw); + + writel_relaxed(1 << pll->power, pll->base + CLR); +} + +static int clk_pll_enable(struct clk_hw *hw) +{ + struct clk_pll *pll = to_clk_pll(hw); + + writel_relaxed(1 << 31, pll->base + CLR); + + return 0; +} + +static void clk_pll_disable(struct clk_hw *hw) +{ + struct clk_pll *pll = to_clk_pll(hw); + + writel_relaxed(1 << 31, pll->base + SET); +} + +static unsigned long clk_pll_recalc_rate(struct clk_hw *hw, + unsigned long parent_rate) +{ + struct clk_pll *pll = to_clk_pll(hw); + + return pll->rate; +} + +static const struct clk_ops clk_pll_ops = { + .prepare = clk_pll_prepare, + .unprepare = clk_pll_unprepare, + .enable = clk_pll_enable, + .disable = clk_pll_disable, + .recalc_rate = clk_pll_recalc_rate, +}; + +struct clk *mxs_clk_pll(const char *name, const char *parent_name, + void __iomem *base, u8 power, unsigned long rate) +{ + struct clk_pll *pll; + struct clk *clk; + struct clk_init_data init; + + pll = kzalloc(sizeof(*pll), GFP_KERNEL); + if (!pll) + return ERR_PTR(-ENOMEM); + + init.name = name; + init.ops = &clk_pll_ops; + init.flags = 0; + init.parent_names = (parent_name ? &parent_name: NULL); + init.num_parents = (parent_name ? 1 : 0); + + pll->base = base; + pll->rate = rate; + pll->power = power; + pll->hw.init = &init; + + clk = clk_register(NULL, &pll->hw); + if (IS_ERR(clk)) + kfree(pll); + + return clk; +} diff --git a/drivers/clk/mxs/clk-ref.c b/drivers/clk/mxs/clk-ref.c new file mode 100644 index 0000000..4adeed6 --- /dev/null +++ b/drivers/clk/mxs/clk-ref.c @@ -0,0 +1,154 @@ +/* + * Copyright 2012 Freescale Semiconductor, Inc. + * + * The code contained herein is licensed under the GNU General Public + * License. You may obtain a copy of the GNU General Public License + * Version 2 or later at the following locations: + * + * http://www.opensource.org/licenses/gpl-license.html + * http://www.gnu.org/copyleft/gpl.html + */ + +#include +#include +#include +#include +#include +#include "clk.h" + +/** + * struct clk_ref - mxs reference clock + * @hw: clk_hw for the reference clock + * @reg: register address + * @idx: the index of the reference clock within the same register + * + * The mxs reference clock sources from pll. Every 4 reference clocks share + * one register space, and @idx is used to identify them. Each reference + * clock has a gate control and a fractional * divider. The rate is calculated + * as pll rate * (18 / FRAC), where FRAC = 18 ~ 35. + */ +struct clk_ref { + struct clk_hw hw; + void __iomem *reg; + u8 idx; +}; + +#define to_clk_ref(_hw) container_of(_hw, struct clk_ref, hw) + +static int clk_ref_enable(struct clk_hw *hw) +{ + struct clk_ref *ref = to_clk_ref(hw); + + writel_relaxed(1 << ((ref->idx + 1) * 8 - 1), ref->reg + CLR); + + return 0; +} + +static void clk_ref_disable(struct clk_hw *hw) +{ + struct clk_ref *ref = to_clk_ref(hw); + + writel_relaxed(1 << ((ref->idx + 1) * 8 - 1), ref->reg + SET); +} + +static unsigned long clk_ref_recalc_rate(struct clk_hw *hw, + unsigned long parent_rate) +{ + struct clk_ref *ref = to_clk_ref(hw); + u64 tmp = parent_rate; + u8 frac = (readl_relaxed(ref->reg) >> (ref->idx * 8)) & 0x3f; + + tmp *= 18; + do_div(tmp, frac); + + return tmp; +} + +static long clk_ref_round_rate(struct clk_hw *hw, unsigned long rate, + unsigned long *prate) +{ + unsigned long parent_rate = *prate; + u64 tmp = parent_rate; + u8 frac; + + tmp = tmp * 18 + rate / 2; + do_div(tmp, rate); + frac = tmp; + + if (frac < 18) + frac = 18; + else if (frac > 35) + frac = 35; + + tmp = parent_rate; + tmp *= 18; + do_div(tmp, frac); + + return tmp; +} + +static int clk_ref_set_rate(struct clk_hw *hw, unsigned long rate, + unsigned long parent_rate) +{ + struct clk_ref *ref = to_clk_ref(hw); + unsigned long flags; + u64 tmp = parent_rate; + u32 val; + u8 frac, shift = ref->idx * 8; + + tmp = tmp * 18 + rate / 2; + do_div(tmp, rate); + frac = tmp; + + if (frac < 18) + frac = 18; + else if (frac > 35) + frac = 35; + + spin_lock_irqsave(&mxs_lock, flags); + + val = readl_relaxed(ref->reg); + val &= ~(0x3f << shift); + val |= frac << shift; + writel_relaxed(val, ref->reg); + + spin_unlock_irqrestore(&mxs_lock, flags); + + return 0; +} + +static const struct clk_ops clk_ref_ops = { + .enable = clk_ref_enable, + .disable = clk_ref_disable, + .recalc_rate = clk_ref_recalc_rate, + .round_rate = clk_ref_round_rate, + .set_rate = clk_ref_set_rate, +}; + +struct clk *mxs_clk_ref(const char *name, const char *parent_name, + void __iomem *reg, u8 idx) +{ + struct clk_ref *ref; + struct clk *clk; + struct clk_init_data init; + + ref = kzalloc(sizeof(*ref), GFP_KERNEL); + if (!ref) + return ERR_PTR(-ENOMEM); + + init.name = name; + init.ops = &clk_ref_ops; + init.flags = 0; + init.parent_names = (parent_name ? &parent_name: NULL); + init.num_parents = (parent_name ? 1 : 0); + + ref->reg = reg; + ref->idx = idx; + ref->hw.init = &init; + + clk = clk_register(NULL, &ref->hw); + if (IS_ERR(clk)) + kfree(ref); + + return clk; +} diff --git a/drivers/clk/mxs/clk.c b/drivers/clk/mxs/clk.c new file mode 100644 index 0000000..b24d560 --- /dev/null +++ b/drivers/clk/mxs/clk.c @@ -0,0 +1,28 @@ +/* + * Copyright 2012 Freescale Semiconductor, Inc. + * + * The code contained herein is licensed under the GNU General Public + * License. You may obtain a copy of the GNU General Public License + * Version 2 or later at the following locations: + * + * http://www.opensource.org/licenses/gpl-license.html + * http://www.gnu.org/copyleft/gpl.html + */ + +#include +#include +#include +#include + +DEFINE_SPINLOCK(mxs_lock); + +int mxs_clk_wait(void __iomem *reg, u8 shift) +{ + unsigned long timeout = jiffies + msecs_to_jiffies(10); + + while (readl_relaxed(reg) & (1 << shift)) + if (time_after(jiffies, timeout)) + return -ETIMEDOUT; + + return 0; +} diff --git a/drivers/clk/mxs/clk.h b/drivers/clk/mxs/clk.h new file mode 100644 index 0000000..81421e2 --- /dev/null +++ b/drivers/clk/mxs/clk.h @@ -0,0 +1,66 @@ +/* + * Copyright 2012 Freescale Semiconductor, Inc. + * + * The code contained herein is licensed under the GNU General Public + * License. You may obtain a copy of the GNU General Public License + * Version 2 or later at the following locations: + * + * http://www.opensource.org/licenses/gpl-license.html + * http://www.gnu.org/copyleft/gpl.html + */ + +#ifndef __MXS_CLK_H +#define __MXS_CLK_H + +#include +#include +#include + +#define SET 0x4 +#define CLR 0x8 + +extern spinlock_t mxs_lock; + +int mxs_clk_wait(void __iomem *reg, u8 shift); + +struct clk *mxs_clk_pll(const char *name, const char *parent_name, + void __iomem *base, u8 power, unsigned long rate); + +struct clk *mxs_clk_ref(const char *name, const char *parent_name, + void __iomem *reg, u8 idx); + +struct clk *mxs_clk_div(const char *name, const char *parent_name, + void __iomem *reg, u8 shift, u8 width, u8 busy); + +struct clk *mxs_clk_frac(const char *name, const char *parent_name, + void __iomem *reg, u8 shift, u8 width, u8 busy); + +static inline struct clk *mxs_clk_fixed(const char *name, int rate) +{ + return clk_register_fixed_rate(NULL, name, NULL, CLK_IS_ROOT, rate); +} + +static inline struct clk *mxs_clk_gate(const char *name, + const char *parent_name, void __iomem *reg, u8 shift) +{ + return clk_register_gate(NULL, name, parent_name, CLK_SET_RATE_PARENT, + reg, shift, CLK_GATE_SET_TO_DISABLE, + &mxs_lock); +} + +static inline struct clk *mxs_clk_mux(const char *name, void __iomem *reg, + u8 shift, u8 width, const char **parent_names, int num_parents) +{ + return clk_register_mux(NULL, name, parent_names, num_parents, + CLK_SET_RATE_PARENT, reg, shift, width, + 0, &mxs_lock); +} + +static inline struct clk *mxs_clk_fixed_factor(const char *name, + const char *parent_name, unsigned int mult, unsigned int div) +{ + return clk_register_fixed_factor(NULL, name, parent_name, + CLK_SET_RATE_PARENT, mult, div); +} + +#endif /* __MXS_CLK_H */ -- cgit v0.10.2 From ff261b7f641edc61ca05f0c93b5631c9c8622c08 Mon Sep 17 00:00:00 2001 From: Shawn Guo Date: Sun, 29 Apr 2012 00:02:35 +0800 Subject: clk: mxs: add clock support for imx23 Add imx23 clock support based on common clk framework. Signed-off-by: Shawn Guo diff --git a/drivers/clk/mxs/Makefile b/drivers/clk/mxs/Makefile index d9472a8..7086ad3 100644 --- a/drivers/clk/mxs/Makefile +++ b/drivers/clk/mxs/Makefile @@ -3,3 +3,5 @@ # obj-y += clk.o clk-pll.o clk-ref.o clk-div.o clk-frac.o + +obj-$(CONFIG_SOC_IMX23) += clk-imx23.o diff --git a/drivers/clk/mxs/clk-imx23.c b/drivers/clk/mxs/clk-imx23.c new file mode 100644 index 0000000..2ec76ff --- /dev/null +++ b/drivers/clk/mxs/clk-imx23.c @@ -0,0 +1,204 @@ +/* + * Copyright 2012 Freescale Semiconductor, Inc. + * + * The code contained herein is licensed under the GNU General Public + * License. You may obtain a copy of the GNU General Public License + * Version 2 or later at the following locations: + * + * http://www.opensource.org/licenses/gpl-license.html + * http://www.gnu.org/copyleft/gpl.html + */ + +#include +#include +#include +#include +#include +#include +#include +#include "clk.h" + +#define DIGCTRL MX23_IO_ADDRESS(MX23_DIGCTL_BASE_ADDR) +#define CLKCTRL MX23_IO_ADDRESS(MX23_CLKCTRL_BASE_ADDR) +#define PLLCTRL0 (CLKCTRL + 0x0000) +#define CPU (CLKCTRL + 0x0020) +#define HBUS (CLKCTRL + 0x0030) +#define XBUS (CLKCTRL + 0x0040) +#define XTAL (CLKCTRL + 0x0050) +#define PIX (CLKCTRL + 0x0060) +#define SSP (CLKCTRL + 0x0070) +#define GPMI (CLKCTRL + 0x0080) +#define SPDIF (CLKCTRL + 0x0090) +#define EMI (CLKCTRL + 0x00a0) +#define SAIF (CLKCTRL + 0x00c0) +#define TV (CLKCTRL + 0x00d0) +#define ETM (CLKCTRL + 0x00e0) +#define FRAC (CLKCTRL + 0x00f0) +#define CLKSEQ (CLKCTRL + 0x0110) + +#define BP_CPU_INTERRUPT_WAIT 12 +#define BP_CLKSEQ_BYPASS_SAIF 0 +#define BP_CLKSEQ_BYPASS_SSP 5 +#define BP_SAIF_DIV_FRAC_EN 16 +#define BP_FRAC_IOFRAC 24 + +static void __init clk_misc_init(void) +{ + u32 val; + + /* Gate off cpu clock in WFI for power saving */ + __mxs_setl(1 << BP_CPU_INTERRUPT_WAIT, CPU); + + /* Clear BYPASS for SAIF */ + __mxs_clrl(1 << BP_CLKSEQ_BYPASS_SAIF, CLKSEQ); + + /* SAIF has to use frac div for functional operation */ + val = readl_relaxed(SAIF); + val |= 1 << BP_SAIF_DIV_FRAC_EN; + writel_relaxed(val, SAIF); + + /* + * Source ssp clock from ref_io than ref_xtal, + * as ref_xtal only provides 24 MHz as maximum. + */ + __mxs_clrl(1 << BP_CLKSEQ_BYPASS_SSP, CLKSEQ); + + /* + * 480 MHz seems too high to be ssp clock source directly, + * so set frac to get a 288 MHz ref_io. + */ + __mxs_clrl(0x3f << BP_FRAC_IOFRAC, FRAC); + __mxs_setl(30 << BP_FRAC_IOFRAC, FRAC); +} + +static struct clk_lookup uart_lookups[] __initdata = { + { .dev_id = "duart", }, + { .dev_id = "mxs-auart.0", }, + { .dev_id = "mxs-auart.1", }, + { .dev_id = "8006c000.serial", }, + { .dev_id = "8006e000.serial", }, + { .dev_id = "80070000.serial", }, +}; + +static struct clk_lookup hbus_lookups[] __initdata = { + { .dev_id = "mxs-dma-apbh", }, + { .dev_id = "80004000.dma-apbh", }, +}; + +static struct clk_lookup xbus_lookups[] __initdata = { + { .dev_id = "duart", .con_id = "apb_pclk"}, + { .dev_id = "mxs-dma-apbx", }, + { .dev_id = "80024000.dma-apbx", }, +}; + +static struct clk_lookup ssp_lookups[] __initdata = { + { .dev_id = "mxs-mmc.0", }, + { .dev_id = "mxs-mmc.1", }, + { .dev_id = "80010000.ssp", }, + { .dev_id = "80034000.ssp", }, +}; + +static struct clk_lookup lcdif_lookups[] __initdata = { + { .dev_id = "imx23-fb", }, + { .dev_id = "80030000.lcdif", }, +}; + +static struct clk_lookup gpmi_lookups[] __initdata = { + { .dev_id = "imx23-gpmi-nand", }, + { .dev_id = "8000c000.gpmi", }, +}; + +static const char *sel_pll[] __initconst = { "pll", "ref_xtal", }; +static const char *sel_cpu[] __initconst = { "ref_cpu", "ref_xtal", }; +static const char *sel_pix[] __initconst = { "ref_pix", "ref_xtal", }; +static const char *sel_io[] __initconst = { "ref_io", "ref_xtal", }; +static const char *cpu_sels[] __initconst = { "cpu_pll", "cpu_xtal", }; +static const char *emi_sels[] __initconst = { "emi_pll", "emi_xtal", }; + +enum imx23_clk { + ref_xtal, pll, ref_cpu, ref_emi, ref_pix, ref_io, saif_sel, + lcdif_sel, gpmi_sel, ssp_sel, emi_sel, cpu, etm_sel, cpu_pll, + cpu_xtal, hbus, xbus, lcdif_div, ssp_div, gpmi_div, emi_pll, + emi_xtal, etm_div, saif_div, clk32k_div, rtc, adc, spdif_div, + clk32k, dri, pwm, filt, uart, ssp, gpmi, spdif, emi, saif, + lcdif, etm, usb, usb_pwr, + clk_max +}; + +static struct clk *clks[clk_max]; + +static enum imx23_clk clks_init_on[] __initdata = { + cpu, hbus, xbus, emi, uart, +}; + +int __init mx23_clocks_init(void) +{ + int i; + + clk_misc_init(); + + clks[ref_xtal] = mxs_clk_fixed("ref_xtal", 24000000); + clks[pll] = mxs_clk_pll("pll", "ref_xtal", PLLCTRL0, 16, 480000000); + clks[ref_cpu] = mxs_clk_ref("ref_cpu", "pll", FRAC, 0); + clks[ref_emi] = mxs_clk_ref("ref_emi", "pll", FRAC, 1); + clks[ref_pix] = mxs_clk_ref("ref_pix", "pll", FRAC, 2); + clks[ref_io] = mxs_clk_ref("ref_io", "pll", FRAC, 3); + clks[saif_sel] = mxs_clk_mux("saif_sel", CLKSEQ, 0, 1, sel_pll, ARRAY_SIZE(sel_pll)); + clks[lcdif_sel] = mxs_clk_mux("lcdif_sel", CLKSEQ, 1, 1, sel_pix, ARRAY_SIZE(sel_pix)); + clks[gpmi_sel] = mxs_clk_mux("gpmi_sel", CLKSEQ, 4, 1, sel_io, ARRAY_SIZE(sel_io)); + clks[ssp_sel] = mxs_clk_mux("ssp_sel", CLKSEQ, 5, 1, sel_io, ARRAY_SIZE(sel_io)); + clks[emi_sel] = mxs_clk_mux("emi_sel", CLKSEQ, 6, 1, emi_sels, ARRAY_SIZE(emi_sels)); + clks[cpu] = mxs_clk_mux("cpu", CLKSEQ, 7, 1, cpu_sels, ARRAY_SIZE(cpu_sels)); + clks[etm_sel] = mxs_clk_mux("etm_sel", CLKSEQ, 8, 1, sel_cpu, ARRAY_SIZE(sel_cpu)); + clks[cpu_pll] = mxs_clk_div("cpu_pll", "ref_cpu", CPU, 0, 6, 28); + clks[cpu_xtal] = mxs_clk_div("cpu_xtal", "ref_xtal", CPU, 16, 10, 29); + clks[hbus] = mxs_clk_div("hbus", "cpu", HBUS, 0, 5, 29); + clks[xbus] = mxs_clk_div("xbus", "ref_xtal", XBUS, 0, 10, 31); + clks[lcdif_div] = mxs_clk_div("lcdif_div", "lcdif_sel", PIX, 0, 12, 29); + clks[ssp_div] = mxs_clk_div("ssp_div", "ssp_sel", SSP, 0, 9, 29); + clks[gpmi_div] = mxs_clk_div("gpmi_div", "gpmi_sel", GPMI, 0, 10, 29); + clks[emi_pll] = mxs_clk_div("emi_pll", "ref_emi", EMI, 0, 6, 28); + clks[emi_xtal] = mxs_clk_div("emi_xtal", "ref_xtal", EMI, 8, 4, 29); + clks[etm_div] = mxs_clk_div("etm_div", "etm_sel", ETM, 0, 6, 29); + clks[saif_div] = mxs_clk_frac("saif_div", "saif_sel", SAIF, 0, 16, 29); + clks[clk32k_div] = mxs_clk_fixed_factor("clk32k_div", "ref_xtal", 1, 750); + clks[rtc] = mxs_clk_fixed_factor("rtc", "ref_xtal", 1, 768); + clks[adc] = mxs_clk_fixed_factor("adc", "clk32k", 1, 16); + clks[spdif_div] = mxs_clk_fixed_factor("spdif_div", "pll", 1, 4); + clks[clk32k] = mxs_clk_gate("clk32k", "clk32k_div", XTAL, 26); + clks[dri] = mxs_clk_gate("dri", "ref_xtal", XTAL, 28); + clks[pwm] = mxs_clk_gate("pwm", "ref_xtal", XTAL, 29); + clks[filt] = mxs_clk_gate("filt", "ref_xtal", XTAL, 30); + clks[uart] = mxs_clk_gate("uart", "ref_xtal", XTAL, 31); + clks[ssp] = mxs_clk_gate("ssp", "ssp_div", SSP, 31); + clks[gpmi] = mxs_clk_gate("gpmi", "gpmi_div", GPMI, 31); + clks[spdif] = mxs_clk_gate("spdif", "spdif_div", SPDIF, 31); + clks[emi] = mxs_clk_gate("emi", "emi_sel", EMI, 31); + clks[saif] = mxs_clk_gate("saif", "saif_div", SAIF, 31); + clks[lcdif] = mxs_clk_gate("lcdif", "lcdif_div", PIX, 31); + clks[etm] = mxs_clk_gate("etm", "etm_div", ETM, 31); + clks[usb] = mxs_clk_gate("usb", "usb_pwr", DIGCTRL, 2); + clks[usb_pwr] = clk_register_gate(NULL, "usb_pwr", "pll", 0, PLLCTRL0, 18, 0, &mxs_lock); + + for (i = 0; i < ARRAY_SIZE(clks); i++) + if (IS_ERR(clks[i])) { + pr_err("i.MX23 clk %d: register failed with %ld\n", + i, PTR_ERR(clks[i])); + return PTR_ERR(clks[i]); + } + + clk_register_clkdev(clks[clk32k], NULL, "timrot"); + clk_register_clkdevs(clks[hbus], hbus_lookups, ARRAY_SIZE(hbus_lookups)); + clk_register_clkdevs(clks[xbus], xbus_lookups, ARRAY_SIZE(xbus_lookups)); + clk_register_clkdevs(clks[uart], uart_lookups, ARRAY_SIZE(uart_lookups)); + clk_register_clkdevs(clks[ssp], ssp_lookups, ARRAY_SIZE(ssp_lookups)); + clk_register_clkdevs(clks[gpmi], gpmi_lookups, ARRAY_SIZE(gpmi_lookups)); + clk_register_clkdevs(clks[lcdif], lcdif_lookups, ARRAY_SIZE(lcdif_lookups)); + + for (i = 0; i < ARRAY_SIZE(clks_init_on); i++) + clk_prepare_enable(clks[clks_init_on[i]]); + + mxs_timer_init(NULL, MX23_INT_TIMER0); + + return 0; +} -- cgit v0.10.2 From 7d81397cd93da2850e0aec54c3ba4eb4908a675b Mon Sep 17 00:00:00 2001 From: Shawn Guo Date: Sun, 29 Apr 2012 00:02:36 +0800 Subject: clk: mxs: add clock support for imx28 Add imx28 clock support based on common clk framework. Signed-off-by: Shawn Guo diff --git a/drivers/clk/mxs/Makefile b/drivers/clk/mxs/Makefile index 7086ad3..7bedeec 100644 --- a/drivers/clk/mxs/Makefile +++ b/drivers/clk/mxs/Makefile @@ -5,3 +5,4 @@ obj-y += clk.o clk-pll.o clk-ref.o clk-div.o clk-frac.o obj-$(CONFIG_SOC_IMX23) += clk-imx23.o +obj-$(CONFIG_SOC_IMX28) += clk-imx28.o diff --git a/drivers/clk/mxs/clk-imx28.c b/drivers/clk/mxs/clk-imx28.c new file mode 100644 index 0000000..4bfd1f4 --- /dev/null +++ b/drivers/clk/mxs/clk-imx28.c @@ -0,0 +1,337 @@ +/* + * Copyright 2012 Freescale Semiconductor, Inc. + * + * The code contained herein is licensed under the GNU General Public + * License. You may obtain a copy of the GNU General Public License + * Version 2 or later at the following locations: + * + * http://www.opensource.org/licenses/gpl-license.html + * http://www.gnu.org/copyleft/gpl.html + */ + +#include +#include +#include +#include +#include +#include +#include +#include "clk.h" + +#define CLKCTRL MX28_IO_ADDRESS(MX28_CLKCTRL_BASE_ADDR) +#define PLL0CTRL0 (CLKCTRL + 0x0000) +#define PLL1CTRL0 (CLKCTRL + 0x0020) +#define PLL2CTRL0 (CLKCTRL + 0x0040) +#define CPU (CLKCTRL + 0x0050) +#define HBUS (CLKCTRL + 0x0060) +#define XBUS (CLKCTRL + 0x0070) +#define XTAL (CLKCTRL + 0x0080) +#define SSP0 (CLKCTRL + 0x0090) +#define SSP1 (CLKCTRL + 0x00a0) +#define SSP2 (CLKCTRL + 0x00b0) +#define SSP3 (CLKCTRL + 0x00c0) +#define GPMI (CLKCTRL + 0x00d0) +#define SPDIF (CLKCTRL + 0x00e0) +#define EMI (CLKCTRL + 0x00f0) +#define SAIF0 (CLKCTRL + 0x0100) +#define SAIF1 (CLKCTRL + 0x0110) +#define LCDIF (CLKCTRL + 0x0120) +#define ETM (CLKCTRL + 0x0130) +#define ENET (CLKCTRL + 0x0140) +#define FLEXCAN (CLKCTRL + 0x0160) +#define FRAC0 (CLKCTRL + 0x01b0) +#define FRAC1 (CLKCTRL + 0x01c0) +#define CLKSEQ (CLKCTRL + 0x01d0) + +#define BP_CPU_INTERRUPT_WAIT 12 +#define BP_SAIF_DIV_FRAC_EN 16 +#define BP_ENET_DIV_TIME 21 +#define BP_ENET_SLEEP 31 +#define BP_CLKSEQ_BYPASS_SAIF0 0 +#define BP_CLKSEQ_BYPASS_SSP0 3 +#define BP_FRAC0_IO1FRAC 16 +#define BP_FRAC0_IO0FRAC 24 + +#define DIGCTRL MX28_IO_ADDRESS(MX28_DIGCTL_BASE_ADDR) +#define BP_SAIF_CLKMUX 10 + +/* + * HW_SAIF_CLKMUX_SEL: + * DIRECT(0x0): SAIF0 clock pins selected for SAIF0 input clocks, and SAIF1 + * clock pins selected for SAIF1 input clocks. + * CROSSINPUT(0x1): SAIF1 clock inputs selected for SAIF0 input clocks, and + * SAIF0 clock inputs selected for SAIF1 input clocks. + * EXTMSTR0(0x2): SAIF0 clock pin selected for both SAIF0 and SAIF1 input + * clocks. + * EXTMSTR1(0x3): SAIF1 clock pin selected for both SAIF0 and SAIF1 input + * clocks. + */ +int mxs_saif_clkmux_select(unsigned int clkmux) +{ + if (clkmux > 0x3) + return -EINVAL; + + __mxs_clrl(0x3 << BP_SAIF_CLKMUX, DIGCTRL); + __mxs_setl(clkmux << BP_SAIF_CLKMUX, DIGCTRL); + + return 0; +} + +static void __init clk_misc_init(void) +{ + u32 val; + + /* Gate off cpu clock in WFI for power saving */ + __mxs_setl(1 << BP_CPU_INTERRUPT_WAIT, CPU); + + /* 0 is a bad default value for a divider */ + __mxs_setl(1 << BP_ENET_DIV_TIME, ENET); + + /* Clear BYPASS for SAIF */ + __mxs_clrl(0x3 << BP_CLKSEQ_BYPASS_SAIF0, CLKSEQ); + + /* SAIF has to use frac div for functional operation */ + val = readl_relaxed(SAIF0); + val |= 1 << BP_SAIF_DIV_FRAC_EN; + writel_relaxed(val, SAIF0); + + val = readl_relaxed(SAIF1); + val |= 1 << BP_SAIF_DIV_FRAC_EN; + writel_relaxed(val, SAIF1); + + /* Extra fec clock setting */ + val = readl_relaxed(ENET); + val &= ~(1 << BP_ENET_SLEEP); + writel_relaxed(val, ENET); + + /* + * Source ssp clock from ref_io than ref_xtal, + * as ref_xtal only provides 24 MHz as maximum. + */ + __mxs_clrl(0xf << BP_CLKSEQ_BYPASS_SSP0, CLKSEQ); + + /* + * 480 MHz seems too high to be ssp clock source directly, + * so set frac0 to get a 288 MHz ref_io0. + */ + val = readl_relaxed(FRAC0); + val &= ~(0x3f << BP_FRAC0_IO0FRAC); + val |= 30 << BP_FRAC0_IO0FRAC; + writel_relaxed(val, FRAC0); +} + +static struct clk_lookup uart_lookups[] __initdata = { + { .dev_id = "duart", }, + { .dev_id = "mxs-auart.0", }, + { .dev_id = "mxs-auart.1", }, + { .dev_id = "mxs-auart.2", }, + { .dev_id = "mxs-auart.3", }, + { .dev_id = "mxs-auart.4", }, + { .dev_id = "8006a000.serial", }, + { .dev_id = "8006c000.serial", }, + { .dev_id = "8006e000.serial", }, + { .dev_id = "80070000.serial", }, + { .dev_id = "80072000.serial", }, + { .dev_id = "80074000.serial", }, +}; + +static struct clk_lookup hbus_lookups[] __initdata = { + { .dev_id = "mxs-dma-apbh", }, + { .dev_id = "80004000.dma-apbh", }, +}; + +static struct clk_lookup xbus_lookups[] __initdata = { + { .dev_id = "duart", .con_id = "apb_pclk"}, + { .dev_id = "mxs-dma-apbx", }, + { .dev_id = "80024000.dma-apbx", }, +}; + +static struct clk_lookup ssp0_lookups[] __initdata = { + { .dev_id = "mxs-mmc.0", }, + { .dev_id = "80010000.ssp", }, +}; + +static struct clk_lookup ssp1_lookups[] __initdata = { + { .dev_id = "mxs-mmc.1", }, + { .dev_id = "80012000.ssp", }, +}; + +static struct clk_lookup ssp2_lookups[] __initdata = { + { .dev_id = "mxs-mmc.2", }, + { .dev_id = "80014000.ssp", }, +}; + +static struct clk_lookup ssp3_lookups[] __initdata = { + { .dev_id = "mxs-mmc.3", }, + { .dev_id = "80016000.ssp", }, +}; + +static struct clk_lookup lcdif_lookups[] __initdata = { + { .dev_id = "imx28-fb", }, + { .dev_id = "80030000.lcdif", }, +}; + +static struct clk_lookup gpmi_lookups[] __initdata = { + { .dev_id = "imx28-gpmi-nand", }, + { .dev_id = "8000c000.gpmi", }, +}; + +static struct clk_lookup fec_lookups[] __initdata = { + { .dev_id = "imx28-fec.0", }, + { .dev_id = "imx28-fec.1", }, + { .dev_id = "800f0000.ethernet", }, + { .dev_id = "800f4000.ethernet", }, +}; + +static struct clk_lookup can0_lookups[] __initdata = { + { .dev_id = "flexcan.0", }, + { .dev_id = "80032000.can", }, +}; + +static struct clk_lookup can1_lookups[] __initdata = { + { .dev_id = "flexcan.1", }, + { .dev_id = "80034000.can", }, +}; + +static struct clk_lookup saif0_lookups[] __initdata = { + { .dev_id = "mxs-saif.0", }, + { .dev_id = "80042000.saif", }, +}; + +static struct clk_lookup saif1_lookups[] __initdata = { + { .dev_id = "mxs-saif.1", }, + { .dev_id = "80046000.saif", }, +}; + +static const char *sel_cpu[] __initconst = { "ref_cpu", "ref_xtal", }; +static const char *sel_io0[] __initconst = { "ref_io0", "ref_xtal", }; +static const char *sel_io1[] __initconst = { "ref_io1", "ref_xtal", }; +static const char *sel_pix[] __initconst = { "ref_pix", "ref_xtal", }; +static const char *sel_gpmi[] __initconst = { "ref_gpmi", "ref_xtal", }; +static const char *sel_pll0[] __initconst = { "pll0", "ref_xtal", }; +static const char *cpu_sels[] __initconst = { "cpu_pll", "cpu_xtal", }; +static const char *emi_sels[] __initconst = { "emi_pll", "emi_xtal", }; +static const char *ptp_sels[] __initconst = { "ref_xtal", "pll0", }; + +enum imx28_clk { + ref_xtal, pll0, pll1, pll2, ref_cpu, ref_emi, ref_io0, ref_io1, + ref_pix, ref_hsadc, ref_gpmi, saif0_sel, saif1_sel, gpmi_sel, + ssp0_sel, ssp1_sel, ssp2_sel, ssp3_sel, emi_sel, etm_sel, + lcdif_sel, cpu, ptp_sel, cpu_pll, cpu_xtal, hbus, xbus, + ssp0_div, ssp1_div, ssp2_div, ssp3_div, gpmi_div, emi_pll, + emi_xtal, lcdif_div, etm_div, ptp, saif0_div, saif1_div, + clk32k_div, rtc, lradc, spdif_div, clk32k, pwm, uart, ssp0, + ssp1, ssp2, ssp3, gpmi, spdif, emi, saif0, saif1, lcdif, etm, + fec, can0, can1, usb0, usb1, usb0_pwr, usb1_pwr, enet_out, + clk_max +}; + +static struct clk *clks[clk_max]; + +static enum imx28_clk clks_init_on[] __initdata = { + cpu, hbus, xbus, emi, uart, +}; + +int __init mx28_clocks_init(void) +{ + int i; + + clk_misc_init(); + + clks[ref_xtal] = mxs_clk_fixed("ref_xtal", 24000000); + clks[pll0] = mxs_clk_pll("pll0", "ref_xtal", PLL0CTRL0, 17, 480000000); + clks[pll1] = mxs_clk_pll("pll1", "ref_xtal", PLL1CTRL0, 17, 480000000); + clks[pll2] = mxs_clk_pll("pll2", "ref_xtal", PLL2CTRL0, 23, 50000000); + clks[ref_cpu] = mxs_clk_ref("ref_cpu", "pll0", FRAC0, 0); + clks[ref_emi] = mxs_clk_ref("ref_emi", "pll0", FRAC0, 1); + clks[ref_io0] = mxs_clk_ref("ref_io0", "pll0", FRAC0, 2); + clks[ref_io1] = mxs_clk_ref("ref_io1", "pll0", FRAC0, 3); + clks[ref_pix] = mxs_clk_ref("ref_pix", "pll0", FRAC1, 0); + clks[ref_hsadc] = mxs_clk_ref("ref_hsadc", "pll0", FRAC1, 1); + clks[ref_gpmi] = mxs_clk_ref("ref_gpmi", "pll0", FRAC1, 2); + clks[saif0_sel] = mxs_clk_mux("saif0_sel", CLKSEQ, 0, 1, sel_pll0, ARRAY_SIZE(sel_pll0)); + clks[saif1_sel] = mxs_clk_mux("saif1_sel", CLKSEQ, 1, 1, sel_pll0, ARRAY_SIZE(sel_pll0)); + clks[gpmi_sel] = mxs_clk_mux("gpmi_sel", CLKSEQ, 2, 1, sel_gpmi, ARRAY_SIZE(sel_gpmi)); + clks[ssp0_sel] = mxs_clk_mux("ssp0_sel", CLKSEQ, 3, 1, sel_io0, ARRAY_SIZE(sel_io0)); + clks[ssp1_sel] = mxs_clk_mux("ssp1_sel", CLKSEQ, 4, 1, sel_io0, ARRAY_SIZE(sel_io0)); + clks[ssp2_sel] = mxs_clk_mux("ssp2_sel", CLKSEQ, 5, 1, sel_io1, ARRAY_SIZE(sel_io1)); + clks[ssp3_sel] = mxs_clk_mux("ssp3_sel", CLKSEQ, 6, 1, sel_io1, ARRAY_SIZE(sel_io1)); + clks[emi_sel] = mxs_clk_mux("emi_sel", CLKSEQ, 7, 1, emi_sels, ARRAY_SIZE(emi_sels)); + clks[etm_sel] = mxs_clk_mux("etm_sel", CLKSEQ, 8, 1, sel_cpu, ARRAY_SIZE(sel_cpu)); + clks[lcdif_sel] = mxs_clk_mux("lcdif_sel", CLKSEQ, 14, 1, sel_pix, ARRAY_SIZE(sel_pix)); + clks[cpu] = mxs_clk_mux("cpu", CLKSEQ, 18, 1, cpu_sels, ARRAY_SIZE(cpu_sels)); + clks[ptp_sel] = mxs_clk_mux("ptp_sel", ENET, 19, 1, ptp_sels, ARRAY_SIZE(ptp_sels)); + clks[cpu_pll] = mxs_clk_div("cpu_pll", "ref_cpu", CPU, 0, 6, 28); + clks[cpu_xtal] = mxs_clk_div("cpu_xtal", "ref_xtal", CPU, 16, 10, 29); + clks[hbus] = mxs_clk_div("hbus", "cpu", HBUS, 0, 5, 31); + clks[xbus] = mxs_clk_div("xbus", "ref_xtal", XBUS, 0, 10, 31); + clks[ssp0_div] = mxs_clk_div("ssp0_div", "ssp0_sel", SSP0, 0, 9, 29); + clks[ssp1_div] = mxs_clk_div("ssp1_div", "ssp1_sel", SSP1, 0, 9, 29); + clks[ssp2_div] = mxs_clk_div("ssp2_div", "ssp2_sel", SSP2, 0, 9, 29); + clks[ssp3_div] = mxs_clk_div("ssp3_div", "ssp3_sel", SSP3, 0, 9, 29); + clks[gpmi_div] = mxs_clk_div("gpmi_div", "gpmi_sel", GPMI, 0, 10, 29); + clks[emi_pll] = mxs_clk_div("emi_pll", "ref_emi", EMI, 0, 6, 28); + clks[emi_xtal] = mxs_clk_div("emi_xtal", "ref_xtal", EMI, 8, 4, 29); + clks[lcdif_div] = mxs_clk_div("lcdif_div", "lcdif_sel", LCDIF, 0, 13, 29); + clks[etm_div] = mxs_clk_div("etm_div", "etm_sel", ETM, 0, 7, 29); + clks[ptp] = mxs_clk_div("ptp", "ptp_sel", ENET, 21, 6, 27); + clks[saif0_div] = mxs_clk_frac("saif0_div", "saif0_sel", SAIF0, 0, 16, 29); + clks[saif1_div] = mxs_clk_frac("saif1_div", "saif1_sel", SAIF1, 0, 16, 29); + clks[clk32k_div] = mxs_clk_fixed_factor("clk32k_div", "ref_xtal", 1, 750); + clks[rtc] = mxs_clk_fixed_factor("rtc", "ref_xtal", 1, 768); + clks[lradc] = mxs_clk_fixed_factor("lradc", "clk32k", 1, 16); + clks[spdif_div] = mxs_clk_fixed_factor("spdif_div", "pll0", 1, 4); + clks[clk32k] = mxs_clk_gate("clk32k", "clk32k_div", XTAL, 26); + clks[pwm] = mxs_clk_gate("pwm", "ref_xtal", XTAL, 29); + clks[uart] = mxs_clk_gate("uart", "ref_xtal", XTAL, 31); + clks[ssp0] = mxs_clk_gate("ssp0", "ssp0_div", SSP0, 31); + clks[ssp1] = mxs_clk_gate("ssp1", "ssp1_div", SSP1, 31); + clks[ssp2] = mxs_clk_gate("ssp2", "ssp2_div", SSP2, 31); + clks[ssp3] = mxs_clk_gate("ssp3", "ssp3_div", SSP3, 31); + clks[gpmi] = mxs_clk_gate("gpmi", "gpmi_div", GPMI, 31); + clks[spdif] = mxs_clk_gate("spdif", "spdif_div", SPDIF, 31); + clks[emi] = mxs_clk_gate("emi", "emi_sel", EMI, 31); + clks[saif0] = mxs_clk_gate("saif0", "saif0_div", SAIF0, 31); + clks[saif1] = mxs_clk_gate("saif1", "saif1_div", SAIF1, 31); + clks[lcdif] = mxs_clk_gate("lcdif", "lcdif_div", LCDIF, 31); + clks[etm] = mxs_clk_gate("etm", "etm_div", ETM, 31); + clks[fec] = mxs_clk_gate("fec", "hbus", ENET, 30); + clks[can0] = mxs_clk_gate("can0", "ref_xtal", FLEXCAN, 30); + clks[can1] = mxs_clk_gate("can1", "ref_xtal", FLEXCAN, 28); + clks[usb0] = mxs_clk_gate("usb0", "usb0_pwr", DIGCTRL, 2); + clks[usb1] = mxs_clk_gate("usb1", "usb1_pwr", DIGCTRL, 16); + clks[usb0_pwr] = clk_register_gate(NULL, "usb0_pwr", "pll0", 0, PLL0CTRL0, 18, 0, &mxs_lock); + clks[usb1_pwr] = clk_register_gate(NULL, "usb1_pwr", "pll1", 0, PLL1CTRL0, 18, 0, &mxs_lock); + clks[enet_out] = clk_register_gate(NULL, "enet_out", "pll2", 0, ENET, 18, 0, &mxs_lock); + + for (i = 0; i < ARRAY_SIZE(clks); i++) + if (IS_ERR(clks[i])) { + pr_err("i.MX28 clk %d: register failed with %ld\n", + i, PTR_ERR(clks[i])); + return PTR_ERR(clks[i]); + } + + clk_register_clkdev(clks[clk32k], NULL, "timrot"); + clk_register_clkdev(clks[enet_out], NULL, "enet_out"); + clk_register_clkdevs(clks[hbus], hbus_lookups, ARRAY_SIZE(hbus_lookups)); + clk_register_clkdevs(clks[xbus], xbus_lookups, ARRAY_SIZE(xbus_lookups)); + clk_register_clkdevs(clks[uart], uart_lookups, ARRAY_SIZE(uart_lookups)); + clk_register_clkdevs(clks[ssp0], ssp0_lookups, ARRAY_SIZE(ssp0_lookups)); + clk_register_clkdevs(clks[ssp1], ssp1_lookups, ARRAY_SIZE(ssp1_lookups)); + clk_register_clkdevs(clks[ssp2], ssp2_lookups, ARRAY_SIZE(ssp2_lookups)); + clk_register_clkdevs(clks[ssp3], ssp3_lookups, ARRAY_SIZE(ssp3_lookups)); + clk_register_clkdevs(clks[gpmi], gpmi_lookups, ARRAY_SIZE(gpmi_lookups)); + clk_register_clkdevs(clks[saif0], saif0_lookups, ARRAY_SIZE(saif0_lookups)); + clk_register_clkdevs(clks[saif1], saif1_lookups, ARRAY_SIZE(saif1_lookups)); + clk_register_clkdevs(clks[lcdif], lcdif_lookups, ARRAY_SIZE(lcdif_lookups)); + clk_register_clkdevs(clks[fec], fec_lookups, ARRAY_SIZE(fec_lookups)); + clk_register_clkdevs(clks[can0], can0_lookups, ARRAY_SIZE(can0_lookups)); + clk_register_clkdevs(clks[can1], can1_lookups, ARRAY_SIZE(can1_lookups)); + + for (i = 0; i < ARRAY_SIZE(clks_init_on); i++) + clk_prepare_enable(clks[clks_init_on[i]]); + + mxs_timer_init(NULL, MX28_INT_TIMER0); + + return 0; +} -- cgit v0.10.2 From 39d1367e11e406ddb9bcd5f2e4798f3c355db7d9 Mon Sep 17 00:00:00 2001 From: Shawn Guo Date: Sun, 29 Apr 2012 00:02:37 +0800 Subject: ARM: mxs: request clock for timer When mxs_timer_init() does not have a timer_clk passed in, it should try to request clock from clkdev system. Signed-off-by: Shawn Guo diff --git a/arch/arm/mach-mxs/timer.c b/arch/arm/mach-mxs/timer.c index 564a632..575e8fd 100644 --- a/arch/arm/mach-mxs/timer.c +++ b/arch/arm/mach-mxs/timer.c @@ -20,6 +20,7 @@ * MA 02110-1301, USA. */ +#include #include #include #include @@ -245,6 +246,14 @@ static int __init mxs_clocksource_init(struct clk *timer_clk) void __init mxs_timer_init(struct clk *timer_clk, int irq) { + if (!timer_clk) { + timer_clk = clk_get_sys("timrot", NULL); + if (IS_ERR(timer_clk)) { + pr_err("%s: failed to get clk\n", __func__); + return; + } + } + clk_prepare_enable(timer_clk); /* -- cgit v0.10.2 From e80d76f84660268859ab82e82fd78c1ce8439323 Mon Sep 17 00:00:00 2001 From: Shawn Guo Date: Sun, 29 Apr 2012 00:02:38 +0800 Subject: ARM: mxs: change the lookup name for fec phy clock Change the fec phy clock lookup name to be more accurate. Signed-off-by: Shawn Guo diff --git a/arch/arm/mach-mxs/clock-mx28.c b/arch/arm/mach-mxs/clock-mx28.c index cea29c9..5a832ad 100644 --- a/arch/arm/mach-mxs/clock-mx28.c +++ b/arch/arm/mach-mxs/clock-mx28.c @@ -624,7 +624,7 @@ static struct clk_lookup lookups[] = { _REGISTER_CLOCK("mxs-auart.3", NULL, uart_clk) _REGISTER_CLOCK("mxs-auart.4", NULL, uart_clk) _REGISTER_CLOCK("rtc", NULL, rtc_clk) - _REGISTER_CLOCK("pll2", NULL, pll2_clk) + _REGISTER_CLOCK("enet_out", NULL, pll2_clk) _REGISTER_CLOCK("mxs-dma-apbh", NULL, hbus_clk) _REGISTER_CLOCK("mxs-dma-apbx", NULL, xbus_clk) _REGISTER_CLOCK("mxs-mmc.0", NULL, ssp0_clk) diff --git a/arch/arm/mach-mxs/mach-mx28evk.c b/arch/arm/mach-mxs/mach-mx28evk.c index e386c14..9596940 100644 --- a/arch/arm/mach-mxs/mach-mx28evk.c +++ b/arch/arm/mach-mxs/mach-mx28evk.c @@ -226,7 +226,7 @@ static void __init mx28evk_fec_reset(void) struct clk *clk; /* Enable fec phy clock */ - clk = clk_get_sys("pll2", NULL); + clk = clk_get_sys("enet_out", NULL); if (!IS_ERR(clk)) clk_prepare_enable(clk); -- cgit v0.10.2 From 2664681fa4101aef2bceb81bbe26a81a88131393 Mon Sep 17 00:00:00 2001 From: Shawn Guo Date: Sun, 29 Apr 2012 00:02:39 +0800 Subject: ARM: mxs: switch to common clk framework It switches mxs clock support to common clk framework based drivers. Signed-off-by: Shawn Guo diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 36586dba..2095a51 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -468,6 +468,7 @@ config ARCH_MXS select ARCH_REQUIRE_GPIOLIB select CLKDEV_LOOKUP select CLKSRC_MMIO + select COMMON_CLK select HAVE_CLK_PREPARE help Support for Freescale MXS-based family of processors diff --git a/arch/arm/mach-mxs/Makefile b/arch/arm/mach-mxs/Makefile index 908bf9a..6ce21a2 100644 --- a/arch/arm/mach-mxs/Makefile +++ b/arch/arm/mach-mxs/Makefile @@ -1,12 +1,9 @@ # Common support -obj-y := clock.o devices.o icoll.o iomux.o system.o timer.o mm.o +obj-y := devices.o icoll.o iomux.o system.o timer.o mm.o obj-$(CONFIG_MXS_OCOTP) += ocotp.o obj-$(CONFIG_PM) += pm.o -obj-$(CONFIG_SOC_IMX23) += clock-mx23.o -obj-$(CONFIG_SOC_IMX28) += clock-mx28.o - obj-$(CONFIG_MACH_STMP378X_DEVB) += mach-stmp378x_devb.o obj-$(CONFIG_MACH_MX23EVK) += mach-mx23evk.o obj-$(CONFIG_MACH_MX28EVK) += mach-mx28evk.o diff --git a/drivers/clk/Makefile b/drivers/clk/Makefile index 1f736bc..a576f54 100644 --- a/drivers/clk/Makefile +++ b/drivers/clk/Makefile @@ -2,3 +2,5 @@ obj-$(CONFIG_CLKDEV_LOOKUP) += clkdev.o obj-$(CONFIG_COMMON_CLK) += clk.o clk-fixed-rate.o clk-gate.o \ clk-mux.o clk-divider.o + +obj-$(CONFIG_ARCH_MXS) += mxs/ -- cgit v0.10.2 From 5da301a2056529cc5366f21d4325b96052d9cad1 Mon Sep 17 00:00:00 2001 From: Shawn Guo Date: Sun, 29 Apr 2012 00:02:40 +0800 Subject: ARM: mxs: remove old clock support The mxs clock has been switched to common clock framework, so the old clock support can be removed now. Signed-off-by: Shawn Guo diff --git a/arch/arm/mach-mxs/clock-mx23.c b/arch/arm/mach-mxs/clock-mx23.c deleted file mode 100644 index e3ac52c..0000000 --- a/arch/arm/mach-mxs/clock-mx23.c +++ /dev/null @@ -1,536 +0,0 @@ -/* - * Copyright (C) 2009-2010 Freescale Semiconductor, Inc. All Rights Reserved. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - */ - -#include -#include -#include -#include -#include -#include - -#include -#include - -#include -#include -#include - -#include "regs-clkctrl-mx23.h" - -#define CLKCTRL_BASE_ADDR MX23_IO_ADDRESS(MX23_CLKCTRL_BASE_ADDR) -#define DIGCTRL_BASE_ADDR MX23_IO_ADDRESS(MX23_DIGCTL_BASE_ADDR) - -#define PARENT_RATE_SHIFT 8 - -static int _raw_clk_enable(struct clk *clk) -{ - u32 reg; - - if (clk->enable_reg) { - reg = __raw_readl(clk->enable_reg); - reg &= ~(1 << clk->enable_shift); - __raw_writel(reg, clk->enable_reg); - } - - return 0; -} - -static void _raw_clk_disable(struct clk *clk) -{ - u32 reg; - - if (clk->enable_reg) { - reg = __raw_readl(clk->enable_reg); - reg |= 1 << clk->enable_shift; - __raw_writel(reg, clk->enable_reg); - } -} - -/* - * ref_xtal_clk - */ -static unsigned long ref_xtal_clk_get_rate(struct clk *clk) -{ - return 24000000; -} - -static struct clk ref_xtal_clk = { - .get_rate = ref_xtal_clk_get_rate, -}; - -/* - * pll_clk - */ -static unsigned long pll_clk_get_rate(struct clk *clk) -{ - return 480000000; -} - -static int pll_clk_enable(struct clk *clk) -{ - __raw_writel(BM_CLKCTRL_PLLCTRL0_POWER | - BM_CLKCTRL_PLLCTRL0_EN_USB_CLKS, - CLKCTRL_BASE_ADDR + HW_CLKCTRL_PLLCTRL0_SET); - - /* Only a 10us delay is need. PLLCTRL1 LOCK bitfied is only a timer - * and is incorrect (excessive). Per definition of the PLLCTRL0 - * POWER field, waiting at least 10us. - */ - udelay(10); - - return 0; -} - -static void pll_clk_disable(struct clk *clk) -{ - __raw_writel(BM_CLKCTRL_PLLCTRL0_POWER | - BM_CLKCTRL_PLLCTRL0_EN_USB_CLKS, - CLKCTRL_BASE_ADDR + HW_CLKCTRL_PLLCTRL0_CLR); -} - -static struct clk pll_clk = { - .get_rate = pll_clk_get_rate, - .enable = pll_clk_enable, - .disable = pll_clk_disable, - .parent = &ref_xtal_clk, -}; - -/* - * ref_clk - */ -#define _CLK_GET_RATE_REF(name, sr, ss) \ -static unsigned long name##_get_rate(struct clk *clk) \ -{ \ - unsigned long parent_rate; \ - u32 reg, div; \ - \ - reg = __raw_readl(CLKCTRL_BASE_ADDR + HW_CLKCTRL_##sr); \ - div = (reg >> BP_CLKCTRL_##sr##_##ss##FRAC) & 0x3f; \ - parent_rate = clk_get_rate(clk->parent); \ - \ - return SH_DIV((parent_rate >> PARENT_RATE_SHIFT) * 18, \ - div, PARENT_RATE_SHIFT); \ -} - -_CLK_GET_RATE_REF(ref_cpu_clk, FRAC, CPU) -_CLK_GET_RATE_REF(ref_emi_clk, FRAC, EMI) -_CLK_GET_RATE_REF(ref_pix_clk, FRAC, PIX) -_CLK_GET_RATE_REF(ref_io_clk, FRAC, IO) - -#define _DEFINE_CLOCK_REF(name, er, es) \ - static struct clk name = { \ - .enable_reg = CLKCTRL_BASE_ADDR + HW_CLKCTRL_##er, \ - .enable_shift = BP_CLKCTRL_##er##_CLKGATE##es, \ - .get_rate = name##_get_rate, \ - .enable = _raw_clk_enable, \ - .disable = _raw_clk_disable, \ - .parent = &pll_clk, \ - } - -_DEFINE_CLOCK_REF(ref_cpu_clk, FRAC, CPU); -_DEFINE_CLOCK_REF(ref_emi_clk, FRAC, EMI); -_DEFINE_CLOCK_REF(ref_pix_clk, FRAC, PIX); -_DEFINE_CLOCK_REF(ref_io_clk, FRAC, IO); - -/* - * General clocks - * - * clk_get_rate - */ -static unsigned long rtc_clk_get_rate(struct clk *clk) -{ - /* ref_xtal_clk is implemented as the only parent */ - return clk_get_rate(clk->parent) / 768; -} - -static unsigned long clk32k_clk_get_rate(struct clk *clk) -{ - return clk->parent->get_rate(clk->parent) / 750; -} - -#define _CLK_GET_RATE(name, rs) \ -static unsigned long name##_get_rate(struct clk *clk) \ -{ \ - u32 reg, div; \ - \ - reg = __raw_readl(CLKCTRL_BASE_ADDR + HW_CLKCTRL_##rs); \ - \ - if (clk->parent == &ref_xtal_clk) \ - div = (reg & BM_CLKCTRL_##rs##_DIV_XTAL) >> \ - BP_CLKCTRL_##rs##_DIV_XTAL; \ - else \ - div = (reg & BM_CLKCTRL_##rs##_DIV_##rs) >> \ - BP_CLKCTRL_##rs##_DIV_##rs; \ - \ - if (!div) \ - return -EINVAL; \ - \ - return clk_get_rate(clk->parent) / div; \ -} - -_CLK_GET_RATE(cpu_clk, CPU) -_CLK_GET_RATE(emi_clk, EMI) - -#define _CLK_GET_RATE1(name, rs) \ -static unsigned long name##_get_rate(struct clk *clk) \ -{ \ - u32 reg, div; \ - \ - reg = __raw_readl(CLKCTRL_BASE_ADDR + HW_CLKCTRL_##rs); \ - div = (reg & BM_CLKCTRL_##rs##_DIV) >> BP_CLKCTRL_##rs##_DIV; \ - \ - if (!div) \ - return -EINVAL; \ - \ - return clk_get_rate(clk->parent) / div; \ -} - -_CLK_GET_RATE1(hbus_clk, HBUS) -_CLK_GET_RATE1(xbus_clk, XBUS) -_CLK_GET_RATE1(ssp_clk, SSP) -_CLK_GET_RATE1(gpmi_clk, GPMI) -_CLK_GET_RATE1(lcdif_clk, PIX) - -#define _CLK_GET_RATE_STUB(name) \ -static unsigned long name##_get_rate(struct clk *clk) \ -{ \ - return clk_get_rate(clk->parent); \ -} - -_CLK_GET_RATE_STUB(uart_clk) -_CLK_GET_RATE_STUB(audio_clk) -_CLK_GET_RATE_STUB(pwm_clk) - -/* - * clk_set_rate - */ -static int cpu_clk_set_rate(struct clk *clk, unsigned long rate) -{ - u32 reg, bm_busy, div_max, d, f, div, frac; - unsigned long diff, parent_rate, calc_rate; - - parent_rate = clk_get_rate(clk->parent); - - if (clk->parent == &ref_xtal_clk) { - div_max = BM_CLKCTRL_CPU_DIV_XTAL >> BP_CLKCTRL_CPU_DIV_XTAL; - bm_busy = BM_CLKCTRL_CPU_BUSY_REF_XTAL; - div = DIV_ROUND_UP(parent_rate, rate); - if (div == 0 || div > div_max) - return -EINVAL; - } else { - div_max = BM_CLKCTRL_CPU_DIV_CPU >> BP_CLKCTRL_CPU_DIV_CPU; - bm_busy = BM_CLKCTRL_CPU_BUSY_REF_CPU; - rate >>= PARENT_RATE_SHIFT; - parent_rate >>= PARENT_RATE_SHIFT; - diff = parent_rate; - div = frac = 1; - for (d = 1; d <= div_max; d++) { - f = parent_rate * 18 / d / rate; - if ((parent_rate * 18 / d) % rate) - f++; - if (f < 18 || f > 35) - continue; - - calc_rate = parent_rate * 18 / f / d; - if (calc_rate > rate) - continue; - - if (rate - calc_rate < diff) { - frac = f; - div = d; - diff = rate - calc_rate; - } - - if (diff == 0) - break; - } - - if (diff == parent_rate) - return -EINVAL; - - reg = __raw_readl(CLKCTRL_BASE_ADDR + HW_CLKCTRL_FRAC); - reg &= ~BM_CLKCTRL_FRAC_CPUFRAC; - reg |= frac; - __raw_writel(reg, CLKCTRL_BASE_ADDR + HW_CLKCTRL_FRAC); - } - - reg = __raw_readl(CLKCTRL_BASE_ADDR + HW_CLKCTRL_CPU); - reg &= ~BM_CLKCTRL_CPU_DIV_CPU; - reg |= div << BP_CLKCTRL_CPU_DIV_CPU; - __raw_writel(reg, CLKCTRL_BASE_ADDR + HW_CLKCTRL_CPU); - - mxs_clkctrl_timeout(HW_CLKCTRL_CPU, bm_busy); - - return 0; -} - -#define _CLK_SET_RATE(name, dr) \ -static int name##_set_rate(struct clk *clk, unsigned long rate) \ -{ \ - u32 reg, div_max, div; \ - unsigned long parent_rate; \ - \ - parent_rate = clk_get_rate(clk->parent); \ - div_max = BM_CLKCTRL_##dr##_DIV >> BP_CLKCTRL_##dr##_DIV; \ - \ - div = DIV_ROUND_UP(parent_rate, rate); \ - if (div == 0 || div > div_max) \ - return -EINVAL; \ - \ - reg = __raw_readl(CLKCTRL_BASE_ADDR + HW_CLKCTRL_##dr); \ - reg &= ~BM_CLKCTRL_##dr##_DIV; \ - reg |= div << BP_CLKCTRL_##dr##_DIV; \ - if (reg & (1 << clk->enable_shift)) { \ - pr_err("%s: clock is gated\n", __func__); \ - return -EINVAL; \ - } \ - __raw_writel(reg, CLKCTRL_BASE_ADDR + HW_CLKCTRL_##dr); \ - \ - mxs_clkctrl_timeout(HW_CLKCTRL_##dr, BM_CLKCTRL_##dr##_BUSY); \ - return 0; \ -} - -_CLK_SET_RATE(xbus_clk, XBUS) -_CLK_SET_RATE(ssp_clk, SSP) -_CLK_SET_RATE(gpmi_clk, GPMI) -_CLK_SET_RATE(lcdif_clk, PIX) - -#define _CLK_SET_RATE_STUB(name) \ -static int name##_set_rate(struct clk *clk, unsigned long rate) \ -{ \ - return -EINVAL; \ -} - -_CLK_SET_RATE_STUB(emi_clk) -_CLK_SET_RATE_STUB(uart_clk) -_CLK_SET_RATE_STUB(audio_clk) -_CLK_SET_RATE_STUB(pwm_clk) -_CLK_SET_RATE_STUB(clk32k_clk) - -/* - * clk_set_parent - */ -#define _CLK_SET_PARENT(name, bit) \ -static int name##_set_parent(struct clk *clk, struct clk *parent) \ -{ \ - if (parent != clk->parent) { \ - __raw_writel(BM_CLKCTRL_CLKSEQ_BYPASS_##bit, \ - CLKCTRL_BASE_ADDR + HW_CLKCTRL_CLKSEQ_TOG); \ - clk->parent = parent; \ - } \ - \ - return 0; \ -} - -_CLK_SET_PARENT(cpu_clk, CPU) -_CLK_SET_PARENT(emi_clk, EMI) -_CLK_SET_PARENT(ssp_clk, SSP) -_CLK_SET_PARENT(gpmi_clk, GPMI) -_CLK_SET_PARENT(lcdif_clk, PIX) - -#define _CLK_SET_PARENT_STUB(name) \ -static int name##_set_parent(struct clk *clk, struct clk *parent) \ -{ \ - if (parent != clk->parent) \ - return -EINVAL; \ - else \ - return 0; \ -} - -_CLK_SET_PARENT_STUB(uart_clk) -_CLK_SET_PARENT_STUB(audio_clk) -_CLK_SET_PARENT_STUB(pwm_clk) -_CLK_SET_PARENT_STUB(clk32k_clk) - -/* - * clk definition - */ -static struct clk cpu_clk = { - .get_rate = cpu_clk_get_rate, - .set_rate = cpu_clk_set_rate, - .set_parent = cpu_clk_set_parent, - .parent = &ref_cpu_clk, -}; - -static struct clk hbus_clk = { - .get_rate = hbus_clk_get_rate, - .parent = &cpu_clk, -}; - -static struct clk xbus_clk = { - .get_rate = xbus_clk_get_rate, - .set_rate = xbus_clk_set_rate, - .parent = &ref_xtal_clk, -}; - -static struct clk rtc_clk = { - .get_rate = rtc_clk_get_rate, - .parent = &ref_xtal_clk, -}; - -/* usb_clk gate is controlled in DIGCTRL other than CLKCTRL */ -static struct clk usb_clk = { - .enable_reg = DIGCTRL_BASE_ADDR, - .enable_shift = 2, - .enable = _raw_clk_enable, - .disable = _raw_clk_disable, - .parent = &pll_clk, -}; - -#define _DEFINE_CLOCK(name, er, es, p) \ - static struct clk name = { \ - .enable_reg = CLKCTRL_BASE_ADDR + HW_CLKCTRL_##er, \ - .enable_shift = BP_CLKCTRL_##er##_##es, \ - .get_rate = name##_get_rate, \ - .set_rate = name##_set_rate, \ - .set_parent = name##_set_parent, \ - .enable = _raw_clk_enable, \ - .disable = _raw_clk_disable, \ - .parent = p, \ - } - -_DEFINE_CLOCK(emi_clk, EMI, CLKGATE, &ref_xtal_clk); -_DEFINE_CLOCK(ssp_clk, SSP, CLKGATE, &ref_xtal_clk); -_DEFINE_CLOCK(gpmi_clk, GPMI, CLKGATE, &ref_xtal_clk); -_DEFINE_CLOCK(lcdif_clk, PIX, CLKGATE, &ref_xtal_clk); -_DEFINE_CLOCK(uart_clk, XTAL, UART_CLK_GATE, &ref_xtal_clk); -_DEFINE_CLOCK(audio_clk, XTAL, FILT_CLK24M_GATE, &ref_xtal_clk); -_DEFINE_CLOCK(pwm_clk, XTAL, PWM_CLK24M_GATE, &ref_xtal_clk); -_DEFINE_CLOCK(clk32k_clk, XTAL, TIMROT_CLK32K_GATE, &ref_xtal_clk); - -#define _REGISTER_CLOCK(d, n, c) \ - { \ - .dev_id = d, \ - .con_id = n, \ - .clk = &c, \ - }, - -static struct clk_lookup lookups[] = { - /* for amba bus driver */ - _REGISTER_CLOCK("duart", "apb_pclk", xbus_clk) - /* for amba-pl011 driver */ - _REGISTER_CLOCK("duart", NULL, uart_clk) - _REGISTER_CLOCK("mxs-auart.0", NULL, uart_clk) - _REGISTER_CLOCK("rtc", NULL, rtc_clk) - _REGISTER_CLOCK("mxs-dma-apbh", NULL, hbus_clk) - _REGISTER_CLOCK("mxs-dma-apbx", NULL, xbus_clk) - _REGISTER_CLOCK("mxs-mmc.0", NULL, ssp_clk) - _REGISTER_CLOCK("mxs-mmc.1", NULL, ssp_clk) - _REGISTER_CLOCK(NULL, "usb", usb_clk) - _REGISTER_CLOCK(NULL, "audio", audio_clk) - _REGISTER_CLOCK("mxs-pwm.0", NULL, pwm_clk) - _REGISTER_CLOCK("mxs-pwm.1", NULL, pwm_clk) - _REGISTER_CLOCK("mxs-pwm.2", NULL, pwm_clk) - _REGISTER_CLOCK("mxs-pwm.3", NULL, pwm_clk) - _REGISTER_CLOCK("mxs-pwm.4", NULL, pwm_clk) - _REGISTER_CLOCK("imx23-fb", NULL, lcdif_clk) - _REGISTER_CLOCK("imx23-gpmi-nand", NULL, gpmi_clk) -}; - -static int clk_misc_init(void) -{ - u32 reg; - int ret; - - /* Fix up parent per register setting */ - reg = __raw_readl(CLKCTRL_BASE_ADDR + HW_CLKCTRL_CLKSEQ); - cpu_clk.parent = (reg & BM_CLKCTRL_CLKSEQ_BYPASS_CPU) ? - &ref_xtal_clk : &ref_cpu_clk; - emi_clk.parent = (reg & BM_CLKCTRL_CLKSEQ_BYPASS_EMI) ? - &ref_xtal_clk : &ref_emi_clk; - ssp_clk.parent = (reg & BM_CLKCTRL_CLKSEQ_BYPASS_SSP) ? - &ref_xtal_clk : &ref_io_clk; - gpmi_clk.parent = (reg & BM_CLKCTRL_CLKSEQ_BYPASS_GPMI) ? - &ref_xtal_clk : &ref_io_clk; - lcdif_clk.parent = (reg & BM_CLKCTRL_CLKSEQ_BYPASS_PIX) ? - &ref_xtal_clk : &ref_pix_clk; - - /* Use int div over frac when both are available */ - __raw_writel(BM_CLKCTRL_CPU_DIV_XTAL_FRAC_EN, - CLKCTRL_BASE_ADDR + HW_CLKCTRL_CPU_CLR); - __raw_writel(BM_CLKCTRL_CPU_DIV_CPU_FRAC_EN, - CLKCTRL_BASE_ADDR + HW_CLKCTRL_CPU_CLR); - __raw_writel(BM_CLKCTRL_HBUS_DIV_FRAC_EN, - CLKCTRL_BASE_ADDR + HW_CLKCTRL_HBUS_CLR); - - reg = __raw_readl(CLKCTRL_BASE_ADDR + HW_CLKCTRL_XBUS); - reg &= ~BM_CLKCTRL_XBUS_DIV_FRAC_EN; - __raw_writel(reg, CLKCTRL_BASE_ADDR + HW_CLKCTRL_XBUS); - - reg = __raw_readl(CLKCTRL_BASE_ADDR + HW_CLKCTRL_SSP); - reg &= ~BM_CLKCTRL_SSP_DIV_FRAC_EN; - __raw_writel(reg, CLKCTRL_BASE_ADDR + HW_CLKCTRL_SSP); - - reg = __raw_readl(CLKCTRL_BASE_ADDR + HW_CLKCTRL_GPMI); - reg &= ~BM_CLKCTRL_GPMI_DIV_FRAC_EN; - __raw_writel(reg, CLKCTRL_BASE_ADDR + HW_CLKCTRL_GPMI); - - reg = __raw_readl(CLKCTRL_BASE_ADDR + HW_CLKCTRL_PIX); - reg &= ~BM_CLKCTRL_PIX_DIV_FRAC_EN; - __raw_writel(reg, CLKCTRL_BASE_ADDR + HW_CLKCTRL_PIX); - - /* - * Set safe hbus clock divider. A divider of 3 ensure that - * the Vddd voltage required for the cpu clock is sufficiently - * high for the hbus clock. - */ - reg = __raw_readl(CLKCTRL_BASE_ADDR + HW_CLKCTRL_HBUS); - reg &= BM_CLKCTRL_HBUS_DIV; - reg |= 3 << BP_CLKCTRL_HBUS_DIV; - __raw_writel(reg, CLKCTRL_BASE_ADDR + HW_CLKCTRL_HBUS); - - ret = mxs_clkctrl_timeout(HW_CLKCTRL_HBUS, BM_CLKCTRL_HBUS_BUSY); - - /* Gate off cpu clock in WFI for power saving */ - __raw_writel(BM_CLKCTRL_CPU_INTERRUPT_WAIT, - CLKCTRL_BASE_ADDR + HW_CLKCTRL_CPU_SET); - - /* - * 480 MHz seems too high to be ssp clock source directly, - * so set frac to get a 288 MHz ref_io. - */ - reg = __raw_readl(CLKCTRL_BASE_ADDR + HW_CLKCTRL_FRAC); - reg &= ~BM_CLKCTRL_FRAC_IOFRAC; - reg |= 30 << BP_CLKCTRL_FRAC_IOFRAC; - __raw_writel(reg, CLKCTRL_BASE_ADDR + HW_CLKCTRL_FRAC); - - return ret; -} - -int __init mx23_clocks_init(void) -{ - clk_misc_init(); - - /* - * source ssp clock from ref_io than ref_xtal, - * as ref_xtal only provides 24 MHz as maximum. - */ - clk_set_parent(&ssp_clk, &ref_io_clk); - - clk_prepare_enable(&cpu_clk); - clk_prepare_enable(&hbus_clk); - clk_prepare_enable(&xbus_clk); - clk_prepare_enable(&emi_clk); - clk_prepare_enable(&uart_clk); - - clkdev_add_table(lookups, ARRAY_SIZE(lookups)); - - mxs_timer_init(&clk32k_clk, MX23_INT_TIMER0); - - return 0; -} diff --git a/arch/arm/mach-mxs/clock-mx28.c b/arch/arm/mach-mxs/clock-mx28.c deleted file mode 100644 index 5a832ad..0000000 --- a/arch/arm/mach-mxs/clock-mx28.c +++ /dev/null @@ -1,803 +0,0 @@ -/* - * Copyright (C) 2009-2010 Freescale Semiconductor, Inc. All Rights Reserved. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - */ - -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -#include -#include -#include -#include - -#include "regs-clkctrl-mx28.h" - -#define CLKCTRL_BASE_ADDR MX28_IO_ADDRESS(MX28_CLKCTRL_BASE_ADDR) -#define DIGCTRL_BASE_ADDR MX28_IO_ADDRESS(MX28_DIGCTL_BASE_ADDR) - -#define PARENT_RATE_SHIFT 8 - -static struct clk pll2_clk; -static struct clk cpu_clk; -static struct clk emi_clk; -static struct clk saif0_clk; -static struct clk saif1_clk; -static struct clk clk32k_clk; -static DEFINE_SPINLOCK(clkmux_lock); - -/* - * HW_SAIF_CLKMUX_SEL: - * DIRECT(0x0): SAIF0 clock pins selected for SAIF0 input clocks, and SAIF1 - * clock pins selected for SAIF1 input clocks. - * CROSSINPUT(0x1): SAIF1 clock inputs selected for SAIF0 input clocks, and - * SAIF0 clock inputs selected for SAIF1 input clocks. - * EXTMSTR0(0x2): SAIF0 clock pin selected for both SAIF0 and SAIF1 input - * clocks. - * EXTMSTR1(0x3): SAIF1 clock pin selected for both SAIF0 and SAIF1 input - * clocks. - */ -int mxs_saif_clkmux_select(unsigned int clkmux) -{ - if (clkmux > 0x3) - return -EINVAL; - - spin_lock(&clkmux_lock); - __raw_writel(BM_DIGCTL_CTRL_SAIF_CLKMUX, - DIGCTRL_BASE_ADDR + HW_DIGCTL_CTRL + MXS_CLR_ADDR); - __raw_writel(clkmux << BP_DIGCTL_CTRL_SAIF_CLKMUX, - DIGCTRL_BASE_ADDR + HW_DIGCTL_CTRL + MXS_SET_ADDR); - spin_unlock(&clkmux_lock); - - return 0; -} - -static int _raw_clk_enable(struct clk *clk) -{ - u32 reg; - - if (clk->enable_reg) { - reg = __raw_readl(clk->enable_reg); - reg &= ~(1 << clk->enable_shift); - __raw_writel(reg, clk->enable_reg); - } - - return 0; -} - -static void _raw_clk_disable(struct clk *clk) -{ - u32 reg; - - if (clk->enable_reg) { - reg = __raw_readl(clk->enable_reg); - reg |= 1 << clk->enable_shift; - __raw_writel(reg, clk->enable_reg); - } -} - -/* - * ref_xtal_clk - */ -static unsigned long ref_xtal_clk_get_rate(struct clk *clk) -{ - return 24000000; -} - -static struct clk ref_xtal_clk = { - .get_rate = ref_xtal_clk_get_rate, -}; - -/* - * pll_clk - */ -static unsigned long pll0_clk_get_rate(struct clk *clk) -{ - return 480000000; -} - -static unsigned long pll1_clk_get_rate(struct clk *clk) -{ - return 480000000; -} - -static unsigned long pll2_clk_get_rate(struct clk *clk) -{ - return 50000000; -} - -#define _CLK_ENABLE_PLL(name, r, g) \ -static int name##_enable(struct clk *clk) \ -{ \ - __raw_writel(BM_CLKCTRL_##r##CTRL0_POWER, \ - CLKCTRL_BASE_ADDR + HW_CLKCTRL_##r##CTRL0_SET); \ - udelay(10); \ - \ - if (clk == &pll2_clk) \ - __raw_writel(BM_CLKCTRL_##r##CTRL0_##g, \ - CLKCTRL_BASE_ADDR + HW_CLKCTRL_##r##CTRL0_CLR); \ - else \ - __raw_writel(BM_CLKCTRL_##r##CTRL0_##g, \ - CLKCTRL_BASE_ADDR + HW_CLKCTRL_##r##CTRL0_SET); \ - \ - return 0; \ -} - -_CLK_ENABLE_PLL(pll0_clk, PLL0, EN_USB_CLKS) -_CLK_ENABLE_PLL(pll1_clk, PLL1, EN_USB_CLKS) -_CLK_ENABLE_PLL(pll2_clk, PLL2, CLKGATE) - -#define _CLK_DISABLE_PLL(name, r, g) \ -static void name##_disable(struct clk *clk) \ -{ \ - __raw_writel(BM_CLKCTRL_##r##CTRL0_POWER, \ - CLKCTRL_BASE_ADDR + HW_CLKCTRL_##r##CTRL0_CLR); \ - \ - if (clk == &pll2_clk) \ - __raw_writel(BM_CLKCTRL_##r##CTRL0_##g, \ - CLKCTRL_BASE_ADDR + HW_CLKCTRL_##r##CTRL0_SET); \ - else \ - __raw_writel(BM_CLKCTRL_##r##CTRL0_##g, \ - CLKCTRL_BASE_ADDR + HW_CLKCTRL_##r##CTRL0_CLR); \ - \ -} - -_CLK_DISABLE_PLL(pll0_clk, PLL0, EN_USB_CLKS) -_CLK_DISABLE_PLL(pll1_clk, PLL1, EN_USB_CLKS) -_CLK_DISABLE_PLL(pll2_clk, PLL2, CLKGATE) - -#define _DEFINE_CLOCK_PLL(name) \ - static struct clk name = { \ - .get_rate = name##_get_rate, \ - .enable = name##_enable, \ - .disable = name##_disable, \ - .parent = &ref_xtal_clk, \ - } - -_DEFINE_CLOCK_PLL(pll0_clk); -_DEFINE_CLOCK_PLL(pll1_clk); -_DEFINE_CLOCK_PLL(pll2_clk); - -/* - * ref_clk - */ -#define _CLK_GET_RATE_REF(name, sr, ss) \ -static unsigned long name##_get_rate(struct clk *clk) \ -{ \ - unsigned long parent_rate; \ - u32 reg, div; \ - \ - reg = __raw_readl(CLKCTRL_BASE_ADDR + HW_CLKCTRL_##sr); \ - div = (reg >> BP_CLKCTRL_##sr##_##ss##FRAC) & 0x3f; \ - parent_rate = clk_get_rate(clk->parent); \ - \ - return SH_DIV((parent_rate >> PARENT_RATE_SHIFT) * 18, \ - div, PARENT_RATE_SHIFT); \ -} - -_CLK_GET_RATE_REF(ref_cpu_clk, FRAC0, CPU) -_CLK_GET_RATE_REF(ref_emi_clk, FRAC0, EMI) -_CLK_GET_RATE_REF(ref_io0_clk, FRAC0, IO0) -_CLK_GET_RATE_REF(ref_io1_clk, FRAC0, IO1) -_CLK_GET_RATE_REF(ref_pix_clk, FRAC1, PIX) -_CLK_GET_RATE_REF(ref_gpmi_clk, FRAC1, GPMI) - -#define _DEFINE_CLOCK_REF(name, er, es) \ - static struct clk name = { \ - .enable_reg = CLKCTRL_BASE_ADDR + HW_CLKCTRL_##er, \ - .enable_shift = BP_CLKCTRL_##er##_CLKGATE##es, \ - .get_rate = name##_get_rate, \ - .enable = _raw_clk_enable, \ - .disable = _raw_clk_disable, \ - .parent = &pll0_clk, \ - } - -_DEFINE_CLOCK_REF(ref_cpu_clk, FRAC0, CPU); -_DEFINE_CLOCK_REF(ref_emi_clk, FRAC0, EMI); -_DEFINE_CLOCK_REF(ref_io0_clk, FRAC0, IO0); -_DEFINE_CLOCK_REF(ref_io1_clk, FRAC0, IO1); -_DEFINE_CLOCK_REF(ref_pix_clk, FRAC1, PIX); -_DEFINE_CLOCK_REF(ref_gpmi_clk, FRAC1, GPMI); - -/* - * General clocks - * - * clk_get_rate - */ -static unsigned long lradc_clk_get_rate(struct clk *clk) -{ - return clk_get_rate(clk->parent) / 16; -} - -static unsigned long rtc_clk_get_rate(struct clk *clk) -{ - /* ref_xtal_clk is implemented as the only parent */ - return clk_get_rate(clk->parent) / 768; -} - -static unsigned long clk32k_clk_get_rate(struct clk *clk) -{ - return clk->parent->get_rate(clk->parent) / 750; -} - -static unsigned long spdif_clk_get_rate(struct clk *clk) -{ - return clk_get_rate(clk->parent) / 4; -} - -#define _CLK_GET_RATE(name, rs) \ -static unsigned long name##_get_rate(struct clk *clk) \ -{ \ - u32 reg, div; \ - \ - reg = __raw_readl(CLKCTRL_BASE_ADDR + HW_CLKCTRL_##rs); \ - \ - if (clk->parent == &ref_xtal_clk) \ - div = (reg & BM_CLKCTRL_##rs##_DIV_XTAL) >> \ - BP_CLKCTRL_##rs##_DIV_XTAL; \ - else \ - div = (reg & BM_CLKCTRL_##rs##_DIV_##rs) >> \ - BP_CLKCTRL_##rs##_DIV_##rs; \ - \ - if (!div) \ - return -EINVAL; \ - \ - return clk_get_rate(clk->parent) / div; \ -} - -_CLK_GET_RATE(cpu_clk, CPU) -_CLK_GET_RATE(emi_clk, EMI) - -#define _CLK_GET_RATE1(name, rs) \ -static unsigned long name##_get_rate(struct clk *clk) \ -{ \ - u32 reg, div; \ - \ - reg = __raw_readl(CLKCTRL_BASE_ADDR + HW_CLKCTRL_##rs); \ - div = (reg & BM_CLKCTRL_##rs##_DIV) >> BP_CLKCTRL_##rs##_DIV; \ - \ - if (!div) \ - return -EINVAL; \ - \ - if (clk == &saif0_clk || clk == &saif1_clk) \ - return clk_get_rate(clk->parent) >> 16 * div; \ - else \ - return clk_get_rate(clk->parent) / div; \ -} - -_CLK_GET_RATE1(hbus_clk, HBUS) -_CLK_GET_RATE1(xbus_clk, XBUS) -_CLK_GET_RATE1(ssp0_clk, SSP0) -_CLK_GET_RATE1(ssp1_clk, SSP1) -_CLK_GET_RATE1(ssp2_clk, SSP2) -_CLK_GET_RATE1(ssp3_clk, SSP3) -_CLK_GET_RATE1(gpmi_clk, GPMI) -_CLK_GET_RATE1(lcdif_clk, DIS_LCDIF) -_CLK_GET_RATE1(saif0_clk, SAIF0) -_CLK_GET_RATE1(saif1_clk, SAIF1) - -#define _CLK_GET_RATE_STUB(name) \ -static unsigned long name##_get_rate(struct clk *clk) \ -{ \ - return clk_get_rate(clk->parent); \ -} - -_CLK_GET_RATE_STUB(uart_clk) -_CLK_GET_RATE_STUB(pwm_clk) -_CLK_GET_RATE_STUB(can0_clk) -_CLK_GET_RATE_STUB(can1_clk) -_CLK_GET_RATE_STUB(fec_clk) - -/* - * clk_set_rate - */ -/* fool compiler */ -#define BM_CLKCTRL_CPU_DIV 0 -#define BP_CLKCTRL_CPU_DIV 0 -#define BM_CLKCTRL_CPU_BUSY 0 - -#define _CLK_SET_RATE(name, dr, fr, fs) \ -static int name##_set_rate(struct clk *clk, unsigned long rate) \ -{ \ - u32 reg, bm_busy, div_max, d, f, div, frac; \ - unsigned long diff, parent_rate, calc_rate; \ - \ - div_max = BM_CLKCTRL_##dr##_DIV >> BP_CLKCTRL_##dr##_DIV; \ - bm_busy = BM_CLKCTRL_##dr##_BUSY; \ - \ - if (clk->parent == &ref_xtal_clk) { \ - parent_rate = clk_get_rate(clk->parent); \ - div = DIV_ROUND_UP(parent_rate, rate); \ - if (clk == &cpu_clk) { \ - div_max = BM_CLKCTRL_CPU_DIV_XTAL >> \ - BP_CLKCTRL_CPU_DIV_XTAL; \ - bm_busy = BM_CLKCTRL_CPU_BUSY_REF_XTAL; \ - } \ - if (div == 0 || div > div_max) \ - return -EINVAL; \ - } else { \ - /* \ - * hack alert: this block modifies clk->parent, too, \ - * so the base to use it the grand parent. \ - */ \ - parent_rate = clk_get_rate(clk->parent->parent); \ - rate >>= PARENT_RATE_SHIFT; \ - parent_rate >>= PARENT_RATE_SHIFT; \ - diff = parent_rate; \ - div = frac = 1; \ - if (clk == &cpu_clk) { \ - div_max = BM_CLKCTRL_CPU_DIV_CPU >> \ - BP_CLKCTRL_CPU_DIV_CPU; \ - bm_busy = BM_CLKCTRL_CPU_BUSY_REF_CPU; \ - } \ - for (d = 1; d <= div_max; d++) { \ - f = parent_rate * 18 / d / rate; \ - if ((parent_rate * 18 / d) % rate) \ - f++; \ - if (f < 18 || f > 35) \ - continue; \ - \ - calc_rate = parent_rate * 18 / f / d; \ - if (calc_rate > rate) \ - continue; \ - \ - if (rate - calc_rate < diff) { \ - frac = f; \ - div = d; \ - diff = rate - calc_rate; \ - } \ - \ - if (diff == 0) \ - break; \ - } \ - \ - if (diff == parent_rate) \ - return -EINVAL; \ - \ - reg = __raw_readl(CLKCTRL_BASE_ADDR + HW_CLKCTRL_##fr); \ - reg &= ~BM_CLKCTRL_##fr##_##fs##FRAC; \ - reg |= frac << BP_CLKCTRL_##fr##_##fs##FRAC; \ - __raw_writel(reg, CLKCTRL_BASE_ADDR + HW_CLKCTRL_##fr); \ - } \ - \ - reg = __raw_readl(CLKCTRL_BASE_ADDR + HW_CLKCTRL_##dr); \ - if (clk == &cpu_clk) { \ - reg &= ~BM_CLKCTRL_CPU_DIV_CPU; \ - reg |= div << BP_CLKCTRL_CPU_DIV_CPU; \ - } else { \ - reg &= ~BM_CLKCTRL_##dr##_DIV; \ - reg |= div << BP_CLKCTRL_##dr##_DIV; \ - if (reg & (1 << clk->enable_shift)) { \ - pr_err("%s: clock is gated\n", __func__); \ - return -EINVAL; \ - } \ - } \ - __raw_writel(reg, CLKCTRL_BASE_ADDR + HW_CLKCTRL_##dr); \ - \ - return mxs_clkctrl_timeout(HW_CLKCTRL_##dr, bm_busy); \ -} - -_CLK_SET_RATE(cpu_clk, CPU, FRAC0, CPU) -_CLK_SET_RATE(ssp0_clk, SSP0, FRAC0, IO0) -_CLK_SET_RATE(ssp1_clk, SSP1, FRAC0, IO0) -_CLK_SET_RATE(ssp2_clk, SSP2, FRAC0, IO1) -_CLK_SET_RATE(ssp3_clk, SSP3, FRAC0, IO1) -_CLK_SET_RATE(lcdif_clk, DIS_LCDIF, FRAC1, PIX) -_CLK_SET_RATE(gpmi_clk, GPMI, FRAC1, GPMI) - -#define _CLK_SET_RATE1(name, dr) \ -static int name##_set_rate(struct clk *clk, unsigned long rate) \ -{ \ - u32 reg, div_max, div; \ - unsigned long parent_rate; \ - \ - parent_rate = clk_get_rate(clk->parent); \ - div_max = BM_CLKCTRL_##dr##_DIV >> BP_CLKCTRL_##dr##_DIV; \ - \ - div = DIV_ROUND_UP(parent_rate, rate); \ - if (div == 0 || div > div_max) \ - return -EINVAL; \ - \ - reg = __raw_readl(CLKCTRL_BASE_ADDR + HW_CLKCTRL_##dr); \ - reg &= ~BM_CLKCTRL_##dr##_DIV; \ - reg |= div << BP_CLKCTRL_##dr##_DIV; \ - if (reg & (1 << clk->enable_shift)) { \ - pr_err("%s: clock is gated\n", __func__); \ - return -EINVAL; \ - } \ - __raw_writel(reg, CLKCTRL_BASE_ADDR + HW_CLKCTRL_##dr); \ - \ - return mxs_clkctrl_timeout(HW_CLKCTRL_##dr, BM_CLKCTRL_##dr##_BUSY);\ -} - -_CLK_SET_RATE1(xbus_clk, XBUS) - -/* saif clock uses 16 bits frac div */ -#define _CLK_SET_RATE_SAIF(name, rs) \ -static int name##_set_rate(struct clk *clk, unsigned long rate) \ -{ \ - u16 div; \ - u32 reg; \ - u64 lrate; \ - unsigned long parent_rate; \ - \ - parent_rate = clk_get_rate(clk->parent); \ - if (rate > parent_rate) \ - return -EINVAL; \ - \ - lrate = (u64)rate << 16; \ - do_div(lrate, parent_rate); \ - div = (u16)lrate; \ - \ - if (!div) \ - return -EINVAL; \ - \ - reg = __raw_readl(CLKCTRL_BASE_ADDR + HW_CLKCTRL_##rs); \ - reg &= ~BM_CLKCTRL_##rs##_DIV; \ - reg |= div << BP_CLKCTRL_##rs##_DIV; \ - if (reg & (1 << clk->enable_shift)) { \ - pr_err("%s: clock is gated\n", __func__); \ - return -EINVAL; \ - } \ - __raw_writel(reg, CLKCTRL_BASE_ADDR + HW_CLKCTRL_##rs); \ - \ - return mxs_clkctrl_timeout(HW_CLKCTRL_##rs, BM_CLKCTRL_##rs##_BUSY);\ -} - -_CLK_SET_RATE_SAIF(saif0_clk, SAIF0) -_CLK_SET_RATE_SAIF(saif1_clk, SAIF1) - -#define _CLK_SET_RATE_STUB(name) \ -static int name##_set_rate(struct clk *clk, unsigned long rate) \ -{ \ - return -EINVAL; \ -} - -_CLK_SET_RATE_STUB(emi_clk) -_CLK_SET_RATE_STUB(uart_clk) -_CLK_SET_RATE_STUB(pwm_clk) -_CLK_SET_RATE_STUB(spdif_clk) -_CLK_SET_RATE_STUB(clk32k_clk) -_CLK_SET_RATE_STUB(can0_clk) -_CLK_SET_RATE_STUB(can1_clk) -_CLK_SET_RATE_STUB(fec_clk) - -/* - * clk_set_parent - */ -#define _CLK_SET_PARENT(name, bit) \ -static int name##_set_parent(struct clk *clk, struct clk *parent) \ -{ \ - if (parent != clk->parent) { \ - __raw_writel(BM_CLKCTRL_CLKSEQ_BYPASS_##bit, \ - CLKCTRL_BASE_ADDR + HW_CLKCTRL_CLKSEQ_TOG); \ - clk->parent = parent; \ - } \ - \ - return 0; \ -} - -_CLK_SET_PARENT(cpu_clk, CPU) -_CLK_SET_PARENT(emi_clk, EMI) -_CLK_SET_PARENT(ssp0_clk, SSP0) -_CLK_SET_PARENT(ssp1_clk, SSP1) -_CLK_SET_PARENT(ssp2_clk, SSP2) -_CLK_SET_PARENT(ssp3_clk, SSP3) -_CLK_SET_PARENT(lcdif_clk, DIS_LCDIF) -_CLK_SET_PARENT(gpmi_clk, GPMI) -_CLK_SET_PARENT(saif0_clk, SAIF0) -_CLK_SET_PARENT(saif1_clk, SAIF1) - -#define _CLK_SET_PARENT_STUB(name) \ -static int name##_set_parent(struct clk *clk, struct clk *parent) \ -{ \ - if (parent != clk->parent) \ - return -EINVAL; \ - else \ - return 0; \ -} - -_CLK_SET_PARENT_STUB(pwm_clk) -_CLK_SET_PARENT_STUB(uart_clk) -_CLK_SET_PARENT_STUB(clk32k_clk) -_CLK_SET_PARENT_STUB(spdif_clk) -_CLK_SET_PARENT_STUB(fec_clk) -_CLK_SET_PARENT_STUB(can0_clk) -_CLK_SET_PARENT_STUB(can1_clk) - -/* - * clk definition - */ -static struct clk cpu_clk = { - .get_rate = cpu_clk_get_rate, - .set_rate = cpu_clk_set_rate, - .set_parent = cpu_clk_set_parent, - .parent = &ref_cpu_clk, -}; - -static struct clk hbus_clk = { - .get_rate = hbus_clk_get_rate, - .parent = &cpu_clk, -}; - -static struct clk xbus_clk = { - .get_rate = xbus_clk_get_rate, - .set_rate = xbus_clk_set_rate, - .parent = &ref_xtal_clk, -}; - -static struct clk lradc_clk = { - .get_rate = lradc_clk_get_rate, - .parent = &clk32k_clk, -}; - -static struct clk rtc_clk = { - .get_rate = rtc_clk_get_rate, - .parent = &ref_xtal_clk, -}; - -/* usb_clk gate is controlled in DIGCTRL other than CLKCTRL */ -static struct clk usb0_clk = { - .enable_reg = DIGCTRL_BASE_ADDR, - .enable_shift = 2, - .enable = _raw_clk_enable, - .disable = _raw_clk_disable, - .parent = &pll0_clk, -}; - -static struct clk usb1_clk = { - .enable_reg = DIGCTRL_BASE_ADDR, - .enable_shift = 16, - .enable = _raw_clk_enable, - .disable = _raw_clk_disable, - .parent = &pll1_clk, -}; - -#define _DEFINE_CLOCK(name, er, es, p) \ - static struct clk name = { \ - .enable_reg = CLKCTRL_BASE_ADDR + HW_CLKCTRL_##er, \ - .enable_shift = BP_CLKCTRL_##er##_##es, \ - .get_rate = name##_get_rate, \ - .set_rate = name##_set_rate, \ - .set_parent = name##_set_parent, \ - .enable = _raw_clk_enable, \ - .disable = _raw_clk_disable, \ - .parent = p, \ - } - -_DEFINE_CLOCK(emi_clk, EMI, CLKGATE, &ref_xtal_clk); -_DEFINE_CLOCK(ssp0_clk, SSP0, CLKGATE, &ref_xtal_clk); -_DEFINE_CLOCK(ssp1_clk, SSP1, CLKGATE, &ref_xtal_clk); -_DEFINE_CLOCK(ssp2_clk, SSP2, CLKGATE, &ref_xtal_clk); -_DEFINE_CLOCK(ssp3_clk, SSP3, CLKGATE, &ref_xtal_clk); -_DEFINE_CLOCK(lcdif_clk, DIS_LCDIF, CLKGATE, &ref_xtal_clk); -_DEFINE_CLOCK(gpmi_clk, GPMI, CLKGATE, &ref_xtal_clk); -_DEFINE_CLOCK(saif0_clk, SAIF0, CLKGATE, &ref_xtal_clk); -_DEFINE_CLOCK(saif1_clk, SAIF1, CLKGATE, &ref_xtal_clk); -_DEFINE_CLOCK(can0_clk, FLEXCAN, STOP_CAN0, &ref_xtal_clk); -_DEFINE_CLOCK(can1_clk, FLEXCAN, STOP_CAN1, &ref_xtal_clk); -_DEFINE_CLOCK(pwm_clk, XTAL, PWM_CLK24M_GATE, &ref_xtal_clk); -_DEFINE_CLOCK(uart_clk, XTAL, UART_CLK_GATE, &ref_xtal_clk); -_DEFINE_CLOCK(clk32k_clk, XTAL, TIMROT_CLK32K_GATE, &ref_xtal_clk); -_DEFINE_CLOCK(spdif_clk, SPDIF, CLKGATE, &pll0_clk); -_DEFINE_CLOCK(fec_clk, ENET, DISABLE, &hbus_clk); - -#define _REGISTER_CLOCK(d, n, c) \ - { \ - .dev_id = d, \ - .con_id = n, \ - .clk = &c, \ - }, - -static struct clk_lookup lookups[] = { - /* for amba bus driver */ - _REGISTER_CLOCK("duart", "apb_pclk", xbus_clk) - /* for amba-pl011 driver */ - _REGISTER_CLOCK("duart", NULL, uart_clk) - _REGISTER_CLOCK("imx28-fec.0", NULL, fec_clk) - _REGISTER_CLOCK("imx28-fec.1", NULL, fec_clk) - _REGISTER_CLOCK("imx28-gpmi-nand", NULL, gpmi_clk) - _REGISTER_CLOCK("mxs-auart.0", NULL, uart_clk) - _REGISTER_CLOCK("mxs-auart.1", NULL, uart_clk) - _REGISTER_CLOCK("mxs-auart.2", NULL, uart_clk) - _REGISTER_CLOCK("mxs-auart.3", NULL, uart_clk) - _REGISTER_CLOCK("mxs-auart.4", NULL, uart_clk) - _REGISTER_CLOCK("rtc", NULL, rtc_clk) - _REGISTER_CLOCK("enet_out", NULL, pll2_clk) - _REGISTER_CLOCK("mxs-dma-apbh", NULL, hbus_clk) - _REGISTER_CLOCK("mxs-dma-apbx", NULL, xbus_clk) - _REGISTER_CLOCK("mxs-mmc.0", NULL, ssp0_clk) - _REGISTER_CLOCK("mxs-mmc.1", NULL, ssp1_clk) - _REGISTER_CLOCK("mxs-mmc.2", NULL, ssp2_clk) - _REGISTER_CLOCK("mxs-mmc.3", NULL, ssp3_clk) - _REGISTER_CLOCK("flexcan.0", NULL, can0_clk) - _REGISTER_CLOCK("flexcan.1", NULL, can1_clk) - _REGISTER_CLOCK(NULL, "usb0", usb0_clk) - _REGISTER_CLOCK(NULL, "usb1", usb1_clk) - _REGISTER_CLOCK("mxs-pwm.0", NULL, pwm_clk) - _REGISTER_CLOCK("mxs-pwm.1", NULL, pwm_clk) - _REGISTER_CLOCK("mxs-pwm.2", NULL, pwm_clk) - _REGISTER_CLOCK("mxs-pwm.3", NULL, pwm_clk) - _REGISTER_CLOCK("mxs-pwm.4", NULL, pwm_clk) - _REGISTER_CLOCK("mxs-pwm.5", NULL, pwm_clk) - _REGISTER_CLOCK("mxs-pwm.6", NULL, pwm_clk) - _REGISTER_CLOCK("mxs-pwm.7", NULL, pwm_clk) - _REGISTER_CLOCK(NULL, "lradc", lradc_clk) - _REGISTER_CLOCK(NULL, "spdif", spdif_clk) - _REGISTER_CLOCK("imx28-fb", NULL, lcdif_clk) - _REGISTER_CLOCK("mxs-saif.0", NULL, saif0_clk) - _REGISTER_CLOCK("mxs-saif.1", NULL, saif1_clk) -}; - -static int clk_misc_init(void) -{ - u32 reg; - int ret; - - /* Fix up parent per register setting */ - reg = __raw_readl(CLKCTRL_BASE_ADDR + HW_CLKCTRL_CLKSEQ); - cpu_clk.parent = (reg & BM_CLKCTRL_CLKSEQ_BYPASS_CPU) ? - &ref_xtal_clk : &ref_cpu_clk; - emi_clk.parent = (reg & BM_CLKCTRL_CLKSEQ_BYPASS_EMI) ? - &ref_xtal_clk : &ref_emi_clk; - ssp0_clk.parent = (reg & BM_CLKCTRL_CLKSEQ_BYPASS_SSP0) ? - &ref_xtal_clk : &ref_io0_clk; - ssp1_clk.parent = (reg & BM_CLKCTRL_CLKSEQ_BYPASS_SSP1) ? - &ref_xtal_clk : &ref_io0_clk; - ssp2_clk.parent = (reg & BM_CLKCTRL_CLKSEQ_BYPASS_SSP2) ? - &ref_xtal_clk : &ref_io1_clk; - ssp3_clk.parent = (reg & BM_CLKCTRL_CLKSEQ_BYPASS_SSP3) ? - &ref_xtal_clk : &ref_io1_clk; - lcdif_clk.parent = (reg & BM_CLKCTRL_CLKSEQ_BYPASS_DIS_LCDIF) ? - &ref_xtal_clk : &ref_pix_clk; - gpmi_clk.parent = (reg & BM_CLKCTRL_CLKSEQ_BYPASS_GPMI) ? - &ref_xtal_clk : &ref_gpmi_clk; - saif0_clk.parent = (reg & BM_CLKCTRL_CLKSEQ_BYPASS_SAIF0) ? - &ref_xtal_clk : &pll0_clk; - saif1_clk.parent = (reg & BM_CLKCTRL_CLKSEQ_BYPASS_SAIF1) ? - &ref_xtal_clk : &pll0_clk; - - /* Use int div over frac when both are available */ - __raw_writel(BM_CLKCTRL_CPU_DIV_XTAL_FRAC_EN, - CLKCTRL_BASE_ADDR + HW_CLKCTRL_CPU_CLR); - __raw_writel(BM_CLKCTRL_CPU_DIV_CPU_FRAC_EN, - CLKCTRL_BASE_ADDR + HW_CLKCTRL_CPU_CLR); - __raw_writel(BM_CLKCTRL_HBUS_DIV_FRAC_EN, - CLKCTRL_BASE_ADDR + HW_CLKCTRL_HBUS_CLR); - - reg = __raw_readl(CLKCTRL_BASE_ADDR + HW_CLKCTRL_XBUS); - reg &= ~BM_CLKCTRL_XBUS_DIV_FRAC_EN; - __raw_writel(reg, CLKCTRL_BASE_ADDR + HW_CLKCTRL_XBUS); - - reg = __raw_readl(CLKCTRL_BASE_ADDR + HW_CLKCTRL_SSP0); - reg &= ~BM_CLKCTRL_SSP0_DIV_FRAC_EN; - __raw_writel(reg, CLKCTRL_BASE_ADDR + HW_CLKCTRL_SSP0); - - reg = __raw_readl(CLKCTRL_BASE_ADDR + HW_CLKCTRL_SSP1); - reg &= ~BM_CLKCTRL_SSP1_DIV_FRAC_EN; - __raw_writel(reg, CLKCTRL_BASE_ADDR + HW_CLKCTRL_SSP1); - - reg = __raw_readl(CLKCTRL_BASE_ADDR + HW_CLKCTRL_SSP2); - reg &= ~BM_CLKCTRL_SSP2_DIV_FRAC_EN; - __raw_writel(reg, CLKCTRL_BASE_ADDR + HW_CLKCTRL_SSP2); - - reg = __raw_readl(CLKCTRL_BASE_ADDR + HW_CLKCTRL_SSP3); - reg &= ~BM_CLKCTRL_SSP3_DIV_FRAC_EN; - __raw_writel(reg, CLKCTRL_BASE_ADDR + HW_CLKCTRL_SSP3); - - reg = __raw_readl(CLKCTRL_BASE_ADDR + HW_CLKCTRL_GPMI); - reg &= ~BM_CLKCTRL_GPMI_DIV_FRAC_EN; - __raw_writel(reg, CLKCTRL_BASE_ADDR + HW_CLKCTRL_GPMI); - - reg = __raw_readl(CLKCTRL_BASE_ADDR + HW_CLKCTRL_DIS_LCDIF); - reg &= ~BM_CLKCTRL_DIS_LCDIF_DIV_FRAC_EN; - __raw_writel(reg, CLKCTRL_BASE_ADDR + HW_CLKCTRL_DIS_LCDIF); - - /* SAIF has to use frac div for functional operation */ - reg = __raw_readl(CLKCTRL_BASE_ADDR + HW_CLKCTRL_SAIF0); - reg |= BM_CLKCTRL_SAIF0_DIV_FRAC_EN; - __raw_writel(reg, CLKCTRL_BASE_ADDR + HW_CLKCTRL_SAIF0); - - reg = __raw_readl(CLKCTRL_BASE_ADDR + HW_CLKCTRL_SAIF1); - reg |= BM_CLKCTRL_SAIF1_DIV_FRAC_EN; - __raw_writel(reg, CLKCTRL_BASE_ADDR + HW_CLKCTRL_SAIF1); - - /* - * Set safe hbus clock divider. A divider of 3 ensure that - * the Vddd voltage required for the cpu clock is sufficiently - * high for the hbus clock. - */ - reg = __raw_readl(CLKCTRL_BASE_ADDR + HW_CLKCTRL_HBUS); - reg &= BM_CLKCTRL_HBUS_DIV; - reg |= 3 << BP_CLKCTRL_HBUS_DIV; - __raw_writel(reg, CLKCTRL_BASE_ADDR + HW_CLKCTRL_HBUS); - - ret = mxs_clkctrl_timeout(HW_CLKCTRL_HBUS, BM_CLKCTRL_HBUS_ASM_BUSY); - - /* Gate off cpu clock in WFI for power saving */ - __raw_writel(BM_CLKCTRL_CPU_INTERRUPT_WAIT, - CLKCTRL_BASE_ADDR + HW_CLKCTRL_CPU_SET); - - /* - * Extra fec clock setting - * The DENX M28 uses an external clock source - * and the clock output must not be enabled - */ - if (!machine_is_m28evk()) { - reg = __raw_readl(CLKCTRL_BASE_ADDR + HW_CLKCTRL_ENET); - reg &= ~BM_CLKCTRL_ENET_SLEEP; - reg |= BM_CLKCTRL_ENET_CLK_OUT_EN; - __raw_writel(reg, CLKCTRL_BASE_ADDR + HW_CLKCTRL_ENET); - } - - /* - * 480 MHz seems too high to be ssp clock source directly, - * so set frac0 to get a 288 MHz ref_io0. - */ - reg = __raw_readl(CLKCTRL_BASE_ADDR + HW_CLKCTRL_FRAC0); - reg &= ~BM_CLKCTRL_FRAC0_IO0FRAC; - reg |= 30 << BP_CLKCTRL_FRAC0_IO0FRAC; - __raw_writel(reg, CLKCTRL_BASE_ADDR + HW_CLKCTRL_FRAC0); - - return ret; -} - -int __init mx28_clocks_init(void) -{ - clk_misc_init(); - - /* - * source ssp clock from ref_io0 than ref_xtal, - * as ref_xtal only provides 24 MHz as maximum. - */ - clk_set_parent(&ssp0_clk, &ref_io0_clk); - clk_set_parent(&ssp1_clk, &ref_io0_clk); - clk_set_parent(&ssp2_clk, &ref_io1_clk); - clk_set_parent(&ssp3_clk, &ref_io1_clk); - - clk_prepare_enable(&cpu_clk); - clk_prepare_enable(&hbus_clk); - clk_prepare_enable(&xbus_clk); - clk_prepare_enable(&emi_clk); - clk_prepare_enable(&uart_clk); - - clk_set_parent(&lcdif_clk, &ref_pix_clk); - clk_set_parent(&saif0_clk, &pll0_clk); - clk_set_parent(&saif1_clk, &pll0_clk); - - /* - * Set an initial clock rate for the saif internal logic to work - * properly. This is important when working in EXTMASTER mode that - * uses the other saif's BITCLK&LRCLK but it still needs a basic - * clock which should be fast enough for the internal logic. - */ - clk_set_rate(&saif0_clk, 24000000); - clk_set_rate(&saif1_clk, 24000000); - - clkdev_add_table(lookups, ARRAY_SIZE(lookups)); - - mxs_timer_init(&clk32k_clk, MX28_INT_TIMER0); - - return 0; -} diff --git a/arch/arm/mach-mxs/clock.c b/arch/arm/mach-mxs/clock.c deleted file mode 100644 index 97a6f4a..0000000 --- a/arch/arm/mach-mxs/clock.c +++ /dev/null @@ -1,211 +0,0 @@ -/* - * Based on arch/arm/plat-omap/clock.c - * - * Copyright (C) 2004 - 2005 Nokia corporation - * Written by Tuukka Tikkanen - * Modified for omap shared clock framework by Tony Lindgren - * Copyright 2007 Freescale Semiconductor, Inc. All Rights Reserved. - * Copyright 2008 Juergen Beisert, kernel@pengutronix.de - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2 - * of the License, or (at your option) any later version. - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, - * MA 02110-1301, USA. - */ - -/* #define DEBUG */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -static LIST_HEAD(clocks); -static DEFINE_MUTEX(clocks_mutex); - -/*------------------------------------------------------------------------- - * Standard clock functions defined in include/linux/clk.h - *-------------------------------------------------------------------------*/ - -static void __clk_disable(struct clk *clk) -{ - if (clk == NULL || IS_ERR(clk)) - return; - WARN_ON(!clk->usecount); - - if (!(--clk->usecount)) { - if (clk->disable) - clk->disable(clk); - __clk_disable(clk->parent); - } -} - -static int __clk_enable(struct clk *clk) -{ - if (clk == NULL || IS_ERR(clk)) - return -EINVAL; - - if (clk->usecount++ == 0) { - __clk_enable(clk->parent); - - if (clk->enable) - clk->enable(clk); - } - return 0; -} - -/* - * The clk_enable/clk_disable could be called by drivers in atomic context, - * so they should not really hold mutex. Instead, clk_prepare/clk_unprepare - * can hold a mutex, as the pair will only be called in non-atomic context. - * Before migrating to common clk framework, we can have __clk_enable and - * __clk_disable called in clk_prepare/clk_unprepare with mutex held and - * leave clk_enable/clk_disable as the dummy functions. - */ -int clk_prepare(struct clk *clk) -{ - int ret = 0; - - if (clk == NULL || IS_ERR(clk)) - return -EINVAL; - - mutex_lock(&clocks_mutex); - ret = __clk_enable(clk); - mutex_unlock(&clocks_mutex); - - return ret; -} -EXPORT_SYMBOL(clk_prepare); - -void clk_unprepare(struct clk *clk) -{ - if (clk == NULL || IS_ERR(clk)) - return; - - mutex_lock(&clocks_mutex); - __clk_disable(clk); - mutex_unlock(&clocks_mutex); -} -EXPORT_SYMBOL(clk_unprepare); - -int clk_enable(struct clk *clk) -{ - return 0; -} -EXPORT_SYMBOL(clk_enable); - -void clk_disable(struct clk *clk) -{ - /* nothing to do */ -} -EXPORT_SYMBOL(clk_disable); - -/* Retrieve the *current* clock rate. If the clock itself - * does not provide a special calculation routine, ask - * its parent and so on, until one is able to return - * a valid clock rate - */ -unsigned long clk_get_rate(struct clk *clk) -{ - if (clk == NULL || IS_ERR(clk)) - return 0UL; - - if (clk->get_rate) - return clk->get_rate(clk); - - return clk_get_rate(clk->parent); -} -EXPORT_SYMBOL(clk_get_rate); - -/* Round the requested clock rate to the nearest supported - * rate that is less than or equal to the requested rate. - * This is dependent on the clock's current parent. - */ -long clk_round_rate(struct clk *clk, unsigned long rate) -{ - if (clk == NULL || IS_ERR(clk) || !clk->round_rate) - return 0; - - return clk->round_rate(clk, rate); -} -EXPORT_SYMBOL(clk_round_rate); - -/* Set the clock to the requested clock rate. The rate must - * match a supported rate exactly based on what clk_round_rate returns - */ -int clk_set_rate(struct clk *clk, unsigned long rate) -{ - int ret = -EINVAL; - - if (clk == NULL || IS_ERR(clk) || clk->set_rate == NULL || rate == 0) - return ret; - - mutex_lock(&clocks_mutex); - ret = clk->set_rate(clk, rate); - mutex_unlock(&clocks_mutex); - - return ret; -} -EXPORT_SYMBOL(clk_set_rate); - -/* Set the clock's parent to another clock source */ -int clk_set_parent(struct clk *clk, struct clk *parent) -{ - int ret = -EINVAL; - struct clk *old; - - if (clk == NULL || IS_ERR(clk) || parent == NULL || - IS_ERR(parent) || clk->set_parent == NULL) - return ret; - - if (clk->usecount) - clk_prepare_enable(parent); - - mutex_lock(&clocks_mutex); - ret = clk->set_parent(clk, parent); - if (ret == 0) { - old = clk->parent; - clk->parent = parent; - } else { - old = parent; - } - mutex_unlock(&clocks_mutex); - - if (clk->usecount) - clk_disable(old); - - return ret; -} -EXPORT_SYMBOL(clk_set_parent); - -/* Retrieve the clock's parent clock source */ -struct clk *clk_get_parent(struct clk *clk) -{ - struct clk *ret = NULL; - - if (clk == NULL || IS_ERR(clk)) - return ret; - - return clk->parent; -} -EXPORT_SYMBOL(clk_get_parent); diff --git a/arch/arm/mach-mxs/include/mach/clock.h b/arch/arm/mach-mxs/include/mach/clock.h deleted file mode 100644 index 592c9ab..0000000 --- a/arch/arm/mach-mxs/include/mach/clock.h +++ /dev/null @@ -1,62 +0,0 @@ -/* - * Copyright 2005-2007 Freescale Semiconductor, Inc. All Rights Reserved. - * Copyright 2008 Juergen Beisert, kernel@pengutronix.de - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2 - * of the License, or (at your option) any later version. - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, - * MA 02110-1301, USA. - */ - -#ifndef __MACH_MXS_CLOCK_H__ -#define __MACH_MXS_CLOCK_H__ - -#ifndef __ASSEMBLY__ -#include - -struct module; - -struct clk { - int id; - /* Source clock this clk depends on */ - struct clk *parent; - /* Reference count of clock enable/disable */ - __s8 usecount; - /* Register bit position for clock's enable/disable control. */ - u8 enable_shift; - /* Register address for clock's enable/disable control. */ - void __iomem *enable_reg; - u32 flags; - /* get the current clock rate (always a fresh value) */ - unsigned long (*get_rate) (struct clk *); - /* Function ptr to set the clock to a new rate. The rate must match a - supported rate returned from round_rate. Leave blank if clock is not - programmable */ - int (*set_rate) (struct clk *, unsigned long); - /* Function ptr to round the requested clock rate to the nearest - supported rate that is less than or equal to the requested rate. */ - unsigned long (*round_rate) (struct clk *, unsigned long); - /* Function ptr to enable the clock. Leave blank if clock can not - be gated. */ - int (*enable) (struct clk *); - /* Function ptr to disable the clock. Leave blank if clock can not - be gated. */ - void (*disable) (struct clk *); - /* Function ptr to set the parent clock of the clock. */ - int (*set_parent) (struct clk *, struct clk *); -}; - -int clk_register(struct clk *clk); -void clk_unregister(struct clk *clk); - -#endif /* __ASSEMBLY__ */ -#endif /* __MACH_MXS_CLOCK_H__ */ diff --git a/arch/arm/mach-mxs/include/mach/common.h b/arch/arm/mach-mxs/include/mach/common.h index c50c3ea..e1237ab 100644 --- a/arch/arm/mach-mxs/include/mach/common.h +++ b/arch/arm/mach-mxs/include/mach/common.h @@ -31,6 +31,4 @@ extern void mx28_init_irq(void); extern void icoll_init_irq(void); -extern int mxs_clkctrl_timeout(unsigned int reg_offset, unsigned int mask); - #endif /* __MACH_MXS_COMMON_H__ */ diff --git a/arch/arm/mach-mxs/regs-clkctrl-mx23.h b/arch/arm/mach-mxs/regs-clkctrl-mx23.h deleted file mode 100644 index 0ea5c9d..0000000 --- a/arch/arm/mach-mxs/regs-clkctrl-mx23.h +++ /dev/null @@ -1,331 +0,0 @@ -/* - * Freescale CLKCTRL Register Definitions - * - * Copyright 2008 Embedded Alley Solutions, Inc All Rights Reserved. - * Copyright 2008-2010 Freescale Semiconductor, Inc. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - * This file is created by xml file. Don't Edit it. - * - * Xml Revision: 1.48 - * Template revision: 26195 - */ - -#ifndef __REGS_CLKCTRL_MX23_H__ -#define __REGS_CLKCTRL_MX23_H__ - - -#define HW_CLKCTRL_PLLCTRL0 (0x00000000) -#define HW_CLKCTRL_PLLCTRL0_SET (0x00000004) -#define HW_CLKCTRL_PLLCTRL0_CLR (0x00000008) -#define HW_CLKCTRL_PLLCTRL0_TOG (0x0000000c) - -#define BP_CLKCTRL_PLLCTRL0_LFR_SEL 28 -#define BM_CLKCTRL_PLLCTRL0_LFR_SEL 0x30000000 -#define BF_CLKCTRL_PLLCTRL0_LFR_SEL(v) \ - (((v) << 28) & BM_CLKCTRL_PLLCTRL0_LFR_SEL) -#define BV_CLKCTRL_PLLCTRL0_LFR_SEL__DEFAULT 0x0 -#define BV_CLKCTRL_PLLCTRL0_LFR_SEL__TIMES_2 0x1 -#define BV_CLKCTRL_PLLCTRL0_LFR_SEL__TIMES_05 0x2 -#define BV_CLKCTRL_PLLCTRL0_LFR_SEL__UNDEFINED 0x3 -#define BP_CLKCTRL_PLLCTRL0_CP_SEL 24 -#define BM_CLKCTRL_PLLCTRL0_CP_SEL 0x03000000 -#define BF_CLKCTRL_PLLCTRL0_CP_SEL(v) \ - (((v) << 24) & BM_CLKCTRL_PLLCTRL0_CP_SEL) -#define BV_CLKCTRL_PLLCTRL0_CP_SEL__DEFAULT 0x0 -#define BV_CLKCTRL_PLLCTRL0_CP_SEL__TIMES_2 0x1 -#define BV_CLKCTRL_PLLCTRL0_CP_SEL__TIMES_05 0x2 -#define BV_CLKCTRL_PLLCTRL0_CP_SEL__UNDEFINED 0x3 -#define BP_CLKCTRL_PLLCTRL0_DIV_SEL 20 -#define BM_CLKCTRL_PLLCTRL0_DIV_SEL 0x00300000 -#define BF_CLKCTRL_PLLCTRL0_DIV_SEL(v) \ - (((v) << 20) & BM_CLKCTRL_PLLCTRL0_DIV_SEL) -#define BV_CLKCTRL_PLLCTRL0_DIV_SEL__DEFAULT 0x0 -#define BV_CLKCTRL_PLLCTRL0_DIV_SEL__LOWER 0x1 -#define BV_CLKCTRL_PLLCTRL0_DIV_SEL__LOWEST 0x2 -#define BV_CLKCTRL_PLLCTRL0_DIV_SEL__UNDEFINED 0x3 -#define BM_CLKCTRL_PLLCTRL0_EN_USB_CLKS 0x00040000 -#define BM_CLKCTRL_PLLCTRL0_POWER 0x00010000 - -#define HW_CLKCTRL_PLLCTRL1 (0x00000010) - -#define BM_CLKCTRL_PLLCTRL1_LOCK 0x80000000 -#define BM_CLKCTRL_PLLCTRL1_FORCE_LOCK 0x40000000 -#define BP_CLKCTRL_PLLCTRL1_LOCK_COUNT 0 -#define BM_CLKCTRL_PLLCTRL1_LOCK_COUNT 0x0000FFFF -#define BF_CLKCTRL_PLLCTRL1_LOCK_COUNT(v) \ - (((v) << 0) & BM_CLKCTRL_PLLCTRL1_LOCK_COUNT) - -#define HW_CLKCTRL_CPU (0x00000020) -#define HW_CLKCTRL_CPU_SET (0x00000024) -#define HW_CLKCTRL_CPU_CLR (0x00000028) -#define HW_CLKCTRL_CPU_TOG (0x0000002c) - -#define BM_CLKCTRL_CPU_BUSY_REF_XTAL 0x20000000 -#define BM_CLKCTRL_CPU_BUSY_REF_CPU 0x10000000 -#define BM_CLKCTRL_CPU_DIV_XTAL_FRAC_EN 0x04000000 -#define BP_CLKCTRL_CPU_DIV_XTAL 16 -#define BM_CLKCTRL_CPU_DIV_XTAL 0x03FF0000 -#define BF_CLKCTRL_CPU_DIV_XTAL(v) \ - (((v) << 16) & BM_CLKCTRL_CPU_DIV_XTAL) -#define BM_CLKCTRL_CPU_INTERRUPT_WAIT 0x00001000 -#define BM_CLKCTRL_CPU_DIV_CPU_FRAC_EN 0x00000400 -#define BP_CLKCTRL_CPU_DIV_CPU 0 -#define BM_CLKCTRL_CPU_DIV_CPU 0x0000003F -#define BF_CLKCTRL_CPU_DIV_CPU(v) \ - (((v) << 0) & BM_CLKCTRL_CPU_DIV_CPU) - -#define HW_CLKCTRL_HBUS (0x00000030) -#define HW_CLKCTRL_HBUS_SET (0x00000034) -#define HW_CLKCTRL_HBUS_CLR (0x00000038) -#define HW_CLKCTRL_HBUS_TOG (0x0000003c) - -#define BM_CLKCTRL_HBUS_BUSY 0x20000000 -#define BM_CLKCTRL_HBUS_DCP_AS_ENABLE 0x10000000 -#define BM_CLKCTRL_HBUS_PXP_AS_ENABLE 0x08000000 -#define BM_CLKCTRL_HBUS_APBHDMA_AS_ENABLE 0x04000000 -#define BM_CLKCTRL_HBUS_APBXDMA_AS_ENABLE 0x02000000 -#define BM_CLKCTRL_HBUS_TRAFFIC_JAM_AS_ENABLE 0x01000000 -#define BM_CLKCTRL_HBUS_TRAFFIC_AS_ENABLE 0x00800000 -#define BM_CLKCTRL_HBUS_CPU_DATA_AS_ENABLE 0x00400000 -#define BM_CLKCTRL_HBUS_CPU_INSTR_AS_ENABLE 0x00200000 -#define BM_CLKCTRL_HBUS_AUTO_SLOW_MODE 0x00100000 -#define BP_CLKCTRL_HBUS_SLOW_DIV 16 -#define BM_CLKCTRL_HBUS_SLOW_DIV 0x00070000 -#define BF_CLKCTRL_HBUS_SLOW_DIV(v) \ - (((v) << 16) & BM_CLKCTRL_HBUS_SLOW_DIV) -#define BV_CLKCTRL_HBUS_SLOW_DIV__BY1 0x0 -#define BV_CLKCTRL_HBUS_SLOW_DIV__BY2 0x1 -#define BV_CLKCTRL_HBUS_SLOW_DIV__BY4 0x2 -#define BV_CLKCTRL_HBUS_SLOW_DIV__BY8 0x3 -#define BV_CLKCTRL_HBUS_SLOW_DIV__BY16 0x4 -#define BV_CLKCTRL_HBUS_SLOW_DIV__BY32 0x5 -#define BM_CLKCTRL_HBUS_DIV_FRAC_EN 0x00000020 -#define BP_CLKCTRL_HBUS_DIV 0 -#define BM_CLKCTRL_HBUS_DIV 0x0000001F -#define BF_CLKCTRL_HBUS_DIV(v) \ - (((v) << 0) & BM_CLKCTRL_HBUS_DIV) - -#define HW_CLKCTRL_XBUS (0x00000040) - -#define BM_CLKCTRL_XBUS_BUSY 0x80000000 -#define BM_CLKCTRL_XBUS_DIV_FRAC_EN 0x00000400 -#define BP_CLKCTRL_XBUS_DIV 0 -#define BM_CLKCTRL_XBUS_DIV 0x000003FF -#define BF_CLKCTRL_XBUS_DIV(v) \ - (((v) << 0) & BM_CLKCTRL_XBUS_DIV) - -#define HW_CLKCTRL_XTAL (0x00000050) -#define HW_CLKCTRL_XTAL_SET (0x00000054) -#define HW_CLKCTRL_XTAL_CLR (0x00000058) -#define HW_CLKCTRL_XTAL_TOG (0x0000005c) - -#define BP_CLKCTRL_XTAL_UART_CLK_GATE 31 -#define BM_CLKCTRL_XTAL_UART_CLK_GATE 0x80000000 -#define BP_CLKCTRL_XTAL_FILT_CLK24M_GATE 30 -#define BM_CLKCTRL_XTAL_FILT_CLK24M_GATE 0x40000000 -#define BP_CLKCTRL_XTAL_PWM_CLK24M_GATE 29 -#define BM_CLKCTRL_XTAL_PWM_CLK24M_GATE 0x20000000 -#define BM_CLKCTRL_XTAL_DRI_CLK24M_GATE 0x10000000 -#define BM_CLKCTRL_XTAL_DIGCTRL_CLK1M_GATE 0x08000000 -#define BP_CLKCTRL_XTAL_TIMROT_CLK32K_GATE 26 -#define BM_CLKCTRL_XTAL_TIMROT_CLK32K_GATE 0x04000000 -#define BP_CLKCTRL_XTAL_DIV_UART 0 -#define BM_CLKCTRL_XTAL_DIV_UART 0x00000003 -#define BF_CLKCTRL_XTAL_DIV_UART(v) \ - (((v) << 0) & BM_CLKCTRL_XTAL_DIV_UART) - -#define HW_CLKCTRL_PIX (0x00000060) - -#define BP_CLKCTRL_PIX_CLKGATE 31 -#define BM_CLKCTRL_PIX_CLKGATE 0x80000000 -#define BM_CLKCTRL_PIX_BUSY 0x20000000 -#define BM_CLKCTRL_PIX_DIV_FRAC_EN 0x00001000 -#define BP_CLKCTRL_PIX_DIV 0 -#define BM_CLKCTRL_PIX_DIV 0x00000FFF -#define BF_CLKCTRL_PIX_DIV(v) \ - (((v) << 0) & BM_CLKCTRL_PIX_DIV) - -#define HW_CLKCTRL_SSP (0x00000070) - -#define BP_CLKCTRL_SSP_CLKGATE 31 -#define BM_CLKCTRL_SSP_CLKGATE 0x80000000 -#define BM_CLKCTRL_SSP_BUSY 0x20000000 -#define BM_CLKCTRL_SSP_DIV_FRAC_EN 0x00000200 -#define BP_CLKCTRL_SSP_DIV 0 -#define BM_CLKCTRL_SSP_DIV 0x000001FF -#define BF_CLKCTRL_SSP_DIV(v) \ - (((v) << 0) & BM_CLKCTRL_SSP_DIV) - -#define HW_CLKCTRL_GPMI (0x00000080) - -#define BP_CLKCTRL_GPMI_CLKGATE 31 -#define BM_CLKCTRL_GPMI_CLKGATE 0x80000000 -#define BM_CLKCTRL_GPMI_BUSY 0x20000000 -#define BM_CLKCTRL_GPMI_DIV_FRAC_EN 0x00000400 -#define BP_CLKCTRL_GPMI_DIV 0 -#define BM_CLKCTRL_GPMI_DIV 0x000003FF -#define BF_CLKCTRL_GPMI_DIV(v) \ - (((v) << 0) & BM_CLKCTRL_GPMI_DIV) - -#define HW_CLKCTRL_SPDIF (0x00000090) - -#define BM_CLKCTRL_SPDIF_CLKGATE 0x80000000 - -#define HW_CLKCTRL_EMI (0x000000a0) - -#define BP_CLKCTRL_EMI_CLKGATE 31 -#define BM_CLKCTRL_EMI_CLKGATE 0x80000000 -#define BM_CLKCTRL_EMI_SYNC_MODE_EN 0x40000000 -#define BM_CLKCTRL_EMI_BUSY_REF_XTAL 0x20000000 -#define BM_CLKCTRL_EMI_BUSY_REF_EMI 0x10000000 -#define BM_CLKCTRL_EMI_BUSY_REF_CPU 0x08000000 -#define BM_CLKCTRL_EMI_BUSY_SYNC_MODE 0x04000000 -#define BM_CLKCTRL_EMI_BUSY_DCC_RESYNC 0x00020000 -#define BM_CLKCTRL_EMI_DCC_RESYNC_ENABLE 0x00010000 -#define BP_CLKCTRL_EMI_DIV_XTAL 8 -#define BM_CLKCTRL_EMI_DIV_XTAL 0x00000F00 -#define BF_CLKCTRL_EMI_DIV_XTAL(v) \ - (((v) << 8) & BM_CLKCTRL_EMI_DIV_XTAL) -#define BP_CLKCTRL_EMI_DIV_EMI 0 -#define BM_CLKCTRL_EMI_DIV_EMI 0x0000003F -#define BF_CLKCTRL_EMI_DIV_EMI(v) \ - (((v) << 0) & BM_CLKCTRL_EMI_DIV_EMI) - -#define HW_CLKCTRL_IR (0x000000b0) - -#define BM_CLKCTRL_IR_CLKGATE 0x80000000 -#define BM_CLKCTRL_IR_AUTO_DIV 0x20000000 -#define BM_CLKCTRL_IR_IR_BUSY 0x10000000 -#define BM_CLKCTRL_IR_IROV_BUSY 0x08000000 -#define BP_CLKCTRL_IR_IROV_DIV 16 -#define BM_CLKCTRL_IR_IROV_DIV 0x01FF0000 -#define BF_CLKCTRL_IR_IROV_DIV(v) \ - (((v) << 16) & BM_CLKCTRL_IR_IROV_DIV) -#define BP_CLKCTRL_IR_IR_DIV 0 -#define BM_CLKCTRL_IR_IR_DIV 0x000003FF -#define BF_CLKCTRL_IR_IR_DIV(v) \ - (((v) << 0) & BM_CLKCTRL_IR_IR_DIV) - -#define HW_CLKCTRL_SAIF (0x000000c0) - -#define BM_CLKCTRL_SAIF_CLKGATE 0x80000000 -#define BM_CLKCTRL_SAIF_BUSY 0x20000000 -#define BM_CLKCTRL_SAIF_DIV_FRAC_EN 0x00010000 -#define BP_CLKCTRL_SAIF_DIV 0 -#define BM_CLKCTRL_SAIF_DIV 0x0000FFFF -#define BF_CLKCTRL_SAIF_DIV(v) \ - (((v) << 0) & BM_CLKCTRL_SAIF_DIV) - -#define HW_CLKCTRL_TV (0x000000d0) - -#define BM_CLKCTRL_TV_CLK_TV108M_GATE 0x80000000 -#define BM_CLKCTRL_TV_CLK_TV_GATE 0x40000000 - -#define HW_CLKCTRL_ETM (0x000000e0) - -#define BM_CLKCTRL_ETM_CLKGATE 0x80000000 -#define BM_CLKCTRL_ETM_BUSY 0x20000000 -#define BM_CLKCTRL_ETM_DIV_FRAC_EN 0x00000040 -#define BP_CLKCTRL_ETM_DIV 0 -#define BM_CLKCTRL_ETM_DIV 0x0000003F -#define BF_CLKCTRL_ETM_DIV(v) \ - (((v) << 0) & BM_CLKCTRL_ETM_DIV) - -#define HW_CLKCTRL_FRAC (0x000000f0) -#define HW_CLKCTRL_FRAC_SET (0x000000f4) -#define HW_CLKCTRL_FRAC_CLR (0x000000f8) -#define HW_CLKCTRL_FRAC_TOG (0x000000fc) - -#define BP_CLKCTRL_FRAC_CLKGATEIO 31 -#define BM_CLKCTRL_FRAC_CLKGATEIO 0x80000000 -#define BM_CLKCTRL_FRAC_IO_STABLE 0x40000000 -#define BP_CLKCTRL_FRAC_IOFRAC 24 -#define BM_CLKCTRL_FRAC_IOFRAC 0x3F000000 -#define BF_CLKCTRL_FRAC_IOFRAC(v) \ - (((v) << 24) & BM_CLKCTRL_FRAC_IOFRAC) -#define BP_CLKCTRL_FRAC_CLKGATEPIX 23 -#define BM_CLKCTRL_FRAC_CLKGATEPIX 0x00800000 -#define BM_CLKCTRL_FRAC_PIX_STABLE 0x00400000 -#define BP_CLKCTRL_FRAC_PIXFRAC 16 -#define BM_CLKCTRL_FRAC_PIXFRAC 0x003F0000 -#define BF_CLKCTRL_FRAC_PIXFRAC(v) \ - (((v) << 16) & BM_CLKCTRL_FRAC_PIXFRAC) -#define BP_CLKCTRL_FRAC_CLKGATEEMI 15 -#define BM_CLKCTRL_FRAC_CLKGATEEMI 0x00008000 -#define BM_CLKCTRL_FRAC_EMI_STABLE 0x00004000 -#define BP_CLKCTRL_FRAC_EMIFRAC 8 -#define BM_CLKCTRL_FRAC_EMIFRAC 0x00003F00 -#define BF_CLKCTRL_FRAC_EMIFRAC(v) \ - (((v) << 8) & BM_CLKCTRL_FRAC_EMIFRAC) -#define BP_CLKCTRL_FRAC_CLKGATECPU 7 -#define BM_CLKCTRL_FRAC_CLKGATECPU 0x00000080 -#define BM_CLKCTRL_FRAC_CPU_STABLE 0x00000040 -#define BP_CLKCTRL_FRAC_CPUFRAC 0 -#define BM_CLKCTRL_FRAC_CPUFRAC 0x0000003F -#define BF_CLKCTRL_FRAC_CPUFRAC(v) \ - (((v) << 0) & BM_CLKCTRL_FRAC_CPUFRAC) - -#define HW_CLKCTRL_FRAC1 (0x00000100) -#define HW_CLKCTRL_FRAC1_SET (0x00000104) -#define HW_CLKCTRL_FRAC1_CLR (0x00000108) -#define HW_CLKCTRL_FRAC1_TOG (0x0000010c) - -#define BM_CLKCTRL_FRAC1_CLKGATEVID 0x80000000 -#define BM_CLKCTRL_FRAC1_VID_STABLE 0x40000000 - -#define HW_CLKCTRL_CLKSEQ (0x00000110) -#define HW_CLKCTRL_CLKSEQ_SET (0x00000114) -#define HW_CLKCTRL_CLKSEQ_CLR (0x00000118) -#define HW_CLKCTRL_CLKSEQ_TOG (0x0000011c) - -#define BM_CLKCTRL_CLKSEQ_BYPASS_ETM 0x00000100 -#define BM_CLKCTRL_CLKSEQ_BYPASS_CPU 0x00000080 -#define BM_CLKCTRL_CLKSEQ_BYPASS_EMI 0x00000040 -#define BM_CLKCTRL_CLKSEQ_BYPASS_SSP 0x00000020 -#define BM_CLKCTRL_CLKSEQ_BYPASS_GPMI 0x00000010 -#define BM_CLKCTRL_CLKSEQ_BYPASS_IR 0x00000008 -#define BM_CLKCTRL_CLKSEQ_BYPASS_PIX 0x00000002 -#define BM_CLKCTRL_CLKSEQ_BYPASS_SAIF 0x00000001 - -#define HW_CLKCTRL_RESET (0x00000120) - -#define BM_CLKCTRL_RESET_CHIP 0x00000002 -#define BM_CLKCTRL_RESET_DIG 0x00000001 - -#define HW_CLKCTRL_STATUS (0x00000130) - -#define BP_CLKCTRL_STATUS_CPU_LIMIT 30 -#define BM_CLKCTRL_STATUS_CPU_LIMIT 0xC0000000 -#define BF_CLKCTRL_STATUS_CPU_LIMIT(v) \ - (((v) << 30) & BM_CLKCTRL_STATUS_CPU_LIMIT) - -#define HW_CLKCTRL_VERSION (0x00000140) - -#define BP_CLKCTRL_VERSION_MAJOR 24 -#define BM_CLKCTRL_VERSION_MAJOR 0xFF000000 -#define BF_CLKCTRL_VERSION_MAJOR(v) \ - (((v) << 24) & BM_CLKCTRL_VERSION_MAJOR) -#define BP_CLKCTRL_VERSION_MINOR 16 -#define BM_CLKCTRL_VERSION_MINOR 0x00FF0000 -#define BF_CLKCTRL_VERSION_MINOR(v) \ - (((v) << 16) & BM_CLKCTRL_VERSION_MINOR) -#define BP_CLKCTRL_VERSION_STEP 0 -#define BM_CLKCTRL_VERSION_STEP 0x0000FFFF -#define BF_CLKCTRL_VERSION_STEP(v) \ - (((v) << 0) & BM_CLKCTRL_VERSION_STEP) - -#endif /* __REGS_CLKCTRL_MX23_H__ */ diff --git a/arch/arm/mach-mxs/regs-clkctrl-mx28.h b/arch/arm/mach-mxs/regs-clkctrl-mx28.h deleted file mode 100644 index 7d1b061..0000000 --- a/arch/arm/mach-mxs/regs-clkctrl-mx28.h +++ /dev/null @@ -1,486 +0,0 @@ -/* - * Freescale CLKCTRL Register Definitions - * - * Copyright 2009-2010 Freescale Semiconductor, Inc. All Rights Reserved. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - * This file is created by xml file. Don't Edit it. - * - * Xml Revision: 1.48 - * Template revision: 26195 - */ - -#ifndef __REGS_CLKCTRL_MX28_H__ -#define __REGS_CLKCTRL_MX28_H__ - -#define HW_CLKCTRL_PLL0CTRL0 (0x00000000) -#define HW_CLKCTRL_PLL0CTRL0_SET (0x00000004) -#define HW_CLKCTRL_PLL0CTRL0_CLR (0x00000008) -#define HW_CLKCTRL_PLL0CTRL0_TOG (0x0000000c) - -#define BP_CLKCTRL_PLL0CTRL0_LFR_SEL 28 -#define BM_CLKCTRL_PLL0CTRL0_LFR_SEL 0x30000000 -#define BF_CLKCTRL_PLL0CTRL0_LFR_SEL(v) \ - (((v) << 28) & BM_CLKCTRL_PLL0CTRL0_LFR_SEL) -#define BV_CLKCTRL_PLL0CTRL0_LFR_SEL__DEFAULT 0x0 -#define BV_CLKCTRL_PLL0CTRL0_LFR_SEL__TIMES_2 0x1 -#define BV_CLKCTRL_PLL0CTRL0_LFR_SEL__TIMES_05 0x2 -#define BV_CLKCTRL_PLL0CTRL0_LFR_SEL__UNDEFINED 0x3 -#define BP_CLKCTRL_PLL0CTRL0_CP_SEL 24 -#define BM_CLKCTRL_PLL0CTRL0_CP_SEL 0x03000000 -#define BF_CLKCTRL_PLL0CTRL0_CP_SEL(v) \ - (((v) << 24) & BM_CLKCTRL_PLL0CTRL0_CP_SEL) -#define BV_CLKCTRL_PLL0CTRL0_CP_SEL__DEFAULT 0x0 -#define BV_CLKCTRL_PLL0CTRL0_CP_SEL__TIMES_2 0x1 -#define BV_CLKCTRL_PLL0CTRL0_CP_SEL__TIMES_05 0x2 -#define BV_CLKCTRL_PLL0CTRL0_CP_SEL__UNDEFINED 0x3 -#define BP_CLKCTRL_PLL0CTRL0_DIV_SEL 20 -#define BM_CLKCTRL_PLL0CTRL0_DIV_SEL 0x00300000 -#define BF_CLKCTRL_PLL0CTRL0_DIV_SEL(v) \ - (((v) << 20) & BM_CLKCTRL_PLL0CTRL0_DIV_SEL) -#define BV_CLKCTRL_PLL0CTRL0_DIV_SEL__DEFAULT 0x0 -#define BV_CLKCTRL_PLL0CTRL0_DIV_SEL__LOWER 0x1 -#define BV_CLKCTRL_PLL0CTRL0_DIV_SEL__LOWEST 0x2 -#define BV_CLKCTRL_PLL0CTRL0_DIV_SEL__UNDEFINED 0x3 -#define BM_CLKCTRL_PLL0CTRL0_EN_USB_CLKS 0x00040000 -#define BM_CLKCTRL_PLL0CTRL0_POWER 0x00020000 - -#define HW_CLKCTRL_PLL0CTRL1 (0x00000010) - -#define BM_CLKCTRL_PLL0CTRL1_LOCK 0x80000000 -#define BM_CLKCTRL_PLL0CTRL1_FORCE_LOCK 0x40000000 -#define BP_CLKCTRL_PLL0CTRL1_LOCK_COUNT 0 -#define BM_CLKCTRL_PLL0CTRL1_LOCK_COUNT 0x0000FFFF -#define BF_CLKCTRL_PLL0CTRL1_LOCK_COUNT(v) \ - (((v) << 0) & BM_CLKCTRL_PLL0CTRL1_LOCK_COUNT) - -#define HW_CLKCTRL_PLL1CTRL0 (0x00000020) -#define HW_CLKCTRL_PLL1CTRL0_SET (0x00000024) -#define HW_CLKCTRL_PLL1CTRL0_CLR (0x00000028) -#define HW_CLKCTRL_PLL1CTRL0_TOG (0x0000002c) - -#define BM_CLKCTRL_PLL1CTRL0_CLKGATEEMI 0x80000000 -#define BP_CLKCTRL_PLL1CTRL0_LFR_SEL 28 -#define BM_CLKCTRL_PLL1CTRL0_LFR_SEL 0x30000000 -#define BF_CLKCTRL_PLL1CTRL0_LFR_SEL(v) \ - (((v) << 28) & BM_CLKCTRL_PLL1CTRL0_LFR_SEL) -#define BV_CLKCTRL_PLL1CTRL0_LFR_SEL__DEFAULT 0x0 -#define BV_CLKCTRL_PLL1CTRL0_LFR_SEL__TIMES_2 0x1 -#define BV_CLKCTRL_PLL1CTRL0_LFR_SEL__TIMES_05 0x2 -#define BV_CLKCTRL_PLL1CTRL0_LFR_SEL__UNDEFINED 0x3 -#define BP_CLKCTRL_PLL1CTRL0_CP_SEL 24 -#define BM_CLKCTRL_PLL1CTRL0_CP_SEL 0x03000000 -#define BF_CLKCTRL_PLL1CTRL0_CP_SEL(v) \ - (((v) << 24) & BM_CLKCTRL_PLL1CTRL0_CP_SEL) -#define BV_CLKCTRL_PLL1CTRL0_CP_SEL__DEFAULT 0x0 -#define BV_CLKCTRL_PLL1CTRL0_CP_SEL__TIMES_2 0x1 -#define BV_CLKCTRL_PLL1CTRL0_CP_SEL__TIMES_05 0x2 -#define BV_CLKCTRL_PLL1CTRL0_CP_SEL__UNDEFINED 0x3 -#define BP_CLKCTRL_PLL1CTRL0_DIV_SEL 20 -#define BM_CLKCTRL_PLL1CTRL0_DIV_SEL 0x00300000 -#define BF_CLKCTRL_PLL1CTRL0_DIV_SEL(v) \ - (((v) << 20) & BM_CLKCTRL_PLL1CTRL0_DIV_SEL) -#define BV_CLKCTRL_PLL1CTRL0_DIV_SEL__DEFAULT 0x0 -#define BV_CLKCTRL_PLL1CTRL0_DIV_SEL__LOWER 0x1 -#define BV_CLKCTRL_PLL1CTRL0_DIV_SEL__LOWEST 0x2 -#define BV_CLKCTRL_PLL1CTRL0_DIV_SEL__UNDEFINED 0x3 -#define BM_CLKCTRL_PLL1CTRL0_EN_USB_CLKS 0x00040000 -#define BM_CLKCTRL_PLL1CTRL0_POWER 0x00020000 - -#define HW_CLKCTRL_PLL1CTRL1 (0x00000030) - -#define BM_CLKCTRL_PLL1CTRL1_LOCK 0x80000000 -#define BM_CLKCTRL_PLL1CTRL1_FORCE_LOCK 0x40000000 -#define BP_CLKCTRL_PLL1CTRL1_LOCK_COUNT 0 -#define BM_CLKCTRL_PLL1CTRL1_LOCK_COUNT 0x0000FFFF -#define BF_CLKCTRL_PLL1CTRL1_LOCK_COUNT(v) \ - (((v) << 0) & BM_CLKCTRL_PLL1CTRL1_LOCK_COUNT) - -#define HW_CLKCTRL_PLL2CTRL0 (0x00000040) -#define HW_CLKCTRL_PLL2CTRL0_SET (0x00000044) -#define HW_CLKCTRL_PLL2CTRL0_CLR (0x00000048) -#define HW_CLKCTRL_PLL2CTRL0_TOG (0x0000004c) - -#define BM_CLKCTRL_PLL2CTRL0_CLKGATE 0x80000000 -#define BP_CLKCTRL_PLL2CTRL0_LFR_SEL 28 -#define BM_CLKCTRL_PLL2CTRL0_LFR_SEL 0x30000000 -#define BF_CLKCTRL_PLL2CTRL0_LFR_SEL(v) \ - (((v) << 28) & BM_CLKCTRL_PLL2CTRL0_LFR_SEL) -#define BM_CLKCTRL_PLL2CTRL0_HOLD_RING_OFF_B 0x04000000 -#define BP_CLKCTRL_PLL2CTRL0_CP_SEL 24 -#define BM_CLKCTRL_PLL2CTRL0_CP_SEL 0x03000000 -#define BF_CLKCTRL_PLL2CTRL0_CP_SEL(v) \ - (((v) << 24) & BM_CLKCTRL_PLL2CTRL0_CP_SEL) -#define BM_CLKCTRL_PLL2CTRL0_POWER 0x00800000 - -#define HW_CLKCTRL_CPU (0x00000050) -#define HW_CLKCTRL_CPU_SET (0x00000054) -#define HW_CLKCTRL_CPU_CLR (0x00000058) -#define HW_CLKCTRL_CPU_TOG (0x0000005c) - -#define BM_CLKCTRL_CPU_BUSY_REF_XTAL 0x20000000 -#define BM_CLKCTRL_CPU_BUSY_REF_CPU 0x10000000 -#define BM_CLKCTRL_CPU_DIV_XTAL_FRAC_EN 0x04000000 -#define BP_CLKCTRL_CPU_DIV_XTAL 16 -#define BM_CLKCTRL_CPU_DIV_XTAL 0x03FF0000 -#define BF_CLKCTRL_CPU_DIV_XTAL(v) \ - (((v) << 16) & BM_CLKCTRL_CPU_DIV_XTAL) -#define BM_CLKCTRL_CPU_INTERRUPT_WAIT 0x00001000 -#define BM_CLKCTRL_CPU_DIV_CPU_FRAC_EN 0x00000400 -#define BP_CLKCTRL_CPU_DIV_CPU 0 -#define BM_CLKCTRL_CPU_DIV_CPU 0x0000003F -#define BF_CLKCTRL_CPU_DIV_CPU(v) \ - (((v) << 0) & BM_CLKCTRL_CPU_DIV_CPU) - -#define HW_CLKCTRL_HBUS (0x00000060) -#define HW_CLKCTRL_HBUS_SET (0x00000064) -#define HW_CLKCTRL_HBUS_CLR (0x00000068) -#define HW_CLKCTRL_HBUS_TOG (0x0000006c) - -#define BM_CLKCTRL_HBUS_ASM_BUSY 0x80000000 -#define BM_CLKCTRL_HBUS_DCP_AS_ENABLE 0x40000000 -#define BM_CLKCTRL_HBUS_PXP_AS_ENABLE 0x20000000 -#define BM_CLKCTRL_HBUS_ASM_EMIPORT_AS_ENABLE 0x08000000 -#define BM_CLKCTRL_HBUS_APBHDMA_AS_ENABLE 0x04000000 -#define BM_CLKCTRL_HBUS_APBXDMA_AS_ENABLE 0x02000000 -#define BM_CLKCTRL_HBUS_TRAFFIC_JAM_AS_ENABLE 0x01000000 -#define BM_CLKCTRL_HBUS_TRAFFIC_AS_ENABLE 0x00800000 -#define BM_CLKCTRL_HBUS_CPU_DATA_AS_ENABLE 0x00400000 -#define BM_CLKCTRL_HBUS_CPU_INSTR_AS_ENABLE 0x00200000 -#define BM_CLKCTRL_HBUS_ASM_ENABLE 0x00100000 -#define BM_CLKCTRL_HBUS_AUTO_CLEAR_DIV_ENABLE 0x00080000 -#define BP_CLKCTRL_HBUS_SLOW_DIV 16 -#define BM_CLKCTRL_HBUS_SLOW_DIV 0x00070000 -#define BF_CLKCTRL_HBUS_SLOW_DIV(v) \ - (((v) << 16) & BM_CLKCTRL_HBUS_SLOW_DIV) -#define BV_CLKCTRL_HBUS_SLOW_DIV__BY1 0x0 -#define BV_CLKCTRL_HBUS_SLOW_DIV__BY2 0x1 -#define BV_CLKCTRL_HBUS_SLOW_DIV__BY4 0x2 -#define BV_CLKCTRL_HBUS_SLOW_DIV__BY8 0x3 -#define BV_CLKCTRL_HBUS_SLOW_DIV__BY16 0x4 -#define BV_CLKCTRL_HBUS_SLOW_DIV__BY32 0x5 -#define BM_CLKCTRL_HBUS_DIV_FRAC_EN 0x00000020 -#define BP_CLKCTRL_HBUS_DIV 0 -#define BM_CLKCTRL_HBUS_DIV 0x0000001F -#define BF_CLKCTRL_HBUS_DIV(v) \ - (((v) << 0) & BM_CLKCTRL_HBUS_DIV) - -#define HW_CLKCTRL_XBUS (0x00000070) - -#define BM_CLKCTRL_XBUS_BUSY 0x80000000 -#define BM_CLKCTRL_XBUS_AUTO_CLEAR_DIV_ENABLE 0x00000800 -#define BM_CLKCTRL_XBUS_DIV_FRAC_EN 0x00000400 -#define BP_CLKCTRL_XBUS_DIV 0 -#define BM_CLKCTRL_XBUS_DIV 0x000003FF -#define BF_CLKCTRL_XBUS_DIV(v) \ - (((v) << 0) & BM_CLKCTRL_XBUS_DIV) - -#define HW_CLKCTRL_XTAL (0x00000080) -#define HW_CLKCTRL_XTAL_SET (0x00000084) -#define HW_CLKCTRL_XTAL_CLR (0x00000088) -#define HW_CLKCTRL_XTAL_TOG (0x0000008c) - -#define BP_CLKCTRL_XTAL_UART_CLK_GATE 31 -#define BM_CLKCTRL_XTAL_UART_CLK_GATE 0x80000000 -#define BP_CLKCTRL_XTAL_PWM_CLK24M_GATE 29 -#define BM_CLKCTRL_XTAL_PWM_CLK24M_GATE 0x20000000 -#define BP_CLKCTRL_XTAL_TIMROT_CLK32K_GATE 26 -#define BM_CLKCTRL_XTAL_TIMROT_CLK32K_GATE 0x04000000 -#define BP_CLKCTRL_XTAL_DIV_UART 0 -#define BM_CLKCTRL_XTAL_DIV_UART 0x00000003 -#define BF_CLKCTRL_XTAL_DIV_UART(v) \ - (((v) << 0) & BM_CLKCTRL_XTAL_DIV_UART) - -#define HW_CLKCTRL_SSP0 (0x00000090) - -#define BP_CLKCTRL_SSP0_CLKGATE 31 -#define BM_CLKCTRL_SSP0_CLKGATE 0x80000000 -#define BM_CLKCTRL_SSP0_BUSY 0x20000000 -#define BM_CLKCTRL_SSP0_DIV_FRAC_EN 0x00000200 -#define BP_CLKCTRL_SSP0_DIV 0 -#define BM_CLKCTRL_SSP0_DIV 0x000001FF -#define BF_CLKCTRL_SSP0_DIV(v) \ - (((v) << 0) & BM_CLKCTRL_SSP0_DIV) - -#define HW_CLKCTRL_SSP1 (0x000000a0) - -#define BP_CLKCTRL_SSP1_CLKGATE 31 -#define BM_CLKCTRL_SSP1_CLKGATE 0x80000000 -#define BM_CLKCTRL_SSP1_BUSY 0x20000000 -#define BM_CLKCTRL_SSP1_DIV_FRAC_EN 0x00000200 -#define BP_CLKCTRL_SSP1_DIV 0 -#define BM_CLKCTRL_SSP1_DIV 0x000001FF -#define BF_CLKCTRL_SSP1_DIV(v) \ - (((v) << 0) & BM_CLKCTRL_SSP1_DIV) - -#define HW_CLKCTRL_SSP2 (0x000000b0) - -#define BP_CLKCTRL_SSP2_CLKGATE 31 -#define BM_CLKCTRL_SSP2_CLKGATE 0x80000000 -#define BM_CLKCTRL_SSP2_BUSY 0x20000000 -#define BM_CLKCTRL_SSP2_DIV_FRAC_EN 0x00000200 -#define BP_CLKCTRL_SSP2_DIV 0 -#define BM_CLKCTRL_SSP2_DIV 0x000001FF -#define BF_CLKCTRL_SSP2_DIV(v) \ - (((v) << 0) & BM_CLKCTRL_SSP2_DIV) - -#define HW_CLKCTRL_SSP3 (0x000000c0) - -#define BP_CLKCTRL_SSP3_CLKGATE 31 -#define BM_CLKCTRL_SSP3_CLKGATE 0x80000000 -#define BM_CLKCTRL_SSP3_BUSY 0x20000000 -#define BM_CLKCTRL_SSP3_DIV_FRAC_EN 0x00000200 -#define BP_CLKCTRL_SSP3_DIV 0 -#define BM_CLKCTRL_SSP3_DIV 0x000001FF -#define BF_CLKCTRL_SSP3_DIV(v) \ - (((v) << 0) & BM_CLKCTRL_SSP3_DIV) - -#define HW_CLKCTRL_GPMI (0x000000d0) - -#define BP_CLKCTRL_GPMI_CLKGATE 31 -#define BM_CLKCTRL_GPMI_CLKGATE 0x80000000 -#define BM_CLKCTRL_GPMI_BUSY 0x20000000 -#define BM_CLKCTRL_GPMI_DIV_FRAC_EN 0x00000400 -#define BP_CLKCTRL_GPMI_DIV 0 -#define BM_CLKCTRL_GPMI_DIV 0x000003FF -#define BF_CLKCTRL_GPMI_DIV(v) \ - (((v) << 0) & BM_CLKCTRL_GPMI_DIV) - -#define HW_CLKCTRL_SPDIF (0x000000e0) - -#define BP_CLKCTRL_SPDIF_CLKGATE 31 -#define BM_CLKCTRL_SPDIF_CLKGATE 0x80000000 - -#define HW_CLKCTRL_EMI (0x000000f0) - -#define BP_CLKCTRL_EMI_CLKGATE 31 -#define BM_CLKCTRL_EMI_CLKGATE 0x80000000 -#define BM_CLKCTRL_EMI_SYNC_MODE_EN 0x40000000 -#define BM_CLKCTRL_EMI_BUSY_REF_XTAL 0x20000000 -#define BM_CLKCTRL_EMI_BUSY_REF_EMI 0x10000000 -#define BM_CLKCTRL_EMI_BUSY_REF_CPU 0x08000000 -#define BM_CLKCTRL_EMI_BUSY_SYNC_MODE 0x04000000 -#define BM_CLKCTRL_EMI_BUSY_DCC_RESYNC 0x00020000 -#define BM_CLKCTRL_EMI_DCC_RESYNC_ENABLE 0x00010000 -#define BP_CLKCTRL_EMI_DIV_XTAL 8 -#define BM_CLKCTRL_EMI_DIV_XTAL 0x00000F00 -#define BF_CLKCTRL_EMI_DIV_XTAL(v) \ - (((v) << 8) & BM_CLKCTRL_EMI_DIV_XTAL) -#define BP_CLKCTRL_EMI_DIV_EMI 0 -#define BM_CLKCTRL_EMI_DIV_EMI 0x0000003F -#define BF_CLKCTRL_EMI_DIV_EMI(v) \ - (((v) << 0) & BM_CLKCTRL_EMI_DIV_EMI) - -#define HW_CLKCTRL_SAIF0 (0x00000100) - -#define BP_CLKCTRL_SAIF0_CLKGATE 31 -#define BM_CLKCTRL_SAIF0_CLKGATE 0x80000000 -#define BM_CLKCTRL_SAIF0_BUSY 0x20000000 -#define BM_CLKCTRL_SAIF0_DIV_FRAC_EN 0x00010000 -#define BP_CLKCTRL_SAIF0_DIV 0 -#define BM_CLKCTRL_SAIF0_DIV 0x0000FFFF -#define BF_CLKCTRL_SAIF0_DIV(v) \ - (((v) << 0) & BM_CLKCTRL_SAIF0_DIV) - -#define HW_CLKCTRL_SAIF1 (0x00000110) - -#define BP_CLKCTRL_SAIF1_CLKGATE 31 -#define BM_CLKCTRL_SAIF1_CLKGATE 0x80000000 -#define BM_CLKCTRL_SAIF1_BUSY 0x20000000 -#define BM_CLKCTRL_SAIF1_DIV_FRAC_EN 0x00010000 -#define BP_CLKCTRL_SAIF1_DIV 0 -#define BM_CLKCTRL_SAIF1_DIV 0x0000FFFF -#define BF_CLKCTRL_SAIF1_DIV(v) \ - (((v) << 0) & BM_CLKCTRL_SAIF1_DIV) - -#define HW_CLKCTRL_DIS_LCDIF (0x00000120) - -#define BP_CLKCTRL_DIS_LCDIF_CLKGATE 31 -#define BM_CLKCTRL_DIS_LCDIF_CLKGATE 0x80000000 -#define BM_CLKCTRL_DIS_LCDIF_BUSY 0x20000000 -#define BM_CLKCTRL_DIS_LCDIF_DIV_FRAC_EN 0x00002000 -#define BP_CLKCTRL_DIS_LCDIF_DIV 0 -#define BM_CLKCTRL_DIS_LCDIF_DIV 0x00001FFF -#define BF_CLKCTRL_DIS_LCDIF_DIV(v) \ - (((v) << 0) & BM_CLKCTRL_DIS_LCDIF_DIV) - -#define HW_CLKCTRL_ETM (0x00000130) - -#define BM_CLKCTRL_ETM_CLKGATE 0x80000000 -#define BM_CLKCTRL_ETM_BUSY 0x20000000 -#define BM_CLKCTRL_ETM_DIV_FRAC_EN 0x00000080 -#define BP_CLKCTRL_ETM_DIV 0 -#define BM_CLKCTRL_ETM_DIV 0x0000007F -#define BF_CLKCTRL_ETM_DIV(v) \ - (((v) << 0) & BM_CLKCTRL_ETM_DIV) - -#define HW_CLKCTRL_ENET (0x00000140) - -#define BM_CLKCTRL_ENET_SLEEP 0x80000000 -#define BP_CLKCTRL_ENET_DISABLE 30 -#define BM_CLKCTRL_ENET_DISABLE 0x40000000 -#define BM_CLKCTRL_ENET_STATUS 0x20000000 -#define BM_CLKCTRL_ENET_BUSY_TIME 0x08000000 -#define BP_CLKCTRL_ENET_DIV_TIME 21 -#define BM_CLKCTRL_ENET_DIV_TIME 0x07E00000 -#define BF_CLKCTRL_ENET_DIV_TIME(v) \ - (((v) << 21) & BM_CLKCTRL_ENET_DIV_TIME) -#define BM_CLKCTRL_ENET_BUSY 0x08000000 -#define BP_CLKCTRL_ENET_DIV 21 -#define BM_CLKCTRL_ENET_DIV 0x07E00000 -#define BF_CLKCTRL_ENET_DIV(v) \ - (((v) << 21) & BM_CLKCTRL_ENET_DIV) -#define BP_CLKCTRL_ENET_TIME_SEL 19 -#define BM_CLKCTRL_ENET_TIME_SEL 0x00180000 -#define BF_CLKCTRL_ENET_TIME_SEL(v) \ - (((v) << 19) & BM_CLKCTRL_ENET_TIME_SEL) -#define BV_CLKCTRL_ENET_TIME_SEL__XTAL 0x0 -#define BV_CLKCTRL_ENET_TIME_SEL__PLL 0x1 -#define BV_CLKCTRL_ENET_TIME_SEL__RMII_CLK 0x2 -#define BV_CLKCTRL_ENET_TIME_SEL__UNDEFINED 0x3 -#define BM_CLKCTRL_ENET_CLK_OUT_EN 0x00040000 -#define BM_CLKCTRL_ENET_RESET_BY_SW_CHIP 0x00020000 -#define BM_CLKCTRL_ENET_RESET_BY_SW 0x00010000 - -#define HW_CLKCTRL_HSADC (0x00000150) - -#define BM_CLKCTRL_HSADC_RESETB 0x40000000 -#define BP_CLKCTRL_HSADC_FREQDIV 28 -#define BM_CLKCTRL_HSADC_FREQDIV 0x30000000 -#define BF_CLKCTRL_HSADC_FREQDIV(v) \ - (((v) << 28) & BM_CLKCTRL_HSADC_FREQDIV) - -#define HW_CLKCTRL_FLEXCAN (0x00000160) - -#define BP_CLKCTRL_FLEXCAN_STOP_CAN0 30 -#define BM_CLKCTRL_FLEXCAN_STOP_CAN0 0x40000000 -#define BM_CLKCTRL_FLEXCAN_CAN0_STATUS 0x20000000 -#define BP_CLKCTRL_FLEXCAN_STOP_CAN1 28 -#define BM_CLKCTRL_FLEXCAN_STOP_CAN1 0x10000000 -#define BM_CLKCTRL_FLEXCAN_CAN1_STATUS 0x08000000 - -#define HW_CLKCTRL_FRAC0 (0x000001b0) -#define HW_CLKCTRL_FRAC0_SET (0x000001b4) -#define HW_CLKCTRL_FRAC0_CLR (0x000001b8) -#define HW_CLKCTRL_FRAC0_TOG (0x000001bc) - -#define BP_CLKCTRL_FRAC0_CLKGATEIO0 31 -#define BM_CLKCTRL_FRAC0_CLKGATEIO0 0x80000000 -#define BM_CLKCTRL_FRAC0_IO0_STABLE 0x40000000 -#define BP_CLKCTRL_FRAC0_IO0FRAC 24 -#define BM_CLKCTRL_FRAC0_IO0FRAC 0x3F000000 -#define BF_CLKCTRL_FRAC0_IO0FRAC(v) \ - (((v) << 24) & BM_CLKCTRL_FRAC0_IO0FRAC) -#define BP_CLKCTRL_FRAC0_CLKGATEIO1 23 -#define BM_CLKCTRL_FRAC0_CLKGATEIO1 0x00800000 -#define BM_CLKCTRL_FRAC0_IO1_STABLE 0x00400000 -#define BP_CLKCTRL_FRAC0_IO1FRAC 16 -#define BM_CLKCTRL_FRAC0_IO1FRAC 0x003F0000 -#define BF_CLKCTRL_FRAC0_IO1FRAC(v) \ - (((v) << 16) & BM_CLKCTRL_FRAC0_IO1FRAC) -#define BP_CLKCTRL_FRAC0_CLKGATEEMI 15 -#define BM_CLKCTRL_FRAC0_CLKGATEEMI 0x00008000 -#define BM_CLKCTRL_FRAC0_EMI_STABLE 0x00004000 -#define BP_CLKCTRL_FRAC0_EMIFRAC 8 -#define BM_CLKCTRL_FRAC0_EMIFRAC 0x00003F00 -#define BF_CLKCTRL_FRAC0_EMIFRAC(v) \ - (((v) << 8) & BM_CLKCTRL_FRAC0_EMIFRAC) -#define BP_CLKCTRL_FRAC0_CLKGATECPU 7 -#define BM_CLKCTRL_FRAC0_CLKGATECPU 0x00000080 -#define BM_CLKCTRL_FRAC0_CPU_STABLE 0x00000040 -#define BP_CLKCTRL_FRAC0_CPUFRAC 0 -#define BM_CLKCTRL_FRAC0_CPUFRAC 0x0000003F -#define BF_CLKCTRL_FRAC0_CPUFRAC(v) \ - (((v) << 0) & BM_CLKCTRL_FRAC0_CPUFRAC) - -#define HW_CLKCTRL_FRAC1 (0x000001c0) -#define HW_CLKCTRL_FRAC1_SET (0x000001c4) -#define HW_CLKCTRL_FRAC1_CLR (0x000001c8) -#define HW_CLKCTRL_FRAC1_TOG (0x000001cc) - -#define BP_CLKCTRL_FRAC1_CLKGATEGPMI 23 -#define BM_CLKCTRL_FRAC1_CLKGATEGPMI 0x00800000 -#define BM_CLKCTRL_FRAC1_GPMI_STABLE 0x00400000 -#define BP_CLKCTRL_FRAC1_GPMIFRAC 16 -#define BM_CLKCTRL_FRAC1_GPMIFRAC 0x003F0000 -#define BF_CLKCTRL_FRAC1_GPMIFRAC(v) \ - (((v) << 16) & BM_CLKCTRL_FRAC1_GPMIFRAC) -#define BP_CLKCTRL_FRAC1_CLKGATEHSADC 15 -#define BM_CLKCTRL_FRAC1_CLKGATEHSADC 0x00008000 -#define BM_CLKCTRL_FRAC1_HSADC_STABLE 0x00004000 -#define BP_CLKCTRL_FRAC1_HSADCFRAC 8 -#define BM_CLKCTRL_FRAC1_HSADCFRAC 0x00003F00 -#define BF_CLKCTRL_FRAC1_HSADCFRAC(v) \ - (((v) << 8) & BM_CLKCTRL_FRAC1_HSADCFRAC) -#define BP_CLKCTRL_FRAC1_CLKGATEPIX 7 -#define BM_CLKCTRL_FRAC1_CLKGATEPIX 0x00000080 -#define BM_CLKCTRL_FRAC1_PIX_STABLE 0x00000040 -#define BP_CLKCTRL_FRAC1_PIXFRAC 0 -#define BM_CLKCTRL_FRAC1_PIXFRAC 0x0000003F -#define BF_CLKCTRL_FRAC1_PIXFRAC(v) \ - (((v) << 0) & BM_CLKCTRL_FRAC1_PIXFRAC) - -#define HW_CLKCTRL_CLKSEQ (0x000001d0) -#define HW_CLKCTRL_CLKSEQ_SET (0x000001d4) -#define HW_CLKCTRL_CLKSEQ_CLR (0x000001d8) -#define HW_CLKCTRL_CLKSEQ_TOG (0x000001dc) - -#define BM_CLKCTRL_CLKSEQ_BYPASS_CPU 0x00040000 -#define BM_CLKCTRL_CLKSEQ_BYPASS_DIS_LCDIF 0x00004000 -#define BV_CLKCTRL_CLKSEQ_BYPASS_DIS_LCDIF__BYPASS 0x1 -#define BV_CLKCTRL_CLKSEQ_BYPASS_DIS_LCDIF__PFD 0x0 -#define BM_CLKCTRL_CLKSEQ_BYPASS_ETM 0x00000100 -#define BM_CLKCTRL_CLKSEQ_BYPASS_EMI 0x00000080 -#define BM_CLKCTRL_CLKSEQ_BYPASS_SSP3 0x00000040 -#define BM_CLKCTRL_CLKSEQ_BYPASS_SSP2 0x00000020 -#define BM_CLKCTRL_CLKSEQ_BYPASS_SSP1 0x00000010 -#define BM_CLKCTRL_CLKSEQ_BYPASS_SSP0 0x00000008 -#define BM_CLKCTRL_CLKSEQ_BYPASS_GPMI 0x00000004 -#define BM_CLKCTRL_CLKSEQ_BYPASS_SAIF1 0x00000002 -#define BM_CLKCTRL_CLKSEQ_BYPASS_SAIF0 0x00000001 - -#define HW_CLKCTRL_RESET (0x000001e0) - -#define BM_CLKCTRL_RESET_WDOG_POR_DISABLE 0x00000020 -#define BM_CLKCTRL_RESET_EXTERNAL_RESET_ENABLE 0x00000010 -#define BM_CLKCTRL_RESET_THERMAL_RESET_ENABLE 0x00000008 -#define BM_CLKCTRL_RESET_THERMAL_RESET_DEFAULT 0x00000004 -#define BM_CLKCTRL_RESET_CHIP 0x00000002 -#define BM_CLKCTRL_RESET_DIG 0x00000001 - -#define HW_CLKCTRL_STATUS (0x000001f0) - -#define BP_CLKCTRL_STATUS_CPU_LIMIT 30 -#define BM_CLKCTRL_STATUS_CPU_LIMIT 0xC0000000 -#define BF_CLKCTRL_STATUS_CPU_LIMIT(v) \ - (((v) << 30) & BM_CLKCTRL_STATUS_CPU_LIMIT) - -#define HW_CLKCTRL_VERSION (0x00000200) - -#define BP_CLKCTRL_VERSION_MAJOR 24 -#define BM_CLKCTRL_VERSION_MAJOR 0xFF000000 -#define BF_CLKCTRL_VERSION_MAJOR(v) \ - (((v) << 24) & BM_CLKCTRL_VERSION_MAJOR) -#define BP_CLKCTRL_VERSION_MINOR 16 -#define BM_CLKCTRL_VERSION_MINOR 0x00FF0000 -#define BF_CLKCTRL_VERSION_MINOR(v) \ - (((v) << 16) & BM_CLKCTRL_VERSION_MINOR) -#define BP_CLKCTRL_VERSION_STEP 0 -#define BM_CLKCTRL_VERSION_STEP 0x0000FFFF -#define BF_CLKCTRL_VERSION_STEP(v) \ - (((v) << 0) & BM_CLKCTRL_VERSION_STEP) - -#endif /* __REGS_CLKCTRL_MX28_H__ */ diff --git a/arch/arm/mach-mxs/system.c b/arch/arm/mach-mxs/system.c index 80ac1fc..30042e2 100644 --- a/arch/arm/mach-mxs/system.c +++ b/arch/arm/mach-mxs/system.c @@ -37,8 +37,6 @@ #define MXS_MODULE_CLKGATE (1 << 30) #define MXS_MODULE_SFTRST (1 << 31) -#define CLKCTRL_TIMEOUT 10 /* 10 ms */ - static void __iomem *mxs_clkctrl_reset_addr; /* @@ -139,17 +137,3 @@ error: return -ETIMEDOUT; } EXPORT_SYMBOL(mxs_reset_block); - -int mxs_clkctrl_timeout(unsigned int reg_offset, unsigned int mask) -{ - unsigned long timeout = jiffies + msecs_to_jiffies(CLKCTRL_TIMEOUT); - while (readl_relaxed(MXS_IO_ADDRESS(MXS_CLKCTRL_BASE_ADDR) - + reg_offset) & mask) { - if (time_after(jiffies, timeout)) { - pr_err("Timeout at CLKCTRL + 0x%x\n", reg_offset); - return -ETIMEDOUT; - } - } - - return 0; -} -- cgit v0.10.2 From 50260924afd4b745bfb6e5f1caee381a1875fc31 Mon Sep 17 00:00:00 2001 From: Shawn Guo Date: Sun, 29 Apr 2012 00:02:41 +0800 Subject: ARM: mxs: remove now unused timer_clk argument from mxs_timer_init With old mxs clock support removed, the timer_clk argument of mxs_timer_init is unused now, so remove it. Signed-off-by: Shawn Guo diff --git a/arch/arm/mach-mxs/include/mach/common.h b/arch/arm/mach-mxs/include/mach/common.h index e1237ab..9bdc953 100644 --- a/arch/arm/mach-mxs/include/mach/common.h +++ b/arch/arm/mach-mxs/include/mach/common.h @@ -11,11 +11,9 @@ #ifndef __MACH_MXS_COMMON_H__ #define __MACH_MXS_COMMON_H__ -struct clk; - extern const u32 *mxs_get_ocotp(void); extern int mxs_reset_block(void __iomem *); -extern void mxs_timer_init(struct clk *, int); +extern void mxs_timer_init(int); extern void mxs_restart(char, const char *); extern int mxs_saif_clkmux_select(unsigned int clkmux); diff --git a/arch/arm/mach-mxs/timer.c b/arch/arm/mach-mxs/timer.c index 575e8fd..02d36de 100644 --- a/arch/arm/mach-mxs/timer.c +++ b/arch/arm/mach-mxs/timer.c @@ -244,14 +244,14 @@ static int __init mxs_clocksource_init(struct clk *timer_clk) return 0; } -void __init mxs_timer_init(struct clk *timer_clk, int irq) +void __init mxs_timer_init(int irq) { - if (!timer_clk) { - timer_clk = clk_get_sys("timrot", NULL); - if (IS_ERR(timer_clk)) { - pr_err("%s: failed to get clk\n", __func__); - return; - } + struct clk *timer_clk; + + timer_clk = clk_get_sys("timrot", NULL); + if (IS_ERR(timer_clk)) { + pr_err("%s: failed to get clk\n", __func__); + return; } clk_prepare_enable(timer_clk); diff --git a/drivers/clk/mxs/clk-imx23.c b/drivers/clk/mxs/clk-imx23.c index 2ec76ff..dcae112 100644 --- a/drivers/clk/mxs/clk-imx23.c +++ b/drivers/clk/mxs/clk-imx23.c @@ -198,7 +198,7 @@ int __init mx23_clocks_init(void) for (i = 0; i < ARRAY_SIZE(clks_init_on); i++) clk_prepare_enable(clks[clks_init_on[i]]); - mxs_timer_init(NULL, MX23_INT_TIMER0); + mxs_timer_init(MX23_INT_TIMER0); return 0; } diff --git a/drivers/clk/mxs/clk-imx28.c b/drivers/clk/mxs/clk-imx28.c index 4bfd1f4..b2a3257 100644 --- a/drivers/clk/mxs/clk-imx28.c +++ b/drivers/clk/mxs/clk-imx28.c @@ -331,7 +331,7 @@ int __init mx28_clocks_init(void) for (i = 0; i < ARRAY_SIZE(clks_init_on); i++) clk_prepare_enable(clks[clks_init_on[i]]); - mxs_timer_init(NULL, MX28_INT_TIMER0); + mxs_timer_init(MX28_INT_TIMER0); return 0; } -- cgit v0.10.2 From 14546c337588370dced50dcaf43398939be9829e Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 6 May 2012 19:10:59 -0400 Subject: NFS: Don't do a full flush to disk on close() if we hold a delegation If we hold a delegation then we know that it should be safe to continue to cache the data beyond the close(). However since the process that wrote the data may die after close(), we may still want to send the data to server before those RPCSEC_GSS credentials expire. We therefore compromise by starting writeback to the server, but don't wait for completion. Signed-off-by: Trond Myklebust diff --git a/fs/nfs/file.c b/fs/nfs/file.c index aa9b709..8eda8a6 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -174,6 +174,13 @@ nfs_file_flush(struct file *file, fl_owner_t id) if ((file->f_mode & FMODE_WRITE) == 0) return 0; + /* + * If we're holding a write delegation, then just start the i/o + * but don't wait for completion (or send a commit). + */ + if (nfs_have_delegation(inode, FMODE_WRITE)) + return filemap_fdatawrite(file->f_mapping); + /* Flush writes to the server and return any errors */ return vfs_fsync(file, 0); } -- cgit v0.10.2 From c57d1bc5e043dbb5ba82ded07003d71a8033d899 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 6 May 2012 19:34:17 -0400 Subject: NFS: nfs_inode_return_delegation() should always flush dirty data The assumption is that if you are in a situation where you need to return the delegation, then you should probably stop caching the data anyway. Signed-off-by: Trond Myklebust diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index 89af1d2..a19cb5a 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -380,6 +380,10 @@ void nfs_inode_return_delegation_noreclaim(struct inode *inode) * nfs_inode_return_delegation - synchronously return a delegation * @inode: inode to process * + * This routine will always flush any dirty data to disk on the + * assumption that if we need to return the delegation, then + * we should stop caching. + * * Returns zero on success, or a negative errno value. */ int nfs_inode_return_delegation(struct inode *inode) @@ -389,10 +393,10 @@ int nfs_inode_return_delegation(struct inode *inode) struct nfs_delegation *delegation; int err = 0; + nfs_wb_all(inode); if (rcu_access_pointer(nfsi->delegation) != NULL) { delegation = nfs_detach_delegation(nfsi, server); if (delegation != NULL) { - nfs_wb_all(inode); err = __nfs_inode_return_delegation(inode, delegation, 1); } } diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h index cd6a7a8..72709c4 100644 --- a/fs/nfs/delegation.h +++ b/fs/nfs/delegation.h @@ -66,6 +66,7 @@ static inline int nfs_have_delegation(struct inode *inode, fmode_t flags) static inline int nfs_inode_return_delegation(struct inode *inode) { + nfs_wb_all(inode); return 0; } #endif -- cgit v0.10.2 From dc327ed4cd320be689596365372a3683208c3ba0 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 6 May 2012 19:46:30 -0400 Subject: NFSv4: nfs_client_return_marked_delegations can't flush data Since even filemap_flush() needs to lock pages that are dirty, we cannot risk calling it from the state manager context. Therefore, we need to move the call to filemap_flush() to nfs_async_inode_return_delegation(). Signed-off-by: Trond Myklebust diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index a19cb5a..bd3a960 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -316,6 +316,10 @@ out: * nfs_client_return_marked_delegations - return previously marked delegations * @clp: nfs_client to process * + * Note that this function is designed to be called by the state + * manager thread. For this reason, it cannot flush the dirty data, + * since that could deadlock in case of a state recovery error. + * * Returns zero on success, or a negative errno value. */ int nfs_client_return_marked_delegations(struct nfs_client *clp) @@ -340,11 +344,9 @@ restart: server); rcu_read_unlock(); - if (delegation != NULL) { - filemap_flush(inode->i_mapping); + if (delegation != NULL) err = __nfs_inode_return_delegation(inode, delegation, 0); - } iput(inode); if (!err) goto restart; @@ -542,6 +544,8 @@ int nfs_async_inode_return_delegation(struct inode *inode, struct nfs_client *clp = server->nfs_client; struct nfs_delegation *delegation; + filemap_flush(inode->i_mapping); + rcu_read_lock(); delegation = rcu_dereference(NFS_I(inode)->delegation); -- cgit v0.10.2 From 433de739bbc22a5b2c87602116566ce27e3b4cab Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Tue, 8 May 2012 21:22:24 +0300 Subject: x86, realmode: 16-bit real-mode code support for relocs tool A new option is added to the relocs tool called '--realmode'. This option causes the generation of 16-bit segment relocations and 32-bit linear relocations for the real-mode code. When the real-mode code is moved to the low-memory during kernel initialization, these relocation entries can be used to relocate the code properly. In the assembly code 16-bit segment relocations must be relative to the 'real_mode_seg' absolute symbol. Linear relocations must be relative to a symbol prefixed with 'pa_'. 16-bit segment relocation is used to load cs:ip in 16-bit code. Linear relocations are used in the 32-bit code for relocatable data references. They are declared in the linker script of the real-mode code. The relocs tool is moved to scripts/x86-relocs.c so it will be compiled before building the arch/x86 tree. Signed-off-by: H. Peter Anvin Link: http://lkml.kernel.org/r/1336501366-28617-2-git-send-email-jarkko.sakkinen@intel.com Signed-off-by: Jarkko Sakkinen Signed-off-by: H. Peter Anvin diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile index fd55a2f..0435e8a 100644 --- a/arch/x86/boot/compressed/Makefile +++ b/arch/x86/boot/compressed/Makefile @@ -40,13 +40,12 @@ OBJCOPYFLAGS_vmlinux.bin := -R .comment -S $(obj)/vmlinux.bin: vmlinux FORCE $(call if_changed,objcopy) +targets += vmlinux.bin.all vmlinux.relocs -targets += vmlinux.bin.all vmlinux.relocs relocs -hostprogs-$(CONFIG_X86_NEED_RELOCS) += relocs - -quiet_cmd_relocs = RELOCS $@ - cmd_relocs = $(obj)/relocs $< > $@;$(obj)/relocs --abs-relocs $< -$(obj)/vmlinux.relocs: vmlinux $(obj)/relocs FORCE +CMD_RELOCS = scripts/x86-relocs +quiet_cmd_relocs = RELOCS $@ + cmd_relocs = $(CMD_RELOCS) $< > $@;$(CMD_RELOCS) --abs-relocs $< +$(obj)/vmlinux.relocs: vmlinux FORCE $(call if_changed,relocs) vmlinux.bin.all-y := $(obj)/vmlinux.bin diff --git a/arch/x86/boot/compressed/relocs.c b/arch/x86/boot/compressed/relocs.c deleted file mode 100644 index fb7117a..0000000 --- a/arch/x86/boot/compressed/relocs.c +++ /dev/null @@ -1,678 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#define USE_BSD -#include -#include -#include - -static void die(char *fmt, ...); - -#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) -static Elf32_Ehdr ehdr; -static unsigned long reloc_count, reloc_idx; -static unsigned long *relocs; - -struct section { - Elf32_Shdr shdr; - struct section *link; - Elf32_Sym *symtab; - Elf32_Rel *reltab; - char *strtab; -}; -static struct section *secs; - -/* - * Following symbols have been audited. There values are constant and do - * not change if bzImage is loaded at a different physical address than - * the address for which it has been compiled. Don't warn user about - * absolute relocations present w.r.t these symbols. - */ -static const char abs_sym_regex[] = - "^(xen_irq_disable_direct_reloc$|" - "xen_save_fl_direct_reloc$|" - "VDSO|" - "__crc_)"; -static regex_t abs_sym_regex_c; -static int is_abs_reloc(const char *sym_name) -{ - return !regexec(&abs_sym_regex_c, sym_name, 0, NULL, 0); -} - -/* - * These symbols are known to be relative, even if the linker marks them - * as absolute (typically defined outside any section in the linker script.) - */ -static const char rel_sym_regex[] = - "^_end$"; -static regex_t rel_sym_regex_c; -static int is_rel_reloc(const char *sym_name) -{ - return !regexec(&rel_sym_regex_c, sym_name, 0, NULL, 0); -} - -static void regex_init(void) -{ - char errbuf[128]; - int err; - - err = regcomp(&abs_sym_regex_c, abs_sym_regex, - REG_EXTENDED|REG_NOSUB); - if (err) { - regerror(err, &abs_sym_regex_c, errbuf, sizeof errbuf); - die("%s", errbuf); - } - - err = regcomp(&rel_sym_regex_c, rel_sym_regex, - REG_EXTENDED|REG_NOSUB); - if (err) { - regerror(err, &rel_sym_regex_c, errbuf, sizeof errbuf); - die("%s", errbuf); - } -} - -static void die(char *fmt, ...) -{ - va_list ap; - va_start(ap, fmt); - vfprintf(stderr, fmt, ap); - va_end(ap); - exit(1); -} - -static const char *sym_type(unsigned type) -{ - static const char *type_name[] = { -#define SYM_TYPE(X) [X] = #X - SYM_TYPE(STT_NOTYPE), - SYM_TYPE(STT_OBJECT), - SYM_TYPE(STT_FUNC), - SYM_TYPE(STT_SECTION), - SYM_TYPE(STT_FILE), - SYM_TYPE(STT_COMMON), - SYM_TYPE(STT_TLS), -#undef SYM_TYPE - }; - const char *name = "unknown sym type name"; - if (type < ARRAY_SIZE(type_name)) { - name = type_name[type]; - } - return name; -} - -static const char *sym_bind(unsigned bind) -{ - static const char *bind_name[] = { -#define SYM_BIND(X) [X] = #X - SYM_BIND(STB_LOCAL), - SYM_BIND(STB_GLOBAL), - SYM_BIND(STB_WEAK), -#undef SYM_BIND - }; - const char *name = "unknown sym bind name"; - if (bind < ARRAY_SIZE(bind_name)) { - name = bind_name[bind]; - } - return name; -} - -static const char *sym_visibility(unsigned visibility) -{ - static const char *visibility_name[] = { -#define SYM_VISIBILITY(X) [X] = #X - SYM_VISIBILITY(STV_DEFAULT), - SYM_VISIBILITY(STV_INTERNAL), - SYM_VISIBILITY(STV_HIDDEN), - SYM_VISIBILITY(STV_PROTECTED), -#undef SYM_VISIBILITY - }; - const char *name = "unknown sym visibility name"; - if (visibility < ARRAY_SIZE(visibility_name)) { - name = visibility_name[visibility]; - } - return name; -} - -static const char *rel_type(unsigned type) -{ - static const char *type_name[] = { -#define REL_TYPE(X) [X] = #X - REL_TYPE(R_386_NONE), - REL_TYPE(R_386_32), - REL_TYPE(R_386_PC32), - REL_TYPE(R_386_GOT32), - REL_TYPE(R_386_PLT32), - REL_TYPE(R_386_COPY), - REL_TYPE(R_386_GLOB_DAT), - REL_TYPE(R_386_JMP_SLOT), - REL_TYPE(R_386_RELATIVE), - REL_TYPE(R_386_GOTOFF), - REL_TYPE(R_386_GOTPC), -#undef REL_TYPE - }; - const char *name = "unknown type rel type name"; - if (type < ARRAY_SIZE(type_name) && type_name[type]) { - name = type_name[type]; - } - return name; -} - -static const char *sec_name(unsigned shndx) -{ - const char *sec_strtab; - const char *name; - sec_strtab = secs[ehdr.e_shstrndx].strtab; - name = ""; - if (shndx < ehdr.e_shnum) { - name = sec_strtab + secs[shndx].shdr.sh_name; - } - else if (shndx == SHN_ABS) { - name = "ABSOLUTE"; - } - else if (shndx == SHN_COMMON) { - name = "COMMON"; - } - return name; -} - -static const char *sym_name(const char *sym_strtab, Elf32_Sym *sym) -{ - const char *name; - name = ""; - if (sym->st_name) { - name = sym_strtab + sym->st_name; - } - else { - name = sec_name(secs[sym->st_shndx].shdr.sh_name); - } - return name; -} - - - -#if BYTE_ORDER == LITTLE_ENDIAN -#define le16_to_cpu(val) (val) -#define le32_to_cpu(val) (val) -#endif -#if BYTE_ORDER == BIG_ENDIAN -#define le16_to_cpu(val) bswap_16(val) -#define le32_to_cpu(val) bswap_32(val) -#endif - -static uint16_t elf16_to_cpu(uint16_t val) -{ - return le16_to_cpu(val); -} - -static uint32_t elf32_to_cpu(uint32_t val) -{ - return le32_to_cpu(val); -} - -static void read_ehdr(FILE *fp) -{ - if (fread(&ehdr, sizeof(ehdr), 1, fp) != 1) { - die("Cannot read ELF header: %s\n", - strerror(errno)); - } - if (memcmp(ehdr.e_ident, ELFMAG, SELFMAG) != 0) { - die("No ELF magic\n"); - } - if (ehdr.e_ident[EI_CLASS] != ELFCLASS32) { - die("Not a 32 bit executable\n"); - } - if (ehdr.e_ident[EI_DATA] != ELFDATA2LSB) { - die("Not a LSB ELF executable\n"); - } - if (ehdr.e_ident[EI_VERSION] != EV_CURRENT) { - die("Unknown ELF version\n"); - } - /* Convert the fields to native endian */ - ehdr.e_type = elf16_to_cpu(ehdr.e_type); - ehdr.e_machine = elf16_to_cpu(ehdr.e_machine); - ehdr.e_version = elf32_to_cpu(ehdr.e_version); - ehdr.e_entry = elf32_to_cpu(ehdr.e_entry); - ehdr.e_phoff = elf32_to_cpu(ehdr.e_phoff); - ehdr.e_shoff = elf32_to_cpu(ehdr.e_shoff); - ehdr.e_flags = elf32_to_cpu(ehdr.e_flags); - ehdr.e_ehsize = elf16_to_cpu(ehdr.e_ehsize); - ehdr.e_phentsize = elf16_to_cpu(ehdr.e_phentsize); - ehdr.e_phnum = elf16_to_cpu(ehdr.e_phnum); - ehdr.e_shentsize = elf16_to_cpu(ehdr.e_shentsize); - ehdr.e_shnum = elf16_to_cpu(ehdr.e_shnum); - ehdr.e_shstrndx = elf16_to_cpu(ehdr.e_shstrndx); - - if ((ehdr.e_type != ET_EXEC) && (ehdr.e_type != ET_DYN)) { - die("Unsupported ELF header type\n"); - } - if (ehdr.e_machine != EM_386) { - die("Not for x86\n"); - } - if (ehdr.e_version != EV_CURRENT) { - die("Unknown ELF version\n"); - } - if (ehdr.e_ehsize != sizeof(Elf32_Ehdr)) { - die("Bad Elf header size\n"); - } - if (ehdr.e_phentsize != sizeof(Elf32_Phdr)) { - die("Bad program header entry\n"); - } - if (ehdr.e_shentsize != sizeof(Elf32_Shdr)) { - die("Bad section header entry\n"); - } - if (ehdr.e_shstrndx >= ehdr.e_shnum) { - die("String table index out of bounds\n"); - } -} - -static void read_shdrs(FILE *fp) -{ - int i; - Elf32_Shdr shdr; - - secs = calloc(ehdr.e_shnum, sizeof(struct section)); - if (!secs) { - die("Unable to allocate %d section headers\n", - ehdr.e_shnum); - } - if (fseek(fp, ehdr.e_shoff, SEEK_SET) < 0) { - die("Seek to %d failed: %s\n", - ehdr.e_shoff, strerror(errno)); - } - for (i = 0; i < ehdr.e_shnum; i++) { - struct section *sec = &secs[i]; - if (fread(&shdr, sizeof shdr, 1, fp) != 1) - die("Cannot read ELF section headers %d/%d: %s\n", - i, ehdr.e_shnum, strerror(errno)); - sec->shdr.sh_name = elf32_to_cpu(shdr.sh_name); - sec->shdr.sh_type = elf32_to_cpu(shdr.sh_type); - sec->shdr.sh_flags = elf32_to_cpu(shdr.sh_flags); - sec->shdr.sh_addr = elf32_to_cpu(shdr.sh_addr); - sec->shdr.sh_offset = elf32_to_cpu(shdr.sh_offset); - sec->shdr.sh_size = elf32_to_cpu(shdr.sh_size); - sec->shdr.sh_link = elf32_to_cpu(shdr.sh_link); - sec->shdr.sh_info = elf32_to_cpu(shdr.sh_info); - sec->shdr.sh_addralign = elf32_to_cpu(shdr.sh_addralign); - sec->shdr.sh_entsize = elf32_to_cpu(shdr.sh_entsize); - if (sec->shdr.sh_link < ehdr.e_shnum) - sec->link = &secs[sec->shdr.sh_link]; - } - -} - -static void read_strtabs(FILE *fp) -{ - int i; - for (i = 0; i < ehdr.e_shnum; i++) { - struct section *sec = &secs[i]; - if (sec->shdr.sh_type != SHT_STRTAB) { - continue; - } - sec->strtab = malloc(sec->shdr.sh_size); - if (!sec->strtab) { - die("malloc of %d bytes for strtab failed\n", - sec->shdr.sh_size); - } - if (fseek(fp, sec->shdr.sh_offset, SEEK_SET) < 0) { - die("Seek to %d failed: %s\n", - sec->shdr.sh_offset, strerror(errno)); - } - if (fread(sec->strtab, 1, sec->shdr.sh_size, fp) - != sec->shdr.sh_size) { - die("Cannot read symbol table: %s\n", - strerror(errno)); - } - } -} - -static void read_symtabs(FILE *fp) -{ - int i,j; - for (i = 0; i < ehdr.e_shnum; i++) { - struct section *sec = &secs[i]; - if (sec->shdr.sh_type != SHT_SYMTAB) { - continue; - } - sec->symtab = malloc(sec->shdr.sh_size); - if (!sec->symtab) { - die("malloc of %d bytes for symtab failed\n", - sec->shdr.sh_size); - } - if (fseek(fp, sec->shdr.sh_offset, SEEK_SET) < 0) { - die("Seek to %d failed: %s\n", - sec->shdr.sh_offset, strerror(errno)); - } - if (fread(sec->symtab, 1, sec->shdr.sh_size, fp) - != sec->shdr.sh_size) { - die("Cannot read symbol table: %s\n", - strerror(errno)); - } - for (j = 0; j < sec->shdr.sh_size/sizeof(Elf32_Sym); j++) { - Elf32_Sym *sym = &sec->symtab[j]; - sym->st_name = elf32_to_cpu(sym->st_name); - sym->st_value = elf32_to_cpu(sym->st_value); - sym->st_size = elf32_to_cpu(sym->st_size); - sym->st_shndx = elf16_to_cpu(sym->st_shndx); - } - } -} - - -static void read_relocs(FILE *fp) -{ - int i,j; - for (i = 0; i < ehdr.e_shnum; i++) { - struct section *sec = &secs[i]; - if (sec->shdr.sh_type != SHT_REL) { - continue; - } - sec->reltab = malloc(sec->shdr.sh_size); - if (!sec->reltab) { - die("malloc of %d bytes for relocs failed\n", - sec->shdr.sh_size); - } - if (fseek(fp, sec->shdr.sh_offset, SEEK_SET) < 0) { - die("Seek to %d failed: %s\n", - sec->shdr.sh_offset, strerror(errno)); - } - if (fread(sec->reltab, 1, sec->shdr.sh_size, fp) - != sec->shdr.sh_size) { - die("Cannot read symbol table: %s\n", - strerror(errno)); - } - for (j = 0; j < sec->shdr.sh_size/sizeof(Elf32_Rel); j++) { - Elf32_Rel *rel = &sec->reltab[j]; - rel->r_offset = elf32_to_cpu(rel->r_offset); - rel->r_info = elf32_to_cpu(rel->r_info); - } - } -} - - -static void print_absolute_symbols(void) -{ - int i; - printf("Absolute symbols\n"); - printf(" Num: Value Size Type Bind Visibility Name\n"); - for (i = 0; i < ehdr.e_shnum; i++) { - struct section *sec = &secs[i]; - char *sym_strtab; - int j; - - if (sec->shdr.sh_type != SHT_SYMTAB) { - continue; - } - sym_strtab = sec->link->strtab; - for (j = 0; j < sec->shdr.sh_size/sizeof(Elf32_Sym); j++) { - Elf32_Sym *sym; - const char *name; - sym = &sec->symtab[j]; - name = sym_name(sym_strtab, sym); - if (sym->st_shndx != SHN_ABS) { - continue; - } - printf("%5d %08x %5d %10s %10s %12s %s\n", - j, sym->st_value, sym->st_size, - sym_type(ELF32_ST_TYPE(sym->st_info)), - sym_bind(ELF32_ST_BIND(sym->st_info)), - sym_visibility(ELF32_ST_VISIBILITY(sym->st_other)), - name); - } - } - printf("\n"); -} - -static void print_absolute_relocs(void) -{ - int i, printed = 0; - - for (i = 0; i < ehdr.e_shnum; i++) { - struct section *sec = &secs[i]; - struct section *sec_applies, *sec_symtab; - char *sym_strtab; - Elf32_Sym *sh_symtab; - int j; - if (sec->shdr.sh_type != SHT_REL) { - continue; - } - sec_symtab = sec->link; - sec_applies = &secs[sec->shdr.sh_info]; - if (!(sec_applies->shdr.sh_flags & SHF_ALLOC)) { - continue; - } - sh_symtab = sec_symtab->symtab; - sym_strtab = sec_symtab->link->strtab; - for (j = 0; j < sec->shdr.sh_size/sizeof(Elf32_Rel); j++) { - Elf32_Rel *rel; - Elf32_Sym *sym; - const char *name; - rel = &sec->reltab[j]; - sym = &sh_symtab[ELF32_R_SYM(rel->r_info)]; - name = sym_name(sym_strtab, sym); - if (sym->st_shndx != SHN_ABS) { - continue; - } - - /* Absolute symbols are not relocated if bzImage is - * loaded at a non-compiled address. Display a warning - * to user at compile time about the absolute - * relocations present. - * - * User need to audit the code to make sure - * some symbols which should have been section - * relative have not become absolute because of some - * linker optimization or wrong programming usage. - * - * Before warning check if this absolute symbol - * relocation is harmless. - */ - if (is_abs_reloc(name) || is_rel_reloc(name)) - continue; - - if (!printed) { - printf("WARNING: Absolute relocations" - " present\n"); - printf("Offset Info Type Sym.Value " - "Sym.Name\n"); - printed = 1; - } - - printf("%08x %08x %10s %08x %s\n", - rel->r_offset, - rel->r_info, - rel_type(ELF32_R_TYPE(rel->r_info)), - sym->st_value, - name); - } - } - - if (printed) - printf("\n"); -} - -static void walk_relocs(void (*visit)(Elf32_Rel *rel, Elf32_Sym *sym)) -{ - int i; - /* Walk through the relocations */ - for (i = 0; i < ehdr.e_shnum; i++) { - char *sym_strtab; - Elf32_Sym *sh_symtab; - struct section *sec_applies, *sec_symtab; - int j; - struct section *sec = &secs[i]; - - if (sec->shdr.sh_type != SHT_REL) { - continue; - } - sec_symtab = sec->link; - sec_applies = &secs[sec->shdr.sh_info]; - if (!(sec_applies->shdr.sh_flags & SHF_ALLOC)) { - continue; - } - sh_symtab = sec_symtab->symtab; - sym_strtab = sec_symtab->link->strtab; - for (j = 0; j < sec->shdr.sh_size/sizeof(Elf32_Rel); j++) { - Elf32_Rel *rel; - Elf32_Sym *sym; - unsigned r_type; - rel = &sec->reltab[j]; - sym = &sh_symtab[ELF32_R_SYM(rel->r_info)]; - r_type = ELF32_R_TYPE(rel->r_info); - /* Don't visit relocations to absolute symbols */ - if (sym->st_shndx == SHN_ABS && - !is_rel_reloc(sym_name(sym_strtab, sym))) { - continue; - } - switch (r_type) { - case R_386_NONE: - case R_386_PC32: - /* - * NONE can be ignored and and PC relative - * relocations don't need to be adjusted. - */ - break; - case R_386_32: - /* Visit relocations that need to be adjusted */ - visit(rel, sym); - break; - default: - die("Unsupported relocation type: %s (%d)\n", - rel_type(r_type), r_type); - break; - } - } - } -} - -static void count_reloc(Elf32_Rel *rel, Elf32_Sym *sym) -{ - reloc_count += 1; -} - -static void collect_reloc(Elf32_Rel *rel, Elf32_Sym *sym) -{ - /* Remember the address that needs to be adjusted. */ - relocs[reloc_idx++] = rel->r_offset; -} - -static int cmp_relocs(const void *va, const void *vb) -{ - const unsigned long *a, *b; - a = va; b = vb; - return (*a == *b)? 0 : (*a > *b)? 1 : -1; -} - -static void emit_relocs(int as_text) -{ - int i; - /* Count how many relocations I have and allocate space for them. */ - reloc_count = 0; - walk_relocs(count_reloc); - relocs = malloc(reloc_count * sizeof(relocs[0])); - if (!relocs) { - die("malloc of %d entries for relocs failed\n", - reloc_count); - } - /* Collect up the relocations */ - reloc_idx = 0; - walk_relocs(collect_reloc); - - /* Order the relocations for more efficient processing */ - qsort(relocs, reloc_count, sizeof(relocs[0]), cmp_relocs); - - /* Print the relocations */ - if (as_text) { - /* Print the relocations in a form suitable that - * gas will like. - */ - printf(".section \".data.reloc\",\"a\"\n"); - printf(".balign 4\n"); - for (i = 0; i < reloc_count; i++) { - printf("\t .long 0x%08lx\n", relocs[i]); - } - printf("\n"); - } - else { - unsigned char buf[4]; - /* Print a stop */ - fwrite("\0\0\0\0", 4, 1, stdout); - /* Now print each relocation */ - for (i = 0; i < reloc_count; i++) { - put_unaligned_le32(relocs[i], buf); - fwrite(buf, 4, 1, stdout); - } - } -} - -static void usage(void) -{ - die("relocs [--abs-syms |--abs-relocs | --text] vmlinux\n"); -} - -int main(int argc, char **argv) -{ - int show_absolute_syms, show_absolute_relocs; - int as_text; - const char *fname; - FILE *fp; - int i; - - regex_init(); - - show_absolute_syms = 0; - show_absolute_relocs = 0; - as_text = 0; - fname = NULL; - for (i = 1; i < argc; i++) { - char *arg = argv[i]; - if (*arg == '-') { - if (strcmp(argv[1], "--abs-syms") == 0) { - show_absolute_syms = 1; - continue; - } - - if (strcmp(argv[1], "--abs-relocs") == 0) { - show_absolute_relocs = 1; - continue; - } - else if (strcmp(argv[1], "--text") == 0) { - as_text = 1; - continue; - } - } - else if (!fname) { - fname = arg; - continue; - } - usage(); - } - if (!fname) { - usage(); - } - fp = fopen(fname, "r"); - if (!fp) { - die("Cannot open %s: %s\n", - fname, strerror(errno)); - } - read_ehdr(fp); - read_shdrs(fp); - read_strtabs(fp); - read_symtabs(fp); - read_relocs(fp); - if (show_absolute_syms) { - print_absolute_symbols(); - return 0; - } - if (show_absolute_relocs) { - print_absolute_relocs(); - return 0; - } - emit_relocs(as_text); - return 0; -} diff --git a/scripts/.gitignore b/scripts/.gitignore index 105b21f..68c0f32 100644 --- a/scripts/.gitignore +++ b/scripts/.gitignore @@ -9,3 +9,4 @@ unifdef ihex2fw recordmcount docproc +x86-relocs diff --git a/scripts/Makefile b/scripts/Makefile index df7678f..a241359d 100644 --- a/scripts/Makefile +++ b/scripts/Makefile @@ -8,11 +8,14 @@ # conmakehash: Create arrays for initializing the kernel console tables # docproc: Used in Documentation/DocBook +HOST_EXTRACFLAGS += -I$(srctree)/tools/include + hostprogs-$(CONFIG_KALLSYMS) += kallsyms hostprogs-$(CONFIG_LOGO) += pnmtologo hostprogs-$(CONFIG_VT) += conmakehash hostprogs-$(CONFIG_IKCONFIG) += bin2c hostprogs-$(BUILD_C_RECORDMCOUNT) += recordmcount +hostprogs-$(CONFIG_X86) += x86-relocs always := $(hostprogs-y) $(hostprogs-m) diff --git a/scripts/x86-relocs.c b/scripts/x86-relocs.c new file mode 100644 index 0000000..0291470 --- /dev/null +++ b/scripts/x86-relocs.c @@ -0,0 +1,797 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#define USE_BSD +#include +#include +#include + +static void die(char *fmt, ...); + +#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) +static Elf32_Ehdr ehdr; +static unsigned long reloc_count, reloc_idx; +static unsigned long *relocs; +static unsigned long reloc16_count, reloc16_idx; +static unsigned long *relocs16; + +struct section { + Elf32_Shdr shdr; + struct section *link; + Elf32_Sym *symtab; + Elf32_Rel *reltab; + char *strtab; +}; +static struct section *secs; + +enum symtype { + S_ABS, + S_REL, + S_SEG, + S_LIN, + S_NSYMTYPES +}; + +static const char * const sym_regex_kernel[S_NSYMTYPES] = { +/* + * Following symbols have been audited. There values are constant and do + * not change if bzImage is loaded at a different physical address than + * the address for which it has been compiled. Don't warn user about + * absolute relocations present w.r.t these symbols. + */ + [S_ABS] = + "^(xen_irq_disable_direct_reloc$|" + "xen_save_fl_direct_reloc$|" + "VDSO|" + "__crc_)", + +/* + * These symbols are known to be relative, even if the linker marks them + * as absolute (typically defined outside any section in the linker script.) + */ + [S_REL] = + "^_end$", +}; + + +static const char * const sym_regex_realmode[S_NSYMTYPES] = { +/* + * These are 16-bit segment symbols when compiling 16-bit code. + */ + [S_SEG] = + "^real_mode_seg$", + +/* + * These are offsets belonging to segments, as opposed to linear addresses, + * when compiling 16-bit code. + */ + [S_LIN] = + "^pa_", +}; + +static const char * const *sym_regex; + +static regex_t sym_regex_c[S_NSYMTYPES]; +static int is_reloc(enum symtype type, const char *sym_name) +{ + return sym_regex[type] && + !regexec(&sym_regex_c[type], sym_name, 0, NULL, 0); +} + +static void regex_init(int use_real_mode) +{ + char errbuf[128]; + int err; + int i; + + if (use_real_mode) + sym_regex = sym_regex_realmode; + else + sym_regex = sym_regex_kernel; + + for (i = 0; i < S_NSYMTYPES; i++) { + if (!sym_regex[i]) + continue; + + err = regcomp(&sym_regex_c[i], sym_regex[i], + REG_EXTENDED|REG_NOSUB); + + if (err) { + regerror(err, &sym_regex_c[i], errbuf, sizeof errbuf); + die("%s", errbuf); + } + } +} + +static void die(char *fmt, ...) +{ + va_list ap; + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); + exit(1); +} + +static const char *sym_type(unsigned type) +{ + static const char *type_name[] = { +#define SYM_TYPE(X) [X] = #X + SYM_TYPE(STT_NOTYPE), + SYM_TYPE(STT_OBJECT), + SYM_TYPE(STT_FUNC), + SYM_TYPE(STT_SECTION), + SYM_TYPE(STT_FILE), + SYM_TYPE(STT_COMMON), + SYM_TYPE(STT_TLS), +#undef SYM_TYPE + }; + const char *name = "unknown sym type name"; + if (type < ARRAY_SIZE(type_name)) { + name = type_name[type]; + } + return name; +} + +static const char *sym_bind(unsigned bind) +{ + static const char *bind_name[] = { +#define SYM_BIND(X) [X] = #X + SYM_BIND(STB_LOCAL), + SYM_BIND(STB_GLOBAL), + SYM_BIND(STB_WEAK), +#undef SYM_BIND + }; + const char *name = "unknown sym bind name"; + if (bind < ARRAY_SIZE(bind_name)) { + name = bind_name[bind]; + } + return name; +} + +static const char *sym_visibility(unsigned visibility) +{ + static const char *visibility_name[] = { +#define SYM_VISIBILITY(X) [X] = #X + SYM_VISIBILITY(STV_DEFAULT), + SYM_VISIBILITY(STV_INTERNAL), + SYM_VISIBILITY(STV_HIDDEN), + SYM_VISIBILITY(STV_PROTECTED), +#undef SYM_VISIBILITY + }; + const char *name = "unknown sym visibility name"; + if (visibility < ARRAY_SIZE(visibility_name)) { + name = visibility_name[visibility]; + } + return name; +} + +static const char *rel_type(unsigned type) +{ + static const char *type_name[] = { +#define REL_TYPE(X) [X] = #X + REL_TYPE(R_386_NONE), + REL_TYPE(R_386_32), + REL_TYPE(R_386_PC32), + REL_TYPE(R_386_GOT32), + REL_TYPE(R_386_PLT32), + REL_TYPE(R_386_COPY), + REL_TYPE(R_386_GLOB_DAT), + REL_TYPE(R_386_JMP_SLOT), + REL_TYPE(R_386_RELATIVE), + REL_TYPE(R_386_GOTOFF), + REL_TYPE(R_386_GOTPC), + REL_TYPE(R_386_8), + REL_TYPE(R_386_PC8), + REL_TYPE(R_386_16), + REL_TYPE(R_386_PC16), +#undef REL_TYPE + }; + const char *name = "unknown type rel type name"; + if (type < ARRAY_SIZE(type_name) && type_name[type]) { + name = type_name[type]; + } + return name; +} + +static const char *sec_name(unsigned shndx) +{ + const char *sec_strtab; + const char *name; + sec_strtab = secs[ehdr.e_shstrndx].strtab; + name = ""; + if (shndx < ehdr.e_shnum) { + name = sec_strtab + secs[shndx].shdr.sh_name; + } + else if (shndx == SHN_ABS) { + name = "ABSOLUTE"; + } + else if (shndx == SHN_COMMON) { + name = "COMMON"; + } + return name; +} + +static const char *sym_name(const char *sym_strtab, Elf32_Sym *sym) +{ + const char *name; + name = ""; + if (sym->st_name) { + name = sym_strtab + sym->st_name; + } + else { + name = sec_name(sym->st_shndx); + } + return name; +} + + + +#if BYTE_ORDER == LITTLE_ENDIAN +#define le16_to_cpu(val) (val) +#define le32_to_cpu(val) (val) +#endif +#if BYTE_ORDER == BIG_ENDIAN +#define le16_to_cpu(val) bswap_16(val) +#define le32_to_cpu(val) bswap_32(val) +#endif + +static uint16_t elf16_to_cpu(uint16_t val) +{ + return le16_to_cpu(val); +} + +static uint32_t elf32_to_cpu(uint32_t val) +{ + return le32_to_cpu(val); +} + +static void read_ehdr(FILE *fp) +{ + if (fread(&ehdr, sizeof(ehdr), 1, fp) != 1) { + die("Cannot read ELF header: %s\n", + strerror(errno)); + } + if (memcmp(ehdr.e_ident, ELFMAG, SELFMAG) != 0) { + die("No ELF magic\n"); + } + if (ehdr.e_ident[EI_CLASS] != ELFCLASS32) { + die("Not a 32 bit executable\n"); + } + if (ehdr.e_ident[EI_DATA] != ELFDATA2LSB) { + die("Not a LSB ELF executable\n"); + } + if (ehdr.e_ident[EI_VERSION] != EV_CURRENT) { + die("Unknown ELF version\n"); + } + /* Convert the fields to native endian */ + ehdr.e_type = elf16_to_cpu(ehdr.e_type); + ehdr.e_machine = elf16_to_cpu(ehdr.e_machine); + ehdr.e_version = elf32_to_cpu(ehdr.e_version); + ehdr.e_entry = elf32_to_cpu(ehdr.e_entry); + ehdr.e_phoff = elf32_to_cpu(ehdr.e_phoff); + ehdr.e_shoff = elf32_to_cpu(ehdr.e_shoff); + ehdr.e_flags = elf32_to_cpu(ehdr.e_flags); + ehdr.e_ehsize = elf16_to_cpu(ehdr.e_ehsize); + ehdr.e_phentsize = elf16_to_cpu(ehdr.e_phentsize); + ehdr.e_phnum = elf16_to_cpu(ehdr.e_phnum); + ehdr.e_shentsize = elf16_to_cpu(ehdr.e_shentsize); + ehdr.e_shnum = elf16_to_cpu(ehdr.e_shnum); + ehdr.e_shstrndx = elf16_to_cpu(ehdr.e_shstrndx); + + if ((ehdr.e_type != ET_EXEC) && (ehdr.e_type != ET_DYN)) { + die("Unsupported ELF header type\n"); + } + if (ehdr.e_machine != EM_386) { + die("Not for x86\n"); + } + if (ehdr.e_version != EV_CURRENT) { + die("Unknown ELF version\n"); + } + if (ehdr.e_ehsize != sizeof(Elf32_Ehdr)) { + die("Bad Elf header size\n"); + } + if (ehdr.e_phentsize != sizeof(Elf32_Phdr)) { + die("Bad program header entry\n"); + } + if (ehdr.e_shentsize != sizeof(Elf32_Shdr)) { + die("Bad section header entry\n"); + } + if (ehdr.e_shstrndx >= ehdr.e_shnum) { + die("String table index out of bounds\n"); + } +} + +static void read_shdrs(FILE *fp) +{ + int i; + Elf32_Shdr shdr; + + secs = calloc(ehdr.e_shnum, sizeof(struct section)); + if (!secs) { + die("Unable to allocate %d section headers\n", + ehdr.e_shnum); + } + if (fseek(fp, ehdr.e_shoff, SEEK_SET) < 0) { + die("Seek to %d failed: %s\n", + ehdr.e_shoff, strerror(errno)); + } + for (i = 0; i < ehdr.e_shnum; i++) { + struct section *sec = &secs[i]; + if (fread(&shdr, sizeof shdr, 1, fp) != 1) + die("Cannot read ELF section headers %d/%d: %s\n", + i, ehdr.e_shnum, strerror(errno)); + sec->shdr.sh_name = elf32_to_cpu(shdr.sh_name); + sec->shdr.sh_type = elf32_to_cpu(shdr.sh_type); + sec->shdr.sh_flags = elf32_to_cpu(shdr.sh_flags); + sec->shdr.sh_addr = elf32_to_cpu(shdr.sh_addr); + sec->shdr.sh_offset = elf32_to_cpu(shdr.sh_offset); + sec->shdr.sh_size = elf32_to_cpu(shdr.sh_size); + sec->shdr.sh_link = elf32_to_cpu(shdr.sh_link); + sec->shdr.sh_info = elf32_to_cpu(shdr.sh_info); + sec->shdr.sh_addralign = elf32_to_cpu(shdr.sh_addralign); + sec->shdr.sh_entsize = elf32_to_cpu(shdr.sh_entsize); + if (sec->shdr.sh_link < ehdr.e_shnum) + sec->link = &secs[sec->shdr.sh_link]; + } + +} + +static void read_strtabs(FILE *fp) +{ + int i; + for (i = 0; i < ehdr.e_shnum; i++) { + struct section *sec = &secs[i]; + if (sec->shdr.sh_type != SHT_STRTAB) { + continue; + } + sec->strtab = malloc(sec->shdr.sh_size); + if (!sec->strtab) { + die("malloc of %d bytes for strtab failed\n", + sec->shdr.sh_size); + } + if (fseek(fp, sec->shdr.sh_offset, SEEK_SET) < 0) { + die("Seek to %d failed: %s\n", + sec->shdr.sh_offset, strerror(errno)); + } + if (fread(sec->strtab, 1, sec->shdr.sh_size, fp) + != sec->shdr.sh_size) { + die("Cannot read symbol table: %s\n", + strerror(errno)); + } + } +} + +static void read_symtabs(FILE *fp) +{ + int i,j; + for (i = 0; i < ehdr.e_shnum; i++) { + struct section *sec = &secs[i]; + if (sec->shdr.sh_type != SHT_SYMTAB) { + continue; + } + sec->symtab = malloc(sec->shdr.sh_size); + if (!sec->symtab) { + die("malloc of %d bytes for symtab failed\n", + sec->shdr.sh_size); + } + if (fseek(fp, sec->shdr.sh_offset, SEEK_SET) < 0) { + die("Seek to %d failed: %s\n", + sec->shdr.sh_offset, strerror(errno)); + } + if (fread(sec->symtab, 1, sec->shdr.sh_size, fp) + != sec->shdr.sh_size) { + die("Cannot read symbol table: %s\n", + strerror(errno)); + } + for (j = 0; j < sec->shdr.sh_size/sizeof(Elf32_Sym); j++) { + Elf32_Sym *sym = &sec->symtab[j]; + sym->st_name = elf32_to_cpu(sym->st_name); + sym->st_value = elf32_to_cpu(sym->st_value); + sym->st_size = elf32_to_cpu(sym->st_size); + sym->st_shndx = elf16_to_cpu(sym->st_shndx); + } + } +} + + +static void read_relocs(FILE *fp) +{ + int i,j; + for (i = 0; i < ehdr.e_shnum; i++) { + struct section *sec = &secs[i]; + if (sec->shdr.sh_type != SHT_REL) { + continue; + } + sec->reltab = malloc(sec->shdr.sh_size); + if (!sec->reltab) { + die("malloc of %d bytes for relocs failed\n", + sec->shdr.sh_size); + } + if (fseek(fp, sec->shdr.sh_offset, SEEK_SET) < 0) { + die("Seek to %d failed: %s\n", + sec->shdr.sh_offset, strerror(errno)); + } + if (fread(sec->reltab, 1, sec->shdr.sh_size, fp) + != sec->shdr.sh_size) { + die("Cannot read symbol table: %s\n", + strerror(errno)); + } + for (j = 0; j < sec->shdr.sh_size/sizeof(Elf32_Rel); j++) { + Elf32_Rel *rel = &sec->reltab[j]; + rel->r_offset = elf32_to_cpu(rel->r_offset); + rel->r_info = elf32_to_cpu(rel->r_info); + } + } +} + + +static void print_absolute_symbols(void) +{ + int i; + printf("Absolute symbols\n"); + printf(" Num: Value Size Type Bind Visibility Name\n"); + for (i = 0; i < ehdr.e_shnum; i++) { + struct section *sec = &secs[i]; + char *sym_strtab; + int j; + + if (sec->shdr.sh_type != SHT_SYMTAB) { + continue; + } + sym_strtab = sec->link->strtab; + for (j = 0; j < sec->shdr.sh_size/sizeof(Elf32_Sym); j++) { + Elf32_Sym *sym; + const char *name; + sym = &sec->symtab[j]; + name = sym_name(sym_strtab, sym); + if (sym->st_shndx != SHN_ABS) { + continue; + } + printf("%5d %08x %5d %10s %10s %12s %s\n", + j, sym->st_value, sym->st_size, + sym_type(ELF32_ST_TYPE(sym->st_info)), + sym_bind(ELF32_ST_BIND(sym->st_info)), + sym_visibility(ELF32_ST_VISIBILITY(sym->st_other)), + name); + } + } + printf("\n"); +} + +static void print_absolute_relocs(void) +{ + int i, printed = 0; + + for (i = 0; i < ehdr.e_shnum; i++) { + struct section *sec = &secs[i]; + struct section *sec_applies, *sec_symtab; + char *sym_strtab; + Elf32_Sym *sh_symtab; + int j; + if (sec->shdr.sh_type != SHT_REL) { + continue; + } + sec_symtab = sec->link; + sec_applies = &secs[sec->shdr.sh_info]; + if (!(sec_applies->shdr.sh_flags & SHF_ALLOC)) { + continue; + } + sh_symtab = sec_symtab->symtab; + sym_strtab = sec_symtab->link->strtab; + for (j = 0; j < sec->shdr.sh_size/sizeof(Elf32_Rel); j++) { + Elf32_Rel *rel; + Elf32_Sym *sym; + const char *name; + rel = &sec->reltab[j]; + sym = &sh_symtab[ELF32_R_SYM(rel->r_info)]; + name = sym_name(sym_strtab, sym); + if (sym->st_shndx != SHN_ABS) { + continue; + } + + /* Absolute symbols are not relocated if bzImage is + * loaded at a non-compiled address. Display a warning + * to user at compile time about the absolute + * relocations present. + * + * User need to audit the code to make sure + * some symbols which should have been section + * relative have not become absolute because of some + * linker optimization or wrong programming usage. + * + * Before warning check if this absolute symbol + * relocation is harmless. + */ + if (is_reloc(S_ABS, name) || is_reloc(S_REL, name)) + continue; + + if (!printed) { + printf("WARNING: Absolute relocations" + " present\n"); + printf("Offset Info Type Sym.Value " + "Sym.Name\n"); + printed = 1; + } + + printf("%08x %08x %10s %08x %s\n", + rel->r_offset, + rel->r_info, + rel_type(ELF32_R_TYPE(rel->r_info)), + sym->st_value, + name); + } + } + + if (printed) + printf("\n"); +} + +static void walk_relocs(void (*visit)(Elf32_Rel *rel, Elf32_Sym *sym), + int use_real_mode) +{ + int i; + /* Walk through the relocations */ + for (i = 0; i < ehdr.e_shnum; i++) { + char *sym_strtab; + Elf32_Sym *sh_symtab; + struct section *sec_applies, *sec_symtab; + int j; + struct section *sec = &secs[i]; + + if (sec->shdr.sh_type != SHT_REL) { + continue; + } + sec_symtab = sec->link; + sec_applies = &secs[sec->shdr.sh_info]; + if (!(sec_applies->shdr.sh_flags & SHF_ALLOC)) { + continue; + } + sh_symtab = sec_symtab->symtab; + sym_strtab = sec_symtab->link->strtab; + for (j = 0; j < sec->shdr.sh_size/sizeof(Elf32_Rel); j++) { + Elf32_Rel *rel; + Elf32_Sym *sym; + unsigned r_type; + const char *symname; + rel = &sec->reltab[j]; + sym = &sh_symtab[ELF32_R_SYM(rel->r_info)]; + r_type = ELF32_R_TYPE(rel->r_info); + + switch (r_type) { + case R_386_NONE: + case R_386_PC32: + case R_386_PC16: + case R_386_PC8: + /* + * NONE can be ignored and and PC relative + * relocations don't need to be adjusted. + */ + break; + + case R_386_16: + symname = sym_name(sym_strtab, sym); + if (!use_real_mode) + goto bad; + if (sym->st_shndx == SHN_ABS) { + if (is_reloc(S_ABS, symname)) + break; + else if (!is_reloc(S_SEG, symname)) + goto bad; + } else { + if (is_reloc(S_LIN, symname)) + goto bad; + else + break; + } + visit(rel, sym); + break; + + case R_386_32: + symname = sym_name(sym_strtab, sym); + if (sym->st_shndx == SHN_ABS) { + if (is_reloc(S_ABS, symname)) + break; + else if (!is_reloc(S_REL, symname)) + goto bad; + } else { + if (use_real_mode && + !is_reloc(S_LIN, symname)) + break; + } + visit(rel, sym); + break; + default: + die("Unsupported relocation type: %s (%d)\n", + rel_type(r_type), r_type); + break; + bad: + symname = sym_name(sym_strtab, sym); + die("Invalid %s relocation: %s\n", + rel_type(r_type), symname); + } + } + } +} + +static void count_reloc(Elf32_Rel *rel, Elf32_Sym *sym) +{ + if (ELF32_R_TYPE(rel->r_info) == R_386_16) + reloc16_count++; + else + reloc_count++; +} + +static void collect_reloc(Elf32_Rel *rel, Elf32_Sym *sym) +{ + /* Remember the address that needs to be adjusted. */ + if (ELF32_R_TYPE(rel->r_info) == R_386_16) + relocs16[reloc16_idx++] = rel->r_offset; + else + relocs[reloc_idx++] = rel->r_offset; +} + +static int cmp_relocs(const void *va, const void *vb) +{ + const unsigned long *a, *b; + a = va; b = vb; + return (*a == *b)? 0 : (*a > *b)? 1 : -1; +} + +static int write32(unsigned int v, FILE *f) +{ + unsigned char buf[4]; + + put_unaligned_le32(v, buf); + return fwrite(buf, 1, 4, f) == 4 ? 0 : -1; +} + +static void emit_relocs(int as_text, int use_real_mode) +{ + int i; + /* Count how many relocations I have and allocate space for them. */ + reloc_count = 0; + walk_relocs(count_reloc, use_real_mode); + relocs = malloc(reloc_count * sizeof(relocs[0])); + if (!relocs) { + die("malloc of %d entries for relocs failed\n", + reloc_count); + } + + relocs16 = malloc(reloc16_count * sizeof(relocs[0])); + if (!relocs16) { + die("malloc of %d entries for relocs16 failed\n", + reloc16_count); + } + /* Collect up the relocations */ + reloc_idx = 0; + walk_relocs(collect_reloc, use_real_mode); + + if (reloc16_count && !use_real_mode) + die("Segment relocations found but --realmode not specified\n"); + + /* Order the relocations for more efficient processing */ + qsort(relocs, reloc_count, sizeof(relocs[0]), cmp_relocs); + qsort(relocs16, reloc16_count, sizeof(relocs16[0]), cmp_relocs); + + /* Print the relocations */ + if (as_text) { + /* Print the relocations in a form suitable that + * gas will like. + */ + printf(".section \".data.reloc\",\"a\"\n"); + printf(".balign 4\n"); + if (use_real_mode) { + printf("\t.long %lu\n", reloc16_count); + for (i = 0; i < reloc16_count; i++) + printf("\t.long 0x%08lx\n", relocs16[i]); + printf("\t.long %lu\n", reloc_count); + for (i = 0; i < reloc_count; i++) { + printf("\t.long 0x%08lx\n", relocs[i]); + } + } else { + /* Print a stop */ + printf("\t.long 0x%08lx\n", (unsigned long)0); + for (i = 0; i < reloc_count; i++) { + printf("\t.long 0x%08lx\n", relocs[i]); + } + } + + printf("\n"); + } + else { + if (use_real_mode) { + write32(reloc16_count, stdout); + for (i = 0; i < reloc16_count; i++) + write32(relocs16[i], stdout); + write32(reloc_count, stdout); + + /* Now print each relocation */ + for (i = 0; i < reloc_count; i++) + write32(relocs[i], stdout); + } else { + /* Print a stop */ + write32(0, stdout); + + /* Now print each relocation */ + for (i = 0; i < reloc_count; i++) { + write32(relocs[i], stdout); + } + } + } +} + +static void usage(void) +{ + die("relocs [--abs-syms|--abs-relocs|--text|--realmode] vmlinux\n"); +} + +int main(int argc, char **argv) +{ + int show_absolute_syms, show_absolute_relocs; + int as_text, use_real_mode; + const char *fname; + FILE *fp; + int i; + + show_absolute_syms = 0; + show_absolute_relocs = 0; + as_text = 0; + use_real_mode = 0; + fname = NULL; + for (i = 1; i < argc; i++) { + char *arg = argv[i]; + if (*arg == '-') { + if (strcmp(arg, "--abs-syms") == 0) { + show_absolute_syms = 1; + continue; + } + if (strcmp(arg, "--abs-relocs") == 0) { + show_absolute_relocs = 1; + continue; + } + if (strcmp(arg, "--text") == 0) { + as_text = 1; + continue; + } + if (strcmp(arg, "--realmode") == 0) { + use_real_mode = 1; + continue; + } + } + else if (!fname) { + fname = arg; + continue; + } + usage(); + } + if (!fname) { + usage(); + } + regex_init(use_real_mode); + fp = fopen(fname, "r"); + if (!fp) { + die("Cannot open %s: %s\n", + fname, strerror(errno)); + } + read_ehdr(fp); + read_shdrs(fp); + read_strtabs(fp); + read_symtabs(fp); + read_relocs(fp); + if (show_absolute_syms) { + print_absolute_symbols(); + return 0; + } + if (show_absolute_relocs) { + print_absolute_relocs(); + return 0; + } + emit_relocs(as_text, use_real_mode); + return 0; +} -- cgit v0.10.2 From b3266bd6ff52efb9e57c7fbfff4c8f7363dfaab3 Mon Sep 17 00:00:00 2001 From: Jarkko Sakkinen Date: Tue, 8 May 2012 21:22:25 +0300 Subject: x86, realmode: realmode.bin infrastructure Create realmode.bin and realmode.relocs files. Piggy pack them into relocatable object that will be included into .init.data section of the main kernel image. The first file includes binary image of the real-mode code. The latter file includes all relocations. The layout of the binary image is specified in realmode.lds.S. The makefile generates pa_ prefixed symbols for each exported global. These are used in 32-bit code and in realmode header to define symbols that need to be relocated. Signed-off-by: Jarkko Sakkinen Link: http://lkml.kernel.org/r/1336501366-28617-3-git-send-email-jarkko.sakkinen@intel.com Originally-by: H. Peter Anvin Signed-off-by: H. Peter Anvin diff --git a/arch/x86/Kbuild b/arch/x86/Kbuild index 0e9dec6..e5287d8 100644 --- a/arch/x86/Kbuild +++ b/arch/x86/Kbuild @@ -1,4 +1,3 @@ - obj-$(CONFIG_KVM) += kvm/ # Xen paravirtualization support @@ -7,6 +6,7 @@ obj-$(CONFIG_XEN) += xen/ # lguest paravirtualization support obj-$(CONFIG_LGUEST_GUEST) += lguest/ +obj-y += realmode/ obj-y += kernel/ obj-y += mm/ diff --git a/arch/x86/realmode/Makefile b/arch/x86/realmode/Makefile new file mode 100644 index 0000000..f22a4f8 --- /dev/null +++ b/arch/x86/realmode/Makefile @@ -0,0 +1,20 @@ +# +# arch/x86/realmode/Makefile +# +# This file is subject to the terms and conditions of the GNU General Public +# License. See the file "COPYING" in the main directory of this archive +# for more details. +# +# + +subdir- := rm + +obj-y += rmpiggy.o + +$(obj)/rmpiggy.o: $(obj)/rm/realmode.relocs $(obj)/rm/realmode.bin + +$(obj)/rm/realmode.bin: FORCE + $(Q)$(MAKE) $(build)=$(obj)/rm $@ + +$(obj)/rm/realmode.relocs: FORCE + $(Q)$(MAKE) $(build)=$(obj)/rm $@ diff --git a/arch/x86/realmode/rm/.gitignore b/arch/x86/realmode/rm/.gitignore new file mode 100644 index 0000000..b6ed3a2 --- /dev/null +++ b/arch/x86/realmode/rm/.gitignore @@ -0,0 +1,3 @@ +pasyms.h +realmode.lds +realmode.relocs diff --git a/arch/x86/realmode/rm/Makefile b/arch/x86/realmode/rm/Makefile new file mode 100644 index 0000000..7c3f202 --- /dev/null +++ b/arch/x86/realmode/rm/Makefile @@ -0,0 +1,63 @@ +# +# arch/x86/realmode/Makefile +# +# This file is subject to the terms and conditions of the GNU General Public +# License. See the file "COPYING" in the main directory of this archive +# for more details. +# +# + +subdir- := wakeup + +always := realmode.bin + +realmode-y += header.o + +targets += $(realmode-y) + +REALMODE_OBJS = $(addprefix $(obj)/,$(realmode-y)) + +sed-pasyms := -n -r -e 's/^([0-9a-fA-F]+) [ABCDGRSTVW] (.+)$$/pa_\2 = \2;/p' + +quiet_cmd_pasyms = PASYMS $@ + cmd_pasyms = $(NM) $(filter-out FORCE,$^) | \ + sed $(sed-pasyms) | sort | uniq > $@ + +$(obj)/pasyms.h: $(REALMODE_OBJS) FORCE + $(call if_changed,pasyms) + +$(obj)/realmode.lds: $(obj)/pasyms.h + +LDFLAGS_realmode.elf := --emit-relocs -T +CPPFLAGS_realmode.lds += -P -C -I$(obj) + +$(obj)/realmode.elf: $(obj)/realmode.lds $(REALMODE_OBJS) FORCE + $(call if_changed,ld) + +OBJCOPYFLAGS_realmode.bin := -O binary + +$(obj)/realmode.bin: $(obj)/realmode.elf + $(call if_changed,objcopy) + +quiet_cmd_relocs = RELOCS $@ + cmd_relocs = scripts/x86-relocs --realmode $< > $@ +$(obj)/realmode.relocs: $(obj)/realmode.elf FORCE + $(call if_changed,relocs) + +# --------------------------------------------------------------------------- + +# How to compile the 16-bit code. Note we always compile for -march=i386, +# that way we can complain to the user if the CPU is insufficient. +KBUILD_CFLAGS := $(LINUXINCLUDE) -m32 -g -Os -D_SETUP -D__KERNEL__ \ + -DDISABLE_BRANCH_PROFILING \ + -Wall -Wstrict-prototypes \ + -march=i386 -mregparm=3 \ + -include $(srctree)/$(src)/../../boot/code16gcc.h \ + -fno-strict-aliasing -fomit-frame-pointer \ + $(call cc-option, -ffreestanding) \ + $(call cc-option, -fno-toplevel-reorder,\ + $(call cc-option, -fno-unit-at-a-time)) \ + $(call cc-option, -fno-stack-protector) \ + $(call cc-option, -mpreferred-stack-boundary=2) +KBUILD_AFLAGS := $(KBUILD_CFLAGS) -D__ASSEMBLY__ +GCOV_PROFILE := n diff --git a/arch/x86/realmode/rm/header.S b/arch/x86/realmode/rm/header.S new file mode 100644 index 0000000..7be17f2 --- /dev/null +++ b/arch/x86/realmode/rm/header.S @@ -0,0 +1,16 @@ +/* + * Real-mode blob header; this should match realmode.h and be + * readonly; for mutable data instead add pointers into the .data + * or .bss sections as appropriate. + */ + +#include +#include + + .section ".header", "a" + +ENTRY(real_mode_header) + .long pa_text_start + .long pa_ro_end + .long pa_end +END(real_mode_header) diff --git a/arch/x86/realmode/rm/realmode.lds.S b/arch/x86/realmode/rm/realmode.lds.S new file mode 100644 index 0000000..c5b8a4f --- /dev/null +++ b/arch/x86/realmode/rm/realmode.lds.S @@ -0,0 +1,68 @@ +/* + * realmode.lds.S + * + * Linker script for the real-mode code + */ + +#include + +#undef i386 + +OUTPUT_FORMAT("elf32-i386", "elf32-i386", "elf32-i386") +OUTPUT_ARCH(i386) + +SECTIONS +{ + real_mode_seg = 0; + + . = 0; + .header : { + pa_real_mode_base = .; + *(.header) + } + + . = ALIGN(4); + .rodata : { + *(.rodata) + *(.rodata.*) + } + + . = ALIGN(PAGE_SIZE); + .text : { + pa_text_start = .; + *(.text) + *(.text.*) + } + + .text32 : { + *(.text32) + *(.text32.*) + pa_ro_end = .; + } + + . = ALIGN(PAGE_SIZE); + .data : { + *(.data) + *(.data.*) + } + + . = ALIGN(128); + .bss : { + *(.bss*) + } + + /* End signature for integrity checking */ + . = ALIGN(4); + .signature : { + *(.signature) + pa_end = .; + } + + /DISCARD/ : { + *(.note*) + *(.debug*) + *(.eh_frame*) + } + +#include "pasyms.h" +} diff --git a/arch/x86/realmode/rmpiggy.S b/arch/x86/realmode/rmpiggy.S new file mode 100644 index 0000000..6047d7f --- /dev/null +++ b/arch/x86/realmode/rmpiggy.S @@ -0,0 +1,18 @@ +/* + * Wrapper script for the realmode binary as a transport object + * before copying to low memory. + */ +#include +#include + + .section ".init.data","aw" + + .balign PAGE_SIZE + +ENTRY(real_mode_blob) + .incbin "arch/x86/realmode/rm/realmode.bin" +END(real_mode_blob) + +ENTRY(real_mode_relocs) + .incbin "arch/x86/realmode/rm/realmode.relocs" +END(real_mode_relocs) -- cgit v0.10.2 From 084ee1c641a068bfd1194d545f7dc9ab2043eb35 Mon Sep 17 00:00:00 2001 From: Jarkko Sakkinen Date: Tue, 8 May 2012 21:22:26 +0300 Subject: x86, realmode: Relocator for realmode code Implements relocator for real mode code that is called as part of setup_arch(). Processes segment relocations and linear relocations. Real-mode code is relocated to a free hole below 1 MB. Signed-off-by: Jarkko Sakkinen Link: http://lkml.kernel.org/r/1336501366-28617-4-git-send-email-jarkko.sakkinen@intel.com Signed-off-by: H. Peter Anvin diff --git a/arch/x86/include/asm/realmode.h b/arch/x86/include/asm/realmode.h new file mode 100644 index 0000000..dc1bba5 --- /dev/null +++ b/arch/x86/include/asm/realmode.h @@ -0,0 +1,26 @@ +#ifndef _ARCH_X86_REALMODE_H +#define _ARCH_X86_REALMODE_H + +#include +#include + +/* This must match data at realmode.S */ +struct real_mode_header { + u32 text_start; + u32 ro_end; + u32 end; +} __attribute__((__packed__)); + +extern struct real_mode_header real_mode_header; +extern unsigned char *real_mode_base; + +extern unsigned long init_rsp; +extern unsigned long initial_code; +extern unsigned long initial_gs; + +extern unsigned char real_mode_blob[]; +extern unsigned char real_mode_relocs[]; + +extern void __init setup_real_mode(void); + +#endif /* _ARCH_X86_REALMODE_H */ diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 532d2e0..f9e19d4 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -36,6 +36,7 @@ obj-y += pci-iommu_table.o obj-y += resource.o obj-y += trampoline.o trampoline_$(BITS).o +obj-y += realmode.o obj-y += process.o obj-y += i387.o xsave.o obj-y += ptrace.o diff --git a/arch/x86/kernel/realmode.c b/arch/x86/kernel/realmode.c new file mode 100644 index 0000000..7415c42 --- /dev/null +++ b/arch/x86/kernel/realmode.c @@ -0,0 +1,79 @@ +#include +#include + +#include +#include +#include + +unsigned char *real_mode_base; +struct real_mode_header real_mode_header; + +void __init setup_real_mode(void) +{ + phys_addr_t mem; + u16 real_mode_seg; + u32 *rel; + u32 count; + u32 *ptr; + u16 *seg; + int i; + + struct real_mode_header *header = + (struct real_mode_header *) real_mode_blob; + + size_t size = PAGE_ALIGN(header->end); + + /* Has to be in very low memory so we can execute real-mode AP code. */ + mem = memblock_find_in_range(0, 1<<20, size, PAGE_SIZE); + if (!mem) + panic("Cannot allocate trampoline\n"); + + real_mode_base = __va(mem); + memblock_reserve(mem, size); + + printk(KERN_DEBUG "Base memory trampoline at [%p] %llx size %zu\n", + real_mode_base, (unsigned long long)mem, size); + + memcpy(real_mode_base, real_mode_blob, size); + + real_mode_seg = __pa(real_mode_base) >> 4; + rel = (u32 *) real_mode_relocs; + + /* 16-bit segment relocations. */ + count = rel[0]; + rel = &rel[1]; + for (i = 0; i < count; i++) { + seg = (u16 *) (real_mode_base + rel[i]); + *seg = real_mode_seg; + } + + /* 32-bit linear relocations. */ + count = rel[i]; + rel = &rel[i + 1]; + for (i = 0; i < count; i++) { + ptr = (u32 *) (real_mode_base + rel[i]); + *ptr += __pa(real_mode_base); + } + + /* Copied header will contain relocated physical addresses. */ + memcpy(&real_mode_header, real_mode_base, + sizeof(struct real_mode_header)); +} + +/* + * set_real_mode_permissions() gets called very early, to guarantee the + * availability of low memory. This is before the proper kernel page + * tables are set up, so we cannot set page permissions in that + * function. Thus, we use an arch_initcall instead. + */ +static int __init set_real_mode_permissions(void) +{ + size_t all_size = + PAGE_ALIGN(real_mode_header.end) - + __pa(real_mode_base); + + set_memory_x((unsigned long) real_mode_base, all_size >> PAGE_SHIFT); + return 0; +} + +arch_initcall(set_real_mode_permissions); diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 1a29015..56e4124 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -74,6 +74,7 @@ #include #include #include +#include #include #include #include @@ -918,6 +919,7 @@ void __init setup_arch(char **cmdline_p) max_pfn_mapped< Date: Tue, 8 May 2012 21:22:27 +0300 Subject: x86, realmode: Move reboot_32.S to unified realmode code Migrated reboot_32.S from x86_trampoline to the real-mode blob. Signed-off-by: Jarkko Sakkinen Link: http://lkml.kernel.org/r/1336501366-28617-5-git-send-email-jarkko.sakkinen@intel.com Signed-off-by: H. Peter Anvin diff --git a/arch/x86/include/asm/realmode.h b/arch/x86/include/asm/realmode.h index dc1bba5..bf26b06 100644 --- a/arch/x86/include/asm/realmode.h +++ b/arch/x86/include/asm/realmode.h @@ -9,6 +9,10 @@ struct real_mode_header { u32 text_start; u32 ro_end; u32 end; + /* reboot */ +#ifdef CONFIG_X86_32 + u32 machine_real_restart_asm; +#endif } __attribute__((__packed__)); extern struct real_mode_header real_mode_header; diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index f9e19d4..b71ef35 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -49,7 +49,6 @@ obj-$(CONFIG_STACKTRACE) += stacktrace.o obj-y += cpu/ obj-y += acpi/ obj-y += reboot.o -obj-$(CONFIG_X86_32) += reboot_32.o obj-$(CONFIG_MCA) += mca_32.o obj-$(CONFIG_X86_MSR) += msr.o obj-$(CONFIG_X86_CPUID) += cpuid.o diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index d840e69..050eff2 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -24,6 +24,7 @@ #ifdef CONFIG_X86_32 # include # include +# include #else # include #endif @@ -332,15 +333,10 @@ static int __init reboot_init(void) } core_initcall(reboot_init); -extern const unsigned char machine_real_restart_asm[]; -extern const u64 machine_real_restart_gdt[3]; - void machine_real_restart(unsigned int type) { - void *restart_va; - unsigned long restart_pa; - void (*restart_lowmem)(unsigned int); - u64 *lowmem_gdt; + void (*restart_lowmem)(unsigned int) = (void (*)(unsigned int)) + real_mode_header.machine_real_restart_asm; local_irq_disable(); @@ -369,21 +365,6 @@ void machine_real_restart(unsigned int type) too. */ *((unsigned short *)0x472) = reboot_mode; - /* Patch the GDT in the low memory trampoline */ - lowmem_gdt = TRAMPOLINE_SYM(machine_real_restart_gdt); - - restart_va = TRAMPOLINE_SYM(machine_real_restart_asm); - restart_pa = virt_to_phys(restart_va); - restart_lowmem = (void (*)(unsigned int))restart_pa; - - /* GDT[0]: GDT self-pointer */ - lowmem_gdt[0] = - (u64)(sizeof(machine_real_restart_gdt) - 1) + - ((u64)virt_to_phys(lowmem_gdt) << 16); - /* GDT[1]: 64K real mode code segment */ - lowmem_gdt[1] = - GDT_ENTRY(0x009b, restart_pa, 0xffff); - /* Jump to the identity-mapped low memory code */ restart_lowmem(type); } diff --git a/arch/x86/kernel/reboot_32.S b/arch/x86/kernel/reboot_32.S deleted file mode 100644 index 1d5c46d..0000000 --- a/arch/x86/kernel/reboot_32.S +++ /dev/null @@ -1,135 +0,0 @@ -#include -#include -#include -#include - -/* - * The following code and data reboots the machine by switching to real - * mode and jumping to the BIOS reset entry point, as if the CPU has - * really been reset. The previous version asked the keyboard - * controller to pulse the CPU reset line, which is more thorough, but - * doesn't work with at least one type of 486 motherboard. It is easy - * to stop this code working; hence the copious comments. - * - * This code is called with the restart type (0 = BIOS, 1 = APM) in %eax. - */ - .section ".x86_trampoline","a" - .balign 16 - .code32 -ENTRY(machine_real_restart_asm) -r_base = . - /* Get our own relocated address */ - call 1f -1: popl %ebx - subl $(1b - r_base), %ebx - - /* Compute the equivalent real-mode segment */ - movl %ebx, %ecx - shrl $4, %ecx - - /* Patch post-real-mode segment jump */ - movw (dispatch_table - r_base)(%ebx,%eax,2),%ax - movw %ax, (101f - r_base)(%ebx) - movw %cx, (102f - r_base)(%ebx) - - /* Set up the IDT for real mode. */ - lidtl (machine_real_restart_idt - r_base)(%ebx) - - /* - * Set up a GDT from which we can load segment descriptors for real - * mode. The GDT is not used in real mode; it is just needed here to - * prepare the descriptors. - */ - lgdtl (machine_real_restart_gdt - r_base)(%ebx) - - /* - * Load the data segment registers with 16-bit compatible values - */ - movl $16, %ecx - movl %ecx, %ds - movl %ecx, %es - movl %ecx, %fs - movl %ecx, %gs - movl %ecx, %ss - ljmpl $8, $1f - r_base - -/* - * This is 16-bit protected mode code to disable paging and the cache, - * switch to real mode and jump to the BIOS reset code. - * - * The instruction that switches to real mode by writing to CR0 must be - * followed immediately by a far jump instruction, which set CS to a - * valid value for real mode, and flushes the prefetch queue to avoid - * running instructions that have already been decoded in protected - * mode. - * - * Clears all the flags except ET, especially PG (paging), PE - * (protected-mode enable) and TS (task switch for coprocessor state - * save). Flushes the TLB after paging has been disabled. Sets CD and - * NW, to disable the cache on a 486, and invalidates the cache. This - * is more like the state of a 486 after reset. I don't know if - * something else should be done for other chips. - * - * More could be done here to set up the registers as if a CPU reset had - * occurred; hopefully real BIOSs don't assume much. This is not the - * actual BIOS entry point, anyway (that is at 0xfffffff0). - * - * Most of this work is probably excessive, but it is what is tested. - */ - .code16 -1: - xorl %ecx, %ecx - movl %cr0, %eax - andl $0x00000011, %eax - orl $0x60000000, %eax - movl %eax, %cr0 - movl %ecx, %cr3 - movl %cr0, %edx - andl $0x60000000, %edx /* If no cache bits -> no wbinvd */ - jz 2f - wbinvd -2: - andb $0x10, %al - movl %eax, %cr0 - .byte 0xea /* ljmpw */ -101: .word 0 /* Offset */ -102: .word 0 /* Segment */ - -bios: - ljmpw $0xf000, $0xfff0 - -apm: - movw $0x1000, %ax - movw %ax, %ss - movw $0xf000, %sp - movw $0x5307, %ax - movw $0x0001, %bx - movw $0x0003, %cx - int $0x15 - -END(machine_real_restart_asm) - - .balign 16 - /* These must match +#include +#include +#include + +/* + * The following code and data reboots the machine by switching to real + * mode and jumping to the BIOS reset entry point, as if the CPU has + * really been reset. The previous version asked the keyboard + * controller to pulse the CPU reset line, which is more thorough, but + * doesn't work with at least one type of 486 motherboard. It is easy + * to stop this code working; hence the copious comments. + * + * This code is called with the restart type (0 = BIOS, 1 = APM) in %eax. + */ + .section ".text32", "ax" + .code32 + .globl machine_real_restart_asm + + .balign 16 +machine_real_restart_asm: + /* Set up the IDT for real mode. */ + lidtl pa_machine_real_restart_idt + + /* + * Set up a GDT from which we can load segment descriptors for real + * mode. The GDT is not used in real mode; it is just needed here to + * prepare the descriptors. + */ + lgdtl pa_machine_real_restart_gdt + + /* + * Load the data segment registers with 16-bit compatible values + */ + movl $16, %ecx + movl %ecx, %ds + movl %ecx, %es + movl %ecx, %fs + movl %ecx, %gs + movl %ecx, %ss + ljmpw $8, $1f + +/* + * This is 16-bit protected mode code to disable paging and the cache, + * switch to real mode and jump to the BIOS reset code. + * + * The instruction that switches to real mode by writing to CR0 must be + * followed immediately by a far jump instruction, which set CS to a + * valid value for real mode, and flushes the prefetch queue to avoid + * running instructions that have already been decoded in protected + * mode. + * + * Clears all the flags except ET, especially PG (paging), PE + * (protected-mode enable) and TS (task switch for coprocessor state + * save). Flushes the TLB after paging has been disabled. Sets CD and + * NW, to disable the cache on a 486, and invalidates the cache. This + * is more like the state of a 486 after reset. I don't know if + * something else should be done for other chips. + * + * More could be done here to set up the registers as if a CPU reset had + * occurred; hopefully real BIOSs don't assume much. This is not the + * actual BIOS entry point, anyway (that is at 0xfffffff0). + * + * Most of this work is probably excessive, but it is what is tested. + */ + .text + .code16 + + .balign 16 +machine_real_restart_asm16: +1: + xorl %ecx, %ecx + movl %cr0, %edx + andl $0x00000011, %edx + orl $0x60000000, %edx + movl %edx, %cr0 + movl %ecx, %cr3 + movl %cr0, %edx + andl $0x60000000, %edx /* If no cache bits -> no wbinvd */ + jz 2f + wbinvd +2: + andb $0x10, %dl + movl %edx, %cr0 + .byte 0xea /* ljmpw */ + .word 3f /* Offset */ + .word real_mode_seg /* Segment */ + +3: + testb $0, %al + jz bios + +apm: + movw $0x1000, %ax + movw %ax, %ss + movw $0xf000, %sp + movw $0x5307, %ax + movw $0x0001, %bx + movw $0x0003, %cx + int $0x15 + /* This should never return... */ + +bios: + ljmpw $0xf000, $0xfff0 + + .section ".rodata", "a" + .globl machine_real_restart_idt, machine_real_restart_gdt + + .balign 16 +machine_real_restart_idt: + .word 0xffff /* Length - real mode default value */ + .long 0 /* Base - real mode default value */ + + .balign 16 +machine_real_restart_gdt: + /* Self-pointer */ + .word 0xffff /* Length - real mode default value */ + .long pa_machine_real_restart_gdt + .word 0 + + /* + * 16-bit code segment pointing to real_mode_seg + * Selector value 8 + */ + .word 0xffff /* Limit */ + .long 0x9b000000 + pa_real_mode_base + .word 0 + + /* + * 16-bit data segment with the selector value 16 = 0x10 and + * base value 0x100; since this is consistent with real mode + * semantics we don't have to reload the segments once CR0.PE = 0. + */ + .quad GDT_ENTRY(0x0093, 0x100, 0xffff) -- cgit v0.10.2 From 48927bbb97c7d4cf343c05827ab9ac30c60678cb Mon Sep 17 00:00:00 2001 From: Jarkko Sakkinen Date: Tue, 8 May 2012 21:22:28 +0300 Subject: x86, realmode: Move SMP trampoline to unified realmode code Migrated SMP trampoline code to the real mode blob. SMP trampoline code is not yet removed from .x86_trampoline because it is needed by the wakeup code. [ hpa: always enable compiling startup_32_smp in head_32.S... it is only a few instructions which go into .init on UP builds, and it makes the rest of the code less #ifdef ugly. ] Signed-off-by: Jarkko Sakkinen Link: http://lkml.kernel.org/r/1336501366-28617-6-git-send-email-jarkko.sakkinen@intel.com Signed-off-by: H. Peter Anvin diff --git a/arch/x86/include/asm/realmode.h b/arch/x86/include/asm/realmode.h index bf26b06..9b4a5da 100644 --- a/arch/x86/include/asm/realmode.h +++ b/arch/x86/include/asm/realmode.h @@ -13,6 +13,17 @@ struct real_mode_header { #ifdef CONFIG_X86_32 u32 machine_real_restart_asm; #endif + /* SMP trampoline */ + u32 trampoline_data; + u32 trampoline_status; +#ifdef CONFIG_X86_32 + u32 startup_32_smp; + u32 boot_gdt; +#else + u32 startup_64_smp; + u32 level3_ident_pgt; + u32 level3_kernel_pgt; +#endif } __attribute__((__packed__)); extern struct real_mode_header real_mode_header; @@ -25,6 +36,13 @@ extern unsigned long initial_gs; extern unsigned char real_mode_blob[]; extern unsigned char real_mode_relocs[]; +#ifdef CONFIG_X86_32 +extern unsigned char startup_32_smp[]; +extern unsigned char boot_gdt[]; +#else +extern unsigned char secondary_startup_64[]; +#endif + extern void __init setup_real_mode(void); #endif /* _ARCH_X86_REALMODE_H */ diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index ce0be7c..a3c2b4f 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S @@ -273,10 +273,7 @@ num_subarch_entries = (. - subarch_entries) / 4 * If cpu hotplug is not supported then this code can go in init section * which will be freed later */ - __CPUINIT - -#ifdef CONFIG_SMP ENTRY(startup_32_smp) cld movl $(__BOOT_DS),%eax @@ -287,7 +284,7 @@ ENTRY(startup_32_smp) movl pa(stack_start),%ecx movl %eax,%ss leal -__PAGE_OFFSET(%ecx),%esp -#endif /* CONFIG_SMP */ + default_entry: /* diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 40f4eb3..d70bc2e 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -136,10 +136,6 @@ ident_complete: /* Fixup phys_base */ addq %rbp, phys_base(%rip) - /* Fixup trampoline */ - addq %rbp, trampoline_level4_pgt + 0(%rip) - addq %rbp, trampoline_level4_pgt + (511*8)(%rip) - /* Due to ENTRY(), sometimes the empty space gets filled with * zeros. Better take a jmp than relying on empty space being * filled with 0x90 (nop) diff --git a/arch/x86/kernel/realmode.c b/arch/x86/kernel/realmode.c index 7415c42..a465775 100644 --- a/arch/x86/kernel/realmode.c +++ b/arch/x86/kernel/realmode.c @@ -58,6 +58,20 @@ void __init setup_real_mode(void) /* Copied header will contain relocated physical addresses. */ memcpy(&real_mode_header, real_mode_base, sizeof(struct real_mode_header)); + +#ifdef CONFIG_X86_32 + *((u32 *)__va(real_mode_header.startup_32_smp)) = __pa(startup_32_smp); + *((u32 *)__va(real_mode_header.boot_gdt)) = __pa(boot_gdt); +#else + *((u64 *) __va(real_mode_header.startup_64_smp)) = + (u64) __pa(secondary_startup_64); + + *((u64 *) __va(real_mode_header.level3_ident_pgt)) = + __pa(level3_ident_pgt) + _KERNPG_TABLE; + + *((u64 *) __va(real_mode_header.level3_kernel_pgt)) = + __pa(level3_kernel_pgt) + _KERNPG_TABLE; +#endif } /* diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 6e1e406..c7971ea 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -57,7 +57,7 @@ #include #include #include -#include +#include #include #include #include @@ -73,6 +73,8 @@ #include #include +#include + /* State of each CPU */ DEFINE_PER_CPU(int, cpu_state) = { 0 }; @@ -662,8 +664,12 @@ static void __cpuinit announce_cpu(int cpu, int apicid) */ static int __cpuinit do_boot_cpu(int apicid, int cpu) { + volatile u32 *trampoline_status = + (volatile u32 *) __va(real_mode_header.trampoline_status); + /* start_ip had better be page-aligned! */ + unsigned long start_ip = real_mode_header.trampoline_data; + unsigned long boot_error = 0; - unsigned long start_ip; int timeout; struct create_idle c_idle = { .cpu = cpu, @@ -713,9 +719,6 @@ do_rest: initial_code = (unsigned long)start_secondary; stack_start = c_idle.idle->thread.sp; - /* start_ip had better be page-aligned! */ - start_ip = trampoline_address(); - /* So we see what's up */ announce_cpu(cpu, apicid); @@ -778,8 +781,7 @@ do_rest: pr_debug("CPU%d: has booted.\n", cpu); } else { boot_error = 1; - if (*(volatile u32 *)TRAMPOLINE_SYM(trampoline_status) - == 0xA5A5A5A5) + if (*trampoline_status == 0xA5A5A5A5) /* trampoline started but...? */ pr_err("CPU%d: Stuck ??\n", cpu); else @@ -805,7 +807,7 @@ do_rest: } /* mark "stuck" area as not stuck */ - *(volatile u32 *)TRAMPOLINE_SYM(trampoline_status) = 0; + *trampoline_status = 0; if (get_uv_system_type() != UV_NON_UNIQUE_APIC) { /* diff --git a/arch/x86/realmode/rm/Makefile b/arch/x86/realmode/rm/Makefile index 3f851c4..56ec64f 100644 --- a/arch/x86/realmode/rm/Makefile +++ b/arch/x86/realmode/rm/Makefile @@ -13,6 +13,7 @@ always := realmode.bin realmode-y += header.o realmode-$(CONFIG_X86_32) += reboot_32.o +realmode-y += trampoline_$(BITS).o targets += $(realmode-y) diff --git a/arch/x86/realmode/rm/header.S b/arch/x86/realmode/rm/header.S index db21401..a979004 100644 --- a/arch/x86/realmode/rm/header.S +++ b/arch/x86/realmode/rm/header.S @@ -16,4 +16,15 @@ ENTRY(real_mode_header) #ifdef CONFIG_X86_32 .long pa_machine_real_restart_asm #endif + /* SMP trampoline */ + .long pa_trampoline_data + .long pa_trampoline_status +#ifdef CONFIG_X86_32 + .long pa_startup_32_smp + .long pa_boot_gdt +#else + .long pa_startup_64_smp + .long pa_level3_ident_pgt + .long pa_level3_kernel_pgt +#endif END(real_mode_header) diff --git a/arch/x86/realmode/rm/trampoline_32.S b/arch/x86/realmode/rm/trampoline_32.S new file mode 100644 index 0000000..18cb7fc --- /dev/null +++ b/arch/x86/realmode/rm/trampoline_32.S @@ -0,0 +1,86 @@ +/* + * + * Trampoline.S Derived from Setup.S by Linus Torvalds + * + * 4 Jan 1997 Michael Chastain: changed to gnu as. + * + * This is only used for booting secondary CPUs in SMP machine + * + * Entry: CS:IP point to the start of our code, we are + * in real mode with no stack, but the rest of the + * trampoline page to make our stack and everything else + * is a mystery. + * + * We jump into arch/x86/kernel/head_32.S. + * + * On entry to trampoline_data, the processor is in real mode + * with 16-bit addressing and 16-bit data. CS has some value + * and IP is zero. Thus, we load CS to the physical segment + * of the real mode code before doing anything further. + * + * The structure real_mode_header includes entries that need + * to be set up before executing this code: + * + * startup_32_smp + * boot_gdt + */ + +#include +#include +#include +#include + + .text + .code16 + .globl trampoline_data + + .balign PAGE_SIZE +trampoline_data: + wbinvd # Needed for NUMA-Q should be harmless for others + + .byte 0xea # ljmpw + .word 1f # Offset + .word real_mode_seg # Segment +1: + mov %cs, %ax # Code and data in the same place + mov %ax, %ds + + cli # We should be safe anyway + + movl $0xA5A5A5A5, trampoline_status + # write marker for master knows we're running + + /* GDT tables in non default location kernel can be beyond 16MB and + * lgdt will not be able to load the address as in real mode default + * operand size is 16bit. Use lgdtl instead to force operand size + * to 32 bit. + */ + + lidtl boot_idt_descr # load idt with 0, 0 + lgdtl boot_gdt_descr # load gdt with whatever is appropriate + + xor %ax, %ax + inc %ax # protected mode (PE) bit + lmsw %ax # into protected mode + + # flush prefetch and jump to startup_32_smp in arch/i386/kernel/head.S + ljmpl *(startup_32_smp) + + .data + .globl startup_32_smp, boot_gdt, trampoline_status + +boot_gdt_descr: + .word __BOOT_DS + 7 # gdt limit +boot_gdt: + .long 0 # gdt base + +boot_idt_descr: + .word 0 # idt limit = 0 + .long 0 # idt base = 0L + +trampoline_status: + .long 0 + +startup_32_smp: + .long 0x00000000 + .word __BOOT_CS, 0 diff --git a/arch/x86/realmode/rm/trampoline_64.S b/arch/x86/realmode/rm/trampoline_64.S new file mode 100644 index 0000000..063da00 --- /dev/null +++ b/arch/x86/realmode/rm/trampoline_64.S @@ -0,0 +1,175 @@ +/* + * + * Trampoline.S Derived from Setup.S by Linus Torvalds + * + * 4 Jan 1997 Michael Chastain: changed to gnu as. + * 15 Sept 2005 Eric Biederman: 64bit PIC support + * + * Entry: CS:IP point to the start of our code, we are + * in real mode with no stack, but the rest of the + * trampoline page to make our stack and everything else + * is a mystery. + * + * On entry to trampoline_data, the processor is in real mode + * with 16-bit addressing and 16-bit data. CS has some value + * and IP is zero. Thus, data addresses need to be absolute + * (no relocation) and are taken with regard to r_base. + * + * With the addition of trampoline_level4_pgt this code can + * now enter a 64bit kernel that lives at arbitrary 64bit + * physical addresses. + * + * If you work on this file, check the object module with objdump + * --full-contents --reloc to make sure there are no relocation + * entries. + */ + +#include +#include +#include +#include +#include +#include +#include + + .text + .balign PAGE_SIZE + .code16 + +ENTRY(trampoline_data) + cli # We should be safe anyway + wbinvd + + .byte 0xea # ljmpw + .word 1f # Offset + .word real_mode_seg # Segment +1: + mov %cs, %ax # Code and data in the same place + mov %ax, %ds + mov %ax, %es + mov %ax, %ss + + movl $0xA5A5A5A5, trampoline_status + # write marker for master knows we're running + + # Setup stack + movw $trampoline_stack_end, %sp + + call verify_cpu # Verify the cpu supports long mode + testl %eax, %eax # Check for return code + jnz no_longmode + + /* + * GDT tables in non default location kernel can be beyond 16MB and + * lgdt will not be able to load the address as in real mode default + * operand size is 16bit. Use lgdtl instead to force operand size + * to 32 bit. + */ + + lidtl tidt # load idt with 0, 0 + lgdtl tgdt # load gdt with whatever is appropriate + + mov $X86_CR0_PE, %ax # protected mode (PE) bit + lmsw %ax # into protected mode + + # flush prefetch and jump to startup_32 + ljmpl *(startup_32_vector) + +no_longmode: + hlt + jmp no_longmode +#include "../kernel/verify_cpu.S" + + .code32 + .balign 4 +ENTRY(startup_32) + movl $__KERNEL_DS, %eax # Initialize the %ds segment register + movl %eax, %ds + + movl $X86_CR4_PAE, %eax + movl %eax, %cr4 # Enable PAE mode + + movl pa_startup_64_smp, %esi + movl pa_startup_64_smp_high, %edi + + # Setup trampoline 4 level pagetables + leal pa_trampoline_level4_pgt, %eax + movl %eax, %cr3 + + movl $MSR_EFER, %ecx + movl $(1 << _EFER_LME), %eax # Enable Long Mode + xorl %edx, %edx + wrmsr + + # Enable paging and in turn activate Long Mode + # Enable protected mode + movl $(X86_CR0_PG | X86_CR0_PE), %eax + movl %eax, %cr0 + + /* + * At this point we're in long mode but in 32bit compatibility mode + * with EFER.LME = 1, CS.L = 0, CS.D = 1 (and in turn + * EFER.LMA = 1). Now we want to jump in 64bit mode, to do that we use + * the new gdt/idt that has __KERNEL_CS with CS.L = 1. + */ + ljmpl *(pa_startup_64_vector) + + .code64 + .balign 4 +ENTRY(startup_64) + # Now jump into the kernel using virtual addresses + movl %edi, %eax + shlq $32, %rax + addl %esi, %eax + jmp *%rax + + # Careful these need to be in the same 64K segment as the above; +tidt: + .word 0 # idt limit = 0 + .word 0, 0 # idt base = 0L + + # Duplicate the global descriptor table + # so the kernel can live anywhere + .balign 4 + .globl tgdt +tgdt: + .short tgdt_end - tgdt # gdt limit + .long pa_tgdt + .short 0 + .quad 0x00cf9b000000ffff # __KERNEL32_CS + .quad 0x00af9b000000ffff # __KERNEL_CS + .quad 0x00cf93000000ffff # __KERNEL_DS +tgdt_end: + + .balign 4 +startup_32_vector: + .long pa_startup_32 + .word __KERNEL32_CS, 0 + + .balign 4 + .globl startup_64_vector +startup_64_vector: + .long pa_startup_64 + .word __KERNEL_CS, 0 + + .data + + .balign 4 +ENTRY(trampoline_status) + .long 0 + +trampoline_stack: + .org 0x1000 +trampoline_stack_end: + + .globl level3_ident_pgt + .globl level3_kernel_pgt +ENTRY(trampoline_level4_pgt) + level3_ident_pgt: .quad 0 + .fill 510,8,0 + level3_kernel_pgt: .quad 0 + + .globl startup_64_smp + .globl startup_64_smp_high +startup_64_smp: .long 0 +startup_64_smp_high: .long 0 -- cgit v0.10.2 From c9b77ccb52a5c77233b0e557b7d4417b00ef4012 Mon Sep 17 00:00:00 2001 From: Jarkko Sakkinen Date: Tue, 8 May 2012 21:22:29 +0300 Subject: x86, realmode: Move ACPI wakeup to unified realmode code Migrated ACPI wakeup code to the real-mode blob. Code existing in .x86_trampoline can be completely removed. Static descriptor table in wakeup_asm.S is courtesy of H. Peter Anvin. Signed-off-by: Jarkko Sakkinen Link: http://lkml.kernel.org/r/1336501366-28617-7-git-send-email-jarkko.sakkinen@intel.com Cc: Rafael J. Wysocki Cc: Len Brown Signed-off-by: H. Peter Anvin diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h index 610001d..724aa44 100644 --- a/arch/x86/include/asm/acpi.h +++ b/arch/x86/include/asm/acpi.h @@ -29,7 +29,6 @@ #include #include #include -#include #define COMPILER_DEPENDENT_INT64 long long #define COMPILER_DEPENDENT_UINT64 unsigned long long @@ -118,7 +117,6 @@ static inline void acpi_disable_pci(void) extern int acpi_suspend_lowlevel(void); extern const unsigned char acpi_wakeup_code[]; -#define acpi_wakeup_address (__pa(TRAMPOLINE_SYM(acpi_wakeup_code))) /* early initialization routine */ extern void acpi_reserve_wakeup_memory(void); diff --git a/arch/x86/include/asm/realmode.h b/arch/x86/include/asm/realmode.h index 9b4a5da..1bfc74d 100644 --- a/arch/x86/include/asm/realmode.h +++ b/arch/x86/include/asm/realmode.h @@ -24,6 +24,10 @@ struct real_mode_header { u32 level3_ident_pgt; u32 level3_kernel_pgt; #endif +#ifdef CONFIG_ACPI_SLEEP + u32 wakeup_start; + u32 wakeup_header; +#endif } __attribute__((__packed__)); extern struct real_mode_header real_mode_header; diff --git a/arch/x86/include/asm/trampoline.h b/arch/x86/include/asm/trampoline.h deleted file mode 100644 index feca311..0000000 --- a/arch/x86/include/asm/trampoline.h +++ /dev/null @@ -1,39 +0,0 @@ -#ifndef _ASM_X86_TRAMPOLINE_H -#define _ASM_X86_TRAMPOLINE_H - -#ifndef __ASSEMBLY__ - -#include -#include - -/* - * Trampoline 80x86 program as an array. These are in the init rodata - * segment, but that's okay, because we only care about the relative - * addresses of the symbols. - */ -extern const unsigned char x86_trampoline_start []; -extern const unsigned char x86_trampoline_end []; -extern unsigned char *x86_trampoline_base; - -extern unsigned long init_rsp; -extern unsigned long initial_code; -extern unsigned long initial_gs; - -extern void __init setup_trampolines(void); - -extern const unsigned char trampoline_data[]; -extern const unsigned char trampoline_status[]; - -#define TRAMPOLINE_SYM(x) \ - ((void *)(x86_trampoline_base + \ - ((const unsigned char *)(x) - x86_trampoline_start))) - -/* Address of the SMP trampoline */ -static inline unsigned long trampoline_address(void) -{ - return virt_to_phys(TRAMPOLINE_SYM(trampoline_data)); -} - -#endif /* __ASSEMBLY__ */ - -#endif /* _ASM_X86_TRAMPOLINE_H */ diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index b71ef35..4a20f44 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -35,7 +35,6 @@ obj-y += tsc.o io_delay.o rtc.o obj-y += pci-iommu_table.o obj-y += resource.o -obj-y += trampoline.o trampoline_$(BITS).o obj-y += realmode.o obj-y += process.o obj-y += i387.o xsave.o diff --git a/arch/x86/kernel/acpi/Makefile b/arch/x86/kernel/acpi/Makefile index 6f35260..163b225 100644 --- a/arch/x86/kernel/acpi/Makefile +++ b/arch/x86/kernel/acpi/Makefile @@ -1,14 +1,7 @@ -subdir- := realmode - obj-$(CONFIG_ACPI) += boot.o -obj-$(CONFIG_ACPI_SLEEP) += sleep.o wakeup_rm.o wakeup_$(BITS).o +obj-$(CONFIG_ACPI_SLEEP) += sleep.o wakeup_$(BITS).o ifneq ($(CONFIG_ACPI_PROCESSOR),) obj-y += cstate.o endif -$(obj)/wakeup_rm.o: $(obj)/realmode/wakeup.bin - -$(obj)/realmode/wakeup.bin: FORCE - $(Q)$(MAKE) $(build)=$(obj)/realmode - diff --git a/arch/x86/kernel/acpi/realmode/.gitignore b/arch/x86/kernel/acpi/realmode/.gitignore deleted file mode 100644 index 58f1f48..0000000 --- a/arch/x86/kernel/acpi/realmode/.gitignore +++ /dev/null @@ -1,3 +0,0 @@ -wakeup.bin -wakeup.elf -wakeup.lds diff --git a/arch/x86/kernel/acpi/realmode/Makefile b/arch/x86/kernel/acpi/realmode/Makefile deleted file mode 100644 index 6a564ac..0000000 --- a/arch/x86/kernel/acpi/realmode/Makefile +++ /dev/null @@ -1,59 +0,0 @@ -# -# arch/x86/kernel/acpi/realmode/Makefile -# -# This file is subject to the terms and conditions of the GNU General Public -# License. See the file "COPYING" in the main directory of this archive -# for more details. -# - -always := wakeup.bin -targets := wakeup.elf wakeup.lds - -wakeup-y += wakeup.o wakemain.o video-mode.o copy.o bioscall.o regs.o - -# The link order of the video-*.o modules can matter. In particular, -# video-vga.o *must* be listed first, followed by video-vesa.o. -# Hardware-specific drivers should follow in the order they should be -# probed, and video-bios.o should typically be last. -wakeup-y += video-vga.o -wakeup-y += video-vesa.o -wakeup-y += video-bios.o - -targets += $(wakeup-y) - -bootsrc := $(src)/../../../boot - -# --------------------------------------------------------------------------- - -# How to compile the 16-bit code. Note we always compile for -march=i386, -# that way we can complain to the user if the CPU is insufficient. -# Compile with _SETUP since this is similar to the boot-time setup code. -KBUILD_CFLAGS := $(LINUXINCLUDE) -g -Os -D_SETUP -D_WAKEUP -D__KERNEL__ \ - -I$(srctree)/$(bootsrc) \ - $(cflags-y) \ - -Wall -Wstrict-prototypes \ - -march=i386 -mregparm=3 \ - -include $(srctree)/$(bootsrc)/code16gcc.h \ - -fno-strict-aliasing -fomit-frame-pointer \ - $(call cc-option, -ffreestanding) \ - $(call cc-option, -fno-toplevel-reorder,\ - $(call cc-option, -fno-unit-at-a-time)) \ - $(call cc-option, -fno-stack-protector) \ - $(call cc-option, -mpreferred-stack-boundary=2) -KBUILD_CFLAGS += $(call cc-option, -m32) -KBUILD_AFLAGS := $(KBUILD_CFLAGS) -D__ASSEMBLY__ -GCOV_PROFILE := n - -WAKEUP_OBJS = $(addprefix $(obj)/,$(wakeup-y)) - -LDFLAGS_wakeup.elf := -T - -CPPFLAGS_wakeup.lds += -P -C - -$(obj)/wakeup.elf: $(obj)/wakeup.lds $(WAKEUP_OBJS) FORCE - $(call if_changed,ld) - -OBJCOPYFLAGS_wakeup.bin := -O binary - -$(obj)/wakeup.bin: $(obj)/wakeup.elf FORCE - $(call if_changed,objcopy) diff --git a/arch/x86/kernel/acpi/realmode/bioscall.S b/arch/x86/kernel/acpi/realmode/bioscall.S deleted file mode 100644 index f51eb0b..0000000 --- a/arch/x86/kernel/acpi/realmode/bioscall.S +++ /dev/null @@ -1 +0,0 @@ -#include "../../../boot/bioscall.S" diff --git a/arch/x86/kernel/acpi/realmode/copy.S b/arch/x86/kernel/acpi/realmode/copy.S deleted file mode 100644 index dc59ebe..0000000 --- a/arch/x86/kernel/acpi/realmode/copy.S +++ /dev/null @@ -1 +0,0 @@ -#include "../../../boot/copy.S" diff --git a/arch/x86/kernel/acpi/realmode/regs.c b/arch/x86/kernel/acpi/realmode/regs.c deleted file mode 100644 index 6206033..0000000 --- a/arch/x86/kernel/acpi/realmode/regs.c +++ /dev/null @@ -1 +0,0 @@ -#include "../../../boot/regs.c" diff --git a/arch/x86/kernel/acpi/realmode/video-bios.c b/arch/x86/kernel/acpi/realmode/video-bios.c deleted file mode 100644 index 7deabc1..0000000 --- a/arch/x86/kernel/acpi/realmode/video-bios.c +++ /dev/null @@ -1 +0,0 @@ -#include "../../../boot/video-bios.c" diff --git a/arch/x86/kernel/acpi/realmode/video-mode.c b/arch/x86/kernel/acpi/realmode/video-mode.c deleted file mode 100644 index 328ad20..0000000 --- a/arch/x86/kernel/acpi/realmode/video-mode.c +++ /dev/null @@ -1 +0,0 @@ -#include "../../../boot/video-mode.c" diff --git a/arch/x86/kernel/acpi/realmode/video-vesa.c b/arch/x86/kernel/acpi/realmode/video-vesa.c deleted file mode 100644 index 9dbb967..0000000 --- a/arch/x86/kernel/acpi/realmode/video-vesa.c +++ /dev/null @@ -1 +0,0 @@ -#include "../../../boot/video-vesa.c" diff --git a/arch/x86/kernel/acpi/realmode/video-vga.c b/arch/x86/kernel/acpi/realmode/video-vga.c deleted file mode 100644 index bcc8125..0000000 --- a/arch/x86/kernel/acpi/realmode/video-vga.c +++ /dev/null @@ -1 +0,0 @@ -#include "../../../boot/video-vga.c" diff --git a/arch/x86/kernel/acpi/realmode/wakemain.c b/arch/x86/kernel/acpi/realmode/wakemain.c deleted file mode 100644 index 883962d..0000000 --- a/arch/x86/kernel/acpi/realmode/wakemain.c +++ /dev/null @@ -1,81 +0,0 @@ -#include "wakeup.h" -#include "boot.h" - -static void udelay(int loops) -{ - while (loops--) - io_delay(); /* Approximately 1 us */ -} - -static void beep(unsigned int hz) -{ - u8 enable; - - if (!hz) { - enable = 0x00; /* Turn off speaker */ - } else { - u16 div = 1193181/hz; - - outb(0xb6, 0x43); /* Ctr 2, squarewave, load, binary */ - io_delay(); - outb(div, 0x42); /* LSB of counter */ - io_delay(); - outb(div >> 8, 0x42); /* MSB of counter */ - io_delay(); - - enable = 0x03; /* Turn on speaker */ - } - inb(0x61); /* Dummy read of System Control Port B */ - io_delay(); - outb(enable, 0x61); /* Enable timer 2 output to speaker */ - io_delay(); -} - -#define DOT_HZ 880 -#define DASH_HZ 587 -#define US_PER_DOT 125000 - -/* Okay, this is totally silly, but it's kind of fun. */ -static void send_morse(const char *pattern) -{ - char s; - - while ((s = *pattern++)) { - switch (s) { - case '.': - beep(DOT_HZ); - udelay(US_PER_DOT); - beep(0); - udelay(US_PER_DOT); - break; - case '-': - beep(DASH_HZ); - udelay(US_PER_DOT * 3); - beep(0); - udelay(US_PER_DOT); - break; - default: /* Assume it's a space */ - udelay(US_PER_DOT * 3); - break; - } - } -} - -void main(void) -{ - /* Kill machine if structures are wrong */ - if (wakeup_header.real_magic != 0x12345678) - while (1); - - if (wakeup_header.realmode_flags & 4) - send_morse("...-"); - - if (wakeup_header.realmode_flags & 1) - asm volatile("lcallw $0xc000,$3"); - - if (wakeup_header.realmode_flags & 2) { - /* Need to call BIOS */ - probe_cards(0); - set_mode(wakeup_header.video_mode); - } -} diff --git a/arch/x86/kernel/acpi/realmode/wakeup.S b/arch/x86/kernel/acpi/realmode/wakeup.S deleted file mode 100644 index b4fd836..0000000 --- a/arch/x86/kernel/acpi/realmode/wakeup.S +++ /dev/null @@ -1,170 +0,0 @@ -/* - * ACPI wakeup real mode startup stub - */ -#include -#include -#include -#include -#include -#include "wakeup.h" - - .code16 - .section ".jump", "ax" - .globl _start -_start: - cli - jmp wakeup_code - -/* This should match the structure in wakeup.h */ - .section ".header", "a" - .globl wakeup_header -wakeup_header: -video_mode: .short 0 /* Video mode number */ -pmode_return: .byte 0x66, 0xea /* ljmpl */ - .long 0 /* offset goes here */ - .short __KERNEL_CS -pmode_cr0: .long 0 /* Saved %cr0 */ -pmode_cr3: .long 0 /* Saved %cr3 */ -pmode_cr4: .long 0 /* Saved %cr4 */ -pmode_efer: .quad 0 /* Saved EFER */ -pmode_gdt: .quad 0 -pmode_misc_en: .quad 0 /* Saved MISC_ENABLE MSR */ -pmode_behavior: .long 0 /* Wakeup behavior flags */ -realmode_flags: .long 0 -real_magic: .long 0 -trampoline_segment: .word 0 -_pad1: .byte 0 -wakeup_jmp: .byte 0xea /* ljmpw */ -wakeup_jmp_off: .word 3f -wakeup_jmp_seg: .word 0 -wakeup_gdt: .quad 0, 0, 0 -signature: .long WAKEUP_HEADER_SIGNATURE - - .text - .code16 -wakeup_code: - cld - - /* Apparently some dimwit BIOS programmers don't know how to - program a PM to RM transition, and we might end up here with - junk in the data segment descriptor registers. The only way - to repair that is to go into PM and fix it ourselves... */ - movw $16, %cx - lgdtl %cs:wakeup_gdt - movl %cr0, %eax - orb $X86_CR0_PE, %al - movl %eax, %cr0 - jmp 1f -1: ljmpw $8, $2f -2: - movw %cx, %ds - movw %cx, %es - movw %cx, %ss - movw %cx, %fs - movw %cx, %gs - - andb $~X86_CR0_PE, %al - movl %eax, %cr0 - jmp wakeup_jmp -3: - /* Set up segments */ - movw %cs, %ax - movw %ax, %ds - movw %ax, %es - movw %ax, %ss - lidtl wakeup_idt - - movl $wakeup_stack_end, %esp - - /* Clear the EFLAGS */ - pushl $0 - popfl - - /* Check header signature... */ - movl signature, %eax - cmpl $WAKEUP_HEADER_SIGNATURE, %eax - jne bogus_real_magic - - /* Check we really have everything... */ - movl end_signature, %eax - cmpl $WAKEUP_END_SIGNATURE, %eax - jne bogus_real_magic - - /* Call the C code */ - calll main - - /* Restore MISC_ENABLE before entering protected mode, in case - BIOS decided to clear XD_DISABLE during S3. */ - movl pmode_behavior, %eax - btl $WAKEUP_BEHAVIOR_RESTORE_MISC_ENABLE, %eax - jnc 1f - - movl pmode_misc_en, %eax - movl pmode_misc_en + 4, %edx - movl $MSR_IA32_MISC_ENABLE, %ecx - wrmsr -1: - - /* Do any other stuff... */ - -#ifndef CONFIG_64BIT - /* This could also be done in C code... */ - movl pmode_cr3, %eax - movl %eax, %cr3 - - movl pmode_cr4, %ecx - jecxz 1f - movl %ecx, %cr4 -1: - movl pmode_efer, %eax - movl pmode_efer + 4, %edx - movl %eax, %ecx - orl %edx, %ecx - jz 1f - movl $MSR_EFER, %ecx - wrmsr -1: - - lgdtl pmode_gdt - - /* This really couldn't... */ - movl pmode_cr0, %eax - movl %eax, %cr0 - jmp pmode_return -#else - pushw $0 - pushw trampoline_segment - pushw $0 - lret -#endif - -bogus_real_magic: -1: - hlt - jmp 1b - - .data - .balign 8 - - /* This is the standard real-mode IDT */ -wakeup_idt: - .word 0xffff /* limit */ - .long 0 /* address */ - .word 0 - - .globl HEAP, heap_end -HEAP: - .long wakeup_heap -heap_end: - .long wakeup_stack - - .bss -wakeup_heap: - .space 2048 -wakeup_stack: - .space 2048 -wakeup_stack_end: - - .section ".signature","a" -end_signature: - .long WAKEUP_END_SIGNATURE diff --git a/arch/x86/kernel/acpi/realmode/wakeup.h b/arch/x86/kernel/acpi/realmode/wakeup.h deleted file mode 100644 index 97a29e1..0000000 --- a/arch/x86/kernel/acpi/realmode/wakeup.h +++ /dev/null @@ -1,48 +0,0 @@ -/* - * Definitions for the wakeup data structure at the head of the - * wakeup code. - */ - -#ifndef ARCH_X86_KERNEL_ACPI_RM_WAKEUP_H -#define ARCH_X86_KERNEL_ACPI_RM_WAKEUP_H - -#ifndef __ASSEMBLY__ -#include - -/* This must match data at wakeup.S */ -struct wakeup_header { - u16 video_mode; /* Video mode number */ - u16 _jmp1; /* ljmpl opcode, 32-bit only */ - u32 pmode_entry; /* Protected mode resume point, 32-bit only */ - u16 _jmp2; /* CS value, 32-bit only */ - u32 pmode_cr0; /* Protected mode cr0 */ - u32 pmode_cr3; /* Protected mode cr3 */ - u32 pmode_cr4; /* Protected mode cr4 */ - u32 pmode_efer_low; /* Protected mode EFER */ - u32 pmode_efer_high; - u64 pmode_gdt; - u32 pmode_misc_en_low; /* Protected mode MISC_ENABLE */ - u32 pmode_misc_en_high; - u32 pmode_behavior; /* Wakeup routine behavior flags */ - u32 realmode_flags; - u32 real_magic; - u16 trampoline_segment; /* segment with trampoline code, 64-bit only */ - u8 _pad1; - u8 wakeup_jmp; - u16 wakeup_jmp_off; - u16 wakeup_jmp_seg; - u64 wakeup_gdt[3]; - u32 signature; /* To check we have correct structure */ -} __attribute__((__packed__)); - -extern struct wakeup_header wakeup_header; -#endif - -#define WAKEUP_HEADER_OFFSET 8 -#define WAKEUP_HEADER_SIGNATURE 0x51ee1111 -#define WAKEUP_END_SIGNATURE 0x65a22c82 - -/* Wakeup behavior bits */ -#define WAKEUP_BEHAVIOR_RESTORE_MISC_ENABLE 0 - -#endif /* ARCH_X86_KERNEL_ACPI_RM_WAKEUP_H */ diff --git a/arch/x86/kernel/acpi/realmode/wakeup.lds.S b/arch/x86/kernel/acpi/realmode/wakeup.lds.S deleted file mode 100644 index d4f8010..0000000 --- a/arch/x86/kernel/acpi/realmode/wakeup.lds.S +++ /dev/null @@ -1,62 +0,0 @@ -/* - * wakeup.ld - * - * Linker script for the real-mode wakeup code - */ -#undef i386 -#include "wakeup.h" - -OUTPUT_FORMAT("elf32-i386", "elf32-i386", "elf32-i386") -OUTPUT_ARCH(i386) -ENTRY(_start) - -SECTIONS -{ - . = 0; - .jump : { - *(.jump) - } = 0x90909090 - - . = WAKEUP_HEADER_OFFSET; - .header : { - *(.header) - } - - . = ALIGN(16); - .text : { - *(.text*) - } = 0x90909090 - - . = ALIGN(16); - .rodata : { - *(.rodata*) - } - - .videocards : { - video_cards = .; - *(.videocards) - video_cards_end = .; - } - - . = ALIGN(16); - .data : { - *(.data*) - } - - . = ALIGN(16); - .bss : { - __bss_start = .; - *(.bss) - __bss_end = .; - } - - .signature : { - *(.signature) - } - - _end = .; - - /DISCARD/ : { - *(.note*) - } -} diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c index 146a49c..d941b62 100644 --- a/arch/x86/kernel/acpi/sleep.c +++ b/arch/x86/kernel/acpi/sleep.c @@ -14,8 +14,9 @@ #include #include #include +#include -#include "realmode/wakeup.h" +#include "../../realmode/rm/wakeup/wakeup.h" #include "sleep.h" unsigned long acpi_realmode_flags; @@ -36,13 +37,9 @@ asmlinkage void acpi_enter_s3(void) */ int acpi_suspend_lowlevel(void) { - struct wakeup_header *header; - /* address in low memory of the wakeup routine. */ - char *acpi_realmode; + struct wakeup_header *header = + (struct wakeup_header *) __va(real_mode_header.wakeup_header); - acpi_realmode = TRAMPOLINE_SYM(acpi_wakeup_code); - - header = (struct wakeup_header *)(acpi_realmode + WAKEUP_HEADER_OFFSET); if (header->signature != WAKEUP_HEADER_SIGNATURE) { printk(KERN_ERR "wakeup header does not match\n"); return -EINVAL; @@ -50,27 +47,6 @@ int acpi_suspend_lowlevel(void) header->video_mode = saved_video_mode; - header->wakeup_jmp_seg = acpi_wakeup_address >> 4; - - /* - * Set up the wakeup GDT. We set these up as Big Real Mode, - * that is, with limits set to 4 GB. At least the Lenovo - * Thinkpad X61 is known to need this for the video BIOS - * initialization quirk to work; this is likely to also - * be the case for other laptops or integrated video devices. - */ - - /* GDT[0]: GDT self-pointer */ - header->wakeup_gdt[0] = - (u64)(sizeof(header->wakeup_gdt) - 1) + - ((u64)__pa(&header->wakeup_gdt) << 16); - /* GDT[1]: big real mode-like code segment */ - header->wakeup_gdt[1] = - GDT_ENTRY(0x809b, acpi_wakeup_address, 0xfffff); - /* GDT[2]: big real mode-like data segment */ - header->wakeup_gdt[2] = - GDT_ENTRY(0x8093, acpi_wakeup_address, 0xfffff); - #ifndef CONFIG_64BIT store_gdt((struct desc_ptr *)&header->pmode_gdt); @@ -95,7 +71,6 @@ int acpi_suspend_lowlevel(void) header->pmode_cr3 = (u32)__pa(&initial_page_table); saved_magic = 0x12345678; #else /* CONFIG_64BIT */ - header->trampoline_segment = trampoline_address() >> 4; #ifdef CONFIG_SMP stack_start = (unsigned long)temp_stack + sizeof(temp_stack); early_gdt_descr.address = diff --git a/arch/x86/kernel/acpi/sleep.h b/arch/x86/kernel/acpi/sleep.h index d68677a..5653a57 100644 --- a/arch/x86/kernel/acpi/sleep.h +++ b/arch/x86/kernel/acpi/sleep.h @@ -2,8 +2,8 @@ * Variables and functions used by the code in sleep.c */ -#include #include +#include extern unsigned long saved_video_mode; extern long saved_magic; diff --git a/arch/x86/kernel/acpi/wakeup_rm.S b/arch/x86/kernel/acpi/wakeup_rm.S deleted file mode 100644 index 63b8ab5..0000000 --- a/arch/x86/kernel/acpi/wakeup_rm.S +++ /dev/null @@ -1,12 +0,0 @@ -/* - * Wrapper script for the realmode binary as a transport object - * before copying to low memory. - */ -#include - - .section ".x86_trampoline","a" - .balign PAGE_SIZE - .globl acpi_wakeup_code -acpi_wakeup_code: - .incbin "arch/x86/kernel/acpi/realmode/wakeup.bin" - .size acpi_wakeup_code, .-acpi_wakeup_code diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c index 51ff186..c18f59d 100644 --- a/arch/x86/kernel/head32.c +++ b/arch/x86/kernel/head32.c @@ -14,7 +14,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 3a3b779..037df57 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -24,7 +24,6 @@ #include #include #include -#include #include static void __init zap_identity_mappings(void) diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index ca470e4..f44d311 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -27,7 +27,6 @@ #include #include #include -#include #include #include diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 56e4124..7a14fec 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -73,7 +73,6 @@ #include #include -#include #include #include #include @@ -918,7 +917,6 @@ void __init setup_arch(char **cmdline_p) printk(KERN_DEBUG "initial memory mapped : 0 - %08lx\n", max_pfn_mapped< #include -#include +#include #include #include #include @@ -201,7 +201,8 @@ static int tboot_setup_sleep(void) add_mac_region(e820.map[i].addr, e820.map[i].size); } - tboot->acpi_sinfo.kernel_s3_resume_vector = acpi_wakeup_address; + tboot->acpi_sinfo.kernel_s3_resume_vector = + real_mode_header.wakeup_start; return 0; } diff --git a/arch/x86/kernel/trampoline.c b/arch/x86/kernel/trampoline.c deleted file mode 100644 index a73b610..0000000 --- a/arch/x86/kernel/trampoline.c +++ /dev/null @@ -1,42 +0,0 @@ -#include -#include - -#include -#include -#include - -unsigned char *x86_trampoline_base; - -void __init setup_trampolines(void) -{ - phys_addr_t mem; - size_t size = PAGE_ALIGN(x86_trampoline_end - x86_trampoline_start); - - /* Has to be in very low memory so we can execute real-mode AP code. */ - mem = memblock_find_in_range(0, 1<<20, size, PAGE_SIZE); - if (!mem) - panic("Cannot allocate trampoline\n"); - - x86_trampoline_base = __va(mem); - memblock_reserve(mem, size); - - printk(KERN_DEBUG "Base memory trampoline at [%p] %llx size %zu\n", - x86_trampoline_base, (unsigned long long)mem, size); - - memcpy(x86_trampoline_base, x86_trampoline_start, size); -} - -/* - * setup_trampolines() gets called very early, to guarantee the - * availability of low memory. This is before the proper kernel page - * tables are set up, so we cannot set page permissions in that - * function. Thus, we use an arch_initcall instead. - */ -static int __init configure_trampolines(void) -{ - size_t size = PAGE_ALIGN(x86_trampoline_end - x86_trampoline_start); - - set_memory_x((unsigned long)x86_trampoline_base, size >> PAGE_SHIFT); - return 0; -} -arch_initcall(configure_trampolines); diff --git a/arch/x86/kernel/trampoline_32.S b/arch/x86/kernel/trampoline_32.S deleted file mode 100644 index 451c0a7..0000000 --- a/arch/x86/kernel/trampoline_32.S +++ /dev/null @@ -1,83 +0,0 @@ -/* - * - * Trampoline.S Derived from Setup.S by Linus Torvalds - * - * 4 Jan 1997 Michael Chastain: changed to gnu as. - * - * This is only used for booting secondary CPUs in SMP machine - * - * Entry: CS:IP point to the start of our code, we are - * in real mode with no stack, but the rest of the - * trampoline page to make our stack and everything else - * is a mystery. - * - * We jump into arch/x86/kernel/head_32.S. - * - * On entry to trampoline_data, the processor is in real mode - * with 16-bit addressing and 16-bit data. CS has some value - * and IP is zero. Thus, data addresses need to be absolute - * (no relocation) and are taken with regard to r_base. - * - * If you work on this file, check the object module with - * objdump --reloc to make sure there are no relocation - * entries except for: - * - * TYPE VALUE - * R_386_32 startup_32_smp - * R_386_32 boot_gdt - */ - -#include -#include -#include -#include - -#ifdef CONFIG_SMP - - .section ".x86_trampoline","a" - .balign PAGE_SIZE - .code16 - -ENTRY(trampoline_data) -r_base = . - wbinvd # Needed for NUMA-Q should be harmless for others - mov %cs, %ax # Code and data in the same place - mov %ax, %ds - - cli # We should be safe anyway - - movl $0xA5A5A5A5, trampoline_status - r_base - # write marker for master knows we're running - - /* GDT tables in non default location kernel can be beyond 16MB and - * lgdt will not be able to load the address as in real mode default - * operand size is 16bit. Use lgdtl instead to force operand size - * to 32 bit. - */ - - lidtl boot_idt_descr - r_base # load idt with 0, 0 - lgdtl boot_gdt_descr - r_base # load gdt with whatever is appropriate - - xor %ax, %ax - inc %ax # protected mode (PE) bit - lmsw %ax # into protected mode - # flush prefetch and jump to startup_32_smp in arch/i386/kernel/head.S - ljmpl $__BOOT_CS, $(startup_32_smp-__PAGE_OFFSET) - - # These need to be in the same 64K segment as the above; - # hence we don't use the boot_gdt_descr defined in head.S -boot_gdt_descr: - .word __BOOT_DS + 7 # gdt limit - .long boot_gdt - __PAGE_OFFSET # gdt base - -boot_idt_descr: - .word 0 # idt limit = 0 - .long 0 # idt base = 0L - -ENTRY(trampoline_status) - .long 0 - -.globl trampoline_end -trampoline_end: - -#endif /* CONFIG_SMP */ diff --git a/arch/x86/kernel/trampoline_64.S b/arch/x86/kernel/trampoline_64.S deleted file mode 100644 index 09ff517..0000000 --- a/arch/x86/kernel/trampoline_64.S +++ /dev/null @@ -1,171 +0,0 @@ -/* - * - * Trampoline.S Derived from Setup.S by Linus Torvalds - * - * 4 Jan 1997 Michael Chastain: changed to gnu as. - * 15 Sept 2005 Eric Biederman: 64bit PIC support - * - * Entry: CS:IP point to the start of our code, we are - * in real mode with no stack, but the rest of the - * trampoline page to make our stack and everything else - * is a mystery. - * - * On entry to trampoline_data, the processor is in real mode - * with 16-bit addressing and 16-bit data. CS has some value - * and IP is zero. Thus, data addresses need to be absolute - * (no relocation) and are taken with regard to r_base. - * - * With the addition of trampoline_level4_pgt this code can - * now enter a 64bit kernel that lives at arbitrary 64bit - * physical addresses. - * - * If you work on this file, check the object module with objdump - * --full-contents --reloc to make sure there are no relocation - * entries. - */ - -#include -#include -#include -#include -#include -#include -#include - - .section ".x86_trampoline","a" - .balign PAGE_SIZE - .code16 - -ENTRY(trampoline_data) -r_base = . - cli # We should be safe anyway - wbinvd - mov %cs, %ax # Code and data in the same place - mov %ax, %ds - mov %ax, %es - mov %ax, %ss - - - movl $0xA5A5A5A5, trampoline_status - r_base - # write marker for master knows we're running - - # Setup stack - movw $(trampoline_stack_end - r_base), %sp - - call verify_cpu # Verify the cpu supports long mode - testl %eax, %eax # Check for return code - jnz no_longmode - - mov %cs, %ax - movzx %ax, %esi # Find the 32bit trampoline location - shll $4, %esi - - # Fixup the absolute vectors - leal (startup_32 - r_base)(%esi), %eax - movl %eax, startup_32_vector - r_base - leal (startup_64 - r_base)(%esi), %eax - movl %eax, startup_64_vector - r_base - leal (tgdt - r_base)(%esi), %eax - movl %eax, (tgdt + 2 - r_base) - - /* - * GDT tables in non default location kernel can be beyond 16MB and - * lgdt will not be able to load the address as in real mode default - * operand size is 16bit. Use lgdtl instead to force operand size - * to 32 bit. - */ - - lidtl tidt - r_base # load idt with 0, 0 - lgdtl tgdt - r_base # load gdt with whatever is appropriate - - mov $X86_CR0_PE, %ax # protected mode (PE) bit - lmsw %ax # into protected mode - - # flush prefetch and jump to startup_32 - ljmpl *(startup_32_vector - r_base) - - .code32 - .balign 4 -startup_32: - movl $__KERNEL_DS, %eax # Initialize the %ds segment register - movl %eax, %ds - - movl $X86_CR4_PAE, %eax - movl %eax, %cr4 # Enable PAE mode - - # Setup trampoline 4 level pagetables - leal (trampoline_level4_pgt - r_base)(%esi), %eax - movl %eax, %cr3 - - movl $MSR_EFER, %ecx - movl $(1 << _EFER_LME), %eax # Enable Long Mode - xorl %edx, %edx - wrmsr - - # Enable paging and in turn activate Long Mode - # Enable protected mode - movl $(X86_CR0_PG | X86_CR0_PE), %eax - movl %eax, %cr0 - - /* - * At this point we're in long mode but in 32bit compatibility mode - * with EFER.LME = 1, CS.L = 0, CS.D = 1 (and in turn - * EFER.LMA = 1). Now we want to jump in 64bit mode, to do that we use - * the new gdt/idt that has __KERNEL_CS with CS.L = 1. - */ - ljmp *(startup_64_vector - r_base)(%esi) - - .code64 - .balign 4 -startup_64: - # Now jump into the kernel using virtual addresses - movq $secondary_startup_64, %rax - jmp *%rax - - .code16 -no_longmode: - hlt - jmp no_longmode -#include "verify_cpu.S" - - .balign 4 - # Careful these need to be in the same 64K segment as the above; -tidt: - .word 0 # idt limit = 0 - .word 0, 0 # idt base = 0L - - # Duplicate the global descriptor table - # so the kernel can live anywhere - .balign 4 -tgdt: - .short tgdt_end - tgdt # gdt limit - .long tgdt - r_base - .short 0 - .quad 0x00cf9b000000ffff # __KERNEL32_CS - .quad 0x00af9b000000ffff # __KERNEL_CS - .quad 0x00cf93000000ffff # __KERNEL_DS -tgdt_end: - - .balign 4 -startup_32_vector: - .long startup_32 - r_base - .word __KERNEL32_CS, 0 - - .balign 4 -startup_64_vector: - .long startup_64 - r_base - .word __KERNEL_CS, 0 - - .balign 4 -ENTRY(trampoline_status) - .long 0 - -trampoline_stack: - .org 0x1000 -trampoline_stack_end: -ENTRY(trampoline_level4_pgt) - .quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE - .fill 510,8,0 - .quad level3_kernel_pgt - __START_KERNEL_map + _KERNPG_TABLE - -ENTRY(trampoline_end) diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 0f703f1..22a1530 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -197,18 +197,6 @@ SECTIONS INIT_DATA_SECTION(16) - /* - * Code and data for a variety of lowlevel trampolines, to be - * copied into base memory (< 1 MiB) during initialization. - * Since it is copied early, the main copy can be discarded - * afterwards. - */ - .x86_trampoline : AT(ADDR(.x86_trampoline) - LOAD_OFFSET) { - x86_trampoline_start = .; - *(.x86_trampoline) - x86_trampoline_end = .; - } - .x86_cpu_dev.init : AT(ADDR(.x86_cpu_dev.init) - LOAD_OFFSET) { __x86_cpu_dev_start = .; *(.x86_cpu_dev.init) diff --git a/arch/x86/realmode/rm/Makefile b/arch/x86/realmode/rm/Makefile index 56ec64f..2432acb 100644 --- a/arch/x86/realmode/rm/Makefile +++ b/arch/x86/realmode/rm/Makefile @@ -14,9 +14,13 @@ always := realmode.bin realmode-y += header.o realmode-$(CONFIG_X86_32) += reboot_32.o realmode-y += trampoline_$(BITS).o +realmode-$(CONFIG_ACPI_SLEEP) += wakeup/wakeup.o targets += $(realmode-y) +$(obj)/wakeup/wakeup.o: FORCE + $(Q)$(MAKE) $(build)=$(obj)/wakeup $@ + REALMODE_OBJS = $(addprefix $(obj)/,$(realmode-y)) sed-pasyms := -n -r -e 's/^([0-9a-fA-F]+) [ABCDGRSTVW] (.+)$$/pa_\2 = \2;/p' diff --git a/arch/x86/realmode/rm/header.S b/arch/x86/realmode/rm/header.S index a979004..730b131 100644 --- a/arch/x86/realmode/rm/header.S +++ b/arch/x86/realmode/rm/header.S @@ -27,4 +27,9 @@ ENTRY(real_mode_header) .long pa_level3_ident_pgt .long pa_level3_kernel_pgt #endif + /* ACPI sleep */ +#ifdef CONFIG_ACPI_SLEEP + .long pa_wakeup_start + .long pa_wakeup_header +#endif END(real_mode_header) diff --git a/arch/x86/realmode/rm/realmode.lds.S b/arch/x86/realmode/rm/realmode.lds.S index c5b8a4f..91b83ea 100644 --- a/arch/x86/realmode/rm/realmode.lds.S +++ b/arch/x86/realmode/rm/realmode.lds.S @@ -25,6 +25,10 @@ SECTIONS .rodata : { *(.rodata) *(.rodata.*) + . = ALIGN(16); + video_cards = .; + *(.videocards) + video_cards_end = .; } . = ALIGN(PAGE_SIZE); diff --git a/arch/x86/realmode/rm/wakeup/.gitignore b/arch/x86/realmode/rm/wakeup/.gitignore new file mode 100644 index 0000000..58f1f48 --- /dev/null +++ b/arch/x86/realmode/rm/wakeup/.gitignore @@ -0,0 +1,3 @@ +wakeup.bin +wakeup.elf +wakeup.lds diff --git a/arch/x86/realmode/rm/wakeup/Makefile b/arch/x86/realmode/rm/wakeup/Makefile new file mode 100644 index 0000000..4c85332 --- /dev/null +++ b/arch/x86/realmode/rm/wakeup/Makefile @@ -0,0 +1,33 @@ +# +# arch/x86/kernel/acpi/realmode/Makefile +# +# This file is subject to the terms and conditions of the GNU General Public +# License. See the file "COPYING" in the main directory of this archive +# for more details. +# + +always := wakeup.o + +wakeup-y += wakeup_asm.o wakemain.o video-mode.o +wakeup-y += copy.o bioscall.o regs.o + +# The link order of the video-*.o modules can matter. In particular, +# video-vga.o *must* be listed first, followed by video-vesa.o. +# Hardware-specific drivers should follow in the order they should be +# probed, and video-bios.o should typically be last. +wakeup-y += video-vga.o +wakeup-y += video-vesa.o +wakeup-y += video-bios.o + +targets += $(wakeup-y) + +WAKEUP_OBJS = $(addprefix $(obj)/,$(wakeup-y)) + +LDFLAGS_wakeup.o := -m elf_i386 -r +$(obj)/wakeup.o: $(WAKEUP_OBJS) FORCE + $(call if_changed,ld) + +bootsrc := $(src)/../../../boot + +ccflags-y += -D_WAKEUP -I$(srctree)/$(bootsrc) +asflags-y += -D_WAKEUP -I$(srctree)/$(bootsrc) diff --git a/arch/x86/realmode/rm/wakeup/bioscall.S b/arch/x86/realmode/rm/wakeup/bioscall.S new file mode 100644 index 0000000..f51eb0b --- /dev/null +++ b/arch/x86/realmode/rm/wakeup/bioscall.S @@ -0,0 +1 @@ +#include "../../../boot/bioscall.S" diff --git a/arch/x86/realmode/rm/wakeup/copy.S b/arch/x86/realmode/rm/wakeup/copy.S new file mode 100644 index 0000000..dc59ebe --- /dev/null +++ b/arch/x86/realmode/rm/wakeup/copy.S @@ -0,0 +1 @@ +#include "../../../boot/copy.S" diff --git a/arch/x86/realmode/rm/wakeup/regs.c b/arch/x86/realmode/rm/wakeup/regs.c new file mode 100644 index 0000000..6206033 --- /dev/null +++ b/arch/x86/realmode/rm/wakeup/regs.c @@ -0,0 +1 @@ +#include "../../../boot/regs.c" diff --git a/arch/x86/realmode/rm/wakeup/video-bios.c b/arch/x86/realmode/rm/wakeup/video-bios.c new file mode 100644 index 0000000..7deabc1 --- /dev/null +++ b/arch/x86/realmode/rm/wakeup/video-bios.c @@ -0,0 +1 @@ +#include "../../../boot/video-bios.c" diff --git a/arch/x86/realmode/rm/wakeup/video-mode.c b/arch/x86/realmode/rm/wakeup/video-mode.c new file mode 100644 index 0000000..328ad20 --- /dev/null +++ b/arch/x86/realmode/rm/wakeup/video-mode.c @@ -0,0 +1 @@ +#include "../../../boot/video-mode.c" diff --git a/arch/x86/realmode/rm/wakeup/video-vesa.c b/arch/x86/realmode/rm/wakeup/video-vesa.c new file mode 100644 index 0000000..9dbb967 --- /dev/null +++ b/arch/x86/realmode/rm/wakeup/video-vesa.c @@ -0,0 +1 @@ +#include "../../../boot/video-vesa.c" diff --git a/arch/x86/realmode/rm/wakeup/video-vga.c b/arch/x86/realmode/rm/wakeup/video-vga.c new file mode 100644 index 0000000..bcc8125 --- /dev/null +++ b/arch/x86/realmode/rm/wakeup/video-vga.c @@ -0,0 +1 @@ +#include "../../../boot/video-vga.c" diff --git a/arch/x86/realmode/rm/wakeup/wakemain.c b/arch/x86/realmode/rm/wakeup/wakemain.c new file mode 100644 index 0000000..91405d5 --- /dev/null +++ b/arch/x86/realmode/rm/wakeup/wakemain.c @@ -0,0 +1,82 @@ +#include "wakeup.h" +#include "boot.h" + +static void udelay(int loops) +{ + while (loops--) + io_delay(); /* Approximately 1 us */ +} + +static void beep(unsigned int hz) +{ + u8 enable; + + if (!hz) { + enable = 0x00; /* Turn off speaker */ + } else { + u16 div = 1193181/hz; + + outb(0xb6, 0x43); /* Ctr 2, squarewave, load, binary */ + io_delay(); + outb(div, 0x42); /* LSB of counter */ + io_delay(); + outb(div >> 8, 0x42); /* MSB of counter */ + io_delay(); + + enable = 0x03; /* Turn on speaker */ + } + inb(0x61); /* Dummy read of System Control Port B */ + io_delay(); + outb(enable, 0x61); /* Enable timer 2 output to speaker */ + io_delay(); +} + +#define DOT_HZ 880 +#define DASH_HZ 587 +#define US_PER_DOT 125000 + +/* Okay, this is totally silly, but it's kind of fun. */ +static void send_morse(const char *pattern) +{ + char s; + + while ((s = *pattern++)) { + switch (s) { + case '.': + beep(DOT_HZ); + udelay(US_PER_DOT); + beep(0); + udelay(US_PER_DOT); + break; + case '-': + beep(DASH_HZ); + udelay(US_PER_DOT * 3); + beep(0); + udelay(US_PER_DOT); + break; + default: /* Assume it's a space */ + udelay(US_PER_DOT * 3); + break; + } + } +} + +void main(void) +{ + /* Kill machine if structures are wrong */ + if (wakeup_header.real_magic != 0x12345678) + while (1) + ; + + if (wakeup_header.realmode_flags & 4) + send_morse("...-"); + + if (wakeup_header.realmode_flags & 1) + asm volatile("lcallw $0xc000,$3"); + + if (wakeup_header.realmode_flags & 2) { + /* Need to call BIOS */ + probe_cards(0); + set_mode(wakeup_header.video_mode); + } +} diff --git a/arch/x86/realmode/rm/wakeup/wakeup.h b/arch/x86/realmode/rm/wakeup/wakeup.h new file mode 100644 index 0000000..2dfaf06 --- /dev/null +++ b/arch/x86/realmode/rm/wakeup/wakeup.h @@ -0,0 +1,41 @@ +/* + * Definitions for the wakeup data structure at the head of the + * wakeup code. + */ + +#ifndef ARCH_X86_KERNEL_ACPI_RM_WAKEUP_H +#define ARCH_X86_KERNEL_ACPI_RM_WAKEUP_H + +#ifndef __ASSEMBLY__ +#include + +/* This must match data at wakeup.S */ +struct wakeup_header { + u16 video_mode; /* Video mode number */ + u32 pmode_entry; /* Protected mode resume point, 32-bit only */ + u16 pmode_cs; + u32 pmode_cr0; /* Protected mode cr0 */ + u32 pmode_cr3; /* Protected mode cr3 */ + u32 pmode_cr4; /* Protected mode cr4 */ + u32 pmode_efer_low; /* Protected mode EFER */ + u32 pmode_efer_high; + u64 pmode_gdt; + u32 pmode_misc_en_low; /* Protected mode MISC_ENABLE */ + u32 pmode_misc_en_high; + u32 pmode_behavior; /* Wakeup routine behavior flags */ + u32 realmode_flags; + u32 real_magic; + u32 signature; /* To check we have correct structure */ +} __attribute__((__packed__)); + +extern struct wakeup_header wakeup_header; +#endif + +#define WAKEUP_HEADER_OFFSET 8 +#define WAKEUP_HEADER_SIGNATURE 0x51ee1111 +#define WAKEUP_END_SIGNATURE 0x65a22c82 + +/* Wakeup behavior bits */ +#define WAKEUP_BEHAVIOR_RESTORE_MISC_ENABLE 0 + +#endif /* ARCH_X86_KERNEL_ACPI_RM_WAKEUP_H */ diff --git a/arch/x86/realmode/rm/wakeup/wakeup_asm.S b/arch/x86/realmode/rm/wakeup/wakeup_asm.S new file mode 100644 index 0000000..b61126c --- /dev/null +++ b/arch/x86/realmode/rm/wakeup/wakeup_asm.S @@ -0,0 +1,189 @@ +/* + * ACPI wakeup real mode startup stub + */ +#include +#include +#include +#include +#include +#include "wakeup.h" + + .code16 + +/* This should match the structure in wakeup.h */ + .section ".data", "aw" + .globl wakeup_header +wakeup_header: +video_mode: .short 0 /* Video mode number */ +pmode_entry: .long 0 +pmode_cs: .short __KERNEL_CS +pmode_cr0: .long 0 /* Saved %cr0 */ +pmode_cr3: .long 0 /* Saved %cr3 */ +pmode_cr4: .long 0 /* Saved %cr4 */ +pmode_efer: .quad 0 /* Saved EFER */ +pmode_gdt: .quad 0 +pmode_misc_en: .quad 0 /* Saved MISC_ENABLE MSR */ +pmode_behavior: .long 0 /* Wakeup behavior flags */ +realmode_flags: .long 0 +real_magic: .long 0 +signature: .long WAKEUP_HEADER_SIGNATURE + .size wakeup_header, .-wakeup_header + + .text + .code16 + .globl wakeup_start +wakeup_start: + cli + cld + + .byte 0xea /* ljmpw */ + .word 3f + .word real_mode_seg +3: + /* Apparently some dimwit BIOS programmers don't know how to + program a PM to RM transition, and we might end up here with + junk in the data segment descriptor registers. The only way + to repair that is to go into PM and fix it ourselves... */ + movw $16, %cx + lgdtl %cs:wakeup_gdt + movl %cr0, %eax + orb $X86_CR0_PE, %al + movl %eax, %cr0 + ljmpw $8, $2f +2: + movw %cx, %ds + movw %cx, %es + movw %cx, %ss + movw %cx, %fs + movw %cx, %gs + + andb $~X86_CR0_PE, %al + movl %eax, %cr0 + .byte 0xea /* ljmpw */ + .word 3f + .word real_mode_seg +3: + /* Set up segments */ + movw %cs, %ax + movw %ax, %ds + movw %ax, %es + movw %ax, %ss + lidtl wakeup_idt + + movl $wakeup_stack_end, %esp + + /* Clear the EFLAGS */ + pushl $0 + popfl + + /* Check header signature... */ + movl signature, %eax + cmpl $WAKEUP_HEADER_SIGNATURE, %eax + jne bogus_real_magic + + /* Check we really have everything... */ + movl end_signature, %eax + cmpl $WAKEUP_END_SIGNATURE, %eax + jne bogus_real_magic + + /* Call the C code */ + calll main + + /* Restore MISC_ENABLE before entering protected mode, in case + BIOS decided to clear XD_DISABLE during S3. */ + movl pmode_behavior, %eax + btl $WAKEUP_BEHAVIOR_RESTORE_MISC_ENABLE, %eax + jnc 1f + + movl pmode_misc_en, %eax + movl pmode_misc_en + 4, %edx + movl $MSR_IA32_MISC_ENABLE, %ecx + wrmsr +1: + + /* Do any other stuff... */ + +#ifndef CONFIG_64BIT + /* This could also be done in C code... */ + movl pmode_cr3, %eax + movl %eax, %cr3 + + movl pmode_cr4, %ecx + jecxz 1f + movl %ecx, %cr4 +1: + movl pmode_efer, %eax + movl pmode_efer + 4, %edx + movl %eax, %ecx + orl %edx, %ecx + jz 1f + movl $MSR_EFER, %ecx + wrmsr +1: + + lgdtl pmode_gdt + + /* This really couldn't... */ + movl pmode_cr0, %eax + movl %eax, %cr0 + ljmpl *pmode_entry +#else + jmp trampoline_data +#endif + +bogus_real_magic: +1: + hlt + jmp 1b + + .section ".rodata","a" + + /* + * Set up the wakeup GDT. We set these up as Big Real Mode, + * that is, with limits set to 4 GB. At least the Lenovo + * Thinkpad X61 is known to need this for the video BIOS + * initialization quirk to work; this is likely to also + * be the case for other laptops or integrated video devices. + */ + + .globl wakeup_gdt + .balign 16 +wakeup_gdt: + .word 3*8-1 /* Self-descriptor */ + .long pa_wakeup_gdt + .word 0 + + .word 0xffff /* 16-bit code segment @ real_mode_base */ + .long 0x9b000000 + pa_real_mode_base + .word 0x008f /* big real mode */ + + .word 0xffff /* 16-bit data segment @ real_mode_base */ + .long 0x93000000 + pa_real_mode_base + .word 0x008f /* big real mode */ + .size wakeup_gdt, .-wakeup_gdt + + .data + .balign 8 + + /* This is the standard real-mode IDT */ +wakeup_idt: + .word 0xffff /* limit */ + .long 0 /* address */ + .word 0 + + .globl HEAP, heap_end +HEAP: + .long wakeup_heap +heap_end: + .long wakeup_stack + + .bss +wakeup_heap: + .space 2048 +wakeup_stack: + .space 2048 +wakeup_stack_end: + + .section ".signature","a" +end_signature: + .long WAKEUP_END_SIGNATURE diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c index eb6fd23..e77aa4a 100644 --- a/drivers/acpi/sleep.c +++ b/drivers/acpi/sleep.c @@ -25,6 +25,8 @@ #include #include +#include + #include "internal.h" #include "sleep.h" @@ -91,13 +93,13 @@ static struct notifier_block tts_notifier = { static int acpi_sleep_prepare(u32 acpi_state) { #ifdef CONFIG_ACPI_SLEEP + unsigned long wakeup_pa = real_mode_header.wakeup_start; /* do we have a wakeup address for S2 and S3? */ if (acpi_state == ACPI_STATE_S3) { - if (!acpi_wakeup_address) { + if (!wakeup_pa) return -EFAULT; - } acpi_set_firmware_waking_vector( - (acpi_physical_address)acpi_wakeup_address); + (acpi_physical_address)wakeup_pa); } ACPI_FLUSH_CPU_CACHE(); -- cgit v0.10.2 From f156ffc439951b63cfa9f4d999a8d54267f13282 Mon Sep 17 00:00:00 2001 From: Jarkko Sakkinen Date: Tue, 8 May 2012 21:22:30 +0300 Subject: x86, realmode: Set permission for real mode pages Set proper permissions for rodata, text and data, removing the realmode trampoline area as a remaining RWX memory mapping in the kernel. Signed-off-by: Jarkko Sakkinen Link: http://lkml.kernel.org/r/1336501366-28617-8-git-send-email-jarkko.sakkinen@intel.com Signed-off-by: H. Peter Anvin diff --git a/arch/x86/kernel/realmode.c b/arch/x86/kernel/realmode.c index a465775..d85ac20 100644 --- a/arch/x86/kernel/realmode.c +++ b/arch/x86/kernel/realmode.c @@ -86,7 +86,21 @@ static int __init set_real_mode_permissions(void) PAGE_ALIGN(real_mode_header.end) - __pa(real_mode_base); - set_memory_x((unsigned long) real_mode_base, all_size >> PAGE_SHIFT); + size_t ro_size = + PAGE_ALIGN(real_mode_header.ro_end) - + __pa(real_mode_base); + + size_t text_size = + PAGE_ALIGN(real_mode_header.ro_end) - + real_mode_header.text_start; + + unsigned long text_start = + (unsigned long) __va(real_mode_header.text_start); + + set_memory_nx((unsigned long) real_mode_base, all_size >> PAGE_SHIFT); + set_memory_ro((unsigned long) real_mode_base, ro_size >> PAGE_SHIFT); + set_memory_x((unsigned long) text_start, text_size >> PAGE_SHIFT); + return 0; } -- cgit v0.10.2 From 2a6de3148cfd1a0240a85ea4a1cad34d250d882f Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Tue, 8 May 2012 21:22:31 +0300 Subject: x86, realmode: Allow absolute pa_* symbols in the realmode code Allow pa_* symbols to be absolute (outside any section) in the realmode linker script. Some versions of GNU ld are known to be unhappy about symbols defined in a section that is otherwise empty. Signed-off-by: H. Peter Anvin Link: http://lkml.kernel.org/r/1336501366-28617-9-git-send-email-jarkko.sakkinen@intel.com diff --git a/scripts/x86-relocs.c b/scripts/x86-relocs.c index 0291470..74e16bb 100644 --- a/scripts/x86-relocs.c +++ b/scripts/x86-relocs.c @@ -62,6 +62,13 @@ static const char * const sym_regex_kernel[S_NSYMTYPES] = { static const char * const sym_regex_realmode[S_NSYMTYPES] = { /* + * These symbols are known to be relative, even if the linker marks them + * as absolute (typically defined outside any section in the linker script.) + */ + [S_REL] = + "^pa_", + +/* * These are 16-bit segment symbols when compiling 16-bit code. */ [S_SEG] = -- cgit v0.10.2 From 487f50ffeb142d8f86fff6e43a8852ce3d46c173 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Tue, 8 May 2012 21:22:32 +0300 Subject: x86, realmode: Add .text64 section, make barrier symbols absolute Add a .text64 section. The purpose of this is to keep 16-, 32- and 64-bit code segregated into separate sections, mainly to keep disassembly sane. Move barrier symbols out of sections to avoid the "symbol in empty section" problem in some versions of GNU ld. Signed-off-by: H. Peter Anvin Link: http://lkml.kernel.org/r/1336501366-28617-10-git-send-email-jarkko.sakkinen@intel.com diff --git a/arch/x86/realmode/rm/realmode.lds.S b/arch/x86/realmode/rm/realmode.lds.S index 91b83ea..4d4afca 100644 --- a/arch/x86/realmode/rm/realmode.lds.S +++ b/arch/x86/realmode/rm/realmode.lds.S @@ -32,8 +32,8 @@ SECTIONS } . = ALIGN(PAGE_SIZE); + pa_text_start = .; .text : { - pa_text_start = .; *(.text) *(.text.*) } @@ -41,9 +41,14 @@ SECTIONS .text32 : { *(.text32) *(.text32.*) - pa_ro_end = .; } + .text64 : { + *(.text64) + *(.text64.*) + } + pa_ro_end = .; + . = ALIGN(PAGE_SIZE); .data : { *(.data) @@ -59,8 +64,8 @@ SECTIONS . = ALIGN(4); .signature : { *(.signature) - pa_end = .; } + pa_end = .; /DISCARD/ : { *(.note*) -- cgit v0.10.2 From 024742861124ef26dae4cfc620250f8f47ac934a Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Tue, 8 May 2012 21:22:33 +0300 Subject: x86, realmode: Move bits to the proper sections in trampoline_64.S Move various bits to the sections they really belong in in trampoline_64.S. Use GLOBAL() rather than ENTRY() for data objects: ENTRY() should only be used with code and forces alignment to 16 bytes. Signed-off-by: H. Peter Anvin Link: http://lkml.kernel.org/r/1336501366-28617-11-git-send-email-jarkko.sakkinen@intel.com diff --git a/arch/x86/realmode/rm/trampoline_64.S b/arch/x86/realmode/rm/trampoline_64.S index 063da00..66c58cf 100644 --- a/arch/x86/realmode/rm/trampoline_64.S +++ b/arch/x86/realmode/rm/trampoline_64.S @@ -80,6 +80,7 @@ no_longmode: jmp no_longmode #include "../kernel/verify_cpu.S" + .section ".text32","ax" .code32 .balign 4 ENTRY(startup_32) @@ -114,6 +115,7 @@ ENTRY(startup_32) */ ljmpl *(pa_startup_64_vector) + .section ".text64","ax" .code64 .balign 4 ENTRY(startup_64) @@ -123,7 +125,8 @@ ENTRY(startup_64) addl %esi, %eax jmp *%rax - # Careful these need to be in the same 64K segment as the above; + .section ".rodata","a" + .balign 16 tidt: .word 0 # idt limit = 0 .word 0, 0 # idt base = 0L @@ -153,9 +156,8 @@ startup_64_vector: .word __KERNEL_CS, 0 .data - .balign 4 -ENTRY(trampoline_status) +GLOBAL(trampoline_status) .long 0 trampoline_stack: @@ -164,7 +166,7 @@ trampoline_stack_end: .globl level3_ident_pgt .globl level3_kernel_pgt -ENTRY(trampoline_level4_pgt) +GLOBAL(trampoline_level4_pgt) level3_ident_pgt: .quad 0 .fill 510,8,0 level3_kernel_pgt: .quad 0 -- cgit v0.10.2 From f7436a9da902922a48cccc208099763b87d6171f Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Tue, 8 May 2012 21:22:34 +0300 Subject: x86, realmode: Align .data section in trampoline_32.S Specify the alignment of the .data section in trampoline_32.S. Signed-off-by: H. Peter Anvin Link: http://lkml.kernel.org/r/1336501366-28617-12-git-send-email-jarkko.sakkinen@intel.com diff --git a/arch/x86/realmode/rm/trampoline_32.S b/arch/x86/realmode/rm/trampoline_32.S index 18cb7fc..1f9e331 100644 --- a/arch/x86/realmode/rm/trampoline_32.S +++ b/arch/x86/realmode/rm/trampoline_32.S @@ -68,7 +68,7 @@ trampoline_data: .data .globl startup_32_smp, boot_gdt, trampoline_status - + .balign 4 boot_gdt_descr: .word __BOOT_DS + 7 # gdt limit boot_gdt: -- cgit v0.10.2 From 056a43a6d3ab903a798d8ee4435ad67d6fccc3e6 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Tue, 8 May 2012 21:22:35 +0300 Subject: x86, realmode: Remove indirect jumps in trampoline_64.S Remove indirect jumps in trampoline_64.S which are no longer necessary: the realmode code can relocate the absolute jumps correctly from the start. Signed-off-by: H. Peter Anvin Link: http://lkml.kernel.org/r/1336501366-28617-13-git-send-email-jarkko.sakkinen@intel.com diff --git a/arch/x86/realmode/rm/trampoline_64.S b/arch/x86/realmode/rm/trampoline_64.S index 66c58cf..77b72b4 100644 --- a/arch/x86/realmode/rm/trampoline_64.S +++ b/arch/x86/realmode/rm/trampoline_64.S @@ -73,7 +73,7 @@ ENTRY(trampoline_data) lmsw %ax # into protected mode # flush prefetch and jump to startup_32 - ljmpl *(startup_32_vector) + ljmpl $__KERNEL32_CS, $pa_startup_32 no_longmode: hlt @@ -113,7 +113,7 @@ ENTRY(startup_32) * EFER.LMA = 1). Now we want to jump in 64bit mode, to do that we use * the new gdt/idt that has __KERNEL_CS with CS.L = 1. */ - ljmpl *(pa_startup_64_vector) + ljmpl $__KERNEL_CS, $pa_startup_64 .section ".text64","ax" .code64 @@ -144,17 +144,6 @@ tgdt: .quad 0x00cf93000000ffff # __KERNEL_DS tgdt_end: - .balign 4 -startup_32_vector: - .long pa_startup_32 - .word __KERNEL32_CS, 0 - - .balign 4 - .globl startup_64_vector -startup_64_vector: - .long pa_startup_64 - .word __KERNEL_CS, 0 - .data .balign 4 GLOBAL(trampoline_status) -- cgit v0.10.2 From 968ff9ee56f1e3ed4ff4a6d10185865dc77d8f7e Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Tue, 8 May 2012 21:22:36 +0300 Subject: x86, realmode: Remove indirect jumps in trampoline_32 and wakeup_asm Remove indirect jumps in trampoline_32.S and the 32-bit part of wakeup_asm.S. There exist systems which are known to do weird things if an SMI comes in right after a mode switch, and the safest way to deal with it is to always follow with a simple absolute far jump. In the 64-bit code we then to a register indirect near jump; follow that pattern for the 32-bit code. Signed-off-by: H. Peter Anvin Link: http://lkml.kernel.org/r/1336501366-28617-14-git-send-email-jarkko.sakkinen@intel.com diff --git a/arch/x86/realmode/rm/trampoline_32.S b/arch/x86/realmode/rm/trampoline_32.S index 1f9e331..1315ef4 100644 --- a/arch/x86/realmode/rm/trampoline_32.S +++ b/arch/x86/realmode/rm/trampoline_32.S @@ -47,24 +47,29 @@ trampoline_data: cli # We should be safe anyway + movl startup_32_smp, %eax # where we need to go + movl $0xA5A5A5A5, trampoline_status # write marker for master knows we're running - /* GDT tables in non default location kernel can be beyond 16MB and + /* + * GDT tables in non default location kernel can be beyond 16MB and * lgdt will not be able to load the address as in real mode default * operand size is 16bit. Use lgdtl instead to force operand size * to 32 bit. */ - lidtl boot_idt_descr # load idt with 0, 0 lgdtl boot_gdt_descr # load gdt with whatever is appropriate - xor %ax, %ax - inc %ax # protected mode (PE) bit - lmsw %ax # into protected mode + movw $1, %dx # protected mode (PE) bit + lmsw %dx # into protected mode - # flush prefetch and jump to startup_32_smp in arch/i386/kernel/head.S - ljmpl *(startup_32_smp) + ljmpl $__BOOT_CS, $pa_startup_32 + + .section ".text32","ax" + .code32 +ENTRY(startup_32) # note: also used from wakeup_asm.S + jmp *%eax .data .globl startup_32_smp, boot_gdt, trampoline_status @@ -82,5 +87,4 @@ trampoline_status: .long 0 startup_32_smp: - .long 0x00000000 - .word __BOOT_CS, 0 + .long 0 diff --git a/arch/x86/realmode/rm/wakeup/wakeup_asm.S b/arch/x86/realmode/rm/wakeup/wakeup_asm.S index b61126c..4c5c5f2 100644 --- a/arch/x86/realmode/rm/wakeup/wakeup_asm.S +++ b/arch/x86/realmode/rm/wakeup/wakeup_asm.S @@ -124,9 +124,11 @@ wakeup_start: lgdtl pmode_gdt /* This really couldn't... */ - movl pmode_cr0, %eax - movl %eax, %cr0 - ljmpl *pmode_entry + movl pmode_entry, %eax + movl pmode_cr0, %ecx + movl %ecx, %cr0 + ljmpl $__KERNEL_CS, $pa_startup_32 + /* -> jmp *%eax in trampoline_32.S */ #else jmp trampoline_data #endif -- cgit v0.10.2 From e5684ec438a094bec0f7d5c52652c0901b48b613 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Tue, 8 May 2012 21:22:37 +0300 Subject: x86, realmode: Replace open-coded ljmpw with a macro We cannot code an ljmpw to the real-mode segment directly, because gas refuses to assemble an ljmp with a symbolic segment. Instead of open-coding it everywhere, define a macro and use it for this case. This is specifically an ljmpw from a 16-bit segment. This is okay, as one should never enter real mode from a 32-bit segment: if one do, the CPU ends up in a bizarre (and useless) mode sometimes called "unreal mode" where segments behave like real mode but the default address and operand sizes is 32 bits. Signed-off-by: H. Peter Anvin Link: http://lkml.kernel.org/r/1336501366-28617-15-git-send-email-jarkko.sakkinen@intel.com diff --git a/arch/x86/realmode/rm/realmode.h b/arch/x86/realmode/rm/realmode.h new file mode 100644 index 0000000..15ab633 --- /dev/null +++ b/arch/x86/realmode/rm/realmode.h @@ -0,0 +1,16 @@ +#ifndef ARCH_X86_REALMODE_RM_REALMODE_H +#define ARCH_X86_REALMODE_RM_REALMODE_H + +#ifdef __ASSEMBLY__ + +/* + * 16-bit ljmpw to the real_mode_seg + * + * This must be open-coded since gas will choke on using a + * relocatable symbol for the segment portion. + */ +#define LJMPW_RM(to) .byte 0xea ; .word (to), real_mode_seg + +#endif /* __ASSEMBLY__ */ + +#endif /* ARCH_X86_REALMODE_RM_REALMODE_H */ diff --git a/arch/x86/realmode/rm/reboot_32.S b/arch/x86/realmode/rm/reboot_32.S index 83803c2..e90f8c4 100644 --- a/arch/x86/realmode/rm/reboot_32.S +++ b/arch/x86/realmode/rm/reboot_32.S @@ -2,6 +2,7 @@ #include #include #include +#include "realmode.h" /* * The following code and data reboots the machine by switching to real @@ -82,10 +83,7 @@ machine_real_restart_asm16: 2: andb $0x10, %dl movl %edx, %cr0 - .byte 0xea /* ljmpw */ - .word 3f /* Offset */ - .word real_mode_seg /* Segment */ - + LJMPW_RM(3f) 3: testb $0, %al jz bios diff --git a/arch/x86/realmode/rm/trampoline_32.S b/arch/x86/realmode/rm/trampoline_32.S index 1315ef4..279f82e 100644 --- a/arch/x86/realmode/rm/trampoline_32.S +++ b/arch/x86/realmode/rm/trampoline_32.S @@ -29,6 +29,7 @@ #include #include #include +#include "realmode.h" .text .code16 @@ -38,9 +39,7 @@ trampoline_data: wbinvd # Needed for NUMA-Q should be harmless for others - .byte 0xea # ljmpw - .word 1f # Offset - .word real_mode_seg # Segment + LJMPW_RM(1f) 1: mov %cs, %ax # Code and data in the same place mov %ax, %ds diff --git a/arch/x86/realmode/rm/trampoline_64.S b/arch/x86/realmode/rm/trampoline_64.S index 77b72b4..7459c52 100644 --- a/arch/x86/realmode/rm/trampoline_64.S +++ b/arch/x86/realmode/rm/trampoline_64.S @@ -31,6 +31,7 @@ #include #include #include +#include "realmode.h" .text .balign PAGE_SIZE @@ -40,9 +41,7 @@ ENTRY(trampoline_data) cli # We should be safe anyway wbinvd - .byte 0xea # ljmpw - .word 1f # Offset - .word real_mode_seg # Segment + LJMPW_RM(1f) 1: mov %cs, %ax # Code and data in the same place mov %ax, %ds diff --git a/arch/x86/realmode/rm/wakeup/wakeup_asm.S b/arch/x86/realmode/rm/wakeup/wakeup_asm.S index 4c5c5f2..8064e1c 100644 --- a/arch/x86/realmode/rm/wakeup/wakeup_asm.S +++ b/arch/x86/realmode/rm/wakeup/wakeup_asm.S @@ -6,6 +6,7 @@ #include #include #include +#include "../realmode.h" #include "wakeup.h" .code16 @@ -36,9 +37,7 @@ wakeup_start: cli cld - .byte 0xea /* ljmpw */ - .word 3f - .word real_mode_seg + LJMPW_RM(3f) 3: /* Apparently some dimwit BIOS programmers don't know how to program a PM to RM transition, and we might end up here with @@ -59,9 +58,7 @@ wakeup_start: andb $~X86_CR0_PE, %al movl %eax, %cr0 - .byte 0xea /* ljmpw */ - .word 3f - .word real_mode_seg + LJMPW_RM(3f) 3: /* Set up segments */ movw %cs, %ax -- cgit v0.10.2 From be60828920d23758da8124bed771404a0438f369 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Tue, 8 May 2012 21:22:38 +0300 Subject: x86, realmode: Move trampoline_*.S early in the link order Move trampoline_*.S earlier in the link order so it ends up being first in the text segment; since the SIPI vector requires 4K alignment it otherwise ends up padding the .text segment with that much completely unnecessarily. Signed-off-by: H. Peter Anvin Link: http://lkml.kernel.org/r/1336501366-28617-16-git-send-email-jarkko.sakkinen@intel.com diff --git a/arch/x86/realmode/rm/Makefile b/arch/x86/realmode/rm/Makefile index 2432acb..2423142 100644 --- a/arch/x86/realmode/rm/Makefile +++ b/arch/x86/realmode/rm/Makefile @@ -12,8 +12,8 @@ subdir- := wakeup always := realmode.bin realmode-y += header.o -realmode-$(CONFIG_X86_32) += reboot_32.o realmode-y += trampoline_$(BITS).o +realmode-$(CONFIG_X86_32) += reboot_32.o realmode-$(CONFIG_ACPI_SLEEP) += wakeup/wakeup.o targets += $(realmode-y) -- cgit v0.10.2 From 6feb592dceaed1a6cf26c9747b1180958d5156cd Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Tue, 8 May 2012 21:22:39 +0300 Subject: x86, realmode: Fix always-zero test in reboot_32.S A test instruction is an "and", and an and with zero is always zero. This would cause us to always take the BIOS path, not the APM path, in case anyone actually cares... Signed-off-by: H. Peter Anvin Link: http://lkml.kernel.org/r/1336501366-28617-17-git-send-email-jarkko.sakkinen@intel.com diff --git a/arch/x86/realmode/rm/reboot_32.S b/arch/x86/realmode/rm/reboot_32.S index e90f8c4..50ba994 100644 --- a/arch/x86/realmode/rm/reboot_32.S +++ b/arch/x86/realmode/rm/reboot_32.S @@ -85,7 +85,7 @@ machine_real_restart_asm16: movl %edx, %cr0 LJMPW_RM(3f) 3: - testb $0, %al + andw %ax, %ax jz bios apm: -- cgit v0.10.2 From 8e029fcdd8702719c9179317cae9ef84ebe7027e Mon Sep 17 00:00:00 2001 From: Jarkko Sakkinen Date: Tue, 8 May 2012 21:22:40 +0300 Subject: x86, realmode: fix 64-bit wakeup sequence There were number of issues in wakeup sequence: - Wakeup stack was placed in hardcoded address. - NX bit in EFER was not enabled. - Initialization incorrectly set physical address of secondary_startup_64. - Some alignment issues. This patch fixes these issues and in addition: - Unifies coding conventions in .S files. - Sets alignments of code and data right. Signed-off-by: Jarkko Sakkinen Link: http://lkml.kernel.org/r/1336501366-28617-18-git-send-email-jarkko.sakkinen@intel.com Originally-by: H. Peter Anvin Cc: Rafael J. Wysocki Cc: Len Brown Signed-off-by: H. Peter Anvin diff --git a/arch/x86/kernel/realmode.c b/arch/x86/kernel/realmode.c index d85ac20..e7bf82a 100644 --- a/arch/x86/kernel/realmode.c +++ b/arch/x86/kernel/realmode.c @@ -64,7 +64,7 @@ void __init setup_real_mode(void) *((u32 *)__va(real_mode_header.boot_gdt)) = __pa(boot_gdt); #else *((u64 *) __va(real_mode_header.startup_64_smp)) = - (u64) __pa(secondary_startup_64); + (u64)secondary_startup_64; *((u64 *) __va(real_mode_header.level3_ident_pgt)) = __pa(level3_ident_pgt) + _KERNPG_TABLE; diff --git a/arch/x86/realmode/rm/Makefile b/arch/x86/realmode/rm/Makefile index 2423142..c2c27a4 100644 --- a/arch/x86/realmode/rm/Makefile +++ b/arch/x86/realmode/rm/Makefile @@ -13,6 +13,7 @@ always := realmode.bin realmode-y += header.o realmode-y += trampoline_$(BITS).o +realmode-y += stack.o realmode-$(CONFIG_X86_32) += reboot_32.o realmode-$(CONFIG_ACPI_SLEEP) += wakeup/wakeup.o diff --git a/arch/x86/realmode/rm/header.S b/arch/x86/realmode/rm/header.S index 730b131..a91ec8f 100644 --- a/arch/x86/realmode/rm/header.S +++ b/arch/x86/realmode/rm/header.S @@ -9,7 +9,7 @@ .section ".header", "a" -ENTRY(real_mode_header) +GLOBAL(real_mode_header) .long pa_text_start .long pa_ro_end .long pa_end diff --git a/arch/x86/realmode/rm/reboot_32.S b/arch/x86/realmode/rm/reboot_32.S index 50ba994..8d9bfd1 100644 --- a/arch/x86/realmode/rm/reboot_32.S +++ b/arch/x86/realmode/rm/reboot_32.S @@ -16,10 +16,9 @@ */ .section ".text32", "ax" .code32 - .globl machine_real_restart_asm - .balign 16 -machine_real_restart_asm: + .balign 16 +ENTRY(machine_real_restart_asm) /* Set up the IDT for real mode. */ lidtl pa_machine_real_restart_idt @@ -67,7 +66,7 @@ machine_real_restart_asm: .text .code16 - .balign 16 + .balign 16 machine_real_restart_asm16: 1: xorl %ecx, %ecx @@ -102,15 +101,15 @@ bios: ljmpw $0xf000, $0xfff0 .section ".rodata", "a" - .globl machine_real_restart_idt, machine_real_restart_gdt - .balign 16 -machine_real_restart_idt: + .balign 16 +GLOBAL(machine_real_restart_idt) .word 0xffff /* Length - real mode default value */ .long 0 /* Base - real mode default value */ +END(machine_real_restart_idt) - .balign 16 -machine_real_restart_gdt: + .balign 16 +GLOBAL(machine_real_restart_gdt) /* Self-pointer */ .word 0xffff /* Length - real mode default value */ .long pa_machine_real_restart_gdt @@ -130,3 +129,4 @@ machine_real_restart_gdt: * semantics we don't have to reload the segments once CR0.PE = 0. */ .quad GDT_ENTRY(0x0093, 0x100, 0xffff) +END(machine_real_restart_gdt) diff --git a/arch/x86/realmode/rm/stack.S b/arch/x86/realmode/rm/stack.S new file mode 100644 index 0000000..867ae87 --- /dev/null +++ b/arch/x86/realmode/rm/stack.S @@ -0,0 +1,19 @@ +/* + * Common heap and stack allocations + */ + +#include + + .data +GLOBAL(HEAP) + .long rm_heap +GLOBAL(heap_end) + .long rm_stack + + .bss + .balign 16 +GLOBAL(rm_heap) + .space 2048 +GLOBAL(rm_stack) + .space 2048 +GLOBAL(rm_stack_end) diff --git a/arch/x86/realmode/rm/trampoline_32.S b/arch/x86/realmode/rm/trampoline_32.S index 279f82e..1ecdbb5 100644 --- a/arch/x86/realmode/rm/trampoline_32.S +++ b/arch/x86/realmode/rm/trampoline_32.S @@ -33,10 +33,9 @@ .text .code16 - .globl trampoline_data - .balign PAGE_SIZE -trampoline_data: + .balign PAGE_SIZE +ENTRY(trampoline_data) wbinvd # Needed for NUMA-Q should be harmless for others LJMPW_RM(1f) @@ -70,20 +69,22 @@ trampoline_data: ENTRY(startup_32) # note: also used from wakeup_asm.S jmp *%eax - .data - .globl startup_32_smp, boot_gdt, trampoline_status - .balign 4 -boot_gdt_descr: - .word __BOOT_DS + 7 # gdt limit -boot_gdt: - .long 0 # gdt base + .section ".rodata","a" + .balign 4 boot_idt_descr: .word 0 # idt limit = 0 .long 0 # idt base = 0L -trampoline_status: - .long 0 + .data -startup_32_smp: - .long 0 +boot_gdt_descr: + .word __BOOT_DS + 7 # gdt limit +GLOBAL(boot_gdt) + .long 0 # gdt base + + .bss + + .balign 4 +GLOBAL(trampoline_status) .space 4 +GLOBAL(startup_32_smp) .space 4 diff --git a/arch/x86/realmode/rm/trampoline_64.S b/arch/x86/realmode/rm/trampoline_64.S index 7459c52..f71ea08 100644 --- a/arch/x86/realmode/rm/trampoline_64.S +++ b/arch/x86/realmode/rm/trampoline_64.S @@ -52,7 +52,7 @@ ENTRY(trampoline_data) # write marker for master knows we're running # Setup stack - movw $trampoline_stack_end, %sp + movl $rm_stack_end, %esp call verify_cpu # Verify the cpu supports long mode testl %eax, %eax # Check for return code @@ -68,8 +68,11 @@ ENTRY(trampoline_data) lidtl tidt # load idt with 0, 0 lgdtl tgdt # load gdt with whatever is appropriate - mov $X86_CR0_PE, %ax # protected mode (PE) bit - lmsw %ax # into protected mode + movw $__KERNEL_DS, %dx # Data segment descriptor + + # Enable protected mode + movl $X86_CR0_PE, %eax # protected mode (PE) bit + movl %eax, %cr0 # into protected mode # flush prefetch and jump to startup_32 ljmpl $__KERNEL32_CS, $pa_startup_32 @@ -83,27 +86,27 @@ no_longmode: .code32 .balign 4 ENTRY(startup_32) - movl $__KERNEL_DS, %eax # Initialize the %ds segment register - movl %eax, %ds + movl %edx, %ss + addl $pa_real_mode_base, %esp + movl %edx, %ds + movl %edx, %es + movl %edx, %fs + movl %edx, %gs movl $X86_CR4_PAE, %eax movl %eax, %cr4 # Enable PAE mode - movl pa_startup_64_smp, %esi - movl pa_startup_64_smp_high, %edi - - # Setup trampoline 4 level pagetables - leal pa_trampoline_level4_pgt, %eax + # Setup trampoline 4 level pagetables + movl $pa_level3_ident_pgt, %eax movl %eax, %cr3 movl $MSR_EFER, %ecx - movl $(1 << _EFER_LME), %eax # Enable Long Mode + movl $((1 << _EFER_LME) | (1 << _EFER_NX)), %eax # Enable Long Mode xorl %edx, %edx wrmsr # Enable paging and in turn activate Long Mode - # Enable protected mode - movl $(X86_CR0_PG | X86_CR0_PE), %eax + movl $(X86_CR0_PG | X86_CR0_WP | X86_CR0_PE), %eax movl %eax, %cr0 /* @@ -119,10 +122,7 @@ ENTRY(startup_32) .balign 4 ENTRY(startup_64) # Now jump into the kernel using virtual addresses - movl %edi, %eax - shlq $32, %rax - addl %esi, %eax - jmp *%rax + jmpq *startup_64_smp(%rip) .section ".rodata","a" .balign 16 @@ -132,10 +132,10 @@ tidt: # Duplicate the global descriptor table # so the kernel can live anywhere - .balign 4 + .balign 16 .globl tgdt tgdt: - .short tgdt_end - tgdt # gdt limit + .short tgdt_end - tgdt - 1 # gdt limit .long pa_tgdt .short 0 .quad 0x00cf9b000000ffff # __KERNEL32_CS @@ -143,23 +143,12 @@ tgdt: .quad 0x00cf93000000ffff # __KERNEL_DS tgdt_end: - .data - .balign 4 -GLOBAL(trampoline_status) - .long 0 - -trampoline_stack: - .org 0x1000 -trampoline_stack_end: - - .globl level3_ident_pgt - .globl level3_kernel_pgt -GLOBAL(trampoline_level4_pgt) - level3_ident_pgt: .quad 0 - .fill 510,8,0 - level3_kernel_pgt: .quad 0 - - .globl startup_64_smp - .globl startup_64_smp_high -startup_64_smp: .long 0 -startup_64_smp_high: .long 0 + .bss + + .balign PAGE_SIZE +GLOBAL(level3_ident_pgt) .space 511*8 +GLOBAL(level3_kernel_pgt) .space 8 + + .balign 8 +GLOBAL(startup_64_smp) .space 8 +GLOBAL(trampoline_status) .space 4 diff --git a/arch/x86/realmode/rm/wakeup/wakeup_asm.S b/arch/x86/realmode/rm/wakeup/wakeup_asm.S index 8064e1c..f81c1cd 100644 --- a/arch/x86/realmode/rm/wakeup/wakeup_asm.S +++ b/arch/x86/realmode/rm/wakeup/wakeup_asm.S @@ -1,6 +1,7 @@ /* * ACPI wakeup real mode startup stub */ +#include #include #include #include @@ -9,31 +10,33 @@ #include "../realmode.h" #include "wakeup.h" - .code16 + .code16 /* This should match the structure in wakeup.h */ - .section ".data", "aw" - .globl wakeup_header -wakeup_header: -video_mode: .short 0 /* Video mode number */ -pmode_entry: .long 0 -pmode_cs: .short __KERNEL_CS -pmode_cr0: .long 0 /* Saved %cr0 */ -pmode_cr3: .long 0 /* Saved %cr3 */ -pmode_cr4: .long 0 /* Saved %cr4 */ -pmode_efer: .quad 0 /* Saved EFER */ -pmode_gdt: .quad 0 -pmode_misc_en: .quad 0 /* Saved MISC_ENABLE MSR */ -pmode_behavior: .long 0 /* Wakeup behavior flags */ -realmode_flags: .long 0 -real_magic: .long 0 -signature: .long WAKEUP_HEADER_SIGNATURE - .size wakeup_header, .-wakeup_header + .section ".data", "aw" + + .balign 16 +GLOBAL(wakeup_header) + video_mode: .short 0 /* Video mode number */ + pmode_entry: .long 0 + pmode_cs: .short __KERNEL_CS + pmode_cr0: .long 0 /* Saved %cr0 */ + pmode_cr3: .long 0 /* Saved %cr3 */ + pmode_cr4: .long 0 /* Saved %cr4 */ + pmode_efer: .quad 0 /* Saved EFER */ + pmode_gdt: .quad 0 + pmode_misc_en: .quad 0 /* Saved MISC_ENABLE MSR */ + pmode_behavior: .long 0 /* Wakeup behavior flags */ + realmode_flags: .long 0 + real_magic: .long 0 + signature: .long WAKEUP_HEADER_SIGNATURE +END(wakeup_header) .text .code16 - .globl wakeup_start -wakeup_start: + + .balign 16 +ENTRY(wakeup_start) cli cld @@ -62,12 +65,14 @@ wakeup_start: 3: /* Set up segments */ movw %cs, %ax + movw %ax, %ss + movl $rm_stack_end, %esp movw %ax, %ds movw %ax, %es - movw %ax, %ss - lidtl wakeup_idt + movw %ax, %fs + movw %ax, %gs - movl $wakeup_stack_end, %esp + lidtl wakeup_idt /* Clear the EFLAGS */ pushl $0 @@ -145,9 +150,8 @@ bogus_real_magic: * be the case for other laptops or integrated video devices. */ - .globl wakeup_gdt .balign 16 -wakeup_gdt: +GLOBAL(wakeup_gdt) .word 3*8-1 /* Self-descriptor */ .long pa_wakeup_gdt .word 0 @@ -159,29 +163,18 @@ wakeup_gdt: .word 0xffff /* 16-bit data segment @ real_mode_base */ .long 0x93000000 + pa_real_mode_base .word 0x008f /* big real mode */ - .size wakeup_gdt, .-wakeup_gdt +END(wakeup_gdt) - .data + .section ".rodata","a" .balign 8 /* This is the standard real-mode IDT */ -wakeup_idt: + .balign 16 +GLOBAL(wakeup_idt) .word 0xffff /* limit */ .long 0 /* address */ .word 0 - - .globl HEAP, heap_end -HEAP: - .long wakeup_heap -heap_end: - .long wakeup_stack - - .bss -wakeup_heap: - .space 2048 -wakeup_stack: - .space 2048 -wakeup_stack_end: +END(wakeup_idt) .section ".signature","a" end_signature: diff --git a/arch/x86/realmode/rmpiggy.S b/arch/x86/realmode/rmpiggy.S index 6047d7f..fd72a99 100644 --- a/arch/x86/realmode/rmpiggy.S +++ b/arch/x86/realmode/rmpiggy.S @@ -9,10 +9,10 @@ .balign PAGE_SIZE -ENTRY(real_mode_blob) +GLOBAL(real_mode_blob) .incbin "arch/x86/realmode/rm/realmode.bin" END(real_mode_blob) -ENTRY(real_mode_relocs) +GLOBAL(real_mode_relocs) .incbin "arch/x86/realmode/rm/realmode.relocs" END(real_mode_relocs) -- cgit v0.10.2 From b429dbf6e866bd6dadb56fae66f61f611cde57ff Mon Sep 17 00:00:00 2001 From: Jarkko Sakkinen Date: Tue, 8 May 2012 21:22:41 +0300 Subject: x86, realmode: don't copy real_mode_header Replaced copying of real_mode_header with a pointer to beginning of RM memory. Signed-off-by: Jarkko Sakkinen Link: http://lkml.kernel.org/r/1336501366-28617-19-git-send-email-jarkko.sakkinen@intel.com Signed-off-by: H. Peter Anvin diff --git a/arch/x86/include/asm/realmode.h b/arch/x86/include/asm/realmode.h index 1bfc74d..d3ae49f 100644 --- a/arch/x86/include/asm/realmode.h +++ b/arch/x86/include/asm/realmode.h @@ -8,7 +8,6 @@ struct real_mode_header { u32 text_start; u32 ro_end; - u32 end; /* reboot */ #ifdef CONFIG_X86_32 u32 machine_real_restart_asm; @@ -30,8 +29,8 @@ struct real_mode_header { #endif } __attribute__((__packed__)); -extern struct real_mode_header real_mode_header; -extern unsigned char *real_mode_base; +extern struct real_mode_header *real_mode_header; +extern unsigned char real_mode_blob_end[]; extern unsigned long init_rsp; extern unsigned long initial_code; diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c index d941b62..6ca3f54 100644 --- a/arch/x86/kernel/acpi/sleep.c +++ b/arch/x86/kernel/acpi/sleep.c @@ -38,7 +38,7 @@ asmlinkage void acpi_enter_s3(void) int acpi_suspend_lowlevel(void) { struct wakeup_header *header = - (struct wakeup_header *) __va(real_mode_header.wakeup_header); + (struct wakeup_header *) __va(real_mode_header->wakeup_header); if (header->signature != WAKEUP_HEADER_SIGNATURE) { printk(KERN_ERR "wakeup header does not match\n"); diff --git a/arch/x86/kernel/realmode.c b/arch/x86/kernel/realmode.c index e7bf82a..632c810 100644 --- a/arch/x86/kernel/realmode.c +++ b/arch/x86/kernel/realmode.c @@ -5,8 +5,7 @@ #include #include -unsigned char *real_mode_base; -struct real_mode_header real_mode_header; +struct real_mode_header *real_mode_header; void __init setup_real_mode(void) { @@ -17,33 +16,32 @@ void __init setup_real_mode(void) u32 *ptr; u16 *seg; int i; + unsigned char *base; - struct real_mode_header *header = - (struct real_mode_header *) real_mode_blob; - - size_t size = PAGE_ALIGN(header->end); + size_t size = PAGE_ALIGN(real_mode_blob_end - real_mode_blob); /* Has to be in very low memory so we can execute real-mode AP code. */ mem = memblock_find_in_range(0, 1<<20, size, PAGE_SIZE); if (!mem) panic("Cannot allocate trampoline\n"); - real_mode_base = __va(mem); + base = __va(mem); memblock_reserve(mem, size); + real_mode_header = (struct real_mode_header *) base; printk(KERN_DEBUG "Base memory trampoline at [%p] %llx size %zu\n", - real_mode_base, (unsigned long long)mem, size); + base, (unsigned long long)mem, size); - memcpy(real_mode_base, real_mode_blob, size); + memcpy(base, real_mode_blob, size); - real_mode_seg = __pa(real_mode_base) >> 4; + real_mode_seg = __pa(base) >> 4; rel = (u32 *) real_mode_relocs; /* 16-bit segment relocations. */ count = rel[0]; rel = &rel[1]; for (i = 0; i < count; i++) { - seg = (u16 *) (real_mode_base + rel[i]); + seg = (u16 *) (base + rel[i]); *seg = real_mode_seg; } @@ -51,25 +49,21 @@ void __init setup_real_mode(void) count = rel[i]; rel = &rel[i + 1]; for (i = 0; i < count; i++) { - ptr = (u32 *) (real_mode_base + rel[i]); - *ptr += __pa(real_mode_base); + ptr = (u32 *) (base + rel[i]); + *ptr += __pa(base); } - /* Copied header will contain relocated physical addresses. */ - memcpy(&real_mode_header, real_mode_base, - sizeof(struct real_mode_header)); - #ifdef CONFIG_X86_32 - *((u32 *)__va(real_mode_header.startup_32_smp)) = __pa(startup_32_smp); - *((u32 *)__va(real_mode_header.boot_gdt)) = __pa(boot_gdt); + *((u32 *)__va(real_mode_header->startup_32_smp)) = __pa(startup_32_smp); + *((u32 *)__va(real_mode_header->boot_gdt)) = __pa(boot_gdt); #else - *((u64 *) __va(real_mode_header.startup_64_smp)) = + *((u64 *) __va(real_mode_header->startup_64_smp)) = (u64)secondary_startup_64; - *((u64 *) __va(real_mode_header.level3_ident_pgt)) = + *((u64 *) __va(real_mode_header->level3_ident_pgt)) = __pa(level3_ident_pgt) + _KERNPG_TABLE; - *((u64 *) __va(real_mode_header.level3_kernel_pgt)) = + *((u64 *) __va(real_mode_header->level3_kernel_pgt)) = __pa(level3_kernel_pgt) + _KERNPG_TABLE; #endif } @@ -82,23 +76,22 @@ void __init setup_real_mode(void) */ static int __init set_real_mode_permissions(void) { - size_t all_size = - PAGE_ALIGN(real_mode_header.end) - - __pa(real_mode_base); + unsigned char *base = (unsigned char *) real_mode_header; + size_t size = PAGE_ALIGN(real_mode_blob_end - real_mode_blob); size_t ro_size = - PAGE_ALIGN(real_mode_header.ro_end) - - __pa(real_mode_base); + PAGE_ALIGN(real_mode_header->ro_end) - + __pa(base); size_t text_size = - PAGE_ALIGN(real_mode_header.ro_end) - - real_mode_header.text_start; + PAGE_ALIGN(real_mode_header->ro_end) - + real_mode_header->text_start; unsigned long text_start = - (unsigned long) __va(real_mode_header.text_start); + (unsigned long) __va(real_mode_header->text_start); - set_memory_nx((unsigned long) real_mode_base, all_size >> PAGE_SHIFT); - set_memory_ro((unsigned long) real_mode_base, ro_size >> PAGE_SHIFT); + set_memory_nx((unsigned long) base, size >> PAGE_SHIFT); + set_memory_ro((unsigned long) base, ro_size >> PAGE_SHIFT); set_memory_x((unsigned long) text_start, text_size >> PAGE_SHIFT); return 0; diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 050eff2..658f856 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -336,7 +336,7 @@ core_initcall(reboot_init); void machine_real_restart(unsigned int type) { void (*restart_lowmem)(unsigned int) = (void (*)(unsigned int)) - real_mode_header.machine_real_restart_asm; + real_mode_header->machine_real_restart_asm; local_irq_disable(); diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index c7971ea..b8c0661 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -665,9 +665,9 @@ static void __cpuinit announce_cpu(int cpu, int apicid) static int __cpuinit do_boot_cpu(int apicid, int cpu) { volatile u32 *trampoline_status = - (volatile u32 *) __va(real_mode_header.trampoline_status); + (volatile u32 *) __va(real_mode_header->trampoline_status); /* start_ip had better be page-aligned! */ - unsigned long start_ip = real_mode_header.trampoline_data; + unsigned long start_ip = real_mode_header->trampoline_data; unsigned long boot_error = 0; int timeout; diff --git a/arch/x86/kernel/tboot.c b/arch/x86/kernel/tboot.c index c136e23..65adda4 100644 --- a/arch/x86/kernel/tboot.c +++ b/arch/x86/kernel/tboot.c @@ -202,7 +202,7 @@ static int tboot_setup_sleep(void) } tboot->acpi_sinfo.kernel_s3_resume_vector = - real_mode_header.wakeup_start; + real_mode_header->wakeup_start; return 0; } diff --git a/arch/x86/realmode/rm/header.S b/arch/x86/realmode/rm/header.S index a91ec8f..c83005c 100644 --- a/arch/x86/realmode/rm/header.S +++ b/arch/x86/realmode/rm/header.S @@ -12,7 +12,6 @@ GLOBAL(real_mode_header) .long pa_text_start .long pa_ro_end - .long pa_end #ifdef CONFIG_X86_32 .long pa_machine_real_restart_asm #endif diff --git a/arch/x86/realmode/rm/realmode.lds.S b/arch/x86/realmode/rm/realmode.lds.S index 4d4afca..86b2e8d 100644 --- a/arch/x86/realmode/rm/realmode.lds.S +++ b/arch/x86/realmode/rm/realmode.lds.S @@ -65,7 +65,6 @@ SECTIONS .signature : { *(.signature) } - pa_end = .; /DISCARD/ : { *(.note*) diff --git a/arch/x86/realmode/rmpiggy.S b/arch/x86/realmode/rmpiggy.S index fd72a99..204c6ec 100644 --- a/arch/x86/realmode/rmpiggy.S +++ b/arch/x86/realmode/rmpiggy.S @@ -13,6 +13,8 @@ GLOBAL(real_mode_blob) .incbin "arch/x86/realmode/rm/realmode.bin" END(real_mode_blob) +GLOBAL(real_mode_blob_end); + GLOBAL(real_mode_relocs) .incbin "arch/x86/realmode/rm/realmode.relocs" END(real_mode_relocs) diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c index e77aa4a..0613900 100644 --- a/drivers/acpi/sleep.c +++ b/drivers/acpi/sleep.c @@ -93,7 +93,7 @@ static struct notifier_block tts_notifier = { static int acpi_sleep_prepare(u32 acpi_state) { #ifdef CONFIG_ACPI_SLEEP - unsigned long wakeup_pa = real_mode_header.wakeup_start; + unsigned long wakeup_pa = real_mode_header->wakeup_start; /* do we have a wakeup address for S2 and S3? */ if (acpi_state == ACPI_STATE_S3) { if (!wakeup_pa) -- cgit v0.10.2 From c4845474a01f699966272536e8416222e3f2d2cb Mon Sep 17 00:00:00 2001 From: Jarkko Sakkinen Date: Tue, 8 May 2012 21:22:42 +0300 Subject: x86, realmode: flattened rm hierachy Simplified hierarchy under rm directory to a flat directory because it is not anymore really justified to have own directory for wakeup code. It only adds more complexity. Signed-off-by: Jarkko Sakkinen Link: http://lkml.kernel.org/r/1336501366-28617-20-git-send-email-jarkko.sakkinen@intel.com Signed-off-by: H. Peter Anvin diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c index 6ca3f54..95bf99d 100644 --- a/arch/x86/kernel/acpi/sleep.c +++ b/arch/x86/kernel/acpi/sleep.c @@ -16,7 +16,7 @@ #include #include -#include "../../realmode/rm/wakeup/wakeup.h" +#include "../../realmode/rm/wakeup.h" #include "sleep.h" unsigned long acpi_realmode_flags; diff --git a/arch/x86/realmode/rm/Makefile b/arch/x86/realmode/rm/Makefile index c2c27a4..fc8854b 100644 --- a/arch/x86/realmode/rm/Makefile +++ b/arch/x86/realmode/rm/Makefile @@ -7,21 +7,26 @@ # # -subdir- := wakeup - always := realmode.bin realmode-y += header.o realmode-y += trampoline_$(BITS).o realmode-y += stack.o realmode-$(CONFIG_X86_32) += reboot_32.o -realmode-$(CONFIG_ACPI_SLEEP) += wakeup/wakeup.o +realmode-$(CONFIG_ACPI_SLEEP) += $(wakeup-objs) + +wakeup-objs := wakeup_asm.o wakemain.o video-mode.o +wakeup-objs += copy.o bioscall.o regs.o +# The link order of the video-*.o modules can matter. In particular, +# video-vga.o *must* be listed first, followed by video-vesa.o. +# Hardware-specific drivers should follow in the order they should be +# probed, and video-bios.o should typically be last. +wakeup-objs += video-vga.o +wakeup-objs += video-vesa.o +wakeup-objs += video-bios.o targets += $(realmode-y) -$(obj)/wakeup/wakeup.o: FORCE - $(Q)$(MAKE) $(build)=$(obj)/wakeup $@ - REALMODE_OBJS = $(addprefix $(obj)/,$(realmode-y)) sed-pasyms := -n -r -e 's/^([0-9a-fA-F]+) [ABCDGRSTVW] (.+)$$/pa_\2 = \2;/p' @@ -55,7 +60,8 @@ $(obj)/realmode.relocs: $(obj)/realmode.elf FORCE # How to compile the 16-bit code. Note we always compile for -march=i386, # that way we can complain to the user if the CPU is insufficient. -KBUILD_CFLAGS := $(LINUXINCLUDE) -m32 -g -Os -D_SETUP -D__KERNEL__ \ +KBUILD_CFLAGS := $(LINUXINCLUDE) -m32 -g -Os -D_SETUP -D__KERNEL__ -D_WAKEUP \ + -I$(srctree)/arch/x86/boot \ -DDISABLE_BRANCH_PROFILING \ -Wall -Wstrict-prototypes \ -march=i386 -mregparm=3 \ diff --git a/arch/x86/realmode/rm/bioscall.S b/arch/x86/realmode/rm/bioscall.S new file mode 100644 index 0000000..16162d1 --- /dev/null +++ b/arch/x86/realmode/rm/bioscall.S @@ -0,0 +1 @@ +#include "../../boot/bioscall.S" diff --git a/arch/x86/realmode/rm/copy.S b/arch/x86/realmode/rm/copy.S new file mode 100644 index 0000000..b785e6f --- /dev/null +++ b/arch/x86/realmode/rm/copy.S @@ -0,0 +1 @@ +#include "../../boot/copy.S" diff --git a/arch/x86/realmode/rm/regs.c b/arch/x86/realmode/rm/regs.c new file mode 100644 index 0000000..fbb15b9 --- /dev/null +++ b/arch/x86/realmode/rm/regs.c @@ -0,0 +1 @@ +#include "../../boot/regs.c" diff --git a/arch/x86/realmode/rm/video-bios.c b/arch/x86/realmode/rm/video-bios.c new file mode 100644 index 0000000..848b25a --- /dev/null +++ b/arch/x86/realmode/rm/video-bios.c @@ -0,0 +1 @@ +#include "../../boot/video-bios.c" diff --git a/arch/x86/realmode/rm/video-mode.c b/arch/x86/realmode/rm/video-mode.c new file mode 100644 index 0000000..2a98b7e --- /dev/null +++ b/arch/x86/realmode/rm/video-mode.c @@ -0,0 +1 @@ +#include "../../boot/video-mode.c" diff --git a/arch/x86/realmode/rm/video-vesa.c b/arch/x86/realmode/rm/video-vesa.c new file mode 100644 index 0000000..413eddd --- /dev/null +++ b/arch/x86/realmode/rm/video-vesa.c @@ -0,0 +1 @@ +#include "../../boot/video-vesa.c" diff --git a/arch/x86/realmode/rm/video-vga.c b/arch/x86/realmode/rm/video-vga.c new file mode 100644 index 0000000..3085f5c --- /dev/null +++ b/arch/x86/realmode/rm/video-vga.c @@ -0,0 +1 @@ +#include "../../boot/video-vga.c" diff --git a/arch/x86/realmode/rm/wakemain.c b/arch/x86/realmode/rm/wakemain.c new file mode 100644 index 0000000..91405d5 --- /dev/null +++ b/arch/x86/realmode/rm/wakemain.c @@ -0,0 +1,82 @@ +#include "wakeup.h" +#include "boot.h" + +static void udelay(int loops) +{ + while (loops--) + io_delay(); /* Approximately 1 us */ +} + +static void beep(unsigned int hz) +{ + u8 enable; + + if (!hz) { + enable = 0x00; /* Turn off speaker */ + } else { + u16 div = 1193181/hz; + + outb(0xb6, 0x43); /* Ctr 2, squarewave, load, binary */ + io_delay(); + outb(div, 0x42); /* LSB of counter */ + io_delay(); + outb(div >> 8, 0x42); /* MSB of counter */ + io_delay(); + + enable = 0x03; /* Turn on speaker */ + } + inb(0x61); /* Dummy read of System Control Port B */ + io_delay(); + outb(enable, 0x61); /* Enable timer 2 output to speaker */ + io_delay(); +} + +#define DOT_HZ 880 +#define DASH_HZ 587 +#define US_PER_DOT 125000 + +/* Okay, this is totally silly, but it's kind of fun. */ +static void send_morse(const char *pattern) +{ + char s; + + while ((s = *pattern++)) { + switch (s) { + case '.': + beep(DOT_HZ); + udelay(US_PER_DOT); + beep(0); + udelay(US_PER_DOT); + break; + case '-': + beep(DASH_HZ); + udelay(US_PER_DOT * 3); + beep(0); + udelay(US_PER_DOT); + break; + default: /* Assume it's a space */ + udelay(US_PER_DOT * 3); + break; + } + } +} + +void main(void) +{ + /* Kill machine if structures are wrong */ + if (wakeup_header.real_magic != 0x12345678) + while (1) + ; + + if (wakeup_header.realmode_flags & 4) + send_morse("...-"); + + if (wakeup_header.realmode_flags & 1) + asm volatile("lcallw $0xc000,$3"); + + if (wakeup_header.realmode_flags & 2) { + /* Need to call BIOS */ + probe_cards(0); + set_mode(wakeup_header.video_mode); + } +} diff --git a/arch/x86/realmode/rm/wakeup.h b/arch/x86/realmode/rm/wakeup.h new file mode 100644 index 0000000..2dfaf06 --- /dev/null +++ b/arch/x86/realmode/rm/wakeup.h @@ -0,0 +1,41 @@ +/* + * Definitions for the wakeup data structure at the head of the + * wakeup code. + */ + +#ifndef ARCH_X86_KERNEL_ACPI_RM_WAKEUP_H +#define ARCH_X86_KERNEL_ACPI_RM_WAKEUP_H + +#ifndef __ASSEMBLY__ +#include + +/* This must match data at wakeup.S */ +struct wakeup_header { + u16 video_mode; /* Video mode number */ + u32 pmode_entry; /* Protected mode resume point, 32-bit only */ + u16 pmode_cs; + u32 pmode_cr0; /* Protected mode cr0 */ + u32 pmode_cr3; /* Protected mode cr3 */ + u32 pmode_cr4; /* Protected mode cr4 */ + u32 pmode_efer_low; /* Protected mode EFER */ + u32 pmode_efer_high; + u64 pmode_gdt; + u32 pmode_misc_en_low; /* Protected mode MISC_ENABLE */ + u32 pmode_misc_en_high; + u32 pmode_behavior; /* Wakeup routine behavior flags */ + u32 realmode_flags; + u32 real_magic; + u32 signature; /* To check we have correct structure */ +} __attribute__((__packed__)); + +extern struct wakeup_header wakeup_header; +#endif + +#define WAKEUP_HEADER_OFFSET 8 +#define WAKEUP_HEADER_SIGNATURE 0x51ee1111 +#define WAKEUP_END_SIGNATURE 0x65a22c82 + +/* Wakeup behavior bits */ +#define WAKEUP_BEHAVIOR_RESTORE_MISC_ENABLE 0 + +#endif /* ARCH_X86_KERNEL_ACPI_RM_WAKEUP_H */ diff --git a/arch/x86/realmode/rm/wakeup/.gitignore b/arch/x86/realmode/rm/wakeup/.gitignore deleted file mode 100644 index 58f1f48..0000000 --- a/arch/x86/realmode/rm/wakeup/.gitignore +++ /dev/null @@ -1,3 +0,0 @@ -wakeup.bin -wakeup.elf -wakeup.lds diff --git a/arch/x86/realmode/rm/wakeup/Makefile b/arch/x86/realmode/rm/wakeup/Makefile deleted file mode 100644 index 4c85332..0000000 --- a/arch/x86/realmode/rm/wakeup/Makefile +++ /dev/null @@ -1,33 +0,0 @@ -# -# arch/x86/kernel/acpi/realmode/Makefile -# -# This file is subject to the terms and conditions of the GNU General Public -# License. See the file "COPYING" in the main directory of this archive -# for more details. -# - -always := wakeup.o - -wakeup-y += wakeup_asm.o wakemain.o video-mode.o -wakeup-y += copy.o bioscall.o regs.o - -# The link order of the video-*.o modules can matter. In particular, -# video-vga.o *must* be listed first, followed by video-vesa.o. -# Hardware-specific drivers should follow in the order they should be -# probed, and video-bios.o should typically be last. -wakeup-y += video-vga.o -wakeup-y += video-vesa.o -wakeup-y += video-bios.o - -targets += $(wakeup-y) - -WAKEUP_OBJS = $(addprefix $(obj)/,$(wakeup-y)) - -LDFLAGS_wakeup.o := -m elf_i386 -r -$(obj)/wakeup.o: $(WAKEUP_OBJS) FORCE - $(call if_changed,ld) - -bootsrc := $(src)/../../../boot - -ccflags-y += -D_WAKEUP -I$(srctree)/$(bootsrc) -asflags-y += -D_WAKEUP -I$(srctree)/$(bootsrc) diff --git a/arch/x86/realmode/rm/wakeup/bioscall.S b/arch/x86/realmode/rm/wakeup/bioscall.S deleted file mode 100644 index f51eb0b..0000000 --- a/arch/x86/realmode/rm/wakeup/bioscall.S +++ /dev/null @@ -1 +0,0 @@ -#include "../../../boot/bioscall.S" diff --git a/arch/x86/realmode/rm/wakeup/copy.S b/arch/x86/realmode/rm/wakeup/copy.S deleted file mode 100644 index dc59ebe..0000000 --- a/arch/x86/realmode/rm/wakeup/copy.S +++ /dev/null @@ -1 +0,0 @@ -#include "../../../boot/copy.S" diff --git a/arch/x86/realmode/rm/wakeup/regs.c b/arch/x86/realmode/rm/wakeup/regs.c deleted file mode 100644 index 6206033..0000000 --- a/arch/x86/realmode/rm/wakeup/regs.c +++ /dev/null @@ -1 +0,0 @@ -#include "../../../boot/regs.c" diff --git a/arch/x86/realmode/rm/wakeup/video-bios.c b/arch/x86/realmode/rm/wakeup/video-bios.c deleted file mode 100644 index 7deabc1..0000000 --- a/arch/x86/realmode/rm/wakeup/video-bios.c +++ /dev/null @@ -1 +0,0 @@ -#include "../../../boot/video-bios.c" diff --git a/arch/x86/realmode/rm/wakeup/video-mode.c b/arch/x86/realmode/rm/wakeup/video-mode.c deleted file mode 100644 index 328ad20..0000000 --- a/arch/x86/realmode/rm/wakeup/video-mode.c +++ /dev/null @@ -1 +0,0 @@ -#include "../../../boot/video-mode.c" diff --git a/arch/x86/realmode/rm/wakeup/video-vesa.c b/arch/x86/realmode/rm/wakeup/video-vesa.c deleted file mode 100644 index 9dbb967..0000000 --- a/arch/x86/realmode/rm/wakeup/video-vesa.c +++ /dev/null @@ -1 +0,0 @@ -#include "../../../boot/video-vesa.c" diff --git a/arch/x86/realmode/rm/wakeup/video-vga.c b/arch/x86/realmode/rm/wakeup/video-vga.c deleted file mode 100644 index bcc8125..0000000 --- a/arch/x86/realmode/rm/wakeup/video-vga.c +++ /dev/null @@ -1 +0,0 @@ -#include "../../../boot/video-vga.c" diff --git a/arch/x86/realmode/rm/wakeup/wakemain.c b/arch/x86/realmode/rm/wakeup/wakemain.c deleted file mode 100644 index 91405d5..0000000 --- a/arch/x86/realmode/rm/wakeup/wakemain.c +++ /dev/null @@ -1,82 +0,0 @@ -#include "wakeup.h" -#include "boot.h" - -static void udelay(int loops) -{ - while (loops--) - io_delay(); /* Approximately 1 us */ -} - -static void beep(unsigned int hz) -{ - u8 enable; - - if (!hz) { - enable = 0x00; /* Turn off speaker */ - } else { - u16 div = 1193181/hz; - - outb(0xb6, 0x43); /* Ctr 2, squarewave, load, binary */ - io_delay(); - outb(div, 0x42); /* LSB of counter */ - io_delay(); - outb(div >> 8, 0x42); /* MSB of counter */ - io_delay(); - - enable = 0x03; /* Turn on speaker */ - } - inb(0x61); /* Dummy read of System Control Port B */ - io_delay(); - outb(enable, 0x61); /* Enable timer 2 output to speaker */ - io_delay(); -} - -#define DOT_HZ 880 -#define DASH_HZ 587 -#define US_PER_DOT 125000 - -/* Okay, this is totally silly, but it's kind of fun. */ -static void send_morse(const char *pattern) -{ - char s; - - while ((s = *pattern++)) { - switch (s) { - case '.': - beep(DOT_HZ); - udelay(US_PER_DOT); - beep(0); - udelay(US_PER_DOT); - break; - case '-': - beep(DASH_HZ); - udelay(US_PER_DOT * 3); - beep(0); - udelay(US_PER_DOT); - break; - default: /* Assume it's a space */ - udelay(US_PER_DOT * 3); - break; - } - } -} - -void main(void) -{ - /* Kill machine if structures are wrong */ - if (wakeup_header.real_magic != 0x12345678) - while (1) - ; - - if (wakeup_header.realmode_flags & 4) - send_morse("...-"); - - if (wakeup_header.realmode_flags & 1) - asm volatile("lcallw $0xc000,$3"); - - if (wakeup_header.realmode_flags & 2) { - /* Need to call BIOS */ - probe_cards(0); - set_mode(wakeup_header.video_mode); - } -} diff --git a/arch/x86/realmode/rm/wakeup/wakeup.h b/arch/x86/realmode/rm/wakeup/wakeup.h deleted file mode 100644 index 2dfaf06..0000000 --- a/arch/x86/realmode/rm/wakeup/wakeup.h +++ /dev/null @@ -1,41 +0,0 @@ -/* - * Definitions for the wakeup data structure at the head of the - * wakeup code. - */ - -#ifndef ARCH_X86_KERNEL_ACPI_RM_WAKEUP_H -#define ARCH_X86_KERNEL_ACPI_RM_WAKEUP_H - -#ifndef __ASSEMBLY__ -#include - -/* This must match data at wakeup.S */ -struct wakeup_header { - u16 video_mode; /* Video mode number */ - u32 pmode_entry; /* Protected mode resume point, 32-bit only */ - u16 pmode_cs; - u32 pmode_cr0; /* Protected mode cr0 */ - u32 pmode_cr3; /* Protected mode cr3 */ - u32 pmode_cr4; /* Protected mode cr4 */ - u32 pmode_efer_low; /* Protected mode EFER */ - u32 pmode_efer_high; - u64 pmode_gdt; - u32 pmode_misc_en_low; /* Protected mode MISC_ENABLE */ - u32 pmode_misc_en_high; - u32 pmode_behavior; /* Wakeup routine behavior flags */ - u32 realmode_flags; - u32 real_magic; - u32 signature; /* To check we have correct structure */ -} __attribute__((__packed__)); - -extern struct wakeup_header wakeup_header; -#endif - -#define WAKEUP_HEADER_OFFSET 8 -#define WAKEUP_HEADER_SIGNATURE 0x51ee1111 -#define WAKEUP_END_SIGNATURE 0x65a22c82 - -/* Wakeup behavior bits */ -#define WAKEUP_BEHAVIOR_RESTORE_MISC_ENABLE 0 - -#endif /* ARCH_X86_KERNEL_ACPI_RM_WAKEUP_H */ diff --git a/arch/x86/realmode/rm/wakeup/wakeup_asm.S b/arch/x86/realmode/rm/wakeup/wakeup_asm.S deleted file mode 100644 index f81c1cd..0000000 --- a/arch/x86/realmode/rm/wakeup/wakeup_asm.S +++ /dev/null @@ -1,181 +0,0 @@ -/* - * ACPI wakeup real mode startup stub - */ -#include -#include -#include -#include -#include -#include -#include "../realmode.h" -#include "wakeup.h" - - .code16 - -/* This should match the structure in wakeup.h */ - .section ".data", "aw" - - .balign 16 -GLOBAL(wakeup_header) - video_mode: .short 0 /* Video mode number */ - pmode_entry: .long 0 - pmode_cs: .short __KERNEL_CS - pmode_cr0: .long 0 /* Saved %cr0 */ - pmode_cr3: .long 0 /* Saved %cr3 */ - pmode_cr4: .long 0 /* Saved %cr4 */ - pmode_efer: .quad 0 /* Saved EFER */ - pmode_gdt: .quad 0 - pmode_misc_en: .quad 0 /* Saved MISC_ENABLE MSR */ - pmode_behavior: .long 0 /* Wakeup behavior flags */ - realmode_flags: .long 0 - real_magic: .long 0 - signature: .long WAKEUP_HEADER_SIGNATURE -END(wakeup_header) - - .text - .code16 - - .balign 16 -ENTRY(wakeup_start) - cli - cld - - LJMPW_RM(3f) -3: - /* Apparently some dimwit BIOS programmers don't know how to - program a PM to RM transition, and we might end up here with - junk in the data segment descriptor registers. The only way - to repair that is to go into PM and fix it ourselves... */ - movw $16, %cx - lgdtl %cs:wakeup_gdt - movl %cr0, %eax - orb $X86_CR0_PE, %al - movl %eax, %cr0 - ljmpw $8, $2f -2: - movw %cx, %ds - movw %cx, %es - movw %cx, %ss - movw %cx, %fs - movw %cx, %gs - - andb $~X86_CR0_PE, %al - movl %eax, %cr0 - LJMPW_RM(3f) -3: - /* Set up segments */ - movw %cs, %ax - movw %ax, %ss - movl $rm_stack_end, %esp - movw %ax, %ds - movw %ax, %es - movw %ax, %fs - movw %ax, %gs - - lidtl wakeup_idt - - /* Clear the EFLAGS */ - pushl $0 - popfl - - /* Check header signature... */ - movl signature, %eax - cmpl $WAKEUP_HEADER_SIGNATURE, %eax - jne bogus_real_magic - - /* Check we really have everything... */ - movl end_signature, %eax - cmpl $WAKEUP_END_SIGNATURE, %eax - jne bogus_real_magic - - /* Call the C code */ - calll main - - /* Restore MISC_ENABLE before entering protected mode, in case - BIOS decided to clear XD_DISABLE during S3. */ - movl pmode_behavior, %eax - btl $WAKEUP_BEHAVIOR_RESTORE_MISC_ENABLE, %eax - jnc 1f - - movl pmode_misc_en, %eax - movl pmode_misc_en + 4, %edx - movl $MSR_IA32_MISC_ENABLE, %ecx - wrmsr -1: - - /* Do any other stuff... */ - -#ifndef CONFIG_64BIT - /* This could also be done in C code... */ - movl pmode_cr3, %eax - movl %eax, %cr3 - - movl pmode_cr4, %ecx - jecxz 1f - movl %ecx, %cr4 -1: - movl pmode_efer, %eax - movl pmode_efer + 4, %edx - movl %eax, %ecx - orl %edx, %ecx - jz 1f - movl $MSR_EFER, %ecx - wrmsr -1: - - lgdtl pmode_gdt - - /* This really couldn't... */ - movl pmode_entry, %eax - movl pmode_cr0, %ecx - movl %ecx, %cr0 - ljmpl $__KERNEL_CS, $pa_startup_32 - /* -> jmp *%eax in trampoline_32.S */ -#else - jmp trampoline_data -#endif - -bogus_real_magic: -1: - hlt - jmp 1b - - .section ".rodata","a" - - /* - * Set up the wakeup GDT. We set these up as Big Real Mode, - * that is, with limits set to 4 GB. At least the Lenovo - * Thinkpad X61 is known to need this for the video BIOS - * initialization quirk to work; this is likely to also - * be the case for other laptops or integrated video devices. - */ - - .balign 16 -GLOBAL(wakeup_gdt) - .word 3*8-1 /* Self-descriptor */ - .long pa_wakeup_gdt - .word 0 - - .word 0xffff /* 16-bit code segment @ real_mode_base */ - .long 0x9b000000 + pa_real_mode_base - .word 0x008f /* big real mode */ - - .word 0xffff /* 16-bit data segment @ real_mode_base */ - .long 0x93000000 + pa_real_mode_base - .word 0x008f /* big real mode */ -END(wakeup_gdt) - - .section ".rodata","a" - .balign 8 - - /* This is the standard real-mode IDT */ - .balign 16 -GLOBAL(wakeup_idt) - .word 0xffff /* limit */ - .long 0 /* address */ - .word 0 -END(wakeup_idt) - - .section ".signature","a" -end_signature: - .long WAKEUP_END_SIGNATURE diff --git a/arch/x86/realmode/rm/wakeup_asm.S b/arch/x86/realmode/rm/wakeup_asm.S new file mode 100644 index 0000000..8a57c5a --- /dev/null +++ b/arch/x86/realmode/rm/wakeup_asm.S @@ -0,0 +1,181 @@ +/* + * ACPI wakeup real mode startup stub + */ +#include +#include +#include +#include +#include +#include +#include "realmode.h" +#include "wakeup.h" + + .code16 + +/* This should match the structure in wakeup.h */ + .section ".data", "aw" + + .balign 16 +GLOBAL(wakeup_header) + video_mode: .short 0 /* Video mode number */ + pmode_entry: .long 0 + pmode_cs: .short __KERNEL_CS + pmode_cr0: .long 0 /* Saved %cr0 */ + pmode_cr3: .long 0 /* Saved %cr3 */ + pmode_cr4: .long 0 /* Saved %cr4 */ + pmode_efer: .quad 0 /* Saved EFER */ + pmode_gdt: .quad 0 + pmode_misc_en: .quad 0 /* Saved MISC_ENABLE MSR */ + pmode_behavior: .long 0 /* Wakeup behavior flags */ + realmode_flags: .long 0 + real_magic: .long 0 + signature: .long WAKEUP_HEADER_SIGNATURE +END(wakeup_header) + + .text + .code16 + + .balign 16 +ENTRY(wakeup_start) + cli + cld + + LJMPW_RM(3f) +3: + /* Apparently some dimwit BIOS programmers don't know how to + program a PM to RM transition, and we might end up here with + junk in the data segment descriptor registers. The only way + to repair that is to go into PM and fix it ourselves... */ + movw $16, %cx + lgdtl %cs:wakeup_gdt + movl %cr0, %eax + orb $X86_CR0_PE, %al + movl %eax, %cr0 + ljmpw $8, $2f +2: + movw %cx, %ds + movw %cx, %es + movw %cx, %ss + movw %cx, %fs + movw %cx, %gs + + andb $~X86_CR0_PE, %al + movl %eax, %cr0 + LJMPW_RM(3f) +3: + /* Set up segments */ + movw %cs, %ax + movw %ax, %ss + movl $rm_stack_end, %esp + movw %ax, %ds + movw %ax, %es + movw %ax, %fs + movw %ax, %gs + + lidtl wakeup_idt + + /* Clear the EFLAGS */ + pushl $0 + popfl + + /* Check header signature... */ + movl signature, %eax + cmpl $WAKEUP_HEADER_SIGNATURE, %eax + jne bogus_real_magic + + /* Check we really have everything... */ + movl end_signature, %eax + cmpl $WAKEUP_END_SIGNATURE, %eax + jne bogus_real_magic + + /* Call the C code */ + calll main + + /* Restore MISC_ENABLE before entering protected mode, in case + BIOS decided to clear XD_DISABLE during S3. */ + movl pmode_behavior, %eax + btl $WAKEUP_BEHAVIOR_RESTORE_MISC_ENABLE, %eax + jnc 1f + + movl pmode_misc_en, %eax + movl pmode_misc_en + 4, %edx + movl $MSR_IA32_MISC_ENABLE, %ecx + wrmsr +1: + + /* Do any other stuff... */ + +#ifndef CONFIG_64BIT + /* This could also be done in C code... */ + movl pmode_cr3, %eax + movl %eax, %cr3 + + movl pmode_cr4, %ecx + jecxz 1f + movl %ecx, %cr4 +1: + movl pmode_efer, %eax + movl pmode_efer + 4, %edx + movl %eax, %ecx + orl %edx, %ecx + jz 1f + movl $MSR_EFER, %ecx + wrmsr +1: + + lgdtl pmode_gdt + + /* This really couldn't... */ + movl pmode_entry, %eax + movl pmode_cr0, %ecx + movl %ecx, %cr0 + ljmpl $__KERNEL_CS, $pa_startup_32 + /* -> jmp *%eax in trampoline_32.S */ +#else + jmp trampoline_data +#endif + +bogus_real_magic: +1: + hlt + jmp 1b + + .section ".rodata","a" + + /* + * Set up the wakeup GDT. We set these up as Big Real Mode, + * that is, with limits set to 4 GB. At least the Lenovo + * Thinkpad X61 is known to need this for the video BIOS + * initialization quirk to work; this is likely to also + * be the case for other laptops or integrated video devices. + */ + + .balign 16 +GLOBAL(wakeup_gdt) + .word 3*8-1 /* Self-descriptor */ + .long pa_wakeup_gdt + .word 0 + + .word 0xffff /* 16-bit code segment @ real_mode_base */ + .long 0x9b000000 + pa_real_mode_base + .word 0x008f /* big real mode */ + + .word 0xffff /* 16-bit data segment @ real_mode_base */ + .long 0x93000000 + pa_real_mode_base + .word 0x008f /* big real mode */ +END(wakeup_gdt) + + .section ".rodata","a" + .balign 8 + + /* This is the standard real-mode IDT */ + .balign 16 +GLOBAL(wakeup_idt) + .word 0xffff /* limit */ + .long 0 /* address */ + .word 0 +END(wakeup_idt) + + .section ".signature","a" +end_signature: + .long WAKEUP_END_SIGNATURE -- cgit v0.10.2 From f37240f16bec91f15ce564515f70a6ca9715ce96 Mon Sep 17 00:00:00 2001 From: Jarkko Sakkinen Date: Tue, 8 May 2012 21:22:43 +0300 Subject: x86, realmode: header for trampoline code Added header for trampoline code that can be used to supply input data to it. This makes interface between real mode code and kernel cleaner and simpler. Replaced two confusing pointers to level4 pgt in trampoline_64.S with a single pointer to the beginning of the page table. Signed-off-by: Jarkko Sakkinen Link: http://lkml.kernel.org/r/1336501366-28617-21-git-send-email-jarkko.sakkinen@intel.com Signed-off-by: H. Peter Anvin diff --git a/arch/x86/include/asm/realmode.h b/arch/x86/include/asm/realmode.h index d3ae49f..1421eed 100644 --- a/arch/x86/include/asm/realmode.h +++ b/arch/x86/include/asm/realmode.h @@ -8,25 +8,33 @@ struct real_mode_header { u32 text_start; u32 ro_end; - /* reboot */ -#ifdef CONFIG_X86_32 - u32 machine_real_restart_asm; -#endif /* SMP trampoline */ - u32 trampoline_data; + u32 trampoline_start; u32 trampoline_status; -#ifdef CONFIG_X86_32 - u32 startup_32_smp; - u32 boot_gdt; -#else - u32 startup_64_smp; - u32 level3_ident_pgt; - u32 level3_kernel_pgt; + u32 trampoline_header; +#ifdef CONFIG_X86_64 + u32 trampoline_pgd; #endif + /* ACPI S3 wakeup */ #ifdef CONFIG_ACPI_SLEEP u32 wakeup_start; u32 wakeup_header; #endif + /* APM/BIOS reboot */ +#ifdef CONFIG_X86_32 + u32 machine_real_restart_asm; +#endif +} __attribute__((__packed__)); + +/* This must match data at trampoline_32/64.S */ +struct trampoline_header { +#ifdef CONFIG_X86_32 + u32 start; + u16 gdt_limit; + u32 gdt_base; +#else + u64 start; +#endif } __attribute__((__packed__)); extern struct real_mode_header *real_mode_header; diff --git a/arch/x86/kernel/realmode.c b/arch/x86/kernel/realmode.c index 632c810..712fba8 100644 --- a/arch/x86/kernel/realmode.c +++ b/arch/x86/kernel/realmode.c @@ -17,8 +17,11 @@ void __init setup_real_mode(void) u16 *seg; int i; unsigned char *base; - + struct trampoline_header *trampoline_header; size_t size = PAGE_ALIGN(real_mode_blob_end - real_mode_blob); +#ifdef CONFIG_X86_64 + u64 *trampoline_pgd; +#endif /* Has to be in very low memory so we can execute real-mode AP code. */ mem = memblock_find_in_range(0, 1<<20, size, PAGE_SIZE); @@ -28,7 +31,6 @@ void __init setup_real_mode(void) base = __va(mem); memblock_reserve(mem, size); real_mode_header = (struct real_mode_header *) base; - printk(KERN_DEBUG "Base memory trampoline at [%p] %llx size %zu\n", base, (unsigned long long)mem, size); @@ -53,18 +55,19 @@ void __init setup_real_mode(void) *ptr += __pa(base); } + /* Must be perfomed *after* relocation. */ + trampoline_header = (struct trampoline_header *) + __va(real_mode_header->trampoline_header); + #ifdef CONFIG_X86_32 - *((u32 *)__va(real_mode_header->startup_32_smp)) = __pa(startup_32_smp); - *((u32 *)__va(real_mode_header->boot_gdt)) = __pa(boot_gdt); + trampoline_header->start = __pa(startup_32_smp); + trampoline_header->gdt_limit = __BOOT_DS + 7; + trampoline_header->gdt_base = __pa(boot_gdt); #else - *((u64 *) __va(real_mode_header->startup_64_smp)) = - (u64)secondary_startup_64; - - *((u64 *) __va(real_mode_header->level3_ident_pgt)) = - __pa(level3_ident_pgt) + _KERNPG_TABLE; - - *((u64 *) __va(real_mode_header->level3_kernel_pgt)) = - __pa(level3_kernel_pgt) + _KERNPG_TABLE; + trampoline_header->start = (u64) secondary_startup_64; + trampoline_pgd = (u64 *) __va(real_mode_header->trampoline_pgd); + trampoline_pgd[0] = __pa(level3_ident_pgt) + _KERNPG_TABLE; + trampoline_pgd[511] = __pa(level3_kernel_pgt) + _KERNPG_TABLE; #endif } diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index b8c0661..757c4b1 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -667,7 +667,7 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu) volatile u32 *trampoline_status = (volatile u32 *) __va(real_mode_header->trampoline_status); /* start_ip had better be page-aligned! */ - unsigned long start_ip = real_mode_header->trampoline_data; + unsigned long start_ip = real_mode_header->trampoline_start; unsigned long boot_error = 0; int timeout; diff --git a/arch/x86/realmode/rm/header.S b/arch/x86/realmode/rm/header.S index c83005c..b4c3263 100644 --- a/arch/x86/realmode/rm/header.S +++ b/arch/x86/realmode/rm/header.S @@ -7,28 +7,25 @@ #include #include - .section ".header", "a" + .section ".header", "a" GLOBAL(real_mode_header) - .long pa_text_start - .long pa_ro_end -#ifdef CONFIG_X86_32 - .long pa_machine_real_restart_asm -#endif - /* SMP trampoline */ - .long pa_trampoline_data - .long pa_trampoline_status -#ifdef CONFIG_X86_32 - .long pa_startup_32_smp - .long pa_boot_gdt -#else - .long pa_startup_64_smp - .long pa_level3_ident_pgt - .long pa_level3_kernel_pgt + .long pa_text_start + .long pa_ro_end + /* SMP trampoline */ + .long pa_trampoline_start + .long pa_trampoline_status + .long pa_trampoline_header +#ifdef CONFIG_X86_64 + .long pa_trampoline_pgd; #endif - /* ACPI sleep */ + /* ACPI S3 wakeup */ #ifdef CONFIG_ACPI_SLEEP - .long pa_wakeup_start - .long pa_wakeup_header + .long pa_wakeup_start + .long pa_wakeup_header +#endif + /* APM/BIOS reboot */ +#ifdef CONFIG_X86_32 + .long pa_machine_real_restart_asm #endif END(real_mode_header) diff --git a/arch/x86/realmode/rm/trampoline_32.S b/arch/x86/realmode/rm/trampoline_32.S index 1ecdbb5..6fc064b 100644 --- a/arch/x86/realmode/rm/trampoline_32.S +++ b/arch/x86/realmode/rm/trampoline_32.S @@ -13,16 +13,10 @@ * * We jump into arch/x86/kernel/head_32.S. * - * On entry to trampoline_data, the processor is in real mode + * On entry to trampoline_start, the processor is in real mode * with 16-bit addressing and 16-bit data. CS has some value * and IP is zero. Thus, we load CS to the physical segment * of the real mode code before doing anything further. - * - * The structure real_mode_header includes entries that need - * to be set up before executing this code: - * - * startup_32_smp - * boot_gdt */ #include @@ -35,7 +29,7 @@ .code16 .balign PAGE_SIZE -ENTRY(trampoline_data) +ENTRY(trampoline_start) wbinvd # Needed for NUMA-Q should be harmless for others LJMPW_RM(1f) @@ -45,7 +39,7 @@ ENTRY(trampoline_data) cli # We should be safe anyway - movl startup_32_smp, %eax # where we need to go + movl tr_start, %eax # where we need to go movl $0xA5A5A5A5, trampoline_status # write marker for master knows we're running @@ -56,8 +50,8 @@ ENTRY(trampoline_data) * operand size is 16bit. Use lgdtl instead to force operand size * to 32 bit. */ - lidtl boot_idt_descr # load idt with 0, 0 - lgdtl boot_gdt_descr # load gdt with whatever is appropriate + lidtl tr_idt # load idt with 0, 0 + lgdtl tr_gdt # load gdt with whatever is appropriate movw $1, %dx # protected mode (PE) bit lmsw %dx # into protected mode @@ -69,22 +63,4 @@ ENTRY(trampoline_data) ENTRY(startup_32) # note: also used from wakeup_asm.S jmp *%eax - .section ".rodata","a" - - .balign 4 -boot_idt_descr: - .word 0 # idt limit = 0 - .long 0 # idt base = 0L - - .data - -boot_gdt_descr: - .word __BOOT_DS + 7 # gdt limit -GLOBAL(boot_gdt) - .long 0 # gdt base - - .bss - - .balign 4 -GLOBAL(trampoline_status) .space 4 -GLOBAL(startup_32_smp) .space 4 +#include "trampoline_common.S" diff --git a/arch/x86/realmode/rm/trampoline_64.S b/arch/x86/realmode/rm/trampoline_64.S index f71ea08..3f72932 100644 --- a/arch/x86/realmode/rm/trampoline_64.S +++ b/arch/x86/realmode/rm/trampoline_64.S @@ -10,7 +10,7 @@ * trampoline page to make our stack and everything else * is a mystery. * - * On entry to trampoline_data, the processor is in real mode + * On entry to trampoline_start, the processor is in real mode * with 16-bit addressing and 16-bit data. CS has some value * and IP is zero. Thus, data addresses need to be absolute * (no relocation) and are taken with regard to r_base. @@ -37,7 +37,7 @@ .balign PAGE_SIZE .code16 -ENTRY(trampoline_data) +ENTRY(trampoline_start) cli # We should be safe anyway wbinvd @@ -97,7 +97,7 @@ ENTRY(startup_32) movl %eax, %cr4 # Enable PAE mode # Setup trampoline 4 level pagetables - movl $pa_level3_ident_pgt, %eax + movl $pa_trampoline_pgd, %eax movl %eax, %cr3 movl $MSR_EFER, %ecx @@ -122,7 +122,7 @@ ENTRY(startup_32) .balign 4 ENTRY(startup_64) # Now jump into the kernel using virtual addresses - jmpq *startup_64_smp(%rip) + jmpq *tr_start(%rip) .section ".rodata","a" .balign 16 @@ -143,12 +143,4 @@ tgdt: .quad 0x00cf93000000ffff # __KERNEL_DS tgdt_end: - .bss - - .balign PAGE_SIZE -GLOBAL(level3_ident_pgt) .space 511*8 -GLOBAL(level3_kernel_pgt) .space 8 - - .balign 8 -GLOBAL(startup_64_smp) .space 8 -GLOBAL(trampoline_status) .space 4 +#include "trampoline_common.S" diff --git a/arch/x86/realmode/rm/trampoline_common.S b/arch/x86/realmode/rm/trampoline_common.S new file mode 100644 index 0000000..c3f951c --- /dev/null +++ b/arch/x86/realmode/rm/trampoline_common.S @@ -0,0 +1,23 @@ + .section ".rodata","a" + + .balign 4 +tr_idt: .fill 1, 6, 0 + + .bss + + .balign 4 +GLOBAL(trampoline_status) .space 4 + +GLOBAL(trampoline_header) +#ifdef CONFIG_X86_32 + tr_start: .space 4 + tr_gdt: .space 6 +#else + tr_start: .space 8 +#endif +END(trampoline_header) + +#ifdef CONFIG_X86_64 + .balign PAGE_SIZE +GLOBAL(trampoline_pgd) .space PAGE_SIZE +#endif diff --git a/arch/x86/realmode/rm/wakeup_asm.S b/arch/x86/realmode/rm/wakeup_asm.S index 8a57c5a..46108f0 100644 --- a/arch/x86/realmode/rm/wakeup_asm.S +++ b/arch/x86/realmode/rm/wakeup_asm.S @@ -132,7 +132,7 @@ ENTRY(wakeup_start) ljmpl $__KERNEL_CS, $pa_startup_32 /* -> jmp *%eax in trampoline_32.S */ #else - jmp trampoline_data + jmp trampoline_start #endif bogus_real_magic: -- cgit v0.10.2 From 87d0bab2cba3c31624b80cc68bcf5e29ef969458 Mon Sep 17 00:00:00 2001 From: Hiroshi DOYU Date: Mon, 7 May 2012 12:24:48 +0200 Subject: ARM: tegra: Add Tegra AHB driver Tegra AHB Bus conforms to the AMBA Specification (Rev 2.0) Advanced High-performance Bus (AHB) architecture. The AHB Arbiter controls AHB bus master arbitration. This effectively forms a second level of arbitration for access to the memory controller through the AHB Slave Memory device. The AHB pre-fetch logic can be configured to enhance performance for devices doing sequential access. Each AHB master is assigned to either the high or low priority bin. Both Tegra20/30 have this AHB bus. Some of configuration params could be passed from DT too if needed. Signed-off-by: Hiroshi DOYU Acked-by: Arnd Bergmann Cc: Felipe Balbi Signed-off-by: Stephen Warren diff --git a/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra20-ahb.txt b/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra20-ahb.txt new file mode 100644 index 0000000..234406d --- /dev/null +++ b/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra20-ahb.txt @@ -0,0 +1,11 @@ +NVIDIA Tegra AHB + +Required properties: +- compatible : "nvidia,tegra20-ahb" or "nvidia,tegra30-ahb" +- reg : Should contain 1 register ranges(address and length) + +Example: + ahb: ahb@6000c004 { + compatible = "nvidia,tegra20-ahb"; + reg = <0x6000c004 0x10c>; /* AHB Arbitration + Gizmo Controller */ + }; diff --git a/arch/arm/mach-tegra/Kconfig b/arch/arm/mach-tegra/Kconfig index d0f2546..8bf27b5 100644 --- a/arch/arm/mach-tegra/Kconfig +++ b/arch/arm/mach-tegra/Kconfig @@ -50,6 +50,14 @@ config TEGRA_PCI depends on ARCH_TEGRA_2x_SOC select PCI +config TEGRA_AHB + bool "Enable AHB driver for NVIDIA Tegra SoCs" + default y + help + Adds AHB configuration functionality for NVIDIA Tegra SoCs, + which controls AHB bus master arbitration and some + perfomance parameters(priority, prefech size). + comment "Tegra board type" config MACH_HARMONY diff --git a/drivers/Makefile b/drivers/Makefile index 95952c8..fd71763 100644 --- a/drivers/Makefile +++ b/drivers/Makefile @@ -18,7 +18,7 @@ obj-$(CONFIG_SFI) += sfi/ # PnP must come after ACPI since it will eventually need to check if acpi # was used and do nothing if so obj-$(CONFIG_PNP) += pnp/ -obj-$(CONFIG_ARM_AMBA) += amba/ +obj-y += amba/ # Many drivers will want to use DMA so this has to be made available # really early. obj-$(CONFIG_DMA_ENGINE) += dma/ diff --git a/drivers/amba/Makefile b/drivers/amba/Makefile index 40fe740..66e81c2 100644 --- a/drivers/amba/Makefile +++ b/drivers/amba/Makefile @@ -1,2 +1,2 @@ -obj-y += bus.o - +obj-$(CONFIG_ARM_AMBA) += bus.o +obj-$(CONFIG_TEGRA_AHB) += tegra-ahb.o diff --git a/drivers/amba/tegra-ahb.c b/drivers/amba/tegra-ahb.c new file mode 100644 index 0000000..106a780 --- /dev/null +++ b/drivers/amba/tegra-ahb.c @@ -0,0 +1,261 @@ +/* + * Copyright (c) 2012, NVIDIA CORPORATION. All rights reserved. + * Copyright (C) 2011 Google, Inc. + * + * Author: + * Jay Cheng + * James Wylder + * Benoit Goby + * Colin Cross + * Hiroshi DOYU + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#include +#include +#include +#include + +#define DRV_NAME "tegra-ahb" + +#define AHB_ARBITRATION_DISABLE 0x00 +#define AHB_ARBITRATION_PRIORITY_CTRL 0x04 +#define AHB_PRIORITY_WEIGHT(x) (((x) & 0x7) << 29) +#define PRIORITY_SELECT_USB BIT(6) +#define PRIORITY_SELECT_USB2 BIT(18) +#define PRIORITY_SELECT_USB3 BIT(17) + +#define AHB_GIZMO_AHB_MEM 0x0c +#define ENB_FAST_REARBITRATE BIT(2) +#define DONT_SPLIT_AHB_WR BIT(7) + +#define AHB_GIZMO_APB_DMA 0x10 +#define AHB_GIZMO_IDE 0x18 +#define AHB_GIZMO_USB 0x1c +#define AHB_GIZMO_AHB_XBAR_BRIDGE 0x20 +#define AHB_GIZMO_CPU_AHB_BRIDGE 0x24 +#define AHB_GIZMO_COP_AHB_BRIDGE 0x28 +#define AHB_GIZMO_XBAR_APB_CTLR 0x2c +#define AHB_GIZMO_VCP_AHB_BRIDGE 0x30 +#define AHB_GIZMO_NAND 0x3c +#define AHB_GIZMO_SDMMC4 0x44 +#define AHB_GIZMO_XIO 0x48 +#define AHB_GIZMO_BSEV 0x60 +#define AHB_GIZMO_BSEA 0x70 +#define AHB_GIZMO_NOR 0x74 +#define AHB_GIZMO_USB2 0x78 +#define AHB_GIZMO_USB3 0x7c +#define IMMEDIATE BIT(18) + +#define AHB_GIZMO_SDMMC1 0x80 +#define AHB_GIZMO_SDMMC2 0x84 +#define AHB_GIZMO_SDMMC3 0x88 +#define AHB_MEM_PREFETCH_CFG_X 0xd8 +#define AHB_ARBITRATION_XBAR_CTRL 0xdc +#define AHB_MEM_PREFETCH_CFG3 0xe0 +#define AHB_MEM_PREFETCH_CFG4 0xe4 +#define AHB_MEM_PREFETCH_CFG1 0xec +#define AHB_MEM_PREFETCH_CFG2 0xf0 +#define PREFETCH_ENB BIT(31) +#define MST_ID(x) (((x) & 0x1f) << 26) +#define AHBDMA_MST_ID MST_ID(5) +#define USB_MST_ID MST_ID(6) +#define USB2_MST_ID MST_ID(18) +#define USB3_MST_ID MST_ID(17) +#define ADDR_BNDRY(x) (((x) & 0xf) << 21) +#define INACTIVITY_TIMEOUT(x) (((x) & 0xffff) << 0) + +#define AHB_ARBITRATION_AHB_MEM_WRQUE_MST_ID 0xf8 + +static const u32 tegra_ahb_gizmo[] = { + AHB_ARBITRATION_DISABLE, + AHB_ARBITRATION_PRIORITY_CTRL, + AHB_GIZMO_AHB_MEM, + AHB_GIZMO_APB_DMA, + AHB_GIZMO_IDE, + AHB_GIZMO_USB, + AHB_GIZMO_AHB_XBAR_BRIDGE, + AHB_GIZMO_CPU_AHB_BRIDGE, + AHB_GIZMO_COP_AHB_BRIDGE, + AHB_GIZMO_XBAR_APB_CTLR, + AHB_GIZMO_VCP_AHB_BRIDGE, + AHB_GIZMO_NAND, + AHB_GIZMO_SDMMC4, + AHB_GIZMO_XIO, + AHB_GIZMO_BSEV, + AHB_GIZMO_BSEA, + AHB_GIZMO_NOR, + AHB_GIZMO_USB2, + AHB_GIZMO_USB3, + AHB_GIZMO_SDMMC1, + AHB_GIZMO_SDMMC2, + AHB_GIZMO_SDMMC3, + AHB_MEM_PREFETCH_CFG_X, + AHB_ARBITRATION_XBAR_CTRL, + AHB_MEM_PREFETCH_CFG3, + AHB_MEM_PREFETCH_CFG4, + AHB_MEM_PREFETCH_CFG1, + AHB_MEM_PREFETCH_CFG2, + AHB_ARBITRATION_AHB_MEM_WRQUE_MST_ID, +}; + +struct tegra_ahb { + void __iomem *regs; + struct device *dev; + u32 ctx[0]; +}; + +static inline u32 gizmo_readl(struct tegra_ahb *ahb, u32 offset) +{ + return readl(ahb->regs + offset); +} + +static inline void gizmo_writel(struct tegra_ahb *ahb, u32 value, u32 offset) +{ + writel(value, ahb->regs + offset); +} + +static int tegra_ahb_suspend(struct device *dev) +{ + int i; + struct tegra_ahb *ahb = dev_get_drvdata(dev); + + for (i = 0; i < ARRAY_SIZE(tegra_ahb_gizmo); i++) + ahb->ctx[i] = gizmo_readl(ahb, tegra_ahb_gizmo[i]); + return 0; +} + +static int tegra_ahb_resume(struct device *dev) +{ + int i; + struct tegra_ahb *ahb = dev_get_drvdata(dev); + + for (i = 0; i < ARRAY_SIZE(tegra_ahb_gizmo); i++) + gizmo_writel(ahb, ahb->ctx[i], tegra_ahb_gizmo[i]); + return 0; +} + +static UNIVERSAL_DEV_PM_OPS(tegra_ahb_pm, + tegra_ahb_suspend, + tegra_ahb_resume, NULL); + +static void tegra_ahb_gizmo_init(struct tegra_ahb *ahb) +{ + u32 val; + + val = gizmo_readl(ahb, AHB_GIZMO_AHB_MEM); + val |= ENB_FAST_REARBITRATE | IMMEDIATE | DONT_SPLIT_AHB_WR; + gizmo_writel(ahb, val, AHB_GIZMO_AHB_MEM); + + val = gizmo_readl(ahb, AHB_GIZMO_USB); + val |= IMMEDIATE; + gizmo_writel(ahb, val, AHB_GIZMO_USB); + + val = gizmo_readl(ahb, AHB_GIZMO_USB2); + val |= IMMEDIATE; + gizmo_writel(ahb, val, AHB_GIZMO_USB2); + + val = gizmo_readl(ahb, AHB_GIZMO_USB3); + val |= IMMEDIATE; + gizmo_writel(ahb, val, AHB_GIZMO_USB3); + + val = gizmo_readl(ahb, AHB_ARBITRATION_PRIORITY_CTRL); + val |= PRIORITY_SELECT_USB | + PRIORITY_SELECT_USB2 | + PRIORITY_SELECT_USB3 | + AHB_PRIORITY_WEIGHT(7); + gizmo_writel(ahb, val, AHB_ARBITRATION_PRIORITY_CTRL); + + val = gizmo_readl(ahb, AHB_MEM_PREFETCH_CFG1); + val &= ~MST_ID(~0); + val |= PREFETCH_ENB | + AHBDMA_MST_ID | + ADDR_BNDRY(0xc) | + INACTIVITY_TIMEOUT(0x1000); + gizmo_writel(ahb, val, AHB_MEM_PREFETCH_CFG1); + + val = gizmo_readl(ahb, AHB_MEM_PREFETCH_CFG2); + val &= ~MST_ID(~0); + val |= PREFETCH_ENB | + USB_MST_ID | + ADDR_BNDRY(0xc) | + INACTIVITY_TIMEOUT(0x1000); + gizmo_writel(ahb, val, AHB_MEM_PREFETCH_CFG2); + + val = gizmo_readl(ahb, AHB_MEM_PREFETCH_CFG3); + val &= ~MST_ID(~0); + val |= PREFETCH_ENB | + USB3_MST_ID | + ADDR_BNDRY(0xc) | + INACTIVITY_TIMEOUT(0x1000); + gizmo_writel(ahb, val, AHB_MEM_PREFETCH_CFG3); + + val = gizmo_readl(ahb, AHB_MEM_PREFETCH_CFG4); + val &= ~MST_ID(~0); + val |= PREFETCH_ENB | + USB2_MST_ID | + ADDR_BNDRY(0xc) | + INACTIVITY_TIMEOUT(0x1000); + gizmo_writel(ahb, val, AHB_MEM_PREFETCH_CFG4); +} + +static int __devinit tegra_ahb_probe(struct platform_device *pdev) +{ + struct resource *res; + struct tegra_ahb *ahb; + size_t bytes; + + bytes = sizeof(*ahb) + sizeof(u32) * ARRAY_SIZE(tegra_ahb_gizmo); + ahb = devm_kzalloc(&pdev->dev, bytes, GFP_KERNEL); + if (!ahb) + return -ENOMEM; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!res) + return -ENODEV; + ahb->regs = devm_request_and_ioremap(&pdev->dev, res); + if (!ahb->regs) + return -EBUSY; + + ahb->dev = &pdev->dev; + platform_set_drvdata(pdev, ahb); + tegra_ahb_gizmo_init(ahb); + return 0; +} + +static int __devexit tegra_ahb_remove(struct platform_device *pdev) +{ + return 0; +} + +static const struct of_device_id tegra_ahb_of_match[] __devinitconst = { + { .compatible = "nvidia,tegra30-ahb", }, + { .compatible = "nvidia,tegra20-ahb", }, + {}, +}; + +static struct platform_driver tegra_ahb_driver = { + .probe = tegra_ahb_probe, + .remove = __devexit_p(tegra_ahb_remove), + .driver = { + .name = DRV_NAME, + .owner = THIS_MODULE, + .of_match_table = tegra_ahb_of_match, + .pm = &tegra_ahb_pm, + }, +}; +module_platform_driver(tegra_ahb_driver); + +MODULE_AUTHOR("Hiroshi DOYU "); +MODULE_DESCRIPTION("Tegra AHB driver"); +MODULE_LICENSE("GPL v2"); +MODULE_ALIAS("platform:" DRV_NAME); -- cgit v0.10.2 From 89c788bab1f0374ae05aef7560066442ceb55384 Mon Sep 17 00:00:00 2001 From: Hiroshi DOYU Date: Mon, 7 May 2012 09:43:46 +0300 Subject: ARM: tegra: Add SMMU enabler in AHB Add extern func, "tegra_ahb_enable_smmu()" to inform AHB that SMMU is ready. Signed-off-by: Hiroshi DOYU Cc: Felipe Balbi Signed-off-by: Stephen Warren diff --git a/arch/arm/mach-tegra/include/mach/tegra-ahb.h b/arch/arm/mach-tegra/include/mach/tegra-ahb.h new file mode 100644 index 0000000..e0f8c84 --- /dev/null +++ b/arch/arm/mach-tegra/include/mach/tegra-ahb.h @@ -0,0 +1,19 @@ +/* + * Copyright (c) 2012, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#ifndef __MACH_TEGRA_AHB_H__ +#define __MACH_TEGRA_AHB_H__ + +extern int tegra_ahb_enable_smmu(struct device_node *ahb); + +#endif /* __MACH_TEGRA_AHB_H__ */ diff --git a/drivers/amba/tegra-ahb.c b/drivers/amba/tegra-ahb.c index 106a780..aa0b1f1 100644 --- a/drivers/amba/tegra-ahb.c +++ b/drivers/amba/tegra-ahb.c @@ -76,6 +76,10 @@ #define AHB_ARBITRATION_AHB_MEM_WRQUE_MST_ID 0xf8 +#define AHB_ARBITRATION_XBAR_CTRL_SMMU_INIT_DONE BIT(17) + +static struct platform_driver tegra_ahb_driver; + static const u32 tegra_ahb_gizmo[] = { AHB_ARBITRATION_DISABLE, AHB_ARBITRATION_PRIORITY_CTRL, @@ -124,6 +128,34 @@ static inline void gizmo_writel(struct tegra_ahb *ahb, u32 value, u32 offset) writel(value, ahb->regs + offset); } +#ifdef CONFIG_ARCH_TEGRA_3x_SOC +static int tegra_ahb_match_by_smmu(struct device *dev, void *data) +{ + struct tegra_ahb *ahb = dev_get_drvdata(dev); + struct device_node *dn = data; + + return (ahb->dev->of_node == dn) ? 1 : 0; +} + +int tegra_ahb_enable_smmu(struct device_node *dn) +{ + struct device *dev; + u32 val; + struct tegra_ahb *ahb; + + dev = driver_find_device(&tegra_ahb_driver.driver, NULL, dn, + tegra_ahb_match_by_smmu); + if (!dev) + return -EPROBE_DEFER; + ahb = dev_get_drvdata(dev); + val = gizmo_readl(ahb, AHB_ARBITRATION_XBAR_CTRL); + val |= AHB_ARBITRATION_XBAR_CTRL_SMMU_INIT_DONE; + gizmo_writel(ahb, val, AHB_ARBITRATION_XBAR_CTRL); + return 0; +} +EXPORT_SYMBOL(tegra_ahb_enable_smmu); +#endif + static int tegra_ahb_suspend(struct device *dev) { int i; -- cgit v0.10.2 From 63f5c3b2b18dcaca0fc8983b52a3f5d4d70a0590 Mon Sep 17 00:00:00 2001 From: Mike Turquette Date: Wed, 2 May 2012 16:23:43 -0700 Subject: clk: prevent spurious parent rate propagation Patch 'clk: always pass parent_rate into .round_rate' made a subtle change to the semantics of .round_rate. It is now expected for the parent's rate to always be passed in, simplifying the implemenation of various .round_rate callback definitions. However the patch also introduced a bug in clk_calc_new_rates whereby a clock without the CLK_SET_RATE_PARENT flag set could still propagate a rate change up to a parent clock if the the .round_rate callback modified the &best_parent_rate value in any way. This patch fixes the issue at the framework level (in clk_calc_new_rates) by specifically handling the case where the CLK_SET_RATE_PARENT flag is not set. Signed-off-by: Mike Turquette Acked-by: Sascha Hauer diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c index 8149764..7ceca0e 100644 --- a/drivers/clk/clk.c +++ b/drivers/clk/clk.c @@ -774,12 +774,18 @@ static struct clk *clk_calc_new_rates(struct clk *clk, unsigned long rate) if (IS_ERR_OR_NULL(clk)) return NULL; + /* save parent rate, if it exists */ + if (clk->parent) + best_parent_rate = clk->parent->rate; + /* never propagate up to the parent */ if (!(clk->flags & CLK_SET_RATE_PARENT)) { if (!clk->ops->round_rate) { clk->new_rate = clk->rate; return NULL; } + new_rate = clk->ops->round_rate(clk->hw, rate, &best_parent_rate); + goto out; } /* need clk->parent from here on out */ @@ -795,7 +801,6 @@ static struct clk *clk_calc_new_rates(struct clk *clk, unsigned long rate) goto out; } - best_parent_rate = clk->parent->rate; new_rate = clk->ops->round_rate(clk->hw, rate, &best_parent_rate); if (best_parent_rate != clk->parent->rate) { -- cgit v0.10.2 From d269b974e32c5dcf043acd07f9ad96e715019ffd Mon Sep 17 00:00:00 2001 From: Mike Turquette Date: Wed, 2 May 2012 15:45:32 -0700 Subject: clk: remove COMMON_CLK_DISABLE_UNUSED Exposing this option generates confusion and incorrect behavior for single-image builds across platforms. Enable this behavior permanently. Signed-off-by: Mike Turquette Acked-by: Saravana Kannan diff --git a/drivers/clk/Kconfig b/drivers/clk/Kconfig index f05a60d..4864407 100644 --- a/drivers/clk/Kconfig +++ b/drivers/clk/Kconfig @@ -23,17 +23,6 @@ config COMMON_CLK menu "Common Clock Framework" depends on COMMON_CLK -config COMMON_CLK_DISABLE_UNUSED - bool "Disabled unused clocks at boot" - depends on COMMON_CLK - ---help--- - Traverses the entire clock tree and disables any clocks that are - enabled in hardware but have not been enabled by any device drivers. - This saves power and keeps the software model of the clock in line - with reality. - - If in doubt, say "N". - config COMMON_CLK_DEBUG bool "DebugFS representation of clock tree" depends on COMMON_CLK diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c index 7ceca0e..e5d5dc1 100644 --- a/drivers/clk/clk.c +++ b/drivers/clk/clk.c @@ -196,7 +196,6 @@ late_initcall(clk_debug_init); static inline int clk_debug_register(struct clk *clk) { return 0; } #endif -#ifdef CONFIG_COMMON_CLK_DISABLE_UNUSED /* caller must hold prepare_lock */ static void clk_disable_unused_subtree(struct clk *clk) { @@ -246,7 +245,6 @@ static int clk_disable_unused(void) return 0; } late_initcall(clk_disable_unused); -#endif /*** helper functions ***/ -- cgit v0.10.2 From 31df9db99549cd29bbe5e32da4492970e6f97191 Mon Sep 17 00:00:00 2001 From: Mike Turquette Date: Sun, 6 May 2012 18:48:11 -0700 Subject: clk: mux: assign init data The original conversion to struct clk_hw_init failed to add the pointer assignment in clk_register_mux. Signed-off-by: Mike Turquette Reported-by: Sascha Hauer diff --git a/drivers/clk/clk-mux.c b/drivers/clk/clk-mux.c index 8e97491..fd36a8e 100644 --- a/drivers/clk/clk-mux.c +++ b/drivers/clk/clk-mux.c @@ -116,6 +116,7 @@ struct clk *clk_register_mux(struct device *dev, const char *name, mux->width = width; mux->flags = clk_mux_flags; mux->lock = lock; + mux->hw.init = &init; clk = clk_register(dev, &mux->hw); -- cgit v0.10.2 From f0948f59dbc8e725a96ba16da666e8f5cdd43ba8 Mon Sep 17 00:00:00 2001 From: Sascha Hauer Date: Thu, 3 May 2012 15:36:14 +0530 Subject: clk: add a fixed factor clock Having fixed factors/dividers in hardware is a common pattern, so add a basic clock type doing this. It basically describes a fixed factor clock using a nominator and a denominator. Signed-off-by: Sascha Hauer Reviewed-by: Viresh Kumar Tested-by: Shawn Guo [mturquette@linaro.org: constify parent_names in static init macro] [mturquette@linaro.org: copy/paste bug from mux in static init macro] [mturquette@linaro.org: fix error handling in clk_register_fixed_factor] [mturquette@linaro.org: improve division accuracy; thanks to Saravana] Signed-off-by: Mike Turquette diff --git a/drivers/clk/Makefile b/drivers/clk/Makefile index 1f736bc..24aa714 100644 --- a/drivers/clk/Makefile +++ b/drivers/clk/Makefile @@ -1,4 +1,4 @@ obj-$(CONFIG_CLKDEV_LOOKUP) += clkdev.o obj-$(CONFIG_COMMON_CLK) += clk.o clk-fixed-rate.o clk-gate.o \ - clk-mux.o clk-divider.o + clk-mux.o clk-divider.o clk-fixed-factor.o diff --git a/drivers/clk/clk-fixed-factor.c b/drivers/clk/clk-fixed-factor.c new file mode 100644 index 0000000..c8c003e --- /dev/null +++ b/drivers/clk/clk-fixed-factor.c @@ -0,0 +1,95 @@ +/* + * Copyright (C) 2011 Sascha Hauer, Pengutronix + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Standard functionality for the common clock API. + */ +#include +#include +#include +#include + +/* + * DOC: basic fixed multiplier and divider clock that cannot gate + * + * Traits of this clock: + * prepare - clk_prepare only ensures that parents are prepared + * enable - clk_enable only ensures that parents are enabled + * rate - rate is fixed. clk->rate = parent->rate / div * mult + * parent - fixed parent. No clk_set_parent support + */ + +#define to_clk_fixed_factor(_hw) container_of(_hw, struct clk_fixed_factor, hw) + +static unsigned long clk_factor_recalc_rate(struct clk_hw *hw, + unsigned long parent_rate) +{ + struct clk_fixed_factor *fix = to_clk_fixed_factor(hw); + + return parent_rate * fix->mult / fix->div; +} + +static long clk_factor_round_rate(struct clk_hw *hw, unsigned long rate, + unsigned long *prate) +{ + struct clk_fixed_factor *fix = to_clk_fixed_factor(hw); + + if (__clk_get_flags(hw->clk) & CLK_SET_RATE_PARENT) { + unsigned long best_parent; + + best_parent = (rate / fix->mult) * fix->div; + *prate = __clk_round_rate(__clk_get_parent(hw->clk), + best_parent); + } + + return (*prate / fix->div) * fix->mult; +} + +static int clk_factor_set_rate(struct clk_hw *hw, unsigned long rate, + unsigned long parent_rate) +{ + return 0; +} + +struct clk_ops clk_fixed_factor_ops = { + .round_rate = clk_factor_round_rate, + .set_rate = clk_factor_set_rate, + .recalc_rate = clk_factor_recalc_rate, +}; +EXPORT_SYMBOL_GPL(clk_fixed_factor_ops); + +struct clk *clk_register_fixed_factor(struct device *dev, const char *name, + const char *parent_name, unsigned long flags, + unsigned int mult, unsigned int div) +{ + struct clk_fixed_factor *fix; + struct clk_init_data init; + struct clk *clk; + + fix = kmalloc(sizeof(*fix), GFP_KERNEL); + if (!fix) { + pr_err("%s: could not allocate fixed factor clk\n", __func__); + return ERR_PTR(-ENOMEM); + } + + /* struct clk_fixed_factor assignments */ + fix->mult = mult; + fix->div = div; + fix->hw.init = &init; + + init.name = name; + init.ops = &clk_fixed_factor_ops; + init.flags = flags; + init.parent_names = &parent_name; + init.num_parents = 1; + + clk = clk_register(dev, &fix->hw); + + if (IS_ERR(clk)) + kfree(fix); + + return clk; +} diff --git a/include/linux/clk-private.h b/include/linux/clk-private.h index b258532..eb3f84b 100644 --- a/include/linux/clk-private.h +++ b/include/linux/clk-private.h @@ -143,6 +143,26 @@ struct clk { DEFINE_CLK(_name, clk_mux_ops, _flags, _parent_names, \ _parents); +#define DEFINE_CLK_FIXED_FACTOR(_name, _parent_name, \ + _parent_ptr, _flags, \ + _mult, _div) \ + static struct clk _name; \ + static const char *_name##_parent_names[] = { \ + _parent_name, \ + }; \ + static struct clk *_name##_parents[] = { \ + _parent_ptr, \ + }; \ + static struct clk_fixed_factor _name##_hw = { \ + .hw = { \ + .clk = &_name, \ + }, \ + .mult = _mult, \ + .div = _div, \ + }; \ + DEFINE_CLK(_name, clk_fixed_factor_ops, _flags, \ + _name##_parent_names, _name##_parents); + /** * __clk_init - initialize the data structures in a struct clk * @dev: device initializing this clk, placeholder for now diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h index 5db3412..c1c23b9 100644 --- a/include/linux/clk-provider.h +++ b/include/linux/clk-provider.h @@ -277,6 +277,29 @@ struct clk *clk_register_mux(struct device *dev, const char *name, u8 clk_mux_flags, spinlock_t *lock); /** + * struct clk_fixed_factor - fixed multiplier and divider clock + * + * @hw: handle between common and hardware-specific interfaces + * @mult: multiplier + * @div: divider + * + * Clock with a fixed multiplier and divider. The output frequency is the + * parent clock rate divided by div and multiplied by mult. + * Implements .recalc_rate, .set_rate and .round_rate + */ + +struct clk_fixed_factor { + struct clk_hw hw; + unsigned int mult; + unsigned int div; +}; + +extern struct clk_ops clk_fixed_factor_ops; +struct clk *clk_register_fixed_factor(struct device *dev, const char *name, + const char *parent_name, unsigned long flags, + unsigned int mult, unsigned int div); + +/** * clk_register - allocate a new clock, register it and return an opaque cookie * @dev: device that is registering this clock * @hw: link to hardware-specific clock data -- cgit v0.10.2 From f2604c141a00c00b92b7fd2f9d2455517fdd6c15 Mon Sep 17 00:00:00 2001 From: Jarkko Sakkinen Date: Tue, 8 May 2012 21:22:44 +0300 Subject: x86, realmode: move relocs from scripts/ to arch/x86/tools Moved relocs tool from scripts/ to arch/x86/tools because it is architecture specific script. Added new target archscripts that can be used to build scripts needed building an architecture. Signed-off-by: Jarkko Sakkinen Link: http://lkml.kernel.org/r/1336501366-28617-22-git-send-email-jarkko.sakkinen@intel.com Signed-off-by: H. Peter Anvin Cc: Sam Ravnborg Cc: Michal Marek diff --git a/Makefile b/Makefile index 9e384ae..ed1bd29 100644 --- a/Makefile +++ b/Makefile @@ -442,7 +442,7 @@ asm-generic: no-dot-config-targets := clean mrproper distclean \ cscope gtags TAGS tags help %docs check% coccicheck \ - include/linux/version.h headers_% archheaders \ + include/linux/version.h headers_% archheaders archscripts \ kernelversion %src-pkg config-targets := 0 @@ -979,7 +979,7 @@ prepare1: prepare2 include/linux/version.h include/generated/utsrelease.h \ include/config/auto.conf $(cmd_crmodverdir) -archprepare: archheaders prepare1 scripts_basic +archprepare: archheaders archscripts prepare1 scripts_basic prepare0: archprepare FORCE $(Q)$(MAKE) $(build)=. @@ -1049,8 +1049,11 @@ hdr-dst = $(if $(KBUILD_HEADERS), dst=include/asm-$(hdr-arch), dst=include/asm) PHONY += archheaders archheaders: +PHONY += archscripts +archscripts: + PHONY += __headers -__headers: include/linux/version.h scripts_basic asm-generic archheaders FORCE +__headers: include/linux/version.h scripts_basic asm-generic archheaders archscripts FORCE $(Q)$(MAKE) $(build)=scripts build_unifdef PHONY += headers_install_all diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 41a7237..94e91e4 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -134,6 +134,9 @@ KBUILD_CFLAGS += $(call cc-option,-mno-avx,) KBUILD_CFLAGS += $(mflags-y) KBUILD_AFLAGS += $(mflags-y) +archscripts: + $(Q)$(MAKE) $(build)=arch/x86/tools relocs + ### # Syscall table generation diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile index 0435e8a..e398bb5 100644 --- a/arch/x86/boot/compressed/Makefile +++ b/arch/x86/boot/compressed/Makefile @@ -42,8 +42,8 @@ $(obj)/vmlinux.bin: vmlinux FORCE targets += vmlinux.bin.all vmlinux.relocs -CMD_RELOCS = scripts/x86-relocs -quiet_cmd_relocs = RELOCS $@ +CMD_RELOCS = arch/x86/tools/relocs +quiet_cmd_relocs = RELOCS $@ cmd_relocs = $(CMD_RELOCS) $< > $@;$(CMD_RELOCS) --abs-relocs $< $(obj)/vmlinux.relocs: vmlinux FORCE $(call if_changed,relocs) diff --git a/arch/x86/realmode/rm/Makefile b/arch/x86/realmode/rm/Makefile index fc8854b..de40bc4 100644 --- a/arch/x86/realmode/rm/Makefile +++ b/arch/x86/realmode/rm/Makefile @@ -52,7 +52,7 @@ $(obj)/realmode.bin: $(obj)/realmode.elf $(call if_changed,objcopy) quiet_cmd_relocs = RELOCS $@ - cmd_relocs = scripts/x86-relocs --realmode $< > $@ + cmd_relocs = arch/x86/tools/relocs --realmode $< > $@ $(obj)/realmode.relocs: $(obj)/realmode.elf FORCE $(call if_changed,relocs) diff --git a/arch/x86/tools/.gitignore b/arch/x86/tools/.gitignore new file mode 100644 index 0000000..be0ed06 --- /dev/null +++ b/arch/x86/tools/.gitignore @@ -0,0 +1 @@ +relocs diff --git a/arch/x86/tools/Makefile b/arch/x86/tools/Makefile index d511aa9..733057b 100644 --- a/arch/x86/tools/Makefile +++ b/arch/x86/tools/Makefile @@ -36,3 +36,7 @@ HOSTCFLAGS_insn_sanity.o := -Wall -I$(objtree)/arch/x86/lib/ -I$(srctree)/arch/x $(obj)/test_get_len.o: $(srctree)/arch/x86/lib/insn.c $(srctree)/arch/x86/lib/inat.c $(srctree)/arch/x86/include/asm/inat_types.h $(srctree)/arch/x86/include/asm/inat.h $(srctree)/arch/x86/include/asm/insn.h $(objtree)/arch/x86/lib/inat-tables.c $(obj)/insn_sanity.o: $(srctree)/arch/x86/lib/insn.c $(srctree)/arch/x86/lib/inat.c $(srctree)/arch/x86/include/asm/inat_types.h $(srctree)/arch/x86/include/asm/inat.h $(srctree)/arch/x86/include/asm/insn.h $(objtree)/arch/x86/lib/inat-tables.c + +HOST_EXTRACFLAGS += -I$(srctree)/tools/include +hostprogs-y += relocs +relocs: $(obj)/relocs diff --git a/arch/x86/tools/relocs.c b/arch/x86/tools/relocs.c new file mode 100644 index 0000000..74e16bb --- /dev/null +++ b/arch/x86/tools/relocs.c @@ -0,0 +1,804 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#define USE_BSD +#include +#include +#include + +static void die(char *fmt, ...); + +#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) +static Elf32_Ehdr ehdr; +static unsigned long reloc_count, reloc_idx; +static unsigned long *relocs; +static unsigned long reloc16_count, reloc16_idx; +static unsigned long *relocs16; + +struct section { + Elf32_Shdr shdr; + struct section *link; + Elf32_Sym *symtab; + Elf32_Rel *reltab; + char *strtab; +}; +static struct section *secs; + +enum symtype { + S_ABS, + S_REL, + S_SEG, + S_LIN, + S_NSYMTYPES +}; + +static const char * const sym_regex_kernel[S_NSYMTYPES] = { +/* + * Following symbols have been audited. There values are constant and do + * not change if bzImage is loaded at a different physical address than + * the address for which it has been compiled. Don't warn user about + * absolute relocations present w.r.t these symbols. + */ + [S_ABS] = + "^(xen_irq_disable_direct_reloc$|" + "xen_save_fl_direct_reloc$|" + "VDSO|" + "__crc_)", + +/* + * These symbols are known to be relative, even if the linker marks them + * as absolute (typically defined outside any section in the linker script.) + */ + [S_REL] = + "^_end$", +}; + + +static const char * const sym_regex_realmode[S_NSYMTYPES] = { +/* + * These symbols are known to be relative, even if the linker marks them + * as absolute (typically defined outside any section in the linker script.) + */ + [S_REL] = + "^pa_", + +/* + * These are 16-bit segment symbols when compiling 16-bit code. + */ + [S_SEG] = + "^real_mode_seg$", + +/* + * These are offsets belonging to segments, as opposed to linear addresses, + * when compiling 16-bit code. + */ + [S_LIN] = + "^pa_", +}; + +static const char * const *sym_regex; + +static regex_t sym_regex_c[S_NSYMTYPES]; +static int is_reloc(enum symtype type, const char *sym_name) +{ + return sym_regex[type] && + !regexec(&sym_regex_c[type], sym_name, 0, NULL, 0); +} + +static void regex_init(int use_real_mode) +{ + char errbuf[128]; + int err; + int i; + + if (use_real_mode) + sym_regex = sym_regex_realmode; + else + sym_regex = sym_regex_kernel; + + for (i = 0; i < S_NSYMTYPES; i++) { + if (!sym_regex[i]) + continue; + + err = regcomp(&sym_regex_c[i], sym_regex[i], + REG_EXTENDED|REG_NOSUB); + + if (err) { + regerror(err, &sym_regex_c[i], errbuf, sizeof errbuf); + die("%s", errbuf); + } + } +} + +static void die(char *fmt, ...) +{ + va_list ap; + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); + exit(1); +} + +static const char *sym_type(unsigned type) +{ + static const char *type_name[] = { +#define SYM_TYPE(X) [X] = #X + SYM_TYPE(STT_NOTYPE), + SYM_TYPE(STT_OBJECT), + SYM_TYPE(STT_FUNC), + SYM_TYPE(STT_SECTION), + SYM_TYPE(STT_FILE), + SYM_TYPE(STT_COMMON), + SYM_TYPE(STT_TLS), +#undef SYM_TYPE + }; + const char *name = "unknown sym type name"; + if (type < ARRAY_SIZE(type_name)) { + name = type_name[type]; + } + return name; +} + +static const char *sym_bind(unsigned bind) +{ + static const char *bind_name[] = { +#define SYM_BIND(X) [X] = #X + SYM_BIND(STB_LOCAL), + SYM_BIND(STB_GLOBAL), + SYM_BIND(STB_WEAK), +#undef SYM_BIND + }; + const char *name = "unknown sym bind name"; + if (bind < ARRAY_SIZE(bind_name)) { + name = bind_name[bind]; + } + return name; +} + +static const char *sym_visibility(unsigned visibility) +{ + static const char *visibility_name[] = { +#define SYM_VISIBILITY(X) [X] = #X + SYM_VISIBILITY(STV_DEFAULT), + SYM_VISIBILITY(STV_INTERNAL), + SYM_VISIBILITY(STV_HIDDEN), + SYM_VISIBILITY(STV_PROTECTED), +#undef SYM_VISIBILITY + }; + const char *name = "unknown sym visibility name"; + if (visibility < ARRAY_SIZE(visibility_name)) { + name = visibility_name[visibility]; + } + return name; +} + +static const char *rel_type(unsigned type) +{ + static const char *type_name[] = { +#define REL_TYPE(X) [X] = #X + REL_TYPE(R_386_NONE), + REL_TYPE(R_386_32), + REL_TYPE(R_386_PC32), + REL_TYPE(R_386_GOT32), + REL_TYPE(R_386_PLT32), + REL_TYPE(R_386_COPY), + REL_TYPE(R_386_GLOB_DAT), + REL_TYPE(R_386_JMP_SLOT), + REL_TYPE(R_386_RELATIVE), + REL_TYPE(R_386_GOTOFF), + REL_TYPE(R_386_GOTPC), + REL_TYPE(R_386_8), + REL_TYPE(R_386_PC8), + REL_TYPE(R_386_16), + REL_TYPE(R_386_PC16), +#undef REL_TYPE + }; + const char *name = "unknown type rel type name"; + if (type < ARRAY_SIZE(type_name) && type_name[type]) { + name = type_name[type]; + } + return name; +} + +static const char *sec_name(unsigned shndx) +{ + const char *sec_strtab; + const char *name; + sec_strtab = secs[ehdr.e_shstrndx].strtab; + name = ""; + if (shndx < ehdr.e_shnum) { + name = sec_strtab + secs[shndx].shdr.sh_name; + } + else if (shndx == SHN_ABS) { + name = "ABSOLUTE"; + } + else if (shndx == SHN_COMMON) { + name = "COMMON"; + } + return name; +} + +static const char *sym_name(const char *sym_strtab, Elf32_Sym *sym) +{ + const char *name; + name = ""; + if (sym->st_name) { + name = sym_strtab + sym->st_name; + } + else { + name = sec_name(sym->st_shndx); + } + return name; +} + + + +#if BYTE_ORDER == LITTLE_ENDIAN +#define le16_to_cpu(val) (val) +#define le32_to_cpu(val) (val) +#endif +#if BYTE_ORDER == BIG_ENDIAN +#define le16_to_cpu(val) bswap_16(val) +#define le32_to_cpu(val) bswap_32(val) +#endif + +static uint16_t elf16_to_cpu(uint16_t val) +{ + return le16_to_cpu(val); +} + +static uint32_t elf32_to_cpu(uint32_t val) +{ + return le32_to_cpu(val); +} + +static void read_ehdr(FILE *fp) +{ + if (fread(&ehdr, sizeof(ehdr), 1, fp) != 1) { + die("Cannot read ELF header: %s\n", + strerror(errno)); + } + if (memcmp(ehdr.e_ident, ELFMAG, SELFMAG) != 0) { + die("No ELF magic\n"); + } + if (ehdr.e_ident[EI_CLASS] != ELFCLASS32) { + die("Not a 32 bit executable\n"); + } + if (ehdr.e_ident[EI_DATA] != ELFDATA2LSB) { + die("Not a LSB ELF executable\n"); + } + if (ehdr.e_ident[EI_VERSION] != EV_CURRENT) { + die("Unknown ELF version\n"); + } + /* Convert the fields to native endian */ + ehdr.e_type = elf16_to_cpu(ehdr.e_type); + ehdr.e_machine = elf16_to_cpu(ehdr.e_machine); + ehdr.e_version = elf32_to_cpu(ehdr.e_version); + ehdr.e_entry = elf32_to_cpu(ehdr.e_entry); + ehdr.e_phoff = elf32_to_cpu(ehdr.e_phoff); + ehdr.e_shoff = elf32_to_cpu(ehdr.e_shoff); + ehdr.e_flags = elf32_to_cpu(ehdr.e_flags); + ehdr.e_ehsize = elf16_to_cpu(ehdr.e_ehsize); + ehdr.e_phentsize = elf16_to_cpu(ehdr.e_phentsize); + ehdr.e_phnum = elf16_to_cpu(ehdr.e_phnum); + ehdr.e_shentsize = elf16_to_cpu(ehdr.e_shentsize); + ehdr.e_shnum = elf16_to_cpu(ehdr.e_shnum); + ehdr.e_shstrndx = elf16_to_cpu(ehdr.e_shstrndx); + + if ((ehdr.e_type != ET_EXEC) && (ehdr.e_type != ET_DYN)) { + die("Unsupported ELF header type\n"); + } + if (ehdr.e_machine != EM_386) { + die("Not for x86\n"); + } + if (ehdr.e_version != EV_CURRENT) { + die("Unknown ELF version\n"); + } + if (ehdr.e_ehsize != sizeof(Elf32_Ehdr)) { + die("Bad Elf header size\n"); + } + if (ehdr.e_phentsize != sizeof(Elf32_Phdr)) { + die("Bad program header entry\n"); + } + if (ehdr.e_shentsize != sizeof(Elf32_Shdr)) { + die("Bad section header entry\n"); + } + if (ehdr.e_shstrndx >= ehdr.e_shnum) { + die("String table index out of bounds\n"); + } +} + +static void read_shdrs(FILE *fp) +{ + int i; + Elf32_Shdr shdr; + + secs = calloc(ehdr.e_shnum, sizeof(struct section)); + if (!secs) { + die("Unable to allocate %d section headers\n", + ehdr.e_shnum); + } + if (fseek(fp, ehdr.e_shoff, SEEK_SET) < 0) { + die("Seek to %d failed: %s\n", + ehdr.e_shoff, strerror(errno)); + } + for (i = 0; i < ehdr.e_shnum; i++) { + struct section *sec = &secs[i]; + if (fread(&shdr, sizeof shdr, 1, fp) != 1) + die("Cannot read ELF section headers %d/%d: %s\n", + i, ehdr.e_shnum, strerror(errno)); + sec->shdr.sh_name = elf32_to_cpu(shdr.sh_name); + sec->shdr.sh_type = elf32_to_cpu(shdr.sh_type); + sec->shdr.sh_flags = elf32_to_cpu(shdr.sh_flags); + sec->shdr.sh_addr = elf32_to_cpu(shdr.sh_addr); + sec->shdr.sh_offset = elf32_to_cpu(shdr.sh_offset); + sec->shdr.sh_size = elf32_to_cpu(shdr.sh_size); + sec->shdr.sh_link = elf32_to_cpu(shdr.sh_link); + sec->shdr.sh_info = elf32_to_cpu(shdr.sh_info); + sec->shdr.sh_addralign = elf32_to_cpu(shdr.sh_addralign); + sec->shdr.sh_entsize = elf32_to_cpu(shdr.sh_entsize); + if (sec->shdr.sh_link < ehdr.e_shnum) + sec->link = &secs[sec->shdr.sh_link]; + } + +} + +static void read_strtabs(FILE *fp) +{ + int i; + for (i = 0; i < ehdr.e_shnum; i++) { + struct section *sec = &secs[i]; + if (sec->shdr.sh_type != SHT_STRTAB) { + continue; + } + sec->strtab = malloc(sec->shdr.sh_size); + if (!sec->strtab) { + die("malloc of %d bytes for strtab failed\n", + sec->shdr.sh_size); + } + if (fseek(fp, sec->shdr.sh_offset, SEEK_SET) < 0) { + die("Seek to %d failed: %s\n", + sec->shdr.sh_offset, strerror(errno)); + } + if (fread(sec->strtab, 1, sec->shdr.sh_size, fp) + != sec->shdr.sh_size) { + die("Cannot read symbol table: %s\n", + strerror(errno)); + } + } +} + +static void read_symtabs(FILE *fp) +{ + int i,j; + for (i = 0; i < ehdr.e_shnum; i++) { + struct section *sec = &secs[i]; + if (sec->shdr.sh_type != SHT_SYMTAB) { + continue; + } + sec->symtab = malloc(sec->shdr.sh_size); + if (!sec->symtab) { + die("malloc of %d bytes for symtab failed\n", + sec->shdr.sh_size); + } + if (fseek(fp, sec->shdr.sh_offset, SEEK_SET) < 0) { + die("Seek to %d failed: %s\n", + sec->shdr.sh_offset, strerror(errno)); + } + if (fread(sec->symtab, 1, sec->shdr.sh_size, fp) + != sec->shdr.sh_size) { + die("Cannot read symbol table: %s\n", + strerror(errno)); + } + for (j = 0; j < sec->shdr.sh_size/sizeof(Elf32_Sym); j++) { + Elf32_Sym *sym = &sec->symtab[j]; + sym->st_name = elf32_to_cpu(sym->st_name); + sym->st_value = elf32_to_cpu(sym->st_value); + sym->st_size = elf32_to_cpu(sym->st_size); + sym->st_shndx = elf16_to_cpu(sym->st_shndx); + } + } +} + + +static void read_relocs(FILE *fp) +{ + int i,j; + for (i = 0; i < ehdr.e_shnum; i++) { + struct section *sec = &secs[i]; + if (sec->shdr.sh_type != SHT_REL) { + continue; + } + sec->reltab = malloc(sec->shdr.sh_size); + if (!sec->reltab) { + die("malloc of %d bytes for relocs failed\n", + sec->shdr.sh_size); + } + if (fseek(fp, sec->shdr.sh_offset, SEEK_SET) < 0) { + die("Seek to %d failed: %s\n", + sec->shdr.sh_offset, strerror(errno)); + } + if (fread(sec->reltab, 1, sec->shdr.sh_size, fp) + != sec->shdr.sh_size) { + die("Cannot read symbol table: %s\n", + strerror(errno)); + } + for (j = 0; j < sec->shdr.sh_size/sizeof(Elf32_Rel); j++) { + Elf32_Rel *rel = &sec->reltab[j]; + rel->r_offset = elf32_to_cpu(rel->r_offset); + rel->r_info = elf32_to_cpu(rel->r_info); + } + } +} + + +static void print_absolute_symbols(void) +{ + int i; + printf("Absolute symbols\n"); + printf(" Num: Value Size Type Bind Visibility Name\n"); + for (i = 0; i < ehdr.e_shnum; i++) { + struct section *sec = &secs[i]; + char *sym_strtab; + int j; + + if (sec->shdr.sh_type != SHT_SYMTAB) { + continue; + } + sym_strtab = sec->link->strtab; + for (j = 0; j < sec->shdr.sh_size/sizeof(Elf32_Sym); j++) { + Elf32_Sym *sym; + const char *name; + sym = &sec->symtab[j]; + name = sym_name(sym_strtab, sym); + if (sym->st_shndx != SHN_ABS) { + continue; + } + printf("%5d %08x %5d %10s %10s %12s %s\n", + j, sym->st_value, sym->st_size, + sym_type(ELF32_ST_TYPE(sym->st_info)), + sym_bind(ELF32_ST_BIND(sym->st_info)), + sym_visibility(ELF32_ST_VISIBILITY(sym->st_other)), + name); + } + } + printf("\n"); +} + +static void print_absolute_relocs(void) +{ + int i, printed = 0; + + for (i = 0; i < ehdr.e_shnum; i++) { + struct section *sec = &secs[i]; + struct section *sec_applies, *sec_symtab; + char *sym_strtab; + Elf32_Sym *sh_symtab; + int j; + if (sec->shdr.sh_type != SHT_REL) { + continue; + } + sec_symtab = sec->link; + sec_applies = &secs[sec->shdr.sh_info]; + if (!(sec_applies->shdr.sh_flags & SHF_ALLOC)) { + continue; + } + sh_symtab = sec_symtab->symtab; + sym_strtab = sec_symtab->link->strtab; + for (j = 0; j < sec->shdr.sh_size/sizeof(Elf32_Rel); j++) { + Elf32_Rel *rel; + Elf32_Sym *sym; + const char *name; + rel = &sec->reltab[j]; + sym = &sh_symtab[ELF32_R_SYM(rel->r_info)]; + name = sym_name(sym_strtab, sym); + if (sym->st_shndx != SHN_ABS) { + continue; + } + + /* Absolute symbols are not relocated if bzImage is + * loaded at a non-compiled address. Display a warning + * to user at compile time about the absolute + * relocations present. + * + * User need to audit the code to make sure + * some symbols which should have been section + * relative have not become absolute because of some + * linker optimization or wrong programming usage. + * + * Before warning check if this absolute symbol + * relocation is harmless. + */ + if (is_reloc(S_ABS, name) || is_reloc(S_REL, name)) + continue; + + if (!printed) { + printf("WARNING: Absolute relocations" + " present\n"); + printf("Offset Info Type Sym.Value " + "Sym.Name\n"); + printed = 1; + } + + printf("%08x %08x %10s %08x %s\n", + rel->r_offset, + rel->r_info, + rel_type(ELF32_R_TYPE(rel->r_info)), + sym->st_value, + name); + } + } + + if (printed) + printf("\n"); +} + +static void walk_relocs(void (*visit)(Elf32_Rel *rel, Elf32_Sym *sym), + int use_real_mode) +{ + int i; + /* Walk through the relocations */ + for (i = 0; i < ehdr.e_shnum; i++) { + char *sym_strtab; + Elf32_Sym *sh_symtab; + struct section *sec_applies, *sec_symtab; + int j; + struct section *sec = &secs[i]; + + if (sec->shdr.sh_type != SHT_REL) { + continue; + } + sec_symtab = sec->link; + sec_applies = &secs[sec->shdr.sh_info]; + if (!(sec_applies->shdr.sh_flags & SHF_ALLOC)) { + continue; + } + sh_symtab = sec_symtab->symtab; + sym_strtab = sec_symtab->link->strtab; + for (j = 0; j < sec->shdr.sh_size/sizeof(Elf32_Rel); j++) { + Elf32_Rel *rel; + Elf32_Sym *sym; + unsigned r_type; + const char *symname; + rel = &sec->reltab[j]; + sym = &sh_symtab[ELF32_R_SYM(rel->r_info)]; + r_type = ELF32_R_TYPE(rel->r_info); + + switch (r_type) { + case R_386_NONE: + case R_386_PC32: + case R_386_PC16: + case R_386_PC8: + /* + * NONE can be ignored and and PC relative + * relocations don't need to be adjusted. + */ + break; + + case R_386_16: + symname = sym_name(sym_strtab, sym); + if (!use_real_mode) + goto bad; + if (sym->st_shndx == SHN_ABS) { + if (is_reloc(S_ABS, symname)) + break; + else if (!is_reloc(S_SEG, symname)) + goto bad; + } else { + if (is_reloc(S_LIN, symname)) + goto bad; + else + break; + } + visit(rel, sym); + break; + + case R_386_32: + symname = sym_name(sym_strtab, sym); + if (sym->st_shndx == SHN_ABS) { + if (is_reloc(S_ABS, symname)) + break; + else if (!is_reloc(S_REL, symname)) + goto bad; + } else { + if (use_real_mode && + !is_reloc(S_LIN, symname)) + break; + } + visit(rel, sym); + break; + default: + die("Unsupported relocation type: %s (%d)\n", + rel_type(r_type), r_type); + break; + bad: + symname = sym_name(sym_strtab, sym); + die("Invalid %s relocation: %s\n", + rel_type(r_type), symname); + } + } + } +} + +static void count_reloc(Elf32_Rel *rel, Elf32_Sym *sym) +{ + if (ELF32_R_TYPE(rel->r_info) == R_386_16) + reloc16_count++; + else + reloc_count++; +} + +static void collect_reloc(Elf32_Rel *rel, Elf32_Sym *sym) +{ + /* Remember the address that needs to be adjusted. */ + if (ELF32_R_TYPE(rel->r_info) == R_386_16) + relocs16[reloc16_idx++] = rel->r_offset; + else + relocs[reloc_idx++] = rel->r_offset; +} + +static int cmp_relocs(const void *va, const void *vb) +{ + const unsigned long *a, *b; + a = va; b = vb; + return (*a == *b)? 0 : (*a > *b)? 1 : -1; +} + +static int write32(unsigned int v, FILE *f) +{ + unsigned char buf[4]; + + put_unaligned_le32(v, buf); + return fwrite(buf, 1, 4, f) == 4 ? 0 : -1; +} + +static void emit_relocs(int as_text, int use_real_mode) +{ + int i; + /* Count how many relocations I have and allocate space for them. */ + reloc_count = 0; + walk_relocs(count_reloc, use_real_mode); + relocs = malloc(reloc_count * sizeof(relocs[0])); + if (!relocs) { + die("malloc of %d entries for relocs failed\n", + reloc_count); + } + + relocs16 = malloc(reloc16_count * sizeof(relocs[0])); + if (!relocs16) { + die("malloc of %d entries for relocs16 failed\n", + reloc16_count); + } + /* Collect up the relocations */ + reloc_idx = 0; + walk_relocs(collect_reloc, use_real_mode); + + if (reloc16_count && !use_real_mode) + die("Segment relocations found but --realmode not specified\n"); + + /* Order the relocations for more efficient processing */ + qsort(relocs, reloc_count, sizeof(relocs[0]), cmp_relocs); + qsort(relocs16, reloc16_count, sizeof(relocs16[0]), cmp_relocs); + + /* Print the relocations */ + if (as_text) { + /* Print the relocations in a form suitable that + * gas will like. + */ + printf(".section \".data.reloc\",\"a\"\n"); + printf(".balign 4\n"); + if (use_real_mode) { + printf("\t.long %lu\n", reloc16_count); + for (i = 0; i < reloc16_count; i++) + printf("\t.long 0x%08lx\n", relocs16[i]); + printf("\t.long %lu\n", reloc_count); + for (i = 0; i < reloc_count; i++) { + printf("\t.long 0x%08lx\n", relocs[i]); + } + } else { + /* Print a stop */ + printf("\t.long 0x%08lx\n", (unsigned long)0); + for (i = 0; i < reloc_count; i++) { + printf("\t.long 0x%08lx\n", relocs[i]); + } + } + + printf("\n"); + } + else { + if (use_real_mode) { + write32(reloc16_count, stdout); + for (i = 0; i < reloc16_count; i++) + write32(relocs16[i], stdout); + write32(reloc_count, stdout); + + /* Now print each relocation */ + for (i = 0; i < reloc_count; i++) + write32(relocs[i], stdout); + } else { + /* Print a stop */ + write32(0, stdout); + + /* Now print each relocation */ + for (i = 0; i < reloc_count; i++) { + write32(relocs[i], stdout); + } + } + } +} + +static void usage(void) +{ + die("relocs [--abs-syms|--abs-relocs|--text|--realmode] vmlinux\n"); +} + +int main(int argc, char **argv) +{ + int show_absolute_syms, show_absolute_relocs; + int as_text, use_real_mode; + const char *fname; + FILE *fp; + int i; + + show_absolute_syms = 0; + show_absolute_relocs = 0; + as_text = 0; + use_real_mode = 0; + fname = NULL; + for (i = 1; i < argc; i++) { + char *arg = argv[i]; + if (*arg == '-') { + if (strcmp(arg, "--abs-syms") == 0) { + show_absolute_syms = 1; + continue; + } + if (strcmp(arg, "--abs-relocs") == 0) { + show_absolute_relocs = 1; + continue; + } + if (strcmp(arg, "--text") == 0) { + as_text = 1; + continue; + } + if (strcmp(arg, "--realmode") == 0) { + use_real_mode = 1; + continue; + } + } + else if (!fname) { + fname = arg; + continue; + } + usage(); + } + if (!fname) { + usage(); + } + regex_init(use_real_mode); + fp = fopen(fname, "r"); + if (!fp) { + die("Cannot open %s: %s\n", + fname, strerror(errno)); + } + read_ehdr(fp); + read_shdrs(fp); + read_strtabs(fp); + read_symtabs(fp); + read_relocs(fp); + if (show_absolute_syms) { + print_absolute_symbols(); + return 0; + } + if (show_absolute_relocs) { + print_absolute_relocs(); + return 0; + } + emit_relocs(as_text, use_real_mode); + return 0; +} diff --git a/scripts/Makefile b/scripts/Makefile index a241359d..3626666 100644 --- a/scripts/Makefile +++ b/scripts/Makefile @@ -15,7 +15,6 @@ hostprogs-$(CONFIG_LOGO) += pnmtologo hostprogs-$(CONFIG_VT) += conmakehash hostprogs-$(CONFIG_IKCONFIG) += bin2c hostprogs-$(BUILD_C_RECORDMCOUNT) += recordmcount -hostprogs-$(CONFIG_X86) += x86-relocs always := $(hostprogs-y) $(hostprogs-m) diff --git a/scripts/x86-relocs.c b/scripts/x86-relocs.c deleted file mode 100644 index 74e16bb..0000000 --- a/scripts/x86-relocs.c +++ /dev/null @@ -1,804 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#define USE_BSD -#include -#include -#include - -static void die(char *fmt, ...); - -#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) -static Elf32_Ehdr ehdr; -static unsigned long reloc_count, reloc_idx; -static unsigned long *relocs; -static unsigned long reloc16_count, reloc16_idx; -static unsigned long *relocs16; - -struct section { - Elf32_Shdr shdr; - struct section *link; - Elf32_Sym *symtab; - Elf32_Rel *reltab; - char *strtab; -}; -static struct section *secs; - -enum symtype { - S_ABS, - S_REL, - S_SEG, - S_LIN, - S_NSYMTYPES -}; - -static const char * const sym_regex_kernel[S_NSYMTYPES] = { -/* - * Following symbols have been audited. There values are constant and do - * not change if bzImage is loaded at a different physical address than - * the address for which it has been compiled. Don't warn user about - * absolute relocations present w.r.t these symbols. - */ - [S_ABS] = - "^(xen_irq_disable_direct_reloc$|" - "xen_save_fl_direct_reloc$|" - "VDSO|" - "__crc_)", - -/* - * These symbols are known to be relative, even if the linker marks them - * as absolute (typically defined outside any section in the linker script.) - */ - [S_REL] = - "^_end$", -}; - - -static const char * const sym_regex_realmode[S_NSYMTYPES] = { -/* - * These symbols are known to be relative, even if the linker marks them - * as absolute (typically defined outside any section in the linker script.) - */ - [S_REL] = - "^pa_", - -/* - * These are 16-bit segment symbols when compiling 16-bit code. - */ - [S_SEG] = - "^real_mode_seg$", - -/* - * These are offsets belonging to segments, as opposed to linear addresses, - * when compiling 16-bit code. - */ - [S_LIN] = - "^pa_", -}; - -static const char * const *sym_regex; - -static regex_t sym_regex_c[S_NSYMTYPES]; -static int is_reloc(enum symtype type, const char *sym_name) -{ - return sym_regex[type] && - !regexec(&sym_regex_c[type], sym_name, 0, NULL, 0); -} - -static void regex_init(int use_real_mode) -{ - char errbuf[128]; - int err; - int i; - - if (use_real_mode) - sym_regex = sym_regex_realmode; - else - sym_regex = sym_regex_kernel; - - for (i = 0; i < S_NSYMTYPES; i++) { - if (!sym_regex[i]) - continue; - - err = regcomp(&sym_regex_c[i], sym_regex[i], - REG_EXTENDED|REG_NOSUB); - - if (err) { - regerror(err, &sym_regex_c[i], errbuf, sizeof errbuf); - die("%s", errbuf); - } - } -} - -static void die(char *fmt, ...) -{ - va_list ap; - va_start(ap, fmt); - vfprintf(stderr, fmt, ap); - va_end(ap); - exit(1); -} - -static const char *sym_type(unsigned type) -{ - static const char *type_name[] = { -#define SYM_TYPE(X) [X] = #X - SYM_TYPE(STT_NOTYPE), - SYM_TYPE(STT_OBJECT), - SYM_TYPE(STT_FUNC), - SYM_TYPE(STT_SECTION), - SYM_TYPE(STT_FILE), - SYM_TYPE(STT_COMMON), - SYM_TYPE(STT_TLS), -#undef SYM_TYPE - }; - const char *name = "unknown sym type name"; - if (type < ARRAY_SIZE(type_name)) { - name = type_name[type]; - } - return name; -} - -static const char *sym_bind(unsigned bind) -{ - static const char *bind_name[] = { -#define SYM_BIND(X) [X] = #X - SYM_BIND(STB_LOCAL), - SYM_BIND(STB_GLOBAL), - SYM_BIND(STB_WEAK), -#undef SYM_BIND - }; - const char *name = "unknown sym bind name"; - if (bind < ARRAY_SIZE(bind_name)) { - name = bind_name[bind]; - } - return name; -} - -static const char *sym_visibility(unsigned visibility) -{ - static const char *visibility_name[] = { -#define SYM_VISIBILITY(X) [X] = #X - SYM_VISIBILITY(STV_DEFAULT), - SYM_VISIBILITY(STV_INTERNAL), - SYM_VISIBILITY(STV_HIDDEN), - SYM_VISIBILITY(STV_PROTECTED), -#undef SYM_VISIBILITY - }; - const char *name = "unknown sym visibility name"; - if (visibility < ARRAY_SIZE(visibility_name)) { - name = visibility_name[visibility]; - } - return name; -} - -static const char *rel_type(unsigned type) -{ - static const char *type_name[] = { -#define REL_TYPE(X) [X] = #X - REL_TYPE(R_386_NONE), - REL_TYPE(R_386_32), - REL_TYPE(R_386_PC32), - REL_TYPE(R_386_GOT32), - REL_TYPE(R_386_PLT32), - REL_TYPE(R_386_COPY), - REL_TYPE(R_386_GLOB_DAT), - REL_TYPE(R_386_JMP_SLOT), - REL_TYPE(R_386_RELATIVE), - REL_TYPE(R_386_GOTOFF), - REL_TYPE(R_386_GOTPC), - REL_TYPE(R_386_8), - REL_TYPE(R_386_PC8), - REL_TYPE(R_386_16), - REL_TYPE(R_386_PC16), -#undef REL_TYPE - }; - const char *name = "unknown type rel type name"; - if (type < ARRAY_SIZE(type_name) && type_name[type]) { - name = type_name[type]; - } - return name; -} - -static const char *sec_name(unsigned shndx) -{ - const char *sec_strtab; - const char *name; - sec_strtab = secs[ehdr.e_shstrndx].strtab; - name = ""; - if (shndx < ehdr.e_shnum) { - name = sec_strtab + secs[shndx].shdr.sh_name; - } - else if (shndx == SHN_ABS) { - name = "ABSOLUTE"; - } - else if (shndx == SHN_COMMON) { - name = "COMMON"; - } - return name; -} - -static const char *sym_name(const char *sym_strtab, Elf32_Sym *sym) -{ - const char *name; - name = ""; - if (sym->st_name) { - name = sym_strtab + sym->st_name; - } - else { - name = sec_name(sym->st_shndx); - } - return name; -} - - - -#if BYTE_ORDER == LITTLE_ENDIAN -#define le16_to_cpu(val) (val) -#define le32_to_cpu(val) (val) -#endif -#if BYTE_ORDER == BIG_ENDIAN -#define le16_to_cpu(val) bswap_16(val) -#define le32_to_cpu(val) bswap_32(val) -#endif - -static uint16_t elf16_to_cpu(uint16_t val) -{ - return le16_to_cpu(val); -} - -static uint32_t elf32_to_cpu(uint32_t val) -{ - return le32_to_cpu(val); -} - -static void read_ehdr(FILE *fp) -{ - if (fread(&ehdr, sizeof(ehdr), 1, fp) != 1) { - die("Cannot read ELF header: %s\n", - strerror(errno)); - } - if (memcmp(ehdr.e_ident, ELFMAG, SELFMAG) != 0) { - die("No ELF magic\n"); - } - if (ehdr.e_ident[EI_CLASS] != ELFCLASS32) { - die("Not a 32 bit executable\n"); - } - if (ehdr.e_ident[EI_DATA] != ELFDATA2LSB) { - die("Not a LSB ELF executable\n"); - } - if (ehdr.e_ident[EI_VERSION] != EV_CURRENT) { - die("Unknown ELF version\n"); - } - /* Convert the fields to native endian */ - ehdr.e_type = elf16_to_cpu(ehdr.e_type); - ehdr.e_machine = elf16_to_cpu(ehdr.e_machine); - ehdr.e_version = elf32_to_cpu(ehdr.e_version); - ehdr.e_entry = elf32_to_cpu(ehdr.e_entry); - ehdr.e_phoff = elf32_to_cpu(ehdr.e_phoff); - ehdr.e_shoff = elf32_to_cpu(ehdr.e_shoff); - ehdr.e_flags = elf32_to_cpu(ehdr.e_flags); - ehdr.e_ehsize = elf16_to_cpu(ehdr.e_ehsize); - ehdr.e_phentsize = elf16_to_cpu(ehdr.e_phentsize); - ehdr.e_phnum = elf16_to_cpu(ehdr.e_phnum); - ehdr.e_shentsize = elf16_to_cpu(ehdr.e_shentsize); - ehdr.e_shnum = elf16_to_cpu(ehdr.e_shnum); - ehdr.e_shstrndx = elf16_to_cpu(ehdr.e_shstrndx); - - if ((ehdr.e_type != ET_EXEC) && (ehdr.e_type != ET_DYN)) { - die("Unsupported ELF header type\n"); - } - if (ehdr.e_machine != EM_386) { - die("Not for x86\n"); - } - if (ehdr.e_version != EV_CURRENT) { - die("Unknown ELF version\n"); - } - if (ehdr.e_ehsize != sizeof(Elf32_Ehdr)) { - die("Bad Elf header size\n"); - } - if (ehdr.e_phentsize != sizeof(Elf32_Phdr)) { - die("Bad program header entry\n"); - } - if (ehdr.e_shentsize != sizeof(Elf32_Shdr)) { - die("Bad section header entry\n"); - } - if (ehdr.e_shstrndx >= ehdr.e_shnum) { - die("String table index out of bounds\n"); - } -} - -static void read_shdrs(FILE *fp) -{ - int i; - Elf32_Shdr shdr; - - secs = calloc(ehdr.e_shnum, sizeof(struct section)); - if (!secs) { - die("Unable to allocate %d section headers\n", - ehdr.e_shnum); - } - if (fseek(fp, ehdr.e_shoff, SEEK_SET) < 0) { - die("Seek to %d failed: %s\n", - ehdr.e_shoff, strerror(errno)); - } - for (i = 0; i < ehdr.e_shnum; i++) { - struct section *sec = &secs[i]; - if (fread(&shdr, sizeof shdr, 1, fp) != 1) - die("Cannot read ELF section headers %d/%d: %s\n", - i, ehdr.e_shnum, strerror(errno)); - sec->shdr.sh_name = elf32_to_cpu(shdr.sh_name); - sec->shdr.sh_type = elf32_to_cpu(shdr.sh_type); - sec->shdr.sh_flags = elf32_to_cpu(shdr.sh_flags); - sec->shdr.sh_addr = elf32_to_cpu(shdr.sh_addr); - sec->shdr.sh_offset = elf32_to_cpu(shdr.sh_offset); - sec->shdr.sh_size = elf32_to_cpu(shdr.sh_size); - sec->shdr.sh_link = elf32_to_cpu(shdr.sh_link); - sec->shdr.sh_info = elf32_to_cpu(shdr.sh_info); - sec->shdr.sh_addralign = elf32_to_cpu(shdr.sh_addralign); - sec->shdr.sh_entsize = elf32_to_cpu(shdr.sh_entsize); - if (sec->shdr.sh_link < ehdr.e_shnum) - sec->link = &secs[sec->shdr.sh_link]; - } - -} - -static void read_strtabs(FILE *fp) -{ - int i; - for (i = 0; i < ehdr.e_shnum; i++) { - struct section *sec = &secs[i]; - if (sec->shdr.sh_type != SHT_STRTAB) { - continue; - } - sec->strtab = malloc(sec->shdr.sh_size); - if (!sec->strtab) { - die("malloc of %d bytes for strtab failed\n", - sec->shdr.sh_size); - } - if (fseek(fp, sec->shdr.sh_offset, SEEK_SET) < 0) { - die("Seek to %d failed: %s\n", - sec->shdr.sh_offset, strerror(errno)); - } - if (fread(sec->strtab, 1, sec->shdr.sh_size, fp) - != sec->shdr.sh_size) { - die("Cannot read symbol table: %s\n", - strerror(errno)); - } - } -} - -static void read_symtabs(FILE *fp) -{ - int i,j; - for (i = 0; i < ehdr.e_shnum; i++) { - struct section *sec = &secs[i]; - if (sec->shdr.sh_type != SHT_SYMTAB) { - continue; - } - sec->symtab = malloc(sec->shdr.sh_size); - if (!sec->symtab) { - die("malloc of %d bytes for symtab failed\n", - sec->shdr.sh_size); - } - if (fseek(fp, sec->shdr.sh_offset, SEEK_SET) < 0) { - die("Seek to %d failed: %s\n", - sec->shdr.sh_offset, strerror(errno)); - } - if (fread(sec->symtab, 1, sec->shdr.sh_size, fp) - != sec->shdr.sh_size) { - die("Cannot read symbol table: %s\n", - strerror(errno)); - } - for (j = 0; j < sec->shdr.sh_size/sizeof(Elf32_Sym); j++) { - Elf32_Sym *sym = &sec->symtab[j]; - sym->st_name = elf32_to_cpu(sym->st_name); - sym->st_value = elf32_to_cpu(sym->st_value); - sym->st_size = elf32_to_cpu(sym->st_size); - sym->st_shndx = elf16_to_cpu(sym->st_shndx); - } - } -} - - -static void read_relocs(FILE *fp) -{ - int i,j; - for (i = 0; i < ehdr.e_shnum; i++) { - struct section *sec = &secs[i]; - if (sec->shdr.sh_type != SHT_REL) { - continue; - } - sec->reltab = malloc(sec->shdr.sh_size); - if (!sec->reltab) { - die("malloc of %d bytes for relocs failed\n", - sec->shdr.sh_size); - } - if (fseek(fp, sec->shdr.sh_offset, SEEK_SET) < 0) { - die("Seek to %d failed: %s\n", - sec->shdr.sh_offset, strerror(errno)); - } - if (fread(sec->reltab, 1, sec->shdr.sh_size, fp) - != sec->shdr.sh_size) { - die("Cannot read symbol table: %s\n", - strerror(errno)); - } - for (j = 0; j < sec->shdr.sh_size/sizeof(Elf32_Rel); j++) { - Elf32_Rel *rel = &sec->reltab[j]; - rel->r_offset = elf32_to_cpu(rel->r_offset); - rel->r_info = elf32_to_cpu(rel->r_info); - } - } -} - - -static void print_absolute_symbols(void) -{ - int i; - printf("Absolute symbols\n"); - printf(" Num: Value Size Type Bind Visibility Name\n"); - for (i = 0; i < ehdr.e_shnum; i++) { - struct section *sec = &secs[i]; - char *sym_strtab; - int j; - - if (sec->shdr.sh_type != SHT_SYMTAB) { - continue; - } - sym_strtab = sec->link->strtab; - for (j = 0; j < sec->shdr.sh_size/sizeof(Elf32_Sym); j++) { - Elf32_Sym *sym; - const char *name; - sym = &sec->symtab[j]; - name = sym_name(sym_strtab, sym); - if (sym->st_shndx != SHN_ABS) { - continue; - } - printf("%5d %08x %5d %10s %10s %12s %s\n", - j, sym->st_value, sym->st_size, - sym_type(ELF32_ST_TYPE(sym->st_info)), - sym_bind(ELF32_ST_BIND(sym->st_info)), - sym_visibility(ELF32_ST_VISIBILITY(sym->st_other)), - name); - } - } - printf("\n"); -} - -static void print_absolute_relocs(void) -{ - int i, printed = 0; - - for (i = 0; i < ehdr.e_shnum; i++) { - struct section *sec = &secs[i]; - struct section *sec_applies, *sec_symtab; - char *sym_strtab; - Elf32_Sym *sh_symtab; - int j; - if (sec->shdr.sh_type != SHT_REL) { - continue; - } - sec_symtab = sec->link; - sec_applies = &secs[sec->shdr.sh_info]; - if (!(sec_applies->shdr.sh_flags & SHF_ALLOC)) { - continue; - } - sh_symtab = sec_symtab->symtab; - sym_strtab = sec_symtab->link->strtab; - for (j = 0; j < sec->shdr.sh_size/sizeof(Elf32_Rel); j++) { - Elf32_Rel *rel; - Elf32_Sym *sym; - const char *name; - rel = &sec->reltab[j]; - sym = &sh_symtab[ELF32_R_SYM(rel->r_info)]; - name = sym_name(sym_strtab, sym); - if (sym->st_shndx != SHN_ABS) { - continue; - } - - /* Absolute symbols are not relocated if bzImage is - * loaded at a non-compiled address. Display a warning - * to user at compile time about the absolute - * relocations present. - * - * User need to audit the code to make sure - * some symbols which should have been section - * relative have not become absolute because of some - * linker optimization or wrong programming usage. - * - * Before warning check if this absolute symbol - * relocation is harmless. - */ - if (is_reloc(S_ABS, name) || is_reloc(S_REL, name)) - continue; - - if (!printed) { - printf("WARNING: Absolute relocations" - " present\n"); - printf("Offset Info Type Sym.Value " - "Sym.Name\n"); - printed = 1; - } - - printf("%08x %08x %10s %08x %s\n", - rel->r_offset, - rel->r_info, - rel_type(ELF32_R_TYPE(rel->r_info)), - sym->st_value, - name); - } - } - - if (printed) - printf("\n"); -} - -static void walk_relocs(void (*visit)(Elf32_Rel *rel, Elf32_Sym *sym), - int use_real_mode) -{ - int i; - /* Walk through the relocations */ - for (i = 0; i < ehdr.e_shnum; i++) { - char *sym_strtab; - Elf32_Sym *sh_symtab; - struct section *sec_applies, *sec_symtab; - int j; - struct section *sec = &secs[i]; - - if (sec->shdr.sh_type != SHT_REL) { - continue; - } - sec_symtab = sec->link; - sec_applies = &secs[sec->shdr.sh_info]; - if (!(sec_applies->shdr.sh_flags & SHF_ALLOC)) { - continue; - } - sh_symtab = sec_symtab->symtab; - sym_strtab = sec_symtab->link->strtab; - for (j = 0; j < sec->shdr.sh_size/sizeof(Elf32_Rel); j++) { - Elf32_Rel *rel; - Elf32_Sym *sym; - unsigned r_type; - const char *symname; - rel = &sec->reltab[j]; - sym = &sh_symtab[ELF32_R_SYM(rel->r_info)]; - r_type = ELF32_R_TYPE(rel->r_info); - - switch (r_type) { - case R_386_NONE: - case R_386_PC32: - case R_386_PC16: - case R_386_PC8: - /* - * NONE can be ignored and and PC relative - * relocations don't need to be adjusted. - */ - break; - - case R_386_16: - symname = sym_name(sym_strtab, sym); - if (!use_real_mode) - goto bad; - if (sym->st_shndx == SHN_ABS) { - if (is_reloc(S_ABS, symname)) - break; - else if (!is_reloc(S_SEG, symname)) - goto bad; - } else { - if (is_reloc(S_LIN, symname)) - goto bad; - else - break; - } - visit(rel, sym); - break; - - case R_386_32: - symname = sym_name(sym_strtab, sym); - if (sym->st_shndx == SHN_ABS) { - if (is_reloc(S_ABS, symname)) - break; - else if (!is_reloc(S_REL, symname)) - goto bad; - } else { - if (use_real_mode && - !is_reloc(S_LIN, symname)) - break; - } - visit(rel, sym); - break; - default: - die("Unsupported relocation type: %s (%d)\n", - rel_type(r_type), r_type); - break; - bad: - symname = sym_name(sym_strtab, sym); - die("Invalid %s relocation: %s\n", - rel_type(r_type), symname); - } - } - } -} - -static void count_reloc(Elf32_Rel *rel, Elf32_Sym *sym) -{ - if (ELF32_R_TYPE(rel->r_info) == R_386_16) - reloc16_count++; - else - reloc_count++; -} - -static void collect_reloc(Elf32_Rel *rel, Elf32_Sym *sym) -{ - /* Remember the address that needs to be adjusted. */ - if (ELF32_R_TYPE(rel->r_info) == R_386_16) - relocs16[reloc16_idx++] = rel->r_offset; - else - relocs[reloc_idx++] = rel->r_offset; -} - -static int cmp_relocs(const void *va, const void *vb) -{ - const unsigned long *a, *b; - a = va; b = vb; - return (*a == *b)? 0 : (*a > *b)? 1 : -1; -} - -static int write32(unsigned int v, FILE *f) -{ - unsigned char buf[4]; - - put_unaligned_le32(v, buf); - return fwrite(buf, 1, 4, f) == 4 ? 0 : -1; -} - -static void emit_relocs(int as_text, int use_real_mode) -{ - int i; - /* Count how many relocations I have and allocate space for them. */ - reloc_count = 0; - walk_relocs(count_reloc, use_real_mode); - relocs = malloc(reloc_count * sizeof(relocs[0])); - if (!relocs) { - die("malloc of %d entries for relocs failed\n", - reloc_count); - } - - relocs16 = malloc(reloc16_count * sizeof(relocs[0])); - if (!relocs16) { - die("malloc of %d entries for relocs16 failed\n", - reloc16_count); - } - /* Collect up the relocations */ - reloc_idx = 0; - walk_relocs(collect_reloc, use_real_mode); - - if (reloc16_count && !use_real_mode) - die("Segment relocations found but --realmode not specified\n"); - - /* Order the relocations for more efficient processing */ - qsort(relocs, reloc_count, sizeof(relocs[0]), cmp_relocs); - qsort(relocs16, reloc16_count, sizeof(relocs16[0]), cmp_relocs); - - /* Print the relocations */ - if (as_text) { - /* Print the relocations in a form suitable that - * gas will like. - */ - printf(".section \".data.reloc\",\"a\"\n"); - printf(".balign 4\n"); - if (use_real_mode) { - printf("\t.long %lu\n", reloc16_count); - for (i = 0; i < reloc16_count; i++) - printf("\t.long 0x%08lx\n", relocs16[i]); - printf("\t.long %lu\n", reloc_count); - for (i = 0; i < reloc_count; i++) { - printf("\t.long 0x%08lx\n", relocs[i]); - } - } else { - /* Print a stop */ - printf("\t.long 0x%08lx\n", (unsigned long)0); - for (i = 0; i < reloc_count; i++) { - printf("\t.long 0x%08lx\n", relocs[i]); - } - } - - printf("\n"); - } - else { - if (use_real_mode) { - write32(reloc16_count, stdout); - for (i = 0; i < reloc16_count; i++) - write32(relocs16[i], stdout); - write32(reloc_count, stdout); - - /* Now print each relocation */ - for (i = 0; i < reloc_count; i++) - write32(relocs[i], stdout); - } else { - /* Print a stop */ - write32(0, stdout); - - /* Now print each relocation */ - for (i = 0; i < reloc_count; i++) { - write32(relocs[i], stdout); - } - } - } -} - -static void usage(void) -{ - die("relocs [--abs-syms|--abs-relocs|--text|--realmode] vmlinux\n"); -} - -int main(int argc, char **argv) -{ - int show_absolute_syms, show_absolute_relocs; - int as_text, use_real_mode; - const char *fname; - FILE *fp; - int i; - - show_absolute_syms = 0; - show_absolute_relocs = 0; - as_text = 0; - use_real_mode = 0; - fname = NULL; - for (i = 1; i < argc; i++) { - char *arg = argv[i]; - if (*arg == '-') { - if (strcmp(arg, "--abs-syms") == 0) { - show_absolute_syms = 1; - continue; - } - if (strcmp(arg, "--abs-relocs") == 0) { - show_absolute_relocs = 1; - continue; - } - if (strcmp(arg, "--text") == 0) { - as_text = 1; - continue; - } - if (strcmp(arg, "--realmode") == 0) { - use_real_mode = 1; - continue; - } - } - else if (!fname) { - fname = arg; - continue; - } - usage(); - } - if (!fname) { - usage(); - } - regex_init(use_real_mode); - fp = fopen(fname, "r"); - if (!fp) { - die("Cannot open %s: %s\n", - fname, strerror(errno)); - } - read_ehdr(fp); - read_shdrs(fp); - read_strtabs(fp); - read_symtabs(fp); - read_relocs(fp); - if (show_absolute_syms) { - print_absolute_symbols(); - return 0; - } - if (show_absolute_relocs) { - print_absolute_relocs(); - return 0; - } - emit_relocs(as_text, use_real_mode); - return 0; -} -- cgit v0.10.2 From bf8b88e97716feb750c3d34492f00d9c085e1183 Mon Sep 17 00:00:00 2001 From: Jarkko Sakkinen Date: Tue, 8 May 2012 21:22:45 +0300 Subject: x86, realmode: fixes compilation issue in tboot.c Fixed include path of wakeup.h in tboot.c. Signed-off-by: Jarkko Sakkinen Link: http://lkml.kernel.org/r/1336501366-28617-23-git-send-email-jarkko.sakkinen@intel.com Signed-off-by: H. Peter Anvin diff --git a/arch/x86/kernel/tboot.c b/arch/x86/kernel/tboot.c index 65adda4..f84fe00 100644 --- a/arch/x86/kernel/tboot.c +++ b/arch/x86/kernel/tboot.c @@ -44,7 +44,7 @@ #include #include -#include "acpi/realmode/wakeup.h" +#include "../realmode/rm/wakeup.h" /* Global pointer to shared data; NULL means no measured launch. */ struct tboot *tboot __read_mostly; -- cgit v0.10.2 From cda846f101fb1396b6924f1d9b68ac3d42de5403 Mon Sep 17 00:00:00 2001 From: Jarkko Sakkinen Date: Tue, 8 May 2012 21:22:46 +0300 Subject: x86, realmode: read cr4 and EFER from kernel for 64-bit trampoline This patch changes 64-bit trampoline so that CR4 and EFER are provided by the kernel instead of using fixed values. Signed-off-by: Jarkko Sakkinen Link: http://lkml.kernel.org/r/1336501366-28617-24-git-send-email-jarkko.sakkinen@intel.com Signed-off-by: H. Peter Anvin diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 4fa7dcc..404583c 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -544,13 +544,16 @@ static inline void load_sp0(struct tss_struct *tss, * enable), so that any CPU's that boot up * after us can get the correct flags. */ -extern unsigned long mmu_cr4_features; +extern unsigned long mmu_cr4_features; +extern u32 *trampoline_cr4_features; static inline void set_in_cr4(unsigned long mask) { unsigned long cr4; mmu_cr4_features |= mask; + if (trampoline_cr4_features) + *trampoline_cr4_features = mmu_cr4_features; cr4 = read_cr4(); cr4 |= mask; write_cr4(cr4); @@ -561,6 +564,8 @@ static inline void clear_in_cr4(unsigned long mask) unsigned long cr4; mmu_cr4_features &= ~mask; + if (trampoline_cr4_features) + *trampoline_cr4_features = mmu_cr4_features; cr4 = read_cr4(); cr4 &= ~mask; write_cr4(cr4); diff --git a/arch/x86/include/asm/realmode.h b/arch/x86/include/asm/realmode.h index 1421eed..937dc60 100644 --- a/arch/x86/include/asm/realmode.h +++ b/arch/x86/include/asm/realmode.h @@ -24,18 +24,22 @@ struct real_mode_header { #ifdef CONFIG_X86_32 u32 machine_real_restart_asm; #endif -} __attribute__((__packed__)); +}; /* This must match data at trampoline_32/64.S */ struct trampoline_header { #ifdef CONFIG_X86_32 u32 start; + u16 gdt_pad; u16 gdt_limit; u32 gdt_base; #else u64 start; + u32 cr4; + u32 efer_low; + u32 efer_high; #endif -} __attribute__((__packed__)); +}; extern struct real_mode_header *real_mode_header; extern unsigned char real_mode_blob_end[]; diff --git a/arch/x86/kernel/realmode.c b/arch/x86/kernel/realmode.c index 712fba8..66ac276 100644 --- a/arch/x86/kernel/realmode.c +++ b/arch/x86/kernel/realmode.c @@ -6,6 +6,7 @@ #include struct real_mode_header *real_mode_header; +u32 *trampoline_cr4_features; void __init setup_real_mode(void) { @@ -64,7 +65,14 @@ void __init setup_real_mode(void) trampoline_header->gdt_limit = __BOOT_DS + 7; trampoline_header->gdt_base = __pa(boot_gdt); #else + if (rdmsr_safe(MSR_EFER, &trampoline_header->efer_low, + &trampoline_header->efer_high)) + BUG(); + trampoline_header->start = (u64) secondary_startup_64; + trampoline_cr4_features = &trampoline_header->cr4; + *trampoline_cr4_features = read_cr4(); + trampoline_pgd = (u64 *) __va(real_mode_header->trampoline_pgd); trampoline_pgd[0] = __pa(level3_ident_pgt) + _KERNPG_TABLE; trampoline_pgd[511] = __pa(level3_kernel_pgt) + _KERNPG_TABLE; diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 7a14fec..efcf305 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -975,6 +975,8 @@ void __init setup_arch(char **cmdline_p) if (boot_cpu_data.cpuid_level >= 0) { /* A CPU has %cr4 if and only if it has CPUID */ mmu_cr4_features = read_cr4(); + if (trampoline_cr4_features) + *trampoline_cr4_features = mmu_cr4_features; } #ifdef CONFIG_X86_32 diff --git a/arch/x86/realmode/rm/header.S b/arch/x86/realmode/rm/header.S index b4c3263..4612d53 100644 --- a/arch/x86/realmode/rm/header.S +++ b/arch/x86/realmode/rm/header.S @@ -9,6 +9,7 @@ .section ".header", "a" + .balign 16 GLOBAL(real_mode_header) .long pa_text_start .long pa_ro_end diff --git a/arch/x86/realmode/rm/trampoline_64.S b/arch/x86/realmode/rm/trampoline_64.S index 3f72932..66e26f0 100644 --- a/arch/x86/realmode/rm/trampoline_64.S +++ b/arch/x86/realmode/rm/trampoline_64.S @@ -34,9 +34,9 @@ #include "realmode.h" .text - .balign PAGE_SIZE .code16 + .balign PAGE_SIZE ENTRY(trampoline_start) cli # We should be safe anyway wbinvd @@ -65,8 +65,8 @@ ENTRY(trampoline_start) * to 32 bit. */ - lidtl tidt # load idt with 0, 0 - lgdtl tgdt # load gdt with whatever is appropriate + lidtl tr_idt # load idt with 0, 0 + lgdtl tr_gdt # load gdt with whatever is appropriate movw $__KERNEL_DS, %dx # Data segment descriptor @@ -93,16 +93,17 @@ ENTRY(startup_32) movl %edx, %fs movl %edx, %gs - movl $X86_CR4_PAE, %eax + movl pa_tr_cr4, %eax movl %eax, %cr4 # Enable PAE mode # Setup trampoline 4 level pagetables movl $pa_trampoline_pgd, %eax movl %eax, %cr3 + # Set up EFER + movl pa_tr_efer, %eax + movl pa_tr_efer + 4, %edx movl $MSR_EFER, %ecx - movl $((1 << _EFER_LME) | (1 << _EFER_NX)), %eax # Enable Long Mode - xorl %edx, %edx wrmsr # Enable paging and in turn activate Long Mode @@ -124,23 +125,4 @@ ENTRY(startup_64) # Now jump into the kernel using virtual addresses jmpq *tr_start(%rip) - .section ".rodata","a" - .balign 16 -tidt: - .word 0 # idt limit = 0 - .word 0, 0 # idt base = 0L - - # Duplicate the global descriptor table - # so the kernel can live anywhere - .balign 16 - .globl tgdt -tgdt: - .short tgdt_end - tgdt - 1 # gdt limit - .long pa_tgdt - .short 0 - .quad 0x00cf9b000000ffff # __KERNEL32_CS - .quad 0x00af9b000000ffff # __KERNEL_CS - .quad 0x00cf93000000ffff # __KERNEL_DS -tgdt_end: - #include "trampoline_common.S" diff --git a/arch/x86/realmode/rm/trampoline_common.S b/arch/x86/realmode/rm/trampoline_common.S index c3f951c..cac444b 100644 --- a/arch/x86/realmode/rm/trampoline_common.S +++ b/arch/x86/realmode/rm/trampoline_common.S @@ -1,5 +1,20 @@ .section ".rodata","a" +#ifdef CONFIG_X86_64 + # Duplicate the global descriptor table + # so the kernel can live anywhere + .balign 16 + .globl tr_gdt +tr_gdt: + .short tr_gdt_end - tr_gdt - 1 # gdt limit + .long pa_tr_gdt + .short 0 + .quad 0x00cf9b000000ffff # __KERNEL32_CS + .quad 0x00af9b000000ffff # __KERNEL_CS + .quad 0x00cf93000000ffff # __KERNEL_DS +tr_gdt_end: +#endif + .balign 4 tr_idt: .fill 1, 6, 0 @@ -8,12 +23,16 @@ tr_idt: .fill 1, 6, 0 .balign 4 GLOBAL(trampoline_status) .space 4 + .balign 8 GLOBAL(trampoline_header) #ifdef CONFIG_X86_32 tr_start: .space 4 + tr_gdt_pad: .space 2 tr_gdt: .space 6 #else tr_start: .space 8 + GLOBAL(tr_cr4) .space 4 + GLOBAL(tr_efer) .space 8 #endif END(trampoline_header) -- cgit v0.10.2 From 2f129bf4aab684bef1e82e747b709a5025ecb698 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Thu, 15 Dec 2011 08:15:07 +0100 Subject: ARM: Orion: Add clocks using the generic clk infrastructure. Add tclk as a fixed rate clock for all platforms. In addition, on kirkwood, add a gated clock for most of the clocks which can be gated. Signed-off-by: Andrew Lunn Tested-by: Jamie Lentin [mturquette@linaro.org: removed redundant CLKDEV_LOOKUP from Kconfig] [mturquette@linaro.org: removed redundant clk.h from mach-dove/common.c] Signed-off-by: Mike Turquette diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index cf006d4..9be624a 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -1139,6 +1139,7 @@ config PLAT_ORION bool select CLKSRC_MMIO select GENERIC_IRQ_CHIP + select COMMON_CLK config PLAT_PXA bool diff --git a/arch/arm/mach-dove/common.c b/arch/arm/mach-dove/common.c index bda7aca..63fe6e6 100644 --- a/arch/arm/mach-dove/common.c +++ b/arch/arm/mach-dove/common.c @@ -13,7 +13,7 @@ #include #include #include -#include +#include #include #include #include @@ -68,6 +68,17 @@ void __init dove_map_io(void) } /***************************************************************************** + * CLK tree + ****************************************************************************/ +static struct clk *tclk; + +static void __init clk_init(void) +{ + tclk = clk_register_fixed_rate(NULL, "tclk", NULL, CLK_IS_ROOT, + get_tclk()); +} + +/***************************************************************************** * EHCI0 ****************************************************************************/ void __init dove_ehci0_init(void) @@ -272,18 +283,17 @@ void __init dove_sdio1_init(void) void __init dove_init(void) { - int tclk; - - tclk = get_tclk(); - printk(KERN_INFO "Dove 88AP510 SoC, "); - printk(KERN_INFO "TCLK = %dMHz\n", (tclk + 499999) / 1000000); + printk(KERN_INFO "TCLK = %dMHz\n", (get_tclk() + 499999) / 1000000); #ifdef CONFIG_CACHE_TAUROS2 tauros2_init(); #endif dove_setup_cpu_mbus(); + /* Setup root of clk tree */ + clk_init(); + /* internal devices that every board has */ dove_rtc_init(); dove_xor0_init(); diff --git a/arch/arm/mach-kirkwood/board-dt.c b/arch/arm/mach-kirkwood/board-dt.c index 1c672d9..87856b5 100644 --- a/arch/arm/mach-kirkwood/board-dt.c +++ b/arch/arm/mach-kirkwood/board-dt.c @@ -42,6 +42,9 @@ static void __init kirkwood_dt_init(void) kirkwood_l2_init(); #endif + /* Setup root of clk tree */ + kirkwood_clk_init(); + /* internal devices that every board has */ kirkwood_wdt_init(); kirkwood_xor0_init(); diff --git a/arch/arm/mach-kirkwood/common.c b/arch/arm/mach-kirkwood/common.c index a02cae8..57b8d1e 100644 --- a/arch/arm/mach-kirkwood/common.c +++ b/arch/arm/mach-kirkwood/common.c @@ -15,6 +15,8 @@ #include #include #include +#include +#include #include #include #include @@ -31,6 +33,7 @@ #include #include #include +#include #include "common.h" /***************************************************************************** @@ -69,6 +72,41 @@ unsigned int kirkwood_clk_ctrl = CGC_DUNIT | CGC_RESERVED; /***************************************************************************** + * CLK tree + ****************************************************************************/ +static DEFINE_SPINLOCK(gating_lock); +static struct clk *tclk; + +static struct clk __init *kirkwood_register_gate(const char *name, u8 bit_idx) +{ + return clk_register_gate(NULL, name, "tclk", CLK_IGNORE_UNUSED, + (void __iomem *)CLOCK_GATING_CTRL, + bit_idx, 0, &gating_lock); +} + +void __init kirkwood_clk_init(void) +{ + tclk = clk_register_fixed_rate(NULL, "tclk", NULL, + CLK_IS_ROOT, kirkwood_tclk); + + kirkwood_register_gate("runit", CGC_BIT_RUNIT); + kirkwood_register_gate("ge0", CGC_BIT_GE0); + kirkwood_register_gate("ge1", CGC_BIT_GE1); + kirkwood_register_gate("sata0", CGC_BIT_SATA0); + kirkwood_register_gate("sata1", CGC_BIT_SATA1); + kirkwood_register_gate("usb0", CGC_BIT_USB0); + kirkwood_register_gate("sdio", CGC_BIT_SDIO); + kirkwood_register_gate("crypto", CGC_BIT_CRYPTO); + kirkwood_register_gate("xor0", CGC_BIT_XOR0); + kirkwood_register_gate("xor1", CGC_BIT_XOR1); + kirkwood_register_gate("pex0", CGC_BIT_PEX0); + kirkwood_register_gate("pex1", CGC_BIT_PEX1); + kirkwood_register_gate("audio", CGC_BIT_AUDIO); + kirkwood_register_gate("tdm", CGC_BIT_TDM); + kirkwood_register_gate("tsu", CGC_BIT_TSU); +} + +/***************************************************************************** * EHCI0 ****************************************************************************/ void __init kirkwood_ehci_init(void) @@ -465,6 +503,9 @@ void __init kirkwood_init(void) kirkwood_l2_init(); #endif + /* Setup root of clk tree */ + kirkwood_clk_init(); + /* internal devices that every board has */ kirkwood_rtc_init(); kirkwood_wdt_init(); diff --git a/arch/arm/mach-kirkwood/common.h b/arch/arm/mach-kirkwood/common.h index fa8e768..0729b11 100644 --- a/arch/arm/mach-kirkwood/common.h +++ b/arch/arm/mach-kirkwood/common.h @@ -50,6 +50,7 @@ void kirkwood_nand_init(struct mtd_partition *parts, int nr_parts, int delay); void kirkwood_nand_init_rnb(struct mtd_partition *parts, int nr_parts, int (*dev_ready)(struct mtd_info *)); void kirkwood_audio_init(void); void kirkwood_restart(char, const char *); +void kirkwood_clk_init(void); /* board init functions for boards not fully converted to fdt */ #ifdef CONFIG_MACH_DREAMPLUG_DT diff --git a/arch/arm/mach-kirkwood/include/mach/bridge-regs.h b/arch/arm/mach-kirkwood/include/mach/bridge-regs.h index 957bd79..3eee37a 100644 --- a/arch/arm/mach-kirkwood/include/mach/bridge-regs.h +++ b/arch/arm/mach-kirkwood/include/mach/bridge-regs.h @@ -43,6 +43,22 @@ #define L2_WRITETHROUGH 0x00000010 #define CLOCK_GATING_CTRL (BRIDGE_VIRT_BASE | 0x11c) +#define CGC_BIT_GE0 (0) +#define CGC_BIT_PEX0 (2) +#define CGC_BIT_USB0 (3) +#define CGC_BIT_SDIO (4) +#define CGC_BIT_TSU (5) +#define CGC_BIT_DUNIT (6) +#define CGC_BIT_RUNIT (7) +#define CGC_BIT_XOR0 (8) +#define CGC_BIT_AUDIO (9) +#define CGC_BIT_SATA0 (14) +#define CGC_BIT_SATA1 (15) +#define CGC_BIT_XOR1 (16) +#define CGC_BIT_CRYPTO (17) +#define CGC_BIT_PEX1 (18) +#define CGC_BIT_GE1 (19) +#define CGC_BIT_TDM (20) #define CGC_GE0 (1 << 0) #define CGC_PEX0 (1 << 2) #define CGC_USB0 (1 << 3) diff --git a/arch/arm/mach-mv78xx0/common.c b/arch/arm/mach-mv78xx0/common.c index a5dcf766..7373320 100644 --- a/arch/arm/mach-mv78xx0/common.c +++ b/arch/arm/mach-mv78xx0/common.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -103,24 +104,24 @@ static void get_pclk_l2clk(int hclk, int core_index, int *pclk, int *l2clk) static int get_tclk(void) { - int tclk; + int tclk_freq; /* * TCLK tick rate is configured by DEV_A[2:0] strap pins. */ switch ((readl(SAMPLE_AT_RESET_HIGH) >> 6) & 7) { case 1: - tclk = 166666667; + tclk_freq = 166666667; break; case 3: - tclk = 200000000; + tclk_freq = 200000000; break; default: panic("unknown TCLK PLL setting: %.8x\n", readl(SAMPLE_AT_RESET_HIGH)); } - return tclk; + return tclk_freq; } @@ -166,6 +167,17 @@ void __init mv78xx0_map_io(void) /***************************************************************************** + * CLK tree + ****************************************************************************/ +static struct clk *tclk; + +static void __init clk_init(void) +{ + tclk = clk_register_fixed_rate(NULL, "tclk", NULL, CLK_IS_ROOT, + get_tclk()); +} + +/***************************************************************************** * EHCI ****************************************************************************/ void __init mv78xx0_ehci0_init(void) @@ -378,25 +390,26 @@ void __init mv78xx0_init(void) int hclk; int pclk; int l2clk; - int tclk; core_index = mv78xx0_core_index(); hclk = get_hclk(); get_pclk_l2clk(hclk, core_index, &pclk, &l2clk); - tclk = get_tclk(); printk(KERN_INFO "%s ", mv78xx0_id()); printk("core #%d, ", core_index); printk("PCLK = %dMHz, ", (pclk + 499999) / 1000000); printk("L2 = %dMHz, ", (l2clk + 499999) / 1000000); printk("HCLK = %dMHz, ", (hclk + 499999) / 1000000); - printk("TCLK = %dMHz\n", (tclk + 499999) / 1000000); + printk("TCLK = %dMHz\n", (get_tclk() + 499999) / 1000000); mv78xx0_setup_cpu_mbus(); #ifdef CONFIG_CACHE_FEROCEON_L2 feroceon_l2_init(is_l2_writethrough()); #endif + + /* Setup root of clk tree */ + clk_init(); } void mv78xx0_restart(char mode, const char *cmd) diff --git a/arch/arm/mach-orion5x/common.c b/arch/arm/mach-orion5x/common.c index 2448166..8166052 100644 --- a/arch/arm/mach-orion5x/common.c +++ b/arch/arm/mach-orion5x/common.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -70,6 +71,17 @@ void __init orion5x_map_io(void) /***************************************************************************** + * CLK tree + ****************************************************************************/ +static struct clk *tclk; + +static void __init clk_init(void) +{ + tclk = clk_register_fixed_rate(NULL, "tclk", NULL, CLK_IS_ROOT, + orion5x_tclk); +} + +/***************************************************************************** * EHCI0 ****************************************************************************/ void __init orion5x_ehci0_init(void) @@ -276,6 +288,9 @@ void __init orion5x_init(void) */ orion5x_setup_cpu_mbus_bridge(); + /* Setup root of clk tree */ + clk_init(); + /* * Don't issue "Wait for Interrupt" instruction if we are * running on D0 5281 silicon. diff --git a/arch/arm/plat-orion/common.c b/arch/arm/plat-orion/common.c index 74daf5e..4fdd2e7 100644 --- a/arch/arm/plat-orion/common.c +++ b/arch/arm/plat-orion/common.c @@ -14,6 +14,8 @@ #include #include #include +#include +#include #include #include #include -- cgit v0.10.2 From 4574b886698dfad6209102fed6136622b5fe1c21 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Fri, 6 Apr 2012 17:17:26 +0200 Subject: ARM: Orion: SPI: Add clk/clkdev support. Remove now redundant tclk from SPI platform data. This makes the platform data empty, so remove it. Signed-off-by: Andrew Lunn Tested-by: Jamie Lentin Signed-off-by: Mike Turquette diff --git a/arch/arm/mach-dove/common.c b/arch/arm/mach-dove/common.c index 63fe6e6..da5b404 100644 --- a/arch/arm/mach-dove/common.c +++ b/arch/arm/mach-dove/common.c @@ -76,6 +76,8 @@ static void __init clk_init(void) { tclk = clk_register_fixed_rate(NULL, "tclk", NULL, CLK_IS_ROOT, get_tclk()); + + orion_clkdev_init(tclk); } /***************************************************************************** @@ -162,12 +164,12 @@ void __init dove_uart3_init(void) ****************************************************************************/ void __init dove_spi0_init(void) { - orion_spi_init(DOVE_SPI0_PHYS_BASE, get_tclk()); + orion_spi_init(DOVE_SPI0_PHYS_BASE); } void __init dove_spi1_init(void) { - orion_spi_1_init(DOVE_SPI1_PHYS_BASE, get_tclk()); + orion_spi_1_init(DOVE_SPI1_PHYS_BASE); } /***************************************************************************** diff --git a/arch/arm/mach-dove/dove-db-setup.c b/arch/arm/mach-dove/dove-db-setup.c index ea77ae4..bc2867f 100644 --- a/arch/arm/mach-dove/dove-db-setup.c +++ b/arch/arm/mach-dove/dove-db-setup.c @@ -20,7 +20,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/arm/mach-kirkwood/board-dreamplug.c b/arch/arm/mach-kirkwood/board-dreamplug.c index 9854539..55e357a 100644 --- a/arch/arm/mach-kirkwood/board-dreamplug.c +++ b/arch/arm/mach-kirkwood/board-dreamplug.c @@ -27,7 +27,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/arm/mach-kirkwood/common.c b/arch/arm/mach-kirkwood/common.c index 57b8d1e..476e0b9 100644 --- a/arch/arm/mach-kirkwood/common.c +++ b/arch/arm/mach-kirkwood/common.c @@ -86,10 +86,12 @@ static struct clk __init *kirkwood_register_gate(const char *name, u8 bit_idx) void __init kirkwood_clk_init(void) { + struct clk *runit; + tclk = clk_register_fixed_rate(NULL, "tclk", NULL, CLK_IS_ROOT, kirkwood_tclk); - kirkwood_register_gate("runit", CGC_BIT_RUNIT); + runit = kirkwood_register_gate("runit", CGC_BIT_RUNIT); kirkwood_register_gate("ge0", CGC_BIT_GE0); kirkwood_register_gate("ge1", CGC_BIT_GE1); kirkwood_register_gate("sata0", CGC_BIT_SATA0); @@ -104,6 +106,10 @@ void __init kirkwood_clk_init(void) kirkwood_register_gate("audio", CGC_BIT_AUDIO); kirkwood_register_gate("tdm", CGC_BIT_TDM); kirkwood_register_gate("tsu", CGC_BIT_TSU); + + /* clkdev entries, mapping clks to devices */ + orion_clkdev_add(NULL, "orion_spi.0", runit); + orion_clkdev_add(NULL, "orion_spi.1", runit); } /***************************************************************************** @@ -270,7 +276,7 @@ void __init kirkwood_sdio_init(struct mvsdio_platform_data *mvsdio_data) void __init kirkwood_spi_init() { kirkwood_clk_ctrl |= CGC_RUNIT; - orion_spi_init(SPI_PHYS_BASE, kirkwood_tclk); + orion_spi_init(SPI_PHYS_BASE); } diff --git a/arch/arm/mach-kirkwood/mv88f6281gtw_ge-setup.c b/arch/arm/mach-kirkwood/mv88f6281gtw_ge-setup.c index 85f6169..6d8364a 100644 --- a/arch/arm/mach-kirkwood/mv88f6281gtw_ge-setup.c +++ b/arch/arm/mach-kirkwood/mv88f6281gtw_ge-setup.c @@ -23,7 +23,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/arm/mach-kirkwood/rd88f6192-nas-setup.c b/arch/arm/mach-kirkwood/rd88f6192-nas-setup.c index fd2c9c8..f742a66 100644 --- a/arch/arm/mach-kirkwood/rd88f6192-nas-setup.c +++ b/arch/arm/mach-kirkwood/rd88f6192-nas-setup.c @@ -16,7 +16,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/arm/mach-kirkwood/t5325-setup.c b/arch/arm/mach-kirkwood/t5325-setup.c index f9d2a11..bad738e 100644 --- a/arch/arm/mach-kirkwood/t5325-setup.c +++ b/arch/arm/mach-kirkwood/t5325-setup.c @@ -16,7 +16,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/arm/mach-kirkwood/tsx1x-common.c b/arch/arm/mach-kirkwood/tsx1x-common.c index 24294b2..8943ede 100644 --- a/arch/arm/mach-kirkwood/tsx1x-common.c +++ b/arch/arm/mach-kirkwood/tsx1x-common.c @@ -4,7 +4,6 @@ #include #include #include -#include #include #include #include "common.h" diff --git a/arch/arm/mach-mv78xx0/common.c b/arch/arm/mach-mv78xx0/common.c index 7373320..4c24b46 100644 --- a/arch/arm/mach-mv78xx0/common.c +++ b/arch/arm/mach-mv78xx0/common.c @@ -175,6 +175,8 @@ static void __init clk_init(void) { tclk = clk_register_fixed_rate(NULL, "tclk", NULL, CLK_IS_ROOT, get_tclk()); + + orion_clkdev_init(tclk); } /***************************************************************************** diff --git a/arch/arm/mach-orion5x/common.c b/arch/arm/mach-orion5x/common.c index 8166052..2ef82e2 100644 --- a/arch/arm/mach-orion5x/common.c +++ b/arch/arm/mach-orion5x/common.c @@ -79,6 +79,8 @@ static void __init clk_init(void) { tclk = clk_register_fixed_rate(NULL, "tclk", NULL, CLK_IS_ROOT, orion5x_tclk); + + orion_clkdev_init(tclk); } /***************************************************************************** @@ -144,7 +146,7 @@ void __init orion5x_sata_init(struct mv_sata_platform_data *sata_data) ****************************************************************************/ void __init orion5x_spi_init() { - orion_spi_init(SPI_PHYS_BASE, orion5x_tclk); + orion_spi_init(SPI_PHYS_BASE); } diff --git a/arch/arm/mach-orion5x/rd88f6183ap-ge-setup.c b/arch/arm/mach-orion5x/rd88f6183ap-ge-setup.c index 2c5fab0..7b97a9a 100644 --- a/arch/arm/mach-orion5x/rd88f6183ap-ge-setup.c +++ b/arch/arm/mach-orion5x/rd88f6183ap-ge-setup.c @@ -16,7 +16,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/arm/plat-orion/common.c b/arch/arm/plat-orion/common.c index 4fdd2e7..bbe50a9 100644 --- a/arch/arm/plat-orion/common.c +++ b/arch/arm/plat-orion/common.c @@ -19,12 +19,32 @@ #include #include #include -#include #include #include #include #include +/* Create a clkdev entry for a given device/clk */ +void __init orion_clkdev_add(const char *con_id, const char *dev_id, + struct clk *clk) +{ + struct clk_lookup *cl; + + cl = clkdev_alloc(clk, con_id, dev_id); + if (cl) + clkdev_add(cl); +} + +/* Create clkdev entries for all orion platforms except kirkwood. + Kirkwood has gated clocks for some of its peripherals, so creates + its own clkdev entries. For all the other orion devices, create + clkdev entries to the tclk. */ +void __init orion_clkdev_init(struct clk *tclk) +{ + orion_clkdev_add(NULL, "orion_spi.0", tclk); + orion_clkdev_add(NULL, "orion_spi.1", tclk); +} + /* Fill in the resources structure and link it into the platform device structure. There is always a memory region, and nearly always an interrupt.*/ @@ -523,44 +543,32 @@ void __init orion_i2c_1_init(unsigned long mapbase, /***************************************************************************** * SPI ****************************************************************************/ -static struct orion_spi_info orion_spi_plat_data; static struct resource orion_spi_resources; static struct platform_device orion_spi = { .name = "orion_spi", .id = 0, - .dev = { - .platform_data = &orion_spi_plat_data, - }, }; -static struct orion_spi_info orion_spi_1_plat_data; static struct resource orion_spi_1_resources; static struct platform_device orion_spi_1 = { .name = "orion_spi", .id = 1, - .dev = { - .platform_data = &orion_spi_1_plat_data, - }, }; /* Note: The SPI silicon core does have interrupts. However the * current Linux software driver does not use interrupts. */ -void __init orion_spi_init(unsigned long mapbase, - unsigned long tclk) +void __init orion_spi_init(unsigned long mapbase) { - orion_spi_plat_data.tclk = tclk; fill_resources(&orion_spi, &orion_spi_resources, mapbase, SZ_512 - 1, NO_IRQ); platform_device_register(&orion_spi); } -void __init orion_spi_1_init(unsigned long mapbase, - unsigned long tclk) +void __init orion_spi_1_init(unsigned long mapbase) { - orion_spi_1_plat_data.tclk = tclk; fill_resources(&orion_spi_1, &orion_spi_1_resources, mapbase, SZ_512 - 1, NO_IRQ); platform_device_register(&orion_spi_1); diff --git a/arch/arm/plat-orion/include/plat/common.h b/arch/arm/plat-orion/include/plat/common.h index a7fa005..d188a1a 100644 --- a/arch/arm/plat-orion/include/plat/common.h +++ b/arch/arm/plat-orion/include/plat/common.h @@ -70,11 +70,9 @@ void __init orion_i2c_1_init(unsigned long mapbase, unsigned long irq, unsigned long freq_m); -void __init orion_spi_init(unsigned long mapbase, - unsigned long tclk); +void __init orion_spi_init(unsigned long mapbase); -void __init orion_spi_1_init(unsigned long mapbase, - unsigned long tclk); +void __init orion_spi_1_init(unsigned long mapbase); void __init orion_wdt_init(unsigned long tclk); @@ -106,4 +104,9 @@ void __init orion_crypto_init(unsigned long mapbase, unsigned long srambase, unsigned long sram_size, unsigned long irq); + +void __init orion_clkdev_add(const char *con_id, const char *dev_id, + struct clk *clk); + +void __init orion_clkdev_init(struct clk *tclk); #endif diff --git a/drivers/spi/spi-orion.c b/drivers/spi/spi-orion.c index e496f79..dfd04e9 100644 --- a/drivers/spi/spi-orion.c +++ b/drivers/spi/spi-orion.c @@ -16,8 +16,8 @@ #include #include #include -#include #include +#include #include #define DRIVER_NAME "orion_spi" @@ -46,6 +46,7 @@ struct orion_spi { unsigned int max_speed; unsigned int min_speed; struct orion_spi_info *spi_info; + struct clk *clk; }; static struct workqueue_struct *orion_spi_wq; @@ -104,7 +105,7 @@ static int orion_spi_baudrate_set(struct spi_device *spi, unsigned int speed) orion_spi = spi_master_get_devdata(spi->master); - tclk_hz = orion_spi->spi_info->tclk; + tclk_hz = clk_get_rate(orion_spi->clk); /* * the supported rates are: 4,6,8...30 @@ -450,6 +451,7 @@ static int __init orion_spi_probe(struct platform_device *pdev) struct orion_spi *spi; struct resource *r; struct orion_spi_info *spi_info; + unsigned long tclk_hz; int status = 0; spi_info = pdev->dev.platform_data; @@ -476,19 +478,28 @@ static int __init orion_spi_probe(struct platform_device *pdev) spi->master = master; spi->spi_info = spi_info; - spi->max_speed = DIV_ROUND_UP(spi_info->tclk, 4); - spi->min_speed = DIV_ROUND_UP(spi_info->tclk, 30); + spi->clk = clk_get(&pdev->dev, NULL); + if (IS_ERR(spi->clk)) { + status = PTR_ERR(spi->clk); + goto out; + } + + clk_prepare(spi->clk); + clk_enable(spi->clk); + tclk_hz = clk_get_rate(spi->clk); + spi->max_speed = DIV_ROUND_UP(tclk_hz, 4); + spi->min_speed = DIV_ROUND_UP(tclk_hz, 30); r = platform_get_resource(pdev, IORESOURCE_MEM, 0); if (r == NULL) { status = -ENODEV; - goto out; + goto out_rel_clk; } if (!request_mem_region(r->start, resource_size(r), dev_name(&pdev->dev))) { status = -EBUSY; - goto out; + goto out_rel_clk; } spi->base = ioremap(r->start, SZ_1K); @@ -508,7 +519,9 @@ static int __init orion_spi_probe(struct platform_device *pdev) out_rel_mem: release_mem_region(r->start, resource_size(r)); - +out_rel_clk: + clk_disable_unprepare(spi->clk); + clk_put(spi->clk); out: spi_master_put(master); return status; @@ -526,6 +539,9 @@ static int __exit orion_spi_remove(struct platform_device *pdev) cancel_work_sync(&spi->work); + clk_disable_unprepare(spi->clk); + clk_put(spi->clk); + r = platform_get_resource(pdev, IORESOURCE_MEM, 0); release_mem_region(r->start, resource_size(r)); diff --git a/include/linux/spi/orion_spi.h b/include/linux/spi/orion_spi.h deleted file mode 100644 index b4d9fa6..0000000 --- a/include/linux/spi/orion_spi.h +++ /dev/null @@ -1,17 +0,0 @@ -/* - * orion_spi.h - * - * This file is licensed under the terms of the GNU General Public - * License version 2. This program is licensed "as is" without any - * warranty of any kind, whether express or implied. - */ - -#ifndef __LINUX_SPI_ORION_SPI_H -#define __LINUX_SPI_ORION_SPI_H - -struct orion_spi_info { - u32 tclk; /* no support yet */ -}; - - -#endif -- cgit v0.10.2 From 452503ebc7cc4cce5b9e52cf2f03255365a53234 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Sat, 24 Dec 2011 01:24:24 +0100 Subject: ARM: Orion: Eth: Add clk/clkdev support. The t_clk is moved from the shared part of the ethernet driver into the per port section. Each port can have its own gated clock, which it needs to enable/disable, as oppossed to there being one clock shared by all ports. In practice, only kirkwood supports this at the moment. Signed-off-by: Andrew Lunn Tested-by: Jamie Lentin Signed-off-by: Mike Turquette diff --git a/arch/arm/mach-dove/common.c b/arch/arm/mach-dove/common.c index da5b404..0276696 100644 --- a/arch/arm/mach-dove/common.c +++ b/arch/arm/mach-dove/common.c @@ -102,8 +102,7 @@ void __init dove_ehci1_init(void) void __init dove_ge00_init(struct mv643xx_eth_platform_data *eth_data) { orion_ge00_init(eth_data, - DOVE_GE00_PHYS_BASE, IRQ_DOVE_GE00_SUM, - 0, get_tclk()); + DOVE_GE00_PHYS_BASE, IRQ_DOVE_GE00_SUM, 0); } /***************************************************************************** diff --git a/arch/arm/mach-kirkwood/common.c b/arch/arm/mach-kirkwood/common.c index 476e0b9..c223544 100644 --- a/arch/arm/mach-kirkwood/common.c +++ b/arch/arm/mach-kirkwood/common.c @@ -86,14 +86,14 @@ static struct clk __init *kirkwood_register_gate(const char *name, u8 bit_idx) void __init kirkwood_clk_init(void) { - struct clk *runit; + struct clk *runit, *ge0, *ge1; tclk = clk_register_fixed_rate(NULL, "tclk", NULL, CLK_IS_ROOT, kirkwood_tclk); runit = kirkwood_register_gate("runit", CGC_BIT_RUNIT); - kirkwood_register_gate("ge0", CGC_BIT_GE0); - kirkwood_register_gate("ge1", CGC_BIT_GE1); + ge0 = kirkwood_register_gate("ge0", CGC_BIT_GE0); + ge1 = kirkwood_register_gate("ge1", CGC_BIT_GE1); kirkwood_register_gate("sata0", CGC_BIT_SATA0); kirkwood_register_gate("sata1", CGC_BIT_SATA1); kirkwood_register_gate("usb0", CGC_BIT_USB0); @@ -110,6 +110,8 @@ void __init kirkwood_clk_init(void) /* clkdev entries, mapping clks to devices */ orion_clkdev_add(NULL, "orion_spi.0", runit); orion_clkdev_add(NULL, "orion_spi.1", runit); + orion_clkdev_add(NULL, MV643XX_ETH_NAME ".0", ge0); + orion_clkdev_add(NULL, MV643XX_ETH_NAME ".1", ge1); } /***************************************************************************** @@ -131,7 +133,7 @@ void __init kirkwood_ge00_init(struct mv643xx_eth_platform_data *eth_data) orion_ge00_init(eth_data, GE00_PHYS_BASE, IRQ_KIRKWOOD_GE00_SUM, - IRQ_KIRKWOOD_GE00_ERR, kirkwood_tclk); + IRQ_KIRKWOOD_GE00_ERR); } @@ -145,7 +147,7 @@ void __init kirkwood_ge01_init(struct mv643xx_eth_platform_data *eth_data) orion_ge01_init(eth_data, GE01_PHYS_BASE, IRQ_KIRKWOOD_GE01_SUM, - IRQ_KIRKWOOD_GE01_ERR, kirkwood_tclk); + IRQ_KIRKWOOD_GE01_ERR); } diff --git a/arch/arm/mach-mv78xx0/common.c b/arch/arm/mach-mv78xx0/common.c index 4c24b46..ad4d037 100644 --- a/arch/arm/mach-mv78xx0/common.c +++ b/arch/arm/mach-mv78xx0/common.c @@ -213,7 +213,7 @@ void __init mv78xx0_ge00_init(struct mv643xx_eth_platform_data *eth_data) { orion_ge00_init(eth_data, GE00_PHYS_BASE, IRQ_MV78XX0_GE00_SUM, - IRQ_MV78XX0_GE_ERR, get_tclk()); + IRQ_MV78XX0_GE_ERR); } @@ -224,7 +224,7 @@ void __init mv78xx0_ge01_init(struct mv643xx_eth_platform_data *eth_data) { orion_ge01_init(eth_data, GE01_PHYS_BASE, IRQ_MV78XX0_GE01_SUM, - NO_IRQ, get_tclk()); + NO_IRQ); } @@ -248,7 +248,7 @@ void __init mv78xx0_ge10_init(struct mv643xx_eth_platform_data *eth_data) orion_ge10_init(eth_data, GE10_PHYS_BASE, IRQ_MV78XX0_GE10_SUM, - NO_IRQ, get_tclk()); + NO_IRQ); } @@ -272,7 +272,7 @@ void __init mv78xx0_ge11_init(struct mv643xx_eth_platform_data *eth_data) orion_ge11_init(eth_data, GE11_PHYS_BASE, IRQ_MV78XX0_GE11_SUM, - NO_IRQ, get_tclk()); + NO_IRQ); } /***************************************************************************** diff --git a/arch/arm/mach-orion5x/common.c b/arch/arm/mach-orion5x/common.c index 2ef82e2..3fc7318 100644 --- a/arch/arm/mach-orion5x/common.c +++ b/arch/arm/mach-orion5x/common.c @@ -109,7 +109,7 @@ void __init orion5x_eth_init(struct mv643xx_eth_platform_data *eth_data) { orion_ge00_init(eth_data, ORION5X_ETH_PHYS_BASE, IRQ_ORION5X_ETH_SUM, - IRQ_ORION5X_ETH_ERR, orion5x_tclk); + IRQ_ORION5X_ETH_ERR); } diff --git a/arch/arm/plat-orion/common.c b/arch/arm/plat-orion/common.c index bbe50a9..a33733b 100644 --- a/arch/arm/plat-orion/common.c +++ b/arch/arm/plat-orion/common.c @@ -43,6 +43,10 @@ void __init orion_clkdev_init(struct clk *tclk) { orion_clkdev_add(NULL, "orion_spi.0", tclk); orion_clkdev_add(NULL, "orion_spi.1", tclk); + orion_clkdev_add(NULL, MV643XX_ETH_NAME ".0", tclk); + orion_clkdev_add(NULL, MV643XX_ETH_NAME ".1", tclk); + orion_clkdev_add(NULL, MV643XX_ETH_NAME ".2", tclk); + orion_clkdev_add(NULL, MV643XX_ETH_NAME ".3", tclk); } /* Fill in the resources structure and link it into the platform @@ -225,13 +229,11 @@ void __init orion_rtc_init(unsigned long mapbase, ****************************************************************************/ static __init void ge_complete( struct mv643xx_eth_shared_platform_data *orion_ge_shared_data, - int tclk, struct resource *orion_ge_resource, unsigned long irq, struct platform_device *orion_ge_shared, struct mv643xx_eth_platform_data *eth_data, struct platform_device *orion_ge) { - orion_ge_shared_data->t_clk = tclk; orion_ge_resource->start = irq; orion_ge_resource->end = irq; eth_data->shared = orion_ge_shared; @@ -282,12 +284,11 @@ static struct platform_device orion_ge00 = { void __init orion_ge00_init(struct mv643xx_eth_platform_data *eth_data, unsigned long mapbase, unsigned long irq, - unsigned long irq_err, - int tclk) + unsigned long irq_err) { fill_resources(&orion_ge00_shared, orion_ge00_shared_resources, mapbase + 0x2000, SZ_16K - 1, irq_err); - ge_complete(&orion_ge00_shared_data, tclk, + ge_complete(&orion_ge00_shared_data, orion_ge00_resources, irq, &orion_ge00_shared, eth_data, &orion_ge00); } @@ -335,12 +336,11 @@ static struct platform_device orion_ge01 = { void __init orion_ge01_init(struct mv643xx_eth_platform_data *eth_data, unsigned long mapbase, unsigned long irq, - unsigned long irq_err, - int tclk) + unsigned long irq_err) { fill_resources(&orion_ge01_shared, orion_ge01_shared_resources, mapbase + 0x2000, SZ_16K - 1, irq_err); - ge_complete(&orion_ge01_shared_data, tclk, + ge_complete(&orion_ge01_shared_data, orion_ge01_resources, irq, &orion_ge01_shared, eth_data, &orion_ge01); } @@ -388,12 +388,11 @@ static struct platform_device orion_ge10 = { void __init orion_ge10_init(struct mv643xx_eth_platform_data *eth_data, unsigned long mapbase, unsigned long irq, - unsigned long irq_err, - int tclk) + unsigned long irq_err) { fill_resources(&orion_ge10_shared, orion_ge10_shared_resources, mapbase + 0x2000, SZ_16K - 1, irq_err); - ge_complete(&orion_ge10_shared_data, tclk, + ge_complete(&orion_ge10_shared_data, orion_ge10_resources, irq, &orion_ge10_shared, eth_data, &orion_ge10); } @@ -441,12 +440,11 @@ static struct platform_device orion_ge11 = { void __init orion_ge11_init(struct mv643xx_eth_platform_data *eth_data, unsigned long mapbase, unsigned long irq, - unsigned long irq_err, - int tclk) + unsigned long irq_err) { fill_resources(&orion_ge11_shared, orion_ge11_shared_resources, mapbase + 0x2000, SZ_16K - 1, irq_err); - ge_complete(&orion_ge11_shared_data, tclk, + ge_complete(&orion_ge11_shared_data, orion_ge11_resources, irq, &orion_ge11_shared, eth_data, &orion_ge11); } diff --git a/arch/arm/plat-orion/include/plat/common.h b/arch/arm/plat-orion/include/plat/common.h index d188a1a..00d8761 100644 --- a/arch/arm/plat-orion/include/plat/common.h +++ b/arch/arm/plat-orion/include/plat/common.h @@ -39,29 +39,26 @@ void __init orion_rtc_init(unsigned long mapbase, void __init orion_ge00_init(struct mv643xx_eth_platform_data *eth_data, unsigned long mapbase, unsigned long irq, - unsigned long irq_err, - int tclk); + unsigned long irq_err); void __init orion_ge01_init(struct mv643xx_eth_platform_data *eth_data, unsigned long mapbase, unsigned long irq, - unsigned long irq_err, - int tclk); + unsigned long irq_err); void __init orion_ge10_init(struct mv643xx_eth_platform_data *eth_data, unsigned long mapbase, unsigned long irq, - unsigned long irq_err, - int tclk); + unsigned long irq_err); void __init orion_ge11_init(struct mv643xx_eth_platform_data *eth_data, unsigned long mapbase, unsigned long irq, - unsigned long irq_err, - int tclk); + unsigned long irq_err); void __init orion_ge00_switch_init(struct dsa_platform_data *d, int irq); + void __init orion_i2c_init(unsigned long mapbase, unsigned long irq, unsigned long freq_m); diff --git a/drivers/net/ethernet/marvell/mv643xx_eth.c b/drivers/net/ethernet/marvell/mv643xx_eth.c index 5e1ca0f..99cd233 100644 --- a/drivers/net/ethernet/marvell/mv643xx_eth.c +++ b/drivers/net/ethernet/marvell/mv643xx_eth.c @@ -57,6 +57,7 @@ #include #include #include +#include static char mv643xx_eth_driver_name[] = "mv643xx_eth"; static char mv643xx_eth_driver_version[] = "1.4"; @@ -289,10 +290,10 @@ struct mv643xx_eth_shared_private { /* * Hardware-specific parameters. */ - unsigned int t_clk; int extended_rx_coal_limit; int tx_bw_control; int tx_csum_limit; + }; #define TX_BW_CONTROL_ABSENT 0 @@ -431,6 +432,12 @@ struct mv643xx_eth_private { int tx_desc_sram_size; int txq_count; struct tx_queue txq[8]; + + /* + * Hardware-specific parameters. + */ + struct clk *clk; + unsigned int t_clk; }; @@ -1010,7 +1017,7 @@ static void tx_set_rate(struct mv643xx_eth_private *mp, int rate, int burst) int mtu; int bucket_size; - token_rate = ((rate / 1000) * 64) / (mp->shared->t_clk / 1000); + token_rate = ((rate / 1000) * 64) / (mp->t_clk / 1000); if (token_rate > 1023) token_rate = 1023; @@ -1042,7 +1049,7 @@ static void txq_set_rate(struct tx_queue *txq, int rate, int burst) int token_rate; int bucket_size; - token_rate = ((rate / 1000) * 64) / (mp->shared->t_clk / 1000); + token_rate = ((rate / 1000) * 64) / (mp->t_clk / 1000); if (token_rate > 1023) token_rate = 1023; @@ -1309,7 +1316,7 @@ static unsigned int get_rx_coal(struct mv643xx_eth_private *mp) temp = (val & 0x003fff00) >> 8; temp *= 64000000; - do_div(temp, mp->shared->t_clk); + do_div(temp, mp->t_clk); return (unsigned int)temp; } @@ -1319,7 +1326,7 @@ static void set_rx_coal(struct mv643xx_eth_private *mp, unsigned int usec) u64 temp; u32 val; - temp = (u64)usec * mp->shared->t_clk; + temp = (u64)usec * mp->t_clk; temp += 31999999; do_div(temp, 64000000); @@ -1345,7 +1352,7 @@ static unsigned int get_tx_coal(struct mv643xx_eth_private *mp) temp = (rdlp(mp, TX_FIFO_URGENT_THRESHOLD) & 0x3fff0) >> 4; temp *= 64000000; - do_div(temp, mp->shared->t_clk); + do_div(temp, mp->t_clk); return (unsigned int)temp; } @@ -1354,7 +1361,7 @@ static void set_tx_coal(struct mv643xx_eth_private *mp, unsigned int usec) { u64 temp; - temp = (u64)usec * mp->shared->t_clk; + temp = (u64)usec * mp->t_clk; temp += 31999999; do_div(temp, 64000000); @@ -2662,10 +2669,6 @@ static int mv643xx_eth_shared_probe(struct platform_device *pdev) if (dram) mv643xx_eth_conf_mbus_windows(msp, dram); - /* - * Detect hardware parameters. - */ - msp->t_clk = (pd != NULL && pd->t_clk != 0) ? pd->t_clk : 133000000; msp->tx_csum_limit = (pd != NULL && pd->tx_csum_limit) ? pd->tx_csum_limit : 9 * 1024; infer_hw_params(msp); @@ -2890,6 +2893,18 @@ static int mv643xx_eth_probe(struct platform_device *pdev) mp->dev = dev; + /* + * Get the clk rate, if there is one, otherwise use the default. + */ + mp->clk = clk_get(&pdev->dev, (pdev->id ? "1" : "0")); + if (!IS_ERR(mp->clk)) { + clk_prepare_enable(mp->clk); + mp->t_clk = clk_get_rate(mp->clk); + } else { + mp->t_clk = 133000000; + printk(KERN_WARNING "Unable to get clock"); + } + set_params(mp, pd); netif_set_real_num_tx_queues(dev, mp->txq_count); netif_set_real_num_rx_queues(dev, mp->rxq_count); @@ -2978,6 +2993,11 @@ static int mv643xx_eth_remove(struct platform_device *pdev) if (mp->phy != NULL) phy_detach(mp->phy); cancel_work_sync(&mp->tx_timeout_task); + + if (!IS_ERR(mp->clk)) { + clk_disable_unprepare(mp->clk); + clk_put(mp->clk); + } free_netdev(mp->dev); platform_set_drvdata(pdev, NULL); diff --git a/include/linux/mv643xx_eth.h b/include/linux/mv643xx_eth.h index 30b0c4e..51bf8ad 100644 --- a/include/linux/mv643xx_eth.h +++ b/include/linux/mv643xx_eth.h @@ -18,7 +18,6 @@ struct mv643xx_eth_shared_platform_data { struct mbus_dram_target_info *dram; struct platform_device *shared_smi; - unsigned int t_clk; /* * Max packet size for Tx IP/Layer 4 checksum, when set to 0, default * limit of 9KiB will be used. -- cgit v0.10.2 From 4f04be62af95119d258b8035f498100e43c8c527 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Sun, 4 Mar 2012 16:57:31 +0100 Subject: ARM: Orion: WDT: Add clk/clkdev support Remove tclk from platform data. This makes the platform data structure empty, so remove it. Signed-off-by: Andrew Lunn Tested-by: Jamie Lentin Signed-off-by: Mike Turquette diff --git a/arch/arm/mach-kirkwood/common.c b/arch/arm/mach-kirkwood/common.c index c223544..880f366 100644 --- a/arch/arm/mach-kirkwood/common.c +++ b/arch/arm/mach-kirkwood/common.c @@ -112,6 +112,7 @@ void __init kirkwood_clk_init(void) orion_clkdev_add(NULL, "orion_spi.1", runit); orion_clkdev_add(NULL, MV643XX_ETH_NAME ".0", ge0); orion_clkdev_add(NULL, MV643XX_ETH_NAME ".1", ge1); + orion_clkdev_add(NULL, "orion_wdt", tclk); } /***************************************************************************** @@ -351,7 +352,7 @@ void __init kirkwood_xor1_init(void) ****************************************************************************/ void __init kirkwood_wdt_init(void) { - orion_wdt_init(kirkwood_tclk); + orion_wdt_init(); } diff --git a/arch/arm/mach-orion5x/common.c b/arch/arm/mach-orion5x/common.c index 3fc7318..067bdd7 100644 --- a/arch/arm/mach-orion5x/common.c +++ b/arch/arm/mach-orion5x/common.c @@ -193,7 +193,7 @@ static void __init orion5x_crypto_init(void) ****************************************************************************/ void __init orion5x_wdt_init(void) { - orion_wdt_init(orion5x_tclk); + orion_wdt_init(); } diff --git a/arch/arm/plat-orion/common.c b/arch/arm/plat-orion/common.c index a33733b..d349998 100644 --- a/arch/arm/plat-orion/common.c +++ b/arch/arm/plat-orion/common.c @@ -19,7 +19,6 @@ #include #include #include -#include #include #include #include @@ -47,6 +46,7 @@ void __init orion_clkdev_init(struct clk *tclk) orion_clkdev_add(NULL, MV643XX_ETH_NAME ".1", tclk); orion_clkdev_add(NULL, MV643XX_ETH_NAME ".2", tclk); orion_clkdev_add(NULL, MV643XX_ETH_NAME ".3", tclk); + orion_clkdev_add(NULL, "orion_wdt", tclk); } /* Fill in the resources structure and link it into the platform @@ -575,24 +575,18 @@ void __init orion_spi_1_init(unsigned long mapbase) /***************************************************************************** * Watchdog ****************************************************************************/ -static struct orion_wdt_platform_data orion_wdt_data; - static struct resource orion_wdt_resource = DEFINE_RES_MEM(TIMER_VIRT_BASE, 0x28); static struct platform_device orion_wdt_device = { .name = "orion_wdt", .id = -1, - .dev = { - .platform_data = &orion_wdt_data, - }, - .resource = &orion_wdt_resource, .num_resources = 1, + .resource = &orion_wdt_resource, }; -void __init orion_wdt_init(unsigned long tclk) +void __init orion_wdt_init(void) { - orion_wdt_data.tclk = tclk; platform_device_register(&orion_wdt_device); } diff --git a/arch/arm/plat-orion/include/plat/common.h b/arch/arm/plat-orion/include/plat/common.h index 00d8761..c3bfa91 100644 --- a/arch/arm/plat-orion/include/plat/common.h +++ b/arch/arm/plat-orion/include/plat/common.h @@ -71,7 +71,7 @@ void __init orion_spi_init(unsigned long mapbase); void __init orion_spi_1_init(unsigned long mapbase); -void __init orion_wdt_init(unsigned long tclk); +void __init orion_wdt_init(void); void __init orion_xor0_init(unsigned long mapbase_low, unsigned long mapbase_high, diff --git a/arch/arm/plat-orion/include/plat/orion_wdt.h b/arch/arm/plat-orion/include/plat/orion_wdt.h deleted file mode 100644 index 665c362..0000000 --- a/arch/arm/plat-orion/include/plat/orion_wdt.h +++ /dev/null @@ -1,18 +0,0 @@ -/* - * arch/arm/plat-orion/include/plat/orion_wdt.h - * - * This file is licensed under the terms of the GNU General Public - * License version 2. This program is licensed "as is" without any - * warranty of any kind, whether express or implied. - */ - -#ifndef __PLAT_ORION_WDT_H -#define __PLAT_ORION_WDT_H - -struct orion_wdt_platform_data { - u32 tclk; /* no support yet */ -}; - - -#endif - diff --git a/drivers/watchdog/orion_wdt.c b/drivers/watchdog/orion_wdt.c index 788aa15..0f57369 100644 --- a/drivers/watchdog/orion_wdt.c +++ b/drivers/watchdog/orion_wdt.c @@ -24,8 +24,8 @@ #include #include #include +#include #include -#include /* * Watchdog timer block registers. @@ -41,6 +41,7 @@ static bool nowayout = WATCHDOG_NOWAYOUT; static int heartbeat = -1; /* module parameter (seconds) */ static unsigned int wdt_max_duration; /* (seconds) */ +static struct clk *clk; static unsigned int wdt_tclk; static void __iomem *wdt_reg; static unsigned long wdt_status; @@ -237,16 +238,16 @@ static struct miscdevice orion_wdt_miscdev = { static int __devinit orion_wdt_probe(struct platform_device *pdev) { - struct orion_wdt_platform_data *pdata = pdev->dev.platform_data; struct resource *res; int ret; - if (pdata) { - wdt_tclk = pdata->tclk; - } else { - pr_err("misses platform data\n"); + clk = clk_get(&pdev->dev, NULL); + if (IS_ERR(clk)) { + printk(KERN_ERR "Orion Watchdog missing clock\n"); return -ENODEV; } + clk_prepare_enable(clk); + wdt_tclk = clk_get_rate(clk); res = platform_get_resource(pdev, IORESOURCE_MEM, 0); @@ -282,6 +283,9 @@ static int __devexit orion_wdt_remove(struct platform_device *pdev) if (!ret) orion_wdt_miscdev.parent = NULL; + clk_disable_unprepare(clk); + clk_put(clk); + return ret; } -- cgit v0.10.2 From 74c335761acdfd94736d28ba0b941a2efb9c81f0 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Sat, 24 Dec 2011 03:06:34 +0100 Subject: ARM: Orion: UART: Get the clock rate via clk_get_rate(). Let the machine pass to the platform which clock is used by the uart. Enable the clock and use clk_get_rate() to determine its rate. Signed-off-by: Andrew Lunn Tested-by: Jamie Lentin Signed-off-by: Mike Turquette diff --git a/arch/arm/mach-dove/common.c b/arch/arm/mach-dove/common.c index 0276696..0ab0f81 100644 --- a/arch/arm/mach-dove/common.c +++ b/arch/arm/mach-dove/common.c @@ -128,7 +128,7 @@ void __init dove_sata_init(struct mv_sata_platform_data *sata_data) void __init dove_uart0_init(void) { orion_uart0_init(DOVE_UART0_VIRT_BASE, DOVE_UART0_PHYS_BASE, - IRQ_DOVE_UART_0, get_tclk()); + IRQ_DOVE_UART_0, tclk); } /***************************************************************************** @@ -137,7 +137,7 @@ void __init dove_uart0_init(void) void __init dove_uart1_init(void) { orion_uart1_init(DOVE_UART1_VIRT_BASE, DOVE_UART1_PHYS_BASE, - IRQ_DOVE_UART_1, get_tclk()); + IRQ_DOVE_UART_1, tclk); } /***************************************************************************** @@ -146,7 +146,7 @@ void __init dove_uart1_init(void) void __init dove_uart2_init(void) { orion_uart2_init(DOVE_UART2_VIRT_BASE, DOVE_UART2_PHYS_BASE, - IRQ_DOVE_UART_2, get_tclk()); + IRQ_DOVE_UART_2, tclk); } /***************************************************************************** @@ -155,7 +155,7 @@ void __init dove_uart2_init(void) void __init dove_uart3_init(void) { orion_uart3_init(DOVE_UART3_VIRT_BASE, DOVE_UART3_PHYS_BASE, - IRQ_DOVE_UART_3, get_tclk()); + IRQ_DOVE_UART_3, tclk); } /***************************************************************************** diff --git a/arch/arm/mach-kirkwood/common.c b/arch/arm/mach-kirkwood/common.c index 880f366..46d7b43 100644 --- a/arch/arm/mach-kirkwood/common.c +++ b/arch/arm/mach-kirkwood/common.c @@ -299,7 +299,7 @@ void __init kirkwood_i2c_init(void) void __init kirkwood_uart0_init(void) { orion_uart0_init(UART0_VIRT_BASE, UART0_PHYS_BASE, - IRQ_KIRKWOOD_UART_0, kirkwood_tclk); + IRQ_KIRKWOOD_UART_0, tclk); } @@ -309,7 +309,7 @@ void __init kirkwood_uart0_init(void) void __init kirkwood_uart1_init(void) { orion_uart1_init(UART1_VIRT_BASE, UART1_PHYS_BASE, - IRQ_KIRKWOOD_UART_1, kirkwood_tclk); + IRQ_KIRKWOOD_UART_1, tclk); } /***************************************************************************** diff --git a/arch/arm/mach-mv78xx0/common.c b/arch/arm/mach-mv78xx0/common.c index ad4d037..b4c53b8 100644 --- a/arch/arm/mach-mv78xx0/common.c +++ b/arch/arm/mach-mv78xx0/common.c @@ -299,7 +299,7 @@ void __init mv78xx0_sata_init(struct mv_sata_platform_data *sata_data) void __init mv78xx0_uart0_init(void) { orion_uart0_init(UART0_VIRT_BASE, UART0_PHYS_BASE, - IRQ_MV78XX0_UART_0, get_tclk()); + IRQ_MV78XX0_UART_0, tclk); } @@ -309,7 +309,7 @@ void __init mv78xx0_uart0_init(void) void __init mv78xx0_uart1_init(void) { orion_uart1_init(UART1_VIRT_BASE, UART1_PHYS_BASE, - IRQ_MV78XX0_UART_1, get_tclk()); + IRQ_MV78XX0_UART_1, tclk); } @@ -319,7 +319,7 @@ void __init mv78xx0_uart1_init(void) void __init mv78xx0_uart2_init(void) { orion_uart2_init(UART2_VIRT_BASE, UART2_PHYS_BASE, - IRQ_MV78XX0_UART_2, get_tclk()); + IRQ_MV78XX0_UART_2, tclk); } /***************************************************************************** @@ -328,7 +328,7 @@ void __init mv78xx0_uart2_init(void) void __init mv78xx0_uart3_init(void) { orion_uart3_init(UART3_VIRT_BASE, UART3_PHYS_BASE, - IRQ_MV78XX0_UART_3, get_tclk()); + IRQ_MV78XX0_UART_3, tclk); } /***************************************************************************** diff --git a/arch/arm/mach-orion5x/common.c b/arch/arm/mach-orion5x/common.c index 067bdd7..fd36e02 100644 --- a/arch/arm/mach-orion5x/common.c +++ b/arch/arm/mach-orion5x/common.c @@ -156,7 +156,7 @@ void __init orion5x_spi_init() void __init orion5x_uart0_init(void) { orion_uart0_init(UART0_VIRT_BASE, UART0_PHYS_BASE, - IRQ_ORION5X_UART0, orion5x_tclk); + IRQ_ORION5X_UART0, tclk); } /***************************************************************************** @@ -165,7 +165,7 @@ void __init orion5x_uart0_init(void) void __init orion5x_uart1_init(void) { orion_uart1_init(UART1_VIRT_BASE, UART1_PHYS_BASE, - IRQ_ORION5X_UART1, orion5x_tclk); + IRQ_ORION5X_UART1, tclk); } /***************************************************************************** diff --git a/arch/arm/plat-orion/common.c b/arch/arm/plat-orion/common.c index d349998..61fd837 100644 --- a/arch/arm/plat-orion/common.c +++ b/arch/arm/plat-orion/common.c @@ -75,6 +75,12 @@ static void fill_resources(struct platform_device *device, /***************************************************************************** * UART ****************************************************************************/ +static unsigned long __init uart_get_clk_rate(struct clk *clk) +{ + clk_prepare_enable(clk); + return clk_get_rate(clk); +} + static void __init uart_complete( struct platform_device *orion_uart, struct plat_serial8250_port *data, @@ -82,12 +88,12 @@ static void __init uart_complete( unsigned int membase, resource_size_t mapbase, unsigned int irq, - unsigned int uartclk) + struct clk *clk) { data->mapbase = mapbase; data->membase = (void __iomem *)membase; data->irq = irq; - data->uartclk = uartclk; + data->uartclk = uart_get_clk_rate(clk); orion_uart->dev.platform_data = data; fill_resources(orion_uart, resources, mapbase, 0xff, irq); @@ -116,10 +122,10 @@ static struct platform_device orion_uart0 = { void __init orion_uart0_init(unsigned int membase, resource_size_t mapbase, unsigned int irq, - unsigned int uartclk) + struct clk *clk) { uart_complete(&orion_uart0, orion_uart0_data, orion_uart0_resources, - membase, mapbase, irq, uartclk); + membase, mapbase, irq, clk); } /***************************************************************************** @@ -144,10 +150,10 @@ static struct platform_device orion_uart1 = { void __init orion_uart1_init(unsigned int membase, resource_size_t mapbase, unsigned int irq, - unsigned int uartclk) + struct clk *clk) { uart_complete(&orion_uart1, orion_uart1_data, orion_uart1_resources, - membase, mapbase, irq, uartclk); + membase, mapbase, irq, clk); } /***************************************************************************** @@ -172,10 +178,10 @@ static struct platform_device orion_uart2 = { void __init orion_uart2_init(unsigned int membase, resource_size_t mapbase, unsigned int irq, - unsigned int uartclk) + struct clk *clk) { uart_complete(&orion_uart2, orion_uart2_data, orion_uart2_resources, - membase, mapbase, irq, uartclk); + membase, mapbase, irq, clk); } /***************************************************************************** @@ -200,10 +206,10 @@ static struct platform_device orion_uart3 = { void __init orion_uart3_init(unsigned int membase, resource_size_t mapbase, unsigned int irq, - unsigned int uartclk) + struct clk *clk) { uart_complete(&orion_uart3, orion_uart3_data, orion_uart3_resources, - membase, mapbase, irq, uartclk); + membase, mapbase, irq, clk); } /***************************************************************************** diff --git a/arch/arm/plat-orion/include/plat/common.h b/arch/arm/plat-orion/include/plat/common.h index c3bfa91..e00fdb2 100644 --- a/arch/arm/plat-orion/include/plat/common.h +++ b/arch/arm/plat-orion/include/plat/common.h @@ -16,22 +16,22 @@ struct dsa_platform_data; void __init orion_uart0_init(unsigned int membase, resource_size_t mapbase, unsigned int irq, - unsigned int uartclk); + struct clk *clk); void __init orion_uart1_init(unsigned int membase, resource_size_t mapbase, unsigned int irq, - unsigned int uartclk); + struct clk *clk); void __init orion_uart2_init(unsigned int membase, resource_size_t mapbase, unsigned int irq, - unsigned int uartclk); + struct clk *clk); void __init orion_uart3_init(unsigned int membase, resource_size_t mapbase, unsigned int irq, - unsigned int uartclk); + struct clk *clk); void __init orion_rtc_init(unsigned long mapbase, unsigned long irq); -- cgit v0.10.2 From eee989902aab45f0ca2739727ef615420802649c Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Sat, 18 Feb 2012 22:26:42 +0100 Subject: ARM: Orion: SATA: Add per channel clk/clkdev support. The Orion kirkwood chips have a gatable clock per SATA channel. Add code to get and enable this clk if it exists. Signed-off-by: Andrew Lunn Tested-by: Jamie Lentin Signed-off-by: Mike Turquette diff --git a/arch/arm/mach-kirkwood/common.c b/arch/arm/mach-kirkwood/common.c index 46d7b43..c9fef5b 100644 --- a/arch/arm/mach-kirkwood/common.c +++ b/arch/arm/mach-kirkwood/common.c @@ -86,7 +86,7 @@ static struct clk __init *kirkwood_register_gate(const char *name, u8 bit_idx) void __init kirkwood_clk_init(void) { - struct clk *runit, *ge0, *ge1; + struct clk *runit, *ge0, *ge1, *sata0, *sata1, *usb0; tclk = clk_register_fixed_rate(NULL, "tclk", NULL, CLK_IS_ROOT, kirkwood_tclk); @@ -94,8 +94,8 @@ void __init kirkwood_clk_init(void) runit = kirkwood_register_gate("runit", CGC_BIT_RUNIT); ge0 = kirkwood_register_gate("ge0", CGC_BIT_GE0); ge1 = kirkwood_register_gate("ge1", CGC_BIT_GE1); - kirkwood_register_gate("sata0", CGC_BIT_SATA0); - kirkwood_register_gate("sata1", CGC_BIT_SATA1); + sata0 = kirkwood_register_gate("sata0", CGC_BIT_SATA0); + sata1 = kirkwood_register_gate("sata1", CGC_BIT_SATA1); kirkwood_register_gate("usb0", CGC_BIT_USB0); kirkwood_register_gate("sdio", CGC_BIT_SDIO); kirkwood_register_gate("crypto", CGC_BIT_CRYPTO); @@ -113,6 +113,8 @@ void __init kirkwood_clk_init(void) orion_clkdev_add(NULL, MV643XX_ETH_NAME ".0", ge0); orion_clkdev_add(NULL, MV643XX_ETH_NAME ".1", ge1); orion_clkdev_add(NULL, "orion_wdt", tclk); + orion_clkdev_add("0", "sata_mv.0", sata0); + orion_clkdev_add("1", "sata_mv.0", sata1); } /***************************************************************************** diff --git a/drivers/ata/sata_mv.c b/drivers/ata/sata_mv.c index 7336d4a..24712ad 100644 --- a/drivers/ata/sata_mv.c +++ b/drivers/ata/sata_mv.c @@ -553,6 +553,7 @@ struct mv_host_priv { #if defined(CONFIG_HAVE_CLK) struct clk *clk; + struct clk **port_clks; #endif /* * These consistent DMA memory pools give us guaranteed @@ -4027,6 +4028,9 @@ static int mv_platform_probe(struct platform_device *pdev) struct resource *res; int n_ports = 0; int rc; +#if defined(CONFIG_HAVE_CLK) + int port; +#endif ata_print_version_once(&pdev->dev, DRV_VERSION); @@ -4054,6 +4058,13 @@ static int mv_platform_probe(struct platform_device *pdev) if (!host || !hpriv) return -ENOMEM; +#if defined(CONFIG_HAVE_CLK) + hpriv->port_clks = devm_kzalloc(&pdev->dev, + sizeof(struct clk *) * n_ports, + GFP_KERNEL); + if (!hpriv->port_clks) + return -ENOMEM; +#endif host->private_data = hpriv; hpriv->n_ports = n_ports; hpriv->board_idx = chip_soc; @@ -4066,9 +4077,17 @@ static int mv_platform_probe(struct platform_device *pdev) #if defined(CONFIG_HAVE_CLK) hpriv->clk = clk_get(&pdev->dev, NULL); if (IS_ERR(hpriv->clk)) - dev_notice(&pdev->dev, "cannot get clkdev\n"); + dev_notice(&pdev->dev, "cannot get optional clkdev\n"); else - clk_enable(hpriv->clk); + clk_prepare_enable(hpriv->clk); + + for (port = 0; port < n_ports; port++) { + char port_number[16]; + sprintf(port_number, "%d", port); + hpriv->port_clks[port] = clk_get(&pdev->dev, port_number); + if (!IS_ERR(hpriv->port_clks[port])) + clk_prepare_enable(hpriv->port_clks[port]); + } #endif /* @@ -4098,9 +4117,15 @@ static int mv_platform_probe(struct platform_device *pdev) err: #if defined(CONFIG_HAVE_CLK) if (!IS_ERR(hpriv->clk)) { - clk_disable(hpriv->clk); + clk_disable_unprepare(hpriv->clk); clk_put(hpriv->clk); } + for (port = 0; port < n_ports; port++) { + if (!IS_ERR(hpriv->port_clks[port])) { + clk_disable_unprepare(hpriv->port_clks[port]); + clk_put(hpriv->port_clks[port]); + } + } #endif return rc; @@ -4119,14 +4144,21 @@ static int __devexit mv_platform_remove(struct platform_device *pdev) struct ata_host *host = platform_get_drvdata(pdev); #if defined(CONFIG_HAVE_CLK) struct mv_host_priv *hpriv = host->private_data; + int port; #endif ata_host_detach(host); #if defined(CONFIG_HAVE_CLK) if (!IS_ERR(hpriv->clk)) { - clk_disable(hpriv->clk); + clk_disable_unprepare(hpriv->clk); clk_put(hpriv->clk); } + for (port = 0; port < host->n_ports; port++) { + if (!IS_ERR(hpriv->port_clks[port])) { + clk_disable_unprepare(hpriv->port_clks[port]); + clk_put(hpriv->port_clks[port]); + } + } #endif return 0; } -- cgit v0.10.2 From 8c869edaee07c623066266827371235fb9c12e01 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Sun, 15 Apr 2012 12:53:47 +0200 Subject: ARM: Orion: EHCI: Add support for enabling clocks Not all platforms support gating the clock, so it is not an error if the clock does not exist. However, if it does exist, we should enable/disable it as appropriate. Signed-off-by: Andrew Lunn Tested-by: Jamie Lentin Signed-off-by: Mike Turquette diff --git a/arch/arm/mach-kirkwood/common.c b/arch/arm/mach-kirkwood/common.c index c9fef5b..b0f20c0 100644 --- a/arch/arm/mach-kirkwood/common.c +++ b/arch/arm/mach-kirkwood/common.c @@ -96,7 +96,7 @@ void __init kirkwood_clk_init(void) ge1 = kirkwood_register_gate("ge1", CGC_BIT_GE1); sata0 = kirkwood_register_gate("sata0", CGC_BIT_SATA0); sata1 = kirkwood_register_gate("sata1", CGC_BIT_SATA1); - kirkwood_register_gate("usb0", CGC_BIT_USB0); + usb0 = kirkwood_register_gate("usb0", CGC_BIT_USB0); kirkwood_register_gate("sdio", CGC_BIT_SDIO); kirkwood_register_gate("crypto", CGC_BIT_CRYPTO); kirkwood_register_gate("xor0", CGC_BIT_XOR0); @@ -115,6 +115,7 @@ void __init kirkwood_clk_init(void) orion_clkdev_add(NULL, "orion_wdt", tclk); orion_clkdev_add("0", "sata_mv.0", sata0); orion_clkdev_add("1", "sata_mv.0", sata1); + orion_clkdev_add(NULL, "orion-ehci.0", usb0); } /***************************************************************************** diff --git a/drivers/usb/host/ehci-orion.c b/drivers/usb/host/ehci-orion.c index 6c6a5a3..82de107 100644 --- a/drivers/usb/host/ehci-orion.c +++ b/drivers/usb/host/ehci-orion.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #define rdl(off) __raw_readl(hcd->regs + (off)) @@ -198,6 +199,7 @@ static int __devinit ehci_orion_drv_probe(struct platform_device *pdev) struct resource *res; struct usb_hcd *hcd; struct ehci_hcd *ehci; + struct clk *clk; void __iomem *regs; int irq, err; @@ -238,6 +240,14 @@ static int __devinit ehci_orion_drv_probe(struct platform_device *pdev) goto err2; } + /* Not all platforms can gate the clock, so it is not + an error if the clock does not exists. */ + clk = clk_get(&pdev->dev, NULL); + if (!IS_ERR(clk)) { + clk_prepare_enable(clk); + clk_put(clk); + } + hcd = usb_create_hcd(&ehci_orion_hc_driver, &pdev->dev, dev_name(&pdev->dev)); if (!hcd) { @@ -301,12 +311,18 @@ err1: static int __exit ehci_orion_drv_remove(struct platform_device *pdev) { struct usb_hcd *hcd = platform_get_drvdata(pdev); + struct clk *clk; usb_remove_hcd(hcd); iounmap(hcd->regs); release_mem_region(hcd->rsrc_start, hcd->rsrc_len); usb_put_hcd(hcd); + clk = clk_get(&pdev->dev, NULL); + if (!IS_ERR(clk)) { + clk_disable_unprepare(clk); + clk_put(clk); + } return 0; } -- cgit v0.10.2 From 9c2bd504b55ce3e680ae0d3768e78c15fef3448d Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Sun, 19 Feb 2012 11:01:22 +0100 Subject: ARM: Orion: NAND: Add support for clk, if there is one. Not all orion platforms can gate the clock, but if it does exist, enable/disable it as appropriate. v2: Fix the name of the clkdev entry. Signed-off-by: Andrew Lunn Tested-by: Jamie Lentin Signed-off-by: Mike Turquette diff --git a/arch/arm/mach-kirkwood/common.c b/arch/arm/mach-kirkwood/common.c index b0f20c0..99adebc 100644 --- a/arch/arm/mach-kirkwood/common.c +++ b/arch/arm/mach-kirkwood/common.c @@ -116,6 +116,7 @@ void __init kirkwood_clk_init(void) orion_clkdev_add("0", "sata_mv.0", sata0); orion_clkdev_add("1", "sata_mv.0", sata1); orion_clkdev_add(NULL, "orion-ehci.0", usb0); + orion_clkdev_add(NULL, "orion_nand", runit); } /***************************************************************************** diff --git a/drivers/mtd/nand/orion_nand.c b/drivers/mtd/nand/orion_nand.c index 1d3bfb2..fdc4786 100644 --- a/drivers/mtd/nand/orion_nand.c +++ b/drivers/mtd/nand/orion_nand.c @@ -16,6 +16,8 @@ #include #include #include +#include +#include #include #include #include @@ -77,6 +79,7 @@ static int __init orion_nand_probe(struct platform_device *pdev) struct nand_chip *nc; struct orion_nand_data *board; struct resource *res; + struct clk *clk; void __iomem *io_base; int ret = 0; @@ -123,6 +126,14 @@ static int __init orion_nand_probe(struct platform_device *pdev) platform_set_drvdata(pdev, mtd); + /* Not all platforms can gate the clock, so it is not + an error if the clock does not exists. */ + clk = clk_get(&pdev->dev, NULL); + if (!IS_ERR(clk)) { + clk_prepare_enable(clk); + clk_put(clk); + } + if (nand_scan(mtd, 1)) { ret = -ENXIO; goto no_dev; @@ -151,6 +162,7 @@ static int __devexit orion_nand_remove(struct platform_device *pdev) { struct mtd_info *mtd = platform_get_drvdata(pdev); struct nand_chip *nc = mtd->priv; + struct clk *clk; nand_release(mtd); @@ -158,6 +170,12 @@ static int __devexit orion_nand_remove(struct platform_device *pdev) kfree(nc); + clk = clk_get(&pdev->dev, NULL); + if (!IS_ERR(clk)) { + clk_disable_unprepare(clk); + clk_put(clk); + } + return 0; } -- cgit v0.10.2 From f4f7561e032777cd7376800ac97352d5b1684d8f Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Sun, 19 Feb 2012 11:39:27 +0100 Subject: ARM: Orion: SDIO: Add support for clk. Some orion devices can gate the SDIO clock. If the clock exists, enable/disable it as appropriate. Signed-off-by: Andrew Lunn Tested-by: Jamie Lentin Signed-off-by: Mike Turquette diff --git a/arch/arm/mach-kirkwood/common.c b/arch/arm/mach-kirkwood/common.c index 99adebc..88a1667 100644 --- a/arch/arm/mach-kirkwood/common.c +++ b/arch/arm/mach-kirkwood/common.c @@ -86,7 +86,7 @@ static struct clk __init *kirkwood_register_gate(const char *name, u8 bit_idx) void __init kirkwood_clk_init(void) { - struct clk *runit, *ge0, *ge1, *sata0, *sata1, *usb0; + struct clk *runit, *ge0, *ge1, *sata0, *sata1, *usb0, *sdio; tclk = clk_register_fixed_rate(NULL, "tclk", NULL, CLK_IS_ROOT, kirkwood_tclk); @@ -97,7 +97,7 @@ void __init kirkwood_clk_init(void) sata0 = kirkwood_register_gate("sata0", CGC_BIT_SATA0); sata1 = kirkwood_register_gate("sata1", CGC_BIT_SATA1); usb0 = kirkwood_register_gate("usb0", CGC_BIT_USB0); - kirkwood_register_gate("sdio", CGC_BIT_SDIO); + sdio = kirkwood_register_gate("sdio", CGC_BIT_SDIO); kirkwood_register_gate("crypto", CGC_BIT_CRYPTO); kirkwood_register_gate("xor0", CGC_BIT_XOR0); kirkwood_register_gate("xor1", CGC_BIT_XOR1); @@ -117,6 +117,7 @@ void __init kirkwood_clk_init(void) orion_clkdev_add("1", "sata_mv.0", sata1); orion_clkdev_add(NULL, "orion-ehci.0", usb0); orion_clkdev_add(NULL, "orion_nand", runit); + orion_clkdev_add(NULL, "mvsdio", sdio); } /***************************************************************************** diff --git a/drivers/mmc/host/mvsdio.c b/drivers/mmc/host/mvsdio.c index eeb8cd1..3b9136c 100644 --- a/drivers/mmc/host/mvsdio.c +++ b/drivers/mmc/host/mvsdio.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include @@ -51,6 +52,7 @@ struct mvsd_host { struct device *dev; struct resource *res; int irq; + struct clk *clk; int gpio_card_detect; int gpio_write_protect; }; @@ -770,6 +772,13 @@ static int __init mvsd_probe(struct platform_device *pdev) } else host->irq = irq; + /* Not all platforms can gate the clock, so it is not + an error if the clock does not exists. */ + host->clk = clk_get(&pdev->dev, NULL); + if (!IS_ERR(host->clk)) { + clk_prepare_enable(host->clk); + } + if (mvsd_data->gpio_card_detect) { ret = gpio_request(mvsd_data->gpio_card_detect, DRIVER_NAME " cd"); @@ -854,6 +863,11 @@ static int __exit mvsd_remove(struct platform_device *pdev) mvsd_power_down(host); iounmap(host->base); release_resource(host->res); + + if (!IS_ERR(host->clk)) { + clk_disable_unprepare(host->clk); + clk_put(host->clk); + } mmc_free_host(mmc); } platform_set_drvdata(pdev, NULL); -- cgit v0.10.2 From 1f80b126d06cf5c88b7f03a80c79ffd85053688a Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Sun, 19 Feb 2012 11:56:19 +0100 Subject: ARM: Orion: CESA: Add support for clk Some orion platforms support gating of the clock. If the clock exists enable/disbale it as appropriate. Signed-off-by: Andrew Lunn Tested-by: Jamie Lentin Signed-off-by: Mike Turquette diff --git a/arch/arm/mach-kirkwood/common.c b/arch/arm/mach-kirkwood/common.c index 88a1667..b9b341f 100644 --- a/arch/arm/mach-kirkwood/common.c +++ b/arch/arm/mach-kirkwood/common.c @@ -87,6 +87,7 @@ static struct clk __init *kirkwood_register_gate(const char *name, u8 bit_idx) void __init kirkwood_clk_init(void) { struct clk *runit, *ge0, *ge1, *sata0, *sata1, *usb0, *sdio; + struct clk *crypto; tclk = clk_register_fixed_rate(NULL, "tclk", NULL, CLK_IS_ROOT, kirkwood_tclk); @@ -98,7 +99,7 @@ void __init kirkwood_clk_init(void) sata1 = kirkwood_register_gate("sata1", CGC_BIT_SATA1); usb0 = kirkwood_register_gate("usb0", CGC_BIT_USB0); sdio = kirkwood_register_gate("sdio", CGC_BIT_SDIO); - kirkwood_register_gate("crypto", CGC_BIT_CRYPTO); + crypto = kirkwood_register_gate("crypto", CGC_BIT_CRYPTO); kirkwood_register_gate("xor0", CGC_BIT_XOR0); kirkwood_register_gate("xor1", CGC_BIT_XOR1); kirkwood_register_gate("pex0", CGC_BIT_PEX0); @@ -118,6 +119,7 @@ void __init kirkwood_clk_init(void) orion_clkdev_add(NULL, "orion-ehci.0", usb0); orion_clkdev_add(NULL, "orion_nand", runit); orion_clkdev_add(NULL, "mvsdio", sdio); + orion_clkdev_add(NULL, "mv_crypto", crypto); } /***************************************************************************** diff --git a/drivers/crypto/mv_cesa.c b/drivers/crypto/mv_cesa.c index e6ecc5f..1cc6b3f 100644 --- a/drivers/crypto/mv_cesa.c +++ b/drivers/crypto/mv_cesa.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include @@ -79,6 +80,7 @@ struct crypto_priv { void __iomem *reg; void __iomem *sram; int irq; + struct clk *clk; struct task_struct *queue_th; /* the lock protects queue and eng_st */ @@ -1053,6 +1055,12 @@ static int mv_probe(struct platform_device *pdev) if (ret) goto err_thread; + /* Not all platforms can gate the clock, so it is not + an error if the clock does not exists. */ + cp->clk = clk_get(&pdev->dev, NULL); + if (!IS_ERR(cp->clk)) + clk_prepare_enable(cp->clk); + writel(SEC_INT_ACCEL0_DONE, cpg->reg + SEC_ACCEL_INT_MASK); writel(SEC_CFG_STOP_DIG_ERR, cpg->reg + SEC_ACCEL_CFG); writel(SRAM_CONFIG, cpg->reg + SEC_ACCEL_DESC_P0); @@ -1118,6 +1126,12 @@ static int mv_remove(struct platform_device *pdev) memset(cp->sram, 0, cp->sram_size); iounmap(cp->sram); iounmap(cp->reg); + + if (!IS_ERR(cp->clk)) { + clk_disable_unprepare(cp->clk); + clk_put(cp->clk); + } + kfree(cp); cpg = NULL; return 0; -- cgit v0.10.2 From c510182b1c68e2f2bf61e69f6c65bcf61a188809 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Sun, 19 Feb 2012 13:30:26 +0100 Subject: ARM: Orion: XOR: Add support for clk Some orion platforms can gate the XOR driver clock. If the clock exisits, unable/disable it as appropriate. Signed-off-by: Andrew Lunn Tested-by: Jamie Lentin Signed-off-by: Mike Turquette diff --git a/arch/arm/mach-kirkwood/common.c b/arch/arm/mach-kirkwood/common.c index b9b341f..ab27d06 100644 --- a/arch/arm/mach-kirkwood/common.c +++ b/arch/arm/mach-kirkwood/common.c @@ -87,7 +87,7 @@ static struct clk __init *kirkwood_register_gate(const char *name, u8 bit_idx) void __init kirkwood_clk_init(void) { struct clk *runit, *ge0, *ge1, *sata0, *sata1, *usb0, *sdio; - struct clk *crypto; + struct clk *crypto, *xor0, *xor1; tclk = clk_register_fixed_rate(NULL, "tclk", NULL, CLK_IS_ROOT, kirkwood_tclk); @@ -100,8 +100,8 @@ void __init kirkwood_clk_init(void) usb0 = kirkwood_register_gate("usb0", CGC_BIT_USB0); sdio = kirkwood_register_gate("sdio", CGC_BIT_SDIO); crypto = kirkwood_register_gate("crypto", CGC_BIT_CRYPTO); - kirkwood_register_gate("xor0", CGC_BIT_XOR0); - kirkwood_register_gate("xor1", CGC_BIT_XOR1); + xor0 = kirkwood_register_gate("xor0", CGC_BIT_XOR0); + xor1 = kirkwood_register_gate("xor1", CGC_BIT_XOR1); kirkwood_register_gate("pex0", CGC_BIT_PEX0); kirkwood_register_gate("pex1", CGC_BIT_PEX1); kirkwood_register_gate("audio", CGC_BIT_AUDIO); @@ -120,6 +120,8 @@ void __init kirkwood_clk_init(void) orion_clkdev_add(NULL, "orion_nand", runit); orion_clkdev_add(NULL, "mvsdio", sdio); orion_clkdev_add(NULL, "mv_crypto", crypto); + orion_clkdev_add(NULL, MV_XOR_SHARED_NAME ".0", xor0); + orion_clkdev_add(NULL, MV_XOR_SHARED_NAME ".1", xor1); } /***************************************************************************** @@ -336,7 +338,6 @@ void __init kirkwood_crypto_init(void) void __init kirkwood_xor0_init(void) { kirkwood_clk_ctrl |= CGC_XOR0; - orion_xor0_init(XOR0_PHYS_BASE, XOR0_HIGH_PHYS_BASE, IRQ_KIRKWOOD_XOR_00, IRQ_KIRKWOOD_XOR_01); } @@ -348,7 +349,6 @@ void __init kirkwood_xor0_init(void) void __init kirkwood_xor1_init(void) { kirkwood_clk_ctrl |= CGC_XOR1; - orion_xor1_init(XOR1_PHYS_BASE, XOR1_HIGH_PHYS_BASE, IRQ_KIRKWOOD_XOR_10, IRQ_KIRKWOOD_XOR_11); } diff --git a/drivers/dma/mv_xor.c b/drivers/dma/mv_xor.c index fa5d55f..0b12e68 100644 --- a/drivers/dma/mv_xor.c +++ b/drivers/dma/mv_xor.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include "dmaengine.h" @@ -1307,11 +1308,25 @@ static int mv_xor_shared_probe(struct platform_device *pdev) if (dram) mv_xor_conf_mbus_windows(msp, dram); + /* Not all platforms can gate the clock, so it is not + * an error if the clock does not exists. + */ + msp->clk = clk_get(&pdev->dev, NULL); + if (!IS_ERR(msp->clk)) + clk_prepare_enable(msp->clk); + return 0; } static int mv_xor_shared_remove(struct platform_device *pdev) { + struct mv_xor_shared_private *msp = platform_get_drvdata(pdev); + + if (!IS_ERR(msp->clk)) { + clk_disable_unprepare(msp->clk); + clk_put(msp->clk); + } + return 0; } diff --git a/drivers/dma/mv_xor.h b/drivers/dma/mv_xor.h index 654876b..a5b422f 100644 --- a/drivers/dma/mv_xor.h +++ b/drivers/dma/mv_xor.h @@ -55,6 +55,7 @@ struct mv_xor_shared_private { void __iomem *xor_base; void __iomem *xor_high_base; + struct clk *clk; }; -- cgit v0.10.2 From 27e53cfbce493bb062212263aa24bbbc5a1077f4 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Thu, 8 Mar 2012 21:45:59 +0100 Subject: ARM: Orion: PCIE: Add support for clk Prepare and enable the clocks when the board indicates the pcie buses will be used. Signed-off-by: Andrew Lunn Tested-by: Jamie Lentin Signed-off-by: Mike Turquette diff --git a/arch/arm/mach-kirkwood/common.c b/arch/arm/mach-kirkwood/common.c index ab27d06..aa36e1b 100644 --- a/arch/arm/mach-kirkwood/common.c +++ b/arch/arm/mach-kirkwood/common.c @@ -87,7 +87,7 @@ static struct clk __init *kirkwood_register_gate(const char *name, u8 bit_idx) void __init kirkwood_clk_init(void) { struct clk *runit, *ge0, *ge1, *sata0, *sata1, *usb0, *sdio; - struct clk *crypto, *xor0, *xor1; + struct clk *crypto, *xor0, *xor1, *pex0, *pex1; tclk = clk_register_fixed_rate(NULL, "tclk", NULL, CLK_IS_ROOT, kirkwood_tclk); @@ -102,8 +102,8 @@ void __init kirkwood_clk_init(void) crypto = kirkwood_register_gate("crypto", CGC_BIT_CRYPTO); xor0 = kirkwood_register_gate("xor0", CGC_BIT_XOR0); xor1 = kirkwood_register_gate("xor1", CGC_BIT_XOR1); - kirkwood_register_gate("pex0", CGC_BIT_PEX0); - kirkwood_register_gate("pex1", CGC_BIT_PEX1); + pex0 = kirkwood_register_gate("pex0", CGC_BIT_PEX0); + pex1 = kirkwood_register_gate("pex1", CGC_BIT_PEX1); kirkwood_register_gate("audio", CGC_BIT_AUDIO); kirkwood_register_gate("tdm", CGC_BIT_TDM); kirkwood_register_gate("tsu", CGC_BIT_TSU); @@ -122,6 +122,8 @@ void __init kirkwood_clk_init(void) orion_clkdev_add(NULL, "mv_crypto", crypto); orion_clkdev_add(NULL, MV_XOR_SHARED_NAME ".0", xor0); orion_clkdev_add(NULL, MV_XOR_SHARED_NAME ".1", xor1); + orion_clkdev_add("0", "pcie", pex0); + orion_clkdev_add("1", "pcie", pex1); } /***************************************************************************** diff --git a/arch/arm/mach-kirkwood/pcie.c b/arch/arm/mach-kirkwood/pcie.c index f56a011..881933a 100644 --- a/arch/arm/mach-kirkwood/pcie.c +++ b/arch/arm/mach-kirkwood/pcie.c @@ -11,6 +11,7 @@ #include #include #include +#include #include