From c6e565f2bef551326b65d801206d688708f1eeb5 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 25 Jul 2012 21:19:47 +0000 Subject: powerpc: Remove remaining iSeries chunks In commit f5339277 "powerpc: Remove FW_FEATURE ISERIES from arch code", we removed the bulk of the iSeries code, but missed a few bits. Remove the mschunks bits, these were only ever used on iSeries as far as I know, and are definitely not used anymore. Make it even clearer that phys_to_abs() is a nop, by making it a macro. We still have a few users of this, but should clean those up. Signed-off-by: Michael Ellerman Signed-off-by: Benjamin Herrenschmidt diff --git a/arch/powerpc/include/asm/abs_addr.h b/arch/powerpc/include/asm/abs_addr.h index 9d92ba0..419aab7 100644 --- a/arch/powerpc/include/asm/abs_addr.h +++ b/arch/powerpc/include/asm/abs_addr.h @@ -18,38 +18,10 @@ #include #include -struct mschunks_map { - unsigned long num_chunks; - unsigned long chunk_size; - unsigned long chunk_shift; - unsigned long chunk_mask; - u32 *mapping; -}; - -extern struct mschunks_map mschunks_map; - -/* Chunks are 256 KB */ -#define MSCHUNKS_CHUNK_SHIFT (18) -#define MSCHUNKS_CHUNK_SIZE (1UL << MSCHUNKS_CHUNK_SHIFT) -#define MSCHUNKS_OFFSET_MASK (MSCHUNKS_CHUNK_SIZE - 1) - -static inline unsigned long chunk_to_addr(unsigned long chunk) -{ - return chunk << MSCHUNKS_CHUNK_SHIFT; -} - -static inline unsigned long addr_to_chunk(unsigned long addr) -{ - return addr >> MSCHUNKS_CHUNK_SHIFT; -} - -static inline unsigned long phys_to_abs(unsigned long pa) -{ - return pa; -} +#define phys_to_abs(pa) (pa) /* Convenience macros */ -#define virt_to_abs(va) phys_to_abs(__pa(va)) +#define virt_to_abs(va) __pa(va) #define abs_to_virt(aa) __va(aa) #endif /* __KERNEL__ */ -- cgit v0.10.2 From 7187dafc1e7fde551001ba91ea94cf6faa73256a Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 25 Jul 2012 21:19:48 +0000 Subject: powerpc/crypto: Remove users of virt_to_abs() and phys_to_abs() in nx crypto driver phys_to_abs() is a nop, don't use it. virt_to_abs() is just a wrapper around __pa(), call __pa() directly. Signed-off-by: Michael Ellerman Signed-off-by: Benjamin Herrenschmidt diff --git a/drivers/crypto/nx/nx.c b/drivers/crypto/nx/nx.c index d7f179c..638110e 100644 --- a/drivers/crypto/nx/nx.c +++ b/drivers/crypto/nx/nx.c @@ -34,7 +34,6 @@ #include #include #include -#include #include #include @@ -104,10 +103,10 @@ struct nx_sg *nx_build_sg_list(struct nx_sg *sg_head, /* determine the start and end for this address range - slightly * different if this is in VMALLOC_REGION */ if (is_vmalloc_addr(start_addr)) - sg_addr = phys_to_abs(page_to_phys(vmalloc_to_page(start_addr))) + sg_addr = page_to_phys(vmalloc_to_page(start_addr)) + offset_in_page(sg_addr); else - sg_addr = virt_to_abs(sg_addr); + sg_addr = __pa(sg_addr); end_addr = sg_addr + len; @@ -265,17 +264,17 @@ void nx_ctx_init(struct nx_crypto_ctx *nx_ctx, unsigned int function) nx_ctx->csbcpb->csb.valid |= NX_CSB_VALID_BIT; nx_ctx->op.flags = function; - nx_ctx->op.csbcpb = virt_to_abs(nx_ctx->csbcpb); - nx_ctx->op.in = virt_to_abs(nx_ctx->in_sg); - nx_ctx->op.out = virt_to_abs(nx_ctx->out_sg); + nx_ctx->op.csbcpb = __pa(nx_ctx->csbcpb); + nx_ctx->op.in = __pa(nx_ctx->in_sg); + nx_ctx->op.out = __pa(nx_ctx->out_sg); if (nx_ctx->csbcpb_aead) { nx_ctx->csbcpb_aead->csb.valid |= NX_CSB_VALID_BIT; nx_ctx->op_aead.flags = function; - nx_ctx->op_aead.csbcpb = virt_to_abs(nx_ctx->csbcpb_aead); - nx_ctx->op_aead.in = virt_to_abs(nx_ctx->in_sg); - nx_ctx->op_aead.out = virt_to_abs(nx_ctx->out_sg); + nx_ctx->op_aead.csbcpb = __pa(nx_ctx->csbcpb_aead); + nx_ctx->op_aead.in = __pa(nx_ctx->in_sg); + nx_ctx->op_aead.out = __pa(nx_ctx->out_sg); } } -- cgit v0.10.2 From aa97c32c1e6d6564cc385c36bfd28a2663a9bad6 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 25 Jul 2012 21:19:49 +0000 Subject: IB/ehca: Don't use phys_to_abs(), it's a nop Since commit f5339277 phys_to_abs() is 100% a nop, so just remove it. Signed-off-by: Michael Ellerman Signed-off-by: Benjamin Herrenschmidt diff --git a/drivers/infiniband/hw/ehca/ehca_mrmw.c b/drivers/infiniband/hw/ehca/ehca_mrmw.c index b781b2c..47baa54 100644 --- a/drivers/infiniband/hw/ehca/ehca_mrmw.c +++ b/drivers/infiniband/hw/ehca/ehca_mrmw.c @@ -1870,9 +1870,8 @@ static int ehca_set_pagebuf_user1(struct ehca_mr_pginfo *pginfo, for (i = pginfo->u.usr.next_nmap; i < chunk->nmap; ) { pgaddr = page_to_pfn(sg_page(&chunk->page_list[i])) << PAGE_SHIFT ; - *kpage = phys_to_abs(pgaddr + - (pginfo->next_hwpage * - pginfo->hwpage_size)); + *kpage = pgaddr + (pginfo->next_hwpage * + pginfo->hwpage_size); if ( !(*kpage) ) { ehca_gen_err("pgaddr=%llx " "chunk->page_list[i]=%llx " @@ -1927,7 +1926,7 @@ static int ehca_check_kpages_per_ate(struct scatterlist *page_list, u64 pgaddr = page_to_pfn(sg_page(&page_list[t])) << PAGE_SHIFT; if (ehca_debug_level >= 3) ehca_gen_dbg("chunk_page=%llx value=%016llx", pgaddr, - *(u64 *)abs_to_virt(phys_to_abs(pgaddr))); + *(u64 *)abs_to_virt(pgaddr)); if (pgaddr - PAGE_SIZE != *prev_pgaddr) { ehca_gen_err("uncontiguous page found pgaddr=%llx " "prev_pgaddr=%llx page_list_i=%x", @@ -1962,7 +1961,7 @@ static int ehca_set_pagebuf_user2(struct ehca_mr_pginfo *pginfo, if (nr_kpages == kpages_per_hwpage) { pgaddr = ( page_to_pfn(sg_page(&chunk->page_list[i])) << PAGE_SHIFT ); - *kpage = phys_to_abs(pgaddr); + *kpage = pgaddr; if ( !(*kpage) ) { ehca_gen_err("pgaddr=%llx i=%x", pgaddr, i); @@ -1990,13 +1989,11 @@ static int ehca_set_pagebuf_user2(struct ehca_mr_pginfo *pginfo, (pginfo->hwpage_size - 1)) >> PAGE_SHIFT; nr_kpages -= pginfo->kpage_cnt; - *kpage = phys_to_abs( - pgaddr & - ~(pginfo->hwpage_size - 1)); + *kpage = pgaddr & + ~(pginfo->hwpage_size - 1); } if (ehca_debug_level >= 3) { - u64 val = *(u64 *)abs_to_virt( - phys_to_abs(pgaddr)); + u64 val = *(u64 *)abs_to_virt(pgaddr); ehca_gen_dbg("kpage=%llx chunk_page=%llx " "value=%016llx", *kpage, pgaddr, val); @@ -2084,9 +2081,8 @@ static int ehca_set_pagebuf_phys(struct ehca_mr_pginfo *pginfo, pginfo->num_hwpages, i); return -EFAULT; } - *kpage = phys_to_abs( - (pbuf->addr & ~(pginfo->hwpage_size - 1)) + - (pginfo->next_hwpage * pginfo->hwpage_size)); + *kpage = (pbuf->addr & ~(pginfo->hwpage_size - 1)) + + (pginfo->next_hwpage * pginfo->hwpage_size); if ( !(*kpage) && pbuf->addr ) { ehca_gen_err("pbuf->addr=%llx pbuf->size=%llx " "next_hwpage=%llx", pbuf->addr, @@ -2124,8 +2120,8 @@ static int ehca_set_pagebuf_fmr(struct ehca_mr_pginfo *pginfo, /* loop over desired page_list entries */ fmrlist = pginfo->u.fmr.page_list + pginfo->u.fmr.next_listelem; for (i = 0; i < number; i++) { - *kpage = phys_to_abs((*fmrlist & ~(pginfo->hwpage_size - 1)) + - pginfo->next_hwpage * pginfo->hwpage_size); + *kpage = (*fmrlist & ~(pginfo->hwpage_size - 1)) + + pginfo->next_hwpage * pginfo->hwpage_size; if ( !(*kpage) ) { ehca_gen_err("*fmrlist=%llx fmrlist=%p " "next_listelem=%llx next_hwpage=%llx", @@ -2152,8 +2148,7 @@ static int ehca_set_pagebuf_fmr(struct ehca_mr_pginfo *pginfo, u64 prev = *kpage; /* check if adrs are contiguous */ for (j = 1; j < cnt_per_hwpage; j++) { - u64 p = phys_to_abs(fmrlist[j] & - ~(pginfo->hwpage_size - 1)); + u64 p = fmrlist[j] & ~(pginfo->hwpage_size - 1); if (prev + pginfo->u.fmr.fmr_pgsize != p) { ehca_gen_err("uncontiguous fmr pages " "found prev=%llx p=%llx " @@ -2388,8 +2383,8 @@ static int ehca_update_busmap(unsigned long pfn, unsigned long nr_pages) memset(ehca_bmap, 0xFF, EHCA_TOP_MAP_SIZE); } - start_section = phys_to_abs(pfn * PAGE_SIZE) / EHCA_SECTSIZE; - end_section = phys_to_abs((pfn + nr_pages) * PAGE_SIZE) / EHCA_SECTSIZE; + start_section = (pfn * PAGE_SIZE) / EHCA_SECTSIZE; + end_section = ((pfn + nr_pages) * PAGE_SIZE) / EHCA_SECTSIZE; for (i = start_section; i < end_section; i++) { int ret; top = ehca_calc_index(i, EHCA_TOP_INDEX_SHIFT); -- cgit v0.10.2 From 70267a7f47a5565519da5ca2dde35d3ebf7d6ffb Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 25 Jul 2012 21:19:50 +0000 Subject: powerpc/mm: Replace abs_to_virt() with __va() abs_to_virt() is just a wrapper around __va(), call __va() directly. Signed-off-by: Michael Ellerman Signed-off-by: Benjamin Herrenschmidt diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index 377e5cb..ba45739b 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -43,7 +43,6 @@ #include #include #include -#include #include #include #include @@ -651,7 +650,7 @@ static void __init htab_initialize(void) DBG("Hash table allocated at %lx, size: %lx\n", table, htab_size_bytes); - htab_address = abs_to_virt(table); + htab_address = __va(table); /* htab absolute addr + encoded htabsize */ _SDR1 = table + __ilog2(pteg_count) - 11; -- cgit v0.10.2 From 7db90c0222efeae676baa2c72758fcc00666e84f Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 25 Jul 2012 21:19:51 +0000 Subject: powerpc/pasemi: Remove uses of virt_to_abs() and abs_to_virt() These days they are just wrappers around __pa() and __va() respectively. Signed-off-by: Michael Ellerman Signed-off-by: Benjamin Herrenschmidt diff --git a/arch/powerpc/platforms/pasemi/iommu.c b/arch/powerpc/platforms/pasemi/iommu.c index 14943ef..f03fbc2 100644 --- a/arch/powerpc/platforms/pasemi/iommu.c +++ b/arch/powerpc/platforms/pasemi/iommu.c @@ -24,7 +24,6 @@ #include #include #include -#include #include #define IOBMAP_PAGE_SHIFT 12 @@ -99,7 +98,7 @@ static int iobmap_build(struct iommu_table *tbl, long index, ip = ((u32 *)tbl->it_base) + index; while (npages--) { - rpn = virt_to_abs(uaddr) >> IOBMAP_PAGE_SHIFT; + rpn = __pa(uaddr) >> IOBMAP_PAGE_SHIFT; *(ip++) = IOBMAP_L2E_V | rpn; /* invalidate tlb, can be optimized more */ @@ -258,7 +257,7 @@ void __init alloc_iobmap_l2(void) return; #endif /* For 2G space, 8x64 pages (2^21 bytes) is max total l2 size */ - iob_l2_base = (u32 *)abs_to_virt(memblock_alloc_base(1UL<<21, 1UL<<21, 0x80000000)); + iob_l2_base = (u32 *)__va(memblock_alloc_base(1UL<<21, 1UL<<21, 0x80000000)); printk(KERN_INFO "IOBMAP L2 allocated at: %p\n", iob_l2_base); } -- cgit v0.10.2 From 579468a9f4634c18a59ce8435c12b0623c8b924f Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 25 Jul 2012 21:19:52 +0000 Subject: powerpc/dart: Remove uses of virt_to_abs() and abs_to_virt() These days they are just wrappers around __pa() and __va() respectively. Signed-off-by: Michael Ellerman Signed-off-by: Benjamin Herrenschmidt diff --git a/arch/powerpc/sysdev/dart_iommu.c b/arch/powerpc/sysdev/dart_iommu.c index 4f2680f..8ef63a0 100644 --- a/arch/powerpc/sysdev/dart_iommu.c +++ b/arch/powerpc/sysdev/dart_iommu.c @@ -43,7 +43,6 @@ #include #include #include -#include #include #include @@ -167,7 +166,7 @@ static int dart_build(struct iommu_table *tbl, long index, */ l = npages; while (l--) { - rpn = virt_to_abs(uaddr) >> DART_PAGE_SHIFT; + rpn = __pa(uaddr) >> DART_PAGE_SHIFT; *(dp++) = DARTMAP_VALID | (rpn & DARTMAP_RPNMASK); @@ -244,7 +243,7 @@ static int __init dart_init(struct device_node *dart_node) panic("DART: Cannot map registers!"); /* Map in DART table */ - dart_vbase = ioremap(virt_to_abs(dart_tablebase), dart_tablesize); + dart_vbase = ioremap(__pa(dart_tablebase), dart_tablesize); /* Fill initial table */ for (i = 0; i < dart_tablesize/4; i++) @@ -463,7 +462,7 @@ void __init alloc_dart_table(void) * will blow up an entire large page anyway in the kernel mapping */ dart_tablebase = (unsigned long) - abs_to_virt(memblock_alloc_base(1UL<<24, 1UL<<24, 0x80000000L)); + __va(memblock_alloc_base(1UL<<24, 1UL<<24, 0x80000000L)); printk(KERN_INFO "DART table allocated at: %lx\n", dart_tablebase); } -- cgit v0.10.2 From b4b8d1e48e05313b163a36946be1a82e5bc5f9ad Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 25 Jul 2012 21:19:53 +0000 Subject: IB/ehca: Remove uses of virt_to_abs() and abs_to_virt() abs_to_virt() simply calls __va() and we'd like to get rid of it, so replace all abs_to_virt() uses with __va(). __va() returns void *, so when assigning to a pointer there's no need to cast. Similarly virt_to_abs() just calls __pa(). Signed-off-by: Michael Ellerman Signed-off-by: Benjamin Herrenschmidt diff --git a/drivers/infiniband/hw/ehca/ehca_cq.c b/drivers/infiniband/hw/ehca/ehca_cq.c index d9b0ebc..8f52901 100644 --- a/drivers/infiniband/hw/ehca/ehca_cq.c +++ b/drivers/infiniband/hw/ehca/ehca_cq.c @@ -220,7 +220,7 @@ struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe, int comp_vector, cq = ERR_PTR(-EAGAIN); goto create_cq_exit4; } - rpage = virt_to_abs(vpage); + rpage = __pa(vpage); h_ret = hipz_h_register_rpage_cq(adapter_handle, my_cq->ipz_cq_handle, diff --git a/drivers/infiniband/hw/ehca/ehca_eq.c b/drivers/infiniband/hw/ehca/ehca_eq.c index 818d721..90da674 100644 --- a/drivers/infiniband/hw/ehca/ehca_eq.c +++ b/drivers/infiniband/hw/ehca/ehca_eq.c @@ -101,7 +101,7 @@ int ehca_create_eq(struct ehca_shca *shca, if (!vpage) goto create_eq_exit2; - rpage = virt_to_abs(vpage); + rpage = __pa(vpage); h_ret = hipz_h_register_rpage_eq(shca->ipz_hca_handle, eq->ipz_eq_handle, &eq->pf, diff --git a/drivers/infiniband/hw/ehca/ehca_mrmw.c b/drivers/infiniband/hw/ehca/ehca_mrmw.c index 47baa54..8784486 100644 --- a/drivers/infiniband/hw/ehca/ehca_mrmw.c +++ b/drivers/infiniband/hw/ehca/ehca_mrmw.c @@ -1136,7 +1136,7 @@ int ehca_reg_mr_rpages(struct ehca_shca *shca, } if (rnum > 1) { - rpage = virt_to_abs(kpage); + rpage = __pa(kpage); if (!rpage) { ehca_err(&shca->ib_device, "kpage=%p i=%x", kpage, i); @@ -1231,7 +1231,7 @@ inline int ehca_rereg_mr_rereg1(struct ehca_shca *shca, pginfo->num_kpages, pginfo->num_hwpages, kpage); goto ehca_rereg_mr_rereg1_exit1; } - rpage = virt_to_abs(kpage); + rpage = __pa(kpage); if (!rpage) { ehca_err(&shca->ib_device, "kpage=%p", kpage); ret = -EFAULT; @@ -1525,7 +1525,7 @@ static inline void *ehca_calc_sectbase(int top, int dir, int idx) unsigned long ret = idx; ret |= dir << EHCA_DIR_INDEX_SHIFT; ret |= top << EHCA_TOP_INDEX_SHIFT; - return abs_to_virt(ret << SECTION_SIZE_BITS); + return __va(ret << SECTION_SIZE_BITS); } #define ehca_bmap_valid(entry) \ @@ -1537,7 +1537,7 @@ static u64 ehca_reg_mr_section(int top, int dir, int idx, u64 *kpage, { u64 h_ret = 0; unsigned long page = 0; - u64 rpage = virt_to_abs(kpage); + u64 rpage = __pa(kpage); int page_count; void *sectbase = ehca_calc_sectbase(top, dir, idx); @@ -1553,7 +1553,7 @@ static u64 ehca_reg_mr_section(int top, int dir, int idx, u64 *kpage, for (rnum = 0; (rnum < MAX_RPAGES) && (page < page_count); rnum++) { void *pg = sectbase + ((page++) * pginfo->hwpage_size); - kpage[rnum] = virt_to_abs(pg); + kpage[rnum] = __pa(pg); } h_ret = hipz_h_register_rpage_mr(shca->ipz_hca_handle, mr, @@ -1926,7 +1926,7 @@ static int ehca_check_kpages_per_ate(struct scatterlist *page_list, u64 pgaddr = page_to_pfn(sg_page(&page_list[t])) << PAGE_SHIFT; if (ehca_debug_level >= 3) ehca_gen_dbg("chunk_page=%llx value=%016llx", pgaddr, - *(u64 *)abs_to_virt(pgaddr)); + *(u64 *)__va(pgaddr)); if (pgaddr - PAGE_SIZE != *prev_pgaddr) { ehca_gen_err("uncontiguous page found pgaddr=%llx " "prev_pgaddr=%llx page_list_i=%x", @@ -1993,7 +1993,7 @@ static int ehca_set_pagebuf_user2(struct ehca_mr_pginfo *pginfo, ~(pginfo->hwpage_size - 1); } if (ehca_debug_level >= 3) { - u64 val = *(u64 *)abs_to_virt(pgaddr); + u64 val = *(u64 *)__va(pgaddr); ehca_gen_dbg("kpage=%llx chunk_page=%llx " "value=%016llx", *kpage, pgaddr, val); @@ -2503,7 +2503,7 @@ static u64 ehca_map_vaddr(void *caddr) if (!ehca_bmap) return EHCA_INVAL_ADDR; - abs_addr = virt_to_abs(caddr); + abs_addr = __pa(caddr); top = ehca_calc_index(abs_addr, EHCA_TOP_INDEX_SHIFT + EHCA_SECTSHIFT); if (!ehca_bmap_valid(ehca_bmap->top[top])) return EHCA_INVAL_ADDR; diff --git a/drivers/infiniband/hw/ehca/ehca_qp.c b/drivers/infiniband/hw/ehca/ehca_qp.c index 964f855..1493939 100644 --- a/drivers/infiniband/hw/ehca/ehca_qp.c +++ b/drivers/infiniband/hw/ehca/ehca_qp.c @@ -321,7 +321,7 @@ static inline int init_qp_queue(struct ehca_shca *shca, ret = -EINVAL; goto init_qp_queue1; } - rpage = virt_to_abs(vpage); + rpage = __pa(vpage); h_ret = hipz_h_register_rpage_qp(ipz_hca_handle, my_qp->ipz_qp_handle, @@ -1094,7 +1094,7 @@ static int prepare_sqe_rts(struct ehca_qp *my_qp, struct ehca_shca *shca, ehca_dbg(&shca->ib_device, "qp_num=%x bad_send_wqe_p=%p", qp_num, bad_send_wqe_p); /* convert wqe pointer to vadr */ - bad_send_wqe_v = abs_to_virt((u64)bad_send_wqe_p); + bad_send_wqe_v = __va((u64)bad_send_wqe_p); if (ehca_debug_level >= 2) ehca_dmp(bad_send_wqe_v, 32, "qp_num=%x bad_wqe", qp_num); squeue = &my_qp->ipz_squeue; @@ -1138,7 +1138,7 @@ static int calc_left_cqes(u64 wqe_p, struct ipz_queue *ipz_queue, /* convert real to abs address */ wqe_p = wqe_p & (~(1UL << 63)); - wqe_v = abs_to_virt(wqe_p); + wqe_v = __va(wqe_p); if (ipz_queue_abs_to_offset(ipz_queue, wqe_p, &q_ofs)) { ehca_gen_err("Invalid offset for calculating left cqes " diff --git a/drivers/infiniband/hw/ehca/ehca_reqs.c b/drivers/infiniband/hw/ehca/ehca_reqs.c index fd05f48..47f9498 100644 --- a/drivers/infiniband/hw/ehca/ehca_reqs.c +++ b/drivers/infiniband/hw/ehca/ehca_reqs.c @@ -135,7 +135,7 @@ static void trace_send_wr_ud(const struct ib_send_wr *send_wr) mad_hdr->attr_mod); } for (j = 0; j < send_wr->num_sge; j++) { - u8 *data = (u8 *)abs_to_virt(sge->addr); + u8 *data = __va(sge->addr); ehca_gen_dbg("send_wr#%x sge#%x addr=%p length=%x " "lkey=%x", idx, j, data, sge->length, sge->lkey); diff --git a/drivers/infiniband/hw/ehca/ehca_tools.h b/drivers/infiniband/hw/ehca/ehca_tools.h index 54c0d23..d280b12 100644 --- a/drivers/infiniband/hw/ehca/ehca_tools.h +++ b/drivers/infiniband/hw/ehca/ehca_tools.h @@ -59,7 +59,6 @@ #include #include -#include #include #include #include diff --git a/drivers/infiniband/hw/ehca/hcp_if.c b/drivers/infiniband/hw/ehca/hcp_if.c index e6f9cdd..2d41d04 100644 --- a/drivers/infiniband/hw/ehca/hcp_if.c +++ b/drivers/infiniband/hw/ehca/hcp_if.c @@ -396,7 +396,7 @@ u64 hipz_h_query_port(const struct ipz_adapter_handle adapter_handle, struct hipz_query_port *query_port_response_block) { u64 ret; - u64 r_cb = virt_to_abs(query_port_response_block); + u64 r_cb = __pa(query_port_response_block); if (r_cb & (EHCA_PAGESIZE-1)) { ehca_gen_err("response block not page aligned"); @@ -438,7 +438,7 @@ u64 hipz_h_modify_port(const struct ipz_adapter_handle adapter_handle, u64 hipz_h_query_hca(const struct ipz_adapter_handle adapter_handle, struct hipz_query_hca *query_hca_rblock) { - u64 r_cb = virt_to_abs(query_hca_rblock); + u64 r_cb = __pa(query_hca_rblock); if (r_cb & (EHCA_PAGESIZE-1)) { ehca_gen_err("response_block=%p not page aligned", @@ -577,7 +577,7 @@ u64 hipz_h_modify_qp(const struct ipz_adapter_handle adapter_handle, adapter_handle.handle, /* r4 */ qp_handle.handle, /* r5 */ update_mask, /* r6 */ - virt_to_abs(mqpcb), /* r7 */ + __pa(mqpcb), /* r7 */ 0, 0, 0, 0, 0); if (ret == H_NOT_ENOUGH_RESOURCES) @@ -595,7 +595,7 @@ u64 hipz_h_query_qp(const struct ipz_adapter_handle adapter_handle, return ehca_plpar_hcall_norets(H_QUERY_QP, adapter_handle.handle, /* r4 */ qp_handle.handle, /* r5 */ - virt_to_abs(qqpcb), /* r6 */ + __pa(qqpcb), /* r6 */ 0, 0, 0, 0); } @@ -787,7 +787,7 @@ u64 hipz_h_register_rpage_mr(const struct ipz_adapter_handle adapter_handle, if (count > 1) { u64 *kpage; int i; - kpage = (u64 *)abs_to_virt(logical_address_of_page); + kpage = __va(logical_address_of_page); for (i = 0; i < count; i++) ehca_gen_dbg("kpage[%d]=%p", i, (void *)kpage[i]); @@ -944,7 +944,7 @@ u64 hipz_h_error_data(const struct ipz_adapter_handle adapter_handle, void *rblock, unsigned long *byte_count) { - u64 r_cb = virt_to_abs(rblock); + u64 r_cb = __pa(rblock); if (r_cb & (EHCA_PAGESIZE-1)) { ehca_gen_err("rblock not page aligned."); diff --git a/drivers/infiniband/hw/ehca/ipz_pt_fn.c b/drivers/infiniband/hw/ehca/ipz_pt_fn.c index 1898d6e..62c71fa 100644 --- a/drivers/infiniband/hw/ehca/ipz_pt_fn.c +++ b/drivers/infiniband/hw/ehca/ipz_pt_fn.c @@ -81,7 +81,7 @@ int ipz_queue_abs_to_offset(struct ipz_queue *queue, u64 addr, u64 *q_offset) { int i; for (i = 0; i < queue->queue_length / queue->pagesize; i++) { - u64 page = (u64)virt_to_abs(queue->queue_pages[i]); + u64 page = __pa(queue->queue_pages[i]); if (addr >= page && addr < page + queue->pagesize) { *q_offset = addr - page + i * queue->pagesize; return 0; -- cgit v0.10.2 From 48817c58066fe61d3dde100e2c0dd7054418ee92 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 25 Jul 2012 21:19:54 +0000 Subject: drivers/macintosh/smu.c: Replace abs_to_virt() with __va() abs_to_virt() is just a wrapper around __va(), call __va() directly. Signed-off-by: Michael Ellerman Signed-off-by: Benjamin Herrenschmidt diff --git a/drivers/macintosh/smu.c b/drivers/macintosh/smu.c index 54ac7ff..7d5a6b4 100644 --- a/drivers/macintosh/smu.c +++ b/drivers/macintosh/smu.c @@ -45,7 +45,6 @@ #include #include #include -#include #include #define VERSION "0.7" @@ -502,7 +501,7 @@ int __init smu_init (void) * 32 bits value safely */ smu->cmd_buf_abs = (u32)smu_cmdbuf_abs; - smu->cmd_buf = (struct smu_cmd_buf *)abs_to_virt(smu_cmdbuf_abs); + smu->cmd_buf = __va(smu_cmdbuf_abs); smu->db_node = of_find_node_by_name(NULL, "smu-doorbell"); if (smu->db_node == NULL) { -- cgit v0.10.2 From 67e1dbcb1f6058c309e465f212466bbbb2325a88 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 25 Jul 2012 21:19:55 +0000 Subject: ehea: Remove uses of virt_to_abs() and abs_to_virt() abs_to_virt() simply calls __va() and we'd like to get rid of it, so replace all abs_to_virt() uses with __va(). Similarly virt_to_abs() just calls __pa(). Signed-off-by: Michael Ellerman Signed-off-by: Benjamin Herrenschmidt diff --git a/drivers/net/ethernet/ibm/ehea/ehea.h b/drivers/net/ethernet/ibm/ehea/ehea.h index b8e46cc..6be7b98 100644 --- a/drivers/net/ethernet/ibm/ehea/ehea.h +++ b/drivers/net/ethernet/ibm/ehea/ehea.h @@ -35,7 +35,6 @@ #include #include -#include #include #define DRV_NAME "ehea" diff --git a/drivers/net/ethernet/ibm/ehea/ehea_phyp.c b/drivers/net/ethernet/ibm/ehea/ehea_phyp.c index 30f9033..d3a130c 100644 --- a/drivers/net/ethernet/ibm/ehea/ehea_phyp.c +++ b/drivers/net/ethernet/ibm/ehea/ehea_phyp.c @@ -141,7 +141,7 @@ u64 ehea_h_query_ehea_qp(const u64 adapter_handle, const u8 qp_category, qp_category, /* R5 */ qp_handle, /* R6 */ sel_mask, /* R7 */ - virt_to_abs(cb_addr), /* R8 */ + __pa(cb_addr), /* R8 */ 0, 0); } @@ -415,7 +415,7 @@ u64 ehea_h_modify_ehea_qp(const u64 adapter_handle, const u8 cat, (u64) cat, /* R5 */ qp_handle, /* R6 */ sel_mask, /* R7 */ - virt_to_abs(cb_addr), /* R8 */ + __pa(cb_addr), /* R8 */ 0, 0, 0, 0); /* R9-R12 */ *inv_attr_id = outs[0]; @@ -528,7 +528,7 @@ u64 ehea_h_query_ehea(const u64 adapter_handle, void *cb_addr) { u64 hret, cb_logaddr; - cb_logaddr = virt_to_abs(cb_addr); + cb_logaddr = __pa(cb_addr); hret = ehea_plpar_hcall_norets(H_QUERY_HEA, adapter_handle, /* R4 */ @@ -545,7 +545,7 @@ u64 ehea_h_query_ehea_port(const u64 adapter_handle, const u16 port_num, void *cb_addr) { u64 port_info; - u64 cb_logaddr = virt_to_abs(cb_addr); + u64 cb_logaddr = __pa(cb_addr); u64 arr_index = 0; port_info = EHEA_BMASK_SET(H_MEHEAPORT_CAT, cb_cat) @@ -567,7 +567,7 @@ u64 ehea_h_modify_ehea_port(const u64 adapter_handle, const u16 port_num, unsigned long outs[PLPAR_HCALL9_BUFSIZE]; u64 port_info; u64 arr_index = 0; - u64 cb_logaddr = virt_to_abs(cb_addr); + u64 cb_logaddr = __pa(cb_addr); port_info = EHEA_BMASK_SET(H_MEHEAPORT_CAT, cb_cat) | EHEA_BMASK_SET(H_MEHEAPORT_PN, port_num); @@ -621,6 +621,6 @@ u64 ehea_h_error_data(const u64 adapter_handle, const u64 ressource_handle, return ehea_plpar_hcall_norets(H_ERROR_DATA, adapter_handle, /* R4 */ ressource_handle, /* R5 */ - virt_to_abs(rblock), /* R6 */ + __pa(rblock), /* R6 */ 0, 0, 0, 0); /* R7-R12 */ } diff --git a/drivers/net/ethernet/ibm/ehea/ehea_qmr.c b/drivers/net/ethernet/ibm/ehea/ehea_qmr.c index cb66f57..27f88175 100644 --- a/drivers/net/ethernet/ibm/ehea/ehea_qmr.c +++ b/drivers/net/ethernet/ibm/ehea/ehea_qmr.c @@ -163,7 +163,7 @@ struct ehea_cq *ehea_create_cq(struct ehea_adapter *adapter, goto out_kill_hwq; } - rpage = virt_to_abs(vpage); + rpage = __pa(vpage); hret = ehea_h_register_rpage(adapter->handle, 0, EHEA_CQ_REGISTER_ORIG, cq->fw_handle, rpage, 1); @@ -290,7 +290,7 @@ struct ehea_eq *ehea_create_eq(struct ehea_adapter *adapter, goto out_kill_hwq; } - rpage = virt_to_abs(vpage); + rpage = __pa(vpage); hret = ehea_h_register_rpage(adapter->handle, 0, EHEA_EQ_REGISTER_ORIG, @@ -395,7 +395,7 @@ static int ehea_qp_alloc_register(struct ehea_qp *qp, struct hw_queue *hw_queue, pr_err("hw_qpageit_get_inc failed\n"); goto out_kill_hwq; } - rpage = virt_to_abs(vpage); + rpage = __pa(vpage); hret = ehea_h_register_rpage(adapter->handle, 0, h_call_q_selector, qp->fw_handle, rpage, 1); @@ -790,7 +790,7 @@ u64 ehea_map_vaddr(void *caddr) if (!ehea_bmap) return EHEA_INVAL_ADDR; - index = virt_to_abs(caddr) >> SECTION_SIZE_BITS; + index = __pa(caddr) >> SECTION_SIZE_BITS; top = (index >> EHEA_TOP_INDEX_SHIFT) & EHEA_INDEX_MASK; if (!ehea_bmap->top[top]) return EHEA_INVAL_ADDR; @@ -812,7 +812,7 @@ static inline void *ehea_calc_sectbase(int top, int dir, int idx) unsigned long ret = idx; ret |= dir << EHEA_DIR_INDEX_SHIFT; ret |= top << EHEA_TOP_INDEX_SHIFT; - return abs_to_virt(ret << SECTION_SIZE_BITS); + return __va(ret << SECTION_SIZE_BITS); } static u64 ehea_reg_mr_section(int top, int dir, int idx, u64 *pt, @@ -822,7 +822,7 @@ static u64 ehea_reg_mr_section(int top, int dir, int idx, u64 *pt, void *pg; u64 j, m, hret; unsigned long k = 0; - u64 pt_abs = virt_to_abs(pt); + u64 pt_abs = __pa(pt); void *sectbase = ehea_calc_sectbase(top, dir, idx); @@ -830,7 +830,7 @@ static u64 ehea_reg_mr_section(int top, int dir, int idx, u64 *pt, for (m = 0; m < EHEA_MAX_RPAGE; m++) { pg = sectbase + ((k++) * EHEA_PAGESIZE); - pt[m] = virt_to_abs(pg); + pt[m] = __pa(pg); } hret = ehea_h_register_rpage_mr(adapter->handle, mr->handle, 0, 0, pt_abs, EHEA_MAX_RPAGE); -- cgit v0.10.2 From 3d2675238a71545372f19723f9f066e8ad831f70 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 25 Jul 2012 21:19:56 +0000 Subject: powerpc/kernel: Remove uses of abs_to_virt() and virt_to_abs() These days they are just __va() and __pa() respectively. Signed-off-by: Michael Ellerman Signed-off-by: Benjamin Herrenschmidt diff --git a/arch/powerpc/kernel/dma.c b/arch/powerpc/kernel/dma.c index 355b9d8..8032b97 100644 --- a/arch/powerpc/kernel/dma.c +++ b/arch/powerpc/kernel/dma.c @@ -14,7 +14,6 @@ #include #include #include -#include #include /* @@ -50,7 +49,7 @@ void *dma_direct_alloc_coherent(struct device *dev, size_t size, return NULL; ret = page_address(page); memset(ret, 0, size); - *dma_handle = virt_to_abs(ret) + get_dma_offset(dev); + *dma_handle = __pa(ret) + get_dma_offset(dev); return ret; #endif diff --git a/arch/powerpc/kernel/rtas_flash.c b/arch/powerpc/kernel/rtas_flash.c index 2c0ee64..20b0120 100644 --- a/arch/powerpc/kernel/rtas_flash.c +++ b/arch/powerpc/kernel/rtas_flash.c @@ -21,7 +21,6 @@ #include #include #include -#include #define MODULE_VERS "1.0" #define MODULE_NAME "rtas_flash" @@ -582,7 +581,7 @@ static void rtas_flash_firmware(int reboot_type) flist = (struct flash_block_list *)&rtas_data_buf[0]; flist->num_blocks = 0; flist->next = rtas_firmware_flash_list; - rtas_block_list = virt_to_abs(flist); + rtas_block_list = __pa(flist); if (rtas_block_list >= 4UL*1024*1024*1024) { printk(KERN_ALERT "FLASH: kernel bug...flash list header addr above 4GB\n"); spin_unlock(&rtas_data_buf_lock); @@ -596,13 +595,13 @@ static void rtas_flash_firmware(int reboot_type) for (f = flist; f; f = next) { /* Translate data addrs to absolute */ for (i = 0; i < f->num_blocks; i++) { - f->blocks[i].data = (char *)virt_to_abs(f->blocks[i].data); + f->blocks[i].data = (char *)__pa(f->blocks[i].data); image_size += f->blocks[i].length; } next = f->next; /* Don't translate NULL pointer for last entry */ if (f->next) - f->next = (struct flash_block_list *)virt_to_abs(f->next); + f->next = (struct flash_block_list *)__pa(f->next); else f->next = NULL; /* make num_blocks into the version/length field */ -- cgit v0.10.2 From 474e3d569b63f7275cfec072d7ef7b2ffb8904c8 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 25 Jul 2012 21:19:57 +0000 Subject: powerpc/pseries: Remove uses of abs_to_virt() and virt_to_abs() These days they are just __va() and __pa() respectively. Signed-off-by: Michael Ellerman Signed-off-by: Benjamin Herrenschmidt diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index bca220f..e150093 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -41,7 +41,6 @@ #include #include #include -#include #include #include #include @@ -99,7 +98,7 @@ static int tce_build_pSeries(struct iommu_table *tbl, long index, while (npages--) { /* can't move this out since we might cross MEMBLOCK boundary */ - rpn = (virt_to_abs(uaddr)) >> TCE_SHIFT; + rpn = __pa(uaddr) >> TCE_SHIFT; *tcep = proto_tce | (rpn & TCE_RPN_MASK) << TCE_RPN_SHIFT; uaddr += TCE_PAGE_SIZE; @@ -148,7 +147,7 @@ static int tce_build_pSeriesLP(struct iommu_table *tbl, long tcenum, int ret = 0; long tcenum_start = tcenum, npages_start = npages; - rpn = (virt_to_abs(uaddr)) >> TCE_SHIFT; + rpn = __pa(uaddr) >> TCE_SHIFT; proto_tce = TCE_PCI_READ; if (direction != DMA_TO_DEVICE) proto_tce |= TCE_PCI_WRITE; @@ -217,7 +216,7 @@ static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum, __get_cpu_var(tce_page) = tcep; } - rpn = (virt_to_abs(uaddr)) >> TCE_SHIFT; + rpn = __pa(uaddr) >> TCE_SHIFT; proto_tce = TCE_PCI_READ; if (direction != DMA_TO_DEVICE) proto_tce |= TCE_PCI_WRITE; @@ -237,7 +236,7 @@ static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum, rc = plpar_tce_put_indirect((u64)tbl->it_index, (u64)tcenum << 12, - (u64)virt_to_abs(tcep), + (u64)__pa(tcep), limit); npages -= limit; @@ -441,7 +440,7 @@ static int tce_setrange_multi_pSeriesLP(unsigned long start_pfn, rc = plpar_tce_put_indirect(liobn, dma_offset, - (u64)virt_to_abs(tcep), + (u64)__pa(tcep), limit); num_tce -= limit; -- cgit v0.10.2 From d88dc13a2463cac2a909d6b8570b7d47f88f1b6e Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 25 Jul 2012 21:19:58 +0000 Subject: powerpc/mm: Remove uses of abs_to_virt() and virt_to_abs() These days they are just __va() and __pa() respectively. Signed-off-by: Michael Ellerman Signed-off-by: Benjamin Herrenschmidt diff --git a/arch/powerpc/mm/stab.c b/arch/powerpc/mm/stab.c index 9106ebb..3f8efa6 100644 --- a/arch/powerpc/mm/stab.c +++ b/arch/powerpc/mm/stab.c @@ -20,7 +20,6 @@ #include #include #include -#include struct stab_entry { unsigned long esid_data; @@ -257,7 +256,7 @@ void __init stabs_alloc(void) memset((void *)newstab, 0, HW_PAGE_SIZE); paca[cpu].stab_addr = newstab; - paca[cpu].stab_real = virt_to_abs(newstab); + paca[cpu].stab_real = __pa(newstab); printk(KERN_INFO "Segment table for CPU %d at 0x%llx " "virtual, 0x%llx absolute\n", cpu, paca[cpu].stab_addr, paca[cpu].stab_real); -- cgit v0.10.2 From f6b3df625ab8285398b4acf02942a8e742e72efc Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 25 Jul 2012 21:19:59 +0000 Subject: powerpc/ps3: Replace virt_to_abs() with __pa() virt_to_abs() is just a wrapper around __pa(), call __pa() directly. Signed-off-by: Michael Ellerman Signed-off-by: Benjamin Herrenschmidt diff --git a/drivers/video/ps3fb.c b/drivers/video/ps3fb.c index 213fbbc..4e292f2 100644 --- a/drivers/video/ps3fb.c +++ b/drivers/video/ps3fb.c @@ -31,7 +31,6 @@ #include #include -#include #include #include #include @@ -1141,7 +1140,7 @@ static int __devinit ps3fb_probe(struct ps3_system_bus_device *dev) */ fb_start = ps3fb_videomemory.address + GPU_FB_START; info->screen_base = (char __force __iomem *)fb_start; - info->fix.smem_start = virt_to_abs(fb_start); + info->fix.smem_start = __pa(fb_start); info->fix.smem_len = ps3fb_videomemory.size - GPU_FB_START; info->pseudo_palette = par->pseudo_palette; -- cgit v0.10.2 From 6bfa5c586b87a36f742e0525e6c337074b68978d Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 25 Jul 2012 21:20:00 +0000 Subject: powerpc: Remove phys_to_abs() now all users have been removed Signed-off-by: Michael Ellerman Signed-off-by: Benjamin Herrenschmidt diff --git a/arch/powerpc/include/asm/abs_addr.h b/arch/powerpc/include/asm/abs_addr.h index 419aab7..4c92fba 100644 --- a/arch/powerpc/include/asm/abs_addr.h +++ b/arch/powerpc/include/asm/abs_addr.h @@ -18,8 +18,6 @@ #include #include -#define phys_to_abs(pa) (pa) - /* Convenience macros */ #define virt_to_abs(va) __pa(va) #define abs_to_virt(aa) __va(aa) -- cgit v0.10.2 From 4130b5d7b733c94055c53056b43aa887dc163d99 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 25 Jul 2012 21:20:01 +0000 Subject: powerpc: Remove abs_to_virt() now all users have been fixed Signed-off-by: Michael Ellerman Signed-off-by: Benjamin Herrenschmidt diff --git a/arch/powerpc/include/asm/abs_addr.h b/arch/powerpc/include/asm/abs_addr.h index 4c92fba..80a9535 100644 --- a/arch/powerpc/include/asm/abs_addr.h +++ b/arch/powerpc/include/asm/abs_addr.h @@ -20,7 +20,6 @@ /* Convenience macros */ #define virt_to_abs(va) __pa(va) -#define abs_to_virt(aa) __va(aa) #endif /* __KERNEL__ */ #endif /* _ASM_POWERPC_ABS_ADDR_H */ -- cgit v0.10.2 From c6424e32c30a06a13e5d2505058a8496bd318d6a Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 25 Jul 2012 21:20:02 +0000 Subject: powerpc: Remove virt_to_abs() now all users have been fixed Signed-off-by: Michael Ellerman Signed-off-by: Benjamin Herrenschmidt diff --git a/arch/powerpc/include/asm/abs_addr.h b/arch/powerpc/include/asm/abs_addr.h index 80a9535..dc667d4 100644 --- a/arch/powerpc/include/asm/abs_addr.h +++ b/arch/powerpc/include/asm/abs_addr.h @@ -18,8 +18,5 @@ #include #include -/* Convenience macros */ -#define virt_to_abs(va) __pa(va) - #endif /* __KERNEL__ */ #endif /* _ASM_POWERPC_ABS_ADDR_H */ -- cgit v0.10.2 From beacc6da8649f5c0841ac9b326dcf0c4dad823cd Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 25 Jul 2012 21:20:03 +0000 Subject: powerpc: Remove all includes of It's empty now, apart from other includes. Fixup a few files that were getting things via this header. Signed-off-by: Michael Ellerman Signed-off-by: Benjamin Herrenschmidt diff --git a/arch/powerpc/kernel/dma-swiotlb.c b/arch/powerpc/kernel/dma-swiotlb.c index 4694365..a720b54 100644 --- a/arch/powerpc/kernel/dma-swiotlb.c +++ b/arch/powerpc/kernel/dma-swiotlb.c @@ -12,6 +12,7 @@ */ #include +#include #include #include #include @@ -20,7 +21,6 @@ #include #include #include -#include unsigned int ppc_swiotlb_enable; diff --git a/arch/powerpc/kernel/ibmebus.c b/arch/powerpc/kernel/ibmebus.c index b01d14e..8220baa 100644 --- a/arch/powerpc/kernel/ibmebus.c +++ b/arch/powerpc/kernel/ibmebus.c @@ -47,7 +47,6 @@ #include #include #include -#include static struct device ibmebus_bus_device = { /* fake "parent" device */ .init_name = "ibmebus", diff --git a/arch/powerpc/kernel/vio.c b/arch/powerpc/kernel/vio.c index 02b3221..201ba59 100644 --- a/arch/powerpc/kernel/vio.c +++ b/arch/powerpc/kernel/vio.c @@ -33,7 +33,6 @@ #include #include #include -#include #include #include diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c index 90039bc..f21e8ce 100644 --- a/arch/powerpc/mm/hash_native_64.c +++ b/arch/powerpc/mm/hash_native_64.c @@ -14,10 +14,10 @@ #include #include +#include #include #include -#include #include #include #include diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c index 620b7ac..95a4529 100644 --- a/arch/powerpc/mm/init_64.c +++ b/arch/powerpc/mm/init_64.c @@ -62,7 +62,6 @@ #include #include #include -#include #include #include "mmu_decl.h" diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c index 249a063..297d495 100644 --- a/arch/powerpc/mm/pgtable_64.c +++ b/arch/powerpc/mm/pgtable_64.c @@ -51,7 +51,6 @@ #include #include #include -#include #include #include "mmu_decl.h" diff --git a/arch/powerpc/platforms/pasemi/iommu.c b/arch/powerpc/platforms/pasemi/iommu.c index f03fbc2..7d2d036 100644 --- a/arch/powerpc/platforms/pasemi/iommu.c +++ b/arch/powerpc/platforms/pasemi/iommu.c @@ -19,6 +19,7 @@ #undef DEBUG +#include #include #include #include diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 9cda6a1..5e75dcf 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -30,7 +30,6 @@ #include #include #include -#include #include "powernv.h" #include "pci.h" diff --git a/arch/powerpc/platforms/powernv/pci-p5ioc2.c b/arch/powerpc/platforms/powernv/pci-p5ioc2.c index 2649677..6b4bef4 100644 --- a/arch/powerpc/platforms/powernv/pci-p5ioc2.c +++ b/arch/powerpc/platforms/powernv/pci-p5ioc2.c @@ -30,7 +30,6 @@ #include #include #include -#include #include "powernv.h" #include "pci.h" diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c index be3cfc5..1e908ae 100644 --- a/arch/powerpc/platforms/powernv/pci.c +++ b/arch/powerpc/platforms/powernv/pci.c @@ -30,7 +30,6 @@ #include #include #include -#include #include #include "powernv.h" diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index e150093..6153eea 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include /* for show_stack */ #include diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index 5f3ef87..177055d 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c @@ -31,7 +31,6 @@ #include #include #include -#include #include #include #include -- cgit v0.10.2 From baa436b3676ac0eea48dfbeaf7babf53a2305bba Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 25 Jul 2012 21:20:04 +0000 Subject: powerpc: Remove It contains no code and is not included by anyone. Signed-off-by: Michael Ellerman Signed-off-by: Benjamin Herrenschmidt diff --git a/arch/powerpc/include/asm/abs_addr.h b/arch/powerpc/include/asm/abs_addr.h deleted file mode 100644 index dc667d4..0000000 --- a/arch/powerpc/include/asm/abs_addr.h +++ /dev/null @@ -1,22 +0,0 @@ -#ifndef _ASM_POWERPC_ABS_ADDR_H -#define _ASM_POWERPC_ABS_ADDR_H -#ifdef __KERNEL__ - - -/* - * c 2001 PPC 64 Team, IBM Corp - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#include - -#include -#include -#include - -#endif /* __KERNEL__ */ -#endif /* _ASM_POWERPC_ABS_ADDR_H */ -- cgit v0.10.2 From 92057a493af4bb56928a762ad0423200b835d995 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Mon, 23 Jul 2012 21:33:13 +0000 Subject: hvc_console: Better kernel console support hvc_console has two methods to instanciate the consoles. hvc_instanciate is meant to be called at early boot, while hvc_alloc is called for more dynamically probed objects. Currently, it only deals with adding kernel consoles in the former case, which means for example that if a console only uses dynamic probing, it will never be usable as a kernel console even when specifying console=hvc0 explicitly, which could be considered annoying... More specifically, on pseries, we only do the early instanciate for the console currently used by the firmware, so if you have your firmware configured to go to a video card, for example, you cannot get your kernel console, oops messages, etc... on your serial port or hypervisor console, which would be handy to deal with oopses. This fixes it by checking if hvc_console.flags & CON_ENABLED is set when registering a new dynamic console, and if not, redo the index check and re-register the console if the index matches, allowing console=hvcN to work. Signed-off-by: Benjamin Herrenschmidt diff --git a/drivers/tty/hvc/hvc_console.c b/drivers/tty/hvc/hvc_console.c index 2d691eb..f1d4d96 100644 --- a/drivers/tty/hvc/hvc_console.c +++ b/drivers/tty/hvc/hvc_console.c @@ -245,6 +245,20 @@ static void hvc_port_destruct(struct tty_port *port) kfree(hp); } +static void hvc_check_console(int index) +{ + /* Already enabled, bail out */ + if (hvc_console.flags & CON_ENABLED) + return; + + /* If this index is what the user requested, then register + * now (setup won't fail at this point). It's ok to just + * call register again if previously .setup failed. + */ + if (index == hvc_console.index) + register_console(&hvc_console); +} + /* * hvc_instantiate() is an early console discovery method which locates * consoles * prior to the vio subsystem discovering them. Hotplugged @@ -275,12 +289,8 @@ int hvc_instantiate(uint32_t vtermno, int index, const struct hv_ops *ops) if (last_hvc < index) last_hvc = index; - /* if this index is what the user requested, then register - * now (setup won't fail at this point). It's ok to just - * call register again if previously .setup failed. - */ - if (index == hvc_console.index) - register_console(&hvc_console); + /* check if we need to re-register the kernel console */ + hvc_check_console(index); return 0; } @@ -858,10 +868,15 @@ struct hvc_struct *hvc_alloc(uint32_t vtermno, int data, i = ++last_hvc; hp->index = i; + cons_ops[i] = ops; + vtermnos[i] = vtermno; list_add_tail(&(hp->next), &hvc_structs); spin_unlock(&hvc_structs_lock); + /* check if we need to re-register the kernel console */ + hvc_check_console(i); + return hp; } EXPORT_SYMBOL_GPL(hvc_alloc); @@ -874,8 +889,12 @@ int hvc_remove(struct hvc_struct *hp) tty = tty_port_tty_get(&hp->port); spin_lock_irqsave(&hp->lock, flags); - if (hp->index < MAX_NR_HVC_CONSOLES) + if (hp->index < MAX_NR_HVC_CONSOLES) { + console_lock(); vtermnos[hp->index] = -1; + cons_ops[hp->index] = NULL; + console_unlock(); + } /* Don't whack hp->irq because tty_hangup() will need to free the irq. */ -- cgit v0.10.2 From d1cfc0ce5d3196ccd88b9d868bb7cbcab61e0a31 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Mon, 23 Jul 2012 21:47:38 +0000 Subject: hvc_vio: Improve registration of udbg backend The pseries hvterm driver only registers a udbg backend (for xmon and other low level debugging mechanisms) when hvc0 is recognized as the firmware console at boot time, not if it's detected later on, for example because the firmware is using a graphics card. This can make debugging challenging especially under X11, and there's really no good reason for that limitation, so let's hookup udbg whenever hvc0 is detected instead. Signed-off-by: Benjamin Herrenschmidt diff --git a/drivers/tty/hvc/hvc_vio.c b/drivers/tty/hvc/hvc_vio.c index ee30779..070c0ee6 100644 --- a/drivers/tty/hvc/hvc_vio.c +++ b/drivers/tty/hvc/hvc_vio.c @@ -230,6 +230,69 @@ static const struct hv_ops hvterm_hvsi_ops = { .tiocmset = hvterm_hvsi_tiocmset, }; +static void udbg_hvc_putc(char c) +{ + int count = -1; + + if (!hvterm_privs[0]) + return; + + if (c == '\n') + udbg_hvc_putc('\r'); + + do { + switch(hvterm_privs[0]->proto) { + case HV_PROTOCOL_RAW: + count = hvterm_raw_put_chars(0, &c, 1); + break; + case HV_PROTOCOL_HVSI: + count = hvterm_hvsi_put_chars(0, &c, 1); + break; + } + } while(count == 0); +} + +static int udbg_hvc_getc_poll(void) +{ + int rc = 0; + char c; + + if (!hvterm_privs[0]) + return -1; + + switch(hvterm_privs[0]->proto) { + case HV_PROTOCOL_RAW: + rc = hvterm_raw_get_chars(0, &c, 1); + break; + case HV_PROTOCOL_HVSI: + rc = hvterm_hvsi_get_chars(0, &c, 1); + break; + } + if (!rc) + return -1; + return c; +} + +static int udbg_hvc_getc(void) +{ + int ch; + + if (!hvterm_privs[0]) + return -1; + + for (;;) { + ch = udbg_hvc_getc_poll(); + if (ch == -1) { + /* This shouldn't be needed...but... */ + volatile unsigned long delay; + for (delay=0; delay < 2000000; delay++) + ; + } else { + return ch; + } + } +} + static int __devinit hvc_vio_probe(struct vio_dev *vdev, const struct vio_device_id *id) { @@ -289,6 +352,13 @@ static int __devinit hvc_vio_probe(struct vio_dev *vdev, return PTR_ERR(hp); dev_set_drvdata(&vdev->dev, hp); + /* register udbg if it's not there already for console 0 */ + if (hp->index == 0 && !udbg_putc) { + udbg_putc = udbg_hvc_putc; + udbg_getc = udbg_hvc_getc; + udbg_getc_poll = udbg_hvc_getc_poll; + } + return 0; } @@ -331,59 +401,6 @@ static void __exit hvc_vio_exit(void) } module_exit(hvc_vio_exit); -static void udbg_hvc_putc(char c) -{ - int count = -1; - - if (c == '\n') - udbg_hvc_putc('\r'); - - do { - switch(hvterm_priv0.proto) { - case HV_PROTOCOL_RAW: - count = hvterm_raw_put_chars(0, &c, 1); - break; - case HV_PROTOCOL_HVSI: - count = hvterm_hvsi_put_chars(0, &c, 1); - break; - } - } while(count == 0); -} - -static int udbg_hvc_getc_poll(void) -{ - int rc = 0; - char c; - - switch(hvterm_priv0.proto) { - case HV_PROTOCOL_RAW: - rc = hvterm_raw_get_chars(0, &c, 1); - break; - case HV_PROTOCOL_HVSI: - rc = hvterm_hvsi_get_chars(0, &c, 1); - break; - } - if (!rc) - return -1; - return c; -} - -static int udbg_hvc_getc(void) -{ - int ch; - for (;;) { - ch = udbg_hvc_getc_poll(); - if (ch == -1) { - /* This shouldn't be needed...but... */ - volatile unsigned long delay; - for (delay=0; delay < 2000000; delay++) - ; - } else { - return ch; - } - } -} - void __init hvc_vio_init_early(void) { struct device_node *stdout_node; -- cgit v0.10.2 From d3dbeef657fdc9e870e0b01f811bbb906af052f8 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Sun, 19 Aug 2012 21:44:01 +0000 Subject: powerpc: Rename 64-bit PVR constants to PVR_foo We have an old FIXME in reg.h which points out that we should standardise on PVR_foo for our PVR #defines. Currently we use PVR_ on 32-bit and PV_ on 64-bit. So do that rename and remove the FIXME. Seeing as we're touching all but one usage of __is_processor(), rename it to something less ugly and more indicative of what it does, which is simply to check the PVR version. Signed-off-by: Michael Ellerman Signed-off-by: Benjamin Herrenschmidt diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index 6386086..43bb15f 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -937,7 +937,7 @@ #define PVR_VER(pvr) (((pvr) >> 16) & 0xFFFF) /* Version field */ #define PVR_REV(pvr) (((pvr) >> 0) & 0xFFFF) /* Revison field */ -#define __is_processor(pv) (PVR_VER(mfspr(SPRN_PVR)) == (pv)) +#define pvr_version_is(pvr) (PVR_VER(mfspr(SPRN_PVR)) == (pvr)) /* * IBM has further subdivided the standard PowerPC 16-bit version and @@ -1002,25 +1002,24 @@ #define PVR_476_ISS 0x00052000 /* 64-bit processors */ -/* XXX the prefix should be PVR_, we'll do a global sweep to fix it one day */ -#define PV_NORTHSTAR 0x0033 -#define PV_PULSAR 0x0034 -#define PV_POWER4 0x0035 -#define PV_ICESTAR 0x0036 -#define PV_SSTAR 0x0037 -#define PV_POWER4p 0x0038 -#define PV_970 0x0039 -#define PV_POWER5 0x003A -#define PV_POWER5p 0x003B -#define PV_970FX 0x003C -#define PV_POWER6 0x003E -#define PV_POWER7 0x003F -#define PV_630 0x0040 -#define PV_630p 0x0041 -#define PV_970MP 0x0044 -#define PV_970GX 0x0045 -#define PV_BE 0x0070 -#define PV_PA6T 0x0090 +#define PVR_NORTHSTAR 0x0033 +#define PVR_PULSAR 0x0034 +#define PVR_POWER4 0x0035 +#define PVR_ICESTAR 0x0036 +#define PVR_SSTAR 0x0037 +#define PVR_POWER4p 0x0038 +#define PVR_970 0x0039 +#define PVR_POWER5 0x003A +#define PVR_POWER5p 0x003B +#define PVR_970FX 0x003C +#define PVR_POWER6 0x003E +#define PVR_POWER7 0x003F +#define PVR_630 0x0040 +#define PVR_630p 0x0041 +#define PVR_970MP 0x0044 +#define PVR_970GX 0x0045 +#define PVR_BE 0x0070 +#define PVR_PA6T 0x0090 /* Macros for setting and retrieving special purpose registers */ #ifndef __ASSEMBLY__ diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index 0794a30..ce68278 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -1691,7 +1691,7 @@ static void __init prom_initialize_tce_table(void) * else will impact performance, so we always allocate 8MB. * Anton */ - if (__is_processor(PV_POWER4) || __is_processor(PV_POWER4p)) + if (pvr_version_is(PVR_POWER4) || pvr_version_is(PVR_POWER4p)) minsize = 8UL << 20; else minsize = 4UL << 20; diff --git a/arch/powerpc/oprofile/op_model_power4.c b/arch/powerpc/oprofile/op_model_power4.c index 95ae77d..caffffa 100644 --- a/arch/powerpc/oprofile/op_model_power4.c +++ b/arch/powerpc/oprofile/op_model_power4.c @@ -61,10 +61,10 @@ static int power4_reg_setup(struct op_counter_config *ctr, else mmcr0_val |= MMCR0_PROBLEM_DISABLE; - if (__is_processor(PV_POWER4) || __is_processor(PV_POWER4p) || - __is_processor(PV_970) || __is_processor(PV_970FX) || - __is_processor(PV_970MP) || __is_processor(PV_970GX) || - __is_processor(PV_POWER5) || __is_processor(PV_POWER5p)) + if (pvr_version_is(PVR_POWER4) || pvr_version_is(PVR_POWER4p) || + pvr_version_is(PVR_970) || pvr_version_is(PVR_970FX) || + pvr_version_is(PVR_970MP) || pvr_version_is(PVR_970GX) || + pvr_version_is(PVR_POWER5) || pvr_version_is(PVR_POWER5p)) use_slot_nums = 1; return 0; @@ -84,9 +84,9 @@ extern void ppc_enable_pmcs(void); */ static inline int mmcra_must_set_sample(void) { - if (__is_processor(PV_POWER4) || __is_processor(PV_POWER4p) || - __is_processor(PV_970) || __is_processor(PV_970FX) || - __is_processor(PV_970MP) || __is_processor(PV_970GX)) + if (pvr_version_is(PVR_POWER4) || pvr_version_is(PVR_POWER4p) || + pvr_version_is(PVR_970) || pvr_version_is(PVR_970FX) || + pvr_version_is(PVR_970MP) || pvr_version_is(PVR_970GX)) return 1; return 0; @@ -276,7 +276,7 @@ static bool pmc_overflow(unsigned long val) * PMCs because a user might set a period of less than 256 and we * don't want to mistakenly reset them. */ - if (__is_processor(PV_POWER7) && ((0x80000000 - val) <= 256)) + if (pvr_version_is(PVR_POWER7) && ((0x80000000 - val) <= 256)) return true; return false; diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index 77b49dd..331cf09 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -1396,7 +1396,7 @@ static bool pmc_overflow(unsigned long val) * PMCs because a user might set a period of less than 256 and we * don't want to mistakenly reset them. */ - if (__is_processor(PV_POWER7) && ((0x80000000 - val) <= 256)) + if (pvr_version_is(PVR_POWER7) && ((0x80000000 - val) <= 256)) return true; return false; diff --git a/drivers/scsi/ipr.c b/drivers/scsi/ipr.c index 467dc38..6077c43 100644 --- a/drivers/scsi/ipr.c +++ b/drivers/scsi/ipr.c @@ -6304,14 +6304,14 @@ static struct ata_port_info sata_port_info = { #ifdef CONFIG_PPC_PSERIES static const u16 ipr_blocked_processors[] = { - PV_NORTHSTAR, - PV_PULSAR, - PV_POWER4, - PV_ICESTAR, - PV_SSTAR, - PV_POWER4p, - PV_630, - PV_630p + PVR_NORTHSTAR, + PVR_PULSAR, + PVR_POWER4, + PVR_ICESTAR, + PVR_SSTAR, + PVR_POWER4p, + PVR_630, + PVR_630p }; /** @@ -6331,7 +6331,7 @@ static int ipr_invalid_adapter(struct ipr_ioa_cfg *ioa_cfg) if ((ioa_cfg->type == 0x5702) && (ioa_cfg->pdev->revision < 4)) { for (i = 0; i < ARRAY_SIZE(ipr_blocked_processors); i++){ - if (__is_processor(ipr_blocked_processors[i])) + if (pvr_version_is(ipr_blocked_processors[i])) return 1; } } -- cgit v0.10.2 From 7118e7e648e0a902bf4f38c55a25fcff730f33d7 Mon Sep 17 00:00:00 2001 From: Ananth N Mavinakayanahalli Date: Thu, 23 Aug 2012 21:26:02 +0000 Subject: powerpc: Consolidate {k,u}probe definitions Move is_trap() and relatives to a common file to be shared between kprobes and uprobes. Code movement only; no change in functionality. Suggested by Michael Ellerman. Signed-off-by: Ananth N Mavinakayanahalli Signed-off-by: Benjamin Herrenschmidt diff --git a/arch/powerpc/include/asm/kprobes.h b/arch/powerpc/include/asm/kprobes.h index be0171a..dc58197 100644 --- a/arch/powerpc/include/asm/kprobes.h +++ b/arch/powerpc/include/asm/kprobes.h @@ -29,21 +29,16 @@ #include #include #include +#include #define __ARCH_WANT_KPROBES_INSN_SLOT struct pt_regs; struct kprobe; -typedef unsigned int kprobe_opcode_t; -#define BREAKPOINT_INSTRUCTION 0x7fe00008 /* trap */ +typedef opcode_t kprobe_opcode_t; #define MAX_INSN_SIZE 1 -#define IS_TW(instr) (((instr) & 0xfc0007fe) == 0x7c000008) -#define IS_TD(instr) (((instr) & 0xfc0007fe) == 0x7c000088) -#define IS_TDI(instr) (((instr) & 0xfc000000) == 0x08000000) -#define IS_TWI(instr) (((instr) & 0xfc000000) == 0x0c000000) - #ifdef CONFIG_PPC64 /* * 64bit powerpc uses function descriptors. @@ -72,12 +67,6 @@ typedef unsigned int kprobe_opcode_t; addr = (kprobe_opcode_t *)kallsyms_lookup_name(dot_name); \ } \ } - -#define is_trap(instr) (IS_TW(instr) || IS_TD(instr) || \ - IS_TWI(instr) || IS_TDI(instr)) -#else -/* Use stock kprobe_lookup_name since ppc32 doesn't use function descriptors */ -#define is_trap(instr) (IS_TW(instr) || IS_TWI(instr)) #endif #define flush_insn_slot(p) do { } while (0) diff --git a/arch/powerpc/include/asm/probes.h b/arch/powerpc/include/asm/probes.h new file mode 100644 index 0000000..610e000 --- /dev/null +++ b/arch/powerpc/include/asm/probes.h @@ -0,0 +1,42 @@ +#ifndef _ASM_POWERPC_PROBES_H +#define _ASM_POWERPC_PROBES_H +#ifdef __KERNEL__ +/* + * Definitions common to probes files + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright IBM Corporation, 2012 + */ +#include + +typedef u32 opcode_t; +#define BREAKPOINT_INSTRUCTION 0x7fe00008 /* trap */ + +/* Trap definitions per ISA */ +#define IS_TW(instr) (((instr) & 0xfc0007fe) == 0x7c000008) +#define IS_TD(instr) (((instr) & 0xfc0007fe) == 0x7c000088) +#define IS_TDI(instr) (((instr) & 0xfc000000) == 0x08000000) +#define IS_TWI(instr) (((instr) & 0xfc000000) == 0x0c000000) + +#ifdef CONFIG_PPC64 +#define is_trap(instr) (IS_TW(instr) || IS_TD(instr) || \ + IS_TWI(instr) || IS_TDI(instr)) +#else +#define is_trap(instr) (IS_TW(instr) || IS_TWI(instr)) +#endif /* CONFIG_PPC64 */ + +#endif /* __KERNEL__ */ +#endif /* _ASM_POWERPC_PROBES_H */ -- cgit v0.10.2 From 41ab5266c3622354353433618edb92ab278025fa Mon Sep 17 00:00:00 2001 From: Ananth N Mavinakayanahalli Date: Thu, 23 Aug 2012 21:27:09 +0000 Subject: powerpc: Add trap_nr to thread_struct Add thread_struct.trap_nr and use it to store the last exception the thread experienced. In this patch, we populate the field at various places where we force_sig_info() to the process. This is also used in uprobes to determine if the probed instruction caused an exception. Signed-off-by: Ananth N Mavinakayanahalli Signed-off-by: Benjamin Herrenschmidt diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h index 53b6dfa..e0f6710 100644 --- a/arch/powerpc/include/asm/processor.h +++ b/arch/powerpc/include/asm/processor.h @@ -219,6 +219,7 @@ struct thread_struct { #endif /* CONFIG_HAVE_HW_BREAKPOINT */ #endif unsigned long dabr; /* Data address breakpoint register */ + unsigned long trap_nr; /* last trap # on this thread */ #ifdef CONFIG_ALTIVEC /* Complete AltiVec register set */ vector128 vr[32] __attribute__((aligned(16))); diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 710f400..8e701a4 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -258,6 +258,7 @@ void do_send_trap(struct pt_regs *regs, unsigned long address, { siginfo_t info; + current->thread.trap_nr = signal_code; if (notify_die(DIE_DABR_MATCH, "dabr_match", regs, error_code, 11, SIGSEGV) == NOTIFY_STOP) return; @@ -275,6 +276,7 @@ void do_dabr(struct pt_regs *regs, unsigned long address, { siginfo_t info; + current->thread.trap_nr = TRAP_HWBKPT; if (notify_die(DIE_DABR_MATCH, "dabr_match", regs, error_code, 11, SIGSEGV) == NOTIFY_STOP) return; diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 1589723..8831322 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -251,6 +251,7 @@ void _exception(int signr, struct pt_regs *regs, int code, unsigned long addr) if (arch_irqs_disabled() && !arch_irq_disabled_regs(regs)) local_irq_enable(); + current->thread.trap_nr = code; memset(&info, 0, sizeof(info)); info.si_signo = signr; info.si_code = code; diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index 08ffcf5..995f924 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -133,6 +133,7 @@ static int do_sigbus(struct pt_regs *regs, unsigned long address) up_read(¤t->mm->mmap_sem); if (user_mode(regs)) { + current->thread.trap_nr = BUS_ADRERR; info.si_signo = SIGBUS; info.si_errno = 0; info.si_code = BUS_ADRERR; -- cgit v0.10.2 From 8b7b80b9ebb46dd88fbb94e918297295cf312b59 Mon Sep 17 00:00:00 2001 From: Ananth N Mavinakayanahalli Date: Thu, 23 Aug 2012 21:31:32 +0000 Subject: powerpc: Uprobes port to powerpc This is the port of uprobes to powerpc. Usage is similar to x86. [root@xxxx ~]# ./bin/perf probe -x /lib64/libc.so.6 malloc Added new event: probe_libc:malloc (on 0xb4860) You can now use it in all perf tools, such as: perf record -e probe_libc:malloc -aR sleep 1 [root@xxxx ~]# ./bin/perf record -e probe_libc:malloc -aR sleep 20 [ perf record: Woken up 22 times to write data ] [ perf record: Captured and wrote 5.843 MB perf.data (~255302 samples) ] [root@xxxx ~]# ./bin/perf report --stdio ... 69.05% tar libc-2.12.so [.] malloc 28.57% rm libc-2.12.so [.] malloc 1.32% avahi-daemon libc-2.12.so [.] malloc 0.58% bash libc-2.12.so [.] malloc 0.28% sshd libc-2.12.so [.] malloc 0.08% irqbalance libc-2.12.so [.] malloc 0.05% bzip2 libc-2.12.so [.] malloc 0.04% sleep libc-2.12.so [.] malloc 0.03% multipathd libc-2.12.so [.] malloc 0.01% sendmail libc-2.12.so [.] malloc 0.01% automount libc-2.12.so [.] malloc The trap_nr addition patch is a prereq. Signed-off-by: Ananth N Mavinakayanahalli Signed-off-by: Benjamin Herrenschmidt diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 352f416..98e513b 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -239,6 +239,9 @@ config PPC_OF_PLATFORM_PCI config ARCH_SUPPORTS_DEBUG_PAGEALLOC def_bool y +config ARCH_SUPPORTS_UPROBES + def_bool y + config PPC_ADV_DEBUG_REGS bool depends on 40x || BOOKE diff --git a/arch/powerpc/include/asm/thread_info.h b/arch/powerpc/include/asm/thread_info.h index faf9352..e942203 100644 --- a/arch/powerpc/include/asm/thread_info.h +++ b/arch/powerpc/include/asm/thread_info.h @@ -102,6 +102,7 @@ static inline struct thread_info *current_thread_info(void) #define TIF_RESTOREALL 11 /* Restore all regs (implies NOERROR) */ #define TIF_NOERROR 12 /* Force successful syscall return */ #define TIF_NOTIFY_RESUME 13 /* callback before returning to user */ +#define TIF_UPROBE 14 /* breakpointed or single-stepping */ #define TIF_SYSCALL_TRACEPOINT 15 /* syscall tracepoint instrumentation */ /* as above, but as bit values */ @@ -118,12 +119,13 @@ static inline struct thread_info *current_thread_info(void) #define _TIF_RESTOREALL (1< + */ + +#include +#include + +typedef opcode_t uprobe_opcode_t; + +#define MAX_UINSN_BYTES 4 +#define UPROBE_XOL_SLOT_BYTES (MAX_UINSN_BYTES) + +/* The following alias is needed for reference from arch-agnostic code */ +#define UPROBE_SWBP_INSN BREAKPOINT_INSTRUCTION +#define UPROBE_SWBP_INSN_SIZE 4 /* swbp insn size in bytes */ + +struct arch_uprobe { + union { + u8 insn[MAX_UINSN_BYTES]; + u32 ainsn; + }; +}; + +struct arch_uprobe_task { + unsigned long saved_trap_nr; +}; + +extern int arch_uprobe_analyze_insn(struct arch_uprobe *aup, struct mm_struct *mm, unsigned long addr); +extern int arch_uprobe_pre_xol(struct arch_uprobe *aup, struct pt_regs *regs); +extern int arch_uprobe_post_xol(struct arch_uprobe *aup, struct pt_regs *regs); +extern bool arch_uprobe_xol_was_trapped(struct task_struct *tsk); +extern int arch_uprobe_exception_notify(struct notifier_block *self, unsigned long val, void *data); +extern void arch_uprobe_abort_xol(struct arch_uprobe *aup, struct pt_regs *regs); +#endif /* _ASM_UPROBES_H */ diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index bb282dd..cde12f8 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -96,6 +96,7 @@ obj-$(CONFIG_MODULES) += ppc_ksyms.o obj-$(CONFIG_BOOTX_TEXT) += btext.o obj-$(CONFIG_SMP) += smp.o obj-$(CONFIG_KPROBES) += kprobes.o +obj-$(CONFIG_UPROBES) += uprobes.o obj-$(CONFIG_PPC_UDBG_16550) += legacy_serial.o udbg_16550.o obj-$(CONFIG_STACKTRACE) += stacktrace.o obj-$(CONFIG_SWIOTLB) += dma-swiotlb.o diff --git a/arch/powerpc/kernel/signal.c b/arch/powerpc/kernel/signal.c index 5c023c9..29be271 100644 --- a/arch/powerpc/kernel/signal.c +++ b/arch/powerpc/kernel/signal.c @@ -11,6 +11,7 @@ #include #include +#include #include #include #include @@ -157,6 +158,11 @@ static int do_signal(struct pt_regs *regs) void do_notify_resume(struct pt_regs *regs, unsigned long thread_info_flags) { + if (thread_info_flags & _TIF_UPROBE) { + clear_thread_flag(TIF_UPROBE); + uprobe_notify_resume(regs); + } + if (thread_info_flags & _TIF_SIGPENDING) do_signal(regs); diff --git a/arch/powerpc/kernel/uprobes.c b/arch/powerpc/kernel/uprobes.c new file mode 100644 index 0000000..d2d46d1 --- /dev/null +++ b/arch/powerpc/kernel/uprobes.c @@ -0,0 +1,184 @@ +/* + * User-space Probes (UProbes) for powerpc + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright IBM Corporation, 2007-2012 + * + * Adapted from the x86 port by Ananth N Mavinakayanahalli + */ +#include +#include +#include +#include +#include +#include + +#include + +#define UPROBE_TRAP_NR UINT_MAX + +/** + * arch_uprobe_analyze_insn + * @mm: the probed address space. + * @arch_uprobe: the probepoint information. + * @addr: vaddr to probe. + * Return 0 on success or a -ve number on error. + */ +int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, + struct mm_struct *mm, unsigned long addr) +{ + if (addr & 0x03) + return -EINVAL; + + /* + * We currently don't support a uprobe on an already + * existing breakpoint instruction underneath + */ + if (is_trap(auprobe->ainsn)) + return -ENOTSUPP; + return 0; +} + +/* + * arch_uprobe_pre_xol - prepare to execute out of line. + * @auprobe: the probepoint information. + * @regs: reflects the saved user state of current task. + */ +int arch_uprobe_pre_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) +{ + struct arch_uprobe_task *autask = ¤t->utask->autask; + + autask->saved_trap_nr = current->thread.trap_nr; + current->thread.trap_nr = UPROBE_TRAP_NR; + regs->nip = current->utask->xol_vaddr; + return 0; +} + +/** + * uprobe_get_swbp_addr - compute address of swbp given post-swbp regs + * @regs: Reflects the saved state of the task after it has hit a breakpoint + * instruction. + * Return the address of the breakpoint instruction. + */ +unsigned long uprobe_get_swbp_addr(struct pt_regs *regs) +{ + return instruction_pointer(regs); +} + +/* + * If xol insn itself traps and generates a signal (SIGILL/SIGSEGV/etc), + * then detect the case where a singlestepped instruction jumps back to its + * own address. It is assumed that anything like do_page_fault/do_trap/etc + * sets thread.trap_nr != UINT_MAX. + * + * arch_uprobe_pre_xol/arch_uprobe_post_xol save/restore thread.trap_nr, + * arch_uprobe_xol_was_trapped() simply checks that ->trap_nr is not equal to + * UPROBE_TRAP_NR == UINT_MAX set by arch_uprobe_pre_xol(). + */ +bool arch_uprobe_xol_was_trapped(struct task_struct *t) +{ + if (t->thread.trap_nr != UPROBE_TRAP_NR) + return true; + + return false; +} + +/* + * Called after single-stepping. To avoid the SMP problems that can + * occur when we temporarily put back the original opcode to + * single-step, we single-stepped a copy of the instruction. + * + * This function prepares to resume execution after the single-step. + */ +int arch_uprobe_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) +{ + struct uprobe_task *utask = current->utask; + + WARN_ON_ONCE(current->thread.trap_nr != UPROBE_TRAP_NR); + + current->thread.trap_nr = utask->autask.saved_trap_nr; + + /* + * On powerpc, except for loads and stores, most instructions + * including ones that alter code flow (branches, calls, returns) + * are emulated in the kernel. We get here only if the emulation + * support doesn't exist and have to fix-up the next instruction + * to be executed. + */ + regs->nip = utask->vaddr + MAX_UINSN_BYTES; + return 0; +} + +/* callback routine for handling exceptions. */ +int arch_uprobe_exception_notify(struct notifier_block *self, + unsigned long val, void *data) +{ + struct die_args *args = data; + struct pt_regs *regs = args->regs; + + /* regs == NULL is a kernel bug */ + if (WARN_ON(!regs)) + return NOTIFY_DONE; + + /* We are only interested in userspace traps */ + if (!user_mode(regs)) + return NOTIFY_DONE; + + switch (val) { + case DIE_BPT: + if (uprobe_pre_sstep_notifier(regs)) + return NOTIFY_STOP; + break; + case DIE_SSTEP: + if (uprobe_post_sstep_notifier(regs)) + return NOTIFY_STOP; + default: + break; + } + return NOTIFY_DONE; +} + +/* + * This function gets called when XOL instruction either gets trapped or + * the thread has a fatal signal, so reset the instruction pointer to its + * probed address. + */ +void arch_uprobe_abort_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) +{ + struct uprobe_task *utask = current->utask; + + current->thread.trap_nr = utask->autask.saved_trap_nr; + instruction_pointer_set(regs, utask->vaddr); +} + +/* + * See if the instruction can be emulated. + * Returns true if instruction was emulated, false otherwise. + */ +bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs) +{ + int ret; + + /* + * emulate_step() returns 1 if the insn was successfully emulated. + * For all other cases, we need to single-step in hardware. + */ + ret = emulate_step(regs, auprobe->ainsn); + if (ret > 0) + return true; + + return false; +} -- cgit v0.10.2 From a1310757218e0757ee77d2ce05c43028430ed9dd Mon Sep 17 00:00:00 2001 From: Mihai Caraman Date: Mon, 6 Aug 2012 03:27:03 +0000 Subject: powerpc/booke64: Fix machine check handler to use the right prolog Machine check exception handler was using a wrong prolog. Hypervisors like KVM which are called early from the exception handler rely on the interrupt source. Signed-off-by: Mihai Caraman Signed-off-by: Benjamin Herrenschmidt diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S index 98be7f0..0243b1b 100644 --- a/arch/powerpc/kernel/exceptions-64e.S +++ b/arch/powerpc/kernel/exceptions-64e.S @@ -297,7 +297,7 @@ interrupt_end_book3e: /* Machine Check Interrupt */ START_EXCEPTION(machine_check); - CRIT_EXCEPTION_PROLOG(0x200, PROLOG_ADDITION_NONE) + MC_EXCEPTION_PROLOG(0x200, PROLOG_ADDITION_NONE) // EXCEPTION_COMMON(0x200, PACA_EXMC, INTS_DISABLE) // bl special_reg_save_mc // addi r3,r1,STACK_FRAME_OVERHEAD -- cgit v0.10.2 From 5473eb1c07009956be5f4df65d51f8e24d8eb1de Mon Sep 17 00:00:00 2001 From: Mihai Caraman Date: Mon, 6 Aug 2012 03:27:04 +0000 Subject: powerpc/booke64: Use GSRR registers in Guest Doorbell interrupts Guest Doorbell interrupts use guest save and restore registers. Add a new Guest Doorbell exception type to accommodate GSRR0/1 SPRs usage in exception prolog and fix the exception handler. Signed-off-by: Mihai Caraman Signed-off-by: Benjamin Herrenschmidt diff --git a/arch/powerpc/include/asm/exception-64e.h b/arch/powerpc/include/asm/exception-64e.h index ac13add..e73452f 100644 --- a/arch/powerpc/include/asm/exception-64e.h +++ b/arch/powerpc/include/asm/exception-64e.h @@ -37,6 +37,7 @@ * critical data */ +#define PACA_EXGDBELL PACA_EXGEN /* We are out of SPRGs so we save some things in the PACA. The normal * exception frame is smaller than the CRIT or MC one though diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index 43bb15f..c195bf0 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -863,6 +863,7 @@ #define SPRN_SPRG_TLB_EXFRAME SPRN_SPRG2 #define SPRN_SPRG_TLB_SCRATCH SPRN_SPRG6 #define SPRN_SPRG_GEN_SCRATCH SPRN_SPRG0 +#define SPRN_SPRG_GDBELL_SCRATCH SPRN_SPRG_GEN_SCRATCH #define SET_PACA(rX) mtspr SPRN_SPRG_PACA,rX #define GET_PACA(rX) mfspr rX,SPRN_SPRG_PACA diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S index 0243b1b..0f58a95 100644 --- a/arch/powerpc/kernel/exceptions-64e.S +++ b/arch/powerpc/kernel/exceptions-64e.S @@ -59,6 +59,10 @@ #define SPRN_GEN_SRR0 SPRN_SRR0 #define SPRN_GEN_SRR1 SPRN_SRR1 +#define GDBELL_SET_KSTACK GEN_SET_KSTACK +#define SPRN_GDBELL_SRR0 SPRN_GSRR0 +#define SPRN_GDBELL_SRR1 SPRN_GSRR1 + #define CRIT_SET_KSTACK \ ld r1,PACA_CRIT_STACK(r13); \ subi r1,r1,SPECIAL_EXC_FRAME_SIZE; @@ -89,10 +93,13 @@ #define MC_EXCEPTION_PROLOG(n, addition) \ EXCEPTION_PROLOG(n, MC, addition##_MC(n)) +#define GDBELL_EXCEPTION_PROLOG(n, addition) \ + EXCEPTION_PROLOG(n, GDBELL, addition##_GDBELL(n)) /* Variants of the "addition" argument for the prolog */ #define PROLOG_ADDITION_NONE_GEN(n) +#define PROLOG_ADDITION_NONE_GDBELL(n) #define PROLOG_ADDITION_NONE_CRIT(n) #define PROLOG_ADDITION_NONE_DBG(n) #define PROLOG_ADDITION_NONE_MC(n) @@ -543,8 +550,18 @@ kernel_dbg_exc: // b ret_from_crit_except b . -/* Guest Doorbell */ - MASKABLE_EXCEPTION(0x2c0, guest_doorbell, .unknown_exception, ACK_NONE) +/* + * Guest doorbell interrupt + * This general exception use GSRRx save/restore registers + */ + START_EXCEPTION(guest_doorbell); + GDBELL_EXCEPTION_PROLOG(0x2c0, PROLOG_ADDITION_NONE) + EXCEPTION_COMMON(0x2c0, PACA_EXGEN, INTS_KEEP) + addi r3,r1,STACK_FRAME_OVERHEAD + bl .save_nvgprs + INTS_RESTORE_HARD + bl .unknown_exception + b .ret_from_except /* Guest Doorbell critical Interrupt */ START_EXCEPTION(guest_doorbell_crit); -- cgit v0.10.2 From fecff0f7243edaea33071865ee5e35839be44f10 Mon Sep 17 00:00:00 2001 From: Mihai Caraman Date: Mon, 6 Aug 2012 03:27:05 +0000 Subject: powerpc/booke64: Add DO_KVM kernel hooks Hook DO_KVM macro into 64-bit booke for KVM integration. Extend interrupt handlers' parameter list with interrupt vector numbers to accomodate the macro. Only the bolted version of tlb miss handers is addressed now. Signed-off-by: Mihai Caraman Signed-off-by: Benjamin Herrenschmidt diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S index 0f58a95..3b1ad1b 100644 --- a/arch/powerpc/kernel/exceptions-64e.S +++ b/arch/powerpc/kernel/exceptions-64e.S @@ -25,6 +25,8 @@ #include #include #include +#include +#include /* XXX This will ultimately add space for a special exception save * structure used to save things like SRR0/SRR1, SPRGs, MAS, etc... @@ -35,12 +37,14 @@ #define SPECIAL_EXC_FRAME_SIZE INT_FRAME_SIZE /* Exception prolog code for all exceptions */ -#define EXCEPTION_PROLOG(n, type, addition) \ +#define EXCEPTION_PROLOG(n, intnum, type, addition) \ mtspr SPRN_SPRG_##type##_SCRATCH,r13; /* get spare registers */ \ mfspr r13,SPRN_SPRG_PACA; /* get PACA */ \ std r10,PACA_EX##type+EX_R10(r13); \ std r11,PACA_EX##type+EX_R11(r13); \ mfcr r10; /* save CR */ \ + mfspr r11,SPRN_##type##_SRR1;/* what are we coming from */ \ + DO_KVM intnum,SPRN_##type##_SRR1; /* KVM hook */ \ addition; /* additional code for that exc. */ \ std r1,PACA_EX##type+EX_R1(r13); /* save old r1 in the PACA */ \ stw r10,PACA_EX##type+EX_CR(r13); /* save old CR in the PACA */ \ @@ -81,20 +85,20 @@ #define SPRN_MC_SRR0 SPRN_MCSRR0 #define SPRN_MC_SRR1 SPRN_MCSRR1 -#define NORMAL_EXCEPTION_PROLOG(n, addition) \ - EXCEPTION_PROLOG(n, GEN, addition##_GEN(n)) +#define NORMAL_EXCEPTION_PROLOG(n, intnum, addition) \ + EXCEPTION_PROLOG(n, intnum, GEN, addition##_GEN(n)) -#define CRIT_EXCEPTION_PROLOG(n, addition) \ - EXCEPTION_PROLOG(n, CRIT, addition##_CRIT(n)) +#define CRIT_EXCEPTION_PROLOG(n, intnum, addition) \ + EXCEPTION_PROLOG(n, intnum, CRIT, addition##_CRIT(n)) -#define DBG_EXCEPTION_PROLOG(n, addition) \ - EXCEPTION_PROLOG(n, DBG, addition##_DBG(n)) +#define DBG_EXCEPTION_PROLOG(n, intnum, addition) \ + EXCEPTION_PROLOG(n, intnum, DBG, addition##_DBG(n)) -#define MC_EXCEPTION_PROLOG(n, addition) \ - EXCEPTION_PROLOG(n, MC, addition##_MC(n)) +#define MC_EXCEPTION_PROLOG(n, intnum, addition) \ + EXCEPTION_PROLOG(n, intnum, MC, addition##_MC(n)) -#define GDBELL_EXCEPTION_PROLOG(n, addition) \ - EXCEPTION_PROLOG(n, GDBELL, addition##_GDBELL(n)) +#define GDBELL_EXCEPTION_PROLOG(n, intnum, addition) \ + EXCEPTION_PROLOG(n, intnum, GDBELL, addition##_GDBELL(n)) /* Variants of the "addition" argument for the prolog */ @@ -240,9 +244,9 @@ exc_##n##_bad_stack: \ 1: -#define MASKABLE_EXCEPTION(trapnum, label, hdlr, ack) \ +#define MASKABLE_EXCEPTION(trapnum, intnum, label, hdlr, ack) \ START_EXCEPTION(label); \ - NORMAL_EXCEPTION_PROLOG(trapnum, PROLOG_ADDITION_MASKABLE) \ + NORMAL_EXCEPTION_PROLOG(trapnum, intnum, PROLOG_ADDITION_MASKABLE)\ EXCEPTION_COMMON(trapnum, PACA_EXGEN, INTS_DISABLE) \ ack(r8); \ CHECK_NAPPING(); \ @@ -293,7 +297,8 @@ interrupt_end_book3e: /* Critical Input Interrupt */ START_EXCEPTION(critical_input); - CRIT_EXCEPTION_PROLOG(0x100, PROLOG_ADDITION_NONE) + CRIT_EXCEPTION_PROLOG(0x100, BOOKE_INTERRUPT_CRITICAL, + PROLOG_ADDITION_NONE) // EXCEPTION_COMMON(0x100, PACA_EXCRIT, INTS_DISABLE) // bl special_reg_save_crit // CHECK_NAPPING(); @@ -304,7 +309,8 @@ interrupt_end_book3e: /* Machine Check Interrupt */ START_EXCEPTION(machine_check); - MC_EXCEPTION_PROLOG(0x200, PROLOG_ADDITION_NONE) + MC_EXCEPTION_PROLOG(0x200, BOOKE_INTERRUPT_MACHINE_CHECK, + PROLOG_ADDITION_NONE) // EXCEPTION_COMMON(0x200, PACA_EXMC, INTS_DISABLE) // bl special_reg_save_mc // addi r3,r1,STACK_FRAME_OVERHEAD @@ -315,7 +321,8 @@ interrupt_end_book3e: /* Data Storage Interrupt */ START_EXCEPTION(data_storage) - NORMAL_EXCEPTION_PROLOG(0x300, PROLOG_ADDITION_2REGS) + NORMAL_EXCEPTION_PROLOG(0x300, BOOKE_INTERRUPT_DATA_STORAGE, + PROLOG_ADDITION_2REGS) mfspr r14,SPRN_DEAR mfspr r15,SPRN_ESR EXCEPTION_COMMON(0x300, PACA_EXGEN, INTS_DISABLE) @@ -323,18 +330,21 @@ interrupt_end_book3e: /* Instruction Storage Interrupt */ START_EXCEPTION(instruction_storage); - NORMAL_EXCEPTION_PROLOG(0x400, PROLOG_ADDITION_2REGS) + NORMAL_EXCEPTION_PROLOG(0x400, BOOKE_INTERRUPT_INST_STORAGE, + PROLOG_ADDITION_2REGS) li r15,0 mr r14,r10 EXCEPTION_COMMON(0x400, PACA_EXGEN, INTS_DISABLE) b storage_fault_common /* External Input Interrupt */ - MASKABLE_EXCEPTION(0x500, external_input, .do_IRQ, ACK_NONE) + MASKABLE_EXCEPTION(0x500, BOOKE_INTERRUPT_EXTERNAL, + external_input, .do_IRQ, ACK_NONE) /* Alignment */ START_EXCEPTION(alignment); - NORMAL_EXCEPTION_PROLOG(0x600, PROLOG_ADDITION_2REGS) + NORMAL_EXCEPTION_PROLOG(0x600, BOOKE_INTERRUPT_ALIGNMENT, + PROLOG_ADDITION_2REGS) mfspr r14,SPRN_DEAR mfspr r15,SPRN_ESR EXCEPTION_COMMON(0x600, PACA_EXGEN, INTS_KEEP) @@ -342,7 +352,8 @@ interrupt_end_book3e: /* Program Interrupt */ START_EXCEPTION(program); - NORMAL_EXCEPTION_PROLOG(0x700, PROLOG_ADDITION_1REG) + NORMAL_EXCEPTION_PROLOG(0x700, BOOKE_INTERRUPT_PROGRAM, + PROLOG_ADDITION_1REG) mfspr r14,SPRN_ESR EXCEPTION_COMMON(0x700, PACA_EXGEN, INTS_DISABLE) std r14,_DSISR(r1) @@ -354,7 +365,8 @@ interrupt_end_book3e: /* Floating Point Unavailable Interrupt */ START_EXCEPTION(fp_unavailable); - NORMAL_EXCEPTION_PROLOG(0x800, PROLOG_ADDITION_NONE) + NORMAL_EXCEPTION_PROLOG(0x800, BOOKE_INTERRUPT_FP_UNAVAIL, + PROLOG_ADDITION_NONE) /* we can probably do a shorter exception entry for that one... */ EXCEPTION_COMMON(0x800, PACA_EXGEN, INTS_KEEP) ld r12,_MSR(r1) @@ -369,14 +381,17 @@ interrupt_end_book3e: b .ret_from_except /* Decrementer Interrupt */ - MASKABLE_EXCEPTION(0x900, decrementer, .timer_interrupt, ACK_DEC) + MASKABLE_EXCEPTION(0x900, BOOKE_INTERRUPT_DECREMENTER, + decrementer, .timer_interrupt, ACK_DEC) /* Fixed Interval Timer Interrupt */ - MASKABLE_EXCEPTION(0x980, fixed_interval, .unknown_exception, ACK_FIT) + MASKABLE_EXCEPTION(0x980, BOOKE_INTERRUPT_FIT, + fixed_interval, .unknown_exception, ACK_FIT) /* Watchdog Timer Interrupt */ START_EXCEPTION(watchdog); - CRIT_EXCEPTION_PROLOG(0x9f0, PROLOG_ADDITION_NONE) + CRIT_EXCEPTION_PROLOG(0x9f0, BOOKE_INTERRUPT_WATCHDOG, + PROLOG_ADDITION_NONE) // EXCEPTION_COMMON(0x9f0, PACA_EXCRIT, INTS_DISABLE) // bl special_reg_save_crit // CHECK_NAPPING(); @@ -395,7 +410,8 @@ interrupt_end_book3e: /* Auxiliary Processor Unavailable Interrupt */ START_EXCEPTION(ap_unavailable); - NORMAL_EXCEPTION_PROLOG(0xf20, PROLOG_ADDITION_NONE) + NORMAL_EXCEPTION_PROLOG(0xf20, BOOKE_INTERRUPT_AP_UNAVAIL, + PROLOG_ADDITION_NONE) EXCEPTION_COMMON(0xf20, PACA_EXGEN, INTS_DISABLE) bl .save_nvgprs addi r3,r1,STACK_FRAME_OVERHEAD @@ -404,7 +420,8 @@ interrupt_end_book3e: /* Debug exception as a critical interrupt*/ START_EXCEPTION(debug_crit); - CRIT_EXCEPTION_PROLOG(0xd00, PROLOG_ADDITION_2REGS) + CRIT_EXCEPTION_PROLOG(0xd00, BOOKE_INTERRUPT_DEBUG, + PROLOG_ADDITION_2REGS) /* * If there is a single step or branch-taken exception in an @@ -469,7 +486,8 @@ kernel_dbg_exc: /* Debug exception as a debug interrupt*/ START_EXCEPTION(debug_debug); - DBG_EXCEPTION_PROLOG(0xd08, PROLOG_ADDITION_2REGS) + DBG_EXCEPTION_PROLOG(0xd00, BOOKE_INTERRUPT_DEBUG, + PROLOG_ADDITION_2REGS) /* * If there is a single step or branch-taken exception in an @@ -530,18 +548,21 @@ kernel_dbg_exc: b .ret_from_except START_EXCEPTION(perfmon); - NORMAL_EXCEPTION_PROLOG(0x260, PROLOG_ADDITION_NONE) + NORMAL_EXCEPTION_PROLOG(0x260, BOOKE_INTERRUPT_PERFORMANCE_MONITOR, + PROLOG_ADDITION_NONE) EXCEPTION_COMMON(0x260, PACA_EXGEN, INTS_DISABLE) addi r3,r1,STACK_FRAME_OVERHEAD bl .performance_monitor_exception b .ret_from_except_lite /* Doorbell interrupt */ - MASKABLE_EXCEPTION(0x280, doorbell, .doorbell_exception, ACK_NONE) + MASKABLE_EXCEPTION(0x280, BOOKE_INTERRUPT_DOORBELL, + doorbell, .doorbell_exception, ACK_NONE) /* Doorbell critical Interrupt */ START_EXCEPTION(doorbell_crit); - CRIT_EXCEPTION_PROLOG(0x2a0, PROLOG_ADDITION_NONE) + CRIT_EXCEPTION_PROLOG(0x2a0, BOOKE_INTERRUPT_DOORBELL_CRITICAL, + PROLOG_ADDITION_NONE) // EXCEPTION_COMMON(0x2a0, PACA_EXCRIT, INTS_DISABLE) // bl special_reg_save_crit // CHECK_NAPPING(); @@ -555,7 +576,8 @@ kernel_dbg_exc: * This general exception use GSRRx save/restore registers */ START_EXCEPTION(guest_doorbell); - GDBELL_EXCEPTION_PROLOG(0x2c0, PROLOG_ADDITION_NONE) + GDBELL_EXCEPTION_PROLOG(0x2c0, BOOKE_INTERRUPT_GUEST_DBELL, + PROLOG_ADDITION_NONE) EXCEPTION_COMMON(0x2c0, PACA_EXGEN, INTS_KEEP) addi r3,r1,STACK_FRAME_OVERHEAD bl .save_nvgprs @@ -565,7 +587,8 @@ kernel_dbg_exc: /* Guest Doorbell critical Interrupt */ START_EXCEPTION(guest_doorbell_crit); - CRIT_EXCEPTION_PROLOG(0x2e0, PROLOG_ADDITION_NONE) + CRIT_EXCEPTION_PROLOG(0x2e0, BOOKE_INTERRUPT_GUEST_DBELL_CRIT, + PROLOG_ADDITION_NONE) // EXCEPTION_COMMON(0x2e0, PACA_EXCRIT, INTS_DISABLE) // bl special_reg_save_crit // CHECK_NAPPING(); @@ -576,7 +599,8 @@ kernel_dbg_exc: /* Hypervisor call */ START_EXCEPTION(hypercall); - NORMAL_EXCEPTION_PROLOG(0x310, PROLOG_ADDITION_NONE) + NORMAL_EXCEPTION_PROLOG(0x310, BOOKE_INTERRUPT_HV_SYSCALL, + PROLOG_ADDITION_NONE) EXCEPTION_COMMON(0x310, PACA_EXGEN, INTS_KEEP) addi r3,r1,STACK_FRAME_OVERHEAD bl .save_nvgprs @@ -586,7 +610,8 @@ kernel_dbg_exc: /* Embedded Hypervisor priviledged */ START_EXCEPTION(ehpriv); - NORMAL_EXCEPTION_PROLOG(0x320, PROLOG_ADDITION_NONE) + NORMAL_EXCEPTION_PROLOG(0x320, BOOKE_INTERRUPT_HV_PRIV, + PROLOG_ADDITION_NONE) EXCEPTION_COMMON(0x320, PACA_EXGEN, INTS_KEEP) addi r3,r1,STACK_FRAME_OVERHEAD bl .save_nvgprs diff --git a/arch/powerpc/mm/tlb_low_64e.S b/arch/powerpc/mm/tlb_low_64e.S index f09d48e..d884fa4 100644 --- a/arch/powerpc/mm/tlb_low_64e.S +++ b/arch/powerpc/mm/tlb_low_64e.S @@ -20,6 +20,8 @@ #include #include #include +#include +#include #ifdef CONFIG_PPC_64K_PAGES #define VPTE_PMD_SHIFT (PTE_INDEX_SIZE+1) @@ -37,12 +39,18 @@ * * **********************************************************************/ -.macro tlb_prolog_bolted addr +.macro tlb_prolog_bolted intnum addr mtspr SPRN_SPRG_TLB_SCRATCH,r13 mfspr r13,SPRN_SPRG_PACA std r10,PACA_EXTLB+EX_TLB_R10(r13) mfcr r10 std r11,PACA_EXTLB+EX_TLB_R11(r13) +#ifdef CONFIG_KVM_BOOKE_HV +BEGIN_FTR_SECTION + mfspr r11, SPRN_SRR1 +END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV) +#endif + DO_KVM \intnum, SPRN_SRR1 std r16,PACA_EXTLB+EX_TLB_R16(r13) mfspr r16,\addr /* get faulting address */ std r14,PACA_EXTLB+EX_TLB_R14(r13) @@ -66,7 +74,7 @@ /* Data TLB miss */ START_EXCEPTION(data_tlb_miss_bolted) - tlb_prolog_bolted SPRN_DEAR + tlb_prolog_bolted BOOKE_INTERRUPT_DTLB_MISS SPRN_DEAR /* We need _PAGE_PRESENT and _PAGE_ACCESSED set */ @@ -214,7 +222,7 @@ itlb_miss_fault_bolted: /* Instruction TLB miss */ START_EXCEPTION(instruction_tlb_miss_bolted) - tlb_prolog_bolted SPRN_SRR0 + tlb_prolog_bolted BOOKE_INTERRUPT_ITLB_MISS SPRN_SRR0 rldicl. r10,r16,64-PGTABLE_EADDR_SIZE,PGTABLE_EADDR_SIZE+4 srdi r15,r16,60 /* get region */ -- cgit v0.10.2 From 79b5c8dbaa4528a6fd03a4d9d8a6d56a46293a3a Mon Sep 17 00:00:00 2001 From: Mihai Caraman Date: Mon, 6 Aug 2012 03:27:06 +0000 Subject: powerpc/booke64: Eemove mfspr srr1 duplicate in exception prolog Refactor exception prolog to get rid of mfspr srr1 duplicate. This was introduced by KVM integration, with DO_KVM macro logic expecting srr1 value earlier in r11. Reserve r11 to hold srr1's value also required at the end of the prolog and free up r10 to serve as spare in addition macros. For syscalls case this change does not add any performance penalty. For irq soft-disabled case the change adds a store/load of conditional register value to/from a paca slot. Paca slots fit in one 64-byte cache line so these additional operations have little impact on performance. Signed-off-by: Mihai Caraman Signed-off-by: Benjamin Herrenschmidt diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S index 3b1ad1b..83c20e8 100644 --- a/arch/powerpc/kernel/exceptions-64e.S +++ b/arch/powerpc/kernel/exceptions-64e.S @@ -45,10 +45,9 @@ mfcr r10; /* save CR */ \ mfspr r11,SPRN_##type##_SRR1;/* what are we coming from */ \ DO_KVM intnum,SPRN_##type##_SRR1; /* KVM hook */ \ + stw r10,PACA_EX##type+EX_CR(r13); /* save old CR in the PACA */ \ addition; /* additional code for that exc. */ \ std r1,PACA_EX##type+EX_R1(r13); /* save old r1 in the PACA */ \ - stw r10,PACA_EX##type+EX_CR(r13); /* save old CR in the PACA */ \ - mfspr r11,SPRN_##type##_SRR1;/* what are we coming from */ \ type##_SET_KSTACK; /* get special stack if necessary */\ andi. r10,r11,MSR_PR; /* save stack pointer */ \ beq 1f; /* branch around if supervisor */ \ @@ -109,8 +108,8 @@ #define PROLOG_ADDITION_NONE_MC(n) #define PROLOG_ADDITION_MASKABLE_GEN(n) \ - lbz r11,PACASOFTIRQEN(r13); /* are irqs soft-disabled ? */ \ - cmpwi cr0,r11,0; /* yes -> go out of line */ \ + lbz r10,PACASOFTIRQEN(r13); /* are irqs soft-disabled ? */ \ + cmpwi cr0,r10,0; /* yes -> go out of line */ \ beq masked_interrupt_book3e_##n #define PROLOG_ADDITION_2REGS_GEN(n) \ @@ -624,44 +623,42 @@ kernel_dbg_exc: * accordingly and if the interrupt is level sensitive, we hard disable */ +.macro masked_interrupt_book3e paca_irq full_mask + lbz r10,PACAIRQHAPPENED(r13) + ori r10,r10,\paca_irq + stb r10,PACAIRQHAPPENED(r13) + + .if \full_mask == 1 + rldicl r10,r11,48,1 /* clear MSR_EE */ + rotldi r11,r10,16 + mtspr SPRN_SRR1,r11 + .endif + + lwz r11,PACA_EXGEN+EX_CR(r13) + mtcr r11 + ld r10,PACA_EXGEN+EX_R10(r13) + ld r11,PACA_EXGEN+EX_R11(r13) + mfspr r13,SPRN_SPRG_GEN_SCRATCH + rfi + b . +.endm + masked_interrupt_book3e_0x500: - /* XXX When adding support for EPR, use PACA_IRQ_EE_EDGE */ - li r11,PACA_IRQ_EE - b masked_interrupt_book3e_full_mask + // XXX When adding support for EPR, use PACA_IRQ_EE_EDGE + masked_interrupt_book3e PACA_IRQ_EE 1 masked_interrupt_book3e_0x900: - ACK_DEC(r11); - li r11,PACA_IRQ_DEC - b masked_interrupt_book3e_no_mask + ACK_DEC(r10); + masked_interrupt_book3e PACA_IRQ_DEC 0 + masked_interrupt_book3e_0x980: - ACK_FIT(r11); - li r11,PACA_IRQ_DEC - b masked_interrupt_book3e_no_mask + ACK_FIT(r10); + masked_interrupt_book3e PACA_IRQ_DEC 0 + masked_interrupt_book3e_0x280: masked_interrupt_book3e_0x2c0: - li r11,PACA_IRQ_DBELL - b masked_interrupt_book3e_no_mask + masked_interrupt_book3e PACA_IRQ_DBELL 0 -masked_interrupt_book3e_no_mask: - mtcr r10 - lbz r10,PACAIRQHAPPENED(r13) - or r10,r10,r11 - stb r10,PACAIRQHAPPENED(r13) - b 1f -masked_interrupt_book3e_full_mask: - mtcr r10 - lbz r10,PACAIRQHAPPENED(r13) - or r10,r10,r11 - stb r10,PACAIRQHAPPENED(r13) - mfspr r10,SPRN_SRR1 - rldicl r11,r10,48,1 /* clear MSR_EE */ - rotldi r10,r11,16 - mtspr SPRN_SRR1,r10 -1: ld r10,PACA_EXGEN+EX_R10(r13); - ld r11,PACA_EXGEN+EX_R11(r13); - mfspr r13,SPRN_SPRG_GEN_SCRATCH; - rfi - b . /* * Called from arch_local_irq_enable when an interrupt needs * to be resent. r3 contains either 0x500,0x900,0x260 or 0x280 -- cgit v0.10.2 From 8b64a9dfb091f1eca8b7e58da82f1e7d1d5fe0ad Mon Sep 17 00:00:00 2001 From: Mihai Caraman Date: Mon, 6 Aug 2012 03:27:07 +0000 Subject: powerpc/booke64: Use SPRG0/3 scratch for bolted TLB miss & crit int Embedded.Hypervisor category defines GSPRG0..3 physical registers for guests. Avoid SPRG4-7 usage as scratch in host exception handlers, otherwise guest SPRG4-7 registers will be clobbered. For bolted TLB miss exception handlers, which is the version currently supported by KVM, use SPRN_SPRG_GEN_SCRATCH aka SPRG0 instead of SPRN_SPRG_TLB_SCRATCH aka SPRG6. Keep using TLB PACA slots to fit in one 64-byte cache line. For critical exception handlers use SPRG3 instead of SPRG7. Provide a routine to store and restore user-visible SPRGs. This will be subsequently used to restore VDSO information in SPRG3. Add EX_R13 to paca slots to free up SPRG3 and change the critical exception epilog to use it. Signed-off-by: Mihai Caraman Signed-off-by: Benjamin Herrenschmidt diff --git a/arch/powerpc/include/asm/exception-64e.h b/arch/powerpc/include/asm/exception-64e.h index e73452f..51fa43e 100644 --- a/arch/powerpc/include/asm/exception-64e.h +++ b/arch/powerpc/include/asm/exception-64e.h @@ -46,8 +46,9 @@ #define EX_CR (1 * 8) #define EX_R10 (2 * 8) #define EX_R11 (3 * 8) -#define EX_R14 (4 * 8) -#define EX_R15 (5 * 8) +#define EX_R13 (4 * 8) +#define EX_R14 (5 * 8) +#define EX_R15 (6 * 8) /* * The TLB miss exception uses different slots. diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index c195bf0..a391244 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -761,7 +761,8 @@ * 64-bit embedded * - SPRG0 generic exception scratch * - SPRG2 TLB exception stack - * - SPRG3 CPU and NUMA node for VDSO getcpu (user visible) + * - SPRG3 critical exception scratch and + * CPU and NUMA node for VDSO getcpu (user visible) * - SPRG4 unused (user visible) * - SPRG6 TLB miss scratch (user visible, sorry !) * - SPRG7 critical exception scratch @@ -858,7 +859,7 @@ #ifdef CONFIG_PPC_BOOK3E_64 #define SPRN_SPRG_MC_SCRATCH SPRN_SPRG8 -#define SPRN_SPRG_CRIT_SCRATCH SPRN_SPRG7 +#define SPRN_SPRG_CRIT_SCRATCH SPRN_SPRG3 #define SPRN_SPRG_DBG_SCRATCH SPRN_SPRG9 #define SPRN_SPRG_TLB_EXFRAME SPRN_SPRG2 #define SPRN_SPRG_TLB_SCRATCH SPRN_SPRG6 diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S index 83c20e8..7476b0a 100644 --- a/arch/powerpc/kernel/exceptions-64e.S +++ b/arch/powerpc/kernel/exceptions-64e.S @@ -42,6 +42,7 @@ mfspr r13,SPRN_SPRG_PACA; /* get PACA */ \ std r10,PACA_EX##type+EX_R10(r13); \ std r11,PACA_EX##type+EX_R11(r13); \ + PROLOG_STORE_RESTORE_SCRATCH_##type; \ mfcr r10; /* save CR */ \ mfspr r11,SPRN_##type##_SRR1;/* what are we coming from */ \ DO_KVM intnum,SPRN_##type##_SRR1; /* KVM hook */ \ @@ -99,6 +100,18 @@ #define GDBELL_EXCEPTION_PROLOG(n, intnum, addition) \ EXCEPTION_PROLOG(n, intnum, GDBELL, addition##_GDBELL(n)) +/* + * Store user-visible scratch in PACA exception slots and restore proper value + */ +#define PROLOG_STORE_RESTORE_SCRATCH_GEN +#define PROLOG_STORE_RESTORE_SCRATCH_GDBELL +#define PROLOG_STORE_RESTORE_SCRATCH_DBG +#define PROLOG_STORE_RESTORE_SCRATCH_MC + +#define PROLOG_STORE_RESTORE_SCRATCH_CRIT \ + mfspr r10,SPRN_SPRG_CRIT_SCRATCH; /* get r13 */ \ + std r10,PACA_EXCRIT+EX_R13(r13) + /* Variants of the "addition" argument for the prolog */ #define PROLOG_ADDITION_NONE_GEN(n) @@ -454,7 +467,7 @@ interrupt_end_book3e: mtcr r10 ld r10,PACA_EXCRIT+EX_R10(r13) /* restore registers */ ld r11,PACA_EXCRIT+EX_R11(r13) - mfspr r13,SPRN_SPRG_CRIT_SCRATCH + ld r13,PACA_EXCRIT+EX_R13(r13) rfci /* Normal debug exception */ @@ -467,7 +480,7 @@ interrupt_end_book3e: /* Now we mash up things to make it look like we are coming on a * normal exception */ - mfspr r15,SPRN_SPRG_CRIT_SCRATCH + ld r15,PACA_EXCRIT+EX_R13(r13) mtspr SPRN_SPRG_GEN_SCRATCH,r15 mfspr r14,SPRN_DBSR EXCEPTION_COMMON(0xd00, PACA_EXCRIT, INTS_DISABLE) diff --git a/arch/powerpc/mm/tlb_low_64e.S b/arch/powerpc/mm/tlb_low_64e.S index d884fa4..b4113bf 100644 --- a/arch/powerpc/mm/tlb_low_64e.S +++ b/arch/powerpc/mm/tlb_low_64e.S @@ -40,7 +40,7 @@ **********************************************************************/ .macro tlb_prolog_bolted intnum addr - mtspr SPRN_SPRG_TLB_SCRATCH,r13 + mtspr SPRN_SPRG_GEN_SCRATCH,r13 mfspr r13,SPRN_SPRG_PACA std r10,PACA_EXTLB+EX_TLB_R10(r13) mfcr r10 @@ -69,7 +69,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV) ld r15,PACA_EXTLB+EX_TLB_R15(r13) TLB_MISS_RESTORE_STATS_BOLTED ld r16,PACA_EXTLB+EX_TLB_R16(r13) - mfspr r13,SPRN_SPRG_TLB_SCRATCH + mfspr r13,SPRN_SPRG_GEN_SCRATCH .endm /* Data TLB miss */ -- cgit v0.10.2 From 0127262c01f0beb485f917c720d1d95d165dfdbf Mon Sep 17 00:00:00 2001 From: Mihai Caraman Date: Thu, 6 Sep 2012 02:49:44 +0000 Subject: powerpc: Restore VDSO information on critical exception om BookE Critical exception on 64-bit booke uses user-visible SPRG3 as scratch. Restore VDSO information in SPRG3 on exception prolog. Use a common sprg3 field in PACA for all powerpc64 architectures. Signed-off-by: Mihai Caraman Signed-off-by: Benjamin Herrenschmidt diff --git a/arch/powerpc/include/asm/kvm_book3s_asm.h b/arch/powerpc/include/asm/kvm_book3s_asm.h index bfcd00c..88609b2 100644 --- a/arch/powerpc/include/asm/kvm_book3s_asm.h +++ b/arch/powerpc/include/asm/kvm_book3s_asm.h @@ -74,7 +74,6 @@ struct kvmppc_host_state { ulong vmhandler; ulong scratch0; ulong scratch1; - ulong sprg3; u8 in_guest; u8 restore_hid5; u8 napping; diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h index daf813f..7796519 100644 --- a/arch/powerpc/include/asm/paca.h +++ b/arch/powerpc/include/asm/paca.h @@ -136,6 +136,7 @@ struct paca_struct { u8 io_sync; /* writel() needs spin_unlock sync */ u8 irq_work_pending; /* IRQ_WORK interrupt while soft-disable */ u8 nap_state_lost; /* NV GPR values lost in power7_idle */ + u64 sprg3; /* Saved user-visible sprg */ #ifdef CONFIG_PPC_POWERNV /* Pointer to OPAL machine check event structure set by the diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 85b05c4..72f2616 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -205,6 +205,7 @@ int main(void) DEFINE(PACA_SYSTEM_TIME, offsetof(struct paca_struct, system_time)); DEFINE(PACA_TRAP_SAVE, offsetof(struct paca_struct, trap_save)); DEFINE(PACA_NAPSTATELOST, offsetof(struct paca_struct, nap_state_lost)); + DEFINE(PACA_SPRG3, offsetof(struct paca_struct, sprg3)); #endif /* CONFIG_PPC64 */ /* RTAS */ @@ -533,7 +534,6 @@ int main(void) HSTATE_FIELD(HSTATE_VMHANDLER, vmhandler); HSTATE_FIELD(HSTATE_SCRATCH0, scratch0); HSTATE_FIELD(HSTATE_SCRATCH1, scratch1); - HSTATE_FIELD(HSTATE_SPRG3, sprg3); HSTATE_FIELD(HSTATE_IN_GUEST, in_guest); HSTATE_FIELD(HSTATE_RESTORE_HID5, restore_hid5); HSTATE_FIELD(HSTATE_NAPPING, napping); diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S index 7476b0a..87a82fb 100644 --- a/arch/powerpc/kernel/exceptions-64e.S +++ b/arch/powerpc/kernel/exceptions-64e.S @@ -110,7 +110,9 @@ #define PROLOG_STORE_RESTORE_SCRATCH_CRIT \ mfspr r10,SPRN_SPRG_CRIT_SCRATCH; /* get r13 */ \ - std r10,PACA_EXCRIT+EX_R13(r13) + std r10,PACA_EXCRIT+EX_R13(r13); \ + ld r11,PACA_SPRG3(r13); \ + mtspr SPRN_SPRG_CRIT_SCRATCH,r11; /* Variants of the "addition" argument for the prolog */ diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c index b67db22..1b2076f 100644 --- a/arch/powerpc/kernel/vdso.c +++ b/arch/powerpc/kernel/vdso.c @@ -723,9 +723,7 @@ int __cpuinit vdso_getcpu_init(void) val = (cpu & 0xfff) | ((node & 0xffff) << 16); mtspr(SPRN_SPRG3, val); -#ifdef CONFIG_KVM_BOOK3S_HANDLER - get_paca()->kvm_hstate.sprg3 = val; -#endif + get_paca()->sprg3 = val; put_cpu(); diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index 5a84c8d..39a21bf 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -1065,7 +1065,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) mtspr SPRN_DABRX,r6 /* Restore SPRG3 */ - ld r3,HSTATE_SPRG3(r13) + ld r3,PACA_SPRG3(r13) mtspr SPRN_SPRG3,r3 /* -- cgit v0.10.2 From 28e1e58fb668e262648fb8ee8a24154633f40507 Mon Sep 17 00:00:00 2001 From: Ananth N Mavinakayanahalli Date: Wed, 5 Sep 2012 22:17:04 +0000 Subject: powerpc/kprobes: Rename opcode_t in probes.h to ppc_opcode_t commit: 8b7b80b9ebb46dd88fbb94e918297295cf312b59 [24/29] powerpc: Uprobes port to powerpc Caused a clash with the fore200e driver: In file included from drivers/atm/fore200e.c:70:0: drivers/atm/fore200e.h:263:3: error: redefinition of typedef 'opcode_t' with different type arch/powerpc/include/asm/probes.h:25:13: note: previous declaration of 'opcode_t' was here Fix the namespace clash by making opcode_t in probes.h to ppc_opcode_t. Signed-off-by: Ananth N Mavinakayanahalli Signed-off-by: Benjamin Herrenschmidt diff --git a/arch/powerpc/include/asm/kprobes.h b/arch/powerpc/include/asm/kprobes.h index dc58197..7b6feab 100644 --- a/arch/powerpc/include/asm/kprobes.h +++ b/arch/powerpc/include/asm/kprobes.h @@ -36,7 +36,7 @@ struct pt_regs; struct kprobe; -typedef opcode_t kprobe_opcode_t; +typedef ppc_opcode_t kprobe_opcode_t; #define MAX_INSN_SIZE 1 #ifdef CONFIG_PPC64 diff --git a/arch/powerpc/include/asm/probes.h b/arch/powerpc/include/asm/probes.h index 610e000..5f1e15b 100644 --- a/arch/powerpc/include/asm/probes.h +++ b/arch/powerpc/include/asm/probes.h @@ -22,7 +22,7 @@ */ #include -typedef u32 opcode_t; +typedef u32 ppc_opcode_t; #define BREAKPOINT_INSTRUCTION 0x7fe00008 /* trap */ /* Trap definitions per ISA */ diff --git a/arch/powerpc/include/asm/uprobes.h b/arch/powerpc/include/asm/uprobes.h index 13ba507..b532060 100644 --- a/arch/powerpc/include/asm/uprobes.h +++ b/arch/powerpc/include/asm/uprobes.h @@ -25,7 +25,7 @@ #include #include -typedef opcode_t uprobe_opcode_t; +typedef ppc_opcode_t uprobe_opcode_t; #define MAX_UINSN_BYTES 4 #define UPROBE_XOL_SLOT_BYTES (MAX_UINSN_BYTES) -- cgit v0.10.2 From cf1a4cf8754afb248e498815c7957aeb4faca79f Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Sun, 3 Jun 2012 22:15:25 +0000 Subject: powerpc/pci: Save P2P bridge resource if possible When PCI probe flag PCI_REASSIGN_ALL_RSRC has been passed into PCI core, it's hoped that all resources to be reassigned by PCI core. As to particular P2P (PCI-to-PCI) bridge, the size of the corresponding BAR (I/O, MMIO, prefetchable MMIO) is calculated by the resources required by the PCI devices behind the P2P bridge. That means that the information like start/end address retrieved from the hardware registers of the P2P bridge is meainingless in the case. However, we still count that in and the BARs might have been configured by firmware with non-zero size. That leads to space waste. The patch explicitly sets the size of P2P bridge BARs to zero in case that resource reassignment is expected with PCI probe flag PCI_REASSIGN_ALL_RSRC. In the result, it will save overall resource required by the system without waste. Signed-off-by: Gavin Shan Signed-off-by: Benjamin Herrenschmidt diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index 2aa04f2..4cb7147 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -960,13 +960,14 @@ static void __devinit pcibios_fixup_bridge(struct pci_bus *bus) if (i >= 3 && bus->self->transparent) continue; - /* If we are going to re-assign everything, mark the resource - * as unset and move it down to 0 + /* If we're going to reassign everything, we can + * shrink the P2P resource to have size as being + * of 0 in order to save space. */ if (pci_has_flag(PCI_REASSIGN_ALL_RSRC)) { res->flags |= IORESOURCE_UNSET; - res->end -= res->start; res->start = 0; + res->end = -1; continue; } @@ -1228,7 +1229,14 @@ void pcibios_allocate_bus_resources(struct pci_bus *bus) pr_warning("PCI: Cannot allocate resource region " "%d of PCI bridge %d, will remap\n", i, bus->number); clear_resource: - res->start = res->end = 0; + /* The resource might be figured out when doing + * reassignment based on the resources required + * by the downstream PCI devices. Here we set + * the size of the resource to be 0 in order to + * save more space. + */ + res->start = 0; + res->end = -1; res->flags = 0; } -- cgit v0.10.2 From 752f5216f1eaabb0cfa84eaecd0ce17d79c7d2cf Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Mon, 4 Jun 2012 16:47:03 +0000 Subject: powerpc/pseries: Round up MSI-X requests The pseries firmware currently refuses any non power of two MSI-X request. Unfortunately most network drivers end up asking for that because they want a power of two for RX queues and one or two extra for everything else. This patch rounds up the firmware request to the next power of two if the quota allows it. If this fails we fall back to using the original request size. Signed-off-by: Anton Blanchard Signed-off-by: Benjamin Herrenschmidt diff --git a/arch/powerpc/platforms/pseries/msi.c b/arch/powerpc/platforms/pseries/msi.c index 109fdb7..8bc89e4 100644 --- a/arch/powerpc/platforms/pseries/msi.c +++ b/arch/powerpc/platforms/pseries/msi.c @@ -387,12 +387,13 @@ static int check_msix_entries(struct pci_dev *pdev) return 0; } -static int rtas_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type) +static int rtas_setup_msi_irqs(struct pci_dev *pdev, int nvec_in, int type) { struct pci_dn *pdn; int hwirq, virq, i, rc; struct msi_desc *entry; struct msi_msg msg; + int nvec = nvec_in; pdn = get_pdn(pdev); if (!pdn) @@ -402,10 +403,23 @@ static int rtas_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type) return -EINVAL; /* + * Firmware currently refuse any non power of two allocation + * so we round up if the quota will allow it. + */ + if (type == PCI_CAP_ID_MSIX) { + int m = roundup_pow_of_two(nvec); + int quota = msi_quota_for_device(pdev, m); + + if (quota >= m) + nvec = m; + } + + /* * Try the new more explicit firmware interface, if that fails fall * back to the old interface. The old interface is known to never * return MSI-Xs. */ +again: if (type == PCI_CAP_ID_MSI) { rc = rtas_change_msi(pdn, RTAS_CHANGE_MSI_FN, nvec); @@ -417,6 +431,10 @@ static int rtas_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type) rc = rtas_change_msi(pdn, RTAS_CHANGE_MSIX_FN, nvec); if (rc != nvec) { + if (nvec != nvec_in) { + nvec = nvec_in; + goto again; + } pr_debug("rtas_msi: rtas_change_msi() failed\n"); return rc; } -- cgit v0.10.2 From 22d8ce887991f8f70a68fb68972f187ec79d44b7 Mon Sep 17 00:00:00 2001 From: "sukadev@linux.vnet.ibm.com" Date: Mon, 16 Jul 2012 11:22:02 +0000 Subject: powerpc: Define Power7+ PV constant PV_POWER7p This definition will be used by subsequent perf and oprofile patches Signed-off-by: Sukadev Bhattiprolu Signed-off-by: Benjamin Herrenschmidt diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index a391244..ad400a5 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -1020,6 +1020,7 @@ #define PVR_630p 0x0041 #define PVR_970MP 0x0044 #define PVR_970GX 0x0045 +#define PVR_POWER7p 0x004A #define PVR_BE 0x0070 #define PVR_PA6T 0x0090 -- cgit v0.10.2 From adbf115d66649b3a52e6875f93c9f131ba690cce Mon Sep 17 00:00:00 2001 From: "Carl E. Love" Date: Fri, 3 Aug 2012 03:02:17 +0000 Subject: powerpc/oprofile: Fix marked events support on Power7+ not set. Starting with Power 7+ we need to check for marked events if the SIAR register is valid, i.e. it contains the correct address of the instruction at the time the performance counter overflowed. The mmcra register on Power 7+, contains a new bit to indicate that the contents of the SIAR is valid. If the event is not marked, then the sample is recorded independently of the SIAR valid bit setting. For older processors, there is no SIAR valid bit to check so the samples are always recorded. This is done by forcing the cntr_marked_events bit mask to zero. The code will always record the sample in this case since the bit mask says the event is not a marked event even if it really is a marked event. Signed-off-by: Carl Love Signed-off-by: Benjamin Herrenschmidt diff --git a/arch/powerpc/oprofile/op_model_power4.c b/arch/powerpc/oprofile/op_model_power4.c index caffffa..315f949 100644 --- a/arch/powerpc/oprofile/op_model_power4.c +++ b/arch/powerpc/oprofile/op_model_power4.c @@ -21,6 +21,13 @@ #include #define dbg(args...) +#define OPROFILE_PM_PMCSEL_MSK 0xffULL +#define OPROFILE_PM_UNIT_SHIFT 60 +#define OPROFILE_PM_UNIT_MSK 0xfULL +#define OPROFILE_MAX_PMC_NUM 3 +#define OPROFILE_PMSEL_FIELD_WIDTH 8 +#define OPROFILE_UNIT_FIELD_WIDTH 4 +#define MMCRA_SIAR_VALID_MASK 0x10000000ULL static unsigned long reset_value[OP_MAX_COUNTER]; @@ -31,6 +38,61 @@ static int use_slot_nums; static u32 mmcr0_val; static u64 mmcr1_val; static u64 mmcra_val; +static u32 cntr_marked_events; + +static int power7_marked_instr_event(u64 mmcr1) +{ + u64 psel, unit; + int pmc, cntr_marked_events = 0; + + /* Given the MMCR1 value, look at the field for each counter to + * determine if it is a marked event. Code based on the function + * power7_marked_instr_event() in file arch/powerpc/perf/power7-pmu.c. + */ + for (pmc = 0; pmc < 4; pmc++) { + psel = mmcr1 & (OPROFILE_PM_PMCSEL_MSK + << (OPROFILE_MAX_PMC_NUM - pmc) + * OPROFILE_MAX_PMC_NUM); + psel = (psel >> ((OPROFILE_MAX_PMC_NUM - pmc) + * OPROFILE_PMSEL_FIELD_WIDTH)) & ~1ULL; + unit = mmcr1 & (OPROFILE_PM_UNIT_MSK + << (OPROFILE_PM_UNIT_SHIFT + - (pmc * OPROFILE_PMSEL_FIELD_WIDTH ))); + unit = unit >> (OPROFILE_PM_UNIT_SHIFT + - (pmc * OPROFILE_PMSEL_FIELD_WIDTH)); + + switch (psel >> 4) { + case 2: + cntr_marked_events |= (pmc == 1 || pmc == 3) << pmc; + break; + case 3: + if (psel == 0x3c) { + cntr_marked_events |= (pmc == 0) << pmc; + break; + } + + if (psel == 0x3e) { + cntr_marked_events |= (pmc != 1) << pmc; + break; + } + + cntr_marked_events |= 1 << pmc; + break; + case 4: + case 5: + cntr_marked_events |= (unit == 0xd) << pmc; + break; + case 6: + if (psel == 0x64) + cntr_marked_events |= (pmc >= 2) << pmc; + break; + case 8: + cntr_marked_events |= (unit == 0xd) << pmc; + break; + } + } + return cntr_marked_events; +} static int power4_reg_setup(struct op_counter_config *ctr, struct op_system_config *sys, @@ -47,6 +109,23 @@ static int power4_reg_setup(struct op_counter_config *ctr, mmcr1_val = sys->mmcr1; mmcra_val = sys->mmcra; + /* Power 7+ and newer architectures: + * Determine which counter events in the group (the group of events is + * specified by the bit settings in the MMCR1 register) are marked + * events for use in the interrupt handler. Do the calculation once + * before OProfile starts. Information is used in the interrupt + * handler. Starting with Power 7+ we only record the sample for + * marked events if the SIAR valid bit is set. For non marked events + * the sample is always recorded. + */ + if (pvr_version_is(PVR_POWER7p)) + cntr_marked_events = power7_marked_instr_event(mmcr1_val); + else + cntr_marked_events = 0; /* For older processors, set the bit map + * to zero so the sample will always be + * be recorded. + */ + for (i = 0; i < cur_cpu_spec->num_pmcs; ++i) reset_value[i] = 0x80000000UL - ctr[i].count; @@ -291,6 +370,7 @@ static void power4_handle_interrupt(struct pt_regs *regs, int i; unsigned int mmcr0; unsigned long mmcra; + bool siar_valid = false; mmcra = mfspr(SPRN_MMCRA); @@ -300,11 +380,29 @@ static void power4_handle_interrupt(struct pt_regs *regs, /* set the PMM bit (see comment below) */ mtmsrd(mfmsr() | MSR_PMM); + /* Check that the SIAR valid bit in MMCRA is set to 1. */ + if ((mmcra & MMCRA_SIAR_VALID_MASK) == MMCRA_SIAR_VALID_MASK) + siar_valid = true; + for (i = 0; i < cur_cpu_spec->num_pmcs; ++i) { val = classic_ctr_read(i); if (pmc_overflow(val)) { if (oprofile_running && ctr[i].enabled) { - oprofile_add_ext_sample(pc, regs, i, is_kernel); + /* Power 7+ and newer architectures: + * If the event is a marked event, then only + * save the sample if the SIAR valid bit is + * set. If the event is not marked, then + * always save the sample. + * Note, the Sample enable bit in the MMCRA + * register must be set to 1 if the group + * contains a marked event. + */ + if ((siar_valid && + (cntr_marked_events & (1 << i))) + || !(cntr_marked_events & (1 << i))) + oprofile_add_ext_sample(pc, regs, i, + is_kernel); + classic_ctr_write(i, reset_value[i]); } else { classic_ctr_write(i, 0); -- cgit v0.10.2 From 0090e02b947c5de1cf978f52ac12c7b532e61154 Mon Sep 17 00:00:00 2001 From: Matthew McClintock Date: Thu, 6 Sep 2012 08:48:54 +0000 Subject: powerpc: Fix build dependencies for c files requiring libfdt.h Several files in obj-plat depend on libfdt header file. Sometimes when building one can see the following issue. This patch adds libfdt as dependency to those object files | In file included from arch/powerpc/boot/treeboot-iss4xx.c:33:0: | arch/powerpc/boot/libfdt.h:854:1: error: unterminated comment | In file included from arch/powerpc/boot/treeboot-iss4xx.c:33:0: | arch/powerpc/boot/libfdt.h:1:0: error: unterminated #ifndef | BOOTCC arch/powerpc/boot/inffast.o | make[1]: *** [arch/powerpc/boot/treeboot-iss4xx.o] Error 1 | make[1]: *** Waiting for unfinished jobs.... | BOOTCC arch/powerpc/boot/inflate.o | make: *** [uImage] Error 2 | ERROR: oe_runmake failed | ERROR: Function failed: do_compile (see /srv/home/pokybuild/yocto-autobuilder/yocto-slave/p1022ds/build/build/tmp/work/p1022ds-poky-linux-gnuspe/linux-qoriq-sdk-3.0.34-r5/temp/log.do_compile.2167 for further information) NOTE: recipe linux-qoriq-sdk-3.0.34-r5: task do_compile: Failed Signed-off-by: Matthew McClintock Signed-off-by: Benjamin Herrenschmidt diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile index b7d8333..6a15c96 100644 --- a/arch/powerpc/boot/Makefile +++ b/arch/powerpc/boot/Makefile @@ -107,6 +107,7 @@ src-boot := $(addprefix $(obj)/, $(src-boot)) obj-boot := $(addsuffix .o, $(basename $(src-boot))) obj-wlib := $(addsuffix .o, $(basename $(addprefix $(obj)/, $(src-wlib)))) obj-plat := $(addsuffix .o, $(basename $(addprefix $(obj)/, $(src-plat)))) +obj-plat: $(libfdt) quiet_cmd_copy_zlib = COPY $@ cmd_copy_zlib = sed "s@__used@@;s@]*\).*@\"\1\"@" $< > $@ -- cgit v0.10.2 From a84fcd46870113e92523e1ebb9a0ec75f66e03a2 Mon Sep 17 00:00:00 2001 From: Suzuki Poulose Date: Tue, 21 Aug 2012 01:42:33 +0000 Subject: powerpc: Change memory_limit from phys_addr_t to unsigned long long There are some device-tree nodes, whose values are of type phys_addr_t. The phys_addr_t is variable sized based on the CONFIG_PHSY_T_64BIT. Change these to a fixed unsigned long long for consistency. This patch does the change only for memory_limit. The following is a list of such variables which need the change: 1) kernel_end, crashk_size - in arch/powerpc/kernel/machine_kexec.c 2) (struct resource *)crashk_res.start - We could export a local static variable from machine_kexec.c. Changing the above values might break the kexec-tools. So, I will fix kexec-tools first to handle the different sized values and then change the above. Suggested-by: Benjamin Herrenschmidt Signed-off-by: Suzuki K. Poulose Signed-off-by: Benjamin Herrenschmidt diff --git a/arch/powerpc/include/asm/setup.h b/arch/powerpc/include/asm/setup.h index d084ce1..8b9a306 100644 --- a/arch/powerpc/include/asm/setup.h +++ b/arch/powerpc/include/asm/setup.h @@ -9,7 +9,7 @@ extern void ppc_printk_progress(char *s, unsigned short hex); extern unsigned int rtas_data; extern int mem_init_done; /* set on boot once kmalloc can be called */ extern int init_bootmem_done; /* set once bootmem is available */ -extern phys_addr_t memory_limit; +extern unsigned long long memory_limit; extern unsigned long klimit; extern void *zalloc_maybe_bootmem(size_t size, gfp_t mask); diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c index 18bdf74..06c8202 100644 --- a/arch/powerpc/kernel/fadump.c +++ b/arch/powerpc/kernel/fadump.c @@ -289,8 +289,7 @@ int __init fadump_reserve_mem(void) else memory_limit = memblock_end_of_DRAM(); printk(KERN_INFO "Adjusted memory_limit for firmware-assisted" - " dump, now %#016llx\n", - (unsigned long long)memory_limit); + " dump, now %#016llx\n", memory_limit); } if (memory_limit) memory_boundary = memory_limit; diff --git a/arch/powerpc/kernel/machine_kexec.c b/arch/powerpc/kernel/machine_kexec.c index 5df7777..4074eff 100644 --- a/arch/powerpc/kernel/machine_kexec.c +++ b/arch/powerpc/kernel/machine_kexec.c @@ -165,7 +165,7 @@ void __init reserve_crashkernel(void) if (memory_limit && memory_limit <= crashk_res.end) { memory_limit = crashk_res.end + 1; printk("Adjusted memory limit for crashkernel, now 0x%llx\n", - (unsigned long long)memory_limit); + memory_limit); } printk(KERN_INFO "Reserving %ldMB of memory at %ldMB " diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index f191bf0..37725e8 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -78,7 +78,7 @@ static int __init early_parse_mem(char *p) return 1; memory_limit = PAGE_ALIGN(memparse(p, &p)); - DBG("memory limit = 0x%llx\n", (unsigned long long)memory_limit); + DBG("memory limit = 0x%llx\n", memory_limit); return 0; } @@ -661,7 +661,7 @@ void __init early_init_devtree(void *params) /* make sure we've parsed cmdline for mem= before this */ if (memory_limit) - first_memblock_size = min(first_memblock_size, memory_limit); + first_memblock_size = min_t(u64, first_memblock_size, memory_limit); setup_initial_memory_limit(memstart_addr, first_memblock_size); /* Reserve MEMBLOCK regions used by kernel, initrd, dt, etc... */ memblock_reserve(PHYSICAL_START, __pa(klimit) - PHYSICAL_START); diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index fbdad0e..44cf2b2 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -62,7 +62,7 @@ int init_bootmem_done; int mem_init_done; -phys_addr_t memory_limit; +unsigned long long memory_limit; #ifdef CONFIG_HIGHMEM pte_t *kmap_pte; -- cgit v0.10.2 From 4bc77a5ed215b4ec9cc39d5f55323b2e68000055 Mon Sep 17 00:00:00 2001 From: Suzuki Poulose Date: Tue, 21 Aug 2012 01:42:43 +0000 Subject: powerpc: Export memory limit via device tree The powerpc kernel doesn't export the memory limit enforced by 'mem=' kernel parameter. This is required for building the ELF header in kexec-tools to limit the vmcore to capture only the used memory. On powerpc the kexec-tools depends on the device-tree for memory related information, unlike /proc/iomem on the x86. Without this information, the kexec-tools assumes the entire System RAM and vmcore creates an unnecessarily larger dump. This patch exports the memory limit, if present, via chosen/linux,memory-limit property, so that the vmcore can be limited to the memory limit. The prom_init seems to export this value in the same node. But doesn't really appear there. Also the memory_limit gets adjusted with the processing of crashkernel= parameter. This patch makes sure we get the actual limit. The kexec-tools will use the value to limit the 'end' of the memory regions. Tested this patch on ppc64 and ppc32(ppc440) with a kexec-tools patch by Mahesh. Signed-off-by: Suzuki K. Poulose Tested-by: Mahesh J. Salgaonkar Signed-off-by: Benjamin Herrenschmidt diff --git a/arch/powerpc/kernel/machine_kexec.c b/arch/powerpc/kernel/machine_kexec.c index 4074eff..fa9f6c7 100644 --- a/arch/powerpc/kernel/machine_kexec.c +++ b/arch/powerpc/kernel/machine_kexec.c @@ -204,6 +204,12 @@ static struct property crashk_size_prop = { .value = &crashk_size, }; +static struct property memory_limit_prop = { + .name = "linux,memory-limit", + .length = sizeof(unsigned long long), + .value = &memory_limit, +}; + static void __init export_crashk_values(struct device_node *node) { struct property *prop; @@ -223,6 +229,12 @@ static void __init export_crashk_values(struct device_node *node) crashk_size = resource_size(&crashk_res); prom_add_property(node, &crashk_size_prop); } + + /* + * memory_limit is required by the kexec-tools to limit the + * crash regions to the actual memory used. + */ + prom_update_property(node, &memory_limit_prop); } static int __init kexec_setup(void) -- cgit v0.10.2 From ab046a937629172d5da2bbf4867d89507280759f Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Wed, 5 Sep 2012 19:17:47 +0000 Subject: powerpc: Pack arch_hw_breakpoint to avoid holes in struct No functional change Signed-off-by: Michael Neuling Signed-off-by: Benjamin Herrenschmidt diff --git a/arch/powerpc/include/asm/hw_breakpoint.h b/arch/powerpc/include/asm/hw_breakpoint.h index be04330..39b323e 100644 --- a/arch/powerpc/include/asm/hw_breakpoint.h +++ b/arch/powerpc/include/asm/hw_breakpoint.h @@ -27,10 +27,10 @@ #ifdef CONFIG_HAVE_HW_BREAKPOINT struct arch_hw_breakpoint { - bool extraneous_interrupt; - u8 len; /* length of the target data symbol */ - int type; unsigned long address; + int type; + u8 len; /* length of the target data symbol */ + bool extraneous_interrupt; }; #include -- cgit v0.10.2 From 3f4693eeeae45e9253031633adb0c7fde13a5026 Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Wed, 5 Sep 2012 19:17:48 +0000 Subject: powerpc: Use consistent name info for arch_hw_breakpoint Change bp_info to info to be consistent with the rest of this file. Signed-off-by: Michael Neuling Signed-off-by: Benjamin Herrenschmidt diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c index 956a4c4..6767445 100644 --- a/arch/powerpc/kernel/hw_breakpoint.c +++ b/arch/powerpc/kernel/hw_breakpoint.c @@ -294,7 +294,7 @@ int __kprobes single_step_dabr_instruction(struct die_args *args) { struct pt_regs *regs = args->regs; struct perf_event *bp = NULL; - struct arch_hw_breakpoint *bp_info; + struct arch_hw_breakpoint *info; bp = current->thread.last_hit_ubp; /* @@ -304,16 +304,16 @@ int __kprobes single_step_dabr_instruction(struct die_args *args) if (!bp) return NOTIFY_DONE; - bp_info = counter_arch_bp(bp); + info = counter_arch_bp(bp); /* * We shall invoke the user-defined callback function in the single * stepping handler to confirm to 'trigger-after-execute' semantics */ - if (!bp_info->extraneous_interrupt) + if (!info->extraneous_interrupt) perf_bp_event(bp, regs); - set_dabr(bp_info->address | bp_info->type | DABR_TRANSLATION); + set_dabr(info->address | info->type | DABR_TRANSLATION); current->thread.last_hit_ubp = NULL; /* -- cgit v0.10.2 From 06c887666803608c3efe3807eba0e362307db7c4 Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Wed, 5 Sep 2012 19:17:49 +0000 Subject: powerpc: Use the XDABR hcall We never use the XDABR hcall since we check for DABR hcall first. XDABR syscall is better since it allows us to also set the DABRX. Signed-off-by: Michael Neuling Signed-off-by: Benjamin Herrenschmidt diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index 51ecac9..36b7744 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -529,10 +529,10 @@ static void __init pSeries_init_early(void) if (firmware_has_feature(FW_FEATURE_LPAR)) hvc_vio_init_early(); #endif - if (firmware_has_feature(FW_FEATURE_DABR)) - ppc_md.set_dabr = pseries_set_dabr; - else if (firmware_has_feature(FW_FEATURE_XDABR)) + if (firmware_has_feature(FW_FEATURE_XDABR)) ppc_md.set_dabr = pseries_set_xdabr; + else if (firmware_has_feature(FW_FEATURE_DABR)) + ppc_md.set_dabr = pseries_set_dabr; pSeries_cmo_feature_init(); iommu_init_early_pSeries(); -- cgit v0.10.2 From fca826460e55d4e8f2deceb09f22f9340a0db24c Mon Sep 17 00:00:00 2001 From: Richard Weinberger Date: Mon, 26 Mar 2012 09:01:29 +0000 Subject: powerpc: 512x: Fix mpc5121_clk_get() If try_module_get() fails, mpc5121_clk_get() might return a wrong clock. Signed-off-by: Richard Weinberger Signed-off-by: Anatolij Gustschin diff --git a/arch/powerpc/platforms/512x/clock.c b/arch/powerpc/platforms/512x/clock.c index 1d8700f..9f771e0 100644 --- a/arch/powerpc/platforms/512x/clock.c +++ b/arch/powerpc/platforms/512x/clock.c @@ -54,14 +54,16 @@ static DEFINE_MUTEX(clocks_mutex); static struct clk *mpc5121_clk_get(struct device *dev, const char *id) { struct clk *p, *clk = ERR_PTR(-ENOENT); - int dev_match = 0; - int id_match = 0; + int dev_match; + int id_match; if (dev == NULL || id == NULL) return clk; mutex_lock(&clocks_mutex); list_for_each_entry(p, &clocks, node) { + dev_match = id_match = 0; + if (dev == p->dev) dev_match++; if (strcmp(id, p->name) == 0) -- cgit v0.10.2 From 12e36309f8774f4ccc769d5e3ff11ef092e524bc Mon Sep 17 00:00:00 2001 From: Paul Gortmaker Date: Tue, 3 Apr 2012 17:15:00 +0000 Subject: powerpc: Option FB_FSL_DIU is not really optional for mpc512x In powerpc randconfig builds, this keeps showing up: CC arch/powerpc/platforms/512x/mpc512x_shared.o arch/powerpc/platforms/512x/mpc512x_shared.c:70:9: warning: 'enum fsl_diu_monitor_port' declared inside parameter list arch/powerpc/platforms/512x/mpc512x_shared.c:70:9: warning: its scope is only this definition or declaration, which is probably not what you want arch/powerpc/platforms/512x/mpc512x_shared.c:69:56: error: parameter 1 ('port') has incomplete type arch/powerpc/platforms/512x/mpc512x_shared.c:69:5: warning: function declaration isn't a prototype arch/powerpc/platforms/512x/mpc512x_shared.c:84:9: warning: 'enum fsl_diu_monitor_port' declared inside parameter list arch/powerpc/platforms/512x/mpc512x_shared.c:83:56: error: parameter 1 ('port') has incomplete type arch/powerpc/platforms/512x/mpc512x_shared.c:83:6: warning: function declaration isn't a prototype arch/powerpc/platforms/512x/mpc512x_shared.c:88:36: warning: 'enum fsl_diu_monitor_port' declared inside parameter list arch/powerpc/platforms/512x/mpc512x_shared.c:88:57: error: parameter 1 ('port') has incomplete type arch/powerpc/platforms/512x/mpc512x_shared.c:88:6: warning: function declaration isn't a prototype arch/powerpc/platforms/512x/mpc512x_shared.c:187:54: error: parameter 1 ('port') has incomplete type arch/powerpc/platforms/512x/mpc512x_shared.c:187:1: error: return type is an incomplete type arch/powerpc/platforms/512x/mpc512x_shared.c:187:1: warning: function declaration isn't a prototype arch/powerpc/platforms/512x/mpc512x_shared.c: In function 'mpc512x_valid_monitor_port': arch/powerpc/platforms/512x/mpc512x_shared.c:189:9: error: 'FSL_DIU_PORT_DVI' undeclared (first use in this function) arch/powerpc/platforms/512x/mpc512x_shared.c:189:9: note: each undeclared identifier is reported only once for each function it appears in arch/powerpc/platforms/512x/mpc512x_shared.c:189:2: warning: 'return' with a value, in function returning void make[2]: *** [arch/powerpc/platforms/512x/mpc512x_shared.o] Error 1 The reason is that mpc512x_shared.c has a couple token #ifdef on FB_FSL_DIU/FB_FSL_DIU_MODULE, but they don't come close to masking all the DIU dependencies, as the above fail shows. Rather than sprinkle more pointless #ifdef in this file, just remove the existing two, and make FB_FSL_DIU part of the dependency. The mpc512x_defconfig already has the line "CONFIG_FB_FSL_DIU=y" so this change should be zero impact on real world configs. Signed-off-by: Paul Gortmaker Signed-off-by: Anatolij Gustschin diff --git a/arch/powerpc/platforms/512x/Kconfig b/arch/powerpc/platforms/512x/Kconfig index c169998..b62508b 100644 --- a/arch/powerpc/platforms/512x/Kconfig +++ b/arch/powerpc/platforms/512x/Kconfig @@ -2,6 +2,7 @@ config PPC_MPC512x bool "512x-based boards" depends on 6xx select FSL_SOC + select FB_FSL_DIU select IPIC select PPC_CLOCK select PPC_PCI_CHOICE diff --git a/arch/powerpc/platforms/512x/mpc512x_shared.c b/arch/powerpc/platforms/512x/mpc512x_shared.c index cfe958e..1650e09 100644 --- a/arch/powerpc/platforms/512x/mpc512x_shared.c +++ b/arch/powerpc/platforms/512x/mpc512x_shared.c @@ -191,8 +191,6 @@ mpc512x_valid_monitor_port(enum fsl_diu_monitor_port port) static struct fsl_diu_shared_fb __attribute__ ((__aligned__(8))) diu_shared_fb; -#if defined(CONFIG_FB_FSL_DIU) || \ - defined(CONFIG_FB_FSL_DIU_MODULE) static inline void mpc512x_free_bootmem(struct page *page) { __ClearPageReserved(page); @@ -220,7 +218,6 @@ void mpc512x_release_bootmem(void) } diu_ops.release_bootmem = NULL; } -#endif /* * Check if DIU was pre-initialized. If so, perform steps @@ -323,15 +320,12 @@ void __init mpc512x_setup_diu(void) } } -#if defined(CONFIG_FB_FSL_DIU) || \ - defined(CONFIG_FB_FSL_DIU_MODULE) diu_ops.get_pixel_format = mpc512x_get_pixel_format; diu_ops.set_gamma_table = mpc512x_set_gamma_table; diu_ops.set_monitor_port = mpc512x_set_monitor_port; diu_ops.set_pixel_clock = mpc512x_set_pixel_clock; diu_ops.valid_monitor_port = mpc512x_valid_monitor_port; diu_ops.release_bootmem = mpc512x_release_bootmem; -#endif } void __init mpc512x_init_IRQ(void) -- cgit v0.10.2 From 407821a34fce89b4f0b031dbab5cec7d059f46bc Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Fri, 7 Sep 2012 15:31:44 +0000 Subject: powerpc: Initialise paca.data_offset with poison It's possible for the cpu_possible_mask to change between the time we initialise the pacas and the time we setup per_cpu areas. Obviously impossible cpus shouldn't ever be running, but stranger things have happened. So be paranoid and initialise data_offset with a poison value in case we don't set it up later. Based on a patch from Anton Blanchard. Signed-off-by: Michael Ellerman Signed-off-by: Benjamin Herrenschmidt diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c index fbe1a12..cd6da85 100644 --- a/arch/powerpc/kernel/paca.c +++ b/arch/powerpc/kernel/paca.c @@ -142,6 +142,7 @@ void __init initialise_paca(struct paca_struct *new_paca, int cpu) new_paca->hw_cpu_id = 0xffff; new_paca->kexec_state = KEXEC_STATE_NONE; new_paca->__current = &init_task; + new_paca->data_offset = 0xfeeeeeeeeeeeeeeeULL; #ifdef CONFIG_PPC_STD_MMU_64 new_paca->slb_shadow_ptr = &slb_shadow[cpu]; #endif /* CONFIG_PPC_STD_MMU_64 */ -- cgit v0.10.2 From 35e5cfe27e674e4580408b34e7fc97e18460e048 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Fri, 7 Sep 2012 22:44:02 +0000 Subject: powerpc/eeh: Move EEH initialization around Currently, we have 3 phases for EEH initialization on pSeries platform. All of them are done through builtin functions: platform initialization, EEH device creation, and EEH subsystem enablement. All of them are done no later than ppc_md.setup_arch. That means that the slab/slub isn't ready yet, so we have to allocate memory chunks on basis of PAGE_SIZE for those dynamically created EEH devices. That's pretty expensive. In order to utilize slab/slub for memory allocation, we have to move the EEH initialization functions around, but all of them should be called after slab is ready. Signed-off-by: Gavin Shan Signed-off-by: Benjamin Herrenschmidt diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h index d60f998..06dedff 100644 --- a/arch/powerpc/include/asm/eeh.h +++ b/arch/powerpc/include/asm/eeh.h @@ -117,11 +117,6 @@ extern int eeh_subsystem_enabled; void * __devinit eeh_dev_init(struct device_node *dn, void *data); void __devinit eeh_dev_phb_init_dynamic(struct pci_controller *phb); -void __init eeh_dev_phb_init(void); -void __init eeh_init(void); -#ifdef CONFIG_PPC_PSERIES -int __init eeh_pseries_init(void); -#endif int __init eeh_ops_register(struct eeh_ops *ops); int __exit eeh_ops_unregister(const char *name); unsigned long eeh_check_failure(const volatile void __iomem *token, @@ -156,17 +151,6 @@ static inline void *eeh_dev_init(struct device_node *dn, void *data) static inline void eeh_dev_phb_init_dynamic(struct pci_controller *phb) { } -static inline void eeh_dev_phb_init(void) { } - -static inline void eeh_init(void) { } - -#ifdef CONFIG_PPC_PSERIES -static inline int eeh_pseries_init(void) -{ - return 0; -} -#endif /* CONFIG_PPC_PSERIES */ - static inline unsigned long eeh_check_failure(const volatile void __iomem *token, unsigned long val) { return val; diff --git a/arch/powerpc/kernel/rtas_pci.c b/arch/powerpc/kernel/rtas_pci.c index 179af90..140735c 100644 --- a/arch/powerpc/kernel/rtas_pci.c +++ b/arch/powerpc/kernel/rtas_pci.c @@ -275,9 +275,6 @@ void __init find_and_init_phbs(void) of_node_put(root); pci_devs_phb_init(); - /* Create EEH devices for all PHBs */ - eeh_dev_phb_init(); - /* * PCI_PROBE_ONLY and PCI_REASSIGN_ALL_BUS can be set via properties * in chosen. diff --git a/arch/powerpc/platforms/pseries/eeh.c b/arch/powerpc/platforms/pseries/eeh.c index ecd394c..e819448 100644 --- a/arch/powerpc/platforms/pseries/eeh.c +++ b/arch/powerpc/platforms/pseries/eeh.c @@ -982,7 +982,7 @@ int __exit eeh_ops_unregister(const char *name) * Even if force-off is set, the EEH hardware is still enabled, so that * newer systems can boot. */ -void __init eeh_init(void) +static int __init eeh_init(void) { struct pci_controller *hose, *tmp; struct device_node *phb; @@ -992,11 +992,11 @@ void __init eeh_init(void) if (!eeh_ops) { pr_warning("%s: Platform EEH operation not found\n", __func__); - return; + return -EEXIST; } else if ((ret = eeh_ops->init())) { pr_warning("%s: Failed to call platform init function (%d)\n", __func__, ret); - return; + return ret; } raw_spin_lock_init(&confirm_error_lock); @@ -1011,8 +1011,12 @@ void __init eeh_init(void) printk(KERN_INFO "EEH: PCI Enhanced I/O Error Handling Enabled\n"); else printk(KERN_WARNING "EEH: No capable adapters found\n"); + + return ret; } +core_initcall_sync(eeh_init); + /** * eeh_add_device_early - Enable EEH for the indicated device_node * @dn: device node for which to set up EEH diff --git a/arch/powerpc/platforms/pseries/eeh_dev.c b/arch/powerpc/platforms/pseries/eeh_dev.c index c4507d0..ab68c59 100644 --- a/arch/powerpc/platforms/pseries/eeh_dev.c +++ b/arch/powerpc/platforms/pseries/eeh_dev.c @@ -93,10 +93,14 @@ void __devinit eeh_dev_phb_init_dynamic(struct pci_controller *phb) * Scan all the existing PHBs and create EEH devices for their OF * nodes and their children OF nodes */ -void __init eeh_dev_phb_init(void) +static int __init eeh_dev_phb_init(void) { struct pci_controller *phb, *tmp; list_for_each_entry_safe(phb, tmp, &hose_list, list_node) eeh_dev_phb_init_dynamic(phb); + + return 0; } + +core_initcall(eeh_dev_phb_init); diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c index c33360ec..5e2805a 100644 --- a/arch/powerpc/platforms/pseries/eeh_pseries.c +++ b/arch/powerpc/platforms/pseries/eeh_pseries.c @@ -559,7 +559,9 @@ static struct eeh_ops pseries_eeh_ops = { * EEH initialization on pseries platform. This function should be * called before any EEH related functions. */ -int __init eeh_pseries_init(void) +static int __init eeh_pseries_init(void) { return eeh_ops_register(&pseries_eeh_ops); } + +early_initcall(eeh_pseries_init); diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index 36b7744..4a2cd48 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -388,10 +388,8 @@ static void __init pSeries_setup_arch(void) /* Find and initialize PCI host bridges */ init_pci_config_tokens(); - eeh_pseries_init(); find_and_init_phbs(); pSeries_reconfig_notifier_register(&pci_dn_reconfig_nb); - eeh_init(); pSeries_nvram_init(); -- cgit v0.10.2 From 7e4bbaf0bf13c33be275e8a17997597dfd0ed03a Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Fri, 7 Sep 2012 22:44:03 +0000 Subject: powerpc/eeh: Use slab to allocate eeh devices The EEH initialization functions have been postponed until slab/slub are ready. So we use slab/slub to allocate the memory chunks for newly creatd EEH devices. That would save lots of memory. The patch also does cleanup to replace "kmalloc" with "kzalloc" so that we needn't clear the allocated memory chunk explicitly. Signed-off-by: Gavin Shan Signed-off-by: Benjamin Herrenschmidt diff --git a/arch/powerpc/platforms/pseries/eeh_cache.c b/arch/powerpc/platforms/pseries/eeh_cache.c index e5ae1c6..f50b717 100644 --- a/arch/powerpc/platforms/pseries/eeh_cache.c +++ b/arch/powerpc/platforms/pseries/eeh_cache.c @@ -151,7 +151,7 @@ pci_addr_cache_insert(struct pci_dev *dev, unsigned long alo, return piar; } } - piar = kmalloc(sizeof(struct pci_io_addr_range), GFP_ATOMIC); + piar = kzalloc(sizeof(struct pci_io_addr_range), GFP_ATOMIC); if (!piar) return NULL; diff --git a/arch/powerpc/platforms/pseries/eeh_dev.c b/arch/powerpc/platforms/pseries/eeh_dev.c index ab68c59..8e3443b 100644 --- a/arch/powerpc/platforms/pseries/eeh_dev.c +++ b/arch/powerpc/platforms/pseries/eeh_dev.c @@ -55,7 +55,7 @@ void * __devinit eeh_dev_init(struct device_node *dn, void *data) struct eeh_dev *edev; /* Allocate EEH device */ - edev = zalloc_maybe_bootmem(sizeof(*edev), GFP_KERNEL); + edev = kzalloc(sizeof(*edev), GFP_KERNEL); if (!edev) { pr_warning("%s: out of memory\n", __func__); return NULL; diff --git a/arch/powerpc/platforms/pseries/eeh_event.c b/arch/powerpc/platforms/pseries/eeh_event.c index fb50631..6132772 100644 --- a/arch/powerpc/platforms/pseries/eeh_event.c +++ b/arch/powerpc/platforms/pseries/eeh_event.c @@ -139,7 +139,7 @@ int eeh_send_failure_event(struct eeh_dev *edev) printk(KERN_ERR "EEH: PCI location = %s\n", location); return 1; } - event = kmalloc(sizeof(*event), GFP_ATOMIC); + event = kzalloc(sizeof(*event), GFP_ATOMIC); if (event == NULL) { printk(KERN_ERR "EEH: out of memory, event not handled\n"); return 1; -- cgit v0.10.2 From 3ea1ae989a9ddcfe15b80dc8cea5c4d38a13b9ab Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Fri, 7 Sep 2012 22:44:04 +0000 Subject: powerpc/eeh: More logs for EEH initialization The patch adds more logs to EEH initialization functions for debugging purpose. Also, the machine type (pSeries) is checked in the platform initialization to assure it's the correct platform to invoke it. Signed-off-by: Gavin Shan Signed-off-by: Benjamin Herrenschmidt diff --git a/arch/powerpc/platforms/pseries/eeh_dev.c b/arch/powerpc/platforms/pseries/eeh_dev.c index 8e3443b..a0cee3a 100644 --- a/arch/powerpc/platforms/pseries/eeh_dev.c +++ b/arch/powerpc/platforms/pseries/eeh_dev.c @@ -100,6 +100,8 @@ static int __init eeh_dev_phb_init(void) list_for_each_entry_safe(phb, tmp, &hose_list, list_node) eeh_dev_phb_init_dynamic(phb); + pr_info("EEH: devices created\n"); + return 0; } diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c index 5e2805a..cf6d6cc 100644 --- a/arch/powerpc/platforms/pseries/eeh_pseries.c +++ b/arch/powerpc/platforms/pseries/eeh_pseries.c @@ -561,7 +561,19 @@ static struct eeh_ops pseries_eeh_ops = { */ static int __init eeh_pseries_init(void) { - return eeh_ops_register(&pseries_eeh_ops); + int ret = -EINVAL; + + if (!machine_is(pseries)) + return ret; + + ret = eeh_ops_register(&pseries_eeh_ops); + if (!ret) + pr_info("EEH: pSeries platform initialized\n"); + else + pr_info("EEH: pSeries platform initialization failure (%d)\n", + ret); + + return ret; } early_initcall(eeh_pseries_init); -- cgit v0.10.2 From 968f968f9b9aaa6ff92c20090c600fc7c112af55 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Fri, 7 Sep 2012 22:44:05 +0000 Subject: powerpc/eeh: Introduce eeh_pe struct As defined in PAPR 2.4, Partitionable Endpoint (PE) is an I/O subtree that can be treated as a unit for the purposes of partitioning and error recovery. Therefore, eeh core should be aware of PE. With eeh_pe struct, we can support PE explicitly. Further more, it makes all the stuff much more data centralized. Another important reason is for eeh core to support multiple platforms. Some of them like pSeries figures out PEs through OF nodes while others like powernv have to do that through PCI bus/device tree. With explicit PE support, eeh core will be implemented based on the centrialized data and platform dependent implementations figure it out by their feasible ways. When the struct is designed, following factors are taken in account: * Reflecting the relationships of PEs. PE might have parent as well children. * Reflecting the association of PE and (eeh) devices. * PEs have PHB boundary. * PE should have unique address assigned in the corresponding PHB domain. Signed-off-by: Gavin Shan Signed-off-by: Benjamin Herrenschmidt diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h index 06dedff..f77b6d7 100644 --- a/arch/powerpc/include/asm/eeh.h +++ b/arch/powerpc/include/asm/eeh.h @@ -32,6 +32,42 @@ struct device_node; #ifdef CONFIG_EEH /* + * The struct is used to trace PE related EEH functionality. + * In theory, there will have one instance of the struct to + * be created against particular PE. In nature, PEs corelate + * to each other. the struct has to reflect that hierarchy in + * order to easily pick up those affected PEs when one particular + * PE has EEH errors. + * + * Also, one particular PE might be composed of PCI device, PCI + * bus and its subordinate components. The struct also need ship + * the information. Further more, one particular PE is only meaingful + * in the corresponding PHB. Therefore, the root PEs should be created + * against existing PHBs in on-to-one fashion. + */ +#define EEH_PE_PHB 1 /* PHB PE */ +#define EEH_PE_DEVICE 2 /* Device PE */ +#define EEH_PE_BUS 3 /* Bus PE */ + +#define EEH_PE_ISOLATED (1 << 0) /* Isolated PE */ +#define EEH_PE_RECOVERING (1 << 1) /* Recovering PE */ + +struct eeh_pe { + int type; /* PE type: PHB/Bus/Device */ + int state; /* PE EEH dependent mode */ + int config_addr; /* Traditional PCI address */ + int addr; /* PE configuration address */ + struct pci_controller *phb; /* Associated PHB */ + int check_count; /* Times of ignored error */ + int freeze_count; /* Times of froze up */ + int false_positives; /* Times of reported #ff's */ + struct eeh_pe *parent; /* Parent PE */ + struct list_head child_list; /* Link PE to the child list */ + struct list_head edevs; /* Link list of EEH devices */ + struct list_head child; /* Child PEs */ +}; + +/* * The struct is used to trace EEH state for the associated * PCI device node or PCI device. In future, it might * represent PE as well so that the EEH device to form @@ -53,6 +89,8 @@ struct eeh_dev { int freeze_count; /* Times of froze up */ int false_positives; /* Times of reported #ff's */ u32 config_space[16]; /* Saved PCI config space */ + struct eeh_pe *pe; /* Associated PE */ + struct list_head list; /* Form link list in the PE */ struct pci_controller *phb; /* Associated PHB */ struct device_node *dn; /* Associated device node */ struct pci_dev *pdev; /* Associated PCI device */ -- cgit v0.10.2 From 646a8499403e049845dda3141ec1ac021f029f11 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Fri, 7 Sep 2012 22:44:06 +0000 Subject: powerpc/eeh: Introduce global mutex The patch introduces global mutex for EEH so that the core data structures can be protected by that. Also, 2 inline functions are exported for that: eeh_lock() and eeh_unlock(). Signed-off-by: Gavin Shan Signed-off-by: Benjamin Herrenschmidt diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h index f77b6d7..248b3d9 100644 --- a/arch/powerpc/include/asm/eeh.h +++ b/arch/powerpc/include/asm/eeh.h @@ -146,6 +146,17 @@ struct eeh_ops { extern struct eeh_ops *eeh_ops; extern int eeh_subsystem_enabled; +extern struct mutex eeh_mutex; + +static inline void eeh_lock(void) +{ + mutex_lock(&eeh_mutex); +} + +static inline void eeh_unlock(void) +{ + mutex_unlock(&eeh_mutex); +} /* * Max number of EEH freezes allowed before we consider the device @@ -206,6 +217,10 @@ static inline void eeh_add_device_tree_early(struct device_node *dn) { } static inline void eeh_add_device_tree_late(struct pci_bus *bus) { } static inline void eeh_remove_bus_device(struct pci_dev *dev) { } + +static inline void eeh_lock(void) { } +static inline void eeh_unlock(void) { } + #define EEH_POSSIBLE_ERROR(val, type) (0) #define EEH_IO_ERROR_VALUE(size) (-1UL) #endif /* CONFIG_EEH */ diff --git a/arch/powerpc/platforms/pseries/eeh.c b/arch/powerpc/platforms/pseries/eeh.c index e819448..0ba7e3b 100644 --- a/arch/powerpc/platforms/pseries/eeh.c +++ b/arch/powerpc/platforms/pseries/eeh.c @@ -92,6 +92,9 @@ struct eeh_ops *eeh_ops = NULL; int eeh_subsystem_enabled; EXPORT_SYMBOL(eeh_subsystem_enabled); +/* Global EEH mutex */ +DEFINE_MUTEX(eeh_mutex); + /* Lock to avoid races due to multiple reports of an error */ static DEFINE_RAW_SPINLOCK(confirm_error_lock); -- cgit v0.10.2 From 55037d176107c33ac79528bf9ab282a6b0b51e16 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Fri, 7 Sep 2012 22:44:07 +0000 Subject: powerpc/eeh: Create PEs for PHBs For one particular PE, it's only meaningful in the ancestor PHB domain. Therefore, each PHB should have its own PE hierarchy tree to trace those PEs created against the PHB. The patch creates PEs for the PHBs and put those PEs into the global link list traced by "eeh_phb_pe". The link list of PEs would be first level of overall PE hierarchy tree across the system. Signed-off-by: Gavin Shan Signed-off-by: Benjamin Herrenschmidt diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h index 248b3d9..7b9c7d6 100644 --- a/arch/powerpc/include/asm/eeh.h +++ b/arch/powerpc/include/asm/eeh.h @@ -164,6 +164,8 @@ static inline void eeh_unlock(void) */ #define EEH_MAX_ALLOWED_FREEZES 5 +int __devinit eeh_phb_pe_create(struct pci_controller *phb); + void * __devinit eeh_dev_init(struct device_node *dn, void *data); void __devinit eeh_dev_phb_init_dynamic(struct pci_controller *phb); int __init eeh_ops_register(struct eeh_ops *ops); diff --git a/arch/powerpc/platforms/pseries/Makefile b/arch/powerpc/platforms/pseries/Makefile index c222189..890622b 100644 --- a/arch/powerpc/platforms/pseries/Makefile +++ b/arch/powerpc/platforms/pseries/Makefile @@ -6,8 +6,9 @@ obj-y := lpar.o hvCall.o nvram.o reconfig.o \ firmware.o power.o dlpar.o mobility.o obj-$(CONFIG_SMP) += smp.o obj-$(CONFIG_SCANLOG) += scanlog.o -obj-$(CONFIG_EEH) += eeh.o eeh_dev.o eeh_cache.o eeh_driver.o \ - eeh_event.o eeh_sysfs.o eeh_pseries.o +obj-$(CONFIG_EEH) += eeh.o eeh_pe.o eeh_dev.o eeh_cache.o \ + eeh_driver.o eeh_event.o eeh_sysfs.o \ + eeh_pseries.o obj-$(CONFIG_KEXEC) += kexec.o obj-$(CONFIG_PCI) += pci.o pci_dlpar.o obj-$(CONFIG_PSERIES_MSI) += msi.o diff --git a/arch/powerpc/platforms/pseries/eeh_dev.c b/arch/powerpc/platforms/pseries/eeh_dev.c index a0cee3a..6644234 100644 --- a/arch/powerpc/platforms/pseries/eeh_dev.c +++ b/arch/powerpc/platforms/pseries/eeh_dev.c @@ -65,6 +65,7 @@ void * __devinit eeh_dev_init(struct device_node *dn, void *data) PCI_DN(dn)->edev = edev; edev->dn = dn; edev->phb = phb; + INIT_LIST_HEAD(&edev->list); return NULL; } @@ -80,6 +81,9 @@ void __devinit eeh_dev_phb_init_dynamic(struct pci_controller *phb) { struct device_node *dn = phb->dn; + /* EEH PE for PHB */ + eeh_phb_pe_create(phb); + /* EEH device for PHB */ eeh_dev_init(dn, phb); diff --git a/arch/powerpc/platforms/pseries/eeh_pe.c b/arch/powerpc/platforms/pseries/eeh_pe.c new file mode 100644 index 0000000..7dee7dc --- /dev/null +++ b/arch/powerpc/platforms/pseries/eeh_pe.c @@ -0,0 +1,120 @@ +/* + * The file intends to implement PE based on the information from + * platforms. Basically, there have 3 types of PEs: PHB/Bus/Device. + * All the PEs should be organized as hierarchy tree. The first level + * of the tree will be associated to existing PHBs since the particular + * PE is only meaningful in one PHB domain. + * + * Copyright Benjamin Herrenschmidt & Gavin Shan, IBM Corporation 2012. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include +#include +#include +#include +#include +#include + +#include +#include + +static LIST_HEAD(eeh_phb_pe); + +/** + * eeh_pe_alloc - Allocate PE + * @phb: PCI controller + * @type: PE type + * + * Allocate PE instance dynamically. + */ +static struct eeh_pe *eeh_pe_alloc(struct pci_controller *phb, int type) +{ + struct eeh_pe *pe; + + /* Allocate PHB PE */ + pe = kzalloc(sizeof(struct eeh_pe), GFP_KERNEL); + if (!pe) return NULL; + + /* Initialize PHB PE */ + pe->type = type; + pe->phb = phb; + INIT_LIST_HEAD(&pe->child_list); + INIT_LIST_HEAD(&pe->child); + INIT_LIST_HEAD(&pe->edevs); + + return pe; +} + +/** + * eeh_phb_pe_create - Create PHB PE + * @phb: PCI controller + * + * The function should be called while the PHB is detected during + * system boot or PCI hotplug in order to create PHB PE. + */ +int __devinit eeh_phb_pe_create(struct pci_controller *phb) +{ + struct eeh_pe *pe; + + /* Allocate PHB PE */ + pe = eeh_pe_alloc(phb, EEH_PE_PHB); + if (!pe) { + pr_err("%s: out of memory!\n", __func__); + return -ENOMEM; + } + + /* Put it into the list */ + eeh_lock(); + list_add_tail(&pe->child, &eeh_phb_pe); + eeh_unlock(); + + pr_debug("EEH: Add PE for PHB#%d\n", phb->global_number); + + return 0; +} + +/** + * eeh_phb_pe_get - Retrieve PHB PE based on the given PHB + * @phb: PCI controller + * + * The overall PEs form hierarchy tree. The first layer of the + * hierarchy tree is composed of PHB PEs. The function is used + * to retrieve the corresponding PHB PE according to the given PHB. + */ +static struct eeh_pe *eeh_phb_pe_get(struct pci_controller *phb) +{ + struct eeh_pe *pe; + + eeh_lock(); + + list_for_each_entry(pe, &eeh_phb_pe, child) { + /* + * Actually, we needn't check the type since + * the PE for PHB has been determined when that + * was created. + */ + if (pe->type == EEH_PE_PHB && + pe->phb == phb) { + eeh_unlock(); + return pe; + } + } + + eeh_unlock(); + + return NULL; +} -- cgit v0.10.2 From 22f4ab123f10e1862579785c73d9e60fa24afd4f Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Fri, 7 Sep 2012 22:44:08 +0000 Subject: powerpc/eeh: Search PE based on requirement The patch implements searching PE based on the following requirements: * Search PE according to PE address, which is traditional PE address that is composed of PCI bus/device/function number, or unified PE address assigned by firmware or platform. * Search parent PE according to the given EEH device. It's useful when creating new PE and put it into right position. Signed-off-by: Gavin Shan Signed-off-by: Benjamin Herrenschmidt diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h index 7b9c7d6..1cc1388 100644 --- a/arch/powerpc/include/asm/eeh.h +++ b/arch/powerpc/include/asm/eeh.h @@ -164,6 +164,7 @@ static inline void eeh_unlock(void) */ #define EEH_MAX_ALLOWED_FREEZES 5 +typedef void *(*eeh_traverse_func)(void *data, void *flag); int __devinit eeh_phb_pe_create(struct pci_controller *phb); void * __devinit eeh_dev_init(struct device_node *dn, void *data); diff --git a/arch/powerpc/platforms/pseries/eeh_pe.c b/arch/powerpc/platforms/pseries/eeh_pe.c index 7dee7dc..f74b350 100644 --- a/arch/powerpc/platforms/pseries/eeh_pe.c +++ b/arch/powerpc/platforms/pseries/eeh_pe.c @@ -118,3 +118,146 @@ static struct eeh_pe *eeh_phb_pe_get(struct pci_controller *phb) return NULL; } + +/** + * eeh_pe_next - Retrieve the next PE in the tree + * @pe: current PE + * @root: root PE + * + * The function is used to retrieve the next PE in the + * hierarchy PE tree. + */ +static struct eeh_pe *eeh_pe_next(struct eeh_pe *pe, + struct eeh_pe *root) +{ + struct list_head *next = pe->child_list.next; + + if (next == &pe->child_list) { + while (1) { + if (pe == root) + return NULL; + next = pe->child.next; + if (next != &pe->parent->child_list) + break; + pe = pe->parent; + } + } + + return list_entry(next, struct eeh_pe, child); +} + +/** + * eeh_pe_traverse - Traverse PEs in the specified PHB + * @root: root PE + * @fn: callback + * @flag: extra parameter to callback + * + * The function is used to traverse the specified PE and its + * child PEs. The traversing is to be terminated once the + * callback returns something other than NULL, or no more PEs + * to be traversed. + */ +static void *eeh_pe_traverse(struct eeh_pe *root, + eeh_traverse_func fn, void *flag) +{ + struct eeh_pe *pe; + void *ret; + + for (pe = root; pe; pe = eeh_pe_next(pe, root)) { + ret = fn(pe, flag); + if (ret) return ret; + } + + return NULL; +} + +/** + * __eeh_pe_get - Check the PE address + * @data: EEH PE + * @flag: EEH device + * + * For one particular PE, it can be identified by PE address + * or tranditional BDF address. BDF address is composed of + * Bus/Device/Function number. The extra data referred by flag + * indicates which type of address should be used. + */ +static void *__eeh_pe_get(void *data, void *flag) +{ + struct eeh_pe *pe = (struct eeh_pe *)data; + struct eeh_dev *edev = (struct eeh_dev *)flag; + + /* Unexpected PHB PE */ + if (pe->type == EEH_PE_PHB) + return NULL; + + /* We prefer PE address */ + if (edev->pe_config_addr && + (edev->pe_config_addr == pe->addr)) + return pe; + + /* Try BDF address */ + if (edev->pe_config_addr && + (edev->config_addr == pe->config_addr)) + return pe; + + return NULL; +} + +/** + * eeh_pe_get - Search PE based on the given address + * @edev: EEH device + * + * Search the corresponding PE based on the specified address which + * is included in the eeh device. The function is used to check if + * the associated PE has been created against the PE address. It's + * notable that the PE address has 2 format: traditional PE address + * which is composed of PCI bus/device/function number, or unified + * PE address. + */ +static struct eeh_pe *eeh_pe_get(struct eeh_dev *edev) +{ + struct eeh_pe *root = eeh_phb_pe_get(edev->phb); + struct eeh_pe *pe; + + eeh_lock(); + pe = eeh_pe_traverse(root, __eeh_pe_get, edev); + eeh_unlock(); + + return pe; +} + +/** + * eeh_pe_get_parent - Retrieve the parent PE + * @edev: EEH device + * + * The whole PEs existing in the system are organized as hierarchy + * tree. The function is used to retrieve the parent PE according + * to the parent EEH device. + */ +static struct eeh_pe *eeh_pe_get_parent(struct eeh_dev *edev) +{ + struct device_node *dn; + struct eeh_dev *parent; + + /* + * It might have the case for the indirect parent + * EEH device already having associated PE, but + * the direct parent EEH device doesn't have yet. + */ + dn = edev->dn->parent; + while (dn) { + /* We're poking out of PCI territory */ + if (!PCI_DN(dn)) return NULL; + + parent = of_node_to_eeh_dev(dn); + /* We're poking out of PCI territory */ + if (!parent) return NULL; + + if (parent->pe) + return parent->pe; + + dn = dn->parent; + } + + return NULL; +} -- cgit v0.10.2 From 9b84348c92a122f3ccecda04083ada620312aa53 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Fri, 7 Sep 2012 22:44:09 +0000 Subject: powerpc/eeh: Create PEs duing EEH initialization The patch creates PEs and associated the newly created PEs with it parent/silbing as well as EEH devices. It would become more straight to trace EEH errors and recover them accordingly. Once the EEH functionality on one PCI IOA has been enabled, we tries to create PE against it. If there's existing PE, to which the current PCI IOA should be attached, the existing PE will be converted from "device" type to "bus" type accordingly. Signed-off-by: Gavin Shan Signed-off-by: Benjamin Herrenschmidt diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h index 1cc1388..6b13790 100644 --- a/arch/powerpc/include/asm/eeh.h +++ b/arch/powerpc/include/asm/eeh.h @@ -166,6 +166,7 @@ static inline void eeh_unlock(void) typedef void *(*eeh_traverse_func)(void *data, void *flag); int __devinit eeh_phb_pe_create(struct pci_controller *phb); +int eeh_add_to_parent_pe(struct eeh_dev *edev); void * __devinit eeh_dev_init(struct device_node *dn, void *data); void __devinit eeh_dev_phb_init_dynamic(struct pci_controller *phb); diff --git a/arch/powerpc/platforms/pseries/eeh.c b/arch/powerpc/platforms/pseries/eeh.c index 0ba7e3b..8f214906 100644 --- a/arch/powerpc/platforms/pseries/eeh.c +++ b/arch/powerpc/platforms/pseries/eeh.c @@ -895,6 +895,8 @@ static void *eeh_early_enable(struct device_node *dn, void *data) eeh_subsystem_enabled = 1; edev->mode |= EEH_MODE_SUPPORTED; + eeh_add_to_parent_pe(edev); + pr_debug("EEH: %s: eeh enabled, config=%x pe_config=%x\n", dn->full_name, edev->config_addr, edev->pe_config_addr); @@ -908,6 +910,10 @@ static void *eeh_early_enable(struct device_node *dn, void *data) /* Parent supports EEH. */ edev->mode |= EEH_MODE_SUPPORTED; edev->config_addr = of_node_to_eeh_dev(dn->parent)->config_addr; + edev->pe_config_addr = of_node_to_eeh_dev(dn->parent)->pe_config_addr; + + eeh_add_to_parent_pe(edev); + return NULL; } } diff --git a/arch/powerpc/platforms/pseries/eeh_pe.c b/arch/powerpc/platforms/pseries/eeh_pe.c index f74b350..1d63273 100644 --- a/arch/powerpc/platforms/pseries/eeh_pe.c +++ b/arch/powerpc/platforms/pseries/eeh_pe.c @@ -261,3 +261,83 @@ static struct eeh_pe *eeh_pe_get_parent(struct eeh_dev *edev) return NULL; } + +/** + * eeh_add_to_parent_pe - Add EEH device to parent PE + * @edev: EEH device + * + * Add EEH device to the parent PE. If the parent PE already + * exists, the PE type will be changed to EEH_PE_BUS. Otherwise, + * we have to create new PE to hold the EEH device and the new + * PE will be linked to its parent PE as well. + */ +int eeh_add_to_parent_pe(struct eeh_dev *edev) +{ + struct eeh_pe *pe, *parent; + + /* + * Search the PE has been existing or not according + * to the PE address. If that has been existing, the + * PE should be composed of PCI bus and its subordinate + * components. + */ + pe = eeh_pe_get(edev); + if (pe) { + if (!edev->pe_config_addr) { + pr_err("%s: PE with addr 0x%x already exists\n", + __func__, edev->config_addr); + return -EEXIST; + } + + /* Mark the PE as type of PCI bus */ + pe->type = EEH_PE_BUS; + edev->pe = pe; + + /* Put the edev to PE */ + list_add_tail(&edev->list, &pe->edevs); + pr_debug("EEH: Add %s to Bus PE#%x\n", + edev->dn->full_name, pe->addr); + + return 0; + } + + /* Create a new EEH PE */ + pe = eeh_pe_alloc(edev->phb, EEH_PE_DEVICE); + if (!pe) { + pr_err("%s: out of memory!\n", __func__); + return -ENOMEM; + } + pe->addr = edev->pe_config_addr; + pe->config_addr = edev->config_addr; + + /* + * Put the new EEH PE into hierarchy tree. If the parent + * can't be found, the newly created PE will be attached + * to PHB directly. Otherwise, we have to associate the + * PE with its parent. + */ + parent = eeh_pe_get_parent(edev); + if (!parent) { + parent = eeh_phb_pe_get(edev->phb); + if (!parent) { + pr_err("%s: No PHB PE is found (PHB Domain=%d)\n", + __func__, edev->phb->global_number); + edev->pe = NULL; + kfree(pe); + return -EEXIST; + } + } + pe->parent = parent; + + /* + * Put the newly created PE into the child list and + * link the EEH device accordingly. + */ + list_add_tail(&pe->child, &parent->child_list); + list_add_tail(&edev->list, &pe->edevs); + edev->pe = pe; + pr_debug("EEH: Add %s to Device PE#%x, Parent PE#%x\n", + edev->dn->full_name, pe->addr, pe->parent->addr); + + return 0; +} -- cgit v0.10.2 From 82e8882f7f430806bd6bfd203aa4c3f41e3812e9 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Fri, 7 Sep 2012 22:44:10 +0000 Subject: powerpc/eeh: Remove PE at appropriate time During PCI hotplug and EEH recovery, the PE hierarchy tree might be changed due to the PCI topology changes. At later point when the PCI device is added, the PE will be created dynamically again. The patch introduces new function to remove EEH devices from the associated PE. That also can cause that the parent PE is removed from the PE tree if the parent PE doesn't include valid EEH devices and child PEs. Signed-off-by: Gavin Shan Signed-off-by: Benjamin Herrenschmidt diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h index 6b13790..250ae27 100644 --- a/arch/powerpc/include/asm/eeh.h +++ b/arch/powerpc/include/asm/eeh.h @@ -167,6 +167,7 @@ static inline void eeh_unlock(void) typedef void *(*eeh_traverse_func)(void *data, void *flag); int __devinit eeh_phb_pe_create(struct pci_controller *phb); int eeh_add_to_parent_pe(struct eeh_dev *edev); +int eeh_rmv_from_parent_pe(struct eeh_dev *edev); void * __devinit eeh_dev_init(struct device_node *dn, void *data); void __devinit eeh_dev_phb_init_dynamic(struct pci_controller *phb); diff --git a/arch/powerpc/platforms/pseries/eeh.c b/arch/powerpc/platforms/pseries/eeh.c index 8f214906..b60863b 100644 --- a/arch/powerpc/platforms/pseries/eeh.c +++ b/arch/powerpc/platforms/pseries/eeh.c @@ -1156,6 +1156,7 @@ static void eeh_remove_device(struct pci_dev *dev) dev->dev.archdata.edev = NULL; pci_dev_put(dev); + eeh_rmv_from_parent_pe(edev); pci_addr_cache_remove_device(dev); eeh_sysfs_remove_device(dev); } diff --git a/arch/powerpc/platforms/pseries/eeh_pe.c b/arch/powerpc/platforms/pseries/eeh_pe.c index 1d63273..e941e63 100644 --- a/arch/powerpc/platforms/pseries/eeh_pe.c +++ b/arch/powerpc/platforms/pseries/eeh_pe.c @@ -341,3 +341,50 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev) return 0; } + +/** + * eeh_rmv_from_parent_pe - Remove one EEH device from the associated PE + * @edev: EEH device + * + * The PE hierarchy tree might be changed when doing PCI hotplug. + * Also, the PCI devices or buses could be removed from the system + * during EEH recovery. So we have to call the function remove the + * corresponding PE accordingly if necessary. + */ +int eeh_rmv_from_parent_pe(struct eeh_dev *edev) +{ + struct eeh_pe *pe, *parent; + + if (!edev->pe) { + pr_warning("%s: No PE found for EEH device %s\n", + __func__, edev->dn->full_name); + return -EEXIST; + } + + /* Remove the EEH device */ + pe = edev->pe; + edev->pe = NULL; + list_del(&edev->list); + + /* + * Check if the parent PE includes any EEH devices. + * If not, we should delete that. Also, we should + * delete the parent PE if it doesn't have associated + * child PEs and EEH devices. + */ + while (1) { + parent = pe->parent; + if (pe->type == EEH_PE_PHB) + break; + + if (list_empty(&pe->edevs) && + list_empty(&pe->child_list)) { + list_del(&pe->child); + kfree(pe); + } + + pe = parent; + } + + return 0; +} -- cgit v0.10.2 From c533b46cc7da58e5ccbd20ac2f607ff60d24eb4e Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Fri, 7 Sep 2012 22:44:11 +0000 Subject: powerpc/eeh: Build EEH event based on PE The original implementation builds EEH event based on EEH device. We already had dedicated struct to depict PE. It's reasonable to build EEH event based on PE. Signed-off-by: Gavin Shan Signed-off-by: Benjamin Herrenschmidt diff --git a/arch/powerpc/include/asm/eeh_event.h b/arch/powerpc/include/asm/eeh_event.h index c68b012..dc722b5 100644 --- a/arch/powerpc/include/asm/eeh_event.h +++ b/arch/powerpc/include/asm/eeh_event.h @@ -28,10 +28,10 @@ */ struct eeh_event { struct list_head list; /* to form event queue */ - struct eeh_dev *edev; /* EEH device */ + struct eeh_pe *pe; /* EEH PE */ }; -int eeh_send_failure_event(struct eeh_dev *edev); +int eeh_send_failure_event(struct eeh_pe *pe); struct eeh_dev *handle_eeh_events(struct eeh_event *); #endif /* __KERNEL__ */ diff --git a/arch/powerpc/platforms/pseries/eeh_event.c b/arch/powerpc/platforms/pseries/eeh_event.c index 6132772..7f89f1e 100644 --- a/arch/powerpc/platforms/pseries/eeh_event.c +++ b/arch/powerpc/platforms/pseries/eeh_event.c @@ -119,36 +119,23 @@ static void eeh_thread_launcher(struct work_struct *dummy) /** * eeh_send_failure_event - Generate a PCI error event - * @edev: EEH device + * @pe: EEH PE * * This routine can be called within an interrupt context; * the actual event will be delivered in a normal context * (from a workqueue). */ -int eeh_send_failure_event(struct eeh_dev *edev) +int eeh_send_failure_event(struct eeh_pe *pe) { unsigned long flags; struct eeh_event *event; - struct device_node *dn = eeh_dev_to_of_node(edev); - const char *location; - - if (!mem_init_done) { - printk(KERN_ERR "EEH: event during early boot not handled\n"); - location = of_get_property(dn, "ibm,loc-code", NULL); - printk(KERN_ERR "EEH: device node = %s\n", dn->full_name); - printk(KERN_ERR "EEH: PCI location = %s\n", location); - return 1; - } - event = kzalloc(sizeof(*event), GFP_ATOMIC); - if (event == NULL) { - printk(KERN_ERR "EEH: out of memory, event not handled\n"); - return 1; - } - - if (edev->pdev) - pci_dev_get(edev->pdev); - event->edev = edev; + event = kzalloc(sizeof(*event), GFP_ATOMIC); + if (!event) { + pr_err("EEH: out of memory, event not handled\n"); + return -ENOMEM; + } + event->pe = pe; /* We may or may not be called in an interrupt context */ spin_lock_irqsave(&eeh_eventlist_lock, flags); -- cgit v0.10.2 From 5b6635294418a954b5f911375b86d3552cf93f36 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Fri, 7 Sep 2012 22:44:12 +0000 Subject: powerpc/eeh: Trace EEH state based on PE Since we've introduced dedicated struct to trace individual PEs, it's reasonable to trace its state through the dedicated struct instead of using "eeh_dev" any more. The patches implements the state tracing based on PE. It's notable that the PE state will be applied to the specified PE as well as its child PEs. That complies with the rule that problematic parent PE will prevent those child PEs from working properly. Signed-off-by: Gavin Shan Signed-off-by: Benjamin Herrenschmidt diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h index 250ae27..f86a85f 100644 --- a/arch/powerpc/include/asm/eeh.h +++ b/arch/powerpc/include/asm/eeh.h @@ -67,6 +67,9 @@ struct eeh_pe { struct list_head child; /* Child PEs */ }; +#define eeh_pe_for_each_dev(pe, edev) \ + list_for_each_entry(edev, &pe->edevs, list) + /* * The struct is used to trace EEH state for the associated * PCI device node or PCI device. In future, it might diff --git a/arch/powerpc/include/asm/ppc-pci.h b/arch/powerpc/include/asm/ppc-pci.h index 80fa704..c7e5bd6 100644 --- a/arch/powerpc/include/asm/ppc-pci.h +++ b/arch/powerpc/include/asm/ppc-pci.h @@ -57,8 +57,8 @@ int eeh_reset_pe(struct eeh_dev *); void eeh_restore_bars(struct eeh_dev *); int rtas_write_config(struct pci_dn *, int where, int size, u32 val); int rtas_read_config(struct pci_dn *, int where, int size, u32 *val); -void eeh_mark_slot(struct device_node *dn, int mode_flag); -void eeh_clear_slot(struct device_node *dn, int mode_flag); +void eeh_pe_state_mark(struct eeh_pe *pe, int state); +void eeh_pe_state_clear(struct eeh_pe *pe, int state); struct device_node *eeh_find_device_pe(struct device_node *dn); void eeh_sysfs_add_device(struct pci_dev *pdev); diff --git a/arch/powerpc/platforms/pseries/eeh.c b/arch/powerpc/platforms/pseries/eeh.c index b60863b..9c623c2 100644 --- a/arch/powerpc/platforms/pseries/eeh.c +++ b/arch/powerpc/platforms/pseries/eeh.c @@ -279,108 +279,6 @@ struct device_node *eeh_find_device_pe(struct device_node *dn) } /** - * __eeh_mark_slot - Mark all child devices as failed - * @parent: parent device - * @mode_flag: failure flag - * - * Mark all devices that are children of this device as failed. - * Mark the device driver too, so that it can see the failure - * immediately; this is critical, since some drivers poll - * status registers in interrupts ... If a driver is polling, - * and the slot is frozen, then the driver can deadlock in - * an interrupt context, which is bad. - */ -static void __eeh_mark_slot(struct device_node *parent, int mode_flag) -{ - struct device_node *dn; - - for_each_child_of_node(parent, dn) { - if (of_node_to_eeh_dev(dn)) { - /* Mark the pci device driver too */ - struct pci_dev *dev = of_node_to_eeh_dev(dn)->pdev; - - of_node_to_eeh_dev(dn)->mode |= mode_flag; - - if (dev && dev->driver) - dev->error_state = pci_channel_io_frozen; - - __eeh_mark_slot(dn, mode_flag); - } - } -} - -/** - * eeh_mark_slot - Mark the indicated device and its children as failed - * @dn: parent device - * @mode_flag: failure flag - * - * Mark the indicated device and its child devices as failed. - * The device drivers are marked as failed as well. - */ -void eeh_mark_slot(struct device_node *dn, int mode_flag) -{ - struct pci_dev *dev; - dn = eeh_find_device_pe(dn); - - /* Back up one, since config addrs might be shared */ - if (!pcibios_find_pci_bus(dn) && of_node_to_eeh_dev(dn->parent)) - dn = dn->parent; - - of_node_to_eeh_dev(dn)->mode |= mode_flag; - - /* Mark the pci device too */ - dev = of_node_to_eeh_dev(dn)->pdev; - if (dev) - dev->error_state = pci_channel_io_frozen; - - __eeh_mark_slot(dn, mode_flag); -} - -/** - * __eeh_clear_slot - Clear failure flag for the child devices - * @parent: parent device - * @mode_flag: flag to be cleared - * - * Clear failure flag for the child devices. - */ -static void __eeh_clear_slot(struct device_node *parent, int mode_flag) -{ - struct device_node *dn; - - for_each_child_of_node(parent, dn) { - if (of_node_to_eeh_dev(dn)) { - of_node_to_eeh_dev(dn)->mode &= ~mode_flag; - of_node_to_eeh_dev(dn)->check_count = 0; - __eeh_clear_slot(dn, mode_flag); - } - } -} - -/** - * eeh_clear_slot - Clear failure flag for the indicated device and its children - * @dn: parent device - * @mode_flag: flag to be cleared - * - * Clear failure flag for the indicated device and its children. - */ -void eeh_clear_slot(struct device_node *dn, int mode_flag) -{ - unsigned long flags; - raw_spin_lock_irqsave(&confirm_error_lock, flags); - - dn = eeh_find_device_pe(dn); - - /* Back up one, since config addrs might be shared */ - if (!pcibios_find_pci_bus(dn) && of_node_to_eeh_dev(dn->parent)) - dn = dn->parent; - - of_node_to_eeh_dev(dn)->mode &= ~mode_flag; - of_node_to_eeh_dev(dn)->check_count = 0; - __eeh_clear_slot(dn, mode_flag); - raw_spin_unlock_irqrestore(&confirm_error_lock, flags); -} - -/** * eeh_dn_check_failure - Check if all 1's data is due to EEH slot freeze * @dn: device node * @dev: pci device, if known diff --git a/arch/powerpc/platforms/pseries/eeh_pe.c b/arch/powerpc/platforms/pseries/eeh_pe.c index e941e63..0dac1b6 100644 --- a/arch/powerpc/platforms/pseries/eeh_pe.c +++ b/arch/powerpc/platforms/pseries/eeh_pe.c @@ -388,3 +388,82 @@ int eeh_rmv_from_parent_pe(struct eeh_dev *edev) return 0; } + +/** + * __eeh_pe_state_mark - Mark the state for the PE + * @data: EEH PE + * @flag: state + * + * The function is used to mark the indicated state for the given + * PE. Also, the associated PCI devices will be put into IO frozen + * state as well. + */ +static void *__eeh_pe_state_mark(void *data, void *flag) +{ + struct eeh_pe *pe = (struct eeh_pe *)data; + int state = *((int *)flag); + struct eeh_dev *tmp; + struct pci_dev *pdev; + + /* + * Mark the PE with the indicated state. Also, + * the associated PCI device will be put into + * I/O frozen state to avoid I/O accesses from + * the PCI device driver. + */ + pe->state |= state; + eeh_pe_for_each_dev(pe, tmp) { + pdev = eeh_dev_to_pci_dev(tmp); + if (pdev) + pdev->error_state = pci_channel_io_frozen; + } + + return NULL; +} + +/** + * eeh_pe_state_mark - Mark specified state for PE and its associated device + * @pe: EEH PE + * + * EEH error affects the current PE and its child PEs. The function + * is used to mark appropriate state for the affected PEs and the + * associated devices. + */ +void eeh_pe_state_mark(struct eeh_pe *pe, int state) +{ + eeh_pe_traverse(pe, __eeh_pe_state_mark, &state); +} + +/** + * __eeh_pe_state_clear - Clear state for the PE + * @data: EEH PE + * @flag: state + * + * The function is used to clear the indicated state from the + * given PE. Besides, we also clear the check count of the PE + * as well. + */ +static void *__eeh_pe_state_clear(void *data, void *flag) +{ + struct eeh_pe *pe = (struct eeh_pe *)data; + int state = *((int *)flag); + + pe->state &= ~state; + pe->check_count = 0; + + return NULL; +} + +/** + * eeh_pe_state_clear - Clear state for the PE and its children + * @pe: PE + * @state: state to be cleared + * + * When the PE and its children has been recovered from error, + * we need clear the error state for that. The function is used + * for the purpose. + */ +void eeh_pe_state_clear(struct eeh_pe *pe, int state) +{ + eeh_pe_traverse(pe, __eeh_pe_state_clear, &state); +} -- cgit v0.10.2 From 66523d9f2d799de901a5ae7bbed6c3f663fb0b00 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Fri, 7 Sep 2012 22:44:13 +0000 Subject: powerpc/eeh: Trace error based on PE from beginning There're 2 conditions to trigger EEH error detection: invalid value returned from reading I/O or config space. On each case, the function eeh_dn_check_failure will be called to initialize EEH event and put it into the poll for further processing. The patch changes the function for a little bit so that the EEH error will be traced based on PE instead of EEH device any more. Also, the function eeh_find_device_pe() has been removed since the eeh device is tracing the PE by struct eeh_dev::pe. Signed-off-by: Gavin Shan Signed-off-by: Benjamin Herrenschmidt diff --git a/arch/powerpc/include/asm/ppc-pci.h b/arch/powerpc/include/asm/ppc-pci.h index c7e5bd6..3e301b1 100644 --- a/arch/powerpc/include/asm/ppc-pci.h +++ b/arch/powerpc/include/asm/ppc-pci.h @@ -59,7 +59,6 @@ int rtas_write_config(struct pci_dn *, int where, int size, u32 val); int rtas_read_config(struct pci_dn *, int where, int size, u32 *val); void eeh_pe_state_mark(struct eeh_pe *pe, int state); void eeh_pe_state_clear(struct eeh_pe *pe, int state); -struct device_node *eeh_find_device_pe(struct device_node *dn); void eeh_sysfs_add_device(struct pci_dev *pdev); void eeh_sysfs_remove_device(struct pci_dev *pdev); diff --git a/arch/powerpc/platforms/pseries/eeh.c b/arch/powerpc/platforms/pseries/eeh.c index 9c623c2..f210160 100644 --- a/arch/powerpc/platforms/pseries/eeh.c +++ b/arch/powerpc/platforms/pseries/eeh.c @@ -264,21 +264,6 @@ static inline unsigned long eeh_token_to_phys(unsigned long token) } /** - * eeh_find_device_pe - Retrieve the PE for the given device - * @dn: device node - * - * Return the PE under which this device lies - */ -struct device_node *eeh_find_device_pe(struct device_node *dn) -{ - while (dn->parent && of_node_to_eeh_dev(dn->parent) && - (of_node_to_eeh_dev(dn->parent)->mode & EEH_MODE_SUPPORTED)) { - dn = dn->parent; - } - return dn; -} - -/** * eeh_dn_check_failure - Check if all 1's data is due to EEH slot freeze * @dn: device node * @dev: pci device, if known @@ -297,6 +282,7 @@ int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev) { int ret; unsigned long flags; + struct eeh_pe *pe; struct eeh_dev *edev; int rc = 0; const char *location; @@ -306,23 +292,26 @@ int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev) if (!eeh_subsystem_enabled) return 0; - if (!dn) { + if (dn) { + edev = of_node_to_eeh_dev(dn); + } else if (dev) { + edev = pci_dev_to_eeh_dev(dev); + dn = pci_device_to_OF_node(dev); + } else { eeh_stats.no_dn++; return 0; } - dn = eeh_find_device_pe(dn); - edev = of_node_to_eeh_dev(dn); + pe = edev->pe; /* Access to IO BARs might get this far and still not want checking. */ - if (!(edev->mode & EEH_MODE_SUPPORTED) || - edev->mode & EEH_MODE_NOCHECK) { + if (!pe) { eeh_stats.ignored_check++; - pr_debug("EEH: Ignored check (%x) for %s %s\n", - edev->mode, eeh_pci_name(dev), dn->full_name); + pr_debug("EEH: Ignored check for %s %s\n", + eeh_pci_name(dev), dn->full_name); return 0; } - if (!edev->config_addr && !edev->pe_config_addr) { + if (!pe->addr && !pe->config_addr) { eeh_stats.no_cfg_addr++; return 0; } @@ -335,13 +324,13 @@ int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev) */ raw_spin_lock_irqsave(&confirm_error_lock, flags); rc = 1; - if (edev->mode & EEH_MODE_ISOLATED) { - edev->check_count++; - if (edev->check_count % EEH_MAX_FAILS == 0) { + if (pe->state & EEH_PE_ISOLATED) { + pe->check_count++; + if (pe->check_count % EEH_MAX_FAILS == 0) { location = of_get_property(dn, "ibm,loc-code", NULL); printk(KERN_ERR "EEH: %d reads ignored for recovering device at " "location=%s driver=%s pci addr=%s\n", - edev->check_count, location, + pe->check_count, location, eeh_driver_name(dev), eeh_pci_name(dev)); printk(KERN_ERR "EEH: Might be infinite loop in %s driver\n", eeh_driver_name(dev)); @@ -357,7 +346,7 @@ int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev) * function zero of a multi-function device. * In any case they must share a common PHB. */ - ret = eeh_ops->get_state(dn, NULL); + ret = eeh_ops->get_state(pe, NULL); /* Note that config-io to empty slots may fail; * they are empty when they don't have children. @@ -370,7 +359,7 @@ int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev) (ret & (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE)) == (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE)) { eeh_stats.false_positives++; - edev->false_positives ++; + pe->false_positives++; rc = 0; goto dn_unlock; } @@ -381,10 +370,10 @@ int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev) * with other functions on this device, and functions under * bridges. */ - eeh_mark_slot(dn, EEH_MODE_ISOLATED); + eeh_pe_state_mark(pe, EEH_PE_ISOLATED); raw_spin_unlock_irqrestore(&confirm_error_lock, flags); - eeh_send_failure_event(edev); + eeh_send_failure_event(pe); /* Most EEH events are due to device driver bugs. Having * a stack trace will help the device-driver authors figure diff --git a/arch/powerpc/platforms/pseries/msi.c b/arch/powerpc/platforms/pseries/msi.c index 8bc89e4..d19f497 100644 --- a/arch/powerpc/platforms/pseries/msi.c +++ b/arch/powerpc/platforms/pseries/msi.c @@ -210,6 +210,7 @@ static struct device_node *find_pe_total_msi(struct pci_dev *dev, int *total) static struct device_node *find_pe_dn(struct pci_dev *dev, int *total) { struct device_node *dn; + struct eeh_dev *edev; /* Found our PE and assume 8 at that point. */ @@ -217,7 +218,10 @@ static struct device_node *find_pe_dn(struct pci_dev *dev, int *total) if (!dn) return NULL; - dn = eeh_find_device_pe(dn); + /* Get the top level device in the PE */ + edev = of_node_to_eeh_dev(dn); + edev = list_first_entry(&edev->pe->edevs, struct eeh_dev, list); + dn = eeh_dev_to_of_node(edev); if (!dn) return NULL; -- cgit v0.10.2 From 371a395d2f1fe296c89735547672d70f4dcc2949 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Fri, 7 Sep 2012 22:44:14 +0000 Subject: powerpc/eeh: Make EEH operations based on PE Originally, all the EEH operations were implemented based on OF node. Actually, it explicitly breaks the rules that the operation target is PE instead of device. Therefore, the patch makes all the operations based on PE instead of device. Unfortunately, the backend for config space has to be kept as original because it doesn't depend on PE. Signed-off-by: Gavin Shan Signed-off-by: Benjamin Herrenschmidt diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h index f86a85f..5e45a1c 100644 --- a/arch/powerpc/include/asm/eeh.h +++ b/arch/powerpc/include/asm/eeh.h @@ -136,13 +136,13 @@ static inline struct pci_dev *eeh_dev_to_pci_dev(struct eeh_dev *edev) struct eeh_ops { char *name; int (*init)(void); - int (*set_option)(struct device_node *dn, int option); - int (*get_pe_addr)(struct device_node *dn); - int (*get_state)(struct device_node *dn, int *state); - int (*reset)(struct device_node *dn, int option); - int (*wait_state)(struct device_node *dn, int max_wait); - int (*get_log)(struct device_node *dn, int severity, char *drv_log, unsigned long len); - int (*configure_bridge)(struct device_node *dn); + int (*set_option)(struct eeh_pe *pe, int option); + int (*get_pe_addr)(struct eeh_pe *pe); + int (*get_state)(struct eeh_pe *pe, int *state); + int (*reset)(struct eeh_pe *pe, int option); + int (*wait_state)(struct eeh_pe *pe, int max_wait); + int (*get_log)(struct eeh_pe *pe, int severity, char *drv_log, unsigned long len); + int (*configure_bridge)(struct eeh_pe *pe); int (*read_config)(struct device_node *dn, int where, int size, u32 *val); int (*write_config)(struct device_node *dn, int where, int size, u32 val); }; diff --git a/arch/powerpc/platforms/pseries/eeh.c b/arch/powerpc/platforms/pseries/eeh.c index f210160..3c8658e 100644 --- a/arch/powerpc/platforms/pseries/eeh.c +++ b/arch/powerpc/platforms/pseries/eeh.c @@ -729,6 +729,7 @@ static void *eeh_early_enable(struct device_node *dn, void *data) const u32 *regs; int enable; struct eeh_dev *edev = of_node_to_eeh_dev(dn); + struct eeh_pe pe; edev->class_code = 0; edev->mode = 0; @@ -755,9 +756,14 @@ static void *eeh_early_enable(struct device_node *dn, void *data) */ regs = of_get_property(dn, "reg", NULL); if (regs) { + /* Initialize the fake PE */ + memset(&pe, 0, sizeof(struct eeh_pe)); + pe.phb = edev->phb; + pe.config_addr = regs[0]; + /* First register entry is addr (00BBSS00) */ /* Try to enable eeh */ - ret = eeh_ops->set_option(dn, EEH_OPT_ENABLE); + ret = eeh_ops->set_option(&pe, EEH_OPT_ENABLE); enable = 0; if (ret == 0) { @@ -766,14 +772,15 @@ static void *eeh_early_enable(struct device_node *dn, void *data) /* If the newer, better, ibm,get-config-addr-info is supported, * then use that instead. */ - edev->pe_config_addr = eeh_ops->get_pe_addr(dn); + edev->pe_config_addr = eeh_ops->get_pe_addr(&pe); + pe.addr = edev->pe_config_addr; /* Some older systems (Power4) allow the * ibm,set-eeh-option call to succeed even on nodes * where EEH is not supported. Verify support * explicitly. */ - ret = eeh_ops->get_state(dn, NULL); + ret = eeh_ops->get_state(&pe, NULL); if (ret > 0 && ret != EEH_STATE_NOT_SUPPORT) enable = 1; } diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c index cf6d6cc..fdeef77 100644 --- a/arch/powerpc/platforms/pseries/eeh_pseries.c +++ b/arch/powerpc/platforms/pseries/eeh_pseries.c @@ -134,22 +134,18 @@ static int pseries_eeh_init(void) /** * pseries_eeh_set_option - Initialize EEH or MMIO/DMA reenable - * @dn: device node + * @pe: EEH PE * @option: operation to be issued * * The function is used to control the EEH functionality globally. * Currently, following options are support according to PAPR: * Enable EEH, Disable EEH, Enable MMIO and Enable DMA */ -static int pseries_eeh_set_option(struct device_node *dn, int option) +static int pseries_eeh_set_option(struct eeh_pe *pe, int option) { int ret = 0; - struct eeh_dev *edev; - const u32 *reg; int config_addr; - edev = of_node_to_eeh_dev(dn); - /* * When we're enabling or disabling EEH functioality on * the particular PE, the PE config address is possibly @@ -159,15 +155,11 @@ static int pseries_eeh_set_option(struct device_node *dn, int option) switch (option) { case EEH_OPT_DISABLE: case EEH_OPT_ENABLE: - reg = of_get_property(dn, "reg", NULL); - config_addr = reg[0]; - break; - case EEH_OPT_THAW_MMIO: case EEH_OPT_THAW_DMA: - config_addr = edev->config_addr; - if (edev->pe_config_addr) - config_addr = edev->pe_config_addr; + config_addr = pe->config_addr; + if (pe->addr) + config_addr = pe->addr; break; default: @@ -177,15 +169,15 @@ static int pseries_eeh_set_option(struct device_node *dn, int option) } ret = rtas_call(ibm_set_eeh_option, 4, 1, NULL, - config_addr, BUID_HI(edev->phb->buid), - BUID_LO(edev->phb->buid), option); + config_addr, BUID_HI(pe->phb->buid), + BUID_LO(pe->phb->buid), option); return ret; } /** * pseries_eeh_get_pe_addr - Retrieve PE address - * @dn: device node + * @pe: EEH PE * * Retrieve the assocated PE address. Actually, there're 2 RTAS * function calls dedicated for the purpose. We need implement @@ -196,14 +188,11 @@ static int pseries_eeh_set_option(struct device_node *dn, int option) * It's notable that zero'ed return value means invalid PE config * address. */ -static int pseries_eeh_get_pe_addr(struct device_node *dn) +static int pseries_eeh_get_pe_addr(struct eeh_pe *pe) { - struct eeh_dev *edev; int ret = 0; int rets[3]; - edev = of_node_to_eeh_dev(dn); - if (ibm_get_config_addr_info2 != RTAS_UNKNOWN_SERVICE) { /* * First of all, we need to make sure there has one PE @@ -211,18 +200,18 @@ static int pseries_eeh_get_pe_addr(struct device_node *dn) * meaningless. */ ret = rtas_call(ibm_get_config_addr_info2, 4, 2, rets, - edev->config_addr, BUID_HI(edev->phb->buid), - BUID_LO(edev->phb->buid), 1); + pe->config_addr, BUID_HI(pe->phb->buid), + BUID_LO(pe->phb->buid), 1); if (ret || (rets[0] == 0)) return 0; /* Retrieve the associated PE config address */ ret = rtas_call(ibm_get_config_addr_info2, 4, 2, rets, - edev->config_addr, BUID_HI(edev->phb->buid), - BUID_LO(edev->phb->buid), 0); + pe->config_addr, BUID_HI(pe->phb->buid), + BUID_LO(pe->phb->buid), 0); if (ret) { - pr_warning("%s: Failed to get PE address for %s\n", - __func__, dn->full_name); + pr_warning("%s: Failed to get address for PHB#%d-PE#%x\n", + __func__, pe->phb->global_number, pe->config_addr); return 0; } @@ -231,11 +220,11 @@ static int pseries_eeh_get_pe_addr(struct device_node *dn) if (ibm_get_config_addr_info != RTAS_UNKNOWN_SERVICE) { ret = rtas_call(ibm_get_config_addr_info, 4, 2, rets, - edev->config_addr, BUID_HI(edev->phb->buid), - BUID_LO(edev->phb->buid), 0); + pe->config_addr, BUID_HI(pe->phb->buid), + BUID_LO(pe->phb->buid), 0); if (ret) { - pr_warning("%s: Failed to get PE address for %s\n", - __func__, dn->full_name); + pr_warning("%s: Failed to get address for PHB#%d-PE#%x\n", + __func__, pe->phb->global_number, pe->config_addr); return 0; } @@ -247,7 +236,7 @@ static int pseries_eeh_get_pe_addr(struct device_node *dn) /** * pseries_eeh_get_state - Retrieve PE state - * @dn: PE associated device node + * @pe: EEH PE * @state: return value * * Retrieve the state of the specified PE. On RTAS compliant @@ -258,30 +247,28 @@ static int pseries_eeh_get_pe_addr(struct device_node *dn) * RTAS calls for the purpose, we need to try the new one and back * to the old one if the new one couldn't work properly. */ -static int pseries_eeh_get_state(struct device_node *dn, int *state) +static int pseries_eeh_get_state(struct eeh_pe *pe, int *state) { - struct eeh_dev *edev; int config_addr; int ret; int rets[4]; int result; /* Figure out PE config address if possible */ - edev = of_node_to_eeh_dev(dn); - config_addr = edev->config_addr; - if (edev->pe_config_addr) - config_addr = edev->pe_config_addr; + config_addr = pe->config_addr; + if (pe->addr) + config_addr = pe->addr; if (ibm_read_slot_reset_state2 != RTAS_UNKNOWN_SERVICE) { ret = rtas_call(ibm_read_slot_reset_state2, 3, 4, rets, - config_addr, BUID_HI(edev->phb->buid), - BUID_LO(edev->phb->buid)); + config_addr, BUID_HI(pe->phb->buid), + BUID_LO(pe->phb->buid)); } else if (ibm_read_slot_reset_state != RTAS_UNKNOWN_SERVICE) { /* Fake PE unavailable info */ rets[2] = 0; ret = rtas_call(ibm_read_slot_reset_state, 3, 3, rets, - config_addr, BUID_HI(edev->phb->buid), - BUID_LO(edev->phb->buid)); + config_addr, BUID_HI(pe->phb->buid), + BUID_LO(pe->phb->buid)); } else { return EEH_STATE_NOT_SUPPORT; } @@ -333,34 +320,32 @@ static int pseries_eeh_get_state(struct device_node *dn, int *state) /** * pseries_eeh_reset - Reset the specified PE - * @dn: PE associated device node + * @pe: EEH PE * @option: reset option * * Reset the specified PE */ -static int pseries_eeh_reset(struct device_node *dn, int option) +static int pseries_eeh_reset(struct eeh_pe *pe, int option) { - struct eeh_dev *edev; int config_addr; int ret; /* Figure out PE address */ - edev = of_node_to_eeh_dev(dn); - config_addr = edev->config_addr; - if (edev->pe_config_addr) - config_addr = edev->pe_config_addr; + config_addr = pe->config_addr; + if (pe->addr) + config_addr = pe->addr; /* Reset PE through RTAS call */ ret = rtas_call(ibm_set_slot_reset, 4, 1, NULL, - config_addr, BUID_HI(edev->phb->buid), - BUID_LO(edev->phb->buid), option); + config_addr, BUID_HI(pe->phb->buid), + BUID_LO(pe->phb->buid), option); /* If fundamental-reset not supported, try hot-reset */ if (option == EEH_RESET_FUNDAMENTAL && ret == -8) { ret = rtas_call(ibm_set_slot_reset, 4, 1, NULL, - config_addr, BUID_HI(edev->phb->buid), - BUID_LO(edev->phb->buid), EEH_RESET_HOT); + config_addr, BUID_HI(pe->phb->buid), + BUID_LO(pe->phb->buid), EEH_RESET_HOT); } return ret; @@ -368,13 +353,13 @@ static int pseries_eeh_reset(struct device_node *dn, int option) /** * pseries_eeh_wait_state - Wait for PE state - * @dn: PE associated device node + * @pe: EEH PE * @max_wait: maximal period in microsecond * * Wait for the state of associated PE. It might take some time * to retrieve the PE's state. */ -static int pseries_eeh_wait_state(struct device_node *dn, int max_wait) +static int pseries_eeh_wait_state(struct eeh_pe *pe, int max_wait) { int ret; int mwait; @@ -391,7 +376,7 @@ static int pseries_eeh_wait_state(struct device_node *dn, int max_wait) #define EEH_STATE_MAX_WAIT_TIME (300 * 1000) while (1) { - ret = pseries_eeh_get_state(dn, &mwait); + ret = pseries_eeh_get_state(pe, &mwait); /* * If the PE's state is temporarily unavailable, @@ -426,7 +411,7 @@ static int pseries_eeh_wait_state(struct device_node *dn, int max_wait) /** * pseries_eeh_get_log - Retrieve error log - * @dn: device node + * @pe: EEH PE * @severity: temporary or permanent error log * @drv_log: driver log to be combined with retrieved error log * @len: length of driver log @@ -435,24 +420,22 @@ static int pseries_eeh_wait_state(struct device_node *dn, int max_wait) * Actually, the error will be retrieved through the dedicated * RTAS call. */ -static int pseries_eeh_get_log(struct device_node *dn, int severity, char *drv_log, unsigned long len) +static int pseries_eeh_get_log(struct eeh_pe *pe, int severity, char *drv_log, unsigned long len) { - struct eeh_dev *edev; int config_addr; unsigned long flags; int ret; - edev = of_node_to_eeh_dev(dn); spin_lock_irqsave(&slot_errbuf_lock, flags); memset(slot_errbuf, 0, eeh_error_buf_size); /* Figure out the PE address */ - config_addr = edev->config_addr; - if (edev->pe_config_addr) - config_addr = edev->pe_config_addr; + config_addr = pe->config_addr; + if (pe->addr) + config_addr = pe->addr; ret = rtas_call(ibm_slot_error_detail, 8, 1, NULL, config_addr, - BUID_HI(edev->phb->buid), BUID_LO(edev->phb->buid), + BUID_HI(pe->phb->buid), BUID_LO(pe->phb->buid), virt_to_phys(drv_log), len, virt_to_phys(slot_errbuf), eeh_error_buf_size, severity); @@ -465,40 +448,38 @@ static int pseries_eeh_get_log(struct device_node *dn, int severity, char *drv_l /** * pseries_eeh_configure_bridge - Configure PCI bridges in the indicated PE - * @dn: PE associated device node + * @pe: EEH PE * * The function will be called to reconfigure the bridges included * in the specified PE so that the mulfunctional PE would be recovered * again. */ -static int pseries_eeh_configure_bridge(struct device_node *dn) +static int pseries_eeh_configure_bridge(struct eeh_pe *pe) { - struct eeh_dev *edev; int config_addr; int ret; /* Figure out the PE address */ - edev = of_node_to_eeh_dev(dn); - config_addr = edev->config_addr; - if (edev->pe_config_addr) - config_addr = edev->pe_config_addr; + config_addr = pe->config_addr; + if (pe->addr) + config_addr = pe->addr; /* Use new configure-pe function, if supported */ if (ibm_configure_pe != RTAS_UNKNOWN_SERVICE) { ret = rtas_call(ibm_configure_pe, 3, 1, NULL, - config_addr, BUID_HI(edev->phb->buid), - BUID_LO(edev->phb->buid)); + config_addr, BUID_HI(pe->phb->buid), + BUID_LO(pe->phb->buid)); } else if (ibm_configure_bridge != RTAS_UNKNOWN_SERVICE) { ret = rtas_call(ibm_configure_bridge, 3, 1, NULL, - config_addr, BUID_HI(edev->phb->buid), - BUID_LO(edev->phb->buid)); + config_addr, BUID_HI(pe->phb->buid), + BUID_LO(pe->phb->buid)); } else { return -EFAULT; } if (ret) - pr_warning("%s: Unable to configure bridge %d for %s\n", - __func__, ret, dn->full_name); + pr_warning("%s: Unable to configure bridge PHB#%d-PE#%x (%d)\n", + __func__, pe->phb->global_number, pe->addr, ret); return ret; } -- cgit v0.10.2 From 9e6d2cf65e3dbaf783917c92c15d31d419b0d648 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Fri, 7 Sep 2012 22:44:15 +0000 Subject: powerpc/eeh: Device bars restore based on PE The patch introduces the function to traverse the devices of the specified PE and its child PEs. Also, the restore on device bars is implemented based on the traverse function. Signed-off-by: Gavin Shan Signed-off-by: Benjamin Herrenschmidt diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h index 5e45a1c..629fb27 100644 --- a/arch/powerpc/include/asm/eeh.h +++ b/arch/powerpc/include/asm/eeh.h @@ -171,6 +171,9 @@ typedef void *(*eeh_traverse_func)(void *data, void *flag); int __devinit eeh_phb_pe_create(struct pci_controller *phb); int eeh_add_to_parent_pe(struct eeh_dev *edev); int eeh_rmv_from_parent_pe(struct eeh_dev *edev); +void *eeh_pe_dev_traverse(struct eeh_pe *root, + eeh_traverse_func fn, void *flag); +void eeh_pe_restore_bars(struct eeh_pe *pe); void * __devinit eeh_dev_init(struct device_node *dn, void *data); void __devinit eeh_dev_phb_init_dynamic(struct pci_controller *phb); diff --git a/arch/powerpc/include/asm/ppc-pci.h b/arch/powerpc/include/asm/ppc-pci.h index 3e301b1..5cbe3f2 100644 --- a/arch/powerpc/include/asm/ppc-pci.h +++ b/arch/powerpc/include/asm/ppc-pci.h @@ -54,7 +54,6 @@ struct pci_dev *pci_addr_cache_get_device(unsigned long addr); void eeh_slot_error_detail(struct eeh_dev *edev, int severity); int eeh_pci_enable(struct eeh_dev *edev, int function); int eeh_reset_pe(struct eeh_dev *); -void eeh_restore_bars(struct eeh_dev *); int rtas_write_config(struct pci_dn *, int where, int size, u32 val); int rtas_read_config(struct pci_dn *, int where, int size, u32 *val); void eeh_pe_state_mark(struct eeh_pe *pe, int state); diff --git a/arch/powerpc/platforms/pseries/eeh.c b/arch/powerpc/platforms/pseries/eeh.c index 3c8658e..b5fcecb 100644 --- a/arch/powerpc/platforms/pseries/eeh.c +++ b/arch/powerpc/platforms/pseries/eeh.c @@ -610,85 +610,6 @@ int eeh_reset_pe(struct eeh_dev *edev) return -1; } -/** Save and restore of PCI BARs - * - * Although firmware will set up BARs during boot, it doesn't - * set up device BAR's after a device reset, although it will, - * if requested, set up bridge configuration. Thus, we need to - * configure the PCI devices ourselves. - */ - -/** - * eeh_restore_one_device_bars - Restore the Base Address Registers for one device - * @edev: PCI device associated EEH device - * - * Loads the PCI configuration space base address registers, - * the expansion ROM base address, the latency timer, and etc. - * from the saved values in the device node. - */ -static inline void eeh_restore_one_device_bars(struct eeh_dev *edev) -{ - int i; - u32 cmd; - struct device_node *dn = eeh_dev_to_of_node(edev); - - if (!edev->phb) - return; - - for (i=4; i<10; i++) { - eeh_ops->write_config(dn, i*4, 4, edev->config_space[i]); - } - - /* 12 == Expansion ROM Address */ - eeh_ops->write_config(dn, 12*4, 4, edev->config_space[12]); - -#define BYTE_SWAP(OFF) (8*((OFF)/4)+3-(OFF)) -#define SAVED_BYTE(OFF) (((u8 *)(edev->config_space))[BYTE_SWAP(OFF)]) - - eeh_ops->write_config(dn, PCI_CACHE_LINE_SIZE, 1, - SAVED_BYTE(PCI_CACHE_LINE_SIZE)); - - eeh_ops->write_config(dn, PCI_LATENCY_TIMER, 1, - SAVED_BYTE(PCI_LATENCY_TIMER)); - - /* max latency, min grant, interrupt pin and line */ - eeh_ops->write_config(dn, 15*4, 4, edev->config_space[15]); - - /* Restore PERR & SERR bits, some devices require it, - * don't touch the other command bits - */ - eeh_ops->read_config(dn, PCI_COMMAND, 4, &cmd); - if (edev->config_space[1] & PCI_COMMAND_PARITY) - cmd |= PCI_COMMAND_PARITY; - else - cmd &= ~PCI_COMMAND_PARITY; - if (edev->config_space[1] & PCI_COMMAND_SERR) - cmd |= PCI_COMMAND_SERR; - else - cmd &= ~PCI_COMMAND_SERR; - eeh_ops->write_config(dn, PCI_COMMAND, 4, cmd); -} - -/** - * eeh_restore_bars - Restore the PCI config space info - * @edev: EEH device - * - * This routine performs a recursive walk to the children - * of this device as well. - */ -void eeh_restore_bars(struct eeh_dev *edev) -{ - struct device_node *dn; - if (!edev) - return; - - if ((edev->mode & EEH_MODE_SUPPORTED) && !IS_BRIDGE(edev->class_code)) - eeh_restore_one_device_bars(edev); - - for_each_child_of_node(eeh_dev_to_of_node(edev), dn) - eeh_restore_bars(of_node_to_eeh_dev(dn)); -} - /** * eeh_save_bars - Save device bars * @edev: PCI device associated EEH device diff --git a/arch/powerpc/platforms/pseries/eeh_pe.c b/arch/powerpc/platforms/pseries/eeh_pe.c index 0dac1b6..2b7e85b 100644 --- a/arch/powerpc/platforms/pseries/eeh_pe.c +++ b/arch/powerpc/platforms/pseries/eeh_pe.c @@ -172,6 +172,38 @@ static void *eeh_pe_traverse(struct eeh_pe *root, } /** + * eeh_pe_dev_traverse - Traverse the devices from the PE + * @root: EEH PE + * @fn: function callback + * @flag: extra parameter to callback + * + * The function is used to traverse the devices of the specified + * PE and its child PEs. + */ +void *eeh_pe_dev_traverse(struct eeh_pe *root, + eeh_traverse_func fn, void *flag) +{ + struct eeh_pe *pe; + struct eeh_dev *edev; + void *ret; + + if (!root) { + pr_warning("%s: Invalid PE %p\n", __func__, root); + return NULL; + } + + /* Traverse root PE */ + for (pe = root; pe; pe = eeh_pe_next(pe, root)) { + eeh_pe_for_each_dev(pe, edev) { + ret = fn(edev, flag); + if (ret) return ret; + } + } + + return NULL; +} + +/** * __eeh_pe_get - Check the PE address * @data: EEH PE * @flag: EEH device @@ -467,3 +499,65 @@ void eeh_pe_state_clear(struct eeh_pe *pe, int state) { eeh_pe_traverse(pe, __eeh_pe_state_clear, &state); } + +/** + * eeh_restore_one_device_bars - Restore the Base Address Registers for one device + * @data: EEH device + * @flag: Unused + * + * Loads the PCI configuration space base address registers, + * the expansion ROM base address, the latency timer, and etc. + * from the saved values in the device node. + */ +static void *eeh_restore_one_device_bars(void *data, void *flag) +{ + int i; + u32 cmd; + struct eeh_dev *edev = (struct eeh_dev *)data; + struct device_node *dn = eeh_dev_to_of_node(edev); + + for (i = 4; i < 10; i++) + eeh_ops->write_config(dn, i*4, 4, edev->config_space[i]); + /* 12 == Expansion ROM Address */ + eeh_ops->write_config(dn, 12*4, 4, edev->config_space[12]); + +#define BYTE_SWAP(OFF) (8*((OFF)/4)+3-(OFF)) +#define SAVED_BYTE(OFF) (((u8 *)(edev->config_space))[BYTE_SWAP(OFF)]) + + eeh_ops->write_config(dn, PCI_CACHE_LINE_SIZE, 1, + SAVED_BYTE(PCI_CACHE_LINE_SIZE)); + eeh_ops->write_config(dn, PCI_LATENCY_TIMER, 1, + SAVED_BYTE(PCI_LATENCY_TIMER)); + + /* max latency, min grant, interrupt pin and line */ + eeh_ops->write_config(dn, 15*4, 4, edev->config_space[15]); + + /* + * Restore PERR & SERR bits, some devices require it, + * don't touch the other command bits + */ + eeh_ops->read_config(dn, PCI_COMMAND, 4, &cmd); + if (edev->config_space[1] & PCI_COMMAND_PARITY) + cmd |= PCI_COMMAND_PARITY; + else + cmd &= ~PCI_COMMAND_PARITY; + if (edev->config_space[1] & PCI_COMMAND_SERR) + cmd |= PCI_COMMAND_SERR; + else + cmd &= ~PCI_COMMAND_SERR; + eeh_ops->write_config(dn, PCI_COMMAND, 4, cmd); + + return NULL; +} + +/** + * eeh_pe_restore_bars - Restore the PCI config space info + * @pe: EEH PE + * + * This routine performs a recursive walk to the children + * of this device as well. + */ +void eeh_pe_restore_bars(struct eeh_pe *pe) +{ + eeh_pe_dev_traverse(pe, eeh_restore_one_device_bars, NULL); +} -- cgit v0.10.2 From ff477966c626e440dd1737801ae4d52cf1f22bff Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Fri, 7 Sep 2012 22:44:16 +0000 Subject: powerpc/eeh: I/O enable and log retrival based on PE The patch refactors the original implementation in order to enable I/O and retrieve EEH log based on PE. Signed-off-by: Gavin Shan Signed-off-by: Benjamin Herrenschmidt diff --git a/arch/powerpc/include/asm/ppc-pci.h b/arch/powerpc/include/asm/ppc-pci.h index 5cbe3f2..5e34b10 100644 --- a/arch/powerpc/include/asm/ppc-pci.h +++ b/arch/powerpc/include/asm/ppc-pci.h @@ -51,8 +51,8 @@ void pci_addr_cache_build(void); void pci_addr_cache_insert_device(struct pci_dev *dev); void pci_addr_cache_remove_device(struct pci_dev *dev); struct pci_dev *pci_addr_cache_get_device(unsigned long addr); -void eeh_slot_error_detail(struct eeh_dev *edev, int severity); -int eeh_pci_enable(struct eeh_dev *edev, int function); +void eeh_slot_error_detail(struct eeh_pe *pe, int severity); +int eeh_pci_enable(struct eeh_pe *pe, int function); int eeh_reset_pe(struct eeh_dev *); int rtas_write_config(struct pci_dn *, int where, int size, u32 val); int rtas_read_config(struct pci_dn *, int where, int size, u32 *val); diff --git a/arch/powerpc/platforms/pseries/eeh.c b/arch/powerpc/platforms/pseries/eeh.c index b5fcecb..4572361 100644 --- a/arch/powerpc/platforms/pseries/eeh.c +++ b/arch/powerpc/platforms/pseries/eeh.c @@ -207,22 +207,12 @@ static size_t eeh_gather_pci_data(struct eeh_dev *edev, char * buf, size_t len) } } - /* Gather status on devices under the bridge */ - if (dev->class >> 16 == PCI_BASE_CLASS_BRIDGE) { - struct device_node *child; - - for_each_child_of_node(dn, child) { - if (of_node_to_eeh_dev(child)) - n += eeh_gather_pci_data(of_node_to_eeh_dev(child), buf+n, len-n); - } - } - return n; } /** * eeh_slot_error_detail - Generate combined log including driver log and error log - * @edev: device to report error log for + * @pe: EEH PE * @severity: temporary or permanent error log * * This routine should be called to generate the combined log, which @@ -230,17 +220,22 @@ static size_t eeh_gather_pci_data(struct eeh_dev *edev, char * buf, size_t len) * out from the config space of the corresponding PCI device, while * the error log is fetched through platform dependent function call. */ -void eeh_slot_error_detail(struct eeh_dev *edev, int severity) +void eeh_slot_error_detail(struct eeh_pe *pe, int severity) { size_t loglen = 0; - pci_regs_buf[0] = 0; + struct eeh_dev *edev; - eeh_pci_enable(edev, EEH_OPT_THAW_MMIO); - eeh_ops->configure_bridge(eeh_dev_to_of_node(edev)); - eeh_restore_bars(edev); - loglen = eeh_gather_pci_data(edev, pci_regs_buf, EEH_PCI_REGS_LOG_LEN); + eeh_pci_enable(pe, EEH_OPT_THAW_MMIO); + eeh_ops->configure_bridge(pe); + eeh_pe_restore_bars(pe); - eeh_ops->get_log(eeh_dev_to_of_node(edev), severity, pci_regs_buf, loglen); + pci_regs_buf[0] = 0; + eeh_pe_for_each_dev(pe, edev) { + loglen += eeh_gather_pci_data(edev, pci_regs_buf, + EEH_PCI_REGS_LOG_LEN); + } + + eeh_ops->get_log(pe, severity, pci_regs_buf, loglen); } /** @@ -427,23 +422,22 @@ EXPORT_SYMBOL(eeh_check_failure); /** * eeh_pci_enable - Enable MMIO or DMA transfers for this slot - * @edev: pci device node + * @pe: EEH PE * * This routine should be called to reenable frozen MMIO or DMA * so that it would work correctly again. It's useful while doing * recovery or log collection on the indicated device. */ -int eeh_pci_enable(struct eeh_dev *edev, int function) +int eeh_pci_enable(struct eeh_pe *pe, int function) { int rc; - struct device_node *dn = eeh_dev_to_of_node(edev); - rc = eeh_ops->set_option(dn, function); + rc = eeh_ops->set_option(pe, function); if (rc) - printk(KERN_WARNING "EEH: Unexpected state change %d, err=%d dn=%s\n", - function, rc, dn->full_name); + pr_warning("%s: Unexpected state change %d on PHB#%d-PE#%x, err=%d\n", + __func__, function, pe->phb->global_number, pe->addr, rc); - rc = eeh_ops->wait_state(dn, PCI_BUS_RESET_WAIT_MSEC); + rc = eeh_ops->wait_state(pe, PCI_BUS_RESET_WAIT_MSEC); if (rc > 0 && (rc & EEH_STATE_MMIO_ENABLED) && (function == EEH_OPT_THAW_MMIO)) return 0; -- cgit v0.10.2 From c270a24c59bd9a4ac7dbcbd964cbd14270b3a361 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Fri, 7 Sep 2012 22:44:17 +0000 Subject: powerpc/eeh: Do reset based on PE The patch implements reset based on PE instead of eeh device. Also, The functions used to retrieve the reset type, either hot or fundamental reset, have been reworked for a little bit. More specificly, it's implemented based the the eeh device traverse function. Signed-off-by: Gavin Shan Signed-off-by: Benjamin Herrenschmidt diff --git a/arch/powerpc/include/asm/ppc-pci.h b/arch/powerpc/include/asm/ppc-pci.h index 5e34b10..2a80f08 100644 --- a/arch/powerpc/include/asm/ppc-pci.h +++ b/arch/powerpc/include/asm/ppc-pci.h @@ -53,7 +53,7 @@ void pci_addr_cache_remove_device(struct pci_dev *dev); struct pci_dev *pci_addr_cache_get_device(unsigned long addr); void eeh_slot_error_detail(struct eeh_pe *pe, int severity); int eeh_pci_enable(struct eeh_pe *pe, int function); -int eeh_reset_pe(struct eeh_dev *); +int eeh_reset_pe(struct eeh_pe *); int rtas_write_config(struct pci_dn *, int where, int size, u32 val); int rtas_read_config(struct pci_dn *, int where, int size, u32 *val); void eeh_pe_state_mark(struct eeh_pe *pe, int state); diff --git a/arch/powerpc/platforms/pseries/eeh.c b/arch/powerpc/platforms/pseries/eeh.c index 4572361..56a022b 100644 --- a/arch/powerpc/platforms/pseries/eeh.c +++ b/arch/powerpc/platforms/pseries/eeh.c @@ -455,17 +455,24 @@ int eeh_pci_enable(struct eeh_pe *pe, int function) */ int pcibios_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state state) { - struct device_node *dn = pci_device_to_OF_node(dev); + struct eeh_dev *edev = pci_dev_to_eeh_dev(dev); + struct eeh_pe *pe = edev->pe; + + if (!pe) { + pr_err("%s: No PE found on PCI device %s\n", + __func__, pci_name(dev)); + return -EINVAL; + } switch (state) { case pcie_deassert_reset: - eeh_ops->reset(dn, EEH_RESET_DEACTIVATE); + eeh_ops->reset(pe, EEH_RESET_DEACTIVATE); break; case pcie_hot_reset: - eeh_ops->reset(dn, EEH_RESET_HOT); + eeh_ops->reset(pe, EEH_RESET_HOT); break; case pcie_warm_reset: - eeh_ops->reset(dn, EEH_RESET_FUNDAMENTAL); + eeh_ops->reset(pe, EEH_RESET_FUNDAMENTAL); break; default: return -EINVAL; @@ -475,66 +482,37 @@ int pcibios_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state stat } /** - * __eeh_set_pe_freset - Check the required reset for child devices - * @parent: parent device - * @freset: return value - * - * Each device might have its preferred reset type: fundamental or - * hot reset. The routine is used to collect the information from - * the child devices so that they could be reset accordingly. - */ -void __eeh_set_pe_freset(struct device_node *parent, unsigned int *freset) -{ - struct device_node *dn; - - for_each_child_of_node(parent, dn) { - if (of_node_to_eeh_dev(dn)) { - struct pci_dev *dev = of_node_to_eeh_dev(dn)->pdev; - - if (dev && dev->driver) - *freset |= dev->needs_freset; - - __eeh_set_pe_freset(dn, freset); - } - } -} - -/** - * eeh_set_pe_freset - Check the required reset for the indicated device and its children - * @dn: parent device - * @freset: return value + * eeh_set_pe_freset - Check the required reset for the indicated device + * @data: EEH device + * @flag: return value * * Each device might have its preferred reset type: fundamental or * hot reset. The routine is used to collected the information for * the indicated device and its children so that the bunch of the * devices could be reset properly. */ -void eeh_set_pe_freset(struct device_node *dn, unsigned int *freset) +static void *eeh_set_dev_freset(void *data, void *flag) { struct pci_dev *dev; - dn = eeh_find_device_pe(dn); - - /* Back up one, since config addrs might be shared */ - if (!pcibios_find_pci_bus(dn) && of_node_to_eeh_dev(dn->parent)) - dn = dn->parent; + unsigned int *freset = (unsigned int *)flag; + struct eeh_dev *edev = (struct eeh_dev *)data; - dev = of_node_to_eeh_dev(dn)->pdev; + dev = eeh_dev_to_pci_dev(edev); if (dev) *freset |= dev->needs_freset; - __eeh_set_pe_freset(dn, freset); + return NULL; } /** * eeh_reset_pe_once - Assert the pci #RST line for 1/4 second - * @edev: pci device node to be reset. + * @pe: EEH PE * * Assert the PCI #RST line for 1/4 second. */ -static void eeh_reset_pe_once(struct eeh_dev *edev) +static void eeh_reset_pe_once(struct eeh_pe *pe) { unsigned int freset = 0; - struct device_node *dn = eeh_dev_to_of_node(edev); /* Determine type of EEH reset required for * Partitionable Endpoint, a hot-reset (1) @@ -542,12 +520,12 @@ static void eeh_reset_pe_once(struct eeh_dev *edev) * A fundamental reset required by any device under * Partitionable Endpoint trumps hot-reset. */ - eeh_set_pe_freset(dn, &freset); + eeh_pe_dev_traverse(pe, eeh_set_dev_freset, &freset); if (freset) - eeh_ops->reset(dn, EEH_RESET_FUNDAMENTAL); + eeh_ops->reset(pe, EEH_RESET_FUNDAMENTAL); else - eeh_ops->reset(dn, EEH_RESET_HOT); + eeh_ops->reset(pe, EEH_RESET_HOT); /* The PCI bus requires that the reset be held high for at least * a 100 milliseconds. We wait a bit longer 'just in case'. @@ -559,9 +537,9 @@ static void eeh_reset_pe_once(struct eeh_dev *edev) * pci slot reset line is dropped. Make sure we don't miss * these, and clear the flag now. */ - eeh_clear_slot(dn, EEH_MODE_ISOLATED); + eeh_pe_state_clear(pe, EEH_MODE_ISOLATED); - eeh_ops->reset(dn, EEH_RESET_DEACTIVATE); + eeh_ops->reset(pe, EEH_RESET_DEACTIVATE); /* After a PCI slot has been reset, the PCI Express spec requires * a 1.5 second idle time for the bus to stabilize, before starting @@ -573,32 +551,31 @@ static void eeh_reset_pe_once(struct eeh_dev *edev) /** * eeh_reset_pe - Reset the indicated PE - * @edev: PCI device associated EEH device + * @pe: EEH PE * * This routine should be called to reset indicated device, including * PE. A PE might include multiple PCI devices and sometimes PCI bridges * might be involved as well. */ -int eeh_reset_pe(struct eeh_dev *edev) +int eeh_reset_pe(struct eeh_pe *pe) { int i, rc; - struct device_node *dn = eeh_dev_to_of_node(edev); /* Take three shots at resetting the bus */ for (i=0; i<3; i++) { - eeh_reset_pe_once(edev); + eeh_reset_pe_once(pe); - rc = eeh_ops->wait_state(dn, PCI_BUS_RESET_WAIT_MSEC); + rc = eeh_ops->wait_state(pe, PCI_BUS_RESET_WAIT_MSEC); if (rc == (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE)) return 0; if (rc < 0) { - printk(KERN_ERR "EEH: unrecoverable slot failure %s\n", - dn->full_name); + pr_err("%s: Unrecoverable slot failure on PHB#%d-PE#%x", + __func__, pe->phb->global_number, pe->addr); return -1; } - printk(KERN_ERR "EEH: bus reset %d failed on slot %s, rc=%d\n", - i+1, dn->full_name, rc); + pr_err("EEH: bus reset %d failed on PHB#%d-PE#%x, rc=%d\n", + i+1, pe->phb->global_number, pe->addr, rc); } return -1; -- cgit v0.10.2 From 120dc496617eb7105d577c6041cbc052ffb1d8c7 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Fri, 7 Sep 2012 22:44:18 +0000 Subject: powerpc/eeh: Make EEH handler PE sensitive Once eeh error is found, eeh event will be created and put it into the global linked list. At the mean while, kernel thread will be started to process it. The handler for the kernel thread originally was eeh device sensitive. The patch reworks the handler of the kernel thread so that it's PE sensitive. Signed-off-by: Gavin Shan Signed-off-by: Benjamin Herrenschmidt diff --git a/arch/powerpc/platforms/pseries/eeh_event.c b/arch/powerpc/platforms/pseries/eeh_event.c index 7f89f1e..ba7005a 100644 --- a/arch/powerpc/platforms/pseries/eeh_event.c +++ b/arch/powerpc/platforms/pseries/eeh_event.c @@ -57,7 +57,7 @@ static int eeh_event_handler(void * dummy) { unsigned long flags; struct eeh_event *event; - struct eeh_dev *edev; + struct eeh_pe *pe; set_task_comm(current, "eehd"); @@ -76,28 +76,23 @@ static int eeh_event_handler(void * dummy) /* Serialize processing of EEH events */ mutex_lock(&eeh_event_mutex); - edev = event->edev; - eeh_mark_slot(eeh_dev_to_of_node(edev), EEH_MODE_RECOVERING); - - printk(KERN_INFO "EEH: Detected PCI bus error on device %s\n", - eeh_pci_name(edev->pdev)); + pe = event->pe; + eeh_pe_state_mark(pe, EEH_PE_RECOVERING); + pr_info("EEH: Detected PCI bus error on PHB#%d-PE#%x\n", + pe->phb->global_number, pe->addr); set_current_state(TASK_INTERRUPTIBLE); /* Don't add to load average */ - edev = handle_eeh_events(event); - - if (edev) { - eeh_clear_slot(eeh_dev_to_of_node(edev), EEH_MODE_RECOVERING); - pci_dev_put(edev->pdev); - } + handle_eeh_events(event); + eeh_pe_state_clear(pe, EEH_PE_RECOVERING); kfree(event); mutex_unlock(&eeh_event_mutex); /* If there are no new errors after an hour, clear the counter. */ - if (edev && edev->freeze_count>0) { + if (pe && pe->freeze_count > 0) { msleep_interruptible(3600*1000); - if (edev->freeze_count>0) - edev->freeze_count--; + if (pe->freeze_count > 0) + pe->freeze_count--; } -- cgit v0.10.2 From 9b3c76f08122f5efdbe4992a64b8478cc92dd983 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Fri, 7 Sep 2012 22:44:19 +0000 Subject: powerpc/eeh: Handle EEH error based on PE The patch reworks the current implementation so that the eeh errors will be handled basing on PE instead of eeh device. Signed-off-by: Gavin Shan Signed-off-by: Benjamin Herrenschmidt diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h index 629fb27..d25a693 100644 --- a/arch/powerpc/include/asm/eeh.h +++ b/arch/powerpc/include/asm/eeh.h @@ -174,6 +174,7 @@ int eeh_rmv_from_parent_pe(struct eeh_dev *edev); void *eeh_pe_dev_traverse(struct eeh_pe *root, eeh_traverse_func fn, void *flag); void eeh_pe_restore_bars(struct eeh_pe *pe); +struct pci_bus *eeh_pe_bus_get(struct eeh_pe *pe); void * __devinit eeh_dev_init(struct device_node *dn, void *data); void __devinit eeh_dev_phb_init_dynamic(struct pci_controller *phb); diff --git a/arch/powerpc/include/asm/eeh_event.h b/arch/powerpc/include/asm/eeh_event.h index dc722b5..de67d83 100644 --- a/arch/powerpc/include/asm/eeh_event.h +++ b/arch/powerpc/include/asm/eeh_event.h @@ -32,7 +32,7 @@ struct eeh_event { }; int eeh_send_failure_event(struct eeh_pe *pe); -struct eeh_dev *handle_eeh_events(struct eeh_event *); +void eeh_handle_event(struct eeh_pe *pe); #endif /* __KERNEL__ */ #endif /* ASM_POWERPC_EEH_EVENT_H */ diff --git a/arch/powerpc/platforms/pseries/eeh_driver.c b/arch/powerpc/platforms/pseries/eeh_driver.c index baf92cd..343c807 100644 --- a/arch/powerpc/platforms/pseries/eeh_driver.c +++ b/arch/powerpc/platforms/pseries/eeh_driver.c @@ -116,28 +116,35 @@ static void eeh_enable_irq(struct pci_dev *dev) /** * eeh_report_error - Report pci error to each device driver - * @dev: PCI device + * @data: eeh device * @userdata: return value * * Report an EEH error to each device driver, collect up and * merge the device driver responses. Cumulative response * passed back in "userdata". */ -static int eeh_report_error(struct pci_dev *dev, void *userdata) +static void *eeh_report_error(void *data, void *userdata) { + struct eeh_dev *edev = (struct eeh_dev *)data; + struct pci_dev *dev = eeh_dev_to_pci_dev(edev); enum pci_ers_result rc, *res = userdata; struct pci_driver *driver = dev->driver; + /* We might not have the associated PCI device, + * then we should continue for next one. + */ + if (!dev) return NULL; + dev->error_state = pci_channel_io_frozen; if (!driver) - return 0; + return NULL; eeh_disable_irq(dev); if (!driver->err_handler || !driver->err_handler->error_detected) - return 0; + return NULL; rc = driver->err_handler->error_detected(dev, pci_channel_io_frozen); @@ -145,27 +152,31 @@ static int eeh_report_error(struct pci_dev *dev, void *userdata) if (rc == PCI_ERS_RESULT_NEED_RESET) *res = rc; if (*res == PCI_ERS_RESULT_NONE) *res = rc; - return 0; + return NULL; } /** * eeh_report_mmio_enabled - Tell drivers that MMIO has been enabled - * @dev: PCI device + * @data: eeh device * @userdata: return value * * Tells each device driver that IO ports, MMIO and config space I/O * are now enabled. Collects up and merges the device driver responses. * Cumulative response passed back in "userdata". */ -static int eeh_report_mmio_enabled(struct pci_dev *dev, void *userdata) +static void *eeh_report_mmio_enabled(void *data, void *userdata) { + struct eeh_dev *edev = (struct eeh_dev *)data; + struct pci_dev *dev = eeh_dev_to_pci_dev(edev); enum pci_ers_result rc, *res = userdata; - struct pci_driver *driver = dev->driver; + struct pci_driver *driver; - if (!driver || + if (!dev) return NULL; + + if (!(driver = dev->driver) || !driver->err_handler || !driver->err_handler->mmio_enabled) - return 0; + return NULL; rc = driver->err_handler->mmio_enabled(dev); @@ -173,12 +184,12 @@ static int eeh_report_mmio_enabled(struct pci_dev *dev, void *userdata) if (rc == PCI_ERS_RESULT_NEED_RESET) *res = rc; if (*res == PCI_ERS_RESULT_NONE) *res = rc; - return 0; + return NULL; } /** * eeh_report_reset - Tell device that slot has been reset - * @dev: PCI device + * @data: eeh device * @userdata: return value * * This routine must be called while EEH tries to reset particular @@ -186,13 +197,15 @@ static int eeh_report_mmio_enabled(struct pci_dev *dev, void *userdata) * some actions, usually to save data the driver needs so that the * driver can work again while the device is recovered. */ -static int eeh_report_reset(struct pci_dev *dev, void *userdata) +static void *eeh_report_reset(void *data, void *userdata) { + struct eeh_dev *edev = (struct eeh_dev *)data; + struct pci_dev *dev = eeh_dev_to_pci_dev(edev); enum pci_ers_result rc, *res = userdata; - struct pci_driver *driver = dev->driver; + struct pci_driver *driver; - if (!driver) - return 0; + if (!dev || !(driver = dev->driver)) + return NULL; dev->error_state = pci_channel_io_normal; @@ -200,7 +213,7 @@ static int eeh_report_reset(struct pci_dev *dev, void *userdata) if (!driver->err_handler || !driver->err_handler->slot_reset) - return 0; + return NULL; rc = driver->err_handler->slot_reset(dev); if ((*res == PCI_ERS_RESULT_NONE) || @@ -208,82 +221,89 @@ static int eeh_report_reset(struct pci_dev *dev, void *userdata) if (*res == PCI_ERS_RESULT_DISCONNECT && rc == PCI_ERS_RESULT_NEED_RESET) *res = rc; - return 0; + return NULL; } /** * eeh_report_resume - Tell device to resume normal operations - * @dev: PCI device + * @data: eeh device * @userdata: return value * * This routine must be called to notify the device driver that it * could resume so that the device driver can do some initialization * to make the recovered device work again. */ -static int eeh_report_resume(struct pci_dev *dev, void *userdata) +static void *eeh_report_resume(void *data, void *userdata) { - struct pci_driver *driver = dev->driver; + struct eeh_dev *edev = (struct eeh_dev *)data; + struct pci_dev *dev = eeh_dev_to_pci_dev(edev); + struct pci_driver *driver; + + if (!dev) return NULL; dev->error_state = pci_channel_io_normal; - if (!driver) - return 0; + if (!(driver = dev->driver)) + return NULL; eeh_enable_irq(dev); if (!driver->err_handler || !driver->err_handler->resume) - return 0; + return NULL; driver->err_handler->resume(dev); - return 0; + return NULL; } /** * eeh_report_failure - Tell device driver that device is dead. - * @dev: PCI device + * @data: eeh device * @userdata: return value * * This informs the device driver that the device is permanently * dead, and that no further recovery attempts will be made on it. */ -static int eeh_report_failure(struct pci_dev *dev, void *userdata) +static void *eeh_report_failure(void *data, void *userdata) { - struct pci_driver *driver = dev->driver; + struct eeh_dev *edev = (struct eeh_dev *)data; + struct pci_dev *dev = eeh_dev_to_pci_dev(edev); + struct pci_driver *driver; + + if (!dev) return NULL; dev->error_state = pci_channel_io_perm_failure; - if (!driver) - return 0; + if (!(driver = dev->driver)) + return NULL; eeh_disable_irq(dev); if (!driver->err_handler || !driver->err_handler->error_detected) - return 0; + return NULL; driver->err_handler->error_detected(dev, pci_channel_io_perm_failure); - return 0; + return NULL; } /** * eeh_reset_device - Perform actual reset of a pci slot - * @edev: PE associated EEH device + * @pe: EEH PE * @bus: PCI bus corresponding to the isolcated slot * * This routine must be called to do reset on the indicated PE. * During the reset, udev might be invoked because those affected * PCI devices will be removed and then added. */ -static int eeh_reset_device(struct eeh_dev *edev, struct pci_bus *bus) +static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus) { - struct device_node *dn; int cnt, rc; /* pcibios will clear the counter; save the value */ - cnt = edev->freeze_count; + cnt = pe->freeze_count; if (bus) pcibios_remove_pci_devices(bus); @@ -292,25 +312,13 @@ static int eeh_reset_device(struct eeh_dev *edev, struct pci_bus *bus) * Reconfigure bridges and devices. Don't try to bring the system * up if the reset failed for some reason. */ - rc = eeh_reset_pe(edev); + rc = eeh_reset_pe(pe); if (rc) return rc; - /* Walk over all functions on this device. */ - dn = eeh_dev_to_of_node(edev); - if (!pcibios_find_pci_bus(dn) && of_node_to_eeh_dev(dn->parent)) - dn = dn->parent->child; - - while (dn) { - struct eeh_dev *pedev = of_node_to_eeh_dev(dn); - - /* On Power4, always true because eeh_pe_config_addr=0 */ - if (edev->pe_config_addr == pedev->pe_config_addr) { - eeh_ops->configure_bridge(dn); - eeh_restore_bars(pedev); - } - dn = dn->sibling; - } + /* Restore PE */ + eeh_ops->configure_bridge(pe); + eeh_pe_restore_bars(pe); /* Give the system 5 seconds to finish running the user-space * hotplug shutdown scripts, e.g. ifdown for ethernet. Yes, @@ -322,7 +330,7 @@ static int eeh_reset_device(struct eeh_dev *edev, struct pci_bus *bus) ssleep(5); pcibios_add_pci_devices(bus); } - edev->freeze_count = cnt; + pe->freeze_count = cnt; return 0; } @@ -334,7 +342,7 @@ static int eeh_reset_device(struct eeh_dev *edev, struct pci_bus *bus) /** * eeh_handle_event - Reset a PCI device after hard lockup. - * @event: EEH event + * @pe: EEH PE * * While PHB detects address or data parity errors on particular PCI * slot, the associated PE will be frozen. Besides, DMA's occurring @@ -349,69 +357,24 @@ static int eeh_reset_device(struct eeh_dev *edev, struct pci_bus *bus) * drivers (which cause a second set of hotplug events to go out to * userspace). */ -struct eeh_dev *handle_eeh_events(struct eeh_event *event) +void eeh_handle_event(struct eeh_pe *pe) { - struct device_node *frozen_dn; - struct eeh_dev *frozen_edev; struct pci_bus *frozen_bus; int rc = 0; enum pci_ers_result result = PCI_ERS_RESULT_NONE; - const char *location, *pci_str, *drv_str, *bus_pci_str, *bus_drv_str; - - frozen_dn = eeh_find_device_pe(eeh_dev_to_of_node(event->edev)); - if (!frozen_dn) { - location = of_get_property(eeh_dev_to_of_node(event->edev), "ibm,loc-code", NULL); - location = location ? location : "unknown"; - printk(KERN_ERR "EEH: Error: Cannot find partition endpoint " - "for location=%s pci addr=%s\n", - location, eeh_pci_name(eeh_dev_to_pci_dev(event->edev))); - return NULL; - } - - frozen_bus = pcibios_find_pci_bus(frozen_dn); - location = of_get_property(frozen_dn, "ibm,loc-code", NULL); - location = location ? location : "unknown"; - - /* There are two different styles for coming up with the PE. - * In the old style, it was the highest EEH-capable device - * which was always an EADS pci bridge. In the new style, - * there might not be any EADS bridges, and even when there are, - * the firmware marks them as "EEH incapable". So another - * two-step is needed to find the pci bus.. - */ - if (!frozen_bus) - frozen_bus = pcibios_find_pci_bus(frozen_dn->parent); + frozen_bus = eeh_pe_bus_get(pe); if (!frozen_bus) { - printk(KERN_ERR "EEH: Cannot find PCI bus " - "for location=%s dn=%s\n", - location, frozen_dn->full_name); - return NULL; + pr_err("%s: Cannot find PCI bus for PHB#%d-PE#%x\n", + __func__, pe->phb->global_number, pe->addr); + return; } - frozen_edev = of_node_to_eeh_dev(frozen_dn); - frozen_edev->freeze_count++; - pci_str = eeh_pci_name(eeh_dev_to_pci_dev(event->edev)); - drv_str = eeh_pcid_name(eeh_dev_to_pci_dev(event->edev)); - - if (frozen_edev->freeze_count > EEH_MAX_ALLOWED_FREEZES) + pe->freeze_count++; + if (pe->freeze_count > EEH_MAX_ALLOWED_FREEZES) goto excess_failures; - - printk(KERN_WARNING - "EEH: This PCI device has failed %d times in the last hour:\n", - frozen_edev->freeze_count); - - if (frozen_edev->pdev) { - bus_pci_str = pci_name(frozen_edev->pdev); - bus_drv_str = eeh_pcid_name(frozen_edev->pdev); - printk(KERN_WARNING - "EEH: Bus location=%s driver=%s pci addr=%s\n", - location, bus_drv_str, bus_pci_str); - } - - printk(KERN_WARNING - "EEH: Device location=%s driver=%s pci addr=%s\n", - location, drv_str, pci_str); + pr_warning("EEH: This PCI device has failed %d times in the last hour\n", + pe->freeze_count); /* Walk the various device drivers attached to this slot through * a reset sequence, giving each an opportunity to do what it needs @@ -419,12 +382,12 @@ struct eeh_dev *handle_eeh_events(struct eeh_event *event) * status ... if any child can't handle the reset, then the entire * slot is dlpar removed and added. */ - pci_walk_bus(frozen_bus, eeh_report_error, &result); + eeh_pe_dev_traverse(pe, eeh_report_error, &result); /* Get the current PCI slot state. This can take a long time, * sometimes over 3 seconds for certain systems. */ - rc = eeh_ops->wait_state(eeh_dev_to_of_node(frozen_edev), MAX_WAIT_FOR_RECOVERY*1000); + rc = eeh_ops->wait_state(pe, MAX_WAIT_FOR_RECOVERY*1000); if (rc < 0 || rc == EEH_STATE_NOT_SUPPORT) { printk(KERN_WARNING "EEH: Permanent failure\n"); goto hard_fail; @@ -434,14 +397,14 @@ struct eeh_dev *handle_eeh_events(struct eeh_event *event) * don't post the error log until after all dev drivers * have been informed. */ - eeh_slot_error_detail(frozen_edev, EEH_LOG_TEMP); + eeh_slot_error_detail(pe, EEH_LOG_TEMP); /* If all device drivers were EEH-unaware, then shut * down all of the device drivers, and hope they * go down willingly, without panicing the system. */ if (result == PCI_ERS_RESULT_NONE) { - rc = eeh_reset_device(frozen_edev, frozen_bus); + rc = eeh_reset_device(pe, frozen_bus); if (rc) { printk(KERN_WARNING "EEH: Unable to reset, rc=%d\n", rc); goto hard_fail; @@ -450,7 +413,7 @@ struct eeh_dev *handle_eeh_events(struct eeh_event *event) /* If all devices reported they can proceed, then re-enable MMIO */ if (result == PCI_ERS_RESULT_CAN_RECOVER) { - rc = eeh_pci_enable(frozen_edev, EEH_OPT_THAW_MMIO); + rc = eeh_pci_enable(pe, EEH_OPT_THAW_MMIO); if (rc < 0) goto hard_fail; @@ -458,13 +421,13 @@ struct eeh_dev *handle_eeh_events(struct eeh_event *event) result = PCI_ERS_RESULT_NEED_RESET; } else { result = PCI_ERS_RESULT_NONE; - pci_walk_bus(frozen_bus, eeh_report_mmio_enabled, &result); + eeh_pe_dev_traverse(pe, eeh_report_mmio_enabled, &result); } } /* If all devices reported they can proceed, then re-enable DMA */ if (result == PCI_ERS_RESULT_CAN_RECOVER) { - rc = eeh_pci_enable(frozen_edev, EEH_OPT_THAW_DMA); + rc = eeh_pci_enable(pe, EEH_OPT_THAW_DMA); if (rc < 0) goto hard_fail; @@ -482,13 +445,13 @@ struct eeh_dev *handle_eeh_events(struct eeh_event *event) /* If any device called out for a reset, then reset the slot */ if (result == PCI_ERS_RESULT_NEED_RESET) { - rc = eeh_reset_device(frozen_edev, NULL); + rc = eeh_reset_device(pe, NULL); if (rc) { printk(KERN_WARNING "EEH: Cannot reset, rc=%d\n", rc); goto hard_fail; } result = PCI_ERS_RESULT_NONE; - pci_walk_bus(frozen_bus, eeh_report_reset, &result); + eeh_pe_dev_traverse(pe, eeh_report_reset, &result); } /* All devices should claim they have recovered by now. */ @@ -499,9 +462,9 @@ struct eeh_dev *handle_eeh_events(struct eeh_event *event) } /* Tell all device drivers that they can resume operations */ - pci_walk_bus(frozen_bus, eeh_report_resume, NULL); + eeh_pe_dev_traverse(pe, eeh_report_resume, NULL); - return frozen_edev; + return; excess_failures: /* @@ -509,30 +472,26 @@ excess_failures: * are due to poorly seated PCI cards. Only 10% or so are * due to actual, failed cards. */ - printk(KERN_ERR - "EEH: PCI device at location=%s driver=%s pci addr=%s\n" - "has failed %d times in the last hour " - "and has been permanently disabled.\n" - "Please try reseating this device or replacing it.\n", - location, drv_str, pci_str, frozen_edev->freeze_count); + pr_err("EEH: PHB#%d-PE#%x has failed %d times in the\n" + "last hour and has been permanently disabled.\n" + "Please try reseating or replacing it.\n", + pe->phb->global_number, pe->addr, + pe->freeze_count); goto perm_error; hard_fail: - printk(KERN_ERR - "EEH: Unable to recover from failure of PCI device " - "at location=%s driver=%s pci addr=%s\n" - "Please try reseating this device or replacing it.\n", - location, drv_str, pci_str); + pr_err("EEH: Unable to recover from failure from PHB#%d-PE#%x.\n" + "Please try reseating or replacing it\n", + pe->phb->global_number, pe->addr); perm_error: - eeh_slot_error_detail(frozen_edev, EEH_LOG_PERM); + eeh_slot_error_detail(pe, EEH_LOG_PERM); /* Notify all devices that they're about to go down. */ - pci_walk_bus(frozen_bus, eeh_report_failure, NULL); + eeh_pe_dev_traverse(pe, eeh_report_failure, NULL); /* Shut down the device drivers for good. */ - pcibios_remove_pci_devices(frozen_bus); - - return NULL; + if (frozen_bus) + pcibios_remove_pci_devices(frozen_bus); } diff --git a/arch/powerpc/platforms/pseries/eeh_event.c b/arch/powerpc/platforms/pseries/eeh_event.c index ba7005a..51faaac 100644 --- a/arch/powerpc/platforms/pseries/eeh_event.c +++ b/arch/powerpc/platforms/pseries/eeh_event.c @@ -82,7 +82,7 @@ static int eeh_event_handler(void * dummy) pe->phb->global_number, pe->addr); set_current_state(TASK_INTERRUPTIBLE); /* Don't add to load average */ - handle_eeh_events(event); + eeh_handle_event(pe); eeh_pe_state_clear(pe, EEH_PE_RECOVERING); kfree(event); diff --git a/arch/powerpc/platforms/pseries/eeh_pe.c b/arch/powerpc/platforms/pseries/eeh_pe.c index 2b7e85b..904123c 100644 --- a/arch/powerpc/platforms/pseries/eeh_pe.c +++ b/arch/powerpc/platforms/pseries/eeh_pe.c @@ -561,3 +561,31 @@ void eeh_pe_restore_bars(struct eeh_pe *pe) { eeh_pe_dev_traverse(pe, eeh_restore_one_device_bars, NULL); } + +/** + * eeh_pe_bus_get - Retrieve PCI bus according to the given PE + * @pe: EEH PE + * + * Retrieve the PCI bus according to the given PE. Basically, + * there're 3 types of PEs: PHB/Bus/Device. For PHB PE, the + * primary PCI bus will be retrieved. The parent bus will be + * returned for BUS PE. However, we don't have associated PCI + * bus for DEVICE PE. + */ +struct pci_bus *eeh_pe_bus_get(struct eeh_pe *pe) +{ + struct pci_bus *bus = NULL; + struct eeh_dev *edev; + struct pci_dev *pdev; + + if (pe->type == EEH_PE_PHB) { + bus = pe->phb->bus; + } else if (pe->type == EEH_PE_BUS) { + edev = list_first_entry(&pe->edevs, struct eeh_dev, list); + pdev = eeh_dev_to_pci_dev(edev); + if (pdev) + bus = pdev->bus; + } + + return bus; +} -- cgit v0.10.2 From dbbceee12f2160ef1ac848316212f97bf5bc4c16 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Fri, 7 Sep 2012 22:44:20 +0000 Subject: powerpc/eeh: Move stats to PE The patch removes the eeh related statistics for eeh device since they have been maintained by the corresponding eeh PE. Also, the flags used to trace the state of eeh device and PE have been reworked for a little bit. Signed-off-by: Gavin Shan Signed-off-by: Benjamin Herrenschmidt diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h index d25a693..792d2d7 100644 --- a/arch/powerpc/include/asm/eeh.h +++ b/arch/powerpc/include/asm/eeh.h @@ -77,20 +77,13 @@ struct eeh_pe { * another tree except the currently existing tree of PCI * buses and PCI devices */ -#define EEH_MODE_SUPPORTED (1<<0) /* EEH supported on the device */ -#define EEH_MODE_NOCHECK (1<<1) /* EEH check should be skipped */ -#define EEH_MODE_ISOLATED (1<<2) /* The device has been isolated */ -#define EEH_MODE_RECOVERING (1<<3) /* Recovering the device */ -#define EEH_MODE_IRQ_DISABLED (1<<4) /* Interrupt disabled */ +#define EEH_DEV_IRQ_DISABLED (1<<0) /* Interrupt disabled */ struct eeh_dev { int mode; /* EEH mode */ int class_code; /* Class code of the device */ int config_addr; /* Config address */ int pe_config_addr; /* PE config address */ - int check_count; /* Times of ignored error */ - int freeze_count; /* Times of froze up */ - int false_positives; /* Times of reported #ff's */ u32 config_space[16]; /* Saved PCI config space */ struct eeh_pe *pe; /* Associated PE */ struct list_head list; /* Form link list in the PE */ diff --git a/arch/powerpc/platforms/pseries/eeh.c b/arch/powerpc/platforms/pseries/eeh.c index 56a022b..6d43230 100644 --- a/arch/powerpc/platforms/pseries/eeh.c +++ b/arch/powerpc/platforms/pseries/eeh.c @@ -537,7 +537,7 @@ static void eeh_reset_pe_once(struct eeh_pe *pe) * pci slot reset line is dropped. Make sure we don't miss * these, and clear the flag now. */ - eeh_pe_state_clear(pe, EEH_MODE_ISOLATED); + eeh_pe_state_clear(pe, EEH_PE_ISOLATED); eeh_ops->reset(pe, EEH_RESET_DEACTIVATE); @@ -625,9 +625,6 @@ static void *eeh_early_enable(struct device_node *dn, void *data) edev->class_code = 0; edev->mode = 0; - edev->check_count = 0; - edev->freeze_count = 0; - edev->false_positives = 0; if (!of_device_is_available(dn)) return NULL; @@ -637,10 +634,8 @@ static void *eeh_early_enable(struct device_node *dn, void *data) return NULL; /* There is nothing to check on PCI to ISA bridges */ - if (dn->type && !strcmp(dn->type, "isa")) { - edev->mode |= EEH_MODE_NOCHECK; + if (dn->type && !strcmp(dn->type, "isa")) return NULL; - } edev->class_code = *class_code; /* Ok... see if this device supports EEH. Some do, some don't, @@ -679,7 +674,6 @@ static void *eeh_early_enable(struct device_node *dn, void *data) if (enable) { eeh_subsystem_enabled = 1; - edev->mode |= EEH_MODE_SUPPORTED; eeh_add_to_parent_pe(edev); @@ -692,9 +686,8 @@ static void *eeh_early_enable(struct device_node *dn, void *data) * EEH parent, in which case we mark it as supported. */ if (dn->parent && of_node_to_eeh_dev(dn->parent) && - (of_node_to_eeh_dev(dn->parent)->mode & EEH_MODE_SUPPORTED)) { + of_node_to_eeh_dev(dn->parent)->pe) { /* Parent supports EEH. */ - edev->mode |= EEH_MODE_SUPPORTED; edev->config_addr = of_node_to_eeh_dev(dn->parent)->config_addr; edev->pe_config_addr = of_node_to_eeh_dev(dn->parent)->pe_config_addr; diff --git a/arch/powerpc/platforms/pseries/eeh_cache.c b/arch/powerpc/platforms/pseries/eeh_cache.c index f50b717..a191057 100644 --- a/arch/powerpc/platforms/pseries/eeh_cache.c +++ b/arch/powerpc/platforms/pseries/eeh_cache.c @@ -192,8 +192,7 @@ static void __pci_addr_cache_insert_device(struct pci_dev *dev) } /* Skip any devices for which EEH is not enabled. */ - if (!(edev->mode & EEH_MODE_SUPPORTED) || - edev->mode & EEH_MODE_NOCHECK) { + if (!edev->pe) { #ifdef DEBUG pr_info("PCI: skip building address cache for=%s - %s\n", pci_name(dev), dn->full_name); diff --git a/arch/powerpc/platforms/pseries/eeh_driver.c b/arch/powerpc/platforms/pseries/eeh_driver.c index 343c807..8370ce7 100644 --- a/arch/powerpc/platforms/pseries/eeh_driver.c +++ b/arch/powerpc/platforms/pseries/eeh_driver.c @@ -93,7 +93,7 @@ static void eeh_disable_irq(struct pci_dev *dev) if (!irq_has_action(dev->irq)) return; - edev->mode |= EEH_MODE_IRQ_DISABLED; + edev->mode |= EEH_DEV_IRQ_DISABLED; disable_irq_nosync(dev->irq); } @@ -108,8 +108,8 @@ static void eeh_enable_irq(struct pci_dev *dev) { struct eeh_dev *edev = pci_dev_to_eeh_dev(dev); - if ((edev->mode) & EEH_MODE_IRQ_DISABLED) { - edev->mode &= ~EEH_MODE_IRQ_DISABLED; + if ((edev->mode) & EEH_DEV_IRQ_DISABLED) { + edev->mode &= ~EEH_DEV_IRQ_DISABLED; enable_irq(dev->irq); } } diff --git a/arch/powerpc/platforms/pseries/eeh_sysfs.c b/arch/powerpc/platforms/pseries/eeh_sysfs.c index 243b351..d377083 100644 --- a/arch/powerpc/platforms/pseries/eeh_sysfs.c +++ b/arch/powerpc/platforms/pseries/eeh_sysfs.c @@ -53,9 +53,6 @@ static DEVICE_ATTR(_name, S_IRUGO, eeh_show_##_name, NULL); EEH_SHOW_ATTR(eeh_mode, mode, "0x%x"); EEH_SHOW_ATTR(eeh_config_addr, config_addr, "0x%x"); EEH_SHOW_ATTR(eeh_pe_config_addr, pe_config_addr, "0x%x"); -EEH_SHOW_ATTR(eeh_check_count, check_count, "%d" ); -EEH_SHOW_ATTR(eeh_freeze_count, freeze_count, "%d" ); -EEH_SHOW_ATTR(eeh_false_positives, false_positives, "%d" ); void eeh_sysfs_add_device(struct pci_dev *pdev) { @@ -64,9 +61,6 @@ void eeh_sysfs_add_device(struct pci_dev *pdev) rc += device_create_file(&pdev->dev, &dev_attr_eeh_mode); rc += device_create_file(&pdev->dev, &dev_attr_eeh_config_addr); rc += device_create_file(&pdev->dev, &dev_attr_eeh_pe_config_addr); - rc += device_create_file(&pdev->dev, &dev_attr_eeh_check_count); - rc += device_create_file(&pdev->dev, &dev_attr_eeh_false_positives); - rc += device_create_file(&pdev->dev, &dev_attr_eeh_freeze_count); if (rc) printk(KERN_WARNING "EEH: Unable to create sysfs entries\n"); @@ -77,8 +71,5 @@ void eeh_sysfs_remove_device(struct pci_dev *pdev) device_remove_file(&pdev->dev, &dev_attr_eeh_mode); device_remove_file(&pdev->dev, &dev_attr_eeh_config_addr); device_remove_file(&pdev->dev, &dev_attr_eeh_pe_config_addr); - device_remove_file(&pdev->dev, &dev_attr_eeh_check_count); - device_remove_file(&pdev->dev, &dev_attr_eeh_false_positives); - device_remove_file(&pdev->dev, &dev_attr_eeh_freeze_count); } -- cgit v0.10.2 From d7bb88629dd64242fbbd7dd34ecad073afdbafad Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Fri, 7 Sep 2012 22:44:21 +0000 Subject: powerpc/eeh: Probe mode support While EEH module is installed, PCI devices is checked one by one to see if it supports eeh. On different platforms, the PCI devices are referred through different ways when the EEH module is loaded. For example, on pSeries platform, that is done by OF node. However, we would do that by real PCI devices (struct pci_dev) on PowerNV platform in future. So we needs some mechanism to differentiate those cases by classifying them to probe modes, either from OF nodes or real PCI devices. The patch implements the support to eeh probe mode. Also, the EEH on pSeries has set it into EEH_PROBE_MODE_DEVTREE. That means the probe will be done based on OF nodes on pSeries platform. In addition, On pSeries platform, it's done by OF nodes. The patch moves the the probe function from EEH core to platform dependent backend and some cleanup applied. Signed-off-by: Gavin Shan Signed-off-by: Benjamin Herrenschmidt diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h index 792d2d7..895fd0d 100644 --- a/arch/powerpc/include/asm/eeh.h +++ b/arch/powerpc/include/asm/eeh.h @@ -129,6 +129,8 @@ static inline struct pci_dev *eeh_dev_to_pci_dev(struct eeh_dev *edev) struct eeh_ops { char *name; int (*init)(void); + void* (*of_probe)(struct device_node *dn, void *flag); + void* (*dev_probe)(struct pci_dev *dev, void *flag); int (*set_option)(struct eeh_pe *pe, int option); int (*get_pe_addr)(struct eeh_pe *pe); int (*get_state)(struct eeh_pe *pe, int *state); @@ -143,6 +145,25 @@ struct eeh_ops { extern struct eeh_ops *eeh_ops; extern int eeh_subsystem_enabled; extern struct mutex eeh_mutex; +extern int eeh_probe_mode; + +#define EEH_PROBE_MODE_DEV (1<<0) /* From PCI device */ +#define EEH_PROBE_MODE_DEVTREE (1<<1) /* From device tree */ + +static inline void eeh_probe_mode_set(int flag) +{ + eeh_probe_mode = flag; +} + +static inline int eeh_probe_mode_devtree(void) +{ + return (eeh_probe_mode == EEH_PROBE_MODE_DEVTREE); +} + +static inline int eeh_probe_mode_dev(void) +{ + return (eeh_probe_mode == EEH_PROBE_MODE_DEV); +} static inline void eeh_lock(void) { diff --git a/arch/powerpc/include/asm/ppc-pci.h b/arch/powerpc/include/asm/ppc-pci.h index 2a80f08..56d55c7 100644 --- a/arch/powerpc/include/asm/ppc-pci.h +++ b/arch/powerpc/include/asm/ppc-pci.h @@ -54,6 +54,7 @@ struct pci_dev *pci_addr_cache_get_device(unsigned long addr); void eeh_slot_error_detail(struct eeh_pe *pe, int severity); int eeh_pci_enable(struct eeh_pe *pe, int function); int eeh_reset_pe(struct eeh_pe *); +void eeh_save_bars(struct eeh_dev *edev); int rtas_write_config(struct pci_dn *, int where, int size, u32 val); int rtas_read_config(struct pci_dn *, int where, int size, u32 *val); void eeh_pe_state_mark(struct eeh_pe *pe, int state); diff --git a/arch/powerpc/platforms/pseries/eeh.c b/arch/powerpc/platforms/pseries/eeh.c index 6d43230..4bdc278 100644 --- a/arch/powerpc/platforms/pseries/eeh.c +++ b/arch/powerpc/platforms/pseries/eeh.c @@ -92,6 +92,17 @@ struct eeh_ops *eeh_ops = NULL; int eeh_subsystem_enabled; EXPORT_SYMBOL(eeh_subsystem_enabled); +/* + * EEH probe mode support. The intention is to support multiple + * platforms for EEH. Some platforms like pSeries do PCI emunation + * based on device tree. However, other platforms like powernv probe + * PCI devices from hardware. The flag is used to distinguish that. + * In addition, struct eeh_ops::probe would be invoked for particular + * OF node or PCI device so that the corresponding PE would be created + * there. + */ +int eeh_probe_mode; + /* Global EEH mutex */ DEFINE_MUTEX(eeh_mutex); @@ -590,7 +601,7 @@ int eeh_reset_pe(struct eeh_pe *pe) * PCI devices are added individually; but, for the restore, * an entire slot is reset at a time. */ -static void eeh_save_bars(struct eeh_dev *edev) +void eeh_save_bars(struct eeh_dev *edev) { int i; struct device_node *dn; @@ -604,108 +615,6 @@ static void eeh_save_bars(struct eeh_dev *edev) } /** - * eeh_early_enable - Early enable EEH on the indicated device - * @dn: device node - * @data: BUID - * - * Enable EEH functionality on the specified PCI device. The function - * is expected to be called before real PCI probing is done. However, - * the PHBs have been initialized at this point. - */ -static void *eeh_early_enable(struct device_node *dn, void *data) -{ - int ret; - const u32 *class_code = of_get_property(dn, "class-code", NULL); - const u32 *vendor_id = of_get_property(dn, "vendor-id", NULL); - const u32 *device_id = of_get_property(dn, "device-id", NULL); - const u32 *regs; - int enable; - struct eeh_dev *edev = of_node_to_eeh_dev(dn); - struct eeh_pe pe; - - edev->class_code = 0; - edev->mode = 0; - - if (!of_device_is_available(dn)) - return NULL; - - /* Ignore bad nodes. */ - if (!class_code || !vendor_id || !device_id) - return NULL; - - /* There is nothing to check on PCI to ISA bridges */ - if (dn->type && !strcmp(dn->type, "isa")) - return NULL; - edev->class_code = *class_code; - - /* Ok... see if this device supports EEH. Some do, some don't, - * and the only way to find out is to check each and every one. - */ - regs = of_get_property(dn, "reg", NULL); - if (regs) { - /* Initialize the fake PE */ - memset(&pe, 0, sizeof(struct eeh_pe)); - pe.phb = edev->phb; - pe.config_addr = regs[0]; - - /* First register entry is addr (00BBSS00) */ - /* Try to enable eeh */ - ret = eeh_ops->set_option(&pe, EEH_OPT_ENABLE); - - enable = 0; - if (ret == 0) { - edev->config_addr = regs[0]; - - /* If the newer, better, ibm,get-config-addr-info is supported, - * then use that instead. - */ - edev->pe_config_addr = eeh_ops->get_pe_addr(&pe); - pe.addr = edev->pe_config_addr; - - /* Some older systems (Power4) allow the - * ibm,set-eeh-option call to succeed even on nodes - * where EEH is not supported. Verify support - * explicitly. - */ - ret = eeh_ops->get_state(&pe, NULL); - if (ret > 0 && ret != EEH_STATE_NOT_SUPPORT) - enable = 1; - } - - if (enable) { - eeh_subsystem_enabled = 1; - - eeh_add_to_parent_pe(edev); - - pr_debug("EEH: %s: eeh enabled, config=%x pe_config=%x\n", - dn->full_name, edev->config_addr, - edev->pe_config_addr); - } else { - - /* This device doesn't support EEH, but it may have an - * EEH parent, in which case we mark it as supported. - */ - if (dn->parent && of_node_to_eeh_dev(dn->parent) && - of_node_to_eeh_dev(dn->parent)->pe) { - /* Parent supports EEH. */ - edev->config_addr = of_node_to_eeh_dev(dn->parent)->config_addr; - edev->pe_config_addr = of_node_to_eeh_dev(dn->parent)->pe_config_addr; - - eeh_add_to_parent_pe(edev); - - return NULL; - } - } - } else { - printk(KERN_WARNING "EEH: %s: unable to get reg property.\n", - dn->full_name); - } - - eeh_save_bars(edev); - return NULL; -} - -/** * eeh_ops_register - Register platform dependent EEH operations * @ops: platform dependent EEH operations * @@ -790,15 +699,18 @@ static int __init eeh_init(void) raw_spin_lock_init(&confirm_error_lock); /* Enable EEH for all adapters */ - list_for_each_entry_safe(hose, tmp, &hose_list, list_node) { - phb = hose->dn; - traverse_pci_devices(phb, eeh_early_enable, NULL); + if (eeh_probe_mode_devtree()) { + list_for_each_entry_safe(hose, tmp, + &hose_list, list_node) { + phb = hose->dn; + traverse_pci_devices(phb, eeh_ops->of_probe, NULL); + } } if (eeh_subsystem_enabled) - printk(KERN_INFO "EEH: PCI Enhanced I/O Error Handling Enabled\n"); + pr_info("EEH: PCI Enhanced I/O Error Handling Enabled\n"); else - printk(KERN_WARNING "EEH: No capable adapters found\n"); + pr_warning("EEH: No capable adapters found\n"); return ret; } @@ -829,7 +741,8 @@ static void eeh_add_device_early(struct device_node *dn) if (NULL == phb || 0 == phb->buid) return; - eeh_early_enable(dn, NULL); + /* FIXME: hotplug support on POWERNV */ + eeh_ops->of_probe(dn, NULL); } /** diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c index fdeef77..19506f9 100644 --- a/arch/powerpc/platforms/pseries/eeh_pseries.c +++ b/arch/powerpc/platforms/pseries/eeh_pseries.c @@ -129,10 +129,104 @@ static int pseries_eeh_init(void) eeh_error_buf_size = RTAS_ERROR_LOG_MAX; } + /* Set EEH probe mode */ + eeh_probe_mode_set(EEH_PROBE_MODE_DEVTREE); + return 0; } /** + * pseries_eeh_of_probe - EEH probe on the given device + * @dn: OF node + * @flag: Unused + * + * When EEH module is installed during system boot, all PCI devices + * are checked one by one to see if it supports EEH. The function + * is introduced for the purpose. + */ +static void *pseries_eeh_of_probe(struct device_node *dn, void *flag) +{ + struct eeh_dev *edev; + struct eeh_pe pe; + const u32 *class_code, *vendor_id, *device_id; + const u32 *regs; + int enable = 0; + int ret; + + /* Retrieve OF node and eeh device */ + edev = of_node_to_eeh_dev(dn); + if (!of_device_is_available(dn)) + return NULL; + + /* Retrieve class/vendor/device IDs */ + class_code = of_get_property(dn, "class-code", NULL); + vendor_id = of_get_property(dn, "vendor-id", NULL); + device_id = of_get_property(dn, "device-id", NULL); + + /* Skip for bad OF node or PCI-ISA bridge */ + if (!class_code || !vendor_id || !device_id) + return NULL; + if (dn->type && !strcmp(dn->type, "isa")) + return NULL; + + /* Update class code and mode of eeh device */ + edev->class_code = *class_code; + edev->mode = 0; + + /* Retrieve the device address */ + regs = of_get_property(dn, "reg", NULL); + if (!regs) { + pr_warning("%s: OF node property %s::reg not found\n", + __func__, dn->full_name); + return NULL; + } + + /* Initialize the fake PE */ + memset(&pe, 0, sizeof(struct eeh_pe)); + pe.phb = edev->phb; + pe.config_addr = regs[0]; + + /* Enable EEH on the device */ + ret = eeh_ops->set_option(&pe, EEH_OPT_ENABLE); + if (!ret) { + edev->config_addr = regs[0]; + /* Retrieve PE address */ + edev->pe_config_addr = eeh_ops->get_pe_addr(&pe); + pe.addr = edev->pe_config_addr; + + /* Some older systems (Power4) allow the ibm,set-eeh-option + * call to succeed even on nodes where EEH is not supported. + * Verify support explicitly. + */ + ret = eeh_ops->get_state(&pe, NULL); + if (ret > 0 && ret != EEH_STATE_NOT_SUPPORT) + enable = 1; + + if (enable) { + eeh_subsystem_enabled = 1; + eeh_add_to_parent_pe(edev); + + pr_debug("%s: EEH enabled on %s PHB#%d-PE#%x, config addr#%x\n", + __func__, dn->full_name, pe.phb->global_number, + pe.addr, pe.config_addr); + } else if (dn->parent && of_node_to_eeh_dev(dn->parent) && + (of_node_to_eeh_dev(dn->parent))->pe) { + /* This device doesn't support EEH, but it may have an + * EEH parent, in which case we mark it as supported. + */ + edev->config_addr = of_node_to_eeh_dev(dn->parent)->config_addr; + edev->pe_config_addr = of_node_to_eeh_dev(dn->parent)->pe_config_addr; + eeh_add_to_parent_pe(edev); + } + } + + /* Save memory bars */ + eeh_save_bars(edev); + + return NULL; +} + +/** * pseries_eeh_set_option - Initialize EEH or MMIO/DMA reenable * @pe: EEH PE * @option: operation to be issued @@ -523,6 +617,8 @@ static int pseries_eeh_write_config(struct device_node *dn, int where, int size, static struct eeh_ops pseries_eeh_ops = { .name = "pseries", .init = pseries_eeh_init, + .of_probe = pseries_eeh_of_probe, + .dev_probe = NULL, .set_option = pseries_eeh_set_option, .get_pe_addr = pseries_eeh_get_pe_addr, .get_state = pseries_eeh_get_state, -- cgit v0.10.2 From f8f7d63fd96ead101415a1302035137a866f8998 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Fri, 7 Sep 2012 22:44:22 +0000 Subject: powerpc/eeh: Trace eeh device from I/O cache The idea comes from Benjamin Herrenschmidt. The eeh cache helps fetching the pci device according to the given I/O address. Since the eeh cache is serving for eeh, it's reasonable for eeh cache to trace eeh device except pci device. The patch make eeh cache to trace eeh device. Also, the major eeh entry function eeh_dn_check_failure has been renamed to eeh_dev_check_failure since it will take eeh device as input parameter. Signed-off-by: Gavin Shan Signed-off-by: Benjamin Herrenschmidt diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h index 895fd0d..ebfdb7c 100644 --- a/arch/powerpc/include/asm/eeh.h +++ b/arch/powerpc/include/asm/eeh.h @@ -196,7 +196,7 @@ int __init eeh_ops_register(struct eeh_ops *ops); int __exit eeh_ops_unregister(const char *name); unsigned long eeh_check_failure(const volatile void __iomem *token, unsigned long val); -int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev); +int eeh_dev_check_failure(struct eeh_dev *edev); void __init pci_addr_cache_build(void); void eeh_add_device_tree_early(struct device_node *); void eeh_add_device_tree_late(struct pci_bus *); @@ -231,10 +231,7 @@ static inline unsigned long eeh_check_failure(const volatile void __iomem *token return val; } -static inline int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev) -{ - return 0; -} +#define eeh_dev_check_failure(x) (0) static inline void pci_addr_cache_build(void) { } diff --git a/arch/powerpc/include/asm/pci-bridge.h b/arch/powerpc/include/asm/pci-bridge.h index 8cccbee..973df4d 100644 --- a/arch/powerpc/include/asm/pci-bridge.h +++ b/arch/powerpc/include/asm/pci-bridge.h @@ -184,6 +184,8 @@ static inline struct eeh_dev *of_node_to_eeh_dev(struct device_node *dn) { return PCI_DN(dn)->edev; } +#else +#define of_node_to_eeh_dev(x) (NULL) #endif /** Find the bus corresponding to the indicated device node */ diff --git a/arch/powerpc/include/asm/ppc-pci.h b/arch/powerpc/include/asm/ppc-pci.h index 56d55c7..962a902 100644 --- a/arch/powerpc/include/asm/ppc-pci.h +++ b/arch/powerpc/include/asm/ppc-pci.h @@ -50,7 +50,7 @@ extern int rtas_setup_phb(struct pci_controller *phb); void pci_addr_cache_build(void); void pci_addr_cache_insert_device(struct pci_dev *dev); void pci_addr_cache_remove_device(struct pci_dev *dev); -struct pci_dev *pci_addr_cache_get_device(unsigned long addr); +struct eeh_dev *pci_addr_cache_get_device(unsigned long addr); void eeh_slot_error_detail(struct eeh_pe *pe, int severity); int eeh_pci_enable(struct eeh_pe *pe, int function); int eeh_reset_pe(struct eeh_pe *); diff --git a/arch/powerpc/kernel/rtas_pci.c b/arch/powerpc/kernel/rtas_pci.c index 140735c..6de63e3 100644 --- a/arch/powerpc/kernel/rtas_pci.c +++ b/arch/powerpc/kernel/rtas_pci.c @@ -81,7 +81,7 @@ int rtas_read_config(struct pci_dn *pdn, int where, int size, u32 *val) return PCIBIOS_DEVICE_NOT_FOUND; if (returnval == EEH_IO_ERROR_VALUE(size) && - eeh_dn_check_failure (pdn->node, NULL)) + eeh_dev_check_failure(of_node_to_eeh_dev(pdn->node))) return PCIBIOS_DEVICE_NOT_FOUND; return PCIBIOS_SUCCESSFUL; diff --git a/arch/powerpc/platforms/pseries/eeh.c b/arch/powerpc/platforms/pseries/eeh.c index 4bdc278..9e618424 100644 --- a/arch/powerpc/platforms/pseries/eeh.c +++ b/arch/powerpc/platforms/pseries/eeh.c @@ -270,9 +270,8 @@ static inline unsigned long eeh_token_to_phys(unsigned long token) } /** - * eeh_dn_check_failure - Check if all 1's data is due to EEH slot freeze - * @dn: device node - * @dev: pci device, if known + * eeh_dev_check_failure - Check if all 1's data is due to EEH slot freeze + * @edev: eeh device * * Check for an EEH failure for the given device node. Call this * routine if the result of a read was all 0xff's and you want to @@ -284,12 +283,13 @@ static inline unsigned long eeh_token_to_phys(unsigned long token) * * It is safe to call this routine in an interrupt context. */ -int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev) +int eeh_dev_check_failure(struct eeh_dev *edev) { int ret; unsigned long flags; + struct device_node *dn; + struct pci_dev *dev; struct eeh_pe *pe; - struct eeh_dev *edev; int rc = 0; const char *location; @@ -298,15 +298,12 @@ int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev) if (!eeh_subsystem_enabled) return 0; - if (dn) { - edev = of_node_to_eeh_dev(dn); - } else if (dev) { - edev = pci_dev_to_eeh_dev(dev); - dn = pci_device_to_OF_node(dev); - } else { + if (!edev) { eeh_stats.no_dn++; return 0; } + dn = eeh_dev_to_of_node(edev); + dev = eeh_dev_to_pci_dev(edev); pe = edev->pe; /* Access to IO BARs might get this far and still not want checking. */ @@ -393,7 +390,7 @@ dn_unlock: return rc; } -EXPORT_SYMBOL_GPL(eeh_dn_check_failure); +EXPORT_SYMBOL_GPL(eeh_dev_check_failure); /** * eeh_check_failure - Check if all 1's data is due to EEH slot freeze @@ -410,21 +407,19 @@ EXPORT_SYMBOL_GPL(eeh_dn_check_failure); unsigned long eeh_check_failure(const volatile void __iomem *token, unsigned long val) { unsigned long addr; - struct pci_dev *dev; - struct device_node *dn; + struct eeh_dev *edev; /* Finding the phys addr + pci device; this is pretty quick. */ addr = eeh_token_to_phys((unsigned long __force) token); - dev = pci_addr_cache_get_device(addr); - if (!dev) { + edev = pci_addr_cache_get_device(addr); + if (!edev) { eeh_stats.no_device++; return val; } - dn = pci_device_to_OF_node(dev); - eeh_dn_check_failure(dn, dev); + eeh_dev_check_failure(edev); - pci_dev_put(dev); + pci_dev_put(eeh_dev_to_pci_dev(edev)); return val; } diff --git a/arch/powerpc/platforms/pseries/eeh_cache.c b/arch/powerpc/platforms/pseries/eeh_cache.c index a191057..6c5ef75 100644 --- a/arch/powerpc/platforms/pseries/eeh_cache.c +++ b/arch/powerpc/platforms/pseries/eeh_cache.c @@ -50,6 +50,7 @@ struct pci_io_addr_range { struct rb_node rb_node; unsigned long addr_lo; unsigned long addr_hi; + struct eeh_dev *edev; struct pci_dev *pcidev; unsigned int flags; }; @@ -59,7 +60,7 @@ static struct pci_io_addr_cache { spinlock_t piar_lock; } pci_io_addr_cache_root; -static inline struct pci_dev *__pci_addr_cache_get_device(unsigned long addr) +static inline struct eeh_dev *__pci_addr_cache_get_device(unsigned long addr) { struct rb_node *n = pci_io_addr_cache_root.rb_root.rb_node; @@ -74,7 +75,7 @@ static inline struct pci_dev *__pci_addr_cache_get_device(unsigned long addr) n = n->rb_right; } else { pci_dev_get(piar->pcidev); - return piar->pcidev; + return piar->edev; } } } @@ -92,15 +93,15 @@ static inline struct pci_dev *__pci_addr_cache_get_device(unsigned long addr) * from zero (that is, they do *not* have pci_io_addr added in). * It is safe to call this function within an interrupt. */ -struct pci_dev *pci_addr_cache_get_device(unsigned long addr) +struct eeh_dev *pci_addr_cache_get_device(unsigned long addr) { - struct pci_dev *dev; + struct eeh_dev *edev; unsigned long flags; spin_lock_irqsave(&pci_io_addr_cache_root.piar_lock, flags); - dev = __pci_addr_cache_get_device(addr); + edev = __pci_addr_cache_get_device(addr); spin_unlock_irqrestore(&pci_io_addr_cache_root.piar_lock, flags); - return dev; + return edev; } #ifdef DEBUG @@ -158,6 +159,7 @@ pci_addr_cache_insert(struct pci_dev *dev, unsigned long alo, pci_dev_get(dev); piar->addr_lo = alo; piar->addr_hi = ahi; + piar->edev = pci_dev_to_eeh_dev(dev); piar->pcidev = dev; piar->flags = flags; -- cgit v0.10.2 From 3ab96a02e829131c19db8ed99239289acdb4e3dc Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Fri, 7 Sep 2012 22:44:23 +0000 Subject: powerpc/eeh: Cleanup on EEH PCI address cache The patch does cleanup on EEH PCI address cache based on the fact EEH core is the only user of the component. * Cleanup on function names so that they all have prefix "eeh" and looks more short. * Function printk() has been replaced with pr_debug() or pr_warning() accordingly. Signed-off-by: Gavin Shan Signed-off-by: Benjamin Herrenschmidt diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h index ebfdb7c..58c5ee6 100644 --- a/arch/powerpc/include/asm/eeh.h +++ b/arch/powerpc/include/asm/eeh.h @@ -197,7 +197,7 @@ int __exit eeh_ops_unregister(const char *name); unsigned long eeh_check_failure(const volatile void __iomem *token, unsigned long val); int eeh_dev_check_failure(struct eeh_dev *edev); -void __init pci_addr_cache_build(void); +void __init eeh_addr_cache_build(void); void eeh_add_device_tree_early(struct device_node *); void eeh_add_device_tree_late(struct pci_bus *); void eeh_remove_bus_device(struct pci_dev *); @@ -233,7 +233,7 @@ static inline unsigned long eeh_check_failure(const volatile void __iomem *token #define eeh_dev_check_failure(x) (0) -static inline void pci_addr_cache_build(void) { } +static inline void eeh_addr_cache_build(void) { } static inline void eeh_add_device_tree_early(struct device_node *dn) { } diff --git a/arch/powerpc/include/asm/ppc-pci.h b/arch/powerpc/include/asm/ppc-pci.h index 962a902..ed57fa7 100644 --- a/arch/powerpc/include/asm/ppc-pci.h +++ b/arch/powerpc/include/asm/ppc-pci.h @@ -47,10 +47,9 @@ extern int rtas_setup_phb(struct pci_controller *phb); #ifdef CONFIG_EEH -void pci_addr_cache_build(void); -void pci_addr_cache_insert_device(struct pci_dev *dev); -void pci_addr_cache_remove_device(struct pci_dev *dev); -struct eeh_dev *pci_addr_cache_get_device(unsigned long addr); +void eeh_addr_cache_insert_dev(struct pci_dev *dev); +void eeh_addr_cache_rmv_dev(struct pci_dev *dev); +struct eeh_dev *eeh_addr_cache_get_dev(unsigned long addr); void eeh_slot_error_detail(struct eeh_pe *pe, int severity); int eeh_pci_enable(struct eeh_pe *pe, int function); int eeh_reset_pe(struct eeh_pe *); diff --git a/arch/powerpc/platforms/pseries/eeh.c b/arch/powerpc/platforms/pseries/eeh.c index 9e618424..18c168b 100644 --- a/arch/powerpc/platforms/pseries/eeh.c +++ b/arch/powerpc/platforms/pseries/eeh.c @@ -411,7 +411,7 @@ unsigned long eeh_check_failure(const volatile void __iomem *token, unsigned lon /* Finding the phys addr + pci device; this is pretty quick. */ addr = eeh_token_to_phys((unsigned long __force) token); - edev = pci_addr_cache_get_device(addr); + edev = eeh_addr_cache_get_dev(addr); if (!edev) { eeh_stats.no_device++; return val; @@ -787,7 +787,7 @@ static void eeh_add_device_late(struct pci_dev *dev) edev->pdev = dev; dev->dev.archdata.edev = edev; - pci_addr_cache_insert_device(dev); + eeh_addr_cache_insert_dev(dev); eeh_sysfs_add_device(dev); } @@ -844,7 +844,7 @@ static void eeh_remove_device(struct pci_dev *dev) pci_dev_put(dev); eeh_rmv_from_parent_pe(edev); - pci_addr_cache_remove_device(dev); + eeh_addr_cache_rmv_dev(dev); eeh_sysfs_remove_device(dev); } diff --git a/arch/powerpc/platforms/pseries/eeh_cache.c b/arch/powerpc/platforms/pseries/eeh_cache.c index 6c5ef75..5a4c879 100644 --- a/arch/powerpc/platforms/pseries/eeh_cache.c +++ b/arch/powerpc/platforms/pseries/eeh_cache.c @@ -60,7 +60,7 @@ static struct pci_io_addr_cache { spinlock_t piar_lock; } pci_io_addr_cache_root; -static inline struct eeh_dev *__pci_addr_cache_get_device(unsigned long addr) +static inline struct eeh_dev *__eeh_addr_cache_get_device(unsigned long addr) { struct rb_node *n = pci_io_addr_cache_root.rb_root.rb_node; @@ -84,7 +84,7 @@ static inline struct eeh_dev *__pci_addr_cache_get_device(unsigned long addr) } /** - * pci_addr_cache_get_device - Get device, given only address + * eeh_addr_cache_get_dev - Get device, given only address * @addr: mmio (PIO) phys address or i/o port number * * Given an mmio phys address, or a port number, find a pci device @@ -93,13 +93,13 @@ static inline struct eeh_dev *__pci_addr_cache_get_device(unsigned long addr) * from zero (that is, they do *not* have pci_io_addr added in). * It is safe to call this function within an interrupt. */ -struct eeh_dev *pci_addr_cache_get_device(unsigned long addr) +struct eeh_dev *eeh_addr_cache_get_dev(unsigned long addr) { struct eeh_dev *edev; unsigned long flags; spin_lock_irqsave(&pci_io_addr_cache_root.piar_lock, flags); - edev = __pci_addr_cache_get_device(addr); + edev = __eeh_addr_cache_get_device(addr); spin_unlock_irqrestore(&pci_io_addr_cache_root.piar_lock, flags); return edev; } @@ -109,7 +109,7 @@ struct eeh_dev *pci_addr_cache_get_device(unsigned long addr) * Handy-dandy debug print routine, does nothing more * than print out the contents of our addr cache. */ -static void pci_addr_cache_print(struct pci_io_addr_cache *cache) +static void eeh_addr_cache_print(struct pci_io_addr_cache *cache) { struct rb_node *n; int cnt = 0; @@ -118,7 +118,7 @@ static void pci_addr_cache_print(struct pci_io_addr_cache *cache) while (n) { struct pci_io_addr_range *piar; piar = rb_entry(n, struct pci_io_addr_range, rb_node); - printk(KERN_DEBUG "PCI: %s addr range %d [%lx-%lx]: %s\n", + pr_debug("PCI: %s addr range %d [%lx-%lx]: %s\n", (piar->flags & IORESOURCE_IO) ? "i/o" : "mem", cnt, piar->addr_lo, piar->addr_hi, pci_name(piar->pcidev)); cnt++; @@ -129,7 +129,7 @@ static void pci_addr_cache_print(struct pci_io_addr_cache *cache) /* Insert address range into the rb tree. */ static struct pci_io_addr_range * -pci_addr_cache_insert(struct pci_dev *dev, unsigned long alo, +eeh_addr_cache_insert(struct pci_dev *dev, unsigned long alo, unsigned long ahi, unsigned int flags) { struct rb_node **p = &pci_io_addr_cache_root.rb_root.rb_node; @@ -147,7 +147,7 @@ pci_addr_cache_insert(struct pci_dev *dev, unsigned long alo, } else { if (dev != piar->pcidev || alo != piar->addr_lo || ahi != piar->addr_hi) { - printk(KERN_WARNING "PIAR: overlapping address range\n"); + pr_warning("PIAR: overlapping address range\n"); } return piar; } @@ -164,7 +164,7 @@ pci_addr_cache_insert(struct pci_dev *dev, unsigned long alo, piar->flags = flags; #ifdef DEBUG - printk(KERN_DEBUG "PIAR: insert range=[%lx:%lx] dev=%s\n", + pr_debug("PIAR: insert range=[%lx:%lx] dev=%s\n", alo, ahi, pci_name(dev)); #endif @@ -174,7 +174,7 @@ pci_addr_cache_insert(struct pci_dev *dev, unsigned long alo, return piar; } -static void __pci_addr_cache_insert_device(struct pci_dev *dev) +static void __eeh_addr_cache_insert_dev(struct pci_dev *dev) { struct device_node *dn; struct eeh_dev *edev; @@ -182,7 +182,7 @@ static void __pci_addr_cache_insert_device(struct pci_dev *dev) dn = pci_device_to_OF_node(dev); if (!dn) { - printk(KERN_WARNING "PCI: no pci dn found for dev=%s\n", pci_name(dev)); + pr_warning("PCI: no pci dn found for dev=%s\n", pci_name(dev)); return; } @@ -213,19 +213,19 @@ static void __pci_addr_cache_insert_device(struct pci_dev *dev) continue; if (start == 0 || ~start == 0 || end == 0 || ~end == 0) continue; - pci_addr_cache_insert(dev, start, end, flags); + eeh_addr_cache_insert(dev, start, end, flags); } } /** - * pci_addr_cache_insert_device - Add a device to the address cache + * eeh_addr_cache_insert_dev - Add a device to the address cache * @dev: PCI device whose I/O addresses we are interested in. * * In order to support the fast lookup of devices based on addresses, * we maintain a cache of devices that can be quickly searched. * This routine adds a device to that cache. */ -void pci_addr_cache_insert_device(struct pci_dev *dev) +void eeh_addr_cache_insert_dev(struct pci_dev *dev) { unsigned long flags; @@ -234,11 +234,11 @@ void pci_addr_cache_insert_device(struct pci_dev *dev) return; spin_lock_irqsave(&pci_io_addr_cache_root.piar_lock, flags); - __pci_addr_cache_insert_device(dev); + __eeh_addr_cache_insert_dev(dev); spin_unlock_irqrestore(&pci_io_addr_cache_root.piar_lock, flags); } -static inline void __pci_addr_cache_remove_device(struct pci_dev *dev) +static inline void __eeh_addr_cache_rmv_dev(struct pci_dev *dev) { struct rb_node *n; @@ -259,7 +259,7 @@ restart: } /** - * pci_addr_cache_remove_device - remove pci device from addr cache + * eeh_addr_cache_rmv_dev - remove pci device from addr cache * @dev: device to remove * * Remove a device from the addr-cache tree. @@ -267,17 +267,17 @@ restart: * the tree multiple times (once per resource). * But so what; device removal doesn't need to be that fast. */ -void pci_addr_cache_remove_device(struct pci_dev *dev) +void eeh_addr_cache_rmv_dev(struct pci_dev *dev) { unsigned long flags; spin_lock_irqsave(&pci_io_addr_cache_root.piar_lock, flags); - __pci_addr_cache_remove_device(dev); + __eeh_addr_cache_rmv_dev(dev); spin_unlock_irqrestore(&pci_io_addr_cache_root.piar_lock, flags); } /** - * pci_addr_cache_build - Build a cache of I/O addresses + * eeh_addr_cache_build - Build a cache of I/O addresses * * Build a cache of pci i/o addresses. This cache will be used to * find the pci device that corresponds to a given address. @@ -285,7 +285,7 @@ void pci_addr_cache_remove_device(struct pci_dev *dev) * Must be run late in boot process, after the pci controllers * have been scanned for devices (after all device resources are known). */ -void __init pci_addr_cache_build(void) +void __init eeh_addr_cache_build(void) { struct device_node *dn; struct eeh_dev *edev; @@ -294,7 +294,7 @@ void __init pci_addr_cache_build(void) spin_lock_init(&pci_io_addr_cache_root.piar_lock); for_each_pci_dev(dev) { - pci_addr_cache_insert_device(dev); + eeh_addr_cache_insert_dev(dev); dn = pci_device_to_OF_node(dev); if (!dn) @@ -313,7 +313,7 @@ void __init pci_addr_cache_build(void) #ifdef DEBUG /* Verify tree built up above, echo back the list of addrs. */ - pci_addr_cache_print(&pci_io_addr_cache_root); + eeh_addr_cache_print(&pci_io_addr_cache_root); #endif } diff --git a/arch/powerpc/platforms/pseries/pci.c b/arch/powerpc/platforms/pseries/pci.c index 2c6ded2..56b864d 100644 --- a/arch/powerpc/platforms/pseries/pci.c +++ b/arch/powerpc/platforms/pseries/pci.c @@ -73,7 +73,7 @@ void __init pSeries_final_fixup(void) { pSeries_request_regions(); - pci_addr_cache_build(); + eeh_addr_cache_build(); } /* -- cgit v0.10.2 From 4474ef055c5d8cb8eaf002d69e49af71e3aa3a88 Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Thu, 6 Sep 2012 21:24:56 +0000 Subject: powerpc: Rework set_dabr so it can take a DABRX value as well Rework set_dabr to take a DABRX value as well. Both the pseries and PS3 hypervisors do some checks on the DABRX values that are passed in the hcall. This patch stops bogus values from being passed to hypervisor. Also, in the case where we are clearing the breakpoint, where DABR and DABRX are zero, we modify the DABRX value to make it valid so that the hcall won't fail. Signed-off-by: Michael Neuling Signed-off-by: Benjamin Herrenschmidt diff --git a/arch/powerpc/include/asm/debug.h b/arch/powerpc/include/asm/debug.h index 716d2f0..32de257 100644 --- a/arch/powerpc/include/asm/debug.h +++ b/arch/powerpc/include/asm/debug.h @@ -44,7 +44,7 @@ static inline int debugger_dabr_match(struct pt_regs *regs) { return 0; } static inline int debugger_fault_handler(struct pt_regs *regs) { return 0; } #endif -extern int set_dabr(unsigned long dabr); +extern int set_dabr(unsigned long dabr, unsigned long dabrx); #ifdef CONFIG_PPC_ADV_DEBUG_REGS extern void do_send_trap(struct pt_regs *regs, unsigned long address, unsigned long error_code, int signal_code, int brkpt); diff --git a/arch/powerpc/include/asm/hw_breakpoint.h b/arch/powerpc/include/asm/hw_breakpoint.h index 39b323e..c6f48eb 100644 --- a/arch/powerpc/include/asm/hw_breakpoint.h +++ b/arch/powerpc/include/asm/hw_breakpoint.h @@ -61,7 +61,7 @@ extern void ptrace_triggered(struct perf_event *bp, struct perf_sample_data *data, struct pt_regs *regs); static inline void hw_breakpoint_disable(void) { - set_dabr(0); + set_dabr(0, 0); } extern void thread_change_pc(struct task_struct *tsk, struct pt_regs *regs); diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h index 42ce570..236b477 100644 --- a/arch/powerpc/include/asm/machdep.h +++ b/arch/powerpc/include/asm/machdep.h @@ -180,7 +180,8 @@ struct machdep_calls { void (*enable_pmcs)(void); /* Set DABR for this platform, leave empty for default implemenation */ - int (*set_dabr)(unsigned long dabr); + int (*set_dabr)(unsigned long dabr, + unsigned long dabrx); #ifdef CONFIG_PPC32 /* XXX for now */ /* A general init function, called by ppc_init in init/main.c. diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h index 7e7fa13..83efc6e 100644 --- a/arch/powerpc/include/asm/processor.h +++ b/arch/powerpc/include/asm/processor.h @@ -219,6 +219,7 @@ struct thread_struct { #endif /* CONFIG_HAVE_HW_BREAKPOINT */ #endif unsigned long dabr; /* Data address breakpoint register */ + unsigned long dabrx; /* ... extension */ unsigned long trap_nr; /* last trap # on this thread */ #ifdef CONFIG_ALTIVEC /* Complete AltiVec register set */ diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index ad400a5..121a90b 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -208,6 +208,9 @@ #define SPRN_DABRX 0x3F7 /* Data Address Breakpoint Register Extension */ #define DABRX_USER (1UL << 0) #define DABRX_KERNEL (1UL << 1) +#define DABRX_HYP (1UL << 2) +#define DABRX_BTI (1UL << 3) +#define DABRX_ALL (DABRX_BTI | DABRX_HYP | DABRX_KERNEL | DABRX_USER) #define SPRN_DAR 0x013 /* Data Address Register */ #define SPRN_DBCR 0x136 /* e300 Data Breakpoint Control Reg */ #define SPRN_DSISR 0x012 /* Data Storage Interrupt Status Register */ diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c index 6767445..6891d79 100644 --- a/arch/powerpc/kernel/hw_breakpoint.c +++ b/arch/powerpc/kernel/hw_breakpoint.c @@ -73,7 +73,7 @@ int arch_install_hw_breakpoint(struct perf_event *bp) * If so, DABR will be populated in single_step_dabr_instruction(). */ if (current->thread.last_hit_ubp != bp) - set_dabr(info->address | info->type | DABR_TRANSLATION); + set_dabr(info->address | info->type | DABR_TRANSLATION, DABRX_ALL); return 0; } @@ -97,7 +97,7 @@ void arch_uninstall_hw_breakpoint(struct perf_event *bp) } *slot = NULL; - set_dabr(0); + set_dabr(0, 0); } /* @@ -197,7 +197,7 @@ void thread_change_pc(struct task_struct *tsk, struct pt_regs *regs) info = counter_arch_bp(tsk->thread.last_hit_ubp); regs->msr &= ~MSR_SE; - set_dabr(info->address | info->type | DABR_TRANSLATION); + set_dabr(info->address | info->type | DABR_TRANSLATION, DABRX_ALL); tsk->thread.last_hit_ubp = NULL; } @@ -215,7 +215,7 @@ int __kprobes hw_breakpoint_handler(struct die_args *args) unsigned long dar = regs->dar; /* Disable breakpoints during exception handling */ - set_dabr(0); + set_dabr(0, 0); /* * The counter may be concurrently released but that can only @@ -281,7 +281,7 @@ int __kprobes hw_breakpoint_handler(struct die_args *args) if (!info->extraneous_interrupt) perf_bp_event(bp, regs); - set_dabr(info->address | info->type | DABR_TRANSLATION); + set_dabr(info->address | info->type | DABR_TRANSLATION, DABRX_ALL); out: rcu_read_unlock(); return rc; @@ -313,7 +313,7 @@ int __kprobes single_step_dabr_instruction(struct die_args *args) if (!info->extraneous_interrupt) perf_bp_event(bp, regs); - set_dabr(info->address | info->type | DABR_TRANSLATION); + set_dabr(info->address | info->type | DABR_TRANSLATION, DABRX_ALL); current->thread.last_hit_ubp = NULL; /* diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 2e743de..50e504c 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -285,7 +285,7 @@ void do_dabr(struct pt_regs *regs, unsigned long address, return; /* Clear the DABR */ - set_dabr(0); + set_dabr(0, 0); /* Deliver the signal to userspace */ info.si_signo = SIGTRAP; @@ -366,18 +366,19 @@ static void set_debug_reg_defaults(struct thread_struct *thread) { if (thread->dabr) { thread->dabr = 0; - set_dabr(0); + thread->dabrx = 0; + set_dabr(0, 0); } } #endif /* !CONFIG_HAVE_HW_BREAKPOINT */ #endif /* CONFIG_PPC_ADV_DEBUG_REGS */ -int set_dabr(unsigned long dabr) +int set_dabr(unsigned long dabr, unsigned long dabrx) { __get_cpu_var(current_dabr) = dabr; if (ppc_md.set_dabr) - return ppc_md.set_dabr(dabr); + return ppc_md.set_dabr(dabr, dabrx); /* XXX should we have a CPU_FTR_HAS_DABR ? */ #ifdef CONFIG_PPC_ADV_DEBUG_REGS @@ -387,9 +388,8 @@ int set_dabr(unsigned long dabr) #endif #elif defined(CONFIG_PPC_BOOK3S) mtspr(SPRN_DABR, dabr); + mtspr(SPRN_DABRX, dabrx); #endif - - return 0; } @@ -482,7 +482,7 @@ struct task_struct *__switch_to(struct task_struct *prev, */ #ifndef CONFIG_HAVE_HW_BREAKPOINT if (unlikely(__get_cpu_var(current_dabr) != new->thread.dabr)) - set_dabr(new->thread.dabr); + set_dabr(new->thread.dabr, new->thread.dabrx); #endif /* CONFIG_HAVE_HW_BREAKPOINT */ #endif diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c index c10fc28..79d8e56 100644 --- a/arch/powerpc/kernel/ptrace.c +++ b/arch/powerpc/kernel/ptrace.c @@ -960,6 +960,7 @@ int ptrace_set_debugreg(struct task_struct *task, unsigned long addr, thread->ptrace_bps[0] = bp; ptrace_put_breakpoints(task); thread->dabr = data; + thread->dabrx = DABRX_ALL; return 0; } @@ -983,6 +984,7 @@ int ptrace_set_debugreg(struct task_struct *task, unsigned long addr, /* Move contents to the DABR register */ task->thread.dabr = data; + task->thread.dabrx = DABRX_ALL; #else /* CONFIG_PPC_ADV_DEBUG_REGS */ /* As described above, it was assumed 3 bits were passed with the data * address, but we will assume only the mode bits will be passed @@ -1397,6 +1399,7 @@ static long ppc_set_hwdebug(struct task_struct *child, dabr |= DABR_DATA_WRITE; child->thread.dabr = dabr; + child->thread.dabrx = DABRX_ALL; return 1; #endif /* !CONFIG_PPC_ADV_DEBUG_DVCS */ diff --git a/arch/powerpc/kernel/signal.c b/arch/powerpc/kernel/signal.c index 29be271..a2dc757 100644 --- a/arch/powerpc/kernel/signal.c +++ b/arch/powerpc/kernel/signal.c @@ -131,7 +131,7 @@ static int do_signal(struct pt_regs *regs) * triggered inside the kernel. */ if (current->thread.dabr) - set_dabr(current->thread.dabr); + set_dabr(current->thread.dabr, current->thread.dabrx); #endif /* Re-enable the breakpoints for the signal stack */ thread_change_pc(current, regs); diff --git a/arch/powerpc/platforms/cell/beat.c b/arch/powerpc/platforms/cell/beat.c index 852592b..affcf56 100644 --- a/arch/powerpc/platforms/cell/beat.c +++ b/arch/powerpc/platforms/cell/beat.c @@ -136,9 +136,9 @@ ssize_t beat_nvram_get_size(void) return BEAT_NVRAM_SIZE; } -int beat_set_xdabr(unsigned long dabr) +int beat_set_xdabr(unsigned long dabr, unsigned long dabrx) { - if (beat_set_dabr(dabr, DABRX_KERNEL | DABRX_USER)) + if (beat_set_dabr(dabr, dabrx)) return -1; return 0; } diff --git a/arch/powerpc/platforms/cell/beat.h b/arch/powerpc/platforms/cell/beat.h index 32c8efc..bfcb8e3 100644 --- a/arch/powerpc/platforms/cell/beat.h +++ b/arch/powerpc/platforms/cell/beat.h @@ -32,7 +32,7 @@ void beat_get_rtc_time(struct rtc_time *); ssize_t beat_nvram_get_size(void); ssize_t beat_nvram_read(char *, size_t, loff_t *); ssize_t beat_nvram_write(char *, size_t, loff_t *); -int beat_set_xdabr(unsigned long); +int beat_set_xdabr(unsigned long, unsigned long); void beat_power_save(void); void beat_kexec_cpu_down(int, int); diff --git a/arch/powerpc/platforms/ps3/setup.c b/arch/powerpc/platforms/ps3/setup.c index 2d664c5..3f509f8 100644 --- a/arch/powerpc/platforms/ps3/setup.c +++ b/arch/powerpc/platforms/ps3/setup.c @@ -184,11 +184,15 @@ early_param("ps3flash", early_parse_ps3flash); #define prealloc_ps3flash_bounce_buffer() do { } while (0) #endif -static int ps3_set_dabr(unsigned long dabr) +static int ps3_set_dabr(unsigned long dabr, unsigned long dabrx) { - enum {DABR_USER = 1, DABR_KERNEL = 2,}; + /* Have to set at least one bit in the DABRX */ + if (dabrx == 0 && dabr == 0) + dabrx = DABRX_USER; + /* hypervisor only allows us to set BTI, Kernel and user */ + dabrx &= DABRX_BTI | DABRX_KERNEL | DABRX_USER; - return lv1_set_dabr(dabr, DABR_KERNEL | DABR_USER) ? -1 : 0; + return lv1_set_dabr(dabr, dabrx) ? -1 : 0; } static void __init ps3_setup_arch(void) diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index 4a2cd48..a3a6965 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -414,16 +414,20 @@ static int __init pSeries_init_panel(void) } machine_arch_initcall(pseries, pSeries_init_panel); -static int pseries_set_dabr(unsigned long dabr) +static int pseries_set_dabr(unsigned long dabr, unsigned long dabrx) { return plpar_hcall_norets(H_SET_DABR, dabr); } -static int pseries_set_xdabr(unsigned long dabr) +static int pseries_set_xdabr(unsigned long dabr, unsigned long dabrx) { - /* We want to catch accesses from kernel and userspace */ - return plpar_hcall_norets(H_SET_XDABR, dabr, - H_DABRX_KERNEL | H_DABRX_USER); + /* Have to set at least one bit in the DABRX according to PAPR */ + if (dabrx == 0 && dabr == 0) + dabrx = DABRX_USER; + /* PAPR says we can only set kernel and user bits */ + dabrx &= H_DABRX_KERNEL | H_DABRX_USER; + + return plpar_hcall_norets(H_SET_XDABR, dabr, dabrx); } #define CMO_CHARACTERISTICS_TOKEN 44 diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index 9b49c65e..987f441 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -740,7 +740,7 @@ static void insert_bpts(void) static void insert_cpu_bpts(void) { if (dabr.enabled) - set_dabr(dabr.address | (dabr.enabled & 7)); + set_dabr(dabr.address | (dabr.enabled & 7), DABRX_ALL); if (iabr && cpu_has_feature(CPU_FTR_IABR)) mtspr(SPRN_IABR, iabr->address | (iabr->enabled & (BP_IABR|BP_IABR_TE))); @@ -768,7 +768,7 @@ static void remove_bpts(void) static void remove_cpu_bpts(void) { - set_dabr(0); + set_dabr(0, 0); if (cpu_has_feature(CPU_FTR_IABR)) mtspr(SPRN_IABR, 0); } -- cgit v0.10.2 From cd14457304c9d232267a3a76a2a43f1f791e545d Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Thu, 6 Sep 2012 21:24:57 +0000 Subject: powerpc: Dynamically calculate the dabrx based on kernel/user/hypervisor Currently we mark the DABRX to interrupt on all matches (hypervisor/kernel/user and then filter in software. We can be a lot smarter now that we can set the DABRX dynamically. This sets the DABRX based on the flags passed by the user. Signed-off-by: Michael Neuling Signed-off-by: Benjamin Herrenschmidt diff --git a/arch/powerpc/include/asm/hw_breakpoint.h b/arch/powerpc/include/asm/hw_breakpoint.h index c6f48eb..4234245 100644 --- a/arch/powerpc/include/asm/hw_breakpoint.h +++ b/arch/powerpc/include/asm/hw_breakpoint.h @@ -28,6 +28,7 @@ struct arch_hw_breakpoint { unsigned long address; + unsigned long dabrx; int type; u8 len; /* length of the target data symbol */ bool extraneous_interrupt; diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c index 6891d79..a89cae4 100644 --- a/arch/powerpc/kernel/hw_breakpoint.c +++ b/arch/powerpc/kernel/hw_breakpoint.c @@ -73,7 +73,7 @@ int arch_install_hw_breakpoint(struct perf_event *bp) * If so, DABR will be populated in single_step_dabr_instruction(). */ if (current->thread.last_hit_ubp != bp) - set_dabr(info->address | info->type | DABR_TRANSLATION, DABRX_ALL); + set_dabr(info->address | info->type | DABR_TRANSLATION, info->dabrx); return 0; } @@ -170,6 +170,13 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp) info->address = bp->attr.bp_addr; info->len = bp->attr.bp_len; + info->dabrx = DABRX_ALL; + if (bp->attr.exclude_user) + info->dabrx &= ~DABRX_USER; + if (bp->attr.exclude_kernel) + info->dabrx &= ~DABRX_KERNEL; + if (bp->attr.exclude_hv) + info->dabrx &= ~DABRX_HYP; /* * Since breakpoint length can be a maximum of HW_BREAKPOINT_LEN(8) @@ -197,7 +204,7 @@ void thread_change_pc(struct task_struct *tsk, struct pt_regs *regs) info = counter_arch_bp(tsk->thread.last_hit_ubp); regs->msr &= ~MSR_SE; - set_dabr(info->address | info->type | DABR_TRANSLATION, DABRX_ALL); + set_dabr(info->address | info->type | DABR_TRANSLATION, info->dabrx); tsk->thread.last_hit_ubp = NULL; } @@ -281,7 +288,7 @@ int __kprobes hw_breakpoint_handler(struct die_args *args) if (!info->extraneous_interrupt) perf_bp_event(bp, regs); - set_dabr(info->address | info->type | DABR_TRANSLATION, DABRX_ALL); + set_dabr(info->address | info->type | DABR_TRANSLATION, info->dabrx); out: rcu_read_unlock(); return rc; @@ -313,7 +320,7 @@ int __kprobes single_step_dabr_instruction(struct die_args *args) if (!info->extraneous_interrupt) perf_bp_event(bp, regs); - set_dabr(info->address | info->type | DABR_TRANSLATION, DABRX_ALL); + set_dabr(info->address | info->type | DABR_TRANSLATION, info->dabrx); current->thread.last_hit_ubp = NULL; /* diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index a3a6965..e3cb7ae 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -425,7 +425,7 @@ static int pseries_set_xdabr(unsigned long dabr, unsigned long dabrx) if (dabrx == 0 && dabr == 0) dabrx = DABRX_USER; /* PAPR says we can only set kernel and user bits */ - dabrx &= H_DABRX_KERNEL | H_DABRX_USER; + dabrx &= DABRX_KERNEL | DABRX_USER; return plpar_hcall_norets(H_SET_XDABR, dabr, dabrx); } -- cgit v0.10.2 From 4715fed600109e0bd3d6cfa2b925a643173a08b4 Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Thu, 6 Sep 2012 21:24:58 +0000 Subject: powerpc: cleanup old DABRX #defines These are no longer used so get rid of them Signed-off-by: Michael Neuling Signed-off-by: Benjamin Herrenschmidt diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h index 423cf9e..7a86706 100644 --- a/arch/powerpc/include/asm/hvcall.h +++ b/arch/powerpc/include/asm/hvcall.h @@ -152,11 +152,6 @@ #define H_VASI_RESUMED 5 #define H_VASI_COMPLETED 6 -/* DABRX flags */ -#define H_DABRX_HYPERVISOR (1UL<<(63-61)) -#define H_DABRX_KERNEL (1UL<<(63-62)) -#define H_DABRX_USER (1UL<<(63-63)) - /* Each control block has to be on a 4K boundary */ #define H_CB_ALIGNMENT 4096 -- cgit v0.10.2 From 11f63d3fb9c4257b1f005db8d11deba9f992bdf6 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Tue, 4 Sep 2012 15:19:35 +0000 Subject: powerpc/iommu: Add ppc_md.tce_get() callback for use by VFIO The upcoming VFIO support requires a way to know which entry in the TCE map is not empty in order to do cleanup at QEMU exit/crash. This patch adds such functionality to POWERNV platform code. Signed-off-by: Alexey Kardashevskiy Signed-off-by: Benjamin Herrenschmidt diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c index 1e908ae..c01688a 100644 --- a/arch/powerpc/platforms/powernv/pci.c +++ b/arch/powerpc/platforms/powernv/pci.c @@ -446,6 +446,11 @@ static void pnv_tce_free(struct iommu_table *tbl, long index, long npages) pnv_tce_invalidate(tbl, tces, tcep - 1); } +static unsigned long pnv_tce_get(struct iommu_table *tbl, long index) +{ + return ((u64 *)tbl->it_base)[index - tbl->it_offset]; +} + void pnv_pci_setup_iommu_table(struct iommu_table *tbl, void *tce_mem, u64 tce_size, u64 dma_offset) @@ -596,6 +601,7 @@ void __init pnv_pci_init(void) ppc_md.pci_dma_dev_setup = pnv_pci_dma_dev_setup; ppc_md.tce_build = pnv_tce_build; ppc_md.tce_free = pnv_tce_free; + ppc_md.tce_get = pnv_tce_get; ppc_md.pci_probe_mode = pnv_pci_probe_mode; set_pci_dma_ops(&dma_iommu_ops); -- cgit v0.10.2 From 6b5e7229bbd59f0cfce7015fd46736fc93d8c8c3 Mon Sep 17 00:00:00 2001 From: Joe MacDonald Date: Tue, 21 Aug 2012 08:22:28 +0000 Subject: powerpc/mm: Match variable types to API sys_subpage_prot() takes an unsigned long for 'addr' then does some stuff with it and the result is stored in a signed int, i, which is eventually used as the size parameter in a copy_from_user call. Update 'i' to be an unsigned long as well and since 'nw' is used in a size_t context which, depending on whether this is 32- or 64-bit may be unsigned int or unsigned long, switch that to a size_t and always be right. Finally, since we're in the neighbourhood, make the same changes to subpage_prot_clear(). Cc: Paul Mackerras Cc: Benjamin Herrenschmidt Signed-off-by: Joe MacDonald Signed-off-by: Paul Gortmaker Acked-by: Paul Mackerras Signed-off-by: Benjamin Herrenschmidt diff --git a/arch/powerpc/mm/subpage-prot.c b/arch/powerpc/mm/subpage-prot.c index e4f8f1f..7c415dd 100644 --- a/arch/powerpc/mm/subpage-prot.c +++ b/arch/powerpc/mm/subpage-prot.c @@ -95,7 +95,8 @@ static void subpage_prot_clear(unsigned long addr, unsigned long len) struct mm_struct *mm = current->mm; struct subpage_prot_table *spt = &mm->context.spt; u32 **spm, *spp; - int i, nw; + unsigned long i; + size_t nw; unsigned long next, limit; down_write(&mm->mmap_sem); @@ -144,7 +145,8 @@ long sys_subpage_prot(unsigned long addr, unsigned long len, u32 __user *map) struct mm_struct *mm = current->mm; struct subpage_prot_table *spt = &mm->context.spt; u32 **spm, *spp; - int i, nw; + unsigned long i; + size_t nw; unsigned long next, limit; int err; -- cgit v0.10.2 From 618c9b7464081b112f6a9f4690c4b73088873bea Mon Sep 17 00:00:00 2001 From: Tang Yuantian Date: Fri, 13 Jul 2012 10:27:34 +0800 Subject: powerpc/85xx: L2sram - Add compatible string to the device id list The following platforms are supported: mpc8544, mpc8572, mpc8536, p1021, p1025, p1024, p1010. Signed-off-by: Tang Yuantian Signed-off-by: Kumar Gala diff --git a/arch/powerpc/sysdev/fsl_85xx_l2ctlr.c b/arch/powerpc/sysdev/fsl_85xx_l2ctlr.c index 68ac3aa..d131c8a 100644 --- a/arch/powerpc/sysdev/fsl_85xx_l2ctlr.c +++ b/arch/powerpc/sysdev/fsl_85xx_l2ctlr.c @@ -193,6 +193,16 @@ static struct of_device_id mpc85xx_l2ctlr_of_match[] = { { .compatible = "fsl,mpc8548-l2-cache-controller", }, + { .compatible = "fsl,mpc8544-l2-cache-controller",}, + { .compatible = "fsl,mpc8572-l2-cache-controller",}, + { .compatible = "fsl,mpc8536-l2-cache-controller",}, + { .compatible = "fsl,p1021-l2-cache-controller",}, + { .compatible = "fsl,p1012-l2-cache-controller",}, + { .compatible = "fsl,p1025-l2-cache-controller",}, + { .compatible = "fsl,p1016-l2-cache-controller",}, + { .compatible = "fsl,p1024-l2-cache-controller",}, + { .compatible = "fsl,p1015-l2-cache-controller",}, + { .compatible = "fsl,p1010-l2-cache-controller",}, {}, }; -- cgit v0.10.2 From b05193c44c24f303791cda913fa37b8bef33cc4f Mon Sep 17 00:00:00 2001 From: Timur Tabi Date: Fri, 13 Jul 2012 17:40:43 -0500 Subject: powerpc/85xx: remove P1020RDB and P2020RDB CAMP device trees We only need two examples of CAMP device trees in the upstream kernel. Co-operative Asymmetric Multi-Processing (CAMP) is a technique where two or more operating systems (typically multiple copies of the same Linux kernel) are loaded into memory, and each kernel is given a subset of the available cores to execute on. For example, on a four-core system, one kernel runs on cores 0 and 1, and the other runs on cores 2 and 3. The devices are also partitioned among the operating systems, and this is done with customized device trees. Each kernel gets its own device tree that has only the devices that it should know about. Unfortunately, this approach is very hackish. The kernels are trusted to only access devices in their respective device trees, and the partitioning only works for devices that can be handled. Crafting the device trees is a tricky process, and getting U-Boot to load and start all kernels is cumbersome. But most importantly, each CAMP setup is very application-specific, since the actual partitioning of resources is done in the DTS by the system designer. Therefore, it doesn't make a lot of sense to have a lot of CAMP device trees, since we only expect them to be used as examples. Signed-off-by: Timur Tabi Signed-off-by: Kumar Gala diff --git a/arch/powerpc/boot/dts/p1020rdb_camp_core0.dts b/arch/powerpc/boot/dts/p1020rdb_camp_core0.dts deleted file mode 100644 index 41b4585..0000000 --- a/arch/powerpc/boot/dts/p1020rdb_camp_core0.dts +++ /dev/null @@ -1,63 +0,0 @@ -/* - * P1020 RDB Core0 Device Tree Source in CAMP mode. - * - * In CAMP mode, each core needs to have its own dts. Only mpic and L2 cache - * can be shared, all the other devices must be assigned to one core only. - * This dts file allows core0 to have memory, l2, i2c, spi, gpio, tdm, dma, usb, - * eth1, eth2, sdhc, crypto, global-util, message, pci0, pci1, msi. - * - * Please note to add "-b 0" for core0's dts compiling. - * - * Copyright 2011 Freescale Semiconductor Inc. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - */ - -/include/ "p1020rdb.dts" - -/ { - model = "fsl,P1020RDB"; - compatible = "fsl,P1020RDB", "fsl,MPC85XXRDB-CAMP"; - - aliases { - ethernet1 = &enet1; - ethernet2 = &enet2; - serial0 = &serial0; - pci0 = &pci0; - pci1 = &pci1; - }; - - cpus { - PowerPC,P1020@1 { - status = "disabled"; - }; - }; - - memory { - device_type = "memory"; - }; - - localbus@ffe05000 { - status = "disabled"; - }; - - soc@ffe00000 { - serial1: serial@4600 { - status = "disabled"; - }; - - enet0: ethernet@b0000 { - status = "disabled"; - }; - - mpic: pic@40000 { - protected-sources = < - 42 29 30 34 /* serial1, enet0-queue-group0 */ - 17 18 24 45 /* enet0-queue-group1, crypto */ - >; - }; - }; -}; diff --git a/arch/powerpc/boot/dts/p1020rdb_camp_core1.dts b/arch/powerpc/boot/dts/p1020rdb_camp_core1.dts deleted file mode 100644 index 5174538..0000000 --- a/arch/powerpc/boot/dts/p1020rdb_camp_core1.dts +++ /dev/null @@ -1,141 +0,0 @@ -/* - * P1020 RDB Core1 Device Tree Source in CAMP mode. - * - * In CAMP mode, each core needs to have its own dts. Only mpic and L2 cache - * can be shared, all the other devices must be assigned to one core only. - * This dts allows core1 to have l2, eth0, crypto. - * - * Please note to add "-b 1" for core1's dts compiling. - * - * Copyright 2011 Freescale Semiconductor Inc. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - */ - -/include/ "p1020rdb.dts" - -/ { - model = "fsl,P1020RDB"; - compatible = "fsl,P1020RDB", "fsl,MPC85XXRDB-CAMP"; - - aliases { - ethernet0 = &enet0; - serial0 = &serial1; - }; - - cpus { - PowerPC,P1020@0 { - status = "disabled"; - }; - }; - - memory { - device_type = "memory"; - }; - - localbus@ffe05000 { - status = "disabled"; - }; - - soc@ffe00000 { - ecm-law@0 { - status = "disabled"; - }; - - ecm@1000 { - status = "disabled"; - }; - - memory-controller@2000 { - status = "disabled"; - }; - - i2c@3000 { - status = "disabled"; - }; - - i2c@3100 { - status = "disabled"; - }; - - serial0: serial@4500 { - status = "disabled"; - }; - - spi@7000 { - status = "disabled"; - }; - - gpio: gpio-controller@f000 { - status = "disabled"; - }; - - dma@21300 { - status = "disabled"; - }; - - mdio@24000 { - status = "disabled"; - }; - - mdio@25000 { - status = "disabled"; - }; - - enet1: ethernet@b1000 { - status = "disabled"; - }; - - enet2: ethernet@b2000 { - status = "disabled"; - }; - - usb@22000 { - status = "disabled"; - }; - - sdhci@2e000 { - status = "disabled"; - }; - - mpic: pic@40000 { - protected-sources = < - 16 /* ecm, mem, L2, pci0, pci1 */ - 43 42 59 /* i2c, serial0, spi */ - 47 63 62 /* gpio, tdm */ - 20 21 22 23 /* dma */ - 03 02 /* mdio */ - 35 36 40 /* enet1-queue-group0 */ - 51 52 67 /* enet1-queue-group1 */ - 31 32 33 /* enet2-queue-group0 */ - 25 26 27 /* enet2-queue-group1 */ - 28 72 58 /* usb, sdhci, crypto */ - 0xb0 0xb1 0xb2 /* message */ - 0xb3 0xb4 0xb5 - 0xb6 0xb7 - 0xe0 0xe1 0xe2 /* msi */ - 0xe3 0xe4 0xe5 - 0xe6 0xe7 /* sdhci, crypto , pci */ - >; - }; - - msi@41600 { - status = "disabled"; - }; - - global-utilities@e0000 { //global utilities block - status = "disabled"; - }; - }; - - pci0: pcie@ffe09000 { - status = "disabled"; - }; - - pci1: pcie@ffe0a000 { - status = "disabled"; - }; -}; diff --git a/arch/powerpc/boot/dts/p2020rdb_camp_core0.dts b/arch/powerpc/boot/dts/p2020rdb_camp_core0.dts deleted file mode 100644 index 66aac86..0000000 --- a/arch/powerpc/boot/dts/p2020rdb_camp_core0.dts +++ /dev/null @@ -1,67 +0,0 @@ -/* - * P2020 RDB Core0 Device Tree Source in CAMP mode. - * - * In CAMP mode, each core needs to have its own dts. Only mpic and L2 cache - * can be shared, all the other devices must be assigned to one core only. - * This dts file allows core0 to have memory, l2, i2c, spi, gpio, dma1, usb, - * eth1, eth2, sdhc, crypto, global-util, pci0. - * - * Copyright 2009-2011 Freescale Semiconductor Inc. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - */ - -/include/ "p2020rdb.dts" - -/ { - model = "fsl,P2020RDB"; - compatible = "fsl,P2020RDB", "fsl,MPC85XXRDB-CAMP"; - - cpus { - PowerPC,P2020@1 { - status = "disabled"; - }; - }; - - localbus@ffe05000 { - status = "disabled"; - }; - - soc@ffe00000 { - serial1: serial@4600 { - status = "disabled"; - }; - - dma@c300 { - status = "disabled"; - }; - - enet0: ethernet@24000 { - status = "disabled"; - }; - - mpic: pic@40000 { - protected-sources = < - 42 76 77 78 79 /* serial1 , dma2 */ - 29 30 34 26 /* enet0, pci1 */ - 0xe0 0xe1 0xe2 0xe3 /* msi */ - 0xe4 0xe5 0xe6 0xe7 - >; - }; - - msi@41600 { - status = "disabled"; - }; - }; - - pci0: pcie@ffe08000 { - status = "disabled"; - }; - - pci2: pcie@ffe0a000 { - status = "disabled"; - }; -}; diff --git a/arch/powerpc/boot/dts/p2020rdb_camp_core1.dts b/arch/powerpc/boot/dts/p2020rdb_camp_core1.dts deleted file mode 100644 index 9bd8ef4..0000000 --- a/arch/powerpc/boot/dts/p2020rdb_camp_core1.dts +++ /dev/null @@ -1,125 +0,0 @@ -/* - * P2020 RDB Core1 Device Tree Source in CAMP mode. - * - * In CAMP mode, each core needs to have its own dts. Only mpic and L2 cache - * can be shared, all the other devices must be assigned to one core only. - * This dts allows core1 to have l2, dma2, eth0, pci1, msi. - * - * Please note to add "-b 1" for core1's dts compiling. - * - * Copyright 2009-2011 Freescale Semiconductor Inc. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - */ - -/include/ "p2020rdb.dts" - -/ { - model = "fsl,P2020RDB"; - compatible = "fsl,P2020RDB", "fsl,MPC85XXRDB-CAMP"; - - cpus { - PowerPC,P2020@0 { - status = "disabled"; - }; - }; - - localbus@ffe05000 { - status = "disabled"; - }; - - soc@ffe00000 { - ecm-law@0 { - status = "disabled"; - }; - - ecm@1000 { - status = "disabled"; - }; - - memory-controller@2000 { - status = "disabled"; - }; - - i2c@3000 { - status = "disabled"; - }; - - i2c@3100 { - status = "disabled"; - }; - - serial0: serial@4500 { - status = "disabled"; - }; - - spi@7000 { - status = "disabled"; - }; - - gpio: gpio-controller@f000 { - status = "disabled"; - }; - - dma@21300 { - status = "disabled"; - }; - - usb@22000 { - status = "disabled"; - }; - - mdio@24520 { - status = "disabled"; - }; - - mdio@25520 { - status = "disabled"; - }; - - mdio@26520 { - status = "disabled"; - }; - - enet1: ethernet@25000 { - status = "disabled"; - }; - - enet2: ethernet@26000 { - status = "disabled"; - }; - - sdhci@2e000 { - status = "disabled"; - }; - - crypto@30000 { - status = "disabled"; - }; - - mpic: pic@40000 { - protected-sources = < - 17 18 43 42 59 47 /*ecm, mem, i2c, serial0, spi,gpio */ - 16 20 21 22 23 28 /* L2, dma1, USB */ - 03 35 36 40 31 32 33 /* mdio, enet1, enet2 */ - 72 45 58 25 /* sdhci, crypto , pci */ - >; - }; - - global-utilities@e0000 { //global utilities block - status = "disabled"; - }; - - }; - - pci0: pcie@ffe08000 { - status = "disabled"; - }; - - pci1: pcie@ffe09000 { - status = "disabled"; - }; -}; -- cgit v0.10.2 From 708998c9cfc7b324cd3ffde3e7f74d32bca1ac67 Mon Sep 17 00:00:00 2001 From: Timur Tabi Date: Thu, 26 Jul 2012 10:08:52 -0500 Subject: powerpc/fsl-pci: add fsl,qoriq-pcie-v2.4 compatible string The PCI controller on the Freescale P5040 is v2.4. Signed-off-by: Timur Tabi Signed-off-by: Kumar Gala diff --git a/arch/powerpc/sysdev/fsl_pci.c b/arch/powerpc/sysdev/fsl_pci.c index c37f461..6938792 100644 --- a/arch/powerpc/sysdev/fsl_pci.c +++ b/arch/powerpc/sysdev/fsl_pci.c @@ -818,6 +818,7 @@ static const struct of_device_id pci_ids[] = { { .compatible = "fsl,p1010-pcie", }, { .compatible = "fsl,p1023-pcie", }, { .compatible = "fsl,p4080-pcie", }, + { .compatible = "fsl,qoriq-pcie-v2.4", }, { .compatible = "fsl,qoriq-pcie-v2.3", }, { .compatible = "fsl,qoriq-pcie-v2.2", }, {}, -- cgit v0.10.2 From 7a4da6f70b28b3f66d5650e06fed90f7c608c0e1 Mon Sep 17 00:00:00 2001 From: Kim Phillips Date: Thu, 26 Jul 2012 10:08:53 -0500 Subject: powerpc/85xx: add Freescale P5040 SOC and SEC v5.2 device trees Add device tree (dtsi) files for the Freescale P5040 SOC. Since this SOC introduces SEC v5.2, add the dtsi file for that also. Signed-off-by: Kim Phillips Signed-off-by: Timur Tabi Signed-off-by: Kumar Gala diff --git a/arch/powerpc/boot/dts/fsl/p5040si-post.dtsi b/arch/powerpc/boot/dts/fsl/p5040si-post.dtsi new file mode 100644 index 0000000..db2c9a7 --- /dev/null +++ b/arch/powerpc/boot/dts/fsl/p5040si-post.dtsi @@ -0,0 +1,320 @@ +/* + * P5040 Silicon/SoC Device Tree Source (post include) + * + * Copyright 2012 Freescale Semiconductor Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Freescale Semiconductor nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * + * ALTERNATIVELY, this software may be distributed under the terms of the + * GNU General Public License ("GPL") as published by the Free Software + * Foundation, either version 2 of that License or (at your option) any + * later version. + * + * This software is provided by Freescale Semiconductor "as is" and any + * express or implied warranties, including, but not limited to, the implied + * warranties of merchantability and fitness for a particular purpose are + * disclaimed. In no event shall Freescale Semiconductor be liable for any + * direct, indirect, incidental, special, exemplary, or consequential damages + * (including, but not limited to, procurement of substitute goods or services; + * loss of use, data, or profits; or business interruption) however caused and + * on any theory of liability, whether in contract, strict liability, or tort + * (including negligence or otherwise) arising in any way out of the use of this + * software, even if advised of the possibility of such damage. + */ + +&lbc { + compatible = "fsl,p5040-elbc", "fsl,elbc", "simple-bus"; + interrupts = <25 2 0 0>; + #address-cells = <2>; + #size-cells = <1>; +}; + +/* controller at 0x200000 */ +&pci0 { + compatible = "fsl,p5040-pcie", "fsl,qoriq-pcie-v2.4"; + device_type = "pci"; + #size-cells = <2>; + #address-cells = <3>; + bus-range = <0x0 0xff>; + clock-frequency = <33333333>; + interrupts = <16 2 1 15>; + pcie@0 { + reg = <0 0 0 0 0>; + #interrupt-cells = <1>; + #size-cells = <2>; + #address-cells = <3>; + device_type = "pci"; + interrupts = <16 2 1 15>; + interrupt-map-mask = <0xf800 0 0 7>; + interrupt-map = < + /* IDSEL 0x0 */ + 0000 0 0 1 &mpic 40 1 0 0 + 0000 0 0 2 &mpic 1 1 0 0 + 0000 0 0 3 &mpic 2 1 0 0 + 0000 0 0 4 &mpic 3 1 0 0 + >; + }; +}; + +/* controller at 0x201000 */ +&pci1 { + compatible = "fsl,p5040-pcie", "fsl,qoriq-pcie-v2.4"; + device_type = "pci"; + #size-cells = <2>; + #address-cells = <3>; + bus-range = <0 0xff>; + clock-frequency = <33333333>; + interrupts = <16 2 1 14>; + pcie@0 { + reg = <0 0 0 0 0>; + #interrupt-cells = <1>; + #size-cells = <2>; + #address-cells = <3>; + device_type = "pci"; + interrupts = <16 2 1 14>; + interrupt-map-mask = <0xf800 0 0 7>; + interrupt-map = < + /* IDSEL 0x0 */ + 0000 0 0 1 &mpic 41 1 0 0 + 0000 0 0 2 &mpic 5 1 0 0 + 0000 0 0 3 &mpic 6 1 0 0 + 0000 0 0 4 &mpic 7 1 0 0 + >; + }; +}; + +/* controller at 0x202000 */ +&pci2 { + compatible = "fsl,p5040-pcie", "fsl,qoriq-pcie-v2.4"; + device_type = "pci"; + #size-cells = <2>; + #address-cells = <3>; + bus-range = <0x0 0xff>; + clock-frequency = <33333333>; + interrupts = <16 2 1 13>; + pcie@0 { + reg = <0 0 0 0 0>; + #interrupt-cells = <1>; + #size-cells = <2>; + #address-cells = <3>; + device_type = "pci"; + interrupts = <16 2 1 13>; + interrupt-map-mask = <0xf800 0 0 7>; + interrupt-map = < + /* IDSEL 0x0 */ + 0000 0 0 1 &mpic 42 1 0 0 + 0000 0 0 2 &mpic 9 1 0 0 + 0000 0 0 3 &mpic 10 1 0 0 + 0000 0 0 4 &mpic 11 1 0 0 + >; + }; +}; + +&dcsr { + #address-cells = <1>; + #size-cells = <1>; + compatible = "fsl,dcsr", "simple-bus"; + + dcsr-epu@0 { + compatible = "fsl,dcsr-epu"; + interrupts = <52 2 0 0 + 84 2 0 0 + 85 2 0 0>; + reg = <0x0 0x1000>; + }; + dcsr-npc { + compatible = "fsl,dcsr-npc"; + reg = <0x1000 0x1000 0x1000000 0x8000>; + }; + dcsr-nxc@2000 { + compatible = "fsl,dcsr-nxc"; + reg = <0x2000 0x1000>; + }; + dcsr-corenet { + compatible = "fsl,dcsr-corenet"; + reg = <0x8000 0x1000 0xB0000 0x1000>; + }; + dcsr-dpaa@9000 { + compatible = "fsl,p5040-dcsr-dpaa", "fsl,dcsr-dpaa"; + reg = <0x9000 0x1000>; + }; + dcsr-ocn@11000 { + compatible = "fsl,p5040-dcsr-ocn", "fsl,dcsr-ocn"; + reg = <0x11000 0x1000>; + }; + dcsr-ddr@12000 { + compatible = "fsl,dcsr-ddr"; + dev-handle = <&ddr1>; + reg = <0x12000 0x1000>; + }; + dcsr-ddr@13000 { + compatible = "fsl,dcsr-ddr"; + dev-handle = <&ddr2>; + reg = <0x13000 0x1000>; + }; + dcsr-nal@18000 { + compatible = "fsl,p5040-dcsr-nal", "fsl,dcsr-nal"; + reg = <0x18000 0x1000>; + }; + dcsr-rcpm@22000 { + compatible = "fsl,p5040-dcsr-rcpm", "fsl,dcsr-rcpm"; + reg = <0x22000 0x1000>; + }; + dcsr-cpu-sb-proxy@40000 { + compatible = "fsl,dcsr-e5500-sb-proxy", "fsl,dcsr-cpu-sb-proxy"; + cpu-handle = <&cpu0>; + reg = <0x40000 0x1000>; + }; + dcsr-cpu-sb-proxy@41000 { + compatible = "fsl,dcsr-e5500-sb-proxy", "fsl,dcsr-cpu-sb-proxy"; + cpu-handle = <&cpu1>; + reg = <0x41000 0x1000>; + }; + dcsr-cpu-sb-proxy@42000 { + compatible = "fsl,dcsr-e5500-sb-proxy", "fsl,dcsr-cpu-sb-proxy"; + cpu-handle = <&cpu2>; + reg = <0x42000 0x1000>; + }; + dcsr-cpu-sb-proxy@43000 { + compatible = "fsl,dcsr-e5500-sb-proxy", "fsl,dcsr-cpu-sb-proxy"; + cpu-handle = <&cpu3>; + reg = <0x43000 0x1000>; + }; +}; + +&soc { + #address-cells = <1>; + #size-cells = <1>; + device_type = "soc"; + compatible = "simple-bus"; + + soc-sram-error { + compatible = "fsl,soc-sram-error"; + interrupts = <16 2 1 29>; + }; + + corenet-law@0 { + compatible = "fsl,corenet-law"; + reg = <0x0 0x1000>; + fsl,num-laws = <32>; + }; + + ddr1: memory-controller@8000 { + compatible = "fsl,qoriq-memory-controller-v4.5", "fsl,qoriq-memory-controller"; + reg = <0x8000 0x1000>; + interrupts = <16 2 1 23>; + }; + + ddr2: memory-controller@9000 { + compatible = "fsl,qoriq-memory-controller-v4.5","fsl,qoriq-memory-controller"; + reg = <0x9000 0x1000>; + interrupts = <16 2 1 22>; + }; + + cpc: l3-cache-controller@10000 { + compatible = "fsl,p5040-l3-cache-controller", "fsl,p4080-l3-cache-controller", "cache"; + reg = <0x10000 0x1000 + 0x11000 0x1000>; + interrupts = <16 2 1 27 + 16 2 1 26>; + }; + + corenet-cf@18000 { + compatible = "fsl,corenet-cf"; + reg = <0x18000 0x1000>; + interrupts = <16 2 1 31>; + fsl,ccf-num-csdids = <32>; + fsl,ccf-num-snoopids = <32>; + }; + + iommu@20000 { + compatible = "fsl,pamu-v1.0", "fsl,pamu"; + reg = <0x20000 0x5000>; + interrupts = < + 24 2 0 0 + 16 2 1 30>; + }; + +/include/ "qoriq-mpic.dtsi" + + guts: global-utilities@e0000 { + compatible = "fsl,p5040-device-config", "fsl,qoriq-device-config-1.0"; + reg = <0xe0000 0xe00>; + fsl,has-rstcr; + #sleep-cells = <1>; + fsl,liodn-bits = <12>; + }; + + pins: global-utilities@e0e00 { + compatible = "fsl,p5040-pin-control", "fsl,qoriq-pin-control-1.0"; + reg = <0xe0e00 0x200>; + #sleep-cells = <2>; + }; + + clockgen: global-utilities@e1000 { + compatible = "fsl,p5040-clockgen", "fsl,qoriq-clockgen-1.0"; + reg = <0xe1000 0x1000>; + clock-frequency = <0>; + }; + + rcpm: global-utilities@e2000 { + compatible = "fsl,p5040-rcpm", "fsl,qoriq-rcpm-1.0"; + reg = <0xe2000 0x1000>; + #sleep-cells = <1>; + }; + + sfp: sfp@e8000 { + compatible = "fsl,p5040-sfp", "fsl,qoriq-sfp-1.0"; + reg = <0xe8000 0x1000>; + }; + + serdes: serdes@ea000 { + compatible = "fsl,p5040-serdes"; + reg = <0xea000 0x1000>; + }; + +/include/ "qoriq-dma-0.dtsi" +/include/ "qoriq-dma-1.dtsi" +/include/ "qoriq-espi-0.dtsi" + spi@110000 { + fsl,espi-num-chipselects = <4>; + }; + +/include/ "qoriq-esdhc-0.dtsi" + sdhc@114000 { + sdhci,auto-cmd12; + }; + +/include/ "qoriq-i2c-0.dtsi" +/include/ "qoriq-i2c-1.dtsi" +/include/ "qoriq-duart-0.dtsi" +/include/ "qoriq-duart-1.dtsi" +/include/ "qoriq-gpio-0.dtsi" +/include/ "qoriq-usb2-mph-0.dtsi" + usb0: usb@210000 { + compatible = "fsl-usb2-mph-v1.6", "fsl,mpc85xx-usb2-mph", "fsl-usb2-mph"; + phy_type = "utmi"; + port0; + }; + +/include/ "qoriq-usb2-dr-0.dtsi" + usb1: usb@211000 { + compatible = "fsl-usb2-dr-v1.6", "fsl,mpc85xx-usb2-dr", "fsl-usb2-dr"; + dr_mode = "host"; + phy_type = "utmi"; + }; + +/include/ "qoriq-sata2-0.dtsi" +/include/ "qoriq-sata2-1.dtsi" +/include/ "qoriq-sec5.2-0.dtsi" +}; diff --git a/arch/powerpc/boot/dts/fsl/p5040si-pre.dtsi b/arch/powerpc/boot/dts/fsl/p5040si-pre.dtsi new file mode 100644 index 0000000..52721b6 --- /dev/null +++ b/arch/powerpc/boot/dts/fsl/p5040si-pre.dtsi @@ -0,0 +1,111 @@ +/* + * P5040 Silicon/SoC Device Tree Source (pre include) + * + * Copyright 2012 Freescale Semiconductor Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Freescale Semiconductor nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * + * ALTERNATIVELY, this software may be distributed under the terms of the + * GNU General Public License ("GPL") as published by the Free Software + * Foundation, either version 2 of that License or (at your option) any + * later version. + * + * This software is provided by Freescale Semiconductor "as is" and any + * express or implied warranties, including, but not limited to, the implied + * warranties of merchantability and fitness for a particular purpose are + * disclaimed. In no event shall Freescale Semiconductor be liable for any + * direct, indirect, incidental, special, exemplary, or consequential damages + * (including, but not limited to, procurement of substitute goods or services; + * loss of use, data, or profits; or business interruption) however caused and + * on any theory of liability, whether in contract, strict liability, or tort + * (including negligence or otherwise) arising in any way out of the use of this + * software, even if advised of the possibility of such damage. + */ + +/dts-v1/; +/ { + compatible = "fsl,P5040"; + #address-cells = <2>; + #size-cells = <2>; + interrupt-parent = <&mpic>; + + aliases { + ccsr = &soc; + dcsr = &dcsr; + + serial0 = &serial0; + serial1 = &serial1; + serial2 = &serial2; + serial3 = &serial3; + pci0 = &pci0; + pci1 = &pci1; + pci2 = &pci2; + usb0 = &usb0; + usb1 = &usb1; + dma0 = &dma0; + dma1 = &dma1; + sdhc = &sdhc; + msi0 = &msi0; + msi1 = &msi1; + msi2 = &msi2; + + crypto = &crypto; + sec_jr0 = &sec_jr0; + sec_jr1 = &sec_jr1; + sec_jr2 = &sec_jr2; + sec_jr3 = &sec_jr3; + rtic_a = &rtic_a; + rtic_b = &rtic_b; + rtic_c = &rtic_c; + rtic_d = &rtic_d; + sec_mon = &sec_mon; + }; + + cpus { + #address-cells = <1>; + #size-cells = <0>; + + cpu0: PowerPC,e5500@0 { + device_type = "cpu"; + reg = <0>; + next-level-cache = <&L2_0>; + L2_0: l2-cache { + next-level-cache = <&cpc>; + }; + }; + cpu1: PowerPC,e5500@1 { + device_type = "cpu"; + reg = <1>; + next-level-cache = <&L2_1>; + L2_1: l2-cache { + next-level-cache = <&cpc>; + }; + }; + cpu2: PowerPC,e5500@2 { + device_type = "cpu"; + reg = <2>; + next-level-cache = <&L2_2>; + L2_2: l2-cache { + next-level-cache = <&cpc>; + }; + }; + cpu3: PowerPC,e5500@3 { + device_type = "cpu"; + reg = <3>; + next-level-cache = <&L2_3>; + L2_3: l2-cache { + next-level-cache = <&cpc>; + }; + }; + }; +}; diff --git a/arch/powerpc/boot/dts/fsl/qoriq-sec5.2-0.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-sec5.2-0.dtsi new file mode 100644 index 0000000..7b2ab8a --- /dev/null +++ b/arch/powerpc/boot/dts/fsl/qoriq-sec5.2-0.dtsi @@ -0,0 +1,118 @@ +/* + * QorIQ Sec/Crypto 5.2 device tree stub [ controller @ offset 0x300000 ] + * + * Copyright 2011-2012 Freescale Semiconductor Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Freescale Semiconductor nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * + * ALTERNATIVELY, this software may be distributed under the terms of the + * GNU General Public License ("GPL") as published by the Free Software + * Foundation, either version 2 of that License or (at your option) any + * later version. + * + * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +crypto: crypto@300000 { + compatible = "fsl,sec-v5.2", "fsl,sec-v5.0", "fsl,sec-v4.0"; + #address-cells = <1>; + #size-cells = <1>; + reg = <0x300000 0x10000>; + ranges = <0 0x300000 0x10000>; + interrupts = <92 2 0 0>; + + sec_jr0: jr@1000 { + compatible = "fsl,sec-v5.2-job-ring", + "fsl,sec-v5.0-job-ring", + "fsl,sec-v4.0-job-ring"; + reg = <0x1000 0x1000>; + interrupts = <88 2 0 0>; + }; + + sec_jr1: jr@2000 { + compatible = "fsl,sec-v5.2-job-ring", + "fsl,sec-v5.0-job-ring", + "fsl,sec-v4.0-job-ring"; + reg = <0x2000 0x1000>; + interrupts = <89 2 0 0>; + }; + + sec_jr2: jr@3000 { + compatible = "fsl,sec-v5.2-job-ring", + "fsl,sec-v5.0-job-ring", + "fsl,sec-v4.0-job-ring"; + reg = <0x3000 0x1000>; + interrupts = <90 2 0 0>; + }; + + sec_jr3: jr@4000 { + compatible = "fsl,sec-v5.2-job-ring", + "fsl,sec-v5.0-job-ring", + "fsl,sec-v4.0-job-ring"; + reg = <0x4000 0x1000>; + interrupts = <91 2 0 0>; + }; + + rtic@6000 { + compatible = "fsl,sec-v5.2-rtic", + "fsl,sec-v5.0-rtic", + "fsl,sec-v4.0-rtic"; + #address-cells = <1>; + #size-cells = <1>; + reg = <0x6000 0x100>; + ranges = <0x0 0x6100 0xe00>; + + rtic_a: rtic-a@0 { + compatible = "fsl,sec-v5.2-rtic-memory", + "fsl,sec-v5.0-rtic-memory", + "fsl,sec-v4.0-rtic-memory"; + reg = <0x00 0x20 0x100 0x80>; + }; + + rtic_b: rtic-b@20 { + compatible = "fsl,sec-v5.2-rtic-memory", + "fsl,sec-v5.0-rtic-memory", + "fsl,sec-v4.0-rtic-memory"; + reg = <0x20 0x20 0x200 0x80>; + }; + + rtic_c: rtic-c@40 { + compatible = "fsl,sec-v5.2-rtic-memory", + "fsl,sec-v5.0-rtic-memory", + "fsl,sec-v4.0-rtic-memory"; + reg = <0x40 0x20 0x300 0x80>; + }; + + rtic_d: rtic-d@60 { + compatible = "fsl,sec-v5.2-rtic-memory", + "fsl,sec-v5.0-rtic-memory", + "fsl,sec-v4.0-rtic-memory"; + reg = <0x60 0x20 0x500 0x80>; + }; + }; +}; + +sec_mon: sec_mon@314000 { + compatible = "fsl,sec-v5.2-mon", "fsl,sec-v5.0-mon", "fsl,sec-v4.0-mon"; + reg = <0x314000 0x1000>; + interrupts = <93 2 0 0>; +}; -- cgit v0.10.2 From 4c30c143f02f1ab8d9740c61db1ce335a5f95095 Mon Sep 17 00:00:00 2001 From: Timur Tabi Date: Thu, 26 Jul 2012 10:08:54 -0500 Subject: powerpc/85xx: Add support for P5040DS board Add support for the Freescale P5040DS Reference Board ("Superhydra"), which is similar to the P5020DS. Features of the P5040 are listed below, but not all of these features (e.g. DPAA networking) are currently supported. Four P5040 single-threaded e5500 cores built Up to 2.4 GHz with 64-bit ISA support Three levels of instruction: user, supervisor, hypervisor CoreNet platform cache (CPC) 2.0 MB configures as dual 1 MB blocks hierarchical interconnect fabric Two 64-bit DDR3/3L SDRAM memory controllers with ECC and interleaving support Up to 1600MT/s Memory pre-fetch engine DPAA incorporating acceleration for the following functions Packet parsing, classification, and distribution (FMAN) Queue management for scheduling, packet sequencing and congestion management (QMAN) Hardware buffer management for buffer allocation and de-allocation (BMAN) Cryptography acceleration (SEC 5.0) at up to 40 Gbps SerDes 20 lanes at up to 5 Gbps Supports SGMII, XAUI, PCIe rev1.1/2.0, SATA Ethernet interfaces Two 10 Gbps Ethernet MACs Ten 1 Gbps Ethernet MACs High-speed peripheral interfaces Two PCI Express 2.0/3.0 controllers Additional peripheral interfaces Two serial ATA (SATA 2.0) controllers Two high-speed USB 2.0 controllers with integrated PHY Enhanced secure digital host controller (SD/MMC/eMMC) Enhanced serial peripheral interface (eSPI) Two I2C controllers Four UARTs Integrated flash controller supporting NAND and NOR flash DMA Dual four channel Support for hardware virtualization and partitioning enforcement Extra privileged level for hypervisor support QorIQ Trust Architecture 1.1 Secure boot, secure debug, tamper detection, volatile key storage Signed-off-by: Timur Tabi Signed-off-by: Kumar Gala diff --git a/arch/powerpc/boot/dts/p5040ds.dts b/arch/powerpc/boot/dts/p5040ds.dts new file mode 100644 index 0000000..d86bf2e --- /dev/null +++ b/arch/powerpc/boot/dts/p5040ds.dts @@ -0,0 +1,203 @@ +/* + * P5040DS Device Tree Source + * + * Copyright 2012 Freescale Semiconductor Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Freescale Semiconductor nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * + * ALTERNATIVELY, this software may be distributed under the terms of the + * GNU General Public License ("GPL") as published by the Free Software + * Foundation, either version 2 of that License or (at your option) any + * later version. + * + * This software is provided by Freescale Semiconductor "as is" and any + * express or implied warranties, including, but not limited to, the implied + * warranties of merchantability and fitness for a particular purpose are + * disclaimed. In no event shall Freescale Semiconductor be liable for any + * direct, indirect, incidental, special, exemplary, or consequential damages + * (including, but not limited to, procurement of substitute goods or services; + * loss of use, data, or profits; or business interruption) however caused and + * on any theory of liability, whether in contract, strict liability, or tort + * (including negligence or otherwise) arising in any way out of the use of this + * software, even if advised of the possibility of such damage. + */ + +/include/ "fsl/p5040si-pre.dtsi" + +/ { + model = "fsl,P5040DS"; + compatible = "fsl,P5040DS"; + #address-cells = <2>; + #size-cells = <2>; + interrupt-parent = <&mpic>; + + memory { + device_type = "memory"; + }; + + dcsr: dcsr@f00000000 { + ranges = <0x00000000 0xf 0x00000000 0x01008000>; + }; + + soc: soc@ffe000000 { + ranges = <0x00000000 0xf 0xfe000000 0x1000000>; + reg = <0xf 0xfe000000 0 0x00001000>; + spi@110000 { + flash@0 { + #address-cells = <1>; + #size-cells = <1>; + compatible = "spansion,s25sl12801"; + reg = <0>; + spi-max-frequency = <40000000>; /* input clock */ + partition@u-boot { + label = "u-boot"; + reg = <0x00000000 0x00100000>; + }; + partition@kernel { + label = "kernel"; + reg = <0x00100000 0x00500000>; + }; + partition@dtb { + label = "dtb"; + reg = <0x00600000 0x00100000>; + }; + partition@fs { + label = "file system"; + reg = <0x00700000 0x00900000>; + }; + }; + }; + + i2c@118100 { + eeprom@51 { + compatible = "at24,24c256"; + reg = <0x51>; + }; + eeprom@52 { + compatible = "at24,24c256"; + reg = <0x52>; + }; + }; + + i2c@119100 { + rtc@68 { + compatible = "dallas,ds3232"; + reg = <0x68>; + interrupts = <0x1 0x1 0 0>; + }; + }; + }; + + lbc: localbus@ffe124000 { + reg = <0xf 0xfe124000 0 0x1000>; + ranges = <0 0 0xf 0xe8000000 0x08000000 + 2 0 0xf 0xffa00000 0x00040000 + 3 0 0xf 0xffdf0000 0x00008000>; + + flash@0,0 { + compatible = "cfi-flash"; + reg = <0 0 0x08000000>; + bank-width = <2>; + device-width = <2>; + }; + + nand@2,0 { + #address-cells = <1>; + #size-cells = <1>; + compatible = "fsl,elbc-fcm-nand"; + reg = <0x2 0x0 0x40000>; + + partition@0 { + label = "NAND U-Boot Image"; + reg = <0x0 0x02000000>; + }; + + partition@2000000 { + label = "NAND Root File System"; + reg = <0x02000000 0x10000000>; + }; + + partition@12000000 { + label = "NAND Compressed RFS Image"; + reg = <0x12000000 0x08000000>; + }; + + partition@1a000000 { + label = "NAND Linux Kernel Image"; + reg = <0x1a000000 0x04000000>; + }; + + partition@1e000000 { + label = "NAND DTB Image"; + reg = <0x1e000000 0x01000000>; + }; + + partition@1f000000 { + label = "NAND Writable User area"; + reg = <0x1f000000 0x01000000>; + }; + }; + + board-control@3,0 { + compatible = "fsl,p5040ds-fpga", "fsl,fpga-ngpixis"; + reg = <3 0 0x40>; + }; + }; + + pci0: pcie@ffe200000 { + reg = <0xf 0xfe200000 0 0x1000>; + ranges = <0x02000000 0 0xe0000000 0xc 0x00000000 0x0 0x20000000 + 0x01000000 0 0x00000000 0xf 0xf8000000 0x0 0x00010000>; + pcie@0 { + ranges = <0x02000000 0 0xe0000000 + 0x02000000 0 0xe0000000 + 0 0x20000000 + + 0x01000000 0 0x00000000 + 0x01000000 0 0x00000000 + 0 0x00010000>; + }; + }; + + pci1: pcie@ffe201000 { + reg = <0xf 0xfe201000 0 0x1000>; + ranges = <0x02000000 0x0 0xe0000000 0xc 0x20000000 0x0 0x20000000 + 0x01000000 0x0 0x00000000 0xf 0xf8010000 0x0 0x00010000>; + pcie@0 { + ranges = <0x02000000 0 0xe0000000 + 0x02000000 0 0xe0000000 + 0 0x20000000 + + 0x01000000 0 0x00000000 + 0x01000000 0 0x00000000 + 0 0x00010000>; + }; + }; + + pci2: pcie@ffe202000 { + reg = <0xf 0xfe202000 0 0x1000>; + ranges = <0x02000000 0 0xe0000000 0xc 0x40000000 0 0x20000000 + 0x01000000 0 0x00000000 0xf 0xf8020000 0 0x00010000>; + pcie@0 { + ranges = <0x02000000 0 0xe0000000 + 0x02000000 0 0xe0000000 + 0 0x20000000 + + 0x01000000 0 0x00000000 + 0x01000000 0 0x00000000 + 0 0x00010000>; + }; + }; +}; + +/include/ "fsl/p5040si-post.dtsi" diff --git a/arch/powerpc/configs/corenet32_smp_defconfig b/arch/powerpc/configs/corenet32_smp_defconfig index 8b3d57c..1c0f243 100644 --- a/arch/powerpc/configs/corenet32_smp_defconfig +++ b/arch/powerpc/configs/corenet32_smp_defconfig @@ -27,6 +27,7 @@ CONFIG_P2041_RDB=y CONFIG_P3041_DS=y CONFIG_P4080_DS=y CONFIG_P5020_DS=y +CONFIG_P5040_DS=y CONFIG_HIGHMEM=y # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set CONFIG_BINFMT_MISC=m diff --git a/arch/powerpc/configs/corenet64_smp_defconfig b/arch/powerpc/configs/corenet64_smp_defconfig index 0516e22..88fa5c4 100644 --- a/arch/powerpc/configs/corenet64_smp_defconfig +++ b/arch/powerpc/configs/corenet64_smp_defconfig @@ -23,6 +23,7 @@ CONFIG_MODVERSIONS=y CONFIG_PARTITION_ADVANCED=y CONFIG_MAC_PARTITION=y CONFIG_P5020_DS=y +CONFIG_P5040_DS=y # CONFIG_PPC_OF_BOOT_TRAMPOLINE is not set CONFIG_BINFMT_MISC=m CONFIG_IRQ_ALL_CPUS=y diff --git a/arch/powerpc/platforms/85xx/Kconfig b/arch/powerpc/platforms/85xx/Kconfig index 159c01e..31f0618 100644 --- a/arch/powerpc/platforms/85xx/Kconfig +++ b/arch/powerpc/platforms/85xx/Kconfig @@ -254,6 +254,20 @@ config P5020_DS help This option enables support for the P5020 DS board +config P5040_DS + bool "Freescale P5040 DS" + select DEFAULT_UIMAGE + select E500 + select PPC_E500MC + select PHYS_64BIT + select SWIOTLB + select ARCH_REQUIRE_GPIOLIB + select GPIO_MPC8XXX + select HAS_RAPIDIO + select PPC_EPAPR_HV_PIC + help + This option enables support for the P5040 DS board + config PPC_QEMU_E500 bool "QEMU generic e500 platform" depends on EXPERIMENTAL diff --git a/arch/powerpc/platforms/85xx/Makefile b/arch/powerpc/platforms/85xx/Makefile index 3dfe811..d99268a 100644 --- a/arch/powerpc/platforms/85xx/Makefile +++ b/arch/powerpc/platforms/85xx/Makefile @@ -20,6 +20,7 @@ obj-$(CONFIG_P2041_RDB) += p2041_rdb.o corenet_ds.o obj-$(CONFIG_P3041_DS) += p3041_ds.o corenet_ds.o obj-$(CONFIG_P4080_DS) += p4080_ds.o corenet_ds.o obj-$(CONFIG_P5020_DS) += p5020_ds.o corenet_ds.o +obj-$(CONFIG_P5040_DS) += p5040_ds.o corenet_ds.o obj-$(CONFIG_STX_GP3) += stx_gp3.o obj-$(CONFIG_TQM85xx) += tqm85xx.o obj-$(CONFIG_SBC8548) += sbc8548.o diff --git a/arch/powerpc/platforms/85xx/corenet_ds.c b/arch/powerpc/platforms/85xx/corenet_ds.c index 925b028..473d573 100644 --- a/arch/powerpc/platforms/85xx/corenet_ds.c +++ b/arch/powerpc/platforms/85xx/corenet_ds.c @@ -63,7 +63,9 @@ void __init corenet_ds_setup_arch(void) #ifdef CONFIG_PCI for_each_node_by_type(np, "pci") { if (of_device_is_compatible(np, "fsl,p4080-pcie") || - of_device_is_compatible(np, "fsl,qoriq-pcie-v2.2")) { + of_device_is_compatible(np, "fsl,qoriq-pcie-v2.2") || + of_device_is_compatible(np, "fsl,qoriq-pcie-v2.3") || + of_device_is_compatible(np, "fsl,qoriq-pcie-v2.4")) { fsl_add_bridge(np, 0); hose = pci_find_hose_for_OF_device(np); max = min(max, hose->dma_window_base_cur + @@ -99,6 +101,12 @@ static const struct of_device_id of_device_ids[] __devinitconst = { { .compatible = "fsl,qoriq-pcie-v2.2", }, + { + .compatible = "fsl,qoriq-pcie-v2.3", + }, + { + .compatible = "fsl,qoriq-pcie-v2.4", + }, /* The following two are for the Freescale hypervisor */ { .name = "hypervisor", diff --git a/arch/powerpc/platforms/85xx/p5040_ds.c b/arch/powerpc/platforms/85xx/p5040_ds.c new file mode 100644 index 0000000..8e22a34 --- /dev/null +++ b/arch/powerpc/platforms/85xx/p5040_ds.c @@ -0,0 +1,89 @@ +/* + * P5040 DS Setup + * + * Copyright 2009-2010 Freescale Semiconductor Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ + +#include +#include + +#include +#include +#include + +#include + +#include +#include +#include + +#include "corenet_ds.h" + +/* + * Called very early, device-tree isn't unflattened + */ +static int __init p5040_ds_probe(void) +{ + unsigned long root = of_get_flat_dt_root(); +#ifdef CONFIG_SMP + extern struct smp_ops_t smp_85xx_ops; +#endif + + if (of_flat_dt_is_compatible(root, "fsl,P5040DS")) + return 1; + + /* Check if we're running under the Freescale hypervisor */ + if (of_flat_dt_is_compatible(root, "fsl,P5040DS-hv")) { + ppc_md.init_IRQ = ehv_pic_init; + ppc_md.get_irq = ehv_pic_get_irq; + ppc_md.restart = fsl_hv_restart; + ppc_md.power_off = fsl_hv_halt; + ppc_md.halt = fsl_hv_halt; +#ifdef CONFIG_SMP + /* + * Disable the timebase sync operations because we can't write + * to the timebase registers under the hypervisor. + */ + smp_85xx_ops.give_timebase = NULL; + smp_85xx_ops.take_timebase = NULL; +#endif + return 1; + } + + return 0; +} + +define_machine(p5040_ds) { + .name = "P5040 DS", + .probe = p5040_ds_probe, + .setup_arch = corenet_ds_setup_arch, + .init_IRQ = corenet_ds_pic_init, +#ifdef CONFIG_PCI + .pcibios_fixup_bus = fsl_pcibios_fixup_bus, +#endif +/* coreint doesn't play nice with lazy EE, use legacy mpic for now */ +#ifdef CONFIG_PPC64 + .get_irq = mpic_get_irq, +#else + .get_irq = mpic_get_coreint_irq, +#endif + .restart = fsl_rstcr_restart, + .calibrate_decr = generic_calibrate_decr, + .progress = udbg_progress, +#ifdef CONFIG_PPC64 + .power_save = book3e_idle, +#else + .power_save = e500_idle, +#endif +}; + +machine_device_initcall(p5040_ds, corenet_ds_publish_devices); + +#ifdef CONFIG_SWIOTLB +machine_arch_initcall(p5040_ds, swiotlb_setup_bus_notifier); +#endif -- cgit v0.10.2 From 34f84b5b5bc83f4fc208cc278f572e6d926f976b Mon Sep 17 00:00:00 2001 From: Timur Tabi Date: Mon, 23 Jul 2012 18:12:29 -0500 Subject: powerpc/85xx: introduce support for the Freescale / iVeia P1022RDK The Freescale / iVeia P1022RDK reference board is a small-factor board with a Freescale P1022 SOC. It includes: 1) 512 MB 64-bit DDR3-800 (max) memory 2) 8MB SPI serial flash memory for boot loader 3) Bootable 4-bit SD/MMC port 4) Two 10/100/1000 Ethernet connectors 5) One SATA port 6) Two USB ports 7) One PCIe x4 slot 8) DVI video connector 9) Audio input and output jacks, powered by a Wolfson WM8960 codec. Unlike the P1022DS, the P1022RDK does not have any localbus devices, presumably because of the localbus / DIU multiplexing restriction of the P1022 SOC. Signed-off-by: Timur Tabi Signed-off-by: Kumar Gala diff --git a/arch/powerpc/boot/dts/p1022rdk.dts b/arch/powerpc/boot/dts/p1022rdk.dts new file mode 100644 index 0000000..51d82de --- /dev/null +++ b/arch/powerpc/boot/dts/p1022rdk.dts @@ -0,0 +1,188 @@ +/* + * P1022 RDK 32-bit Physical Address Map Device Tree Source + * + * Copyright 2012 Freescale Semiconductor Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Freescale Semiconductor nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * + * ALTERNATIVELY, this software may be distributed under the terms of the + * GNU General Public License ("GPL") as published by the Free Software + * Foundation, either version 2 of that License or (at your option) any + * later version. + * + * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor "AS IS" AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/include/ "fsl/p1022si-pre.dtsi" +/ { + model = "fsl,P1022RDK"; + compatible = "fsl,P1022RDK"; + + memory { + device_type = "memory"; + }; + + board_lbc: lbc: localbus@ffe05000 { + /* The P1022 RDK does not have any localbus devices */ + status = "disabled"; + }; + + board_soc: soc: soc@ffe00000 { + ranges = <0x0 0x0 0xffe00000 0x100000>; + + i2c@3100 { + wm8960:codec@1a { + compatible = "wlf,wm8960"; + reg = <0x1a>; + /* MCLK source is a stand-alone oscillator */ + clock-frequency = <12288000>; + }; + rtc@68 { + compatible = "stm,m41t62"; + reg = <0x68>; + }; + adt7461@4c{ + compatible = "adi,adt7461"; + reg = <0x4c>; + }; + zl6100@21{ + compatible = "isil,zl6100"; + reg = <0x21>; + }; + zl6100@24{ + compatible = "isil,zl6100"; + reg = <0x24>; + }; + zl6100@26{ + compatible = "isil,zl6100"; + reg = <0x26>; + }; + zl6100@29{ + compatible = "isil,zl6100"; + reg = <0x29>; + }; + }; + + spi@7000 { + flash@0 { + #address-cells = <1>; + #size-cells = <1>; + compatible = "spansion,m25p80"; + reg = <0>; + spi-max-frequency = <1000000>; + partition@0 { + label = "full-spi-flash"; + reg = <0x00000000 0x00100000>; + }; + }; + }; + + ssi@15000 { + fsl,mode = "i2s-slave"; + codec-handle = <&wm8960>; + }; + + usb@22000 { + phy_type = "ulpi"; + }; + + usb@23000 { + phy_type = "ulpi"; + }; + + mdio@24000 { + phy0: ethernet-phy@0 { + interrupts = <3 1 0 0>; + reg = <0x1>; + }; + phy1: ethernet-phy@1 { + interrupts = <9 1 0 0>; + reg = <0x2>; + }; + }; + + mdio@25000 { + tbi0: tbi-phy@11 { + reg = <0x11>; + device_type = "tbi-phy"; + }; + }; + + ethernet@b0000 { + phy-handle = <&phy0>; + phy-connection-type = "rgmii-id"; + }; + + ethernet@b1000 { + phy-handle = <&phy1>; + tbi-handle = <&tbi0>; + phy-connection-type = "sgmii"; + }; + }; + + pci0: pcie@ffe09000 { + ranges = <0x2000000 0x0 0xe0000000 0 0xa0000000 0x0 0x20000000 + 0x1000000 0x0 0x00000000 0 0xffc10000 0x0 0x10000>; + reg = <0x0 0xffe09000 0 0x1000>; + pcie@0 { + ranges = <0x2000000 0x0 0xe0000000 + 0x2000000 0x0 0xe0000000 + 0x0 0x20000000 + + 0x1000000 0x0 0x0 + 0x1000000 0x0 0x0 + 0x0 0x100000>; + }; + }; + + pci1: pcie@ffe0a000 { + ranges = <0x2000000 0x0 0xe0000000 0 0xc0000000 0x0 0x20000000 + 0x1000000 0x0 0x00000000 0 0xffc20000 0x0 0x10000>; + reg = <0 0xffe0a000 0 0x1000>; + pcie@0 { + ranges = <0x2000000 0x0 0xe0000000 + 0x2000000 0x0 0xe0000000 + 0x0 0x20000000 + + 0x1000000 0x0 0x0 + 0x1000000 0x0 0x0 + 0x0 0x100000>; + }; + }; + + pci2: pcie@ffe0b000 { + ranges = <0x2000000 0x0 0xe0000000 0 0x80000000 0x0 0x20000000 + 0x1000000 0x0 0x00000000 0 0xffc00000 0x0 0x10000>; + reg = <0 0xffe0b000 0 0x1000>; + pcie@0 { + ranges = <0x2000000 0x0 0xe0000000 + 0x2000000 0x0 0xe0000000 + 0x0 0x20000000 + + 0x1000000 0x0 0x0 + 0x1000000 0x0 0x0 + 0x0 0x100000>; + }; + }; +}; + +/include/ "fsl/p1022si-post.dtsi" diff --git a/arch/powerpc/configs/mpc85xx_defconfig b/arch/powerpc/configs/mpc85xx_defconfig index 8b5bda2..cf815e8 100644 --- a/arch/powerpc/configs/mpc85xx_defconfig +++ b/arch/powerpc/configs/mpc85xx_defconfig @@ -30,6 +30,7 @@ CONFIG_MPC85xx_DS=y CONFIG_MPC85xx_RDB=y CONFIG_P1010_RDB=y CONFIG_P1022_DS=y +CONFIG_P1022_RDK=y CONFIG_P1023_RDS=y CONFIG_SOCRATES=y CONFIG_KSI8560=y diff --git a/arch/powerpc/configs/mpc85xx_smp_defconfig b/arch/powerpc/configs/mpc85xx_smp_defconfig index b0974e7..502cd9e 100644 --- a/arch/powerpc/configs/mpc85xx_smp_defconfig +++ b/arch/powerpc/configs/mpc85xx_smp_defconfig @@ -32,6 +32,7 @@ CONFIG_MPC85xx_DS=y CONFIG_MPC85xx_RDB=y CONFIG_P1010_RDB=y CONFIG_P1022_DS=y +CONFIG_P1022_RDK=y CONFIG_P1023_RDS=y CONFIG_SOCRATES=y CONFIG_KSI8560=y diff --git a/arch/powerpc/platforms/85xx/Kconfig b/arch/powerpc/platforms/85xx/Kconfig index 31f0618..02d02a0 100644 --- a/arch/powerpc/platforms/85xx/Kconfig +++ b/arch/powerpc/platforms/85xx/Kconfig @@ -104,6 +104,13 @@ config P1022_DS help This option enables support for the Freescale P1022DS reference board. +config P1022_RDK + bool "Freescale / iVeia P1022 RDK" + select DEFAULT_UIMAGE + help + This option enables support for the Freescale / iVeia P1022RDK + reference board. + config P1023_RDS bool "Freescale P1023 RDS" select DEFAULT_UIMAGE diff --git a/arch/powerpc/platforms/85xx/Makefile b/arch/powerpc/platforms/85xx/Makefile index d99268a..76f679c 100644 --- a/arch/powerpc/platforms/85xx/Makefile +++ b/arch/powerpc/platforms/85xx/Makefile @@ -15,6 +15,7 @@ obj-$(CONFIG_MPC85xx_MDS) += mpc85xx_mds.o obj-$(CONFIG_MPC85xx_RDB) += mpc85xx_rdb.o obj-$(CONFIG_P1010_RDB) += p1010rdb.o obj-$(CONFIG_P1022_DS) += p1022_ds.o +obj-$(CONFIG_P1022_RDK) += p1022_rdk.o obj-$(CONFIG_P1023_RDS) += p1023_rds.o obj-$(CONFIG_P2041_RDB) += p2041_rdb.o corenet_ds.o obj-$(CONFIG_P3041_DS) += p3041_ds.o corenet_ds.o diff --git a/arch/powerpc/platforms/85xx/p1022_rdk.c b/arch/powerpc/platforms/85xx/p1022_rdk.c new file mode 100644 index 0000000..b3cf11b --- /dev/null +++ b/arch/powerpc/platforms/85xx/p1022_rdk.c @@ -0,0 +1,195 @@ +/* + * P1022 RDK board specific routines + * + * Copyright 2012 Freescale Semiconductor, Inc. + * + * Author: Timur Tabi + * + * Based on p1022_ds.c + * + * This file is licensed under the terms of the GNU General Public License + * version 2. This program is licensed "as is" without any warranty of any + * kind, whether express or implied. + */ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include "smp.h" + +#include "mpc85xx.h" + +#if defined(CONFIG_FB_FSL_DIU) || defined(CONFIG_FB_FSL_DIU_MODULE) + +/* DIU Pixel Clock bits of the CLKDVDR Global Utilities register */ +#define CLKDVDR_PXCKEN 0x80000000 +#define CLKDVDR_PXCKINV 0x10000000 +#define CLKDVDR_PXCKDLY 0x06000000 +#define CLKDVDR_PXCLK_MASK 0x00FF0000 + +/** + * p1022rdk_set_monitor_port: switch the output to a different monitor port + */ +static void p1022rdk_set_monitor_port(enum fsl_diu_monitor_port port) +{ + if (port != FSL_DIU_PORT_DVI) { + pr_err("p1022rdk: unsupported monitor port %i\n", port); + return; + } +} + +/** + * p1022rdk_set_pixel_clock: program the DIU's clock + * + * @pixclock: the wavelength, in picoseconds, of the clock + */ +void p1022rdk_set_pixel_clock(unsigned int pixclock) +{ + struct device_node *guts_np = NULL; + struct ccsr_guts __iomem *guts; + unsigned long freq; + u64 temp; + u32 pxclk; + + /* Map the global utilities registers. */ + guts_np = of_find_compatible_node(NULL, NULL, "fsl,p1022-guts"); + if (!guts_np) { + pr_err("p1022rdk: missing global utilties device node\n"); + return; + } + + guts = of_iomap(guts_np, 0); + of_node_put(guts_np); + if (!guts) { + pr_err("p1022rdk: could not map global utilties device\n"); + return; + } + + /* Convert pixclock from a wavelength to a frequency */ + temp = 1000000000000ULL; + do_div(temp, pixclock); + freq = temp; + + /* + * 'pxclk' is the ratio of the platform clock to the pixel clock. + * This number is programmed into the CLKDVDR register, and the valid + * range of values is 2-255. + */ + pxclk = DIV_ROUND_CLOSEST(fsl_get_sys_freq(), freq); + pxclk = clamp_t(u32, pxclk, 2, 255); + + /* Disable the pixel clock, and set it to non-inverted and no delay */ + clrbits32(&guts->clkdvdr, + CLKDVDR_PXCKEN | CLKDVDR_PXCKDLY | CLKDVDR_PXCLK_MASK); + + /* Enable the clock and set the pxclk */ + setbits32(&guts->clkdvdr, CLKDVDR_PXCKEN | (pxclk << 16)); + + iounmap(guts); +} + +/** + * p1022rdk_valid_monitor_port: set the monitor port for sysfs + */ +enum fsl_diu_monitor_port +p1022rdk_valid_monitor_port(enum fsl_diu_monitor_port port) +{ + return FSL_DIU_PORT_DVI; +} + +#endif + +void __init p1022_rdk_pic_init(void) +{ + struct mpic *mpic = mpic_alloc(NULL, 0, MPIC_BIG_ENDIAN | + MPIC_SINGLE_DEST_CPU, + 0, 256, " OpenPIC "); + BUG_ON(mpic == NULL); + mpic_init(mpic); +} + +/* + * Setup the architecture + */ +static void __init p1022_rdk_setup_arch(void) +{ +#ifdef CONFIG_PCI + struct device_node *np; +#endif + dma_addr_t max = 0xffffffff; + + if (ppc_md.progress) + ppc_md.progress("p1022_rdk_setup_arch()", 0); + +#ifdef CONFIG_PCI + for_each_compatible_node(np, "pci", "fsl,p1022-pcie") { + struct resource rsrc; + struct pci_controller *hose; + + of_address_to_resource(np, 0, &rsrc); + + if ((rsrc.start & 0xfffff) == 0x8000) + fsl_add_bridge(np, 1); + else + fsl_add_bridge(np, 0); + + hose = pci_find_hose_for_OF_device(np); + max = min(max, hose->dma_window_base_cur + + hose->dma_window_size); + } +#endif + +#if defined(CONFIG_FB_FSL_DIU) || defined(CONFIG_FB_FSL_DIU_MODULE) + diu_ops.set_monitor_port = p1022rdk_set_monitor_port; + diu_ops.set_pixel_clock = p1022rdk_set_pixel_clock; + diu_ops.valid_monitor_port = p1022rdk_valid_monitor_port; +#endif + + mpc85xx_smp_init(); + +#ifdef CONFIG_SWIOTLB + if ((memblock_end_of_DRAM() - 1) > max) { + ppc_swiotlb_enable = 1; + set_pci_dma_ops(&swiotlb_dma_ops); + ppc_md.pci_dma_dev_setup = pci_dma_dev_setup_swiotlb; + } +#endif + + pr_info("Freescale / iVeia P1022 RDK reference board\n"); +} + +machine_device_initcall(p1022_rdk, mpc85xx_common_publish_devices); + +machine_arch_initcall(p1022_rdk, swiotlb_setup_bus_notifier); + +/* + * Called very early, device-tree isn't unflattened + */ +static int __init p1022_rdk_probe(void) +{ + unsigned long root = of_get_flat_dt_root(); + + return of_flat_dt_is_compatible(root, "fsl,p1022rdk"); +} + +define_machine(p1022_rdk) { + .name = "P1022 RDK", + .probe = p1022_rdk_probe, + .setup_arch = p1022_rdk_setup_arch, + .init_IRQ = p1022_rdk_pic_init, +#ifdef CONFIG_PCI + .pcibios_fixup_bus = fsl_pcibios_fixup_bus, +#endif + .get_irq = mpic_get_irq, + .restart = fsl_rstcr_restart, + .calibrate_decr = generic_calibrate_decr, + .progress = udbg_progress, +}; -- cgit v0.10.2 From 03bcb7e35f7e5f759fe03f10a12cd4060dabe60d Mon Sep 17 00:00:00 2001 From: Varun Sethi Date: Mon, 9 Jul 2012 14:15:42 +0530 Subject: powerpc/mpic: finish supporting timer group B on Freescale chips Previously, these interrupts would be mapped, but the offset calculation was broken, and only the first group was initialized. Signed-off-by: Varun Sethi Signed-off-by: Scott Wood Signed-off-by: Kumar Gala diff --git a/arch/powerpc/include/asm/mpic.h b/arch/powerpc/include/asm/mpic.h index c9f698a..e14d35d 100644 --- a/arch/powerpc/include/asm/mpic.h +++ b/arch/powerpc/include/asm/mpic.h @@ -63,6 +63,7 @@ */ #define MPIC_TIMER_BASE 0x01100 #define MPIC_TIMER_STRIDE 0x40 +#define MPIC_TIMER_GROUP_STRIDE 0x1000 #define MPIC_TIMER_CURRENT_CNT 0x00000 #define MPIC_TIMER_BASE_CNT 0x00010 @@ -110,6 +111,9 @@ #define MPIC_VECPRI_SENSE_MASK 0x00400000 #define MPIC_IRQ_DESTINATION 0x00010 +#define MPIC_FSL_BRR1 0x00000 +#define MPIC_FSL_BRR1_VER 0x0000ffff + #define MPIC_MAX_IRQ_SOURCES 2048 #define MPIC_MAX_CPUS 32 #define MPIC_MAX_ISU 32 @@ -296,6 +300,7 @@ struct mpic phys_addr_t paddr; /* The various ioremap'ed bases */ + struct mpic_reg_bank thiscpuregs; struct mpic_reg_bank gregs; struct mpic_reg_bank tmregs; struct mpic_reg_bank cpuregs[MPIC_MAX_CPUS]; diff --git a/arch/powerpc/sysdev/mpic.c b/arch/powerpc/sysdev/mpic.c index bfc6211..7e32db7 100644 --- a/arch/powerpc/sysdev/mpic.c +++ b/arch/powerpc/sysdev/mpic.c @@ -6,7 +6,7 @@ * with various broken implementations of this HW. * * Copyright (C) 2004 Benjamin Herrenschmidt, IBM Corp. - * Copyright 2010-2011 Freescale Semiconductor, Inc. + * Copyright 2010-2012 Freescale Semiconductor, Inc. * * This file is subject to the terms and conditions of the GNU General Public * License. See the file COPYING in the main directory of this archive @@ -221,24 +221,24 @@ static inline void _mpic_ipi_write(struct mpic *mpic, unsigned int ipi, u32 valu _mpic_write(mpic->reg_type, &mpic->gregs, offset, value); } -static inline u32 _mpic_tm_read(struct mpic *mpic, unsigned int tm) +static inline unsigned int mpic_tm_offset(struct mpic *mpic, unsigned int tm) { - unsigned int offset = MPIC_INFO(TIMER_VECTOR_PRI) + - ((tm & 3) * MPIC_INFO(TIMER_STRIDE)); + return (tm >> 2) * MPIC_TIMER_GROUP_STRIDE + + (tm & 3) * MPIC_INFO(TIMER_STRIDE); +} - if (tm >= 4) - offset += 0x1000 / 4; +static inline u32 _mpic_tm_read(struct mpic *mpic, unsigned int tm) +{ + unsigned int offset = mpic_tm_offset(mpic, tm) + + MPIC_INFO(TIMER_VECTOR_PRI); return _mpic_read(mpic->reg_type, &mpic->tmregs, offset); } static inline void _mpic_tm_write(struct mpic *mpic, unsigned int tm, u32 value) { - unsigned int offset = MPIC_INFO(TIMER_VECTOR_PRI) + - ((tm & 3) * MPIC_INFO(TIMER_STRIDE)); - - if (tm >= 4) - offset += 0x1000 / 4; + unsigned int offset = mpic_tm_offset(mpic, tm) + + MPIC_INFO(TIMER_VECTOR_PRI); _mpic_write(mpic->reg_type, &mpic->tmregs, offset, value); } @@ -1301,6 +1301,16 @@ struct mpic * __init mpic_alloc(struct device_node *node, mpic_map(mpic, mpic->paddr, &mpic->gregs, MPIC_INFO(GREG_BASE), 0x1000); mpic_map(mpic, mpic->paddr, &mpic->tmregs, MPIC_INFO(TIMER_BASE), 0x1000); + if (mpic->flags & MPIC_FSL) { + /* + * Yes, Freescale really did put global registers in the + * magic per-cpu area -- and they don't even show up in the + * non-magic per-cpu copies that this driver normally uses. + */ + mpic_map(mpic, mpic->paddr, &mpic->thiscpuregs, + MPIC_CPU_THISBASE, 0x1000); + } + /* Reset */ /* When using a device-node, reset requests are only honored if the MPIC @@ -1440,6 +1450,7 @@ void __init mpic_assign_isu(struct mpic *mpic, unsigned int isu_num, void __init mpic_init(struct mpic *mpic) { int i, cpu; + int num_timers = 4; BUG_ON(mpic->num_sources == 0); @@ -1448,15 +1459,30 @@ void __init mpic_init(struct mpic *mpic) /* Set current processor priority to max */ mpic_cpu_write(MPIC_INFO(CPU_CURRENT_TASK_PRI), 0xf); + if (mpic->flags & MPIC_FSL) { + u32 brr1 = _mpic_read(mpic->reg_type, &mpic->thiscpuregs, + MPIC_FSL_BRR1); + u32 version = brr1 & MPIC_FSL_BRR1_VER; + + /* + * Timer group B is present at the latest in MPIC 3.1 (e.g. + * mpc8536). It is not present in MPIC 2.0 (e.g. mpc8544). + * I don't know about the status of intermediate versions (or + * whether they even exist). + */ + if (version >= 0x0301) + num_timers = 8; + } + /* Initialize timers to our reserved vectors and mask them for now */ - for (i = 0; i < 4; i++) { + for (i = 0; i < num_timers; i++) { + unsigned int offset = mpic_tm_offset(mpic, i); + mpic_write(mpic->tmregs, - i * MPIC_INFO(TIMER_STRIDE) + - MPIC_INFO(TIMER_DESTINATION), + offset + MPIC_INFO(TIMER_DESTINATION), 1 << hard_smp_processor_id()); mpic_write(mpic->tmregs, - i * MPIC_INFO(TIMER_STRIDE) + - MPIC_INFO(TIMER_VECTOR_PRI), + offset + MPIC_INFO(TIMER_VECTOR_PRI), MPIC_VECPRI_MASK | (9 << MPIC_VECPRI_PRIORITY_SHIFT) | (mpic->timer_vecs[0] + i)); -- cgit v0.10.2 From 15f34eb12340b2c2e0cd90c5987ad6b5f73b79b7 Mon Sep 17 00:00:00 2001 From: Zhao Chenhui Date: Fri, 20 Jul 2012 20:42:33 +0800 Subject: powerpc/85xx: Replace epapr spin table macros/defines with a struct Signed-off-by: Zhao Chenhui Signed-off-by: Kumar Gala diff --git a/arch/powerpc/platforms/85xx/smp.c b/arch/powerpc/platforms/85xx/smp.c index ff42490..4827709 100644 --- a/arch/powerpc/platforms/85xx/smp.c +++ b/arch/powerpc/platforms/85xx/smp.c @@ -2,7 +2,7 @@ * Author: Andy Fleming * Kumar Gala * - * Copyright 2006-2008, 2011 Freescale Semiconductor Inc. + * Copyright 2006-2008, 2011-2012 Freescale Semiconductor Inc. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the @@ -17,6 +17,7 @@ #include #include #include +#include #include #include @@ -31,23 +32,21 @@ extern void __early_start(void); -#define BOOT_ENTRY_ADDR_UPPER 0 -#define BOOT_ENTRY_ADDR_LOWER 1 -#define BOOT_ENTRY_R3_UPPER 2 -#define BOOT_ENTRY_R3_LOWER 3 -#define BOOT_ENTRY_RESV 4 -#define BOOT_ENTRY_PIR 5 -#define BOOT_ENTRY_R6_UPPER 6 -#define BOOT_ENTRY_R6_LOWER 7 -#define NUM_BOOT_ENTRY 8 -#define SIZE_BOOT_ENTRY (NUM_BOOT_ENTRY * sizeof(u32)) +struct epapr_spin_table { + u32 addr_h; + u32 addr_l; + u32 r3_h; + u32 r3_l; + u32 reserved; + u32 pir; +}; static int __init smp_85xx_kick_cpu(int nr) { unsigned long flags; const u64 *cpu_rel_addr; - __iomem u32 *bptr_vaddr; + __iomem struct epapr_spin_table *spin_table; struct device_node *np; int n = 0, hw_cpu = get_hard_smp_processor_id(nr); int ioremappable; @@ -75,19 +74,20 @@ smp_85xx_kick_cpu(int nr) /* Map the spin table */ if (ioremappable) - bptr_vaddr = ioremap(*cpu_rel_addr, SIZE_BOOT_ENTRY); + spin_table = ioremap(*cpu_rel_addr, + sizeof(struct epapr_spin_table)); else - bptr_vaddr = phys_to_virt(*cpu_rel_addr); + spin_table = phys_to_virt(*cpu_rel_addr); local_irq_save(flags); - out_be32(bptr_vaddr + BOOT_ENTRY_PIR, hw_cpu); + out_be32(&spin_table->pir, hw_cpu); #ifdef CONFIG_PPC32 - out_be32(bptr_vaddr + BOOT_ENTRY_ADDR_LOWER, __pa(__early_start)); + out_be32(&spin_table->addr_l, __pa(__early_start)); if (!ioremappable) - flush_dcache_range((ulong)bptr_vaddr, - (ulong)(bptr_vaddr + SIZE_BOOT_ENTRY)); + flush_dcache_range((ulong)spin_table, + (ulong)spin_table + sizeof(struct epapr_spin_table)); /* Wait a bit for the CPU to ack. */ while ((__secondary_hold_acknowledge != hw_cpu) && (++n < 1000)) @@ -95,18 +95,18 @@ smp_85xx_kick_cpu(int nr) #else smp_generic_kick_cpu(nr); - out_be64((u64 *)(bptr_vaddr + BOOT_ENTRY_ADDR_UPPER), - __pa((u64)*((unsigned long long *) generic_secondary_smp_init))); + out_be64((u64 *)(&spin_table->addr_h), + __pa((u64)*((unsigned long long *)generic_secondary_smp_init))); if (!ioremappable) - flush_dcache_range((ulong)bptr_vaddr, - (ulong)(bptr_vaddr + SIZE_BOOT_ENTRY)); + flush_dcache_range((ulong)spin_table, + (ulong)spin_table + sizeof(struct epapr_spin_table)); #endif local_irq_restore(flags); if (ioremappable) - iounmap(bptr_vaddr); + iounmap(spin_table); pr_debug("waited %d msecs for CPU #%d.\n", n, nr); -- cgit v0.10.2 From ae5cab476342bc7311945cf89d5cbd8d57f4a5a9 Mon Sep 17 00:00:00 2001 From: Zhao Chenhui Date: Fri, 20 Jul 2012 20:42:34 +0800 Subject: powerpc/smp: add generic_set_cpu_up() to set cpu_state as CPU_UP_PREPARE In the case of cpu hotplug, the cpu_state should be set to CPU_UP_PREPARE when kicking cpu. Otherwise, the cpu_state is always CPU_DEAD after calling generic_set_cpu_dead(), which makes the delay in generic_cpu_die() not happen. Signed-off-by: Zhao Chenhui Signed-off-by: Kumar Gala diff --git a/arch/powerpc/include/asm/smp.h b/arch/powerpc/include/asm/smp.h index ebc24dc..ce8e2bd 100644 --- a/arch/powerpc/include/asm/smp.h +++ b/arch/powerpc/include/asm/smp.h @@ -65,6 +65,7 @@ int generic_cpu_disable(void); void generic_cpu_die(unsigned int cpu); void generic_mach_cpu_die(void); void generic_set_cpu_dead(unsigned int cpu); +void generic_set_cpu_up(unsigned int cpu); int generic_check_cpu_restart(unsigned int cpu); #endif diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 8d4214a..a51ed20 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -102,7 +102,7 @@ int __devinit smp_generic_kick_cpu(int nr) * Ok it's not there, so it might be soft-unplugged, let's * try to bring it back */ - per_cpu(cpu_state, nr) = CPU_UP_PREPARE; + generic_set_cpu_up(nr); smp_wmb(); smp_send_reschedule(nr); #endif /* CONFIG_HOTPLUG_CPU */ @@ -413,6 +413,16 @@ void generic_set_cpu_dead(unsigned int cpu) per_cpu(cpu_state, cpu) = CPU_DEAD; } +/* + * The cpu_state should be set to CPU_UP_PREPARE in kick_cpu(), otherwise + * the cpu_state is always CPU_DEAD after calling generic_set_cpu_dead(), + * which makes the delay in generic_cpu_die() not happen. + */ +void generic_set_cpu_up(unsigned int cpu) +{ + per_cpu(cpu_state, cpu) = CPU_UP_PREPARE; +} + int generic_check_cpu_restart(unsigned int cpu) { return per_cpu(cpu_state, cpu) == CPU_UP_PREPARE; -- cgit v0.10.2 From bf34526374a334ddfafaed73b0d8bf7eb4dea833 Mon Sep 17 00:00:00 2001 From: Zhao Chenhui Date: Fri, 20 Jul 2012 20:42:35 +0800 Subject: powerpc/85xx: implement hardware timebase sync Do hardware timebase sync. Firstly, stop all timebases, and transfer the timebase value of the boot core to the other core. Finally, start all timebases. Only apply to dual-core chips, such as MPC8572, P2020, etc. Signed-off-by: Zhao Chenhui Signed-off-by: Li Yang Signed-off-by: Kumar Gala diff --git a/arch/powerpc/include/asm/fsl_guts.h b/arch/powerpc/include/asm/fsl_guts.h index aa4c488..dd5ba2c 100644 --- a/arch/powerpc/include/asm/fsl_guts.h +++ b/arch/powerpc/include/asm/fsl_guts.h @@ -48,6 +48,8 @@ struct ccsr_guts { __be32 dmuxcr; /* 0x.0068 - DMA Mux Control Register */ u8 res06c[0x70 - 0x6c]; __be32 devdisr; /* 0x.0070 - Device Disable Control */ +#define CCSR_GUTS_DEVDISR_TB1 0x00001000 +#define CCSR_GUTS_DEVDISR_TB0 0x00004000 __be32 devdisr2; /* 0x.0074 - Device Disable Control 2 */ u8 res078[0x7c - 0x78]; __be32 pmjcr; /* 0x.007c - 4 Power Management Jog Control Register */ diff --git a/arch/powerpc/platforms/85xx/smp.c b/arch/powerpc/platforms/85xx/smp.c index 4827709..7ed52a6 100644 --- a/arch/powerpc/platforms/85xx/smp.c +++ b/arch/powerpc/platforms/85xx/smp.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include @@ -41,6 +42,64 @@ struct epapr_spin_table { u32 pir; }; +static struct ccsr_guts __iomem *guts; +static u64 timebase; +static int tb_req; +static int tb_valid; + +static void mpc85xx_timebase_freeze(int freeze) +{ + uint32_t mask; + + mask = CCSR_GUTS_DEVDISR_TB0 | CCSR_GUTS_DEVDISR_TB1; + if (freeze) + setbits32(&guts->devdisr, mask); + else + clrbits32(&guts->devdisr, mask); + + in_be32(&guts->devdisr); +} + +static void mpc85xx_give_timebase(void) +{ + unsigned long flags; + + local_irq_save(flags); + + while (!tb_req) + barrier(); + tb_req = 0; + + mpc85xx_timebase_freeze(1); + timebase = get_tb(); + mb(); + tb_valid = 1; + + while (tb_valid) + barrier(); + + mpc85xx_timebase_freeze(0); + + local_irq_restore(flags); +} + +static void mpc85xx_take_timebase(void) +{ + unsigned long flags; + + local_irq_save(flags); + + tb_req = 1; + while (!tb_valid) + barrier(); + + set_tb(timebase >> 32, timebase & 0xffffffff); + isync(); + tb_valid = 0; + + local_irq_restore(flags); +} + static int __init smp_85xx_kick_cpu(int nr) { @@ -228,6 +287,16 @@ smp_85xx_setup_cpu(int cpu_nr) doorbell_setup_this_cpu(); } +static const struct of_device_id mpc85xx_smp_guts_ids[] = { + { .compatible = "fsl,mpc8572-guts", }, + { .compatible = "fsl,p1020-guts", }, + { .compatible = "fsl,p1021-guts", }, + { .compatible = "fsl,p1022-guts", }, + { .compatible = "fsl,p1023-guts", }, + { .compatible = "fsl,p2020-guts", }, + {}, +}; + void __init mpc85xx_smp_init(void) { struct device_node *np; @@ -249,6 +318,19 @@ void __init mpc85xx_smp_init(void) smp_85xx_ops.cause_ipi = doorbell_cause_ipi; } + np = of_find_matching_node(NULL, mpc85xx_smp_guts_ids); + if (np) { + guts = of_iomap(np, 0); + of_node_put(np); + if (!guts) { + pr_err("%s: Could not map guts node address\n", + __func__); + return; + } + smp_85xx_ops.give_timebase = mpc85xx_give_timebase; + smp_85xx_ops.take_timebase = mpc85xx_take_timebase; + } + smp_ops = &smp_85xx_ops; #ifdef CONFIG_KEXEC -- cgit v0.10.2 From d0832a75075b1119635e0f48549e378040cf5e67 Mon Sep 17 00:00:00 2001 From: Zhao Chenhui Date: Fri, 20 Jul 2012 20:42:36 +0800 Subject: powerpc/85xx: add HOTPLUG_CPU support Add support to disable and re-enable individual cores at runtime on MPC85xx/QorIQ SMP machines. Currently support e500v1/e500v2 core. MPC85xx machines use ePAPR spin-table in boot page for CPU kick-off. This patch uses the boot page from bootloader to boot core at runtime. It supports 32-bit and 36-bit physical address. Signed-off-by: Li Yang Signed-off-by: Jin Qing Signed-off-by: Zhao Chenhui Signed-off-by: Kumar Gala diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 98e513b..b8bab10 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -215,7 +215,8 @@ config ARCH_HIBERNATION_POSSIBLE config ARCH_SUSPEND_POSSIBLE def_bool y depends on ADB_PMU || PPC_EFIKA || PPC_LITE5200 || PPC_83xx || \ - (PPC_85xx && !SMP) || PPC_86xx || PPC_PSERIES || 44x || 40x + (PPC_85xx && !PPC_E500MC) || PPC_86xx || PPC_PSERIES \ + || 44x || 40x config PPC_DCR_NATIVE bool @@ -328,7 +329,8 @@ config SWIOTLB config HOTPLUG_CPU bool "Support for enabling/disabling CPUs" - depends on SMP && HOTPLUG && EXPERIMENTAL && (PPC_PSERIES || PPC_PMAC || PPC_POWERNV) + depends on SMP && HOTPLUG && EXPERIMENTAL && (PPC_PSERIES || \ + PPC_PMAC || PPC_POWERNV || (PPC_85xx && !PPC_E500MC)) ---help--- Say Y here to be able to disable and re-enable individual CPUs at runtime on SMP machines. diff --git a/arch/powerpc/include/asm/cacheflush.h b/arch/powerpc/include/asm/cacheflush.h index ab9e402..b843e35 100644 --- a/arch/powerpc/include/asm/cacheflush.h +++ b/arch/powerpc/include/asm/cacheflush.h @@ -30,6 +30,8 @@ extern void flush_dcache_page(struct page *page); #define flush_dcache_mmap_lock(mapping) do { } while (0) #define flush_dcache_mmap_unlock(mapping) do { } while (0) +extern void __flush_disable_L1(void); + extern void __flush_icache_range(unsigned long, unsigned long); static inline void flush_icache_range(unsigned long start, unsigned long stop) { diff --git a/arch/powerpc/include/asm/smp.h b/arch/powerpc/include/asm/smp.h index ce8e2bd..e807e9d 100644 --- a/arch/powerpc/include/asm/smp.h +++ b/arch/powerpc/include/asm/smp.h @@ -191,6 +191,7 @@ extern unsigned long __secondary_hold_spinloop; extern unsigned long __secondary_hold_acknowledge; extern char __secondary_hold; +extern void __early_start(void); #endif /* __ASSEMBLY__ */ #endif /* __KERNEL__ */ diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S index 0f59863..b221541 100644 --- a/arch/powerpc/kernel/head_fsl_booke.S +++ b/arch/powerpc/kernel/head_fsl_booke.S @@ -1043,6 +1043,34 @@ _GLOBAL(flush_dcache_L1) blr +/* Flush L1 d-cache, invalidate and disable d-cache and i-cache */ +_GLOBAL(__flush_disable_L1) + mflr r10 + bl flush_dcache_L1 /* Flush L1 d-cache */ + mtlr r10 + + mfspr r4, SPRN_L1CSR0 /* Invalidate and disable d-cache */ + li r5, 2 + rlwimi r4, r5, 0, 3 + + msync + isync + mtspr SPRN_L1CSR0, r4 + isync + +1: mfspr r4, SPRN_L1CSR0 /* Wait for the invalidate to finish */ + andi. r4, r4, 2 + bne 1b + + mfspr r4, SPRN_L1CSR1 /* Invalidate and disable i-cache */ + li r5, 2 + rlwimi r4, r5, 0, 3 + + mtspr SPRN_L1CSR1, r4 + isync + + blr + #ifdef CONFIG_SMP /* When we get here, r24 needs to hold the CPU # */ .globl __secondary_start diff --git a/arch/powerpc/platforms/85xx/smp.c b/arch/powerpc/platforms/85xx/smp.c index 7ed52a6..6fcfa12 100644 --- a/arch/powerpc/platforms/85xx/smp.c +++ b/arch/powerpc/platforms/85xx/smp.c @@ -31,8 +31,6 @@ #include #include "smp.h" -extern void __early_start(void); - struct epapr_spin_table { u32 addr_h; u32 addr_l; @@ -100,15 +98,45 @@ static void mpc85xx_take_timebase(void) local_irq_restore(flags); } -static int __init -smp_85xx_kick_cpu(int nr) +#ifdef CONFIG_HOTPLUG_CPU +static void __cpuinit smp_85xx_mach_cpu_die(void) +{ + unsigned int cpu = smp_processor_id(); + u32 tmp; + + local_irq_disable(); + idle_task_exit(); + generic_set_cpu_dead(cpu); + mb(); + + mtspr(SPRN_TCR, 0); + + __flush_disable_L1(); + tmp = (mfspr(SPRN_HID0) & ~(HID0_DOZE|HID0_SLEEP)) | HID0_NAP; + mtspr(SPRN_HID0, tmp); + isync(); + + /* Enter NAP mode. */ + tmp = mfmsr(); + tmp |= MSR_WE; + mb(); + mtmsr(tmp); + isync(); + + while (1) + ; +} +#endif + +static int __cpuinit smp_85xx_kick_cpu(int nr) { unsigned long flags; const u64 *cpu_rel_addr; __iomem struct epapr_spin_table *spin_table; struct device_node *np; - int n = 0, hw_cpu = get_hard_smp_processor_id(nr); + int hw_cpu = get_hard_smp_processor_id(nr); int ioremappable; + int ret = 0; WARN_ON(nr < 0 || nr >= NR_CPUS); WARN_ON(hw_cpu < 0 || hw_cpu >= NR_CPUS); @@ -139,9 +167,34 @@ smp_85xx_kick_cpu(int nr) spin_table = phys_to_virt(*cpu_rel_addr); local_irq_save(flags); +#ifdef CONFIG_PPC32 +#ifdef CONFIG_HOTPLUG_CPU + /* Corresponding to generic_set_cpu_dead() */ + generic_set_cpu_up(nr); + + if (system_state == SYSTEM_RUNNING) { + out_be32(&spin_table->addr_l, 0); + /* + * We don't set the BPTR register here since it already points + * to the boot page properly. + */ + mpic_reset_core(hw_cpu); + + /* wait until core is ready... */ + if (!spin_event_timeout(in_be32(&spin_table->addr_l) == 1, + 10000, 100)) { + pr_err("%s: timeout waiting for core %d to reset\n", + __func__, hw_cpu); + ret = -ENOENT; + goto out; + } + + /* clear the acknowledge status */ + __secondary_hold_acknowledge = -1; + } +#endif out_be32(&spin_table->pir, hw_cpu); -#ifdef CONFIG_PPC32 out_be32(&spin_table->addr_l, __pa(__early_start)); if (!ioremappable) @@ -149,11 +202,18 @@ smp_85xx_kick_cpu(int nr) (ulong)spin_table + sizeof(struct epapr_spin_table)); /* Wait a bit for the CPU to ack. */ - while ((__secondary_hold_acknowledge != hw_cpu) && (++n < 1000)) - mdelay(1); + if (!spin_event_timeout(__secondary_hold_acknowledge == hw_cpu, + 10000, 100)) { + pr_err("%s: timeout waiting for core %d to ack\n", + __func__, hw_cpu); + ret = -ENOENT; + goto out; + } +out: #else smp_generic_kick_cpu(nr); + out_be32(&spin_table->pir, hw_cpu); out_be64((u64 *)(&spin_table->addr_h), __pa((u64)*((unsigned long long *)generic_secondary_smp_init))); @@ -167,13 +227,15 @@ smp_85xx_kick_cpu(int nr) if (ioremappable) iounmap(spin_table); - pr_debug("waited %d msecs for CPU #%d.\n", n, nr); - - return 0; + return ret; } struct smp_ops_t smp_85xx_ops = { .kick_cpu = smp_85xx_kick_cpu, +#ifdef CONFIG_HOTPLUG_CPU + .cpu_disable = generic_cpu_disable, + .cpu_die = generic_cpu_die, +#endif #ifdef CONFIG_KEXEC .give_timebase = smp_generic_give_timebase, .take_timebase = smp_generic_take_timebase, @@ -277,8 +339,7 @@ static void mpc85xx_smp_machine_kexec(struct kimage *image) } #endif /* CONFIG_KEXEC */ -static void __init -smp_85xx_setup_cpu(int cpu_nr) +static void __cpuinit smp_85xx_setup_cpu(int cpu_nr) { if (smp_85xx_ops.probe == smp_mpic_probe) mpic_setup_this_cpu(); @@ -329,6 +390,9 @@ void __init mpc85xx_smp_init(void) } smp_85xx_ops.give_timebase = mpc85xx_give_timebase; smp_85xx_ops.take_timebase = mpc85xx_take_timebase; +#ifdef CONFIG_HOTPLUG_CPU + ppc_md.cpu_die = smp_85xx_mach_cpu_die; +#endif } smp_ops = &smp_85xx_ops; -- cgit v0.10.2 From 7e0f4872a33c6da38e727cf42c939cc32294fce6 Mon Sep 17 00:00:00 2001 From: Varun Sethi Date: Mon, 9 Jul 2012 18:25:31 +0530 Subject: powepc/booke: Separate out E.HV check and ivor setup code. Move the E.HV check and CPU_FTR_EMB_HV flag manipulation to the cpu setup code. Create a separate routine for E.HV ivors setup. Signed-off-by: Varun Sethi Signed-off-by: Kumar Gala diff --git a/arch/powerpc/kernel/cpu_setup_fsl_booke.S b/arch/powerpc/kernel/cpu_setup_fsl_booke.S index 69fdd23..a55d028 100644 --- a/arch/powerpc/kernel/cpu_setup_fsl_booke.S +++ b/arch/powerpc/kernel/cpu_setup_fsl_booke.S @@ -16,6 +16,8 @@ #include #include #include +#include +#include _GLOBAL(__e500_icache_setup) mfspr r0, SPRN_L1CSR1 @@ -73,12 +75,33 @@ _GLOBAL(__setup_cpu_e500v2) mtlr r4 blr _GLOBAL(__setup_cpu_e500mc) - mr r5, r4 - mflr r4 + mflr r5 bl __e500_icache_setup bl __e500_dcache_setup bl __setup_e500mc_ivors - mtlr r4 + /* + * We only want to touch IVOR38-41 if we're running on hardware + * that supports category E.HV. The architectural way to determine + * this is MMUCFG[LPIDSIZE]. + */ + mfspr r3, SPRN_MMUCFG + rlwinm. r3, r3, 0, MMUCFG_LPIDSIZE + beq 1f + bl __setup_ehv_ivors + b 2f +1: + lwz r3, CPU_SPEC_FEATURES(r4) + /* We need this check as cpu_setup is also called for + * the secondary cores. So, if we have already cleared + * the feature on the primary core, avoid doing it on the + * secondary core. + */ + andis. r6, r3, CPU_FTR_EMB_HV@h + beq 2f + rlwinm r3, r3, 0, ~CPU_FTR_EMB_HV + stw r3, CPU_SPEC_FEATURES(r4) +2: + mtlr r5 blr #endif /* Right now, restore and setup are the same thing */ diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S index b221541..6f62a73 100644 --- a/arch/powerpc/kernel/head_fsl_booke.S +++ b/arch/powerpc/kernel/head_fsl_booke.S @@ -895,15 +895,11 @@ _GLOBAL(__setup_e500mc_ivors) mtspr SPRN_IVOR36,r3 li r3,CriticalDoorbell@l mtspr SPRN_IVOR37,r3 + sync + blr - /* - * We only want to touch IVOR38-41 if we're running on hardware - * that supports category E.HV. The architectural way to determine - * this is MMUCFG[LPIDSIZE]. - */ - mfspr r3, SPRN_MMUCFG - andis. r3, r3, MMUCFG_LPIDSIZE@h - beq no_hv +/* setup ehv ivors for */ +_GLOBAL(__setup_ehv_ivors) li r3,GuestDoorbell@l mtspr SPRN_IVOR38,r3 li r3,CriticalGuestDoorbell@l @@ -912,14 +908,8 @@ _GLOBAL(__setup_e500mc_ivors) mtspr SPRN_IVOR40,r3 li r3,Ehvpriv@l mtspr SPRN_IVOR41,r3 -skip_hv_ivors: sync blr -no_hv: - lwz r3, CPU_SPEC_FEATURES(r5) - rlwinm r3, r3, 0, ~CPU_FTR_EMB_HV - stw r3, CPU_SPEC_FEATURES(r5) - b skip_hv_ivors #ifdef CONFIG_SPE /* -- cgit v0.10.2 From 2c71b0cc4a626d8bd02b88b3b92ce18be4d54c6d Mon Sep 17 00:00:00 2001 From: Varun Sethi Date: Mon, 9 Jul 2012 18:28:21 +0530 Subject: powerpc/booke: Merge the 32 bit e5500/e500mc cpu setup code. Merge the 32 bit cpu setup code for e500mc/e5500 and define the "cpu_restore" routine (for e5500/e6500) only for the 64 bit case. The cpu_restore routine is used in the 64 bit case for setting up the secondary cores. Signed-off-by: Varun Sethi Signed-off-by: Kumar Gala diff --git a/arch/powerpc/kernel/cpu_setup_fsl_booke.S b/arch/powerpc/kernel/cpu_setup_fsl_booke.S index a55d028..021822d 100644 --- a/arch/powerpc/kernel/cpu_setup_fsl_booke.S +++ b/arch/powerpc/kernel/cpu_setup_fsl_booke.S @@ -75,6 +75,7 @@ _GLOBAL(__setup_cpu_e500v2) mtlr r4 blr _GLOBAL(__setup_cpu_e500mc) +_GLOBAL(__setup_cpu_e5500) mflr r5 bl __e500_icache_setup bl __e500_dcache_setup @@ -104,19 +105,17 @@ _GLOBAL(__setup_cpu_e500mc) mtlr r5 blr #endif +#ifdef CONFIG_PPC_BOOK3E_64 /* Right now, restore and setup are the same thing */ _GLOBAL(__restore_cpu_e5500) _GLOBAL(__setup_cpu_e5500) mflr r4 bl __e500_icache_setup bl __e500_dcache_setup -#ifdef CONFIG_PPC_BOOK3E_64 bl .__setup_base_ivors bl .setup_perfmon_ivor bl .setup_doorbell_ivors bl .setup_ehv_ivors -#else - bl __setup_e500mc_ivors -#endif mtlr r4 blr +#endif diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index 455faa3..0514c21 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -2016,7 +2016,9 @@ static struct cpu_spec __initdata cpu_specs[] = { .oprofile_cpu_type = "ppc/e500mc", .oprofile_type = PPC_OPROFILE_FSL_EMB, .cpu_setup = __setup_cpu_e5500, +#ifndef CONFIG_PPC32 .cpu_restore = __restore_cpu_e5500, +#endif .machine_check = machine_check_e500mc, .platform = "ppce5500", }, @@ -2034,7 +2036,9 @@ static struct cpu_spec __initdata cpu_specs[] = { .oprofile_cpu_type = "ppc/e6500", .oprofile_type = PPC_OPROFILE_FSL_EMB, .cpu_setup = __setup_cpu_e5500, +#ifndef CONFIG_PPC32 .cpu_restore = __restore_cpu_e5500, +#endif .machine_check = machine_check_e500mc, .platform = "ppce6500", }, -- cgit v0.10.2 From 0778407f836d632cce90e5d9df490a7c5e6e398a Mon Sep 17 00:00:00 2001 From: Varun Sethi Date: Mon, 9 Jul 2012 18:31:51 +0530 Subject: powerpc/booke: Separate out restore_e5500/setup_e5500 routines. For the 64 bit case separate out e5500 cpu_setup and cpu_restore functions. The cpu_setup function (for the primary core) is passed the cpu_spec pointer, which is not there in case of the cpu_restore function. Also, in our case we will have to manipulate the CPU_FTR_EMB_HV flag on the primary core. Signed-off-by: Varun Sethi Signed-off-by: Kumar Gala diff --git a/arch/powerpc/kernel/cpu_setup_fsl_booke.S b/arch/powerpc/kernel/cpu_setup_fsl_booke.S index 021822d..1345e1b 100644 --- a/arch/powerpc/kernel/cpu_setup_fsl_booke.S +++ b/arch/powerpc/kernel/cpu_setup_fsl_booke.S @@ -105,17 +105,45 @@ _GLOBAL(__setup_cpu_e5500) mtlr r5 blr #endif + #ifdef CONFIG_PPC_BOOK3E_64 -/* Right now, restore and setup are the same thing */ _GLOBAL(__restore_cpu_e5500) -_GLOBAL(__setup_cpu_e5500) mflr r4 bl __e500_icache_setup bl __e500_dcache_setup bl .__setup_base_ivors bl .setup_perfmon_ivor bl .setup_doorbell_ivors + /* + * We only want to touch IVOR38-41 if we're running on hardware + * that supports category E.HV. The architectural way to determine + * this is MMUCFG[LPIDSIZE]. + */ + mfspr r10,SPRN_MMUCFG + rlwinm. r10,r10,0,MMUCFG_LPIDSIZE + beq 1f bl .setup_ehv_ivors +1: mtlr r4 blr + +_GLOBAL(__setup_cpu_e5500) + mflr r5 + bl __e500_icache_setup + bl __e500_dcache_setup + bl .__setup_base_ivors + bl .setup_perfmon_ivor + bl .setup_doorbell_ivors + /* + * We only want to touch IVOR38-41 if we're running on hardware + * that supports category E.HV. The architectural way to determine + * this is MMUCFG[LPIDSIZE]. + */ + mfspr r10,SPRN_MMUCFG + rlwinm. r10,r10,0,MMUCFG_LPIDSIZE + beq 1f + bl .setup_ehv_ivors +1: + mtlr r5 + blr #endif diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S index 87a82fb..4684e33 100644 --- a/arch/powerpc/kernel/exceptions-64e.S +++ b/arch/powerpc/kernel/exceptions-64e.S @@ -1356,25 +1356,11 @@ _GLOBAL(setup_perfmon_ivor) _GLOBAL(setup_doorbell_ivors) SET_IVOR(36, 0x280) /* Processor Doorbell */ SET_IVOR(37, 0x2a0) /* Processor Doorbell Crit */ - - /* Check MMUCFG[LPIDSIZE] to determine if we have category E.HV */ - mfspr r10,SPRN_MMUCFG - rlwinm. r10,r10,0,MMUCFG_LPIDSIZE - beqlr - - SET_IVOR(38, 0x2c0) /* Guest Processor Doorbell */ - SET_IVOR(39, 0x2e0) /* Guest Processor Doorbell Crit/MC */ blr _GLOBAL(setup_ehv_ivors) - /* - * We may be running as a guest and lack E.HV even on a chip - * that normally has it. - */ - mfspr r10,SPRN_MMUCFG - rlwinm. r10,r10,0,MMUCFG_LPIDSIZE - beqlr - SET_IVOR(40, 0x300) /* Embedded Hypervisor System Call */ SET_IVOR(41, 0x320) /* Embedded Hypervisor Privilege */ + SET_IVOR(38, 0x2c0) /* Guest Processor Doorbell */ + SET_IVOR(39, 0x2e0) /* Guest Processor Doorbell Crit/MC */ blr -- cgit v0.10.2 From 39be5b4a7f232870aad0b3c130791eacd0d34347 Mon Sep 17 00:00:00 2001 From: Varun Sethi Date: Mon, 9 Jul 2012 18:34:01 +0530 Subject: powerpc/booke: Add CPU_FTR_EMB_HV check for e5500. Added CPU_FTR_EMB_HV feature check for e5500. Signed-off-by: Varun Sethi Signed-off-by: Mihai Caraman Signed-off-by: Kumar Gala diff --git a/arch/powerpc/kernel/cpu_setup_fsl_booke.S b/arch/powerpc/kernel/cpu_setup_fsl_booke.S index 1345e1b..dcd8819 100644 --- a/arch/powerpc/kernel/cpu_setup_fsl_booke.S +++ b/arch/powerpc/kernel/cpu_setup_fsl_booke.S @@ -143,7 +143,13 @@ _GLOBAL(__setup_cpu_e5500) rlwinm. r10,r10,0,MMUCFG_LPIDSIZE beq 1f bl .setup_ehv_ivors + b 2f 1: + ld r10,CPU_SPEC_FEATURES(r4) + LOAD_REG_IMMEDIATE(r9,CPU_FTR_EMB_HV) + andc r10,r10,r9 + std r10,CPU_SPEC_FEATURES(r4) +2: mtlr r5 blr #endif -- cgit v0.10.2 From 688ba1dbee8a49a2efe507cd9ae69634d92bb640 Mon Sep 17 00:00:00 2001 From: Jia Hongtao Date: Fri, 3 Aug 2012 18:14:10 +0800 Subject: powerpc/swiotlb: Enable at early stage and disable if not necessary Remove the dependency on PCI initialization for SWIOTLB initialization. So that PCI can be initialized at proper time. SWIOTLB is partly determined by PCI inbound/outbound map which is assigned in PCI initialization. But swiotlb_init() should be done at the stage of mem_init() which is much earlier than PCI initialization. So we reserve the memory for SWIOTLB first and free it if not necessary. All boards are converted to fit this change. Signed-off-by: Jia Hongtao Signed-off-by: Li Yang Acked-by: Tony Breeds Signed-off-by: Kumar Gala diff --git a/arch/powerpc/include/asm/swiotlb.h b/arch/powerpc/include/asm/swiotlb.h index 8979d4c..de99d6e 100644 --- a/arch/powerpc/include/asm/swiotlb.h +++ b/arch/powerpc/include/asm/swiotlb.h @@ -22,4 +22,10 @@ int __init swiotlb_setup_bus_notifier(void); extern void pci_dma_dev_setup_swiotlb(struct pci_dev *pdev); +#ifdef CONFIG_SWIOTLB +void swiotlb_detect_4g(void); +#else +static inline void swiotlb_detect_4g(void) {} +#endif + #endif /* __ASM_SWIOTLB_H */ diff --git a/arch/powerpc/kernel/dma-swiotlb.c b/arch/powerpc/kernel/dma-swiotlb.c index a720b54..bd1a2ab 100644 --- a/arch/powerpc/kernel/dma-swiotlb.c +++ b/arch/powerpc/kernel/dma-swiotlb.c @@ -105,3 +105,23 @@ int __init swiotlb_setup_bus_notifier(void) &ppc_swiotlb_plat_bus_notifier); return 0; } + +void swiotlb_detect_4g(void) +{ + if ((memblock_end_of_DRAM() - 1) > 0xffffffff) + ppc_swiotlb_enable = 1; +} + +static int __init swiotlb_late_init(void) +{ + if (ppc_swiotlb_enable) { + swiotlb_print_info(); + set_pci_dma_ops(&swiotlb_dma_ops); + ppc_md.pci_dma_dev_setup = pci_dma_dev_setup_swiotlb; + } else { + swiotlb_free(); + } + + return 0; +} +subsys_initcall(swiotlb_late_init); diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 44cf2b2..0dba506 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -300,8 +300,7 @@ void __init mem_init(void) unsigned long reservedpages = 0, codesize, initsize, datasize, bsssize; #ifdef CONFIG_SWIOTLB - if (ppc_swiotlb_enable) - swiotlb_init(1); + swiotlb_init(0); #endif num_physpages = memblock_phys_mem_size() >> PAGE_SHIFT; diff --git a/arch/powerpc/platforms/44x/currituck.c b/arch/powerpc/platforms/44x/currituck.c index 9f6c33d6..6bd89a0 100644 --- a/arch/powerpc/platforms/44x/currituck.c +++ b/arch/powerpc/platforms/44x/currituck.c @@ -21,7 +21,6 @@ */ #include -#include #include #include #include @@ -159,13 +158,8 @@ static void __init ppc47x_setup_arch(void) /* No need to check the DMA config as we /know/ our windows are all of * RAM. Lets hope that doesn't change */ -#ifdef CONFIG_SWIOTLB - if ((memblock_end_of_DRAM() - 1) > 0xffffffff) { - ppc_swiotlb_enable = 1; - set_pci_dma_ops(&swiotlb_dma_ops); - ppc_md.pci_dma_dev_setup = pci_dma_dev_setup_swiotlb; - } -#endif + swiotlb_detect_4g(); + ppc47x_smp_init(); } diff --git a/arch/powerpc/platforms/85xx/mpc85xx_ds.c b/arch/powerpc/platforms/85xx/mpc85xx_ds.c index 6d3265f..56f8c8f 100644 --- a/arch/powerpc/platforms/85xx/mpc85xx_ds.c +++ b/arch/powerpc/platforms/85xx/mpc85xx_ds.c @@ -159,6 +159,7 @@ static void __init mpc85xx_ds_setup_arch(void) if (ppc_md.progress) ppc_md.progress("mpc85xx_ds_setup_arch()", 0); + swiotlb_detect_4g(); mpc85xx_ds_pci_init(); mpc85xx_smp_init(); diff --git a/arch/powerpc/platforms/85xx/qemu_e500.c b/arch/powerpc/platforms/85xx/qemu_e500.c index 95a2e53..3c5490c 100644 --- a/arch/powerpc/platforms/85xx/qemu_e500.c +++ b/arch/powerpc/platforms/85xx/qemu_e500.c @@ -42,6 +42,7 @@ static void __init qemu_e500_setup_arch(void) ppc_md.progress("qemu_e500_setup_arch()", 0); fsl_pci_init(); + swiotlb_detect_4g(); mpc85xx_smp_init(); } diff --git a/arch/powerpc/sysdev/fsl_pci.c b/arch/powerpc/sysdev/fsl_pci.c index 6938792..da7a3d7 100644 --- a/arch/powerpc/sysdev/fsl_pci.c +++ b/arch/powerpc/sysdev/fsl_pci.c @@ -872,11 +872,8 @@ void __devinit fsl_pci_init(void) * we need SWIOTLB to handle buffers located outside of * dma capable memory region */ - if (memblock_end_of_DRAM() - 1 > max) { + if (memblock_end_of_DRAM() - 1 > max) ppc_swiotlb_enable = 1; - set_pci_dma_ops(&swiotlb_dma_ops); - ppc_md.pci_dma_dev_setup = pci_dma_dev_setup_swiotlb; - } #endif } #endif -- cgit v0.10.2 From 0a4081641d722d3dee140505a86330ecf44db0fe Mon Sep 17 00:00:00 2001 From: Varun Sethi Date: Wed, 8 Aug 2012 09:36:09 +0530 Subject: powerpc/mpic: FSL MPIC error interrupt support. All SOC device error interrupts are muxed and delivered to the core as a single MPIC error interrupt. Currently all the device drivers requiring access to device errors have to register for the MPIC error interrupt as a shared interrupt. With this patch we add interrupt demuxing capability in the mpic driver, allowing device drivers to register for their individual error interrupts. This is achieved by handling error interrupts in a cascaded fashion. MPIC error interrupt is handled by the "error_int_handler", which subsequently demuxes it using the EISR and delivers it to the respective drivers. The error interrupt capability is dependent on the MPIC EIMR register, which was introduced in FSL MPIC version 4.1 (P4080 rev2). So, error interrupt demuxing capability is dependent on the MPIC version and can be used for versions >= 4.1. Signed-off-by: Varun Sethi Signed-off-by: Bogdan Hamciuc Signed-off-by: Kumar Gala diff --git a/arch/powerpc/include/asm/mpic.h b/arch/powerpc/include/asm/mpic.h index e14d35d..c0f9ef9 100644 --- a/arch/powerpc/include/asm/mpic.h +++ b/arch/powerpc/include/asm/mpic.h @@ -118,6 +118,9 @@ #define MPIC_MAX_CPUS 32 #define MPIC_MAX_ISU 32 +#define MPIC_MAX_ERR 32 +#define MPIC_FSL_ERR_INT 16 + /* * Tsi108 implementation of MPIC has many differences from the original one */ @@ -270,6 +273,7 @@ struct mpic struct irq_chip hc_ipi; #endif struct irq_chip hc_tm; + struct irq_chip hc_err; const char *name; /* Flags */ unsigned int flags; @@ -283,6 +287,8 @@ struct mpic /* vector numbers used for internal sources (ipi/timers) */ unsigned int ipi_vecs[4]; unsigned int timer_vecs[8]; + /* vector numbers used for FSL MPIC error interrupts */ + unsigned int err_int_vecs[MPIC_MAX_ERR]; /* Spurious vector to program into unused sources */ unsigned int spurious_vec; @@ -306,6 +312,9 @@ struct mpic struct mpic_reg_bank cpuregs[MPIC_MAX_CPUS]; struct mpic_reg_bank isus[MPIC_MAX_ISU]; + /* ioremap'ed base for error interrupt registers */ + u32 __iomem *err_regs; + /* Protected sources */ unsigned long *protected; @@ -370,6 +379,11 @@ struct mpic #define MPIC_NO_RESET 0x00004000 /* Freescale MPIC (compatible includes "fsl,mpic") */ #define MPIC_FSL 0x00008000 +/* Freescale MPIC supports EIMR (error interrupt mask register). + * This flag is set for MPIC version >= 4.1 (version determined + * from the BRR1 register). +*/ +#define MPIC_FSL_HAS_EIMR 0x00010000 /* MPIC HW modification ID */ #define MPIC_REGSET_MASK 0xf0000000 diff --git a/arch/powerpc/sysdev/Makefile b/arch/powerpc/sysdev/Makefile index 1bd7ecb..a57600b 100644 --- a/arch/powerpc/sysdev/Makefile +++ b/arch/powerpc/sysdev/Makefile @@ -15,7 +15,7 @@ obj-$(CONFIG_PPC_DCR_NATIVE) += dcr-low.o obj-$(CONFIG_PPC_PMI) += pmi.o obj-$(CONFIG_U3_DART) += dart_iommu.o obj-$(CONFIG_MMIO_NVRAM) += mmio_nvram.o -obj-$(CONFIG_FSL_SOC) += fsl_soc.o +obj-$(CONFIG_FSL_SOC) += fsl_soc.o fsl_mpic_err.o obj-$(CONFIG_FSL_PCI) += fsl_pci.o $(fsl-msi-obj-y) obj-$(CONFIG_FSL_PMC) += fsl_pmc.o obj-$(CONFIG_FSL_LBC) += fsl_lbc.o diff --git a/arch/powerpc/sysdev/fsl_mpic_err.c b/arch/powerpc/sysdev/fsl_mpic_err.c new file mode 100644 index 0000000..b83f325 --- /dev/null +++ b/arch/powerpc/sysdev/fsl_mpic_err.c @@ -0,0 +1,149 @@ +/* + * Copyright (C) 2012 Freescale Semiconductor, Inc. + * + * Author: Varun Sethi + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; version 2 of the + * License. + * + */ + +#include +#include +#include + +#include +#include +#include + +#include "mpic.h" + +#define MPIC_ERR_INT_BASE 0x3900 +#define MPIC_ERR_INT_EISR 0x0000 +#define MPIC_ERR_INT_EIMR 0x0010 + +static inline u32 mpic_fsl_err_read(u32 __iomem *base, unsigned int err_reg) +{ + return in_be32(base + (err_reg >> 2)); +} + +static inline void mpic_fsl_err_write(u32 __iomem *base, u32 value) +{ + out_be32(base + (MPIC_ERR_INT_EIMR >> 2), value); +} + +static void fsl_mpic_mask_err(struct irq_data *d) +{ + u32 eimr; + struct mpic *mpic = irq_data_get_irq_chip_data(d); + unsigned int src = virq_to_hw(d->irq) - mpic->err_int_vecs[0]; + + eimr = mpic_fsl_err_read(mpic->err_regs, MPIC_ERR_INT_EIMR); + eimr |= (1 << (31 - src)); + mpic_fsl_err_write(mpic->err_regs, eimr); +} + +static void fsl_mpic_unmask_err(struct irq_data *d) +{ + u32 eimr; + struct mpic *mpic = irq_data_get_irq_chip_data(d); + unsigned int src = virq_to_hw(d->irq) - mpic->err_int_vecs[0]; + + eimr = mpic_fsl_err_read(mpic->err_regs, MPIC_ERR_INT_EIMR); + eimr &= ~(1 << (31 - src)); + mpic_fsl_err_write(mpic->err_regs, eimr); +} + +static struct irq_chip fsl_mpic_err_chip = { + .irq_disable = fsl_mpic_mask_err, + .irq_mask = fsl_mpic_mask_err, + .irq_unmask = fsl_mpic_unmask_err, +}; + +int mpic_setup_error_int(struct mpic *mpic, int intvec) +{ + int i; + + mpic->err_regs = ioremap(mpic->paddr + MPIC_ERR_INT_BASE, 0x1000); + if (!mpic->err_regs) { + pr_err("could not map mpic error registers\n"); + return -ENOMEM; + } + mpic->hc_err = fsl_mpic_err_chip; + mpic->hc_err.name = mpic->name; + mpic->flags |= MPIC_FSL_HAS_EIMR; + /* allocate interrupt vectors for error interrupts */ + for (i = MPIC_MAX_ERR - 1; i >= 0; i--) + mpic->err_int_vecs[i] = --intvec; + + return 0; +} + +int mpic_map_error_int(struct mpic *mpic, unsigned int virq, irq_hw_number_t hw) +{ + if ((mpic->flags & MPIC_FSL_HAS_EIMR) && + (hw >= mpic->err_int_vecs[0] && + hw <= mpic->err_int_vecs[MPIC_MAX_ERR - 1])) { + WARN_ON(mpic->flags & MPIC_SECONDARY); + + pr_debug("mpic: mapping as Error Interrupt\n"); + irq_set_chip_data(virq, mpic); + irq_set_chip_and_handler(virq, &mpic->hc_err, + handle_level_irq); + return 1; + } + + return 0; +} + +static irqreturn_t fsl_error_int_handler(int irq, void *data) +{ + struct mpic *mpic = (struct mpic *) data; + u32 eisr, eimr; + int errint; + unsigned int cascade_irq; + + eisr = mpic_fsl_err_read(mpic->err_regs, MPIC_ERR_INT_EISR); + eimr = mpic_fsl_err_read(mpic->err_regs, MPIC_ERR_INT_EIMR); + + if (!(eisr & ~eimr)) + return IRQ_NONE; + + while (eisr) { + errint = __builtin_clz(eisr); + cascade_irq = irq_linear_revmap(mpic->irqhost, + mpic->err_int_vecs[errint]); + WARN_ON(cascade_irq == NO_IRQ); + if (cascade_irq != NO_IRQ) { + generic_handle_irq(cascade_irq); + } else { + eimr |= 1 << (31 - errint); + mpic_fsl_err_write(mpic->err_regs, eimr); + } + eisr &= ~(1 << (31 - errint)); + } + + return IRQ_HANDLED; +} + +void mpic_err_int_init(struct mpic *mpic, irq_hw_number_t irqnum) +{ + unsigned int virq; + int ret; + + virq = irq_create_mapping(mpic->irqhost, irqnum); + if (virq == NO_IRQ) { + pr_err("Error interrupt setup failed\n"); + return; + } + + /* Mask all error interrupts */ + mpic_fsl_err_write(mpic->err_regs, ~0); + + ret = request_irq(virq, fsl_error_int_handler, IRQF_NO_THREAD, + "mpic-error-int", mpic); + if (ret) + pr_err("Failed to register error interrupt handler\n"); +} diff --git a/arch/powerpc/sysdev/mpic.c b/arch/powerpc/sysdev/mpic.c index 7e32db7..9c6e535 100644 --- a/arch/powerpc/sysdev/mpic.c +++ b/arch/powerpc/sysdev/mpic.c @@ -1026,6 +1026,9 @@ static int mpic_host_map(struct irq_domain *h, unsigned int virq, return 0; } + if (mpic_map_error_int(mpic, virq, hw)) + return 0; + if (hw >= mpic->num_sources) return -EINVAL; @@ -1085,7 +1088,16 @@ static int mpic_host_xlate(struct irq_domain *h, struct device_node *ct, */ switch (intspec[2]) { case 0: - case 1: /* no EISR/EIMR support for now, treat as shared IRQ */ + break; + case 1: + if (!(mpic->flags & MPIC_FSL_HAS_EIMR)) + break; + + if (intspec[3] >= ARRAY_SIZE(mpic->err_int_vecs)) + return -EINVAL; + + *out_hwirq = mpic->err_int_vecs[intspec[3]]; + break; case 2: if (intspec[0] >= ARRAY_SIZE(mpic->ipi_vecs)) @@ -1302,6 +1314,9 @@ struct mpic * __init mpic_alloc(struct device_node *node, mpic_map(mpic, mpic->paddr, &mpic->tmregs, MPIC_INFO(TIMER_BASE), 0x1000); if (mpic->flags & MPIC_FSL) { + u32 brr1, version; + int ret; + /* * Yes, Freescale really did put global registers in the * magic per-cpu area -- and they don't even show up in the @@ -1309,6 +1324,29 @@ struct mpic * __init mpic_alloc(struct device_node *node, */ mpic_map(mpic, mpic->paddr, &mpic->thiscpuregs, MPIC_CPU_THISBASE, 0x1000); + + brr1 = _mpic_read(mpic->reg_type, &mpic->thiscpuregs, + MPIC_FSL_BRR1); + version = brr1 & MPIC_FSL_BRR1_VER; + + /* Error interrupt mask register (EIMR) is required for + * handling individual device error interrupts. EIMR + * was added in MPIC version 4.1. + * + * Over here we reserve vector number space for error + * interrupt vectors. This space is stolen from the + * global vector number space, as in case of ipis + * and timer interrupts. + * + * Available vector space = intvec_top - 12, where 12 + * is the number of vectors which have been consumed by + * ipis and timer interrupts. + */ + if (version >= 0x401) { + ret = mpic_setup_error_int(mpic, intvec_top - 12); + if (ret) + return NULL; + } } /* Reset */ @@ -1474,6 +1512,10 @@ void __init mpic_init(struct mpic *mpic) num_timers = 8; } + /* FSL mpic error interrupt intialization */ + if (mpic->flags & MPIC_FSL_HAS_EIMR) + mpic_err_int_init(mpic, MPIC_FSL_ERR_INT); + /* Initialize timers to our reserved vectors and mask them for now */ for (i = 0; i < num_timers; i++) { unsigned int offset = mpic_tm_offset(mpic, i); diff --git a/arch/powerpc/sysdev/mpic.h b/arch/powerpc/sysdev/mpic.h index 13f3e89..24bf07a 100644 --- a/arch/powerpc/sysdev/mpic.h +++ b/arch/powerpc/sysdev/mpic.h @@ -40,4 +40,26 @@ extern int mpic_set_affinity(struct irq_data *d, const struct cpumask *cpumask, bool force); extern void mpic_reset_core(int cpu); +#ifdef CONFIG_FSL_SOC +extern int mpic_map_error_int(struct mpic *mpic, unsigned int virq, irq_hw_number_t hw); +extern void mpic_err_int_init(struct mpic *mpic, irq_hw_number_t irqnum); +extern int mpic_setup_error_int(struct mpic *mpic, int intvec); +#else +static inline int mpic_map_error_int(struct mpic *mpic, unsigned int virq, irq_hw_number_t hw) +{ + return 0; +} + + +static inline void mpic_err_int_init(struct mpic *mpic, irq_hw_number_t irqnum) +{ + return; +} + +static inline int mpic_setup_error_int(struct mpic *mpic, int intvec) +{ + return -1; +} +#endif + #endif /* _POWERPC_SYSDEV_MPIC_H */ -- cgit v0.10.2 From 2eb28006431c984ba943b2c05869c47bdc5608b7 Mon Sep 17 00:00:00 2001 From: Olivia Yin Date: Thu, 9 Aug 2012 15:42:34 +0800 Subject: powerpc/e500v2: Add Power ISA properties to comply with ePAPR 1.1 power-isa-version and power-isa-* are cpu node general properties defined in ePAPR. If the power-isa-version property exists, then for each category from the Categories section of Book I of the Power ISA version indicated, the existence of a property named power-isa-[CAT], where [CAT] is the abbreviated category name with all uppercase letters converted to lowercase, indicates that the category is supported by the implementation. The patch update all e500v2 platforms. Signed-off-by: Liu Yu Signed-off-by: Olivia Yin Signed-off-by: Kumar Gala diff --git a/arch/powerpc/boot/dts/fsl/e500v2_power_isa.dtsi b/arch/powerpc/boot/dts/fsl/e500v2_power_isa.dtsi new file mode 100644 index 0000000..f492814 --- /dev/null +++ b/arch/powerpc/boot/dts/fsl/e500v2_power_isa.dtsi @@ -0,0 +1,52 @@ +/* + * e500v2 Power ISA Device Tree Source (include) + * + * Copyright 2012 Freescale Semiconductor Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Freescale Semiconductor nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * + * ALTERNATIVELY, this software may be distributed under the terms of the + * GNU General Public License ("GPL") as published by the Free Software + * Foundation, either version 2 of that License or (at your option) any + * later version. + * + * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor "AS IS" AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/ { + cpus { + power-isa-version = "2.03"; + power-isa-b; // Base + power-isa-e; // Embedded + power-isa-atb; // Alternate Time Base + power-isa-cs; // Cache Specification + power-isa-e.le; // Embedded.Little-Endian + power-isa-e.pm; // Embedded.Performance Monitor + power-isa-ecl; // Embedded Cache Locking + power-isa-mmc; // Memory Coherence + power-isa-sp; // Signal Processing Engine + power-isa-sp.fd; // SPE.Embedded Float Scalar Double + power-isa-sp.fs; // SPE.Embedded Float Scalar Single + power-isa-sp.fv; // SPE.Embedded Float Vector + mmu-type = "power-embedded"; + }; +}; diff --git a/arch/powerpc/boot/dts/fsl/mpc8536si-pre.dtsi b/arch/powerpc/boot/dts/fsl/mpc8536si-pre.dtsi index 7de45a7..152906f 100644 --- a/arch/powerpc/boot/dts/fsl/mpc8536si-pre.dtsi +++ b/arch/powerpc/boot/dts/fsl/mpc8536si-pre.dtsi @@ -33,6 +33,9 @@ */ /dts-v1/; + +/include/ "e500v2_power_isa.dtsi" + / { compatible = "fsl,MPC8536"; #address-cells = <2>; diff --git a/arch/powerpc/boot/dts/fsl/mpc8544si-pre.dtsi b/arch/powerpc/boot/dts/fsl/mpc8544si-pre.dtsi index 8777f92..5a69baf 100644 --- a/arch/powerpc/boot/dts/fsl/mpc8544si-pre.dtsi +++ b/arch/powerpc/boot/dts/fsl/mpc8544si-pre.dtsi @@ -33,6 +33,9 @@ */ /dts-v1/; + +/include/ "e500v2_power_isa.dtsi" + / { compatible = "fsl,MPC8544"; #address-cells = <2>; diff --git a/arch/powerpc/boot/dts/fsl/mpc8548si-pre.dtsi b/arch/powerpc/boot/dts/fsl/mpc8548si-pre.dtsi index 720422d..fc1ce97 100644 --- a/arch/powerpc/boot/dts/fsl/mpc8548si-pre.dtsi +++ b/arch/powerpc/boot/dts/fsl/mpc8548si-pre.dtsi @@ -33,6 +33,9 @@ */ /dts-v1/; + +/include/ "e500v2_power_isa.dtsi" + / { compatible = "fsl,MPC8548"; #address-cells = <2>; diff --git a/arch/powerpc/boot/dts/fsl/mpc8568si-pre.dtsi b/arch/powerpc/boot/dts/fsl/mpc8568si-pre.dtsi index eacd62c..122ca3b 100644 --- a/arch/powerpc/boot/dts/fsl/mpc8568si-pre.dtsi +++ b/arch/powerpc/boot/dts/fsl/mpc8568si-pre.dtsi @@ -33,6 +33,9 @@ */ /dts-v1/; + +/include/ "e500v2_power_isa.dtsi" + / { compatible = "fsl,MPC8568"; #address-cells = <2>; diff --git a/arch/powerpc/boot/dts/fsl/mpc8569si-pre.dtsi b/arch/powerpc/boot/dts/fsl/mpc8569si-pre.dtsi index b07064d..2cd15a2 100644 --- a/arch/powerpc/boot/dts/fsl/mpc8569si-pre.dtsi +++ b/arch/powerpc/boot/dts/fsl/mpc8569si-pre.dtsi @@ -33,6 +33,9 @@ */ /dts-v1/; + +/include/ "e500v2_power_isa.dtsi" + / { compatible = "fsl,MPC8569"; #address-cells = <2>; diff --git a/arch/powerpc/boot/dts/fsl/mpc8572si-pre.dtsi b/arch/powerpc/boot/dts/fsl/mpc8572si-pre.dtsi index ca18832..28c2a86 100644 --- a/arch/powerpc/boot/dts/fsl/mpc8572si-pre.dtsi +++ b/arch/powerpc/boot/dts/fsl/mpc8572si-pre.dtsi @@ -33,6 +33,9 @@ */ /dts-v1/; + +/include/ "e500v2_power_isa.dtsi" + / { compatible = "fsl,MPC8572"; #address-cells = <2>; diff --git a/arch/powerpc/boot/dts/fsl/p1010si-pre.dtsi b/arch/powerpc/boot/dts/fsl/p1010si-pre.dtsi index 7354a8f..6e76f9b 100644 --- a/arch/powerpc/boot/dts/fsl/p1010si-pre.dtsi +++ b/arch/powerpc/boot/dts/fsl/p1010si-pre.dtsi @@ -33,6 +33,9 @@ */ /dts-v1/; + +/include/ "e500v2_power_isa.dtsi" + / { compatible = "fsl,P1010"; #address-cells = <2>; diff --git a/arch/powerpc/boot/dts/fsl/p1020si-pre.dtsi b/arch/powerpc/boot/dts/fsl/p1020si-pre.dtsi index 6f0376e..fed9c4c 100644 --- a/arch/powerpc/boot/dts/fsl/p1020si-pre.dtsi +++ b/arch/powerpc/boot/dts/fsl/p1020si-pre.dtsi @@ -33,6 +33,9 @@ */ /dts-v1/; + +/include/ "e500v2_power_isa.dtsi" + / { compatible = "fsl,P1020"; #address-cells = <2>; diff --git a/arch/powerpc/boot/dts/fsl/p1021si-pre.dtsi b/arch/powerpc/boot/dts/fsl/p1021si-pre.dtsi index 4abd54b..36161b5 100644 --- a/arch/powerpc/boot/dts/fsl/p1021si-pre.dtsi +++ b/arch/powerpc/boot/dts/fsl/p1021si-pre.dtsi @@ -33,6 +33,9 @@ */ /dts-v1/; + +/include/ "e500v2_power_isa.dtsi" + / { compatible = "fsl,P1021"; #address-cells = <2>; diff --git a/arch/powerpc/boot/dts/fsl/p1022si-pre.dtsi b/arch/powerpc/boot/dts/fsl/p1022si-pre.dtsi index e930f4f..1956dea 100644 --- a/arch/powerpc/boot/dts/fsl/p1022si-pre.dtsi +++ b/arch/powerpc/boot/dts/fsl/p1022si-pre.dtsi @@ -33,6 +33,9 @@ */ /dts-v1/; + +/include/ "e500v2_power_isa.dtsi" + / { compatible = "fsl,P1022"; #address-cells = <2>; diff --git a/arch/powerpc/boot/dts/fsl/p1023si-pre.dtsi b/arch/powerpc/boot/dts/fsl/p1023si-pre.dtsi index ac45f6d..132a152 100644 --- a/arch/powerpc/boot/dts/fsl/p1023si-pre.dtsi +++ b/arch/powerpc/boot/dts/fsl/p1023si-pre.dtsi @@ -33,6 +33,9 @@ */ /dts-v1/; + +/include/ "e500v2_power_isa.dtsi" + / { compatible = "fsl,P1023"; #address-cells = <2>; diff --git a/arch/powerpc/boot/dts/fsl/p2020si-pre.dtsi b/arch/powerpc/boot/dts/fsl/p2020si-pre.dtsi index 3213288..42bf3c6 100644 --- a/arch/powerpc/boot/dts/fsl/p2020si-pre.dtsi +++ b/arch/powerpc/boot/dts/fsl/p2020si-pre.dtsi @@ -33,6 +33,9 @@ */ /dts-v1/; + +/include/ "e500v2_power_isa.dtsi" + / { compatible = "fsl,P2020"; #address-cells = <2>; diff --git a/arch/powerpc/boot/dts/mpc8540ads.dts b/arch/powerpc/boot/dts/mpc8540ads.dts index f99fb11..2d31863 100644 --- a/arch/powerpc/boot/dts/mpc8540ads.dts +++ b/arch/powerpc/boot/dts/mpc8540ads.dts @@ -11,6 +11,8 @@ /dts-v1/; +/include/ "fsl/e500v2_power_isa.dtsi" + / { model = "MPC8540ADS"; compatible = "MPC8540ADS", "MPC85xxADS"; diff --git a/arch/powerpc/boot/dts/mpc8541cds.dts b/arch/powerpc/boot/dts/mpc8541cds.dts index 0f5e939..1c03c266 100644 --- a/arch/powerpc/boot/dts/mpc8541cds.dts +++ b/arch/powerpc/boot/dts/mpc8541cds.dts @@ -11,6 +11,8 @@ /dts-v1/; +/include/ "fsl/e500v2_power_isa.dtsi" + / { model = "MPC8541CDS"; compatible = "MPC8541CDS", "MPC85xxCDS"; diff --git a/arch/powerpc/boot/dts/mpc8555cds.dts b/arch/powerpc/boot/dts/mpc8555cds.dts index fe10438..36a7ea1 100644 --- a/arch/powerpc/boot/dts/mpc8555cds.dts +++ b/arch/powerpc/boot/dts/mpc8555cds.dts @@ -11,6 +11,8 @@ /dts-v1/; +/include/ "fsl/e500v2_power_isa.dtsi" + / { model = "MPC8555CDS"; compatible = "MPC8555CDS", "MPC85xxCDS"; diff --git a/arch/powerpc/boot/dts/mpc8560ads.dts b/arch/powerpc/boot/dts/mpc8560ads.dts index 6e85e1b..1a43f5a 100644 --- a/arch/powerpc/boot/dts/mpc8560ads.dts +++ b/arch/powerpc/boot/dts/mpc8560ads.dts @@ -11,6 +11,8 @@ /dts-v1/; +/include/ "fsl/e500v2_power_isa.dtsi" + / { model = "MPC8560ADS"; compatible = "MPC8560ADS", "MPC85xxADS"; -- cgit v0.10.2 From 2f4acb057c9c11f8e9d944cf218091de02e3e815 Mon Sep 17 00:00:00 2001 From: Olivia Yin Date: Thu, 9 Aug 2012 15:42:35 +0800 Subject: powerpc/e500mc: Add Power ISA properties to comply with ePAPR 1.1 power-isa-version and power-isa-* are cpu node general properties defined in ePAPR. If the power-isa-version property exists, then for each category from the Categories section of Book I of the Power ISA version indicated, the existence of a property named power-isa-[CAT], where [CAT] is the abbreviated category name with all uppercase letters converted to lowercase, indicates that the category is supported by the implementation. The patch update all the e500mc platforms. Signed-off-by: Liu Yu Signed-off-by: Olivia Yin Signed-off-by: Kumar Gala diff --git a/arch/powerpc/boot/dts/fsl/e500mc_power_isa.dtsi b/arch/powerpc/boot/dts/fsl/e500mc_power_isa.dtsi new file mode 100644 index 0000000..870c653 --- /dev/null +++ b/arch/powerpc/boot/dts/fsl/e500mc_power_isa.dtsi @@ -0,0 +1,58 @@ +/* + * e500mc Power ISA Device Tree Source (include) + * + * Copyright 2012 Freescale Semiconductor Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Freescale Semiconductor nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * + * ALTERNATIVELY, this software may be distributed under the terms of the + * GNU General Public License ("GPL") as published by the Free Software + * Foundation, either version 2 of that License or (at your option) any + * later version. + * + * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor "AS IS" AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/ { + cpus { + power-isa-version = "2.06"; + power-isa-b; // Base + power-isa-e; // Embedded + power-isa-atb; // Alternate Time Base + power-isa-cs; // Cache Specification + power-isa-ds; // Decorated Storage + power-isa-e.ed; // Embedded.Enhanced Debug + power-isa-e.pd; // Embedded.External PID + power-isa-e.hv; // Embedded.Hypervisor + power-isa-e.le; // Embedded.Little-Endian + power-isa-e.pm; // Embedded.Performance Monitor + power-isa-e.pc; // Embedded.Processor Control + power-isa-ecl; // Embedded Cache Locking + power-isa-exp; // External Proxy + power-isa-fp; // Floating Point + power-isa-fp.r; // Floating Point.Record + power-isa-mmc; // Memory Coherence + power-isa-scpm; // Store Conditional Page Mobility + power-isa-wt; // Wait + mmu-type = "power-embedded"; + }; +}; diff --git a/arch/powerpc/boot/dts/fsl/p2041si-pre.dtsi b/arch/powerpc/boot/dts/fsl/p2041si-pre.dtsi index 2d0a40d..7a2697d 100644 --- a/arch/powerpc/boot/dts/fsl/p2041si-pre.dtsi +++ b/arch/powerpc/boot/dts/fsl/p2041si-pre.dtsi @@ -33,6 +33,9 @@ */ /dts-v1/; + +/include/ "e500mc_power_isa.dtsi" + / { compatible = "fsl,P2041"; #address-cells = <2>; diff --git a/arch/powerpc/boot/dts/fsl/p3041si-pre.dtsi b/arch/powerpc/boot/dts/fsl/p3041si-pre.dtsi index 136def3..c9ca2c3 100644 --- a/arch/powerpc/boot/dts/fsl/p3041si-pre.dtsi +++ b/arch/powerpc/boot/dts/fsl/p3041si-pre.dtsi @@ -33,6 +33,9 @@ */ /dts-v1/; + +/include/ "e500mc_power_isa.dtsi" + / { compatible = "fsl,P3041"; #address-cells = <2>; diff --git a/arch/powerpc/boot/dts/fsl/p4080si-pre.dtsi b/arch/powerpc/boot/dts/fsl/p4080si-pre.dtsi index b9556ee..493d9a0 100644 --- a/arch/powerpc/boot/dts/fsl/p4080si-pre.dtsi +++ b/arch/powerpc/boot/dts/fsl/p4080si-pre.dtsi @@ -33,6 +33,9 @@ */ /dts-v1/; + +/include/ "e500mc_power_isa.dtsi" + / { compatible = "fsl,P4080"; #address-cells = <2>; -- cgit v0.10.2 From 8778721912adaf1076f427867384130480c86ca8 Mon Sep 17 00:00:00 2001 From: Olivia Yin Date: Thu, 9 Aug 2012 15:42:36 +0800 Subject: powerpc/e5500: Add Power ISA properties to comply with ePAPR 1.1 power-isa-version and power-isa-* are cpu node general properties defined in ePAPR. If the power-isa-version property exists, then for each category from the Categories section of Book I of the Power ISA version indicated, the existence of a property named power-isa-[CAT], where [CAT] is the abbreviated category name with all uppercase letters converted to lowercase, indicates that the category is supported by the implementation. This patch update all the e5500 platforms. Signed-off-by: Liu Yu Signed-off-by: Olivia Yin Signed-off-by: Kumar Gala diff --git a/arch/powerpc/boot/dts/fsl/e5500_power_isa.dtsi b/arch/powerpc/boot/dts/fsl/e5500_power_isa.dtsi new file mode 100644 index 0000000..3230212 --- /dev/null +++ b/arch/powerpc/boot/dts/fsl/e5500_power_isa.dtsi @@ -0,0 +1,59 @@ +/* + * e5500 Power ISA Device Tree Source (include) + * + * Copyright 2012 Freescale Semiconductor Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Freescale Semiconductor nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * + * ALTERNATIVELY, this software may be distributed under the terms of the + * GNU General Public License ("GPL") as published by the Free Software + * Foundation, either version 2 of that License or (at your option) any + * later version. + * + * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor "AS IS" AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/ { + cpus { + power-isa-version = "2.06"; + power-isa-b; // Base + power-isa-e; // Embedded + power-isa-atb; // Alternate Time Base + power-isa-cs; // Cache Specification + power-isa-ds; // Decorated Storage + power-isa-e.ed; // Embedded.Enhanced Debug + power-isa-e.pd; // Embedded.External PID + power-isa-e.hv; // Embedded.Hypervisor + power-isa-e.le; // Embedded.Little-Endian + power-isa-e.pm; // Embedded.Performance Monitor + power-isa-e.pc; // Embedded.Processor Control + power-isa-ecl; // Embedded Cache Locking + power-isa-exp; // External Proxy + power-isa-fp; // Floating Point + power-isa-fp.r; // Floating Point.Record + power-isa-mmc; // Memory Coherence + power-isa-scpm; // Store Conditional Page Mobility + power-isa-wt; // Wait + power-isa-64; // 64-bit + mmu-type = "power-embedded"; + }; +}; diff --git a/arch/powerpc/boot/dts/fsl/p5020si-pre.dtsi b/arch/powerpc/boot/dts/fsl/p5020si-pre.dtsi index ae823a4..0a198b0 100644 --- a/arch/powerpc/boot/dts/fsl/p5020si-pre.dtsi +++ b/arch/powerpc/boot/dts/fsl/p5020si-pre.dtsi @@ -33,6 +33,9 @@ */ /dts-v1/; + +/include/ "e5500_power_isa.dtsi" + / { compatible = "fsl,P5020"; #address-cells = <2>; diff --git a/arch/powerpc/boot/dts/fsl/p5040si-pre.dtsi b/arch/powerpc/boot/dts/fsl/p5040si-pre.dtsi index 52721b6..40ca943 100644 --- a/arch/powerpc/boot/dts/fsl/p5040si-pre.dtsi +++ b/arch/powerpc/boot/dts/fsl/p5040si-pre.dtsi @@ -33,6 +33,9 @@ */ /dts-v1/; + +/include/ "e5500_power_isa.dtsi" + / { compatible = "fsl,P5040"; #address-cells = <2>; -- cgit v0.10.2 From f4dfef75adb12fec7693d0c469e2de6c7d45e975 Mon Sep 17 00:00:00 2001 From: Prabhakar Kushwaha Date: Wed, 15 Aug 2012 20:43:52 +0530 Subject: powerpc: Update Integrated Flash controller device tree bindings Freescale's Integrated Flash controller (IFC) may have one or two interrupts. In case of single interrupt line, it will cover all IFC interrupts. Update this information in IFC device tree bindings Signed-off-by: Prabhakar Kushwaha Signed-off-by: Kumar Gala diff --git a/Documentation/devicetree/bindings/powerpc/fsl/ifc.txt b/Documentation/devicetree/bindings/powerpc/fsl/ifc.txt index 939a26d..d5e3704 100644 --- a/Documentation/devicetree/bindings/powerpc/fsl/ifc.txt +++ b/Documentation/devicetree/bindings/powerpc/fsl/ifc.txt @@ -12,9 +12,12 @@ Properties: - #size-cells : Either one or two, depending on how large each chipselect can be. - reg : Offset and length of the register set for the device -- interrupts : IFC has two interrupts. The first one is the "common" - interrupt(CM_EVTER_STAT), and second is the NAND interrupt - (NAND_EVTER_STAT). +- interrupts: IFC may have one or two interrupts. If two interrupt + specifiers are present, the first is the "common" + interrupt (CM_EVTER_STAT), and the second is the NAND + interrupt (NAND_EVTER_STAT). If there is only one, + that interrupt reports both types of event. + - ranges : Each range corresponds to a single chipselect, and covers the entire access window as configured. -- cgit v0.10.2 From 0adbe663bdf9d80a9ab1d225a5cc030fe2be10b5 Mon Sep 17 00:00:00 2001 From: Prabhakar Kushwaha Date: Thu, 16 Aug 2012 09:28:22 +0530 Subject: powerpc/mpc85xx: Add new ext fields to Integrated FLash Controller Freescale's Integrated Flash controller(IFC) v1.1.0 supports 40 bit address bus width. In case more than 32 bit address is used, the EXT registers should be set. Add support of ext registers. Signed-off-by: Kumar Gala Signed-off-by: York Sun Signed-off-by: Prabhakar Kushwaha Signed-off-by: Kumar Gala diff --git a/arch/powerpc/include/asm/fsl_ifc.h b/arch/powerpc/include/asm/fsl_ifc.h index b955012..b8a4b9b 100644 --- a/arch/powerpc/include/asm/fsl_ifc.h +++ b/arch/powerpc/include/asm/fsl_ifc.h @@ -768,22 +768,24 @@ struct fsl_ifc_gpcm { */ struct fsl_ifc_regs { __be32 ifc_rev; - u32 res1[0x3]; + u32 res1[0x2]; struct { + __be32 cspr_ext; __be32 cspr; - u32 res2[0x2]; + u32 res2; } cspr_cs[FSL_IFC_BANK_COUNT]; - u32 res3[0x18]; + u32 res3[0x19]; struct { __be32 amask; u32 res4[0x2]; } amask_cs[FSL_IFC_BANK_COUNT]; - u32 res5[0x18]; + u32 res5[0x17]; struct { + __be32 csor_ext; __be32 csor; - u32 res6[0x2]; + u32 res6; } csor_cs[FSL_IFC_BANK_COUNT]; - u32 res7[0x18]; + u32 res7[0x19]; struct { __be32 ftim[4]; u32 res8[0x8]; -- cgit v0.10.2 From 17ae4f0aef09c1bcf380dcea555e83d27436e7d5 Mon Sep 17 00:00:00 2001 From: Jia Hongtao Date: Tue, 28 Aug 2012 10:00:55 +0800 Subject: powerpc: add adt7461 thermal monitor support to applicable boards Add thermal monitor support to following boards: P1022DS, MPC8536DS, P2041RDB, P3041DS, P4080DS, P5020DS, P5040DS Signed-off-by: Jia Hongtao Signed-off-by: Li Yang Signed-off-by: Kumar Gala diff --git a/arch/powerpc/boot/dts/mpc8536ds.dtsi b/arch/powerpc/boot/dts/mpc8536ds.dtsi index d304a2d..7c3dde8 100644 --- a/arch/powerpc/boot/dts/mpc8536ds.dtsi +++ b/arch/powerpc/boot/dts/mpc8536ds.dtsi @@ -132,6 +132,10 @@ reg = <0x68>; interrupts = <0 0x1 0 0>; }; + adt7461@4c { + compatible = "adi,adt7461"; + reg = <0x4c>; + }; }; spi@7000 { diff --git a/arch/powerpc/boot/dts/p1022ds.dtsi b/arch/powerpc/boot/dts/p1022ds.dtsi index c3344b0..873da35 100644 --- a/arch/powerpc/boot/dts/p1022ds.dtsi +++ b/arch/powerpc/boot/dts/p1022ds.dtsi @@ -149,6 +149,10 @@ compatible = "dallas,ds1339"; reg = <0x68>; }; + adt7461@4c { + compatible = "adi,adt7461"; + reg = <0x4c>; + }; }; spi@7000 { diff --git a/arch/powerpc/boot/dts/p2041rdb.dts b/arch/powerpc/boot/dts/p2041rdb.dts index baab034..d97ad74 100644 --- a/arch/powerpc/boot/dts/p2041rdb.dts +++ b/arch/powerpc/boot/dts/p2041rdb.dts @@ -94,6 +94,10 @@ compatible = "pericom,pt7c4338"; reg = <0x68>; }; + adt7461@4c { + compatible = "adi,adt7461"; + reg = <0x4c>; + }; }; i2c@118100 { diff --git a/arch/powerpc/boot/dts/p3041ds.dts b/arch/powerpc/boot/dts/p3041ds.dts index 6cdcadc..2fed3bc 100644 --- a/arch/powerpc/boot/dts/p3041ds.dts +++ b/arch/powerpc/boot/dts/p3041ds.dts @@ -98,6 +98,10 @@ reg = <0x68>; interrupts = <0x1 0x1 0 0>; }; + adt7461@4c { + compatible = "adi,adt7461"; + reg = <0x4c>; + }; }; }; diff --git a/arch/powerpc/boot/dts/p4080ds.dts b/arch/powerpc/boot/dts/p4080ds.dts index 3e20460..1cf6148 100644 --- a/arch/powerpc/boot/dts/p4080ds.dts +++ b/arch/powerpc/boot/dts/p4080ds.dts @@ -96,6 +96,10 @@ reg = <0x68>; interrupts = <0x1 0x1 0 0>; }; + adt7461@4c { + compatible = "adi,adt7461"; + reg = <0x4c>; + }; }; usb0: usb@210000 { diff --git a/arch/powerpc/boot/dts/p5020ds.dts b/arch/powerpc/boot/dts/p5020ds.dts index 27c07ed..2869fea 100644 --- a/arch/powerpc/boot/dts/p5020ds.dts +++ b/arch/powerpc/boot/dts/p5020ds.dts @@ -98,6 +98,10 @@ reg = <0x68>; interrupts = <0x1 0x1 0 0>; }; + adt7461@4c { + compatible = "adi,adt7461"; + reg = <0x4c>; + }; }; }; diff --git a/arch/powerpc/boot/dts/p5040ds.dts b/arch/powerpc/boot/dts/p5040ds.dts index d86bf2e..8165c93 100644 --- a/arch/powerpc/boot/dts/p5040ds.dts +++ b/arch/powerpc/boot/dts/p5040ds.dts @@ -146,6 +146,10 @@ label = "NAND Writable User area"; reg = <0x1f000000 0x01000000>; }; + adt7461@4c { + compatible = "adi,adt7461"; + reg = <0x4c>; + }; }; board-control@3,0 { -- cgit v0.10.2 From 3a0f880160ba1179db291d72062b13f2d373b9ee Mon Sep 17 00:00:00 2001 From: Wang Dongsheng Date: Wed, 29 Aug 2012 11:00:49 +0800 Subject: powerpc/8544ds: add partition table for norflash create partition table for norflash. Signed-off-by: Wang Dongsheng Signed-off-by: Kumar Gala diff --git a/arch/powerpc/boot/dts/mpc8544ds.dts b/arch/powerpc/boot/dts/mpc8544ds.dts index e934987..ed38874 100644 --- a/arch/powerpc/boot/dts/mpc8544ds.dts +++ b/arch/powerpc/boot/dts/mpc8544ds.dts @@ -20,8 +20,10 @@ reg = <0 0 0 0>; // Filled by U-Boot }; - lbc: localbus@e0005000 { + board_lbc: lbc: localbus@e0005000 { reg = <0 0xe0005000 0 0x1000>; + + ranges = <0x0 0x0 0x0 0xff800000 0x800000>; }; board_soc: soc: soc8544@e0000000 { diff --git a/arch/powerpc/boot/dts/mpc8544ds.dtsi b/arch/powerpc/boot/dts/mpc8544ds.dtsi index 77ebc9f..b219d03 100644 --- a/arch/powerpc/boot/dts/mpc8544ds.dtsi +++ b/arch/powerpc/boot/dts/mpc8544ds.dtsi @@ -32,6 +32,45 @@ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ +&board_lbc { + nor@0,0 { + #address-cells = <1>; + #size-cells = <1>; + compatible = "cfi-flash"; + reg = <0x0 0x0 0x800000>; + bank-width = <2>; + device-width = <1>; + + partition@0 { + reg = <0x0 0x10000>; + label = "dtb-nor"; + }; + + partition@20000 { + reg = <0x20000 0x30000>; + label = "diagnostic-nor"; + read-only; + }; + + partition@200000 { + reg = <0x200000 0x200000>; + label = "dink-nor"; + read-only; + }; + + partition@400000 { + reg = <0x400000 0x380000>; + label = "kernel-nor"; + }; + + partition@780000 { + reg = <0x780000 0x80000>; + label = "u-boot-nor"; + read-only; + }; + }; +}; + &board_soc { enet0: ethernet@24000 { phy-handle = <&phy0>; -- cgit v0.10.2 From c8c4e2c3d911cb004db1ae8483df3795d8d20459 Mon Sep 17 00:00:00 2001 From: Shaohui Xie Date: Wed, 12 Sep 2012 18:36:03 +0800 Subject: powerpc/p5040: fix dtb build warning of p5040ds.dtb Device node adt7461 was wrongly added in p5040ds.dts, it should be added into i2c instead of localbus, when build p5040ds.dtb, a warning will dump: Warning (reg_format): "reg" property in /localbus@ffe124000/nand@2,0/adt7461@4c has invalid length (4 bytes) (#address-cells == 1, #size-cells == 1) This was introduced by: commit ea6b1ba692bcb5f6e39f409a78cf8b04fdf23baa Author: Jia Hongtao Date: Tue Aug 28 10:00:55 2012 +0800 powerpc: add adt7461 thermal monitor support to applicable boards Add thermal monitor support to following boards: P1022DS, MPC8536DS, P2041RDB, P3041DS, P4080DS, P5020DS, P5040DS Signed-off-by: Shaohui Xie Signed-off-by: Kumar Gala diff --git a/arch/powerpc/boot/dts/p5040ds.dts b/arch/powerpc/boot/dts/p5040ds.dts index 8165c93..860b5cc 100644 --- a/arch/powerpc/boot/dts/p5040ds.dts +++ b/arch/powerpc/boot/dts/p5040ds.dts @@ -95,6 +95,10 @@ reg = <0x68>; interrupts = <0x1 0x1 0 0>; }; + adt7461@4c { + compatible = "adi,adt7461"; + reg = <0x4c>; + }; }; }; @@ -146,10 +150,6 @@ label = "NAND Writable User area"; reg = <0x1f000000 0x01000000>; }; - adt7461@4c { - compatible = "adi,adt7461"; - reg = <0x4c>; - }; }; board-control@3,0 { -- cgit v0.10.2 From 6cc1b4e931f8d8dccdcdb05b758a7d1178ad6b49 Mon Sep 17 00:00:00 2001 From: Roy Zang Date: Mon, 3 Sep 2012 17:22:09 +0800 Subject: powerpc/pci: Add IP revision register define for Freescale PCIe controller Signed-off-by: Roy Zang Signed-off-by: Kumar Gala diff --git a/arch/powerpc/sysdev/fsl_pci.h b/arch/powerpc/sysdev/fsl_pci.h index baa0fd1..54ed82c 100644 --- a/arch/powerpc/sysdev/fsl_pci.h +++ b/arch/powerpc/sysdev/fsl_pci.h @@ -16,6 +16,7 @@ #define PCIE_LTSSM 0x0404 /* PCIE Link Training and Status */ #define PCIE_LTSSM_L0 0x16 /* L0 state */ +#define PCIE_IP_REV_2_2 0x02080202 /* PCIE IP block version Rev2.2 */ #define PIWAR_EN 0x80000000 /* Enable */ #define PIWAR_PF 0x20000000 /* prefetch */ #define PIWAR_TGI_LOCAL 0x00f00000 /* target - local memory */ @@ -57,7 +58,9 @@ struct ccsr_pci { __be32 pex_pme_mes_disr; /* 0x.024 - PCIE PME and message disable register */ __be32 pex_pme_mes_ier; /* 0x.028 - PCIE PME and message interrupt enable register */ __be32 pex_pmcr; /* 0x.02c - PCIE power management command register */ - u8 res3[3024]; + u8 res3[3016]; + __be32 block_rev1; /* 0x.bf8 - PCIE Block Revision register 1 */ + __be32 block_rev2; /* 0x.bfc - PCIE Block Revision register 2 */ /* PCI/PCI Express outbound window 0-4 * Window 0 is the default window and is the only window enabled upon reset. -- cgit v0.10.2 From 9e67886becd7fab36c97ef43bb81515c18a66be1 Mon Sep 17 00:00:00 2001 From: Roy Zang Date: Mon, 3 Sep 2012 17:22:10 +0800 Subject: powerpc/pci: Use PCIe IP block revision register instead of compatible Freescale PCIe IP block revision bigger than rev2.2 will also need redefine the sequence of inbound windows. So change to use IP block revision instead of compatible for the judgment. Signed-off-by: Roy Zang Signed-off-by: Kumar Gala diff --git a/arch/powerpc/sysdev/fsl_pci.c b/arch/powerpc/sysdev/fsl_pci.c index da7a3d7..23acaf4 100644 --- a/arch/powerpc/sysdev/fsl_pci.c +++ b/arch/powerpc/sysdev/fsl_pci.c @@ -143,18 +143,20 @@ static void __init setup_pci_atmu(struct pci_controller *hose, pr_debug("PCI memory map start 0x%016llx, size 0x%016llx\n", (u64)rsrc->start, (u64)resource_size(rsrc)); - if (of_device_is_compatible(hose->dn, "fsl,qoriq-pcie-v2.2")) { - win_idx = 2; - start_idx = 0; - end_idx = 3; - } - pci = ioremap(rsrc->start, resource_size(rsrc)); if (!pci) { dev_err(hose->parent, "Unable to map ATMU registers\n"); return; } + if (early_find_capability(hose, 0, 0, PCI_CAP_ID_EXP)) { + if (in_be32(&pci->block_rev1) >= PCIE_IP_REV_2_2) { + win_idx = 2; + start_idx = 0; + end_idx = 3; + } + } + /* Disable all windows (except powar0 since it's ignored) */ for(i = 1; i < 5; i++) out_be32(&pci->pow[i].powar, 0); -- cgit v0.10.2 From 905e75c46dba5f3061049277e4eb7110beedba43 Mon Sep 17 00:00:00 2001 From: Jia Hongtao Date: Tue, 28 Aug 2012 15:44:08 +0800 Subject: powerpc/fsl-pci: Unify pci/pcie initialization code We unified the Freescale pci/pcie initialization by changing the fsl_pci to a platform driver. In previous PCI code architecture the initialization routine is called at board_setup_arch stage. Now the initialization is done in probe function which is architectural better. Also It's convenient for adding PM support for PCI controller in later patch. Now we registered pci controllers as platform devices. So we combine two initialization code as one platform driver. Signed-off-by: Jia Hongtao Signed-off-by: Li Yang Signed-off-by: Chunhe Lan Signed-off-by: Kumar Gala diff --git a/arch/powerpc/platforms/85xx/common.c b/arch/powerpc/platforms/85xx/common.c index 67dac22..d0861a0 100644 --- a/arch/powerpc/platforms/85xx/common.c +++ b/arch/powerpc/platforms/85xx/common.c @@ -27,6 +27,16 @@ static struct of_device_id __initdata mpc85xx_common_ids[] = { { .compatible = "fsl,mpc8548-guts", }, /* Probably unnecessary? */ { .compatible = "gpio-leds", }, + /* For all PCI controllers */ + { .compatible = "fsl,mpc8540-pci", }, + { .compatible = "fsl,mpc8548-pcie", }, + { .compatible = "fsl,p1022-pcie", }, + { .compatible = "fsl,p1010-pcie", }, + { .compatible = "fsl,p1023-pcie", }, + { .compatible = "fsl,p4080-pcie", }, + { .compatible = "fsl,qoriq-pcie-v2.4", }, + { .compatible = "fsl,qoriq-pcie-v2.3", }, + { .compatible = "fsl,qoriq-pcie-v2.2", }, {}, }; diff --git a/arch/powerpc/platforms/85xx/corenet_ds.c b/arch/powerpc/platforms/85xx/corenet_ds.c index 473d573..ed69c92 100644 --- a/arch/powerpc/platforms/85xx/corenet_ds.c +++ b/arch/powerpc/platforms/85xx/corenet_ds.c @@ -16,7 +16,6 @@ #include #include #include -#include #include #include @@ -52,39 +51,16 @@ void __init corenet_ds_pic_init(void) */ void __init corenet_ds_setup_arch(void) { -#ifdef CONFIG_PCI - struct device_node *np; - struct pci_controller *hose; -#endif - dma_addr_t max = 0xffffffff; - mpc85xx_smp_init(); -#ifdef CONFIG_PCI - for_each_node_by_type(np, "pci") { - if (of_device_is_compatible(np, "fsl,p4080-pcie") || - of_device_is_compatible(np, "fsl,qoriq-pcie-v2.2") || - of_device_is_compatible(np, "fsl,qoriq-pcie-v2.3") || - of_device_is_compatible(np, "fsl,qoriq-pcie-v2.4")) { - fsl_add_bridge(np, 0); - hose = pci_find_hose_for_OF_device(np); - max = min(max, hose->dma_window_base_cur + - hose->dma_window_size); - } - } - -#ifdef CONFIG_PPC64 +#if defined(CONFIG_PCI) && defined(CONFIG_PPC64) pci_devs_phb_init(); #endif -#endif -#ifdef CONFIG_SWIOTLB - if ((memblock_end_of_DRAM() - 1) > max) { - ppc_swiotlb_enable = 1; - set_pci_dma_ops(&swiotlb_dma_ops); - ppc_md.pci_dma_dev_setup = pci_dma_dev_setup_swiotlb; - } -#endif + fsl_pci_assign_primary(); + + swiotlb_detect_4g(); + pr_info("%s board from Freescale Semiconductor\n", ppc_md.name); } diff --git a/arch/powerpc/platforms/85xx/ge_imp3a.c b/arch/powerpc/platforms/85xx/ge_imp3a.c index b6a728b..e6285ae 100644 --- a/arch/powerpc/platforms/85xx/ge_imp3a.c +++ b/arch/powerpc/platforms/85xx/ge_imp3a.c @@ -22,7 +22,6 @@ #include #include #include -#include #include #include @@ -84,53 +83,39 @@ void __init ge_imp3a_pic_init(void) of_node_put(cascade_node); } -#ifdef CONFIG_PCI -static int primary_phb_addr; -#endif /* CONFIG_PCI */ - -/* - * Setup the architecture - */ -static void __init ge_imp3a_setup_arch(void) +static void ge_imp3a_pci_assign_primary(void) { - struct device_node *regs; #ifdef CONFIG_PCI struct device_node *np; - struct pci_controller *hose; -#endif - dma_addr_t max = 0xffffffff; + struct resource rsrc; - if (ppc_md.progress) - ppc_md.progress("ge_imp3a_setup_arch()", 0); - -#ifdef CONFIG_PCI for_each_node_by_type(np, "pci") { if (of_device_is_compatible(np, "fsl,mpc8540-pci") || of_device_is_compatible(np, "fsl,mpc8548-pcie") || of_device_is_compatible(np, "fsl,p2020-pcie")) { - struct resource rsrc; of_address_to_resource(np, 0, &rsrc); - if ((rsrc.start & 0xfffff) == primary_phb_addr) - fsl_add_bridge(np, 1); - else - fsl_add_bridge(np, 0); - - hose = pci_find_hose_for_OF_device(np); - max = min(max, hose->dma_window_base_cur + - hose->dma_window_size); + if ((rsrc.start & 0xfffff) == 0x9000) + fsl_pci_primary = np; } } #endif +} + +/* + * Setup the architecture + */ +static void __init ge_imp3a_setup_arch(void) +{ + struct device_node *regs; + + if (ppc_md.progress) + ppc_md.progress("ge_imp3a_setup_arch()", 0); mpc85xx_smp_init(); -#ifdef CONFIG_SWIOTLB - if ((memblock_end_of_DRAM() - 1) > max) { - ppc_swiotlb_enable = 1; - set_pci_dma_ops(&swiotlb_dma_ops); - ppc_md.pci_dma_dev_setup = pci_dma_dev_setup_swiotlb; - } -#endif + ge_imp3a_pci_assign_primary(); + + swiotlb_detect_4g(); /* Remap basic board registers */ regs = of_find_compatible_node(NULL, NULL, "ge,imp3a-fpga-regs"); @@ -215,17 +200,10 @@ static int __init ge_imp3a_probe(void) { unsigned long root = of_get_flat_dt_root(); - if (of_flat_dt_is_compatible(root, "ge,IMP3A")) { -#ifdef CONFIG_PCI - primary_phb_addr = 0x9000; -#endif - return 1; - } - - return 0; + return of_flat_dt_is_compatible(root, "ge,IMP3A"); } -machine_device_initcall(ge_imp3a, mpc85xx_common_publish_devices); +machine_arch_initcall(ge_imp3a, mpc85xx_common_publish_devices); machine_arch_initcall(ge_imp3a, swiotlb_setup_bus_notifier); diff --git a/arch/powerpc/platforms/85xx/mpc8536_ds.c b/arch/powerpc/platforms/85xx/mpc8536_ds.c index 767c7cf..15ce4b5 100644 --- a/arch/powerpc/platforms/85xx/mpc8536_ds.c +++ b/arch/powerpc/platforms/85xx/mpc8536_ds.c @@ -17,7 +17,6 @@ #include #include #include -#include #include #include @@ -46,46 +45,17 @@ void __init mpc8536_ds_pic_init(void) */ static void __init mpc8536_ds_setup_arch(void) { -#ifdef CONFIG_PCI - struct device_node *np; - struct pci_controller *hose; -#endif - dma_addr_t max = 0xffffffff; - if (ppc_md.progress) ppc_md.progress("mpc8536_ds_setup_arch()", 0); -#ifdef CONFIG_PCI - for_each_node_by_type(np, "pci") { - if (of_device_is_compatible(np, "fsl,mpc8540-pci") || - of_device_is_compatible(np, "fsl,mpc8548-pcie")) { - struct resource rsrc; - of_address_to_resource(np, 0, &rsrc); - if ((rsrc.start & 0xfffff) == 0x8000) - fsl_add_bridge(np, 1); - else - fsl_add_bridge(np, 0); - - hose = pci_find_hose_for_OF_device(np); - max = min(max, hose->dma_window_base_cur + - hose->dma_window_size); - } - } - -#endif + fsl_pci_assign_primary(); -#ifdef CONFIG_SWIOTLB - if ((memblock_end_of_DRAM() - 1) > max) { - ppc_swiotlb_enable = 1; - set_pci_dma_ops(&swiotlb_dma_ops); - ppc_md.pci_dma_dev_setup = pci_dma_dev_setup_swiotlb; - } -#endif + swiotlb_detect_4g(); printk("MPC8536 DS board from Freescale Semiconductor\n"); } -machine_device_initcall(mpc8536_ds, mpc85xx_common_publish_devices); +machine_arch_initcall(mpc8536_ds, mpc85xx_common_publish_devices); machine_arch_initcall(mpc8536_ds, swiotlb_setup_bus_notifier); diff --git a/arch/powerpc/platforms/85xx/mpc85xx_ads.c b/arch/powerpc/platforms/85xx/mpc85xx_ads.c index 29ee8fc..7d12a19 100644 --- a/arch/powerpc/platforms/85xx/mpc85xx_ads.c +++ b/arch/powerpc/platforms/85xx/mpc85xx_ads.c @@ -137,10 +137,6 @@ static void __init init_ioports(void) static void __init mpc85xx_ads_setup_arch(void) { -#ifdef CONFIG_PCI - struct device_node *np; -#endif - if (ppc_md.progress) ppc_md.progress("mpc85xx_ads_setup_arch()", 0); @@ -150,11 +146,10 @@ static void __init mpc85xx_ads_setup_arch(void) #endif #ifdef CONFIG_PCI - for_each_compatible_node(np, "pci", "fsl,mpc8540-pci") - fsl_add_bridge(np, 1); - ppc_md.pci_exclude_device = mpc85xx_exclude_device; #endif + + fsl_pci_assign_primary(); } static void mpc85xx_ads_show_cpuinfo(struct seq_file *m) @@ -173,7 +168,7 @@ static void mpc85xx_ads_show_cpuinfo(struct seq_file *m) seq_printf(m, "PLL setting\t: 0x%x\n", ((phid1 >> 24) & 0x3f)); } -machine_device_initcall(mpc85xx_ads, mpc85xx_common_publish_devices); +machine_arch_initcall(mpc85xx_ads, mpc85xx_common_publish_devices); /* * Called very early, device-tree isn't unflattened diff --git a/arch/powerpc/platforms/85xx/mpc85xx_cds.c b/arch/powerpc/platforms/85xx/mpc85xx_cds.c index 11156fb..c474505 100644 --- a/arch/powerpc/platforms/85xx/mpc85xx_cds.c +++ b/arch/powerpc/platforms/85xx/mpc85xx_cds.c @@ -276,6 +276,33 @@ machine_device_initcall(mpc85xx_cds, mpc85xx_cds_8259_attach); #endif /* CONFIG_PPC_I8259 */ +static void mpc85xx_cds_pci_assign_primary(void) +{ +#ifdef CONFIG_PCI + struct device_node *np; + + if (fsl_pci_primary) + return; + + /* + * MPC85xx_CDS has ISA bridge but unfortunately there is no + * isa node in device tree. We now looking for i8259 node as + * a workaround for such a broken device tree. This routine + * is for complying to all device trees. + */ + np = of_find_node_by_name(NULL, "i8259"); + while ((fsl_pci_primary = of_get_parent(np))) { + of_node_put(np); + np = fsl_pci_primary; + + if ((of_device_is_compatible(np, "fsl,mpc8540-pci") || + of_device_is_compatible(np, "fsl,mpc8548-pcie")) && + of_device_is_available(np)) + return; + } +#endif +} + /* * Setup the architecture */ @@ -309,21 +336,12 @@ static void __init mpc85xx_cds_setup_arch(void) } #ifdef CONFIG_PCI - for_each_node_by_type(np, "pci") { - if (of_device_is_compatible(np, "fsl,mpc8540-pci") || - of_device_is_compatible(np, "fsl,mpc8548-pcie")) { - struct resource rsrc; - of_address_to_resource(np, 0, &rsrc); - if ((rsrc.start & 0xfffff) == 0x8000) - fsl_add_bridge(np, 1); - else - fsl_add_bridge(np, 0); - } - } - ppc_md.pci_irq_fixup = mpc85xx_cds_pci_irq_fixup; ppc_md.pci_exclude_device = mpc85xx_exclude_device; #endif + + mpc85xx_cds_pci_assign_primary(); + fsl_pci_assign_primary(); } static void mpc85xx_cds_show_cpuinfo(struct seq_file *m) @@ -355,7 +373,7 @@ static int __init mpc85xx_cds_probe(void) return of_flat_dt_is_compatible(root, "MPC85xxCDS"); } -machine_device_initcall(mpc85xx_cds, mpc85xx_common_publish_devices); +machine_arch_initcall(mpc85xx_cds, mpc85xx_common_publish_devices); define_machine(mpc85xx_cds) { .name = "MPC85xx CDS", diff --git a/arch/powerpc/platforms/85xx/mpc85xx_ds.c b/arch/powerpc/platforms/85xx/mpc85xx_ds.c index 56f8c8f..9ebb91e 100644 --- a/arch/powerpc/platforms/85xx/mpc85xx_ds.c +++ b/arch/powerpc/platforms/85xx/mpc85xx_ds.c @@ -20,7 +20,6 @@ #include #include #include -#include #include #include @@ -129,13 +128,11 @@ static int mpc85xx_exclude_device(struct pci_controller *hose, } #endif /* CONFIG_PCI */ -static void __init mpc85xx_ds_pci_init(void) +static void __init mpc85xx_ds_uli_init(void) { #ifdef CONFIG_PCI struct device_node *node; - fsl_pci_init(); - /* See if we have a ULI under the primary */ node = of_find_node_by_name(NULL, "uli1575"); @@ -160,7 +157,8 @@ static void __init mpc85xx_ds_setup_arch(void) ppc_md.progress("mpc85xx_ds_setup_arch()", 0); swiotlb_detect_4g(); - mpc85xx_ds_pci_init(); + fsl_pci_assign_primary(); + mpc85xx_ds_uli_init(); mpc85xx_smp_init(); printk("MPC85xx DS board from Freescale Semiconductor\n"); @@ -176,9 +174,9 @@ static int __init mpc8544_ds_probe(void) return !!of_flat_dt_is_compatible(root, "MPC8544DS"); } -machine_device_initcall(mpc8544_ds, mpc85xx_common_publish_devices); -machine_device_initcall(mpc8572_ds, mpc85xx_common_publish_devices); -machine_device_initcall(p2020_ds, mpc85xx_common_publish_devices); +machine_arch_initcall(mpc8544_ds, mpc85xx_common_publish_devices); +machine_arch_initcall(mpc8572_ds, mpc85xx_common_publish_devices); +machine_arch_initcall(p2020_ds, mpc85xx_common_publish_devices); machine_arch_initcall(mpc8544_ds, swiotlb_setup_bus_notifier); machine_arch_initcall(mpc8572_ds, swiotlb_setup_bus_notifier); diff --git a/arch/powerpc/platforms/85xx/mpc85xx_mds.c b/arch/powerpc/platforms/85xx/mpc85xx_mds.c index 8e4b094..8498f73 100644 --- a/arch/powerpc/platforms/85xx/mpc85xx_mds.c +++ b/arch/powerpc/platforms/85xx/mpc85xx_mds.c @@ -327,44 +327,16 @@ static void __init mpc85xx_mds_qeic_init(void) { } static void __init mpc85xx_mds_setup_arch(void) { -#ifdef CONFIG_PCI - struct pci_controller *hose; - struct device_node *np; -#endif - dma_addr_t max = 0xffffffff; - if (ppc_md.progress) ppc_md.progress("mpc85xx_mds_setup_arch()", 0); -#ifdef CONFIG_PCI - for_each_node_by_type(np, "pci") { - if (of_device_is_compatible(np, "fsl,mpc8540-pci") || - of_device_is_compatible(np, "fsl,mpc8548-pcie")) { - struct resource rsrc; - of_address_to_resource(np, 0, &rsrc); - if ((rsrc.start & 0xfffff) == 0x8000) - fsl_add_bridge(np, 1); - else - fsl_add_bridge(np, 0); - - hose = pci_find_hose_for_OF_device(np); - max = min(max, hose->dma_window_base_cur + - hose->dma_window_size); - } - } -#endif - mpc85xx_smp_init(); mpc85xx_mds_qe_init(); -#ifdef CONFIG_SWIOTLB - if ((memblock_end_of_DRAM() - 1) > max) { - ppc_swiotlb_enable = 1; - set_pci_dma_ops(&swiotlb_dma_ops); - ppc_md.pci_dma_dev_setup = pci_dma_dev_setup_swiotlb; - } -#endif + fsl_pci_assign_primary(); + + swiotlb_detect_4g(); } @@ -409,9 +381,9 @@ static int __init mpc85xx_publish_devices(void) return mpc85xx_common_publish_devices(); } -machine_device_initcall(mpc8568_mds, mpc85xx_publish_devices); -machine_device_initcall(mpc8569_mds, mpc85xx_publish_devices); -machine_device_initcall(p1021_mds, mpc85xx_common_publish_devices); +machine_arch_initcall(mpc8568_mds, mpc85xx_publish_devices); +machine_arch_initcall(mpc8569_mds, mpc85xx_publish_devices); +machine_arch_initcall(p1021_mds, mpc85xx_common_publish_devices); machine_arch_initcall(mpc8568_mds, swiotlb_setup_bus_notifier); machine_arch_initcall(mpc8569_mds, swiotlb_setup_bus_notifier); diff --git a/arch/powerpc/platforms/85xx/mpc85xx_rdb.c b/arch/powerpc/platforms/85xx/mpc85xx_rdb.c index 1910fdc..ede8771 100644 --- a/arch/powerpc/platforms/85xx/mpc85xx_rdb.c +++ b/arch/powerpc/platforms/85xx/mpc85xx_rdb.c @@ -86,23 +86,17 @@ void __init mpc85xx_rdb_pic_init(void) */ static void __init mpc85xx_rdb_setup_arch(void) { -#if defined(CONFIG_PCI) || defined(CONFIG_QUICC_ENGINE) +#ifdef CONFIG_QUICC_ENGINE struct device_node *np; #endif if (ppc_md.progress) ppc_md.progress("mpc85xx_rdb_setup_arch()", 0); -#ifdef CONFIG_PCI - for_each_node_by_type(np, "pci") { - if (of_device_is_compatible(np, "fsl,mpc8548-pcie")) - fsl_add_bridge(np, 0); - } - -#endif - mpc85xx_smp_init(); + fsl_pci_assign_primary(); + #ifdef CONFIG_QUICC_ENGINE np = of_find_compatible_node(NULL, NULL, "fsl,qe"); if (!np) { @@ -161,15 +155,15 @@ qe_fail: printk(KERN_INFO "MPC85xx RDB board from Freescale Semiconductor\n"); } -machine_device_initcall(p2020_rdb, mpc85xx_common_publish_devices); -machine_device_initcall(p2020_rdb_pc, mpc85xx_common_publish_devices); -machine_device_initcall(p1020_mbg_pc, mpc85xx_common_publish_devices); -machine_device_initcall(p1020_rdb, mpc85xx_common_publish_devices); -machine_device_initcall(p1020_rdb_pc, mpc85xx_common_publish_devices); -machine_device_initcall(p1020_utm_pc, mpc85xx_common_publish_devices); -machine_device_initcall(p1021_rdb_pc, mpc85xx_common_publish_devices); -machine_device_initcall(p1025_rdb, mpc85xx_common_publish_devices); -machine_device_initcall(p1024_rdb, mpc85xx_common_publish_devices); +machine_arch_initcall(p2020_rdb, mpc85xx_common_publish_devices); +machine_arch_initcall(p2020_rdb_pc, mpc85xx_common_publish_devices); +machine_arch_initcall(p1020_mbg_pc, mpc85xx_common_publish_devices); +machine_arch_initcall(p1020_rdb, mpc85xx_common_publish_devices); +machine_arch_initcall(p1020_rdb_pc, mpc85xx_common_publish_devices); +machine_arch_initcall(p1020_utm_pc, mpc85xx_common_publish_devices); +machine_arch_initcall(p1021_rdb_pc, mpc85xx_common_publish_devices); +machine_arch_initcall(p1025_rdb, mpc85xx_common_publish_devices); +machine_arch_initcall(p1024_rdb, mpc85xx_common_publish_devices); /* * Called very early, device-tree isn't unflattened diff --git a/arch/powerpc/platforms/85xx/p1010rdb.c b/arch/powerpc/platforms/85xx/p1010rdb.c index dbaf443..0252961 100644 --- a/arch/powerpc/platforms/85xx/p1010rdb.c +++ b/arch/powerpc/platforms/85xx/p1010rdb.c @@ -46,25 +46,15 @@ void __init p1010_rdb_pic_init(void) */ static void __init p1010_rdb_setup_arch(void) { -#ifdef CONFIG_PCI - struct device_node *np; -#endif - if (ppc_md.progress) ppc_md.progress("p1010_rdb_setup_arch()", 0); -#ifdef CONFIG_PCI - for_each_node_by_type(np, "pci") { - if (of_device_is_compatible(np, "fsl,p1010-pcie")) - fsl_add_bridge(np, 0); - } - -#endif + fsl_pci_assign_primary(); printk(KERN_INFO "P1010 RDB board from Freescale Semiconductor\n"); } -machine_device_initcall(p1010_rdb, mpc85xx_common_publish_devices); +machine_arch_initcall(p1010_rdb, mpc85xx_common_publish_devices); machine_arch_initcall(p1010_rdb, swiotlb_setup_bus_notifier); /* diff --git a/arch/powerpc/platforms/85xx/p1022_ds.c b/arch/powerpc/platforms/85xx/p1022_ds.c index 3c732ac..848a3e9 100644 --- a/arch/powerpc/platforms/85xx/p1022_ds.c +++ b/arch/powerpc/platforms/85xx/p1022_ds.c @@ -18,7 +18,6 @@ #include #include -#include #include #include #include @@ -507,32 +506,9 @@ early_param("video", early_video_setup); */ static void __init p1022_ds_setup_arch(void) { -#ifdef CONFIG_PCI - struct device_node *np; -#endif - dma_addr_t max = 0xffffffff; - if (ppc_md.progress) ppc_md.progress("p1022_ds_setup_arch()", 0); -#ifdef CONFIG_PCI - for_each_compatible_node(np, "pci", "fsl,p1022-pcie") { - struct resource rsrc; - struct pci_controller *hose; - - of_address_to_resource(np, 0, &rsrc); - - if ((rsrc.start & 0xfffff) == 0x8000) - fsl_add_bridge(np, 1); - else - fsl_add_bridge(np, 0); - - hose = pci_find_hose_for_OF_device(np); - max = min(max, hose->dma_window_base_cur + - hose->dma_window_size); - } -#endif - #if defined(CONFIG_FB_FSL_DIU) || defined(CONFIG_FB_FSL_DIU_MODULE) diu_ops.get_pixel_format = p1022ds_get_pixel_format; diu_ops.set_gamma_table = p1022ds_set_gamma_table; @@ -601,18 +577,14 @@ static void __init p1022_ds_setup_arch(void) mpc85xx_smp_init(); -#ifdef CONFIG_SWIOTLB - if ((memblock_end_of_DRAM() - 1) > max) { - ppc_swiotlb_enable = 1; - set_pci_dma_ops(&swiotlb_dma_ops); - ppc_md.pci_dma_dev_setup = pci_dma_dev_setup_swiotlb; - } -#endif + fsl_pci_assign_primary(); + + swiotlb_detect_4g(); pr_info("Freescale P1022 DS reference board\n"); } -machine_device_initcall(p1022_ds, mpc85xx_common_publish_devices); +machine_arch_initcall(p1022_ds, mpc85xx_common_publish_devices); machine_arch_initcall(p1022_ds, swiotlb_setup_bus_notifier); diff --git a/arch/powerpc/platforms/85xx/p1022_rdk.c b/arch/powerpc/platforms/85xx/p1022_rdk.c index b3cf11b..55ffa1c 100644 --- a/arch/powerpc/platforms/85xx/p1022_rdk.c +++ b/arch/powerpc/platforms/85xx/p1022_rdk.c @@ -14,7 +14,6 @@ #include #include -#include #include #include #include @@ -121,32 +120,9 @@ void __init p1022_rdk_pic_init(void) */ static void __init p1022_rdk_setup_arch(void) { -#ifdef CONFIG_PCI - struct device_node *np; -#endif - dma_addr_t max = 0xffffffff; - if (ppc_md.progress) ppc_md.progress("p1022_rdk_setup_arch()", 0); -#ifdef CONFIG_PCI - for_each_compatible_node(np, "pci", "fsl,p1022-pcie") { - struct resource rsrc; - struct pci_controller *hose; - - of_address_to_resource(np, 0, &rsrc); - - if ((rsrc.start & 0xfffff) == 0x8000) - fsl_add_bridge(np, 1); - else - fsl_add_bridge(np, 0); - - hose = pci_find_hose_for_OF_device(np); - max = min(max, hose->dma_window_base_cur + - hose->dma_window_size); - } -#endif - #if defined(CONFIG_FB_FSL_DIU) || defined(CONFIG_FB_FSL_DIU_MODULE) diu_ops.set_monitor_port = p1022rdk_set_monitor_port; diu_ops.set_pixel_clock = p1022rdk_set_pixel_clock; @@ -155,18 +131,14 @@ static void __init p1022_rdk_setup_arch(void) mpc85xx_smp_init(); -#ifdef CONFIG_SWIOTLB - if ((memblock_end_of_DRAM() - 1) > max) { - ppc_swiotlb_enable = 1; - set_pci_dma_ops(&swiotlb_dma_ops); - ppc_md.pci_dma_dev_setup = pci_dma_dev_setup_swiotlb; - } -#endif + fsl_pci_assign_primary(); + + swiotlb_detect_4g(); pr_info("Freescale / iVeia P1022 RDK reference board\n"); } -machine_device_initcall(p1022_rdk, mpc85xx_common_publish_devices); +machine_arch_initcall(p1022_rdk, mpc85xx_common_publish_devices); machine_arch_initcall(p1022_rdk, swiotlb_setup_bus_notifier); diff --git a/arch/powerpc/platforms/85xx/p1023_rds.c b/arch/powerpc/platforms/85xx/p1023_rds.c index 2990e8b..9cc60a7 100644 --- a/arch/powerpc/platforms/85xx/p1023_rds.c +++ b/arch/powerpc/platforms/85xx/p1023_rds.c @@ -80,15 +80,12 @@ static void __init mpc85xx_rds_setup_arch(void) } } -#ifdef CONFIG_PCI - for_each_compatible_node(np, "pci", "fsl,p1023-pcie") - fsl_add_bridge(np, 0); -#endif - mpc85xx_smp_init(); + + fsl_pci_assign_primary(); } -machine_device_initcall(p1023_rds, mpc85xx_common_publish_devices); +machine_arch_initcall(p1023_rds, mpc85xx_common_publish_devices); static void __init mpc85xx_rds_pic_init(void) { diff --git a/arch/powerpc/platforms/85xx/p2041_rdb.c b/arch/powerpc/platforms/85xx/p2041_rdb.c index 6541fa2..000c089 100644 --- a/arch/powerpc/platforms/85xx/p2041_rdb.c +++ b/arch/powerpc/platforms/85xx/p2041_rdb.c @@ -80,7 +80,7 @@ define_machine(p2041_rdb) { .power_save = e500_idle, }; -machine_device_initcall(p2041_rdb, corenet_ds_publish_devices); +machine_arch_initcall(p2041_rdb, corenet_ds_publish_devices); #ifdef CONFIG_SWIOTLB machine_arch_initcall(p2041_rdb, swiotlb_setup_bus_notifier); diff --git a/arch/powerpc/platforms/85xx/p3041_ds.c b/arch/powerpc/platforms/85xx/p3041_ds.c index f238efa..b3edc20 100644 --- a/arch/powerpc/platforms/85xx/p3041_ds.c +++ b/arch/powerpc/platforms/85xx/p3041_ds.c @@ -82,7 +82,7 @@ define_machine(p3041_ds) { .power_save = e500_idle, }; -machine_device_initcall(p3041_ds, corenet_ds_publish_devices); +machine_arch_initcall(p3041_ds, corenet_ds_publish_devices); #ifdef CONFIG_SWIOTLB machine_arch_initcall(p3041_ds, swiotlb_setup_bus_notifier); diff --git a/arch/powerpc/platforms/85xx/p4080_ds.c b/arch/powerpc/platforms/85xx/p4080_ds.c index c92417d..54df106 100644 --- a/arch/powerpc/platforms/85xx/p4080_ds.c +++ b/arch/powerpc/platforms/85xx/p4080_ds.c @@ -81,7 +81,7 @@ define_machine(p4080_ds) { .power_save = e500_idle, }; -machine_device_initcall(p4080_ds, corenet_ds_publish_devices); +machine_arch_initcall(p4080_ds, corenet_ds_publish_devices); #ifdef CONFIG_SWIOTLB machine_arch_initcall(p4080_ds, swiotlb_setup_bus_notifier); #endif diff --git a/arch/powerpc/platforms/85xx/p5020_ds.c b/arch/powerpc/platforms/85xx/p5020_ds.c index 17bef15..753a42c 100644 --- a/arch/powerpc/platforms/85xx/p5020_ds.c +++ b/arch/powerpc/platforms/85xx/p5020_ds.c @@ -91,7 +91,7 @@ define_machine(p5020_ds) { #endif }; -machine_device_initcall(p5020_ds, corenet_ds_publish_devices); +machine_arch_initcall(p5020_ds, corenet_ds_publish_devices); #ifdef CONFIG_SWIOTLB machine_arch_initcall(p5020_ds, swiotlb_setup_bus_notifier); diff --git a/arch/powerpc/platforms/85xx/p5040_ds.c b/arch/powerpc/platforms/85xx/p5040_ds.c index 8e22a34..1138185 100644 --- a/arch/powerpc/platforms/85xx/p5040_ds.c +++ b/arch/powerpc/platforms/85xx/p5040_ds.c @@ -82,7 +82,7 @@ define_machine(p5040_ds) { #endif }; -machine_device_initcall(p5040_ds, corenet_ds_publish_devices); +machine_arch_initcall(p5040_ds, corenet_ds_publish_devices); #ifdef CONFIG_SWIOTLB machine_arch_initcall(p5040_ds, swiotlb_setup_bus_notifier); diff --git a/arch/powerpc/platforms/85xx/qemu_e500.c b/arch/powerpc/platforms/85xx/qemu_e500.c index 3c5490c..f6ea561 100644 --- a/arch/powerpc/platforms/85xx/qemu_e500.c +++ b/arch/powerpc/platforms/85xx/qemu_e500.c @@ -41,7 +41,7 @@ static void __init qemu_e500_setup_arch(void) { ppc_md.progress("qemu_e500_setup_arch()", 0); - fsl_pci_init(); + fsl_pci_assign_primary(); swiotlb_detect_4g(); mpc85xx_smp_init(); } @@ -56,7 +56,7 @@ static int __init qemu_e500_probe(void) return !!of_flat_dt_is_compatible(root, "fsl,qemu-e500"); } -machine_device_initcall(qemu_e500, mpc85xx_common_publish_devices); +machine_arch_initcall(qemu_e500, mpc85xx_common_publish_devices); define_machine(qemu_e500) { .name = "QEMU e500", diff --git a/arch/powerpc/platforms/85xx/sbc8548.c b/arch/powerpc/platforms/85xx/sbc8548.c index cd3a66b..f621218 100644 --- a/arch/powerpc/platforms/85xx/sbc8548.c +++ b/arch/powerpc/platforms/85xx/sbc8548.c @@ -88,26 +88,11 @@ static int __init sbc8548_hw_rev(void) */ static void __init sbc8548_setup_arch(void) { -#ifdef CONFIG_PCI - struct device_node *np; -#endif - if (ppc_md.progress) ppc_md.progress("sbc8548_setup_arch()", 0); -#ifdef CONFIG_PCI - for_each_node_by_type(np, "pci") { - if (of_device_is_compatible(np, "fsl,mpc8540-pci") || - of_device_is_compatible(np, "fsl,mpc8548-pcie")) { - struct resource rsrc; - of_address_to_resource(np, 0, &rsrc); - if ((rsrc.start & 0xfffff) == 0x8000) - fsl_add_bridge(np, 1); - else - fsl_add_bridge(np, 0); - } - } -#endif + fsl_pci_assign_primary(); + sbc_rev = sbc8548_hw_rev(); } @@ -128,7 +113,7 @@ static void sbc8548_show_cpuinfo(struct seq_file *m) seq_printf(m, "PLL setting\t: 0x%x\n", ((phid1 >> 24) & 0x3f)); } -machine_device_initcall(sbc8548, mpc85xx_common_publish_devices); +machine_arch_initcall(sbc8548, mpc85xx_common_publish_devices); /* * Called very early, device-tree isn't unflattened diff --git a/arch/powerpc/platforms/85xx/socrates.c b/arch/powerpc/platforms/85xx/socrates.c index b9c6daa..ae368e0 100644 --- a/arch/powerpc/platforms/85xx/socrates.c +++ b/arch/powerpc/platforms/85xx/socrates.c @@ -66,20 +66,13 @@ static void __init socrates_pic_init(void) */ static void __init socrates_setup_arch(void) { -#ifdef CONFIG_PCI - struct device_node *np; -#endif - if (ppc_md.progress) ppc_md.progress("socrates_setup_arch()", 0); -#ifdef CONFIG_PCI - for_each_compatible_node(np, "pci", "fsl,mpc8540-pci") - fsl_add_bridge(np, 1); -#endif + fsl_pci_assign_primary(); } -machine_device_initcall(socrates, mpc85xx_common_publish_devices); +machine_arch_initcall(socrates, mpc85xx_common_publish_devices); /* * Called very early, device-tree isn't unflattened diff --git a/arch/powerpc/platforms/85xx/stx_gp3.c b/arch/powerpc/platforms/85xx/stx_gp3.c index e050800..6f4939b 100644 --- a/arch/powerpc/platforms/85xx/stx_gp3.c +++ b/arch/powerpc/platforms/85xx/stx_gp3.c @@ -60,21 +60,14 @@ static void __init stx_gp3_pic_init(void) */ static void __init stx_gp3_setup_arch(void) { -#ifdef CONFIG_PCI - struct device_node *np; -#endif - if (ppc_md.progress) ppc_md.progress("stx_gp3_setup_arch()", 0); + fsl_pci_assign_primary(); + #ifdef CONFIG_CPM2 cpm2_reset(); #endif - -#ifdef CONFIG_PCI - for_each_compatible_node(np, "pci", "fsl,mpc8540-pci") - fsl_add_bridge(np, 1); -#endif } static void stx_gp3_show_cpuinfo(struct seq_file *m) @@ -93,7 +86,7 @@ static void stx_gp3_show_cpuinfo(struct seq_file *m) seq_printf(m, "PLL setting\t: 0x%x\n", ((phid1 >> 24) & 0x3f)); } -machine_device_initcall(stx_gp3, mpc85xx_common_publish_devices); +machine_arch_initcall(stx_gp3, mpc85xx_common_publish_devices); /* * Called very early, device-tree isn't unflattened diff --git a/arch/powerpc/platforms/85xx/tqm85xx.c b/arch/powerpc/platforms/85xx/tqm85xx.c index 3e70a20..d8941eea 100644 --- a/arch/powerpc/platforms/85xx/tqm85xx.c +++ b/arch/powerpc/platforms/85xx/tqm85xx.c @@ -59,10 +59,6 @@ static void __init tqm85xx_pic_init(void) */ static void __init tqm85xx_setup_arch(void) { -#ifdef CONFIG_PCI - struct device_node *np; -#endif - if (ppc_md.progress) ppc_md.progress("tqm85xx_setup_arch()", 0); @@ -70,20 +66,7 @@ static void __init tqm85xx_setup_arch(void) cpm2_reset(); #endif -#ifdef CONFIG_PCI - for_each_node_by_type(np, "pci") { - if (of_device_is_compatible(np, "fsl,mpc8540-pci") || - of_device_is_compatible(np, "fsl,mpc8548-pcie")) { - struct resource rsrc; - if (!of_address_to_resource(np, 0, &rsrc)) { - if ((rsrc.start & 0xfffff) == 0x8000) - fsl_add_bridge(np, 1); - else - fsl_add_bridge(np, 0); - } - } - } -#endif + fsl_pci_assign_primary(); } static void tqm85xx_show_cpuinfo(struct seq_file *m) @@ -123,7 +106,7 @@ static void __devinit tqm85xx_ti1520_fixup(struct pci_dev *pdev) DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_TI, PCI_DEVICE_ID_TI_1520, tqm85xx_ti1520_fixup); -machine_device_initcall(tqm85xx, mpc85xx_common_publish_devices); +machine_arch_initcall(tqm85xx, mpc85xx_common_publish_devices); static const char *board[] __initdata = { "tqc,tqm8540", diff --git a/arch/powerpc/platforms/85xx/xes_mpc85xx.c b/arch/powerpc/platforms/85xx/xes_mpc85xx.c index 41c6875..dcbf7e4 100644 --- a/arch/powerpc/platforms/85xx/xes_mpc85xx.c +++ b/arch/powerpc/platforms/85xx/xes_mpc85xx.c @@ -111,18 +111,11 @@ static void xes_mpc85xx_fixups(void) } } -#ifdef CONFIG_PCI -static int primary_phb_addr; -#endif - /* * Setup the architecture */ static void __init xes_mpc85xx_setup_arch(void) { -#ifdef CONFIG_PCI - struct device_node *np; -#endif struct device_node *root; const char *model = "Unknown"; @@ -137,26 +130,14 @@ static void __init xes_mpc85xx_setup_arch(void) xes_mpc85xx_fixups(); -#ifdef CONFIG_PCI - for_each_node_by_type(np, "pci") { - if (of_device_is_compatible(np, "fsl,mpc8540-pci") || - of_device_is_compatible(np, "fsl,mpc8548-pcie")) { - struct resource rsrc; - of_address_to_resource(np, 0, &rsrc); - if ((rsrc.start & 0xfffff) == primary_phb_addr) - fsl_add_bridge(np, 1); - else - fsl_add_bridge(np, 0); - } - } -#endif - mpc85xx_smp_init(); + + fsl_pci_assign_primary(); } -machine_device_initcall(xes_mpc8572, mpc85xx_common_publish_devices); -machine_device_initcall(xes_mpc8548, mpc85xx_common_publish_devices); -machine_device_initcall(xes_mpc8540, mpc85xx_common_publish_devices); +machine_arch_initcall(xes_mpc8572, mpc85xx_common_publish_devices); +machine_arch_initcall(xes_mpc8548, mpc85xx_common_publish_devices); +machine_arch_initcall(xes_mpc8540, mpc85xx_common_publish_devices); /* * Called very early, device-tree isn't unflattened @@ -165,42 +146,21 @@ static int __init xes_mpc8572_probe(void) { unsigned long root = of_get_flat_dt_root(); - if (of_flat_dt_is_compatible(root, "xes,MPC8572")) { -#ifdef CONFIG_PCI - primary_phb_addr = 0x8000; -#endif - return 1; - } else { - return 0; - } + return of_flat_dt_is_compatible(root, "xes,MPC8572"); } static int __init xes_mpc8548_probe(void) { unsigned long root = of_get_flat_dt_root(); - if (of_flat_dt_is_compatible(root, "xes,MPC8548")) { -#ifdef CONFIG_PCI - primary_phb_addr = 0xb000; -#endif - return 1; - } else { - return 0; - } + return of_flat_dt_is_compatible(root, "xes,MPC8548"); } static int __init xes_mpc8540_probe(void) { unsigned long root = of_get_flat_dt_root(); - if (of_flat_dt_is_compatible(root, "xes,MPC8540")) { -#ifdef CONFIG_PCI - primary_phb_addr = 0xb000; -#endif - return 1; - } else { - return 0; - } + return of_flat_dt_is_compatible(root, "xes,MPC8540"); } define_machine(xes_mpc8572) { diff --git a/arch/powerpc/platforms/86xx/gef_ppc9a.c b/arch/powerpc/platforms/86xx/gef_ppc9a.c index 563aafa8..bf53387 100644 --- a/arch/powerpc/platforms/86xx/gef_ppc9a.c +++ b/arch/powerpc/platforms/86xx/gef_ppc9a.c @@ -73,13 +73,6 @@ static void __init gef_ppc9a_init_irq(void) static void __init gef_ppc9a_setup_arch(void) { struct device_node *regs; -#ifdef CONFIG_PCI - struct device_node *np; - - for_each_compatible_node(np, "pci", "fsl,mpc8641-pcie") { - fsl_add_bridge(np, 1); - } -#endif printk(KERN_INFO "GE Intelligent Platforms PPC9A 6U VME SBC\n"); @@ -87,6 +80,8 @@ static void __init gef_ppc9a_setup_arch(void) mpc86xx_smp_init(); #endif + fsl_pci_assign_primary(); + /* Remap basic board registers */ regs = of_find_compatible_node(NULL, NULL, "gef,ppc9a-fpga-regs"); if (regs) { @@ -221,6 +216,7 @@ static long __init mpc86xx_time_init(void) static __initdata struct of_device_id of_bus_ids[] = { { .compatible = "simple-bus", }, { .compatible = "gianfar", }, + { .compatible = "fsl,mpc8641-pcie", }, {}, }; @@ -231,7 +227,7 @@ static int __init declare_of_platform_devices(void) return 0; } -machine_device_initcall(gef_ppc9a, declare_of_platform_devices); +machine_arch_initcall(gef_ppc9a, declare_of_platform_devices); define_machine(gef_ppc9a) { .name = "GE PPC9A", diff --git a/arch/powerpc/platforms/86xx/gef_sbc310.c b/arch/powerpc/platforms/86xx/gef_sbc310.c index cc6a91a..0b78513 100644 --- a/arch/powerpc/platforms/86xx/gef_sbc310.c +++ b/arch/powerpc/platforms/86xx/gef_sbc310.c @@ -73,20 +73,14 @@ static void __init gef_sbc310_init_irq(void) static void __init gef_sbc310_setup_arch(void) { struct device_node *regs; -#ifdef CONFIG_PCI - struct device_node *np; - - for_each_compatible_node(np, "pci", "fsl,mpc8641-pcie") { - fsl_add_bridge(np, 1); - } -#endif - printk(KERN_INFO "GE Intelligent Platforms SBC310 6U VPX SBC\n"); #ifdef CONFIG_SMP mpc86xx_smp_init(); #endif + fsl_pci_assign_primary(); + /* Remap basic board registers */ regs = of_find_compatible_node(NULL, NULL, "gef,fpga-regs"); if (regs) { @@ -209,6 +203,7 @@ static long __init mpc86xx_time_init(void) static __initdata struct of_device_id of_bus_ids[] = { { .compatible = "simple-bus", }, { .compatible = "gianfar", }, + { .compatible = "fsl,mpc8641-pcie", }, {}, }; @@ -219,7 +214,7 @@ static int __init declare_of_platform_devices(void) return 0; } -machine_device_initcall(gef_sbc310, declare_of_platform_devices); +machine_arch_initcall(gef_sbc310, declare_of_platform_devices); define_machine(gef_sbc310) { .name = "GE SBC310", diff --git a/arch/powerpc/platforms/86xx/gef_sbc610.c b/arch/powerpc/platforms/86xx/gef_sbc610.c index aead6b3..b9eb174 100644 --- a/arch/powerpc/platforms/86xx/gef_sbc610.c +++ b/arch/powerpc/platforms/86xx/gef_sbc610.c @@ -73,13 +73,6 @@ static void __init gef_sbc610_init_irq(void) static void __init gef_sbc610_setup_arch(void) { struct device_node *regs; -#ifdef CONFIG_PCI - struct device_node *np; - - for_each_compatible_node(np, "pci", "fsl,mpc8641-pcie") { - fsl_add_bridge(np, 1); - } -#endif printk(KERN_INFO "GE Intelligent Platforms SBC610 6U VPX SBC\n"); @@ -87,6 +80,8 @@ static void __init gef_sbc610_setup_arch(void) mpc86xx_smp_init(); #endif + fsl_pci_assign_primary(); + /* Remap basic board registers */ regs = of_find_compatible_node(NULL, NULL, "gef,fpga-regs"); if (regs) { @@ -198,6 +193,7 @@ static long __init mpc86xx_time_init(void) static __initdata struct of_device_id of_bus_ids[] = { { .compatible = "simple-bus", }, { .compatible = "gianfar", }, + { .compatible = "fsl,mpc8641-pcie", }, {}, }; @@ -208,7 +204,7 @@ static int __init declare_of_platform_devices(void) return 0; } -machine_device_initcall(gef_sbc610, declare_of_platform_devices); +machine_arch_initcall(gef_sbc610, declare_of_platform_devices); define_machine(gef_sbc610) { .name = "GE SBC610", diff --git a/arch/powerpc/platforms/86xx/mpc8610_hpcd.c b/arch/powerpc/platforms/86xx/mpc8610_hpcd.c index 62cd3c5..a817398 100644 --- a/arch/powerpc/platforms/86xx/mpc8610_hpcd.c +++ b/arch/powerpc/platforms/86xx/mpc8610_hpcd.c @@ -91,6 +91,9 @@ static struct of_device_id __initdata mpc8610_ids[] = { { .compatible = "simple-bus", }, /* So that the DMA channel nodes can be probed individually: */ { .compatible = "fsl,eloplus-dma", }, + /* PCI controllers */ + { .compatible = "fsl,mpc8610-pci", }, + { .compatible = "fsl,mpc8641-pcie", }, {} }; @@ -107,7 +110,7 @@ static int __init mpc8610_declare_of_platform_devices(void) return 0; } -machine_device_initcall(mpc86xx_hpcd, mpc8610_declare_of_platform_devices); +machine_arch_initcall(mpc86xx_hpcd, mpc8610_declare_of_platform_devices); #if defined(CONFIG_FB_FSL_DIU) || defined(CONFIG_FB_FSL_DIU_MODULE) @@ -278,25 +281,13 @@ mpc8610hpcd_valid_monitor_port(enum fsl_diu_monitor_port port) static void __init mpc86xx_hpcd_setup_arch(void) { struct resource r; - struct device_node *np; unsigned char *pixis; if (ppc_md.progress) ppc_md.progress("mpc86xx_hpcd_setup_arch()", 0); -#ifdef CONFIG_PCI - for_each_node_by_type(np, "pci") { - if (of_device_is_compatible(np, "fsl,mpc8610-pci") - || of_device_is_compatible(np, "fsl,mpc8641-pcie")) { - struct resource rsrc; - of_address_to_resource(np, 0, &rsrc); - if ((rsrc.start & 0xfffff) == 0xa000) - fsl_add_bridge(np, 1); - else - fsl_add_bridge(np, 0); - } - } -#endif + fsl_pci_assign_primary(); + #if defined(CONFIG_FB_FSL_DIU) || defined(CONFIG_FB_FSL_DIU_MODULE) diu_ops.get_pixel_format = mpc8610hpcd_get_pixel_format; diu_ops.set_gamma_table = mpc8610hpcd_set_gamma_table; diff --git a/arch/powerpc/platforms/86xx/mpc86xx_hpcn.c b/arch/powerpc/platforms/86xx/mpc86xx_hpcn.c index 817245b..e8bf3fa 100644 --- a/arch/powerpc/platforms/86xx/mpc86xx_hpcn.c +++ b/arch/powerpc/platforms/86xx/mpc86xx_hpcn.c @@ -19,7 +19,6 @@ #include #include #include -#include #include #include @@ -51,15 +50,8 @@ extern int uli_exclude_device(struct pci_controller *hose, static int mpc86xx_exclude_device(struct pci_controller *hose, u_char bus, u_char devfn) { - struct device_node* node; - struct resource rsrc; - - node = hose->dn; - of_address_to_resource(node, 0, &rsrc); - - if ((rsrc.start & 0xfffff) == 0x8000) { + if (hose->dn == fsl_pci_primary) return uli_exclude_device(hose, bus, devfn); - } return PCIBIOS_SUCCESSFUL; } @@ -69,30 +61,11 @@ static int mpc86xx_exclude_device(struct pci_controller *hose, static void __init mpc86xx_hpcn_setup_arch(void) { -#ifdef CONFIG_PCI - struct device_node *np; - struct pci_controller *hose; -#endif - dma_addr_t max = 0xffffffff; - if (ppc_md.progress) ppc_md.progress("mpc86xx_hpcn_setup_arch()", 0); #ifdef CONFIG_PCI - for_each_compatible_node(np, "pci", "fsl,mpc8641-pcie") { - struct resource rsrc; - of_address_to_resource(np, 0, &rsrc); - if ((rsrc.start & 0xfffff) == 0x8000) - fsl_add_bridge(np, 1); - else - fsl_add_bridge(np, 0); - hose = pci_find_hose_for_OF_device(np); - max = min(max, hose->dma_window_base_cur + - hose->dma_window_size); - } - ppc_md.pci_exclude_device = mpc86xx_exclude_device; - #endif printk("MPC86xx HPCN board from Freescale Semiconductor\n"); @@ -101,13 +74,9 @@ mpc86xx_hpcn_setup_arch(void) mpc86xx_smp_init(); #endif -#ifdef CONFIG_SWIOTLB - if ((memblock_end_of_DRAM() - 1) > max) { - ppc_swiotlb_enable = 1; - set_pci_dma_ops(&swiotlb_dma_ops); - ppc_md.pci_dma_dev_setup = pci_dma_dev_setup_swiotlb; - } -#endif + fsl_pci_assign_primary(); + + swiotlb_detect_4g(); } @@ -162,6 +131,7 @@ static __initdata struct of_device_id of_bus_ids[] = { { .compatible = "simple-bus", }, { .compatible = "fsl,srio", }, { .compatible = "gianfar", }, + { .compatible = "fsl,mpc8641-pcie", }, {}, }; @@ -171,7 +141,7 @@ static int __init declare_of_platform_devices(void) return 0; } -machine_device_initcall(mpc86xx_hpcn, declare_of_platform_devices); +machine_arch_initcall(mpc86xx_hpcn, declare_of_platform_devices); machine_arch_initcall(mpc86xx_hpcn, swiotlb_setup_bus_notifier); define_machine(mpc86xx_hpcn) { diff --git a/arch/powerpc/platforms/86xx/sbc8641d.c b/arch/powerpc/platforms/86xx/sbc8641d.c index e7007d0..b47a8fd 100644 --- a/arch/powerpc/platforms/86xx/sbc8641d.c +++ b/arch/powerpc/platforms/86xx/sbc8641d.c @@ -38,23 +38,16 @@ static void __init sbc8641_setup_arch(void) { -#ifdef CONFIG_PCI - struct device_node *np; -#endif - if (ppc_md.progress) ppc_md.progress("sbc8641_setup_arch()", 0); -#ifdef CONFIG_PCI - for_each_compatible_node(np, "pci", "fsl,mpc8641-pcie") - fsl_add_bridge(np, 0); -#endif - printk("SBC8641 board from Wind River\n"); #ifdef CONFIG_SMP mpc86xx_smp_init(); #endif + + fsl_pci_assign_primary(); } @@ -102,6 +95,7 @@ mpc86xx_time_init(void) static __initdata struct of_device_id of_bus_ids[] = { { .compatible = "simple-bus", }, { .compatible = "gianfar", }, + { .compatible = "fsl,mpc8641-pcie", }, {}, }; @@ -111,7 +105,7 @@ static int __init declare_of_platform_devices(void) return 0; } -machine_device_initcall(sbc8641, declare_of_platform_devices); +machine_arch_initcall(sbc8641, declare_of_platform_devices); define_machine(sbc8641) { .name = "SBC8641D", diff --git a/arch/powerpc/sysdev/fsl_pci.c b/arch/powerpc/sysdev/fsl_pci.c index 23acaf4..2ff3576 100644 --- a/arch/powerpc/sysdev/fsl_pci.c +++ b/arch/powerpc/sysdev/fsl_pci.c @@ -828,54 +828,78 @@ static const struct of_device_id pci_ids[] = { struct device_node *fsl_pci_primary; -void __devinit fsl_pci_init(void) +void fsl_pci_assign_primary(void) { - int ret; - struct device_node *node; - struct pci_controller *hose; - dma_addr_t max = 0xffffffff; + struct device_node *np; /* Callers can specify the primary bus using other means. */ - if (!fsl_pci_primary) { - /* If a PCI host bridge contains an ISA node, it's primary. */ - node = of_find_node_by_type(NULL, "isa"); - while ((fsl_pci_primary = of_get_parent(node))) { - of_node_put(node); - node = fsl_pci_primary; - - if (of_match_node(pci_ids, node)) - break; - } + if (fsl_pci_primary) + return; + + /* If a PCI host bridge contains an ISA node, it's primary. */ + np = of_find_node_by_type(NULL, "isa"); + while ((fsl_pci_primary = of_get_parent(np))) { + of_node_put(np); + np = fsl_pci_primary; + + if (of_match_node(pci_ids, np) && of_device_is_available(np)) + return; } - node = NULL; - for_each_node_by_type(node, "pci") { - if (of_match_node(pci_ids, node)) { - /* - * If there's no PCI host bridge with ISA, arbitrarily - * designate one as primary. This can go away once - * various bugs with primary-less systems are fixed. - */ - if (!fsl_pci_primary) - fsl_pci_primary = node; - - ret = fsl_add_bridge(node, fsl_pci_primary == node); - if (ret == 0) { - hose = pci_find_hose_for_OF_device(node); - max = min(max, hose->dma_window_base_cur + - hose->dma_window_size); - } + /* + * If there's no PCI host bridge with ISA, arbitrarily + * designate one as primary. This can go away once + * various bugs with primary-less systems are fixed. + */ + for_each_matching_node(np, pci_ids) { + if (of_device_is_available(np)) { + fsl_pci_primary = np; + of_node_put(np); + return; } } +} + +static int __devinit fsl_pci_probe(struct platform_device *pdev) +{ + int ret; + struct device_node *node; + struct pci_controller *hose; + + node = pdev->dev.of_node; + ret = fsl_add_bridge(node, fsl_pci_primary == node); #ifdef CONFIG_SWIOTLB - /* - * if we couldn't map all of DRAM via the dma windows - * we need SWIOTLB to handle buffers located outside of - * dma capable memory region - */ - if (memblock_end_of_DRAM() - 1 > max) - ppc_swiotlb_enable = 1; + if (ret == 0) { + hose = pci_find_hose_for_OF_device(pdev->dev.of_node); + + /* + * if we couldn't map all of DRAM via the dma windows + * we need SWIOTLB to handle buffers located outside of + * dma capable memory region + */ + if (memblock_end_of_DRAM() - 1 > hose->dma_window_base_cur + + hose->dma_window_size) + ppc_swiotlb_enable = 1; + } #endif + + mpc85xx_pci_err_probe(pdev); + + return 0; +} + +static struct platform_driver fsl_pci_driver = { + .driver = { + .name = "fsl-pci", + .of_match_table = pci_ids, + }, + .probe = fsl_pci_probe, +}; + +static int __init fsl_pci_init(void) +{ + return platform_driver_register(&fsl_pci_driver); } +arch_initcall(fsl_pci_init); #endif diff --git a/arch/powerpc/sysdev/fsl_pci.h b/arch/powerpc/sysdev/fsl_pci.h index 54ed82c..d078537 100644 --- a/arch/powerpc/sysdev/fsl_pci.h +++ b/arch/powerpc/sysdev/fsl_pci.h @@ -98,10 +98,19 @@ u64 fsl_pci_immrbar_base(struct pci_controller *hose); extern struct device_node *fsl_pci_primary; -#ifdef CONFIG_FSL_PCI -void fsl_pci_init(void); +#ifdef CONFIG_PCI +void fsl_pci_assign_primary(void); #else -static inline void fsl_pci_init(void) {} +static inline void fsl_pci_assign_primary(void) {} +#endif + +#ifdef CONFIG_EDAC_MPC85XX +int mpc85xx_pci_err_probe(struct platform_device *op); +#else +static inline int mpc85xx_pci_err_probe(struct platform_device *op) +{ + return -ENOTSUPP; +} #endif #endif /* __POWERPC_FSL_PCI_H */ diff --git a/drivers/edac/mpc85xx_edac.c b/drivers/edac/mpc85xx_edac.c index a1e791e..4fe66fa 100644 --- a/drivers/edac/mpc85xx_edac.c +++ b/drivers/edac/mpc85xx_edac.c @@ -212,7 +212,7 @@ static irqreturn_t mpc85xx_pci_isr(int irq, void *dev_id) return IRQ_HANDLED; } -static int __devinit mpc85xx_pci_err_probe(struct platform_device *op) +int __devinit mpc85xx_pci_err_probe(struct platform_device *op) { struct edac_pci_ctl_info *pci; struct mpc85xx_pci_pdata *pdata; @@ -226,6 +226,16 @@ static int __devinit mpc85xx_pci_err_probe(struct platform_device *op) if (!pci) return -ENOMEM; + /* make sure error reporting method is sane */ + switch (edac_op_state) { + case EDAC_OPSTATE_POLL: + case EDAC_OPSTATE_INT: + break; + default: + edac_op_state = EDAC_OPSTATE_INT; + break; + } + pdata = pci->pvt_info; pdata->name = "mpc85xx_pci_err"; pdata->irq = NO_IRQ; @@ -315,6 +325,7 @@ err: devres_release_group(&op->dev, mpc85xx_pci_err_probe); return res; } +EXPORT_SYMBOL(mpc85xx_pci_err_probe); static int mpc85xx_pci_err_remove(struct platform_device *op) { @@ -338,27 +349,6 @@ static int mpc85xx_pci_err_remove(struct platform_device *op) return 0; } -static struct of_device_id mpc85xx_pci_err_of_match[] = { - { - .compatible = "fsl,mpc8540-pcix", - }, - { - .compatible = "fsl,mpc8540-pci", - }, - {}, -}; -MODULE_DEVICE_TABLE(of, mpc85xx_pci_err_of_match); - -static struct platform_driver mpc85xx_pci_err_driver = { - .probe = mpc85xx_pci_err_probe, - .remove = __devexit_p(mpc85xx_pci_err_remove), - .driver = { - .name = "mpc85xx_pci_err", - .owner = THIS_MODULE, - .of_match_table = mpc85xx_pci_err_of_match, - }, -}; - #endif /* CONFIG_PCI */ /**************************** L2 Err device ***************************/ @@ -1210,12 +1200,6 @@ static int __init mpc85xx_mc_init(void) if (res) printk(KERN_WARNING EDAC_MOD_STR "L2 fails to register\n"); -#ifdef CONFIG_PCI - res = platform_driver_register(&mpc85xx_pci_err_driver); - if (res) - printk(KERN_WARNING EDAC_MOD_STR "PCI fails to register\n"); -#endif - #ifdef CONFIG_FSL_SOC_BOOKE pvr = mfspr(SPRN_PVR); @@ -1252,9 +1236,6 @@ static void __exit mpc85xx_mc_exit(void) on_each_cpu(mpc85xx_mc_restore_hid1, NULL, 0); } #endif -#ifdef CONFIG_PCI - platform_driver_unregister(&mpc85xx_pci_err_driver); -#endif platform_driver_unregister(&mpc85xx_l2_err_driver); platform_driver_unregister(&mpc85xx_mc_err_driver); } -- cgit v0.10.2 From 56ebc06393e86cd3ffd66655c84df0146158d951 Mon Sep 17 00:00:00 2001 From: Anatolij Gustschin Date: Wed, 29 Aug 2012 21:31:08 +0000 Subject: powerpc/mpc5200: add dts files for ifm camera machines Add common o2d dtsi file to reuse it for other configurations. Add machine compatible string to mpc5200 simple platform file. Add dts files for O2D, O2I, O2MNT, O2DNT2, O2D300 and O3DNT boards. Signed-off-by: Anatolij Gustschin diff --git a/arch/powerpc/boot/dts/o2d.dts b/arch/powerpc/boot/dts/o2d.dts new file mode 100644 index 0000000..9f6dd4d --- /dev/null +++ b/arch/powerpc/boot/dts/o2d.dts @@ -0,0 +1,47 @@ +/* + * O2D Device Tree Source + * + * Copyright (C) 2012 DENX Software Engineering + * Anatolij Gustschin + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ + +/include/ "o2d.dtsi" + +/ { + model = "ifm,o2d"; + compatible = "ifm,o2d"; + + memory { + reg = <0x00000000 0x08000000>; // 128MB + }; + + localbus { + ranges = <0 0 0xfc000000 0x02000000 + 3 0 0xe3000000 0x00100000>; + + flash@0,0 { + compatible = "cfi-flash"; + reg = <0 0 0x02000000>; + bank-width = <2>; + device-width = <2>; + #size-cells = <1>; + #address-cells = <1>; + + partition@60000 { + label = "kernel"; + reg = <0x00060000 0x00260000>; + read-only; + }; + /* o2d specific partitions */ + partition@2c0000 { + label = "o2d user defined"; + reg = <0x002c0000 0x01d40000>; + }; + }; + }; +}; diff --git a/arch/powerpc/boot/dts/o2d.dtsi b/arch/powerpc/boot/dts/o2d.dtsi new file mode 100644 index 0000000..3444eb8 --- /dev/null +++ b/arch/powerpc/boot/dts/o2d.dtsi @@ -0,0 +1,139 @@ +/* + * O2D base Device Tree Source + * + * Copyright (C) 2012 DENX Software Engineering + * Anatolij Gustschin + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ + +/include/ "mpc5200b.dtsi" + +/ { + model = "ifm,o2d"; + compatible = "ifm,o2d"; + + memory { + reg = <0x00000000 0x04000000>; // 64MB + }; + + soc5200@f0000000 { + + gpio_simple: gpio@b00 { + }; + + timer@600 { // General Purpose Timer + #gpio-cells = <2>; + gpio-controller; + fsl,has-wdt; + fsl,wdt-on-boot = <0>; + }; + + timer@610 { + #gpio-cells = <2>; + gpio-controller; + }; + + timer7: timer@670 { + }; + + rtc@800 { + status = "disabled"; + }; + + psc@2000 { // PSC1 + compatible = "fsl,mpc5200b-psc-spi","fsl,mpc5200-psc-spi"; + #address-cells = <1>; + #size-cells = <0>; + cell-index = <0>; + + spidev@0 { + compatible = "spidev"; + spi-max-frequency = <250000>; + reg = <0>; + }; + }; + + psc@2200 { // PSC2 + status = "disabled"; + }; + + psc@2400 { // PSC3 + status = "disabled"; + }; + + psc@2600 { // PSC4 + compatible = "fsl,mpc5200b-psc-uart","fsl,mpc5200-psc-uart"; + }; + + psc@2800 { // PSC5 + compatible = "fsl,mpc5200b-psc-uart","fsl,mpc5200-psc-uart"; + }; + + psc@2c00 { // PSC6 + status = "disabled"; + }; + + ethernet@3000 { + phy-handle = <&phy0>; + }; + + mdio@3000 { + phy0: ethernet-phy@0 { + reg = <0>; + }; + }; + + sclpc@3c00 { + compatible = "fsl,mpc5200-lpbfifo"; + reg = <0x3c00 0x60>; + interrupts = <3 23 0>; + }; + }; + + localbus { + ranges = <0 0 0xff000000 0x01000000 + 3 0 0xe3000000 0x00100000>; + + // flash device at LocalPlus Bus CS0 + flash@0,0 { + compatible = "cfi-flash"; + reg = <0 0 0x01000000>; + bank-width = <1>; + device-width = <2>; + #size-cells = <1>; + #address-cells = <1>; + no-unaligned-direct-access; + + /* common layout for all machines */ + partition@0 { + label = "u-boot"; + reg = <0x00000000 0x00040000>; + read-only; + }; + partition@40000 { + label = "env"; + reg = <0x00040000 0x00020000>; + read-only; + }; + }; + + csi@3,0 { + compatible = "ifm,o2d-csi"; + reg = <3 0 0x00100000>; + ifm,csi-clk-handle = <&timer7>; + gpios = <&gpio_simple 23 0 /* imag_capture */ + &gpio_simple 26 0 /* imag_reset */ + &gpio_simple 29 0>; /* imag_master_en */ + + interrupts = <1 1 2>; /* IRQ1, edge falling */ + + ifm,csi-addr-bus-width = <24>; + ifm,csi-data-bus-width = <8>; + ifm,csi-wait-cycles = <0>; + }; + }; +}; diff --git a/arch/powerpc/boot/dts/o2d300.dts b/arch/powerpc/boot/dts/o2d300.dts new file mode 100644 index 0000000..29affe0 --- /dev/null +++ b/arch/powerpc/boot/dts/o2d300.dts @@ -0,0 +1,52 @@ +/* + * O2D300 Device Tree Source + * + * Copyright (C) 2012 DENX Software Engineering + * Anatolij Gustschin + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ + +/include/ "o2d.dtsi" + +/ { + model = "ifm,o2d300"; + compatible = "ifm,o2d"; + + localbus { + ranges = <0 0 0xfc000000 0x02000000 + 3 0 0xe3000000 0x00100000>; + flash@0,0 { + compatible = "cfi-flash"; + reg = <0 0 0x02000000>; + bank-width = <2>; + device-width = <2>; + #size-cells = <1>; + #address-cells = <1>; + + partition@40000 { + label = "env_1"; + reg = <0x00040000 0x00020000>; + read-only; + }; + partition@60000 { + label = "env_2"; + reg = <0x00060000 0x00020000>; + read-only; + }; + partition@80000 { + label = "kernel"; + reg = <0x00080000 0x00260000>; + read-only; + }; + /* o2d300 specific partitions */ + partition@2e0000 { + label = "o2d300 user defined"; + reg = <0x002e0000 0x01d20000>; + }; + }; + }; +}; diff --git a/arch/powerpc/boot/dts/o2dnt2.dts b/arch/powerpc/boot/dts/o2dnt2.dts new file mode 100644 index 0000000..a0f5b97 --- /dev/null +++ b/arch/powerpc/boot/dts/o2dnt2.dts @@ -0,0 +1,48 @@ +/* + * O2DNT2 Device Tree Source + * + * Copyright (C) 2012 DENX Software Engineering + * Anatolij Gustschin + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ + +/include/ "o2d.dtsi" + +/ { + model = "ifm,o2dnt2"; + compatible = "ifm,o2d"; + + memory { + reg = <0x00000000 0x08000000>; // 128MB + }; + + localbus { + ranges = <0 0 0xfc000000 0x02000000 + 3 0 0xe3000000 0x00100000>; + + flash@0,0 { + compatible = "cfi-flash"; + reg = <0 0 0x02000000>; + bank-width = <2>; + device-width = <2>; + #size-cells = <1>; + #address-cells = <1>; + + partition@60000 { + label = "kernel"; + reg = <0x00060000 0x00260000>; + read-only; + }; + + /* o2dnt2 specific partitions */ + partition@2c0000 { + label = "o2dnt2 user defined"; + reg = <0x002c0000 0x01d40000>; + }; + }; + }; +}; diff --git a/arch/powerpc/boot/dts/o2i.dts b/arch/powerpc/boot/dts/o2i.dts new file mode 100644 index 0000000..e3cc99d --- /dev/null +++ b/arch/powerpc/boot/dts/o2i.dts @@ -0,0 +1,33 @@ +/* + * O2I Device Tree Source + * + * Copyright (C) 2012 DENX Software Engineering + * Anatolij Gustschin + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ + +/include/ "o2d.dtsi" + +/ { + model = "ifm,o2i"; + compatible = "ifm,o2d"; + + localbus { + flash@0,0 { + partition@60000 { + label = "kernel"; + reg = <0x00060000 0x00260000>; + read-only; + }; + /* o2i specific partitions */ + partition@2c0000 { + label = "o2i user defined"; + reg = <0x002c0000 0x00d40000>; + }; + }; + }; +}; diff --git a/arch/powerpc/boot/dts/o2mnt.dts b/arch/powerpc/boot/dts/o2mnt.dts new file mode 100644 index 0000000..d91859a --- /dev/null +++ b/arch/powerpc/boot/dts/o2mnt.dts @@ -0,0 +1,33 @@ +/* + * O2MNT Device Tree Source + * + * Copyright (C) 2012 DENX Software Engineering + * Anatolij Gustschin + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ + +/include/ "o2d.dtsi" + +/ { + model = "ifm,o2mnt"; + compatible = "ifm,o2d"; + + localbus { + flash@0,0 { + partition@60000 { + label = "kernel"; + reg = <0x00060000 0x00260000>; + read-only; + }; + /* add o2mnt specific partitions */ + partition@2c0000 { + label = "o2mnt user defined"; + reg = <0x002c0000 0x00d40000>; + }; + }; + }; +}; diff --git a/arch/powerpc/boot/dts/o3dnt.dts b/arch/powerpc/boot/dts/o3dnt.dts new file mode 100644 index 0000000..acce493 --- /dev/null +++ b/arch/powerpc/boot/dts/o3dnt.dts @@ -0,0 +1,48 @@ +/* + * O3DNT Device Tree Source + * + * Copyright (C) 2012 DENX Software Engineering + * Anatolij Gustschin + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ + +/include/ "o2d.dtsi" + +/ { + model = "ifm,o3dnt"; + compatible = "ifm,o2d"; + + memory { + reg = <0x00000000 0x04000000>; // 64MB + }; + + localbus { + ranges = <0 0 0xfc000000 0x01000000 + 3 0 0xe3000000 0x00100000>; + + flash@0,0 { + compatible = "cfi-flash"; + reg = <0 0 0x01000000>; + bank-width = <2>; + device-width = <2>; + #size-cells = <1>; + #address-cells = <1>; + + partition@60000 { + label = "kernel"; + reg = <0x00060000 0x00260000>; + read-only; + }; + + /* o3dnt specific partitions */ + partition@2c0000 { + label = "o3dnt user defined"; + reg = <0x002c0000 0x00d40000>; + }; + }; + }; +}; diff --git a/arch/powerpc/platforms/52xx/mpc5200_simple.c b/arch/powerpc/platforms/52xx/mpc5200_simple.c index c0aa040..9cf3602 100644 --- a/arch/powerpc/platforms/52xx/mpc5200_simple.c +++ b/arch/powerpc/platforms/52xx/mpc5200_simple.c @@ -52,6 +52,7 @@ static void __init mpc5200_simple_setup_arch(void) static const char *board[] __initdata = { "anonymous,a4m072", "anon,charon", + "ifm,o2d", "intercontrol,digsy-mtc", "manroland,mucmc52", "manroland,uc101", -- cgit v0.10.2 From 00e4b1975bbcd10445808cec0a3a1442e8deca53 Mon Sep 17 00:00:00 2001 From: Anatolij Gustschin Date: Fri, 24 Aug 2012 05:39:39 +0000 Subject: dt/misc: add bindings documentation for ifm camera sensor interface IFM O2D cameras use special sensor bus interface glue-logic to connect camera sensors to mpc5200 LocalPlus bus. Add device tree bindings documentation for it. Signed-off-by: Anatolij Gustschin diff --git a/Documentation/devicetree/bindings/misc/ifm-csi.txt b/Documentation/devicetree/bindings/misc/ifm-csi.txt new file mode 100644 index 0000000..5bdfffb --- /dev/null +++ b/Documentation/devicetree/bindings/misc/ifm-csi.txt @@ -0,0 +1,41 @@ +IFM camera sensor interface on mpc5200 LocalPlus bus + +Required properties: +- compatible: "ifm,o2d-csi" +- reg: specifies sensor chip select number and associated address range +- interrupts: external interrupt line number and interrupt sense mode + of the interrupt line signaling frame valid events +- gpios: three gpio-specifiers for "capture", "reset" and "master enable" + GPIOs (strictly in this order). +- ifm,csi-clk-handle: the phandle to a node in the DT describing the sensor + clock generator. This node is usually a general purpose timer controller. +- ifm,csi-addr-bus-width: address bus width (valid values are 16, 24, 25) +- ifm,csi-data-bus-width: data bus width (valid values are 8 and 16) +- ifm,csi-wait-cycles: sensor bus wait cycles + +Optional properties: +- ifm,csi-byte-swap: if this property is present, the byte swapping on + the bus will be enabled. + +Example: + + csi@3,0 { + compatible = "ifm,o2d-csi"; + reg = <3 0 0x00100000>; /* CS 3, 1 MiB range */ + interrupts = <1 1 2>; /* IRQ1, edge falling */ + + ifm,csi-clk-handle = <&timer7>; + gpios = <&gpio_simple 23 0 /* image_capture */ + &gpio_simple 26 0 /* image_reset */ + &gpio_simple 29 0>; /* image_master_en */ + + ifm,csi-addr-bus-width = <24>; + ifm,csi-data-bus-width = <8>; + ifm,csi-wait-cycles = <0>; + }; + +The base address of the used chip select is specified in the +ranges property of the parent localbus node, for example: + + ranges = <0 0 0xff000000 0x01000000 + 3 0 0xe3000000 0x00100000>; -- cgit v0.10.2 From 9d9fd8871b4136755160bf95a9abd0d23d738ddc Mon Sep 17 00:00:00 2001 From: Anatolij Gustschin Date: Wed, 29 Aug 2012 21:33:28 +0000 Subject: powerpc/mpc52xx_lpbfifo: optionally defer fifo transfer start Currently fifo transfer is started when submitting a transfer request. Add posibility to defer the fifo transfer and start it later by calling additional function. This change is backward compatible, the behaviour of mpc52xx_lpbfifo_submit() is the same for previous driver users, so there is no need to adapt them. Signed-off-by: Anatolij Gustschin diff --git a/arch/powerpc/include/asm/mpc52xx.h b/arch/powerpc/include/asm/mpc52xx.h index 1f41382..0acc7c7c 100644 --- a/arch/powerpc/include/asm/mpc52xx.h +++ b/arch/powerpc/include/asm/mpc52xx.h @@ -307,6 +307,7 @@ struct mpc52xx_lpbfifo_request { size_t size; size_t pos; /* current position of transfer */ int flags; + int defer_xfer_start; /* What to do when finished */ void (*callback)(struct mpc52xx_lpbfifo_request *); @@ -323,6 +324,7 @@ struct mpc52xx_lpbfifo_request { extern int mpc52xx_lpbfifo_submit(struct mpc52xx_lpbfifo_request *req); extern void mpc52xx_lpbfifo_abort(struct mpc52xx_lpbfifo_request *req); extern void mpc52xx_lpbfifo_poll(void); +extern int mpc52xx_lpbfifo_start_xfer(struct mpc52xx_lpbfifo_request *req); /* mpc52xx_pic.c */ extern void mpc52xx_init_irq(void); diff --git a/arch/powerpc/platforms/52xx/mpc52xx_lpbfifo.c b/arch/powerpc/platforms/52xx/mpc52xx_lpbfifo.c index d61fb1c..2351f9e 100644 --- a/arch/powerpc/platforms/52xx/mpc52xx_lpbfifo.c +++ b/arch/powerpc/platforms/52xx/mpc52xx_lpbfifo.c @@ -170,7 +170,8 @@ static void mpc52xx_lpbfifo_kick(struct mpc52xx_lpbfifo_request *req) out_be32(lpbfifo.regs + LPBFIFO_REG_CONTROL, bit_fields); /* Kick it off */ - out_8(lpbfifo.regs + LPBFIFO_REG_PACKET_SIZE, 0x01); + if (!lpbfifo.req->defer_xfer_start) + out_8(lpbfifo.regs + LPBFIFO_REG_PACKET_SIZE, 0x01); if (dma) bcom_enable(lpbfifo.bcom_cur_task); } @@ -421,6 +422,38 @@ int mpc52xx_lpbfifo_submit(struct mpc52xx_lpbfifo_request *req) } EXPORT_SYMBOL(mpc52xx_lpbfifo_submit); +int mpc52xx_lpbfifo_start_xfer(struct mpc52xx_lpbfifo_request *req) +{ + unsigned long flags; + + if (!lpbfifo.regs) + return -ENODEV; + + spin_lock_irqsave(&lpbfifo.lock, flags); + + /* + * If the req pointer is already set and a transfer was + * started on submit, then this transfer is in progress + */ + if (lpbfifo.req && !lpbfifo.req->defer_xfer_start) { + spin_unlock_irqrestore(&lpbfifo.lock, flags); + return -EBUSY; + } + + /* + * If the req was previously submitted but not + * started, start it now + */ + if (lpbfifo.req && lpbfifo.req == req && + lpbfifo.req->defer_xfer_start) { + out_8(lpbfifo.regs + LPBFIFO_REG_PACKET_SIZE, 0x01); + } + + spin_unlock_irqrestore(&lpbfifo.lock, flags); + return 0; +} +EXPORT_SYMBOL(mpc52xx_lpbfifo_start_xfer); + void mpc52xx_lpbfifo_abort(struct mpc52xx_lpbfifo_request *req) { unsigned long flags; -- cgit v0.10.2 From b92a66a65cb4480774066d9a3080c77eb34b7232 Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Mon, 10 Sep 2012 00:35:26 +0000 Subject: powerpc: Add denormalisation exception handling for POWER6/7 On POWER6 and POWER7 if the input operand to an instruction is a denormalised single precision binary floating point value we can take a denormalisation exception where it's expected that the hypervisor (HV=1) will fix up the inputs before the instruction is run. This adds code to handle this denormalisation exception for POWER6 and POWER7. It also add a CONFIG_PPC_DENORMALISATION option and sets it in pseries/ppc64_defconfig. This is useful on bare metal systems only. Based on patch from Milton Miller. Signed-off-by: Michael Neuling Signed-off-by: Benjamin Herrenschmidt diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 98e513b..748ccaa 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -560,6 +560,14 @@ config SCHED_SMT when dealing with POWER5 cpus at a cost of slightly increased overhead in some places. If unsure say N here. +config PPC_DENORMALISATION + bool "PowerPC denormalisation exception handling" + depends on PPC_BOOK3S_64 + default "n" + ---help--- + Add support for handling denormalisation of single precision + values. Useful for bare metal only. If unsure say Y here. + config CMDLINE_BOOL bool "Default bootloader kernel arguments" diff --git a/arch/powerpc/configs/ppc64_defconfig b/arch/powerpc/configs/ppc64_defconfig index db27c82..e263e6a 100644 --- a/arch/powerpc/configs/ppc64_defconfig +++ b/arch/powerpc/configs/ppc64_defconfig @@ -51,6 +51,7 @@ CONFIG_KEXEC=y CONFIG_IRQ_ALL_CPUS=y CONFIG_MEMORY_HOTREMOVE=y CONFIG_SCHED_SMT=y +CONFIG_PPC_DENORMALISATION=y CONFIG_PCCARD=y CONFIG_ELECTRA_CF=y CONFIG_HOTPLUG_PCI=m diff --git a/arch/powerpc/configs/pseries_defconfig b/arch/powerpc/configs/pseries_defconfig index 1f65b3c9..c169dfb 100644 --- a/arch/powerpc/configs/pseries_defconfig +++ b/arch/powerpc/configs/pseries_defconfig @@ -48,6 +48,7 @@ CONFIG_MEMORY_HOTREMOVE=y CONFIG_PPC_64K_PAGES=y CONFIG_PPC_SUBPAGE_PROT=y CONFIG_SCHED_SMT=y +CONFIG_PPC_DENORMALISATION=y CONFIG_HOTPLUG_PCI=m CONFIG_HOTPLUG_PCI_RPA=m CONFIG_HOTPLUG_PCI_RPA_DLPAR=m diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h index 4c25319..5f73ce6 100644 --- a/arch/powerpc/include/asm/ppc-opcode.h +++ b/arch/powerpc/include/asm/ppc-opcode.h @@ -126,6 +126,7 @@ #define PPC_INST_TLBIVAX 0x7c000624 #define PPC_INST_TLBSRX_DOT 0x7c0006a5 #define PPC_INST_XXLOR 0xf0000510 +#define PPC_INST_XVCPSGNDP 0xf0000780 #define PPC_INST_NAP 0x4c000364 #define PPC_INST_SLEEP 0x4c0003a4 @@ -277,6 +278,8 @@ VSX_XX1((s), a, b)) #define XXLOR(t, a, b) stringify_in_c(.long PPC_INST_XXLOR | \ VSX_XX3((t), a, b)) +#define XVCPSGNDP(t, a, b) stringify_in_c(.long (PPC_INST_XVCPSGNDP | \ + VSX_XX3((t), (a), (b)))) #define PPC_NAP stringify_in_c(.long PPC_INST_NAP) #define PPC_SLEEP stringify_in_c(.long PPC_INST_SLEEP) diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index 121a90b..a1096fb 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -524,6 +524,7 @@ #define SPRN_HSRR0 0x13A /* Save/Restore Register 0 */ #define SPRN_HSRR1 0x13B /* Save/Restore Register 1 */ +#define HSRR1_DENORM 0x00100000 /* Denorm exception */ #define SPRN_TBCTL 0x35f /* PA6T Timebase control register */ #define TBCTL_FREEZE 0x0000000000000000ull /* Freeze all tbs */ diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 39aa97d..5eb0056 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -275,6 +275,31 @@ vsx_unavailable_pSeries_1: STD_EXCEPTION_PSERIES(0x1300, 0x1300, instruction_breakpoint) KVM_HANDLER_PR_SKIP(PACA_EXGEN, EXC_STD, 0x1300) + . = 0x1500 + .global denorm_Hypervisor +denorm_exception_hv: + HMT_MEDIUM + mtspr SPRN_SPRG_HSCRATCH0,r13 + mfspr r13,SPRN_SPRG_HPACA + std r9,PACA_EXGEN+EX_R9(r13) + std r10,PACA_EXGEN+EX_R10(r13) + std r11,PACA_EXGEN+EX_R11(r13) + std r12,PACA_EXGEN+EX_R12(r13) + mfspr r9,SPRN_SPRG_HSCRATCH0 + std r9,PACA_EXGEN+EX_R13(r13) + mfcr r9 + +#ifdef CONFIG_PPC_DENORMALISATION + mfspr r10,SPRN_HSRR1 + mfspr r11,SPRN_HSRR0 /* save HSRR0 */ + andis. r10,r10,(HSRR1_DENORM)@h /* denorm? */ + addi r11,r11,-4 /* HSRR0 is next instruction */ + bne+ denorm_assist +#endif + + EXCEPTION_PROLOG_PSERIES_1(denorm_common, EXC_HV) + KVM_HANDLER_SKIP(PACA_EXGEN, EXC_STD, 0x1500) + #ifdef CONFIG_CBE_RAS STD_EXCEPTION_HV(0x1600, 0x1602, cbe_maintenance) KVM_HANDLER_SKIP(PACA_EXGEN, EXC_HV, 0x1602) @@ -336,6 +361,103 @@ do_stab_bolted_pSeries: KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0x900) KVM_HANDLER(PACA_EXGEN, EXC_HV, 0x982) +#ifdef CONFIG_PPC_DENORMALISATION +denorm_assist: +BEGIN_FTR_SECTION +/* + * To denormalise we need to move a copy of the register to itself. + * For POWER6 do that here for all FP regs. + */ + mfmsr r10 + ori r10,r10,(MSR_FP|MSR_FE0|MSR_FE1) + xori r10,r10,(MSR_FE0|MSR_FE1) + mtmsrd r10 + sync + fmr 0,0 + fmr 1,1 + fmr 2,2 + fmr 3,3 + fmr 4,4 + fmr 5,5 + fmr 6,6 + fmr 7,7 + fmr 8,8 + fmr 9,9 + fmr 10,10 + fmr 11,11 + fmr 12,12 + fmr 13,13 + fmr 14,14 + fmr 15,15 + fmr 16,16 + fmr 17,17 + fmr 18,18 + fmr 19,19 + fmr 20,20 + fmr 21,21 + fmr 22,22 + fmr 23,23 + fmr 24,24 + fmr 25,25 + fmr 26,26 + fmr 27,27 + fmr 28,28 + fmr 29,29 + fmr 30,30 + fmr 31,31 +FTR_SECTION_ELSE +/* + * To denormalise we need to move a copy of the register to itself. + * For POWER7 do that here for the first 32 VSX registers only. + */ + mfmsr r10 + oris r10,r10,MSR_VSX@h + mtmsrd r10 + sync + XVCPSGNDP(0,0,0) + XVCPSGNDP(1,1,1) + XVCPSGNDP(2,2,2) + XVCPSGNDP(3,3,3) + XVCPSGNDP(4,4,4) + XVCPSGNDP(5,5,5) + XVCPSGNDP(6,6,6) + XVCPSGNDP(7,7,7) + XVCPSGNDP(8,8,8) + XVCPSGNDP(9,9,9) + XVCPSGNDP(10,10,10) + XVCPSGNDP(11,11,11) + XVCPSGNDP(12,12,12) + XVCPSGNDP(13,13,13) + XVCPSGNDP(14,14,14) + XVCPSGNDP(15,15,15) + XVCPSGNDP(16,16,16) + XVCPSGNDP(17,17,17) + XVCPSGNDP(18,18,18) + XVCPSGNDP(19,19,19) + XVCPSGNDP(20,20,20) + XVCPSGNDP(21,21,21) + XVCPSGNDP(22,22,22) + XVCPSGNDP(23,23,23) + XVCPSGNDP(24,24,24) + XVCPSGNDP(25,25,25) + XVCPSGNDP(26,26,26) + XVCPSGNDP(27,27,27) + XVCPSGNDP(28,28,28) + XVCPSGNDP(29,29,29) + XVCPSGNDP(30,30,30) + XVCPSGNDP(31,31,31) +ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_206) + mtspr SPRN_HSRR0,r11 + mtcrf 0x80,r9 + ld r9,PACA_EXGEN+EX_R9(r13) + ld r10,PACA_EXGEN+EX_R10(r13) + ld r11,PACA_EXGEN+EX_R11(r13) + ld r12,PACA_EXGEN+EX_R12(r13) + ld r13,PACA_EXGEN+EX_R13(r13) + HRFID + b . +#endif + .align 7 /* moved from 0xe00 */ STD_EXCEPTION_HV(., 0xe02, h_data_storage) @@ -495,6 +617,7 @@ machine_check_common: STD_EXCEPTION_COMMON(0xe60, hmi_exception, .unknown_exception) STD_EXCEPTION_COMMON_ASYNC(0xf00, performance_monitor, .performance_monitor_exception) STD_EXCEPTION_COMMON(0x1300, instruction_breakpoint, .instruction_breakpoint_exception) + STD_EXCEPTION_COMMON(0x1502, denorm, .unknown_exception) #ifdef CONFIG_ALTIVEC STD_EXCEPTION_COMMON(0x1700, altivec_assist, .altivec_assist_exception) #else -- cgit v0.10.2 From f3d3444572dbf01d36884f9b3fe69a611420ad37 Mon Sep 17 00:00:00 2001 From: Scott Wood Date: Wed, 12 Sep 2012 13:00:09 +0000 Subject: powerpc/mm: Fix typo in PTRS_PER_PUD PTRS_PER_PUD should be based on PUD_INDEX_SIZE, not PMD_INDEX_SIZE. We got away with it because PUD and PMD had the same index size, but this is no longer true with Aneesh's patchset to support a 46-bit user effective address space. Signed-off-by: Scott Wood Signed-off-by: Benjamin Herrenschmidt diff --git a/arch/powerpc/include/asm/pgtable-ppc64-4k.h b/arch/powerpc/include/asm/pgtable-ppc64-4k.h index 6eefdcf..d6489a2 100644 --- a/arch/powerpc/include/asm/pgtable-ppc64-4k.h +++ b/arch/powerpc/include/asm/pgtable-ppc64-4k.h @@ -19,7 +19,7 @@ #define PTRS_PER_PTE (1 << PTE_INDEX_SIZE) #define PTRS_PER_PMD (1 << PMD_INDEX_SIZE) -#define PTRS_PER_PUD (1 << PMD_INDEX_SIZE) +#define PTRS_PER_PUD (1 << PUD_INDEX_SIZE) #define PTRS_PER_PGD (1 << PGD_INDEX_SIZE) /* PMD_SHIFT determines what a second-level page table entry can map */ -- cgit v0.10.2 From f03839236392f5bc6107af5c9f749e8a48cd616c Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Mon, 10 Sep 2012 02:52:47 +0000 Subject: powerpc/mm: Replace open coded CONTEXT_BITS value To clarify the meaning for future readers, replace the open coded 19 with CONTEXT_BITS Reviewed-by: Paul Mackerras Signed-off-by: Aneesh Kumar K.V Signed-off-by: Benjamin Herrenschmidt diff --git a/arch/powerpc/mm/mmu_context_hash64.c b/arch/powerpc/mm/mmu_context_hash64.c index 40677aa..daa076c 100644 --- a/arch/powerpc/mm/mmu_context_hash64.c +++ b/arch/powerpc/mm/mmu_context_hash64.c @@ -34,7 +34,7 @@ static DEFINE_IDA(mmu_context_ida); * Each segment contains 2^28 bytes. Each context maps 2^44 bytes, * so we can support 2^19-1 contexts (19 == 35 + 28 - 44). */ -#define MAX_CONTEXT ((1UL << 19) - 1) +#define MAX_CONTEXT ((1UL << CONTEXT_BITS) - 1) int __init_new_context(void) { -- cgit v0.10.2 From f6412b742ac754b4bbf3acaa9c05f825505671a1 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Mon, 10 Sep 2012 02:52:48 +0000 Subject: powerpc/mm: Use hpt_va to compute virtual address Don't open code the same Reviewed-by: Paul Mackerras Signed-off-by: Aneesh Kumar K.V Signed-off-by: Benjamin Herrenschmidt diff --git a/arch/powerpc/platforms/cell/beat_htab.c b/arch/powerpc/platforms/cell/beat_htab.c index 943c9d3..b83077e 100644 --- a/arch/powerpc/platforms/cell/beat_htab.c +++ b/arch/powerpc/platforms/cell/beat_htab.c @@ -259,7 +259,7 @@ static void beat_lpar_hpte_updateboltedpp(unsigned long newpp, u64 dummy0, dummy1; vsid = get_kernel_vsid(ea, MMU_SEGSIZE_256M); - va = (vsid << 28) | (ea & 0x0fffffff); + va = hpt_va(ea, vsid, MMU_SEGSIZE_256M); raw_spin_lock(&beat_htab_lock); slot = beat_lpar_hpte_find(va, psize); -- cgit v0.10.2 From dcda287a9b26309ae43a091d0ecde16f8f61b4c0 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Mon, 10 Sep 2012 02:52:49 +0000 Subject: powerpc/mm: Simplify hpte_decode This patch simplify hpte_decode for easy switching of virtual address to virtual page number in the later patch Reviewed-by: Paul Mackerras Signed-off-by: Aneesh Kumar K.V Signed-off-by: Benjamin Herrenschmidt diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c index f21e8ce..ebf685a 100644 --- a/arch/powerpc/mm/hash_native_64.c +++ b/arch/powerpc/mm/hash_native_64.c @@ -351,9 +351,10 @@ static void native_hpte_invalidate(unsigned long slot, unsigned long va, static void hpte_decode(struct hash_pte *hpte, unsigned long slot, int *psize, int *ssize, unsigned long *va) { + unsigned long avpn, pteg, vpi; unsigned long hpte_r = hpte->r; unsigned long hpte_v = hpte->v; - unsigned long avpn; + unsigned long vsid, seg_off; int i, size, shift, penc; if (!(hpte_v & HPTE_V_LARGE)) @@ -380,32 +381,38 @@ static void hpte_decode(struct hash_pte *hpte, unsigned long slot, } /* This works for all page sizes, and for 256M and 1T segments */ + *ssize = hpte_v >> HPTE_V_SSIZE_SHIFT; shift = mmu_psize_defs[size].shift; - avpn = (HPTE_V_AVPN_VAL(hpte_v) & ~mmu_psize_defs[size].avpnm) << 23; - - if (shift < 23) { - unsigned long vpi, vsid, pteg; - pteg = slot / HPTES_PER_GROUP; - if (hpte_v & HPTE_V_SECONDARY) - pteg = ~pteg; - switch (hpte_v >> HPTE_V_SSIZE_SHIFT) { - case MMU_SEGSIZE_256M: - vpi = ((avpn >> 28) ^ pteg) & htab_hash_mask; - break; - case MMU_SEGSIZE_1T: - vsid = avpn >> 40; + avpn = (HPTE_V_AVPN_VAL(hpte_v) & ~mmu_psize_defs[size].avpnm); + pteg = slot / HPTES_PER_GROUP; + if (hpte_v & HPTE_V_SECONDARY) + pteg = ~pteg; + + switch (*ssize) { + case MMU_SEGSIZE_256M: + /* We only have 28 - 23 bits of seg_off in avpn */ + seg_off = (avpn & 0x1f) << 23; + vsid = avpn >> 5; + /* We can find more bits from the pteg value */ + if (shift < 23) { + vpi = (vsid ^ pteg) & htab_hash_mask; + seg_off |= vpi << shift; + } + *va = vsid << SID_SHIFT | seg_off; + case MMU_SEGSIZE_1T: + /* We only have 40 - 23 bits of seg_off in avpn */ + seg_off = (avpn & 0x1ffff) << 23; + vsid = avpn >> 17; + if (shift < 23) { vpi = (vsid ^ (vsid << 25) ^ pteg) & htab_hash_mask; - break; - default: - avpn = vpi = size = 0; + seg_off |= vpi << shift; } - avpn |= (vpi << mmu_psize_defs[size].shift); + *va = vsid << SID_SHIFT_1T | seg_off; + default: + *va = size = 0; } - - *va = avpn; *psize = size; - *ssize = hpte_v >> HPTE_V_SSIZE_SHIFT; } /* -- cgit v0.10.2 From 5524a27d39b68770f203d8d42eb5a95dde4933bc Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Mon, 10 Sep 2012 02:52:50 +0000 Subject: powerpc/mm: Convert virtual address to vpn This patch convert different functions to take virtual page number instead of virtual address. Virtual page number is virtual address shifted right by VPN_SHIFT (12) bits. This enable us to have an address range of upto 76 bits. Reviewed-by: Paul Mackerras Signed-off-by: Aneesh Kumar K.V Signed-off-by: Benjamin Herrenschmidt diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h index f0e0c6a..7aefdb3 100644 --- a/arch/powerpc/include/asm/kvm_book3s.h +++ b/arch/powerpc/include/asm/kvm_book3s.h @@ -59,7 +59,7 @@ struct hpte_cache { struct hlist_node list_vpte; struct hlist_node list_vpte_long; struct rcu_head rcu_head; - u64 host_va; + u64 host_vpn; u64 pfn; ulong slot; struct kvmppc_pte pte; diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h index 8111e1b..c423197 100644 --- a/arch/powerpc/include/asm/machdep.h +++ b/arch/powerpc/include/asm/machdep.h @@ -34,19 +34,19 @@ struct machdep_calls { char *name; #ifdef CONFIG_PPC64 void (*hpte_invalidate)(unsigned long slot, - unsigned long va, + unsigned long vpn, int psize, int ssize, int local); long (*hpte_updatepp)(unsigned long slot, unsigned long newpp, - unsigned long va, + unsigned long vpn, int psize, int ssize, int local); void (*hpte_updateboltedpp)(unsigned long newpp, unsigned long ea, int psize, int ssize); long (*hpte_insert)(unsigned long hpte_group, - unsigned long va, + unsigned long vpn, unsigned long prpn, unsigned long rflags, unsigned long vflags, diff --git a/arch/powerpc/include/asm/mmu-hash64.h b/arch/powerpc/include/asm/mmu-hash64.h index 1c65a59..6aeb498 100644 --- a/arch/powerpc/include/asm/mmu-hash64.h +++ b/arch/powerpc/include/asm/mmu-hash64.h @@ -154,9 +154,25 @@ struct mmu_psize_def #define MMU_SEGSIZE_256M 0 #define MMU_SEGSIZE_1T 1 +/* + * encode page number shift. + * in order to fit the 78 bit va in a 64 bit variable we shift the va by + * 12 bits. This enable us to address upto 76 bit va. + * For hpt hash from a va we can ignore the page size bits of va and for + * hpte encoding we ignore up to 23 bits of va. So ignoring lower 12 bits ensure + * we work in all cases including 4k page size. + */ +#define VPN_SHIFT 12 #ifndef __ASSEMBLY__ +static inline int segment_shift(int ssize) +{ + if (ssize == MMU_SEGSIZE_256M) + return SID_SHIFT; + return SID_SHIFT_1T; +} + /* * The current system page and segment sizes */ @@ -180,18 +196,39 @@ extern unsigned long tce_alloc_start, tce_alloc_end; extern int mmu_ci_restrictions; /* + * This computes the AVPN and B fields of the first dword of a HPTE, + * for use when we want to match an existing PTE. The bottom 7 bits + * of the returned value are zero. + */ +static inline unsigned long hpte_encode_avpn(unsigned long vpn, int psize, + int ssize) +{ + unsigned long v; + /* + * The AVA field omits the low-order 23 bits of the 78 bits VA. + * These bits are not needed in the PTE, because the + * low-order b of these bits are part of the byte offset + * into the virtual page and, if b < 23, the high-order + * 23-b of these bits are always used in selecting the + * PTEGs to be searched + */ + v = (vpn >> (23 - VPN_SHIFT)) & ~(mmu_psize_defs[psize].avpnm); + v <<= HPTE_V_AVPN_SHIFT; + v |= ((unsigned long) ssize) << HPTE_V_SSIZE_SHIFT; + return v; +} + +/* * This function sets the AVPN and L fields of the HPTE appropriately * for the page size */ -static inline unsigned long hpte_encode_v(unsigned long va, int psize, - int ssize) +static inline unsigned long hpte_encode_v(unsigned long vpn, + int psize, int ssize) { unsigned long v; - v = (va >> 23) & ~(mmu_psize_defs[psize].avpnm); - v <<= HPTE_V_AVPN_SHIFT; + v = hpte_encode_avpn(vpn, psize, ssize); if (psize != MMU_PAGE_4K) v |= HPTE_V_LARGE; - v |= ((unsigned long) ssize) << HPTE_V_SSIZE_SHIFT; return v; } @@ -216,30 +253,37 @@ static inline unsigned long hpte_encode_r(unsigned long pa, int psize) } /* - * Build a VA given VSID, EA and segment size + * Build a VPN_SHIFT bit shifted va given VSID, EA and segment size. */ -static inline unsigned long hpt_va(unsigned long ea, unsigned long vsid, - int ssize) +static inline unsigned long hpt_vpn(unsigned long ea, + unsigned long vsid, int ssize) { - if (ssize == MMU_SEGSIZE_256M) - return (vsid << 28) | (ea & 0xfffffffUL); - return (vsid << 40) | (ea & 0xffffffffffUL); + unsigned long mask; + int s_shift = segment_shift(ssize); + + mask = (1ul << (s_shift - VPN_SHIFT)) - 1; + return (vsid << (s_shift - VPN_SHIFT)) | ((ea >> VPN_SHIFT) & mask); } /* * This hashes a virtual address */ - -static inline unsigned long hpt_hash(unsigned long va, unsigned int shift, - int ssize) +static inline unsigned long hpt_hash(unsigned long vpn, + unsigned int shift, int ssize) { + int mask; unsigned long hash, vsid; + /* VPN_SHIFT can be atmost 12 */ if (ssize == MMU_SEGSIZE_256M) { - hash = (va >> 28) ^ ((va & 0x0fffffffUL) >> shift); + mask = (1ul << (SID_SHIFT - VPN_SHIFT)) - 1; + hash = (vpn >> (SID_SHIFT - VPN_SHIFT)) ^ + ((vpn & mask) >> (shift - VPN_SHIFT)); } else { - vsid = va >> 40; - hash = vsid ^ (vsid << 25) ^ ((va & 0xffffffffffUL) >> shift); + mask = (1ul << (SID_SHIFT_1T - VPN_SHIFT)) - 1; + vsid = vpn >> (SID_SHIFT_1T - VPN_SHIFT); + hash = vsid ^ (vsid << 25) ^ + ((vpn & mask) >> (shift - VPN_SHIFT)) ; } return hash & 0x7fffffffffUL; } diff --git a/arch/powerpc/include/asm/pte-hash64-64k.h b/arch/powerpc/include/asm/pte-hash64-64k.h index 59247e8..eedf427c9 100644 --- a/arch/powerpc/include/asm/pte-hash64-64k.h +++ b/arch/powerpc/include/asm/pte-hash64-64k.h @@ -58,14 +58,16 @@ /* Trick: we set __end to va + 64k, which happens works for * a 16M page as well as we want only one iteration */ -#define pte_iterate_hashed_subpages(rpte, psize, va, index, shift) \ - do { \ - unsigned long __end = va + PAGE_SIZE; \ - unsigned __split = (psize == MMU_PAGE_4K || \ - psize == MMU_PAGE_64K_AP); \ - shift = mmu_psize_defs[psize].shift; \ - for (index = 0; va < __end; index++, va += (1L << shift)) { \ - if (!__split || __rpte_sub_valid(rpte, index)) do { \ +#define pte_iterate_hashed_subpages(rpte, psize, vpn, index, shift) \ + do { \ + unsigned long __end = vpn + (1UL << (PAGE_SHIFT - VPN_SHIFT)); \ + unsigned __split = (psize == MMU_PAGE_4K || \ + psize == MMU_PAGE_64K_AP); \ + shift = mmu_psize_defs[psize].shift; \ + for (index = 0; vpn < __end; index++, \ + vpn += (1L << (shift - VPN_SHIFT))) { \ + if (!__split || __rpte_sub_valid(rpte, index)) \ + do { #define pte_iterate_hashed_end() } while(0); } } while(0) diff --git a/arch/powerpc/include/asm/tlbflush.h b/arch/powerpc/include/asm/tlbflush.h index 81143fc..fc02d1d 100644 --- a/arch/powerpc/include/asm/tlbflush.h +++ b/arch/powerpc/include/asm/tlbflush.h @@ -95,7 +95,7 @@ struct ppc64_tlb_batch { unsigned long index; struct mm_struct *mm; real_pte_t pte[PPC64_TLB_BATCH_NR]; - unsigned long vaddr[PPC64_TLB_BATCH_NR]; + unsigned long vpn[PPC64_TLB_BATCH_NR]; unsigned int psize; int ssize; }; @@ -127,7 +127,7 @@ static inline void arch_leave_lazy_mmu_mode(void) #define arch_flush_lazy_mmu_mode() do {} while (0) -extern void flush_hash_page(unsigned long va, real_pte_t pte, int psize, +extern void flush_hash_page(unsigned long vpn, real_pte_t pte, int psize, int ssize, int local); extern void flush_hash_range(unsigned long number, int local); diff --git a/arch/powerpc/kvm/book3s_32_mmu_host.c b/arch/powerpc/kvm/book3s_32_mmu_host.c index 837f13e..00aa612 100644 --- a/arch/powerpc/kvm/book3s_32_mmu_host.c +++ b/arch/powerpc/kvm/book3s_32_mmu_host.c @@ -141,7 +141,7 @@ extern char etext[]; int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte) { pfn_t hpaddr; - u64 va; + u64 vpn; u64 vsid; struct kvmppc_sid_map *map; volatile u32 *pteg; @@ -173,7 +173,7 @@ int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte) BUG_ON(!map); vsid = map->host_vsid; - va = (vsid << SID_SHIFT) | (eaddr & ~ESID_MASK); + vpn = (vsid << (SID_SHIFT - VPN_SHIFT)) | ((eaddr & ~ESID_MASK) >> VPN_SHIFT) next_pteg: if (rr == 16) { @@ -244,11 +244,11 @@ next_pteg: dprintk_mmu("KVM: %c%c Map 0x%llx: [%lx] 0x%llx (0x%llx) -> %lx\n", orig_pte->may_write ? 'w' : '-', orig_pte->may_execute ? 'x' : '-', - orig_pte->eaddr, (ulong)pteg, va, + orig_pte->eaddr, (ulong)pteg, vpn, orig_pte->vpage, hpaddr); pte->slot = (ulong)&pteg[rr]; - pte->host_va = va; + pte->host_vpn = vpn; pte->pte = *orig_pte; pte->pfn = hpaddr >> PAGE_SHIFT; diff --git a/arch/powerpc/kvm/book3s_64_mmu_host.c b/arch/powerpc/kvm/book3s_64_mmu_host.c index 0688b6b..4d72f9e 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_host.c +++ b/arch/powerpc/kvm/book3s_64_mmu_host.c @@ -33,7 +33,7 @@ void kvmppc_mmu_invalidate_pte(struct kvm_vcpu *vcpu, struct hpte_cache *pte) { - ppc_md.hpte_invalidate(pte->slot, pte->host_va, + ppc_md.hpte_invalidate(pte->slot, pte->host_vpn, MMU_PAGE_4K, MMU_SEGSIZE_256M, false); } @@ -80,8 +80,9 @@ static struct kvmppc_sid_map *find_sid_vsid(struct kvm_vcpu *vcpu, u64 gvsid) int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte) { + unsigned long vpn; pfn_t hpaddr; - ulong hash, hpteg, va; + ulong hash, hpteg; u64 vsid; int ret; int rflags = 0x192; @@ -117,7 +118,7 @@ int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte) } vsid = map->host_vsid; - va = hpt_va(orig_pte->eaddr, vsid, MMU_SEGSIZE_256M); + vpn = hpt_vpn(orig_pte->eaddr, vsid, MMU_SEGSIZE_256M); if (!orig_pte->may_write) rflags |= HPTE_R_PP; @@ -129,7 +130,7 @@ int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte) else kvmppc_mmu_flush_icache(hpaddr >> PAGE_SHIFT); - hash = hpt_hash(va, PTE_SIZE, MMU_SEGSIZE_256M); + hash = hpt_hash(vpn, PTE_SIZE, MMU_SEGSIZE_256M); map_again: hpteg = ((hash & htab_hash_mask) * HPTES_PER_GROUP); @@ -141,7 +142,8 @@ map_again: goto out; } - ret = ppc_md.hpte_insert(hpteg, va, hpaddr, rflags, vflags, MMU_PAGE_4K, MMU_SEGSIZE_256M); + ret = ppc_md.hpte_insert(hpteg, vpn, hpaddr, rflags, vflags, + MMU_PAGE_4K, MMU_SEGSIZE_256M); if (ret < 0) { /* If we couldn't map a primary PTE, try a secondary */ @@ -152,7 +154,8 @@ map_again: } else { struct hpte_cache *pte = kvmppc_mmu_hpte_cache_next(vcpu); - trace_kvm_book3s_64_mmu_map(rflags, hpteg, va, hpaddr, orig_pte); + trace_kvm_book3s_64_mmu_map(rflags, hpteg, + vpn, hpaddr, orig_pte); /* The ppc_md code may give us a secondary entry even though we asked for a primary. Fix up. */ @@ -162,7 +165,7 @@ map_again: } pte->slot = hpteg + (ret & 7); - pte->host_va = va; + pte->host_vpn = vpn; pte->pte = *orig_pte; pte->pfn = hpaddr >> PAGE_SHIFT; diff --git a/arch/powerpc/kvm/trace.h b/arch/powerpc/kvm/trace.h index 877186b..ddb6a21 100644 --- a/arch/powerpc/kvm/trace.h +++ b/arch/powerpc/kvm/trace.h @@ -189,7 +189,7 @@ TRACE_EVENT(kvm_book3s_mmu_map, TP_ARGS(pte), TP_STRUCT__entry( - __field( u64, host_va ) + __field( u64, host_vpn ) __field( u64, pfn ) __field( ulong, eaddr ) __field( u64, vpage ) @@ -198,7 +198,7 @@ TRACE_EVENT(kvm_book3s_mmu_map, ), TP_fast_assign( - __entry->host_va = pte->host_va; + __entry->host_vpn = pte->host_vpn; __entry->pfn = pte->pfn; __entry->eaddr = pte->pte.eaddr; __entry->vpage = pte->pte.vpage; @@ -208,8 +208,8 @@ TRACE_EVENT(kvm_book3s_mmu_map, (pte->pte.may_execute ? 0x1 : 0); ), - TP_printk("Map: hva=%llx pfn=%llx ea=%lx vp=%llx ra=%lx [%x]", - __entry->host_va, __entry->pfn, __entry->eaddr, + TP_printk("Map: hvpn=%llx pfn=%llx ea=%lx vp=%llx ra=%lx [%x]", + __entry->host_vpn, __entry->pfn, __entry->eaddr, __entry->vpage, __entry->raddr, __entry->flags) ); @@ -218,7 +218,7 @@ TRACE_EVENT(kvm_book3s_mmu_invalidate, TP_ARGS(pte), TP_STRUCT__entry( - __field( u64, host_va ) + __field( u64, host_vpn ) __field( u64, pfn ) __field( ulong, eaddr ) __field( u64, vpage ) @@ -227,7 +227,7 @@ TRACE_EVENT(kvm_book3s_mmu_invalidate, ), TP_fast_assign( - __entry->host_va = pte->host_va; + __entry->host_vpn = pte->host_vpn; __entry->pfn = pte->pfn; __entry->eaddr = pte->pte.eaddr; __entry->vpage = pte->pte.vpage; @@ -238,7 +238,7 @@ TRACE_EVENT(kvm_book3s_mmu_invalidate, ), TP_printk("Flush: hva=%llx pfn=%llx ea=%lx vp=%llx ra=%lx [%x]", - __entry->host_va, __entry->pfn, __entry->eaddr, + __entry->host_vpn, __entry->pfn, __entry->eaddr, __entry->vpage, __entry->raddr, __entry->flags) ); diff --git a/arch/powerpc/mm/hash_low_64.S b/arch/powerpc/mm/hash_low_64.S index 602aeb0..5658508 100644 --- a/arch/powerpc/mm/hash_low_64.S +++ b/arch/powerpc/mm/hash_low_64.S @@ -63,7 +63,7 @@ _GLOBAL(__hash_page_4K) /* Save non-volatile registers. * r31 will hold "old PTE" * r30 is "new PTE" - * r29 is "va" + * r29 is vpn * r28 is a hash value * r27 is hashtab mask (maybe dynamic patched instead ?) */ @@ -111,10 +111,10 @@ BEGIN_FTR_SECTION cmpdi r9,0 /* check segment size */ bne 3f END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT) - /* Calc va and put it in r29 */ - rldicr r29,r5,28,63-28 - rldicl r3,r3,0,36 - or r29,r3,r29 + /* Calc vpn and put it in r29 */ + sldi r29,r5,SID_SHIFT - VPN_SHIFT + rldicl r28,r3,64 - VPN_SHIFT,64 - (SID_SHIFT - VPN_SHIFT) + or r29,r28,r29 /* Calculate hash value for primary slot and store it in r28 */ rldicl r5,r5,0,25 /* vsid & 0x0000007fffffffff */ @@ -122,14 +122,19 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT) xor r28,r5,r0 b 4f -3: /* Calc VA and hash in r29 and r28 for 1T segment */ - sldi r29,r5,40 /* vsid << 40 */ - clrldi r3,r3,24 /* ea & 0xffffffffff */ +3: /* Calc vpn and put it in r29 */ + sldi r29,r5,SID_SHIFT_1T - VPN_SHIFT + rldicl r28,r3,64 - VPN_SHIFT,64 - (SID_SHIFT_1T - VPN_SHIFT) + or r29,r28,r29 + + /* + * calculate hash value for primary slot and + * store it in r28 for 1T segment + */ rldic r28,r5,25,25 /* (vsid << 25) & 0x7fffffffff */ clrldi r5,r5,40 /* vsid & 0xffffff */ rldicl r0,r3,64-12,36 /* (ea >> 12) & 0xfffffff */ xor r28,r28,r5 - or r29,r3,r29 /* VA */ xor r28,r28,r0 /* hash */ /* Convert linux PTE bits into HW equivalents */ @@ -185,7 +190,7 @@ htab_insert_pte: /* Call ppc_md.hpte_insert */ ld r6,STK_PARAM(R4)(r1) /* Retrieve new pp bits */ - mr r4,r29 /* Retrieve va */ + mr r4,r29 /* Retrieve vpn */ li r7,0 /* !bolted, !secondary */ li r8,MMU_PAGE_4K /* page size */ ld r9,STK_PARAM(R9)(r1) /* segment size */ @@ -208,7 +213,7 @@ _GLOBAL(htab_call_hpte_insert1) /* Call ppc_md.hpte_insert */ ld r6,STK_PARAM(R4)(r1) /* Retrieve new pp bits */ - mr r4,r29 /* Retrieve va */ + mr r4,r29 /* Retrieve vpn */ li r7,HPTE_V_SECONDARY /* !bolted, secondary */ li r8,MMU_PAGE_4K /* page size */ ld r9,STK_PARAM(R9)(r1) /* segment size */ @@ -278,7 +283,7 @@ htab_modify_pte: add r3,r0,r3 /* add slot idx */ /* Call ppc_md.hpte_updatepp */ - mr r5,r29 /* va */ + mr r5,r29 /* vpn */ li r6,MMU_PAGE_4K /* page size */ ld r7,STK_PARAM(R9)(r1) /* segment size */ ld r8,STK_PARAM(R8)(r1) /* get "local" param */ @@ -339,7 +344,7 @@ _GLOBAL(__hash_page_4K) /* Save non-volatile registers. * r31 will hold "old PTE" * r30 is "new PTE" - * r29 is "va" + * r29 is vpn * r28 is a hash value * r27 is hashtab mask (maybe dynamic patched instead ?) * r26 is the hidx mask @@ -394,10 +399,14 @@ BEGIN_FTR_SECTION cmpdi r9,0 /* check segment size */ bne 3f END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT) - /* Calc va and put it in r29 */ - rldicr r29,r5,28,63-28 /* r29 = (vsid << 28) */ - rldicl r3,r3,0,36 /* r3 = (ea & 0x0fffffff) */ - or r29,r3,r29 /* r29 = va */ + /* Calc vpn and put it in r29 */ + sldi r29,r5,SID_SHIFT - VPN_SHIFT + /* + * clrldi r3,r3,64 - SID_SHIFT --> ea & 0xfffffff + * srdi r28,r3,VPN_SHIFT + */ + rldicl r28,r3,64 - VPN_SHIFT,64 - (SID_SHIFT - VPN_SHIFT) + or r29,r28,r29 /* Calculate hash value for primary slot and store it in r28 */ rldicl r5,r5,0,25 /* vsid & 0x0000007fffffffff */ @@ -405,14 +414,23 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT) xor r28,r5,r0 b 4f -3: /* Calc VA and hash in r29 and r28 for 1T segment */ - sldi r29,r5,40 /* vsid << 40 */ - clrldi r3,r3,24 /* ea & 0xffffffffff */ +3: /* Calc vpn and put it in r29 */ + sldi r29,r5,SID_SHIFT_1T - VPN_SHIFT + /* + * clrldi r3,r3,64 - SID_SHIFT_1T --> ea & 0xffffffffff + * srdi r28,r3,VPN_SHIFT + */ + rldicl r28,r3,64 - VPN_SHIFT,64 - (SID_SHIFT_1T - VPN_SHIFT) + or r29,r28,r29 + + /* + * Calculate hash value for primary slot and + * store it in r28 for 1T segment + */ rldic r28,r5,25,25 /* (vsid << 25) & 0x7fffffffff */ clrldi r5,r5,40 /* vsid & 0xffffff */ rldicl r0,r3,64-12,36 /* (ea >> 12) & 0xfffffff */ xor r28,r28,r5 - or r29,r3,r29 /* VA */ xor r28,r28,r0 /* hash */ /* Convert linux PTE bits into HW equivalents */ @@ -488,7 +506,7 @@ htab_special_pfn: /* Call ppc_md.hpte_insert */ ld r6,STK_PARAM(R4)(r1) /* Retrieve new pp bits */ - mr r4,r29 /* Retrieve va */ + mr r4,r29 /* Retrieve vpn */ li r7,0 /* !bolted, !secondary */ li r8,MMU_PAGE_4K /* page size */ ld r9,STK_PARAM(R9)(r1) /* segment size */ @@ -515,7 +533,7 @@ _GLOBAL(htab_call_hpte_insert1) /* Call ppc_md.hpte_insert */ ld r6,STK_PARAM(R4)(r1) /* Retrieve new pp bits */ - mr r4,r29 /* Retrieve va */ + mr r4,r29 /* Retrieve vpn */ li r7,HPTE_V_SECONDARY /* !bolted, secondary */ li r8,MMU_PAGE_4K /* page size */ ld r9,STK_PARAM(R9)(r1) /* segment size */ @@ -547,7 +565,7 @@ _GLOBAL(htab_call_hpte_remove) * useless now that the segment has been switched to 4k pages. */ htab_inval_old_hpte: - mr r3,r29 /* virtual addr */ + mr r3,r29 /* vpn */ mr r4,r31 /* PTE.pte */ li r5,0 /* PTE.hidx */ li r6,MMU_PAGE_64K /* psize */ @@ -620,7 +638,7 @@ htab_modify_pte: add r3,r0,r3 /* add slot idx */ /* Call ppc_md.hpte_updatepp */ - mr r5,r29 /* va */ + mr r5,r29 /* vpn */ li r6,MMU_PAGE_4K /* page size */ ld r7,STK_PARAM(R9)(r1) /* segment size */ ld r8,STK_PARAM(R8)(r1) /* get "local" param */ @@ -676,7 +694,7 @@ _GLOBAL(__hash_page_64K) /* Save non-volatile registers. * r31 will hold "old PTE" * r30 is "new PTE" - * r29 is "va" + * r29 is vpn * r28 is a hash value * r27 is hashtab mask (maybe dynamic patched instead ?) */ @@ -729,10 +747,10 @@ BEGIN_FTR_SECTION cmpdi r9,0 /* check segment size */ bne 3f END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT) - /* Calc va and put it in r29 */ - rldicr r29,r5,28,63-28 - rldicl r3,r3,0,36 - or r29,r3,r29 + /* Calc vpn and put it in r29 */ + sldi r29,r5,SID_SHIFT - VPN_SHIFT + rldicl r28,r3,64 - VPN_SHIFT,64 - (SID_SHIFT - VPN_SHIFT) + or r29,r28,r29 /* Calculate hash value for primary slot and store it in r28 */ rldicl r5,r5,0,25 /* vsid & 0x0000007fffffffff */ @@ -740,14 +758,19 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT) xor r28,r5,r0 b 4f -3: /* Calc VA and hash in r29 and r28 for 1T segment */ - sldi r29,r5,40 /* vsid << 40 */ - clrldi r3,r3,24 /* ea & 0xffffffffff */ +3: /* Calc vpn and put it in r29 */ + sldi r29,r5,SID_SHIFT_1T - VPN_SHIFT + rldicl r28,r3,64 - VPN_SHIFT,64 - (SID_SHIFT_1T - VPN_SHIFT) + or r29,r28,r29 + + /* + * calculate hash value for primary slot and + * store it in r28 for 1T segment + */ rldic r28,r5,25,25 /* (vsid << 25) & 0x7fffffffff */ clrldi r5,r5,40 /* vsid & 0xffffff */ rldicl r0,r3,64-16,40 /* (ea >> 16) & 0xffffff */ xor r28,r28,r5 - or r29,r3,r29 /* VA */ xor r28,r28,r0 /* hash */ /* Convert linux PTE bits into HW equivalents */ @@ -806,7 +829,7 @@ ht64_insert_pte: /* Call ppc_md.hpte_insert */ ld r6,STK_PARAM(R4)(r1) /* Retrieve new pp bits */ - mr r4,r29 /* Retrieve va */ + mr r4,r29 /* Retrieve vpn */ li r7,0 /* !bolted, !secondary */ li r8,MMU_PAGE_64K ld r9,STK_PARAM(R9)(r1) /* segment size */ @@ -829,7 +852,7 @@ _GLOBAL(ht64_call_hpte_insert1) /* Call ppc_md.hpte_insert */ ld r6,STK_PARAM(R4)(r1) /* Retrieve new pp bits */ - mr r4,r29 /* Retrieve va */ + mr r4,r29 /* Retrieve vpn */ li r7,HPTE_V_SECONDARY /* !bolted, secondary */ li r8,MMU_PAGE_64K ld r9,STK_PARAM(R9)(r1) /* segment size */ @@ -899,7 +922,7 @@ ht64_modify_pte: add r3,r0,r3 /* add slot idx */ /* Call ppc_md.hpte_updatepp */ - mr r5,r29 /* va */ + mr r5,r29 /* vpn */ li r6,MMU_PAGE_64K ld r7,STK_PARAM(R9)(r1) /* segment size */ ld r8,STK_PARAM(R8)(r1) /* get "local" param */ diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c index ebf685a..a4a1c72 100644 --- a/arch/powerpc/mm/hash_native_64.c +++ b/arch/powerpc/mm/hash_native_64.c @@ -39,22 +39,35 @@ DEFINE_RAW_SPINLOCK(native_tlbie_lock); -static inline void __tlbie(unsigned long va, int psize, int ssize) +static inline void __tlbie(unsigned long vpn, int psize, int ssize) { + unsigned long va; unsigned int penc; - /* clear top 16 bits, non SLS segment */ + /* + * We need 14 to 65 bits of va for a tlibe of 4K page + * With vpn we ignore the lower VPN_SHIFT bits already. + * And top two bits are already ignored because we can + * only accomadate 76 bits in a 64 bit vpn with a VPN_SHIFT + * of 12. + */ + va = vpn << VPN_SHIFT; + /* + * clear top 16 bits of 64bit va, non SLS segment + * Older versions of the architecture (2.02 and earler) require the + * masking of the top 16 bits. + */ va &= ~(0xffffULL << 48); switch (psize) { case MMU_PAGE_4K: - va &= ~0xffful; va |= ssize << 8; asm volatile(ASM_FTR_IFCLR("tlbie %0,0", PPC_TLBIE(%1,%0), %2) : : "r" (va), "r"(0), "i" (CPU_FTR_ARCH_206) : "memory"); break; default: + /* We need 14 to 14 + i bits of va */ penc = mmu_psize_defs[psize].penc; va &= ~((1ul << mmu_psize_defs[psize].shift) - 1); va |= penc << 12; @@ -67,21 +80,28 @@ static inline void __tlbie(unsigned long va, int psize, int ssize) } } -static inline void __tlbiel(unsigned long va, int psize, int ssize) +static inline void __tlbiel(unsigned long vpn, int psize, int ssize) { + unsigned long va; unsigned int penc; - /* clear top 16 bits, non SLS segment */ + /* VPN_SHIFT can be atmost 12 */ + va = vpn << VPN_SHIFT; + /* + * clear top 16 bits of 64 bit va, non SLS segment + * Older versions of the architecture (2.02 and earler) require the + * masking of the top 16 bits. + */ va &= ~(0xffffULL << 48); switch (psize) { case MMU_PAGE_4K: - va &= ~0xffful; va |= ssize << 8; asm volatile(".long 0x7c000224 | (%0 << 11) | (0 << 21)" : : "r"(va) : "memory"); break; default: + /* We need 14 to 14 + i bits of va */ penc = mmu_psize_defs[psize].penc; va &= ~((1ul << mmu_psize_defs[psize].shift) - 1); va |= penc << 12; @@ -94,7 +114,7 @@ static inline void __tlbiel(unsigned long va, int psize, int ssize) } -static inline void tlbie(unsigned long va, int psize, int ssize, int local) +static inline void tlbie(unsigned long vpn, int psize, int ssize, int local) { unsigned int use_local = local && mmu_has_feature(MMU_FTR_TLBIEL); int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE); @@ -105,10 +125,10 @@ static inline void tlbie(unsigned long va, int psize, int ssize, int local) raw_spin_lock(&native_tlbie_lock); asm volatile("ptesync": : :"memory"); if (use_local) { - __tlbiel(va, psize, ssize); + __tlbiel(vpn, psize, ssize); asm volatile("ptesync": : :"memory"); } else { - __tlbie(va, psize, ssize); + __tlbie(vpn, psize, ssize); asm volatile("eieio; tlbsync; ptesync": : :"memory"); } if (lock_tlbie && !use_local) @@ -134,7 +154,7 @@ static inline void native_unlock_hpte(struct hash_pte *hptep) clear_bit_unlock(HPTE_LOCK_BIT, word); } -static long native_hpte_insert(unsigned long hpte_group, unsigned long va, +static long native_hpte_insert(unsigned long hpte_group, unsigned long vpn, unsigned long pa, unsigned long rflags, unsigned long vflags, int psize, int ssize) { @@ -143,9 +163,9 @@ static long native_hpte_insert(unsigned long hpte_group, unsigned long va, int i; if (!(vflags & HPTE_V_BOLTED)) { - DBG_LOW(" insert(group=%lx, va=%016lx, pa=%016lx," + DBG_LOW(" insert(group=%lx, vpn=%016lx, pa=%016lx," " rflags=%lx, vflags=%lx, psize=%d)\n", - hpte_group, va, pa, rflags, vflags, psize); + hpte_group, vpn, pa, rflags, vflags, psize); } for (i = 0; i < HPTES_PER_GROUP; i++) { @@ -163,7 +183,7 @@ static long native_hpte_insert(unsigned long hpte_group, unsigned long va, if (i == HPTES_PER_GROUP) return -1; - hpte_v = hpte_encode_v(va, psize, ssize) | vflags | HPTE_V_VALID; + hpte_v = hpte_encode_v(vpn, psize, ssize) | vflags | HPTE_V_VALID; hpte_r = hpte_encode_r(pa, psize) | rflags; if (!(vflags & HPTE_V_BOLTED)) { @@ -225,17 +245,17 @@ static long native_hpte_remove(unsigned long hpte_group) } static long native_hpte_updatepp(unsigned long slot, unsigned long newpp, - unsigned long va, int psize, int ssize, + unsigned long vpn, int psize, int ssize, int local) { struct hash_pte *hptep = htab_address + slot; unsigned long hpte_v, want_v; int ret = 0; - want_v = hpte_encode_v(va, psize, ssize); + want_v = hpte_encode_v(vpn, psize, ssize); - DBG_LOW(" update(va=%016lx, avpnv=%016lx, hash=%016lx, newpp=%x)", - va, want_v & HPTE_V_AVPN, slot, newpp); + DBG_LOW(" update(vpn=%016lx, avpnv=%016lx, group=%lx, newpp=%lx)", + vpn, want_v & HPTE_V_AVPN, slot, newpp); native_lock_hpte(hptep); @@ -254,12 +274,12 @@ static long native_hpte_updatepp(unsigned long slot, unsigned long newpp, native_unlock_hpte(hptep); /* Ensure it is out of the tlb too. */ - tlbie(va, psize, ssize, local); + tlbie(vpn, psize, ssize, local); return ret; } -static long native_hpte_find(unsigned long va, int psize, int ssize) +static long native_hpte_find(unsigned long vpn, int psize, int ssize) { struct hash_pte *hptep; unsigned long hash; @@ -267,8 +287,8 @@ static long native_hpte_find(unsigned long va, int psize, int ssize) long slot; unsigned long want_v, hpte_v; - hash = hpt_hash(va, mmu_psize_defs[psize].shift, ssize); - want_v = hpte_encode_v(va, psize, ssize); + hash = hpt_hash(vpn, mmu_psize_defs[psize].shift, ssize); + want_v = hpte_encode_v(vpn, psize, ssize); /* Bolted mappings are only ever in the primary group */ slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; @@ -295,14 +315,15 @@ static long native_hpte_find(unsigned long va, int psize, int ssize) static void native_hpte_updateboltedpp(unsigned long newpp, unsigned long ea, int psize, int ssize) { - unsigned long vsid, va; + unsigned long vpn; + unsigned long vsid; long slot; struct hash_pte *hptep; vsid = get_kernel_vsid(ea, ssize); - va = hpt_va(ea, vsid, ssize); + vpn = hpt_vpn(ea, vsid, ssize); - slot = native_hpte_find(va, psize, ssize); + slot = native_hpte_find(vpn, psize, ssize); if (slot == -1) panic("could not find page to bolt\n"); hptep = htab_address + slot; @@ -312,10 +333,10 @@ static void native_hpte_updateboltedpp(unsigned long newpp, unsigned long ea, (newpp & (HPTE_R_PP | HPTE_R_N)); /* Ensure it is out of the tlb too. */ - tlbie(va, psize, ssize, 0); + tlbie(vpn, psize, ssize, 0); } -static void native_hpte_invalidate(unsigned long slot, unsigned long va, +static void native_hpte_invalidate(unsigned long slot, unsigned long vpn, int psize, int ssize, int local) { struct hash_pte *hptep = htab_address + slot; @@ -325,9 +346,9 @@ static void native_hpte_invalidate(unsigned long slot, unsigned long va, local_irq_save(flags); - DBG_LOW(" invalidate(va=%016lx, hash: %x)\n", va, slot); + DBG_LOW(" invalidate(vpn=%016lx, hash: %lx)\n", vpn, slot); - want_v = hpte_encode_v(va, psize, ssize); + want_v = hpte_encode_v(vpn, psize, ssize); native_lock_hpte(hptep); hpte_v = hptep->v; @@ -339,7 +360,7 @@ static void native_hpte_invalidate(unsigned long slot, unsigned long va, hptep->v = 0; /* Invalidate the TLB */ - tlbie(va, psize, ssize, local); + tlbie(vpn, psize, ssize, local); local_irq_restore(flags); } @@ -349,7 +370,7 @@ static void native_hpte_invalidate(unsigned long slot, unsigned long va, #define LP_MASK(i) ((0xFF >> (i)) << LP_SHIFT) static void hpte_decode(struct hash_pte *hpte, unsigned long slot, - int *psize, int *ssize, unsigned long *va) + int *psize, int *ssize, unsigned long *vpn) { unsigned long avpn, pteg, vpi; unsigned long hpte_r = hpte->r; @@ -399,7 +420,7 @@ static void hpte_decode(struct hash_pte *hpte, unsigned long slot, vpi = (vsid ^ pteg) & htab_hash_mask; seg_off |= vpi << shift; } - *va = vsid << SID_SHIFT | seg_off; + *vpn = vsid << (SID_SHIFT - VPN_SHIFT) | seg_off >> VPN_SHIFT; case MMU_SEGSIZE_1T: /* We only have 40 - 23 bits of seg_off in avpn */ seg_off = (avpn & 0x1ffff) << 23; @@ -408,9 +429,9 @@ static void hpte_decode(struct hash_pte *hpte, unsigned long slot, vpi = (vsid ^ (vsid << 25) ^ pteg) & htab_hash_mask; seg_off |= vpi << shift; } - *va = vsid << SID_SHIFT_1T | seg_off; + *vpn = vsid << (SID_SHIFT_1T - VPN_SHIFT) | seg_off >> VPN_SHIFT; default: - *va = size = 0; + *vpn = size = 0; } *psize = size; } @@ -425,9 +446,10 @@ static void hpte_decode(struct hash_pte *hpte, unsigned long slot, */ static void native_hpte_clear(void) { + unsigned long vpn = 0; unsigned long slot, slots, flags; struct hash_pte *hptep = htab_address; - unsigned long hpte_v, va; + unsigned long hpte_v; unsigned long pteg_count; int psize, ssize; @@ -455,9 +477,9 @@ static void native_hpte_clear(void) * already hold the native_tlbie_lock. */ if (hpte_v & HPTE_V_VALID) { - hpte_decode(hptep, slot, &psize, &ssize, &va); + hpte_decode(hptep, slot, &psize, &ssize, &vpn); hptep->v = 0; - __tlbie(va, psize, ssize); + __tlbie(vpn, psize, ssize); } } @@ -472,7 +494,8 @@ static void native_hpte_clear(void) */ static void native_flush_hash_range(unsigned long number, int local) { - unsigned long va, hash, index, hidx, shift, slot; + unsigned long vpn; + unsigned long hash, index, hidx, shift, slot; struct hash_pte *hptep; unsigned long hpte_v; unsigned long want_v; @@ -486,18 +509,18 @@ static void native_flush_hash_range(unsigned long number, int local) local_irq_save(flags); for (i = 0; i < number; i++) { - va = batch->vaddr[i]; + vpn = batch->vpn[i]; pte = batch->pte[i]; - pte_iterate_hashed_subpages(pte, psize, va, index, shift) { - hash = hpt_hash(va, shift, ssize); + pte_iterate_hashed_subpages(pte, psize, vpn, index, shift) { + hash = hpt_hash(vpn, shift, ssize); hidx = __rpte_to_hidx(pte, index); if (hidx & _PTEIDX_SECONDARY) hash = ~hash; slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; slot += hidx & _PTEIDX_GROUP_IX; hptep = htab_address + slot; - want_v = hpte_encode_v(va, psize, ssize); + want_v = hpte_encode_v(vpn, psize, ssize); native_lock_hpte(hptep); hpte_v = hptep->v; if (!HPTE_V_COMPARE(hpte_v, want_v) || @@ -512,12 +535,12 @@ static void native_flush_hash_range(unsigned long number, int local) mmu_psize_defs[psize].tlbiel && local) { asm volatile("ptesync":::"memory"); for (i = 0; i < number; i++) { - va = batch->vaddr[i]; + vpn = batch->vpn[i]; pte = batch->pte[i]; - pte_iterate_hashed_subpages(pte, psize, va, index, - shift) { - __tlbiel(va, psize, ssize); + pte_iterate_hashed_subpages(pte, psize, + vpn, index, shift) { + __tlbiel(vpn, psize, ssize); } pte_iterate_hashed_end(); } asm volatile("ptesync":::"memory"); @@ -529,12 +552,12 @@ static void native_flush_hash_range(unsigned long number, int local) asm volatile("ptesync":::"memory"); for (i = 0; i < number; i++) { - va = batch->vaddr[i]; + vpn = batch->vpn[i]; pte = batch->pte[i]; - pte_iterate_hashed_subpages(pte, psize, va, index, - shift) { - __tlbie(va, psize, ssize); + pte_iterate_hashed_subpages(pte, psize, + vpn, index, shift) { + __tlbie(vpn, psize, ssize); } pte_iterate_hashed_end(); } asm volatile("eieio; tlbsync; ptesync":::"memory"); diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index ba45739b..7d4ffd7 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -191,18 +191,18 @@ int htab_bolt_mapping(unsigned long vstart, unsigned long vend, vaddr += step, paddr += step) { unsigned long hash, hpteg; unsigned long vsid = get_kernel_vsid(vaddr, ssize); - unsigned long va = hpt_va(vaddr, vsid, ssize); + unsigned long vpn = hpt_vpn(vaddr, vsid, ssize); unsigned long tprot = prot; /* Make kernel text executable */ if (overlaps_kernel_text(vaddr, vaddr + step)) tprot &= ~HPTE_R_N; - hash = hpt_hash(va, shift, ssize); + hash = hpt_hash(vpn, shift, ssize); hpteg = ((hash & htab_hash_mask) * HPTES_PER_GROUP); BUG_ON(!ppc_md.hpte_insert); - ret = ppc_md.hpte_insert(hpteg, va, paddr, tprot, + ret = ppc_md.hpte_insert(hpteg, vpn, paddr, tprot, HPTE_V_BOLTED, psize, ssize); if (ret < 0) @@ -1152,21 +1152,21 @@ void hash_preload(struct mm_struct *mm, unsigned long ea, /* WARNING: This is called from hash_low_64.S, if you change this prototype, * do not forget to update the assembly call site ! */ -void flush_hash_page(unsigned long va, real_pte_t pte, int psize, int ssize, +void flush_hash_page(unsigned long vpn, real_pte_t pte, int psize, int ssize, int local) { unsigned long hash, index, shift, hidx, slot; - DBG_LOW("flush_hash_page(va=%016lx)\n", va); - pte_iterate_hashed_subpages(pte, psize, va, index, shift) { - hash = hpt_hash(va, shift, ssize); + DBG_LOW("flush_hash_page(vpn=%016lx)\n", vpn); + pte_iterate_hashed_subpages(pte, psize, vpn, index, shift) { + hash = hpt_hash(vpn, shift, ssize); hidx = __rpte_to_hidx(pte, index); if (hidx & _PTEIDX_SECONDARY) hash = ~hash; slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; slot += hidx & _PTEIDX_GROUP_IX; DBG_LOW(" sub %ld: hash=%lx, hidx=%lx\n", index, slot, hidx); - ppc_md.hpte_invalidate(slot, va, psize, ssize, local); + ppc_md.hpte_invalidate(slot, vpn, psize, ssize, local); } pte_iterate_hashed_end(); } @@ -1180,7 +1180,7 @@ void flush_hash_range(unsigned long number, int local) &__get_cpu_var(ppc64_tlb_batch); for (i = 0; i < number; i++) - flush_hash_page(batch->vaddr[i], batch->pte[i], + flush_hash_page(batch->vpn[i], batch->pte[i], batch->psize, batch->ssize, local); } } @@ -1207,14 +1207,14 @@ static void kernel_map_linear_page(unsigned long vaddr, unsigned long lmi) { unsigned long hash, hpteg; unsigned long vsid = get_kernel_vsid(vaddr, mmu_kernel_ssize); - unsigned long va = hpt_va(vaddr, vsid, mmu_kernel_ssize); + unsigned long vpn = hpt_vpn(vaddr, vsid, mmu_kernel_ssize); unsigned long mode = htab_convert_pte_flags(PAGE_KERNEL); int ret; - hash = hpt_hash(va, PAGE_SHIFT, mmu_kernel_ssize); + hash = hpt_hash(vpn, PAGE_SHIFT, mmu_kernel_ssize); hpteg = ((hash & htab_hash_mask) * HPTES_PER_GROUP); - ret = ppc_md.hpte_insert(hpteg, va, __pa(vaddr), + ret = ppc_md.hpte_insert(hpteg, vpn, __pa(vaddr), mode, HPTE_V_BOLTED, mmu_linear_psize, mmu_kernel_ssize); BUG_ON (ret < 0); @@ -1228,9 +1228,9 @@ static void kernel_unmap_linear_page(unsigned long vaddr, unsigned long lmi) { unsigned long hash, hidx, slot; unsigned long vsid = get_kernel_vsid(vaddr, mmu_kernel_ssize); - unsigned long va = hpt_va(vaddr, vsid, mmu_kernel_ssize); + unsigned long vpn = hpt_vpn(vaddr, vsid, mmu_kernel_ssize); - hash = hpt_hash(va, PAGE_SHIFT, mmu_kernel_ssize); + hash = hpt_hash(vpn, PAGE_SHIFT, mmu_kernel_ssize); spin_lock(&linear_map_hash_lock); BUG_ON(!(linear_map_hash_slots[lmi] & 0x80)); hidx = linear_map_hash_slots[lmi] & 0x7f; @@ -1240,7 +1240,7 @@ static void kernel_unmap_linear_page(unsigned long vaddr, unsigned long lmi) hash = ~hash; slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; slot += hidx & _PTEIDX_GROUP_IX; - ppc_md.hpte_invalidate(slot, va, mmu_linear_psize, mmu_kernel_ssize, 0); + ppc_md.hpte_invalidate(slot, vpn, mmu_linear_psize, mmu_kernel_ssize, 0); } void kernel_map_pages(struct page *page, int numpages, int enable) diff --git a/arch/powerpc/mm/hugetlbpage-hash64.c b/arch/powerpc/mm/hugetlbpage-hash64.c index cc5c273..cecad34 100644 --- a/arch/powerpc/mm/hugetlbpage-hash64.c +++ b/arch/powerpc/mm/hugetlbpage-hash64.c @@ -18,14 +18,15 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid, pte_t *ptep, unsigned long trap, int local, int ssize, unsigned int shift, unsigned int mmu_psize) { + unsigned long vpn; unsigned long old_pte, new_pte; - unsigned long va, rflags, pa, sz; + unsigned long rflags, pa, sz; long slot; BUG_ON(shift != mmu_psize_defs[mmu_psize].shift); /* Search the Linux page table for a match with va */ - va = hpt_va(ea, vsid, ssize); + vpn = hpt_vpn(ea, vsid, ssize); /* At this point, we have a pte (old_pte) which can be used to build * or update an HPTE. There are 2 cases: @@ -69,19 +70,19 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid, /* There MIGHT be an HPTE for this pte */ unsigned long hash, slot; - hash = hpt_hash(va, shift, ssize); + hash = hpt_hash(vpn, shift, ssize); if (old_pte & _PAGE_F_SECOND) hash = ~hash; slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; slot += (old_pte & _PAGE_F_GIX) >> 12; - if (ppc_md.hpte_updatepp(slot, rflags, va, mmu_psize, + if (ppc_md.hpte_updatepp(slot, rflags, vpn, mmu_psize, ssize, local) == -1) old_pte &= ~_PAGE_HPTEFLAGS; } if (likely(!(old_pte & _PAGE_HASHPTE))) { - unsigned long hash = hpt_hash(va, shift, ssize); + unsigned long hash = hpt_hash(vpn, shift, ssize); unsigned long hpte_group; pa = pte_pfn(__pte(old_pte)) << PAGE_SHIFT; @@ -101,14 +102,14 @@ repeat: _PAGE_COHERENT | _PAGE_GUARDED)); /* Insert into the hash table, primary slot */ - slot = ppc_md.hpte_insert(hpte_group, va, pa, rflags, 0, + slot = ppc_md.hpte_insert(hpte_group, vpn, pa, rflags, 0, mmu_psize, ssize); /* Primary is full, try the secondary */ if (unlikely(slot == -1)) { hpte_group = ((~hash & htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL; - slot = ppc_md.hpte_insert(hpte_group, va, pa, rflags, + slot = ppc_md.hpte_insert(hpte_group, vpn, pa, rflags, HPTE_V_SECONDARY, mmu_psize, ssize); if (slot == -1) { diff --git a/arch/powerpc/mm/tlb_hash64.c b/arch/powerpc/mm/tlb_hash64.c index 31f1820..ae758b3 100644 --- a/arch/powerpc/mm/tlb_hash64.c +++ b/arch/powerpc/mm/tlb_hash64.c @@ -42,8 +42,9 @@ DEFINE_PER_CPU(struct ppc64_tlb_batch, ppc64_tlb_batch); void hpte_need_flush(struct mm_struct *mm, unsigned long addr, pte_t *ptep, unsigned long pte, int huge) { + unsigned long vpn; struct ppc64_tlb_batch *batch = &get_cpu_var(ppc64_tlb_batch); - unsigned long vsid, vaddr; + unsigned long vsid; unsigned int psize; int ssize; real_pte_t rpte; @@ -86,7 +87,7 @@ void hpte_need_flush(struct mm_struct *mm, unsigned long addr, vsid = get_kernel_vsid(addr, mmu_kernel_ssize); ssize = mmu_kernel_ssize; } - vaddr = hpt_va(addr, vsid, ssize); + vpn = hpt_vpn(addr, vsid, ssize); rpte = __real_pte(__pte(pte), ptep); /* @@ -96,7 +97,7 @@ void hpte_need_flush(struct mm_struct *mm, unsigned long addr, * and decide to use local invalidates instead... */ if (!batch->active) { - flush_hash_page(vaddr, rpte, psize, ssize, 0); + flush_hash_page(vpn, rpte, psize, ssize, 0); put_cpu_var(ppc64_tlb_batch); return; } @@ -122,7 +123,7 @@ void hpte_need_flush(struct mm_struct *mm, unsigned long addr, batch->ssize = ssize; } batch->pte[i] = rpte; - batch->vaddr[i] = vaddr; + batch->vpn[i] = vpn; batch->index = ++i; if (i >= PPC64_TLB_BATCH_NR) __flush_tlb_pending(batch); @@ -146,7 +147,7 @@ void __flush_tlb_pending(struct ppc64_tlb_batch *batch) if (cpumask_equal(mm_cpumask(batch->mm), tmp)) local = 1; if (i == 1) - flush_hash_page(batch->vaddr[0], batch->pte[0], + flush_hash_page(batch->vpn[0], batch->pte[0], batch->psize, batch->ssize, local); else flush_hash_range(i, local); diff --git a/arch/powerpc/platforms/cell/beat_htab.c b/arch/powerpc/platforms/cell/beat_htab.c index b83077e..0f6f839 100644 --- a/arch/powerpc/platforms/cell/beat_htab.c +++ b/arch/powerpc/platforms/cell/beat_htab.c @@ -88,7 +88,7 @@ static inline unsigned int beat_read_mask(unsigned hpte_group) } static long beat_lpar_hpte_insert(unsigned long hpte_group, - unsigned long va, unsigned long pa, + unsigned long vpn, unsigned long pa, unsigned long rflags, unsigned long vflags, int psize, int ssize) { @@ -103,7 +103,7 @@ static long beat_lpar_hpte_insert(unsigned long hpte_group, "rflags=%lx, vflags=%lx, psize=%d)\n", hpte_group, va, pa, rflags, vflags, psize); - hpte_v = hpte_encode_v(va, psize, MMU_SEGSIZE_256M) | + hpte_v = hpte_encode_v(vpn, psize, MMU_SEGSIZE_256M) | vflags | HPTE_V_VALID; hpte_r = hpte_encode_r(pa, psize) | rflags; @@ -184,14 +184,14 @@ static void beat_lpar_hptab_clear(void) */ static long beat_lpar_hpte_updatepp(unsigned long slot, unsigned long newpp, - unsigned long va, + unsigned long vpn, int psize, int ssize, int local) { unsigned long lpar_rc; u64 dummy0, dummy1; unsigned long want_v; - want_v = hpte_encode_v(va, psize, MMU_SEGSIZE_256M); + want_v = hpte_encode_v(vpn, psize, MMU_SEGSIZE_256M); DBG_LOW(" update: " "avpnv=%016lx, slot=%016lx, psize: %d, newpp %016lx ... ", @@ -220,15 +220,15 @@ static long beat_lpar_hpte_updatepp(unsigned long slot, return 0; } -static long beat_lpar_hpte_find(unsigned long va, int psize) +static long beat_lpar_hpte_find(unsigned long vpn, int psize) { unsigned long hash; unsigned long i, j; long slot; unsigned long want_v, hpte_v; - hash = hpt_hash(va, mmu_psize_defs[psize].shift, MMU_SEGSIZE_256M); - want_v = hpte_encode_v(va, psize, MMU_SEGSIZE_256M); + hash = hpt_hash(vpn, mmu_psize_defs[psize].shift, MMU_SEGSIZE_256M); + want_v = hpte_encode_v(vpn, psize, MMU_SEGSIZE_256M); for (j = 0; j < 2; j++) { slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; @@ -255,14 +255,15 @@ static void beat_lpar_hpte_updateboltedpp(unsigned long newpp, unsigned long ea, int psize, int ssize) { - unsigned long lpar_rc, slot, vsid, va; + unsigned long vpn; + unsigned long lpar_rc, slot, vsid; u64 dummy0, dummy1; vsid = get_kernel_vsid(ea, MMU_SEGSIZE_256M); - va = hpt_va(ea, vsid, MMU_SEGSIZE_256M); + vpn = hpt_vpn(ea, vsid, MMU_SEGSIZE_256M); raw_spin_lock(&beat_htab_lock); - slot = beat_lpar_hpte_find(va, psize); + slot = beat_lpar_hpte_find(vpn, psize); BUG_ON(slot == -1); lpar_rc = beat_write_htab_entry(0, slot, 0, newpp, 0, 7, @@ -272,7 +273,7 @@ static void beat_lpar_hpte_updateboltedpp(unsigned long newpp, BUG_ON(lpar_rc != 0); } -static void beat_lpar_hpte_invalidate(unsigned long slot, unsigned long va, +static void beat_lpar_hpte_invalidate(unsigned long slot, unsigned long vpn, int psize, int ssize, int local) { unsigned long want_v; @@ -282,7 +283,7 @@ static void beat_lpar_hpte_invalidate(unsigned long slot, unsigned long va, DBG_LOW(" inval : slot=%lx, va=%016lx, psize: %d, local: %d\n", slot, va, psize, local); - want_v = hpte_encode_v(va, psize, MMU_SEGSIZE_256M); + want_v = hpte_encode_v(vpn, psize, MMU_SEGSIZE_256M); raw_spin_lock_irqsave(&beat_htab_lock, flags); dummy1 = beat_lpar_hpte_getword0(slot); @@ -311,7 +312,7 @@ void __init hpte_init_beat(void) } static long beat_lpar_hpte_insert_v3(unsigned long hpte_group, - unsigned long va, unsigned long pa, + unsigned long vpn, unsigned long pa, unsigned long rflags, unsigned long vflags, int psize, int ssize) { @@ -322,11 +323,11 @@ static long beat_lpar_hpte_insert_v3(unsigned long hpte_group, return -1; if (!(vflags & HPTE_V_BOLTED)) - DBG_LOW("hpte_insert(group=%lx, va=%016lx, pa=%016lx, " + DBG_LOW("hpte_insert(group=%lx, vpn=%016lx, pa=%016lx, " "rflags=%lx, vflags=%lx, psize=%d)\n", - hpte_group, va, pa, rflags, vflags, psize); + hpte_group, vpn, pa, rflags, vflags, psize); - hpte_v = hpte_encode_v(va, psize, MMU_SEGSIZE_256M) | + hpte_v = hpte_encode_v(vpn, psize, MMU_SEGSIZE_256M) | vflags | HPTE_V_VALID; hpte_r = hpte_encode_r(pa, psize) | rflags; @@ -364,14 +365,14 @@ static long beat_lpar_hpte_insert_v3(unsigned long hpte_group, */ static long beat_lpar_hpte_updatepp_v3(unsigned long slot, unsigned long newpp, - unsigned long va, + unsigned long vpn, int psize, int ssize, int local) { unsigned long lpar_rc; unsigned long want_v; unsigned long pss; - want_v = hpte_encode_v(va, psize, MMU_SEGSIZE_256M); + want_v = hpte_encode_v(vpn, psize, MMU_SEGSIZE_256M); pss = (psize == MMU_PAGE_4K) ? -1UL : mmu_psize_defs[psize].penc; DBG_LOW(" update: " @@ -392,16 +393,16 @@ static long beat_lpar_hpte_updatepp_v3(unsigned long slot, return 0; } -static void beat_lpar_hpte_invalidate_v3(unsigned long slot, unsigned long va, +static void beat_lpar_hpte_invalidate_v3(unsigned long slot, unsigned long vpn, int psize, int ssize, int local) { unsigned long want_v; unsigned long lpar_rc; unsigned long pss; - DBG_LOW(" inval : slot=%lx, va=%016lx, psize: %d, local: %d\n", - slot, va, psize, local); - want_v = hpte_encode_v(va, psize, MMU_SEGSIZE_256M); + DBG_LOW(" inval : slot=%lx, vpn=%016lx, psize: %d, local: %d\n", + slot, vpn, psize, local); + want_v = hpte_encode_v(vpn, psize, MMU_SEGSIZE_256M); pss = (psize == MMU_PAGE_4K) ? -1UL : mmu_psize_defs[psize].penc; lpar_rc = beat_invalidate_htab_entry3(0, slot, want_v, pss); diff --git a/arch/powerpc/platforms/ps3/htab.c b/arch/powerpc/platforms/ps3/htab.c index 3124cf7..d00d7b0 100644 --- a/arch/powerpc/platforms/ps3/htab.c +++ b/arch/powerpc/platforms/ps3/htab.c @@ -43,7 +43,7 @@ enum ps3_lpar_vas_id { static DEFINE_SPINLOCK(ps3_htab_lock); -static long ps3_hpte_insert(unsigned long hpte_group, unsigned long va, +static long ps3_hpte_insert(unsigned long hpte_group, unsigned long vpn, unsigned long pa, unsigned long rflags, unsigned long vflags, int psize, int ssize) { @@ -61,7 +61,7 @@ static long ps3_hpte_insert(unsigned long hpte_group, unsigned long va, */ vflags &= ~HPTE_V_SECONDARY; - hpte_v = hpte_encode_v(va, psize, ssize) | vflags | HPTE_V_VALID; + hpte_v = hpte_encode_v(vpn, psize, ssize) | vflags | HPTE_V_VALID; hpte_r = hpte_encode_r(ps3_mm_phys_to_lpar(pa), psize) | rflags; spin_lock_irqsave(&ps3_htab_lock, flags); @@ -75,8 +75,8 @@ static long ps3_hpte_insert(unsigned long hpte_group, unsigned long va, if (result) { /* all entries bolted !*/ - pr_info("%s:result=%d va=%lx pa=%lx ix=%lx v=%llx r=%llx\n", - __func__, result, va, pa, hpte_group, hpte_v, hpte_r); + pr_info("%s:result=%d vpn=%lx pa=%lx ix=%lx v=%llx r=%llx\n", + __func__, result, vpn, pa, hpte_group, hpte_v, hpte_r); BUG(); } @@ -107,7 +107,7 @@ static long ps3_hpte_remove(unsigned long hpte_group) } static long ps3_hpte_updatepp(unsigned long slot, unsigned long newpp, - unsigned long va, int psize, int ssize, int local) + unsigned long vpn, int psize, int ssize, int local) { int result; u64 hpte_v, want_v, hpte_rs; @@ -115,7 +115,7 @@ static long ps3_hpte_updatepp(unsigned long slot, unsigned long newpp, unsigned long flags; long ret; - want_v = hpte_encode_v(va, psize, ssize); + want_v = hpte_encode_v(vpn, psize, ssize); spin_lock_irqsave(&ps3_htab_lock, flags); @@ -125,8 +125,8 @@ static long ps3_hpte_updatepp(unsigned long slot, unsigned long newpp, &hpte_rs); if (result) { - pr_info("%s: res=%d read va=%lx slot=%lx psize=%d\n", - __func__, result, va, slot, psize); + pr_info("%s: res=%d read vpn=%lx slot=%lx psize=%d\n", + __func__, result, vpn, slot, psize); BUG(); } @@ -159,7 +159,7 @@ static void ps3_hpte_updateboltedpp(unsigned long newpp, unsigned long ea, panic("ps3_hpte_updateboltedpp() not implemented"); } -static void ps3_hpte_invalidate(unsigned long slot, unsigned long va, +static void ps3_hpte_invalidate(unsigned long slot, unsigned long vpn, int psize, int ssize, int local) { unsigned long flags; @@ -170,8 +170,8 @@ static void ps3_hpte_invalidate(unsigned long slot, unsigned long va, result = lv1_write_htab_entry(PS3_LPAR_VAS_ID_CURRENT, slot, 0, 0); if (result) { - pr_info("%s: res=%d va=%lx slot=%lx psize=%d\n", - __func__, result, va, slot, psize); + pr_info("%s: res=%d vpn=%lx slot=%lx psize=%d\n", + __func__, result, vpn, slot, psize); BUG(); } diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index 177055d..0da39fe 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c @@ -107,9 +107,9 @@ void vpa_init(int cpu) } static long pSeries_lpar_hpte_insert(unsigned long hpte_group, - unsigned long va, unsigned long pa, - unsigned long rflags, unsigned long vflags, - int psize, int ssize) + unsigned long vpn, unsigned long pa, + unsigned long rflags, unsigned long vflags, + int psize, int ssize) { unsigned long lpar_rc; unsigned long flags; @@ -117,11 +117,11 @@ static long pSeries_lpar_hpte_insert(unsigned long hpte_group, unsigned long hpte_v, hpte_r; if (!(vflags & HPTE_V_BOLTED)) - pr_devel("hpte_insert(group=%lx, va=%016lx, pa=%016lx, " - "rflags=%lx, vflags=%lx, psize=%d)\n", - hpte_group, va, pa, rflags, vflags, psize); + pr_devel("hpte_insert(group=%lx, vpn=%016lx, " + "pa=%016lx, rflags=%lx, vflags=%lx, psize=%d)\n", + hpte_group, vpn, pa, rflags, vflags, psize); - hpte_v = hpte_encode_v(va, psize, ssize) | vflags | HPTE_V_VALID; + hpte_v = hpte_encode_v(vpn, psize, ssize) | vflags | HPTE_V_VALID; hpte_r = hpte_encode_r(pa, psize) | rflags; if (!(vflags & HPTE_V_BOLTED)) @@ -226,22 +226,6 @@ static void pSeries_lpar_hptab_clear(void) } /* - * This computes the AVPN and B fields of the first dword of a HPTE, - * for use when we want to match an existing PTE. The bottom 7 bits - * of the returned value are zero. - */ -static inline unsigned long hpte_encode_avpn(unsigned long va, int psize, - int ssize) -{ - unsigned long v; - - v = (va >> 23) & ~(mmu_psize_defs[psize].avpnm); - v <<= HPTE_V_AVPN_SHIFT; - v |= ((unsigned long) ssize) << HPTE_V_SSIZE_SHIFT; - return v; -} - -/* * NOTE: for updatepp ops we are fortunate that the linux "newpp" bits and * the low 3 bits of flags happen to line up. So no transform is needed. * We can probably optimize here and assume the high bits of newpp are @@ -249,14 +233,14 @@ static inline unsigned long hpte_encode_avpn(unsigned long va, int psize, */ static long pSeries_lpar_hpte_updatepp(unsigned long slot, unsigned long newpp, - unsigned long va, + unsigned long vpn, int psize, int ssize, int local) { unsigned long lpar_rc; unsigned long flags = (newpp & 7) | H_AVPN; unsigned long want_v; - want_v = hpte_encode_avpn(va, psize, ssize); + want_v = hpte_encode_avpn(vpn, psize, ssize); pr_devel(" update: avpnv=%016lx, hash=%016lx, f=%lx, psize: %d ...", want_v, slot, flags, psize); @@ -294,15 +278,15 @@ static unsigned long pSeries_lpar_hpte_getword0(unsigned long slot) return dword0; } -static long pSeries_lpar_hpte_find(unsigned long va, int psize, int ssize) +static long pSeries_lpar_hpte_find(unsigned long vpn, int psize, int ssize) { unsigned long hash; unsigned long i; long slot; unsigned long want_v, hpte_v; - hash = hpt_hash(va, mmu_psize_defs[psize].shift, ssize); - want_v = hpte_encode_avpn(va, psize, ssize); + hash = hpt_hash(vpn, mmu_psize_defs[psize].shift, ssize); + want_v = hpte_encode_avpn(vpn, psize, ssize); /* Bolted entries are always in the primary group */ slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; @@ -322,12 +306,13 @@ static void pSeries_lpar_hpte_updateboltedpp(unsigned long newpp, unsigned long ea, int psize, int ssize) { - unsigned long lpar_rc, slot, vsid, va, flags; + unsigned long vpn; + unsigned long lpar_rc, slot, vsid, flags; vsid = get_kernel_vsid(ea, ssize); - va = hpt_va(ea, vsid, ssize); + vpn = hpt_vpn(ea, vsid, ssize); - slot = pSeries_lpar_hpte_find(va, psize, ssize); + slot = pSeries_lpar_hpte_find(vpn, psize, ssize); BUG_ON(slot == -1); flags = newpp & 7; @@ -336,17 +321,17 @@ static void pSeries_lpar_hpte_updateboltedpp(unsigned long newpp, BUG_ON(lpar_rc != H_SUCCESS); } -static void pSeries_lpar_hpte_invalidate(unsigned long slot, unsigned long va, +static void pSeries_lpar_hpte_invalidate(unsigned long slot, unsigned long vpn, int psize, int ssize, int local) { unsigned long want_v; unsigned long lpar_rc; unsigned long dummy1, dummy2; - pr_devel(" inval : slot=%lx, va=%016lx, psize: %d, local: %d\n", - slot, va, psize, local); + pr_devel(" inval : slot=%lx, vpn=%016lx, psize: %d, local: %d\n", + slot, vpn, psize, local); - want_v = hpte_encode_avpn(va, psize, ssize); + want_v = hpte_encode_avpn(vpn, psize, ssize); lpar_rc = plpar_pte_remove(H_AVPN, slot, want_v, &dummy1, &dummy2); if (lpar_rc == H_NOT_FOUND) return; @@ -357,15 +342,16 @@ static void pSeries_lpar_hpte_invalidate(unsigned long slot, unsigned long va, static void pSeries_lpar_hpte_removebolted(unsigned long ea, int psize, int ssize) { - unsigned long slot, vsid, va; + unsigned long vpn; + unsigned long slot, vsid; vsid = get_kernel_vsid(ea, ssize); - va = hpt_va(ea, vsid, ssize); + vpn = hpt_vpn(ea, vsid, ssize); - slot = pSeries_lpar_hpte_find(va, psize, ssize); + slot = pSeries_lpar_hpte_find(vpn, psize, ssize); BUG_ON(slot == -1); - pSeries_lpar_hpte_invalidate(slot, va, psize, ssize, 0); + pSeries_lpar_hpte_invalidate(slot, vpn, psize, ssize, 0); } /* Flag bits for H_BULK_REMOVE */ @@ -381,12 +367,12 @@ static void pSeries_lpar_hpte_removebolted(unsigned long ea, */ static void pSeries_lpar_flush_hash_range(unsigned long number, int local) { + unsigned long vpn; unsigned long i, pix, rc; unsigned long flags = 0; struct ppc64_tlb_batch *batch = &__get_cpu_var(ppc64_tlb_batch); int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE); unsigned long param[9]; - unsigned long va; unsigned long hash, index, shift, hidx, slot; real_pte_t pte; int psize, ssize; @@ -398,21 +384,21 @@ static void pSeries_lpar_flush_hash_range(unsigned long number, int local) ssize = batch->ssize; pix = 0; for (i = 0; i < number; i++) { - va = batch->vaddr[i]; + vpn = batch->vpn[i]; pte = batch->pte[i]; - pte_iterate_hashed_subpages(pte, psize, va, index, shift) { - hash = hpt_hash(va, shift, ssize); + pte_iterate_hashed_subpages(pte, psize, vpn, index, shift) { + hash = hpt_hash(vpn, shift, ssize); hidx = __rpte_to_hidx(pte, index); if (hidx & _PTEIDX_SECONDARY) hash = ~hash; slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; slot += hidx & _PTEIDX_GROUP_IX; if (!firmware_has_feature(FW_FEATURE_BULK_REMOVE)) { - pSeries_lpar_hpte_invalidate(slot, va, psize, + pSeries_lpar_hpte_invalidate(slot, vpn, psize, ssize, local); } else { param[pix] = HBR_REQUEST | HBR_AVPN | slot; - param[pix+1] = hpte_encode_avpn(va, psize, + param[pix+1] = hpte_encode_avpn(vpn, psize, ssize); pix += 2; if (pix == 8) { -- cgit v0.10.2 From 67550080b8288ee9b61e132e8ebc87d563f9c516 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Mon, 10 Sep 2012 02:52:51 +0000 Subject: powerpc/mm: Make KERN_VIRT_SIZE not dependend on PGTABLE_RANGE As we keep increasing PGTABLE_RANGE we need not increase the virual map area for kernel. Reviewed-by: Paul Mackerras Signed-off-by: Aneesh Kumar K.V Signed-off-by: Benjamin Herrenschmidt diff --git a/arch/powerpc/include/asm/pgtable-ppc64.h b/arch/powerpc/include/asm/pgtable-ppc64.h index c420561..8af1cf2 100644 --- a/arch/powerpc/include/asm/pgtable-ppc64.h +++ b/arch/powerpc/include/asm/pgtable-ppc64.h @@ -41,7 +41,7 @@ #else #define KERN_VIRT_START ASM_CONST(0xD000000000000000) #endif -#define KERN_VIRT_SIZE PGTABLE_RANGE +#define KERN_VIRT_SIZE ASM_CONST(0x0000100000000000) /* * The vmalloc space starts at the beginning of that region, and -- cgit v0.10.2 From 7aa0727f3302931e698b3a7979ae5b9a4600da4e Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Mon, 10 Sep 2012 02:52:52 +0000 Subject: powerpc/mm: Increase the slice range to 64TB This patch makes the high psizes mask as an unsigned char array so that we can have more than 16TB. Currently we support upto 64TB Reviewed-by: Paul Mackerras Signed-off-by: Aneesh Kumar K.V Signed-off-by: Benjamin Herrenschmidt diff --git a/arch/powerpc/include/asm/mmu-hash64.h b/arch/powerpc/include/asm/mmu-hash64.h index 6aeb498..7cbd541 100644 --- a/arch/powerpc/include/asm/mmu-hash64.h +++ b/arch/powerpc/include/asm/mmu-hash64.h @@ -460,7 +460,11 @@ typedef struct { #ifdef CONFIG_PPC_MM_SLICES u64 low_slices_psize; /* SLB page size encodings */ - u64 high_slices_psize; /* 4 bits per slice for now */ + /* + * Right now we support 64TB and 4 bits for each + * 1TB slice we need 32 bytes for 64TB. + */ + unsigned char high_slices_psize[32]; /* 4 bits per slice for now */ #else u16 sllp; /* SLB page size encoding */ #endif diff --git a/arch/powerpc/include/asm/page_64.h b/arch/powerpc/include/asm/page_64.h index fed85e6..6c9bef4 100644 --- a/arch/powerpc/include/asm/page_64.h +++ b/arch/powerpc/include/asm/page_64.h @@ -82,7 +82,11 @@ extern u64 ppc64_pft_size; struct slice_mask { u16 low_slices; - u16 high_slices; + /* + * This should be derived out of PGTABLE_RANGE. For the current + * max 64TB, u64 should be ok. + */ + u64 high_slices; }; struct mm_struct; diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index 7d4ffd7..3a292be 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -803,16 +803,19 @@ unsigned int hash_page_do_lazy_icache(unsigned int pp, pte_t pte, int trap) #ifdef CONFIG_PPC_MM_SLICES unsigned int get_paca_psize(unsigned long addr) { - unsigned long index, slices; + u64 lpsizes; + unsigned char *hpsizes; + unsigned long index, mask_index; if (addr < SLICE_LOW_TOP) { - slices = get_paca()->context.low_slices_psize; + lpsizes = get_paca()->context.low_slices_psize; index = GET_LOW_SLICE_INDEX(addr); - } else { - slices = get_paca()->context.high_slices_psize; - index = GET_HIGH_SLICE_INDEX(addr); + return (lpsizes >> (index * 4)) & 0xF; } - return (slices >> (index * 4)) & 0xF; + hpsizes = get_paca()->context.high_slices_psize; + index = GET_HIGH_SLICE_INDEX(addr); + mask_index = index & 0x1; + return (hpsizes[index >> 1] >> (mask_index * 4)) & 0xF; } #else diff --git a/arch/powerpc/mm/slb_low.S b/arch/powerpc/mm/slb_low.S index b9ee79ce..e132dc6 100644 --- a/arch/powerpc/mm/slb_low.S +++ b/arch/powerpc/mm/slb_low.S @@ -108,17 +108,31 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_1T_SEGMENT) * between 4k and 64k standard page size */ #ifdef CONFIG_PPC_MM_SLICES + /* r10 have esid */ cmpldi r10,16 - - /* Get the slice index * 4 in r11 and matching slice size mask in r9 */ - ld r9,PACALOWSLICESPSIZE(r13) - sldi r11,r10,2 + /* below SLICE_LOW_TOP */ blt 5f - ld r9,PACAHIGHSLICEPSIZE(r13) - srdi r11,r10,(SLICE_HIGH_SHIFT - SLICE_LOW_SHIFT - 2) - andi. r11,r11,0x3c + /* + * Handle hpsizes, + * r9 is get_paca()->context.high_slices_psize[index], r11 is mask_index + */ + srdi r11,r10,(SLICE_HIGH_SHIFT - SLICE_LOW_SHIFT + 1) /* index */ + addi r9,r11,PACAHIGHSLICEPSIZE + lbzx r9,r13,r9 /* r9 is hpsizes[r11] */ + /* r11 = (r10 >> (SLICE_HIGH_SHIFT - SLICE_LOW_SHIFT)) & 0x1 */ + rldicl r11,r10,(64 - (SLICE_HIGH_SHIFT - SLICE_LOW_SHIFT)),63 + b 6f -5: /* Extract the psize and multiply to get an array offset */ +5: + /* + * Handle lpsizes + * r9 is get_paca()->context.low_slices_psize, r11 is index + */ + ld r9,PACALOWSLICESPSIZE(r13) + mr r11,r10 +6: + sldi r11,r11,2 /* index * 4 */ + /* Extract the psize and multiply to get an array offset */ srd r9,r9,r11 andi. r9,r9,0xf mulli r9,r9,MMUPSIZEDEFSIZE diff --git a/arch/powerpc/mm/slice.c b/arch/powerpc/mm/slice.c index 73709f7..b4e996a 100644 --- a/arch/powerpc/mm/slice.c +++ b/arch/powerpc/mm/slice.c @@ -42,7 +42,7 @@ int _slice_debug = 1; static void slice_print_mask(const char *label, struct slice_mask mask) { - char *p, buf[16 + 3 + 16 + 1]; + char *p, buf[16 + 3 + 64 + 1]; int i; if (!_slice_debug) @@ -54,7 +54,7 @@ static void slice_print_mask(const char *label, struct slice_mask mask) *(p++) = '-'; *(p++) = ' '; for (i = 0; i < SLICE_NUM_HIGH; i++) - *(p++) = (mask.high_slices & (1 << i)) ? '1' : '0'; + *(p++) = (mask.high_slices & (1ul << i)) ? '1' : '0'; *(p++) = 0; printk(KERN_DEBUG "%s:%s\n", label, buf); @@ -84,8 +84,8 @@ static struct slice_mask slice_range_to_mask(unsigned long start, } if ((start + len) > SLICE_LOW_TOP) - ret.high_slices = (1u << (GET_HIGH_SLICE_INDEX(end) + 1)) - - (1u << GET_HIGH_SLICE_INDEX(start)); + ret.high_slices = (1ul << (GET_HIGH_SLICE_INDEX(end) + 1)) + - (1ul << GET_HIGH_SLICE_INDEX(start)); return ret; } @@ -135,26 +135,31 @@ static struct slice_mask slice_mask_for_free(struct mm_struct *mm) for (i = 0; i < SLICE_NUM_HIGH; i++) if (!slice_high_has_vma(mm, i)) - ret.high_slices |= 1u << i; + ret.high_slices |= 1ul << i; return ret; } static struct slice_mask slice_mask_for_size(struct mm_struct *mm, int psize) { + unsigned char *hpsizes; + int index, mask_index; struct slice_mask ret = { 0, 0 }; unsigned long i; - u64 psizes; + u64 lpsizes; - psizes = mm->context.low_slices_psize; + lpsizes = mm->context.low_slices_psize; for (i = 0; i < SLICE_NUM_LOW; i++) - if (((psizes >> (i * 4)) & 0xf) == psize) + if (((lpsizes >> (i * 4)) & 0xf) == psize) ret.low_slices |= 1u << i; - psizes = mm->context.high_slices_psize; - for (i = 0; i < SLICE_NUM_HIGH; i++) - if (((psizes >> (i * 4)) & 0xf) == psize) - ret.high_slices |= 1u << i; + hpsizes = mm->context.high_slices_psize; + for (i = 0; i < SLICE_NUM_HIGH; i++) { + mask_index = i & 0x1; + index = i >> 1; + if (((hpsizes[index] >> (mask_index * 4)) & 0xf) == psize) + ret.high_slices |= 1ul << i; + } return ret; } @@ -183,8 +188,10 @@ static void slice_flush_segments(void *parm) static void slice_convert(struct mm_struct *mm, struct slice_mask mask, int psize) { + int index, mask_index; /* Write the new slice psize bits */ - u64 lpsizes, hpsizes; + unsigned char *hpsizes; + u64 lpsizes; unsigned long i, flags; slice_dbg("slice_convert(mm=%p, psize=%d)\n", mm, psize); @@ -201,14 +208,18 @@ static void slice_convert(struct mm_struct *mm, struct slice_mask mask, int psiz lpsizes = (lpsizes & ~(0xful << (i * 4))) | (((unsigned long)psize) << (i * 4)); - hpsizes = mm->context.high_slices_psize; - for (i = 0; i < SLICE_NUM_HIGH; i++) - if (mask.high_slices & (1u << i)) - hpsizes = (hpsizes & ~(0xful << (i * 4))) | - (((unsigned long)psize) << (i * 4)); - + /* Assign the value back */ mm->context.low_slices_psize = lpsizes; - mm->context.high_slices_psize = hpsizes; + + hpsizes = mm->context.high_slices_psize; + for (i = 0; i < SLICE_NUM_HIGH; i++) { + mask_index = i & 0x1; + index = i >> 1; + if (mask.high_slices & (1ul << i)) + hpsizes[index] = (hpsizes[index] & + ~(0xf << (mask_index * 4))) | + (((unsigned long)psize) << (mask_index * 4)); + } slice_dbg(" lsps=%lx, hsps=%lx\n", mm->context.low_slices_psize, @@ -587,18 +598,19 @@ unsigned long arch_get_unmapped_area_topdown(struct file *filp, unsigned int get_slice_psize(struct mm_struct *mm, unsigned long addr) { - u64 psizes; - int index; + unsigned char *hpsizes; + int index, mask_index; if (addr < SLICE_LOW_TOP) { - psizes = mm->context.low_slices_psize; + u64 lpsizes; + lpsizes = mm->context.low_slices_psize; index = GET_LOW_SLICE_INDEX(addr); - } else { - psizes = mm->context.high_slices_psize; - index = GET_HIGH_SLICE_INDEX(addr); + return (lpsizes >> (index * 4)) & 0xf; } - - return (psizes >> (index * 4)) & 0xf; + hpsizes = mm->context.high_slices_psize; + index = GET_HIGH_SLICE_INDEX(addr); + mask_index = index & 0x1; + return (hpsizes[index >> 1] >> (mask_index * 4)) & 0xf; } EXPORT_SYMBOL_GPL(get_slice_psize); @@ -618,7 +630,9 @@ EXPORT_SYMBOL_GPL(get_slice_psize); */ void slice_set_user_psize(struct mm_struct *mm, unsigned int psize) { - unsigned long flags, lpsizes, hpsizes; + int index, mask_index; + unsigned char *hpsizes; + unsigned long flags, lpsizes; unsigned int old_psize; int i; @@ -639,15 +653,21 @@ void slice_set_user_psize(struct mm_struct *mm, unsigned int psize) if (((lpsizes >> (i * 4)) & 0xf) == old_psize) lpsizes = (lpsizes & ~(0xful << (i * 4))) | (((unsigned long)psize) << (i * 4)); + /* Assign the value back */ + mm->context.low_slices_psize = lpsizes; hpsizes = mm->context.high_slices_psize; - for (i = 0; i < SLICE_NUM_HIGH; i++) - if (((hpsizes >> (i * 4)) & 0xf) == old_psize) - hpsizes = (hpsizes & ~(0xful << (i * 4))) | - (((unsigned long)psize) << (i * 4)); + for (i = 0; i < SLICE_NUM_HIGH; i++) { + mask_index = i & 0x1; + index = i >> 1; + if (((hpsizes[index] >> (mask_index * 4)) & 0xf) == old_psize) + hpsizes[index] = (hpsizes[index] & + ~(0xf << (mask_index * 4))) | + (((unsigned long)psize) << (mask_index * 4)); + } + + - mm->context.low_slices_psize = lpsizes; - mm->context.high_slices_psize = hpsizes; slice_dbg(" lsps=%lx, hsps=%lx\n", mm->context.low_slices_psize, @@ -660,18 +680,27 @@ void slice_set_user_psize(struct mm_struct *mm, unsigned int psize) void slice_set_psize(struct mm_struct *mm, unsigned long address, unsigned int psize) { + unsigned char *hpsizes; unsigned long i, flags; - u64 *p; + u64 *lpsizes; spin_lock_irqsave(&slice_convert_lock, flags); if (address < SLICE_LOW_TOP) { i = GET_LOW_SLICE_INDEX(address); - p = &mm->context.low_slices_psize; + lpsizes = &mm->context.low_slices_psize; + *lpsizes = (*lpsizes & ~(0xful << (i * 4))) | + ((unsigned long) psize << (i * 4)); } else { + int index, mask_index; i = GET_HIGH_SLICE_INDEX(address); - p = &mm->context.high_slices_psize; + hpsizes = mm->context.high_slices_psize; + mask_index = i & 0x1; + index = i >> 1; + hpsizes[index] = (hpsizes[index] & + ~(0xf << (mask_index * 4))) | + (((unsigned long)psize) << (mask_index * 4)); } - *p = (*p & ~(0xful << (i * 4))) | ((unsigned long) psize << (i * 4)); + spin_unlock_irqrestore(&slice_convert_lock, flags); #ifdef CONFIG_SPU_BASE -- cgit v0.10.2 From ac8dc2823a30a2d166fed6e919ab2e576f8fca84 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Mon, 10 Sep 2012 02:52:53 +0000 Subject: powerpc/mm: Use the required number of VSID bits in slbmte ASM_VSID_SCRAMBLE can leave non-zero bits in the high 28 bits of the result for 256MB segment (40 bits for 1T segment). Properly mask them before using the values in slbmte Reviewed-by: Paul Mackerras Signed-off-by: Aneesh Kumar K.V Signed-off-by: Benjamin Herrenschmidt diff --git a/arch/powerpc/mm/slb_low.S b/arch/powerpc/mm/slb_low.S index e132dc6..3b75f19 100644 --- a/arch/powerpc/mm/slb_low.S +++ b/arch/powerpc/mm/slb_low.S @@ -223,7 +223,11 @@ _GLOBAL(slb_allocate_user) */ slb_finish_load: ASM_VSID_SCRAMBLE(r10,r9,256M) - rldimi r11,r10,SLB_VSID_SHIFT,16 /* combine VSID and flags */ + /* + * bits above VSID_BITS_256M need to be ignored from r10 + * also combine VSID and flags + */ + rldimi r11,r10,SLB_VSID_SHIFT,(64 - (SLB_VSID_SHIFT + VSID_BITS_256M)) /* r3 = EA, r11 = VSID data */ /* @@ -287,7 +291,11 @@ _GLOBAL(slb_compare_rr_to_size) slb_finish_load_1T: srdi r10,r10,40-28 /* get 1T ESID */ ASM_VSID_SCRAMBLE(r10,r9,1T) - rldimi r11,r10,SLB_VSID_SHIFT_1T,16 /* combine VSID and flags */ + /* + * bits above VSID_BITS_1T need to be ignored from r10 + * also combine VSID and flags + */ + rldimi r11,r10,SLB_VSID_SHIFT_1T,(64 - (SLB_VSID_SHIFT_1T + VSID_BITS_1T)) li r10,MMU_SEGSIZE_1T rldimi r11,r10,SLB_VSID_SSIZE_SHIFT,0 /* insert segment size */ -- cgit v0.10.2 From 735cafc32b661f08a266a8d754e6cfbd82c11704 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Mon, 10 Sep 2012 02:52:54 +0000 Subject: powerpc/mm: Use 32bit array for slb cache With larger vsid we need to track more bits of ESID in slb cache for slb invalidate. Reviewed-by: Paul Mackerras Signed-off-by: Aneesh Kumar K.V Signed-off-by: Benjamin Herrenschmidt diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h index 7796519..e9e7a69 100644 --- a/arch/powerpc/include/asm/paca.h +++ b/arch/powerpc/include/asm/paca.h @@ -100,7 +100,7 @@ struct paca_struct { /* SLB related definitions */ u16 vmalloc_sllp; u16 slb_cache_ptr; - u16 slb_cache[SLB_CACHE_ENTRIES]; + u32 slb_cache[SLB_CACHE_ENTRIES]; #endif /* CONFIG_PPC_STD_MMU_64 */ #ifdef CONFIG_PPC_BOOK3E diff --git a/arch/powerpc/mm/slb_low.S b/arch/powerpc/mm/slb_low.S index 3b75f19..f6a2625 100644 --- a/arch/powerpc/mm/slb_low.S +++ b/arch/powerpc/mm/slb_low.S @@ -270,10 +270,10 @@ _GLOBAL(slb_compare_rr_to_size) bge 1f /* still room in the slb cache */ - sldi r11,r3,1 /* r11 = offset * sizeof(u16) */ - rldicl r10,r10,36,28 /* get low 16 bits of the ESID */ - add r11,r11,r13 /* r11 = (u16 *)paca + offset */ - sth r10,PACASLBCACHE(r11) /* paca->slb_cache[offset] = esid */ + sldi r11,r3,2 /* r11 = offset * sizeof(u32) */ + srdi r10,r10,28 /* get the 36 bits of the ESID */ + add r11,r11,r13 /* r11 = (u32 *)paca + offset */ + stw r10,PACASLBCACHE(r11) /* paca->slb_cache[offset] = esid */ addi r3,r3,1 /* offset++ */ b 2f 1: /* offset >= SLB_CACHE_ENTRIES */ -- cgit v0.10.2 From 048ee0993ec8360abb0b51bdf8f8721e9ed62ec4 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Mon, 10 Sep 2012 02:52:55 +0000 Subject: powerpc/mm: Add 64TB support Increase max addressable range to 64TB. This is not tested on real hardware yet. Reviewed-by: Paul Mackerras Signed-off-by: Aneesh Kumar K.V Signed-off-by: Benjamin Herrenschmidt diff --git a/arch/powerpc/include/asm/mmu-hash64.h b/arch/powerpc/include/asm/mmu-hash64.h index 7cbd541..23730ee 100644 --- a/arch/powerpc/include/asm/mmu-hash64.h +++ b/arch/powerpc/include/asm/mmu-hash64.h @@ -370,17 +370,21 @@ extern void slb_set_size(u16 size); * (head.S) and ASM_VSID_SCRAMBLE (below) are changed accordingly. */ -#define VSID_MULTIPLIER_256M ASM_CONST(200730139) /* 28-bit prime */ -#define VSID_BITS_256M 36 +/* + * This should be computed such that protovosid * vsid_mulitplier + * doesn't overflow 64 bits. It should also be co-prime to vsid_modulus + */ +#define VSID_MULTIPLIER_256M ASM_CONST(12538073) /* 24-bit prime */ +#define VSID_BITS_256M 38 #define VSID_MODULUS_256M ((1UL<= PAGE_OFFSET */ +/* + * This is only valid for addresses >= PAGE_OFFSET + * The proto-VSID space is divided into two class + * User: 0 to 2^(CONTEXT_BITS + USER_ESID_BITS) -1 + * kernel: 2^(CONTEXT_BITS + USER_ESID_BITS) to 2^(VSID_BITS) - 1 + * + * With KERNEL_START at 0xc000000000000000, the proto vsid for + * the kernel ends up with 0xc00000000 (36 bits). With 64TB + * support we need to have kernel proto-VSID in the + * [2^37 to 2^38 - 1] range due to the increased USER_ESID_BITS. + */ static inline unsigned long get_kernel_vsid(unsigned long ea, int ssize) { - if (ssize == MMU_SEGSIZE_256M) - return vsid_scramble(ea >> SID_SHIFT, 256M); - return vsid_scramble(ea >> SID_SHIFT_1T, 1T); + unsigned long proto_vsid; + /* + * We need to make sure proto_vsid for the kernel is + * >= 2^(CONTEXT_BITS + USER_ESID_BITS[_1T]) + */ + if (ssize == MMU_SEGSIZE_256M) { + proto_vsid = ea >> SID_SHIFT; + proto_vsid |= (1UL << (CONTEXT_BITS + USER_ESID_BITS)); + return vsid_scramble(proto_vsid, 256M); + } + proto_vsid = ea >> SID_SHIFT_1T; + proto_vsid |= (1UL << (CONTEXT_BITS + USER_ESID_BITS_1T)); + return vsid_scramble(proto_vsid, 1T); } /* Returns the segment size indicator for a user address */ diff --git a/arch/powerpc/include/asm/pgtable-ppc64-4k.h b/arch/powerpc/include/asm/pgtable-ppc64-4k.h index d6489a2..12798c9 100644 --- a/arch/powerpc/include/asm/pgtable-ppc64-4k.h +++ b/arch/powerpc/include/asm/pgtable-ppc64-4k.h @@ -7,7 +7,7 @@ */ #define PTE_INDEX_SIZE 9 #define PMD_INDEX_SIZE 7 -#define PUD_INDEX_SIZE 7 +#define PUD_INDEX_SIZE 9 #define PGD_INDEX_SIZE 9 #ifndef __ASSEMBLY__ diff --git a/arch/powerpc/include/asm/pgtable-ppc64-64k.h b/arch/powerpc/include/asm/pgtable-ppc64-64k.h index 90533dd..be4e287 100644 --- a/arch/powerpc/include/asm/pgtable-ppc64-64k.h +++ b/arch/powerpc/include/asm/pgtable-ppc64-64k.h @@ -7,7 +7,7 @@ #define PTE_INDEX_SIZE 12 #define PMD_INDEX_SIZE 12 #define PUD_INDEX_SIZE 0 -#define PGD_INDEX_SIZE 4 +#define PGD_INDEX_SIZE 6 #ifndef __ASSEMBLY__ #define PTE_TABLE_SIZE (sizeof(real_pte_t) << PTE_INDEX_SIZE) diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h index 83efc6e..9dc5cd1 100644 --- a/arch/powerpc/include/asm/processor.h +++ b/arch/powerpc/include/asm/processor.h @@ -97,8 +97,8 @@ extern struct task_struct *last_task_used_spe; #endif #ifdef CONFIG_PPC64 -/* 64-bit user address space is 44-bits (16TB user VM) */ -#define TASK_SIZE_USER64 (0x0000100000000000UL) +/* 64-bit user address space is 46-bits (64TB user VM) */ +#define TASK_SIZE_USER64 (0x0000400000000000UL) /* * 32-bit user address space is 4GB - 1 page diff --git a/arch/powerpc/include/asm/sparsemem.h b/arch/powerpc/include/asm/sparsemem.h index 0c5fa31..f6fc0ee 100644 --- a/arch/powerpc/include/asm/sparsemem.h +++ b/arch/powerpc/include/asm/sparsemem.h @@ -10,8 +10,8 @@ */ #define SECTION_SIZE_BITS 24 -#define MAX_PHYSADDR_BITS 44 -#define MAX_PHYSMEM_BITS 44 +#define MAX_PHYSADDR_BITS 46 +#define MAX_PHYSMEM_BITS 46 #endif /* CONFIG_SPARSEMEM */ diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 5eb0056..10b658a 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -1083,7 +1083,9 @@ _GLOBAL(do_stab_bolted) rldimi r10,r11,7,52 /* r10 = first ste of the group */ /* Calculate VSID */ - /* This is a kernel address, so protovsid = ESID */ + /* This is a kernel address, so protovsid = ESID | 1 << 37 */ + li r9,0x1 + rldimi r11,r9,(CONTEXT_BITS + USER_ESID_BITS),0 ASM_VSID_SCRAMBLE(r11, r9, 256M) rldic r9,r11,12,16 /* r9 = vsid << 12 */ diff --git a/arch/powerpc/mm/slb_low.S b/arch/powerpc/mm/slb_low.S index f6a2625..1a16ca2 100644 --- a/arch/powerpc/mm/slb_low.S +++ b/arch/powerpc/mm/slb_low.S @@ -56,6 +56,12 @@ _GLOBAL(slb_allocate_realmode) */ _GLOBAL(slb_miss_kernel_load_linear) li r11,0 + li r9,0x1 + /* + * for 1T we shift 12 bits more. slb_finish_load_1T will do + * the necessary adjustment + */ + rldimi r10,r9,(CONTEXT_BITS + USER_ESID_BITS),0 BEGIN_FTR_SECTION b slb_finish_load END_MMU_FTR_SECTION_IFCLR(MMU_FTR_1T_SEGMENT) @@ -85,6 +91,12 @@ _GLOBAL(slb_miss_kernel_load_vmemmap) _GLOBAL(slb_miss_kernel_load_io) li r11,0 6: + li r9,0x1 + /* + * for 1T we shift 12 bits more. slb_finish_load_1T will do + * the necessary adjustment + */ + rldimi r10,r9,(CONTEXT_BITS + USER_ESID_BITS),0 BEGIN_FTR_SECTION b slb_finish_load END_MMU_FTR_SECTION_IFCLR(MMU_FTR_1T_SEGMENT) -- cgit v0.10.2 From f033d659c3b931d8b2a16625155e20304e173c9f Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Mon, 10 Sep 2012 02:52:56 +0000 Subject: powerpc/mm: Update VSID allocation documentation This update the proto-VSID and VSID scramble related information to be more generic by using names instead of current values. Reviewed-by: Paul Mackerras Signed-off-by: Aneesh Kumar K.V Signed-off-by: Benjamin Herrenschmidt diff --git a/arch/powerpc/include/asm/mmu-hash64.h b/arch/powerpc/include/asm/mmu-hash64.h index 23730ee..3e88746 100644 --- a/arch/powerpc/include/asm/mmu-hash64.h +++ b/arch/powerpc/include/asm/mmu-hash64.h @@ -324,51 +324,45 @@ extern void slb_set_size(u16 size); #endif /* __ASSEMBLY__ */ /* - * VSID allocation + * VSID allocation (256MB segment) * - * We first generate a 36-bit "proto-VSID". For kernel addresses this - * is equal to the ESID, for user addresses it is: - * (context << 15) | (esid & 0x7fff) + * We first generate a 38-bit "proto-VSID". For kernel addresses this + * is equal to the ESID | 1 << 37, for user addresses it is: + * (context << USER_ESID_BITS) | (esid & ((1U << USER_ESID_BITS) - 1) * - * The two forms are distinguishable because the top bit is 0 for user - * addresses, whereas the top two bits are 1 for kernel addresses. - * Proto-VSIDs with the top two bits equal to 0b10 are reserved for - * now. + * This splits the proto-VSID into the below range + * 0 - (2^(CONTEXT_BITS + USER_ESID_BITS) - 1) : User proto-VSID range + * 2^(CONTEXT_BITS + USER_ESID_BITS) - 2^(VSID_BITS) : Kernel proto-VSID range + * + * We also have CONTEXT_BITS + USER_ESID_BITS = VSID_BITS - 1 + * That is, we assign half of the space to user processes and half + * to the kernel. * * The proto-VSIDs are then scrambled into real VSIDs with the * multiplicative hash: * * VSID = (proto-VSID * VSID_MULTIPLIER) % VSID_MODULUS - * where VSID_MULTIPLIER = 268435399 = 0xFFFFFC7 - * VSID_MODULUS = 2^36-1 = 0xFFFFFFFFF * - * This scramble is only well defined for proto-VSIDs below - * 0xFFFFFFFFF, so both proto-VSID and actual VSID 0xFFFFFFFFF are - * reserved. VSID_MULTIPLIER is prime, so in particular it is + * VSID_MULTIPLIER is prime, so in particular it is * co-prime to VSID_MODULUS, making this a 1:1 scrambling function. * Because the modulus is 2^n-1 we can compute it efficiently without * a divide or extra multiply (see below). * * This scheme has several advantages over older methods: * - * - We have VSIDs allocated for every kernel address + * - We have VSIDs allocated for every kernel address * (i.e. everything above 0xC000000000000000), except the very top * segment, which simplifies several things. * - * - We allow for 16 significant bits of ESID and 19 bits of - * context for user addresses. i.e. 16T (44 bits) of address space for - * up to half a million contexts. + * - We allow for USER_ESID_BITS significant bits of ESID and + * CONTEXT_BITS bits of context for user addresses. + * i.e. 64T (46 bits) of address space for up to half a million contexts. * - * - The scramble function gives robust scattering in the hash + * - The scramble function gives robust scattering in the hash * table (at least based on some initial results). The previous * method was more susceptible to pathological cases giving excessive * hash collisions. */ -/* - * WARNING - If you change these you must make sure the asm - * implementations in slb_allocate (slb_low.S), do_stab_bolted - * (head.S) and ASM_VSID_SCRAMBLE (below) are changed accordingly. - */ /* * This should be computed such that protovosid * vsid_mulitplier diff --git a/arch/powerpc/mm/mmu_context_hash64.c b/arch/powerpc/mm/mmu_context_hash64.c index daa076c..40bc5b0 100644 --- a/arch/powerpc/mm/mmu_context_hash64.c +++ b/arch/powerpc/mm/mmu_context_hash64.c @@ -30,9 +30,11 @@ static DEFINE_SPINLOCK(mmu_context_lock); static DEFINE_IDA(mmu_context_ida); /* - * The proto-VSID space has 2^35 - 1 segments available for user mappings. - * Each segment contains 2^28 bytes. Each context maps 2^44 bytes, - * so we can support 2^19-1 contexts (19 == 35 + 28 - 44). + * 256MB segment + * The proto-VSID space has 2^(CONTEX_BITS + USER_ESID_BITS) - 1 segments + * available for user mappings. Each segment contains 2^28 bytes. Each + * context maps 2^46 bytes (64TB) so we can support 2^19-1 contexts + * (19 == 37 + 28 - 46). */ #define MAX_CONTEXT ((1UL << CONTEXT_BITS) - 1) -- cgit v0.10.2 From 78f1dbde9fd020419313c2a0c3b602ea2427118f Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Mon, 10 Sep 2012 02:52:57 +0000 Subject: powerpc/mm: Make some of the PGTABLE_RANGE dependency explicit slice array size and slice mask size depend on PGTABLE_RANGE. Reviewed-by: Paul Mackerras Signed-off-by: Aneesh Kumar K.V Signed-off-by: Benjamin Herrenschmidt diff --git a/arch/powerpc/include/asm/mmu-hash64.h b/arch/powerpc/include/asm/mmu-hash64.h index 3e88746..9673f73 100644 --- a/arch/powerpc/include/asm/mmu-hash64.h +++ b/arch/powerpc/include/asm/mmu-hash64.h @@ -16,6 +16,13 @@ #include /* + * This is necessary to get the definition of PGTABLE_RANGE which we + * need for various slices related matters. Note that this isn't the + * complete pgtable.h but only a portion of it. + */ +#include + +/* * Segment table */ @@ -414,6 +421,8 @@ extern void slb_set_size(u16 size); srdi rx,rx,VSID_BITS_##size; /* extract 2^VSID_BITS bit */ \ add rt,rt,rx +/* 4 bits per slice and we have one slice per 1TB */ +#define SLICE_ARRAY_SIZE (PGTABLE_RANGE >> 41) #ifndef __ASSEMBLY__ @@ -458,11 +467,7 @@ typedef struct { #ifdef CONFIG_PPC_MM_SLICES u64 low_slices_psize; /* SLB page size encodings */ - /* - * Right now we support 64TB and 4 bits for each - * 1TB slice we need 32 bytes for 64TB. - */ - unsigned char high_slices_psize[32]; /* 4 bits per slice for now */ + unsigned char high_slices_psize[SLICE_ARRAY_SIZE]; #else u16 sllp; /* SLB page size encoding */ #endif diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h index e8a26db..5e38eed 100644 --- a/arch/powerpc/include/asm/mmu.h +++ b/arch/powerpc/include/asm/mmu.h @@ -146,6 +146,15 @@ extern void setup_initial_memory_limit(phys_addr_t first_memblock_base, extern u64 ppc64_rma_size; #endif /* CONFIG_PPC64 */ +struct mm_struct; +#ifdef CONFIG_DEBUG_VM +extern void assert_pte_locked(struct mm_struct *mm, unsigned long addr); +#else /* CONFIG_DEBUG_VM */ +static inline void assert_pte_locked(struct mm_struct *mm, unsigned long addr) +{ +} +#endif /* !CONFIG_DEBUG_VM */ + #endif /* !__ASSEMBLY__ */ /* The kernel use the constants below to index in the page sizes array. diff --git a/arch/powerpc/include/asm/page_64.h b/arch/powerpc/include/asm/page_64.h index 6c9bef4..cd915d6 100644 --- a/arch/powerpc/include/asm/page_64.h +++ b/arch/powerpc/include/asm/page_64.h @@ -78,14 +78,18 @@ extern u64 ppc64_pft_size; #define GET_LOW_SLICE_INDEX(addr) ((addr) >> SLICE_LOW_SHIFT) #define GET_HIGH_SLICE_INDEX(addr) ((addr) >> SLICE_HIGH_SHIFT) +/* + * 1 bit per slice and we have one slice per 1TB + * Right now we support only 64TB. + * IF we change this we will have to change the type + * of high_slices + */ +#define SLICE_MASK_SIZE 8 + #ifndef __ASSEMBLY__ struct slice_mask { u16 low_slices; - /* - * This should be derived out of PGTABLE_RANGE. For the current - * max 64TB, u64 should be ok. - */ u64 high_slices; }; diff --git a/arch/powerpc/include/asm/pgtable-ppc64.h b/arch/powerpc/include/asm/pgtable-ppc64.h index 8af1cf2..0182c20 100644 --- a/arch/powerpc/include/asm/pgtable-ppc64.h +++ b/arch/powerpc/include/asm/pgtable-ppc64.h @@ -21,17 +21,6 @@ #define PGTABLE_RANGE (ASM_CONST(1) << PGTABLE_EADDR_SIZE) -/* Some sanity checking */ -#if TASK_SIZE_USER64 > PGTABLE_RANGE -#error TASK_SIZE_USER64 exceeds pagetable range -#endif - -#ifdef CONFIG_PPC_STD_MMU_64 -#if TASK_SIZE_USER64 > (1UL << (USER_ESID_BITS + SID_SHIFT)) -#error TASK_SIZE_USER64 exceeds user VSID range -#endif -#endif - /* * Define the address range of the kernel non-linear virtual area */ @@ -117,9 +106,6 @@ #ifndef __ASSEMBLY__ -#include -#include - /* * This is the default implementation of various PTE accessors, it's * used in all cases except Book3S with 64K pages where we have a @@ -198,7 +184,8 @@ /* to find an entry in a kernel page-table-directory */ /* This now only contains the vmalloc pages */ #define pgd_offset_k(address) pgd_offset(&init_mm, address) - +extern void hpte_need_flush(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, unsigned long pte, int huge); /* Atomic PTE updates */ static inline unsigned long pte_update(struct mm_struct *mm, diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h index 2e0e411..a9cbd3b 100644 --- a/arch/powerpc/include/asm/pgtable.h +++ b/arch/powerpc/include/asm/pgtable.h @@ -9,14 +9,6 @@ struct mm_struct; -#ifdef CONFIG_DEBUG_VM -extern void assert_pte_locked(struct mm_struct *mm, unsigned long addr); -#else /* CONFIG_DEBUG_VM */ -static inline void assert_pte_locked(struct mm_struct *mm, unsigned long addr) -{ -} -#endif /* !CONFIG_DEBUG_VM */ - #endif /* !__ASSEMBLY__ */ #if defined(CONFIG_PPC64) @@ -27,6 +19,8 @@ static inline void assert_pte_locked(struct mm_struct *mm, unsigned long addr) #ifndef __ASSEMBLY__ +#include + /* Generic accessors to PTE bits */ static inline int pte_write(pte_t pte) { return pte_val(pte) & _PAGE_RW; } static inline int pte_dirty(pte_t pte) { return pte_val(pte) & _PAGE_DIRTY; } diff --git a/arch/powerpc/include/asm/tlbflush.h b/arch/powerpc/include/asm/tlbflush.h index fc02d1d..61a5927 100644 --- a/arch/powerpc/include/asm/tlbflush.h +++ b/arch/powerpc/include/asm/tlbflush.h @@ -103,9 +103,6 @@ DECLARE_PER_CPU(struct ppc64_tlb_batch, ppc64_tlb_batch); extern void __flush_tlb_pending(struct ppc64_tlb_batch *batch); -extern void hpte_need_flush(struct mm_struct *mm, unsigned long addr, - pte_t *ptep, unsigned long pte, int huge); - #define __HAVE_ARCH_ENTER_LAZY_MMU_MODE static inline void arch_enter_lazy_mmu_mode(void) diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c index 297d495..e212a27 100644 --- a/arch/powerpc/mm/pgtable_64.c +++ b/arch/powerpc/mm/pgtable_64.c @@ -55,8 +55,18 @@ #include "mmu_decl.h" -unsigned long ioremap_bot = IOREMAP_BASE; +/* Some sanity checking */ +#if TASK_SIZE_USER64 > PGTABLE_RANGE +#error TASK_SIZE_USER64 exceeds pagetable range +#endif + +#ifdef CONFIG_PPC_STD_MMU_64 +#if TASK_SIZE_USER64 > (1UL << (USER_ESID_BITS + SID_SHIFT)) +#error TASK_SIZE_USER64 exceeds user VSID range +#endif +#endif +unsigned long ioremap_bot = IOREMAP_BASE; #ifdef CONFIG_PPC_MMU_NOHASH static void *early_alloc_pgtable(unsigned long size) diff --git a/arch/powerpc/mm/slice.c b/arch/powerpc/mm/slice.c index b4e996a..5829d2a 100644 --- a/arch/powerpc/mm/slice.c +++ b/arch/powerpc/mm/slice.c @@ -34,6 +34,11 @@ #include #include +/* some sanity checks */ +#if (PGTABLE_RANGE >> 43) > SLICE_MASK_SIZE +#error PGTABLE_RANGE exceeds slice_mask high_slices size +#endif + static DEFINE_SPINLOCK(slice_convert_lock); -- cgit v0.10.2 From e72bbbab278a2e6d506bd2cf380ba9bef68e8ec1 Mon Sep 17 00:00:00 2001 From: Li Zhong Date: Mon, 10 Sep 2012 15:37:43 +0000 Subject: powerpc/trace: Fix interrupt tracepoints vs. RCU There are a few tracepoints in the interrupt code path, which is before irq_enter(), or after irq_exit(), like trace_irq_entry()/trace_irq_exit() in do_IRQ(), trace_timer_interrupt_entry()/trace_timer_interrupt_exit() in timer_interrupt(). If the interrupt is from idle(), and because tracepoint contains RCU read-side critical section, we could see following suspicious RCU usage reported: [ 145.127743] =============================== [ 145.127747] [ INFO: suspicious RCU usage. ] [ 145.127752] 3.6.0-rc3+ #1 Not tainted [ 145.127755] ------------------------------- [ 145.127759] /root/.workdir/linux/arch/powerpc/include/asm/trace.h:33 suspicious rcu_dereference_check() usage! [ 145.127765] [ 145.127765] other info that might help us debug this: [ 145.127765] [ 145.127771] [ 145.127771] RCU used illegally from idle CPU! [ 145.127771] rcu_scheduler_active = 1, debug_locks = 0 [ 145.127777] RCU used illegally from extended quiescent state! [ 145.127781] no locks held by swapper/0/0. [ 145.127785] [ 145.127785] stack backtrace: [ 145.127789] Call Trace: [ 145.127796] [c00000000108b530] [c000000000013c40] .show_stack +0x70/0x1c0 (unreliable) [ 145.127806] [c00000000108b5e0] [c0000000000f59d8] .lockdep_rcu_suspicious+0x118/0x150 [ 145.127813] [c00000000108b680] [c00000000000fc58] .do_IRQ+0x498/0x500 [ 145.127820] [c00000000108b750] [c000000000003950] hardware_interrupt_common+0x150/0x180 [ 145.127828] --- Exception: 501 at .plpar_hcall_norets+0x84/0xd4 [ 145.127828] LR = .check_and_cede_processor+0x38/0x70 [ 145.127836] [c00000000108bab0] [c0000000000665dc] .shared_cede_loop +0x5c/0x100 [ 145.127844] [c00000000108bb70] [c000000000588ab0] .cpuidle_enter +0x30/0x50 [ 145.127850] [c00000000108bbe0] [c000000000588b0c] .cpuidle_enter_state+0x3c/0xb0 [ 145.127857] [c00000000108bc60] [c000000000589730] .cpuidle_idle_call +0x150/0x6c0 [ 145.127863] [c00000000108bd30] [c000000000058440] .pSeries_idle +0x10/0x40 [ 145.127870] [c00000000108bda0] [c00000000001683c] .cpu_idle +0x18c/0x2d0 [ 145.127876] [c00000000108be60] [c00000000000b434] .rest_init +0x124/0x1b0 [ 145.127884] [c00000000108bef0] [c0000000009d0d28] .start_kernel +0x568/0x588 [ 145.127890] [c00000000108bf90] [c000000000009660] .start_here_common +0x20/0x40 This is because the RCU usage in interrupt context should be used in area marked by rcu_irq_enter()/rcu_irq_exit(), called in irq_enter()/irq_exit() respectively. Move them into the irq_enter()/irq_exit() area to avoid the reporting. Signed-off-by: Li Zhong Signed-off-by: Benjamin Herrenschmidt diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 1f017bb..71413f4 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -489,10 +489,10 @@ void do_IRQ(struct pt_regs *regs) struct pt_regs *old_regs = set_irq_regs(regs); unsigned int irq; - trace_irq_entry(regs); - irq_enter(); + trace_irq_entry(regs); + check_stack_overflow(); /* @@ -511,10 +511,10 @@ void do_IRQ(struct pt_regs *regs) else __get_cpu_var(irq_stat).spurious_irqs++; + trace_irq_exit(regs); + irq_exit(); set_irq_regs(old_regs); - - trace_irq_exit(regs); } void __init init_IRQ(void) diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index e49e931..bd693a1 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -493,8 +493,6 @@ void timer_interrupt(struct pt_regs * regs) */ may_hard_irq_enable(); - trace_timer_interrupt_entry(regs); - __get_cpu_var(irq_stat).timer_irqs++; #if defined(CONFIG_PPC32) && defined(CONFIG_PMAC) @@ -505,6 +503,8 @@ void timer_interrupt(struct pt_regs * regs) old_regs = set_irq_regs(regs); irq_enter(); + trace_timer_interrupt_entry(regs); + if (test_irq_work_pending()) { clear_irq_work_pending(); irq_work_run(); @@ -529,10 +529,10 @@ void timer_interrupt(struct pt_regs * regs) } #endif + trace_timer_interrupt_exit(regs); + irq_exit(); set_irq_regs(old_regs); - - trace_timer_interrupt_exit(regs); } /* -- cgit v0.10.2 From fb446ad075cfa5212b26c4f77751faefe574ad8b Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Mon, 20 Aug 2012 03:49:14 +0000 Subject: powerpc/powernv: Create bus sensitive PEs Basically, there're 2 types of PCI bus sensitive PEs: (A) The PE includes single PCI bus. (B) The PE includes the PCI bus and all the subordinate PCI buses. At present, we'd like to put PCI bus originated by PCI-e link to form PE that contains single PCI bus, and the PCIe-to-PCI bridge will form the 2nd type of PE. We don't figure out to detect PLX bridge yet. Once we can detect PLX bridge some day, we have to put PCI buses originated from the downstream port of PLX bridge to the 2nd type of PE. The patch changes the original implementation for a little bit to support 2 types of PCI bus sensitive PEs described as above. Also, the function used to retrieve the corresponding PE according to the given PCI device has been changed based on that because each PCI device should trace the directly associated PE. Signed-off-by: Gavin Shan Reviewed-by: Ram Pai Reviewed-by: Richard Yang Signed-off-by: Benjamin Herrenschmidt diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index cae7281..0ddc2e5 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -547,7 +547,7 @@ static void __devinit pnv_ioda_free_pe(struct pnv_phb *phb, int pe) * but in the meantime, we need to protect them to avoid warnings */ #ifdef CONFIG_PCI_MSI -static struct pnv_ioda_pe * __devinit __pnv_ioda_get_one_pe(struct pci_dev *dev) +static struct pnv_ioda_pe * __devinit pnv_ioda_get_pe(struct pci_dev *dev) { struct pci_controller *hose = pci_bus_to_host(dev->bus); struct pnv_phb *phb = hose->private_data; @@ -559,19 +559,6 @@ static struct pnv_ioda_pe * __devinit __pnv_ioda_get_one_pe(struct pci_dev *dev) return NULL; return &phb->ioda.pe_array[pdn->pe_number]; } - -static struct pnv_ioda_pe * __devinit pnv_ioda_get_pe(struct pci_dev *dev) -{ - struct pnv_ioda_pe *pe = __pnv_ioda_get_one_pe(dev); - - while (!pe && dev->bus->self) { - dev = dev->bus->self; - pe = __pnv_ioda_get_one_pe(dev); - if (pe) - pe = pe->bus_pe; - } - return pe; -} #endif /* CONFIG_PCI_MSI */ static int __devinit pnv_ioda_configure_pe(struct pnv_phb *phb, @@ -588,7 +575,11 @@ static int __devinit pnv_ioda_configure_pe(struct pnv_phb *phb, dcomp = OPAL_IGNORE_RID_DEVICE_NUMBER; fcomp = OPAL_IGNORE_RID_FUNCTION_NUMBER; parent = pe->pbus->self; - count = pe->pbus->busn_res.end - pe->pbus->busn_res.start + 1; + if (pe->flags & PNV_IODA_PE_BUS_ALL) + count = pe->pbus->busn_res.end - pe->pbus->busn_res.start + 1; + else + count = 1; + switch(count) { case 1: bcomp = OpalPciBusAll; break; case 2: bcomp = OpalPciBus7Bits; break; @@ -698,6 +689,7 @@ static unsigned int pnv_ioda_dma_weight(struct pci_dev *dev) return 10; } +#if 0 static struct pnv_ioda_pe * __devinit pnv_ioda_setup_dev_PE(struct pci_dev *dev) { struct pci_controller *hose = pci_bus_to_host(dev->bus); @@ -766,6 +758,7 @@ static struct pnv_ioda_pe * __devinit pnv_ioda_setup_dev_PE(struct pci_dev *dev) return pe; } +#endif /* Useful for SRIOV case */ static void pnv_ioda_setup_same_PE(struct pci_bus *bus, struct pnv_ioda_pe *pe) { @@ -783,34 +776,33 @@ static void pnv_ioda_setup_same_PE(struct pci_bus *bus, struct pnv_ioda_pe *pe) pdn->pcidev = dev; pdn->pe_number = pe->pe_number; pe->dma_weight += pnv_ioda_dma_weight(dev); - if (dev->subordinate) + if ((pe->flags & PNV_IODA_PE_BUS_ALL) && dev->subordinate) pnv_ioda_setup_same_PE(dev->subordinate, pe); } } -static void __devinit pnv_ioda_setup_bus_PE(struct pci_dev *dev, - struct pnv_ioda_pe *ppe) +/* + * There're 2 types of PCI bus sensitive PEs: One that is compromised of + * single PCI bus. Another one that contains the primary PCI bus and its + * subordinate PCI devices and buses. The second type of PE is normally + * orgiriated by PCIe-to-PCI bridge or PLX switch downstream ports. + */ +static void __devinit pnv_ioda_setup_bus_PE(struct pci_bus *bus, int all) { - struct pci_controller *hose = pci_bus_to_host(dev->bus); + struct pci_controller *hose = pci_bus_to_host(bus); struct pnv_phb *phb = hose->private_data; - struct pci_bus *bus = dev->subordinate; struct pnv_ioda_pe *pe; int pe_num; - if (!bus) { - pr_warning("%s: Bridge without a subordinate bus !\n", - pci_name(dev)); - return; - } pe_num = pnv_ioda_alloc_pe(phb); if (pe_num == IODA_INVALID_PE) { - pr_warning("%s: Not enough PE# available, disabling bus\n", - pci_name(dev)); + pr_warning("%s: Not enough PE# available for PCI bus %04x:%02x\n", + __func__, pci_domain_nr(bus), bus->number); return; } pe = &phb->ioda.pe_array[pe_num]; - ppe->bus_pe = pe; + pe->flags = (all ? PNV_IODA_PE_BUS_ALL : PNV_IODA_PE_BUS); pe->pbus = bus; pe->pdev = NULL; pe->tce32_seg = -1; @@ -818,8 +810,12 @@ static void __devinit pnv_ioda_setup_bus_PE(struct pci_dev *dev, pe->rid = bus->busn_res.start << 8; pe->dma_weight = 0; - pe_info(pe, "Secondary busses %pR associated with PE\n", - &bus->busn_res); + if (all) + pe_info(pe, "Secondary bus %d..%d associated with PE#%d\n", + bus->busn_res.start, bus->busn_res.end, pe_num); + else + pe_info(pe, "Secondary bus %d associated with PE#%d\n", + bus->busn_res.start, pe_num); if (pnv_ioda_configure_pe(phb, pe)) { /* XXX What do we do here ? */ @@ -847,17 +843,33 @@ static void __devinit pnv_ioda_setup_bus_PE(struct pci_dev *dev, static void __devinit pnv_ioda_setup_PEs(struct pci_bus *bus) { struct pci_dev *dev; - struct pnv_ioda_pe *pe; + + pnv_ioda_setup_bus_PE(bus, 0); list_for_each_entry(dev, &bus->devices, bus_list) { - pe = pnv_ioda_setup_dev_PE(dev); - if (pe == NULL) - continue; - /* Leaving the PCIe domain ... single PE# */ - if (pci_pcie_type(dev) == PCI_EXP_TYPE_PCI_BRIDGE) - pnv_ioda_setup_bus_PE(dev, pe); - else if (dev->subordinate) - pnv_ioda_setup_PEs(dev->subordinate); + if (dev->subordinate) { + if (pci_pcie_type(dev) == PCI_EXP_TYPE_PCI_BRIDGE) + pnv_ioda_setup_bus_PE(dev->subordinate, 1); + else + pnv_ioda_setup_PEs(dev->subordinate); + } + } +} + +/* + * Configure PEs so that the downstream PCI buses and devices + * could have their associated PE#. Unfortunately, we didn't + * figure out the way to identify the PLX bridge yet. So we + * simply put the PCI bus and the subordinate behind the root + * port to PE# here. The game rule here is expected to be changed + * as soon as we can detected PLX bridge correctly. + */ +static void __devinit pnv_pci_ioda_setup_PEs(void) +{ + struct pci_controller *hose, *tmp; + + list_for_each_entry_safe(hose, tmp, &hose_list, list_node) { + pnv_ioda_setup_PEs(hose->bus); } } @@ -1138,6 +1150,11 @@ static void __devinit pnv_pci_ioda_fixup_phb(struct pci_controller *hose) } } +static void __devinit pnv_pci_ioda_fixup(void) +{ + pnv_pci_ioda_setup_PEs(); +} + /* * Returns the alignment for I/O or memory windows for P2P * bridges. That actually depends on how PEs are segmented. @@ -1342,6 +1359,7 @@ void __init pnv_pci_init_ioda1_phb(struct device_node *np) * ourselves here */ ppc_md.pcibios_fixup_phb = pnv_pci_ioda_fixup_phb; + ppc_md.pcibios_fixup = pnv_pci_ioda_fixup; ppc_md.pcibios_enable_device_hook = pnv_pci_enable_device_hook; ppc_md.pcibios_window_alignment = pnv_pci_window_alignment; pci_add_flags(PCI_PROBE_ONLY | PCI_REASSIGN_ALL_RSRC); -- cgit v0.10.2 From 7ebdf956df7961ae6b57b0328e03d33f95f1346f Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Mon, 20 Aug 2012 03:49:15 +0000 Subject: powerpc/powernv: PE list based on creation order The resource (I/O and MMIO) will be assigned on basis of PE from top to bottom so that we can implement the trick here: the resource that has been assigned to parent PE could be taken by child PE if necessary. The current implementation already has PE list per PHB basis, but the list doesn't meet our requirment: tracing PE based on their cration time from top to bottom. So the patch does rename for the DMA based PE list and introduces the list to trace the PEs sequentially based on their creation time. Signed-off-by: Gavin Shan Reviewed-by: Ram Pai Reviewed-by: Richard Yang Signed-off-by: Benjamin Herrenschmidt diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 0ddc2e5..7999da1 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -656,13 +656,13 @@ static void __devinit pnv_ioda_link_pe_by_weight(struct pnv_phb *phb, { struct pnv_ioda_pe *lpe; - list_for_each_entry(lpe, &phb->ioda.pe_list, link) { + list_for_each_entry(lpe, &phb->ioda.pe_dma_list, dma_link) { if (lpe->dma_weight < pe->dma_weight) { - list_add_tail(&pe->link, &lpe->link); + list_add_tail(&pe->dma_link, &lpe->dma_link); return; } } - list_add_tail(&pe->link, &phb->ioda.pe_list); + list_add_tail(&pe->dma_link, &phb->ioda.pe_dma_list); } static unsigned int pnv_ioda_dma_weight(struct pci_dev *dev) @@ -828,6 +828,9 @@ static void __devinit pnv_ioda_setup_bus_PE(struct pci_bus *bus, int all) /* Associate it with all child devices */ pnv_ioda_setup_same_PE(bus, pe); + /* Put PE to the list */ + list_add_tail(&pe->list, &phb->ioda.pe_list); + /* Account for one DMA PE if at least one DMA capable device exist * below the bridge */ @@ -1011,7 +1014,7 @@ static void __devinit pnv_ioda_setup_dma(struct pnv_phb *phb) remaining = phb->ioda.tce32_count; tw = phb->ioda.dma_weight; base = 0; - list_for_each_entry(pe, &phb->ioda.pe_list, link) { + list_for_each_entry(pe, &phb->ioda.pe_dma_list, dma_link) { if (!pe->dma_weight) continue; if (!remaining) { @@ -1305,6 +1308,7 @@ void __init pnv_pci_init_ioda1_phb(struct device_node *np) phb->ioda.pe_array = aux + pemap_off; set_bit(0, phb->ioda.pe_alloc); + INIT_LIST_HEAD(&phb->ioda.pe_dma_list); INIT_LIST_HEAD(&phb->ioda.pe_list); /* Calculate how many 32-bit TCE segments we have */ diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h index 8bc4796..b70720b 100644 --- a/arch/powerpc/platforms/powernv/pci.h +++ b/arch/powerpc/platforms/powernv/pci.h @@ -17,9 +17,14 @@ enum pnv_phb_model { }; #define PNV_PCI_DIAG_BUF_SIZE 4096 +#define PNV_IODA_PE_DEV (1 << 0) /* PE has single PCI device */ +#define PNV_IODA_PE_BUS (1 << 1) /* PE has primary PCI bus */ +#define PNV_IODA_PE_BUS_ALL (1 << 2) /* PE has subordinate buses */ /* Data associated with a PE, including IOMMU tracking etc.. */ struct pnv_ioda_pe { + unsigned long flags; + /* A PE can be associated with a single device or an * entire bus (& children). In the former case, pdev * is populated, in the later case, pbus is. @@ -40,11 +45,6 @@ struct pnv_ioda_pe { */ unsigned int dma_weight; - /* This is a PCI-E -> PCI-X bridge, this points to the - * corresponding bus PE - */ - struct pnv_ioda_pe *bus_pe; - /* "Base" iommu table, ie, 4K TCEs, 32-bit DMA */ int tce32_seg; int tce32_segcount; @@ -59,7 +59,8 @@ struct pnv_ioda_pe { int mve_number; /* Link in list of PE#s */ - struct list_head link; + struct list_head dma_link; + struct list_head list; }; struct pnv_phb { @@ -107,6 +108,11 @@ struct pnv_phb { unsigned int *io_segmap; struct pnv_ioda_pe *pe_array; + /* Sorted list of used PE's based + * on the sequence of creation + */ + struct list_head pe_list; + /* Reverse map of PEs, will have to extend if * we are to support more than 256 PEs, indexed * bus { bus, devfn } @@ -125,7 +131,7 @@ struct pnv_phb { /* Sorted list of used PE's, sorted at * boot for resource allocation purposes */ - struct list_head pe_list; + struct list_head pe_dma_list; } ioda; }; -- cgit v0.10.2 From 11685becbf1a962759ae94a0fdb5b0b7521778a5 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Mon, 20 Aug 2012 03:49:16 +0000 Subject: powerpc/powernv: I/O and MMIO resource assignment for PEs There're 2 types of PCI bus sensitive PEs: (A) The PE includes single PCI bus. (B) The PE includes the PCI bus and all the subordinate PCI buses, and the patch tries to assign I/O and MMIO resources based on created PEs. Fortunately, we figured out unified scheme to do resource assignment for all types of PCI bus based PEs according to Ben's idea: - Resource assignment based on PE from top to bottom. - The soureces, either I/O or MMIO, of the PE are figured out from the assigned PCI bus. - The occupied resource by parent PE could possibilly be overrided by children PEs. Signed-off-by: Gavin Shan Reviewed-by: Ram Pai Reviewed-by: Richard Yang Signed-off-by: Benjamin Herrenschmidt diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 7999da1..9e34f46 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -1153,9 +1153,98 @@ static void __devinit pnv_pci_ioda_fixup_phb(struct pci_controller *hose) } } +/* + * This function is supposed to be called on basis of PE from top + * to bottom style. So the the I/O or MMIO segment assigned to + * parent PE could be overrided by its child PEs if necessary. + */ +static void __devinit pnv_ioda_setup_pe_seg(struct pci_controller *hose, + struct pnv_ioda_pe *pe) +{ + struct pnv_phb *phb = hose->private_data; + struct pci_bus_region region; + struct resource *res; + int i, index; + int rc; + + /* + * NOTE: We only care PCI bus based PE for now. For PCI + * device based PE, for example SRIOV sensitive VF should + * be figured out later. + */ + BUG_ON(!(pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL))); + + pci_bus_for_each_resource(pe->pbus, res, i) { + if (!res || !res->flags || + res->start > res->end) + continue; + + if (res->flags & IORESOURCE_IO) { + region.start = res->start - phb->ioda.io_pci_base; + region.end = res->end - phb->ioda.io_pci_base; + index = region.start / phb->ioda.io_segsize; + + while (index < phb->ioda.total_pe && + region.start <= region.end) { + phb->ioda.io_segmap[index] = pe->pe_number; + rc = opal_pci_map_pe_mmio_window(phb->opal_id, + pe->pe_number, OPAL_IO_WINDOW_TYPE, 0, index); + if (rc != OPAL_SUCCESS) { + pr_err("%s: OPAL error %d when mapping IO " + "segment #%d to PE#%d\n", + __func__, rc, index, pe->pe_number); + break; + } + + region.start += phb->ioda.io_segsize; + index++; + } + } else if (res->flags & IORESOURCE_MEM) { + region.start = res->start - + hose->pci_mem_offset - + phb->ioda.m32_pci_base; + region.end = res->end - + hose->pci_mem_offset - + phb->ioda.m32_pci_base; + index = region.start / phb->ioda.m32_segsize; + + while (index < phb->ioda.total_pe && + region.start <= region.end) { + phb->ioda.m32_segmap[index] = pe->pe_number; + rc = opal_pci_map_pe_mmio_window(phb->opal_id, + pe->pe_number, OPAL_M32_WINDOW_TYPE, 0, index); + if (rc != OPAL_SUCCESS) { + pr_err("%s: OPAL error %d when mapping M32 " + "segment#%d to PE#%d", + __func__, rc, index, pe->pe_number); + break; + } + + region.start += phb->ioda.m32_segsize; + index++; + } + } + } +} + +static void __devinit pnv_pci_ioda_setup_seg(void) +{ + struct pci_controller *tmp, *hose; + struct pnv_phb *phb; + struct pnv_ioda_pe *pe; + + list_for_each_entry_safe(hose, tmp, &hose_list, list_node) { + phb = hose->private_data; + list_for_each_entry(pe, &phb->ioda.pe_list, list) { + pnv_ioda_setup_pe_seg(hose, pe); + } + } +} + static void __devinit pnv_pci_ioda_fixup(void) { pnv_pci_ioda_setup_PEs(); + pnv_pci_ioda_setup_seg(); } /* -- cgit v0.10.2 From 13395c48c3def3e5afee7bd7a851f901374db5ec Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Mon, 20 Aug 2012 03:49:17 +0000 Subject: powerpc/powernv: Initialize DMA for PEs The patch introduces additional wrapper function to call the original implementation so that the DMA can be configured for all existing PEs. Signed-off-by: Gavin Shan Reviewed-by: Ram Pai Reviewed-by: Richard Yang Signed-off-by: Benjamin Herrenschmidt diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 9e34f46..edf39e0 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -1241,10 +1241,20 @@ static void __devinit pnv_pci_ioda_setup_seg(void) } } +static void __devinit pnv_pci_ioda_setup_DMA(void) +{ + struct pci_controller *hose, *tmp; + + list_for_each_entry_safe(hose, tmp, &hose_list, list_node) { + pnv_ioda_setup_dma(hose->private_data); + } +} + static void __devinit pnv_pci_ioda_fixup(void) { pnv_pci_ioda_setup_PEs(); pnv_pci_ioda_setup_seg(); + pnv_pci_ioda_setup_DMA(); } /* -- cgit v0.10.2 From db1266c852611436daa01a89c272722ec39cf916 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Mon, 20 Aug 2012 03:49:18 +0000 Subject: powerpc/powernv: Skip check on PE if necessary While the device driver or PCI core tries to enable PCI device, the platform dependent callback "ppc_md.pcibios_enable_device_hook" will be called to check if there has one associated PE for the PCI device. If we don't have the associated PE for the PCI device, it's not allowed to enable the PCI device. Unfortunately, there might have some cases we have to enable the PCI device (e.g. P2P bridge), but the PEs have not been created yet. The patch handles the unfortunate cases. Each PHB (struct pnv_phb) has one field "initialized" to trace if the PEs have been created and configured or not. When the PEs are not available, we won't check the associated PE for the PCI device to be enabled. Signed-off-by: Gavin Shan Reviewed-by: Ram Pai Reviewed-by: Richard Yang Signed-off-by: Benjamin Herrenschmidt diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index edf39e0..c4461ad 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -1244,9 +1244,14 @@ static void __devinit pnv_pci_ioda_setup_seg(void) static void __devinit pnv_pci_ioda_setup_DMA(void) { struct pci_controller *hose, *tmp; + struct pnv_phb *phb; list_for_each_entry_safe(hose, tmp, &hose_list, list_node) { pnv_ioda_setup_dma(hose->private_data); + + /* Mark the PHB initialization done */ + phb = hose->private_data; + phb->initialized = 1; } } @@ -1300,10 +1305,22 @@ static resource_size_t pnv_pci_window_alignment(struct pci_bus *bus, */ static int __devinit pnv_pci_enable_device_hook(struct pci_dev *dev) { - struct pci_dn *pdn = pnv_ioda_get_pdn(dev); + struct pci_controller *hose = pci_bus_to_host(dev->bus); + struct pnv_phb *phb = hose->private_data; + struct pci_dn *pdn; + /* The function is probably called while the PEs have + * not be created yet. For example, resource reassignment + * during PCI probe period. We just skip the check if + * PEs isn't ready. + */ + if (!phb->initialized) + return 0; + + pdn = pnv_ioda_get_pdn(dev); if (!pdn || pdn->pe_number == IODA_INVALID_PE) return -EINVAL; + return 0; } diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h index b70720b..7cfb7c8 100644 --- a/arch/powerpc/platforms/powernv/pci.h +++ b/arch/powerpc/platforms/powernv/pci.h @@ -69,6 +69,7 @@ struct pnv_phb { enum pnv_phb_model model; u64 opal_id; void __iomem *regs; + int initialized; spinlock_t lock; #ifdef CONFIG_PCI_MSI -- cgit v0.10.2 From e47747f479502cf5dc1f8043fbc7513a4dc8fd9f Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Mon, 20 Aug 2012 03:49:19 +0000 Subject: powerpc/powernv: Fix overrunning segment tracing array There're 2 arrays introduced to trace which PE has occupied the corresponding resource (I/O or MMIO) segment. However, we didn't allocate enough memory for them and that possiblly leads to PE descriptor corruption. The patch fixes that by allocating enough memory for those 2 arrays. Signed-off-by: Gavin Shan Reviewed-by: Ram Pai Reviewed-by: Richard Yang Signed-off-by: Benjamin Herrenschmidt diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index c4461ad..40559bf 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -1411,9 +1411,9 @@ void __init pnv_pci_init_ioda1_phb(struct device_node *np) /* Allocate aux data & arrays */ size = _ALIGN_UP(phb->ioda.total_pe / 8, sizeof(unsigned long)); m32map_off = size; - size += phb->ioda.total_pe; + size += phb->ioda.total_pe * sizeof(phb->ioda.m32_segmap[0]); iomap_off = size; - size += phb->ioda.total_pe; + size += phb->ioda.total_pe * sizeof(phb->ioda.io_segmap[0]); pemap_off = size; size += phb->ioda.total_pe * sizeof(struct pnv_ioda_pe); aux = alloc_bootmem(size); -- cgit v0.10.2 From c40a4210a4b55284a71ed52721d9894b5bdb1953 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Mon, 20 Aug 2012 03:49:20 +0000 Subject: powerpc/powernv: Using PCI core to do resource assignment Currently, the PCI probe flags "PCI_PROBE_ONLY | PCI_REASSIGN_ALL_RSRC" used on powernv platform. That means the platform has to do the PCI resource assignment by itself. The patch changes the PCI probe flag to "PCI_REASSIGN_ALL_RSRC" so that the PCI core will do the resource assignment. Also, the I/O and MMIO minimal alignment for P2P bridges have been configured while doing fixup for the PHBs. Signed-off-by: Gavin Shan Reviewed-by: Ram Pai Reviewed-by: Richard Yang Signed-off-by: Benjamin Herrenschmidt diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 40559bf..d65f84c 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -1123,36 +1123,6 @@ static void pnv_pci_init_ioda_msis(struct pnv_phb *phb) static void pnv_pci_init_ioda_msis(struct pnv_phb *phb) { } #endif /* CONFIG_PCI_MSI */ -/* This is the starting point of our IODA specific resource - * allocation process - */ -static void __devinit pnv_pci_ioda_fixup_phb(struct pci_controller *hose) -{ - resource_size_t size, align; - struct pci_bus *child; - - /* Associate PEs per functions */ - pnv_ioda_setup_PEs(hose->bus); - - /* Calculate all resources */ - pnv_ioda_calc_bus(hose->bus, IORESOURCE_IO, &size, &align); - pnv_ioda_calc_bus(hose->bus, IORESOURCE_MEM, &size, &align); - - /* Apply then to HW */ - pnv_ioda_update_resources(hose->bus); - - /* Setup DMA */ - pnv_ioda_setup_dma(hose->private_data); - - /* Configure PCI Express settings */ - list_for_each_entry(child, &hose->bus->children, node) { - struct pci_dev *self = child->self; - if (!self) - continue; - pcie_bus_configure_settings(child, self->pcie_mpss); - } -} - /* * This function is supposed to be called on basis of PE from top * to bottom style. So the the I/O or MMIO segment assigned to @@ -1473,16 +1443,17 @@ void __init pnv_pci_init_ioda1_phb(struct device_node *np) /* Setup MSI support */ pnv_pci_init_ioda_msis(phb); - /* We set both PCI_PROBE_ONLY and PCI_REASSIGN_ALL_RSRC. This is an - * odd combination which essentially means that we skip all resource - * fixups and assignments in the generic code, and do it all - * ourselves here + /* + * We pass the PCI probe flag PCI_REASSIGN_ALL_RSRC here + * to let the PCI core do resource assignment. It's supposed + * that the PCI core will do correct I/O and MMIO alignment + * for the P2P bridge bars so that each PCI bus (excluding + * the child P2P bridges) can form individual PE. */ - ppc_md.pcibios_fixup_phb = pnv_pci_ioda_fixup_phb; ppc_md.pcibios_fixup = pnv_pci_ioda_fixup; ppc_md.pcibios_enable_device_hook = pnv_pci_enable_device_hook; ppc_md.pcibios_window_alignment = pnv_pci_window_alignment; - pci_add_flags(PCI_PROBE_ONLY | PCI_REASSIGN_ALL_RSRC); + pci_add_flags(PCI_REASSIGN_ALL_RSRC); /* Reset IODA tables to a clean state */ rc = opal_pci_reset(phb_id, OPAL_PCI_IODA_TABLE_RESET, OPAL_ASSERT_RESET); -- cgit v0.10.2 From b9ae38aeca2fddbabe2942bee431873a05d21d74 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Mon, 20 Aug 2012 03:49:21 +0000 Subject: powerpc/powernv: Remove unused functions We don't need them anymore. The patch removes those functions. Signed-off-by: Gavin Shan Reviewed-by: Ram Pai Reviewed-by: Richard Yang Signed-off-by: Benjamin Herrenschmidt diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index d65f84c..471aa3c 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -34,14 +34,6 @@ #include "powernv.h" #include "pci.h" -struct resource_wrap { - struct list_head link; - resource_size_t size; - resource_size_t align; - struct pci_dev *dev; /* Set if it's a device */ - struct pci_bus *bus; /* Set if it's a bridge */ -}; - static int __pe_printk(const char *level, const struct pnv_ioda_pe *pe, struct va_format *vaf) { @@ -77,273 +69,6 @@ define_pe_printk_level(pe_err, KERN_ERR); define_pe_printk_level(pe_warn, KERN_WARNING); define_pe_printk_level(pe_info, KERN_INFO); - -/* Calculate resource usage & alignment requirement of a single - * device. This will also assign all resources within the device - * for a given type starting at 0 for the biggest one and then - * assigning in decreasing order of size. - */ -static void __devinit pnv_ioda_calc_dev(struct pci_dev *dev, unsigned int flags, - resource_size_t *size, - resource_size_t *align) -{ - resource_size_t start; - struct resource *r; - int i; - - pr_devel(" -> CDR %s\n", pci_name(dev)); - - *size = *align = 0; - - /* Clear the resources out and mark them all unset */ - for (i = 0; i <= PCI_ROM_RESOURCE; i++) { - r = &dev->resource[i]; - if (!(r->flags & flags)) - continue; - if (r->start) { - r->end -= r->start; - r->start = 0; - } - r->flags |= IORESOURCE_UNSET; - } - - /* We currently keep all memory resources together, we - * will handle prefetch & 64-bit separately in the future - * but for now we stick everybody in M32 - */ - start = 0; - for (;;) { - resource_size_t max_size = 0; - int max_no = -1; - - /* Find next biggest resource */ - for (i = 0; i <= PCI_ROM_RESOURCE; i++) { - r = &dev->resource[i]; - if (!(r->flags & IORESOURCE_UNSET) || - !(r->flags & flags)) - continue; - if (resource_size(r) > max_size) { - max_size = resource_size(r); - max_no = i; - } - } - if (max_no < 0) - break; - r = &dev->resource[max_no]; - if (max_size > *align) - *align = max_size; - *size += max_size; - r->start = start; - start += max_size; - r->end = r->start + max_size - 1; - r->flags &= ~IORESOURCE_UNSET; - pr_devel(" -> R%d %016llx..%016llx\n", - max_no, r->start, r->end); - } - pr_devel(" <- CDR %s size=%llx align=%llx\n", - pci_name(dev), *size, *align); -} - -/* Allocate a resource "wrap" for a given device or bridge and - * insert it at the right position in the sorted list - */ -static void __devinit pnv_ioda_add_wrap(struct list_head *list, - struct pci_bus *bus, - struct pci_dev *dev, - resource_size_t size, - resource_size_t align) -{ - struct resource_wrap *w1, *w = kzalloc(sizeof(*w), GFP_KERNEL); - - w->size = size; - w->align = align; - w->dev = dev; - w->bus = bus; - - list_for_each_entry(w1, list, link) { - if (w1->align < align) { - list_add_tail(&w->link, &w1->link); - return; - } - } - list_add_tail(&w->link, list); -} - -/* Offset device resources of a given type */ -static void __devinit pnv_ioda_offset_dev(struct pci_dev *dev, - unsigned int flags, - resource_size_t offset) -{ - struct resource *r; - int i; - - pr_devel(" -> ODR %s [%x] +%016llx\n", pci_name(dev), flags, offset); - - for (i = 0; i <= PCI_ROM_RESOURCE; i++) { - r = &dev->resource[i]; - if (r->flags & flags) { - dev->resource[i].start += offset; - dev->resource[i].end += offset; - } - } - - pr_devel(" <- ODR %s [%x] +%016llx\n", pci_name(dev), flags, offset); -} - -/* Offset bus resources (& all children) of a given type */ -static void __devinit pnv_ioda_offset_bus(struct pci_bus *bus, - unsigned int flags, - resource_size_t offset) -{ - struct resource *r; - struct pci_dev *dev; - struct pci_bus *cbus; - int i; - - pr_devel(" -> OBR %s [%x] +%016llx\n", - bus->self ? pci_name(bus->self) : "root", flags, offset); - - pci_bus_for_each_resource(bus, r, i) { - if (r && (r->flags & flags)) { - r->start += offset; - r->end += offset; - } - } - list_for_each_entry(dev, &bus->devices, bus_list) - pnv_ioda_offset_dev(dev, flags, offset); - list_for_each_entry(cbus, &bus->children, node) - pnv_ioda_offset_bus(cbus, flags, offset); - - pr_devel(" <- OBR %s [%x]\n", - bus->self ? pci_name(bus->self) : "root", flags); -} - -/* This is the guts of our IODA resource allocation. This is called - * recursively for each bus in the system. It calculates all the - * necessary size and requirements for children and assign them - * resources such that: - * - * - Each function fits in it's own contiguous set of IO/M32 - * segment - * - * - All segments behind a P2P bridge are contiguous and obey - * alignment constraints of those bridges - */ -static void __devinit pnv_ioda_calc_bus(struct pci_bus *bus, unsigned int flags, - resource_size_t *size, - resource_size_t *align) -{ - struct pci_controller *hose = pci_bus_to_host(bus); - struct pnv_phb *phb = hose->private_data; - resource_size_t dev_size, dev_align, start; - resource_size_t min_align, min_balign; - struct pci_dev *cdev; - struct pci_bus *cbus; - struct list_head head; - struct resource_wrap *w; - unsigned int bres; - - *size = *align = 0; - - pr_devel("-> CBR %s [%x]\n", - bus->self ? pci_name(bus->self) : "root", flags); - - /* Calculate alignment requirements based on the type - * of resource we are working on - */ - if (flags & IORESOURCE_IO) { - bres = 0; - min_align = phb->ioda.io_segsize; - min_balign = 0x1000; - } else { - bres = 1; - min_align = phb->ioda.m32_segsize; - min_balign = 0x100000; - } - - /* Gather all our children resources ordered by alignment */ - INIT_LIST_HEAD(&head); - - /* - Busses */ - list_for_each_entry(cbus, &bus->children, node) { - pnv_ioda_calc_bus(cbus, flags, &dev_size, &dev_align); - pnv_ioda_add_wrap(&head, cbus, NULL, dev_size, dev_align); - } - - /* - Devices */ - list_for_each_entry(cdev, &bus->devices, bus_list) { - pnv_ioda_calc_dev(cdev, flags, &dev_size, &dev_align); - /* Align them to segment size */ - if (dev_align < min_align) - dev_align = min_align; - pnv_ioda_add_wrap(&head, NULL, cdev, dev_size, dev_align); - } - if (list_empty(&head)) - goto empty; - - /* Now we can do two things: assign offsets to them within that - * level and get our total alignment & size requirements. The - * assignment algorithm is going to be uber-trivial for now, we - * can try to be smarter later at filling out holes. - */ - if (bus->self) { - /* No offset for downstream bridges */ - start = 0; - } else { - /* Offset from the root */ - if (flags & IORESOURCE_IO) - /* Don't hand out IO 0 */ - start = hose->io_resource.start + 0x1000; - else - start = hose->mem_resources[0].start; - } - while(!list_empty(&head)) { - w = list_first_entry(&head, struct resource_wrap, link); - list_del(&w->link); - if (w->size) { - if (start) { - start = ALIGN(start, w->align); - if (w->dev) - pnv_ioda_offset_dev(w->dev,flags,start); - else if (w->bus) - pnv_ioda_offset_bus(w->bus,flags,start); - } - if (w->align > *align) - *align = w->align; - } - start += w->size; - kfree(w); - } - *size = start; - - /* Align and setup bridge resources */ - *align = max_t(resource_size_t, *align, - max_t(resource_size_t, min_align, min_balign)); - *size = ALIGN(*size, - max_t(resource_size_t, min_align, min_balign)); - empty: - /* Only setup P2P's, not the PHB itself */ - if (bus->self) { - struct resource *res = bus->resource[bres]; - - if (WARN_ON(res == NULL)) - return; - - /* - * FIXME: We should probably export and call - * pci_bridge_check_ranges() to properly re-initialize - * the PCI portion of the flags here, and to detect - * what the bridge actually supports. - */ - res->start = 0; - res->flags = (*size) ? flags : 0; - res->end = (*size) ? (*size - 1) : 0; - } - - pr_devel("<- CBR %s [%x] *size=%016llx *align=%016llx\n", - bus->self ? pci_name(bus->self) : "root", flags,*size,*align); -} - static struct pci_dn *pnv_ioda_get_pdn(struct pci_dev *dev) { struct device_node *np; @@ -354,172 +79,6 @@ static struct pci_dn *pnv_ioda_get_pdn(struct pci_dev *dev) return PCI_DN(np); } -static void __devinit pnv_ioda_setup_pe_segments(struct pci_dev *dev) -{ - struct pci_controller *hose = pci_bus_to_host(dev->bus); - struct pnv_phb *phb = hose->private_data; - struct pci_dn *pdn = pnv_ioda_get_pdn(dev); - unsigned int pe, i; - resource_size_t pos; - struct resource io_res; - struct resource m32_res; - struct pci_bus_region region; - int rc; - - /* Anything not referenced in the device-tree gets PE#0 */ - pe = pdn ? pdn->pe_number : 0; - - /* Calculate the device min/max */ - io_res.start = m32_res.start = (resource_size_t)-1; - io_res.end = m32_res.end = 0; - io_res.flags = IORESOURCE_IO; - m32_res.flags = IORESOURCE_MEM; - - for (i = 0; i <= PCI_ROM_RESOURCE; i++) { - struct resource *r = NULL; - if (dev->resource[i].flags & IORESOURCE_IO) - r = &io_res; - if (dev->resource[i].flags & IORESOURCE_MEM) - r = &m32_res; - if (!r) - continue; - if (dev->resource[i].start < r->start) - r->start = dev->resource[i].start; - if (dev->resource[i].end > r->end) - r->end = dev->resource[i].end; - } - - /* Setup IO segments */ - if (io_res.start < io_res.end) { - pcibios_resource_to_bus(dev, ®ion, &io_res); - pos = region.start; - i = pos / phb->ioda.io_segsize; - while(i < phb->ioda.total_pe && pos <= region.end) { - if (phb->ioda.io_segmap[i]) { - pr_err("%s: Trying to use IO seg #%d which is" - " already used by PE# %d\n", - pci_name(dev), i, - phb->ioda.io_segmap[i]); - /* XXX DO SOMETHING TO DISABLE DEVICE ? */ - break; - } - phb->ioda.io_segmap[i] = pe; - rc = opal_pci_map_pe_mmio_window(phb->opal_id, pe, - OPAL_IO_WINDOW_TYPE, - 0, i); - if (rc != OPAL_SUCCESS) { - pr_err("%s: OPAL error %d setting up mapping" - " for IO seg# %d\n", - pci_name(dev), rc, i); - /* XXX DO SOMETHING TO DISABLE DEVICE ? */ - break; - } - pos += phb->ioda.io_segsize; - i++; - }; - } - - /* Setup M32 segments */ - if (m32_res.start < m32_res.end) { - pcibios_resource_to_bus(dev, ®ion, &m32_res); - pos = region.start; - i = pos / phb->ioda.m32_segsize; - while(i < phb->ioda.total_pe && pos <= region.end) { - if (phb->ioda.m32_segmap[i]) { - pr_err("%s: Trying to use M32 seg #%d which is" - " already used by PE# %d\n", - pci_name(dev), i, - phb->ioda.m32_segmap[i]); - /* XXX DO SOMETHING TO DISABLE DEVICE ? */ - break; - } - phb->ioda.m32_segmap[i] = pe; - rc = opal_pci_map_pe_mmio_window(phb->opal_id, pe, - OPAL_M32_WINDOW_TYPE, - 0, i); - if (rc != OPAL_SUCCESS) { - pr_err("%s: OPAL error %d setting up mapping" - " for M32 seg# %d\n", - pci_name(dev), rc, i); - /* XXX DO SOMETHING TO DISABLE DEVICE ? */ - break; - } - pos += phb->ioda.m32_segsize; - i++; - } - } -} - -/* Check if a resource still fits in the total IO or M32 range - * for a given PHB - */ -static int __devinit pnv_ioda_resource_fit(struct pci_controller *hose, - struct resource *r) -{ - struct resource *bounds; - - if (r->flags & IORESOURCE_IO) - bounds = &hose->io_resource; - else if (r->flags & IORESOURCE_MEM) - bounds = &hose->mem_resources[0]; - else - return 1; - - if (r->start >= bounds->start && r->end <= bounds->end) - return 1; - r->flags = 0; - return 0; -} - -static void __devinit pnv_ioda_update_resources(struct pci_bus *bus) -{ - struct pci_controller *hose = pci_bus_to_host(bus); - struct pci_bus *cbus; - struct pci_dev *cdev; - unsigned int i; - - /* We used to clear all device enables here. However it looks like - * clearing MEM enable causes Obsidian (IPR SCS) to go bonkers, - * and shoot fatal errors to the PHB which in turns fences itself - * and we can't recover from that ... yet. So for now, let's leave - * the enables as-is and hope for the best. - */ - - /* Check if bus resources fit in our IO or M32 range */ - for (i = 0; bus->self && (i < 2); i++) { - struct resource *r = bus->resource[i]; - if (r && !pnv_ioda_resource_fit(hose, r)) - pr_err("%s: Bus %d resource %d disabled, no room\n", - pci_name(bus->self), bus->number, i); - } - - /* Update self if it's not a PHB */ - if (bus->self) - pci_setup_bridge(bus); - - /* Update child devices */ - list_for_each_entry(cdev, &bus->devices, bus_list) { - /* Check if resource fits, if not, disabled it */ - for (i = 0; i <= PCI_ROM_RESOURCE; i++) { - struct resource *r = &cdev->resource[i]; - if (!pnv_ioda_resource_fit(hose, r)) - pr_err("%s: Resource %d disabled, no room\n", - pci_name(cdev), i); - } - - /* Assign segments */ - pnv_ioda_setup_pe_segments(cdev); - - /* Update HW BARs */ - for (i = 0; i <= PCI_ROM_RESOURCE; i++) - pci_update_resource(cdev, i); - } - - /* Update child busses */ - list_for_each_entry(cbus, &bus->children, node) - pnv_ioda_update_resources(cbus); -} - static int __devinit pnv_ioda_alloc_pe(struct pnv_phb *phb) { unsigned long pe; -- cgit v0.10.2 From ddadb6b8e88979a00ac44fba9c92896eec113bd1 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 13 Sep 2012 23:01:31 +0000 Subject: powerpc: Add an xmon command to dump one or all pacas This was originally motivated by a desire to see the mapping between logical and hardware cpu numbers. But it seemed that it made more sense to just add a command to dump (most of) the paca. With no arguments "dp" will dump the paca for the current cpu. It also takes an argument, eg. "dp 3" which is the logical cpu number in hex. This form does not check if the cpu is possible, but displays the paca regardless, as well as the cpu's state in the possible, present and online masks. Thirdly, "dpa" will display the paca for all possible cpus. If there are no possible cpus, like early in boot, it will tell you that. Sample output, number in brackets is the offset into the struct: 2:mon> dp 3 paca for cpu 0x3 @ c00000000ff20a80: possible = yes present = yes online = yes lock_token = 0x8000 (0x8) paca_index = 0x3 (0xa) kernel_toc = 0xc00000000144f990 (0x10) kernelbase = 0xc000000000000000 (0x18) kernel_msr = 0xb000000000001032 (0x20) stab_real = 0x0 (0x28) stab_addr = 0x0 (0x30) emergency_sp = 0xc00000003ffe4000 (0x38) data_offset = 0xa40000 (0x40) hw_cpu_id = 0x9 (0x50) cpu_start = 0x1 (0x52) kexec_state = 0x0 (0x53) __current = 0xc00000007e568680 (0x218) kstack = 0xc00000007e5a3e30 (0x220) stab_rr = 0x1a (0x228) saved_r1 = 0xc00000007e7cb450 (0x230) trap_save = 0x0 (0x240) soft_enabled = 0x0 (0x242) irq_happened = 0x0 (0x243) io_sync = 0x0 (0x244) irq_work_pending = 0x0 (0x245) nap_state_lost = 0x0 (0x246) Signed-off-by: Michael Ellerman Signed-off-by: Benjamin Herrenschmidt diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index 987f441..3a56a63 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -60,6 +60,8 @@ static cpumask_t cpus_in_xmon = CPU_MASK_NONE; static unsigned long xmon_taken = 1; static int xmon_owner; static int xmon_gate; +#else +#define xmon_owner 0 #endif /* CONFIG_SMP */ static unsigned long in_xmon __read_mostly = 0; @@ -202,7 +204,13 @@ Commands:\n\ di dump instructions\n\ df dump float values\n\ dd dump double values\n\ - dl dump the kernel log buffer\n\ + dl dump the kernel log buffer\n" +#ifdef CONFIG_PPC64 + "\ + dp[#] dump paca for current cpu, or cpu #\n\ + dpa dump paca for all possible cpus\n" +#endif + "\ dr dump stream of raw bytes\n\ e print exception information\n\ f flush cache\n\ @@ -2009,6 +2017,95 @@ static void xmon_rawdump (unsigned long adrs, long ndump) printf("\n"); } +#ifdef CONFIG_PPC64 +static void dump_one_paca(int cpu) +{ + struct paca_struct *p; + + if (setjmp(bus_error_jmp) != 0) { + printf("*** Error dumping paca for cpu 0x%x!\n", cpu); + return; + } + + catch_memory_errors = 1; + sync(); + + p = &paca[cpu]; + + printf("paca for cpu 0x%x @ %p:\n", cpu, p); + + printf(" %-*s = %s\n", 16, "possible", cpu_possible(cpu) ? "yes" : "no"); + printf(" %-*s = %s\n", 16, "present", cpu_present(cpu) ? "yes" : "no"); + printf(" %-*s = %s\n", 16, "online", cpu_online(cpu) ? "yes" : "no"); + +#define DUMP(paca, name, format) \ + printf(" %-*s = %#-*"format"\t(0x%lx)\n", 16, #name, 18, paca->name, \ + offsetof(struct paca_struct, name)); + + DUMP(p, lock_token, "x"); + DUMP(p, paca_index, "x"); + DUMP(p, kernel_toc, "lx"); + DUMP(p, kernelbase, "lx"); + DUMP(p, kernel_msr, "lx"); +#ifdef CONFIG_PPC_STD_MMU_64 + DUMP(p, stab_real, "lx"); + DUMP(p, stab_addr, "lx"); +#endif + DUMP(p, emergency_sp, "p"); + DUMP(p, data_offset, "lx"); + DUMP(p, hw_cpu_id, "x"); + DUMP(p, cpu_start, "x"); + DUMP(p, kexec_state, "x"); + DUMP(p, __current, "p"); + DUMP(p, kstack, "lx"); + DUMP(p, stab_rr, "lx"); + DUMP(p, saved_r1, "lx"); + DUMP(p, trap_save, "x"); + DUMP(p, soft_enabled, "x"); + DUMP(p, irq_happened, "x"); + DUMP(p, io_sync, "x"); + DUMP(p, irq_work_pending, "x"); + DUMP(p, nap_state_lost, "x"); + +#undef DUMP + + catch_memory_errors = 0; + sync(); +} + +static void dump_all_pacas(void) +{ + int cpu; + + if (num_possible_cpus() == 0) { + printf("No possible cpus, use 'dp #' to dump individual cpus\n"); + return; + } + + for_each_possible_cpu(cpu) + dump_one_paca(cpu); +} + +static void dump_pacas(void) +{ + unsigned long num; + int c; + + c = inchar(); + if (c == 'a') { + dump_all_pacas(); + return; + } + + termch = c; /* Put c back, it wasn't 'a' */ + + if (scanhex(&num)) + dump_one_paca(num); + else + dump_one_paca(xmon_owner); +} +#endif + #define isxdigit(c) (('0' <= (c) && (c) <= '9') \ || ('a' <= (c) && (c) <= 'f') \ || ('A' <= (c) && (c) <= 'F')) @@ -2018,6 +2115,14 @@ dump(void) int c; c = inchar(); + +#ifdef CONFIG_PPC64 + if (c == 'p') { + dump_pacas(); + return; + } +#endif + if ((isxdigit(c) && c != 'f' && c != 'd') || c == '\n') termch = c; scanhex((void *)&adrs); -- cgit v0.10.2 From 5efc3ad7325d81b5a8e09bc14d5e21ce7272d934 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Tue, 11 Sep 2012 19:16:16 +0000 Subject: powerpc/eeh: Introduce EEH_PE_INVALID type PE When EEH error happens on the PE whose PCI devices don't have attached drivers. In function eeh_handle_event(), the default value PCI_ERS_RESULT_NONE will be returned after iterating all drivers of those PCI devices belonging to the PE. Actually, we don't have installed drivers for the PCI devices. Under the circumstance, we will remove the corresponding PCI bus of the PE, including the associated EEH devices and PE instance. However, we still need the information stored in the PE instance to do PE reset after that. So it's unsafe to free the PE instance. The patch introduces EEH_PE_INVALID type PE to address the issue. When the PCI bus and the corresponding attached EEH devices are removed, we will mark the PE as EEH_PE_INVALID. At later point, the PE will be changed to EEH_PE_DEVICE or EEH_PE_BUS when the corresponding EEH devices are attached again. Signed-off-by: Gavin Shan Signed-off-by: Benjamin Herrenschmidt diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h index 58c5ee6..afeb400 100644 --- a/arch/powerpc/include/asm/eeh.h +++ b/arch/powerpc/include/asm/eeh.h @@ -45,9 +45,10 @@ struct device_node; * in the corresponding PHB. Therefore, the root PEs should be created * against existing PHBs in on-to-one fashion. */ -#define EEH_PE_PHB 1 /* PHB PE */ -#define EEH_PE_DEVICE 2 /* Device PE */ -#define EEH_PE_BUS 3 /* Bus PE */ +#define EEH_PE_INVALID (1 << 0) /* Invalid */ +#define EEH_PE_PHB (1 << 1) /* PHB PE */ +#define EEH_PE_DEVICE (1 << 2) /* Device PE */ +#define EEH_PE_BUS (1 << 3) /* Bus PE */ #define EEH_PE_ISOLATED (1 << 0) /* Isolated PE */ #define EEH_PE_RECOVERING (1 << 1) /* Recovering PE */ diff --git a/arch/powerpc/platforms/pseries/eeh_pe.c b/arch/powerpc/platforms/pseries/eeh_pe.c index 904123c..51fc56a 100644 --- a/arch/powerpc/platforms/pseries/eeh_pe.c +++ b/arch/powerpc/platforms/pseries/eeh_pe.c @@ -107,7 +107,7 @@ static struct eeh_pe *eeh_phb_pe_get(struct pci_controller *phb) * the PE for PHB has been determined when that * was created. */ - if (pe->type == EEH_PE_PHB && + if ((pe->type & EEH_PE_PHB) && pe->phb == phb) { eeh_unlock(); return pe; @@ -219,7 +219,7 @@ static void *__eeh_pe_get(void *data, void *flag) struct eeh_dev *edev = (struct eeh_dev *)flag; /* Unexpected PHB PE */ - if (pe->type == EEH_PE_PHB) + if (pe->type & EEH_PE_PHB) return NULL; /* We prefer PE address */ @@ -314,7 +314,7 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev) * components. */ pe = eeh_pe_get(edev); - if (pe) { + if (pe && !(pe->type & EEH_PE_INVALID)) { if (!edev->pe_config_addr) { pr_err("%s: PE with addr 0x%x already exists\n", __func__, edev->config_addr); @@ -331,6 +331,24 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev) edev->dn->full_name, pe->addr); return 0; + } else if (pe && (pe->type & EEH_PE_INVALID)) { + list_add_tail(&edev->list, &pe->edevs); + edev->pe = pe; + /* + * We're running to here because of PCI hotplug caused by + * EEH recovery. We need clear EEH_PE_INVALID until the top. + */ + parent = pe; + while (parent) { + if (!(parent->type & EEH_PE_INVALID)) + break; + parent->type &= ~EEH_PE_INVALID; + parent = parent->parent; + } + pr_debug("EEH: Add %s to Device PE#%x, Parent PE#%x\n", + edev->dn->full_name, pe->addr, pe->parent->addr); + + return 0; } /* Create a new EEH PE */ @@ -385,7 +403,8 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev) */ int eeh_rmv_from_parent_pe(struct eeh_dev *edev) { - struct eeh_pe *pe, *parent; + struct eeh_pe *pe, *parent, *child; + int cnt; if (!edev->pe) { pr_warning("%s: No PE found for EEH device %s\n", @@ -406,13 +425,22 @@ int eeh_rmv_from_parent_pe(struct eeh_dev *edev) */ while (1) { parent = pe->parent; - if (pe->type == EEH_PE_PHB) + if (pe->type & EEH_PE_PHB) break; - if (list_empty(&pe->edevs) && - list_empty(&pe->child_list)) { - list_del(&pe->child); - kfree(pe); + if (list_empty(&pe->edevs)) { + cnt = 0; + list_for_each_entry(child, &pe->child_list, child) { + if (!(pe->type & EEH_PE_INVALID)) { + cnt++; + break; + } + } + + if (!cnt) + pe->type |= EEH_PE_INVALID; + else + break; } pe = parent; @@ -578,9 +606,9 @@ struct pci_bus *eeh_pe_bus_get(struct eeh_pe *pe) struct eeh_dev *edev; struct pci_dev *pdev; - if (pe->type == EEH_PE_PHB) { + if (pe->type & EEH_PE_PHB) { bus = pe->phb->bus; - } else if (pe->type == EEH_PE_BUS) { + } else if (pe->type & EEH_PE_BUS) { edev = list_first_entry(&pe->edevs, struct eeh_dev, list); pdev = eeh_dev_to_pci_dev(edev); if (pdev) -- cgit v0.10.2 From 20ee6a970858a0f5711ea32cb7f855a81704cb53 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Tue, 11 Sep 2012 19:16:17 +0000 Subject: powerpc/eeh: Remove EEH PE for normal PCI hotplug Function eeh_rmv_from_parent_pe() could be called by the path of either normal PCI hotplug, or EEH recovery. For the former case, we need purge the corresponding PE on removal of the associated PE bus. The patch tries to cover that by passing more information to function pcibios_remove_pci_devices() so that we know if the corresponding PE needs to be purged or be marked as "invalid". Signed-off-by: Gavin Shan Signed-off-by: Benjamin Herrenschmidt diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h index afeb400..b0ef738 100644 --- a/arch/powerpc/include/asm/eeh.h +++ b/arch/powerpc/include/asm/eeh.h @@ -185,7 +185,7 @@ static inline void eeh_unlock(void) typedef void *(*eeh_traverse_func)(void *data, void *flag); int __devinit eeh_phb_pe_create(struct pci_controller *phb); int eeh_add_to_parent_pe(struct eeh_dev *edev); -int eeh_rmv_from_parent_pe(struct eeh_dev *edev); +int eeh_rmv_from_parent_pe(struct eeh_dev *edev, int purge_pe); void *eeh_pe_dev_traverse(struct eeh_pe *root, eeh_traverse_func fn, void *flag); void eeh_pe_restore_bars(struct eeh_pe *pe); @@ -201,7 +201,7 @@ int eeh_dev_check_failure(struct eeh_dev *edev); void __init eeh_addr_cache_build(void); void eeh_add_device_tree_early(struct device_node *); void eeh_add_device_tree_late(struct pci_bus *); -void eeh_remove_bus_device(struct pci_dev *); +void eeh_remove_bus_device(struct pci_dev *, int); /** * EEH_POSSIBLE_ERROR() -- test for possible MMIO failure. @@ -240,7 +240,7 @@ static inline void eeh_add_device_tree_early(struct device_node *dn) { } static inline void eeh_add_device_tree_late(struct pci_bus *bus) { } -static inline void eeh_remove_bus_device(struct pci_dev *dev) { } +static inline void eeh_remove_bus_device(struct pci_dev *dev, int purge_pe) { } static inline void eeh_lock(void) { } static inline void eeh_unlock(void) { } diff --git a/arch/powerpc/include/asm/pci-bridge.h b/arch/powerpc/include/asm/pci-bridge.h index 973df4d..a059cb9 100644 --- a/arch/powerpc/include/asm/pci-bridge.h +++ b/arch/powerpc/include/asm/pci-bridge.h @@ -192,6 +192,7 @@ static inline struct eeh_dev *of_node_to_eeh_dev(struct device_node *dn) extern struct pci_bus *pcibios_find_pci_bus(struct device_node *dn); /** Remove all of the PCI devices under this bus */ +extern void __pcibios_remove_pci_devices(struct pci_bus *bus, int purge_pe); extern void pcibios_remove_pci_devices(struct pci_bus *bus); /** Discover new pci devices under this bus, and add them */ diff --git a/arch/powerpc/platforms/pseries/eeh.c b/arch/powerpc/platforms/pseries/eeh.c index 18c168b..43f6ed4 100644 --- a/arch/powerpc/platforms/pseries/eeh.c +++ b/arch/powerpc/platforms/pseries/eeh.c @@ -817,6 +817,7 @@ EXPORT_SYMBOL_GPL(eeh_add_device_tree_late); /** * eeh_remove_device - Undo EEH setup for the indicated pci device * @dev: pci device to be removed + * @purge_pe: remove the PE or not * * This routine should be called when a device is removed from * a running system (e.g. by hotplug or dlpar). It unregisters @@ -824,7 +825,7 @@ EXPORT_SYMBOL_GPL(eeh_add_device_tree_late); * this device will no longer be detected after this call; thus, * i/o errors affecting this slot may leave this device unusable. */ -static void eeh_remove_device(struct pci_dev *dev) +static void eeh_remove_device(struct pci_dev *dev, int purge_pe) { struct eeh_dev *edev; @@ -843,7 +844,7 @@ static void eeh_remove_device(struct pci_dev *dev) dev->dev.archdata.edev = NULL; pci_dev_put(dev); - eeh_rmv_from_parent_pe(edev); + eeh_rmv_from_parent_pe(edev, purge_pe); eeh_addr_cache_rmv_dev(dev); eeh_sysfs_remove_device(dev); } @@ -851,21 +852,22 @@ static void eeh_remove_device(struct pci_dev *dev) /** * eeh_remove_bus_device - Undo EEH setup for the indicated PCI device * @dev: PCI device + * @purge_pe: remove the corresponding PE or not * * This routine must be called when a device is removed from the * running system through hotplug or dlpar. The corresponding * PCI address cache will be removed. */ -void eeh_remove_bus_device(struct pci_dev *dev) +void eeh_remove_bus_device(struct pci_dev *dev, int purge_pe) { struct pci_bus *bus = dev->subordinate; struct pci_dev *child, *tmp; - eeh_remove_device(dev); + eeh_remove_device(dev, purge_pe); if (bus && dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) { list_for_each_entry_safe(child, tmp, &bus->devices, bus_list) - eeh_remove_bus_device(child); + eeh_remove_bus_device(child, purge_pe); } } EXPORT_SYMBOL_GPL(eeh_remove_bus_device); diff --git a/arch/powerpc/platforms/pseries/eeh_driver.c b/arch/powerpc/platforms/pseries/eeh_driver.c index 8370ce7..37c2cf7 100644 --- a/arch/powerpc/platforms/pseries/eeh_driver.c +++ b/arch/powerpc/platforms/pseries/eeh_driver.c @@ -305,8 +305,14 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus) /* pcibios will clear the counter; save the value */ cnt = pe->freeze_count; + /* + * We don't remove the corresponding PE instances because + * we need the information afterwords. The attached EEH + * devices are expected to be attached soon when calling + * into pcibios_add_pci_devices(). + */ if (bus) - pcibios_remove_pci_devices(bus); + __pcibios_remove_pci_devices(bus, 0); /* Reset the pci controller. (Asserts RST#; resets config space). * Reconfigure bridges and devices. Don't try to bring the system diff --git a/arch/powerpc/platforms/pseries/eeh_pe.c b/arch/powerpc/platforms/pseries/eeh_pe.c index 51fc56a..30b8d96 100644 --- a/arch/powerpc/platforms/pseries/eeh_pe.c +++ b/arch/powerpc/platforms/pseries/eeh_pe.c @@ -395,13 +395,14 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev) /** * eeh_rmv_from_parent_pe - Remove one EEH device from the associated PE * @edev: EEH device + * @purge_pe: remove PE or not * * The PE hierarchy tree might be changed when doing PCI hotplug. * Also, the PCI devices or buses could be removed from the system * during EEH recovery. So we have to call the function remove the * corresponding PE accordingly if necessary. */ -int eeh_rmv_from_parent_pe(struct eeh_dev *edev) +int eeh_rmv_from_parent_pe(struct eeh_dev *edev, int purge_pe) { struct eeh_pe *pe, *parent, *child; int cnt; @@ -428,19 +429,29 @@ int eeh_rmv_from_parent_pe(struct eeh_dev *edev) if (pe->type & EEH_PE_PHB) break; - if (list_empty(&pe->edevs)) { - cnt = 0; - list_for_each_entry(child, &pe->child_list, child) { - if (!(pe->type & EEH_PE_INVALID)) { - cnt++; - break; - } + if (purge_pe) { + if (list_empty(&pe->edevs) && + list_empty(&pe->child_list)) { + list_del(&pe->child); + kfree(pe); + } else { + break; } + } else { + if (list_empty(&pe->edevs)) { + cnt = 0; + list_for_each_entry(child, &pe->child_list, child) { + if (!(pe->type & EEH_PE_INVALID)) { + cnt++; + break; + } + } - if (!cnt) - pe->type |= EEH_PE_INVALID; - else - break; + if (!cnt) + pe->type |= EEH_PE_INVALID; + else + break; + } } pe = parent; diff --git a/arch/powerpc/platforms/pseries/pci_dlpar.c b/arch/powerpc/platforms/pseries/pci_dlpar.c index 3ccebc8..261a577 100644 --- a/arch/powerpc/platforms/pseries/pci_dlpar.c +++ b/arch/powerpc/platforms/pseries/pci_dlpar.c @@ -65,27 +65,43 @@ pcibios_find_pci_bus(struct device_node *dn) EXPORT_SYMBOL_GPL(pcibios_find_pci_bus); /** - * pcibios_remove_pci_devices - remove all devices under this bus + * __pcibios_remove_pci_devices - remove all devices under this bus + * @bus: the indicated PCI bus + * @purge_pe: destroy the PE on removal of PCI devices * * Remove all of the PCI devices under this bus both from the * linux pci device tree, and from the powerpc EEH address cache. + * By default, the corresponding PE will be destroied during the + * normal PCI hotplug path. For PCI hotplug during EEH recovery, + * the corresponding PE won't be destroied and deallocated. */ -void pcibios_remove_pci_devices(struct pci_bus *bus) +void __pcibios_remove_pci_devices(struct pci_bus *bus, int purge_pe) { - struct pci_dev *dev, *tmp; + struct pci_dev *dev, *tmp; struct pci_bus *child_bus; /* First go down child busses */ list_for_each_entry(child_bus, &bus->children, node) - pcibios_remove_pci_devices(child_bus); + __pcibios_remove_pci_devices(child_bus, purge_pe); pr_debug("PCI: Removing devices on bus %04x:%02x\n", - pci_domain_nr(bus), bus->number); + pci_domain_nr(bus), bus->number); list_for_each_entry_safe(dev, tmp, &bus->devices, bus_list) { pr_debug(" * Removing %s...\n", pci_name(dev)); - eeh_remove_bus_device(dev); - pci_stop_and_remove_bus_device(dev); - } + eeh_remove_bus_device(dev, purge_pe); + pci_stop_and_remove_bus_device(dev); + } +} + +/** + * pcibios_remove_pci_devices - remove all devices under this bus + * + * Remove all of the PCI devices under this bus both from the + * linux pci device tree, and from the powerpc EEH address cache. + */ +void pcibios_remove_pci_devices(struct pci_bus *bus) +{ + __pcibios_remove_pci_devices(bus, 1); } EXPORT_SYMBOL_GPL(pcibios_remove_pci_devices); diff --git a/drivers/pci/hotplug/rpadlpar_core.c b/drivers/pci/hotplug/rpadlpar_core.c index 1e117c2..b29e20b 100644 --- a/drivers/pci/hotplug/rpadlpar_core.c +++ b/drivers/pci/hotplug/rpadlpar_core.c @@ -388,7 +388,7 @@ int dlpar_remove_pci_slot(char *drc_name, struct device_node *dn) /* Remove the EADS bridge device itself */ BUG_ON(!bus->self); pr_debug("PCI: Now removing bridge device %s\n", pci_name(bus->self)); - eeh_remove_bus_device(bus->self); + eeh_remove_bus_device(bus->self, true); pci_stop_and_remove_bus_device(bus->self); return 0; -- cgit v0.10.2 From ea81245cf4dab5ef5428102a4a00281ed6e890a6 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Tue, 11 Sep 2012 19:16:18 +0000 Subject: powerpc/eeh: Global mutex to protect PE tree We have missed lots of situations where the PE hierarchy tree need protection through the EEH global mutex. The patch fixes that for those public APIs implemented in eeh_pe.c. The only exception is eeh_pe_restore_bars() because it calls eeh_pe_dev_traverse(), which has been protected by the mutex. Signed-off-by: Gavin Shan Signed-off-by: Benjamin Herrenschmidt diff --git a/arch/powerpc/platforms/pseries/eeh_pe.c b/arch/powerpc/platforms/pseries/eeh_pe.c index 30b8d96..9d35543 100644 --- a/arch/powerpc/platforms/pseries/eeh_pe.c +++ b/arch/powerpc/platforms/pseries/eeh_pe.c @@ -99,8 +99,6 @@ static struct eeh_pe *eeh_phb_pe_get(struct pci_controller *phb) { struct eeh_pe *pe; - eeh_lock(); - list_for_each_entry(pe, &eeh_phb_pe, child) { /* * Actually, we needn't check the type since @@ -114,8 +112,6 @@ static struct eeh_pe *eeh_phb_pe_get(struct pci_controller *phb) } } - eeh_unlock(); - return NULL; } @@ -192,14 +188,21 @@ void *eeh_pe_dev_traverse(struct eeh_pe *root, return NULL; } + eeh_lock(); + /* Traverse root PE */ for (pe = root; pe; pe = eeh_pe_next(pe, root)) { eeh_pe_for_each_dev(pe, edev) { ret = fn(edev, flag); - if (ret) return ret; + if (ret) { + eeh_unlock(); + return ret; + } } } + eeh_unlock(); + return NULL; } @@ -251,9 +254,7 @@ static struct eeh_pe *eeh_pe_get(struct eeh_dev *edev) struct eeh_pe *root = eeh_phb_pe_get(edev->phb); struct eeh_pe *pe; - eeh_lock(); pe = eeh_pe_traverse(root, __eeh_pe_get, edev); - eeh_unlock(); return pe; } @@ -307,6 +308,8 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev) { struct eeh_pe *pe, *parent; + eeh_lock(); + /* * Search the PE has been existing or not according * to the PE address. If that has been existing, the @@ -316,6 +319,7 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev) pe = eeh_pe_get(edev); if (pe && !(pe->type & EEH_PE_INVALID)) { if (!edev->pe_config_addr) { + eeh_unlock(); pr_err("%s: PE with addr 0x%x already exists\n", __func__, edev->config_addr); return -EEXIST; @@ -327,6 +331,7 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev) /* Put the edev to PE */ list_add_tail(&edev->list, &pe->edevs); + eeh_unlock(); pr_debug("EEH: Add %s to Bus PE#%x\n", edev->dn->full_name, pe->addr); @@ -345,6 +350,7 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev) parent->type &= ~EEH_PE_INVALID; parent = parent->parent; } + eeh_unlock(); pr_debug("EEH: Add %s to Device PE#%x, Parent PE#%x\n", edev->dn->full_name, pe->addr, pe->parent->addr); @@ -354,6 +360,7 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev) /* Create a new EEH PE */ pe = eeh_pe_alloc(edev->phb, EEH_PE_DEVICE); if (!pe) { + eeh_unlock(); pr_err("%s: out of memory!\n", __func__); return -ENOMEM; } @@ -370,6 +377,7 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev) if (!parent) { parent = eeh_phb_pe_get(edev->phb); if (!parent) { + eeh_unlock(); pr_err("%s: No PHB PE is found (PHB Domain=%d)\n", __func__, edev->phb->global_number); edev->pe = NULL; @@ -386,6 +394,7 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev) list_add_tail(&pe->child, &parent->child_list); list_add_tail(&edev->list, &pe->edevs); edev->pe = pe; + eeh_unlock(); pr_debug("EEH: Add %s to Device PE#%x, Parent PE#%x\n", edev->dn->full_name, pe->addr, pe->parent->addr); @@ -413,6 +422,8 @@ int eeh_rmv_from_parent_pe(struct eeh_dev *edev, int purge_pe) return -EEXIST; } + eeh_lock(); + /* Remove the EEH device */ pe = edev->pe; edev->pe = NULL; @@ -457,6 +468,8 @@ int eeh_rmv_from_parent_pe(struct eeh_dev *edev, int purge_pe) pe = parent; } + eeh_unlock(); + return 0; } @@ -502,7 +515,9 @@ static void *__eeh_pe_state_mark(void *data, void *flag) */ void eeh_pe_state_mark(struct eeh_pe *pe, int state) { + eeh_lock(); eeh_pe_traverse(pe, __eeh_pe_state_mark, &state); + eeh_unlock(); } /** @@ -536,7 +551,9 @@ static void *__eeh_pe_state_clear(void *data, void *flag) */ void eeh_pe_state_clear(struct eeh_pe *pe, int state) { + eeh_lock(); eeh_pe_traverse(pe, __eeh_pe_state_clear, &state); + eeh_unlock(); } /** @@ -598,6 +615,10 @@ static void *eeh_restore_one_device_bars(void *data, void *flag) */ void eeh_pe_restore_bars(struct eeh_pe *pe) { + /* + * We needn't take the EEH lock since eeh_pe_dev_traverse() + * will take that. + */ eeh_pe_dev_traverse(pe, eeh_restore_one_device_bars, NULL); } @@ -617,6 +638,8 @@ struct pci_bus *eeh_pe_bus_get(struct eeh_pe *pe) struct eeh_dev *edev; struct pci_dev *pdev; + eeh_lock(); + if (pe->type & EEH_PE_PHB) { bus = pe->phb->bus; } else if (pe->type & EEH_PE_BUS) { @@ -626,5 +649,7 @@ struct pci_bus *eeh_pe_bus_get(struct eeh_pe *pe) bus = pdev->bus; } + eeh_unlock(); + return bus; } -- cgit v0.10.2 From 52ab3b2b1b1658f29b7d755f29c8fde8f89a92af Mon Sep 17 00:00:00 2001 From: Bharat Bhushan Date: Tue, 11 Sep 2012 00:19:45 +0000 Subject: powerpc: Remove unused __get_user64() and __put_user64() __get_user64() and __put_user64() are not used. Signed-off-by: Bharat Bhushan Signed-off-by: Benjamin Herrenschmidt diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h index 17bb40c..4db49590 100644 --- a/arch/powerpc/include/asm/uaccess.h +++ b/arch/powerpc/include/asm/uaccess.h @@ -98,11 +98,6 @@ struct exception_table_entry { * PowerPC, we can just do these as direct assignments. (Of course, the * exception handling means that it's no longer "just"...) * - * The "user64" versions of the user access functions are versions that - * allow access of 64-bit data. The "get_user" functions do not - * properly handle 64-bit data because the value gets down cast to a long. - * The "put_user" functions already handle 64-bit data properly but we add - * "user64" versions for completeness */ #define get_user(x, ptr) \ __get_user_check((x), (ptr), sizeof(*(ptr))) @@ -114,12 +109,6 @@ struct exception_table_entry { #define __put_user(x, ptr) \ __put_user_nocheck((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr))) -#ifndef __powerpc64__ -#define __get_user64(x, ptr) \ - __get_user64_nocheck((x), (ptr), sizeof(*(ptr))) -#define __put_user64(x, ptr) __put_user(x, ptr) -#endif - #define __get_user_inatomic(x, ptr) \ __get_user_nosleep((x), (ptr), sizeof(*(ptr))) #define __put_user_inatomic(x, ptr) \ -- cgit v0.10.2 From f0d1128fcb919a73941a54f80efab04fa3160dfe Mon Sep 17 00:00:00 2001 From: Tiejun Chen Date: Sun, 16 Sep 2012 23:54:29 +0000 Subject: powerpc/kprobe: Introduce a new thread flag We need to add a new thread flag, TIF_EMULATE_STACK_STORE, for emulating stack store operation while exiting exception. Signed-off-by: Tiejun Chen Signed-off-by: Benjamin Herrenschmidt diff --git a/arch/powerpc/include/asm/thread_info.h b/arch/powerpc/include/asm/thread_info.h index e942203..8ceea14 100644 --- a/arch/powerpc/include/asm/thread_info.h +++ b/arch/powerpc/include/asm/thread_info.h @@ -104,6 +104,8 @@ static inline struct thread_info *current_thread_info(void) #define TIF_NOTIFY_RESUME 13 /* callback before returning to user */ #define TIF_UPROBE 14 /* breakpointed or single-stepping */ #define TIF_SYSCALL_TRACEPOINT 15 /* syscall tracepoint instrumentation */ +#define TIF_EMULATE_STACK_STORE 16 /* Is an instruction emulation + for stack store? */ /* as above, but as bit values */ #define _TIF_SYSCALL_TRACE (1< Date: Sun, 16 Sep 2012 23:54:30 +0000 Subject: powerpc/kprobe: Complete kprobe and migrate exception frame We can't emulate stwu since that may corrupt current exception stack. So we will have to do real store operation in the exception return code. Firstly we'll allocate a trampoline exception frame below the kprobed function stack and copy the current exception frame to the trampoline. Then we can do this real store operation to implement 'stwu', and reroute the trampoline frame to r1 to complete this exception migration. Signed-off-by: Tiejun Chen Signed-off-by: Benjamin Herrenschmidt diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index ead5016..af37528 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -831,19 +831,56 @@ restore_user: bnel- load_dbcr0 #endif -#ifdef CONFIG_PREEMPT b restore /* N.B. the only way to get here is from the beq following ret_from_except. */ resume_kernel: - /* check current_thread_info->preempt_count */ + /* check current_thread_info, _TIF_EMULATE_STACK_STORE */ CURRENT_THREAD_INFO(r9, r1) + lwz r8,TI_FLAGS(r9) + andis. r8,r8,_TIF_EMULATE_STACK_STORE@h + beq+ 1f + + addi r8,r1,INT_FRAME_SIZE /* Get the kprobed function entry */ + + lwz r3,GPR1(r1) + subi r3,r3,INT_FRAME_SIZE /* dst: Allocate a trampoline exception frame */ + mr r4,r1 /* src: current exception frame */ + mr r1,r3 /* Reroute the trampoline frame to r1 */ + + /* Copy from the original to the trampoline. */ + li r5,INT_FRAME_SIZE/4 /* size: INT_FRAME_SIZE */ + li r6,0 /* start offset: 0 */ + mtctr r5 +2: lwzx r0,r6,r4 + stwx r0,r6,r3 + addi r6,r6,4 + bdnz 2b + + /* Do real store operation to complete stwu */ + lwz r5,GPR1(r1) + stw r8,0(r5) + + /* Clear _TIF_EMULATE_STACK_STORE flag */ + lis r11,_TIF_EMULATE_STACK_STORE@h + addi r5,r9,TI_FLAGS +0: lwarx r8,0,r5 + andc r8,r8,r11 +#ifdef CONFIG_IBM405_ERR77 + dcbt 0,r5 +#endif + stwcx. r8,0,r5 + bne- 0b +1: + +#ifdef CONFIG_PREEMPT + /* check current_thread_info->preempt_count */ lwz r0,TI_PREEMPT(r9) cmpwi 0,r0,0 /* if non-zero, just restore regs and return */ bne restore - lwz r0,TI_FLAGS(r9) - andi. r0,r0,_TIF_NEED_RESCHED + andi. r8,r8,_TIF_NEED_RESCHED beq+ restore + lwz r3,_MSR(r1) andi. r0,r3,MSR_EE /* interrupts off? */ beq restore /* don't schedule if so */ #ifdef CONFIG_TRACE_IRQFLAGS @@ -864,8 +901,6 @@ resume_kernel: */ bl trace_hardirqs_on #endif -#else -resume_kernel: #endif /* CONFIG_PREEMPT */ /* interrupts are hard-disabled at this point */ diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index b40e0b4..0e931aa 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -593,6 +593,41 @@ _GLOBAL(ret_from_except_lite) b .ret_from_except resume_kernel: + /* check current_thread_info, _TIF_EMULATE_STACK_STORE */ + CURRENT_THREAD_INFO(r9, r1) + ld r8,TI_FLAGS(r9) + andis. r8,r8,_TIF_EMULATE_STACK_STORE@h + beq+ 1f + + addi r8,r1,INT_FRAME_SIZE /* Get the kprobed function entry */ + + lwz r3,GPR1(r1) + subi r3,r3,INT_FRAME_SIZE /* dst: Allocate a trampoline exception frame */ + mr r4,r1 /* src: current exception frame */ + mr r1,r3 /* Reroute the trampoline frame to r1 */ + + /* Copy from the original to the trampoline. */ + li r5,INT_FRAME_SIZE/8 /* size: INT_FRAME_SIZE */ + li r6,0 /* start offset: 0 */ + mtctr r5 +2: ldx r0,r6,r4 + stdx r0,r6,r3 + addi r6,r6,8 + bdnz 2b + + /* Do real store operation to complete stwu */ + lwz r5,GPR1(r1) + std r8,0(r5) + + /* Clear _TIF_EMULATE_STACK_STORE flag */ + lis r11,_TIF_EMULATE_STACK_STORE@h + addi r5,r9,TI_FLAGS + ldarx r4,0,r5 + andc r4,r4,r11 + stdcx. r4,0,r5 + bne- 0b +1: + #ifdef CONFIG_PREEMPT /* Check if we need to preempt */ andi. r0,r4,_TIF_NEED_RESCHED -- cgit v0.10.2 From 8e9f69371536981a2a8c9ee4a49dbe3aa4946df4 Mon Sep 17 00:00:00 2001 From: Tiejun Chen Date: Sun, 16 Sep 2012 23:54:31 +0000 Subject: powerpc/kprobe: Don't emulate store when kprobe stwu r1 We don't do the real store operation for kprobing 'stwu Rx,(y)R1' since this may corrupt the exception frame, now we will do this operation safely in exception return code after migrate current exception frame below the kprobed function stack. So we only update gpr[1] here and trigger a thread flag to mask this. Note we should make sure if we trigger kernel stack over flow. Signed-off-by: Tiejun Chen Signed-off-by: Benjamin Herrenschmidt diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c index 9a52349..e15c521 100644 --- a/arch/powerpc/lib/sstep.c +++ b/arch/powerpc/lib/sstep.c @@ -566,7 +566,7 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) unsigned long int ea; unsigned int cr, mb, me, sh; int err; - unsigned long old_ra; + unsigned long old_ra, val3; long ival; opcode = instr >> 26; @@ -1486,11 +1486,43 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) goto ldst_done; case 36: /* stw */ - case 37: /* stwu */ val = regs->gpr[rd]; err = write_mem(val, dform_ea(instr, regs), 4, regs); goto ldst_done; + case 37: /* stwu */ + val = regs->gpr[rd]; + val3 = dform_ea(instr, regs); + /* + * For PPC32 we always use stwu to change stack point with r1. So + * this emulated store may corrupt the exception frame, now we + * have to provide the exception frame trampoline, which is pushed + * below the kprobed function stack. So we only update gpr[1] but + * don't emulate the real store operation. We will do real store + * operation safely in exception return code by checking this flag. + */ + if ((ra == 1) && !(regs->msr & MSR_PR) \ + && (val3 >= (regs->gpr[1] - STACK_INT_FRAME_SIZE))) { + /* + * Check if we will touch kernel sack overflow + */ + if (val3 - STACK_INT_FRAME_SIZE <= current->thread.ksp_limit) { + printk(KERN_CRIT "Can't kprobe this since Kernel stack overflow.\n"); + err = -EINVAL; + break; + } + + /* + * Check if we already set since that means we'll + * lose the previous value. + */ + WARN_ON(test_thread_flag(TIF_EMULATE_STACK_STORE)); + set_thread_flag(TIF_EMULATE_STACK_STORE); + err = 0; + } else + err = write_mem(val, val3, 4, regs); + goto ldst_done; + case 38: /* stb */ case 39: /* stbu */ val = regs->gpr[rd]; -- cgit v0.10.2 From feadf7c0a1a7c08c74bebb4a13b755f8c40e3bbc Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Mon, 17 Sep 2012 04:34:27 +0000 Subject: powerpc/eeh: Lock module while handling EEH event The EEH core is talking with the PCI device driver to determine the action (purely reset, or PCI device removal). During the period, the driver might be unloaded and in turn causes kernel crash as follows: EEH: Detected PCI bus error on PHB#4-PE#10000 EEH: This PCI device has failed 3 times in the last hour lpfc 0004:01:00.0: 0:2710 PCI channel disable preparing for reset Unable to handle kernel paging request for data at address 0x00000490 Faulting instruction address: 0xd00000000e682c90 cpu 0x1: Vector: 300 (Data Access) at [c000000fc75ffa20] pc: d00000000e682c90: .lpfc_io_error_detected+0x30/0x240 [lpfc] lr: d00000000e682c8c: .lpfc_io_error_detected+0x2c/0x240 [lpfc] sp: c000000fc75ffca0 msr: 8000000000009032 dar: 490 dsisr: 40000000 current = 0xc000000fc79b88b0 paca = 0xc00000000edb0380 softe: 0 irq_happened: 0x00 pid = 3386, comm = eehd enter ? for help [c000000fc75ffca0] c000000fc75ffd30 (unreliable) [c000000fc75ffd30] c00000000004fd3c .eeh_report_error+0x7c/0xf0 [c000000fc75ffdc0] c00000000004ee00 .eeh_pe_dev_traverse+0xa0/0x180 [c000000fc75ffe70] c00000000004ffd8 .eeh_handle_event+0x68/0x300 [c000000fc75fff00] c0000000000503a0 .eeh_event_handler+0x130/0x1a0 [c000000fc75fff90] c000000000020138 .kernel_thread+0x54/0x70 1:mon> The patch increases the reference of the corresponding driver modules while EEH core does the negotiation with PCI device driver so that the corresponding driver modules can't be unloaded during the period and we're safe to refer the callbacks. Cc: stable@vger.kernel.org Reported-by: Alexey Kardashevskiy Signed-off-by: Gavin Shan Signed-off-by: Benjamin Herrenschmidt diff --git a/arch/powerpc/platforms/pseries/eeh_driver.c b/arch/powerpc/platforms/pseries/eeh_driver.c index 37c2cf7..a3fefb6 100644 --- a/arch/powerpc/platforms/pseries/eeh_driver.c +++ b/arch/powerpc/platforms/pseries/eeh_driver.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include @@ -47,6 +48,41 @@ static inline const char *eeh_pcid_name(struct pci_dev *pdev) return ""; } +/** + * eeh_pcid_get - Get the PCI device driver + * @pdev: PCI device + * + * The function is used to retrieve the PCI device driver for + * the indicated PCI device. Besides, we will increase the reference + * of the PCI device driver to prevent that being unloaded on + * the fly. Otherwise, kernel crash would be seen. + */ +static inline struct pci_driver *eeh_pcid_get(struct pci_dev *pdev) +{ + if (!pdev || !pdev->driver) + return NULL; + + if (!try_module_get(pdev->driver->driver.owner)) + return NULL; + + return pdev->driver; +} + +/** + * eeh_pcid_put - Dereference on the PCI device driver + * @pdev: PCI device + * + * The function is called to do dereference on the PCI device + * driver of the indicated PCI device. + */ +static inline void eeh_pcid_put(struct pci_dev *pdev) +{ + if (!pdev || !pdev->driver) + return; + + module_put(pdev->driver->driver.owner); +} + #if 0 static void print_device_node_tree(struct pci_dn *pdn, int dent) { @@ -128,23 +164,24 @@ static void *eeh_report_error(void *data, void *userdata) struct eeh_dev *edev = (struct eeh_dev *)data; struct pci_dev *dev = eeh_dev_to_pci_dev(edev); enum pci_ers_result rc, *res = userdata; - struct pci_driver *driver = dev->driver; + struct pci_driver *driver; /* We might not have the associated PCI device, * then we should continue for next one. */ if (!dev) return NULL; - dev->error_state = pci_channel_io_frozen; - if (!driver) - return NULL; + driver = eeh_pcid_get(dev); + if (!driver) return NULL; eeh_disable_irq(dev); if (!driver->err_handler || - !driver->err_handler->error_detected) + !driver->err_handler->error_detected) { + eeh_pcid_put(dev); return NULL; + } rc = driver->err_handler->error_detected(dev, pci_channel_io_frozen); @@ -152,6 +189,7 @@ static void *eeh_report_error(void *data, void *userdata) if (rc == PCI_ERS_RESULT_NEED_RESET) *res = rc; if (*res == PCI_ERS_RESULT_NONE) *res = rc; + eeh_pcid_put(dev); return NULL; } @@ -171,12 +209,14 @@ static void *eeh_report_mmio_enabled(void *data, void *userdata) enum pci_ers_result rc, *res = userdata; struct pci_driver *driver; - if (!dev) return NULL; + driver = eeh_pcid_get(dev); + if (!driver) return NULL; - if (!(driver = dev->driver) || - !driver->err_handler || - !driver->err_handler->mmio_enabled) + if (!driver->err_handler || + !driver->err_handler->mmio_enabled) { + eeh_pcid_put(dev); return NULL; + } rc = driver->err_handler->mmio_enabled(dev); @@ -184,6 +224,7 @@ static void *eeh_report_mmio_enabled(void *data, void *userdata) if (rc == PCI_ERS_RESULT_NEED_RESET) *res = rc; if (*res == PCI_ERS_RESULT_NONE) *res = rc; + eeh_pcid_put(dev); return NULL; } @@ -204,16 +245,19 @@ static void *eeh_report_reset(void *data, void *userdata) enum pci_ers_result rc, *res = userdata; struct pci_driver *driver; - if (!dev || !(driver = dev->driver)) - return NULL; - + if (!dev) return NULL; dev->error_state = pci_channel_io_normal; + driver = eeh_pcid_get(dev); + if (!driver) return NULL; + eeh_enable_irq(dev); if (!driver->err_handler || - !driver->err_handler->slot_reset) + !driver->err_handler->slot_reset) { + eeh_pcid_put(dev); return NULL; + } rc = driver->err_handler->slot_reset(dev); if ((*res == PCI_ERS_RESULT_NONE) || @@ -221,6 +265,7 @@ static void *eeh_report_reset(void *data, void *userdata) if (*res == PCI_ERS_RESULT_DISCONNECT && rc == PCI_ERS_RESULT_NEED_RESET) *res = rc; + eeh_pcid_put(dev); return NULL; } @@ -240,20 +285,22 @@ static void *eeh_report_resume(void *data, void *userdata) struct pci_driver *driver; if (!dev) return NULL; - dev->error_state = pci_channel_io_normal; - if (!(driver = dev->driver)) - return NULL; + driver = eeh_pcid_get(dev); + if (!driver) return NULL; eeh_enable_irq(dev); if (!driver->err_handler || - !driver->err_handler->resume) + !driver->err_handler->resume) { + eeh_pcid_put(dev); return NULL; + } driver->err_handler->resume(dev); + eeh_pcid_put(dev); return NULL; } @@ -272,20 +319,22 @@ static void *eeh_report_failure(void *data, void *userdata) struct pci_driver *driver; if (!dev) return NULL; - dev->error_state = pci_channel_io_perm_failure; - if (!(driver = dev->driver)) - return NULL; + driver = eeh_pcid_get(dev); + if (!driver) return NULL; eeh_disable_irq(dev); if (!driver->err_handler || - !driver->err_handler->error_detected) + !driver->err_handler->error_detected) { + eeh_pcid_put(dev); return NULL; + } driver->err_handler->error_detected(dev, pci_channel_io_perm_failure); + eeh_pcid_put(dev); return NULL; } -- cgit v0.10.2 From 1e38b7140185e384da216aff66a711df09b5afc9 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Mon, 17 Sep 2012 04:34:28 +0000 Subject: powerpc/eeh: Fix crash on converting OF node to edev The kernel crash was reported by Alexy. He was testing some feature with private kernel, in which Alexy added some code in pci_pm_reset() to read the CSR after writting it. The bug could be reproduced on Fiber Channel card (Fibre Channel: Emulex Corporation Saturn-X: LightPulse Fibre Channel Host Adapter (rev 03)) by the following commands. # echo 1 > /sys/devices/pci0004:01/0004:01:00.0/reset # rmmod lpfc # modprobe lpfc The history behind the test case is that those additional config space reading operations in pci_pm_reset() would cause EEH error, but we didn't detect EEH error until "modprobe lpfc". For the case, all the PCI devices on PCI bus (0004:01) were removed and added after PE reset. Then the EEH devices would be figured out again based on the OF nodes. Unfortunately, there were some child OF nodes under PCI device (0004:01:00.0), but they didn't have attached PCI_DN since they're invisible from PCI domain. However, we were still trying to convert OF node to EEH device without checking on the attached PCI_DN. Eventually, it caused the kernel crash as follows: Unable to handle kernel paging request for data at address 0x00000030 Faulting instruction address: 0xc00000000004d888 cpu 0x0: Vector: 300 (Data Access) at [c000000fc797b950] pc: c00000000004d888: .eeh_add_device_tree_early+0x78/0x140 lr: c00000000004d880: .eeh_add_device_tree_early+0x70/0x140 sp: c000000fc797bbd0 msr: 8000000000009032 dar: 30 dsisr: 40000000 current = 0xc000000fc78d9f70 paca = 0xc00000000edb0000 softe: 0 irq_happened: 0x00 pid = 2951, comm = eehd enter ? for help [c000000fc797bc50] c00000000004d848 .eeh_add_device_tree_early+0x38/0x140 [c000000fc797bcd0] c00000000004d848 .eeh_add_device_tree_early+0x38/0x140 [c000000fc797bd50] c000000000051b54 .pcibios_add_pci_devices+0x34/0x190 [c000000fc797bde0] c00000000004fb10 .eeh_reset_device+0x100/0x160 [c000000fc797be70] c0000000000502dc .eeh_handle_event+0x19c/0x300 [c000000fc797bf00] c000000000050570 .eeh_event_handler+0x130/0x1a0 [c000000fc797bf90] c000000000020138 .kernel_thread+0x54/0x70 The patch changes of_node_to_eeh_dev() and just returns NULL if the passed OF node doesn't have attached PCI_DN. Cc: stable@vger.kernel.org Reported-by: Alexey Kardashevskiy Signed-off-by: Gavin Shan Signed-off-by: Benjamin Herrenschmidt diff --git a/arch/powerpc/include/asm/pci-bridge.h b/arch/powerpc/include/asm/pci-bridge.h index a059cb9..025a130 100644 --- a/arch/powerpc/include/asm/pci-bridge.h +++ b/arch/powerpc/include/asm/pci-bridge.h @@ -182,6 +182,14 @@ static inline int pci_device_from_OF_node(struct device_node *np, #if defined(CONFIG_EEH) static inline struct eeh_dev *of_node_to_eeh_dev(struct device_node *dn) { + /* + * For those OF nodes whose parent isn't PCI bridge, they + * don't have PCI_DN actually. So we have to skip them for + * any EEH operations. + */ + if (!dn || !PCI_DN(dn)) + return NULL; + return PCI_DN(dn)->edev; } #else diff --git a/arch/powerpc/platforms/pseries/eeh.c b/arch/powerpc/platforms/pseries/eeh.c index 43f6ed4..9a04322 100644 --- a/arch/powerpc/platforms/pseries/eeh.c +++ b/arch/powerpc/platforms/pseries/eeh.c @@ -728,7 +728,7 @@ static void eeh_add_device_early(struct device_node *dn) { struct pci_controller *phb; - if (!dn || !of_node_to_eeh_dev(dn)) + if (!of_node_to_eeh_dev(dn)) return; phb = of_node_to_eeh_dev(dn)->phb; -- cgit v0.10.2 From e6651de9cc701ab4829b3a11a7ace85a79405d32 Mon Sep 17 00:00:00 2001 From: Zhao Chenhui Date: Fri, 20 Jul 2012 20:47:01 +0800 Subject: powerpc/smp: Do not disable IPI interrupts during suspend During suspend, all interrupts including IPI will be disabled. In this case, the suspend process will hang in SMP. To prevent this, pass the flag IRQF_NO_SUSPEND when requesting IPI irq. Signed-off-by: Zhao Chenhui Signed-off-by: Li Yang Acked-by: Benjamin Herrenschmidt Signed-off-by: Kumar Gala diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index a51ed20..2b952b5 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -171,7 +171,7 @@ int smp_request_message_ipi(int virq, int msg) } #endif err = request_irq(virq, smp_ipi_action[msg], - IRQF_PERCPU | IRQF_NO_THREAD, + IRQF_PERCPU | IRQF_NO_THREAD | IRQF_NO_SUSPEND, smp_ipi_name[msg], 0); WARN(err < 0, "unable to request_irq %d for %s (rc %d)\n", virq, smp_ipi_name[msg], err); -- cgit v0.10.2 From 1919aac36c2472ad010102661d5aa6d55f0d66a4 Mon Sep 17 00:00:00 2001 From: Chunhe Lan Date: Fri, 14 Sep 2012 15:57:20 -0400 Subject: powerpc/85xx: Enable USB support in p1023rds_defconfig Signed-off-by: Chunhe Lan Signed-off-by: Kumar Gala diff --git a/arch/powerpc/configs/85xx/p1023rds_defconfig b/arch/powerpc/configs/85xx/p1023rds_defconfig index 26e541c..b80bcc6 100644 --- a/arch/powerpc/configs/85xx/p1023rds_defconfig +++ b/arch/powerpc/configs/85xx/p1023rds_defconfig @@ -112,6 +112,12 @@ CONFIG_SND=y CONFIG_SND_MIXER_OSS=y CONFIG_SND_PCM_OSS=y # CONFIG_SND_SUPPORT_OLD_API is not set +CONFIG_USB=y +CONFIG_USB_DEVICEFS=y +CONFIG_USB_MON=y +CONFIG_USB_EHCI_HCD=y +CONFIG_USB_EHCI_FSL=y +CONFIG_USB_STORAGE=y CONFIG_EDAC=y CONFIG_EDAC_MM_EDAC=y CONFIG_RTC_CLASS=y -- cgit v0.10.2 From 721c0705906e2ba69907fd2ca837bec6b6a8dbcb Mon Sep 17 00:00:00 2001 From: Prabhakar Kushwaha Date: Thu, 13 Sep 2012 13:34:11 +0530 Subject: powerpc/mpc85xx: Update interrupt handling for IFC controller IFC may have one or two interrupts. If two interrupt specifiers are present, the first is the "common" interrupt (CM_EVTER_STAT), and the second is the NAND interrupt (NAND_EVTER_STAT). If there is only one, that interrupt reports both types of event. Signed-off-by: Prabhakar Kushwaha Signed-off-by: Kumar Gala diff --git a/arch/powerpc/sysdev/fsl_ifc.c b/arch/powerpc/sysdev/fsl_ifc.c index b31f19f..097cc9d2 100644 --- a/arch/powerpc/sysdev/fsl_ifc.c +++ b/arch/powerpc/sysdev/fsl_ifc.c @@ -244,12 +244,6 @@ static int __devinit fsl_ifc_ctrl_probe(struct platform_device *dev) /* get the nand machine irq */ fsl_ifc_ctrl_dev->nand_irq = irq_of_parse_and_map(dev->dev.of_node, 1); - if (fsl_ifc_ctrl_dev->nand_irq == NO_IRQ) { - dev_err(&dev->dev, "failed to get irq resource " - "for NAND Machine\n"); - ret = -ENODEV; - goto err; - } fsl_ifc_ctrl_dev->dev = &dev->dev; @@ -267,12 +261,14 @@ static int __devinit fsl_ifc_ctrl_probe(struct platform_device *dev) goto err_irq; } - ret = request_irq(fsl_ifc_ctrl_dev->nand_irq, fsl_ifc_nand_irq, 0, - "fsl-ifc-nand", fsl_ifc_ctrl_dev); - if (ret != 0) { - dev_err(&dev->dev, "failed to install irq (%d)\n", - fsl_ifc_ctrl_dev->nand_irq); - goto err_nandirq; + if (fsl_ifc_ctrl_dev->nand_irq) { + ret = request_irq(fsl_ifc_ctrl_dev->nand_irq, fsl_ifc_nand_irq, + 0, "fsl-ifc-nand", fsl_ifc_ctrl_dev); + if (ret != 0) { + dev_err(&dev->dev, "failed to install irq (%d)\n", + fsl_ifc_ctrl_dev->nand_irq); + goto err_nandirq; + } } return 0; -- cgit v0.10.2 From 4d56dec5dca496655ef035ef3b80f7c47dc22b77 Mon Sep 17 00:00:00 2001 From: Jia Hongtao Date: Tue, 18 Sep 2012 17:57:48 +0800 Subject: powerpc/fsl-pci: fix warning when CONFIG_SWIOTLB is disabled Fix the following warning: arch/powerpc/sysdev/fsl_pci.c: In function 'fsl_pci_probe': arch/powerpc/sysdev/fsl_pci.c:867:25: error: unused variable 'hose' Signed-off-by: Jia Hongtao Acked-by: Michael Neuling Signed-off-by: Kumar Gala diff --git a/arch/powerpc/sysdev/fsl_pci.c b/arch/powerpc/sysdev/fsl_pci.c index 2ff3576..3d6f4d8 100644 --- a/arch/powerpc/sysdev/fsl_pci.c +++ b/arch/powerpc/sysdev/fsl_pci.c @@ -864,7 +864,9 @@ static int __devinit fsl_pci_probe(struct platform_device *pdev) { int ret; struct device_node *node; +#ifdef CONFIG_SWIOTLB struct pci_controller *hose; +#endif node = pdev->dev.of_node; ret = fsl_add_bridge(node, fsl_pci_primary == node); -- cgit v0.10.2 From e6878835ac4794f25385522d29c634b7bbb7cca9 Mon Sep 17 00:00:00 2001 From: "sukadev@linux.vnet.ibm.com" Date: Tue, 18 Sep 2012 20:56:11 +0000 Subject: powerpc/perf: Sample only if SIAR-Valid bit is set in P7+ powerpc/perf: Sample only if SIAR-Valid bit is set in P7+ On POWER7+ two new bits (mmcra[35] and mmcra[36]) indicate whether the contents of SIAR and SDAR are valid. For marked instructions on P7+, we must save the contents of SIAR and SDAR registers only if these new bits are set. This code/check for the SIAR-Valid bit is specific to P7+, so rather than waste a CPU-feature bit use the PVR flag. Note that Carl Love proposed a similar change for oprofile: https://lkml.org/lkml/2012/6/22/309 Signed-off-by: Sukadev Bhattiprolu Signed-off-by: Benjamin Herrenschmidt diff --git a/arch/powerpc/include/asm/perf_event_server.h b/arch/powerpc/include/asm/perf_event_server.h index 078019b..9710be3 100644 --- a/arch/powerpc/include/asm/perf_event_server.h +++ b/arch/powerpc/include/asm/perf_event_server.h @@ -49,6 +49,7 @@ struct power_pmu { #define PPMU_ALT_SIPR 2 /* uses alternate posn for SIPR/HV */ #define PPMU_NO_SIPR 4 /* no SIPR/HV in MMCRA at all */ #define PPMU_NO_CONT_SAMPLING 8 /* no continuous sampling */ +#define PPMU_SIAR_VALID 16 /* Processor has SIAR Valid bit */ /* * Values for flags to get_alternatives() diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index a1096fb..d24c141 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -606,6 +606,10 @@ #define POWER6_MMCRA_SIPR 0x0000020000000000ULL #define POWER6_MMCRA_THRM 0x00000020UL #define POWER6_MMCRA_OTHER 0x0000000EUL + +#define POWER7P_MMCRA_SIAR_VALID 0x10000000 /* P7+ SIAR contents valid */ +#define POWER7P_MMCRA_SDAR_VALID 0x08000000 /* P7+ SDAR contents valid */ + #define SPRN_PMC1 787 #define SPRN_PMC2 788 #define SPRN_PMC3 789 diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index fb55da9..0db88f5 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -82,6 +82,11 @@ static inline int perf_intr_is_nmi(struct pt_regs *regs) return 0; } +static inline int siar_valid(struct pt_regs *regs) +{ + return 1; +} + #endif /* CONFIG_PPC32 */ /* @@ -106,14 +111,20 @@ static inline unsigned long perf_ip_adjust(struct pt_regs *regs) * If we're not doing instruction sampling, give them the SDAR * (sampled data address). If we are doing instruction sampling, then * only give them the SDAR if it corresponds to the instruction - * pointed to by SIAR; this is indicated by the [POWER6_]MMCRA_SDSYNC - * bit in MMCRA. + * pointed to by SIAR; this is indicated by the [POWER6_]MMCRA_SDSYNC or + * the [POWER7P_]MMCRA_SDAR_VALID bit in MMCRA. */ static inline void perf_get_data_addr(struct pt_regs *regs, u64 *addrp) { unsigned long mmcra = regs->dsisr; - unsigned long sdsync = (ppmu->flags & PPMU_ALT_SIPR) ? - POWER6_MMCRA_SDSYNC : MMCRA_SDSYNC; + unsigned long sdsync; + + if (ppmu->flags & PPMU_SIAR_VALID) + sdsync = POWER7P_MMCRA_SDAR_VALID; + else if (ppmu->flags & PPMU_ALT_SIPR) + sdsync = POWER6_MMCRA_SDSYNC; + else + sdsync = MMCRA_SDSYNC; if (!(mmcra & MMCRA_SAMPLE_ENABLE) || (mmcra & sdsync)) *addrp = mfspr(SPRN_SDAR); @@ -230,6 +241,24 @@ static inline int perf_intr_is_nmi(struct pt_regs *regs) return !regs->softe; } +/* + * On processors like P7+ that have the SIAR-Valid bit, marked instructions + * must be sampled only if the SIAR-valid bit is set. + * + * For unmarked instructions and for processors that don't have the SIAR-Valid + * bit, assume that SIAR is valid. + */ +static inline int siar_valid(struct pt_regs *regs) +{ + unsigned long mmcra = regs->dsisr; + int marked = mmcra & MMCRA_SAMPLE_ENABLE; + + if ((ppmu->flags & PPMU_SIAR_VALID) && marked) + return mmcra & POWER7P_MMCRA_SIAR_VALID; + + return 1; +} + #endif /* CONFIG_PPC64 */ static void perf_event_interrupt(struct pt_regs *regs); @@ -1291,6 +1320,7 @@ struct pmu power_pmu = { .event_idx = power_pmu_event_idx, }; + /* * A counter has overflowed; update its count and record * things if requested. Note that interrupts are hard-disabled @@ -1324,7 +1354,7 @@ static void record_and_restart(struct perf_event *event, unsigned long val, left += period; if (left <= 0) left = period; - record = 1; + record = siar_valid(regs); event->hw.last_period = event->hw.sample_period; } if (left < 0x80000000LL) @@ -1374,8 +1404,10 @@ unsigned long perf_instruction_pointer(struct pt_regs *regs) { unsigned long use_siar = regs->result; - if (use_siar) + if (use_siar && siar_valid(regs)) return mfspr(SPRN_SIAR) + perf_ip_adjust(regs); + else if (use_siar) + return 0; // no valid instruction pointer else return regs->nip; } diff --git a/arch/powerpc/perf/power7-pmu.c b/arch/powerpc/perf/power7-pmu.c index 1251e4d..441af08 100644 --- a/arch/powerpc/perf/power7-pmu.c +++ b/arch/powerpc/perf/power7-pmu.c @@ -373,6 +373,9 @@ static int __init init_power7_pmu(void) strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power7")) return -ENODEV; + if (pvr_version_is(PVR_POWER7p)) + power7_pmu.flags |= PPMU_SIAR_VALID; + return register_power_pmu(&power7_pmu); } -- cgit v0.10.2 From 466921c5a4669f4315528a25f9afd66601ce2c04 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 20 Sep 2012 22:07:58 +0000 Subject: powerpc: Set paca->data_offset = 0 for boot cpu In commit 407821a we assigned a poison value to the paca->data_offset. Unfortunately with CONFIG_LOCK_STAT=y lockdep will read & write to percpu data very early in boot, prior to us initialising the percpu areas, leading to a crash. We have been getting away with this because the data_offset was previously set to zero. This causes lockdep to read & write to the initial copy of the percpu variables, which are discarded later in boot. Although that is "fishy", it does work, and for lock statistics it is no big deal to discard the counts from early boot. So set the paca->data_offset = 0 for the boot cpu paca only. Reported-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman Tested-by: Aneesh Kumar K.V Signed-off-by: Benjamin Herrenschmidt diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 389bd4f..efb6a41 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -208,6 +208,8 @@ void __init early_setup(unsigned long dt_ptr) /* Fix up paca fields required for the boot cpu */ get_paca()->cpu_start = 1; + /* Allow percpu accesses to "work" until we setup percpu data */ + get_paca()->data_offset = 0; /* Probe the machine type */ probe_machine(); -- cgit v0.10.2 From 8e166991c0c3631b8af5a26990df77fa2a1d09e6 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 20 Sep 2012 22:08:28 +0000 Subject: powerpc: Remove tlb batching hack for nighthawk In hpte_init_native() we call tlb_batching_enabled() to decide if we should setup ppc_md.flush_hash_range. tlb_batching_enabled() checks the _unflattened_ device tree, to see if we are running on a nighthawk. Since commit a223535 ("dont allow pSeries_probe to succeed without initialising MMU", Dec 2006), hpte_init_native() has been called from pSeries_probe() - at which point we have not yet unflattened the device tree. This means tlb_batching_enabled() will always return true, so the hack has effectively been disabled since Dec 2006. Ergo, I think we can drop it. Signed-off-by: Michael Ellerman Acked-by: Anton Blanchard Signed-off-by: Benjamin Herrenschmidt diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c index a4a1c72..ffc1e00 100644 --- a/arch/powerpc/mm/hash_native_64.c +++ b/arch/powerpc/mm/hash_native_64.c @@ -569,29 +569,6 @@ static void native_flush_hash_range(unsigned long number, int local) local_irq_restore(flags); } -#ifdef CONFIG_PPC_PSERIES -/* Disable TLB batching on nighthawk */ -static inline int tlb_batching_enabled(void) -{ - struct device_node *root = of_find_node_by_path("/"); - int enabled = 1; - - if (root) { - const char *model = of_get_property(root, "model", NULL); - if (model && !strcmp(model, "IBM,9076-N81")) - enabled = 0; - of_node_put(root); - } - - return enabled; -} -#else -static inline int tlb_batching_enabled(void) -{ - return 1; -} -#endif - void __init hpte_init_native(void) { ppc_md.hpte_invalidate = native_hpte_invalidate; @@ -600,6 +577,5 @@ void __init hpte_init_native(void) ppc_md.hpte_insert = native_hpte_insert; ppc_md.hpte_remove = native_hpte_remove; ppc_md.hpte_clear_all = native_hpte_clear; - if (tlb_batching_enabled()) - ppc_md.flush_hash_range = native_flush_hash_range; + ppc_md.flush_hash_range = native_flush_hash_range; } -- cgit v0.10.2 From 7844663a31e97930e3949430573452ac245bfdd5 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Thu, 20 Sep 2012 23:29:46 +0000 Subject: powerpc/eeh: Don't release eeh_mutex in eeh_phb_pe_get Acked-by: Gavin Shan ===================================== [ BUG: bad unlock balance detected! ] 3.6.0-rc5-00338-gcaa1d63-dirty #6 Not tainted ------------------------------------- swapper/0/1 is trying to release lock (eeh_mutex) at: [] .eeh_add_to_parent_pe+0x318/0x410 but there are no more locks to release! other info that might help us debug this: no locks held by swapper/0/1. stack backtrace: Call Trace: [c00000003e483870] [c000000000013310] .show_stack+0x70/0x1c0 (unreliable) [c00000003e483920] [c0000000000d8310] .print_unlock_inbalance_bug+0x110/0x120 [c00000003e4839b0] [c0000000000d9a50] .lock_release+0x1d0/0x240 [c00000003e483a60] [c000000000778064] .__mutex_unlock_slowpath+0xb4/0x250 [c00000003e483b10] [c000000000058218] .eeh_add_to_parent_pe+0x318/0x410 [c00000003e483bc0] [c00000000005a118] .pseries_eeh_of_probe+0x258/0x2f0 [c00000003e483cc0] [c000000000032528] .traverse_pci_devices+0xa8/0x150 [c00000003e483d70] [c000000000aa7288] .eeh_init+0xd4/0x140 [c00000003e483e00] [c00000000000abc4] .do_one_initcall+0x64/0x1e0 [c00000003e483ec0] [c000000000a90418] .kernel_init+0x1e8/0x2bc [c00000003e483f90] [c00000000002048c] .kernel_thread+0x54/0x70 EEH: PCI Enhanced I/O Error Handling Enabled Signed-off-by: Aneesh Kumar K.V Signed-off-by: Benjamin Herrenschmidt diff --git a/arch/powerpc/platforms/pseries/eeh_pe.c b/arch/powerpc/platforms/pseries/eeh_pe.c index 9d35543..797cd18 100644 --- a/arch/powerpc/platforms/pseries/eeh_pe.c +++ b/arch/powerpc/platforms/pseries/eeh_pe.c @@ -105,11 +105,8 @@ static struct eeh_pe *eeh_phb_pe_get(struct pci_controller *phb) * the PE for PHB has been determined when that * was created. */ - if ((pe->type & EEH_PE_PHB) && - pe->phb == phb) { - eeh_unlock(); + if ((pe->type & EEH_PE_PHB) && pe->phb == phb) return pe; - } } return NULL; -- cgit v0.10.2 From 59c58c324a81cfb08c490384a7c292b82609673a Mon Sep 17 00:00:00 2001 From: Minghuan Lian Date: Mon, 24 Sep 2012 13:50:52 +0800 Subject: powerpc/fsl-pci: use 'Header Type' to identify PCIE mode The original code uses 'Programming Interface' field to judge if PCIE is EP or RC mode, however, some latest silicons do not support this functionality. According to PCIE specification, 'Header Type' offset 0x0e is used to indicate header type, so change code to use 'Header Type' field to judge PCIE mode. Because FSL PCI controller does not support 'Header Type', patch still uses 'Programming Interface' to identify PCI mode. Signed-off-by: Minghuan Lian Signed-off-by: Roy Zang Signed-off-by: Kumar Gala diff --git a/arch/powerpc/sysdev/fsl_pci.c b/arch/powerpc/sysdev/fsl_pci.c index 3d6f4d8..ffb93ae 100644 --- a/arch/powerpc/sysdev/fsl_pci.c +++ b/arch/powerpc/sysdev/fsl_pci.c @@ -38,15 +38,15 @@ static int fsl_pcie_bus_fixup, is_mpc83xx_pci; static void __devinit quirk_fsl_pcie_header(struct pci_dev *dev) { - u8 progif; + u8 hdr_type; /* if we aren't a PCIe don't bother */ if (!pci_find_capability(dev, PCI_CAP_ID_EXP)) return; /* if we aren't in host mode don't bother */ - pci_read_config_byte(dev, PCI_CLASS_PROG, &progif); - if (progif & 0x1) + pci_read_config_byte(dev, PCI_HEADER_TYPE, &hdr_type); + if ((hdr_type & 0x7f) != PCI_HEADER_TYPE_BRIDGE) return; dev->class = PCI_CLASS_BRIDGE_PCI << 8; @@ -427,7 +427,7 @@ int __init fsl_add_bridge(struct device_node *dev, int is_primary) struct pci_controller *hose; struct resource rsrc; const int *bus_range; - u8 progif; + u8 hdr_type, progif; if (!of_device_is_available(dev)) { pr_warning("%s: disabled\n", dev->full_name); @@ -459,15 +459,17 @@ int __init fsl_add_bridge(struct device_node *dev, int is_primary) setup_indirect_pci(hose, rsrc.start, rsrc.start + 0x4, PPC_INDIRECT_TYPE_BIG_ENDIAN); - early_read_config_byte(hose, 0, 0, PCI_CLASS_PROG, &progif); - if ((progif & 1) == 1) { - /* unmap cfg_data & cfg_addr separately if not on same page */ - if (((unsigned long)hose->cfg_data & PAGE_MASK) != - ((unsigned long)hose->cfg_addr & PAGE_MASK)) - iounmap(hose->cfg_data); - iounmap(hose->cfg_addr); - pcibios_free_controller(hose); - return -ENODEV; + if (early_find_capability(hose, 0, 0, PCI_CAP_ID_EXP)) { + /* For PCIE read HEADER_TYPE to identify controler mode */ + early_read_config_byte(hose, 0, 0, PCI_HEADER_TYPE, &hdr_type); + if ((hdr_type & 0x7f) != PCI_HEADER_TYPE_BRIDGE) + goto no_bridge; + + } else { + /* For PCI read PROG to identify controller mode */ + early_read_config_byte(hose, 0, 0, PCI_CLASS_PROG, &progif); + if ((progif & 1) == 1) + goto no_bridge; } setup_pci_cmd(hose); @@ -496,6 +498,15 @@ int __init fsl_add_bridge(struct device_node *dev, int is_primary) setup_pci_atmu(hose, &rsrc); return 0; + +no_bridge: + /* unmap cfg_data & cfg_addr separately if not on same page */ + if (((unsigned long)hose->cfg_data & PAGE_MASK) != + ((unsigned long)hose->cfg_addr & PAGE_MASK)) + iounmap(hose->cfg_data); + iounmap(hose->cfg_addr); + pcibios_free_controller(hose); + return -ENODEV; } #endif /* CONFIG_FSL_SOC_BOOKE || CONFIG_PPC_86xx */ -- cgit v0.10.2 From 10bfa766efa19c23f72f50727952a26cb1512256 Mon Sep 17 00:00:00 2001 From: Prabhakar Kushwaha Date: Thu, 13 Sep 2012 14:24:49 +0530 Subject: driver/mtd:IFC NAND:Initialise internal SRAM before any write IFC-1.1.0 uses 28nm techenology for SRAM. This tech has known limitaion for SRAM i.e. "byte select" is not supported. Hence Read Modify Write is implemented in IFC for any "system side write" into sram buffer. Reading an uninitialized memory results in ECC Error from sram wrapper. Hence we must initialize/prefill SRAM buffer by any data before writing anything in SRAM from system side. To initialize SRAM user can use "READID" NAND command with read bytes equal to SRAM size. It will be a one time activity post boot. Signed-off-by: Prabhakar Kushwaha Acked-by: Artem Bityutskiy Signed-off-by: Kumar Gala diff --git a/drivers/mtd/nand/fsl_ifc_nand.c b/drivers/mtd/nand/fsl_ifc_nand.c index 9602c1b..01e2f2e 100644 --- a/drivers/mtd/nand/fsl_ifc_nand.c +++ b/drivers/mtd/nand/fsl_ifc_nand.c @@ -31,6 +31,7 @@ #include #include +#define FSL_IFC_V1_1_0 0x01010000 #define ERR_BYTE 0xFF /* Value returned for read bytes when read failed */ #define IFC_TIMEOUT_MSECS 500 /* Maximum number of mSecs to wait @@ -773,13 +774,62 @@ static int fsl_ifc_chip_init_tail(struct mtd_info *mtd) return 0; } +static void fsl_ifc_sram_init(struct fsl_ifc_mtd *priv) +{ + struct fsl_ifc_ctrl *ctrl = priv->ctrl; + struct fsl_ifc_regs __iomem *ifc = ctrl->regs; + uint32_t csor = 0, csor_8k = 0, csor_ext = 0; + uint32_t cs = priv->bank; + + /* Save CSOR and CSOR_ext */ + csor = in_be32(&ifc->csor_cs[cs].csor); + csor_ext = in_be32(&ifc->csor_cs[cs].csor_ext); + + /* chage PageSize 8K and SpareSize 1K*/ + csor_8k = (csor & ~(CSOR_NAND_PGS_MASK)) | 0x0018C000; + out_be32(&ifc->csor_cs[cs].csor, csor_8k); + out_be32(&ifc->csor_cs[cs].csor_ext, 0x0000400); + + /* READID */ + out_be32(&ifc->ifc_nand.nand_fir0, + (IFC_FIR_OP_CMD0 << IFC_NAND_FIR0_OP0_SHIFT) | + (IFC_FIR_OP_UA << IFC_NAND_FIR0_OP1_SHIFT) | + (IFC_FIR_OP_RB << IFC_NAND_FIR0_OP2_SHIFT)); + out_be32(&ifc->ifc_nand.nand_fcr0, + NAND_CMD_READID << IFC_NAND_FCR0_CMD0_SHIFT); + out_be32(&ifc->ifc_nand.row3, 0x0); + + out_be32(&ifc->ifc_nand.nand_fbcr, 0x0); + + /* Program ROW0/COL0 */ + out_be32(&ifc->ifc_nand.row0, 0x0); + out_be32(&ifc->ifc_nand.col0, 0x0); + + /* set the chip select for NAND Transaction */ + out_be32(&ifc->ifc_nand.nand_csel, cs << IFC_NAND_CSEL_SHIFT); + + /* start read seq */ + out_be32(&ifc->ifc_nand.nandseq_strt, IFC_NAND_SEQ_STRT_FIR_STRT); + + /* wait for command complete flag or timeout */ + wait_event_timeout(ctrl->nand_wait, ctrl->nand_stat, + IFC_TIMEOUT_MSECS * HZ/1000); + + if (ctrl->nand_stat != IFC_NAND_EVTER_STAT_OPC) + printk(KERN_ERR "fsl-ifc: Failed to Initialise SRAM\n"); + + /* Restore CSOR and CSOR_ext */ + out_be32(&ifc->csor_cs[cs].csor, csor); + out_be32(&ifc->csor_cs[cs].csor_ext, csor_ext); +} + static int fsl_ifc_chip_init(struct fsl_ifc_mtd *priv) { struct fsl_ifc_ctrl *ctrl = priv->ctrl; struct fsl_ifc_regs __iomem *ifc = ctrl->regs; struct nand_chip *chip = &priv->chip; struct nand_ecclayout *layout; - u32 csor; + u32 csor, ver; /* Fill in fsl_ifc_mtd structure */ priv->mtd.priv = chip; @@ -874,6 +924,10 @@ static int fsl_ifc_chip_init(struct fsl_ifc_mtd *priv) chip->ecc.mode = NAND_ECC_SOFT; } + ver = in_be32(&ifc->ifc_rev); + if (ver == FSL_IFC_V1_1_0) + fsl_ifc_sram_init(priv); + return 0; } -- cgit v0.10.2 From c8adfeccee01ce3de6a7d14fcd4e3be02e27f03c Mon Sep 17 00:00:00 2001 From: Nishanth Aravamudan Date: Mon, 1 Oct 2012 14:59:13 +0000 Subject: powerpc: Fix VMX fix for memcpy case In 2fae7cdb60240e2e2d9b378afbf6d9fcce8a3890 ("powerpc: Fix VMX in interrupt check in POWER7 copy loops"), Anton inadvertently introduced a regression for memcpy on POWER7 machines. copyuser and memcpy diverge slightly in their use of cr1 (copyuser doesn't use it, but memcpy does) and you end up clobbering that register with your fix. That results in (taken from an FC18 kernel): [ 18.824604] Unrecoverable VMX/Altivec Unavailable Exception f20 at c000000000052f40 [ 18.824618] Oops: Unrecoverable VMX/Altivec Unavailable Exception, sig: 6 [#1] [ 18.824623] SMP NR_CPUS=1024 NUMA pSeries [ 18.824633] Modules linked in: tg3(+) be2net(+) cxgb4(+) ipr(+) sunrpc xts lrw gf128mul dm_crypt dm_round_robin dm_multipath linear raid10 raid456 async_raid6_recov async_memcpy async_pq raid6_pq async_xor xor async_tx raid1 raid0 scsi_dh_rdac scsi_dh_hp_sw scsi_dh_emc scsi_dh_alua squashfs cramfs [ 18.824705] NIP: c000000000052f40 LR: c00000000020b874 CTR: 0000000000000512 [ 18.824709] REGS: c000001f1fef7790 TRAP: 0f20 Not tainted (3.6.0-0.rc6.git0.2.fc18.ppc64) [ 18.824713] MSR: 8000000000009032 CR: 4802802e XER: 20000010 [ 18.824726] SOFTE: 0 [ 18.824728] CFAR: 0000000000000f20 [ 18.824731] TASK = c000000fa7128400[0] 'swapper/24' THREAD: c000000fa7480000 CPU: 24 GPR00: 00000000ffffffc0 c000001f1fef7a10 c00000000164edc0 c000000f9b9a8120 GPR04: c000000f9b9a8124 0000000000001438 0000000000000060 03ffffff064657ee GPR08: 0000000080000000 0000000000000010 0000000000000020 0000000000000030 GPR12: 0000000028028022 c00000000ff25400 0000000000000001 0000000000000000 GPR16: 0000000000000000 7fffffffffffffff c0000000016b2180 c00000000156a500 GPR20: c000000f968c7a90 c0000000131c31d8 c000001f1fef4000 c000000001561d00 GPR24: 000000000000000a 0000000000000000 0000000000000001 0000000000000012 GPR28: c000000fa5c04f80 00000000000008bc c0000000015c0a28 000000000000022e [ 18.824792] NIP [c000000000052f40] .memcpy_power7+0x5a0/0x7c4 [ 18.824797] LR [c00000000020b874] .pcpu_free_area+0x174/0x2d0 [ 18.824800] Call Trace: [ 18.824803] [c000001f1fef7a10] [c000000000052c14] .memcpy_power7+0x274/0x7c4 (unreliable) [ 18.824809] [c000001f1fef7b10] [c00000000020b874] .pcpu_free_area+0x174/0x2d0 [ 18.824813] [c000001f1fef7bb0] [c00000000020ba88] .free_percpu+0xb8/0x1b0 [ 18.824819] [c000001f1fef7c50] [c00000000043d144] .throtl_pd_exit+0x94/0xd0 [ 18.824824] [c000001f1fef7cf0] [c00000000043acf8] .blkg_free+0x88/0xe0 [ 18.824829] [c000001f1fef7d90] [c00000000018c048] .rcu_process_callbacks+0x2e8/0x8a0 [ 18.824835] [c000001f1fef7e90] [c0000000000a8ce8] .__do_softirq+0x158/0x4d0 [ 18.824840] [c000001f1fef7f90] [c000000000025ecc] .call_do_softirq+0x14/0x24 [ 18.824845] [c000000fa7483650] [c000000000010e80] .do_softirq+0x160/0x1a0 [ 18.824850] [c000000fa74836f0] [c0000000000a94a4] .irq_exit+0xf4/0x120 [ 18.824854] [c000000fa7483780] [c000000000020c44] .timer_interrupt+0x154/0x4d0 [ 18.824859] [c000000fa7483830] [c000000000003be0] decrementer_common+0x160/0x180 [ 18.824866] --- Exception: 901 at .plpar_hcall_norets+0x84/0xd4 [ 18.824866] LR = .check_and_cede_processor+0x48/0x80 [ 18.824871] [c000000fa7483b20] [c00000000007f018] .check_and_cede_processor+0x18/0x80 (unreliable) [ 18.824877] [c000000fa7483b90] [c00000000007f104] .dedicated_cede_loop+0x84/0x150 [ 18.824883] [c000000fa7483c50] [c0000000006bc030] .cpuidle_enter+0x30/0x50 [ 18.824887] [c000000fa7483cc0] [c0000000006bc9f4] .cpuidle_idle_call+0x104/0x720 [ 18.824892] [c000000fa7483d80] [c000000000070af8] .pSeries_idle+0x18/0x40 [ 18.824897] [c000000fa7483df0] [c000000000019084] .cpu_idle+0x1a4/0x380 [ 18.824902] [c000000fa7483ec0] [c0000000008a4c18] .start_secondary+0x520/0x528 [ 18.824907] [c000000fa7483f90] [c0000000000093f0] .start_secondary_prolog+0x10/0x14 [ 18.824911] Instruction dump: [ 18.824914] 38840008 90030000 90e30004 38630008 7ca62850 7cc300d0 78c7e102 7cf01120 [ 18.824923] 78c60660 39200010 39400020 39600030 <7e00200c> 7c0020ce 38840010 409f001c [ 18.824935] ---[ end trace 0bb95124affaaa45 ]--- [ 18.825046] Unrecoverable VMX/Altivec Unavailable Exception f20 at c000000000052d08 I believe the right fix is to make memcpy match usercopy and not use cr1. Signed-off-by: Nishanth Aravamudan Signed-off-by: Benjamin Herrenschmidt CC: [v3.6] diff --git a/arch/powerpc/lib/memcpy_power7.S b/arch/powerpc/lib/memcpy_power7.S index 7ba6c96..0663630 100644 --- a/arch/powerpc/lib/memcpy_power7.S +++ b/arch/powerpc/lib/memcpy_power7.S @@ -239,8 +239,8 @@ _GLOBAL(memcpy_power7) ori r9,r9,1 /* stream=1 */ srdi r7,r5,7 /* length in cachelines, capped at 0x3FF */ - cmpldi cr1,r7,0x3FF - ble cr1,1f + cmpldi r7,0x3FF + ble 1f li r7,0x3FF 1: lis r0,0x0E00 /* depth=7 */ sldi r7,r7,7 -- cgit v0.10.2 From d900bd7366463fd96a907b2c212242e2b68b27d8 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Wed, 3 Oct 2012 18:57:10 +0000 Subject: powerpc/iommu: Fix multiple issues with IOMMU pools code There are a number of issues in the recent IOMMU pools code: - On a preempt kernel we might switch CPUs in the middle of building a scatter gather list. When this happens the handle hint passed in no longer falls within the local CPU's pool. Check for this and fall back to the pool hint. - We were missing a spin_unlock/spin_lock in one spot where we switch pools. - We need to provide locking around dart_tlb_invalidate_all and dart_tlb_invalidate_one now that the global lock is gone. Reported-by: Alexander Graf Signed-off-by: Anton Blanchard Signed-off-by: Benjamin Herrenschmidt CC: [v3.6] diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c index ff5a6ce..8226c6c 100644 --- a/arch/powerpc/kernel/iommu.c +++ b/arch/powerpc/kernel/iommu.c @@ -215,7 +215,8 @@ static unsigned long iommu_range_alloc(struct device *dev, spin_lock_irqsave(&(pool->lock), flags); again: - if ((pass == 0) && handle && *handle) + if ((pass == 0) && handle && *handle && + (*handle >= pool->start) && (*handle < pool->end)) start = *handle; else start = pool->hint; @@ -236,7 +237,9 @@ again: * but on second pass, start at 0 in pool 0. */ if ((start & mask) >= limit || pass > 0) { + spin_unlock(&(pool->lock)); pool = &(tbl->pools[0]); + spin_lock(&(pool->lock)); start = pool->start; } else { start &= mask; diff --git a/arch/powerpc/sysdev/dart_iommu.c b/arch/powerpc/sysdev/dart_iommu.c index 8ef63a0..bd968a4 100644 --- a/arch/powerpc/sysdev/dart_iommu.c +++ b/arch/powerpc/sysdev/dart_iommu.c @@ -73,11 +73,16 @@ static int dart_is_u4; #define DBG(...) +static DEFINE_SPINLOCK(invalidate_lock); + static inline void dart_tlb_invalidate_all(void) { unsigned long l = 0; unsigned int reg, inv_bit; unsigned long limit; + unsigned long flags; + + spin_lock_irqsave(&invalidate_lock, flags); DBG("dart: flush\n"); @@ -110,12 +115,17 @@ retry: panic("DART: TLB did not flush after waiting a long " "time. Buggy U3 ?"); } + + spin_unlock_irqrestore(&invalidate_lock, flags); } static inline void dart_tlb_invalidate_one(unsigned long bus_rpn) { unsigned int reg; unsigned int l, limit; + unsigned long flags; + + spin_lock_irqsave(&invalidate_lock, flags); reg = DART_CNTL_U4_ENABLE | DART_CNTL_U4_IONE | (bus_rpn & DART_CNTL_U4_IONE_MASK); @@ -137,6 +147,8 @@ wait_more: panic("DART: TLB did not flush after waiting a long " "time. Buggy U4 ?"); } + + spin_unlock_irqrestore(&invalidate_lock, flags); } static void dart_flush(struct iommu_table *tbl) -- cgit v0.10.2