From f521089158cd48a81b4d72e8e39da006dd79779b Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Fri, 5 Aug 2005 08:02:00 -0700 Subject: [IA64] Spinlock optimizations 1. Nontemporal store for spin unlock. A nontemporal store will not update the LRU setting for the cacheline. The cacheline with the lock may therefore be evicted faster from the cpu caches. Doing so may be useful since it increases the chance that the exclusive cache line has been evicted when another cpu is trying to acquire the lock. The time between dropping and reacquiring a lock on the same cpu is typically very small so the danger of the cacheline being evicted is negligible. 2. Avoid semaphore operation in write_unlock and use nontemporal store write_lock uses a cmpxchg like the regular spin_lock but write_unlock uses clear_bit which requires a load and then a loop over a cmpxchg. The following patch makes write_unlock simply use a nontemporal store to clear the highest 8 bits. We will then still have the lower 3 bytes (24 bits) left to count the readers. Doing the byte store will reduce the number of possible readers from 2^31 to 2^24 = 16 million. These patches were discussed already: http://marc.theaimsgroup.com/?t=111472054400001&r=1&w=2 http://marc.theaimsgroup.com/?l=linux-ia64&m=111401837707849&w=2 The nontemporal stores will only work using GCC. If a compiler is used that does not support inline asm then fallback C code is used. This will preserve the byte store but not be able to do the nontemporal stores. Signed-off-by: Christoph Lameter Signed-off-by: Tony Luck diff --git a/include/asm-ia64/spinlock.h b/include/asm-ia64/spinlock.h index 909936f..d2430aa 100644 --- a/include/asm-ia64/spinlock.h +++ b/include/asm-ia64/spinlock.h @@ -93,7 +93,15 @@ _raw_spin_lock_flags (spinlock_t *lock, unsigned long flags) # endif /* CONFIG_MCKINLEY */ #endif } + #define _raw_spin_lock(lock) _raw_spin_lock_flags(lock, 0) + +/* Unlock by doing an ordered store and releasing the cacheline with nta */ +static inline void _raw_spin_unlock(spinlock_t *x) { + barrier(); + asm volatile ("st4.rel.nta [%0] = r0\n\t" :: "r"(x)); +} + #else /* !ASM_SUPPORTED */ #define _raw_spin_lock_flags(lock, flags) _raw_spin_lock(lock) # define _raw_spin_lock(x) \ @@ -109,16 +117,16 @@ do { \ } while (ia64_spinlock_val); \ } \ } while (0) +#define _raw_spin_unlock(x) do { barrier(); ((spinlock_t *) x)->lock = 0; } while (0) #endif /* !ASM_SUPPORTED */ #define spin_is_locked(x) ((x)->lock != 0) -#define _raw_spin_unlock(x) do { barrier(); ((spinlock_t *) x)->lock = 0; } while (0) #define _raw_spin_trylock(x) (cmpxchg_acq(&(x)->lock, 0, 1) == 0) #define spin_unlock_wait(x) do { barrier(); } while ((x)->lock) typedef struct { - volatile unsigned int read_counter : 31; - volatile unsigned int write_lock : 1; + volatile unsigned int read_counter : 24; + volatile unsigned int write_lock : 8; #ifdef CONFIG_PREEMPT unsigned int break_lock; #endif @@ -174,6 +182,13 @@ do { \ (result == 0); \ }) +static inline void _raw_write_unlock(rwlock_t *x) +{ + u8 *y = (u8 *)x; + barrier(); + asm volatile ("st1.rel.nta [%0] = r0\n\t" :: "r"(y+3) : "memory" ); +} + #else /* !ASM_SUPPORTED */ #define _raw_write_lock(l) \ @@ -195,14 +210,14 @@ do { \ (ia64_val == 0); \ }) +static inline void _raw_write_unlock(rwlock_t *x) +{ + barrier(); + x->write_lock = 0; +} + #endif /* !ASM_SUPPORTED */ #define _raw_read_trylock(lock) generic_raw_read_trylock(lock) -#define _raw_write_unlock(x) \ -({ \ - smp_mb__before_clear_bit(); /* need barrier before releasing lock... */ \ - clear_bit(31, (x)); \ -}) - #endif /* _ASM_IA64_SPINLOCK_H */ -- cgit v0.10.2 From 674c6479b7bdc78528ea83dd43897e3161558b8b Mon Sep 17 00:00:00 2001 From: Colin Ngam Date: Wed, 3 Aug 2005 13:35:00 -0700 Subject: [IA64-SGI] Altix only: Add PCI Domain number support. This patch enables PCI Domain numbering on Altix. Signed-off-by: Colin Ngam Signed-off-by: Tony Luck diff --git a/arch/ia64/sn/include/xtalk/hubdev.h b/arch/ia64/sn/include/xtalk/hubdev.h index 580a1c0..ca4e250 100644 --- a/arch/ia64/sn/include/xtalk/hubdev.h +++ b/arch/ia64/sn/include/xtalk/hubdev.h @@ -34,7 +34,8 @@ struct sn_flush_device_list { unsigned long sfdl_force_int_addr; unsigned long sfdl_flush_value; volatile unsigned long *sfdl_flush_addr; - uint64_t sfdl_persistent_busnum; + uint32_t sfdl_persistent_busnum; + uint32_t sfdl_persistent_segment; struct pcibus_info *sfdl_pcibus_info; spinlock_t sfdl_flush_lock; }; @@ -58,7 +59,8 @@ struct hubdev_info { void *hdi_nodepda; void *hdi_node_vertex; - void *hdi_xtalk_vertex; + uint32_t max_segment_number; + uint32_t max_pcibus_number; }; extern void hubdev_init_node(nodepda_t *, cnodeid_t); diff --git a/arch/ia64/sn/kernel/io_init.c b/arch/ia64/sn/kernel/io_init.c index a6649ba..829ea79 100644 --- a/arch/ia64/sn/kernel/io_init.c +++ b/arch/ia64/sn/kernel/io_init.c @@ -44,6 +44,9 @@ int sn_ioif_inited = 0; /* SN I/O infrastructure initialized? */ struct sn_pcibus_provider *sn_pci_provider[PCIIO_ASIC_MAX_TYPES]; /* indexed by asic type */ +static int max_segment_number = 0; /* Default highest segment number */ +static int max_pcibus_number = 255; /* Default highest pci bus number */ + /* * Hooks and struct for unsupported pci providers */ @@ -157,13 +160,28 @@ static void sn_fixup_ionodes(void) uint64_t nasid; int i, widget; + /* + * Get SGI Specific HUB chipset information. + * Inform Prom that this kernel can support domain bus numbering. + */ for (i = 0; i < numionodes; i++) { hubdev = (struct hubdev_info *)(NODEPDA(i)->pdinfo); nasid = cnodeid_to_nasid(i); + hubdev->max_segment_number = 0xffffffff; + hubdev->max_pcibus_number = 0xff; status = sal_get_hubdev_info(nasid, (uint64_t) __pa(hubdev)); if (status) continue; + /* Save the largest Domain and pcibus numbers found. */ + if (hubdev->max_segment_number) { + /* + * Dealing with a Prom that supports segments. + */ + max_segment_number = hubdev->max_segment_number; + max_pcibus_number = hubdev->max_pcibus_number; + } + /* Attach the error interrupt handlers */ if (nasid & 1) ice_error_init(hubdev); @@ -229,7 +247,7 @@ void sn_pci_unfixup_slot(struct pci_dev *dev) void sn_pci_fixup_slot(struct pci_dev *dev) { int idx; - int segment = 0; + int segment = pci_domain_nr(dev->bus); int status = 0; struct pcibus_bussoft *bs; struct pci_bus *host_pci_bus; @@ -282,9 +300,9 @@ void sn_pci_fixup_slot(struct pci_dev *dev) * PCI host_pci_dev struct and set up host bus linkages */ - bus_no = SN_PCIDEV_INFO(dev)->pdi_slot_host_handle >> 32; + bus_no = (SN_PCIDEV_INFO(dev)->pdi_slot_host_handle >> 32) & 0xff; devfn = SN_PCIDEV_INFO(dev)->pdi_slot_host_handle & 0xffffffff; - host_pci_bus = pci_find_bus(pci_domain_nr(dev->bus), bus_no); + host_pci_bus = pci_find_bus(segment, bus_no); host_pci_dev = pci_get_slot(host_pci_bus, devfn); SN_PCIDEV_INFO(dev)->host_pci_dev = host_pci_dev; @@ -332,6 +350,7 @@ void sn_pci_controller_fixup(int segment, int busnum, struct pci_bus *bus) prom_bussoft_ptr = __va(prom_bussoft_ptr); controller = kcalloc(1,sizeof(struct pci_controller), GFP_KERNEL); + controller->segment = segment; if (!controller) BUG(); @@ -387,7 +406,7 @@ void sn_pci_controller_fixup(int segment, int busnum, struct pci_bus *bus) if (controller->node >= num_online_nodes()) { struct pcibus_bussoft *b = SN_PCIBUS_BUSSOFT(bus); - printk(KERN_WARNING "Device ASIC=%u XID=%u PBUSNUM=%lu" + printk(KERN_WARNING "Device ASIC=%u XID=%u PBUSNUM=%u" "L_IO=%lx L_MEM=%lx BASE=%lx\n", b->bs_asic_type, b->bs_xid, b->bs_persist_busnum, b->bs_legacy_io, b->bs_legacy_mem, b->bs_base); @@ -442,6 +461,7 @@ sn_sysdata_free_start: static int __init sn_pci_init(void) { int i = 0; + int j = 0; struct pci_dev *pci_dev = NULL; extern void sn_init_cpei_timer(void); #ifdef CONFIG_PROC_FS @@ -476,8 +496,9 @@ static int __init sn_pci_init(void) #endif /* busses are not known yet ... */ - for (i = 0; i < PCI_BUSES_TO_SCAN; i++) - sn_pci_controller_fixup(0, i, NULL); + for (i = 0; i <= max_segment_number; i++) + for (j = 0; j <= max_pcibus_number; j++) + sn_pci_controller_fixup(i, j, NULL); /* * Generic Linux PCI Layer has created the pci_bus and pci_dev diff --git a/arch/ia64/sn/pci/pcibr/pcibr_provider.c b/arch/ia64/sn/pci/pcibr/pcibr_provider.c index b95e928..dfaf01e 100644 --- a/arch/ia64/sn/pci/pcibr/pcibr_provider.c +++ b/arch/ia64/sn/pci/pcibr/pcibr_provider.c @@ -60,7 +60,7 @@ static int sal_pcibr_error_interrupt(struct pcibus_info *soft) ret_stuff.status = 0; ret_stuff.v0 = 0; - segment = 0; + segment = soft->pbi_buscommon.bs_persist_segment; busnum = soft->pbi_buscommon.bs_persist_busnum; SAL_CALL_NOLOCK(ret_stuff, (u64) SN_SAL_IOIF_ERROR_INTERRUPT, @@ -142,9 +142,12 @@ pcibr_bus_fixup(struct pcibus_bussoft *prom_bussoft, struct pci_controller *cont j++, sn_flush_device_list++) { if (sn_flush_device_list->sfdl_slot == -1) continue; - if (sn_flush_device_list-> - sfdl_persistent_busnum == - soft->pbi_buscommon.bs_persist_busnum) + if ((sn_flush_device_list-> + sfdl_persistent_segment == + soft->pbi_buscommon.bs_persist_segment) && + (sn_flush_device_list-> + sfdl_persistent_busnum == + soft->pbi_buscommon.bs_persist_busnum)) sn_flush_device_list->sfdl_pcibus_info = soft; } diff --git a/arch/ia64/sn/pci/tioca_provider.c b/arch/ia64/sn/pci/tioca_provider.c index 5d76a75..2fef7c1 100644 --- a/arch/ia64/sn/pci/tioca_provider.c +++ b/arch/ia64/sn/pci/tioca_provider.c @@ -559,7 +559,7 @@ tioca_error_intr_handler(int irq, void *arg, struct pt_regs *pt) ret_stuff.status = 0; ret_stuff.v0 = 0; - segment = 0; + segment = soft->ca_common.bs_persist_segment; busnum = soft->ca_common.bs_persist_busnum; SAL_CALL_NOLOCK(ret_stuff, @@ -622,7 +622,8 @@ tioca_bus_fixup(struct pcibus_bussoft *prom_bussoft, struct pci_controller *cont nasid_to_cnodeid(tioca_common->ca_closest_nasid); tioca_common->ca_kernel_private = (uint64_t) tioca_kern; - bus = pci_find_bus(0, tioca_common->ca_common.bs_persist_busnum); + bus = pci_find_bus(tioca_common->ca_common.bs_persist_segment, + tioca_common->ca_common.bs_persist_busnum); BUG_ON(!bus); tioca_kern->ca_devices = &bus->devices; diff --git a/include/asm-ia64/sn/pcibus_provider_defs.h b/include/asm-ia64/sn/pcibus_provider_defs.h index 976f5ef..b01b21e 100644 --- a/include/asm-ia64/sn/pcibus_provider_defs.h +++ b/include/asm-ia64/sn/pcibus_provider_defs.h @@ -30,7 +30,8 @@ struct pcibus_bussoft { uint32_t bs_asic_type; /* chipset type */ uint32_t bs_xid; /* xwidget id */ - uint64_t bs_persist_busnum; /* Persistent Bus Number */ + uint32_t bs_persist_busnum; /* Persistent Bus Number */ + uint32_t bs_persist_segment; /* Segment Number */ uint64_t bs_legacy_io; /* legacy io pio addr */ uint64_t bs_legacy_mem; /* legacy mem pio addr */ uint64_t bs_base; /* widget base */ -- cgit v0.10.2 From 89963d16dc50a5d91ed09914a1232d59e6461fd6 Mon Sep 17 00:00:00 2001 From: Mark Maule Date: Wed, 3 Aug 2005 14:06:00 -0700 Subject: [IA64-SGI] altix: cosmetic rename of SGI_PCIBR_ERROR Cosmetic altix patch to rename SGI_PCIBR_ERROR to something more generic and remove a duplicate #define. Signed-off-by: Mark Maule Signed-off-by: Tony Luck diff --git a/arch/ia64/sn/pci/pcibr/pcibr_provider.c b/arch/ia64/sn/pci/pcibr/pcibr_provider.c index dfaf01e..f8cfe8a 100644 --- a/arch/ia64/sn/pci/pcibr/pcibr_provider.c +++ b/arch/ia64/sn/pci/pcibr/pcibr_provider.c @@ -115,7 +115,7 @@ pcibr_bus_fixup(struct pcibus_bussoft *prom_bussoft, struct pci_controller *cont /* * register the bridge's error interrupt handler */ - if (request_irq(SGI_PCIBR_ERROR, (void *)pcibr_error_intr_handler, + if (request_irq(SGI_PCIASIC_ERROR, (void *)pcibr_error_intr_handler, SA_SHIRQ, "PCIBR error", (void *)(soft))) { printk(KERN_WARNING "pcibr cannot allocate interrupt for error handler\n"); diff --git a/include/asm-ia64/sn/intr.h b/include/asm-ia64/sn/intr.h index e190dd4..e35074f 100644 --- a/include/asm-ia64/sn/intr.h +++ b/include/asm-ia64/sn/intr.h @@ -12,13 +12,12 @@ #include #define SGI_UART_VECTOR (0xe9) -#define SGI_PCIBR_ERROR (0x33) /* Reserved IRQs : Note, not to exceed IA64_SN2_FIRST_DEVICE_VECTOR */ #define SGI_XPC_ACTIVATE (0x30) #define SGI_II_ERROR (0x31) #define SGI_XBOW_ERROR (0x32) -#define SGI_PCIBR_ERROR (0x33) +#define SGI_PCIASIC_ERROR (0x33) #define SGI_ACPI_SCI_INT (0x34) #define SGI_TIOCA_ERROR (0x35) #define SGI_TIO_ERROR (0x36) -- cgit v0.10.2 From 735e60f4c67823a3e01655c990296e2e56574885 Mon Sep 17 00:00:00 2001 From: Mark Maule Date: Wed, 3 Aug 2005 14:06:00 -0700 Subject: [IA64-SGI] abstract force_interrupt() mechanism Altix patch to abstract the force_interrupt() mechanism away from the pcibr provider. Signed-off-by: Mark Maule Signed-off-by: Tony Luck diff --git a/arch/ia64/sn/kernel/irq.c b/arch/ia64/sn/kernel/irq.c index 84d276a..af061db 100644 --- a/arch/ia64/sn/kernel/irq.c +++ b/arch/ia64/sn/kernel/irq.c @@ -317,6 +317,16 @@ void sn_irq_unfixup(struct pci_dev *pci_dev) pci_dev_put(pci_dev); } +static inline void +sn_call_force_intr_provider(struct sn_irq_info *sn_irq_info) +{ + struct sn_pcibus_provider *pci_provider; + + pci_provider = sn_pci_provider[sn_irq_info->irq_bridge_type]; + if (pci_provider && pci_provider->force_interrupt) + (*pci_provider->force_interrupt)(sn_irq_info); +} + static void force_interrupt(int irq) { struct sn_irq_info *sn_irq_info; @@ -325,11 +335,9 @@ static void force_interrupt(int irq) return; rcu_read_lock(); - list_for_each_entry_rcu(sn_irq_info, sn_irq_lh[irq], list) { - if (IS_PCI_BRIDGE_ASIC(sn_irq_info->irq_bridge_type) && - (sn_irq_info->irq_bridge != NULL)) - pcibr_force_interrupt(sn_irq_info); - } + list_for_each_entry_rcu(sn_irq_info, sn_irq_lh[irq], list) + sn_call_force_intr_provider(sn_irq_info); + rcu_read_unlock(); } @@ -351,6 +359,14 @@ static void sn_check_intr(int irq, struct sn_irq_info *sn_irq_info) struct pcidev_info *pcidev_info; struct pcibus_info *pcibus_info; + /* + * Bridge types attached to TIO (anything but PIC) do not need this WAR + * since they do not target Shub II interrupt registers. If that + * ever changes, this check needs to accomodate. + */ + if (sn_irq_info->irq_bridge_type != PCIIO_ASIC_TYPE_PIC) + return; + pcidev_info = (struct pcidev_info *)sn_irq_info->irq_pciioinfo; if (!pcidev_info) return; @@ -377,16 +393,12 @@ static void sn_check_intr(int irq, struct sn_irq_info *sn_irq_info) break; } if (!test_bit(irr_bit, &irr_reg)) { - if (!test_bit(irq, pda->sn_soft_irr)) { - if (!test_bit(irq, pda->sn_in_service_ivecs)) { - regval &= 0xff; - if (sn_irq_info->irq_int_bit & regval & - sn_irq_info->irq_last_intr) { - regval &= - ~(sn_irq_info-> - irq_int_bit & regval); - pcibr_force_interrupt(sn_irq_info); - } + if (!test_bit(irq, pda->sn_in_service_ivecs)) { + regval &= 0xff; + if (sn_irq_info->irq_int_bit & regval & + sn_irq_info->irq_last_intr) { + regval &= ~(sn_irq_info->irq_int_bit & regval); + sn_call_force_intr_provider(sn_irq_info); } } } @@ -404,13 +416,7 @@ void sn_lb_int_war_check(void) rcu_read_lock(); for (i = pda->sn_first_irq; i <= pda->sn_last_irq; i++) { list_for_each_entry_rcu(sn_irq_info, sn_irq_lh[i], list) { - /* - * Only call for PCI bridges that are fully - * initialized. - */ - if (IS_PCI_BRIDGE_ASIC(sn_irq_info->irq_bridge_type) && - (sn_irq_info->irq_bridge != NULL)) - sn_check_intr(i, sn_irq_info); + sn_check_intr(i, sn_irq_info); } } rcu_read_unlock(); diff --git a/arch/ia64/sn/pci/pcibr/pcibr_provider.c b/arch/ia64/sn/pci/pcibr/pcibr_provider.c index f8cfe8a..ff9c7de9 100644 --- a/arch/ia64/sn/pci/pcibr/pcibr_provider.c +++ b/arch/ia64/sn/pci/pcibr/pcibr_provider.c @@ -178,6 +178,9 @@ void pcibr_force_interrupt(struct sn_irq_info *sn_irq_info) struct pcibus_info *pcibus_info; int bit = sn_irq_info->irq_int_bit; + if (! sn_irq_info->irq_bridge) + return; + pcidev_info = (struct pcidev_info *)sn_irq_info->irq_pciioinfo; if (pcidev_info) { pcibus_info = @@ -222,6 +225,7 @@ struct sn_pcibus_provider pcibr_provider = { .dma_map_consistent = pcibr_dma_map_consistent, .dma_unmap = pcibr_dma_unmap, .bus_fixup = pcibr_bus_fixup, + .force_interrupt = pcibr_force_interrupt }; int diff --git a/arch/ia64/sn/pci/tioca_provider.c b/arch/ia64/sn/pci/tioca_provider.c index 2fef7c1..4ea04cf 100644 --- a/arch/ia64/sn/pci/tioca_provider.c +++ b/arch/ia64/sn/pci/tioca_provider.c @@ -657,6 +657,7 @@ static struct sn_pcibus_provider tioca_pci_interfaces = { .dma_map_consistent = tioca_dma_map, .dma_unmap = tioca_dma_unmap, .bus_fixup = tioca_bus_fixup, + .force_interrupt = NULL }; /** diff --git a/include/asm-ia64/sn/pcibus_provider_defs.h b/include/asm-ia64/sn/pcibus_provider_defs.h index b01b21e..5c3ba57 100644 --- a/include/asm-ia64/sn/pcibus_provider_defs.h +++ b/include/asm-ia64/sn/pcibus_provider_defs.h @@ -48,6 +48,7 @@ struct sn_pcibus_provider { dma_addr_t (*dma_map_consistent)(struct pci_dev *, unsigned long, size_t); void (*dma_unmap)(struct pci_dev *, dma_addr_t, int); void * (*bus_fixup)(struct pcibus_bussoft *, struct pci_controller *); + void (*force_interrupt)(struct sn_irq_info *); }; extern struct sn_pcibus_provider *sn_pci_provider[]; diff --git a/include/asm-ia64/sn/pda.h b/include/asm-ia64/sn/pda.h index ea5590c..1c5108d 100644 --- a/include/asm-ia64/sn/pda.h +++ b/include/asm-ia64/sn/pda.h @@ -39,7 +39,6 @@ typedef struct pda_s { unsigned long pio_write_status_val; volatile unsigned long *pio_shub_war_cam_addr; - unsigned long sn_soft_irr[4]; unsigned long sn_in_service_ivecs[4]; int sn_lb_int_war_ticks; int sn_last_irq; -- cgit v0.10.2 From 5b53ed1f2ed6c85e2b1c39d97cc112ea32004609 Mon Sep 17 00:00:00 2001 From: Mark Maule Date: Wed, 3 Aug 2005 14:06:00 -0700 Subject: [IA64-SGI] add support for TIO huge-window Altix patch to add TIO "huge-window" address support to sn_dma_flush(). Update copyright in affected files. Signed-off-by: Mark Maule Signed-off-by: Tony Luck diff --git a/arch/ia64/sn/include/tio.h b/arch/ia64/sn/include/tio.h index 0139124..6b2e7b7 100644 --- a/arch/ia64/sn/include/tio.h +++ b/arch/ia64/sn/include/tio.h @@ -3,7 +3,7 @@ * License. See the file "COPYING" in the main directory of this archive * for more details. * - * Copyright (C) 2000-2004 Silicon Graphics, Inc. All rights reserved. + * Copyright (C) 2000-2005 Silicon Graphics, Inc. All rights reserved. */ #ifndef _ASM_IA64_SN_TIO_H @@ -26,6 +26,10 @@ #define TIO_ITTE_VALID_MASK 0x1 #define TIO_ITTE_VALID_SHIFT 16 +#define TIO_ITTE_WIDGET(itte) \ + (((itte) >> TIO_ITTE_WIDGET_SHIFT) & TIO_ITTE_WIDGET_MASK) +#define TIO_ITTE_VALID(itte) \ + (((itte) >> TIO_ITTE_VALID_SHIFT) & TIO_ITTE_VALID_MASK) #define TIO_ITTE_PUT(nasid, bigwin, widget, addr, valid) \ REMOTE_HUB_S((nasid), TIO_ITTE(bigwin), \ diff --git a/arch/ia64/sn/include/xtalk/hubdev.h b/arch/ia64/sn/include/xtalk/hubdev.h index ca4e250..71c2b27 100644 --- a/arch/ia64/sn/include/xtalk/hubdev.h +++ b/arch/ia64/sn/include/xtalk/hubdev.h @@ -3,7 +3,7 @@ * License. See the file "COPYING" in the main directory of this archive * for more details. * - * Copyright (C) 1992 - 1997, 2000-2004 Silicon Graphics, Inc. All rights reserved. + * Copyright (C) 1992 - 1997, 2000-2005 Silicon Graphics, Inc. All rights reserved. */ #ifndef _ASM_IA64_SN_XTALK_HUBDEV_H #define _ASM_IA64_SN_XTALK_HUBDEV_H @@ -16,6 +16,9 @@ #define IIO_ITTE_WIDGET_MASK ((1<> IIO_ITTE_WIDGET_SHIFT) & IIO_ITTE_WIDGET_MASK) + /* * Use the top big window as a surrogate for the first small window */ diff --git a/arch/ia64/sn/pci/pcibr/pcibr_dma.c b/arch/ia64/sn/pci/pcibr/pcibr_dma.c index b058dc2..ae455b6 100644 --- a/arch/ia64/sn/pci/pcibr/pcibr_dma.c +++ b/arch/ia64/sn/pci/pcibr/pcibr_dma.c @@ -3,7 +3,7 @@ * License. See the file "COPYING" in the main directory of this archive * for more details. * - * Copyright (C) 2001-2004 Silicon Graphics, Inc. All rights reserved. + * Copyright (C) 2001-2005 Silicon Graphics, Inc. All rights reserved. */ #include @@ -215,8 +215,8 @@ void sn_dma_flush(uint64_t addr) int is_tio; int wid_num; int i, j; - int bwin; uint64_t flags; + uint64_t itte; struct hubdev_info *hubinfo; volatile struct sn_flush_device_list *p; struct sn_flush_nasid_entry *flush_nasid_list; @@ -233,31 +233,36 @@ void sn_dma_flush(uint64_t addr) if (!hubinfo) { BUG(); } - is_tio = (nasid & 1); - if (is_tio) { - wid_num = TIO_SWIN_WIDGETNUM(addr); - bwin = TIO_BWIN_WINDOWNUM(addr); - } else { - wid_num = SWIN_WIDGETNUM(addr); - bwin = BWIN_WINDOWNUM(addr); - } flush_nasid_list = &hubinfo->hdi_flush_nasid_list; if (flush_nasid_list->widget_p == NULL) return; - if (bwin > 0) { - uint64_t itte = flush_nasid_list->iio_itte[bwin]; - if (is_tio) { - wid_num = (itte >> TIO_ITTE_WIDGET_SHIFT) & - TIO_ITTE_WIDGET_MASK; - } else { - wid_num = (itte >> IIO_ITTE_WIDGET_SHIFT) & - IIO_ITTE_WIDGET_MASK; - } + is_tio = (nasid & 1); + if (is_tio) { + int itte_index; + + if (TIO_HWIN(addr)) + itte_index = 0; + else if (TIO_BWIN_WINDOWNUM(addr)) + itte_index = TIO_BWIN_WINDOWNUM(addr); + else + itte_index = -1; + + if (itte_index >= 0) { + itte = flush_nasid_list->iio_itte[itte_index]; + if (! TIO_ITTE_VALID(itte)) + return; + wid_num = TIO_ITTE_WIDGET(itte); + } else + wid_num = TIO_SWIN_WIDGETNUM(addr); + } else { + if (BWIN_WINDOWNUM(addr)) { + itte = flush_nasid_list->iio_itte[BWIN_WINDOWNUM(addr)]; + wid_num = IIO_ITTE_WIDGET(itte); + } else + wid_num = SWIN_WIDGETNUM(addr); } - if (flush_nasid_list->widget_p == NULL) - return; if (flush_nasid_list->widget_p[wid_num] == NULL) return; p = &flush_nasid_list->widget_p[wid_num][0]; @@ -283,9 +288,15 @@ void sn_dma_flush(uint64_t addr) /* * For TIOCP use the Device(x) Write Request Buffer Flush Bridge * register since it ensures the data has entered the coherence - * domain, unlike PIC + * domain, unlike PIC. */ if (is_tio) { + /* + * Note: devices behind TIOCE should never be matched in the + * above code, and so the following code is PIC/CP centric. + * If CE ever needs the sn_dma_flush mechanism, we will have + * to account for that here and in tioce_bus_fixup(). + */ uint32_t tio_id = REMOTE_HUB_L(nasid, TIO_NODE_ID); uint32_t revnum = XWIDGET_PART_REV_NUM(tio_id); diff --git a/include/asm-ia64/sn/addrs.h b/include/asm-ia64/sn/addrs.h index 103d745..b6e85e4 100644 --- a/include/asm-ia64/sn/addrs.h +++ b/include/asm-ia64/sn/addrs.h @@ -3,7 +3,7 @@ * License. See the file "COPYING" in the main directory of this archive * for more details. * - * Copyright (c) 1992-1999,2001-2004 Silicon Graphics, Inc. All rights reserved. + * Copyright (c) 1992-1999,2001-2005 Silicon Graphics, Inc. All rights reserved. */ #ifndef _ASM_IA64_SN_ADDRS_H @@ -191,7 +191,8 @@ #define TIO_BWIN_WINDOW_SELECT_MASK 0x7 #define TIO_BWIN_WINDOWNUM(x) (((x) >> TIO_BWIN_SIZE_BITS) & TIO_BWIN_WINDOW_SELECT_MASK) - +#define TIO_HWIN_SHIFT_BITS 33 +#define TIO_HWIN(x) (NODE_OFFSET(x) >> TIO_HWIN_SHIFT_BITS) /* * The following definitions pertain to the IO special address -- cgit v0.10.2 From c9221da9f2796f082642c3498edb2c8783ad4774 Mon Sep 17 00:00:00 2001 From: Mark Maule Date: Wed, 3 Aug 2005 14:07:00 -0700 Subject: [IA64-SGI] sn pci provider for TIOCE (pci Altix patch to add an SN pci provider for TIOCE, which is SGI's PCI Express implementation. Signed-off-by: Mark Maule Signed-off-by: Tony Luck diff --git a/arch/ia64/sn/kernel/io_init.c b/arch/ia64/sn/kernel/io_init.c index 829ea79..d1fc09b 100644 --- a/arch/ia64/sn/kernel/io_init.c +++ b/arch/ia64/sn/kernel/io_init.c @@ -18,6 +18,7 @@ #include #include #include +#include #include "xtalk/hubdev.h" #include "xtalk/xwidgetdev.h" @@ -481,6 +482,7 @@ static int __init sn_pci_init(void) pcibr_init_provider(); tioca_init_provider(); + tioce_init_provider(); /* * This is needed to avoid bounce limit checks in the blk layer diff --git a/arch/ia64/sn/pci/Makefile b/arch/ia64/sn/pci/Makefile index 2f915bc..321576b 100644 --- a/arch/ia64/sn/pci/Makefile +++ b/arch/ia64/sn/pci/Makefile @@ -7,4 +7,4 @@ # # Makefile for the sn pci general routines. -obj-y := pci_dma.o tioca_provider.o pcibr/ +obj-y := pci_dma.o tioca_provider.o tioce_provider.o pcibr/ diff --git a/arch/ia64/sn/pci/tioce_provider.c b/arch/ia64/sn/pci/tioce_provider.c new file mode 100644 index 0000000..d908136 --- /dev/null +++ b/arch/ia64/sn/pci/tioce_provider.c @@ -0,0 +1,733 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2003-2005 Silicon Graphics, Inc. All Rights Reserved. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +/** + * Bus address ranges for the 5 flavors of TIOCE DMA + */ + +#define TIOCE_D64_MIN 0x8000000000000000UL +#define TIOCE_D64_MAX 0xffffffffffffffffUL +#define TIOCE_D64_ADDR(a) ((a) >= TIOCE_D64_MIN) + +#define TIOCE_D32_MIN 0x0000000080000000UL +#define TIOCE_D32_MAX 0x00000000ffffffffUL +#define TIOCE_D32_ADDR(a) ((a) >= TIOCE_D32_MIN && (a) <= TIOCE_D32_MAX) + +#define TIOCE_M32_MIN 0x0000000000000000UL +#define TIOCE_M32_MAX 0x000000007fffffffUL +#define TIOCE_M32_ADDR(a) ((a) >= TIOCE_M32_MIN && (a) <= TIOCE_M32_MAX) + +#define TIOCE_M40_MIN 0x0000004000000000UL +#define TIOCE_M40_MAX 0x0000007fffffffffUL +#define TIOCE_M40_ADDR(a) ((a) >= TIOCE_M40_MIN && (a) <= TIOCE_M40_MAX) + +#define TIOCE_M40S_MIN 0x0000008000000000UL +#define TIOCE_M40S_MAX 0x000000ffffffffffUL +#define TIOCE_M40S_ADDR(a) ((a) >= TIOCE_M40S_MIN && (a) <= TIOCE_M40S_MAX) + +/* + * ATE manipulation macros. + */ + +#define ATE_PAGESHIFT(ps) (__ffs(ps)) +#define ATE_PAGEMASK(ps) ((ps)-1) + +#define ATE_PAGE(x, ps) ((x) >> ATE_PAGESHIFT(ps)) +#define ATE_NPAGES(start, len, pagesize) \ + (ATE_PAGE((start)+(len)-1, pagesize) - ATE_PAGE(start, pagesize) + 1) + +#define ATE_VALID(ate) ((ate) & (1UL << 63)) +#define ATE_MAKE(addr, ps) (((addr) & ~ATE_PAGEMASK(ps)) | (1UL << 63)) + +/* + * Flavors of ate-based mapping supported by tioce_alloc_map() + */ + +#define TIOCE_ATE_M32 1 +#define TIOCE_ATE_M40 2 +#define TIOCE_ATE_M40S 3 + +#define KB(x) ((x) << 10) +#define MB(x) ((x) << 20) +#define GB(x) ((x) << 30) + +/** + * tioce_dma_d64 - create a DMA mapping using 64-bit direct mode + * @ct_addr: system coretalk address + * + * Map @ct_addr into 64-bit CE bus space. No device context is necessary + * and no CE mapping are consumed. + * + * Bits 53:0 come from the coretalk address. The remaining bits are set as + * follows: + * + * 63 - must be 1 to indicate d64 mode to CE hardware + * 62 - barrier bit ... controlled with tioce_dma_barrier() + * 61 - 0 since this is not an MSI transaction + * 60:54 - reserved, MBZ + */ +static uint64_t +tioce_dma_d64(unsigned long ct_addr) +{ + uint64_t bus_addr; + + bus_addr = ct_addr | (1UL << 63); + + return bus_addr; +} + +/** + * pcidev_to_tioce - return misc ce related pointers given a pci_dev + * @pci_dev: pci device context + * @base: ptr to store struct tioce_mmr * for the CE holding this device + * @kernel: ptr to store struct tioce_kernel * for the CE holding this device + * @port: ptr to store the CE port number that this device is on + * + * Return pointers to various CE-related structures for the CE upstream of + * @pci_dev. + */ +static inline void +pcidev_to_tioce(struct pci_dev *pdev, struct tioce **base, + struct tioce_kernel **kernel, int *port) +{ + struct pcidev_info *pcidev_info; + struct tioce_common *ce_common; + struct tioce_kernel *ce_kernel; + + pcidev_info = SN_PCIDEV_INFO(pdev); + ce_common = (struct tioce_common *)pcidev_info->pdi_pcibus_info; + ce_kernel = (struct tioce_kernel *)ce_common->ce_kernel_private; + + if (base) + *base = (struct tioce *)ce_common->ce_pcibus.bs_base; + if (kernel) + *kernel = ce_kernel; + + /* + * we use port as a zero-based value internally, even though the + * documentation is 1-based. + */ + if (port) + *port = + (pdev->bus->number < ce_kernel->ce_port1_secondary) ? 0 : 1; +} + +/** + * tioce_alloc_map - Given a coretalk address, map it to pcie bus address + * space using one of the various ATE-based address modes. + * @ce_kern: tioce context + * @type: map mode to use + * @port: 0-based port that the requesting device is downstream of + * @ct_addr: the coretalk address to map + * @len: number of bytes to map + * + * Given the addressing type, set up various paramaters that define the + * ATE pool to use. Search for a contiguous block of entries to cover the + * length, and if enough resources exist, fill in the ATE's and construct a + * tioce_dmamap struct to track the mapping. + */ +static uint64_t +tioce_alloc_map(struct tioce_kernel *ce_kern, int type, int port, + uint64_t ct_addr, int len) +{ + int i; + int j; + int first; + int last; + int entries; + int nates; + int pagesize; + uint64_t *ate_shadow; + uint64_t *ate_reg; + uint64_t addr; + struct tioce *ce_mmr; + uint64_t bus_base; + struct tioce_dmamap *map; + + ce_mmr = (struct tioce *)ce_kern->ce_common->ce_pcibus.bs_base; + + switch (type) { + case TIOCE_ATE_M32: + /* + * The first 64 entries of the ate3240 pool are dedicated to + * super-page (TIOCE_ATE_M40S) mode. + */ + first = 64; + entries = TIOCE_NUM_M3240_ATES - 64; + ate_shadow = ce_kern->ce_ate3240_shadow; + ate_reg = ce_mmr->ce_ure_ate3240; + pagesize = ce_kern->ce_ate3240_pagesize; + bus_base = TIOCE_M32_MIN; + break; + case TIOCE_ATE_M40: + first = 0; + entries = TIOCE_NUM_M40_ATES; + ate_shadow = ce_kern->ce_ate40_shadow; + ate_reg = ce_mmr->ce_ure_ate40; + pagesize = MB(64); + bus_base = TIOCE_M40_MIN; + break; + case TIOCE_ATE_M40S: + /* + * ate3240 entries 0-31 are dedicated to port1 super-page + * mappings. ate3240 entries 32-63 are dedicated to port2. + */ + first = port * 32; + entries = 32; + ate_shadow = ce_kern->ce_ate3240_shadow; + ate_reg = ce_mmr->ce_ure_ate3240; + pagesize = GB(16); + bus_base = TIOCE_M40S_MIN; + break; + default: + return 0; + } + + nates = ATE_NPAGES(ct_addr, len, pagesize); + if (nates > entries) + return 0; + + last = first + entries - nates; + for (i = first; i <= last; i++) { + if (ATE_VALID(ate_shadow[i])) + continue; + + for (j = i; j < i + nates; j++) + if (ATE_VALID(ate_shadow[j])) + break; + + if (j >= i + nates) + break; + } + + if (i > last) + return 0; + + map = kcalloc(1, sizeof(struct tioce_dmamap), GFP_ATOMIC); + if (!map) + return 0; + + addr = ct_addr; + for (j = 0; j < nates; j++) { + uint64_t ate; + + ate = ATE_MAKE(addr, pagesize); + ate_shadow[i + j] = ate; + ate_reg[i + j] = ate; + addr += pagesize; + } + + map->refcnt = 1; + map->nbytes = nates * pagesize; + map->ct_start = ct_addr & ~ATE_PAGEMASK(pagesize); + map->pci_start = bus_base + (i * pagesize); + map->ate_hw = &ate_reg[i]; + map->ate_shadow = &ate_shadow[i]; + map->ate_count = nates; + + list_add(&map->ce_dmamap_list, &ce_kern->ce_dmamap_list); + + return (map->pci_start + (ct_addr - map->ct_start)); +} + +/** + * tioce_dma_d32 - create a DMA mapping using 32-bit direct mode + * @pdev: linux pci_dev representing the function + * @paddr: system physical address + * + * Map @paddr into 32-bit bus space of the CE associated with @pcidev_info. + */ +static uint64_t +tioce_dma_d32(struct pci_dev *pdev, uint64_t ct_addr) +{ + int dma_ok; + int port; + struct tioce *ce_mmr; + struct tioce_kernel *ce_kern; + uint64_t ct_upper; + uint64_t ct_lower; + dma_addr_t bus_addr; + + ct_upper = ct_addr & ~0x3fffffffUL; + ct_lower = ct_addr & 0x3fffffffUL; + + pcidev_to_tioce(pdev, &ce_mmr, &ce_kern, &port); + + if (ce_kern->ce_port[port].dirmap_refcnt == 0) { + volatile uint64_t tmp; + + ce_kern->ce_port[port].dirmap_shadow = ct_upper; + ce_mmr->ce_ure_dir_map[port] = ct_upper; + tmp = ce_mmr->ce_ure_dir_map[port]; + dma_ok = 1; + } else + dma_ok = (ce_kern->ce_port[port].dirmap_shadow == ct_upper); + + if (dma_ok) { + ce_kern->ce_port[port].dirmap_refcnt++; + bus_addr = TIOCE_D32_MIN + ct_lower; + } else + bus_addr = 0; + + return bus_addr; +} + +/** + * tioce_dma_barrier - swizzle a TIOCE bus address to include or exclude + * the barrier bit. + * @bus_addr: bus address to swizzle + * + * Given a TIOCE bus address, set the appropriate bit to indicate barrier + * attributes. + */ +static uint64_t +tioce_dma_barrier(uint64_t bus_addr, int on) +{ + uint64_t barrier_bit; + + /* barrier not supported in M40/M40S mode */ + if (TIOCE_M40_ADDR(bus_addr) || TIOCE_M40S_ADDR(bus_addr)) + return bus_addr; + + if (TIOCE_D64_ADDR(bus_addr)) + barrier_bit = (1UL << 62); + else /* must be m32 or d32 */ + barrier_bit = (1UL << 30); + + return (on) ? (bus_addr | barrier_bit) : (bus_addr & ~barrier_bit); +} + +/** + * tioce_dma_unmap - release CE mapping resources + * @pdev: linux pci_dev representing the function + * @bus_addr: bus address returned by an earlier tioce_dma_map + * @dir: mapping direction (unused) + * + * Locate mapping resources associated with @bus_addr and release them. + * For mappings created using the direct modes there are no resources + * to release. + */ +void +tioce_dma_unmap(struct pci_dev *pdev, dma_addr_t bus_addr, int dir) +{ + int i; + int port; + struct tioce_kernel *ce_kern; + struct tioce *ce_mmr; + unsigned long flags; + + bus_addr = tioce_dma_barrier(bus_addr, 0); + pcidev_to_tioce(pdev, &ce_mmr, &ce_kern, &port); + + /* nothing to do for D64 */ + + if (TIOCE_D64_ADDR(bus_addr)) + return; + + spin_lock_irqsave(&ce_kern->ce_lock, flags); + + if (TIOCE_D32_ADDR(bus_addr)) { + if (--ce_kern->ce_port[port].dirmap_refcnt == 0) { + ce_kern->ce_port[port].dirmap_shadow = 0; + ce_mmr->ce_ure_dir_map[port] = 0; + } + } else { + struct tioce_dmamap *map; + + list_for_each_entry(map, &ce_kern->ce_dmamap_list, + ce_dmamap_list) { + uint64_t last; + + last = map->pci_start + map->nbytes - 1; + if (bus_addr >= map->pci_start && bus_addr <= last) + break; + } + + if (&map->ce_dmamap_list == &ce_kern->ce_dmamap_list) { + printk(KERN_WARNING + "%s: %s - no map found for bus_addr 0x%lx\n", + __FUNCTION__, pci_name(pdev), bus_addr); + } else if (--map->refcnt == 0) { + for (i = 0; i < map->ate_count; i++) { + map->ate_shadow[i] = 0; + map->ate_hw[i] = 0; + } + + list_del(&map->ce_dmamap_list); + kfree(map); + } + } + + spin_unlock_irqrestore(&ce_kern->ce_lock, flags); +} + +/** + * tioce_do_dma_map - map pages for PCI DMA + * @pdev: linux pci_dev representing the function + * @paddr: host physical address to map + * @byte_count: bytes to map + * + * This is the main wrapper for mapping host physical pages to CE PCI space. + * The mapping mode used is based on the device's dma_mask. + */ +static uint64_t +tioce_do_dma_map(struct pci_dev *pdev, uint64_t paddr, size_t byte_count, + int barrier) +{ + unsigned long flags; + uint64_t ct_addr; + uint64_t mapaddr = 0; + struct tioce_kernel *ce_kern; + struct tioce_dmamap *map; + int port; + uint64_t dma_mask; + + dma_mask = (barrier) ? pdev->dev.coherent_dma_mask : pdev->dma_mask; + + /* cards must be able to address at least 31 bits */ + if (dma_mask < 0x7fffffffUL) + return 0; + + ct_addr = PHYS_TO_TIODMA(paddr); + + /* + * If the device can generate 64 bit addresses, create a D64 map. + * Since this should never fail, bypass the rest of the checks. + */ + if (dma_mask == ~0UL) { + mapaddr = tioce_dma_d64(ct_addr); + goto dma_map_done; + } + + pcidev_to_tioce(pdev, NULL, &ce_kern, &port); + + spin_lock_irqsave(&ce_kern->ce_lock, flags); + + /* + * D64 didn't work ... See if we have an existing map that covers + * this address range. Must account for devices dma_mask here since + * an existing map might have been done in a mode using more pci + * address bits than this device can support. + */ + list_for_each_entry(map, &ce_kern->ce_dmamap_list, ce_dmamap_list) { + uint64_t last; + + last = map->ct_start + map->nbytes - 1; + if (ct_addr >= map->ct_start && + ct_addr + byte_count - 1 <= last && + map->pci_start <= dma_mask) { + map->refcnt++; + mapaddr = map->pci_start + (ct_addr - map->ct_start); + break; + } + } + + /* + * If we don't have a map yet, and the card can generate 40 + * bit addresses, try the M40/M40S modes. Note these modes do not + * support a barrier bit, so if we need a consistent map these + * won't work. + */ + if (!mapaddr && !barrier && dma_mask >= 0xffffffffffUL) { + /* + * We have two options for 40-bit mappings: 16GB "super" ATE's + * and 64MB "regular" ATE's. We'll try both if needed for a + * given mapping but which one we try first depends on the + * size. For requests >64MB, prefer to use a super page with + * regular as the fallback. Otherwise, try in the reverse order. + */ + + if (byte_count > MB(64)) { + mapaddr = tioce_alloc_map(ce_kern, TIOCE_ATE_M40S, + port, ct_addr, byte_count); + if (!mapaddr) + mapaddr = + tioce_alloc_map(ce_kern, TIOCE_ATE_M40, -1, + ct_addr, byte_count); + } else { + mapaddr = tioce_alloc_map(ce_kern, TIOCE_ATE_M40, -1, + ct_addr, byte_count); + if (!mapaddr) + mapaddr = + tioce_alloc_map(ce_kern, TIOCE_ATE_M40S, + port, ct_addr, byte_count); + } + } + + /* + * 32-bit direct is the next mode to try + */ + if (!mapaddr && dma_mask >= 0xffffffffUL) + mapaddr = tioce_dma_d32(pdev, ct_addr); + + /* + * Last resort, try 32-bit ATE-based map. + */ + if (!mapaddr) + mapaddr = + tioce_alloc_map(ce_kern, TIOCE_ATE_M32, -1, ct_addr, + byte_count); + + spin_unlock_irqrestore(&ce_kern->ce_lock, flags); + +dma_map_done: + if (mapaddr & barrier) + mapaddr = tioce_dma_barrier(mapaddr, 1); + + return mapaddr; +} + +/** + * tioce_dma - standard pci dma map interface + * @pdev: pci device requesting the map + * @paddr: system physical address to map into pci space + * @byte_count: # bytes to map + * + * Simply call tioce_do_dma_map() to create a map with the barrier bit clear + * in the address. + */ +static uint64_t +tioce_dma(struct pci_dev *pdev, uint64_t paddr, size_t byte_count) +{ + return tioce_do_dma_map(pdev, paddr, byte_count, 0); +} + +/** + * tioce_dma_consistent - consistent pci dma map interface + * @pdev: pci device requesting the map + * @paddr: system physical address to map into pci space + * @byte_count: # bytes to map + * + * Simply call tioce_do_dma_map() to create a map with the barrier bit set + * in the address. + */ static uint64_t +tioce_dma_consistent(struct pci_dev *pdev, uint64_t paddr, size_t byte_count) +{ + return tioce_do_dma_map(pdev, paddr, byte_count, 1); +} + +/** + * tioce_error_intr_handler - SGI TIO CE error interrupt handler + * @irq: unused + * @arg: pointer to tioce_common struct for the given CE + * @pt: unused + * + * Handle a CE error interrupt. Simply a wrapper around a SAL call which + * defers processing to the SGI prom. + */ static irqreturn_t +tioce_error_intr_handler(int irq, void *arg, struct pt_regs *pt) +{ + struct tioce_common *soft = arg; + struct ia64_sal_retval ret_stuff; + ret_stuff.status = 0; + ret_stuff.v0 = 0; + + SAL_CALL_NOLOCK(ret_stuff, (u64) SN_SAL_IOIF_ERROR_INTERRUPT, + soft->ce_pcibus.bs_persist_segment, + soft->ce_pcibus.bs_persist_busnum, 0, 0, 0, 0, 0); + + return IRQ_HANDLED; +} + +/** + * tioce_kern_init - init kernel structures related to a given TIOCE + * @tioce_common: ptr to a cached tioce_common struct that originated in prom + */ static struct tioce_kernel * +tioce_kern_init(struct tioce_common *tioce_common) +{ + int i; + uint32_t tmp; + struct tioce *tioce_mmr; + struct tioce_kernel *tioce_kern; + + tioce_kern = kcalloc(1, sizeof(struct tioce_kernel), GFP_KERNEL); + if (!tioce_kern) { + return NULL; + } + + tioce_kern->ce_common = tioce_common; + spin_lock_init(&tioce_kern->ce_lock); + INIT_LIST_HEAD(&tioce_kern->ce_dmamap_list); + tioce_common->ce_kernel_private = (uint64_t) tioce_kern; + + /* + * Determine the secondary bus number of the port2 logical PPB. + * This is used to decide whether a given pci device resides on + * port1 or port2. Note: We don't have enough plumbing set up + * here to use pci_read_config_xxx() so use the raw_pci_ops vector. + */ + + raw_pci_ops->read(tioce_common->ce_pcibus.bs_persist_segment, + tioce_common->ce_pcibus.bs_persist_busnum, + PCI_DEVFN(2, 0), PCI_SECONDARY_BUS, 1, &tmp); + tioce_kern->ce_port1_secondary = (uint8_t) tmp; + + /* + * Set PMU pagesize to the largest size available, and zero out + * the ate's. + */ + + tioce_mmr = (struct tioce *)tioce_common->ce_pcibus.bs_base; + tioce_mmr->ce_ure_page_map &= ~CE_URE_PAGESIZE_MASK; + tioce_mmr->ce_ure_page_map |= CE_URE_256K_PAGESIZE; + tioce_kern->ce_ate3240_pagesize = KB(256); + + for (i = 0; i < TIOCE_NUM_M40_ATES; i++) { + tioce_kern->ce_ate40_shadow[i] = 0; + tioce_mmr->ce_ure_ate40[i] = 0; + } + + for (i = 0; i < TIOCE_NUM_M3240_ATES; i++) { + tioce_kern->ce_ate3240_shadow[i] = 0; + tioce_mmr->ce_ure_ate3240[i] = 0; + } + + return tioce_kern; +} + +/** + * tioce_force_interrupt - implement altix force_interrupt() backend for CE + * @sn_irq_info: sn asic irq that we need an interrupt generated for + * + * Given an sn_irq_info struct, set the proper bit in ce_adm_force_int to + * force a secondary interrupt to be generated. This is to work around an + * asic issue where there is a small window of opportunity for a legacy device + * interrupt to be lost. + */ +static void +tioce_force_interrupt(struct sn_irq_info *sn_irq_info) +{ + struct pcidev_info *pcidev_info; + struct tioce_common *ce_common; + struct tioce *ce_mmr; + uint64_t force_int_val; + + if (!sn_irq_info->irq_bridge) + return; + + if (sn_irq_info->irq_bridge_type != PCIIO_ASIC_TYPE_TIOCE) + return; + + pcidev_info = (struct pcidev_info *)sn_irq_info->irq_pciioinfo; + if (!pcidev_info) + return; + + ce_common = (struct tioce_common *)pcidev_info->pdi_pcibus_info; + ce_mmr = (struct tioce *)ce_common->ce_pcibus.bs_base; + + /* + * irq_int_bit is originally set up by prom, and holds the interrupt + * bit shift (not mask) as defined by the bit definitions in the + * ce_adm_int mmr. These shifts are not the same for the + * ce_adm_force_int register, so do an explicit mapping here to make + * things clearer. + */ + + switch (sn_irq_info->irq_int_bit) { + case CE_ADM_INT_PCIE_PORT1_DEV_A_SHFT: + force_int_val = 1UL << CE_ADM_FORCE_INT_PCIE_PORT1_DEV_A_SHFT; + break; + case CE_ADM_INT_PCIE_PORT1_DEV_B_SHFT: + force_int_val = 1UL << CE_ADM_FORCE_INT_PCIE_PORT1_DEV_B_SHFT; + break; + case CE_ADM_INT_PCIE_PORT1_DEV_C_SHFT: + force_int_val = 1UL << CE_ADM_FORCE_INT_PCIE_PORT1_DEV_C_SHFT; + break; + case CE_ADM_INT_PCIE_PORT1_DEV_D_SHFT: + force_int_val = 1UL << CE_ADM_FORCE_INT_PCIE_PORT1_DEV_D_SHFT; + break; + case CE_ADM_INT_PCIE_PORT2_DEV_A_SHFT: + force_int_val = 1UL << CE_ADM_FORCE_INT_PCIE_PORT2_DEV_A_SHFT; + break; + case CE_ADM_INT_PCIE_PORT2_DEV_B_SHFT: + force_int_val = 1UL << CE_ADM_FORCE_INT_PCIE_PORT2_DEV_B_SHFT; + break; + case CE_ADM_INT_PCIE_PORT2_DEV_C_SHFT: + force_int_val = 1UL << CE_ADM_FORCE_INT_PCIE_PORT2_DEV_C_SHFT; + break; + case CE_ADM_INT_PCIE_PORT2_DEV_D_SHFT: + force_int_val = 1UL << CE_ADM_FORCE_INT_PCIE_PORT2_DEV_D_SHFT; + break; + default: + return; + } + ce_mmr->ce_adm_force_int = force_int_val; +} + +/** + * tioce_bus_fixup - perform final PCI fixup for a TIO CE bus + * @prom_bussoft: Common prom/kernel struct representing the bus + * + * Replicates the tioce_common pointed to by @prom_bussoft in kernel + * space. Allocates and initializes a kernel-only area for a given CE, + * and sets up an irq for handling CE error interrupts. + * + * On successful setup, returns the kernel version of tioce_common back to + * the caller. + */ +static void * +tioce_bus_fixup(struct pcibus_bussoft *prom_bussoft, struct pci_controller *controller) +{ + struct tioce_common *tioce_common; + + /* + * Allocate kernel bus soft and copy from prom. + */ + + tioce_common = kcalloc(1, sizeof(struct tioce_common), GFP_KERNEL); + if (!tioce_common) + return NULL; + + memcpy(tioce_common, prom_bussoft, sizeof(struct tioce_common)); + tioce_common->ce_pcibus.bs_base |= __IA64_UNCACHED_OFFSET; + + if (tioce_kern_init(tioce_common) == NULL) { + kfree(tioce_common); + return NULL; + } + + if (request_irq(SGI_PCIASIC_ERROR, + tioce_error_intr_handler, + SA_SHIRQ, "TIOCE error", (void *)tioce_common)) + printk(KERN_WARNING + "%s: Unable to get irq %d. " + "Error interrupts won't be routed for " + "TIOCE bus %04x:%02x\n", + __FUNCTION__, SGI_PCIASIC_ERROR, + tioce_common->ce_pcibus.bs_persist_segment, + tioce_common->ce_pcibus.bs_persist_busnum); + + return tioce_common; +} + +static struct sn_pcibus_provider tioce_pci_interfaces = { + .dma_map = tioce_dma, + .dma_map_consistent = tioce_dma_consistent, + .dma_unmap = tioce_dma_unmap, + .bus_fixup = tioce_bus_fixup, + .force_interrupt = tioce_force_interrupt +}; + +/** + * tioce_init_provider - init SN PCI provider ops for TIO CE + */ +int +tioce_init_provider(void) +{ + sn_pci_provider[PCIIO_ASIC_TYPE_TIOCE] = &tioce_pci_interfaces; + return 0; +} diff --git a/include/asm-ia64/sn/pcibus_provider_defs.h b/include/asm-ia64/sn/pcibus_provider_defs.h index 5c3ba57..5a92f51 100644 --- a/include/asm-ia64/sn/pcibus_provider_defs.h +++ b/include/asm-ia64/sn/pcibus_provider_defs.h @@ -18,8 +18,9 @@ #define PCIIO_ASIC_TYPE_PIC 2 #define PCIIO_ASIC_TYPE_TIOCP 3 #define PCIIO_ASIC_TYPE_TIOCA 4 +#define PCIIO_ASIC_TYPE_TIOCE 5 -#define PCIIO_ASIC_MAX_TYPES 5 +#define PCIIO_ASIC_MAX_TYPES 6 /* * Common pciio bus provider data. There should be one of these as the diff --git a/include/asm-ia64/sn/tioce.h b/include/asm-ia64/sn/tioce.h new file mode 100644 index 0000000..2287985 --- /dev/null +++ b/include/asm-ia64/sn/tioce.h @@ -0,0 +1,740 @@ +/************************************************************************** + * * + * Unpublished copyright (c) 2005, Silicon Graphics, Inc. * + * THIS IS UNPUBLISHED CONFIDENTIAL AND PROPRIETARY SOURCE CODE OF SGI. * + * * + * The copyright notice above does not evidence any actual or intended * + * publication or disclosure of this source code, which includes * + * information that is confidential and/or proprietary, and is a trade * + * secret, of Silicon Graphics, Inc. ANY REPRODUCTION, MODIFICATION, * + * DISTRIBUTION, PUBLIC PERFORMANCE, OR PUBLIC DISPLAY OF OR THROUGH * + * USE OF THIS SOURCE CODE WITHOUT THE EXPRESS WRITTEN CONSENT OF * + * SILICON GRAPHICS, INC. IS STRICTLY PROHIBITED, AND IN VIOLATION OF * + * APPLICABLE LAWS AND INTERNATIONAL TREATIES. THE RECEIPT OR * + * POSSESSION OF THIS SOURCE CODE AND/OR RELATED INFORMATION DOES NOT * + * CONVEY OR IMPLY ANY RIGHTS TO REPRODUCE, DISCLOSE OR DISTRIBUTE ITS * + * CONTENTS, OR TO MANUFACTURE, USE, OR SELL ANYTHING THAT IT MAY * + * DESCRIBE, IN WHOLE OR IN PART. * + * * + **************************************************************************/ + +#ifndef __ASM_IA64_SN_TIOCE_H__ +#define __ASM_IA64_SN_TIOCE_H__ + +/* CE ASIC part & mfgr information */ +#define TIOCE_PART_NUM 0xCE00 +#define TIOCE_MFGR_NUM 0x36 +#define TIOCE_REV_A 0x1 + +/* CE Virtual PPB Vendor/Device IDs */ +#define CE_VIRT_PPB_VENDOR_ID 0x10a9 +#define CE_VIRT_PPB_DEVICE_ID 0x4002 + +/* CE Host Bridge Vendor/Device IDs */ +#define CE_HOST_BRIDGE_VENDOR_ID 0x10a9 +#define CE_HOST_BRIDGE_DEVICE_ID 0x4003 + + +#define TIOCE_NUM_M40_ATES 4096 +#define TIOCE_NUM_M3240_ATES 2048 +#define TIOCE_NUM_PORTS 2 + +/* + * Register layout for TIOCE. MMR offsets are shown at the far right of the + * structure definition. + */ +typedef volatile struct tioce { + /* + * ADMIN : Administration Registers + */ + uint64_t ce_adm_id; /* 0x000000 */ + uint64_t ce_pad_000008; /* 0x000008 */ + uint64_t ce_adm_dyn_credit_status; /* 0x000010 */ + uint64_t ce_adm_last_credit_status; /* 0x000018 */ + uint64_t ce_adm_credit_limit; /* 0x000020 */ + uint64_t ce_adm_force_credit; /* 0x000028 */ + uint64_t ce_adm_control; /* 0x000030 */ + uint64_t ce_adm_mmr_chn_timeout; /* 0x000038 */ + uint64_t ce_adm_ssp_ure_timeout; /* 0x000040 */ + uint64_t ce_adm_ssp_dre_timeout; /* 0x000048 */ + uint64_t ce_adm_ssp_debug_sel; /* 0x000050 */ + uint64_t ce_adm_int_status; /* 0x000058 */ + uint64_t ce_adm_int_status_alias; /* 0x000060 */ + uint64_t ce_adm_int_mask; /* 0x000068 */ + uint64_t ce_adm_int_pending; /* 0x000070 */ + uint64_t ce_adm_force_int; /* 0x000078 */ + uint64_t ce_adm_ure_ups_buf_barrier_flush; /* 0x000080 */ + uint64_t ce_adm_int_dest[15]; /* 0x000088 -- 0x0000F8 */ + uint64_t ce_adm_error_summary; /* 0x000100 */ + uint64_t ce_adm_error_summary_alias; /* 0x000108 */ + uint64_t ce_adm_error_mask; /* 0x000110 */ + uint64_t ce_adm_first_error; /* 0x000118 */ + uint64_t ce_adm_error_overflow; /* 0x000120 */ + uint64_t ce_adm_error_overflow_alias; /* 0x000128 */ + uint64_t ce_pad_000130[2]; /* 0x000130 -- 0x000138 */ + uint64_t ce_adm_tnum_error; /* 0x000140 */ + uint64_t ce_adm_mmr_err_detail; /* 0x000148 */ + uint64_t ce_adm_msg_sram_perr_detail; /* 0x000150 */ + uint64_t ce_adm_bap_sram_perr_detail; /* 0x000158 */ + uint64_t ce_adm_ce_sram_perr_detail; /* 0x000160 */ + uint64_t ce_adm_ce_credit_oflow_detail; /* 0x000168 */ + uint64_t ce_adm_tx_link_idle_max_timer; /* 0x000170 */ + uint64_t ce_adm_pcie_debug_sel; /* 0x000178 */ + uint64_t ce_pad_000180[16]; /* 0x000180 -- 0x0001F8 */ + + uint64_t ce_adm_pcie_debug_sel_top; /* 0x000200 */ + uint64_t ce_adm_pcie_debug_lat_sel_lo_top; /* 0x000208 */ + uint64_t ce_adm_pcie_debug_lat_sel_hi_top; /* 0x000210 */ + uint64_t ce_adm_pcie_debug_trig_sel_top; /* 0x000218 */ + uint64_t ce_adm_pcie_debug_trig_lat_sel_lo_top; /* 0x000220 */ + uint64_t ce_adm_pcie_debug_trig_lat_sel_hi_top; /* 0x000228 */ + uint64_t ce_adm_pcie_trig_compare_top; /* 0x000230 */ + uint64_t ce_adm_pcie_trig_compare_en_top; /* 0x000238 */ + uint64_t ce_adm_ssp_debug_sel_top; /* 0x000240 */ + uint64_t ce_adm_ssp_debug_lat_sel_lo_top; /* 0x000248 */ + uint64_t ce_adm_ssp_debug_lat_sel_hi_top; /* 0x000250 */ + uint64_t ce_adm_ssp_debug_trig_sel_top; /* 0x000258 */ + uint64_t ce_adm_ssp_debug_trig_lat_sel_lo_top; /* 0x000260 */ + uint64_t ce_adm_ssp_debug_trig_lat_sel_hi_top; /* 0x000268 */ + uint64_t ce_adm_ssp_trig_compare_top; /* 0x000270 */ + uint64_t ce_adm_ssp_trig_compare_en_top; /* 0x000278 */ + uint64_t ce_pad_000280[48]; /* 0x000280 -- 0x0003F8 */ + + uint64_t ce_adm_bap_ctrl; /* 0x000400 */ + uint64_t ce_pad_000408[127]; /* 0x000408 -- 0x0007F8 */ + + uint64_t ce_msg_buf_data63_0[35]; /* 0x000800 -- 0x000918 */ + uint64_t ce_pad_000920[29]; /* 0x000920 -- 0x0009F8 */ + + uint64_t ce_msg_buf_data127_64[35]; /* 0x000A00 -- 0x000B18 */ + uint64_t ce_pad_000B20[29]; /* 0x000B20 -- 0x000BF8 */ + + uint64_t ce_msg_buf_parity[35]; /* 0x000C00 -- 0x000D18 */ + uint64_t ce_pad_000D20[29]; /* 0x000D20 -- 0x000DF8 */ + + uint64_t ce_pad_000E00[576]; /* 0x000E00 -- 0x001FF8 */ + + /* + * LSI : LSI's PCI Express Link Registers (Link#1 and Link#2) + * Link#1 MMRs at start at 0x002000, Link#2 MMRs at 0x003000 + * NOTE: the comment offsets at far right: let 'z' = {2 or 3} + */ + #define ce_lsi(link_num) ce_lsi[link_num-1] + struct ce_lsi_reg { + uint64_t ce_lsi_lpu_id; /* 0x00z000 */ + uint64_t ce_lsi_rst; /* 0x00z008 */ + uint64_t ce_lsi_dbg_stat; /* 0x00z010 */ + uint64_t ce_lsi_dbg_cfg; /* 0x00z018 */ + uint64_t ce_lsi_ltssm_ctrl; /* 0x00z020 */ + uint64_t ce_lsi_lk_stat; /* 0x00z028 */ + uint64_t ce_pad_00z030[2]; /* 0x00z030 -- 0x00z038 */ + uint64_t ce_lsi_int_and_stat; /* 0x00z040 */ + uint64_t ce_lsi_int_mask; /* 0x00z048 */ + uint64_t ce_pad_00z050[22]; /* 0x00z050 -- 0x00z0F8 */ + uint64_t ce_lsi_lk_perf_cnt_sel; /* 0x00z100 */ + uint64_t ce_pad_00z108; /* 0x00z108 */ + uint64_t ce_lsi_lk_perf_cnt_ctrl; /* 0x00z110 */ + uint64_t ce_pad_00z118; /* 0x00z118 */ + uint64_t ce_lsi_lk_perf_cnt1; /* 0x00z120 */ + uint64_t ce_lsi_lk_perf_cnt1_test; /* 0x00z128 */ + uint64_t ce_lsi_lk_perf_cnt2; /* 0x00z130 */ + uint64_t ce_lsi_lk_perf_cnt2_test; /* 0x00z138 */ + uint64_t ce_pad_00z140[24]; /* 0x00z140 -- 0x00z1F8 */ + uint64_t ce_lsi_lk_lyr_cfg; /* 0x00z200 */ + uint64_t ce_lsi_lk_lyr_status; /* 0x00z208 */ + uint64_t ce_lsi_lk_lyr_int_stat; /* 0x00z210 */ + uint64_t ce_lsi_lk_ly_int_stat_test; /* 0x00z218 */ + uint64_t ce_lsi_lk_ly_int_stat_mask; /* 0x00z220 */ + uint64_t ce_pad_00z228[3]; /* 0x00z228 -- 0x00z238 */ + uint64_t ce_lsi_fc_upd_ctl; /* 0x00z240 */ + uint64_t ce_pad_00z248[3]; /* 0x00z248 -- 0x00z258 */ + uint64_t ce_lsi_flw_ctl_upd_to_timer; /* 0x00z260 */ + uint64_t ce_lsi_flw_ctl_upd_timer0; /* 0x00z268 */ + uint64_t ce_lsi_flw_ctl_upd_timer1; /* 0x00z270 */ + uint64_t ce_pad_00z278[49]; /* 0x00z278 -- 0x00z3F8 */ + uint64_t ce_lsi_freq_nak_lat_thrsh; /* 0x00z400 */ + uint64_t ce_lsi_ack_nak_lat_tmr; /* 0x00z408 */ + uint64_t ce_lsi_rply_tmr_thr; /* 0x00z410 */ + uint64_t ce_lsi_rply_tmr; /* 0x00z418 */ + uint64_t ce_lsi_rply_num_stat; /* 0x00z420 */ + uint64_t ce_lsi_rty_buf_max_addr; /* 0x00z428 */ + uint64_t ce_lsi_rty_fifo_ptr; /* 0x00z430 */ + uint64_t ce_lsi_rty_fifo_rd_wr_ptr; /* 0x00z438 */ + uint64_t ce_lsi_rty_fifo_cred; /* 0x00z440 */ + uint64_t ce_lsi_seq_cnt; /* 0x00z448 */ + uint64_t ce_lsi_ack_sent_seq_num; /* 0x00z450 */ + uint64_t ce_lsi_seq_cnt_fifo_max_addr; /* 0x00z458 */ + uint64_t ce_lsi_seq_cnt_fifo_ptr; /* 0x00z460 */ + uint64_t ce_lsi_seq_cnt_rd_wr_ptr; /* 0x00z468 */ + uint64_t ce_lsi_tx_lk_ts_ctl; /* 0x00z470 */ + uint64_t ce_pad_00z478; /* 0x00z478 */ + uint64_t ce_lsi_mem_addr_ctl; /* 0x00z480 */ + uint64_t ce_lsi_mem_d_ld0; /* 0x00z488 */ + uint64_t ce_lsi_mem_d_ld1; /* 0x00z490 */ + uint64_t ce_lsi_mem_d_ld2; /* 0x00z498 */ + uint64_t ce_lsi_mem_d_ld3; /* 0x00z4A0 */ + uint64_t ce_lsi_mem_d_ld4; /* 0x00z4A8 */ + uint64_t ce_pad_00z4B0[2]; /* 0x00z4B0 -- 0x00z4B8 */ + uint64_t ce_lsi_rty_d_cnt; /* 0x00z4C0 */ + uint64_t ce_lsi_seq_buf_cnt; /* 0x00z4C8 */ + uint64_t ce_lsi_seq_buf_bt_d; /* 0x00z4D0 */ + uint64_t ce_pad_00z4D8; /* 0x00z4D8 */ + uint64_t ce_lsi_ack_lat_thr; /* 0x00z4E0 */ + uint64_t ce_pad_00z4E8[3]; /* 0x00z4E8 -- 0x00z4F8 */ + uint64_t ce_lsi_nxt_rcv_seq_1_cntr; /* 0x00z500 */ + uint64_t ce_lsi_unsp_dllp_rcvd; /* 0x00z508 */ + uint64_t ce_lsi_rcv_lk_ts_ctl; /* 0x00z510 */ + uint64_t ce_pad_00z518[29]; /* 0x00z518 -- 0x00z5F8 */ + uint64_t ce_lsi_phy_lyr_cfg; /* 0x00z600 */ + uint64_t ce_pad_00z608; /* 0x00z608 */ + uint64_t ce_lsi_phy_lyr_int_stat; /* 0x00z610 */ + uint64_t ce_lsi_phy_lyr_int_stat_test; /* 0x00z618 */ + uint64_t ce_lsi_phy_lyr_int_mask; /* 0x00z620 */ + uint64_t ce_pad_00z628[11]; /* 0x00z628 -- 0x00z678 */ + uint64_t ce_lsi_rcv_phy_cfg; /* 0x00z680 */ + uint64_t ce_lsi_rcv_phy_stat1; /* 0x00z688 */ + uint64_t ce_lsi_rcv_phy_stat2; /* 0x00z690 */ + uint64_t ce_lsi_rcv_phy_stat3; /* 0x00z698 */ + uint64_t ce_lsi_rcv_phy_int_stat; /* 0x00z6A0 */ + uint64_t ce_lsi_rcv_phy_int_stat_test; /* 0x00z6A8 */ + uint64_t ce_lsi_rcv_phy_int_mask; /* 0x00z6B0 */ + uint64_t ce_pad_00z6B8[9]; /* 0x00z6B8 -- 0x00z6F8 */ + uint64_t ce_lsi_tx_phy_cfg; /* 0x00z700 */ + uint64_t ce_lsi_tx_phy_stat; /* 0x00z708 */ + uint64_t ce_lsi_tx_phy_int_stat; /* 0x00z710 */ + uint64_t ce_lsi_tx_phy_int_stat_test; /* 0x00z718 */ + uint64_t ce_lsi_tx_phy_int_mask; /* 0x00z720 */ + uint64_t ce_lsi_tx_phy_stat2; /* 0x00z728 */ + uint64_t ce_pad_00z730[10]; /* 0x00z730 -- 0x00z77F */ + uint64_t ce_lsi_ltssm_cfg1; /* 0x00z780 */ + uint64_t ce_lsi_ltssm_cfg2; /* 0x00z788 */ + uint64_t ce_lsi_ltssm_cfg3; /* 0x00z790 */ + uint64_t ce_lsi_ltssm_cfg4; /* 0x00z798 */ + uint64_t ce_lsi_ltssm_cfg5; /* 0x00z7A0 */ + uint64_t ce_lsi_ltssm_stat1; /* 0x00z7A8 */ + uint64_t ce_lsi_ltssm_stat2; /* 0x00z7B0 */ + uint64_t ce_lsi_ltssm_int_stat; /* 0x00z7B8 */ + uint64_t ce_lsi_ltssm_int_stat_test; /* 0x00z7C0 */ + uint64_t ce_lsi_ltssm_int_mask; /* 0x00z7C8 */ + uint64_t ce_lsi_ltssm_stat_wr_en; /* 0x00z7D0 */ + uint64_t ce_pad_00z7D8[5]; /* 0x00z7D8 -- 0x00z7F8 */ + uint64_t ce_lsi_gb_cfg1; /* 0x00z800 */ + uint64_t ce_lsi_gb_cfg2; /* 0x00z808 */ + uint64_t ce_lsi_gb_cfg3; /* 0x00z810 */ + uint64_t ce_lsi_gb_cfg4; /* 0x00z818 */ + uint64_t ce_lsi_gb_stat; /* 0x00z820 */ + uint64_t ce_lsi_gb_int_stat; /* 0x00z828 */ + uint64_t ce_lsi_gb_int_stat_test; /* 0x00z830 */ + uint64_t ce_lsi_gb_int_mask; /* 0x00z838 */ + uint64_t ce_lsi_gb_pwr_dn1; /* 0x00z840 */ + uint64_t ce_lsi_gb_pwr_dn2; /* 0x00z848 */ + uint64_t ce_pad_00z850[246]; /* 0x00z850 -- 0x00zFF8 */ + } ce_lsi[2]; + + uint64_t ce_pad_004000[10]; /* 0x004000 -- 0x004048 */ + + /* + * CRM: Coretalk Receive Module Registers + */ + uint64_t ce_crm_debug_mux; /* 0x004050 */ + uint64_t ce_pad_004058; /* 0x004058 */ + uint64_t ce_crm_ssp_err_cmd_wrd; /* 0x004060 */ + uint64_t ce_crm_ssp_err_addr; /* 0x004068 */ + uint64_t ce_crm_ssp_err_syn; /* 0x004070 */ + + uint64_t ce_pad_004078[499]; /* 0x004078 -- 0x005008 */ + + /* + * CXM: Coretalk Xmit Module Registers + */ + uint64_t ce_cxm_dyn_credit_status; /* 0x005010 */ + uint64_t ce_cxm_last_credit_status; /* 0x005018 */ + uint64_t ce_cxm_credit_limit; /* 0x005020 */ + uint64_t ce_cxm_force_credit; /* 0x005028 */ + uint64_t ce_cxm_disable_bypass; /* 0x005030 */ + uint64_t ce_pad_005038[3]; /* 0x005038 -- 0x005048 */ + uint64_t ce_cxm_debug_mux; /* 0x005050 */ + + uint64_t ce_pad_005058[501]; /* 0x005058 -- 0x005FF8 */ + + /* + * DTL: Downstream Transaction Layer Regs (Link#1 and Link#2) + * DTL: Link#1 MMRs at start at 0x006000, Link#2 MMRs at 0x008000 + * DTL: the comment offsets at far right: let 'y' = {6 or 8} + * + * UTL: Downstream Transaction Layer Regs (Link#1 and Link#2) + * UTL: Link#1 MMRs at start at 0x007000, Link#2 MMRs at 0x009000 + * UTL: the comment offsets at far right: let 'z' = {7 or 9} + */ + #define ce_dtl(link_num) ce_dtl_utl[link_num-1] + #define ce_utl(link_num) ce_dtl_utl[link_num-1] + struct ce_dtl_utl_reg { + /* DTL */ + uint64_t ce_dtl_dtdr_credit_limit; /* 0x00y000 */ + uint64_t ce_dtl_dtdr_credit_force; /* 0x00y008 */ + uint64_t ce_dtl_dyn_credit_status; /* 0x00y010 */ + uint64_t ce_dtl_dtl_last_credit_stat; /* 0x00y018 */ + uint64_t ce_dtl_dtl_ctrl; /* 0x00y020 */ + uint64_t ce_pad_00y028[5]; /* 0x00y028 -- 0x00y048 */ + uint64_t ce_dtl_debug_sel; /* 0x00y050 */ + uint64_t ce_pad_00y058[501]; /* 0x00y058 -- 0x00yFF8 */ + + /* UTL */ + uint64_t ce_utl_utl_ctrl; /* 0x00z000 */ + uint64_t ce_utl_debug_sel; /* 0x00z008 */ + uint64_t ce_pad_00z010[510]; /* 0x00z010 -- 0x00zFF8 */ + } ce_dtl_utl[2]; + + uint64_t ce_pad_00A000[514]; /* 0x00A000 -- 0x00B008 */ + + /* + * URE: Upstream Request Engine + */ + uint64_t ce_ure_dyn_credit_status; /* 0x00B010 */ + uint64_t ce_ure_last_credit_status; /* 0x00B018 */ + uint64_t ce_ure_credit_limit; /* 0x00B020 */ + uint64_t ce_pad_00B028; /* 0x00B028 */ + uint64_t ce_ure_control; /* 0x00B030 */ + uint64_t ce_ure_status; /* 0x00B038 */ + uint64_t ce_pad_00B040[2]; /* 0x00B040 -- 0x00B048 */ + uint64_t ce_ure_debug_sel; /* 0x00B050 */ + uint64_t ce_ure_pcie_debug_sel; /* 0x00B058 */ + uint64_t ce_ure_ssp_err_cmd_wrd; /* 0x00B060 */ + uint64_t ce_ure_ssp_err_addr; /* 0x00B068 */ + uint64_t ce_ure_page_map; /* 0x00B070 */ + uint64_t ce_ure_dir_map[TIOCE_NUM_PORTS]; /* 0x00B078 */ + uint64_t ce_ure_pipe_sel1; /* 0x00B088 */ + uint64_t ce_ure_pipe_mask1; /* 0x00B090 */ + uint64_t ce_ure_pipe_sel2; /* 0x00B098 */ + uint64_t ce_ure_pipe_mask2; /* 0x00B0A0 */ + uint64_t ce_ure_pcie1_credits_sent; /* 0x00B0A8 */ + uint64_t ce_ure_pcie1_credits_used; /* 0x00B0B0 */ + uint64_t ce_ure_pcie1_credit_limit; /* 0x00B0B8 */ + uint64_t ce_ure_pcie2_credits_sent; /* 0x00B0C0 */ + uint64_t ce_ure_pcie2_credits_used; /* 0x00B0C8 */ + uint64_t ce_ure_pcie2_credit_limit; /* 0x00B0D0 */ + uint64_t ce_ure_pcie_force_credit; /* 0x00B0D8 */ + uint64_t ce_ure_rd_tnum_val; /* 0x00B0E0 */ + uint64_t ce_ure_rd_tnum_rsp_rcvd; /* 0x00B0E8 */ + uint64_t ce_ure_rd_tnum_esent_timer; /* 0x00B0F0 */ + uint64_t ce_ure_rd_tnum_error; /* 0x00B0F8 */ + uint64_t ce_ure_rd_tnum_first_cl; /* 0x00B100 */ + uint64_t ce_ure_rd_tnum_link_buf; /* 0x00B108 */ + uint64_t ce_ure_wr_tnum_val; /* 0x00B110 */ + uint64_t ce_ure_sram_err_addr0; /* 0x00B118 */ + uint64_t ce_ure_sram_err_addr1; /* 0x00B120 */ + uint64_t ce_ure_sram_err_addr2; /* 0x00B128 */ + uint64_t ce_ure_sram_rd_addr0; /* 0x00B130 */ + uint64_t ce_ure_sram_rd_addr1; /* 0x00B138 */ + uint64_t ce_ure_sram_rd_addr2; /* 0x00B140 */ + uint64_t ce_ure_sram_wr_addr0; /* 0x00B148 */ + uint64_t ce_ure_sram_wr_addr1; /* 0x00B150 */ + uint64_t ce_ure_sram_wr_addr2; /* 0x00B158 */ + uint64_t ce_ure_buf_flush10; /* 0x00B160 */ + uint64_t ce_ure_buf_flush11; /* 0x00B168 */ + uint64_t ce_ure_buf_flush12; /* 0x00B170 */ + uint64_t ce_ure_buf_flush13; /* 0x00B178 */ + uint64_t ce_ure_buf_flush20; /* 0x00B180 */ + uint64_t ce_ure_buf_flush21; /* 0x00B188 */ + uint64_t ce_ure_buf_flush22; /* 0x00B190 */ + uint64_t ce_ure_buf_flush23; /* 0x00B198 */ + uint64_t ce_ure_pcie_control1; /* 0x00B1A0 */ + uint64_t ce_ure_pcie_control2; /* 0x00B1A8 */ + + uint64_t ce_pad_00B1B0[458]; /* 0x00B1B0 -- 0x00BFF8 */ + + /* Upstream Data Buffer, Port1 */ + struct ce_ure_maint_ups_dat1_data { + uint64_t data63_0[512]; /* 0x00C000 -- 0x00CFF8 */ + uint64_t data127_64[512]; /* 0x00D000 -- 0x00DFF8 */ + uint64_t parity[512]; /* 0x00E000 -- 0x00EFF8 */ + } ce_ure_maint_ups_dat1; + + /* Upstream Header Buffer, Port1 */ + struct ce_ure_maint_ups_hdr1_data { + uint64_t data63_0[512]; /* 0x00F000 -- 0x00FFF8 */ + uint64_t data127_64[512]; /* 0x010000 -- 0x010FF8 */ + uint64_t parity[512]; /* 0x011000 -- 0x011FF8 */ + } ce_ure_maint_ups_hdr1; + + /* Upstream Data Buffer, Port2 */ + struct ce_ure_maint_ups_dat2_data { + uint64_t data63_0[512]; /* 0x012000 -- 0x012FF8 */ + uint64_t data127_64[512]; /* 0x013000 -- 0x013FF8 */ + uint64_t parity[512]; /* 0x014000 -- 0x014FF8 */ + } ce_ure_maint_ups_dat2; + + /* Upstream Header Buffer, Port2 */ + struct ce_ure_maint_ups_hdr2_data { + uint64_t data63_0[512]; /* 0x015000 -- 0x015FF8 */ + uint64_t data127_64[512]; /* 0x016000 -- 0x016FF8 */ + uint64_t parity[512]; /* 0x017000 -- 0x017FF8 */ + } ce_ure_maint_ups_hdr2; + + /* Downstream Data Buffer */ + struct ce_ure_maint_dns_dat_data { + uint64_t data63_0[512]; /* 0x018000 -- 0x018FF8 */ + uint64_t data127_64[512]; /* 0x019000 -- 0x019FF8 */ + uint64_t parity[512]; /* 0x01A000 -- 0x01AFF8 */ + } ce_ure_maint_dns_dat; + + /* Downstream Header Buffer */ + struct ce_ure_maint_dns_hdr_data { + uint64_t data31_0[64]; /* 0x01B000 -- 0x01B1F8 */ + uint64_t data95_32[64]; /* 0x01B200 -- 0x01B3F8 */ + uint64_t parity[64]; /* 0x01B400 -- 0x01B5F8 */ + } ce_ure_maint_dns_hdr; + + /* RCI Buffer Data */ + struct ce_ure_maint_rci_data { + uint64_t data41_0[64]; /* 0x01B600 -- 0x01B7F8 */ + uint64_t data69_42[64]; /* 0x01B800 -- 0x01B9F8 */ + } ce_ure_maint_rci; + + /* Response Queue */ + uint64_t ce_ure_maint_rspq[64]; /* 0x01BA00 -- 0x01BBF8 */ + + uint64_t ce_pad_01C000[4224]; /* 0x01BC00 -- 0x023FF8 */ + + /* Admin Build-a-Packet Buffer */ + struct ce_adm_maint_bap_buf_data { + uint64_t data63_0[258]; /* 0x024000 -- 0x024808 */ + uint64_t data127_64[258]; /* 0x024810 -- 0x025018 */ + uint64_t parity[258]; /* 0x025020 -- 0x025828 */ + } ce_adm_maint_bap_buf; + + uint64_t ce_pad_025830[5370]; /* 0x025830 -- 0x02FFF8 */ + + /* URE: 40bit PMU ATE Buffer */ /* 0x030000 -- 0x037FF8 */ + uint64_t ce_ure_ate40[TIOCE_NUM_M40_ATES]; + + /* URE: 32/40bit PMU ATE Buffer */ /* 0x038000 -- 0x03BFF8 */ + uint64_t ce_ure_ate3240[TIOCE_NUM_M3240_ATES]; + + uint64_t ce_pad_03C000[2050]; /* 0x03C000 -- 0x040008 */ + + /* + * DRE: Down Stream Request Engine + */ + uint64_t ce_dre_dyn_credit_status1; /* 0x040010 */ + uint64_t ce_dre_dyn_credit_status2; /* 0x040018 */ + uint64_t ce_dre_last_credit_status1; /* 0x040020 */ + uint64_t ce_dre_last_credit_status2; /* 0x040028 */ + uint64_t ce_dre_credit_limit1; /* 0x040030 */ + uint64_t ce_dre_credit_limit2; /* 0x040038 */ + uint64_t ce_dre_force_credit1; /* 0x040040 */ + uint64_t ce_dre_force_credit2; /* 0x040048 */ + uint64_t ce_dre_debug_mux1; /* 0x040050 */ + uint64_t ce_dre_debug_mux2; /* 0x040058 */ + uint64_t ce_dre_ssp_err_cmd_wrd; /* 0x040060 */ + uint64_t ce_dre_ssp_err_addr; /* 0x040068 */ + uint64_t ce_dre_comp_err_cmd_wrd; /* 0x040070 */ + uint64_t ce_dre_comp_err_addr; /* 0x040078 */ + uint64_t ce_dre_req_status; /* 0x040080 */ + uint64_t ce_dre_config1; /* 0x040088 */ + uint64_t ce_dre_config2; /* 0x040090 */ + uint64_t ce_dre_config_req_status; /* 0x040098 */ + uint64_t ce_pad_0400A0[12]; /* 0x0400A0 -- 0x0400F8 */ + uint64_t ce_dre_dyn_fifo; /* 0x040100 */ + uint64_t ce_pad_040108[3]; /* 0x040108 -- 0x040118 */ + uint64_t ce_dre_last_fifo; /* 0x040120 */ + + uint64_t ce_pad_040128[27]; /* 0x040128 -- 0x0401F8 */ + + /* DRE Downstream Head Queue */ + struct ce_dre_maint_ds_head_queue { + uint64_t data63_0[32]; /* 0x040200 -- 0x0402F8 */ + uint64_t data127_64[32]; /* 0x040300 -- 0x0403F8 */ + uint64_t parity[32]; /* 0x040400 -- 0x0404F8 */ + } ce_dre_maint_ds_head_q; + + uint64_t ce_pad_040500[352]; /* 0x040500 -- 0x040FF8 */ + + /* DRE Downstream Data Queue */ + struct ce_dre_maint_ds_data_queue { + uint64_t data63_0[256]; /* 0x041000 -- 0x0417F8 */ + uint64_t ce_pad_041800[256]; /* 0x041800 -- 0x041FF8 */ + uint64_t data127_64[256]; /* 0x042000 -- 0x0427F8 */ + uint64_t ce_pad_042800[256]; /* 0x042800 -- 0x042FF8 */ + uint64_t parity[256]; /* 0x043000 -- 0x0437F8 */ + uint64_t ce_pad_043800[256]; /* 0x043800 -- 0x043FF8 */ + } ce_dre_maint_ds_data_q; + + /* DRE URE Upstream Response Queue */ + struct ce_dre_maint_ure_us_rsp_queue { + uint64_t data63_0[8]; /* 0x044000 -- 0x044038 */ + uint64_t ce_pad_044040[24]; /* 0x044040 -- 0x0440F8 */ + uint64_t data127_64[8]; /* 0x044100 -- 0x044138 */ + uint64_t ce_pad_044140[24]; /* 0x044140 -- 0x0441F8 */ + uint64_t parity[8]; /* 0x044200 -- 0x044238 */ + uint64_t ce_pad_044240[24]; /* 0x044240 -- 0x0442F8 */ + } ce_dre_maint_ure_us_rsp_q; + + uint64_t ce_dre_maint_us_wrt_rsp[32];/* 0x044300 -- 0x0443F8 */ + + uint64_t ce_end_of_struct; /* 0x044400 */ +} tioce_t; + + +/* ce_adm_int_mask/ce_adm_int_status register bit defines */ +#define CE_ADM_INT_CE_ERROR_SHFT 0 +#define CE_ADM_INT_LSI1_IP_ERROR_SHFT 1 +#define CE_ADM_INT_LSI2_IP_ERROR_SHFT 2 +#define CE_ADM_INT_PCIE_ERROR_SHFT 3 +#define CE_ADM_INT_PORT1_HOTPLUG_EVENT_SHFT 4 +#define CE_ADM_INT_PORT2_HOTPLUG_EVENT_SHFT 5 +#define CE_ADM_INT_PCIE_PORT1_DEV_A_SHFT 6 +#define CE_ADM_INT_PCIE_PORT1_DEV_B_SHFT 7 +#define CE_ADM_INT_PCIE_PORT1_DEV_C_SHFT 8 +#define CE_ADM_INT_PCIE_PORT1_DEV_D_SHFT 9 +#define CE_ADM_INT_PCIE_PORT2_DEV_A_SHFT 10 +#define CE_ADM_INT_PCIE_PORT2_DEV_B_SHFT 11 +#define CE_ADM_INT_PCIE_PORT2_DEV_C_SHFT 12 +#define CE_ADM_INT_PCIE_PORT2_DEV_D_SHFT 13 +#define CE_ADM_INT_PCIE_MSG_SHFT 14 /*see int_dest_14*/ +#define CE_ADM_INT_PCIE_MSG_SLOT_0_SHFT 14 +#define CE_ADM_INT_PCIE_MSG_SLOT_1_SHFT 15 +#define CE_ADM_INT_PCIE_MSG_SLOT_2_SHFT 16 +#define CE_ADM_INT_PCIE_MSG_SLOT_3_SHFT 17 +#define CE_ADM_INT_PORT1_PM_PME_MSG_SHFT 22 +#define CE_ADM_INT_PORT2_PM_PME_MSG_SHFT 23 + +/* ce_adm_force_int register bit defines */ +#define CE_ADM_FORCE_INT_PCIE_PORT1_DEV_A_SHFT 0 +#define CE_ADM_FORCE_INT_PCIE_PORT1_DEV_B_SHFT 1 +#define CE_ADM_FORCE_INT_PCIE_PORT1_DEV_C_SHFT 2 +#define CE_ADM_FORCE_INT_PCIE_PORT1_DEV_D_SHFT 3 +#define CE_ADM_FORCE_INT_PCIE_PORT2_DEV_A_SHFT 4 +#define CE_ADM_FORCE_INT_PCIE_PORT2_DEV_B_SHFT 5 +#define CE_ADM_FORCE_INT_PCIE_PORT2_DEV_C_SHFT 6 +#define CE_ADM_FORCE_INT_PCIE_PORT2_DEV_D_SHFT 7 +#define CE_ADM_FORCE_INT_ALWAYS_SHFT 8 + +/* ce_adm_int_dest register bit masks & shifts */ +#define INTR_VECTOR_SHFT 56 + +/* ce_adm_error_mask and ce_adm_error_summary register bit masks */ +#define CE_ADM_ERR_CRM_SSP_REQ_INVALID (0x1ULL << 0) +#define CE_ADM_ERR_SSP_REQ_HEADER (0x1ULL << 1) +#define CE_ADM_ERR_SSP_RSP_HEADER (0x1ULL << 2) +#define CE_ADM_ERR_SSP_PROTOCOL_ERROR (0x1ULL << 3) +#define CE_ADM_ERR_SSP_SBE (0x1ULL << 4) +#define CE_ADM_ERR_SSP_MBE (0x1ULL << 5) +#define CE_ADM_ERR_CXM_CREDIT_OFLOW (0x1ULL << 6) +#define CE_ADM_ERR_DRE_SSP_REQ_INVAL (0x1ULL << 7) +#define CE_ADM_ERR_SSP_REQ_LONG (0x1ULL << 8) +#define CE_ADM_ERR_SSP_REQ_OFLOW (0x1ULL << 9) +#define CE_ADM_ERR_SSP_REQ_SHORT (0x1ULL << 10) +#define CE_ADM_ERR_SSP_REQ_SIDEBAND (0x1ULL << 11) +#define CE_ADM_ERR_SSP_REQ_ADDR_ERR (0x1ULL << 12) +#define CE_ADM_ERR_SSP_REQ_BAD_BE (0x1ULL << 13) +#define CE_ADM_ERR_PCIE_COMPL_TIMEOUT (0x1ULL << 14) +#define CE_ADM_ERR_PCIE_UNEXP_COMPL (0x1ULL << 15) +#define CE_ADM_ERR_PCIE_ERR_COMPL (0x1ULL << 16) +#define CE_ADM_ERR_DRE_CREDIT_OFLOW (0x1ULL << 17) +#define CE_ADM_ERR_DRE_SRAM_PE (0x1ULL << 18) +#define CE_ADM_ERR_SSP_RSP_INVALID (0x1ULL << 19) +#define CE_ADM_ERR_SSP_RSP_LONG (0x1ULL << 20) +#define CE_ADM_ERR_SSP_RSP_SHORT (0x1ULL << 21) +#define CE_ADM_ERR_SSP_RSP_SIDEBAND (0x1ULL << 22) +#define CE_ADM_ERR_URE_SSP_RSP_UNEXP (0x1ULL << 23) +#define CE_ADM_ERR_URE_SSP_WR_REQ_TIMEOUT (0x1ULL << 24) +#define CE_ADM_ERR_URE_SSP_RD_REQ_TIMEOUT (0x1ULL << 25) +#define CE_ADM_ERR_URE_ATE3240_PAGE_FAULT (0x1ULL << 26) +#define CE_ADM_ERR_URE_ATE40_PAGE_FAULT (0x1ULL << 27) +#define CE_ADM_ERR_URE_CREDIT_OFLOW (0x1ULL << 28) +#define CE_ADM_ERR_URE_SRAM_PE (0x1ULL << 29) +#define CE_ADM_ERR_ADM_SSP_RSP_UNEXP (0x1ULL << 30) +#define CE_ADM_ERR_ADM_SSP_REQ_TIMEOUT (0x1ULL << 31) +#define CE_ADM_ERR_MMR_ACCESS_ERROR (0x1ULL << 32) +#define CE_ADM_ERR_MMR_ADDR_ERROR (0x1ULL << 33) +#define CE_ADM_ERR_ADM_CREDIT_OFLOW (0x1ULL << 34) +#define CE_ADM_ERR_ADM_SRAM_PE (0x1ULL << 35) +#define CE_ADM_ERR_DTL1_MIN_PDATA_CREDIT_ERR (0x1ULL << 36) +#define CE_ADM_ERR_DTL1_INF_COMPL_CRED_UPDT_ERR (0x1ULL << 37) +#define CE_ADM_ERR_DTL1_INF_POSTED_CRED_UPDT_ERR (0x1ULL << 38) +#define CE_ADM_ERR_DTL1_INF_NPOSTED_CRED_UPDT_ERR (0x1ULL << 39) +#define CE_ADM_ERR_DTL1_COMP_HD_CRED_MAX_ERR (0x1ULL << 40) +#define CE_ADM_ERR_DTL1_COMP_D_CRED_MAX_ERR (0x1ULL << 41) +#define CE_ADM_ERR_DTL1_NPOSTED_HD_CRED_MAX_ERR (0x1ULL << 42) +#define CE_ADM_ERR_DTL1_NPOSTED_D_CRED_MAX_ERR (0x1ULL << 43) +#define CE_ADM_ERR_DTL1_POSTED_HD_CRED_MAX_ERR (0x1ULL << 44) +#define CE_ADM_ERR_DTL1_POSTED_D_CRED_MAX_ERR (0x1ULL << 45) +#define CE_ADM_ERR_DTL2_MIN_PDATA_CREDIT_ERR (0x1ULL << 46) +#define CE_ADM_ERR_DTL2_INF_COMPL_CRED_UPDT_ERR (0x1ULL << 47) +#define CE_ADM_ERR_DTL2_INF_POSTED_CRED_UPDT_ERR (0x1ULL << 48) +#define CE_ADM_ERR_DTL2_INF_NPOSTED_CRED_UPDT_ERR (0x1ULL << 49) +#define CE_ADM_ERR_DTL2_COMP_HD_CRED_MAX_ERR (0x1ULL << 50) +#define CE_ADM_ERR_DTL2_COMP_D_CRED_MAX_ERR (0x1ULL << 51) +#define CE_ADM_ERR_DTL2_NPOSTED_HD_CRED_MAX_ERR (0x1ULL << 52) +#define CE_ADM_ERR_DTL2_NPOSTED_D_CRED_MAX_ERR (0x1ULL << 53) +#define CE_ADM_ERR_DTL2_POSTED_HD_CRED_MAX_ERR (0x1ULL << 54) +#define CE_ADM_ERR_DTL2_POSTED_D_CRED_MAX_ERR (0x1ULL << 55) +#define CE_ADM_ERR_PORT1_PCIE_COR_ERR (0x1ULL << 56) +#define CE_ADM_ERR_PORT1_PCIE_NFAT_ERR (0x1ULL << 57) +#define CE_ADM_ERR_PORT1_PCIE_FAT_ERR (0x1ULL << 58) +#define CE_ADM_ERR_PORT2_PCIE_COR_ERR (0x1ULL << 59) +#define CE_ADM_ERR_PORT2_PCIE_NFAT_ERR (0x1ULL << 60) +#define CE_ADM_ERR_PORT2_PCIE_FAT_ERR (0x1ULL << 61) + +/* ce_adm_ure_ups_buf_barrier_flush register bit masks and shifts */ +#define FLUSH_SEL_PORT1_PIPE0_SHFT 0 +#define FLUSH_SEL_PORT1_PIPE1_SHFT 4 +#define FLUSH_SEL_PORT1_PIPE2_SHFT 8 +#define FLUSH_SEL_PORT1_PIPE3_SHFT 12 +#define FLUSH_SEL_PORT2_PIPE0_SHFT 16 +#define FLUSH_SEL_PORT2_PIPE1_SHFT 20 +#define FLUSH_SEL_PORT2_PIPE2_SHFT 24 +#define FLUSH_SEL_PORT2_PIPE3_SHFT 28 + +/* ce_dre_config1 register bit masks and shifts */ +#define CE_DRE_RO_ENABLE (0x1ULL << 0) +#define CE_DRE_DYN_RO_ENABLE (0x1ULL << 1) +#define CE_DRE_SUP_CONFIG_COMP_ERROR (0x1ULL << 2) +#define CE_DRE_SUP_IO_COMP_ERROR (0x1ULL << 3) +#define CE_DRE_ADDR_MODE_SHFT 4 + +/* ce_dre_config_req_status register bit masks */ +#define CE_DRE_LAST_CONFIG_COMPLETION (0x7ULL << 0) +#define CE_DRE_DOWNSTREAM_CONFIG_ERROR (0x1ULL << 3) +#define CE_DRE_CONFIG_COMPLETION_VALID (0x1ULL << 4) +#define CE_DRE_CONFIG_REQUEST_ACTIVE (0x1ULL << 5) + +/* ce_ure_control register bit masks & shifts */ +#define CE_URE_RD_MRG_ENABLE (0x1ULL << 0) +#define CE_URE_WRT_MRG_ENABLE1 (0x1ULL << 4) +#define CE_URE_WRT_MRG_ENABLE2 (0x1ULL << 5) +#define CE_URE_RSPQ_BYPASS_DISABLE (0x1ULL << 24) +#define CE_URE_UPS_DAT1_PAR_DISABLE (0x1ULL << 32) +#define CE_URE_UPS_HDR1_PAR_DISABLE (0x1ULL << 33) +#define CE_URE_UPS_DAT2_PAR_DISABLE (0x1ULL << 34) +#define CE_URE_UPS_HDR2_PAR_DISABLE (0x1ULL << 35) +#define CE_URE_ATE_PAR_DISABLE (0x1ULL << 36) +#define CE_URE_RCI_PAR_DISABLE (0x1ULL << 37) +#define CE_URE_RSPQ_PAR_DISABLE (0x1ULL << 38) +#define CE_URE_DNS_DAT_PAR_DISABLE (0x1ULL << 39) +#define CE_URE_DNS_HDR_PAR_DISABLE (0x1ULL << 40) +#define CE_URE_MALFORM_DISABLE (0x1ULL << 44) +#define CE_URE_UNSUP_DISABLE (0x1ULL << 45) + +/* ce_ure_page_map register bit masks & shifts */ +#define CE_URE_ATE3240_ENABLE (0x1ULL << 0) +#define CE_URE_ATE40_ENABLE (0x1ULL << 1) +#define CE_URE_PAGESIZE_SHFT 4 +#define CE_URE_PAGESIZE_MASK (0x7ULL << CE_URE_PAGESIZE_SHFT) +#define CE_URE_4K_PAGESIZE (0x0ULL << CE_URE_PAGESIZE_SHFT) +#define CE_URE_16K_PAGESIZE (0x1ULL << CE_URE_PAGESIZE_SHFT) +#define CE_URE_64K_PAGESIZE (0x2ULL << CE_URE_PAGESIZE_SHFT) +#define CE_URE_128K_PAGESIZE (0x3ULL << CE_URE_PAGESIZE_SHFT) +#define CE_URE_256K_PAGESIZE (0x4ULL << CE_URE_PAGESIZE_SHFT) + +/* ce_ure_pipe_sel register bit masks & shifts */ +#define PKT_TRAFIC_SHRT 16 +#define BUS_SRC_ID_SHFT 8 +#define DEV_SRC_ID_SHFT 3 +#define FNC_SRC_ID_SHFT 0 +#define CE_URE_TC_MASK (0x07ULL << PKT_TRAFIC_SHRT) +#define CE_URE_BUS_MASK (0xFFULL << BUS_SRC_ID_SHFT) +#define CE_URE_DEV_MASK (0x1FULL << DEV_SRC_ID_SHFT) +#define CE_URE_FNC_MASK (0x07ULL << FNC_SRC_ID_SHFT) +#define CE_URE_PIPE_BUS(b) (((uint64_t)(b) << BUS_SRC_ID_SHFT) & \ + CE_URE_BUS_MASK) +#define CE_URE_PIPE_DEV(d) (((uint64_t)(d) << DEV_SRC_ID_SHFT) & \ + CE_URE_DEV_MASK) +#define CE_URE_PIPE_FNC(f) (((uint64_t)(f) << FNC_SRC_ID_SHFT) & \ + CE_URE_FNC_MASK) + +#define CE_URE_SEL1_SHFT 0 +#define CE_URE_SEL2_SHFT 20 +#define CE_URE_SEL3_SHFT 40 +#define CE_URE_SEL1_MASK (0x7FFFFULL << CE_URE_SEL1_SHFT) +#define CE_URE_SEL2_MASK (0x7FFFFULL << CE_URE_SEL2_SHFT) +#define CE_URE_SEL3_MASK (0x7FFFFULL << CE_URE_SEL3_SHFT) + + +/* ce_ure_pipe_mask register bit masks & shifts */ +#define CE_URE_MASK1_SHFT 0 +#define CE_URE_MASK2_SHFT 20 +#define CE_URE_MASK3_SHFT 40 +#define CE_URE_MASK1_MASK (0x7FFFFULL << CE_URE_MASK1_SHFT) +#define CE_URE_MASK2_MASK (0x7FFFFULL << CE_URE_MASK2_SHFT) +#define CE_URE_MASK3_MASK (0x7FFFFULL << CE_URE_MASK3_SHFT) + + +/* ce_ure_pcie_control1 register bit masks & shifts */ +#define CE_URE_SI (0x1ULL << 0) +#define CE_URE_ELAL_SHFT 4 +#define CE_URE_ELAL_MASK (0x7ULL << CE_URE_ELAL_SHFT) +#define CE_URE_ELAL1_SHFT 8 +#define CE_URE_ELAL1_MASK (0x7ULL << CE_URE_ELAL1_SHFT) +#define CE_URE_SCC (0x1ULL << 12) +#define CE_URE_PN1_SHFT 16 +#define CE_URE_PN1_MASK (0xFFULL << CE_URE_PN1_SHFT) +#define CE_URE_PN2_SHFT 24 +#define CE_URE_PN2_MASK (0xFFULL << CE_URE_PN2_SHFT) +#define CE_URE_PN1_SET(n) (((uint64_t)(n) << CE_URE_PN1_SHFT) & \ + CE_URE_PN1_MASK) +#define CE_URE_PN2_SET(n) (((uint64_t)(n) << CE_URE_PN2_SHFT) & \ + CE_URE_PN2_MASK) + +/* ce_ure_pcie_control2 register bit masks & shifts */ +#define CE_URE_ABP (0x1ULL << 0) +#define CE_URE_PCP (0x1ULL << 1) +#define CE_URE_MSP (0x1ULL << 2) +#define CE_URE_AIP (0x1ULL << 3) +#define CE_URE_PIP (0x1ULL << 4) +#define CE_URE_HPS (0x1ULL << 5) +#define CE_URE_HPC (0x1ULL << 6) +#define CE_URE_SPLV_SHFT 7 +#define CE_URE_SPLV_MASK (0xFFULL << CE_URE_SPLV_SHFT) +#define CE_URE_SPLS_SHFT 15 +#define CE_URE_SPLS_MASK (0x3ULL << CE_URE_SPLS_SHFT) +#define CE_URE_PSN1_SHFT 19 +#define CE_URE_PSN1_MASK (0x1FFFULL << CE_URE_PSN1_SHFT) +#define CE_URE_PSN2_SHFT 32 +#define CE_URE_PSN2_MASK (0x1FFFULL << CE_URE_PSN2_SHFT) +#define CE_URE_PSN1_SET(n) (((uint64_t)(n) << CE_URE_PSN1_SHFT) & \ + CE_URE_PSN1_MASK) +#define CE_URE_PSN2_SET(n) (((uint64_t)(n) << CE_URE_PSN2_SHFT) & \ + CE_URE_PSN2_MASK) + +/* + * PIO address space ranges for CE + */ + +/* Local CE Registers Space */ +#define CE_PIO_MMR 0x00000000 +#define CE_PIO_MMR_LEN 0x04000000 + +/* PCI Compatible Config Space */ +#define CE_PIO_CONFIG_SPACE 0x04000000 +#define CE_PIO_CONFIG_SPACE_LEN 0x04000000 + +/* PCI I/O Space Alias */ +#define CE_PIO_IO_SPACE_ALIAS 0x08000000 +#define CE_PIO_IO_SPACE_ALIAS_LEN 0x08000000 + +/* PCI Enhanced Config Space */ +#define CE_PIO_E_CONFIG_SPACE 0x10000000 +#define CE_PIO_E_CONFIG_SPACE_LEN 0x10000000 + +/* PCI I/O Space */ +#define CE_PIO_IO_SPACE 0x100000000 +#define CE_PIO_IO_SPACE_LEN 0x100000000 + +/* PCI MEM Space */ +#define CE_PIO_MEM_SPACE 0x200000000 +#define CE_PIO_MEM_SPACE_LEN TIO_HWIN_SIZE + + +/* + * CE PCI Enhanced Config Space shifts & masks + */ +#define CE_E_CONFIG_BUS_SHFT 20 +#define CE_E_CONFIG_BUS_MASK (0xFF << CE_E_CONFIG_BUS_SHFT) +#define CE_E_CONFIG_DEVICE_SHFT 15 +#define CE_E_CONFIG_DEVICE_MASK (0x1F << CE_E_CONFIG_DEVICE_SHFT) +#define CE_E_CONFIG_FUNC_SHFT 12 +#define CE_E_CONFIG_FUNC_MASK (0x7 << CE_E_CONFIG_FUNC_SHFT) + +#endif /* __ASM_IA64_SN_TIOCE_H__ */ diff --git a/include/asm-ia64/sn/tioce_provider.h b/include/asm-ia64/sn/tioce_provider.h new file mode 100644 index 0000000..7f63dec --- /dev/null +++ b/include/asm-ia64/sn/tioce_provider.h @@ -0,0 +1,66 @@ +/************************************************************************** + * Copyright (C) 2005, Silicon Graphics, Inc. * + * * + * These coded instructions, statements, and computer programs contain * + * unpublished proprietary information of Silicon Graphics, Inc., and * + * are protected by Federal copyright law. They may not be disclosed * + * to third parties or copied or duplicated in any form, in whole or * + * in part, without the prior written consent of Silicon Graphics, Inc. * + * * + **************************************************************************/ + +#ifndef _ASM_IA64_SN_CE_PROVIDER_H +#define _ASM_IA64_SN_CE_PROVIDER_H + +#include +#include + +/* + * Common TIOCE structure shared between the prom and kernel + * + * DO NOT CHANGE THIS STRUCT WITHOUT MAKING CORRESPONDING CHANGES TO THE + * PROM VERSION. + */ +struct tioce_common { + struct pcibus_bussoft ce_pcibus; /* common pciio header */ + + uint32_t ce_rev; + uint64_t ce_kernel_private; + uint64_t ce_prom_private; +}; + +struct tioce_kernel { + struct tioce_common *ce_common; + spinlock_t ce_lock; + struct list_head ce_dmamap_list; + + uint64_t ce_ate40_shadow[TIOCE_NUM_M40_ATES]; + uint64_t ce_ate3240_shadow[TIOCE_NUM_M3240_ATES]; + uint32_t ce_ate3240_pagesize; + + uint8_t ce_port1_secondary; + + /* per-port resources */ + struct { + int dirmap_refcnt; + uint64_t dirmap_shadow; + } ce_port[TIOCE_NUM_PORTS]; +}; + +struct tioce_dmamap { + struct list_head ce_dmamap_list; /* headed by tioce_kernel */ + uint32_t refcnt; + + uint64_t nbytes; /* # bytes mapped */ + + uint64_t ct_start; /* coretalk start address */ + uint64_t pci_start; /* bus start address */ + + uint64_t *ate_hw; /* hw ptr of first ate in map */ + uint64_t *ate_shadow; /* shadow ptr of firat ate */ + uint16_t ate_count; /* # ate's in the map */ +}; + +extern int tioce_init_provider(void); + +#endif /* __ASM_IA64_SN_CE_PROVIDER_H */ -- cgit v0.10.2 From badea125d7cbd93f1678a95cf009b3bdfe6065cd Mon Sep 17 00:00:00 2001 From: David Mosberger-Tang Date: Mon, 25 Jul 2005 22:23:00 -0700 Subject: [IA64] Fix race in mm-context wrap-around logic. The patch below should fix a race which could cause stale TLB entries. Specifically, when 2 CPUs ended up racing for entrance to wrap_mmu_context(). The losing CPU would find that by the time it acquired ctx.lock, mm->context already had a valid value, but then it failed to (re-)check the delayed TLB flushing logic and hence could end up using a context number when there were still stale entries in its TLB. The fix is to check for delayed TLB flushes only after mm->context is valid (non-zero). The patch also makes GCC v4.x happier by defining a non-volatile variant of mm_context_t called nv_mm_context_t. Signed-off-by: David Mosberger-Tang Signed-off-by: Tony Luck diff --git a/include/asm-ia64/mmu.h b/include/asm-ia64/mmu.h index ae15253..611432b 100644 --- a/include/asm-ia64/mmu.h +++ b/include/asm-ia64/mmu.h @@ -2,10 +2,12 @@ #define __MMU_H /* - * Type for a context number. We declare it volatile to ensure proper ordering when it's - * accessed outside of spinlock'd critical sections (e.g., as done in activate_mm() and - * init_new_context()). + * Type for a context number. We declare it volatile to ensure proper + * ordering when it's accessed outside of spinlock'd critical sections + * (e.g., as done in activate_mm() and init_new_context()). */ typedef volatile unsigned long mm_context_t; +typedef unsigned long nv_mm_context_t; + #endif diff --git a/include/asm-ia64/mmu_context.h b/include/asm-ia64/mmu_context.h index e3e5fed..0680d16 100644 --- a/include/asm-ia64/mmu_context.h +++ b/include/asm-ia64/mmu_context.h @@ -55,34 +55,46 @@ static inline void delayed_tlb_flush (void) { extern void local_flush_tlb_all (void); + unsigned long flags; if (unlikely(__ia64_per_cpu_var(ia64_need_tlb_flush))) { - local_flush_tlb_all(); - __ia64_per_cpu_var(ia64_need_tlb_flush) = 0; + spin_lock_irqsave(&ia64_ctx.lock, flags); + { + if (__ia64_per_cpu_var(ia64_need_tlb_flush)) { + local_flush_tlb_all(); + __ia64_per_cpu_var(ia64_need_tlb_flush) = 0; + } + } + spin_unlock_irqrestore(&ia64_ctx.lock, flags); } } -static inline mm_context_t +static inline nv_mm_context_t get_mmu_context (struct mm_struct *mm) { unsigned long flags; - mm_context_t context = mm->context; - - if (context) - return context; - - spin_lock_irqsave(&ia64_ctx.lock, flags); - { - /* re-check, now that we've got the lock: */ - context = mm->context; - if (context == 0) { - cpus_clear(mm->cpu_vm_mask); - if (ia64_ctx.next >= ia64_ctx.limit) - wrap_mmu_context(mm); - mm->context = context = ia64_ctx.next++; + nv_mm_context_t context = mm->context; + + if (unlikely(!context)) { + spin_lock_irqsave(&ia64_ctx.lock, flags); + { + /* re-check, now that we've got the lock: */ + context = mm->context; + if (context == 0) { + cpus_clear(mm->cpu_vm_mask); + if (ia64_ctx.next >= ia64_ctx.limit) + wrap_mmu_context(mm); + mm->context = context = ia64_ctx.next++; + } } + spin_unlock_irqrestore(&ia64_ctx.lock, flags); } - spin_unlock_irqrestore(&ia64_ctx.lock, flags); + /* + * Ensure we're not starting to use "context" before any old + * uses of it are gone from our TLB. + */ + delayed_tlb_flush(); + return context; } @@ -104,7 +116,7 @@ destroy_context (struct mm_struct *mm) } static inline void -reload_context (mm_context_t context) +reload_context (nv_mm_context_t context) { unsigned long rid; unsigned long rid_incr = 0; @@ -138,7 +150,7 @@ reload_context (mm_context_t context) static inline void activate_context (struct mm_struct *mm) { - mm_context_t context; + nv_mm_context_t context; do { context = get_mmu_context(mm); @@ -157,8 +169,6 @@ activate_context (struct mm_struct *mm) static inline void activate_mm (struct mm_struct *prev, struct mm_struct *next) { - delayed_tlb_flush(); - /* * We may get interrupts here, but that's OK because interrupt handlers cannot * touch user-space. -- cgit v0.10.2 From 1a402aaca51b7d56e62348f50a426c531b6bc29e Mon Sep 17 00:00:00 2001 From: Greg Howard Date: Mon, 15 Aug 2005 13:00:00 -0700 Subject: [IA64-SGI] fix unaligned memory access in snsc_event.c It's been pointed out that environmental events from the system controllers on Altix machines cause the kernel to complain about unaligned memory accesses. This turns out to be because "be32_to_cpup()" didn't do everything I thought/hoped it did. I've added calls to pull the offending integers out of the buffers using get_unaligned() before feeding them to be32_to_cpup(). Signed-off-by: Greg Howard Signed-off-by: Tony Luck diff --git a/drivers/char/snsc_event.c b/drivers/char/snsc_event.c index d692af5..baaa365 100644 --- a/drivers/char/snsc_event.c +++ b/drivers/char/snsc_event.c @@ -19,6 +19,7 @@ #include #include #include +#include #include "snsc.h" static struct subch_data_s *event_sd; @@ -62,13 +63,16 @@ static int scdrv_parse_event(char *event, int *src, int *code, int *esp_code, char *desc) { char *desc_end; + __be32 from_buf; /* record event source address */ - *src = be32_to_cpup((__be32 *)event); + from_buf = get_unaligned((__be32 *)event); + *src = be32_to_cpup(&from_buf); event += 4; /* move on to event code */ /* record the system controller's event code */ - *code = be32_to_cpup((__be32 *)event); + from_buf = get_unaligned((__be32 *)event); + *code = be32_to_cpup(&from_buf); event += 4; /* move on to event arguments */ /* how many arguments are in the packet? */ @@ -82,7 +86,8 @@ scdrv_parse_event(char *event, int *src, int *code, int *esp_code, char *desc) /* not an integer argument, so give up */ return -1; } - *esp_code = be32_to_cpup((__be32 *)event); + from_buf = get_unaligned((__be32 *)event); + *esp_code = be32_to_cpup(&from_buf); event += 4; /* parse out the event description */ -- cgit v0.10.2 From 3d14487b2677cc44b5d1ef115d9d4301ec046591 Mon Sep 17 00:00:00 2001 From: Jack Steiner Date: Thu, 11 Aug 2005 10:25:00 -0700 Subject: [IA64-SGI] - New SN hardware support - addr_macros Update the SN address macros so that they work on both shub1 and shub2. Most of the code to support shub2 was added last year but this patch fixes a few bugs and adds macros to help generate both processor-specific physical addresses & numalink physical addresses. More cleanup & optimization will be done later. Signed-off-by: Jack Steiner Signed-off-by: Tony Luck diff --git a/include/asm-ia64/sn/addrs.h b/include/asm-ia64/sn/addrs.h index b6e85e4..8881882 100644 --- a/include/asm-ia64/sn/addrs.h +++ b/include/asm-ia64/sn/addrs.h @@ -126,6 +126,7 @@ #define GLOBAL_MMR_PHYS_ADDR(n,a) (GLOBAL_PHYS_MMR_SPACE | REMOTE_ADDR(n,a)) #define GLOBAL_CAC_ADDR(n,a) (CAC_BASE | REMOTE_ADDR(n,a)) #define CHANGE_NASID(n,x) ((void *)(((u64)(x) & ~NASID_MASK) | NASID_SPACE(n))) +#define IS_TIO_NASID(n) ((n) & 1) /* non-II mmr's start at top of big window space (4G) */ @@ -155,10 +156,28 @@ * the chiplet id is zero. If we implement TIO-TIO dma, we might need * to insert a chiplet id into this macro. However, it is our belief * right now that this chiplet id will be ICE, which is also zero. - * Nasid starts on bit 40. */ -#define PHYS_TO_TIODMA(x) ( (((u64)(NASID_GET(x))) << 40) | NODE_OFFSET(x)) -#define PHYS_TO_DMA(x) ( (((u64)(x) & NASID_MASK) >> 2) | NODE_OFFSET(x)) +#define SH1_TIO_PHYS_TO_DMA(x) \ + ((((u64)(NASID_GET(x))) << 40) | NODE_OFFSET(x)) + +#define SH2_NETWORK_BANK_OFFSET(x) \ + ((u64)(x) & ((1UL << (sn_hub_info->nasid_shift - 4)) -1)) + +#define SH2_NETWORK_BANK_SELECT(x) \ + ((((u64)(x) & (0x3UL << (sn_hub_info->nasid_shift - 4))) \ + >> (sn_hub_info->nasid_shift - 4)) << 36) + +#define SH2_NETWORK_ADDRESS(x) \ + (SH2_NETWORK_BANK_OFFSET(x) | SH2_NETWORK_BANK_SELECT(x)) + +#define SH2_TIO_PHYS_TO_DMA(x) \ + (((u64)(NASID_GET(x)) << 40) | SH2_NETWORK_ADDRESS(x)) + +#define PHYS_TO_TIODMA(x) \ + (is_shub1() ? SH1_TIO_PHYS_TO_DMA(x) : SH2_TIO_PHYS_TO_DMA(x)) + +#define PHYS_TO_DMA(x) \ + ((((u64)(x) & NASID_MASK) >> 2) | NODE_OFFSET(x)) /* @@ -187,6 +206,7 @@ #define RAW_NODE_SWIN_BASE(n, w) (NODE_IO_BASE(n) + ((u64) (w) << SWIN_SIZE_BITS)) #define BWIN_WIDGET_MASK 0x7 #define BWIN_WINDOWNUM(x) (((x) >> BWIN_SIZE_BITS) & BWIN_WIDGET_MASK) +#define SH1_IS_BIG_WINDOW_ADDR(x) ((x) & BWIN_TOP) #define TIO_BWIN_WINDOW_SELECT_MASK 0x7 #define TIO_BWIN_WINDOWNUM(x) (((x) >> TIO_BWIN_SIZE_BITS) & TIO_BWIN_WINDOW_SELECT_MASK) @@ -217,10 +237,6 @@ #define TIO_SWIN_WIDGETNUM(x) (((x) >> TIO_SWIN_SIZE_BITS) & TIO_SWIN_WIDGET_MASK) -#define TIO_IOSPACE_ADDR(n,x) \ - /* Move in the Chiplet ID for TIO Local Block MMR */ \ - (REMOTE_ADDR(n,x) | 1UL << (NASID_SHIFT - 2)) - /* * The following macros produce the correct base virtual address for * the hub registers. The REMOTE_HUB_* macro produce @@ -235,18 +251,40 @@ * Otherwise, the recommended approach is to use *_HUB_L() and *_HUB_S(). * They're always safe. */ +/* Shub1 TIO & MMR addressing macros */ +#define SH1_TIO_IOSPACE_ADDR(n,x) \ + GLOBAL_MMR_ADDR(n,x) + +#define SH1_REMOTE_BWIN_MMR(n,x) \ + GLOBAL_MMR_ADDR(n,x) + +#define SH1_REMOTE_SWIN_MMR(n,x) \ + (NODE_SWIN_BASE(n,1) + 0x800000UL + (x)) + +#define SH1_REMOTE_MMR(n,x) \ + (SH1_IS_BIG_WINDOW_ADDR(x) ? SH1_REMOTE_BWIN_MMR(n,x) : \ + SH1_REMOTE_SWIN_MMR(n,x)) + +/* Shub1 TIO & MMR addressing macros */ +#define SH2_TIO_IOSPACE_ADDR(n,x) \ + ((UNCACHED | REMOTE_ADDR(n,x) | 1UL << (NASID_SHIFT - 2))) + +#define SH2_REMOTE_MMR(n,x) \ + GLOBAL_MMR_ADDR(n,x) + + +/* TIO & MMR addressing macros that work on both shub1 & shub2 */ +#define TIO_IOSPACE_ADDR(n,x) \ + ((u64 *)(is_shub1() ? SH1_TIO_IOSPACE_ADDR(n,x) : \ + SH2_TIO_IOSPACE_ADDR(n,x))) + +#define SH_REMOTE_MMR(n,x) \ + (is_shub1() ? SH1_REMOTE_MMR(n,x) : SH2_REMOTE_MMR(n,x)) + #define REMOTE_HUB_ADDR(n,x) \ - ((n & 1) ? \ - /* TIO: */ \ - (is_shub2() ? \ - /* TIO on Shub2 */ \ - (volatile u64 *)(TIO_IOSPACE_ADDR(n,x)) \ - : /* TIO on shub1 */ \ - (volatile u64 *)(GLOBAL_MMR_ADDR(n,x))) \ - \ - : /* SHUB1 and SHUB2 MMRs: */ \ - (((x) & BWIN_TOP) ? ((volatile u64 *)(GLOBAL_MMR_ADDR(n,x))) \ - : ((volatile u64 *)(NODE_SWIN_BASE(n,1) + 0x800000 + (x))))) + (IS_TIO_NASID(n) ? ((volatile u64*)TIO_IOSPACE_ADDR(n,x)) : \ + ((volatile u64*)SH_REMOTE_MMR(n,x))) + #define HUB_L(x) (*((volatile typeof(*x) *)x)) #define HUB_S(x,d) (*((volatile typeof(*x) *)x) = (d)) -- cgit v0.10.2 From 0aa2c72e59cf1d09a0b321e4e6292af78a51b8b3 Mon Sep 17 00:00:00 2001 From: Jack Steiner Date: Thu, 11 Aug 2005 10:26:00 -0700 Subject: [IA64-SGI] - New SN hardware support - use_alias_space Use local SHUB alias space when referencing MMRs that are known to be node local. There is a slight performance benefit & code simplification. Signed-off-by: Jack Steiner Signed-off-by: Tony Luck diff --git a/arch/ia64/sn/kernel/irq.c b/arch/ia64/sn/kernel/irq.c index af061db..607938c2 100644 --- a/arch/ia64/sn/kernel/irq.c +++ b/arch/ia64/sn/kernel/irq.c @@ -5,7 +5,7 @@ * License. See the file "COPYING" in the main directory of this archive * for more details. * - * Copyright (c) 2000-2004 Silicon Graphics, Inc. All Rights Reserved. + * Copyright (c) 2000-2005 Silicon Graphics, Inc. All Rights Reserved. */ #include @@ -76,16 +76,14 @@ static void sn_enable_irq(unsigned int irq) static void sn_ack_irq(unsigned int irq) { - uint64_t event_occurred, mask = 0; - int nasid; + u64 event_occurred, mask = 0; irq = irq & 0xff; - nasid = get_nasid(); event_occurred = - HUB_L((uint64_t *) GLOBAL_MMR_ADDR(nasid, SH_EVENT_OCCURRED)); + HUB_L((u64*)LOCAL_MMR_ADDR(SH_EVENT_OCCURRED)); mask = event_occurred & SH_ALL_INT_MASK; - HUB_S((uint64_t *) GLOBAL_MMR_ADDR(nasid, SH_EVENT_OCCURRED_ALIAS), - mask); + HUB_S((u64*)LOCAL_MMR_ADDR(SH_EVENT_OCCURRED_ALIAS), + mask); __set_bit(irq, (volatile void *)pda->sn_in_service_ivecs); move_irq(irq); @@ -93,15 +91,12 @@ static void sn_ack_irq(unsigned int irq) static void sn_end_irq(unsigned int irq) { - int nasid; int ivec; - uint64_t event_occurred; + u64 event_occurred; ivec = irq & 0xff; if (ivec == SGI_UART_VECTOR) { - nasid = get_nasid(); - event_occurred = HUB_L((uint64_t *) GLOBAL_MMR_ADDR - (nasid, SH_EVENT_OCCURRED)); + event_occurred = HUB_L((u64*)LOCAL_MMR_ADDR (SH_EVENT_OCCURRED)); /* If the UART bit is set here, we may have received an * interrupt from the UART that the driver missed. To * make sure, we IPI ourselves to force us to look again. diff --git a/arch/ia64/sn/pci/pcibr/pcibr_dma.c b/arch/ia64/sn/pci/pcibr/pcibr_dma.c index ae455b6..0f25425 100644 --- a/arch/ia64/sn/pci/pcibr/pcibr_dma.c +++ b/arch/ia64/sn/pci/pcibr/pcibr_dma.c @@ -297,7 +297,7 @@ void sn_dma_flush(uint64_t addr) * If CE ever needs the sn_dma_flush mechanism, we will have * to account for that here and in tioce_bus_fixup(). */ - uint32_t tio_id = REMOTE_HUB_L(nasid, TIO_NODE_ID); + uint32_t tio_id = HUB_L(TIO_IOSPACE_ADDR(nasid, TIO_NODE_ID)); uint32_t revnum = XWIDGET_PART_REV_NUM(tio_id); /* TIOCP BRINGUP WAR (PV907516): Don't write buffer flush reg */ -- cgit v0.10.2 From 2fdbb590e4f9b346e5d06cf7f85dcb7a9f2e0a48 Mon Sep 17 00:00:00 2001 From: Jack Steiner Date: Thu, 11 Aug 2005 10:26:00 -0700 Subject: [IA64-SGI] - New SN hardware support - boot_init_shub2 Update the addresses of the pio_write_status_addr so that they are correct for newer processors. Shub2 did not number the threads in the order that I had expected. Signed-off-by: Jack Steiner Signed-off-by: Tony Luck diff --git a/arch/ia64/sn/kernel/setup.c b/arch/ia64/sn/kernel/setup.c index 7c7fe44..ed77715 100644 --- a/arch/ia64/sn/kernel/setup.c +++ b/arch/ia64/sn/kernel/setup.c @@ -532,8 +532,8 @@ void __init sn_cpu_init(void) */ { u64 pio1[] = {SH1_PIO_WRITE_STATUS_0, 0, SH1_PIO_WRITE_STATUS_1, 0}; - u64 pio2[] = {SH2_PIO_WRITE_STATUS_0, SH2_PIO_WRITE_STATUS_1, - SH2_PIO_WRITE_STATUS_2, SH2_PIO_WRITE_STATUS_3}; + u64 pio2[] = {SH2_PIO_WRITE_STATUS_0, SH2_PIO_WRITE_STATUS_2, + SH2_PIO_WRITE_STATUS_1, SH2_PIO_WRITE_STATUS_3}; u64 *pio; pio = is_shub1() ? pio1 : pio2; pda->pio_write_status_addr = (volatile unsigned long *) LOCAL_MMR_ADDR(pio[slice]); -- cgit v0.10.2 From 1007d02160974a46cc4fd3b1737c183c0471b88f Mon Sep 17 00:00:00 2001 From: Jack Steiner Date: Thu, 11 Aug 2005 10:27:00 -0700 Subject: [IA64-SGI] - New SN hardware support - no_wars Disable some shub1-specific code when running on systems with shub2. Signed-off-by: Jack Steiner Signed-off-by: Tony Luck diff --git a/arch/ia64/sn/kernel/sn2/timer_interrupt.c b/arch/ia64/sn/kernel/sn2/timer_interrupt.c index cde7375..adf5db2 100644 --- a/arch/ia64/sn/kernel/sn2/timer_interrupt.c +++ b/arch/ia64/sn/kernel/sn2/timer_interrupt.c @@ -1,7 +1,7 @@ /* * * - * Copyright (c) 2003 Silicon Graphics, Inc. All Rights Reserved. + * Copyright (c) 2005 Silicon Graphics, Inc. All Rights Reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms of version 2 of the GNU General Public License @@ -50,14 +50,16 @@ void sn_timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) LED_CPU_HEARTBEAT, LED_CPU_HEARTBEAT); } - if (enable_shub_wars_1_1()) { - /* Bugfix code for SHUB 1.1 */ - if (pda->pio_shub_war_cam_addr) - *pda->pio_shub_war_cam_addr = 0x8000000000000010UL; + if (is_shub1()) { + if (enable_shub_wars_1_1()) { + /* Bugfix code for SHUB 1.1 */ + if (pda->pio_shub_war_cam_addr) + *pda->pio_shub_war_cam_addr = 0x8000000000000010UL; + } + if (pda->sn_lb_int_war_ticks == 0) + sn_lb_int_war_check(); + pda->sn_lb_int_war_ticks++; + if (pda->sn_lb_int_war_ticks >= SN_LB_INT_WAR_INTERVAL) + pda->sn_lb_int_war_ticks = 0; } - if (pda->sn_lb_int_war_ticks == 0) - sn_lb_int_war_check(); - pda->sn_lb_int_war_ticks++; - if (pda->sn_lb_int_war_ticks >= SN_LB_INT_WAR_INTERVAL) - pda->sn_lb_int_war_ticks = 0; } -- cgit v0.10.2 From 7e95b9d6e21eda23bac1ff024d465d2072c8996d Mon Sep 17 00:00:00 2001 From: Jack Steiner Date: Thu, 11 Aug 2005 10:27:00 -0700 Subject: [IA64-SGI] - New SN hardware support - bte_fixes Change the BTE driver so that it works for both shub1 and shub2. Most of the changes are related to the number of cores that use the BTE engine, to the MMR addresses of various shub registers, and to using the correct processor or network physical address. Signed-off-by: Russ Anderson Signed-off-by: Jack Steiner Signed-off-by: Tony Luck diff --git a/arch/ia64/sn/kernel/bte.c b/arch/ia64/sn/kernel/bte.c index 647deae..7adef841 100644 --- a/arch/ia64/sn/kernel/bte.c +++ b/arch/ia64/sn/kernel/bte.c @@ -29,16 +29,30 @@ /* two interfaces on two btes */ #define MAX_INTERFACES_TO_TRY 4 +#define MAX_NODES_TO_TRY 2 static struct bteinfo_s *bte_if_on_node(nasid_t nasid, int interface) { nodepda_t *tmp_nodepda; + if (nasid_to_cnodeid(nasid) == -1) + return (struct bteinfo_s *)NULL;; + tmp_nodepda = NODEPDA(nasid_to_cnodeid(nasid)); return &tmp_nodepda->bte_if[interface]; } +static inline void bte_start_transfer(struct bteinfo_s *bte, u64 len, u64 mode) +{ + if (is_shub2()) { + BTE_CTRL_STORE(bte, (IBLS_BUSY | ((len) | (mode) << 24))); + } else { + BTE_LNSTAT_STORE(bte, len); + BTE_CTRL_STORE(bte, mode); + } +} + /************************************************************************ * Block Transfer Engine copy related functions. * @@ -67,13 +81,15 @@ bte_result_t bte_copy(u64 src, u64 dest, u64 len, u64 mode, void *notification) { u64 transfer_size; u64 transfer_stat; + u64 notif_phys_addr; struct bteinfo_s *bte; bte_result_t bte_status; unsigned long irq_flags; unsigned long itc_end = 0; - struct bteinfo_s *btes_to_try[MAX_INTERFACES_TO_TRY]; - int bte_if_index; - int bte_pri, bte_sec; + int nasid_to_try[MAX_NODES_TO_TRY]; + int my_nasid = get_nasid(); + int bte_if_index, nasid_index; + int bte_first, btes_per_node = BTES_PER_NODE; BTE_PRINTK(("bte_copy(0x%lx, 0x%lx, 0x%lx, 0x%lx, 0x%p)\n", src, dest, len, mode, notification)); @@ -86,36 +102,26 @@ bte_result_t bte_copy(u64 src, u64 dest, u64 len, u64 mode, void *notification) (src & L1_CACHE_MASK) || (dest & L1_CACHE_MASK)); BUG_ON(!(len < ((BTE_LEN_MASK + 1) << L1_CACHE_SHIFT))); - /* CPU 0 (per node) tries bte0 first, CPU 1 try bte1 first */ - if (cpuid_to_subnode(smp_processor_id()) == 0) { - bte_pri = 0; - bte_sec = 1; - } else { - bte_pri = 1; - bte_sec = 0; - } + /* + * Start with interface corresponding to cpu number + */ + bte_first = get_cpu() % btes_per_node; if (mode & BTE_USE_DEST) { /* try remote then local */ - btes_to_try[0] = bte_if_on_node(NASID_GET(dest), bte_pri); - btes_to_try[1] = bte_if_on_node(NASID_GET(dest), bte_sec); + nasid_to_try[0] = NASID_GET(dest); if (mode & BTE_USE_ANY) { - btes_to_try[2] = bte_if_on_node(get_nasid(), bte_pri); - btes_to_try[3] = bte_if_on_node(get_nasid(), bte_sec); + nasid_to_try[1] = my_nasid; } else { - btes_to_try[2] = NULL; - btes_to_try[3] = NULL; + nasid_to_try[1] = (int)NULL; } } else { /* try local then remote */ - btes_to_try[0] = bte_if_on_node(get_nasid(), bte_pri); - btes_to_try[1] = bte_if_on_node(get_nasid(), bte_sec); + nasid_to_try[0] = my_nasid; if (mode & BTE_USE_ANY) { - btes_to_try[2] = bte_if_on_node(NASID_GET(dest), bte_pri); - btes_to_try[3] = bte_if_on_node(NASID_GET(dest), bte_sec); + nasid_to_try[1] = NASID_GET(dest); } else { - btes_to_try[2] = NULL; - btes_to_try[3] = NULL; + nasid_to_try[1] = (int)NULL; } } @@ -123,11 +129,12 @@ retry_bteop: do { local_irq_save(irq_flags); - bte_if_index = 0; + bte_if_index = bte_first; + nasid_index = 0; /* Attempt to lock one of the BTE interfaces. */ - while (bte_if_index < MAX_INTERFACES_TO_TRY) { - bte = btes_to_try[bte_if_index++]; + while (nasid_index < MAX_NODES_TO_TRY) { + bte = bte_if_on_node(nasid_to_try[nasid_index],bte_if_index); if (bte == NULL) { continue; @@ -143,6 +150,15 @@ retry_bteop: break; } } + + bte_if_index = (bte_if_index + 1) % btes_per_node; /* Next interface */ + if (bte_if_index == bte_first) { + /* + * We've tried all interfaces on this node + */ + nasid_index++; + } + bte = NULL; } @@ -169,7 +185,13 @@ retry_bteop: /* Initialize the notification to a known value. */ *bte->most_rcnt_na = BTE_WORD_BUSY; + notif_phys_addr = TO_PHYS(ia64_tpa((unsigned long)bte->most_rcnt_na)); + if (is_shub2()) { + src = SH2_TIO_PHYS_TO_DMA(src); + dest = SH2_TIO_PHYS_TO_DMA(dest); + notif_phys_addr = SH2_TIO_PHYS_TO_DMA(notif_phys_addr); + } /* Set the source and destination registers */ BTE_PRINTKV(("IBSA = 0x%lx)\n", (TO_PHYS(src)))); BTE_SRC_STORE(bte, TO_PHYS(src)); @@ -177,14 +199,12 @@ retry_bteop: BTE_DEST_STORE(bte, TO_PHYS(dest)); /* Set the notification register */ - BTE_PRINTKV(("IBNA = 0x%lx)\n", - TO_PHYS(ia64_tpa((unsigned long)bte->most_rcnt_na)))); - BTE_NOTIF_STORE(bte, - TO_PHYS(ia64_tpa((unsigned long)bte->most_rcnt_na))); + BTE_PRINTKV(("IBNA = 0x%lx)\n", notif_phys_addr)); + BTE_NOTIF_STORE(bte, notif_phys_addr); /* Initiate the transfer */ BTE_PRINTK(("IBCT = 0x%lx)\n", BTE_VALID_MODE(mode))); - BTE_START_TRANSFER(bte, transfer_size, BTE_VALID_MODE(mode)); + bte_start_transfer(bte, transfer_size, BTE_VALID_MODE(mode)); itc_end = ia64_get_itc() + (40000000 * local_cpu_data->cyc_per_usec); -- cgit v0.10.2 From 68b9753f47953930cb94de0223c163f289399091 Mon Sep 17 00:00:00 2001 From: Jack Steiner Date: Thu, 11 Aug 2005 10:28:00 -0700 Subject: [IA64-SGI] - New SN hardware support - cpu_relax Add a few missing calls to "hint @pause". Signed-off-by: Jack Steiner Signed-off-by: Tony Luck diff --git a/arch/ia64/sn/kernel/bte.c b/arch/ia64/sn/kernel/bte.c index 7adef841..b75814e 100644 --- a/arch/ia64/sn/kernel/bte.c +++ b/arch/ia64/sn/kernel/bte.c @@ -215,6 +215,7 @@ retry_bteop: } while ((transfer_stat = *bte->most_rcnt_na) == BTE_WORD_BUSY) { + cpu_relax(); if (ia64_get_itc() > itc_end) { BTE_PRINTK(("BTE timeout nasid 0x%x bte%d IBLS = 0x%lx na 0x%lx\n", NASID_GET(bte->bte_base_addr), bte->bte_num, diff --git a/arch/ia64/sn/kernel/huberror.c b/arch/ia64/sn/kernel/huberror.c index 5c39b43..5c5eb01 100644 --- a/arch/ia64/sn/kernel/huberror.c +++ b/arch/ia64/sn/kernel/huberror.c @@ -76,7 +76,7 @@ void hubiio_crb_free(struct hubdev_info *hubdev_info, int crbnum) */ REMOTE_HUB_S(hubdev_info->hdi_nasid, IIO_ICDR, (IIO_ICDR_PND | crbnum)); while (REMOTE_HUB_L(hubdev_info->hdi_nasid, IIO_ICDR) & IIO_ICDR_PND) - udelay(1); + cpu_relax(); } diff --git a/arch/ia64/sn/pci/pcibr/pcibr_dma.c b/arch/ia64/sn/pci/pcibr/pcibr_dma.c index 0f25425..3409347 100644 --- a/arch/ia64/sn/pci/pcibr/pcibr_dma.c +++ b/arch/ia64/sn/pci/pcibr/pcibr_dma.c @@ -317,7 +317,8 @@ void sn_dma_flush(uint64_t addr) *(volatile uint32_t *)(p->sfdl_force_int_addr) = 1; /* wait for the interrupt to come back. */ - while (*(p->sfdl_flush_addr) != 0x10f) ; + while (*(p->sfdl_flush_addr) != 0x10f) + cpu_relax(); /* okay, everything is synched up. */ spin_unlock_irqrestore((spinlock_t *)&p->sfdl_flush_lock, flags); diff --git a/include/asm-ia64/sn/sn_sal.h b/include/asm-ia64/sn/sn_sal.h index 27976d2..99cb9ed 100644 --- a/include/asm-ia64/sn/sn_sal.h +++ b/include/asm-ia64/sn/sn_sal.h @@ -749,7 +749,8 @@ ia64_sn_power_down(void) { struct ia64_sal_retval ret_stuff; SAL_CALL(ret_stuff, SN_SAL_SYSTEM_POWER_DOWN, 0, 0, 0, 0, 0, 0, 0); - while(1); + while(1) + cpu_relax(); /* never returns */ } -- cgit v0.10.2 From 470ceb05d9a2b4d61c19fac615a79e56e8401565 Mon Sep 17 00:00:00 2001 From: Jack Steiner Date: Thu, 11 Aug 2005 10:28:00 -0700 Subject: [IA64-SGI] - New SN hardware support - ptc_fixes Shub2 provides a much improved mechanism for issuing internode TLB purges. Add code to support the newer mechanism. There is also some debug code (disabled) that is useful for testing. Collect statistics on the number, type & duration of TLB purges. This data will be useful for making future improvements in the algorithms. Signed-off-by: Jack Steiner Signed-off-by: Tony Luck diff --git a/arch/ia64/sn/kernel/setup.c b/arch/ia64/sn/kernel/setup.c index ed77715..6648deb 100644 --- a/arch/ia64/sn/kernel/setup.c +++ b/arch/ia64/sn/kernel/setup.c @@ -403,6 +403,7 @@ static void __init sn_init_pdas(char **cmdline_p) memset(nodepdaindr[cnode], 0, sizeof(nodepda_t)); memset(nodepdaindr[cnode]->phys_cpuid, -1, sizeof(nodepdaindr[cnode]->phys_cpuid)); + spin_lock_init(&nodepdaindr[cnode]->ptc_lock); } /* diff --git a/arch/ia64/sn/kernel/sn2/ptc_deadlock.S b/arch/ia64/sn/kernel/sn2/ptc_deadlock.S index 96cb71d..3fa9506 100644 --- a/arch/ia64/sn/kernel/sn2/ptc_deadlock.S +++ b/arch/ia64/sn/kernel/sn2/ptc_deadlock.S @@ -3,7 +3,7 @@ * License. See the file "COPYING" in the main directory of this archive * for more details. * - * Copyright (C) 2000-2004 Silicon Graphics, Inc. All rights reserved. + * Copyright (C) 2000-2005 Silicon Graphics, Inc. All rights reserved. */ #include @@ -11,7 +11,7 @@ #define DEADLOCKBIT SH_PIO_WRITE_STATUS_WRITE_DEADLOCK_SHFT #define WRITECOUNTMASK SH_PIO_WRITE_STATUS_PENDING_WRITE_COUNT_MASK -#define ALIAS_OFFSET (SH1_PIO_WRITE_STATUS_0_ALIAS-SH1_PIO_WRITE_STATUS_0) +#define ALIAS_OFFSET 8 .global sn2_ptc_deadlock_recovery_core @@ -36,13 +36,15 @@ sn2_ptc_deadlock_recovery_core: extr.u piowcphy=piowc,0,61;; // Convert piowc to uncached physical address dep piowcphy=-1,piowcphy,63,1 movl mask=WRITECOUNTMASK + mov r8=r0 1: add scr2=ALIAS_OFFSET,piowc // Address of WRITE_STATUS alias register - mov scr1=7;; // Clear DEADLOCK, WRITE_ERROR, MULTI_WRITE_ERROR - st8.rel [scr2]=scr1;; + ;; + ld8.acq scr1=[scr2];; 5: ld8.acq scr1=[piowc];; // Wait for PIOs to complete. + hint @pause and scr2=scr1,mask;; // mask of writecount bits cmp.ne p6,p0=zeroval,scr2 (p6) br.cond.sptk 5b @@ -57,6 +59,7 @@ sn2_ptc_deadlock_recovery_core: st8.rel [ptc0]=data0 // Write PTC0 & wait for completion. 5: ld8.acq scr1=[piowcphy];; // Wait for PIOs to complete. + hint @pause and scr2=scr1,mask;; // mask of writecount bits cmp.ne p6,p0=zeroval,scr2 (p6) br.cond.sptk 5b;; @@ -67,6 +70,7 @@ sn2_ptc_deadlock_recovery_core: (p7) st8.rel [ptc1]=data1;; // Now write PTC1. 5: ld8.acq scr1=[piowcphy];; // Wait for PIOs to complete. + hint @pause and scr2=scr1,mask;; // mask of writecount bits cmp.ne p6,p0=zeroval,scr2 (p6) br.cond.sptk 5b @@ -77,6 +81,7 @@ sn2_ptc_deadlock_recovery_core: srlz.i;; ////////////// END PHYSICAL MODE //////////////////// +(p8) add r8=1,r8 (p8) br.cond.spnt 1b;; // Repeat if DEADLOCK occurred. br.ret.sptk rp diff --git a/arch/ia64/sn/kernel/sn2/sn2_smp.c b/arch/ia64/sn/kernel/sn2/sn2_smp.c index 7af05a7..0a4ee50 100644 --- a/arch/ia64/sn/kernel/sn2/sn2_smp.c +++ b/arch/ia64/sn/kernel/sn2/sn2_smp.c @@ -5,7 +5,7 @@ * License. See the file "COPYING" in the main directory of this archive * for more details. * - * Copyright (C) 2000-2004 Silicon Graphics, Inc. All rights reserved. + * Copyright (C) 2000-2005 Silicon Graphics, Inc. All rights reserved. */ #include @@ -20,6 +20,8 @@ #include #include #include +#include +#include #include #include @@ -39,12 +41,120 @@ #include #include -void sn2_ptc_deadlock_recovery(volatile unsigned long *, unsigned long data0, - volatile unsigned long *, unsigned long data1); +DEFINE_PER_CPU(struct ptc_stats, ptcstats); +DECLARE_PER_CPU(struct ptc_stats, ptcstats); static __cacheline_aligned DEFINE_SPINLOCK(sn2_global_ptc_lock); -static unsigned long sn2_ptc_deadlock_count; +void sn2_ptc_deadlock_recovery(short *, short, int, volatile unsigned long *, unsigned long data0, + volatile unsigned long *, unsigned long data1); + +#ifdef DEBUG_PTC +/* + * ptctest: + * + * xyz - 3 digit hex number: + * x - Force PTC purges to use shub: + * 0 - no force + * 1 - force + * y - interupt enable + * 0 - disable interrupts + * 1 - leave interuupts enabled + * z - type of lock: + * 0 - global lock + * 1 - node local lock + * 2 - no lock + * + * Note: on shub1, only ptctest == 0 is supported. Don't try other values! + */ + +static unsigned int sn2_ptctest = 0; + +static int __init ptc_test(char *str) +{ + get_option(&str, &sn2_ptctest); + return 1; +} +__setup("ptctest=", ptc_test); + +static inline int ptc_lock(unsigned long *flagp) +{ + unsigned long opt = sn2_ptctest & 255; + + switch (opt) { + case 0x00: + spin_lock_irqsave(&sn2_global_ptc_lock, *flagp); + break; + case 0x01: + spin_lock_irqsave(&sn_nodepda->ptc_lock, *flagp); + break; + case 0x02: + local_irq_save(*flagp); + break; + case 0x10: + spin_lock(&sn2_global_ptc_lock); + break; + case 0x11: + spin_lock(&sn_nodepda->ptc_lock); + break; + case 0x12: + break; + default: + BUG(); + } + return opt; +} + +static inline void ptc_unlock(unsigned long flags, int opt) +{ + switch (opt) { + case 0x00: + spin_unlock_irqrestore(&sn2_global_ptc_lock, flags); + break; + case 0x01: + spin_unlock_irqrestore(&sn_nodepda->ptc_lock, flags); + break; + case 0x02: + local_irq_restore(flags); + break; + case 0x10: + spin_unlock(&sn2_global_ptc_lock); + break; + case 0x11: + spin_unlock(&sn_nodepda->ptc_lock); + break; + case 0x12: + break; + default: + BUG(); + } +} +#else + +#define sn2_ptctest 0 + +static inline int ptc_lock(unsigned long *flagp) +{ + spin_lock_irqsave(&sn2_global_ptc_lock, *flagp); + return 0; +} + +static inline void ptc_unlock(unsigned long flags, int opt) +{ + spin_unlock_irqrestore(&sn2_global_ptc_lock, flags); +} +#endif + +struct ptc_stats { + unsigned long ptc_l; + unsigned long change_rid; + unsigned long shub_ptc_flushes; + unsigned long nodes_flushed; + unsigned long deadlocks; + unsigned long lock_itc_clocks; + unsigned long shub_itc_clocks; + unsigned long shub_itc_clocks_max; +}; static inline unsigned long wait_piowc(void) { @@ -89,9 +199,9 @@ void sn2_global_tlb_purge(unsigned long start, unsigned long end, unsigned long nbits) { - int i, shub1, cnode, mynasid, cpu, lcpu = 0, nasid, flushed = 0; + int i, opt, shub1, cnode, mynasid, cpu, lcpu = 0, nasid, flushed = 0; volatile unsigned long *ptc0, *ptc1; - unsigned long flags = 0, data0 = 0, data1 = 0; + unsigned long itc, itc2, flags, data0 = 0, data1 = 0; struct mm_struct *mm = current->active_mm; short nasids[MAX_NUMNODES], nix; nodemask_t nodes_flushed; @@ -114,16 +224,19 @@ sn2_global_tlb_purge(unsigned long start, unsigned long end, start += (1UL << nbits); } while (start < end); ia64_srlz_i(); + __get_cpu_var(ptcstats).ptc_l++; preempt_enable(); return; } if (atomic_read(&mm->mm_users) == 1) { flush_tlb_mm(mm); + __get_cpu_var(ptcstats).change_rid++; preempt_enable(); return; } + itc = ia64_get_itc(); nix = 0; for_each_node_mask(cnode, nodes_flushed) nasids[nix++] = cnodeid_to_nasid(cnode); @@ -148,7 +261,12 @@ sn2_global_tlb_purge(unsigned long start, unsigned long end, mynasid = get_nasid(); - spin_lock_irqsave(&sn2_global_ptc_lock, flags); + itc = ia64_get_itc(); + opt = ptc_lock(&flags); + itc2 = ia64_get_itc(); + __get_cpu_var(ptcstats).lock_itc_clocks += itc2 - itc; + __get_cpu_var(ptcstats).shub_ptc_flushes++; + __get_cpu_var(ptcstats).nodes_flushed += nix; do { if (shub1) @@ -157,7 +275,7 @@ sn2_global_tlb_purge(unsigned long start, unsigned long end, data0 = (data0 & ~SH2_PTC_ADDR_MASK) | (start & SH2_PTC_ADDR_MASK); for (i = 0; i < nix; i++) { nasid = nasids[i]; - if (unlikely(nasid == mynasid)) { + if ((!(sn2_ptctest & 3)) && unlikely(nasid == mynasid)) { ia64_ptcga(start, nbits << 2); ia64_srlz_i(); } else { @@ -169,18 +287,22 @@ sn2_global_tlb_purge(unsigned long start, unsigned long end, flushed = 1; } } - if (flushed && (wait_piowc() & - SH_PIO_WRITE_STATUS_WRITE_DEADLOCK_MASK)) { - sn2_ptc_deadlock_recovery(ptc0, data0, ptc1, data1); + (SH_PIO_WRITE_STATUS_WRITE_DEADLOCK_MASK))) { + sn2_ptc_deadlock_recovery(nasids, nix, mynasid, ptc0, data0, ptc1, data1); } start += (1UL << nbits); } while (start < end); - spin_unlock_irqrestore(&sn2_global_ptc_lock, flags); + itc2 = ia64_get_itc() - itc2; + __get_cpu_var(ptcstats).shub_itc_clocks += itc2; + if (itc2 > __get_cpu_var(ptcstats).shub_itc_clocks_max) + __get_cpu_var(ptcstats).shub_itc_clocks_max = itc2; + + ptc_unlock(flags, opt); preempt_enable(); } @@ -192,31 +314,29 @@ sn2_global_tlb_purge(unsigned long start, unsigned long end, * TLB flush transaction. The recovery sequence is somewhat tricky & is * coded in assembly language. */ -void sn2_ptc_deadlock_recovery(volatile unsigned long *ptc0, unsigned long data0, +void sn2_ptc_deadlock_recovery(short *nasids, short nix, int mynasid, volatile unsigned long *ptc0, unsigned long data0, volatile unsigned long *ptc1, unsigned long data1) { extern void sn2_ptc_deadlock_recovery_core(volatile unsigned long *, unsigned long, volatile unsigned long *, unsigned long, volatile unsigned long *, unsigned long); - int cnode, mycnode, nasid; - volatile unsigned long *piows; - volatile unsigned long zeroval; + short nasid, i; + unsigned long *piows, zeroval; - sn2_ptc_deadlock_count++; + __get_cpu_var(ptcstats).deadlocks++; - piows = pda->pio_write_status_addr; + piows = (unsigned long *) pda->pio_write_status_addr; zeroval = pda->pio_write_status_val; - mycnode = numa_node_id(); - - for_each_online_node(cnode) { - if (is_headless_node(cnode) || cnode == mycnode) + for (i=0; i < nix; i++) { + nasid = nasids[i]; + if (!(sn2_ptctest & 3) && nasid == mynasid) continue; - nasid = cnodeid_to_nasid(cnode); ptc0 = CHANGE_NASID(nasid, ptc0); if (ptc1) ptc1 = CHANGE_NASID(nasid, ptc1); sn2_ptc_deadlock_recovery_core(ptc0, data0, ptc1, data1, piows, zeroval); } + } /** @@ -293,3 +413,93 @@ void sn2_send_IPI(int cpuid, int vector, int delivery_mode, int redirect) sn_send_IPI_phys(nasid, physid, vector, delivery_mode); } + +#ifdef CONFIG_PROC_FS + +#define PTC_BASENAME "sgi_sn/ptc_statistics" + +static void *sn2_ptc_seq_start(struct seq_file *file, loff_t * offset) +{ + if (*offset < NR_CPUS) + return offset; + return NULL; +} + +static void *sn2_ptc_seq_next(struct seq_file *file, void *data, loff_t * offset) +{ + (*offset)++; + if (*offset < NR_CPUS) + return offset; + return NULL; +} + +static void sn2_ptc_seq_stop(struct seq_file *file, void *data) +{ +} + +static int sn2_ptc_seq_show(struct seq_file *file, void *data) +{ + struct ptc_stats *stat; + int cpu; + + cpu = *(loff_t *) data; + + if (!cpu) { + seq_printf(file, "# ptc_l change_rid shub_ptc_flushes shub_nodes_flushed deadlocks lock_nsec shub_nsec shub_nsec_max\n"); + seq_printf(file, "# ptctest %d\n", sn2_ptctest); + } + + if (cpu < NR_CPUS && cpu_online(cpu)) { + stat = &per_cpu(ptcstats, cpu); + seq_printf(file, "cpu %d %ld %ld %ld %ld %ld %ld %ld %ld\n", cpu, stat->ptc_l, + stat->change_rid, stat->shub_ptc_flushes, stat->nodes_flushed, + stat->deadlocks, + 1000 * stat->lock_itc_clocks / per_cpu(cpu_info, cpu).cyc_per_usec, + 1000 * stat->shub_itc_clocks / per_cpu(cpu_info, cpu).cyc_per_usec, + 1000 * stat->shub_itc_clocks_max / per_cpu(cpu_info, cpu).cyc_per_usec); + } + + return 0; +} + +static struct seq_operations sn2_ptc_seq_ops = { + .start = sn2_ptc_seq_start, + .next = sn2_ptc_seq_next, + .stop = sn2_ptc_seq_stop, + .show = sn2_ptc_seq_show +}; + +int sn2_ptc_proc_open(struct inode *inode, struct file *file) +{ + return seq_open(file, &sn2_ptc_seq_ops); +} + +static struct file_operations proc_sn2_ptc_operations = { + .open = sn2_ptc_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; + +static struct proc_dir_entry *proc_sn2_ptc; + +static int __init sn2_ptc_init(void) +{ + if (!(proc_sn2_ptc = create_proc_entry(PTC_BASENAME, 0444, NULL))) { + printk(KERN_ERR "unable to create %s proc entry", PTC_BASENAME); + return -EINVAL; + } + proc_sn2_ptc->proc_fops = &proc_sn2_ptc_operations; + spin_lock_init(&sn2_global_ptc_lock); + return 0; +} + +static void __exit sn2_ptc_exit(void) +{ + remove_proc_entry(PTC_BASENAME, NULL); +} + +module_init(sn2_ptc_init); +module_exit(sn2_ptc_exit); +#endif /* CONFIG_PROC_FS */ + diff --git a/include/asm-ia64/sn/nodepda.h b/include/asm-ia64/sn/nodepda.h index 7138b1e..47bb810 100644 --- a/include/asm-ia64/sn/nodepda.h +++ b/include/asm-ia64/sn/nodepda.h @@ -37,7 +37,6 @@ struct phys_cpuid { struct nodepda_s { void *pdinfo; /* Platform-dependent per-node info */ - spinlock_t bist_lock; /* * The BTEs on this node are shared by the local cpus @@ -55,6 +54,8 @@ struct nodepda_s { * Array of physical cpu identifiers. Indexed by cpuid. */ struct phys_cpuid phys_cpuid[NR_CPUS]; + spinlock_t ptc_lock ____cacheline_aligned_in_smp; + spinlock_t bist_lock; }; typedef struct nodepda_s nodepda_t; -- cgit v0.10.2 From e8579e72ca240f3fbaa669f09a9d610436505366 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Thu, 4 Aug 2005 13:06:00 -0700 Subject: [IA64, X86_64] fix swiotlb sizing Fix swiotlb sizing to match what the comments and the kernel parameters documentation indicate. Given a default 16k page size kernel (ia64) and a 2k swiotlb page size, we're off by a multiple of 8 trying to size the swiotlb. When specified on the boot line, the swiotlb is made 8x bigger than requested. When left to the default value, it's 8x smaller than the comments indicate. For x86_64 the multiplier would be 2x. The patch below fixes this. Now, what's a good default swiotlb size? Apparently we don't really need 64MB. Signed-off-by: Alex Williamson Signed-off-by: Tony Luck diff --git a/arch/ia64/lib/swiotlb.c b/arch/ia64/lib/swiotlb.c index ab7b3ad..dbc0b3e 100644 --- a/arch/ia64/lib/swiotlb.c +++ b/arch/ia64/lib/swiotlb.c @@ -93,8 +93,7 @@ static int __init setup_io_tlb_npages(char *str) { if (isdigit(*str)) { - io_tlb_nslabs = simple_strtoul(str, &str, 0) << - (PAGE_SHIFT - IO_TLB_SHIFT); + io_tlb_nslabs = simple_strtoul(str, &str, 0); /* avoid tail segment of size < IO_TLB_SEGSIZE */ io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE); } @@ -117,7 +116,7 @@ swiotlb_init_with_default_size (size_t default_size) unsigned long i; if (!io_tlb_nslabs) { - io_tlb_nslabs = (default_size >> PAGE_SHIFT); + io_tlb_nslabs = (default_size >> IO_TLB_SHIFT); io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE); } -- cgit v0.10.2 From 16592d269801ea68130b283c888ffb3c3e988299 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Mon, 22 Aug 2005 12:20:00 -0700 Subject: [IA64] Remove rwsem limitation of 32k waiters We ran into the limit with the maximum number of waiters at one of our sites. This patch increases the number of possible waiters from 2^15 to 2^31 by using a long for the counter in struct rw_semaphore. S390 and alpha already do this. Signed-off-by: Christoph Lameter Acked-by: Kenneth Chen Signed-off-by: Tony Luck diff --git a/include/asm-ia64/rwsem.h b/include/asm-ia64/rwsem.h index 6ece506..e18b5ab 100644 --- a/include/asm-ia64/rwsem.h +++ b/include/asm-ia64/rwsem.h @@ -3,6 +3,7 @@ * * Copyright (C) 2003 Ken Chen * Copyright (C) 2003 Asit Mallick + * Copyright (C) 2005 Christoph Lameter * * Based on asm-i386/rwsem.h and other architecture implementation. * @@ -11,9 +12,9 @@ * * The lock count is initialized to 0 (no active and no waiting lockers). * - * When a writer subtracts WRITE_BIAS, it'll get 0xffff0001 for the case - * of an uncontended lock. Readers increment by 1 and see a positive value - * when uncontended, negative if there are writers (and maybe) readers + * When a writer subtracts WRITE_BIAS, it'll get 0xffffffff00000001 for + * the case of an uncontended lock. Readers increment by 1 and see a positive + * value when uncontended, negative if there are writers (and maybe) readers * waiting (in which case it goes to sleep). */ @@ -29,7 +30,7 @@ * the semaphore definition */ struct rw_semaphore { - signed int count; + signed long count; spinlock_t wait_lock; struct list_head wait_list; #if RWSEM_DEBUG @@ -37,10 +38,10 @@ struct rw_semaphore { #endif }; -#define RWSEM_UNLOCKED_VALUE 0x00000000 -#define RWSEM_ACTIVE_BIAS 0x00000001 -#define RWSEM_ACTIVE_MASK 0x0000ffff -#define RWSEM_WAITING_BIAS (-0x00010000) +#define RWSEM_UNLOCKED_VALUE __IA64_UL_CONST(0x0000000000000000) +#define RWSEM_ACTIVE_BIAS __IA64_UL_CONST(0x0000000000000001) +#define RWSEM_ACTIVE_MASK __IA64_UL_CONST(0x00000000ffffffff) +#define RWSEM_WAITING_BIAS -__IA64_UL_CONST(0x0000000100000000) #define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS #define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS) @@ -83,7 +84,7 @@ init_rwsem (struct rw_semaphore *sem) static inline void __down_read (struct rw_semaphore *sem) { - int result = ia64_fetchadd4_acq((unsigned int *)&sem->count, 1); + long result = ia64_fetchadd8_acq((unsigned long *)&sem->count, 1); if (result < 0) rwsem_down_read_failed(sem); @@ -95,7 +96,7 @@ __down_read (struct rw_semaphore *sem) static inline void __down_write (struct rw_semaphore *sem) { - int old, new; + long old, new; do { old = sem->count; @@ -112,7 +113,7 @@ __down_write (struct rw_semaphore *sem) static inline void __up_read (struct rw_semaphore *sem) { - int result = ia64_fetchadd4_rel((unsigned int *)&sem->count, -1); + long result = ia64_fetchadd8_rel((unsigned long *)&sem->count, -1); if (result < 0 && (--result & RWSEM_ACTIVE_MASK) == 0) rwsem_wake(sem); @@ -124,7 +125,7 @@ __up_read (struct rw_semaphore *sem) static inline void __up_write (struct rw_semaphore *sem) { - int old, new; + long old, new; do { old = sem->count; @@ -141,7 +142,7 @@ __up_write (struct rw_semaphore *sem) static inline int __down_read_trylock (struct rw_semaphore *sem) { - int tmp; + long tmp; while ((tmp = sem->count) >= 0) { if (tmp == cmpxchg_acq(&sem->count, tmp, tmp+1)) { return 1; @@ -156,7 +157,7 @@ __down_read_trylock (struct rw_semaphore *sem) static inline int __down_write_trylock (struct rw_semaphore *sem) { - int tmp = cmpxchg_acq(&sem->count, RWSEM_UNLOCKED_VALUE, + long tmp = cmpxchg_acq(&sem->count, RWSEM_UNLOCKED_VALUE, RWSEM_ACTIVE_WRITE_BIAS); return tmp == RWSEM_UNLOCKED_VALUE; } @@ -167,7 +168,7 @@ __down_write_trylock (struct rw_semaphore *sem) static inline void __downgrade_write (struct rw_semaphore *sem) { - int old, new; + long old, new; do { old = sem->count; @@ -182,7 +183,7 @@ __downgrade_write (struct rw_semaphore *sem) * Implement atomic add functionality. These used to be "inline" functions, but GCC v3.1 * doesn't quite optimize this stuff right and ends up with bad calls to fetchandadd. */ -#define rwsem_atomic_add(delta, sem) atomic_add(delta, (atomic_t *)(&(sem)->count)) -#define rwsem_atomic_update(delta, sem) atomic_add_return(delta, (atomic_t *)(&(sem)->count)) +#define rwsem_atomic_add(delta, sem) atomic64_add(delta, (atomic64_t *)(&(sem)->count)) +#define rwsem_atomic_update(delta, sem) atomic64_add_return(delta, (atomic64_t *)(&(sem)->count)) #endif /* _ASM_IA64_RWSEM_H */ -- cgit v0.10.2 From a877bd36f7584fd7e4729099348cfc9190ba00aa Mon Sep 17 00:00:00 2001 From: Kumar Gala Date: Wed, 24 Aug 2005 09:53:00 -0700 Subject: [IA64] remove use of asm/segment.h Removed IA64 architecture specific users of asm/segment.h The removal of asm-ia64/segment.h itself can wait until all of the kernel source has been purged of references. Signed-off-by: Kumar Gala Signed-off-by: Tony Luck diff --git a/arch/ia64/ia32/ia32_signal.c b/arch/ia64/ia32/ia32_signal.c index ebb89be..aa891c9 100644 --- a/arch/ia64/ia32/ia32_signal.c +++ b/arch/ia64/ia32/ia32_signal.c @@ -29,7 +29,6 @@ #include #include #include -#include #include "ia32priv.h" diff --git a/arch/ia64/pci/pci.c b/arch/ia64/pci/pci.c index 54d9ed4..0addeca 100644 --- a/arch/ia64/pci/pci.c +++ b/arch/ia64/pci/pci.c @@ -24,7 +24,6 @@ #include #include -#include #include #include #include diff --git a/arch/ia64/sn/kernel/sn2/sn_hwperf.c b/arch/ia64/sn/kernel/sn2/sn_hwperf.c index 833e700..e282142 100644 --- a/arch/ia64/sn/kernel/sn2/sn_hwperf.c +++ b/arch/ia64/sn/kernel/sn2/sn_hwperf.c @@ -36,7 +36,6 @@ #include #include #include -#include #include #include #include -- cgit v0.10.2 From 38d26b9f577ec63ba64926c45f4ee3805ed2041c Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Wed, 24 Aug 2005 03:58:00 -0700 Subject: [IA64] arch/ia64/hp/sim/boot/fw-emu.c: remove egcs workaround Kernel 2.6 doesn't support egcs, and I didn't find any user of this function. Signed-off-by: Adrian Bunk Signed-off-by: Tony Luck diff --git a/arch/ia64/hp/sim/boot/fw-emu.c b/arch/ia64/hp/sim/boot/fw-emu.c index 5c46928..30fdfb1 100644 --- a/arch/ia64/hp/sim/boot/fw-emu.c +++ b/arch/ia64/hp/sim/boot/fw-emu.c @@ -237,17 +237,6 @@ sal_emulator (long index, unsigned long in1, unsigned long in2, return ((struct sal_ret_values) {status, r9, r10, r11}); } - -/* - * This is here to work around a bug in egcs-1.1.1b that causes the - * compiler to crash (seems like a bug in the new alias analysis code. - */ -void * -id (long addr) -{ - return (void *) addr; -} - struct ia64_boot_param * sys_fw_init (const char *args, int arglen) { -- cgit v0.10.2 From b5f3616ba73699d07deee1f244179fae49502052 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Tue, 23 Aug 2005 09:24:00 -0700 Subject: [IA64] fix IO_SPACE_SPARSE_ENCODING macro ambiguity Parenthesize "p" to avoid ambiguity. No callers have a problem today; this is just to clean up the bad form. Signed-off-by: Bjorn Helgaas Signed-off-by: Tony Luck diff --git a/include/asm-ia64/io.h b/include/asm-ia64/io.h index 54e7637..a2f92ed 100644 --- a/include/asm-ia64/io.h +++ b/include/asm-ia64/io.h @@ -41,7 +41,7 @@ #define IO_SPACE_BASE(space) ((space) << IO_SPACE_BITS) #define IO_SPACE_PORT(port) ((port) & (IO_SPACE_SIZE - 1)) -#define IO_SPACE_SPARSE_ENCODING(p) ((((p) >> 2) << 12) | (p & 0xfff)) +#define IO_SPACE_SPARSE_ENCODING(p) ((((p) >> 2) << 12) | ((p) & 0xfff)) struct io_space { unsigned long mmio_base; /* base in MMIO space */ -- cgit v0.10.2 From 0a41e2501160587eb8f66cef3bdf1c6f2cb86997 Mon Sep 17 00:00:00 2001 From: Peter Chubb Date: Tue, 16 Aug 2005 19:54:00 -0700 Subject: [IA64] Rationalise Region Definitions Currently, region numbers are defined in several files, with several names. For example, we have REGION_KERNEL in asm/page.h and RGN_KERNEL in pgtable.h We also have address definitions that should depend on the RGN_XXX macros, but are currently just long constants. The following patch reorganises all the definitions so that they have the same form (RGN_XXX), are in one place, and that addresses that depend on RGN_XXX are derived from them. (This is a necessary but not sufficient patch to allow UML-like operation on IA64). Thanks to David Mosberger for catching the change I missed in mmu_context.h. Signed-off-by: Peter Chubb Signed-off-by: Tony Luck diff --git a/arch/ia64/kernel/sys_ia64.c b/arch/ia64/kernel/sys_ia64.c index 770fab3..f2dbcd1 100644 --- a/arch/ia64/kernel/sys_ia64.c +++ b/arch/ia64/kernel/sys_ia64.c @@ -35,7 +35,7 @@ arch_get_unmapped_area (struct file *filp, unsigned long addr, unsigned long len return -ENOMEM; #ifdef CONFIG_HUGETLB_PAGE - if (REGION_NUMBER(addr) == REGION_HPAGE) + if (REGION_NUMBER(addr) == RGN_HPAGE) addr = 0; #endif if (!addr) diff --git a/arch/ia64/mm/hugetlbpage.c b/arch/ia64/mm/hugetlbpage.c index e0a776a..2d13889 100644 --- a/arch/ia64/mm/hugetlbpage.c +++ b/arch/ia64/mm/hugetlbpage.c @@ -76,7 +76,7 @@ int is_aligned_hugepage_range(unsigned long addr, unsigned long len) return -EINVAL; if (addr & ~HPAGE_MASK) return -EINVAL; - if (REGION_NUMBER(addr) != REGION_HPAGE) + if (REGION_NUMBER(addr) != RGN_HPAGE) return -EINVAL; return 0; @@ -87,7 +87,7 @@ struct page *follow_huge_addr(struct mm_struct *mm, unsigned long addr, int writ struct page *page; pte_t *ptep; - if (REGION_NUMBER(addr) != REGION_HPAGE) + if (REGION_NUMBER(addr) != RGN_HPAGE) return ERR_PTR(-EINVAL); ptep = huge_pte_offset(mm, addr); @@ -142,8 +142,8 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr, u return -ENOMEM; if (len & ~HPAGE_MASK) return -EINVAL; - /* This code assumes that REGION_HPAGE != 0. */ - if ((REGION_NUMBER(addr) != REGION_HPAGE) || (addr & (HPAGE_SIZE - 1))) + /* This code assumes that RGN_HPAGE != 0. */ + if ((REGION_NUMBER(addr) != RGN_HPAGE) || (addr & (HPAGE_SIZE - 1))) addr = HPAGE_REGION_BASE; else addr = ALIGN(addr, HPAGE_SIZE); diff --git a/include/asm-ia64/io.h b/include/asm-ia64/io.h index 54e7637..3c28eeb 100644 --- a/include/asm-ia64/io.h +++ b/include/asm-ia64/io.h @@ -23,7 +23,7 @@ #define __SLOW_DOWN_IO do { } while (0) #define SLOW_DOWN_IO do { } while (0) -#define __IA64_UNCACHED_OFFSET 0xc000000000000000UL /* region 6 */ +#define __IA64_UNCACHED_OFFSET RGN_BASE(RGN_UNCACHED) /* * The legacy I/O space defined by the ia64 architecture supports only 65536 ports, but diff --git a/include/asm-ia64/mmu_context.h b/include/asm-ia64/mmu_context.h index e3e5fed..ab60a6a 100644 --- a/include/asm-ia64/mmu_context.h +++ b/include/asm-ia64/mmu_context.h @@ -19,6 +19,7 @@ #define ia64_rid(ctx,addr) (((ctx) << 3) | (addr >> 61)) +# include # ifndef __ASSEMBLY__ #include @@ -110,7 +111,7 @@ reload_context (mm_context_t context) unsigned long rid_incr = 0; unsigned long rr0, rr1, rr2, rr3, rr4, old_rr4; - old_rr4 = ia64_get_rr(0x8000000000000000UL); + old_rr4 = ia64_get_rr(RGN_BASE(RGN_HPAGE)); rid = context << 3; /* make space for encoding the region number */ rid_incr = 1 << 8; @@ -122,6 +123,10 @@ reload_context (mm_context_t context) rr4 = rr0 + 4*rid_incr; #ifdef CONFIG_HUGETLB_PAGE rr4 = (rr4 & (~(0xfcUL))) | (old_rr4 & 0xfc); + +# if RGN_HPAGE != 4 +# error "reload_context assumes RGN_HPAGE is 4" +# endif #endif ia64_set_rr(0x0000000000000000UL, rr0); diff --git a/include/asm-ia64/page.h b/include/asm-ia64/page.h index 08894f7..ec17f9e 100644 --- a/include/asm-ia64/page.h +++ b/include/asm-ia64/page.h @@ -13,6 +13,19 @@ #include /* + * The top three bits of an IA64 address are its Region Number. + * Different regions are assigned to different purposes. + */ +#define RGN_SHIFT (61) +#define RGN_BASE(r) (__IA64_UL_CONST(r)<> (HPAGE_SHIFT-PAGE_SHIFT))) # define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT) # define is_hugepage_only_range(mm, addr, len) \ - (REGION_NUMBER(addr) == REGION_HPAGE && \ - REGION_NUMBER((addr)+(len)-1) == REGION_HPAGE) + (REGION_NUMBER(addr) == RGN_HPAGE && \ + REGION_NUMBER((addr)+(len)-1) == RGN_HPAGE) extern unsigned int hpage_shift; #endif @@ -197,7 +206,7 @@ get_order (unsigned long size) # define __pgprot(x) (x) #endif /* !STRICT_MM_TYPECHECKS */ -#define PAGE_OFFSET __IA64_UL_CONST(0xe000000000000000) +#define PAGE_OFFSET RGN_BASE(RGN_KERNEL) #define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | \ VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC | \ diff --git a/include/asm-ia64/pgtable.h b/include/asm-ia64/pgtable.h index 48586e0..2e34c06 100644 --- a/include/asm-ia64/pgtable.h +++ b/include/asm-ia64/pgtable.h @@ -204,21 +204,18 @@ ia64_phys_addr_valid (unsigned long addr) #define set_pte(ptep, pteval) (*(ptep) = (pteval)) #define set_pte_at(mm,addr,ptep,pteval) set_pte(ptep,pteval) -#define RGN_SIZE (1UL << 61) -#define RGN_KERNEL 7 - -#define VMALLOC_START 0xa000000200000000UL +#define VMALLOC_START (RGN_BASE(RGN_GATE) + 0x200000000UL) #ifdef CONFIG_VIRTUAL_MEM_MAP -# define VMALLOC_END_INIT (0xa000000000000000UL + (1UL << (4*PAGE_SHIFT - 9))) +# define VMALLOC_END_INIT (RGN_BASE(RGN_GATE) + (1UL << (4*PAGE_SHIFT - 9))) # define VMALLOC_END vmalloc_end extern unsigned long vmalloc_end; #else -# define VMALLOC_END (0xa000000000000000UL + (1UL << (4*PAGE_SHIFT - 9))) +# define VMALLOC_END (RGN_BASE(RGN_GATE) + (1UL << (4*PAGE_SHIFT - 9))) #endif /* fs/proc/kcore.c */ -#define kc_vaddr_to_offset(v) ((v) - 0xa000000000000000UL) -#define kc_offset_to_vaddr(o) ((o) + 0xa000000000000000UL) +#define kc_vaddr_to_offset(v) ((v) - RGN_BASE(RGN_GATE)) +#define kc_offset_to_vaddr(o) ((o) + RGN_BASE(RGN_GATE)) /* * Conversion functions: convert page frame number (pfn) and a protection value to a page diff --git a/include/asm-ia64/system.h b/include/asm-ia64/system.h index cd2cf76..33256db 100644 --- a/include/asm-ia64/system.h +++ b/include/asm-ia64/system.h @@ -19,12 +19,13 @@ #include #include -#define GATE_ADDR __IA64_UL_CONST(0xa000000000000000) +#define GATE_ADDR RGN_BASE(RGN_GATE) + /* * 0xa000000000000000+2*PERCPU_PAGE_SIZE * - 0xa000000000000000+3*PERCPU_PAGE_SIZE remain unmapped (guard page) */ -#define KERNEL_START __IA64_UL_CONST(0xa000000100000000) +#define KERNEL_START (GATE_ADDR+0x100000000) #define PERCPU_ADDR (-PERCPU_PAGE_SIZE) #ifndef __ASSEMBLY__ -- cgit v0.10.2 From 1b66776da71e33dff5edcc0b096ec3b7c40c75ad Mon Sep 17 00:00:00 2001 From: Greg Edwards Date: Mon, 22 Aug 2005 09:57:00 -0700 Subject: [IA64] clean up sn2 region definitions Clean up some duplicate region definitions in sn2 code. Signed-off-by: Greg Edwards Signed-off-by: Tony Luck diff --git a/include/asm-ia64/page.h b/include/asm-ia64/page.h index ec17f9e..9edffad 100644 --- a/include/asm-ia64/page.h +++ b/include/asm-ia64/page.h @@ -17,9 +17,9 @@ * Different regions are assigned to different purposes. */ #define RGN_SHIFT (61) -#define RGN_BASE(r) (__IA64_UL_CONST(r)< Date: Tue, 16 Aug 2005 00:50:00 -0700 Subject: [IA64] - SGI SN hwperf enhancements - export_pci_topology Bugfix to export PCI topology information in /proc/sgi_sn/sn_topology. Signed-off-by: Mark Goodwin Signed-off-by: Tony Luck diff --git a/arch/ia64/sn/kernel/sn2/sn_hwperf.c b/arch/ia64/sn/kernel/sn2/sn_hwperf.c index 833e700..58e48fd 100644 --- a/arch/ia64/sn/kernel/sn2/sn_hwperf.c +++ b/arch/ia64/sn/kernel/sn2/sn_hwperf.c @@ -174,29 +174,22 @@ static const char *sn_hwperf_get_slabname(struct sn_hwperf_object_info *obj, return slabname; } -static void print_pci_topology(struct seq_file *s, - struct sn_hwperf_object_info *obj, int *ordinal, - u64 rack, u64 bay, u64 slot, u64 slab) +static void print_pci_topology(struct seq_file *s) { - char *p1; - char *p2; - char *pg; - - if (!(pg = (char *)get_zeroed_page(GFP_KERNEL))) - return; /* ignore */ - if (ia64_sn_ioif_get_pci_topology(rack, bay, slot, slab, - __pa(pg), PAGE_SIZE) == SN_HWPERF_OP_OK) { - for (p1=pg; *p1 && p1 < pg + PAGE_SIZE;) { - if (!(p2 = strchr(p1, '\n'))) - break; - *p2 = '\0'; - seq_printf(s, "pcibus %d %s-%s\n", - *ordinal, obj->location, p1); - (*ordinal)++; - p1 = p2 + 1; - } + char *p; + size_t sz; + int e; + + for (sz = PAGE_SIZE; sz < 16 * PAGE_SIZE; sz += PAGE_SIZE) { + if (!(p = (char *)kmalloc(sz, GFP_KERNEL))) + break; + e = ia64_sn_ioif_get_pci_topology(__pa(p), sz); + if (e == SALRET_OK) + seq_puts(s, p); + kfree(p); + if (e == SALRET_OK || e == SALRET_NOT_IMPLEMENTED) + break; } - free_page((unsigned long)pg); } static int sn_topology_show(struct seq_file *s, void *d) @@ -215,7 +208,6 @@ static int sn_topology_show(struct seq_file *s, void *d) struct sn_hwperf_object_info *p; struct sn_hwperf_object_info *obj = d; /* this object */ struct sn_hwperf_object_info *objs = s->private; /* all objects */ - int rack, bay, slot, slab; u8 shubtype; u8 system_size; u8 sharing_size; @@ -225,7 +217,6 @@ static int sn_topology_show(struct seq_file *s, void *d) u8 region_size; u16 nasid_mask; int nasid_msb; - int pci_bus_ordinal = 0; if (obj == objs) { seq_printf(s, "# sn_topology version 2\n"); @@ -253,6 +244,8 @@ static int sn_topology_show(struct seq_file *s, void *d) shubtype ? "shub2" : "shub1", (u64)nasid_mask << nasid_shift, nasid_msb, nasid_shift, system_size, sharing_size, coher, region_size); + + print_pci_topology(s); } if (SN_HWPERF_FOREIGN(obj)) { @@ -300,17 +293,6 @@ static int sn_topology_show(struct seq_file *s, void *d) seq_putc(s, '\n'); } } - - /* - * PCI busses attached to this node, if any - */ - if (sn_hwperf_location_to_bpos(obj->location, - &rack, &bay, &slot, &slab)) { - /* export pci bus info */ - print_pci_topology(s, obj, &pci_bus_ordinal, - rack, bay, slot, slab); - - } } if (obj->ports) { diff --git a/include/asm-ia64/sn/sn_sal.h b/include/asm-ia64/sn/sn_sal.h index 99cb9ed..02d16e3 100644 --- a/include/asm-ia64/sn/sn_sal.h +++ b/include/asm-ia64/sn/sn_sal.h @@ -78,7 +78,8 @@ #define SN_SAL_HUB_ERROR_INTERRUPT 0x02000060 #define SN_SAL_BTE_RECOVER 0x02000061 -#define SN_SAL_IOIF_GET_PCI_TOPOLOGY 0x02000062 +#define SN_SAL_RESERVED_DO_NOT_USE 0x02000062 +#define SN_SAL_IOIF_GET_PCI_TOPOLOGY 0x02000064 /* * Service-specific constants @@ -1069,12 +1070,10 @@ ia64_sn_hwperf_op(nasid_t nasid, u64 opcode, u64 a0, u64 a1, u64 a2, } static inline int -ia64_sn_ioif_get_pci_topology(u64 rack, u64 bay, u64 slot, u64 slab, - u64 buf, u64 len) +ia64_sn_ioif_get_pci_topology(u64 buf, u64 len) { struct ia64_sal_retval rv; - SAL_CALL_NOLOCK(rv, SN_SAL_IOIF_GET_PCI_TOPOLOGY, - rack, bay, slot, slab, buf, len, 0); + SAL_CALL_NOLOCK(rv, SN_SAL_IOIF_GET_PCI_TOPOLOGY, buf, len, 0, 0, 0, 0, 0); return (int) rv.status; } -- cgit v0.10.2 From 60a3ba0bb45995ecf9cfe208527d7cfd6128d053 Mon Sep 17 00:00:00 2001 From: Mark Goodwin Date: Wed, 17 Aug 2005 15:17:00 -0700 Subject: [IA64] - SGI SN hwperf enhancements - Add a new exported function for determining the nearest node with CPUs for I/O nodes and fix a bug where the hwperf dynamic misc device was being registered before misc_init(). Signed-off-by: Mark Goodwin Signed-off-by: Tony Luck diff --git a/arch/ia64/sn/kernel/sn2/sn_hwperf.c b/arch/ia64/sn/kernel/sn2/sn_hwperf.c index 58e48fd..0261452 100644 --- a/arch/ia64/sn/kernel/sn2/sn_hwperf.c +++ b/arch/ia64/sn/kernel/sn2/sn_hwperf.c @@ -59,7 +59,7 @@ static int sn_hwperf_enum_objects(int *nobj, struct sn_hwperf_object_info **ret) struct sn_hwperf_object_info *objbuf = NULL; if ((e = sn_hwperf_init()) < 0) { - printk("sn_hwperf_init failed: err %d\n", e); + printk(KERN_ERR "sn_hwperf_init failed: err %d\n", e); goto out; } @@ -111,7 +111,7 @@ static int sn_hwperf_geoid_to_cnode(char *location) if (sn_hwperf_location_to_bpos(location, &rack, &bay, &slot, &slab)) return -1; - for (cnode = 0; cnode < numionodes; cnode++) { + for_each_node(cnode) { geoid = cnodeid_get_geoid(cnode); module_id = geo_module(geoid); this_rack = MODULE_GET_RACK(module_id); @@ -124,11 +124,13 @@ static int sn_hwperf_geoid_to_cnode(char *location) } } - return cnode < numionodes ? cnode : -1; + return node_possible(cnode) ? cnode : -1; } static int sn_hwperf_obj_to_cnode(struct sn_hwperf_object_info * obj) { + if (!SN_HWPERF_IS_NODE(obj) && !SN_HWPERF_IS_IONODE(obj)) + BUG(); if (!obj->sn_hwp_this_part) return -1; return sn_hwperf_geoid_to_cnode(obj->location); @@ -192,6 +194,181 @@ static void print_pci_topology(struct seq_file *s) } } +static inline int sn_hwperf_has_cpus(cnodeid_t node) +{ + return node_online(node) && nr_cpus_node(node); +} + +static inline int sn_hwperf_has_mem(cnodeid_t node) +{ + return node_online(node) && NODE_DATA(node)->node_present_pages; +} + +static struct sn_hwperf_object_info * +sn_hwperf_findobj_id(struct sn_hwperf_object_info *objbuf, + int nobj, int id) +{ + int i; + struct sn_hwperf_object_info *p = objbuf; + + for (i=0; i < nobj; i++, p++) { + if (p->id == id) + return p; + } + + return NULL; + +} + +static int sn_hwperf_get_nearest_node_objdata(struct sn_hwperf_object_info *objbuf, + int nobj, cnodeid_t node, cnodeid_t *near_mem_node, cnodeid_t *near_cpu_node) +{ + int e; + struct sn_hwperf_object_info *nodeobj = NULL; + struct sn_hwperf_object_info *op; + struct sn_hwperf_object_info *dest; + struct sn_hwperf_object_info *router; + struct sn_hwperf_port_info ptdata[16]; + int sz, i, j; + cnodeid_t c; + int found_mem = 0; + int found_cpu = 0; + + if (!node_possible(node)) + return -EINVAL; + + if (sn_hwperf_has_cpus(node)) { + if (near_cpu_node) + *near_cpu_node = node; + found_cpu++; + } + + if (sn_hwperf_has_mem(node)) { + if (near_mem_node) + *near_mem_node = node; + found_mem++; + } + + if (found_cpu && found_mem) + return 0; /* trivially successful */ + + /* find the argument node object */ + for (i=0, op=objbuf; i < nobj; i++, op++) { + if (!SN_HWPERF_IS_NODE(op) && !SN_HWPERF_IS_IONODE(op)) + continue; + if (node == sn_hwperf_obj_to_cnode(op)) { + nodeobj = op; + break; + } + } + if (!nodeobj) { + e = -ENOENT; + goto err; + } + + /* get it's interconnect topology */ + sz = op->ports * sizeof(struct sn_hwperf_port_info); + if (sz > sizeof(ptdata)) + BUG(); + e = ia64_sn_hwperf_op(sn_hwperf_master_nasid, + SN_HWPERF_ENUM_PORTS, nodeobj->id, sz, + (u64)&ptdata, 0, 0, NULL); + if (e != SN_HWPERF_OP_OK) { + e = -EINVAL; + goto err; + } + + /* find nearest node with cpus and nearest memory */ + for (router=NULL, j=0; j < op->ports; j++) { + dest = sn_hwperf_findobj_id(objbuf, nobj, ptdata[j].conn_id); + if (!dest || SN_HWPERF_FOREIGN(dest) || + !SN_HWPERF_IS_NODE(dest) || SN_HWPERF_IS_IONODE(dest)) { + continue; + } + c = sn_hwperf_obj_to_cnode(dest); + if (!found_cpu && sn_hwperf_has_cpus(c)) { + if (near_cpu_node) + *near_cpu_node = c; + found_cpu++; + } + if (!found_mem && sn_hwperf_has_mem(c)) { + if (near_mem_node) + *near_mem_node = c; + found_mem++; + } + if (SN_HWPERF_IS_ROUTER(dest)) + router = dest; + } + + if (router && (!found_cpu || !found_mem)) { + /* search for a node connected to the same router */ + sz = router->ports * sizeof(struct sn_hwperf_port_info); + if (sz > sizeof(ptdata)) + BUG(); + e = ia64_sn_hwperf_op(sn_hwperf_master_nasid, + SN_HWPERF_ENUM_PORTS, router->id, sz, + (u64)&ptdata, 0, 0, NULL); + if (e != SN_HWPERF_OP_OK) { + e = -EINVAL; + goto err; + } + for (j=0; j < router->ports; j++) { + dest = sn_hwperf_findobj_id(objbuf, nobj, + ptdata[j].conn_id); + if (!dest || dest->id == node || + SN_HWPERF_FOREIGN(dest) || + !SN_HWPERF_IS_NODE(dest) || + SN_HWPERF_IS_IONODE(dest)) { + continue; + } + c = sn_hwperf_obj_to_cnode(dest); + if (!found_cpu && sn_hwperf_has_cpus(c)) { + if (near_cpu_node) + *near_cpu_node = c; + found_cpu++; + } + if (!found_mem && sn_hwperf_has_mem(c)) { + if (near_mem_node) + *near_mem_node = c; + found_mem++; + } + if (found_cpu && found_mem) + break; + } + } + + if (!found_cpu || !found_mem) { + /* resort to _any_ node with CPUs and memory */ + for (i=0, op=objbuf; i < nobj; i++, op++) { + if (SN_HWPERF_FOREIGN(op) || + SN_HWPERF_IS_IONODE(op) || + !SN_HWPERF_IS_NODE(op)) { + continue; + } + c = sn_hwperf_obj_to_cnode(op); + if (!found_cpu && sn_hwperf_has_cpus(c)) { + if (near_cpu_node) + *near_cpu_node = c; + found_cpu++; + } + if (!found_mem && sn_hwperf_has_mem(c)) { + if (near_mem_node) + *near_mem_node = c; + found_mem++; + } + if (found_cpu && found_mem) + break; + } + } + + if (!found_cpu || !found_mem) + e = -ENODATA; + +err: + return e; +} + + static int sn_topology_show(struct seq_file *s, void *d) { int sz; @@ -265,11 +442,24 @@ static int sn_topology_show(struct seq_file *s, void *d) if (!SN_HWPERF_IS_NODE(obj) && !SN_HWPERF_IS_IONODE(obj)) seq_putc(s, '\n'); else { + cnodeid_t near_mem = -1; + cnodeid_t near_cpu = -1; + seq_printf(s, ", nasid 0x%x", cnodeid_to_nasid(ordinal)); - for (i=0; i < numionodes; i++) { - seq_printf(s, i ? ":%d" : ", dist %d", - node_distance(ordinal, i)); + + if (sn_hwperf_get_nearest_node_objdata(objs, sn_hwperf_obj_cnt, + ordinal, &near_mem, &near_cpu) == 0) { + seq_printf(s, ", near_mem_nodeid %d, near_cpu_nodeid %d", + near_mem, near_cpu); + } + + if (!SN_HWPERF_IS_IONODE(obj)) { + for_each_online_node(i) { + seq_printf(s, i ? ":%d" : ", dist %d", + node_distance(ordinal, i)); + } } + seq_putc(s, '\n'); /* @@ -554,6 +744,8 @@ sn_hwperf_ioctl(struct inode *in, struct file *fp, u32 op, u64 arg) if ((r = sn_hwperf_enum_objects(&nobj, &objs)) == 0) { memset(p, 0, a.sz); for (i = 0; i < nobj; i++) { + if (!SN_HWPERF_IS_NODE(objs + i)) + continue; node = sn_hwperf_obj_to_cnode(objs + i); for_each_online_cpu(j) { if (node != cpu_to_node(j)) @@ -580,7 +772,7 @@ sn_hwperf_ioctl(struct inode *in, struct file *fp, u32 op, u64 arg) case SN_HWPERF_GET_NODE_NASID: if (a.sz != sizeof(u64) || - (node = a.arg) < 0 || node >= numionodes) { + (node = a.arg) < 0 || !node_possible(node)) { r = -EINVAL; goto error; } @@ -609,6 +801,14 @@ sn_hwperf_ioctl(struct inode *in, struct file *fp, u32 op, u64 arg) vfree(objs); goto error; } + + if (!SN_HWPERF_IS_NODE(objs + i) && + !SN_HWPERF_IS_IONODE(objs + i)) { + r = -ENOENT; + vfree(objs); + goto error; + } + *(u64 *)p = (u64)sn_hwperf_obj_to_cnode(objs + i); vfree(objs); } @@ -674,6 +874,7 @@ static int sn_hwperf_init(void) /* single threaded, once-only initialization */ down(&sn_hwperf_init_mutex); + if (sn_hwperf_salheap) { up(&sn_hwperf_init_mutex); return e; @@ -724,19 +925,6 @@ out: sn_hwperf_salheap = NULL; sn_hwperf_obj_cnt = 0; } - - if (!e) { - /* - * Register a dynamic misc device for ioctl. Platforms - * supporting hotplug will create /dev/sn_hwperf, else - * user can to look up the minor number in /proc/misc. - */ - if ((e = misc_register(&sn_hwperf_dev)) != 0) { - printk(KERN_ERR "sn_hwperf_init: misc register " - "for \"sn_hwperf\" failed, err %d\n", e); - } - } - up(&sn_hwperf_init_mutex); return e; } @@ -764,3 +952,41 @@ int sn_topology_release(struct inode *inode, struct file *file) vfree(seq->private); return seq_release(inode, file); } + +int sn_hwperf_get_nearest_node(cnodeid_t node, + cnodeid_t *near_mem_node, cnodeid_t *near_cpu_node) +{ + int e; + int nobj; + struct sn_hwperf_object_info *objbuf; + + if ((e = sn_hwperf_enum_objects(&nobj, &objbuf)) == 0) { + e = sn_hwperf_get_nearest_node_objdata(objbuf, nobj, + node, near_mem_node, near_cpu_node); + vfree(objbuf); + } + + return e; +} + +static int __devinit sn_hwperf_misc_register_init(void) +{ + int e; + + sn_hwperf_init(); + + /* + * Register a dynamic misc device for hwperf ioctls. Platforms + * supporting hotplug will create /dev/sn_hwperf, else user + * can to look up the minor number in /proc/misc. + */ + if ((e = misc_register(&sn_hwperf_dev)) != 0) { + printk(KERN_ERR "sn_hwperf_misc_register_init: failed to " + "register misc device for \"%s\"\n", sn_hwperf_dev.name); + } + + return e; +} + +device_initcall(sn_hwperf_misc_register_init); /* after misc_init() */ +EXPORT_SYMBOL(sn_hwperf_get_nearest_node); diff --git a/include/asm-ia64/sn/sn2/sn_hwperf.h b/include/asm-ia64/sn/sn2/sn_hwperf.h index df75f4c..291ef3d 100644 --- a/include/asm-ia64/sn/sn2/sn_hwperf.h +++ b/include/asm-ia64/sn/sn2/sn_hwperf.h @@ -43,6 +43,7 @@ struct sn_hwperf_object_info { /* macros for object classification */ #define SN_HWPERF_IS_NODE(x) ((x) && strstr((x)->name, "SHub")) +#define SN_HWPERF_IS_NODE_SHUB2(x) ((x) && strstr((x)->name, "SHub 2.")) #define SN_HWPERF_IS_IONODE(x) ((x) && strstr((x)->name, "TIO")) #define SN_HWPERF_IS_ROUTER(x) ((x) && strstr((x)->name, "Router")) #define SN_HWPERF_IS_NL3ROUTER(x) ((x) && strstr((x)->name, "NL3Router")) @@ -214,6 +215,15 @@ struct sn_hwperf_ioctl_args { */ #define SN_HWPERF_GET_NODE_NASID (102|SN_HWPERF_OP_MEM_COPYOUT) +/* + * Given a node id, determine the id of the nearest node with CPUs + * and the id of the nearest node that has memory. The argument + * node would normally be a "headless" node, e.g. an "IO node". + * Return 0 on success. + */ +extern int sn_hwperf_get_nearest_node(cnodeid_t node, + cnodeid_t *near_mem, cnodeid_t *near_cpu); + /* return codes */ #define SN_HWPERF_OP_OK 0 #define SN_HWPERF_OP_NOMEM 1 -- cgit v0.10.2 From 5390970d1c11b6d5d6a8253a718ed100e2178e14 Mon Sep 17 00:00:00 2001 From: Mark Goodwin Date: Tue, 16 Aug 2005 00:51:00 -0700 Subject: [IA64] - SGI SN hwperf enhancements - Update the SN pci device info to use the nearest node function to allocate driver memory on the nearest node (rather than defaulting to node 0). Signed-off-by: Mark Goodwin Signed-off-by: Tony Luck diff --git a/arch/ia64/sn/pci/pcibr/pcibr_provider.c b/arch/ia64/sn/pci/pcibr/pcibr_provider.c index ff9c7de9..5862a70 100644 --- a/arch/ia64/sn/pci/pcibr/pcibr_provider.c +++ b/arch/ia64/sn/pci/pcibr/pcibr_provider.c @@ -15,6 +15,7 @@ #include #include #include +#include #include "xtalk/xwidgetdev.h" #include "xtalk/hubdev.h" @@ -88,6 +89,7 @@ void * pcibr_bus_fixup(struct pcibus_bussoft *prom_bussoft, struct pci_controller *controller) { int nasid, cnode, j; + cnodeid_t near_cnode; struct hubdev_info *hubdev_info; struct pcibus_info *soft; struct sn_flush_device_list *sn_flush_device_list; @@ -161,12 +163,18 @@ pcibr_bus_fixup(struct pcibus_bussoft *prom_bussoft, struct pci_controller *cont memset(soft->pbi_int_ate_resource.ate, 0, (soft->pbi_int_ate_size * sizeof(uint64_t))); - if (prom_bussoft->bs_asic_type == PCIIO_ASIC_TYPE_TIOCP) - /* - * TIO PCI Bridge with no closest node information. - * FIXME: Find another way to determine the closest node - */ - controller->node = -1; + if (prom_bussoft->bs_asic_type == PCIIO_ASIC_TYPE_TIOCP) { + /* TIO PCI Bridge: find nearest node with CPUs */ + int e = sn_hwperf_get_nearest_node(cnode, NULL, &near_cnode); + + if (e < 0) { + near_cnode = (cnodeid_t)-1; /* use any node */ + printk(KERN_WARNING "pcibr_bus_fixup: failed to find " + "near node with CPUs to TIO node %d, err=%d\n", + cnode, e); + } + controller->node = near_cnode; + } else controller->node = cnode; return soft; -- cgit v0.10.2 From 5b9021bc5800796e23e4994f8cf2dc61536be0a7 Mon Sep 17 00:00:00 2001 From: Russ Anderson <(rja@sgi.com)> Date: Wed, 17 Aug 2005 10:00:00 -0700 Subject: [IA64] SGI SN remove redundant partition SAL call Clean up of SGI SN partitioning related code. The SN_SAL_GET_SN_INFO SAL call returns the partition ID, making the SN_SAL_SYSCTL_PARTITION_GET SAL call redundant. Remove sn_partid and use sn_partition_id. Signed-off-by: Russ Anderson (rja@sgi.com) Signed-off-by: Tony Luck diff --git a/arch/ia64/sn/kernel/setup.c b/arch/ia64/sn/kernel/setup.c index 6648deb..a594aca 100644 --- a/arch/ia64/sn/kernel/setup.c +++ b/arch/ia64/sn/kernel/setup.c @@ -80,8 +80,6 @@ EXPORT_PER_CPU_SYMBOL(__sn_cnodeid_to_nasid); DEFINE_PER_CPU(struct nodepda_s *, __sn_nodepda); EXPORT_PER_CPU_SYMBOL(__sn_nodepda); -partid_t sn_partid = -1; -EXPORT_SYMBOL(sn_partid); char sn_system_serial_number_string[128]; EXPORT_SYMBOL(sn_system_serial_number_string); u64 sn_partition_serial_number; diff --git a/arch/ia64/sn/kernel/sn2/sn_proc_fs.c b/arch/ia64/sn/kernel/sn2/sn_proc_fs.c index 6a80fca..51bf827 100644 --- a/arch/ia64/sn/kernel/sn2/sn_proc_fs.c +++ b/arch/ia64/sn/kernel/sn2/sn_proc_fs.c @@ -3,7 +3,7 @@ * License. See the file "COPYING" in the main directory of this archive * for more details. * - * Copyright (C) 2000-2004 Silicon Graphics, Inc. All rights reserved. + * Copyright (C) 2000-2005 Silicon Graphics, Inc. All rights reserved. */ #include #include @@ -15,7 +15,7 @@ static int partition_id_show(struct seq_file *s, void *p) { - seq_printf(s, "%d\n", sn_local_partid()); + seq_printf(s, "%d\n", sn_partition_id); return 0; } diff --git a/include/asm-ia64/sn/geo.h b/include/asm-ia64/sn/geo.h index 84b2546..f083c94 100644 --- a/include/asm-ia64/sn/geo.h +++ b/include/asm-ia64/sn/geo.h @@ -3,7 +3,7 @@ * License. See the file "COPYING" in the main directory of this archive * for more details. * - * Copyright (C) 1992 - 1997, 2000-2004 Silicon Graphics, Inc. All rights reserved. + * Copyright (C) 1992 - 1997, 2000-2005 Silicon Graphics, Inc. All rights reserved. */ #ifndef _ASM_IA64_SN_GEO_H @@ -108,7 +108,6 @@ typedef union geoid_u { #define INVALID_SLAB (slabid_t)-1 #define INVALID_SLOT (slotid_t)-1 #define INVALID_MODULE ((moduleid_t)-1) -#define INVALID_PARTID ((partid_t)-1) static inline slabid_t geo_slab(geoid_t g) { diff --git a/include/asm-ia64/sn/sn_sal.h b/include/asm-ia64/sn/sn_sal.h index 02d16e3..e67825a 100644 --- a/include/asm-ia64/sn/sn_sal.h +++ b/include/asm-ia64/sn/sn_sal.h @@ -55,7 +55,6 @@ #define SN_SAL_BUS_CONFIG 0x02000037 #define SN_SAL_SYS_SERIAL_GET 0x02000038 #define SN_SAL_PARTITION_SERIAL_GET 0x02000039 -#define SN_SAL_SYSCTL_PARTITION_GET 0x0200003a #define SN_SAL_SYSTEM_POWER_DOWN 0x0200003b #define SN_SAL_GET_MASTER_BASEIO_NASID 0x0200003c #define SN_SAL_COHERENCE 0x0200003d @@ -587,35 +586,6 @@ sn_partition_serial_number_val(void) { } /* - * Returns the partition id of the nasid passed in as an argument, - * or INVALID_PARTID if the partition id cannot be retrieved. - */ -static inline partid_t -ia64_sn_sysctl_partition_get(nasid_t nasid) -{ - struct ia64_sal_retval ret_stuff; - ia64_sal_oemcall_nolock(&ret_stuff, SN_SAL_SYSCTL_PARTITION_GET, nasid, - 0, 0, 0, 0, 0, 0); - if (ret_stuff.status != 0) - return INVALID_PARTID; - return ((partid_t)ret_stuff.v0); -} - -/* - * Returns the partition id of the current processor. - */ - -extern partid_t sn_partid; - -static inline partid_t -sn_local_partid(void) { - if (unlikely(sn_partid < 0)) { - sn_partid = ia64_sn_sysctl_partition_get(cpuid_to_nasid(smp_processor_id())); - } - return sn_partid; -} - -/* * Returns the physical address of the partition's reserved page through * an iterative number of calls. * @@ -1020,24 +990,6 @@ ia64_sn_get_sn_info(int fc, u8 *shubtype, u16 *nasid_bitmask, u8 *nasid_shift, ret_stuff.v2 = 0; SAL_CALL_NOLOCK(ret_stuff, SN_SAL_GET_SN_INFO, fc, 0, 0, 0, 0, 0, 0); -/***** BEGIN HACK - temp til old proms no longer supported ********/ - if (ret_stuff.status == SALRET_NOT_IMPLEMENTED) { - int nasid = get_sapicid() & 0xfff;; -#define SH_SHUB_ID_NODES_PER_BIT_MASK 0x001f000000000000UL -#define SH_SHUB_ID_NODES_PER_BIT_SHFT 48 - if (shubtype) *shubtype = 0; - if (nasid_bitmask) *nasid_bitmask = 0x7ff; - if (nasid_shift) *nasid_shift = 38; - if (systemsize) *systemsize = 11; - if (sharing_domain_size) *sharing_domain_size = 9; - if (partid) *partid = ia64_sn_sysctl_partition_get(nasid); - if (coher) *coher = nasid >> 9; - if (reg) *reg = (HUB_L((u64 *) LOCAL_MMR_ADDR(SH1_SHUB_ID)) & SH_SHUB_ID_NODES_PER_BIT_MASK) >> - SH_SHUB_ID_NODES_PER_BIT_SHFT; - return 0; - } -/***** END HACK *******/ - if (ret_stuff.status < 0) return ret_stuff.status; -- cgit v0.10.2 From 8409668b561fbe464f7a392e8dc77eca225d27ac Mon Sep 17 00:00:00 2001 From: Mark Maule Date: Thu, 25 Aug 2005 11:45:00 -0700 Subject: [IA64] altix: Abstract irq_affinity at the sn pci provider Altix patch to abstract irq_affinity down to the pci provider level since different SGI hardware implements this in different ways. Signed-off-by: Mark Maule Signed-off-by: Tony Luck diff --git a/arch/ia64/sn/kernel/irq.c b/arch/ia64/sn/kernel/irq.c index 607938c2..9fc7463 100644 --- a/arch/ia64/sn/kernel/irq.c +++ b/arch/ia64/sn/kernel/irq.c @@ -127,6 +127,7 @@ static void sn_set_affinity_irq(unsigned int irq, cpumask_t mask) int local_widget, status; nasid_t local_nasid; struct sn_irq_info *new_irq_info; + struct sn_pcibus_provider *pci_provider; new_irq_info = kmalloc(sizeof(struct sn_irq_info), GFP_ATOMIC); if (new_irq_info == NULL) @@ -166,8 +167,9 @@ static void sn_set_affinity_irq(unsigned int irq, cpumask_t mask) new_irq_info->irq_cpuid = cpuid; register_intr_pda(new_irq_info); - if (IS_PCI_BRIDGE_ASIC(new_irq_info->irq_bridge_type)) - pcibr_change_devices_irq(new_irq_info); + pci_provider = sn_pci_provider[new_irq_info->irq_bridge_type]; + if (pci_provider && pci_provider->target_interrupt) + (pci_provider->target_interrupt)(new_irq_info); spin_lock(&sn_irq_info_lock); list_replace_rcu(&sn_irq_info->list, &new_irq_info->list); diff --git a/arch/ia64/sn/pci/pcibr/pcibr_provider.c b/arch/ia64/sn/pci/pcibr/pcibr_provider.c index 5862a70..7b03b80 100644 --- a/arch/ia64/sn/pci/pcibr/pcibr_provider.c +++ b/arch/ia64/sn/pci/pcibr/pcibr_provider.c @@ -198,7 +198,7 @@ void pcibr_force_interrupt(struct sn_irq_info *sn_irq_info) } } -void pcibr_change_devices_irq(struct sn_irq_info *sn_irq_info) +void pcibr_target_interrupt(struct sn_irq_info *sn_irq_info) { struct pcidev_info *pcidev_info; struct pcibus_info *pcibus_info; @@ -233,7 +233,8 @@ struct sn_pcibus_provider pcibr_provider = { .dma_map_consistent = pcibr_dma_map_consistent, .dma_unmap = pcibr_dma_unmap, .bus_fixup = pcibr_bus_fixup, - .force_interrupt = pcibr_force_interrupt + .force_interrupt = pcibr_force_interrupt, + .target_interrupt = pcibr_target_interrupt }; int diff --git a/arch/ia64/sn/pci/tioca_provider.c b/arch/ia64/sn/pci/tioca_provider.c index 4ea04cf..ea09c12 100644 --- a/arch/ia64/sn/pci/tioca_provider.c +++ b/arch/ia64/sn/pci/tioca_provider.c @@ -657,7 +657,8 @@ static struct sn_pcibus_provider tioca_pci_interfaces = { .dma_map_consistent = tioca_dma_map, .dma_unmap = tioca_dma_unmap, .bus_fixup = tioca_bus_fixup, - .force_interrupt = NULL + .force_interrupt = NULL, + .target_interrupt = NULL }; /** diff --git a/arch/ia64/sn/pci/tioce_provider.c b/arch/ia64/sn/pci/tioce_provider.c index d908136..8e75db2 100644 --- a/arch/ia64/sn/pci/tioce_provider.c +++ b/arch/ia64/sn/pci/tioce_provider.c @@ -669,6 +669,43 @@ tioce_force_interrupt(struct sn_irq_info *sn_irq_info) } /** + * tioce_target_interrupt - implement set_irq_affinity for tioce resident + * functions. Note: only applies to line interrupts, not MSI's. + * + * @sn_irq_info: SN IRQ context + * + * Given an sn_irq_info, set the associated CE device's interrupt destination + * register. Since the interrupt destination registers are on a per-ce-slot + * basis, this will retarget line interrupts for all functions downstream of + * the slot. + */ +static void +tioce_target_interrupt(struct sn_irq_info *sn_irq_info) +{ + struct pcidev_info *pcidev_info; + struct tioce_common *ce_common; + struct tioce *ce_mmr; + int bit; + + pcidev_info = (struct pcidev_info *)sn_irq_info->irq_pciioinfo; + if (!pcidev_info) + return; + + ce_common = (struct tioce_common *)pcidev_info->pdi_pcibus_info; + ce_mmr = (struct tioce *)ce_common->ce_pcibus.bs_base; + + bit = sn_irq_info->irq_int_bit; + + ce_mmr->ce_adm_int_mask |= (1UL << bit); + ce_mmr->ce_adm_int_dest[bit] = + ((uint64_t)sn_irq_info->irq_irq << INTR_VECTOR_SHFT) | + sn_irq_info->irq_xtalkaddr; + ce_mmr->ce_adm_int_mask &= ~(1UL << bit); + + tioce_force_interrupt(sn_irq_info); +} + +/** * tioce_bus_fixup - perform final PCI fixup for a TIO CE bus * @prom_bussoft: Common prom/kernel struct representing the bus * @@ -719,7 +756,8 @@ static struct sn_pcibus_provider tioce_pci_interfaces = { .dma_map_consistent = tioce_dma_consistent, .dma_unmap = tioce_dma_unmap, .bus_fixup = tioce_bus_fixup, - .force_interrupt = tioce_force_interrupt + .force_interrupt = tioce_force_interrupt, + .target_interrupt = tioce_target_interrupt }; /** diff --git a/include/asm-ia64/sn/pcibus_provider_defs.h b/include/asm-ia64/sn/pcibus_provider_defs.h index 5a92f51..ad0e8e8 100644 --- a/include/asm-ia64/sn/pcibus_provider_defs.h +++ b/include/asm-ia64/sn/pcibus_provider_defs.h @@ -50,6 +50,7 @@ struct sn_pcibus_provider { void (*dma_unmap)(struct pci_dev *, dma_addr_t, int); void * (*bus_fixup)(struct pcibus_bussoft *, struct pci_controller *); void (*force_interrupt)(struct sn_irq_info *); + void (*target_interrupt)(struct sn_irq_info *); }; extern struct sn_pcibus_provider *sn_pci_provider[]; -- cgit v0.10.2 From d1e079b3fc90c7c114f46771e983a72ac8740882 Mon Sep 17 00:00:00 2001 From: Russ Anderson <(rja@sgi.com)> Date: Mon, 15 Aug 2005 14:46:00 -0700 Subject: [IA64-SGI] fix bte_copy() calling smp_processor_id() while preemptible bte_copy() calls calls smp_processor_id(), which will get flagged if preemption if enabled. raw_smp_processor_id() is used instead because we are just using it to pick a BTE interface and are not tied to a specific cpu. Signed-off-by: Russ Anderson (rja@sgi.com) Signed-off-by: Tony Luck diff --git a/arch/ia64/sn/kernel/bte.c b/arch/ia64/sn/kernel/bte.c index b75814e..45854c6 100644 --- a/arch/ia64/sn/kernel/bte.c +++ b/arch/ia64/sn/kernel/bte.c @@ -105,7 +105,7 @@ bte_result_t bte_copy(u64 src, u64 dest, u64 len, u64 mode, void *notification) /* * Start with interface corresponding to cpu number */ - bte_first = get_cpu() % btes_per_node; + bte_first = raw_smp_processor_id() % btes_per_node; if (mode & BTE_USE_DEST) { /* try remote then local */ -- cgit v0.10.2 From 4db8699bcfa8faddb5727b1cb010a4d9b8a42e8c Mon Sep 17 00:00:00 2001 From: Venkatesh Pallipadi Date: Fri, 29 Jul 2005 16:15:00 -0700 Subject: [IA64] Add ACPI based P-state support Patch to support P-state transitions on ia64. This driver is based on ACPI, and uses the ACPI processor driver interface to find out the P-state support information for the processor. This driver plugs into generic cpufreq infrastructure. Once this driver is loaded successfully, ondemand/userspace governor can be used to change the CPU frequency dynamically based on load or on request from userspace process. Refer : ACPI specification - http://www.acpi.info P-state related PAL calls - http://developer.intel.com/design/itanium/downloads/24869909.pdf Signed-off-by: Venkatesh Pallipadi Signed-off-by: Tony Luck diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index 8098813..3deced6 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -383,6 +383,12 @@ source "drivers/acpi/Kconfig" endif +if PM + +source "arch/ia64/kernel/cpufreq/Kconfig" + +endif + endmenu if !IA64_HP_SIM diff --git a/arch/ia64/kernel/Makefile b/arch/ia64/kernel/Makefile index e1fb68d..b242594 100644 --- a/arch/ia64/kernel/Makefile +++ b/arch/ia64/kernel/Makefile @@ -20,6 +20,7 @@ obj-$(CONFIG_SMP) += smp.o smpboot.o domain.o obj-$(CONFIG_NUMA) += numa.o obj-$(CONFIG_PERFMON) += perfmon_default_smpl.o obj-$(CONFIG_IA64_CYCLONE) += cyclone.o +obj-$(CONFIG_CPU_FREQ) += cpufreq/ obj-$(CONFIG_IA64_MCA_RECOVERY) += mca_recovery.o obj-$(CONFIG_KPROBES) += kprobes.o jprobes.o obj-$(CONFIG_IA64_UNCACHED_ALLOCATOR) += uncached.o diff --git a/arch/ia64/kernel/cpufreq/Kconfig b/arch/ia64/kernel/cpufreq/Kconfig new file mode 100644 index 0000000..2d9d527 --- /dev/null +++ b/arch/ia64/kernel/cpufreq/Kconfig @@ -0,0 +1,29 @@ + +# +# CPU Frequency scaling +# + +menu "CPU Frequency scaling" + +source "drivers/cpufreq/Kconfig" + +if CPU_FREQ + +comment "CPUFreq processor drivers" + +config IA64_ACPI_CPUFREQ + tristate "ACPI Processor P-States driver" + select CPU_FREQ_TABLE + depends on ACPI_PROCESSOR + help + This driver adds a CPUFreq driver which utilizes the ACPI + Processor Performance States. + + For details, take a look at . + + If in doubt, say N. + +endif # CPU_FREQ + +endmenu + diff --git a/arch/ia64/kernel/cpufreq/Makefile b/arch/ia64/kernel/cpufreq/Makefile new file mode 100644 index 0000000..f748d34 --- /dev/null +++ b/arch/ia64/kernel/cpufreq/Makefile @@ -0,0 +1 @@ +obj-$(CONFIG_IA64_ACPI_CPUFREQ) += acpi-cpufreq.o diff --git a/arch/ia64/kernel/cpufreq/acpi-cpufreq.c b/arch/ia64/kernel/cpufreq/acpi-cpufreq.c new file mode 100644 index 0000000..da4d5cf --- /dev/null +++ b/arch/ia64/kernel/cpufreq/acpi-cpufreq.c @@ -0,0 +1,499 @@ +/* + * arch/ia64/kernel/cpufreq/acpi-cpufreq.c + * This file provides the ACPI based P-state support. This + * module works with generic cpufreq infrastructure. Most of + * the code is based on i386 version + * (arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c) + * + * Copyright (C) 2005 Intel Corp + * Venkatesh Pallipadi + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "acpi-cpufreq", msg) + +MODULE_AUTHOR("Venkatesh Pallipadi"); +MODULE_DESCRIPTION("ACPI Processor P-States Driver"); +MODULE_LICENSE("GPL"); + + +struct cpufreq_acpi_io { + struct acpi_processor_performance acpi_data; + struct cpufreq_frequency_table *freq_table; + unsigned int resume; +}; + +static struct cpufreq_acpi_io *acpi_io_data[NR_CPUS]; + +static struct cpufreq_driver acpi_cpufreq_driver; + + +static int +processor_set_pstate ( + u32 value) +{ + s64 retval; + + dprintk("processor_set_pstate\n"); + + retval = ia64_pal_set_pstate((u64)value); + + if (retval) { + dprintk("Failed to set freq to 0x%x, with error 0x%x\n", + value, retval); + return -ENODEV; + } + return (int)retval; +} + + +static int +processor_get_pstate ( + u32 *value) +{ + u64 pstate_index = 0; + s64 retval; + + dprintk("processor_get_pstate\n"); + + retval = ia64_pal_get_pstate(&pstate_index); + *value = (u32) pstate_index; + + if (retval) + dprintk("Failed to get current freq with " + "error 0x%x, idx 0x%x\n", retval, *value); + + return (int)retval; +} + + +/* To be used only after data->acpi_data is initialized */ +static unsigned +extract_clock ( + struct cpufreq_acpi_io *data, + unsigned value, + unsigned int cpu) +{ + unsigned long i; + + dprintk("extract_clock\n"); + + for (i = 0; i < data->acpi_data.state_count; i++) { + if (value >= data->acpi_data.states[i].control) + return data->acpi_data.states[i].core_frequency; + } + return data->acpi_data.states[i-1].core_frequency; +} + + +static unsigned int +processor_get_freq ( + struct cpufreq_acpi_io *data, + unsigned int cpu) +{ + int ret = 0; + u32 value = 0; + cpumask_t saved_mask; + unsigned long clock_freq; + + dprintk("processor_get_freq\n"); + + saved_mask = current->cpus_allowed; + set_cpus_allowed(current, cpumask_of_cpu(cpu)); + if (smp_processor_id() != cpu) { + ret = -EAGAIN; + goto migrate_end; + } + + /* + * processor_get_pstate gets the average frequency since the + * last get. So, do two PAL_get_freq()... + */ + ret = processor_get_pstate(&value); + ret = processor_get_pstate(&value); + + if (ret) { + set_cpus_allowed(current, saved_mask); + printk(KERN_WARNING "get performance failed with error %d\n", + ret); + ret = -EAGAIN; + goto migrate_end; + } + clock_freq = extract_clock(data, value, cpu); + ret = (clock_freq*1000); + +migrate_end: + set_cpus_allowed(current, saved_mask); + return ret; +} + + +static int +processor_set_freq ( + struct cpufreq_acpi_io *data, + unsigned int cpu, + int state) +{ + int ret = 0; + u32 value = 0; + struct cpufreq_freqs cpufreq_freqs; + cpumask_t saved_mask; + int retval; + + dprintk("processor_set_freq\n"); + + saved_mask = current->cpus_allowed; + set_cpus_allowed(current, cpumask_of_cpu(cpu)); + if (smp_processor_id() != cpu) { + retval = -EAGAIN; + goto migrate_end; + } + + if (state == data->acpi_data.state) { + if (unlikely(data->resume)) { + dprintk("Called after resume, resetting to P%d\n", state); + data->resume = 0; + } else { + dprintk("Already at target state (P%d)\n", state); + retval = 0; + goto migrate_end; + } + } + + dprintk("Transitioning from P%d to P%d\n", + data->acpi_data.state, state); + + /* cpufreq frequency struct */ + cpufreq_freqs.cpu = cpu; + cpufreq_freqs.old = data->freq_table[data->acpi_data.state].frequency; + cpufreq_freqs.new = data->freq_table[state].frequency; + + /* notify cpufreq */ + cpufreq_notify_transition(&cpufreq_freqs, CPUFREQ_PRECHANGE); + + /* + * First we write the target state's 'control' value to the + * control_register. + */ + + value = (u32) data->acpi_data.states[state].control; + + dprintk("Transitioning to state: 0x%08x\n", value); + + ret = processor_set_pstate(value); + if (ret) { + unsigned int tmp = cpufreq_freqs.new; + cpufreq_notify_transition(&cpufreq_freqs, CPUFREQ_POSTCHANGE); + cpufreq_freqs.new = cpufreq_freqs.old; + cpufreq_freqs.old = tmp; + cpufreq_notify_transition(&cpufreq_freqs, CPUFREQ_PRECHANGE); + cpufreq_notify_transition(&cpufreq_freqs, CPUFREQ_POSTCHANGE); + printk(KERN_WARNING "Transition failed with error %d\n", ret); + retval = -ENODEV; + goto migrate_end; + } + + cpufreq_notify_transition(&cpufreq_freqs, CPUFREQ_POSTCHANGE); + + data->acpi_data.state = state; + + retval = 0; + +migrate_end: + set_cpus_allowed(current, saved_mask); + return (retval); +} + + +static unsigned int +acpi_cpufreq_get ( + unsigned int cpu) +{ + struct cpufreq_acpi_io *data = acpi_io_data[cpu]; + + dprintk("acpi_cpufreq_get\n"); + + return processor_get_freq(data, cpu); +} + + +static int +acpi_cpufreq_target ( + struct cpufreq_policy *policy, + unsigned int target_freq, + unsigned int relation) +{ + struct cpufreq_acpi_io *data = acpi_io_data[policy->cpu]; + unsigned int next_state = 0; + unsigned int result = 0; + + dprintk("acpi_cpufreq_setpolicy\n"); + + result = cpufreq_frequency_table_target(policy, + data->freq_table, target_freq, relation, &next_state); + if (result) + return (result); + + result = processor_set_freq(data, policy->cpu, next_state); + + return (result); +} + + +static int +acpi_cpufreq_verify ( + struct cpufreq_policy *policy) +{ + unsigned int result = 0; + struct cpufreq_acpi_io *data = acpi_io_data[policy->cpu]; + + dprintk("acpi_cpufreq_verify\n"); + + result = cpufreq_frequency_table_verify(policy, + data->freq_table); + + return (result); +} + + +/* + * processor_init_pdc - let BIOS know about the SMP capabilities + * of this driver + * @perf: processor-specific acpi_io_data struct + * @cpu: CPU being initialized + * + * To avoid issues with legacy OSes, some BIOSes require to be informed of + * the SMP capabilities of OS P-state driver. Here we set the bits in _PDC + * accordingly. Actual call to _PDC is done in driver/acpi/processor.c + */ +static void +processor_init_pdc ( + struct acpi_processor_performance *perf, + unsigned int cpu, + struct acpi_object_list *obj_list + ) +{ + union acpi_object *obj; + u32 *buf; + + dprintk("processor_init_pdc\n"); + + perf->pdc = NULL; + /* Initialize pdc. It will be used later. */ + if (!obj_list) + return; + + if (!(obj_list->count && obj_list->pointer)) + return; + + obj = obj_list->pointer; + if ((obj->buffer.length == 12) && obj->buffer.pointer) { + buf = (u32 *)obj->buffer.pointer; + buf[0] = ACPI_PDC_REVISION_ID; + buf[1] = 1; + buf[2] = ACPI_PDC_EST_CAPABILITY_SMP; + perf->pdc = obj_list; + } + return; +} + + +static int +acpi_cpufreq_cpu_init ( + struct cpufreq_policy *policy) +{ + unsigned int i; + unsigned int cpu = policy->cpu; + struct cpufreq_acpi_io *data; + unsigned int result = 0; + + union acpi_object arg0 = {ACPI_TYPE_BUFFER}; + u32 arg0_buf[3]; + struct acpi_object_list arg_list = {1, &arg0}; + + dprintk("acpi_cpufreq_cpu_init\n"); + /* setup arg_list for _PDC settings */ + arg0.buffer.length = 12; + arg0.buffer.pointer = (u8 *) arg0_buf; + + data = kmalloc(sizeof(struct cpufreq_acpi_io), GFP_KERNEL); + if (!data) + return (-ENOMEM); + + memset(data, 0, sizeof(struct cpufreq_acpi_io)); + + acpi_io_data[cpu] = data; + + processor_init_pdc(&data->acpi_data, cpu, &arg_list); + result = acpi_processor_register_performance(&data->acpi_data, cpu); + data->acpi_data.pdc = NULL; + + if (result) + goto err_free; + + /* capability check */ + if (data->acpi_data.state_count <= 1) { + dprintk("No P-States\n"); + result = -ENODEV; + goto err_unreg; + } + + if ((data->acpi_data.control_register.space_id != + ACPI_ADR_SPACE_FIXED_HARDWARE) || + (data->acpi_data.status_register.space_id != + ACPI_ADR_SPACE_FIXED_HARDWARE)) { + dprintk("Unsupported address space [%d, %d]\n", + (u32) (data->acpi_data.control_register.space_id), + (u32) (data->acpi_data.status_register.space_id)); + result = -ENODEV; + goto err_unreg; + } + + /* alloc freq_table */ + data->freq_table = kmalloc(sizeof(struct cpufreq_frequency_table) * + (data->acpi_data.state_count + 1), + GFP_KERNEL); + if (!data->freq_table) { + result = -ENOMEM; + goto err_unreg; + } + + /* detect transition latency */ + policy->cpuinfo.transition_latency = 0; + for (i=0; iacpi_data.state_count; i++) { + if ((data->acpi_data.states[i].transition_latency * 1000) > + policy->cpuinfo.transition_latency) { + policy->cpuinfo.transition_latency = + data->acpi_data.states[i].transition_latency * 1000; + } + } + policy->governor = CPUFREQ_DEFAULT_GOVERNOR; + + policy->cur = processor_get_freq(data, policy->cpu); + + /* table init */ + for (i = 0; i <= data->acpi_data.state_count; i++) + { + data->freq_table[i].index = i; + if (i < data->acpi_data.state_count) { + data->freq_table[i].frequency = + data->acpi_data.states[i].core_frequency * 1000; + } else { + data->freq_table[i].frequency = CPUFREQ_TABLE_END; + } + } + + result = cpufreq_frequency_table_cpuinfo(policy, data->freq_table); + if (result) { + goto err_freqfree; + } + + /* notify BIOS that we exist */ + acpi_processor_notify_smm(THIS_MODULE); + + printk(KERN_INFO "acpi-cpufreq: CPU%u - ACPI performance management " + "activated.\n", cpu); + + for (i = 0; i < data->acpi_data.state_count; i++) + dprintk(" %cP%d: %d MHz, %d mW, %d uS, %d uS, 0x%x 0x%x\n", + (i == data->acpi_data.state?'*':' '), i, + (u32) data->acpi_data.states[i].core_frequency, + (u32) data->acpi_data.states[i].power, + (u32) data->acpi_data.states[i].transition_latency, + (u32) data->acpi_data.states[i].bus_master_latency, + (u32) data->acpi_data.states[i].status, + (u32) data->acpi_data.states[i].control); + + cpufreq_frequency_table_get_attr(data->freq_table, policy->cpu); + + /* the first call to ->target() should result in us actually + * writing something to the appropriate registers. */ + data->resume = 1; + + return (result); + + err_freqfree: + kfree(data->freq_table); + err_unreg: + acpi_processor_unregister_performance(&data->acpi_data, cpu); + err_free: + kfree(data); + acpi_io_data[cpu] = NULL; + + return (result); +} + + +static int +acpi_cpufreq_cpu_exit ( + struct cpufreq_policy *policy) +{ + struct cpufreq_acpi_io *data = acpi_io_data[policy->cpu]; + + dprintk("acpi_cpufreq_cpu_exit\n"); + + if (data) { + cpufreq_frequency_table_put_attr(policy->cpu); + acpi_io_data[policy->cpu] = NULL; + acpi_processor_unregister_performance(&data->acpi_data, + policy->cpu); + kfree(data); + } + + return (0); +} + + +static struct freq_attr* acpi_cpufreq_attr[] = { + &cpufreq_freq_attr_scaling_available_freqs, + NULL, +}; + + +static struct cpufreq_driver acpi_cpufreq_driver = { + .verify = acpi_cpufreq_verify, + .target = acpi_cpufreq_target, + .get = acpi_cpufreq_get, + .init = acpi_cpufreq_cpu_init, + .exit = acpi_cpufreq_cpu_exit, + .name = "acpi-cpufreq", + .owner = THIS_MODULE, + .attr = acpi_cpufreq_attr, +}; + + +static int __init +acpi_cpufreq_init (void) +{ + dprintk("acpi_cpufreq_init\n"); + + return cpufreq_register_driver(&acpi_cpufreq_driver); +} + + +static void __exit +acpi_cpufreq_exit (void) +{ + dprintk("acpi_cpufreq_exit\n"); + + cpufreq_unregister_driver(&acpi_cpufreq_driver); + return; +} + + +late_initcall(acpi_cpufreq_init); +module_exit(acpi_cpufreq_exit); + diff --git a/include/asm-ia64/acpi.h b/include/asm-ia64/acpi.h index 4c06d45..3a544ff 100644 --- a/include/asm-ia64/acpi.h +++ b/include/asm-ia64/acpi.h @@ -116,6 +116,11 @@ extern int __initdata nid_to_pxm_map[MAX_NUMNODES]; extern u16 ia64_acpiid_to_sapicid[]; +/* + * Refer Intel ACPI _PDC support document for bit definitions + */ +#define ACPI_PDC_EST_CAPABILITY_SMP 0x8 + #endif /*__KERNEL__*/ #endif /*_ASM_ACPI_H*/ diff --git a/include/asm-ia64/pal.h b/include/asm-ia64/pal.h index 2303a10..e828377 100644 --- a/include/asm-ia64/pal.h +++ b/include/asm-ia64/pal.h @@ -75,6 +75,8 @@ #define PAL_CACHE_READ 259 /* read tag & data of cacheline for diagnostic testing */ #define PAL_CACHE_WRITE 260 /* write tag & data of cacheline for diagnostic testing */ #define PAL_VM_TR_READ 261 /* read contents of translation register */ +#define PAL_GET_PSTATE 262 /* get the current P-state */ +#define PAL_SET_PSTATE 263 /* set the P-state */ #ifndef __ASSEMBLY__ @@ -1111,6 +1113,25 @@ ia64_pal_halt_info (pal_power_mgmt_info_u_t *power_buf) return iprv.status; } +/* Get the current P-state information */ +static inline s64 +ia64_pal_get_pstate (u64 *pstate_index) +{ + struct ia64_pal_retval iprv; + PAL_CALL_STK(iprv, PAL_GET_PSTATE, 0, 0, 0); + *pstate_index = iprv.v0; + return iprv.status; +} + +/* Set the P-state */ +static inline s64 +ia64_pal_set_pstate (u64 pstate_index) +{ + struct ia64_pal_retval iprv; + PAL_CALL_STK(iprv, PAL_SET_PSTATE, pstate_index, 0, 0); + return iprv.status; +} + /* Cause the processor to enter LIGHT HALT state, where prefetching and execution are * suspended, but cache and TLB coherency is maintained. */ -- cgit v0.10.2 From f8220c7f15c54aefb4976ad7c6e2aa98b27a8ce8 Mon Sep 17 00:00:00 2001 From: Kenneth Chen Date: Fri, 26 Aug 2005 14:57:00 -0700 Subject: [IA64] Delete erroneous copy_page.o in global lib-y list copy_page.o appeared twice in arch/ia64/lib/Makefile. The one in global lib-y is wrong where it should be just in lib-$(CONFIG_ITANIUM). Both copy_page.o and copy_page_mck.o are build for Itanium2 processor and the link order will pick up the low performing copy_page function (originally written for itanium processor). In this case, we really want the copy_page_mck.o for optimized version. Signed-off-by: Kenneth Chen Signed-off-by: Tony Luck diff --git a/arch/ia64/lib/Makefile b/arch/ia64/lib/Makefile index 1902c3c..799407e 100644 --- a/arch/ia64/lib/Makefile +++ b/arch/ia64/lib/Makefile @@ -6,7 +6,7 @@ obj-y := io.o lib-y := __divsi3.o __udivsi3.o __modsi3.o __umodsi3.o \ __divdi3.o __udivdi3.o __moddi3.o __umoddi3.o \ - bitop.o checksum.o clear_page.o csum_partial_copy.o copy_page.o \ + bitop.o checksum.o clear_page.o csum_partial_copy.o \ clear_user.o strncpy_from_user.o strlen_user.o strnlen_user.o \ flush.o ip_fast_csum.o do_csum.o \ memset.o strlen.o swiotlb.o -- cgit v0.10.2 From e438befd76a5a743725a89365140a8a7bf276096 Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Mon, 29 Aug 2005 16:13:36 -0700 Subject: [IA64-SGI] One new use of "UNCACHED" needed fixing for sn2 region cleanup Some shub2 changes were not in the tree when Greg cleaned up the sn2 region definitions in 1b66776da71e33dff5edcc0b096ec3b7c40c75ad, so this one didn't get fixed. Signed-off-by: Tony Luck diff --git a/include/asm-ia64/sn/addrs.h b/include/asm-ia64/sn/addrs.h index 5b7823d..2c32e4b 100644 --- a/include/asm-ia64/sn/addrs.h +++ b/include/asm-ia64/sn/addrs.h @@ -258,7 +258,7 @@ /* Shub1 TIO & MMR addressing macros */ #define SH2_TIO_IOSPACE_ADDR(n,x) \ - ((UNCACHED | REMOTE_ADDR(n,x) | 1UL << (NASID_SHIFT - 2))) + ((__IA64_UNCACHED_OFFSET | REMOTE_ADDR(n,x) | 1UL << (NASID_SHIFT - 2))) #define SH2_REMOTE_MMR(n,x) \ GLOBAL_MMR_ADDR(n,x) -- cgit v0.10.2 From ff67b59726a8cd3549b069dfa78de2f538d3b8e3 Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Tue, 30 Aug 2005 14:59:24 -0700 Subject: [IA64] Low byte of current->personality is not a bitmask. Peter Staubach pointed out that it is not correct to check current->personality & PER_LINUX32 (this will have false hits on several other personality values). Signed-off-by: Tony Luck diff --git a/include/asm-ia64/fcntl.h b/include/asm-ia64/fcntl.h index c9f8d83..cee16ea 100644 --- a/include/asm-ia64/fcntl.h +++ b/include/asm-ia64/fcntl.h @@ -81,6 +81,7 @@ struct flock { #define F_LINUX_SPECIFIC_BASE 1024 -#define force_o_largefile() ( ! (current->personality & PER_LINUX32) ) +#define force_o_largefile() \ + (personality(current->personality) != PER_LINUX32) #endif /* _ASM_IA64_FCNTL_H */ -- cgit v0.10.2 From a994018a5fc987702dfb4f5d31172842ea6186dc Mon Sep 17 00:00:00 2001 From: Martin Hicks Date: Wed, 31 Aug 2005 11:21:00 -0700 Subject: [IA64] uncached allocator: use generic (not sn2 specific) functions Change sn2-specific calls into generic functions. Without this change the uncached allocator will not work on non-sn2 platforms. Signed-off-by: Greg Edwards Signed-off-by: Martin Hicks Signed-off-by: Tony Luck diff --git a/arch/ia64/kernel/uncached.c b/arch/ia64/kernel/uncached.c index 490dfc9..4e9d06c 100644 --- a/arch/ia64/kernel/uncached.c +++ b/arch/ia64/kernel/uncached.c @@ -184,7 +184,7 @@ uncached_free_page(unsigned long maddr) { int node; - node = nasid_to_cnodeid(NASID_GET(maddr)); + node = paddr_to_nid(maddr - __IA64_UNCACHED_OFFSET); dprintk(KERN_DEBUG "uncached_free_page(%lx) on node %i\n", maddr, node); @@ -217,7 +217,7 @@ uncached_build_memmap(unsigned long start, unsigned long end, void *arg) memset((char *)vstart, 0, length); - node = nasid_to_cnodeid(NASID_GET(start)); + node = paddr_to_nid(start); for (; vstart < vend ; vstart += PAGE_SIZE) { dprintk(KERN_INFO "sticking %lx into the pool!\n", vstart); -- cgit v0.10.2