diff options
200 files changed, 17001 insertions, 840 deletions
diff --git a/Documentation/ABI/testing/sysfs-class-usb_host b/Documentation/ABI/testing/sysfs-class-uwb_rc-wusbhc index 46b66ad1..4e8106f 100644 --- a/Documentation/ABI/testing/sysfs-class-usb_host +++ b/Documentation/ABI/testing/sysfs-class-uwb_rc-wusbhc @@ -1,4 +1,4 @@ -What: /sys/class/usb_host/usb_hostN/wusb_chid +What: /sys/class/uwb_rc/uwbN/wusbhc/wusb_chid Date: July 2008 KernelVersion: 2.6.27 Contact: David Vrabel <david.vrabel@csr.com> @@ -9,7 +9,7 @@ Description: Set an all zero CHID to stop the host controller. -What: /sys/class/usb_host/usb_hostN/wusb_trust_timeout +What: /sys/class/uwb_rc/uwbN/wusbhc/wusb_trust_timeout Date: July 2008 KernelVersion: 2.6.27 Contact: David Vrabel <david.vrabel@csr.com> diff --git a/Documentation/flexible-arrays.txt b/Documentation/flexible-arrays.txt index 84eb268..cb8a3a0 100644 --- a/Documentation/flexible-arrays.txt +++ b/Documentation/flexible-arrays.txt @@ -1,5 +1,5 @@ Using flexible arrays in the kernel -Last updated for 2.6.31 +Last updated for 2.6.32 Jonathan Corbet <corbet@lwn.net> Large contiguous memory allocations can be unreliable in the Linux kernel. @@ -40,6 +40,13 @@ argument is passed directly to the internal memory allocation calls. With the current code, using flags to ask for high memory is likely to lead to notably unpleasant side effects. +It is also possible to define flexible arrays at compile time with: + + DEFINE_FLEX_ARRAY(name, element_size, total); + +This macro will result in a definition of an array with the given name; the +element size and total will be checked for validity at compile time. + Storing data into a flexible array is accomplished with a call to: int flex_array_put(struct flex_array *array, unsigned int element_nr, @@ -76,16 +83,30 @@ particular element has never been allocated. Note that it is possible to get back a valid pointer for an element which has never been stored in the array. Memory for array elements is allocated one page at a time; a single allocation could provide memory for several -adjacent elements. The flexible array code does not know if a specific -element has been written; it only knows if the associated memory is -present. So a flex_array_get() call on an element which was never stored -in the array has the potential to return a pointer to random data. If the -caller does not have a separate way to know which elements were actually -stored, it might be wise, at least, to add GFP_ZERO to the flags argument -to ensure that all elements are zeroed. - -There is no way to remove a single element from the array. It is possible, -though, to remove all elements with a call to: +adjacent elements. Flexible array elements are normally initialized to the +value FLEX_ARRAY_FREE (defined as 0x6c in <linux/poison.h>), so errors +involving that number probably result from use of unstored array entries. +Note that, if array elements are allocated with __GFP_ZERO, they will be +initialized to zero and this poisoning will not happen. + +Individual elements in the array can be cleared with: + + int flex_array_clear(struct flex_array *array, unsigned int element_nr); + +This function will set the given element to FLEX_ARRAY_FREE and return +zero. If storage for the indicated element is not allocated for the array, +flex_array_clear() will return -EINVAL instead. Note that clearing an +element does not release the storage associated with it; to reduce the +allocated size of an array, call: + + int flex_array_shrink(struct flex_array *array); + +The return value will be the number of pages of memory actually freed. +This function works by scanning the array for pages containing nothing but +FLEX_ARRAY_FREE bytes, so (1) it can be expensive, and (2) it will not work +if the array's pages are allocated with __GFP_ZERO. + +It is possible to remove all elements of an array with a call to: void flex_array_free_parts(struct flex_array *array); diff --git a/Documentation/trace/kprobetrace.txt b/Documentation/trace/kprobetrace.txt new file mode 100644 index 0000000..1541524 --- /dev/null +++ b/Documentation/trace/kprobetrace.txt @@ -0,0 +1,151 @@ + Kprobe-based Event Tracer + ========================= + + Documentation is written by Masami Hiramatsu + + +Overview +-------- +This tracer is similar to the events tracer which is based on Tracepoint +infrastructure. Instead of Tracepoint, this tracer is based on kprobes(kprobe +and kretprobe). It probes anywhere where kprobes can probe(this means, all +functions body except for __kprobes functions). + +Unlike the function tracer, this tracer can probe instructions inside of +kernel functions. It allows you to check which instruction has been executed. + +Unlike the Tracepoint based events tracer, this tracer can add and remove +probe points on the fly. + +Similar to the events tracer, this tracer doesn't need to be activated via +current_tracer, instead of that, just set probe points via +/sys/kernel/debug/tracing/kprobe_events. And you can set filters on each +probe events via /sys/kernel/debug/tracing/events/kprobes/<EVENT>/filter. + + +Synopsis of kprobe_events +------------------------- + p[:[GRP/]EVENT] SYMBOL[+offs]|MEMADDR [FETCHARGS] : Set a probe + r[:[GRP/]EVENT] SYMBOL[+0] [FETCHARGS] : Set a return probe + + GRP : Group name. If omitted, use "kprobes" for it. + EVENT : Event name. If omitted, the event name is generated + based on SYMBOL+offs or MEMADDR. + SYMBOL[+offs] : Symbol+offset where the probe is inserted. + MEMADDR : Address where the probe is inserted. + + FETCHARGS : Arguments. Each probe can have up to 128 args. + %REG : Fetch register REG + @ADDR : Fetch memory at ADDR (ADDR should be in kernel) + @SYM[+|-offs] : Fetch memory at SYM +|- offs (SYM should be a data symbol) + $stackN : Fetch Nth entry of stack (N >= 0) + $stack : Fetch stack address. + $argN : Fetch function argument. (N >= 0)(*) + $retval : Fetch return value.(**) + +|-offs(FETCHARG) : Fetch memory at FETCHARG +|- offs address.(***) + NAME=FETCHARG: Set NAME as the argument name of FETCHARG. + + (*) aN may not correct on asmlinkaged functions and at the middle of + function body. + (**) only for return probe. + (***) this is useful for fetching a field of data structures. + + +Per-Probe Event Filtering +------------------------- + Per-probe event filtering feature allows you to set different filter on each +probe and gives you what arguments will be shown in trace buffer. If an event +name is specified right after 'p:' or 'r:' in kprobe_events, the tracer adds +an event under tracing/events/kprobes/<EVENT>, at the directory you can see +'id', 'enabled', 'format' and 'filter'. + +enabled: + You can enable/disable the probe by writing 1 or 0 on it. + +format: + This shows the format of this probe event. + +filter: + You can write filtering rules of this event. + +id: + This shows the id of this probe event. + +Event Profiling +--------------- + You can check the total number of probe hits and probe miss-hits via +/sys/kernel/debug/tracing/kprobe_profile. + The first column is event name, the second is the number of probe hits, +the third is the number of probe miss-hits. + + +Usage examples +-------------- +To add a probe as a new event, write a new definition to kprobe_events +as below. + + echo p:myprobe do_sys_open dfd=$arg0 filename=$arg1 flags=$arg2 mode=$arg3 > /sys/kernel/debug/tracing/kprobe_events + + This sets a kprobe on the top of do_sys_open() function with recording +1st to 4th arguments as "myprobe" event. As this example shows, users can +choose more familiar names for each arguments. + + echo r:myretprobe do_sys_open $retval >> /sys/kernel/debug/tracing/kprobe_events + + This sets a kretprobe on the return point of do_sys_open() function with +recording return value as "myretprobe" event. + You can see the format of these events via +/sys/kernel/debug/tracing/events/kprobes/<EVENT>/format. + + cat /sys/kernel/debug/tracing/events/kprobes/myprobe/format +name: myprobe +ID: 75 +format: + field:unsigned short common_type; offset:0; size:2; + field:unsigned char common_flags; offset:2; size:1; + field:unsigned char common_preempt_count; offset:3; size:1; + field:int common_pid; offset:4; size:4; + field:int common_tgid; offset:8; size:4; + + field: unsigned long ip; offset:16;tsize:8; + field: int nargs; offset:24;tsize:4; + field: unsigned long dfd; offset:32;tsize:8; + field: unsigned long filename; offset:40;tsize:8; + field: unsigned long flags; offset:48;tsize:8; + field: unsigned long mode; offset:56;tsize:8; + +print fmt: "(%lx) dfd=%lx filename=%lx flags=%lx mode=%lx", REC->ip, REC->dfd, REC->filename, REC->flags, REC->mode + + + You can see that the event has 4 arguments as in the expressions you specified. + + echo > /sys/kernel/debug/tracing/kprobe_events + + This clears all probe points. + + Right after definition, each event is disabled by default. For tracing these +events, you need to enable it. + + echo 1 > /sys/kernel/debug/tracing/events/kprobes/myprobe/enable + echo 1 > /sys/kernel/debug/tracing/events/kprobes/myretprobe/enable + + And you can see the traced information via /sys/kernel/debug/tracing/trace. + + cat /sys/kernel/debug/tracing/trace +# tracer: nop +# +# TASK-PID CPU# TIMESTAMP FUNCTION +# | | | | | + <...>-1447 [001] 1038282.286875: myprobe: (do_sys_open+0x0/0xd6) dfd=3 filename=7fffd1ec4440 flags=8000 mode=0 + <...>-1447 [001] 1038282.286878: myretprobe: (sys_openat+0xc/0xe <- do_sys_open) $retval=fffffffffffffffe + <...>-1447 [001] 1038282.286885: myprobe: (do_sys_open+0x0/0xd6) dfd=ffffff9c filename=40413c flags=8000 mode=1b6 + <...>-1447 [001] 1038282.286915: myretprobe: (sys_open+0x1b/0x1d <- do_sys_open) $retval=3 + <...>-1447 [001] 1038282.286969: myprobe: (do_sys_open+0x0/0xd6) dfd=ffffff9c filename=4041c6 flags=98800 mode=10 + <...>-1447 [001] 1038282.286976: myretprobe: (sys_open+0x1b/0x1d <- do_sys_open) $retval=3 + + + Each line shows when the kernel hits an event, and <- SYMBOL means kernel +returns from SYMBOL(e.g. "sys_open+0x1b/0x1d <- do_sys_open" means kernel +returns from do_sys_open to sys_open+0x1b). + + diff --git a/MAINTAINERS b/MAINTAINERS index d5eb8c1..8824115 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3667,6 +3667,7 @@ NETWORKING [GENERAL] M: "David S. Miller" <davem@davemloft.net> L: netdev@vger.kernel.org W: http://www.linuxfoundation.org/en/Net +W: http://patchwork.ozlabs.org/project/netdev/list/ T: git git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-2.6.git S: Maintained F: net/ @@ -5664,6 +5665,13 @@ S: Maintained F: drivers/vlynq/vlynq.c F: include/linux/vlynq.h +VMWARE VMXNET3 ETHERNET DRIVER +M: Shreyas Bhatewara <sbhatewara@vmware.com> +M: VMware, Inc. <pv-drivers@vmware.com> +L: netdev@vger.kernel.org +S: Maintained +F: drivers/net/vmxnet3/ + VOLTAGE AND CURRENT REGULATOR FRAMEWORK M: Liam Girdwood <lrg@slimlogic.co.uk> M: Mark Brown <broonie@opensource.wolfsonmicro.com> @@ -1,7 +1,7 @@ VERSION = 2 PATCHLEVEL = 6 SUBLEVEL = 32 -EXTRAVERSION = -rc4 +EXTRAVERSION = -rc5 NAME = Man-Eating Seals of Antiquity # *DOCUMENTATION* diff --git a/arch/arm/configs/omap3_beagle_defconfig b/arch/arm/configs/omap3_beagle_defconfig index 357d402..b3c8cce 100644 --- a/arch/arm/configs/omap3_beagle_defconfig +++ b/arch/arm/configs/omap3_beagle_defconfig @@ -969,7 +969,6 @@ CONFIG_USB_ETH_RNDIS=y # CONFIG_USB_OTG_UTILS=y # CONFIG_USB_GPIO_VBUS is not set -# CONFIG_ISP1301_OMAP is not set CONFIG_TWL4030_USB=y # CONFIG_NOP_USB_XCEIV is not set CONFIG_MMC=y diff --git a/arch/arm/mach-omap2/board-rx51-peripherals.c b/arch/arm/mach-omap2/board-rx51-peripherals.c index c1af532..2b0eb1b 100644 --- a/arch/arm/mach-omap2/board-rx51-peripherals.c +++ b/arch/arm/mach-omap2/board-rx51-peripherals.c @@ -444,7 +444,7 @@ static int __init rx51_i2c_init(void) rx51_twldata.vaux3 = &rx51_vaux3_cam; rx51_twldata.vmmc2 = &rx51_vmmc2; } - omap_register_i2c_bus(1, 2600, rx51_peripherals_i2c_board_info_1, + omap_register_i2c_bus(1, 2200, rx51_peripherals_i2c_board_info_1, ARRAY_SIZE(rx51_peripherals_i2c_board_info_1)); omap_register_i2c_bus(2, 100, NULL, 0); omap_register_i2c_bus(3, 400, NULL, 0); diff --git a/arch/arm/mach-omap2/board-zoom2.c b/arch/arm/mach-omap2/board-zoom2.c index b7b3220..fd3369d 100644 --- a/arch/arm/mach-omap2/board-zoom2.c +++ b/arch/arm/mach-omap2/board-zoom2.c @@ -25,6 +25,7 @@ #include <mach/keypad.h> #include "mmc-twl4030.h" +#include "sdram-micron-mt46h32m32lf-6.h" /* Zoom2 has Qwerty keyboard*/ static int board_keymap[] = { @@ -213,7 +214,8 @@ static void __init omap_zoom2_init_irq(void) { omap_board_config = zoom2_config; omap_board_config_size = ARRAY_SIZE(zoom2_config); - omap2_init_common_hw(NULL, NULL); + omap2_init_common_hw(mt46h32m32lf6_sdrc_params, + mt46h32m32lf6_sdrc_params); omap_init_irq(); omap_gpio_init(); } diff --git a/arch/arm/mach-omap2/clock24xx.c b/arch/arm/mach-omap2/clock24xx.c index bc5d3ac..e2dbedd 100644 --- a/arch/arm/mach-omap2/clock24xx.c +++ b/arch/arm/mach-omap2/clock24xx.c @@ -769,6 +769,7 @@ int __init omap2_clk_init(void) if (c->cpu & cpu_mask) { clkdev_add(&c->lk); clk_register(c->lk.clk); + omap2_init_clk_clkdm(c->lk.clk); } /* Check the MPU rate set by bootloader */ diff --git a/arch/arm/mach-omap2/clockdomain.c b/arch/arm/mach-omap2/clockdomain.c index 4ef7b4f..58aff84 100644 --- a/arch/arm/mach-omap2/clockdomain.c +++ b/arch/arm/mach-omap2/clockdomain.c @@ -137,6 +137,36 @@ static void _clkdm_del_autodeps(struct clockdomain *clkdm) } } +/* + * _omap2_clkdm_set_hwsup - set the hwsup idle transition bit + * @clkdm: struct clockdomain * + * @enable: int 0 to disable, 1 to enable + * + * Internal helper for actually switching the bit that controls hwsup + * idle transitions for clkdm. + */ +static void _omap2_clkdm_set_hwsup(struct clockdomain *clkdm, int enable) +{ + u32 v; + + if (cpu_is_omap24xx()) { + if (enable) + v = OMAP24XX_CLKSTCTRL_ENABLE_AUTO; + else + v = OMAP24XX_CLKSTCTRL_DISABLE_AUTO; + } else if (cpu_is_omap34xx()) { + if (enable) + v = OMAP34XX_CLKSTCTRL_ENABLE_AUTO; + else + v = OMAP34XX_CLKSTCTRL_DISABLE_AUTO; + } else { + BUG(); + } + + cm_rmw_mod_reg_bits(clkdm->clktrctrl_mask, + v << __ffs(clkdm->clktrctrl_mask), + clkdm->pwrdm.ptr->prcm_offs, CM_CLKSTCTRL); +} static struct clockdomain *_clkdm_lookup(const char *name) { @@ -456,8 +486,6 @@ int omap2_clkdm_wakeup(struct clockdomain *clkdm) */ void omap2_clkdm_allow_idle(struct clockdomain *clkdm) { - u32 v; - if (!clkdm) return; @@ -473,18 +501,7 @@ void omap2_clkdm_allow_idle(struct clockdomain *clkdm) if (atomic_read(&clkdm->usecount) > 0) _clkdm_add_autodeps(clkdm); - if (cpu_is_omap24xx()) - v = OMAP24XX_CLKSTCTRL_ENABLE_AUTO; - else if (cpu_is_omap34xx()) - v = OMAP34XX_CLKSTCTRL_ENABLE_AUTO; - else - BUG(); - - - cm_rmw_mod_reg_bits(clkdm->clktrctrl_mask, - v << __ffs(clkdm->clktrctrl_mask), - clkdm->pwrdm.ptr->prcm_offs, - CM_CLKSTCTRL); + _omap2_clkdm_set_hwsup(clkdm, 1); pwrdm_clkdm_state_switch(clkdm); } @@ -500,8 +517,6 @@ void omap2_clkdm_allow_idle(struct clockdomain *clkdm) */ void omap2_clkdm_deny_idle(struct clockdomain *clkdm) { - u32 v; - if (!clkdm) return; @@ -514,16 +529,7 @@ void omap2_clkdm_deny_idle(struct clockdomain *clkdm) pr_debug("clockdomain: disabling automatic idle transitions for %s\n", clkdm->name); - if (cpu_is_omap24xx()) - v = OMAP24XX_CLKSTCTRL_DISABLE_AUTO; - else if (cpu_is_omap34xx()) - v = OMAP34XX_CLKSTCTRL_DISABLE_AUTO; - else - BUG(); - - cm_rmw_mod_reg_bits(clkdm->clktrctrl_mask, - v << __ffs(clkdm->clktrctrl_mask), - clkdm->pwrdm.ptr->prcm_offs, CM_CLKSTCTRL); + _omap2_clkdm_set_hwsup(clkdm, 0); if (atomic_read(&clkdm->usecount) > 0) _clkdm_del_autodeps(clkdm); @@ -569,10 +575,14 @@ int omap2_clkdm_clk_enable(struct clockdomain *clkdm, struct clk *clk) v = omap2_clkdm_clktrctrl_read(clkdm); if ((cpu_is_omap34xx() && v == OMAP34XX_CLKSTCTRL_ENABLE_AUTO) || - (cpu_is_omap24xx() && v == OMAP24XX_CLKSTCTRL_ENABLE_AUTO)) + (cpu_is_omap24xx() && v == OMAP24XX_CLKSTCTRL_ENABLE_AUTO)) { + /* Disable HW transitions when we are changing deps */ + _omap2_clkdm_set_hwsup(clkdm, 0); _clkdm_add_autodeps(clkdm); - else + _omap2_clkdm_set_hwsup(clkdm, 1); + } else { omap2_clkdm_wakeup(clkdm); + } pwrdm_wait_transition(clkdm->pwrdm.ptr); pwrdm_clkdm_state_switch(clkdm); @@ -623,10 +633,14 @@ int omap2_clkdm_clk_disable(struct clockdomain *clkdm, struct clk *clk) v = omap2_clkdm_clktrctrl_read(clkdm); if ((cpu_is_omap34xx() && v == OMAP34XX_CLKSTCTRL_ENABLE_AUTO) || - (cpu_is_omap24xx() && v == OMAP24XX_CLKSTCTRL_ENABLE_AUTO)) + (cpu_is_omap24xx() && v == OMAP24XX_CLKSTCTRL_ENABLE_AUTO)) { + /* Disable HW transitions when we are changing deps */ + _omap2_clkdm_set_hwsup(clkdm, 0); _clkdm_del_autodeps(clkdm); - else + _omap2_clkdm_set_hwsup(clkdm, 1); + } else { omap2_clkdm_sleep(clkdm); + } pwrdm_clkdm_state_switch(clkdm); diff --git a/arch/arm/plat-omap/dma.c b/arch/arm/plat-omap/dma.c index fd3154a..0eb676d 100644 --- a/arch/arm/plat-omap/dma.c +++ b/arch/arm/plat-omap/dma.c @@ -829,10 +829,10 @@ EXPORT_SYMBOL(omap_free_dma); * * @param arb_rate * @param max_fifo_depth - * @param tparams - Number of thereads to reserve : DMA_THREAD_RESERVE_NORM - * DMA_THREAD_RESERVE_ONET - * DMA_THREAD_RESERVE_TWOT - * DMA_THREAD_RESERVE_THREET + * @param tparams - Number of threads to reserve : DMA_THREAD_RESERVE_NORM + * DMA_THREAD_RESERVE_ONET + * DMA_THREAD_RESERVE_TWOT + * DMA_THREAD_RESERVE_THREET */ void omap_dma_set_global_params(int arb_rate, int max_fifo_depth, int tparams) @@ -844,11 +844,14 @@ omap_dma_set_global_params(int arb_rate, int max_fifo_depth, int tparams) return; } + if (max_fifo_depth == 0) + max_fifo_depth = 1; if (arb_rate == 0) arb_rate = 1; - reg = (arb_rate & 0xff) << 16; - reg |= (0xff & max_fifo_depth); + reg = 0xff & max_fifo_depth; + reg |= (0x3 & tparams) << 12; + reg |= (arb_rate & 0xff) << 16; dma_write(reg, GCR); } diff --git a/arch/arm/plat-omap/mcbsp.c b/arch/arm/plat-omap/mcbsp.c index 88ac976..e664b91 100644 --- a/arch/arm/plat-omap/mcbsp.c +++ b/arch/arm/plat-omap/mcbsp.c @@ -595,7 +595,7 @@ void omap_mcbsp_stop(unsigned int id, int tx, int rx) rx &= 1; if (cpu_is_omap2430() || cpu_is_omap34xx()) { w = OMAP_MCBSP_READ(io_base, RCCR); - w |= (tx ? RDISABLE : 0); + w |= (rx ? RDISABLE : 0); OMAP_MCBSP_WRITE(io_base, RCCR, w); } w = OMAP_MCBSP_READ(io_base, SPCR1); diff --git a/arch/powerpc/include/asm/firmware.h b/arch/powerpc/include/asm/firmware.h index 3a17982..20778a4 100644 --- a/arch/powerpc/include/asm/firmware.h +++ b/arch/powerpc/include/asm/firmware.h @@ -37,7 +37,7 @@ #define FW_FEATURE_VIO ASM_CONST(0x0000000000004000) #define FW_FEATURE_RDMA ASM_CONST(0x0000000000008000) #define FW_FEATURE_LLAN ASM_CONST(0x0000000000010000) -#define FW_FEATURE_BULK ASM_CONST(0x0000000000020000) +#define FW_FEATURE_BULK_REMOVE ASM_CONST(0x0000000000020000) #define FW_FEATURE_XDABR ASM_CONST(0x0000000000040000) #define FW_FEATURE_MULTITCE ASM_CONST(0x0000000000080000) #define FW_FEATURE_SPLPAR ASM_CONST(0x0000000000100000) @@ -45,8 +45,7 @@ #define FW_FEATURE_LPAR ASM_CONST(0x0000000000400000) #define FW_FEATURE_PS3_LV1 ASM_CONST(0x0000000000800000) #define FW_FEATURE_BEAT ASM_CONST(0x0000000001000000) -#define FW_FEATURE_BULK_REMOVE ASM_CONST(0x0000000002000000) -#define FW_FEATURE_CMO ASM_CONST(0x0000000004000000) +#define FW_FEATURE_CMO ASM_CONST(0x0000000002000000) #ifndef __ASSEMBLY__ @@ -58,8 +57,9 @@ enum { FW_FEATURE_PERF | FW_FEATURE_DUMP | FW_FEATURE_INTERRUPT | FW_FEATURE_MIGRATE | FW_FEATURE_PERFMON | FW_FEATURE_CRQ | FW_FEATURE_VIO | FW_FEATURE_RDMA | FW_FEATURE_LLAN | - FW_FEATURE_BULK | FW_FEATURE_XDABR | FW_FEATURE_MULTITCE | - FW_FEATURE_SPLPAR | FW_FEATURE_LPAR | FW_FEATURE_CMO, + FW_FEATURE_BULK_REMOVE | FW_FEATURE_XDABR | + FW_FEATURE_MULTITCE | FW_FEATURE_SPLPAR | FW_FEATURE_LPAR | + FW_FEATURE_CMO, FW_FEATURE_PSERIES_ALWAYS = 0, FW_FEATURE_ISERIES_POSSIBLE = FW_FEATURE_ISERIES | FW_FEATURE_LPAR, FW_FEATURE_ISERIES_ALWAYS = FW_FEATURE_ISERIES | FW_FEATURE_LPAR, diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index 0b9c913..03c862b 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -711,6 +711,8 @@ static struct cpu_spec __initdata cpu_specs[] = { .cpu_setup = __setup_cpu_750, .machine_check = machine_check_generic, .platform = "ppc750", + .oprofile_cpu_type = "ppc/750", + .oprofile_type = PPC_OPROFILE_G4, }, { /* 745/755 */ .pvr_mask = 0xfffff000, diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 900e0ee..f9fd54b 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -1038,8 +1038,7 @@ _GLOBAL(mod_return_to_handler) * We are in a module using the module's TOC. * Switch to our TOC to run inside the core kernel. */ - LOAD_REG_IMMEDIATE(r4,ftrace_return_to_handler) - ld r2, 8(r4) + ld r2, PACATOC(r13) bl .ftrace_return_to_handler nop diff --git a/arch/powerpc/kernel/kgdb.c b/arch/powerpc/kernel/kgdb.c index fe8f71d..641c74b 100644 --- a/arch/powerpc/kernel/kgdb.c +++ b/arch/powerpc/kernel/kgdb.c @@ -282,12 +282,6 @@ void gdb_regs_to_pt_regs(unsigned long *gdb_regs, struct pt_regs *regs) { unsigned long *ptr = gdb_regs; int reg; -#ifdef CONFIG_SPE - union { - u32 v32[2]; - u64 v64; - } acc; -#endif for (reg = 0; reg < 32; reg++) UNPACK64(regs->gpr[reg], ptr); diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index bb8209e..e8dfdbd 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -1190,7 +1190,7 @@ EXPORT_SYMBOL(pcibios_align_resource); * Reparent resource children of pr that conflict with res * under res, and make res replace those children. */ -static int __init reparent_resources(struct resource *parent, +static int reparent_resources(struct resource *parent, struct resource *res) { struct resource *p, **pp; diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 1168c5f..2ec1eae 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1016,9 +1016,13 @@ void show_stack(struct task_struct *tsk, unsigned long *stack) #ifdef CONFIG_FUNCTION_GRAPH_TRACER int curr_frame = current->curr_ret_stack; extern void return_to_handler(void); - unsigned long addr = (unsigned long)return_to_handler; + unsigned long rth = (unsigned long)return_to_handler; + unsigned long mrth = -1; #ifdef CONFIG_PPC64 - addr = *(unsigned long*)addr; + extern void mod_return_to_handler(void); + rth = *(unsigned long *)rth; + mrth = (unsigned long)mod_return_to_handler; + mrth = *(unsigned long *)mrth; #endif #endif @@ -1044,7 +1048,7 @@ void show_stack(struct task_struct *tsk, unsigned long *stack) if (!firstframe || ip != lr) { printk("["REG"] ["REG"] %pS", sp, ip, (void *)ip); #ifdef CONFIG_FUNCTION_GRAPH_TRACER - if (ip == addr && curr_frame >= 0) { + if ((ip == rth || ip == mrth) && curr_frame >= 0) { printk(" (%pS)", (void *)current->ret_stack[curr_frame].ret); curr_frame--; diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index f564293..27735a7 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -236,6 +236,7 @@ SECTIONS READ_MOSTLY_DATA(L1_CACHE_BYTES) } + . = ALIGN(PAGE_SIZE); .data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) { NOSAVE_DATA } diff --git a/arch/powerpc/mm/slb_low.S b/arch/powerpc/mm/slb_low.S index bc44dc4..95ce355 100644 --- a/arch/powerpc/mm/slb_low.S +++ b/arch/powerpc/mm/slb_low.S @@ -72,19 +72,17 @@ _GLOBAL(slb_miss_kernel_load_vmemmap) 1: #endif /* CONFIG_SPARSEMEM_VMEMMAP */ - /* vmalloc/ioremap mapping encoding bits, the "li" instructions below - * will be patched by the kernel at boot + /* vmalloc mapping gets the encoding from the PACA as the mapping + * can be demoted from 64K -> 4K dynamically on some machines */ -BEGIN_FTR_SECTION - /* check whether this is in vmalloc or ioremap space */ clrldi r11,r10,48 cmpldi r11,(VMALLOC_SIZE >> 28) - 1 bgt 5f lhz r11,PACAVMALLOCSLLP(r13) b 6f 5: -END_FTR_SECTION_IFCLR(CPU_FTR_CI_LARGE_PAGE) -_GLOBAL(slb_miss_kernel_load_io) + /* IO mapping */ + _GLOBAL(slb_miss_kernel_load_io) li r11,0 6: BEGIN_FTR_SECTION diff --git a/arch/powerpc/platforms/cell/axon_msi.c b/arch/powerpc/platforms/cell/axon_msi.c index aca5741..a86c34b 100644 --- a/arch/powerpc/platforms/cell/axon_msi.c +++ b/arch/powerpc/platforms/cell/axon_msi.c @@ -365,7 +365,7 @@ static int axon_msi_probe(struct of_device *device, printk(KERN_ERR "axon_msi: couldn't parse dcr properties on %s\n", dn->full_name); - goto out; + goto out_free_msic; } msic->dcr_host = dcr_map(dn, dcr_base, dcr_len); diff --git a/arch/powerpc/platforms/powermac/low_i2c.c b/arch/powerpc/platforms/powermac/low_i2c.c index 21226b7..414ca98 100644 --- a/arch/powerpc/platforms/powermac/low_i2c.c +++ b/arch/powerpc/platforms/powermac/low_i2c.c @@ -540,8 +540,11 @@ static struct pmac_i2c_host_kw *__init kw_i2c_host_init(struct device_node *np) /* Make sure IRQ is disabled */ kw_write_reg(reg_ier, 0); - /* Request chip interrupt */ - if (request_irq(host->irq, kw_i2c_irq, 0, "keywest i2c", host)) + /* Request chip interrupt. We set IRQF_TIMER because we don't + * want that interrupt disabled between the 2 passes of driver + * suspend or we'll have issues running the pfuncs + */ + if (request_irq(host->irq, kw_i2c_irq, IRQF_TIMER, "keywest i2c", host)) host->irq = NO_IRQ; printk(KERN_INFO "KeyWest i2c @0x%08x irq %d %s\n", diff --git a/arch/powerpc/platforms/pseries/firmware.c b/arch/powerpc/platforms/pseries/firmware.c index 5a707da..0a14d8c 100644 --- a/arch/powerpc/platforms/pseries/firmware.c +++ b/arch/powerpc/platforms/pseries/firmware.c @@ -51,11 +51,10 @@ firmware_features_table[FIRMWARE_MAX_FEATURES] = { {FW_FEATURE_VIO, "hcall-vio"}, {FW_FEATURE_RDMA, "hcall-rdma"}, {FW_FEATURE_LLAN, "hcall-lLAN"}, - {FW_FEATURE_BULK, "hcall-bulk"}, + {FW_FEATURE_BULK_REMOVE, "hcall-bulk"}, {FW_FEATURE_XDABR, "hcall-xdabr"}, {FW_FEATURE_MULTITCE, "hcall-multi-tce"}, {FW_FEATURE_SPLPAR, "hcall-splpar"}, - {FW_FEATURE_BULK_REMOVE, "hcall-bulk"}, }; /* Build up the firmware features bitmask using the contents of diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index d105f29..7d0b681 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -186,6 +186,15 @@ config X86_DS_SELFTEST config HAVE_MMIOTRACE_SUPPORT def_bool y +config X86_DECODER_SELFTEST + bool "x86 instruction decoder selftest" + depends on DEBUG_KERNEL + ---help--- + Perform x86 instruction decoder selftests at build time. + This option is useful for checking the sanity of x86 instruction + decoder code. + If unsure, say "N". + # # IO delay types: # diff --git a/arch/x86/Makefile b/arch/x86/Makefile index a012ee8..ba7a6df 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -156,6 +156,9 @@ all: bzImage KBUILD_IMAGE := $(boot)/bzImage bzImage: vmlinux +ifeq ($(CONFIG_X86_DECODER_SELFTEST),y) + $(Q)$(MAKE) $(build)=arch/x86/tools posttest +endif $(Q)$(MAKE) $(build)=$(boot) $(KBUILD_IMAGE) $(Q)mkdir -p $(objtree)/arch/$(UTS_MACHINE)/boot $(Q)ln -fsn ../../x86/boot/bzImage $(objtree)/arch/$(UTS_MACHINE)/boot/$@ diff --git a/arch/x86/include/asm/inat.h b/arch/x86/include/asm/inat.h new file mode 100644 index 0000000..2866fdd --- /dev/null +++ b/arch/x86/include/asm/inat.h @@ -0,0 +1,188 @@ +#ifndef _ASM_X86_INAT_H +#define _ASM_X86_INAT_H +/* + * x86 instruction attributes + * + * Written by Masami Hiramatsu <mhiramat@redhat.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + */ +#include <asm/inat_types.h> + +/* + * Internal bits. Don't use bitmasks directly, because these bits are + * unstable. You should use checking functions. + */ + +#define INAT_OPCODE_TABLE_SIZE 256 +#define INAT_GROUP_TABLE_SIZE 8 + +/* Legacy instruction prefixes */ +#define INAT_PFX_OPNDSZ 1 /* 0x66 */ /* LPFX1 */ +#define INAT_PFX_REPNE 2 /* 0xF2 */ /* LPFX2 */ +#define INAT_PFX_REPE 3 /* 0xF3 */ /* LPFX3 */ +#define INAT_PFX_LOCK 4 /* 0xF0 */ +#define INAT_PFX_CS 5 /* 0x2E */ +#define INAT_PFX_DS 6 /* 0x3E */ +#define INAT_PFX_ES 7 /* 0x26 */ +#define INAT_PFX_FS 8 /* 0x64 */ +#define INAT_PFX_GS 9 /* 0x65 */ +#define INAT_PFX_SS 10 /* 0x36 */ +#define INAT_PFX_ADDRSZ 11 /* 0x67 */ + +#define INAT_LPREFIX_MAX 3 + +/* Immediate size */ +#define INAT_IMM_BYTE 1 +#define INAT_IMM_WORD 2 +#define INAT_IMM_DWORD 3 +#define INAT_IMM_QWORD 4 +#define INAT_IMM_PTR 5 +#define INAT_IMM_VWORD32 6 +#define INAT_IMM_VWORD 7 + +/* Legacy prefix */ +#define INAT_PFX_OFFS 0 +#define INAT_PFX_BITS 4 +#define INAT_PFX_MAX ((1 << INAT_PFX_BITS) - 1) +#define INAT_PFX_MASK (INAT_PFX_MAX << INAT_PFX_OFFS) +/* Escape opcodes */ +#define INAT_ESC_OFFS (INAT_PFX_OFFS + INAT_PFX_BITS) +#define INAT_ESC_BITS 2 +#define INAT_ESC_MAX ((1 << INAT_ESC_BITS) - 1) +#define INAT_ESC_MASK (INAT_ESC_MAX << INAT_ESC_OFFS) +/* Group opcodes (1-16) */ +#define INAT_GRP_OFFS (INAT_ESC_OFFS + INAT_ESC_BITS) +#define INAT_GRP_BITS 5 +#define INAT_GRP_MAX ((1 << INAT_GRP_BITS) - 1) +#define INAT_GRP_MASK (INAT_GRP_MAX << INAT_GRP_OFFS) +/* Immediates */ +#define INAT_IMM_OFFS (INAT_GRP_OFFS + INAT_GRP_BITS) +#define INAT_IMM_BITS 3 +#define INAT_IMM_MASK (((1 << INAT_IMM_BITS) - 1) << INAT_IMM_OFFS) +/* Flags */ +#define INAT_FLAG_OFFS (INAT_IMM_OFFS + INAT_IMM_BITS) +#define INAT_REXPFX (1 << INAT_FLAG_OFFS) +#define INAT_MODRM (1 << (INAT_FLAG_OFFS + 1)) +#define INAT_FORCE64 (1 << (INAT_FLAG_OFFS + 2)) +#define INAT_SCNDIMM (1 << (INAT_FLAG_OFFS + 3)) +#define INAT_MOFFSET (1 << (INAT_FLAG_OFFS + 4)) +#define INAT_VARIANT (1 << (INAT_FLAG_OFFS + 5)) +/* Attribute making macros for attribute tables */ +#define INAT_MAKE_PREFIX(pfx) (pfx << INAT_PFX_OFFS) +#define INAT_MAKE_ESCAPE(esc) (esc << INAT_ESC_OFFS) +#define INAT_MAKE_GROUP(grp) ((grp << INAT_GRP_OFFS) | INAT_MODRM) +#define INAT_MAKE_IMM(imm) (imm << INAT_IMM_OFFS) + +/* Attribute search APIs */ +extern insn_attr_t inat_get_opcode_attribute(insn_byte_t opcode); +extern insn_attr_t inat_get_escape_attribute(insn_byte_t opcode, + insn_byte_t last_pfx, + insn_attr_t esc_attr); +extern insn_attr_t inat_get_group_attribute(insn_byte_t modrm, + insn_byte_t last_pfx, + insn_attr_t esc_attr); + +/* Attribute checking functions */ +static inline int inat_is_prefix(insn_attr_t attr) +{ + return attr & INAT_PFX_MASK; +} + +static inline int inat_is_address_size_prefix(insn_attr_t attr) +{ + return (attr & INAT_PFX_MASK) == INAT_PFX_ADDRSZ; +} + +static inline int inat_is_operand_size_prefix(insn_attr_t attr) +{ + return (attr & INAT_PFX_MASK) == INAT_PFX_OPNDSZ; +} + +static inline int inat_last_prefix_id(insn_attr_t attr) +{ + if ((attr & INAT_PFX_MASK) > INAT_LPREFIX_MAX) + return 0; + else + return attr & INAT_PFX_MASK; +} + +static inline int inat_is_escape(insn_attr_t attr) +{ + return attr & INAT_ESC_MASK; +} + +static inline int inat_escape_id(insn_attr_t attr) +{ + return (attr & INAT_ESC_MASK) >> INAT_ESC_OFFS; +} + +static inline int inat_is_group(insn_attr_t attr) +{ + return attr & INAT_GRP_MASK; +} + +static inline int inat_group_id(insn_attr_t attr) +{ + return (attr & INAT_GRP_MASK) >> INAT_GRP_OFFS; +} + +static inline int inat_group_common_attribute(insn_attr_t attr) +{ + return attr & ~INAT_GRP_MASK; +} + +static inline int inat_has_immediate(insn_attr_t attr) +{ + return attr & INAT_IMM_MASK; +} + +static inline int inat_immediate_size(insn_attr_t attr) +{ + return (attr & INAT_IMM_MASK) >> INAT_IMM_OFFS; +} + +static inline int inat_is_rex_prefix(insn_attr_t attr) +{ + return attr & INAT_REXPFX; +} + +static inline int inat_has_modrm(insn_attr_t attr) +{ + return attr & INAT_MODRM; +} + +static inline int inat_is_force64(insn_attr_t attr) +{ + return attr & INAT_FORCE64; +} + +static inline int inat_has_second_immediate(insn_attr_t attr) +{ + return attr & INAT_SCNDIMM; +} + +static inline int inat_has_moffset(insn_attr_t attr) +{ + return attr & INAT_MOFFSET; +} + +static inline int inat_has_variant(insn_attr_t attr) +{ + return attr & INAT_VARIANT; +} + +#endif diff --git a/arch/x86/include/asm/inat_types.h b/arch/x86/include/asm/inat_types.h new file mode 100644 index 0000000..cb3c20c --- /dev/null +++ b/arch/x86/include/asm/inat_types.h @@ -0,0 +1,29 @@ +#ifndef _ASM_X86_INAT_TYPES_H +#define _ASM_X86_INAT_TYPES_H +/* + * x86 instruction attributes + * + * Written by Masami Hiramatsu <mhiramat@redhat.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + */ + +/* Instruction attributes */ +typedef unsigned int insn_attr_t; +typedef unsigned char insn_byte_t; +typedef signed int insn_value_t; + +#endif diff --git a/arch/x86/include/asm/insn.h b/arch/x86/include/asm/insn.h new file mode 100644 index 0000000..12b4e37 --- /dev/null +++ b/arch/x86/include/asm/insn.h @@ -0,0 +1,143 @@ +#ifndef _ASM_X86_INSN_H +#define _ASM_X86_INSN_H +/* + * x86 instruction analysis + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright (C) IBM Corporation, 2009 + */ + +/* insn_attr_t is defined in inat.h */ +#include <asm/inat.h> + +struct insn_field { + union { + insn_value_t value; + insn_byte_t bytes[4]; + }; + /* !0 if we've run insn_get_xxx() for this field */ + unsigned char got; + unsigned char nbytes; +}; + +struct insn { + struct insn_field prefixes; /* + * Prefixes + * prefixes.bytes[3]: last prefix + */ + struct insn_field rex_prefix; /* REX prefix */ + struct insn_field opcode; /* + * opcode.bytes[0]: opcode1 + * opcode.bytes[1]: opcode2 + * opcode.bytes[2]: opcode3 + */ + struct insn_field modrm; + struct insn_field sib; + struct insn_field displacement; + union { + struct insn_field immediate; + struct insn_field moffset1; /* for 64bit MOV */ + struct insn_field immediate1; /* for 64bit imm or off16/32 */ + }; + union { + struct insn_field moffset2; /* for 64bit MOV */ + struct insn_field immediate2; /* for 64bit imm or seg16 */ + }; + + insn_attr_t attr; + unsigned char opnd_bytes; + unsigned char addr_bytes; + unsigned char length; + unsigned char x86_64; + + const insn_byte_t *kaddr; /* kernel address of insn to analyze */ + const insn_byte_t *next_byte; +}; + +#define X86_MODRM_MOD(modrm) (((modrm) & 0xc0) >> 6) +#define X86_MODRM_REG(modrm) (((modrm) & 0x38) >> 3) +#define X86_MODRM_RM(modrm) ((modrm) & 0x07) + +#define X86_SIB_SCALE(sib) (((sib) & 0xc0) >> 6) +#define X86_SIB_INDEX(sib) (((sib) & 0x38) >> 3) +#define X86_SIB_BASE(sib) ((sib) & 0x07) + +#define X86_REX_W(rex) ((rex) & 8) +#define X86_REX_R(rex) ((rex) & 4) +#define X86_REX_X(rex) ((rex) & 2) +#define X86_REX_B(rex) ((rex) & 1) + +/* The last prefix is needed for two-byte and three-byte opcodes */ +static inline insn_byte_t insn_last_prefix(struct insn *insn) +{ + return insn->prefixes.bytes[3]; +} + +extern void insn_init(struct insn *insn, const void *kaddr, int x86_64); +extern void insn_get_prefixes(struct insn *insn); +extern void insn_get_opcode(struct insn *insn); +extern void insn_get_modrm(struct insn *insn); +extern void insn_get_sib(struct insn *insn); +extern void insn_get_displacement(struct insn *insn); +extern void insn_get_immediate(struct insn *insn); +extern void insn_get_length(struct insn *insn); + +/* Attribute will be determined after getting ModRM (for opcode groups) */ +static inline void insn_get_attribute(struct insn *insn) +{ + insn_get_modrm(insn); +} + +/* Instruction uses RIP-relative addressing */ +extern int insn_rip_relative(struct insn *insn); + +/* Init insn for kernel text */ +static inline void kernel_insn_init(struct insn *insn, const void *kaddr) +{ +#ifdef CONFIG_X86_64 + insn_init(insn, kaddr, 1); +#else /* CONFIG_X86_32 */ + insn_init(insn, kaddr, 0); +#endif +} + +/* Offset of each field from kaddr */ +static inline int insn_offset_rex_prefix(struct insn *insn) +{ + return insn->prefixes.nbytes; +} +static inline int insn_offset_opcode(struct insn *insn) +{ + return insn_offset_rex_prefix(insn) + insn->rex_prefix.nbytes; +} +static inline int insn_offset_modrm(struct insn *insn) +{ + return insn_offset_opcode(insn) + insn->opcode.nbytes; +} +static inline int insn_offset_sib(struct insn *insn) +{ + return insn_offset_modrm(insn) + insn->modrm.nbytes; +} +static inline int insn_offset_displacement(struct insn *insn) +{ + return insn_offset_sib(insn) + insn->sib.nbytes; +} +static inline int insn_offset_immediate(struct insn *insn) +{ + return insn_offset_displacement(insn) + insn->displacement.nbytes; +} + +#endif /* _ASM_X86_INSN_H */ diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h index 0f0d908..a3d49dd 100644 --- a/arch/x86/include/asm/ptrace.h +++ b/arch/x86/include/asm/ptrace.h @@ -7,6 +7,7 @@ #ifdef __KERNEL__ #include <asm/segment.h> +#include <asm/page_types.h> #endif #ifndef __ASSEMBLY__ @@ -216,6 +217,67 @@ static inline unsigned long user_stack_pointer(struct pt_regs *regs) return regs->sp; } +/* Query offset/name of register from its name/offset */ +extern int regs_query_register_offset(const char *name); +extern const char *regs_query_register_name(unsigned int offset); +#define MAX_REG_OFFSET (offsetof(struct pt_regs, ss)) + +/** + * regs_get_register() - get register value from its offset + * @regs: pt_regs from which register value is gotten. + * @offset: offset number of the register. + * + * regs_get_register returns the value of a register whose offset from @regs + * is @offset. The @offset is the offset of the register in struct pt_regs. + * If @offset is bigger than MAX_REG_OFFSET, this returns 0. + */ +static inline unsigned long regs_get_register(struct pt_regs *regs, + unsigned int offset) +{ + if (unlikely(offset > MAX_REG_OFFSET)) + return 0; + return *(unsigned long *)((unsigned long)regs + offset); +} + +/** + * regs_within_kernel_stack() - check the address in the stack + * @regs: pt_regs which contains kernel stack pointer. + * @addr: address which is checked. + * + * regs_within_kenel_stack() checks @addr is within the kernel stack page(s). + * If @addr is within the kernel stack, it returns true. If not, returns false. + */ +static inline int regs_within_kernel_stack(struct pt_regs *regs, + unsigned long addr) +{ + return ((addr & ~(THREAD_SIZE - 1)) == + (kernel_stack_pointer(regs) & ~(THREAD_SIZE - 1))); +} + +/** + * regs_get_kernel_stack_nth() - get Nth entry of the stack + * @regs: pt_regs which contains kernel stack pointer. + * @n: stack entry number. + * + * regs_get_kernel_stack_nth() returns @n th entry of the kernel stack which + * is specifined by @regs. If the @n th entry is NOT in the kernel stack, + * this returns 0. + */ +static inline unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs, + unsigned int n) +{ + unsigned long *addr = (unsigned long *)kernel_stack_pointer(regs); + addr += n; + if (regs_within_kernel_stack(regs, (unsigned long)addr)) + return *addr; + else + return 0; +} + +/* Get Nth argument at function call */ +extern unsigned long regs_get_argument_nth(struct pt_regs *regs, + unsigned int n); + /* * These are defined as per linux/ptrace.h, which see. */ diff --git a/arch/x86/kernel/acpi/realmode/wakeup.lds.S b/arch/x86/kernel/acpi/realmode/wakeup.lds.S index 0e50e1e..7da00b7 100644 --- a/arch/x86/kernel/acpi/realmode/wakeup.lds.S +++ b/arch/x86/kernel/acpi/realmode/wakeup.lds.S @@ -56,6 +56,6 @@ SECTIONS /DISCARD/ : { *(.note*) } -} -ASSERT(_end <= WAKEUP_SIZE, "Wakeup too big!"); + . = ASSERT(_end <= WAKEUP_SIZE, "Wakeup too big!"); +} diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index 7d52e9d..50b9c22 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -334,6 +334,10 @@ ENTRY(ret_from_fork) END(ret_from_fork) /* + * Interrupt exit functions should be protected against kprobes + */ + .pushsection .kprobes.text, "ax" +/* * Return to user mode is not as complex as all this looks, * but we want the default path for a system call return to * go as quickly as possible which is why some of this is @@ -383,6 +387,10 @@ need_resched: END(resume_kernel) #endif CFI_ENDPROC +/* + * End of kprobes section + */ + .popsection /* SYSENTER_RETURN points to after the "sysenter" instruction in the vsyscall page. See vsyscall-sysentry.S, which defines the symbol. */ @@ -513,6 +521,10 @@ sysexit_audit: PTGS_TO_GS_EX ENDPROC(ia32_sysenter_target) +/* + * syscall stub including irq exit should be protected against kprobes + */ + .pushsection .kprobes.text, "ax" # system call handler stub ENTRY(system_call) RING0_INT_FRAME # can't unwind into user space anyway @@ -705,6 +717,10 @@ syscall_badsys: jmp resume_userspace END(syscall_badsys) CFI_ENDPROC +/* + * End of kprobes section + */ + .popsection /* * System calls that need a pt_regs pointer. @@ -814,6 +830,10 @@ common_interrupt: ENDPROC(common_interrupt) CFI_ENDPROC +/* + * Irq entries should be protected against kprobes + */ + .pushsection .kprobes.text, "ax" #define BUILD_INTERRUPT3(name, nr, fn) \ ENTRY(name) \ RING0_INT_FRAME; \ @@ -980,6 +1000,10 @@ ENTRY(spurious_interrupt_bug) jmp error_code CFI_ENDPROC END(spurious_interrupt_bug) +/* + * End of kprobes section + */ + .popsection ENTRY(kernel_thread_helper) pushl $0 # fake return address for unwinder diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index bd5bbdd..722df1b 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -803,6 +803,10 @@ END(interrupt) call \func .endm +/* + * Interrupt entry/exit should be protected against kprobes + */ + .pushsection .kprobes.text, "ax" /* * The interrupt stubs push (~vector+0x80) onto the stack and * then jump to common_interrupt. @@ -941,6 +945,10 @@ ENTRY(retint_kernel) CFI_ENDPROC END(common_interrupt) +/* + * End of kprobes section + */ + .popsection /* * APIC interrupts. diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 74656d1..04bbd52 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -63,10 +63,10 @@ static int show_other_interrupts(struct seq_file *p, int prec) for_each_online_cpu(j) seq_printf(p, "%10u ", irq_stats(j)->irq_spurious_count); seq_printf(p, " Spurious interrupts\n"); - seq_printf(p, "%*s: ", prec, "CNT"); + seq_printf(p, "%*s: ", prec, "PMI"); for_each_online_cpu(j) seq_printf(p, "%10u ", irq_stats(j)->apic_perf_irqs); - seq_printf(p, " Performance counter interrupts\n"); + seq_printf(p, " Performance monitoring interrupts\n"); seq_printf(p, "%*s: ", prec, "PND"); for_each_online_cpu(j) seq_printf(p, "%10u ", irq_stats(j)->apic_pending_irqs); diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c index 7b5169d..c5f1f11 100644 --- a/arch/x86/kernel/kprobes.c +++ b/arch/x86/kernel/kprobes.c @@ -48,12 +48,14 @@ #include <linux/preempt.h> #include <linux/module.h> #include <linux/kdebug.h> +#include <linux/kallsyms.h> #include <asm/cacheflush.h> #include <asm/desc.h> #include <asm/pgtable.h> #include <asm/uaccess.h> #include <asm/alternative.h> +#include <asm/insn.h> void jprobe_return_end(void); @@ -106,50 +108,6 @@ static const u32 twobyte_is_boostable[256 / 32] = { /* ----------------------------------------------- */ /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ }; -static const u32 onebyte_has_modrm[256 / 32] = { - /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ - /* ----------------------------------------------- */ - W(0x00, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) | /* 00 */ - W(0x10, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) , /* 10 */ - W(0x20, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) | /* 20 */ - W(0x30, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) , /* 30 */ - W(0x40, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* 40 */ - W(0x50, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 50 */ - W(0x60, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0) | /* 60 */ - W(0x70, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 70 */ - W(0x80, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 80 */ - W(0x90, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 90 */ - W(0xa0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* a0 */ - W(0xb0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* b0 */ - W(0xc0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0) | /* c0 */ - W(0xd0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1) , /* d0 */ - W(0xe0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* e0 */ - W(0xf0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1) /* f0 */ - /* ----------------------------------------------- */ - /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ -}; -static const u32 twobyte_has_modrm[256 / 32] = { - /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ - /* ----------------------------------------------- */ - W(0x00, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1) | /* 0f */ - W(0x10, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0) , /* 1f */ - W(0x20, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1) | /* 2f */ - W(0x30, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 3f */ - W(0x40, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 4f */ - W(0x50, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 5f */ - W(0x60, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 6f */ - W(0x70, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1) , /* 7f */ - W(0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* 8f */ - W(0x90, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 9f */ - W(0xa0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1) | /* af */ - W(0xb0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1) , /* bf */ - W(0xc0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0) | /* cf */ - W(0xd0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* df */ - W(0xe0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* ef */ - W(0xf0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0) /* ff */ - /* ----------------------------------------------- */ - /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ -}; #undef W struct kretprobe_blackpoint kretprobe_blacklist[] = { @@ -244,6 +202,75 @@ retry: } } +/* Recover the probed instruction at addr for further analysis. */ +static int recover_probed_instruction(kprobe_opcode_t *buf, unsigned long addr) +{ + struct kprobe *kp; + kp = get_kprobe((void *)addr); + if (!kp) + return -EINVAL; + + /* + * Basically, kp->ainsn.insn has an original instruction. + * However, RIP-relative instruction can not do single-stepping + * at different place, fix_riprel() tweaks the displacement of + * that instruction. In that case, we can't recover the instruction + * from the kp->ainsn.insn. + * + * On the other hand, kp->opcode has a copy of the first byte of + * the probed instruction, which is overwritten by int3. And + * the instruction at kp->addr is not modified by kprobes except + * for the first byte, we can recover the original instruction + * from it and kp->opcode. + */ + memcpy(buf, kp->addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t)); + buf[0] = kp->opcode; + return 0; +} + +/* Dummy buffers for kallsyms_lookup */ +static char __dummy_buf[KSYM_NAME_LEN]; + +/* Check if paddr is at an instruction boundary */ +static int __kprobes can_probe(unsigned long paddr) +{ + int ret; + unsigned long addr, offset = 0; + struct insn insn; + kprobe_opcode_t buf[MAX_INSN_SIZE]; + + if (!kallsyms_lookup(paddr, NULL, &offset, NULL, __dummy_buf)) + return 0; + + /* Decode instructions */ + addr = paddr - offset; + while (addr < paddr) { + kernel_insn_init(&insn, (void *)addr); + insn_get_opcode(&insn); + + /* + * Check if the instruction has been modified by another + * kprobe, in which case we replace the breakpoint by the + * original instruction in our buffer. + */ + if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION) { + ret = recover_probed_instruction(buf, addr); + if (ret) + /* + * Another debugging subsystem might insert + * this breakpoint. In that case, we can't + * recover it. + */ + return 0; + kernel_insn_init(&insn, buf); + } + insn_get_length(&insn); + addr += insn.length; + } + + return (addr == paddr); +} + /* * Returns non-zero if opcode modifies the interrupt flag. */ @@ -277,68 +304,30 @@ static int __kprobes is_IF_modifier(kprobe_opcode_t *insn) static void __kprobes fix_riprel(struct kprobe *p) { #ifdef CONFIG_X86_64 - u8 *insn = p->ainsn.insn; - s64 disp; - int need_modrm; - - /* Skip legacy instruction prefixes. */ - while (1) { - switch (*insn) { - case 0x66: - case 0x67: - case 0x2e: - case 0x3e: - case 0x26: - case 0x64: - case 0x65: - case 0x36: - case 0xf0: - case 0xf3: - case 0xf2: - ++insn; - continue; - } - break; - } + struct insn insn; + kernel_insn_init(&insn, p->ainsn.insn); - /* Skip REX instruction prefix. */ - if (is_REX_prefix(insn)) - ++insn; - - if (*insn == 0x0f) { - /* Two-byte opcode. */ - ++insn; - need_modrm = test_bit(*insn, - (unsigned long *)twobyte_has_modrm); - } else - /* One-byte opcode. */ - need_modrm = test_bit(*insn, - (unsigned long *)onebyte_has_modrm); - - if (need_modrm) { - u8 modrm = *++insn; - if ((modrm & 0xc7) == 0x05) { - /* %rip+disp32 addressing mode */ - /* Displacement follows ModRM byte. */ - ++insn; - /* - * The copied instruction uses the %rip-relative - * addressing mode. Adjust the displacement for the - * difference between the original location of this - * instruction and the location of the copy that will - * actually be run. The tricky bit here is making sure - * that the sign extension happens correctly in this - * calculation, since we need a signed 32-bit result to - * be sign-extended to 64 bits when it's added to the - * %rip value and yield the same 64-bit result that the - * sign-extension of the original signed 32-bit - * displacement would have given. - */ - disp = (u8 *) p->addr + *((s32 *) insn) - - (u8 *) p->ainsn.insn; - BUG_ON((s64) (s32) disp != disp); /* Sanity check. */ - *(s32 *)insn = (s32) disp; - } + if (insn_rip_relative(&insn)) { + s64 newdisp; + u8 *disp; + insn_get_displacement(&insn); + /* + * The copied instruction uses the %rip-relative addressing + * mode. Adjust the displacement for the difference between + * the original location of this instruction and the location + * of the copy that will actually be run. The tricky bit here + * is making sure that the sign extension happens correctly in + * this calculation, since we need a signed 32-bit result to + * be sign-extended to 64 bits when it's added to the %rip + * value and yield the same 64-bit result that the sign- + * extension of the original signed 32-bit displacement would + * have given. + */ + newdisp = (u8 *) p->addr + (s64) insn.displacement.value - + (u8 *) p->ainsn.insn; + BUG_ON((s64) (s32) newdisp != newdisp); /* Sanity check. */ + disp = (u8 *) p->ainsn.insn + insn_offset_displacement(&insn); + *(s32 *) disp = (s32) newdisp; } #endif } @@ -359,6 +348,8 @@ static void __kprobes arch_copy_kprobe(struct kprobe *p) int __kprobes arch_prepare_kprobe(struct kprobe *p) { + if (!can_probe((unsigned long)p->addr)) + return -EILSEQ; /* insn: must be on special executable page on x86. */ p->ainsn.insn = get_insn_slot(); if (!p->ainsn.insn) @@ -472,17 +463,6 @@ static int __kprobes reenter_kprobe(struct kprobe *p, struct pt_regs *regs, { switch (kcb->kprobe_status) { case KPROBE_HIT_SSDONE: -#ifdef CONFIG_X86_64 - /* TODO: Provide re-entrancy from post_kprobes_handler() and - * avoid exception stack corruption while single-stepping on - * the instruction of the new probe. - */ - arch_disarm_kprobe(p); - regs->ip = (unsigned long)p->addr; - reset_current_kprobe(); - preempt_enable_no_resched(); - break; -#endif case KPROBE_HIT_ACTIVE: save_previous_kprobe(kcb); set_current_kprobe(p, regs, kcb); @@ -491,18 +471,16 @@ static int __kprobes reenter_kprobe(struct kprobe *p, struct pt_regs *regs, kcb->kprobe_status = KPROBE_REENTER; break; case KPROBE_HIT_SS: - if (p == kprobe_running()) { - regs->flags &= ~X86_EFLAGS_TF; - regs->flags |= kcb->kprobe_saved_flags; - return 0; - } else { - /* A probe has been hit in the codepath leading up - * to, or just after, single-stepping of a probed - * instruction. This entire codepath should strictly - * reside in .kprobes.text section. Raise a warning - * to highlight this peculiar case. - */ - } + /* A probe has been hit in the codepath leading up to, or just + * after, single-stepping of a probed instruction. This entire + * codepath should strictly reside in .kprobes.text section. + * Raise a BUG or we'll continue in an endless reentering loop + * and eventually a stack overflow. + */ + printk(KERN_WARNING "Unrecoverable kprobe detected at %p.\n", + p->addr); + dump_kprobe(p); + BUG(); default: /* impossible cases */ WARN_ON(1); diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 7b058a2..c4f76d2 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -49,6 +49,118 @@ enum x86_regset { REGSET_IOPERM32, }; +struct pt_regs_offset { + const char *name; + int offset; +}; + +#define REG_OFFSET_NAME(r) {.name = #r, .offset = offsetof(struct pt_regs, r)} +#define REG_OFFSET_END {.name = NULL, .offset = 0} + +static const struct pt_regs_offset regoffset_table[] = { +#ifdef CONFIG_X86_64 + REG_OFFSET_NAME(r15), + REG_OFFSET_NAME(r14), + REG_OFFSET_NAME(r13), + REG_OFFSET_NAME(r12), + REG_OFFSET_NAME(r11), + REG_OFFSET_NAME(r10), + REG_OFFSET_NAME(r9), + REG_OFFSET_NAME(r8), +#endif + REG_OFFSET_NAME(bx), + REG_OFFSET_NAME(cx), + REG_OFFSET_NAME(dx), + REG_OFFSET_NAME(si), + REG_OFFSET_NAME(di), + REG_OFFSET_NAME(bp), + REG_OFFSET_NAME(ax), +#ifdef CONFIG_X86_32 + REG_OFFSET_NAME(ds), + REG_OFFSET_NAME(es), + REG_OFFSET_NAME(fs), + REG_OFFSET_NAME(gs), +#endif + REG_OFFSET_NAME(orig_ax), + REG_OFFSET_NAME(ip), + REG_OFFSET_NAME(cs), + REG_OFFSET_NAME(flags), + REG_OFFSET_NAME(sp), + REG_OFFSET_NAME(ss), + REG_OFFSET_END, +}; + +/** + * regs_query_register_offset() - query register offset from its name + * @name: the name of a register + * + * regs_query_register_offset() returns the offset of a register in struct + * pt_regs from its name. If the name is invalid, this returns -EINVAL; + */ +int regs_query_register_offset(const char *name) +{ + const struct pt_regs_offset *roff; + for (roff = regoffset_table; roff->name != NULL; roff++) + if (!strcmp(roff->name, name)) + return roff->offset; + return -EINVAL; +} + +/** + * regs_query_register_name() - query register name from its offset + * @offset: the offset of a register in struct pt_regs. + * + * regs_query_register_name() returns the name of a register from its + * offset in struct pt_regs. If the @offset is invalid, this returns NULL; + */ +const char *regs_query_register_name(unsigned int offset) +{ + const struct pt_regs_offset *roff; + for (roff = regoffset_table; roff->name != NULL; roff++) + if (roff->offset == offset) + return roff->name; + return NULL; +} + +static const int arg_offs_table[] = { +#ifdef CONFIG_X86_32 + [0] = offsetof(struct pt_regs, ax), + [1] = offsetof(struct pt_regs, dx), + [2] = offsetof(struct pt_regs, cx) +#else /* CONFIG_X86_64 */ + [0] = offsetof(struct pt_regs, di), + [1] = offsetof(struct pt_regs, si), + [2] = offsetof(struct pt_regs, dx), + [3] = offsetof(struct pt_regs, cx), + [4] = offsetof(struct pt_regs, r8), + [5] = offsetof(struct pt_regs, r9) +#endif +}; + +/** + * regs_get_argument_nth() - get Nth argument at function call + * @regs: pt_regs which contains registers at function entry. + * @n: argument number. + * + * regs_get_argument_nth() returns @n th argument of a function call. + * Since usually the kernel stack will be changed right after function entry, + * you must use this at function entry. If the @n th entry is NOT in the + * kernel stack or pt_regs, this returns 0. + */ +unsigned long regs_get_argument_nth(struct pt_regs *regs, unsigned int n) +{ + if (n < ARRAY_SIZE(arg_offs_table)) + return *(unsigned long *)((char *)regs + arg_offs_table[n]); + else { + /* + * The typical case: arg n is on the stack. + * (Note: stack[0] = return address, so skip it) + */ + n -= ARRAY_SIZE(arg_offs_table); + return regs_get_kernel_stack_nth(regs, 1 + n); + } +} + /* * does not yet catch signals sent when the child dies. * in exit.c or in signal.c. diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 8d6001a..92929fb 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -305,8 +305,8 @@ SECTIONS #ifdef CONFIG_X86_32 -ASSERT((_end - LOAD_OFFSET <= KERNEL_IMAGE_SIZE), - "kernel image bigger than KERNEL_IMAGE_SIZE"); +. = ASSERT((_end - LOAD_OFFSET <= KERNEL_IMAGE_SIZE), + "kernel image bigger than KERNEL_IMAGE_SIZE"); #else /* * Per-cpu symbols which need to be offset from __per_cpu_load @@ -319,12 +319,12 @@ INIT_PER_CPU(irq_stack_union); /* * Build-time check on the image size: */ -ASSERT((_end - _text <= KERNEL_IMAGE_SIZE), - "kernel image bigger than KERNEL_IMAGE_SIZE"); +. = ASSERT((_end - _text <= KERNEL_IMAGE_SIZE), + "kernel image bigger than KERNEL_IMAGE_SIZE"); #ifdef CONFIG_SMP -ASSERT((per_cpu__irq_stack_union == 0), - "irq_stack_union is not at start of per-cpu area"); +. = ASSERT((per_cpu__irq_stack_union == 0), + "irq_stack_union is not at start of per-cpu area"); #endif #endif /* CONFIG_X86_32 */ @@ -332,6 +332,7 @@ ASSERT((per_cpu__irq_stack_union == 0), #ifdef CONFIG_KEXEC #include <asm/kexec.h> -ASSERT(kexec_control_code_size <= KEXEC_CONTROL_CODE_MAX_SIZE, - "kexec control code size is too big"); +. = ASSERT(kexec_control_code_size <= KEXEC_CONTROL_CODE_MAX_SIZE, + "kexec control code size is too big"); #endif + diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile index 85f5db9..a2d6472 100644 --- a/arch/x86/lib/Makefile +++ b/arch/x86/lib/Makefile @@ -2,12 +2,25 @@ # Makefile for x86 specific library files. # +inat_tables_script = $(srctree)/arch/x86/tools/gen-insn-attr-x86.awk +inat_tables_maps = $(srctree)/arch/x86/lib/x86-opcode-map.txt +quiet_cmd_inat_tables = GEN $@ + cmd_inat_tables = $(AWK) -f $(inat_tables_script) $(inat_tables_maps) > $@ + +$(obj)/inat-tables.c: $(inat_tables_script) $(inat_tables_maps) + $(call cmd,inat_tables) + +$(obj)/inat.o: $(obj)/inat-tables.c + +clean-files := inat-tables.c + obj-$(CONFIG_SMP) := msr.o lib-y := delay.o lib-y += thunk_$(BITS).o lib-y += usercopy_$(BITS).o getuser.o putuser.o lib-y += memcpy_$(BITS).o +lib-y += insn.o inat.o obj-y += msr-reg.o msr-reg-export.o diff --git a/arch/x86/lib/inat.c b/arch/x86/lib/inat.c new file mode 100644 index 0000000..3fb5998 --- /dev/null +++ b/arch/x86/lib/inat.c @@ -0,0 +1,78 @@ +/* + * x86 instruction attribute tables + * + * Written by Masami Hiramatsu <mhiramat@redhat.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + */ +#include <asm/insn.h> + +/* Attribute tables are generated from opcode map */ +#include "inat-tables.c" + +/* Attribute search APIs */ +insn_attr_t inat_get_opcode_attribute(insn_byte_t opcode) +{ + return inat_primary_table[opcode]; +} + +insn_attr_t inat_get_escape_attribute(insn_byte_t opcode, insn_byte_t last_pfx, + insn_attr_t esc_attr) +{ + const insn_attr_t *table; + insn_attr_t lpfx_attr; + int n, m = 0; + + n = inat_escape_id(esc_attr); + if (last_pfx) { + lpfx_attr = inat_get_opcode_attribute(last_pfx); + m = inat_last_prefix_id(lpfx_attr); + } + table = inat_escape_tables[n][0]; + if (!table) + return 0; + if (inat_has_variant(table[opcode]) && m) { + table = inat_escape_tables[n][m]; + if (!table) + return 0; + } + return table[opcode]; +} + +insn_attr_t inat_get_group_attribute(insn_byte_t modrm, insn_byte_t last_pfx, + insn_attr_t grp_attr) +{ + const insn_attr_t *table; + insn_attr_t lpfx_attr; + int n, m = 0; + + n = inat_group_id(grp_attr); + if (last_pfx) { + lpfx_attr = inat_get_opcode_attribute(last_pfx); + m = inat_last_prefix_id(lpfx_attr); + } + table = inat_group_tables[n][0]; + if (!table) + return inat_group_common_attribute(grp_attr); + if (inat_has_variant(table[X86_MODRM_REG(modrm)]) && m) { + table = inat_group_tables[n][m]; + if (!table) + return inat_group_common_attribute(grp_attr); + } + return table[X86_MODRM_REG(modrm)] | + inat_group_common_attribute(grp_attr); +} + diff --git a/arch/x86/lib/insn.c b/arch/x86/lib/insn.c new file mode 100644 index 0000000..dfd56a3 --- /dev/null +++ b/arch/x86/lib/insn.c @@ -0,0 +1,464 @@ +/* + * x86 instruction analysis + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright (C) IBM Corporation, 2002, 2004, 2009 + */ + +#include <linux/string.h> +#include <asm/inat.h> +#include <asm/insn.h> + +#define get_next(t, insn) \ + ({t r; r = *(t*)insn->next_byte; insn->next_byte += sizeof(t); r; }) + +#define peek_next(t, insn) \ + ({t r; r = *(t*)insn->next_byte; r; }) + +/** + * insn_init() - initialize struct insn + * @insn: &struct insn to be initialized + * @kaddr: address (in kernel memory) of instruction (or copy thereof) + * @x86_64: !0 for 64-bit kernel or 64-bit app + */ +void insn_init(struct insn *insn, const void *kaddr, int x86_64) +{ + memset(insn, 0, sizeof(*insn)); + insn->kaddr = kaddr; + insn->next_byte = kaddr; + insn->x86_64 = x86_64 ? 1 : 0; + insn->opnd_bytes = 4; + if (x86_64) + insn->addr_bytes = 8; + else + insn->addr_bytes = 4; +} + +/** + * insn_get_prefixes - scan x86 instruction prefix bytes + * @insn: &struct insn containing instruction + * + * Populates the @insn->prefixes bitmap, and updates @insn->next_byte + * to point to the (first) opcode. No effect if @insn->prefixes.got + * is already set. + */ +void insn_get_prefixes(struct insn *insn) +{ + struct insn_field *prefixes = &insn->prefixes; + insn_attr_t attr; + insn_byte_t b, lb; + int i, nb; + + if (prefixes->got) + return; + + nb = 0; + lb = 0; + b = peek_next(insn_byte_t, insn); + attr = inat_get_opcode_attribute(b); + while (inat_is_prefix(attr)) { + /* Skip if same prefix */ + for (i = 0; i < nb; i++) + if (prefixes->bytes[i] == b) + goto found; + if (nb == 4) + /* Invalid instruction */ + break; + prefixes->bytes[nb++] = b; + if (inat_is_address_size_prefix(attr)) { + /* address size switches 2/4 or 4/8 */ + if (insn->x86_64) + insn->addr_bytes ^= 12; + else + insn->addr_bytes ^= 6; + } else if (inat_is_operand_size_prefix(attr)) { + /* oprand size switches 2/4 */ + insn->opnd_bytes ^= 6; + } +found: + prefixes->nbytes++; + insn->next_byte++; + lb = b; + b = peek_next(insn_byte_t, insn); + attr = inat_get_opcode_attribute(b); + } + /* Set the last prefix */ + if (lb && lb != insn->prefixes.bytes[3]) { + if (unlikely(insn->prefixes.bytes[3])) { + /* Swap the last prefix */ + b = insn->prefixes.bytes[3]; + for (i = 0; i < nb; i++) + if (prefixes->bytes[i] == lb) + prefixes->bytes[i] = b; + } + insn->prefixes.bytes[3] = lb; + } + + if (insn->x86_64) { + b = peek_next(insn_byte_t, insn); + attr = inat_get_opcode_attribute(b); + if (inat_is_rex_prefix(attr)) { + insn->rex_prefix.value = b; + insn->rex_prefix.nbytes = 1; + insn->next_byte++; + if (X86_REX_W(b)) + /* REX.W overrides opnd_size */ + insn->opnd_bytes = 8; + } + } + insn->rex_prefix.got = 1; + prefixes->got = 1; + return; +} + +/** + * insn_get_opcode - collect opcode(s) + * @insn: &struct insn containing instruction + * + * Populates @insn->opcode, updates @insn->next_byte to point past the + * opcode byte(s), and set @insn->attr (except for groups). + * If necessary, first collects any preceding (prefix) bytes. + * Sets @insn->opcode.value = opcode1. No effect if @insn->opcode.got + * is already 1. + */ +void insn_get_opcode(struct insn *insn) +{ + struct insn_field *opcode = &insn->opcode; + insn_byte_t op, pfx; + if (opcode->got) + return; + if (!insn->prefixes.got) + insn_get_prefixes(insn); + + /* Get first opcode */ + op = get_next(insn_byte_t, insn); + opcode->bytes[0] = op; + opcode->nbytes = 1; + insn->attr = inat_get_opcode_attribute(op); + while (inat_is_escape(insn->attr)) { + /* Get escaped opcode */ + op = get_next(insn_byte_t, insn); + opcode->bytes[opcode->nbytes++] = op; + pfx = insn_last_prefix(insn); + insn->attr = inat_get_escape_attribute(op, pfx, insn->attr); + } + opcode->got = 1; +} + +/** + * insn_get_modrm - collect ModRM byte, if any + * @insn: &struct insn containing instruction + * + * Populates @insn->modrm and updates @insn->next_byte to point past the + * ModRM byte, if any. If necessary, first collects the preceding bytes + * (prefixes and opcode(s)). No effect if @insn->modrm.got is already 1. + */ +void insn_get_modrm(struct insn *insn) +{ + struct insn_field *modrm = &insn->modrm; + insn_byte_t pfx, mod; + if (modrm->got) + return; + if (!insn->opcode.got) + insn_get_opcode(insn); + + if (inat_has_modrm(insn->attr)) { + mod = get_next(insn_byte_t, insn); + modrm->value = mod; + modrm->nbytes = 1; + if (inat_is_group(insn->attr)) { + pfx = insn_last_prefix(insn); + insn->attr = inat_get_group_attribute(mod, pfx, + insn->attr); + } + } + + if (insn->x86_64 && inat_is_force64(insn->attr)) + insn->opnd_bytes = 8; + modrm->got = 1; +} + + +/** + * insn_rip_relative() - Does instruction use RIP-relative addressing mode? + * @insn: &struct insn containing instruction + * + * If necessary, first collects the instruction up to and including the + * ModRM byte. No effect if @insn->x86_64 is 0. + */ +int insn_rip_relative(struct insn *insn) +{ + struct insn_field *modrm = &insn->modrm; + + if (!insn->x86_64) + return 0; + if (!modrm->got) + insn_get_modrm(insn); + /* + * For rip-relative instructions, the mod field (top 2 bits) + * is zero and the r/m field (bottom 3 bits) is 0x5. + */ + return (modrm->nbytes && (modrm->value & 0xc7) == 0x5); +} + +/** + * insn_get_sib() - Get the SIB byte of instruction + * @insn: &struct insn containing instruction + * + * If necessary, first collects the instruction up to and including the + * ModRM byte. + */ +void insn_get_sib(struct insn *insn) +{ + insn_byte_t modrm; + + if (insn->sib.got) + return; + if (!insn->modrm.got) + insn_get_modrm(insn); + if (insn->modrm.nbytes) { + modrm = (insn_byte_t)insn->modrm.value; + if (insn->addr_bytes != 2 && + X86_MODRM_MOD(modrm) != 3 && X86_MODRM_RM(modrm) == 4) { + insn->sib.value = get_next(insn_byte_t, insn); + insn->sib.nbytes = 1; + } + } + insn->sib.got = 1; +} + + +/** + * insn_get_displacement() - Get the displacement of instruction + * @insn: &struct insn containing instruction + * + * If necessary, first collects the instruction up to and including the + * SIB byte. + * Displacement value is sign-expanded. + */ +void insn_get_displacement(struct insn *insn) +{ + insn_byte_t mod, rm, base; + + if (insn->displacement.got) + return; + if (!insn->sib.got) + insn_get_sib(insn); + if (insn->modrm.nbytes) { + /* + * Interpreting the modrm byte: + * mod = 00 - no displacement fields (exceptions below) + * mod = 01 - 1-byte displacement field + * mod = 10 - displacement field is 4 bytes, or 2 bytes if + * address size = 2 (0x67 prefix in 32-bit mode) + * mod = 11 - no memory operand + * + * If address size = 2... + * mod = 00, r/m = 110 - displacement field is 2 bytes + * + * If address size != 2... + * mod != 11, r/m = 100 - SIB byte exists + * mod = 00, SIB base = 101 - displacement field is 4 bytes + * mod = 00, r/m = 101 - rip-relative addressing, displacement + * field is 4 bytes + */ + mod = X86_MODRM_MOD(insn->modrm.value); + rm = X86_MODRM_RM(insn->modrm.value); + base = X86_SIB_BASE(insn->sib.value); + if (mod == 3) + goto out; + if (mod == 1) { + insn->displacement.value = get_next(char, insn); + insn->displacement.nbytes = 1; + } else if (insn->addr_bytes == 2) { + if ((mod == 0 && rm == 6) || mod == 2) { + insn->displacement.value = + get_next(short, insn); + insn->displacement.nbytes = 2; + } + } else { + if ((mod == 0 && rm == 5) || mod == 2 || + (mod == 0 && base == 5)) { + insn->displacement.value = get_next(int, insn); + insn->displacement.nbytes = 4; + } + } + } +out: + insn->displacement.got = 1; +} + +/* Decode moffset16/32/64 */ +static void __get_moffset(struct insn *insn) +{ + switch (insn->addr_bytes) { + case 2: + insn->moffset1.value = get_next(short, insn); + insn->moffset1.nbytes = 2; + break; + case 4: + insn->moffset1.value = get_next(int, insn); + insn->moffset1.nbytes = 4; + break; + case 8: + insn->moffset1.value = get_next(int, insn); + insn->moffset1.nbytes = 4; + insn->moffset2.value = get_next(int, insn); + insn->moffset2.nbytes = 4; + break; + } + insn->moffset1.got = insn->moffset2.got = 1; +} + +/* Decode imm v32(Iz) */ +static void __get_immv32(struct insn *insn) +{ + switch (insn->opnd_bytes) { + case 2: + insn->immediate.value = get_next(short, insn); + insn->immediate.nbytes = 2; + break; + case 4: + case 8: + insn->immediate.value = get_next(int, insn); + insn->immediate.nbytes = 4; + break; + } +} + +/* Decode imm v64(Iv/Ov) */ +static void __get_immv(struct insn *insn) +{ + switch (insn->opnd_bytes) { + case 2: + insn->immediate1.value = get_next(short, insn); + insn->immediate1.nbytes = 2; + break; + case 4: + insn->immediate1.value = get_next(int, insn); + insn->immediate1.nbytes = 4; + break; + case 8: + insn->immediate1.value = get_next(int, insn); + insn->immediate1.nbytes = 4; + insn->immediate2.value = get_next(int, insn); + insn->immediate2.nbytes = 4; + break; + } + insn->immediate1.got = insn->immediate2.got = 1; +} + +/* Decode ptr16:16/32(Ap) */ +static void __get_immptr(struct insn *insn) +{ + switch (insn->opnd_bytes) { + case 2: + insn->immediate1.value = get_next(short, insn); + insn->immediate1.nbytes = 2; + break; + case 4: + insn->immediate1.value = get_next(int, insn); + insn->immediate1.nbytes = 4; + break; + case 8: + /* ptr16:64 is not exist (no segment) */ + return; + } + insn->immediate2.value = get_next(unsigned short, insn); + insn->immediate2.nbytes = 2; + insn->immediate1.got = insn->immediate2.got = 1; +} + +/** + * insn_get_immediate() - Get the immediates of instruction + * @insn: &struct insn containing instruction + * + * If necessary, first collects the instruction up to and including the + * displacement bytes. + * Basically, most of immediates are sign-expanded. Unsigned-value can be + * get by bit masking with ((1 << (nbytes * 8)) - 1) + */ +void insn_get_immediate(struct insn *insn) +{ + if (insn->immediate.got) + return; + if (!insn->displacement.got) + insn_get_displacement(insn); + + if (inat_has_moffset(insn->attr)) { + __get_moffset(insn); + goto done; + } + + if (!inat_has_immediate(insn->attr)) + /* no immediates */ + goto done; + + switch (inat_immediate_size(insn->attr)) { + case INAT_IMM_BYTE: + insn->immediate.value = get_next(char, insn); + insn->immediate.nbytes = 1; + break; + case INAT_IMM_WORD: + insn->immediate.value = get_next(short, insn); + insn->immediate.nbytes = 2; + break; + case INAT_IMM_DWORD: + insn->immediate.value = get_next(int, insn); + insn->immediate.nbytes = 4; + break; + case INAT_IMM_QWORD: + insn->immediate1.value = get_next(int, insn); + insn->immediate1.nbytes = 4; + insn->immediate2.value = get_next(int, insn); + insn->immediate2.nbytes = 4; + break; + case INAT_IMM_PTR: + __get_immptr(insn); + break; + case INAT_IMM_VWORD32: + __get_immv32(insn); + break; + case INAT_IMM_VWORD: + __get_immv(insn); + break; + default: + break; + } + if (inat_has_second_immediate(insn->attr)) { + insn->immediate2.value = get_next(char, insn); + insn->immediate2.nbytes = 1; + } +done: + insn->immediate.got = 1; +} + +/** + * insn_get_length() - Get the length of instruction + * @insn: &struct insn containing instruction + * + * If necessary, first collects the instruction up to and including the + * immediates bytes. + */ +void insn_get_length(struct insn *insn) +{ + if (insn->length) + return; + if (!insn->immediate.got) + insn_get_immediate(insn); + insn->length = (unsigned char)((unsigned long)insn->next_byte + - (unsigned long)insn->kaddr); +} diff --git a/arch/x86/lib/x86-opcode-map.txt b/arch/x86/lib/x86-opcode-map.txt new file mode 100644 index 0000000..701c467 --- /dev/null +++ b/arch/x86/lib/x86-opcode-map.txt @@ -0,0 +1,841 @@ +# x86 Opcode Maps +# +#<Opcode maps> +# Table: table-name +# Referrer: escaped-name +# opcode: mnemonic|GrpXXX [operand1[,operand2...]] [(extra1)[,(extra2)...] [| 2nd-mnemonic ...] +# (or) +# opcode: escape # escaped-name +# EndTable +# +#<group maps> +# GrpTable: GrpXXX +# reg: mnemonic [operand1[,operand2...]] [(extra1)[,(extra2)...] [| 2nd-mnemonic ...] +# EndTable +# + +Table: one byte opcode +Referrer: +# 0x00 - 0x0f +00: ADD Eb,Gb +01: ADD Ev,Gv +02: ADD Gb,Eb +03: ADD Gv,Ev +04: ADD AL,Ib +05: ADD rAX,Iz +06: PUSH ES (i64) +07: POP ES (i64) +08: OR Eb,Gb +09: OR Ev,Gv +0a: OR Gb,Eb +0b: OR Gv,Ev +0c: OR AL,Ib +0d: OR rAX,Iz +0e: PUSH CS (i64) +0f: escape # 2-byte escape +# 0x10 - 0x1f +10: ADC Eb,Gb +11: ADC Ev,Gv +12: ADC Gb,Eb +13: ADC Gv,Ev +14: ADC AL,Ib +15: ADC rAX,Iz +16: PUSH SS (i64) +17: POP SS (i64) +18: SBB Eb,Gb +19: SBB Ev,Gv +1a: SBB Gb,Eb +1b: SBB Gv,Ev +1c: SBB AL,Ib +1d: SBB rAX,Iz +1e: PUSH DS (i64) +1f: POP DS (i64) +# 0x20 - 0x2f +20: AND Eb,Gb +21: AND Ev,Gv +22: AND Gb,Eb +23: AND Gv,Ev +24: AND AL,Ib +25: AND rAx,Iz +26: SEG=ES (Prefix) +27: DAA (i64) +28: SUB Eb,Gb +29: SUB Ev,Gv +2a: SUB Gb,Eb +2b: SUB Gv,Ev +2c: SUB AL,Ib +2d: SUB rAX,Iz +2e: SEG=CS (Prefix) +2f: DAS (i64) +# 0x30 - 0x3f +30: XOR Eb,Gb +31: XOR Ev,Gv +32: XOR Gb,Eb +33: XOR Gv,Ev +34: XOR AL,Ib +35: XOR rAX,Iz +36: SEG=SS (Prefix) +37: AAA (i64) +38: CMP Eb,Gb +39: CMP Ev,Gv +3a: CMP Gb,Eb +3b: CMP Gv,Ev +3c: CMP AL,Ib +3d: CMP rAX,Iz +3e: SEG=DS (Prefix) +3f: AAS (i64) +# 0x40 - 0x4f +40: INC eAX (i64) | REX (o64) +41: INC eCX (i64) | REX.B (o64) +42: INC eDX (i64) | REX.X (o64) +43: INC eBX (i64) | REX.XB (o64) +44: INC eSP (i64) | REX.R (o64) +45: INC eBP (i64) | REX.RB (o64) +46: INC eSI (i64) | REX.RX (o64) +47: INC eDI (i64) | REX.RXB (o64) +48: DEC eAX (i64) | REX.W (o64) +49: DEC eCX (i64) | REX.WB (o64) +4a: DEC eDX (i64) | REX.WX (o64) +4b: DEC eBX (i64) | REX.WXB (o64) +4c: DEC eSP (i64) | REX.WR (o64) +4d: DEC eBP (i64) | REX.WRB (o64) +4e: DEC eSI (i64) | REX.WRX (o64) +4f: DEC eDI (i64) | REX.WRXB (o64) +# 0x50 - 0x5f +50: PUSH rAX/r8 (d64) +51: PUSH rCX/r9 (d64) +52: PUSH rDX/r10 (d64) +53: PUSH rBX/r11 (d64) +54: PUSH rSP/r12 (d64) +55: PUSH rBP/r13 (d64) +56: PUSH rSI/r14 (d64) +57: PUSH rDI/r15 (d64) +58: POP rAX/r8 (d64) +59: POP rCX/r9 (d64) +5a: POP rDX/r10 (d64) +5b: POP rBX/r11 (d64) +5c: POP rSP/r12 (d64) +5d: POP rBP/r13 (d64) +5e: POP rSI/r14 (d64) +5f: POP rDI/r15 (d64) +# 0x60 - 0x6f +60: PUSHA/PUSHAD (i64) +61: POPA/POPAD (i64) +62: BOUND Gv,Ma (i64) +63: ARPL Ew,Gw (i64) | MOVSXD Gv,Ev (o64) +64: SEG=FS (Prefix) +65: SEG=GS (Prefix) +66: Operand-Size (Prefix) +67: Address-Size (Prefix) +68: PUSH Iz (d64) +69: IMUL Gv,Ev,Iz +6a: PUSH Ib (d64) +6b: IMUL Gv,Ev,Ib +6c: INS/INSB Yb,DX +6d: INS/INSW/INSD Yz,DX +6e: OUTS/OUTSB DX,Xb +6f: OUTS/OUTSW/OUTSD DX,Xz +# 0x70 - 0x7f +70: JO Jb +71: JNO Jb +72: JB/JNAE/JC Jb +73: JNB/JAE/JNC Jb +74: JZ/JE Jb +75: JNZ/JNE Jb +76: JBE/JNA Jb +77: JNBE/JA Jb +78: JS Jb +79: JNS Jb +7a: JP/JPE Jb +7b: JNP/JPO Jb +7c: JL/JNGE Jb +7d: JNL/JGE Jb +7e: JLE/JNG Jb +7f: JNLE/JG Jb +# 0x80 - 0x8f +80: Grp1 Eb,Ib (1A) +81: Grp1 Ev,Iz (1A) +82: Grp1 Eb,Ib (1A),(i64) +83: Grp1 Ev,Ib (1A) +84: TEST Eb,Gb +85: TEST Ev,Gv +86: XCHG Eb,Gb +87: XCHG Ev,Gv +88: MOV Eb,Gb +89: MOV Ev,Gv +8a: MOV Gb,Eb +8b: MOV Gv,Ev +8c: MOV Ev,Sw +8d: LEA Gv,M +8e: MOV Sw,Ew +8f: Grp1A (1A) | POP Ev (d64) +# 0x90 - 0x9f +90: NOP | PAUSE (F3) | XCHG r8,rAX +91: XCHG rCX/r9,rAX +92: XCHG rDX/r10,rAX +93: XCHG rBX/r11,rAX +94: XCHG rSP/r12,rAX +95: XCHG rBP/r13,rAX +96: XCHG rSI/r14,rAX +97: XCHG rDI/r15,rAX +98: CBW/CWDE/CDQE +99: CWD/CDQ/CQO +9a: CALLF Ap (i64) +9b: FWAIT/WAIT +9c: PUSHF/D/Q Fv (d64) +9d: POPF/D/Q Fv (d64) +9e: SAHF +9f: LAHF +# 0xa0 - 0xaf +a0: MOV AL,Ob +a1: MOV rAX,Ov +a2: MOV Ob,AL +a3: MOV Ov,rAX +a4: MOVS/B Xb,Yb +a5: MOVS/W/D/Q Xv,Yv +a6: CMPS/B Xb,Yb +a7: CMPS/W/D Xv,Yv +a8: TEST AL,Ib +a9: TEST rAX,Iz +aa: STOS/B Yb,AL +ab: STOS/W/D/Q Yv,rAX +ac: LODS/B AL,Xb +ad: LODS/W/D/Q rAX,Xv +ae: SCAS/B AL,Yb +af: SCAS/W/D/Q rAX,Xv +# 0xb0 - 0xbf +b0: MOV AL/R8L,Ib +b1: MOV CL/R9L,Ib +b2: MOV DL/R10L,Ib +b3: MOV BL/R11L,Ib +b4: MOV AH/R12L,Ib +b5: MOV CH/R13L,Ib +b6: MOV DH/R14L,Ib +b7: MOV BH/R15L,Ib +b8: MOV rAX/r8,Iv +b9: MOV rCX/r9,Iv +ba: MOV rDX/r10,Iv +bb: MOV rBX/r11,Iv +bc: MOV rSP/r12,Iv +bd: MOV rBP/r13,Iv +be: MOV rSI/r14,Iv +bf: MOV rDI/r15,Iv +# 0xc0 - 0xcf +c0: Grp2 Eb,Ib (1A) +c1: Grp2 Ev,Ib (1A) +c2: RETN Iw (f64) +c3: RETN +c4: LES Gz,Mp (i64) +c5: LDS Gz,Mp (i64) +c6: Grp11 Eb,Ib (1A) +c7: Grp11 Ev,Iz (1A) +c8: ENTER Iw,Ib +c9: LEAVE (d64) +ca: RETF Iw +cb: RETF +cc: INT3 +cd: INT Ib +ce: INTO (i64) +cf: IRET/D/Q +# 0xd0 - 0xdf +d0: Grp2 Eb,1 (1A) +d1: Grp2 Ev,1 (1A) +d2: Grp2 Eb,CL (1A) +d3: Grp2 Ev,CL (1A) +d4: AAM Ib (i64) +d5: AAD Ib (i64) +d6: +d7: XLAT/XLATB +d8: ESC +d9: ESC +da: ESC +db: ESC +dc: ESC +dd: ESC +de: ESC +df: ESC +# 0xe0 - 0xef +e0: LOOPNE/LOOPNZ Jb (f64) +e1: LOOPE/LOOPZ Jb (f64) +e2: LOOP Jb (f64) +e3: JrCXZ Jb (f64) +e4: IN AL,Ib +e5: IN eAX,Ib +e6: OUT Ib,AL +e7: OUT Ib,eAX +e8: CALL Jz (f64) +e9: JMP-near Jz (f64) +ea: JMP-far Ap (i64) +eb: JMP-short Jb (f64) +ec: IN AL,DX +ed: IN eAX,DX +ee: OUT DX,AL +ef: OUT DX,eAX +# 0xf0 - 0xff +f0: LOCK (Prefix) +f1: +f2: REPNE (Prefix) +f3: REP/REPE (Prefix) +f4: HLT +f5: CMC +f6: Grp3_1 Eb (1A) +f7: Grp3_2 Ev (1A) +f8: CLC +f9: STC +fa: CLI +fb: STI +fc: CLD +fd: STD +fe: Grp4 (1A) +ff: Grp5 (1A) +EndTable + +Table: 2-byte opcode # First Byte is 0x0f +Referrer: 2-byte escape +# 0x0f 0x00-0x0f +00: Grp6 (1A) +01: Grp7 (1A) +02: LAR Gv,Ew +03: LSL Gv,Ew +04: +05: SYSCALL (o64) +06: CLTS +07: SYSRET (o64) +08: INVD +09: WBINVD +0a: +0b: UD2 (1B) +0c: +0d: NOP Ev | GrpP +0e: FEMMS +# 3DNow! uses the last imm byte as opcode extension. +0f: 3DNow! Pq,Qq,Ib +# 0x0f 0x10-0x1f +10: movups Vps,Wps | movss Vss,Wss (F3) | movupd Vpd,Wpd (66) | movsd Vsd,Wsd (F2) +11: movups Wps,Vps | movss Wss,Vss (F3) | movupd Wpd,Vpd (66) | movsd Wsd,Vsd (F2) +12: movlps Vq,Mq | movlpd Vq,Mq (66) | movhlps Vq,Uq | movddup Vq,Wq (F2) | movsldup Vq,Wq (F3) +13: mpvlps Mq,Vq | movlpd Mq,Vq (66) +14: unpcklps Vps,Wq | unpcklpd Vpd,Wq (66) +15: unpckhps Vps,Wq | unpckhpd Vpd,Wq (66) +16: movhps Vq,Mq | movhpd Vq,Mq (66) | movlsps Vq,Uq | movshdup Vq,Wq (F3) +17: movhps Mq,Vq | movhpd Mq,Vq (66) +18: Grp16 (1A) +19: +1a: +1b: +1c: +1d: +1e: +1f: NOP Ev +# 0x0f 0x20-0x2f +20: MOV Rd,Cd +21: MOV Rd,Dd +22: MOV Cd,Rd +23: MOV Dd,Rd +24: +25: +26: +27: +28: movaps Vps,Wps | movapd Vpd,Wpd (66) +29: movaps Wps,Vps | movapd Wpd,Vpd (66) +2a: cvtpi2ps Vps,Qpi | cvtsi2ss Vss,Ed/q (F3) | cvtpi2pd Vpd,Qpi (66) | cvtsi2sd Vsd,Ed/q (F2) +2b: movntps Mps,Vps | movntpd Mpd,Vpd (66) +2c: cvttps2pi Ppi,Wps | cvttss2si Gd/q,Wss (F3) | cvttpd2pi Ppi,Wpd (66) | cvttsd2si Gd/q,Wsd (F2) +2d: cvtps2pi Ppi,Wps | cvtss2si Gd/q,Wss (F3) | cvtpd2pi Qpi,Wpd (66) | cvtsd2si Gd/q,Wsd (F2) +2e: ucomiss Vss,Wss | ucomisd Vsd,Wsd (66) +2f: comiss Vss,Wss | comisd Vsd,Wsd (66) +# 0x0f 0x30-0x3f +30: WRMSR +31: RDTSC +32: RDMSR +33: RDPMC +34: SYSENTER +35: SYSEXIT +36: +37: GETSEC +38: escape # 3-byte escape 1 +39: +3a: escape # 3-byte escape 2 +3b: +3c: +3d: +3e: +3f: +# 0x0f 0x40-0x4f +40: CMOVO Gv,Ev +41: CMOVNO Gv,Ev +42: CMOVB/C/NAE Gv,Ev +43: CMOVAE/NB/NC Gv,Ev +44: CMOVE/Z Gv,Ev +45: CMOVNE/NZ Gv,Ev +46: CMOVBE/NA Gv,Ev +47: CMOVA/NBE Gv,Ev +48: CMOVS Gv,Ev +49: CMOVNS Gv,Ev +4a: CMOVP/PE Gv,Ev +4b: CMOVNP/PO Gv,Ev +4c: CMOVL/NGE Gv,Ev +4d: CMOVNL/GE Gv,Ev +4e: CMOVLE/NG Gv,Ev +4f: CMOVNLE/G Gv,Ev +# 0x0f 0x50-0x5f +50: movmskps Gd/q,Ups | movmskpd Gd/q,Upd (66) +51: sqrtps Vps,Wps | sqrtss Vss,Wss (F3) | sqrtpd Vpd,Wpd (66) | sqrtsd Vsd,Wsd (F2) +52: rsqrtps Vps,Wps | rsqrtss Vss,Wss (F3) +53: rcpps Vps,Wps | rcpss Vss,Wss (F3) +54: andps Vps,Wps | andpd Vpd,Wpd (66) +55: andnps Vps,Wps | andnpd Vpd,Wpd (66) +56: orps Vps,Wps | orpd Vpd,Wpd (66) +57: xorps Vps,Wps | xorpd Vpd,Wpd (66) +58: addps Vps,Wps | addss Vss,Wss (F3) | addpd Vpd,Wpd (66) | addsd Vsd,Wsd (F2) +59: mulps Vps,Wps | mulss Vss,Wss (F3) | mulpd Vpd,Wpd (66) | mulsd Vsd,Wsd (F2) +5a: cvtps2pd Vpd,Wps | cvtss2sd Vsd,Wss (F3) | cvtpd2ps Vps,Wpd (66) | cvtsd2ss Vsd,Wsd (F2) +5b: cvtdq2ps Vps,Wdq | cvtps2dq Vdq,Wps (66) | cvttps2dq Vdq,Wps (F3) +5c: subps Vps,Wps | subss Vss,Wss (F3) | subpd Vpd,Wpd (66) | subsd Vsd,Wsd (F2) +5d: minps Vps,Wps | minss Vss,Wss (F3) | minpd Vpd,Wpd (66) | minsd Vsd,Wsd (F2) +5e: divps Vps,Wps | divss Vss,Wss (F3) | divpd Vpd,Wpd (66) | divsd Vsd,Wsd (F2) +5f: maxps Vps,Wps | maxss Vss,Wss (F3) | maxpd Vpd,Wpd (66) | maxsd Vsd,Wsd (F2) +# 0x0f 0x60-0x6f +60: punpcklbw Pq,Qd | punpcklbw Vdq,Wdq (66) +61: punpcklwd Pq,Qd | punpcklwd Vdq,Wdq (66) +62: punpckldq Pq,Qd | punpckldq Vdq,Wdq (66) +63: packsswb Pq,Qq | packsswb Vdq,Wdq (66) +64: pcmpgtb Pq,Qq | pcmpgtb Vdq,Wdq (66) +65: pcmpgtw Pq,Qq | pcmpgtw(66) Vdq,Wdq +66: pcmpgtd Pq,Qq | pcmpgtd Vdq,Wdq (66) +67: packuswb Pq,Qq | packuswb(66) Vdq,Wdq +68: punpckhbw Pq,Qd | punpckhbw Vdq,Wdq (66) +69: punpckhwd Pq,Qd | punpckhwd Vdq,Wdq (66) +6a: punpckhdq Pq,Qd | punpckhdq Vdq,Wdq (66) +6b: packssdw Pq,Qd | packssdw Vdq,Wdq (66) +6c: punpcklqdq Vdq,Wdq (66) +6d: punpckhqdq Vdq,Wdq (66) +6e: movd/q/ Pd,Ed/q | movd/q Vdq,Ed/q (66) +6f: movq Pq,Qq | movdqa Vdq,Wdq (66) | movdqu Vdq,Wdq (F3) +# 0x0f 0x70-0x7f +70: pshufw Pq,Qq,Ib | pshufd Vdq,Wdq,Ib (66) | pshufhw Vdq,Wdq,Ib (F3) | pshuflw VdqWdq,Ib (F2) +71: Grp12 (1A) +72: Grp13 (1A) +73: Grp14 (1A) +74: pcmpeqb Pq,Qq | pcmpeqb Vdq,Wdq (66) +75: pcmpeqw Pq,Qq | pcmpeqw Vdq,Wdq (66) +76: pcmpeqd Pq,Qq | pcmpeqd Vdq,Wdq (66) +77: emms +78: VMREAD Ed/q,Gd/q +79: VMWRITE Gd/q,Ed/q +7a: +7b: +7c: haddps(F2) Vps,Wps | haddpd(66) Vpd,Wpd +7d: hsubps(F2) Vps,Wps | hsubpd(66) Vpd,Wpd +7e: movd/q Ed/q,Pd | movd/q Ed/q,Vdq (66) | movq Vq,Wq (F3) +7f: movq Qq,Pq | movdqa Wdq,Vdq (66) | movdqu Wdq,Vdq (F3) +# 0x0f 0x80-0x8f +80: JO Jz (f64) +81: JNO Jz (f64) +82: JB/JNAE/JC Jz (f64) +83: JNB/JAE/JNC Jz (f64) +84: JZ/JE Jz (f64) +85: JNZ/JNE Jz (f64) +86: JBE/JNA Jz (f64) +87: JNBE/JA Jz (f64) +88: JS Jz (f64) +89: JNS Jz (f64) +8a: JP/JPE Jz (f64) +8b: JNP/JPO Jz (f64) +8c: JL/JNGE Jz (f64) +8d: JNL/JGE Jz (f64) +8e: JLE/JNG Jz (f64) +8f: JNLE/JG Jz (f64) +# 0x0f 0x90-0x9f +90: SETO Eb +91: SETNO Eb +92: SETB/C/NAE Eb +93: SETAE/NB/NC Eb +94: SETE/Z Eb +95: SETNE/NZ Eb +96: SETBE/NA Eb +97: SETA/NBE Eb +98: SETS Eb +99: SETNS Eb +9a: SETP/PE Eb +9b: SETNP/PO Eb +9c: SETL/NGE Eb +9d: SETNL/GE Eb +9e: SETLE/NG Eb +9f: SETNLE/G Eb +# 0x0f 0xa0-0xaf +a0: PUSH FS (d64) +a1: POP FS (d64) +a2: CPUID +a3: BT Ev,Gv +a4: SHLD Ev,Gv,Ib +a5: SHLD Ev,Gv,CL +a6: GrpPDLK +a7: GrpRNG +a8: PUSH GS (d64) +a9: POP GS (d64) +aa: RSM +ab: BTS Ev,Gv +ac: SHRD Ev,Gv,Ib +ad: SHRD Ev,Gv,CL +ae: Grp15 (1A),(1C) +af: IMUL Gv,Ev +# 0x0f 0xb0-0xbf +b0: CMPXCHG Eb,Gb +b1: CMPXCHG Ev,Gv +b2: LSS Gv,Mp +b3: BTR Ev,Gv +b4: LFS Gv,Mp +b5: LGS Gv,Mp +b6: MOVZX Gv,Eb +b7: MOVZX Gv,Ew +b8: JMPE | POPCNT Gv,Ev (F3) +b9: Grp10 (1A) +ba: Grp8 Ev,Ib (1A) +bb: BTC Ev,Gv +bc: BSF Gv,Ev +bd: BSR Gv,Ev +be: MOVSX Gv,Eb +bf: MOVSX Gv,Ew +# 0x0f 0xc0-0xcf +c0: XADD Eb,Gb +c1: XADD Ev,Gv +c2: cmpps Vps,Wps,Ib | cmpss Vss,Wss,Ib (F3) | cmppd Vpd,Wpd,Ib (66) | cmpsd Vsd,Wsd,Ib (F2) +c3: movnti Md/q,Gd/q +c4: pinsrw Pq,Rd/q/Mw,Ib | pinsrw Vdq,Rd/q/Mw,Ib (66) +c5: pextrw Gd,Nq,Ib | pextrw Gd,Udq,Ib (66) +c6: shufps Vps,Wps,Ib | shufpd Vpd,Wpd,Ib (66) +c7: Grp9 (1A) +c8: BSWAP RAX/EAX/R8/R8D +c9: BSWAP RCX/ECX/R9/R9D +ca: BSWAP RDX/EDX/R10/R10D +cb: BSWAP RBX/EBX/R11/R11D +cc: BSWAP RSP/ESP/R12/R12D +cd: BSWAP RBP/EBP/R13/R13D +ce: BSWAP RSI/ESI/R14/R14D +cf: BSWAP RDI/EDI/R15/R15D +# 0x0f 0xd0-0xdf +d0: addsubps Vps,Wps (F2) | addsubpd Vpd,Wpd (66) +d1: psrlw Pq,Qq | psrlw Vdq,Wdq (66) +d2: psrld Pq,Qq | psrld Vdq,Wdq (66) +d3: psrlq Pq,Qq | psrlq Vdq,Wdq (66) +d4: paddq Pq,Qq | paddq Vdq,Wdq (66) +d5: pmullw Pq,Qq | pmullw Vdq,Wdq (66) +d6: movq Wq,Vq (66) | movq2dq Vdq,Nq (F3) | movdq2q Pq,Uq (F2) +d7: pmovmskb Gd,Nq | pmovmskb Gd,Udq (66) +d8: psubusb Pq,Qq | psubusb Vdq,Wdq (66) +d9: psubusw Pq,Qq | psubusw Vdq,Wdq (66) +da: pminub Pq,Qq | pminub Vdq,Wdq (66) +db: pand Pq,Qq | pand Vdq,Wdq (66) +dc: paddusb Pq,Qq | paddusb Vdq,Wdq (66) +dd: paddusw Pq,Qq | paddusw Vdq,Wdq (66) +de: pmaxub Pq,Qq | pmaxub Vdq,Wdq (66) +df: pandn Pq,Qq | pandn Vdq,Wdq (66) +# 0x0f 0xe0-0xef +e0: pavgb Pq,Qq | pavgb Vdq,Wdq (66) +e1: psraw Pq,Qq | psraw Vdq,Wdq (66) +e2: psrad Pq,Qq | psrad Vdq,Wdq (66) +e3: pavgw Pq,Qq | pavgw Vdq,Wdq (66) +e4: pmulhuw Pq,Qq | pmulhuw Vdq,Wdq (66) +e5: pmulhw Pq,Qq | pmulhw Vdq,Wdq (66) +e6: cvtpd2dq Vdq,Wpd (F2) | cvttpd2dq Vdq,Wpd (66) | cvtdq2pd Vpd,Wdq (F3) +e7: movntq Mq,Pq | movntdq Mdq,Vdq (66) +e8: psubsb Pq,Qq | psubsb Vdq,Wdq (66) +e9: psubsw Pq,Qq | psubsw Vdq,Wdq (66) +ea: pminsw Pq,Qq | pminsw Vdq,Wdq (66) +eb: por Pq,Qq | por Vdq,Wdq (66) +ec: paddsb Pq,Qq | paddsb Vdq,Wdq (66) +ed: paddsw Pq,Qq | paddsw Vdq,Wdq (66) +ee: pmaxsw Pq,Qq | pmaxsw Vdq,Wdq (66) +ef: pxor Pq,Qq | pxor Vdq,Wdq (66) +# 0x0f 0xf0-0xff +f0: lddqu Vdq,Mdq (F2) +f1: psllw Pq,Qq | psllw Vdq,Wdq (66) +f2: pslld Pq,Qq | pslld Vdq,Wdq (66) +f3: psllq Pq,Qq | psllq Vdq,Wdq (66) +f4: pmuludq Pq,Qq | pmuludq Vdq,Wdq (66) +f5: pmaddwd Pq,Qq | pmaddwd Vdq,Wdq (66) +f6: psadbw Pq,Qq | psadbw Vdq,Wdq (66) +f7: maskmovq Pq,Nq | maskmovdqu Vdq,Udq (66) +f8: psubb Pq,Qq | psubb Vdq,Wdq (66) +f9: psubw Pq,Qq | psubw Vdq,Wdq (66) +fa: psubd Pq,Qq | psubd Vdq,Wdq (66) +fb: psubq Pq,Qq | psubq Vdq,Wdq (66) +fc: paddb Pq,Qq | paddb Vdq,Wdq (66) +fd: paddw Pq,Qq | paddw Vdq,Wdq (66) +fe: paddd Pq,Qq | paddd Vdq,Wdq (66) +ff: +EndTable + +Table: 3-byte opcode 1 (0x0f 0x38) +Referrer: 3-byte escape 1 +# 0x0f 0x38 0x00-0x0f +00: pshufb Pq,Qq | pshufb Vdq,Wdq (66) +01: phaddw Pq,Qq | phaddw Vdq,Wdq (66) +02: phaddd Pq,Qq | phaddd Vdq,Wdq (66) +03: phaddsw Pq,Qq | phaddsw Vdq,Wdq (66) +04: pmaddubsw Pq,Qq | pmaddubsw (66)Vdq,Wdq +05: phsubw Pq,Qq | phsubw Vdq,Wdq (66) +06: phsubd Pq,Qq | phsubd Vdq,Wdq (66) +07: phsubsw Pq,Qq | phsubsw Vdq,Wdq (66) +08: psignb Pq,Qq | psignb Vdq,Wdq (66) +09: psignw Pq,Qq | psignw Vdq,Wdq (66) +0a: psignd Pq,Qq | psignd Vdq,Wdq (66) +0b: pmulhrsw Pq,Qq | pmulhrsw Vdq,Wdq (66) +0c: +0d: +0e: +0f: +# 0x0f 0x38 0x10-0x1f +10: pblendvb Vdq,Wdq (66) +11: +12: +13: +14: blendvps Vdq,Wdq (66) +15: blendvpd Vdq,Wdq (66) +16: +17: ptest Vdq,Wdq (66) +18: +19: +1a: +1b: +1c: pabsb Pq,Qq | pabsb Vdq,Wdq (66) +1d: pabsw Pq,Qq | pabsw Vdq,Wdq (66) +1e: pabsd Pq,Qq | pabsd Vdq,Wdq (66) +1f: +# 0x0f 0x38 0x20-0x2f +20: pmovsxbw Vdq,Udq/Mq (66) +21: pmovsxbd Vdq,Udq/Md (66) +22: pmovsxbq Vdq,Udq/Mw (66) +23: pmovsxwd Vdq,Udq/Mq (66) +24: pmovsxwq Vdq,Udq/Md (66) +25: pmovsxdq Vdq,Udq/Mq (66) +26: +27: +28: pmuldq Vdq,Wdq (66) +29: pcmpeqq Vdq,Wdq (66) +2a: movntdqa Vdq,Mdq (66) +2b: packusdw Vdq,Wdq (66) +2c: +2d: +2e: +2f: +# 0x0f 0x38 0x30-0x3f +30: pmovzxbw Vdq,Udq/Mq (66) +31: pmovzxbd Vdq,Udq/Md (66) +32: pmovzxbq Vdq,Udq/Mw (66) +33: pmovzxwd Vdq,Udq/Mq (66) +34: pmovzxwq Vdq,Udq/Md (66) +35: pmovzxdq Vdq,Udq/Mq (66) +36: +37: pcmpgtq Vdq,Wdq (66) +38: pminsb Vdq,Wdq (66) +39: pminsd Vdq,Wdq (66) +3a: pminuw Vdq,Wdq (66) +3b: pminud Vdq,Wdq (66) +3c: pmaxsb Vdq,Wdq (66) +3d: pmaxsd Vdq,Wdq (66) +3e: pmaxuw Vdq,Wdq (66) +3f: pmaxud Vdq,Wdq (66) +# 0x0f 0x38 0x4f-0xff +40: pmulld Vdq,Wdq (66) +41: phminposuw Vdq,Wdq (66) +80: INVEPT Gd/q,Mdq (66) +81: INVPID Gd/q,Mdq (66) +db: aesimc Vdq,Wdq (66) +dc: aesenc Vdq,Wdq (66) +dd: aesenclast Vdq,Wdq (66) +de: aesdec Vdq,Wdq (66) +df: aesdeclast Vdq,Wdq (66) +f0: MOVBE Gv,Mv | CRC32 Gd,Eb (F2) +f1: MOVBE Mv,Gv | CRC32 Gd,Ev (F2) +EndTable + +Table: 3-byte opcode 2 (0x0f 0x3a) +Referrer: 3-byte escape 2 +# 0x0f 0x3a 0x00-0xff +08: roundps Vdq,Wdq,Ib (66) +09: roundpd Vdq,Wdq,Ib (66) +0a: roundss Vss,Wss,Ib (66) +0b: roundsd Vsd,Wsd,Ib (66) +0c: blendps Vdq,Wdq,Ib (66) +0d: blendpd Vdq,Wdq,Ib (66) +0e: pblendw Vdq,Wdq,Ib (66) +0f: palignr Pq,Qq,Ib | palignr Vdq,Wdq,Ib (66) +14: pextrb Rd/Mb,Vdq,Ib (66) +15: pextrw Rd/Mw,Vdq,Ib (66) +16: pextrd/pextrq Ed/q,Vdq,Ib (66) +17: extractps Ed,Vdq,Ib (66) +20: pinsrb Vdq,Rd/q/Mb,Ib (66) +21: insertps Vdq,Udq/Md,Ib (66) +22: pinsrd/pinsrq Vdq,Ed/q,Ib (66) +40: dpps Vdq,Wdq,Ib (66) +41: dppd Vdq,Wdq,Ib (66) +42: mpsadbw Vdq,Wdq,Ib (66) +60: pcmpestrm Vdq,Wdq,Ib (66) +61: pcmpestri Vdq,Wdq,Ib (66) +62: pcmpistrm Vdq,Wdq,Ib (66) +63: pcmpistri Vdq,Wdq,Ib (66) +df: aeskeygenassist Vdq,Wdq,Ib (66) +EndTable + +GrpTable: Grp1 +0: ADD +1: OR +2: ADC +3: SBB +4: AND +5: SUB +6: XOR +7: CMP +EndTable + +GrpTable: Grp1A +0: POP +EndTable + +GrpTable: Grp2 +0: ROL +1: ROR +2: RCL +3: RCR +4: SHL/SAL +5: SHR +6: +7: SAR +EndTable + +GrpTable: Grp3_1 +0: TEST Eb,Ib +1: +2: NOT Eb +3: NEG Eb +4: MUL AL,Eb +5: IMUL AL,Eb +6: DIV AL,Eb +7: IDIV AL,Eb +EndTable + +GrpTable: Grp3_2 +0: TEST Ev,Iz +1: +2: NOT Ev +3: NEG Ev +4: MUL rAX,Ev +5: IMUL rAX,Ev +6: DIV rAX,Ev +7: IDIV rAX,Ev +EndTable + +GrpTable: Grp4 +0: INC Eb +1: DEC Eb +EndTable + +GrpTable: Grp5 +0: INC Ev +1: DEC Ev +2: CALLN Ev (f64) +3: CALLF Ep +4: JMPN Ev (f64) +5: JMPF Ep +6: PUSH Ev (d64) +7: +EndTable + +GrpTable: Grp6 +0: SLDT Rv/Mw +1: STR Rv/Mw +2: LLDT Ew +3: LTR Ew +4: VERR Ew +5: VERW Ew +EndTable + +GrpTable: Grp7 +0: SGDT Ms | VMCALL (001),(11B) | VMLAUNCH (010),(11B) | VMRESUME (011),(11B) | VMXOFF (100),(11B) +1: SIDT Ms | MONITOR (000),(11B) | MWAIT (001) +2: LGDT Ms | XGETBV (000),(11B) | XSETBV (001),(11B) +3: LIDT Ms +4: SMSW Mw/Rv +5: +6: LMSW Ew +7: INVLPG Mb | SWAPGS (o64),(000),(11B) | RDTSCP (001),(11B) +EndTable + +GrpTable: Grp8 +4: BT +5: BTS +6: BTR +7: BTC +EndTable + +GrpTable: Grp9 +1: CMPXCHG8B/16B Mq/Mdq +6: VMPTRLD Mq | VMCLEAR Mq (66) | VMXON Mq (F3) +7: VMPTRST Mq +EndTable + +GrpTable: Grp10 +EndTable + +GrpTable: Grp11 +0: MOV +EndTable + +GrpTable: Grp12 +2: psrlw Nq,Ib (11B) | psrlw Udq,Ib (66),(11B) +4: psraw Nq,Ib (11B) | psraw Udq,Ib (66),(11B) +6: psllw Nq,Ib (11B) | psllw Udq,Ib (66),(11B) +EndTable + +GrpTable: Grp13 +2: psrld Nq,Ib (11B) | psrld Udq,Ib (66),(11B) +4: psrad Nq,Ib (11B) | psrad Udq,Ib (66),(11B) +6: pslld Nq,Ib (11B) | pslld Udq,Ib (66),(11B) +EndTable + +GrpTable: Grp14 +2: psrlq Nq,Ib (11B) | psrlq Udq,Ib (66),(11B) +3: psrldq Udq,Ib (66),(11B) +6: psllq Nq,Ib (11B) | psllq Udq,Ib (66),(11B) +7: pslldq Udq,Ib (66),(11B) +EndTable + +GrpTable: Grp15 +0: fxsave +1: fxstor +2: ldmxcsr +3: stmxcsr +4: XSAVE +5: XRSTOR | lfence (11B) +6: mfence (11B) +7: clflush | sfence (11B) +EndTable + +GrpTable: Grp16 +0: prefetch NTA +1: prefetch T0 +2: prefetch T1 +3: prefetch T2 +EndTable + +# AMD's Prefetch Group +GrpTable: GrpP +0: PREFETCH +1: PREFETCHW +EndTable + +GrpTable: GrpPDLK +0: MONTMUL +1: XSHA1 +2: XSHA2 +EndTable + +GrpTable: GrpRNG +0: xstore-rng +1: xcrypt-ecb +2: xcrypt-cbc +4: xcrypt-cfb +5: xcrypt-ofb +EndTable diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index f4cee90..8f4e2ac 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -38,7 +38,8 @@ enum x86_pf_error_code { * Returns 0 if mmiotrace is disabled, or if the fault is not * handled by mmiotrace: */ -static inline int kmmio_fault(struct pt_regs *regs, unsigned long addr) +static inline int __kprobes +kmmio_fault(struct pt_regs *regs, unsigned long addr) { if (unlikely(is_kmmio_active())) if (kmmio_handler(regs, addr) == 1) @@ -46,7 +47,7 @@ static inline int kmmio_fault(struct pt_regs *regs, unsigned long addr) return 0; } -static inline int notify_page_fault(struct pt_regs *regs) +static inline int __kprobes notify_page_fault(struct pt_regs *regs) { int ret = 0; @@ -240,7 +241,7 @@ void vmalloc_sync_all(void) * * Handle a fault on the vmalloc or module mapping area */ -static noinline int vmalloc_fault(unsigned long address) +static noinline __kprobes int vmalloc_fault(unsigned long address) { unsigned long pgd_paddr; pmd_t *pmd_k; @@ -357,7 +358,7 @@ void vmalloc_sync_all(void) * * This assumes no large pages in there. */ -static noinline int vmalloc_fault(unsigned long address) +static noinline __kprobes int vmalloc_fault(unsigned long address) { pgd_t *pgd, *pgd_ref; pud_t *pud, *pud_ref; @@ -860,7 +861,7 @@ static int spurious_fault_check(unsigned long error_code, pte_t *pte) * There are no security implications to leaving a stale TLB when * increasing the permissions on a page. */ -static noinline int +static noinline __kprobes int spurious_fault(unsigned long error_code, unsigned long address) { pgd_t *pgd; diff --git a/arch/x86/tools/Makefile b/arch/x86/tools/Makefile new file mode 100644 index 0000000..5e295d9 --- /dev/null +++ b/arch/x86/tools/Makefile @@ -0,0 +1,15 @@ +PHONY += posttest +quiet_cmd_posttest = TEST $@ + cmd_posttest = $(OBJDUMP) -d -j .text $(objtree)/vmlinux | awk -f $(srctree)/arch/x86/tools/distill.awk | $(obj)/test_get_len $(CONFIG_64BIT) + +posttest: $(obj)/test_get_len vmlinux + $(call cmd,posttest) + +hostprogs-y := test_get_len + +# -I needed for generated C source and C source which in the kernel tree. +HOSTCFLAGS_test_get_len.o := -Wall -I$(objtree)/arch/x86/lib/ -I$(srctree)/arch/x86/include/ -I$(srctree)/arch/x86/lib/ -I$(srctree)/include/ + +# Dependencies are also needed. +$(obj)/test_get_len.o: $(srctree)/arch/x86/lib/insn.c $(srctree)/arch/x86/lib/inat.c $(srctree)/arch/x86/include/asm/inat_types.h $(srctree)/arch/x86/include/asm/inat.h $(srctree)/arch/x86/include/asm/insn.h $(objtree)/arch/x86/lib/inat-tables.c + diff --git a/arch/x86/tools/distill.awk b/arch/x86/tools/distill.awk new file mode 100644 index 0000000..d433619 --- /dev/null +++ b/arch/x86/tools/distill.awk @@ -0,0 +1,42 @@ +#!/bin/awk -f +# Usage: objdump -d a.out | awk -f distill.awk | ./test_get_len +# Distills the disassembly as follows: +# - Removes all lines except the disassembled instructions. +# - For instructions that exceed 1 line (7 bytes), crams all the hex bytes +# into a single line. +# - Remove bad(or prefix only) instructions + +BEGIN { + prev_addr = "" + prev_hex = "" + prev_mnemonic = "" + bad_expr = "(\\(bad\\)|^rex|^.byte|^rep(z|nz)$|^lock$|^es$|^cs$|^ss$|^ds$|^fs$|^gs$|^data(16|32)$|^addr(16|32|64))" + fwait_expr = "^9b " + fwait_str="9b\tfwait" +} + +/^ *[0-9a-f]+:/ { + if (split($0, field, "\t") < 3) { + # This is a continuation of the same insn. + prev_hex = prev_hex field[2] + } else { + # Skip bad instructions + if (match(prev_mnemonic, bad_expr)) + prev_addr = "" + # Split fwait from other f* instructions + if (match(prev_hex, fwait_expr) && prev_mnemonic != "fwait") { + printf "%s\t%s\n", prev_addr, fwait_str + sub(fwait_expr, "", prev_hex) + } + if (prev_addr != "") + printf "%s\t%s\t%s\n", prev_addr, prev_hex, prev_mnemonic + prev_addr = field[1] + prev_hex = field[2] + prev_mnemonic = field[3] + } +} + +END { + if (prev_addr != "") + printf "%s\t%s\t%s\n", prev_addr, prev_hex, prev_mnemonic +} diff --git a/arch/x86/tools/gen-insn-attr-x86.awk b/arch/x86/tools/gen-insn-attr-x86.awk new file mode 100644 index 0000000..19ba096 --- /dev/null +++ b/arch/x86/tools/gen-insn-attr-x86.awk @@ -0,0 +1,334 @@ +#!/bin/awk -f +# gen-insn-attr-x86.awk: Instruction attribute table generator +# Written by Masami Hiramatsu <mhiramat@redhat.com> +# +# Usage: awk -f gen-insn-attr-x86.awk x86-opcode-map.txt > inat-tables.c + +# Awk implementation sanity check +function check_awk_implement() { + if (!match("abc", "[[:lower:]]+")) + return "Your awk doesn't support charactor-class." + if (sprintf("%x", 0) != "0") + return "Your awk has a printf-format problem." + return "" +} + +BEGIN { + # Implementation error checking + awkchecked = check_awk_implement() + if (awkchecked != "") { + print "Error: " awkchecked > "/dev/stderr" + print "Please try to use gawk." > "/dev/stderr" + exit 1 + } + + # Setup generating tables + print "/* x86 opcode map generated from x86-opcode-map.txt */" + print "/* Do not change this code. */" + ggid = 1 + geid = 1 + + opnd_expr = "^[[:alpha:]]" + ext_expr = "^\\(" + sep_expr = "^\\|$" + group_expr = "^Grp[[:alnum:]]+" + + imm_expr = "^[IJAO][[:lower:]]" + imm_flag["Ib"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)" + imm_flag["Jb"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)" + imm_flag["Iw"] = "INAT_MAKE_IMM(INAT_IMM_WORD)" + imm_flag["Id"] = "INAT_MAKE_IMM(INAT_IMM_DWORD)" + imm_flag["Iq"] = "INAT_MAKE_IMM(INAT_IMM_QWORD)" + imm_flag["Ap"] = "INAT_MAKE_IMM(INAT_IMM_PTR)" + imm_flag["Iz"] = "INAT_MAKE_IMM(INAT_IMM_VWORD32)" + imm_flag["Jz"] = "INAT_MAKE_IMM(INAT_IMM_VWORD32)" + imm_flag["Iv"] = "INAT_MAKE_IMM(INAT_IMM_VWORD)" + imm_flag["Ob"] = "INAT_MOFFSET" + imm_flag["Ov"] = "INAT_MOFFSET" + + modrm_expr = "^([CDEGMNPQRSUVW][[:lower:]]+|NTA|T[012])" + force64_expr = "\\([df]64\\)" + rex_expr = "^REX(\\.[XRWB]+)*" + fpu_expr = "^ESC" # TODO + + lprefix1_expr = "\\(66\\)" + delete lptable1 + lprefix2_expr = "\\(F2\\)" + delete lptable2 + lprefix3_expr = "\\(F3\\)" + delete lptable3 + max_lprefix = 4 + + prefix_expr = "\\(Prefix\\)" + prefix_num["Operand-Size"] = "INAT_PFX_OPNDSZ" + prefix_num["REPNE"] = "INAT_PFX_REPNE" + prefix_num["REP/REPE"] = "INAT_PFX_REPE" + prefix_num["LOCK"] = "INAT_PFX_LOCK" + prefix_num["SEG=CS"] = "INAT_PFX_CS" + prefix_num["SEG=DS"] = "INAT_PFX_DS" + prefix_num["SEG=ES"] = "INAT_PFX_ES" + prefix_num["SEG=FS"] = "INAT_PFX_FS" + prefix_num["SEG=GS"] = "INAT_PFX_GS" + prefix_num["SEG=SS"] = "INAT_PFX_SS" + prefix_num["Address-Size"] = "INAT_PFX_ADDRSZ" + + delete table + delete etable + delete gtable + eid = -1 + gid = -1 +} + +function semantic_error(msg) { + print "Semantic error at " NR ": " msg > "/dev/stderr" + exit 1 +} + +function debug(msg) { + print "DEBUG: " msg +} + +function array_size(arr, i,c) { + c = 0 + for (i in arr) + c++ + return c +} + +/^Table:/ { + print "/* " $0 " */" +} + +/^Referrer:/ { + if (NF == 1) { + # primary opcode table + tname = "inat_primary_table" + eid = -1 + } else { + # escape opcode table + ref = "" + for (i = 2; i <= NF; i++) + ref = ref $i + eid = escape[ref] + tname = sprintf("inat_escape_table_%d", eid) + } +} + +/^GrpTable:/ { + print "/* " $0 " */" + if (!($2 in group)) + semantic_error("No group: " $2 ) + gid = group[$2] + tname = "inat_group_table_" gid +} + +function print_table(tbl,name,fmt,n) +{ + print "const insn_attr_t " name " = {" + for (i = 0; i < n; i++) { + id = sprintf(fmt, i) + if (tbl[id]) + print " [" id "] = " tbl[id] "," + } + print "};" +} + +/^EndTable/ { + if (gid != -1) { + # print group tables + if (array_size(table) != 0) { + print_table(table, tname "[INAT_GROUP_TABLE_SIZE]", + "0x%x", 8) + gtable[gid,0] = tname + } + if (array_size(lptable1) != 0) { + print_table(lptable1, tname "_1[INAT_GROUP_TABLE_SIZE]", + "0x%x", 8) + gtable[gid,1] = tname "_1" + } + if (array_size(lptable2) != 0) { + print_table(lptable2, tname "_2[INAT_GROUP_TABLE_SIZE]", + "0x%x", 8) + gtable[gid,2] = tname "_2" + } + if (array_size(lptable3) != 0) { + print_table(lptable3, tname "_3[INAT_GROUP_TABLE_SIZE]", + "0x%x", 8) + gtable[gid,3] = tname "_3" + } + } else { + # print primary/escaped tables + if (array_size(table) != 0) { + print_table(table, tname "[INAT_OPCODE_TABLE_SIZE]", + "0x%02x", 256) + etable[eid,0] = tname + } + if (array_size(lptable1) != 0) { + print_table(lptable1,tname "_1[INAT_OPCODE_TABLE_SIZE]", + "0x%02x", 256) + etable[eid,1] = tname "_1" + } + if (array_size(lptable2) != 0) { + print_table(lptable2,tname "_2[INAT_OPCODE_TABLE_SIZE]", + "0x%02x", 256) + etable[eid,2] = tname "_2" + } + if (array_size(lptable3) != 0) { + print_table(lptable3,tname "_3[INAT_OPCODE_TABLE_SIZE]", + "0x%02x", 256) + etable[eid,3] = tname "_3" + } + } + print "" + delete table + delete lptable1 + delete lptable2 + delete lptable3 + gid = -1 + eid = -1 +} + +function add_flags(old,new) { + if (old && new) + return old " | " new + else if (old) + return old + else + return new +} + +# convert operands to flags. +function convert_operands(opnd, i,imm,mod) +{ + imm = null + mod = null + for (i in opnd) { + i = opnd[i] + if (match(i, imm_expr) == 1) { + if (!imm_flag[i]) + semantic_error("Unknown imm opnd: " i) + if (imm) { + if (i != "Ib") + semantic_error("Second IMM error") + imm = add_flags(imm, "INAT_SCNDIMM") + } else + imm = imm_flag[i] + } else if (match(i, modrm_expr)) + mod = "INAT_MODRM" + } + return add_flags(imm, mod) +} + +/^[0-9a-f]+\:/ { + if (NR == 1) + next + # get index + idx = "0x" substr($1, 1, index($1,":") - 1) + if (idx in table) + semantic_error("Redefine " idx " in " tname) + + # check if escaped opcode + if ("escape" == $2) { + if ($3 != "#") + semantic_error("No escaped name") + ref = "" + for (i = 4; i <= NF; i++) + ref = ref $i + if (ref in escape) + semantic_error("Redefine escape (" ref ")") + escape[ref] = geid + geid++ + table[idx] = "INAT_MAKE_ESCAPE(" escape[ref] ")" + next + } + + variant = null + # converts + i = 2 + while (i <= NF) { + opcode = $(i++) + delete opnds + ext = null + flags = null + opnd = null + # parse one opcode + if (match($i, opnd_expr)) { + opnd = $i + split($(i++), opnds, ",") + flags = convert_operands(opnds) + } + if (match($i, ext_expr)) + ext = $(i++) + if (match($i, sep_expr)) + i++ + else if (i < NF) + semantic_error($i " is not a separator") + + # check if group opcode + if (match(opcode, group_expr)) { + if (!(opcode in group)) { + group[opcode] = ggid + ggid++ + } + flags = add_flags(flags, "INAT_MAKE_GROUP(" group[opcode] ")") + } + # check force(or default) 64bit + if (match(ext, force64_expr)) + flags = add_flags(flags, "INAT_FORCE64") + + # check REX prefix + if (match(opcode, rex_expr)) + flags = add_flags(flags, "INAT_REXPFX") + + # check coprocessor escape : TODO + if (match(opcode, fpu_expr)) + flags = add_flags(flags, "INAT_MODRM") + + # check prefixes + if (match(ext, prefix_expr)) { + if (!prefix_num[opcode]) + semantic_error("Unknown prefix: " opcode) + flags = add_flags(flags, "INAT_MAKE_PREFIX(" prefix_num[opcode] ")") + } + if (length(flags) == 0) + continue + # check if last prefix + if (match(ext, lprefix1_expr)) { + lptable1[idx] = add_flags(lptable1[idx],flags) + variant = "INAT_VARIANT" + } else if (match(ext, lprefix2_expr)) { + lptable2[idx] = add_flags(lptable2[idx],flags) + variant = "INAT_VARIANT" + } else if (match(ext, lprefix3_expr)) { + lptable3[idx] = add_flags(lptable3[idx],flags) + variant = "INAT_VARIANT" + } else { + table[idx] = add_flags(table[idx],flags) + } + } + if (variant) + table[idx] = add_flags(table[idx],variant) +} + +END { + if (awkchecked != "") + exit 1 + # print escape opcode map's array + print "/* Escape opcode map array */" + print "const insn_attr_t const *inat_escape_tables[INAT_ESC_MAX + 1]" \ + "[INAT_LPREFIX_MAX + 1] = {" + for (i = 0; i < geid; i++) + for (j = 0; j < max_lprefix; j++) + if (etable[i,j]) + print " ["i"]["j"] = "etable[i,j]"," + print "};\n" + # print group opcode map's array + print "/* Group opcode map array */" + print "const insn_attr_t const *inat_group_tables[INAT_GRP_MAX + 1]"\ + "[INAT_LPREFIX_MAX + 1] = {" + for (i = 0; i < ggid; i++) + for (j = 0; j < max_lprefix; j++) + if (gtable[i,j]) + print " ["i"]["j"] = "gtable[i,j]"," + print "};" +} diff --git a/arch/x86/tools/test_get_len.c b/arch/x86/tools/test_get_len.c new file mode 100644 index 0000000..376d338 --- /dev/null +++ b/arch/x86/tools/test_get_len.c @@ -0,0 +1,108 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright (C) IBM Corporation, 2009 + */ + +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <assert.h> + +#define unlikely(cond) (cond) + +#include <asm/insn.h> +#include <inat.c> +#include <insn.c> + +/* + * Test of instruction analysis in general and insn_get_length() in + * particular. See if insn_get_length() and the disassembler agree + * on the length of each instruction in an elf disassembly. + * + * Usage: objdump -d a.out | awk -f distill.awk | ./test_get_len + */ + +const char *prog; + +static void usage(void) +{ + fprintf(stderr, "Usage: objdump -d a.out | awk -f distill.awk |" + " %s [y|n](64bit flag)\n", prog); + exit(1); +} + +static void malformed_line(const char *line, int line_nr) +{ + fprintf(stderr, "%s: malformed line %d:\n%s", prog, line_nr, line); + exit(3); +} + +#define BUFSIZE 256 + +int main(int argc, char **argv) +{ + char line[BUFSIZE]; + unsigned char insn_buf[16]; + struct insn insn; + int insns = 0; + int x86_64 = 0; + + prog = argv[0]; + if (argc > 2) + usage(); + + if (argc == 2 && argv[1][0] == 'y') + x86_64 = 1; + + while (fgets(line, BUFSIZE, stdin)) { + char copy[BUFSIZE], *s, *tab1, *tab2; + int nb = 0; + unsigned int b; + + insns++; + memset(insn_buf, 0, 16); + strcpy(copy, line); + tab1 = strchr(copy, '\t'); + if (!tab1) + malformed_line(line, insns); + s = tab1 + 1; + s += strspn(s, " "); + tab2 = strchr(s, '\t'); + if (!tab2) + malformed_line(line, insns); + *tab2 = '\0'; /* Characters beyond tab2 aren't examined */ + while (s < tab2) { + if (sscanf(s, "%x", &b) == 1) { + insn_buf[nb++] = (unsigned char) b; + s += 3; + } else + break; + } + /* Decode an instruction */ + insn_init(&insn, insn_buf, x86_64); + insn_get_length(&insn); + if (insn.length != nb) { + fprintf(stderr, "Error: %s", line); + fprintf(stderr, "Error: objdump says %d bytes, but " + "insn_get_length() says %d (attr:%x)\n", nb, + insn.length, insn.attr); + exit(2); + } + } + fprintf(stderr, "Succeed: decoded and checked %d instructions\n", + insns); + return 0; +} diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig index 0ed42d8..93d2c79 100644 --- a/drivers/acpi/Kconfig +++ b/drivers/acpi/Kconfig @@ -218,10 +218,10 @@ config ACPI_PROCESSOR_AGGREGATOR depends on X86 help ACPI 4.0 defines processor Aggregator, which enables OS to perform - specfic processor configuration and control that applies to all + specific processor configuration and control that applies to all processors in the platform. Currently only logical processor idling is defined, which is to reduce power consumption. This driver - support the new device. + supports the new device. config ACPI_THERMAL tristate "Thermal Zone" diff --git a/drivers/acpi/ac.c b/drivers/acpi/ac.c index 98b9690..b6ed60b 100644 --- a/drivers/acpi/ac.c +++ b/drivers/acpi/ac.c @@ -245,6 +245,7 @@ static void acpi_ac_notify(struct acpi_device *device, u32 event) acpi_bus_generate_netlink_event(device->pnp.device_class, dev_name(&device->dev), event, (u32) ac->state); + acpi_notifier_call_chain(device, event, (u32) ac->state); #ifdef CONFIG_ACPI_SYSFS_POWER kobject_uevent(&ac->charger.dev->kobj, KOBJ_CHANGE); #endif diff --git a/drivers/acpi/button.c b/drivers/acpi/button.c index 9335b87..0c9c6a9 100644 --- a/drivers/acpi/button.c +++ b/drivers/acpi/button.c @@ -251,6 +251,9 @@ int acpi_lid_open(void) acpi_status status; unsigned long long state; + if (!lid_device) + return -ENODEV; + status = acpi_evaluate_integer(lid_device->handle, "_LID", NULL, &state); if (ACPI_FAILURE(status)) diff --git a/drivers/acpi/pci_root.c b/drivers/acpi/pci_root.c index 3112221..1af8081 100644 --- a/drivers/acpi/pci_root.c +++ b/drivers/acpi/pci_root.c @@ -389,6 +389,17 @@ struct pci_dev *acpi_get_pci_dev(acpi_handle handle) pbus = pdev->subordinate; pci_dev_put(pdev); + + /* + * This function may be called for a non-PCI device that has a + * PCI parent (eg. a disk under a PCI SATA controller). In that + * case pdev->subordinate will be NULL for the parent. + */ + if (!pbus) { + dev_dbg(&pdev->dev, "Not a PCI-to-PCI bridge\n"); + pdev = NULL; + break; + } } out: list_for_each_entry_safe(node, tmp, &device_list, node) diff --git a/drivers/acpi/video.c b/drivers/acpi/video.c index f6e54bf..64e3c58 100644 --- a/drivers/acpi/video.c +++ b/drivers/acpi/video.c @@ -1109,7 +1109,12 @@ static int acpi_video_bus_check(struct acpi_video_bus *video) */ /* Does this device support video switching? */ - if (video->cap._DOS) { + if (video->cap._DOS || video->cap._DOD) { + if (!video->cap._DOS) { + printk(KERN_WARNING FW_BUG + "ACPI(%s) defines _DOD but not _DOS\n", + acpi_device_bid(video->device)); + } video->flags.multihead = 1; status = 0; } diff --git a/drivers/acpi/video_detect.c b/drivers/acpi/video_detect.c index 7032f25..575593a 100644 --- a/drivers/acpi/video_detect.c +++ b/drivers/acpi/video_detect.c @@ -84,7 +84,7 @@ long acpi_is_video_device(struct acpi_device *device) return 0; /* Does this device able to support video switching ? */ - if (ACPI_SUCCESS(acpi_get_handle(device->handle, "_DOD", &h_dummy)) && + if (ACPI_SUCCESS(acpi_get_handle(device->handle, "_DOD", &h_dummy)) || ACPI_SUCCESS(acpi_get_handle(device->handle, "_DOS", &h_dummy))) video_caps |= ACPI_VIDEO_OUTPUT_SWITCHING; diff --git a/drivers/char/vt_ioctl.c b/drivers/char/vt_ioctl.c index 6b36ee5..ed86d3b 100644 --- a/drivers/char/vt_ioctl.c +++ b/drivers/char/vt_ioctl.c @@ -1532,7 +1532,7 @@ long vt_compat_ioctl(struct tty_struct *tty, struct file * file, case PIO_UNIMAP: case GIO_UNIMAP: - ret = do_unimap_ioctl(cmd, up, perm, vc); + ret = compat_unimap_ioctl(cmd, up, perm, vc); break; /* diff --git a/drivers/macintosh/via-pmu.c b/drivers/macintosh/via-pmu.c index b40fb9b..6f308a4 100644 --- a/drivers/macintosh/via-pmu.c +++ b/drivers/macintosh/via-pmu.c @@ -405,7 +405,11 @@ static int __init via_pmu_start(void) printk(KERN_ERR "via-pmu: can't map interrupt\n"); return -ENODEV; } - if (request_irq(irq, via_pmu_interrupt, 0, "VIA-PMU", (void *)0)) { + /* We set IRQF_TIMER because we don't want the interrupt to be disabled + * between the 2 passes of driver suspend, we control our own disabling + * for that one + */ + if (request_irq(irq, via_pmu_interrupt, IRQF_TIMER, "VIA-PMU", (void *)0)) { printk(KERN_ERR "via-pmu: can't request irq %d\n", irq); return -ENODEV; } @@ -419,7 +423,7 @@ static int __init via_pmu_start(void) gpio_irq = irq_of_parse_and_map(gpio_node, 0); if (gpio_irq != NO_IRQ) { - if (request_irq(gpio_irq, gpio1_interrupt, 0, + if (request_irq(gpio_irq, gpio1_interrupt, IRQF_TIMER, "GPIO1 ADB", (void *)0)) printk(KERN_ERR "pmu: can't get irq %d" " (GPIO1)\n", gpio_irq); @@ -925,8 +929,7 @@ proc_write_options(struct file *file, const char __user *buffer, #ifdef CONFIG_ADB /* Send an ADB command */ -static int -pmu_send_request(struct adb_request *req, int sync) +static int pmu_send_request(struct adb_request *req, int sync) { int i, ret; @@ -1005,16 +1008,11 @@ pmu_send_request(struct adb_request *req, int sync) } /* Enable/disable autopolling */ -static int -pmu_adb_autopoll(int devs) +static int __pmu_adb_autopoll(int devs) { struct adb_request req; - if ((vias == NULL) || (!pmu_fully_inited) || !pmu_has_adb) - return -ENXIO; - if (devs) { - adb_dev_map = devs; pmu_request(&req, NULL, 5, PMU_ADB_CMD, 0, 0x86, adb_dev_map >> 8, adb_dev_map); pmu_adb_flags = 2; @@ -1027,9 +1025,17 @@ pmu_adb_autopoll(int devs) return 0; } +static int pmu_adb_autopoll(int devs) +{ + if ((vias == NULL) || (!pmu_fully_inited) || !pmu_has_adb) + return -ENXIO; + + adb_dev_map = devs; + return __pmu_adb_autopoll(devs); +} + /* Reset the ADB bus */ -static int -pmu_adb_reset_bus(void) +static int pmu_adb_reset_bus(void) { struct adb_request req; int save_autopoll = adb_dev_map; @@ -1038,13 +1044,13 @@ pmu_adb_reset_bus(void) return -ENXIO; /* anyone got a better idea?? */ - pmu_adb_autopoll(0); + __pmu_adb_autopoll(0); - req.nbytes = 5; + req.nbytes = 4; req.done = NULL; req.data[0] = PMU_ADB_CMD; - req.data[1] = 0; - req.data[2] = ADB_BUSRESET; + req.data[1] = ADB_BUSRESET; + req.data[2] = 0; req.data[3] = 0; req.data[4] = 0; req.reply_len = 0; @@ -1056,7 +1062,7 @@ pmu_adb_reset_bus(void) pmu_wait_complete(&req); if (save_autopoll != 0) - pmu_adb_autopoll(save_autopoll); + __pmu_adb_autopoll(save_autopoll); return 0; } diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index 7127760..e19ca4b 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -1741,6 +1741,7 @@ config KS8851 config KS8851_MLL tristate "Micrel KS8851 MLL" depends on HAS_IOMEM + select MII help This platform driver is for Micrel KS8851 Address/data bus multiplexed network chip. @@ -2482,6 +2483,8 @@ config S6GMAC To compile this driver as a module, choose M here. The module will be called s6gmac. +source "drivers/net/stmmac/Kconfig" + endif # NETDEV_1000 # @@ -3230,4 +3233,12 @@ config VIRTIO_NET This is the virtual network driver for virtio. It can be used with lguest or QEMU based VMMs (like KVM or Xen). Say Y or M. +config VMXNET3 + tristate "VMware VMXNET3 ethernet driver" + depends on PCI && X86 && INET + help + This driver supports VMware's vmxnet3 virtual ethernet NIC. + To compile this driver as a module, choose M here: the + module will be called vmxnet3. + endif # NETDEVICES diff --git a/drivers/net/Makefile b/drivers/net/Makefile index d866b8c..246323d 100644 --- a/drivers/net/Makefile +++ b/drivers/net/Makefile @@ -2,6 +2,10 @@ # Makefile for the Linux network (ethercard) device drivers. # +obj-$(CONFIG_MII) += mii.o +obj-$(CONFIG_MDIO) += mdio.o +obj-$(CONFIG_PHYLIB) += phy/ + obj-$(CONFIG_TI_DAVINCI_EMAC) += davinci_emac.o obj-$(CONFIG_E1000) += e1000/ @@ -26,6 +30,7 @@ obj-$(CONFIG_TEHUTI) += tehuti.o obj-$(CONFIG_ENIC) += enic/ obj-$(CONFIG_JME) += jme.o obj-$(CONFIG_BE2NET) += benet/ +obj-$(CONFIG_VMXNET3) += vmxnet3/ gianfar_driver-objs := gianfar.o \ gianfar_ethtool.o \ @@ -95,15 +100,12 @@ obj-$(CONFIG_VIA_VELOCITY) += via-velocity.o obj-$(CONFIG_ADAPTEC_STARFIRE) += starfire.o obj-$(CONFIG_RIONET) += rionet.o obj-$(CONFIG_SH_ETH) += sh_eth.o +obj-$(CONFIG_STMMAC_ETH) += stmmac/ # # end link order section # -obj-$(CONFIG_MII) += mii.o -obj-$(CONFIG_MDIO) += mdio.o -obj-$(CONFIG_PHYLIB) += phy/ - obj-$(CONFIG_SUNDANCE) += sundance.o obj-$(CONFIG_HAMACHI) += hamachi.o obj-$(CONFIG_NET) += Space.o loopback.o diff --git a/drivers/net/acenic.c b/drivers/net/acenic.c index 5f0b05c..d82a9a9 100644 --- a/drivers/net/acenic.c +++ b/drivers/net/acenic.c @@ -1209,7 +1209,8 @@ static int __devinit ace_init(struct net_device *dev) memset(ap->info, 0, sizeof(struct ace_info)); memset(ap->skb, 0, sizeof(struct ace_skb)); - if (ace_load_firmware(dev)) + ecode = ace_load_firmware(dev); + if (ecode) goto init_error; ap->fw_running = 0; diff --git a/drivers/net/can/sja1000/sja1000_of_platform.c b/drivers/net/can/sja1000/sja1000_of_platform.c index 3373560..9dd076a 100644 --- a/drivers/net/can/sja1000/sja1000_of_platform.c +++ b/drivers/net/can/sja1000/sja1000_of_platform.c @@ -213,6 +213,7 @@ static struct of_device_id __devinitdata sja1000_ofp_table[] = { {.compatible = "nxp,sja1000"}, {}, }; +MODULE_DEVICE_TABLE(of, sja1000_ofp_table); static struct of_platform_driver sja1000_ofp_driver = { .owner = THIS_MODULE, diff --git a/drivers/net/davinci_emac.c b/drivers/net/davinci_emac.c index 65a2d0b..f72c56d 100644 --- a/drivers/net/davinci_emac.c +++ b/drivers/net/davinci_emac.c @@ -333,6 +333,9 @@ static const char emac_version_string[] = "TI DaVinci EMAC Linux v6.1"; #define EMAC_DM646X_MAC_EOI_C0_RXEN (0x01) #define EMAC_DM646X_MAC_EOI_C0_TXEN (0x02) +/* EMAC Stats Clear Mask */ +#define EMAC_STATS_CLR_MASK (0xFFFFFFFF) + /** net_buf_obj: EMAC network bufferdata structure * * EMAC network buffer data structure @@ -2548,40 +2551,49 @@ static int emac_dev_stop(struct net_device *ndev) static struct net_device_stats *emac_dev_getnetstats(struct net_device *ndev) { struct emac_priv *priv = netdev_priv(ndev); + u32 mac_control; + u32 stats_clear_mask; /* update emac hardware stats and reset the registers*/ + mac_control = emac_read(EMAC_MACCONTROL); + + if (mac_control & EMAC_MACCONTROL_GMIIEN) + stats_clear_mask = EMAC_STATS_CLR_MASK; + else + stats_clear_mask = 0; + priv->net_dev_stats.multicast += emac_read(EMAC_RXMCASTFRAMES); - emac_write(EMAC_RXMCASTFRAMES, EMAC_ALL_MULTI_REG_VALUE); + emac_write(EMAC_RXMCASTFRAMES, stats_clear_mask); priv->net_dev_stats.collisions += (emac_read(EMAC_TXCOLLISION) + emac_read(EMAC_TXSINGLECOLL) + emac_read(EMAC_TXMULTICOLL)); - emac_write(EMAC_TXCOLLISION, EMAC_ALL_MULTI_REG_VALUE); - emac_write(EMAC_TXSINGLECOLL, EMAC_ALL_MULTI_REG_VALUE); - emac_write(EMAC_TXMULTICOLL, EMAC_ALL_MULTI_REG_VALUE); + emac_write(EMAC_TXCOLLISION, stats_clear_mask); + emac_write(EMAC_TXSINGLECOLL, stats_clear_mask); + emac_write(EMAC_TXMULTICOLL, stats_clear_mask); priv->net_dev_stats.rx_length_errors += (emac_read(EMAC_RXOVERSIZED) + emac_read(EMAC_RXJABBER) + emac_read(EMAC_RXUNDERSIZED)); - emac_write(EMAC_RXOVERSIZED, EMAC_ALL_MULTI_REG_VALUE); - emac_write(EMAC_RXJABBER, EMAC_ALL_MULTI_REG_VALUE); - emac_write(EMAC_RXUNDERSIZED, EMAC_ALL_MULTI_REG_VALUE); + emac_write(EMAC_RXOVERSIZED, stats_clear_mask); + emac_write(EMAC_RXJABBER, stats_clear_mask); + emac_write(EMAC_RXUNDERSIZED, stats_clear_mask); priv->net_dev_stats.rx_over_errors += (emac_read(EMAC_RXSOFOVERRUNS) + emac_read(EMAC_RXMOFOVERRUNS)); - emac_write(EMAC_RXSOFOVERRUNS, EMAC_ALL_MULTI_REG_VALUE); - emac_write(EMAC_RXMOFOVERRUNS, EMAC_ALL_MULTI_REG_VALUE); + emac_write(EMAC_RXSOFOVERRUNS, stats_clear_mask); + emac_write(EMAC_RXMOFOVERRUNS, stats_clear_mask); priv->net_dev_stats.rx_fifo_errors += emac_read(EMAC_RXDMAOVERRUNS); - emac_write(EMAC_RXDMAOVERRUNS, EMAC_ALL_MULTI_REG_VALUE); + emac_write(EMAC_RXDMAOVERRUNS, stats_clear_mask); priv->net_dev_stats.tx_carrier_errors += emac_read(EMAC_TXCARRIERSENSE); - emac_write(EMAC_TXCARRIERSENSE, EMAC_ALL_MULTI_REG_VALUE); + emac_write(EMAC_TXCARRIERSENSE, stats_clear_mask); priv->net_dev_stats.tx_fifo_errors = emac_read(EMAC_TXUNDERRUN); - emac_write(EMAC_TXUNDERRUN, EMAC_ALL_MULTI_REG_VALUE); + emac_write(EMAC_TXUNDERRUN, stats_clear_mask); return &priv->net_dev_stats; } diff --git a/drivers/net/ethoc.c b/drivers/net/ethoc.c index 96f5b2a..9c950bb 100644 --- a/drivers/net/ethoc.c +++ b/drivers/net/ethoc.c @@ -664,7 +664,8 @@ static int ethoc_open(struct net_device *dev) return ret; /* calculate the number of TX/RX buffers, maximum 128 supported */ - num_bd = min(128, (dev->mem_end - dev->mem_start + 1) / ETHOC_BUFSIZ); + num_bd = min_t(unsigned int, + 128, (dev->mem_end - dev->mem_start + 1) / ETHOC_BUFSIZ); priv->num_tx = max(min_tx, num_bd / 4); priv->num_rx = num_bd - priv->num_tx; ethoc_write(priv, TX_BD_NUM, priv->num_tx); diff --git a/drivers/net/fec_mpc52xx.c b/drivers/net/fec_mpc52xx.c index c40113f..66dace6 100644 --- a/drivers/net/fec_mpc52xx.c +++ b/drivers/net/fec_mpc52xx.c @@ -759,12 +759,6 @@ static void mpc52xx_fec_reset(struct net_device *dev) mpc52xx_fec_hw_init(dev); - if (priv->phydev) { - phy_stop(priv->phydev); - phy_write(priv->phydev, MII_BMCR, BMCR_RESET); - phy_start(priv->phydev); - } - bcom_fec_rx_reset(priv->rx_dmatsk); bcom_fec_tx_reset(priv->tx_dmatsk); diff --git a/drivers/net/fec_mpc52xx_phy.c b/drivers/net/fec_mpc52xx_phy.c index 31e6d62..ee0f3c6 100644 --- a/drivers/net/fec_mpc52xx_phy.c +++ b/drivers/net/fec_mpc52xx_phy.c @@ -155,6 +155,7 @@ static struct of_device_id mpc52xx_fec_mdio_match[] = { { .compatible = "mpc5200b-fec-phy", }, {} }; +MODULE_DEVICE_TABLE(of, mpc52xx_fec_mdio_match); struct of_platform_driver mpc52xx_fec_mdio_driver = { .name = "mpc5200b-fec-phy", diff --git a/drivers/net/fs_enet/fs_enet-main.c b/drivers/net/fs_enet/fs_enet-main.c index 2bc2d2b..ec2f503 100644 --- a/drivers/net/fs_enet/fs_enet-main.c +++ b/drivers/net/fs_enet/fs_enet-main.c @@ -1110,6 +1110,7 @@ static struct of_device_id fs_enet_match[] = { #endif {} }; +MODULE_DEVICE_TABLE(of, fs_enet_match); static struct of_platform_driver fs_enet_driver = { .name = "fs_enet", diff --git a/drivers/net/fs_enet/mii-bitbang.c b/drivers/net/fs_enet/mii-bitbang.c index 93b481b..24ff9f4 100644 --- a/drivers/net/fs_enet/mii-bitbang.c +++ b/drivers/net/fs_enet/mii-bitbang.c @@ -221,6 +221,7 @@ static struct of_device_id fs_enet_mdio_bb_match[] = { }, {}, }; +MODULE_DEVICE_TABLE(of, fs_enet_mdio_bb_match); static struct of_platform_driver fs_enet_bb_mdio_driver = { .name = "fsl-bb-mdio", diff --git a/drivers/net/fs_enet/mii-fec.c b/drivers/net/fs_enet/mii-fec.c index a2d69c1..96eba42 100644 --- a/drivers/net/fs_enet/mii-fec.c +++ b/drivers/net/fs_enet/mii-fec.c @@ -219,6 +219,7 @@ static struct of_device_id fs_enet_mdio_fec_match[] = { #endif {}, }; +MODULE_DEVICE_TABLE(of, fs_enet_mdio_fec_match); static struct of_platform_driver fs_enet_fec_mdio_driver = { .name = "fsl-fec-mdio", diff --git a/drivers/net/fsl_pq_mdio.c b/drivers/net/fsl_pq_mdio.c index d167090..6ac4648 100644 --- a/drivers/net/fsl_pq_mdio.c +++ b/drivers/net/fsl_pq_mdio.c @@ -407,6 +407,7 @@ static struct of_device_id fsl_pq_mdio_match[] = { }, {}, }; +MODULE_DEVICE_TABLE(of, fsl_pq_mdio_match); static struct of_platform_driver fsl_pq_mdio_driver = { .name = "fsl-pq_mdio", diff --git a/drivers/net/gianfar.c b/drivers/net/gianfar.c index 1e5289f..5bf31f1 100644 --- a/drivers/net/gianfar.c +++ b/drivers/net/gianfar.c @@ -2325,9 +2325,6 @@ static irqreturn_t gfar_error(int irq, void *dev_id) return IRQ_HANDLED; } -/* work with hotplug and coldplug */ -MODULE_ALIAS("platform:fsl-gianfar"); - static struct of_device_id gfar_match[] = { { @@ -2336,6 +2333,7 @@ static struct of_device_id gfar_match[] = }, {}, }; +MODULE_DEVICE_TABLE(of, gfar_match); /* Structure for a device driver */ static struct of_platform_driver gfar_driver = { diff --git a/drivers/net/ibm_newemac/core.c b/drivers/net/ibm_newemac/core.c index 89c82c5..3fae875 100644 --- a/drivers/net/ibm_newemac/core.c +++ b/drivers/net/ibm_newemac/core.c @@ -24,6 +24,7 @@ * */ +#include <linux/module.h> #include <linux/sched.h> #include <linux/string.h> #include <linux/errno.h> @@ -443,7 +444,7 @@ static u32 __emac_calc_base_mr1(struct emac_instance *dev, int tx_size, int rx_s ret |= EMAC_MR1_TFS_2K; break; default: - printk(KERN_WARNING "%s: Unknown Rx FIFO size %d\n", + printk(KERN_WARNING "%s: Unknown Tx FIFO size %d\n", dev->ndev->name, tx_size); } @@ -470,6 +471,9 @@ static u32 __emac4_calc_base_mr1(struct emac_instance *dev, int tx_size, int rx_ DBG2(dev, "__emac4_calc_base_mr1" NL); switch(tx_size) { + case 16384: + ret |= EMAC4_MR1_TFS_16K; + break; case 4096: ret |= EMAC4_MR1_TFS_4K; break; @@ -477,7 +481,7 @@ static u32 __emac4_calc_base_mr1(struct emac_instance *dev, int tx_size, int rx_ ret |= EMAC4_MR1_TFS_2K; break; default: - printk(KERN_WARNING "%s: Unknown Rx FIFO size %d\n", + printk(KERN_WARNING "%s: Unknown Tx FIFO size %d\n", dev->ndev->name, tx_size); } @@ -2985,6 +2989,7 @@ static struct of_device_id emac_match[] = }, {}, }; +MODULE_DEVICE_TABLE(of, emac_match); static struct of_platform_driver emac_driver = { .name = "emac", diff --git a/drivers/net/ibm_newemac/emac.h b/drivers/net/ibm_newemac/emac.h index 0afc2cf..d34adf9 100644 --- a/drivers/net/ibm_newemac/emac.h +++ b/drivers/net/ibm_newemac/emac.h @@ -153,6 +153,7 @@ struct emac_regs { #define EMAC4_MR1_RFS_16K 0x00280000 #define EMAC4_MR1_TFS_2K 0x00020000 #define EMAC4_MR1_TFS_4K 0x00030000 +#define EMAC4_MR1_TFS_16K 0x00050000 #define EMAC4_MR1_TR 0x00008000 #define EMAC4_MR1_MWSW_001 0x00001000 #define EMAC4_MR1_JPSM 0x00000800 diff --git a/drivers/net/irda/sa1100_ir.c b/drivers/net/irda/sa1100_ir.c index 38bf7cf..c412e80 100644 --- a/drivers/net/irda/sa1100_ir.c +++ b/drivers/net/irda/sa1100_ir.c @@ -232,8 +232,11 @@ static int sa1100_irda_startup(struct sa1100_irda *si) /* * Ensure that the ports for this device are setup correctly. */ - if (si->pdata->startup) - si->pdata->startup(si->dev); + if (si->pdata->startup) { + ret = si->pdata->startup(si->dev); + if (ret) + return ret; + } /* * Configure PPC for IRDA - we want to drive TXD2 low. diff --git a/drivers/net/ixp2000/enp2611.c b/drivers/net/ixp2000/enp2611.c index b02a981..34a6cfd 100644 --- a/drivers/net/ixp2000/enp2611.c +++ b/drivers/net/ixp2000/enp2611.c @@ -119,24 +119,9 @@ static struct ixp2400_msf_parameters enp2611_msf_parameters = } }; -struct enp2611_ixpdev_priv -{ - struct ixpdev_priv ixpdev_priv; - struct net_device_stats stats; -}; - static struct net_device *nds[3]; static struct timer_list link_check_timer; -static struct net_device_stats *enp2611_get_stats(struct net_device *dev) -{ - struct enp2611_ixpdev_priv *ip = netdev_priv(dev); - - pm3386_get_stats(ip->ixpdev_priv.channel, &(ip->stats)); - - return &(ip->stats); -} - /* @@@ Poll the SFP moddef0 line too. */ /* @@@ Try to use the pm3386 DOOL interrupt as well. */ static void enp2611_check_link_status(unsigned long __dummy) @@ -203,14 +188,13 @@ static int __init enp2611_init_module(void) ports = pm3386_port_count(); for (i = 0; i < ports; i++) { - nds[i] = ixpdev_alloc(i, sizeof(struct enp2611_ixpdev_priv)); + nds[i] = ixpdev_alloc(i, sizeof(struct ixpdev_priv)); if (nds[i] == NULL) { while (--i >= 0) free_netdev(nds[i]); return -ENOMEM; } - nds[i]->get_stats = enp2611_get_stats; pm3386_init_port(i); pm3386_get_mac(i, nds[i]->dev_addr); } diff --git a/drivers/net/ixp2000/ixpdev.c b/drivers/net/ixp2000/ixpdev.c index 1272434..9aee0cc 100644 --- a/drivers/net/ixp2000/ixpdev.c +++ b/drivers/net/ixp2000/ixpdev.c @@ -21,6 +21,7 @@ #include "ixp2400_tx.ucode" #include "ixpdev_priv.h" #include "ixpdev.h" +#include "pm3386.h" #define DRV_MODULE_VERSION "0.2" @@ -271,6 +272,15 @@ static int ixpdev_close(struct net_device *dev) return 0; } +static struct net_device_stats *ixpdev_get_stats(struct net_device *dev) +{ + struct ixpdev_priv *ip = netdev_priv(dev); + + pm3386_get_stats(ip->channel, &(dev->stats)); + + return &(dev->stats); +} + static const struct net_device_ops ixpdev_netdev_ops = { .ndo_open = ixpdev_open, .ndo_stop = ixpdev_close, @@ -278,6 +288,7 @@ static const struct net_device_ops ixpdev_netdev_ops = { .ndo_change_mtu = eth_change_mtu, .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = eth_mac_addr, + .ndo_get_stats = ixpdev_get_stats, #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = ixpdev_poll_controller, #endif diff --git a/drivers/net/netxen/netxen_nic_main.c b/drivers/net/netxen/netxen_nic_main.c index 9b9eab1..7fc15e9 100644 --- a/drivers/net/netxen/netxen_nic_main.c +++ b/drivers/net/netxen/netxen_nic_main.c @@ -595,7 +595,8 @@ netxen_setup_pci_map(struct netxen_adapter *adapter) void __iomem *mem_ptr2 = NULL; void __iomem *db_ptr = NULL; - unsigned long mem_base, mem_len, db_base, db_len = 0, pci_len0 = 0; + resource_size_t mem_base, db_base; + unsigned long mem_len, db_len = 0, pci_len0 = 0; struct pci_dev *pdev = adapter->pdev; int pci_func = adapter->ahw.pci_func; diff --git a/drivers/net/pcmcia/3c574_cs.c b/drivers/net/pcmcia/3c574_cs.c index ee8ad3e..b58965a 100644 --- a/drivers/net/pcmcia/3c574_cs.c +++ b/drivers/net/pcmcia/3c574_cs.c @@ -251,6 +251,7 @@ static void el3_tx_timeout(struct net_device *dev); static int el3_ioctl(struct net_device *dev, struct ifreq *rq, int cmd); static const struct ethtool_ops netdev_ethtool_ops; static void set_rx_mode(struct net_device *dev); +static void set_multicast_list(struct net_device *dev); static void tc574_detach(struct pcmcia_device *p_dev); @@ -266,7 +267,7 @@ static const struct net_device_ops el3_netdev_ops = { .ndo_tx_timeout = el3_tx_timeout, .ndo_get_stats = el3_get_stats, .ndo_do_ioctl = el3_ioctl, - .ndo_set_multicast_list = set_rx_mode, + .ndo_set_multicast_list = set_multicast_list, .ndo_change_mtu = eth_change_mtu, .ndo_set_mac_address = eth_mac_addr, .ndo_validate_addr = eth_validate_addr, @@ -1161,6 +1162,16 @@ static void set_rx_mode(struct net_device *dev) outw(SetRxFilter | RxStation | RxBroadcast, ioaddr + EL3_CMD); } +static void set_multicast_list(struct net_device *dev) +{ + struct el3_private *lp = netdev_priv(dev); + unsigned long flags; + + spin_lock_irqsave(&lp->window_lock, flags); + set_rx_mode(dev); + spin_unlock_irqrestore(&lp->window_lock, flags); +} + static int el3_close(struct net_device *dev) { unsigned int ioaddr = dev->base_addr; diff --git a/drivers/net/phy/mdio-gpio.c b/drivers/net/phy/mdio-gpio.c index 250e10f..8659d34 100644 --- a/drivers/net/phy/mdio-gpio.c +++ b/drivers/net/phy/mdio-gpio.c @@ -238,6 +238,7 @@ static struct of_device_id mdio_ofgpio_match[] = { }, {}, }; +MODULE_DEVICE_TABLE(of, mdio_ofgpio_match); static struct of_platform_driver mdio_ofgpio_driver = { .name = "mdio-gpio", diff --git a/drivers/net/qlge/qlge.h b/drivers/net/qlge/qlge.h index 3ec6e85..e7285f0 100644 --- a/drivers/net/qlge/qlge.h +++ b/drivers/net/qlge/qlge.h @@ -803,6 +803,12 @@ enum { MB_CMD_SET_PORT_CFG = 0x00000122, MB_CMD_GET_PORT_CFG = 0x00000123, MB_CMD_GET_LINK_STS = 0x00000124, + MB_CMD_SET_MGMNT_TFK_CTL = 0x00000160, /* Set Mgmnt Traffic Control */ + MB_SET_MPI_TFK_STOP = (1 << 0), + MB_SET_MPI_TFK_RESUME = (1 << 1), + MB_CMD_GET_MGMNT_TFK_CTL = 0x00000161, /* Get Mgmnt Traffic Control */ + MB_GET_MPI_TFK_STOPPED = (1 << 0), + MB_GET_MPI_TFK_FIFO_EMPTY = (1 << 1), /* Mailbox Command Status. */ MB_CMD_STS_GOOD = 0x00004000, /* Success. */ @@ -1168,7 +1174,7 @@ struct ricb { #define RSS_RI6 0x40 #define RSS_RT6 0x80 __le16 mask; - __le32 hash_cq_id[256]; + u8 hash_cq_id[1024]; __le32 ipv6_hash_key[10]; __le32 ipv4_hash_key[4]; } __attribute((packed)); @@ -1606,6 +1612,8 @@ int ql_read_mpi_reg(struct ql_adapter *qdev, u32 reg, u32 *data); int ql_mb_about_fw(struct ql_adapter *qdev); void ql_link_on(struct ql_adapter *qdev); void ql_link_off(struct ql_adapter *qdev); +int ql_mb_set_mgmnt_traffic_ctl(struct ql_adapter *qdev, u32 control); +int ql_wait_fifo_empty(struct ql_adapter *qdev); #if 1 #define QL_ALL_DUMP diff --git a/drivers/net/qlge/qlge_main.c b/drivers/net/qlge/qlge_main.c index 6168071..48b45df 100644 --- a/drivers/net/qlge/qlge_main.c +++ b/drivers/net/qlge/qlge_main.c @@ -320,6 +320,37 @@ static int ql_set_mac_addr_reg(struct ql_adapter *qdev, u8 *addr, u32 type, switch (type) { case MAC_ADDR_TYPE_MULTI_MAC: + { + u32 upper = (addr[0] << 8) | addr[1]; + u32 lower = (addr[2] << 24) | (addr[3] << 16) | + (addr[4] << 8) | (addr[5]); + + status = + ql_wait_reg_rdy(qdev, + MAC_ADDR_IDX, MAC_ADDR_MW, 0); + if (status) + goto exit; + ql_write32(qdev, MAC_ADDR_IDX, (offset++) | + (index << MAC_ADDR_IDX_SHIFT) | + type | MAC_ADDR_E); + ql_write32(qdev, MAC_ADDR_DATA, lower); + status = + ql_wait_reg_rdy(qdev, + MAC_ADDR_IDX, MAC_ADDR_MW, 0); + if (status) + goto exit; + ql_write32(qdev, MAC_ADDR_IDX, (offset++) | + (index << MAC_ADDR_IDX_SHIFT) | + type | MAC_ADDR_E); + + ql_write32(qdev, MAC_ADDR_DATA, upper); + status = + ql_wait_reg_rdy(qdev, + MAC_ADDR_IDX, MAC_ADDR_MW, 0); + if (status) + goto exit; + break; + } case MAC_ADDR_TYPE_CAM_MAC: { u32 cam_output; @@ -365,16 +396,14 @@ static int ql_set_mac_addr_reg(struct ql_adapter *qdev, u8 *addr, u32 type, and possibly the function id. Right now we hardcode the route field to NIC core. */ - if (type == MAC_ADDR_TYPE_CAM_MAC) { - cam_output = (CAM_OUT_ROUTE_NIC | - (qdev-> - func << CAM_OUT_FUNC_SHIFT) | - (0 << CAM_OUT_CQ_ID_SHIFT)); - if (qdev->vlgrp) - cam_output |= CAM_OUT_RV; - /* route to NIC core */ - ql_write32(qdev, MAC_ADDR_DATA, cam_output); - } + cam_output = (CAM_OUT_ROUTE_NIC | + (qdev-> + func << CAM_OUT_FUNC_SHIFT) | + (0 << CAM_OUT_CQ_ID_SHIFT)); + if (qdev->vlgrp) + cam_output |= CAM_OUT_RV; + /* route to NIC core */ + ql_write32(qdev, MAC_ADDR_DATA, cam_output); break; } case MAC_ADDR_TYPE_VLAN: @@ -546,14 +575,14 @@ static int ql_set_routing_reg(struct ql_adapter *qdev, u32 index, u32 mask, } case RT_IDX_MCAST: /* Pass up All Multicast frames. */ { - value = RT_IDX_DST_CAM_Q | /* dest */ + value = RT_IDX_DST_DFLT_Q | /* dest */ RT_IDX_TYPE_NICQ | /* type */ (RT_IDX_ALLMULTI_SLOT << RT_IDX_IDX_SHIFT);/* index */ break; } case RT_IDX_MCAST_MATCH: /* Pass up matched Multicast frames. */ { - value = RT_IDX_DST_CAM_Q | /* dest */ + value = RT_IDX_DST_DFLT_Q | /* dest */ RT_IDX_TYPE_NICQ | /* type */ (RT_IDX_MCAST_MATCH_SLOT << RT_IDX_IDX_SHIFT);/* index */ break; @@ -3077,6 +3106,12 @@ err_irq: static int ql_start_rss(struct ql_adapter *qdev) { + u8 init_hash_seed[] = {0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, + 0x41, 0x67, 0x25, 0x3d, 0x43, 0xa3, 0x8f, + 0xb0, 0xd0, 0xca, 0x2b, 0xcb, 0xae, 0x7b, + 0x30, 0xb4, 0x77, 0xcb, 0x2d, 0xa3, 0x80, + 0x30, 0xf2, 0x0c, 0x6a, 0x42, 0xb7, 0x3b, + 0xbe, 0xac, 0x01, 0xfa}; struct ricb *ricb = &qdev->ricb; int status = 0; int i; @@ -3086,21 +3121,17 @@ static int ql_start_rss(struct ql_adapter *qdev) ricb->base_cq = RSS_L4K; ricb->flags = - (RSS_L6K | RSS_LI | RSS_LB | RSS_LM | RSS_RI4 | RSS_RI6 | RSS_RT4 | - RSS_RT6); - ricb->mask = cpu_to_le16(qdev->rss_ring_count - 1); + (RSS_L6K | RSS_LI | RSS_LB | RSS_LM | RSS_RT4 | RSS_RT6); + ricb->mask = cpu_to_le16((u16)(0x3ff)); /* * Fill out the Indirection Table. */ - for (i = 0; i < 256; i++) - hash_id[i] = i & (qdev->rss_ring_count - 1); + for (i = 0; i < 1024; i++) + hash_id[i] = (i & (qdev->rss_ring_count - 1)); - /* - * Random values for the IPv6 and IPv4 Hash Keys. - */ - get_random_bytes((void *)&ricb->ipv6_hash_key[0], 40); - get_random_bytes((void *)&ricb->ipv4_hash_key[0], 16); + memcpy((void *)&ricb->ipv6_hash_key[0], init_hash_seed, 40); + memcpy((void *)&ricb->ipv4_hash_key[0], init_hash_seed, 16); QPRINTK(qdev, IFUP, DEBUG, "Initializing RSS.\n"); @@ -3239,6 +3270,13 @@ static int ql_adapter_initialize(struct ql_adapter *qdev) ql_write32(qdev, SPLT_HDR, SPLT_HDR_EP | min(SMALL_BUFFER_SIZE, MAX_SPLIT_SIZE)); + /* Set RX packet routing to use port/pci function on which the + * packet arrived on in addition to usual frame routing. + * This is helpful on bonding where both interfaces can have + * the same MAC address. + */ + ql_write32(qdev, RST_FO, RST_FO_RR_MASK | RST_FO_RR_RCV_FUNC_CQ); + /* Start up the rx queues. */ for (i = 0; i < qdev->rx_ring_count; i++) { status = ql_start_rx_ring(qdev, &qdev->rx_ring[i]); @@ -3311,6 +3349,13 @@ static int ql_adapter_reset(struct ql_adapter *qdev) end_jiffies = jiffies + max((unsigned long)1, usecs_to_jiffies(30)); + + /* Stop management traffic. */ + ql_mb_set_mgmnt_traffic_ctl(qdev, MB_SET_MPI_TFK_STOP); + + /* Wait for the NIC and MGMNT FIFOs to empty. */ + ql_wait_fifo_empty(qdev); + ql_write32(qdev, RST_FO, (RST_FO_FR << 16) | RST_FO_FR); do { @@ -3326,6 +3371,8 @@ static int ql_adapter_reset(struct ql_adapter *qdev) status = -ETIMEDOUT; } + /* Resume management traffic. */ + ql_mb_set_mgmnt_traffic_ctl(qdev, MB_SET_MPI_TFK_RESUME); return status; } @@ -3704,6 +3751,12 @@ static void ql_asic_reset_work(struct work_struct *work) status = ql_adapter_up(qdev); if (status) goto error; + + /* Restore rx mode. */ + clear_bit(QL_ALLMULTI, &qdev->flags); + clear_bit(QL_PROMISCUOUS, &qdev->flags); + qlge_set_multicast_list(qdev->ndev); + rtnl_unlock(); return; error: diff --git a/drivers/net/qlge/qlge_mpi.c b/drivers/net/qlge/qlge_mpi.c index c2e4307..99e58e3 100644 --- a/drivers/net/qlge/qlge_mpi.c +++ b/drivers/net/qlge/qlge_mpi.c @@ -768,6 +768,95 @@ static int ql_idc_wait(struct ql_adapter *qdev) return status; } +int ql_mb_set_mgmnt_traffic_ctl(struct ql_adapter *qdev, u32 control) +{ + struct mbox_params mbc; + struct mbox_params *mbcp = &mbc; + int status; + + memset(mbcp, 0, sizeof(struct mbox_params)); + + mbcp->in_count = 1; + mbcp->out_count = 2; + + mbcp->mbox_in[0] = MB_CMD_SET_MGMNT_TFK_CTL; + mbcp->mbox_in[1] = control; + + status = ql_mailbox_command(qdev, mbcp); + if (status) + return status; + + if (mbcp->mbox_out[0] == MB_CMD_STS_GOOD) + return status; + + if (mbcp->mbox_out[0] == MB_CMD_STS_INVLD_CMD) { + QPRINTK(qdev, DRV, ERR, + "Command not supported by firmware.\n"); + status = -EINVAL; + } else if (mbcp->mbox_out[0] == MB_CMD_STS_ERR) { + /* This indicates that the firmware is + * already in the state we are trying to + * change it to. + */ + QPRINTK(qdev, DRV, ERR, + "Command parameters make no change.\n"); + } + return status; +} + +/* Returns a negative error code or the mailbox command status. */ +static int ql_mb_get_mgmnt_traffic_ctl(struct ql_adapter *qdev, u32 *control) +{ + struct mbox_params mbc; + struct mbox_params *mbcp = &mbc; + int status; + + memset(mbcp, 0, sizeof(struct mbox_params)); + *control = 0; + + mbcp->in_count = 1; + mbcp->out_count = 1; + + mbcp->mbox_in[0] = MB_CMD_GET_MGMNT_TFK_CTL; + + status = ql_mailbox_command(qdev, mbcp); + if (status) + return status; + + if (mbcp->mbox_out[0] == MB_CMD_STS_GOOD) { + *control = mbcp->mbox_in[1]; + return status; + } + + if (mbcp->mbox_out[0] == MB_CMD_STS_INVLD_CMD) { + QPRINTK(qdev, DRV, ERR, + "Command not supported by firmware.\n"); + status = -EINVAL; + } else if (mbcp->mbox_out[0] == MB_CMD_STS_ERR) { + QPRINTK(qdev, DRV, ERR, + "Failed to get MPI traffic control.\n"); + status = -EIO; + } + return status; +} + +int ql_wait_fifo_empty(struct ql_adapter *qdev) +{ + int count = 5; + u32 mgmnt_fifo_empty; + u32 nic_fifo_empty; + + do { + nic_fifo_empty = ql_read32(qdev, STS) & STS_NFE; + ql_mb_get_mgmnt_traffic_ctl(qdev, &mgmnt_fifo_empty); + mgmnt_fifo_empty &= MB_GET_MPI_TFK_FIFO_EMPTY; + if (nic_fifo_empty && mgmnt_fifo_empty) + return 0; + msleep(100); + } while (count-- > 0); + return -ETIMEDOUT; +} + /* API called in work thread context to set new TX/RX * maximum frame size values to match MTU. */ @@ -876,6 +965,8 @@ void ql_mpi_work(struct work_struct *work) int err = 0; rtnl_lock(); + /* Begin polled mode for MPI */ + ql_write32(qdev, INTR_MASK, (INTR_MASK_PI << 16)); while (ql_read32(qdev, STS) & STS_PI) { memset(mbcp, 0, sizeof(struct mbox_params)); @@ -888,6 +979,8 @@ void ql_mpi_work(struct work_struct *work) break; } + /* End polled mode for MPI */ + ql_write32(qdev, INTR_MASK, (INTR_MASK_PI << 16) | INTR_MASK_PI); rtnl_unlock(); ql_enable_completion_interrupt(qdev, 0); } diff --git a/drivers/net/r8169.c b/drivers/net/r8169.c index 50c6a3c..83c47d9 100644 --- a/drivers/net/r8169.c +++ b/drivers/net/r8169.c @@ -115,7 +115,9 @@ enum mac_version { RTL_GIGA_MAC_VER_22 = 0x16, // 8168C RTL_GIGA_MAC_VER_23 = 0x17, // 8168CP RTL_GIGA_MAC_VER_24 = 0x18, // 8168CP - RTL_GIGA_MAC_VER_25 = 0x19 // 8168D + RTL_GIGA_MAC_VER_25 = 0x19, // 8168D + RTL_GIGA_MAC_VER_26 = 0x1a, // 8168D + RTL_GIGA_MAC_VER_27 = 0x1b // 8168DP }; #define _R(NAME,MAC,MASK) \ @@ -150,7 +152,9 @@ static const struct { _R("RTL8168c/8111c", RTL_GIGA_MAC_VER_22, 0xff7e1880), // PCI-E _R("RTL8168cp/8111cp", RTL_GIGA_MAC_VER_23, 0xff7e1880), // PCI-E _R("RTL8168cp/8111cp", RTL_GIGA_MAC_VER_24, 0xff7e1880), // PCI-E - _R("RTL8168d/8111d", RTL_GIGA_MAC_VER_25, 0xff7e1880) // PCI-E + _R("RTL8168d/8111d", RTL_GIGA_MAC_VER_25, 0xff7e1880), // PCI-E + _R("RTL8168d/8111d", RTL_GIGA_MAC_VER_26, 0xff7e1880), // PCI-E + _R("RTL8168dp/8111dp", RTL_GIGA_MAC_VER_27, 0xff7e1880) // PCI-E }; #undef _R @@ -253,6 +257,13 @@ enum rtl8168_8101_registers { DBG_REG = 0xd1, #define FIX_NAK_1 (1 << 4) #define FIX_NAK_2 (1 << 3) + EFUSEAR = 0xdc, +#define EFUSEAR_FLAG 0x80000000 +#define EFUSEAR_WRITE_CMD 0x80000000 +#define EFUSEAR_READ_CMD 0x00000000 +#define EFUSEAR_REG_MASK 0x03ff +#define EFUSEAR_REG_SHIFT 8 +#define EFUSEAR_DATA_MASK 0xff }; enum rtl_register_content { @@ -568,6 +579,14 @@ static void mdio_patch(void __iomem *ioaddr, int reg_addr, int value) mdio_write(ioaddr, reg_addr, mdio_read(ioaddr, reg_addr) | value); } +static void mdio_plus_minus(void __iomem *ioaddr, int reg_addr, int p, int m) +{ + int val; + + val = mdio_read(ioaddr, reg_addr); + mdio_write(ioaddr, reg_addr, (val | p) & ~m); +} + static void rtl_mdio_write(struct net_device *dev, int phy_id, int location, int val) { @@ -651,6 +670,24 @@ static u32 rtl_csi_read(void __iomem *ioaddr, int addr) return value; } +static u8 rtl8168d_efuse_read(void __iomem *ioaddr, int reg_addr) +{ + u8 value = 0xff; + unsigned int i; + + RTL_W32(EFUSEAR, (reg_addr & EFUSEAR_REG_MASK) << EFUSEAR_REG_SHIFT); + + for (i = 0; i < 300; i++) { + if (RTL_R32(EFUSEAR) & EFUSEAR_FLAG) { + value = RTL_R32(EFUSEAR) & EFUSEAR_DATA_MASK; + break; + } + udelay(100); + } + + return value; +} + static void rtl8169_irq_mask_and_ack(void __iomem *ioaddr) { RTL_W16(IntrMask, 0x0000); @@ -1243,7 +1280,10 @@ static void rtl8169_get_mac_version(struct rtl8169_private *tp, int mac_version; } mac_info[] = { /* 8168D family. */ - { 0x7c800000, 0x28000000, RTL_GIGA_MAC_VER_25 }, + { 0x7cf00000, 0x28300000, RTL_GIGA_MAC_VER_26 }, + { 0x7cf00000, 0x28100000, RTL_GIGA_MAC_VER_25 }, + { 0x7c800000, 0x28800000, RTL_GIGA_MAC_VER_27 }, + { 0x7c800000, 0x28000000, RTL_GIGA_MAC_VER_26 }, /* 8168C family. */ { 0x7cf00000, 0x3ca00000, RTL_GIGA_MAC_VER_24 }, @@ -1648,74 +1688,903 @@ static void rtl8168c_4_hw_phy_config(void __iomem *ioaddr) rtl8168c_3_hw_phy_config(ioaddr); } -static void rtl8168d_hw_phy_config(void __iomem *ioaddr) +static void rtl8168d_1_hw_phy_config(void __iomem *ioaddr) { - struct phy_reg phy_reg_init_0[] = { + static struct phy_reg phy_reg_init_0[] = { { 0x1f, 0x0001 }, - { 0x09, 0x2770 }, - { 0x08, 0x04d0 }, - { 0x0b, 0xad15 }, - { 0x0c, 0x5bf0 }, - { 0x1c, 0xf101 }, + { 0x06, 0x4064 }, + { 0x07, 0x2863 }, + { 0x08, 0x059c }, + { 0x09, 0x26b4 }, + { 0x0a, 0x6a19 }, + { 0x0b, 0xdcc8 }, + { 0x10, 0xf06d }, + { 0x14, 0x7f68 }, + { 0x18, 0x7fd9 }, + { 0x1c, 0xf0ff }, + { 0x1d, 0x3d9c }, { 0x1f, 0x0003 }, - { 0x14, 0x94d7 }, - { 0x12, 0xf4d6 }, - { 0x09, 0xca0f }, - { 0x1f, 0x0002 }, - { 0x0b, 0x0b10 }, - { 0x0c, 0xd1f7 }, - { 0x1f, 0x0002 }, - { 0x06, 0x5461 }, + { 0x12, 0xf49f }, + { 0x13, 0x070b }, + { 0x1a, 0x05ad }, + { 0x14, 0x94c0 } + }; + static struct phy_reg phy_reg_init_1[] = { { 0x1f, 0x0002 }, - { 0x05, 0x6662 }, + { 0x06, 0x5561 }, + { 0x1f, 0x0005 }, + { 0x05, 0x8332 }, + { 0x06, 0x5561 } + }; + static struct phy_reg phy_reg_init_2[] = { + { 0x1f, 0x0005 }, + { 0x05, 0xffc2 }, + { 0x1f, 0x0005 }, + { 0x05, 0x8000 }, + { 0x06, 0xf8f9 }, + { 0x06, 0xfaef }, + { 0x06, 0x59ee }, + { 0x06, 0xf8ea }, + { 0x06, 0x00ee }, + { 0x06, 0xf8eb }, + { 0x06, 0x00e0 }, + { 0x06, 0xf87c }, + { 0x06, 0xe1f8 }, + { 0x06, 0x7d59 }, + { 0x06, 0x0fef }, + { 0x06, 0x0139 }, + { 0x06, 0x029e }, + { 0x06, 0x06ef }, + { 0x06, 0x1039 }, + { 0x06, 0x089f }, + { 0x06, 0x2aee }, + { 0x06, 0xf8ea }, + { 0x06, 0x00ee }, + { 0x06, 0xf8eb }, + { 0x06, 0x01e0 }, + { 0x06, 0xf87c }, + { 0x06, 0xe1f8 }, + { 0x06, 0x7d58 }, + { 0x06, 0x409e }, + { 0x06, 0x0f39 }, + { 0x06, 0x46aa }, + { 0x06, 0x0bbf }, + { 0x06, 0x8290 }, + { 0x06, 0xd682 }, + { 0x06, 0x9802 }, + { 0x06, 0x014f }, + { 0x06, 0xae09 }, + { 0x06, 0xbf82 }, + { 0x06, 0x98d6 }, + { 0x06, 0x82a0 }, + { 0x06, 0x0201 }, + { 0x06, 0x4fef }, + { 0x06, 0x95fe }, + { 0x06, 0xfdfc }, + { 0x06, 0x05f8 }, + { 0x06, 0xf9fa }, + { 0x06, 0xeef8 }, + { 0x06, 0xea00 }, + { 0x06, 0xeef8 }, + { 0x06, 0xeb00 }, + { 0x06, 0xe2f8 }, + { 0x06, 0x7ce3 }, + { 0x06, 0xf87d }, + { 0x06, 0xa511 }, + { 0x06, 0x1112 }, + { 0x06, 0xd240 }, + { 0x06, 0xd644 }, + { 0x06, 0x4402 }, + { 0x06, 0x8217 }, + { 0x06, 0xd2a0 }, + { 0x06, 0xd6aa }, + { 0x06, 0xaa02 }, + { 0x06, 0x8217 }, + { 0x06, 0xae0f }, + { 0x06, 0xa544 }, + { 0x06, 0x4402 }, + { 0x06, 0xae4d }, + { 0x06, 0xa5aa }, + { 0x06, 0xaa02 }, + { 0x06, 0xae47 }, + { 0x06, 0xaf82 }, + { 0x06, 0x13ee }, + { 0x06, 0x834e }, + { 0x06, 0x00ee }, + { 0x06, 0x834d }, + { 0x06, 0x0fee }, + { 0x06, 0x834c }, + { 0x06, 0x0fee }, + { 0x06, 0x834f }, + { 0x06, 0x00ee }, + { 0x06, 0x8351 }, + { 0x06, 0x00ee }, + { 0x06, 0x834a }, + { 0x06, 0xffee }, + { 0x06, 0x834b }, + { 0x06, 0xffe0 }, + { 0x06, 0x8330 }, + { 0x06, 0xe183 }, + { 0x06, 0x3158 }, + { 0x06, 0xfee4 }, + { 0x06, 0xf88a }, + { 0x06, 0xe5f8 }, + { 0x06, 0x8be0 }, + { 0x06, 0x8332 }, + { 0x06, 0xe183 }, + { 0x06, 0x3359 }, + { 0x06, 0x0fe2 }, + { 0x06, 0x834d }, + { 0x06, 0x0c24 }, + { 0x06, 0x5af0 }, + { 0x06, 0x1e12 }, + { 0x06, 0xe4f8 }, + { 0x06, 0x8ce5 }, + { 0x06, 0xf88d }, + { 0x06, 0xaf82 }, + { 0x06, 0x13e0 }, + { 0x06, 0x834f }, + { 0x06, 0x10e4 }, + { 0x06, 0x834f }, + { 0x06, 0xe083 }, + { 0x06, 0x4e78 }, + { 0x06, 0x009f }, + { 0x06, 0x0ae0 }, + { 0x06, 0x834f }, + { 0x06, 0xa010 }, + { 0x06, 0xa5ee }, + { 0x06, 0x834e }, + { 0x06, 0x01e0 }, + { 0x06, 0x834e }, + { 0x06, 0x7805 }, + { 0x06, 0x9e9a }, + { 0x06, 0xe083 }, + { 0x06, 0x4e78 }, + { 0x06, 0x049e }, + { 0x06, 0x10e0 }, + { 0x06, 0x834e }, + { 0x06, 0x7803 }, + { 0x06, 0x9e0f }, + { 0x06, 0xe083 }, + { 0x06, 0x4e78 }, + { 0x06, 0x019e }, + { 0x06, 0x05ae }, + { 0x06, 0x0caf }, + { 0x06, 0x81f8 }, + { 0x06, 0xaf81 }, + { 0x06, 0xa3af }, + { 0x06, 0x81dc }, + { 0x06, 0xaf82 }, + { 0x06, 0x13ee }, + { 0x06, 0x8348 }, + { 0x06, 0x00ee }, + { 0x06, 0x8349 }, + { 0x06, 0x00e0 }, + { 0x06, 0x8351 }, + { 0x06, 0x10e4 }, + { 0x06, 0x8351 }, + { 0x06, 0x5801 }, + { 0x06, 0x9fea }, + { 0x06, 0xd000 }, + { 0x06, 0xd180 }, + { 0x06, 0x1f66 }, + { 0x06, 0xe2f8 }, + { 0x06, 0xeae3 }, + { 0x06, 0xf8eb }, + { 0x06, 0x5af8 }, + { 0x06, 0x1e20 }, + { 0x06, 0xe6f8 }, + { 0x06, 0xeae5 }, + { 0x06, 0xf8eb }, + { 0x06, 0xd302 }, + { 0x06, 0xb3fe }, + { 0x06, 0xe2f8 }, + { 0x06, 0x7cef }, + { 0x06, 0x325b }, + { 0x06, 0x80e3 }, + { 0x06, 0xf87d }, + { 0x06, 0x9e03 }, + { 0x06, 0x7dff }, + { 0x06, 0xff0d }, + { 0x06, 0x581c }, + { 0x06, 0x551a }, + { 0x06, 0x6511 }, + { 0x06, 0xa190 }, + { 0x06, 0xd3e2 }, + { 0x06, 0x8348 }, + { 0x06, 0xe383 }, + { 0x06, 0x491b }, + { 0x06, 0x56ab }, + { 0x06, 0x08ef }, + { 0x06, 0x56e6 }, + { 0x06, 0x8348 }, + { 0x06, 0xe783 }, + { 0x06, 0x4910 }, + { 0x06, 0xd180 }, + { 0x06, 0x1f66 }, + { 0x06, 0xa004 }, + { 0x06, 0xb9e2 }, + { 0x06, 0x8348 }, + { 0x06, 0xe383 }, + { 0x06, 0x49ef }, + { 0x06, 0x65e2 }, + { 0x06, 0x834a }, + { 0x06, 0xe383 }, + { 0x06, 0x4b1b }, + { 0x06, 0x56aa }, + { 0x06, 0x0eef }, + { 0x06, 0x56e6 }, + { 0x06, 0x834a }, + { 0x06, 0xe783 }, + { 0x06, 0x4be2 }, + { 0x06, 0x834d }, + { 0x06, 0xe683 }, + { 0x06, 0x4ce0 }, + { 0x06, 0x834d }, + { 0x06, 0xa000 }, + { 0x06, 0x0caf }, + { 0x06, 0x81dc }, + { 0x06, 0xe083 }, + { 0x06, 0x4d10 }, + { 0x06, 0xe483 }, + { 0x06, 0x4dae }, + { 0x06, 0x0480 }, + { 0x06, 0xe483 }, + { 0x06, 0x4de0 }, + { 0x06, 0x834e }, + { 0x06, 0x7803 }, + { 0x06, 0x9e0b }, + { 0x06, 0xe083 }, + { 0x06, 0x4e78 }, + { 0x06, 0x049e }, + { 0x06, 0x04ee }, + { 0x06, 0x834e }, + { 0x06, 0x02e0 }, + { 0x06, 0x8332 }, + { 0x06, 0xe183 }, + { 0x06, 0x3359 }, + { 0x06, 0x0fe2 }, + { 0x06, 0x834d }, + { 0x06, 0x0c24 }, + { 0x06, 0x5af0 }, + { 0x06, 0x1e12 }, + { 0x06, 0xe4f8 }, + { 0x06, 0x8ce5 }, + { 0x06, 0xf88d }, + { 0x06, 0xe083 }, + { 0x06, 0x30e1 }, + { 0x06, 0x8331 }, + { 0x06, 0x6801 }, + { 0x06, 0xe4f8 }, + { 0x06, 0x8ae5 }, + { 0x06, 0xf88b }, + { 0x06, 0xae37 }, + { 0x06, 0xee83 }, + { 0x06, 0x4e03 }, + { 0x06, 0xe083 }, + { 0x06, 0x4ce1 }, + { 0x06, 0x834d }, + { 0x06, 0x1b01 }, + { 0x06, 0x9e04 }, + { 0x06, 0xaaa1 }, + { 0x06, 0xaea8 }, + { 0x06, 0xee83 }, + { 0x06, 0x4e04 }, + { 0x06, 0xee83 }, + { 0x06, 0x4f00 }, + { 0x06, 0xaeab }, + { 0x06, 0xe083 }, + { 0x06, 0x4f78 }, + { 0x06, 0x039f }, + { 0x06, 0x14ee }, + { 0x06, 0x834e }, + { 0x06, 0x05d2 }, + { 0x06, 0x40d6 }, + { 0x06, 0x5554 }, + { 0x06, 0x0282 }, + { 0x06, 0x17d2 }, + { 0x06, 0xa0d6 }, + { 0x06, 0xba00 }, + { 0x06, 0x0282 }, + { 0x06, 0x17fe }, + { 0x06, 0xfdfc }, + { 0x06, 0x05f8 }, + { 0x06, 0xe0f8 }, + { 0x06, 0x60e1 }, + { 0x06, 0xf861 }, + { 0x06, 0x6802 }, + { 0x06, 0xe4f8 }, + { 0x06, 0x60e5 }, + { 0x06, 0xf861 }, + { 0x06, 0xe0f8 }, + { 0x06, 0x48e1 }, + { 0x06, 0xf849 }, + { 0x06, 0x580f }, + { 0x06, 0x1e02 }, + { 0x06, 0xe4f8 }, + { 0x06, 0x48e5 }, + { 0x06, 0xf849 }, + { 0x06, 0xd000 }, + { 0x06, 0x0282 }, + { 0x06, 0x5bbf }, + { 0x06, 0x8350 }, + { 0x06, 0xef46 }, + { 0x06, 0xdc19 }, + { 0x06, 0xddd0 }, + { 0x06, 0x0102 }, + { 0x06, 0x825b }, + { 0x06, 0x0282 }, + { 0x06, 0x77e0 }, + { 0x06, 0xf860 }, + { 0x06, 0xe1f8 }, + { 0x06, 0x6158 }, + { 0x06, 0xfde4 }, + { 0x06, 0xf860 }, + { 0x06, 0xe5f8 }, + { 0x06, 0x61fc }, + { 0x06, 0x04f9 }, + { 0x06, 0xfafb }, + { 0x06, 0xc6bf }, + { 0x06, 0xf840 }, + { 0x06, 0xbe83 }, + { 0x06, 0x50a0 }, + { 0x06, 0x0101 }, + { 0x06, 0x071b }, + { 0x06, 0x89cf }, + { 0x06, 0xd208 }, + { 0x06, 0xebdb }, + { 0x06, 0x19b2 }, + { 0x06, 0xfbff }, + { 0x06, 0xfefd }, + { 0x06, 0x04f8 }, + { 0x06, 0xe0f8 }, + { 0x06, 0x48e1 }, + { 0x06, 0xf849 }, + { 0x06, 0x6808 }, + { 0x06, 0xe4f8 }, + { 0x06, 0x48e5 }, + { 0x06, 0xf849 }, + { 0x06, 0x58f7 }, + { 0x06, 0xe4f8 }, + { 0x06, 0x48e5 }, + { 0x06, 0xf849 }, + { 0x06, 0xfc04 }, + { 0x06, 0x4d20 }, + { 0x06, 0x0002 }, + { 0x06, 0x4e22 }, + { 0x06, 0x0002 }, + { 0x06, 0x4ddf }, + { 0x06, 0xff01 }, + { 0x06, 0x4edd }, + { 0x06, 0xff01 }, + { 0x05, 0x83d4 }, + { 0x06, 0x8000 }, + { 0x05, 0x83d8 }, + { 0x06, 0x8051 }, + { 0x02, 0x6010 }, + { 0x03, 0xdc00 }, + { 0x05, 0xfff6 }, + { 0x06, 0x00fc }, { 0x1f, 0x0000 }, - { 0x14, 0x0060 }, + { 0x1f, 0x0000 }, - { 0x0d, 0xf8a0 }, + { 0x0d, 0xf880 }, + { 0x1f, 0x0000 } + }; + + rtl_phy_write(ioaddr, phy_reg_init_0, ARRAY_SIZE(phy_reg_init_0)); + + mdio_write(ioaddr, 0x1f, 0x0002); + mdio_plus_minus(ioaddr, 0x0b, 0x0010, 0x00ef); + mdio_plus_minus(ioaddr, 0x0c, 0xa200, 0x5d00); + + rtl_phy_write(ioaddr, phy_reg_init_1, ARRAY_SIZE(phy_reg_init_1)); + + if (rtl8168d_efuse_read(ioaddr, 0x01) == 0xb1) { + struct phy_reg phy_reg_init[] = { + { 0x1f, 0x0002 }, + { 0x05, 0x669a }, + { 0x1f, 0x0005 }, + { 0x05, 0x8330 }, + { 0x06, 0x669a }, + { 0x1f, 0x0002 } + }; + int val; + + rtl_phy_write(ioaddr, phy_reg_init, ARRAY_SIZE(phy_reg_init)); + + val = mdio_read(ioaddr, 0x0d); + + if ((val & 0x00ff) != 0x006c) { + u32 set[] = { + 0x0065, 0x0066, 0x0067, 0x0068, + 0x0069, 0x006a, 0x006b, 0x006c + }; + int i; + + mdio_write(ioaddr, 0x1f, 0x0002); + + val &= 0xff00; + for (i = 0; i < ARRAY_SIZE(set); i++) + mdio_write(ioaddr, 0x0d, val | set[i]); + } + } else { + struct phy_reg phy_reg_init[] = { + { 0x1f, 0x0002 }, + { 0x05, 0x6662 }, + { 0x1f, 0x0005 }, + { 0x05, 0x8330 }, + { 0x06, 0x6662 } + }; + + rtl_phy_write(ioaddr, phy_reg_init, ARRAY_SIZE(phy_reg_init)); + } + + mdio_write(ioaddr, 0x1f, 0x0002); + mdio_patch(ioaddr, 0x0d, 0x0300); + mdio_patch(ioaddr, 0x0f, 0x0010); + + mdio_write(ioaddr, 0x1f, 0x0002); + mdio_plus_minus(ioaddr, 0x02, 0x0100, 0x0600); + mdio_plus_minus(ioaddr, 0x03, 0x0000, 0xe000); + + rtl_phy_write(ioaddr, phy_reg_init_2, ARRAY_SIZE(phy_reg_init_2)); +} + +static void rtl8168d_2_hw_phy_config(void __iomem *ioaddr) +{ + static struct phy_reg phy_reg_init_0[] = { + { 0x1f, 0x0001 }, + { 0x06, 0x4064 }, + { 0x07, 0x2863 }, + { 0x08, 0x059c }, + { 0x09, 0x26b4 }, + { 0x0a, 0x6a19 }, + { 0x0b, 0xdcc8 }, + { 0x10, 0xf06d }, + { 0x14, 0x7f68 }, + { 0x18, 0x7fd9 }, + { 0x1c, 0xf0ff }, + { 0x1d, 0x3d9c }, + { 0x1f, 0x0003 }, + { 0x12, 0xf49f }, + { 0x13, 0x070b }, + { 0x1a, 0x05ad }, + { 0x14, 0x94c0 }, + + { 0x1f, 0x0002 }, + { 0x06, 0x5561 }, + { 0x1f, 0x0005 }, + { 0x05, 0x8332 }, + { 0x06, 0x5561 } + }; + static struct phy_reg phy_reg_init_1[] = { + { 0x1f, 0x0005 }, + { 0x05, 0xffc2 }, { 0x1f, 0x0005 }, - { 0x05, 0xffc2 } + { 0x05, 0x8000 }, + { 0x06, 0xf8f9 }, + { 0x06, 0xfaee }, + { 0x06, 0xf8ea }, + { 0x06, 0x00ee }, + { 0x06, 0xf8eb }, + { 0x06, 0x00e2 }, + { 0x06, 0xf87c }, + { 0x06, 0xe3f8 }, + { 0x06, 0x7da5 }, + { 0x06, 0x1111 }, + { 0x06, 0x12d2 }, + { 0x06, 0x40d6 }, + { 0x06, 0x4444 }, + { 0x06, 0x0281 }, + { 0x06, 0xc6d2 }, + { 0x06, 0xa0d6 }, + { 0x06, 0xaaaa }, + { 0x06, 0x0281 }, + { 0x06, 0xc6ae }, + { 0x06, 0x0fa5 }, + { 0x06, 0x4444 }, + { 0x06, 0x02ae }, + { 0x06, 0x4da5 }, + { 0x06, 0xaaaa }, + { 0x06, 0x02ae }, + { 0x06, 0x47af }, + { 0x06, 0x81c2 }, + { 0x06, 0xee83 }, + { 0x06, 0x4e00 }, + { 0x06, 0xee83 }, + { 0x06, 0x4d0f }, + { 0x06, 0xee83 }, + { 0x06, 0x4c0f }, + { 0x06, 0xee83 }, + { 0x06, 0x4f00 }, + { 0x06, 0xee83 }, + { 0x06, 0x5100 }, + { 0x06, 0xee83 }, + { 0x06, 0x4aff }, + { 0x06, 0xee83 }, + { 0x06, 0x4bff }, + { 0x06, 0xe083 }, + { 0x06, 0x30e1 }, + { 0x06, 0x8331 }, + { 0x06, 0x58fe }, + { 0x06, 0xe4f8 }, + { 0x06, 0x8ae5 }, + { 0x06, 0xf88b }, + { 0x06, 0xe083 }, + { 0x06, 0x32e1 }, + { 0x06, 0x8333 }, + { 0x06, 0x590f }, + { 0x06, 0xe283 }, + { 0x06, 0x4d0c }, + { 0x06, 0x245a }, + { 0x06, 0xf01e }, + { 0x06, 0x12e4 }, + { 0x06, 0xf88c }, + { 0x06, 0xe5f8 }, + { 0x06, 0x8daf }, + { 0x06, 0x81c2 }, + { 0x06, 0xe083 }, + { 0x06, 0x4f10 }, + { 0x06, 0xe483 }, + { 0x06, 0x4fe0 }, + { 0x06, 0x834e }, + { 0x06, 0x7800 }, + { 0x06, 0x9f0a }, + { 0x06, 0xe083 }, + { 0x06, 0x4fa0 }, + { 0x06, 0x10a5 }, + { 0x06, 0xee83 }, + { 0x06, 0x4e01 }, + { 0x06, 0xe083 }, + { 0x06, 0x4e78 }, + { 0x06, 0x059e }, + { 0x06, 0x9ae0 }, + { 0x06, 0x834e }, + { 0x06, 0x7804 }, + { 0x06, 0x9e10 }, + { 0x06, 0xe083 }, + { 0x06, 0x4e78 }, + { 0x06, 0x039e }, + { 0x06, 0x0fe0 }, + { 0x06, 0x834e }, + { 0x06, 0x7801 }, + { 0x06, 0x9e05 }, + { 0x06, 0xae0c }, + { 0x06, 0xaf81 }, + { 0x06, 0xa7af }, + { 0x06, 0x8152 }, + { 0x06, 0xaf81 }, + { 0x06, 0x8baf }, + { 0x06, 0x81c2 }, + { 0x06, 0xee83 }, + { 0x06, 0x4800 }, + { 0x06, 0xee83 }, + { 0x06, 0x4900 }, + { 0x06, 0xe083 }, + { 0x06, 0x5110 }, + { 0x06, 0xe483 }, + { 0x06, 0x5158 }, + { 0x06, 0x019f }, + { 0x06, 0xead0 }, + { 0x06, 0x00d1 }, + { 0x06, 0x801f }, + { 0x06, 0x66e2 }, + { 0x06, 0xf8ea }, + { 0x06, 0xe3f8 }, + { 0x06, 0xeb5a }, + { 0x06, 0xf81e }, + { 0x06, 0x20e6 }, + { 0x06, 0xf8ea }, + { 0x06, 0xe5f8 }, + { 0x06, 0xebd3 }, + { 0x06, 0x02b3 }, + { 0x06, 0xfee2 }, + { 0x06, 0xf87c }, + { 0x06, 0xef32 }, + { 0x06, 0x5b80 }, + { 0x06, 0xe3f8 }, + { 0x06, 0x7d9e }, + { 0x06, 0x037d }, + { 0x06, 0xffff }, + { 0x06, 0x0d58 }, + { 0x06, 0x1c55 }, + { 0x06, 0x1a65 }, + { 0x06, 0x11a1 }, + { 0x06, 0x90d3 }, + { 0x06, 0xe283 }, + { 0x06, 0x48e3 }, + { 0x06, 0x8349 }, + { 0x06, 0x1b56 }, + { 0x06, 0xab08 }, + { 0x06, 0xef56 }, + { 0x06, 0xe683 }, + { 0x06, 0x48e7 }, + { 0x06, 0x8349 }, + { 0x06, 0x10d1 }, + { 0x06, 0x801f }, + { 0x06, 0x66a0 }, + { 0x06, 0x04b9 }, + { 0x06, 0xe283 }, + { 0x06, 0x48e3 }, + { 0x06, 0x8349 }, + { 0x06, 0xef65 }, + { 0x06, 0xe283 }, + { 0x06, 0x4ae3 }, + { 0x06, 0x834b }, + { 0x06, 0x1b56 }, + { 0x06, 0xaa0e }, + { 0x06, 0xef56 }, + { 0x06, 0xe683 }, + { 0x06, 0x4ae7 }, + { 0x06, 0x834b }, + { 0x06, 0xe283 }, + { 0x06, 0x4de6 }, + { 0x06, 0x834c }, + { 0x06, 0xe083 }, + { 0x06, 0x4da0 }, + { 0x06, 0x000c }, + { 0x06, 0xaf81 }, + { 0x06, 0x8be0 }, + { 0x06, 0x834d }, + { 0x06, 0x10e4 }, + { 0x06, 0x834d }, + { 0x06, 0xae04 }, + { 0x06, 0x80e4 }, + { 0x06, 0x834d }, + { 0x06, 0xe083 }, + { 0x06, 0x4e78 }, + { 0x06, 0x039e }, + { 0x06, 0x0be0 }, + { 0x06, 0x834e }, + { 0x06, 0x7804 }, + { 0x06, 0x9e04 }, + { 0x06, 0xee83 }, + { 0x06, 0x4e02 }, + { 0x06, 0xe083 }, + { 0x06, 0x32e1 }, + { 0x06, 0x8333 }, + { 0x06, 0x590f }, + { 0x06, 0xe283 }, + { 0x06, 0x4d0c }, + { 0x06, 0x245a }, + { 0x06, 0xf01e }, + { 0x06, 0x12e4 }, + { 0x06, 0xf88c }, + { 0x06, 0xe5f8 }, + { 0x06, 0x8de0 }, + { 0x06, 0x8330 }, + { 0x06, 0xe183 }, + { 0x06, 0x3168 }, + { 0x06, 0x01e4 }, + { 0x06, 0xf88a }, + { 0x06, 0xe5f8 }, + { 0x06, 0x8bae }, + { 0x06, 0x37ee }, + { 0x06, 0x834e }, + { 0x06, 0x03e0 }, + { 0x06, 0x834c }, + { 0x06, 0xe183 }, + { 0x06, 0x4d1b }, + { 0x06, 0x019e }, + { 0x06, 0x04aa }, + { 0x06, 0xa1ae }, + { 0x06, 0xa8ee }, + { 0x06, 0x834e }, + { 0x06, 0x04ee }, + { 0x06, 0x834f }, + { 0x06, 0x00ae }, + { 0x06, 0xabe0 }, + { 0x06, 0x834f }, + { 0x06, 0x7803 }, + { 0x06, 0x9f14 }, + { 0x06, 0xee83 }, + { 0x06, 0x4e05 }, + { 0x06, 0xd240 }, + { 0x06, 0xd655 }, + { 0x06, 0x5402 }, + { 0x06, 0x81c6 }, + { 0x06, 0xd2a0 }, + { 0x06, 0xd6ba }, + { 0x06, 0x0002 }, + { 0x06, 0x81c6 }, + { 0x06, 0xfefd }, + { 0x06, 0xfc05 }, + { 0x06, 0xf8e0 }, + { 0x06, 0xf860 }, + { 0x06, 0xe1f8 }, + { 0x06, 0x6168 }, + { 0x06, 0x02e4 }, + { 0x06, 0xf860 }, + { 0x06, 0xe5f8 }, + { 0x06, 0x61e0 }, + { 0x06, 0xf848 }, + { 0x06, 0xe1f8 }, + { 0x06, 0x4958 }, + { 0x06, 0x0f1e }, + { 0x06, 0x02e4 }, + { 0x06, 0xf848 }, + { 0x06, 0xe5f8 }, + { 0x06, 0x49d0 }, + { 0x06, 0x0002 }, + { 0x06, 0x820a }, + { 0x06, 0xbf83 }, + { 0x06, 0x50ef }, + { 0x06, 0x46dc }, + { 0x06, 0x19dd }, + { 0x06, 0xd001 }, + { 0x06, 0x0282 }, + { 0x06, 0x0a02 }, + { 0x06, 0x8226 }, + { 0x06, 0xe0f8 }, + { 0x06, 0x60e1 }, + { 0x06, 0xf861 }, + { 0x06, 0x58fd }, + { 0x06, 0xe4f8 }, + { 0x06, 0x60e5 }, + { 0x06, 0xf861 }, + { 0x06, 0xfc04 }, + { 0x06, 0xf9fa }, + { 0x06, 0xfbc6 }, + { 0x06, 0xbff8 }, + { 0x06, 0x40be }, + { 0x06, 0x8350 }, + { 0x06, 0xa001 }, + { 0x06, 0x0107 }, + { 0x06, 0x1b89 }, + { 0x06, 0xcfd2 }, + { 0x06, 0x08eb }, + { 0x06, 0xdb19 }, + { 0x06, 0xb2fb }, + { 0x06, 0xfffe }, + { 0x06, 0xfd04 }, + { 0x06, 0xf8e0 }, + { 0x06, 0xf848 }, + { 0x06, 0xe1f8 }, + { 0x06, 0x4968 }, + { 0x06, 0x08e4 }, + { 0x06, 0xf848 }, + { 0x06, 0xe5f8 }, + { 0x06, 0x4958 }, + { 0x06, 0xf7e4 }, + { 0x06, 0xf848 }, + { 0x06, 0xe5f8 }, + { 0x06, 0x49fc }, + { 0x06, 0x044d }, + { 0x06, 0x2000 }, + { 0x06, 0x024e }, + { 0x06, 0x2200 }, + { 0x06, 0x024d }, + { 0x06, 0xdfff }, + { 0x06, 0x014e }, + { 0x06, 0xddff }, + { 0x06, 0x0100 }, + { 0x05, 0x83d8 }, + { 0x06, 0x8000 }, + { 0x03, 0xdc00 }, + { 0x05, 0xfff6 }, + { 0x06, 0x00fc }, + { 0x1f, 0x0000 }, + + { 0x1f, 0x0000 }, + { 0x0d, 0xf880 }, + { 0x1f, 0x0000 } }; rtl_phy_write(ioaddr, phy_reg_init_0, ARRAY_SIZE(phy_reg_init_0)); - if (mdio_read(ioaddr, 0x06) == 0xc400) { - struct phy_reg phy_reg_init_1[] = { + if (rtl8168d_efuse_read(ioaddr, 0x01) == 0xb1) { + struct phy_reg phy_reg_init[] = { + { 0x1f, 0x0002 }, + { 0x05, 0x669a }, { 0x1f, 0x0005 }, - { 0x01, 0x0300 }, - { 0x1f, 0x0000 }, - { 0x11, 0x401c }, - { 0x16, 0x4100 }, + { 0x05, 0x8330 }, + { 0x06, 0x669a }, + + { 0x1f, 0x0002 } + }; + int val; + + rtl_phy_write(ioaddr, phy_reg_init, ARRAY_SIZE(phy_reg_init)); + + val = mdio_read(ioaddr, 0x0d); + if ((val & 0x00ff) != 0x006c) { + u32 set[] = { + 0x0065, 0x0066, 0x0067, 0x0068, + 0x0069, 0x006a, 0x006b, 0x006c + }; + int i; + + mdio_write(ioaddr, 0x1f, 0x0002); + + val &= 0xff00; + for (i = 0; i < ARRAY_SIZE(set); i++) + mdio_write(ioaddr, 0x0d, val | set[i]); + } + } else { + struct phy_reg phy_reg_init[] = { + { 0x1f, 0x0002 }, + { 0x05, 0x2642 }, { 0x1f, 0x0005 }, - { 0x07, 0x0010 }, - { 0x05, 0x83dc }, - { 0x06, 0x087d }, - { 0x05, 0x8300 }, - { 0x06, 0x0101 }, - { 0x06, 0x05f8 }, - { 0x06, 0xf9fa }, - { 0x06, 0xfbef }, - { 0x06, 0x79e2 }, - { 0x06, 0x835f }, - { 0x06, 0xe0f8 }, - { 0x06, 0x9ae1 }, - { 0x06, 0xf89b }, - { 0x06, 0xef31 }, - { 0x06, 0x3b65 }, - { 0x06, 0xaa07 }, - { 0x06, 0x81e4 }, - { 0x06, 0xf89a }, - { 0x06, 0xe5f8 }, - { 0x06, 0x9baf }, - { 0x06, 0x06ae }, - { 0x05, 0x83dc }, - { 0x06, 0x8300 }, + { 0x05, 0x8330 }, + { 0x06, 0x2642 } }; - rtl_phy_write(ioaddr, phy_reg_init_1, - ARRAY_SIZE(phy_reg_init_1)); + rtl_phy_write(ioaddr, phy_reg_init, ARRAY_SIZE(phy_reg_init)); } - mdio_write(ioaddr, 0x1f, 0x0000); + mdio_write(ioaddr, 0x1f, 0x0002); + mdio_plus_minus(ioaddr, 0x02, 0x0100, 0x0600); + mdio_plus_minus(ioaddr, 0x03, 0x0000, 0xe000); + + mdio_write(ioaddr, 0x1f, 0x0001); + mdio_write(ioaddr, 0x17, 0x0cc0); + + mdio_write(ioaddr, 0x1f, 0x0002); + mdio_patch(ioaddr, 0x0f, 0x0017); + + rtl_phy_write(ioaddr, phy_reg_init_1, ARRAY_SIZE(phy_reg_init_1)); +} + +static void rtl8168d_3_hw_phy_config(void __iomem *ioaddr) +{ + struct phy_reg phy_reg_init[] = { + { 0x1f, 0x0002 }, + { 0x10, 0x0008 }, + { 0x0d, 0x006c }, + + { 0x1f, 0x0000 }, + { 0x0d, 0xf880 }, + + { 0x1f, 0x0001 }, + { 0x17, 0x0cc0 }, + + { 0x1f, 0x0001 }, + { 0x0b, 0xa4d8 }, + { 0x09, 0x281c }, + { 0x07, 0x2883 }, + { 0x0a, 0x6b35 }, + { 0x1d, 0x3da4 }, + { 0x1c, 0xeffd }, + { 0x14, 0x7f52 }, + { 0x18, 0x7fc6 }, + { 0x08, 0x0601 }, + { 0x06, 0x4063 }, + { 0x10, 0xf074 }, + { 0x1f, 0x0003 }, + { 0x13, 0x0789 }, + { 0x12, 0xf4bd }, + { 0x1a, 0x04fd }, + { 0x14, 0x84b0 }, + { 0x1f, 0x0000 }, + { 0x00, 0x9200 }, + + { 0x1f, 0x0005 }, + { 0x01, 0x0340 }, + { 0x1f, 0x0001 }, + { 0x04, 0x4000 }, + { 0x03, 0x1d21 }, + { 0x02, 0x0c32 }, + { 0x01, 0x0200 }, + { 0x00, 0x5554 }, + { 0x04, 0x4800 }, + { 0x04, 0x4000 }, + { 0x04, 0xf000 }, + { 0x03, 0xdf01 }, + { 0x02, 0xdf20 }, + { 0x01, 0x101a }, + { 0x00, 0xa0ff }, + { 0x04, 0xf800 }, + { 0x04, 0xf000 }, + { 0x1f, 0x0000 }, + + { 0x1f, 0x0007 }, + { 0x1e, 0x0023 }, + { 0x16, 0x0000 }, + { 0x1f, 0x0000 } + }; + + rtl_phy_write(ioaddr, phy_reg_init, ARRAY_SIZE(phy_reg_init)); } static void rtl8102e_hw_phy_config(void __iomem *ioaddr) @@ -1792,7 +2661,13 @@ static void rtl_hw_phy_config(struct net_device *dev) rtl8168cp_2_hw_phy_config(ioaddr); break; case RTL_GIGA_MAC_VER_25: - rtl8168d_hw_phy_config(ioaddr); + rtl8168d_1_hw_phy_config(ioaddr); + break; + case RTL_GIGA_MAC_VER_26: + rtl8168d_2_hw_phy_config(ioaddr); + break; + case RTL_GIGA_MAC_VER_27: + rtl8168d_3_hw_phy_config(ioaddr); break; default: @@ -2863,6 +3738,8 @@ static void rtl_hw_start_8168(struct net_device *dev) break; case RTL_GIGA_MAC_VER_25: + case RTL_GIGA_MAC_VER_26: + case RTL_GIGA_MAC_VER_27: rtl_hw_start_8168d(ioaddr, pdev); break; diff --git a/drivers/net/stmmac/Kconfig b/drivers/net/stmmac/Kconfig new file mode 100644 index 0000000..35eaa52 --- /dev/null +++ b/drivers/net/stmmac/Kconfig @@ -0,0 +1,53 @@ +config STMMAC_ETH + tristate "STMicroelectronics 10/100/1000 Ethernet driver" + select MII + select PHYLIB + depends on NETDEVICES && CPU_SUBTYPE_ST40 + help + This is the driver for the ST MAC 10/100/1000 on-chip Ethernet + controllers. ST Ethernet IPs are built around a Synopsys IP Core. + +if STMMAC_ETH + +config STMMAC_DA + bool "STMMAC DMA arbitration scheme" + default n + help + Selecting this option, rx has priority over Tx (only for Giga + Ethernet device). + By default, the DMA arbitration scheme is based on Round-robin + (rx:tx priority is 1:1). + +config STMMAC_DUAL_MAC + bool "STMMAC: dual mac support (EXPERIMENTAL)" + default n + depends on EXPERIMENTAL && STMMAC_ETH && !STMMAC_TIMER + help + Some ST SoCs (for example the stx7141 and stx7200c2) have two + Ethernet Controllers. This option turns on the second Ethernet + device on this kind of platforms. + +config STMMAC_TIMER + bool "STMMAC Timer optimisation" + default n + help + Use an external timer for mitigating the number of network + interrupts. + +choice + prompt "Select Timer device" + depends on STMMAC_TIMER + +config STMMAC_TMU_TIMER + bool "TMU channel 2" + depends on CPU_SH4 + help + +config STMMAC_RTC_TIMER + bool "Real time clock" + depends on RTC_CLASS + help + +endchoice + +endif diff --git a/drivers/net/stmmac/Makefile b/drivers/net/stmmac/Makefile new file mode 100644 index 0000000..b2d7a55 --- /dev/null +++ b/drivers/net/stmmac/Makefile @@ -0,0 +1,4 @@ +obj-$(CONFIG_STMMAC_ETH) += stmmac.o +stmmac-$(CONFIG_STMMAC_TIMER) += stmmac_timer.o +stmmac-objs:= stmmac_main.o stmmac_ethtool.o stmmac_mdio.o \ + mac100.o gmac.o $(stmmac-y) diff --git a/drivers/net/stmmac/common.h b/drivers/net/stmmac/common.h new file mode 100644 index 0000000..e49e518 --- /dev/null +++ b/drivers/net/stmmac/common.h @@ -0,0 +1,330 @@ +/******************************************************************************* + STMMAC Common Header File + + Copyright (C) 2007-2009 STMicroelectronics Ltd + + This program is free software; you can redistribute it and/or modify it + under the terms and conditions of the GNU General Public License, + version 2, as published by the Free Software Foundation. + + This program is distributed in the hope it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + + The full GNU General Public License is included in this distribution in + the file called "COPYING". + + Author: Giuseppe Cavallaro <peppe.cavallaro@st.com> +*******************************************************************************/ + +#include "descs.h" +#include <linux/io.h> + +/* ********************************************* + DMA CRS Control and Status Register Mapping + * *********************************************/ +#define DMA_BUS_MODE 0x00001000 /* Bus Mode */ +#define DMA_XMT_POLL_DEMAND 0x00001004 /* Transmit Poll Demand */ +#define DMA_RCV_POLL_DEMAND 0x00001008 /* Received Poll Demand */ +#define DMA_RCV_BASE_ADDR 0x0000100c /* Receive List Base */ +#define DMA_TX_BASE_ADDR 0x00001010 /* Transmit List Base */ +#define DMA_STATUS 0x00001014 /* Status Register */ +#define DMA_CONTROL 0x00001018 /* Ctrl (Operational Mode) */ +#define DMA_INTR_ENA 0x0000101c /* Interrupt Enable */ +#define DMA_MISSED_FRAME_CTR 0x00001020 /* Missed Frame Counter */ +#define DMA_CUR_TX_BUF_ADDR 0x00001050 /* Current Host Tx Buffer */ +#define DMA_CUR_RX_BUF_ADDR 0x00001054 /* Current Host Rx Buffer */ + +/* ******************************** + DMA Control register defines + * ********************************/ +#define DMA_CONTROL_ST 0x00002000 /* Start/Stop Transmission */ +#define DMA_CONTROL_SR 0x00000002 /* Start/Stop Receive */ + +/* ************************************** + DMA Interrupt Enable register defines + * **************************************/ +/**** NORMAL INTERRUPT ****/ +#define DMA_INTR_ENA_NIE 0x00010000 /* Normal Summary */ +#define DMA_INTR_ENA_TIE 0x00000001 /* Transmit Interrupt */ +#define DMA_INTR_ENA_TUE 0x00000004 /* Transmit Buffer Unavailable */ +#define DMA_INTR_ENA_RIE 0x00000040 /* Receive Interrupt */ +#define DMA_INTR_ENA_ERE 0x00004000 /* Early Receive */ + +#define DMA_INTR_NORMAL (DMA_INTR_ENA_NIE | DMA_INTR_ENA_RIE | \ + DMA_INTR_ENA_TIE) + +/**** ABNORMAL INTERRUPT ****/ +#define DMA_INTR_ENA_AIE 0x00008000 /* Abnormal Summary */ +#define DMA_INTR_ENA_FBE 0x00002000 /* Fatal Bus Error */ +#define DMA_INTR_ENA_ETE 0x00000400 /* Early Transmit */ +#define DMA_INTR_ENA_RWE 0x00000200 /* Receive Watchdog */ +#define DMA_INTR_ENA_RSE 0x00000100 /* Receive Stopped */ +#define DMA_INTR_ENA_RUE 0x00000080 /* Receive Buffer Unavailable */ +#define DMA_INTR_ENA_UNE 0x00000020 /* Tx Underflow */ +#define DMA_INTR_ENA_OVE 0x00000010 /* Receive Overflow */ +#define DMA_INTR_ENA_TJE 0x00000008 /* Transmit Jabber */ +#define DMA_INTR_ENA_TSE 0x00000002 /* Transmit Stopped */ + +#define DMA_INTR_ABNORMAL (DMA_INTR_ENA_AIE | DMA_INTR_ENA_FBE | \ + DMA_INTR_ENA_UNE) + +/* DMA default interrupt mask */ +#define DMA_INTR_DEFAULT_MASK (DMA_INTR_NORMAL | DMA_INTR_ABNORMAL) + +/* **************************** + * DMA Status register defines + * ****************************/ +#define DMA_STATUS_GPI 0x10000000 /* PMT interrupt */ +#define DMA_STATUS_GMI 0x08000000 /* MMC interrupt */ +#define DMA_STATUS_GLI 0x04000000 /* GMAC Line interface int. */ +#define DMA_STATUS_GMI 0x08000000 +#define DMA_STATUS_GLI 0x04000000 +#define DMA_STATUS_EB_MASK 0x00380000 /* Error Bits Mask */ +#define DMA_STATUS_EB_TX_ABORT 0x00080000 /* Error Bits - TX Abort */ +#define DMA_STATUS_EB_RX_ABORT 0x00100000 /* Error Bits - RX Abort */ +#define DMA_STATUS_TS_MASK 0x00700000 /* Transmit Process State */ +#define DMA_STATUS_TS_SHIFT 20 +#define DMA_STATUS_RS_MASK 0x000e0000 /* Receive Process State */ +#define DMA_STATUS_RS_SHIFT 17 +#define DMA_STATUS_NIS 0x00010000 /* Normal Interrupt Summary */ +#define DMA_STATUS_AIS 0x00008000 /* Abnormal Interrupt Summary */ +#define DMA_STATUS_ERI 0x00004000 /* Early Receive Interrupt */ +#define DMA_STATUS_FBI 0x00002000 /* Fatal Bus Error Interrupt */ +#define DMA_STATUS_ETI 0x00000400 /* Early Transmit Interrupt */ +#define DMA_STATUS_RWT 0x00000200 /* Receive Watchdog Timeout */ +#define DMA_STATUS_RPS 0x00000100 /* Receive Process Stopped */ +#define DMA_STATUS_RU 0x00000080 /* Receive Buffer Unavailable */ +#define DMA_STATUS_RI 0x00000040 /* Receive Interrupt */ +#define DMA_STATUS_UNF 0x00000020 /* Transmit Underflow */ +#define DMA_STATUS_OVF 0x00000010 /* Receive Overflow */ +#define DMA_STATUS_TJT 0x00000008 /* Transmit Jabber Timeout */ +#define DMA_STATUS_TU 0x00000004 /* Transmit Buffer Unavailable */ +#define DMA_STATUS_TPS 0x00000002 /* Transmit Process Stopped */ +#define DMA_STATUS_TI 0x00000001 /* Transmit Interrupt */ + +/* Other defines */ +#define HASH_TABLE_SIZE 64 +#define PAUSE_TIME 0x200 + +/* Flow Control defines */ +#define FLOW_OFF 0 +#define FLOW_RX 1 +#define FLOW_TX 2 +#define FLOW_AUTO (FLOW_TX | FLOW_RX) + +/* DMA STORE-AND-FORWARD Operation Mode */ +#define SF_DMA_MODE 1 + +#define HW_CSUM 1 +#define NO_HW_CSUM 0 + +/* GMAC TX FIFO is 8K, Rx FIFO is 16K */ +#define BUF_SIZE_16KiB 16384 +#define BUF_SIZE_8KiB 8192 +#define BUF_SIZE_4KiB 4096 +#define BUF_SIZE_2KiB 2048 + +/* Power Down and WOL */ +#define PMT_NOT_SUPPORTED 0 +#define PMT_SUPPORTED 1 + +/* Common MAC defines */ +#define MAC_CTRL_REG 0x00000000 /* MAC Control */ +#define MAC_ENABLE_TX 0x00000008 /* Transmitter Enable */ +#define MAC_RNABLE_RX 0x00000004 /* Receiver Enable */ + +/* MAC Management Counters register */ +#define MMC_CONTROL 0x00000100 /* MMC Control */ +#define MMC_HIGH_INTR 0x00000104 /* MMC High Interrupt */ +#define MMC_LOW_INTR 0x00000108 /* MMC Low Interrupt */ +#define MMC_HIGH_INTR_MASK 0x0000010c /* MMC High Interrupt Mask */ +#define MMC_LOW_INTR_MASK 0x00000110 /* MMC Low Interrupt Mask */ + +#define MMC_CONTROL_MAX_FRM_MASK 0x0003ff8 /* Maximum Frame Size */ +#define MMC_CONTROL_MAX_FRM_SHIFT 3 +#define MMC_CONTROL_MAX_FRAME 0x7FF + +struct stmmac_extra_stats { + /* Transmit errors */ + unsigned long tx_underflow ____cacheline_aligned; + unsigned long tx_carrier; + unsigned long tx_losscarrier; + unsigned long tx_heartbeat; + unsigned long tx_deferred; + unsigned long tx_vlan; + unsigned long tx_jabber; + unsigned long tx_frame_flushed; + unsigned long tx_payload_error; + unsigned long tx_ip_header_error; + /* Receive errors */ + unsigned long rx_desc; + unsigned long rx_partial; + unsigned long rx_runt; + unsigned long rx_toolong; + unsigned long rx_collision; + unsigned long rx_crc; + unsigned long rx_lenght; + unsigned long rx_mii; + unsigned long rx_multicast; + unsigned long rx_gmac_overflow; + unsigned long rx_watchdog; + unsigned long da_rx_filter_fail; + unsigned long sa_rx_filter_fail; + unsigned long rx_missed_cntr; + unsigned long rx_overflow_cntr; + unsigned long rx_vlan; + /* Tx/Rx IRQ errors */ + unsigned long tx_undeflow_irq; + unsigned long tx_process_stopped_irq; + unsigned long tx_jabber_irq; + unsigned long rx_overflow_irq; + unsigned long rx_buf_unav_irq; + unsigned long rx_process_stopped_irq; + unsigned long rx_watchdog_irq; + unsigned long tx_early_irq; + unsigned long fatal_bus_error_irq; + /* Extra info */ + unsigned long threshold; + unsigned long tx_pkt_n; + unsigned long rx_pkt_n; + unsigned long poll_n; + unsigned long sched_timer_n; + unsigned long normal_irq_n; +}; + +/* GMAC core can compute the checksums in HW. */ +enum rx_frame_status { + good_frame = 0, + discard_frame = 1, + csum_none = 2, +}; + +static inline void stmmac_set_mac_addr(unsigned long ioaddr, u8 addr[6], + unsigned int high, unsigned int low) +{ + unsigned long data; + + data = (addr[5] << 8) | addr[4]; + writel(data, ioaddr + high); + data = (addr[3] << 24) | (addr[2] << 16) | (addr[1] << 8) | addr[0]; + writel(data, ioaddr + low); + + return; +} + +static inline void stmmac_get_mac_addr(unsigned long ioaddr, + unsigned char *addr, unsigned int high, + unsigned int low) +{ + unsigned int hi_addr, lo_addr; + + /* Read the MAC address from the hardware */ + hi_addr = readl(ioaddr + high); + lo_addr = readl(ioaddr + low); + + /* Extract the MAC address from the high and low words */ + addr[0] = lo_addr & 0xff; + addr[1] = (lo_addr >> 8) & 0xff; + addr[2] = (lo_addr >> 16) & 0xff; + addr[3] = (lo_addr >> 24) & 0xff; + addr[4] = hi_addr & 0xff; + addr[5] = (hi_addr >> 8) & 0xff; + + return; +} + +struct stmmac_ops { + /* MAC core initialization */ + void (*core_init) (unsigned long ioaddr) ____cacheline_aligned; + /* DMA core initialization */ + int (*dma_init) (unsigned long ioaddr, int pbl, u32 dma_tx, u32 dma_rx); + /* Dump MAC registers */ + void (*dump_mac_regs) (unsigned long ioaddr); + /* Dump DMA registers */ + void (*dump_dma_regs) (unsigned long ioaddr); + /* Set tx/rx threshold in the csr6 register + * An invalid value enables the store-and-forward mode */ + void (*dma_mode) (unsigned long ioaddr, int txmode, int rxmode); + /* To track extra statistic (if supported) */ + void (*dma_diagnostic_fr) (void *data, struct stmmac_extra_stats *x, + unsigned long ioaddr); + /* RX descriptor ring initialization */ + void (*init_rx_desc) (struct dma_desc *p, unsigned int ring_size, + int disable_rx_ic); + /* TX descriptor ring initialization */ + void (*init_tx_desc) (struct dma_desc *p, unsigned int ring_size); + + /* Invoked by the xmit function to prepare the tx descriptor */ + void (*prepare_tx_desc) (struct dma_desc *p, int is_fs, int len, + int csum_flag); + /* Set/get the owner of the descriptor */ + void (*set_tx_owner) (struct dma_desc *p); + int (*get_tx_owner) (struct dma_desc *p); + /* Invoked by the xmit function to close the tx descriptor */ + void (*close_tx_desc) (struct dma_desc *p); + /* Clean the tx descriptor as soon as the tx irq is received */ + void (*release_tx_desc) (struct dma_desc *p); + /* Clear interrupt on tx frame completion. When this bit is + * set an interrupt happens as soon as the frame is transmitted */ + void (*clear_tx_ic) (struct dma_desc *p); + /* Last tx segment reports the transmit status */ + int (*get_tx_ls) (struct dma_desc *p); + /* Return the transmit status looking at the TDES1 */ + int (*tx_status) (void *data, struct stmmac_extra_stats *x, + struct dma_desc *p, unsigned long ioaddr); + /* Get the buffer size from the descriptor */ + int (*get_tx_len) (struct dma_desc *p); + /* Handle extra events on specific interrupts hw dependent */ + void (*host_irq_status) (unsigned long ioaddr); + int (*get_rx_owner) (struct dma_desc *p); + void (*set_rx_owner) (struct dma_desc *p); + /* Get the receive frame size */ + int (*get_rx_frame_len) (struct dma_desc *p); + /* Return the reception status looking at the RDES1 */ + int (*rx_status) (void *data, struct stmmac_extra_stats *x, + struct dma_desc *p); + /* Multicast filter setting */ + void (*set_filter) (struct net_device *dev); + /* Flow control setting */ + void (*flow_ctrl) (unsigned long ioaddr, unsigned int duplex, + unsigned int fc, unsigned int pause_time); + /* Set power management mode (e.g. magic frame) */ + void (*pmt) (unsigned long ioaddr, unsigned long mode); + /* Set/Get Unicast MAC addresses */ + void (*set_umac_addr) (unsigned long ioaddr, unsigned char *addr, + unsigned int reg_n); + void (*get_umac_addr) (unsigned long ioaddr, unsigned char *addr, + unsigned int reg_n); +}; + +struct mac_link { + int port; + int duplex; + int speed; +}; + +struct mii_regs { + unsigned int addr; /* MII Address */ + unsigned int data; /* MII Data */ +}; + +struct hw_cap { + unsigned int version; /* Core Version register (GMAC) */ + unsigned int pmt; /* Power-Down mode (GMAC) */ + struct mac_link link; + struct mii_regs mii; +}; + +struct mac_device_info { + struct hw_cap hw; + struct stmmac_ops *ops; +}; + +struct mac_device_info *gmac_setup(unsigned long addr); +struct mac_device_info *mac100_setup(unsigned long addr); diff --git a/drivers/net/stmmac/descs.h b/drivers/net/stmmac/descs.h new file mode 100644 index 0000000..6d2a0b2 --- /dev/null +++ b/drivers/net/stmmac/descs.h @@ -0,0 +1,163 @@ +/******************************************************************************* + Header File to describe the DMA descriptors + Use enhanced descriptors in case of GMAC Cores. + + This program is free software; you can redistribute it and/or modify it + under the terms and conditions of the GNU General Public License, + version 2, as published by the Free Software Foundation. + + This program is distributed in the hope it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + + The full GNU General Public License is included in this distribution in + the file called "COPYING". + + Author: Giuseppe Cavallaro <peppe.cavallaro@st.com> +*******************************************************************************/ +struct dma_desc { + /* Receive descriptor */ + union { + struct { + /* RDES0 */ + u32 reserved1:1; + u32 crc_error:1; + u32 dribbling:1; + u32 mii_error:1; + u32 receive_watchdog:1; + u32 frame_type:1; + u32 collision:1; + u32 frame_too_long:1; + u32 last_descriptor:1; + u32 first_descriptor:1; + u32 multicast_frame:1; + u32 run_frame:1; + u32 length_error:1; + u32 partial_frame_error:1; + u32 descriptor_error:1; + u32 error_summary:1; + u32 frame_length:14; + u32 filtering_fail:1; + u32 own:1; + /* RDES1 */ + u32 buffer1_size:11; + u32 buffer2_size:11; + u32 reserved2:2; + u32 second_address_chained:1; + u32 end_ring:1; + u32 reserved3:5; + u32 disable_ic:1; + } rx; + struct { + /* RDES0 */ + u32 payload_csum_error:1; + u32 crc_error:1; + u32 dribbling:1; + u32 error_gmii:1; + u32 receive_watchdog:1; + u32 frame_type:1; + u32 late_collision:1; + u32 ipc_csum_error:1; + u32 last_descriptor:1; + u32 first_descriptor:1; + u32 vlan_tag:1; + u32 overflow_error:1; + u32 length_error:1; + u32 sa_filter_fail:1; + u32 descriptor_error:1; + u32 error_summary:1; + u32 frame_length:14; + u32 da_filter_fail:1; + u32 own:1; + /* RDES1 */ + u32 buffer1_size:13; + u32 reserved1:1; + u32 second_address_chained:1; + u32 end_ring:1; + u32 buffer2_size:13; + u32 reserved2:2; + u32 disable_ic:1; + } erx; /* -- enhanced -- */ + + /* Transmit descriptor */ + struct { + /* TDES0 */ + u32 deferred:1; + u32 underflow_error:1; + u32 excessive_deferral:1; + u32 collision_count:4; + u32 heartbeat_fail:1; + u32 excessive_collisions:1; + u32 late_collision:1; + u32 no_carrier:1; + u32 loss_carrier:1; + u32 reserved1:3; + u32 error_summary:1; + u32 reserved2:15; + u32 own:1; + /* TDES1 */ + u32 buffer1_size:11; + u32 buffer2_size:11; + u32 reserved3:1; + u32 disable_padding:1; + u32 second_address_chained:1; + u32 end_ring:1; + u32 crc_disable:1; + u32 reserved4:2; + u32 first_segment:1; + u32 last_segment:1; + u32 interrupt:1; + } tx; + struct { + /* TDES0 */ + u32 deferred:1; + u32 underflow_error:1; + u32 excessive_deferral:1; + u32 collision_count:4; + u32 vlan_frame:1; + u32 excessive_collisions:1; + u32 late_collision:1; + u32 no_carrier:1; + u32 loss_carrier:1; + u32 payload_error:1; + u32 frame_flushed:1; + u32 jabber_timeout:1; + u32 error_summary:1; + u32 ip_header_error:1; + u32 time_stamp_status:1; + u32 reserved1:2; + u32 second_address_chained:1; + u32 end_ring:1; + u32 checksum_insertion:2; + u32 reserved2:1; + u32 time_stamp_enable:1; + u32 disable_padding:1; + u32 crc_disable:1; + u32 first_segment:1; + u32 last_segment:1; + u32 interrupt:1; + u32 own:1; + /* TDES1 */ + u32 buffer1_size:13; + u32 reserved3:3; + u32 buffer2_size:13; + u32 reserved4:3; + } etx; /* -- enhanced -- */ + } des01; + unsigned int des2; + unsigned int des3; +}; + +/* Transmit checksum insertion control */ +enum tdes_csum_insertion { + cic_disabled = 0, /* Checksum Insertion Control */ + cic_only_ip = 1, /* Only IP header */ + cic_no_pseudoheader = 2, /* IP header but pseudoheader + * is not calculated */ + cic_full = 3, /* IP header and pseudoheader */ +}; diff --git a/drivers/net/stmmac/gmac.c b/drivers/net/stmmac/gmac.c new file mode 100644 index 0000000..b624bb5 --- /dev/null +++ b/drivers/net/stmmac/gmac.c @@ -0,0 +1,693 @@ +/******************************************************************************* + This is the driver for the GMAC on-chip Ethernet controller for ST SoCs. + DWC Ether MAC 10/100/1000 Universal version 3.41a has been used for + developing this code. + + Copyright (C) 2007-2009 STMicroelectronics Ltd + + This program is free software; you can redistribute it and/or modify it + under the terms and conditions of the GNU General Public License, + version 2, as published by the Free Software Foundation. + + This program is distributed in the hope it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + + The full GNU General Public License is included in this distribution in + the file called "COPYING". + + Author: Giuseppe Cavallaro <peppe.cavallaro@st.com> +*******************************************************************************/ + +#include <linux/netdevice.h> +#include <linux/crc32.h> +#include <linux/mii.h> +#include <linux/phy.h> + +#include "stmmac.h" +#include "gmac.h" + +#undef GMAC_DEBUG +/*#define GMAC_DEBUG*/ +#undef FRAME_FILTER_DEBUG +/*#define FRAME_FILTER_DEBUG*/ +#ifdef GMAC_DEBUG +#define DBG(fmt, args...) printk(fmt, ## args) +#else +#define DBG(fmt, args...) do { } while (0) +#endif + +static void gmac_dump_regs(unsigned long ioaddr) +{ + int i; + pr_info("\t----------------------------------------------\n" + "\t GMAC registers (base addr = 0x%8x)\n" + "\t----------------------------------------------\n", + (unsigned int)ioaddr); + + for (i = 0; i < 55; i++) { + int offset = i * 4; + pr_info("\tReg No. %d (offset 0x%x): 0x%08x\n", i, + offset, readl(ioaddr + offset)); + } + return; +} + +static int gmac_dma_init(unsigned long ioaddr, int pbl, u32 dma_tx, u32 dma_rx) +{ + u32 value = readl(ioaddr + DMA_BUS_MODE); + /* DMA SW reset */ + value |= DMA_BUS_MODE_SFT_RESET; + writel(value, ioaddr + DMA_BUS_MODE); + do {} while ((readl(ioaddr + DMA_BUS_MODE) & DMA_BUS_MODE_SFT_RESET)); + + value = /* DMA_BUS_MODE_FB | */ DMA_BUS_MODE_4PBL | + ((pbl << DMA_BUS_MODE_PBL_SHIFT) | + (pbl << DMA_BUS_MODE_RPBL_SHIFT)); + +#ifdef CONFIG_STMMAC_DA + value |= DMA_BUS_MODE_DA; /* Rx has priority over tx */ +#endif + writel(value, ioaddr + DMA_BUS_MODE); + + /* Mask interrupts by writing to CSR7 */ + writel(DMA_INTR_DEFAULT_MASK, ioaddr + DMA_INTR_ENA); + + /* The base address of the RX/TX descriptor lists must be written into + * DMA CSR3 and CSR4, respectively. */ + writel(dma_tx, ioaddr + DMA_TX_BASE_ADDR); + writel(dma_rx, ioaddr + DMA_RCV_BASE_ADDR); + + return 0; +} + +/* Transmit FIFO flush operation */ +static void gmac_flush_tx_fifo(unsigned long ioaddr) +{ + u32 csr6 = readl(ioaddr + DMA_CONTROL); + writel((csr6 | DMA_CONTROL_FTF), ioaddr + DMA_CONTROL); + + do {} while ((readl(ioaddr + DMA_CONTROL) & DMA_CONTROL_FTF)); +} + +static void gmac_dma_operation_mode(unsigned long ioaddr, int txmode, + int rxmode) +{ + u32 csr6 = readl(ioaddr + DMA_CONTROL); + + if (txmode == SF_DMA_MODE) { + DBG(KERN_DEBUG "GMAC: enabling TX store and forward mode\n"); + /* Transmit COE type 2 cannot be done in cut-through mode. */ + csr6 |= DMA_CONTROL_TSF; + /* Operating on second frame increase the performance + * especially when transmit store-and-forward is used.*/ + csr6 |= DMA_CONTROL_OSF; + } else { + DBG(KERN_DEBUG "GMAC: disabling TX store and forward mode" + " (threshold = %d)\n", txmode); + csr6 &= ~DMA_CONTROL_TSF; + csr6 &= DMA_CONTROL_TC_TX_MASK; + /* Set the transmit threashold */ + if (txmode <= 32) + csr6 |= DMA_CONTROL_TTC_32; + else if (txmode <= 64) + csr6 |= DMA_CONTROL_TTC_64; + else if (txmode <= 128) + csr6 |= DMA_CONTROL_TTC_128; + else if (txmode <= 192) + csr6 |= DMA_CONTROL_TTC_192; + else + csr6 |= DMA_CONTROL_TTC_256; + } + + if (rxmode == SF_DMA_MODE) { + DBG(KERN_DEBUG "GMAC: enabling RX store and forward mode\n"); + csr6 |= DMA_CONTROL_RSF; + } else { + DBG(KERN_DEBUG "GMAC: disabling RX store and forward mode" + " (threshold = %d)\n", rxmode); + csr6 &= ~DMA_CONTROL_RSF; + csr6 &= DMA_CONTROL_TC_RX_MASK; + if (rxmode <= 32) + csr6 |= DMA_CONTROL_RTC_32; + else if (rxmode <= 64) + csr6 |= DMA_CONTROL_RTC_64; + else if (rxmode <= 96) + csr6 |= DMA_CONTROL_RTC_96; + else + csr6 |= DMA_CONTROL_RTC_128; + } + + writel(csr6, ioaddr + DMA_CONTROL); + return; +} + +/* Not yet implemented --- no RMON module */ +static void gmac_dma_diagnostic_fr(void *data, struct stmmac_extra_stats *x, + unsigned long ioaddr) +{ + return; +} + +static void gmac_dump_dma_regs(unsigned long ioaddr) +{ + int i; + pr_info(" DMA registers\n"); + for (i = 0; i < 22; i++) { + if ((i < 9) || (i > 17)) { + int offset = i * 4; + pr_err("\t Reg No. %d (offset 0x%x): 0x%08x\n", i, + (DMA_BUS_MODE + offset), + readl(ioaddr + DMA_BUS_MODE + offset)); + } + } + return; +} + +static int gmac_get_tx_frame_status(void *data, struct stmmac_extra_stats *x, + struct dma_desc *p, unsigned long ioaddr) +{ + int ret = 0; + struct net_device_stats *stats = (struct net_device_stats *)data; + + if (unlikely(p->des01.etx.error_summary)) { + DBG(KERN_ERR "GMAC TX error... 0x%08x\n", p->des01.etx); + if (unlikely(p->des01.etx.jabber_timeout)) { + DBG(KERN_ERR "\tjabber_timeout error\n"); + x->tx_jabber++; + } + + if (unlikely(p->des01.etx.frame_flushed)) { + DBG(KERN_ERR "\tframe_flushed error\n"); + x->tx_frame_flushed++; + gmac_flush_tx_fifo(ioaddr); + } + + if (unlikely(p->des01.etx.loss_carrier)) { + DBG(KERN_ERR "\tloss_carrier error\n"); + x->tx_losscarrier++; + stats->tx_carrier_errors++; + } + if (unlikely(p->des01.etx.no_carrier)) { + DBG(KERN_ERR "\tno_carrier error\n"); + x->tx_carrier++; + stats->tx_carrier_errors++; + } + if (unlikely(p->des01.etx.late_collision)) { + DBG(KERN_ERR "\tlate_collision error\n"); + stats->collisions += p->des01.etx.collision_count; + } + if (unlikely(p->des01.etx.excessive_collisions)) { + DBG(KERN_ERR "\texcessive_collisions\n"); + stats->collisions += p->des01.etx.collision_count; + } + if (unlikely(p->des01.etx.excessive_deferral)) { + DBG(KERN_INFO "\texcessive tx_deferral\n"); + x->tx_deferred++; + } + + if (unlikely(p->des01.etx.underflow_error)) { + DBG(KERN_ERR "\tunderflow error\n"); + gmac_flush_tx_fifo(ioaddr); + x->tx_underflow++; + } + + if (unlikely(p->des01.etx.ip_header_error)) { + DBG(KERN_ERR "\tTX IP header csum error\n"); + x->tx_ip_header_error++; + } + + if (unlikely(p->des01.etx.payload_error)) { + DBG(KERN_ERR "\tAddr/Payload csum error\n"); + x->tx_payload_error++; + gmac_flush_tx_fifo(ioaddr); + } + + ret = -1; + } + + if (unlikely(p->des01.etx.deferred)) { + DBG(KERN_INFO "GMAC TX status: tx deferred\n"); + x->tx_deferred++; + } +#ifdef STMMAC_VLAN_TAG_USED + if (p->des01.etx.vlan_frame) { + DBG(KERN_INFO "GMAC TX status: VLAN frame\n"); + x->tx_vlan++; + } +#endif + + return ret; +} + +static int gmac_get_tx_len(struct dma_desc *p) +{ + return p->des01.etx.buffer1_size; +} + +static int gmac_coe_rdes0(int ipc_err, int type, int payload_err) +{ + int ret = good_frame; + u32 status = (type << 2 | ipc_err << 1 | payload_err) & 0x7; + + /* bits 5 7 0 | Frame status + * ---------------------------------------------------------- + * 0 0 0 | IEEE 802.3 Type frame (lenght < 1536 octects) + * 1 0 0 | IPv4/6 No CSUM errorS. + * 1 0 1 | IPv4/6 CSUM PAYLOAD error + * 1 1 0 | IPv4/6 CSUM IP HR error + * 1 1 1 | IPv4/6 IP PAYLOAD AND HEADER errorS + * 0 0 1 | IPv4/6 unsupported IP PAYLOAD + * 0 1 1 | COE bypassed.. no IPv4/6 frame + * 0 1 0 | Reserved. + */ + if (status == 0x0) { + DBG(KERN_INFO "RX Des0 status: IEEE 802.3 Type frame.\n"); + ret = good_frame; + } else if (status == 0x4) { + DBG(KERN_INFO "RX Des0 status: IPv4/6 No CSUM errorS.\n"); + ret = good_frame; + } else if (status == 0x5) { + DBG(KERN_ERR "RX Des0 status: IPv4/6 Payload Error.\n"); + ret = csum_none; + } else if (status == 0x6) { + DBG(KERN_ERR "RX Des0 status: IPv4/6 Header Error.\n"); + ret = csum_none; + } else if (status == 0x7) { + DBG(KERN_ERR + "RX Des0 status: IPv4/6 Header and Payload Error.\n"); + ret = csum_none; + } else if (status == 0x1) { + DBG(KERN_ERR + "RX Des0 status: IPv4/6 unsupported IP PAYLOAD.\n"); + ret = discard_frame; + } else if (status == 0x3) { + DBG(KERN_ERR "RX Des0 status: No IPv4, IPv6 frame.\n"); + ret = discard_frame; + } + return ret; +} + +static int gmac_get_rx_frame_status(void *data, struct stmmac_extra_stats *x, + struct dma_desc *p) +{ + int ret = good_frame; + struct net_device_stats *stats = (struct net_device_stats *)data; + + if (unlikely(p->des01.erx.error_summary)) { + DBG(KERN_ERR "GMAC RX Error Summary... 0x%08x\n", p->des01.erx); + if (unlikely(p->des01.erx.descriptor_error)) { + DBG(KERN_ERR "\tdescriptor error\n"); + x->rx_desc++; + stats->rx_length_errors++; + } + if (unlikely(p->des01.erx.overflow_error)) { + DBG(KERN_ERR "\toverflow error\n"); + x->rx_gmac_overflow++; + } + + if (unlikely(p->des01.erx.ipc_csum_error)) + DBG(KERN_ERR "\tIPC Csum Error/Giant frame\n"); + + if (unlikely(p->des01.erx.late_collision)) { + DBG(KERN_ERR "\tlate_collision error\n"); + stats->collisions++; + stats->collisions++; + } + if (unlikely(p->des01.erx.receive_watchdog)) { + DBG(KERN_ERR "\treceive_watchdog error\n"); + x->rx_watchdog++; + } + if (unlikely(p->des01.erx.error_gmii)) { + DBG(KERN_ERR "\tReceive Error\n"); + x->rx_mii++; + } + if (unlikely(p->des01.erx.crc_error)) { + DBG(KERN_ERR "\tCRC error\n"); + x->rx_crc++; + stats->rx_crc_errors++; + } + ret = discard_frame; + } + + /* After a payload csum error, the ES bit is set. + * It doesn't match with the information reported into the databook. + * At any rate, we need to understand if the CSUM hw computation is ok + * and report this info to the upper layers. */ + ret = gmac_coe_rdes0(p->des01.erx.ipc_csum_error, + p->des01.erx.frame_type, p->des01.erx.payload_csum_error); + + if (unlikely(p->des01.erx.dribbling)) { + DBG(KERN_ERR "GMAC RX: dribbling error\n"); + ret = discard_frame; + } + if (unlikely(p->des01.erx.sa_filter_fail)) { + DBG(KERN_ERR "GMAC RX : Source Address filter fail\n"); + x->sa_rx_filter_fail++; + ret = discard_frame; + } + if (unlikely(p->des01.erx.da_filter_fail)) { + DBG(KERN_ERR "GMAC RX : Destination Address filter fail\n"); + x->da_rx_filter_fail++; + ret = discard_frame; + } + if (unlikely(p->des01.erx.length_error)) { + DBG(KERN_ERR "GMAC RX: length_error error\n"); + x->rx_lenght++; + ret = discard_frame; + } +#ifdef STMMAC_VLAN_TAG_USED + if (p->des01.erx.vlan_tag) { + DBG(KERN_INFO "GMAC RX: VLAN frame tagged\n"); + x->rx_vlan++; + } +#endif + return ret; +} + +static void gmac_irq_status(unsigned long ioaddr) +{ + u32 intr_status = readl(ioaddr + GMAC_INT_STATUS); + + /* Not used events (e.g. MMC interrupts) are not handled. */ + if ((intr_status & mmc_tx_irq)) + DBG(KERN_DEBUG "GMAC: MMC tx interrupt: 0x%08x\n", + readl(ioaddr + GMAC_MMC_TX_INTR)); + if (unlikely(intr_status & mmc_rx_irq)) + DBG(KERN_DEBUG "GMAC: MMC rx interrupt: 0x%08x\n", + readl(ioaddr + GMAC_MMC_RX_INTR)); + if (unlikely(intr_status & mmc_rx_csum_offload_irq)) + DBG(KERN_DEBUG "GMAC: MMC rx csum offload: 0x%08x\n", + readl(ioaddr + GMAC_MMC_RX_CSUM_OFFLOAD)); + if (unlikely(intr_status & pmt_irq)) { + DBG(KERN_DEBUG "GMAC: received Magic frame\n"); + /* clear the PMT bits 5 and 6 by reading the PMT + * status register. */ + readl(ioaddr + GMAC_PMT); + } + + return; +} + +static void gmac_core_init(unsigned long ioaddr) +{ + u32 value = readl(ioaddr + GMAC_CONTROL); + value |= GMAC_CORE_INIT; + writel(value, ioaddr + GMAC_CONTROL); + + /* STBus Bridge Configuration */ + /*writel(0xc5608, ioaddr + 0x00007000);*/ + + /* Freeze MMC counters */ + writel(0x8, ioaddr + GMAC_MMC_CTRL); + /* Mask GMAC interrupts */ + writel(0x207, ioaddr + GMAC_INT_MASK); + +#ifdef STMMAC_VLAN_TAG_USED + /* Tag detection without filtering */ + writel(0x0, ioaddr + GMAC_VLAN_TAG); +#endif + return; +} + +static void gmac_set_umac_addr(unsigned long ioaddr, unsigned char *addr, + unsigned int reg_n) +{ + stmmac_set_mac_addr(ioaddr, addr, GMAC_ADDR_HIGH(reg_n), + GMAC_ADDR_LOW(reg_n)); +} + +static void gmac_get_umac_addr(unsigned long ioaddr, unsigned char *addr, + unsigned int reg_n) +{ + stmmac_get_mac_addr(ioaddr, addr, GMAC_ADDR_HIGH(reg_n), + GMAC_ADDR_LOW(reg_n)); +} + +static void gmac_set_filter(struct net_device *dev) +{ + unsigned long ioaddr = dev->base_addr; + unsigned int value = 0; + + DBG(KERN_INFO "%s: # mcasts %d, # unicast %d\n", + __func__, dev->mc_count, dev->uc_count); + + if (dev->flags & IFF_PROMISC) + value = GMAC_FRAME_FILTER_PR; + else if ((dev->mc_count > HASH_TABLE_SIZE) + || (dev->flags & IFF_ALLMULTI)) { + value = GMAC_FRAME_FILTER_PM; /* pass all multi */ + writel(0xffffffff, ioaddr + GMAC_HASH_HIGH); + writel(0xffffffff, ioaddr + GMAC_HASH_LOW); + } else if (dev->mc_count > 0) { + int i; + u32 mc_filter[2]; + struct dev_mc_list *mclist; + + /* Hash filter for multicast */ + value = GMAC_FRAME_FILTER_HMC; + + memset(mc_filter, 0, sizeof(mc_filter)); + for (i = 0, mclist = dev->mc_list; + mclist && i < dev->mc_count; i++, mclist = mclist->next) { + /* The upper 6 bits of the calculated CRC are used to + index the contens of the hash table */ + int bit_nr = + bitrev32(~crc32_le(~0, mclist->dmi_addr, 6)) >> 26; + /* The most significant bit determines the register to + * use (H/L) while the other 5 bits determine the bit + * within the register. */ + mc_filter[bit_nr >> 5] |= 1 << (bit_nr & 31); + } + writel(mc_filter[0], ioaddr + GMAC_HASH_LOW); + writel(mc_filter[1], ioaddr + GMAC_HASH_HIGH); + } + + /* Handle multiple unicast addresses (perfect filtering)*/ + if (dev->uc_count > GMAC_MAX_UNICAST_ADDRESSES) + /* Switch to promiscuous mode is more than 16 addrs + are required */ + value |= GMAC_FRAME_FILTER_PR; + else { + int i; + struct dev_addr_list *uc_ptr = dev->uc_list; + + for (i = 0; i < dev->uc_count; i++) { + gmac_set_umac_addr(ioaddr, uc_ptr->da_addr, + i + 1); + + DBG(KERN_INFO "\t%d " + "- Unicast addr %02x:%02x:%02x:%02x:%02x:" + "%02x\n", i + 1, + uc_ptr->da_addr[0], uc_ptr->da_addr[1], + uc_ptr->da_addr[2], uc_ptr->da_addr[3], + uc_ptr->da_addr[4], uc_ptr->da_addr[5]); + uc_ptr = uc_ptr->next; + } + } + +#ifdef FRAME_FILTER_DEBUG + /* Enable Receive all mode (to debug filtering_fail errors) */ + value |= GMAC_FRAME_FILTER_RA; +#endif + writel(value, ioaddr + GMAC_FRAME_FILTER); + + DBG(KERN_INFO "\tFrame Filter reg: 0x%08x\n\tHash regs: " + "HI 0x%08x, LO 0x%08x\n", readl(ioaddr + GMAC_FRAME_FILTER), + readl(ioaddr + GMAC_HASH_HIGH), readl(ioaddr + GMAC_HASH_LOW)); + + return; +} + +static void gmac_flow_ctrl(unsigned long ioaddr, unsigned int duplex, + unsigned int fc, unsigned int pause_time) +{ + unsigned int flow = 0; + + DBG(KERN_DEBUG "GMAC Flow-Control:\n"); + if (fc & FLOW_RX) { + DBG(KERN_DEBUG "\tReceive Flow-Control ON\n"); + flow |= GMAC_FLOW_CTRL_RFE; + } + if (fc & FLOW_TX) { + DBG(KERN_DEBUG "\tTransmit Flow-Control ON\n"); + flow |= GMAC_FLOW_CTRL_TFE; + } + + if (duplex) { + DBG(KERN_DEBUG "\tduplex mode: pause time: %d\n", pause_time); + flow |= (pause_time << GMAC_FLOW_CTRL_PT_SHIFT); + } + + writel(flow, ioaddr + GMAC_FLOW_CTRL); + return; +} + +static void gmac_pmt(unsigned long ioaddr, unsigned long mode) +{ + unsigned int pmt = 0; + + if (mode == WAKE_MAGIC) { + DBG(KERN_DEBUG "GMAC: WOL Magic frame\n"); + pmt |= power_down | magic_pkt_en; + } else if (mode == WAKE_UCAST) { + DBG(KERN_DEBUG "GMAC: WOL on global unicast\n"); + pmt |= global_unicast; + } + + writel(pmt, ioaddr + GMAC_PMT); + return; +} + +static void gmac_init_rx_desc(struct dma_desc *p, unsigned int ring_size, + int disable_rx_ic) +{ + int i; + for (i = 0; i < ring_size; i++) { + p->des01.erx.own = 1; + p->des01.erx.buffer1_size = BUF_SIZE_8KiB - 1; + /* To support jumbo frames */ + p->des01.erx.buffer2_size = BUF_SIZE_8KiB - 1; + if (i == ring_size - 1) + p->des01.erx.end_ring = 1; + if (disable_rx_ic) + p->des01.erx.disable_ic = 1; + p++; + } + return; +} + +static void gmac_init_tx_desc(struct dma_desc *p, unsigned int ring_size) +{ + int i; + + for (i = 0; i < ring_size; i++) { + p->des01.etx.own = 0; + if (i == ring_size - 1) + p->des01.etx.end_ring = 1; + p++; + } + + return; +} + +static int gmac_get_tx_owner(struct dma_desc *p) +{ + return p->des01.etx.own; +} + +static int gmac_get_rx_owner(struct dma_desc *p) +{ + return p->des01.erx.own; +} + +static void gmac_set_tx_owner(struct dma_desc *p) +{ + p->des01.etx.own = 1; +} + +static void gmac_set_rx_owner(struct dma_desc *p) +{ + p->des01.erx.own = 1; +} + +static int gmac_get_tx_ls(struct dma_desc *p) +{ + return p->des01.etx.last_segment; +} + +static void gmac_release_tx_desc(struct dma_desc *p) +{ + int ter = p->des01.etx.end_ring; + + memset(p, 0, sizeof(struct dma_desc)); + p->des01.etx.end_ring = ter; + + return; +} + +static void gmac_prepare_tx_desc(struct dma_desc *p, int is_fs, int len, + int csum_flag) +{ + p->des01.etx.first_segment = is_fs; + if (unlikely(len > BUF_SIZE_4KiB)) { + p->des01.etx.buffer1_size = BUF_SIZE_4KiB; + p->des01.etx.buffer2_size = len - BUF_SIZE_4KiB; + } else { + p->des01.etx.buffer1_size = len; + } + if (likely(csum_flag)) + p->des01.etx.checksum_insertion = cic_full; +} + +static void gmac_clear_tx_ic(struct dma_desc *p) +{ + p->des01.etx.interrupt = 0; +} + +static void gmac_close_tx_desc(struct dma_desc *p) +{ + p->des01.etx.last_segment = 1; + p->des01.etx.interrupt = 1; +} + +static int gmac_get_rx_frame_len(struct dma_desc *p) +{ + return p->des01.erx.frame_length; +} + +struct stmmac_ops gmac_driver = { + .core_init = gmac_core_init, + .dump_mac_regs = gmac_dump_regs, + .dma_init = gmac_dma_init, + .dump_dma_regs = gmac_dump_dma_regs, + .dma_mode = gmac_dma_operation_mode, + .dma_diagnostic_fr = gmac_dma_diagnostic_fr, + .tx_status = gmac_get_tx_frame_status, + .rx_status = gmac_get_rx_frame_status, + .get_tx_len = gmac_get_tx_len, + .set_filter = gmac_set_filter, + .flow_ctrl = gmac_flow_ctrl, + .pmt = gmac_pmt, + .init_rx_desc = gmac_init_rx_desc, + .init_tx_desc = gmac_init_tx_desc, + .get_tx_owner = gmac_get_tx_owner, + .get_rx_owner = gmac_get_rx_owner, + .release_tx_desc = gmac_release_tx_desc, + .prepare_tx_desc = gmac_prepare_tx_desc, + .clear_tx_ic = gmac_clear_tx_ic, + .close_tx_desc = gmac_close_tx_desc, + .get_tx_ls = gmac_get_tx_ls, + .set_tx_owner = gmac_set_tx_owner, + .set_rx_owner = gmac_set_rx_owner, + .get_rx_frame_len = gmac_get_rx_frame_len, + .host_irq_status = gmac_irq_status, + .set_umac_addr = gmac_set_umac_addr, + .get_umac_addr = gmac_get_umac_addr, +}; + +struct mac_device_info *gmac_setup(unsigned long ioaddr) +{ + struct mac_device_info *mac; + u32 uid = readl(ioaddr + GMAC_VERSION); + + pr_info("\tGMAC - user ID: 0x%x, Synopsys ID: 0x%x\n", + ((uid & 0x0000ff00) >> 8), (uid & 0x000000ff)); + + mac = kzalloc(sizeof(const struct mac_device_info), GFP_KERNEL); + + mac->ops = &gmac_driver; + mac->hw.pmt = PMT_SUPPORTED; + mac->hw.link.port = GMAC_CONTROL_PS; + mac->hw.link.duplex = GMAC_CONTROL_DM; + mac->hw.link.speed = GMAC_CONTROL_FES; + mac->hw.mii.addr = GMAC_MII_ADDR; + mac->hw.mii.data = GMAC_MII_DATA; + + return mac; +} diff --git a/drivers/net/stmmac/gmac.h b/drivers/net/stmmac/gmac.h new file mode 100644 index 0000000..684a363 --- /dev/null +++ b/drivers/net/stmmac/gmac.h @@ -0,0 +1,204 @@ +/******************************************************************************* + Copyright (C) 2007-2009 STMicroelectronics Ltd + + This program is free software; you can redistribute it and/or modify it + under the terms and conditions of the GNU General Public License, + version 2, as published by the Free Software Foundation. + + This program is distributed in the hope it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + + The full GNU General Public License is included in this distribution in + the file called "COPYING". + + Author: Giuseppe Cavallaro <peppe.cavallaro@st.com> +*******************************************************************************/ + +#define GMAC_CONTROL 0x00000000 /* Configuration */ +#define GMAC_FRAME_FILTER 0x00000004 /* Frame Filter */ +#define GMAC_HASH_HIGH 0x00000008 /* Multicast Hash Table High */ +#define GMAC_HASH_LOW 0x0000000c /* Multicast Hash Table Low */ +#define GMAC_MII_ADDR 0x00000010 /* MII Address */ +#define GMAC_MII_DATA 0x00000014 /* MII Data */ +#define GMAC_FLOW_CTRL 0x00000018 /* Flow Control */ +#define GMAC_VLAN_TAG 0x0000001c /* VLAN Tag */ +#define GMAC_VERSION 0x00000020 /* GMAC CORE Version */ +#define GMAC_WAKEUP_FILTER 0x00000028 /* Wake-up Frame Filter */ + +#define GMAC_INT_STATUS 0x00000038 /* interrupt status register */ +enum gmac_irq_status { + time_stamp_irq = 0x0200, + mmc_rx_csum_offload_irq = 0x0080, + mmc_tx_irq = 0x0040, + mmc_rx_irq = 0x0020, + mmc_irq = 0x0010, + pmt_irq = 0x0008, + pcs_ane_irq = 0x0004, + pcs_link_irq = 0x0002, + rgmii_irq = 0x0001, +}; +#define GMAC_INT_MASK 0x0000003c /* interrupt mask register */ + +/* PMT Control and Status */ +#define GMAC_PMT 0x0000002c +enum power_event { + pointer_reset = 0x80000000, + global_unicast = 0x00000200, + wake_up_rx_frame = 0x00000040, + magic_frame = 0x00000020, + wake_up_frame_en = 0x00000004, + magic_pkt_en = 0x00000002, + power_down = 0x00000001, +}; + +/* GMAC HW ADDR regs */ +#define GMAC_ADDR_HIGH(reg) (0x00000040+(reg * 8)) +#define GMAC_ADDR_LOW(reg) (0x00000044+(reg * 8)) +#define GMAC_MAX_UNICAST_ADDRESSES 16 + +#define GMAC_AN_CTRL 0x000000c0 /* AN control */ +#define GMAC_AN_STATUS 0x000000c4 /* AN status */ +#define GMAC_ANE_ADV 0x000000c8 /* Auto-Neg. Advertisement */ +#define GMAC_ANE_LINK 0x000000cc /* Auto-Neg. link partener ability */ +#define GMAC_ANE_EXP 0x000000d0 /* ANE expansion */ +#define GMAC_TBI 0x000000d4 /* TBI extend status */ +#define GMAC_GMII_STATUS 0x000000d8 /* S/R-GMII status */ + +/* GMAC Configuration defines */ +#define GMAC_CONTROL_TC 0x01000000 /* Transmit Conf. in RGMII/SGMII */ +#define GMAC_CONTROL_WD 0x00800000 /* Disable Watchdog on receive */ +#define GMAC_CONTROL_JD 0x00400000 /* Jabber disable */ +#define GMAC_CONTROL_BE 0x00200000 /* Frame Burst Enable */ +#define GMAC_CONTROL_JE 0x00100000 /* Jumbo frame */ +enum inter_frame_gap { + GMAC_CONTROL_IFG_88 = 0x00040000, + GMAC_CONTROL_IFG_80 = 0x00020000, + GMAC_CONTROL_IFG_40 = 0x000e0000, +}; +#define GMAC_CONTROL_DCRS 0x00010000 /* Disable carrier sense during tx */ +#define GMAC_CONTROL_PS 0x00008000 /* Port Select 0:GMI 1:MII */ +#define GMAC_CONTROL_FES 0x00004000 /* Speed 0:10 1:100 */ +#define GMAC_CONTROL_DO 0x00002000 /* Disable Rx Own */ +#define GMAC_CONTROL_LM 0x00001000 /* Loop-back mode */ +#define GMAC_CONTROL_DM 0x00000800 /* Duplex Mode */ +#define GMAC_CONTROL_IPC 0x00000400 /* Checksum Offload */ +#define GMAC_CONTROL_DR 0x00000200 /* Disable Retry */ +#define GMAC_CONTROL_LUD 0x00000100 /* Link up/down */ +#define GMAC_CONTROL_ACS 0x00000080 /* Automatic Pad Stripping */ +#define GMAC_CONTROL_DC 0x00000010 /* Deferral Check */ +#define GMAC_CONTROL_TE 0x00000008 /* Transmitter Enable */ +#define GMAC_CONTROL_RE 0x00000004 /* Receiver Enable */ + +#define GMAC_CORE_INIT (GMAC_CONTROL_JD | GMAC_CONTROL_PS | GMAC_CONTROL_ACS | \ + GMAC_CONTROL_IPC | GMAC_CONTROL_JE | GMAC_CONTROL_BE) + +/* GMAC Frame Filter defines */ +#define GMAC_FRAME_FILTER_PR 0x00000001 /* Promiscuous Mode */ +#define GMAC_FRAME_FILTER_HUC 0x00000002 /* Hash Unicast */ +#define GMAC_FRAME_FILTER_HMC 0x00000004 /* Hash Multicast */ +#define GMAC_FRAME_FILTER_DAIF 0x00000008 /* DA Inverse Filtering */ +#define GMAC_FRAME_FILTER_PM 0x00000010 /* Pass all multicast */ +#define GMAC_FRAME_FILTER_DBF 0x00000020 /* Disable Broadcast frames */ +#define GMAC_FRAME_FILTER_SAIF 0x00000100 /* Inverse Filtering */ +#define GMAC_FRAME_FILTER_SAF 0x00000200 /* Source Address Filter */ +#define GMAC_FRAME_FILTER_HPF 0x00000400 /* Hash or perfect Filter */ +#define GMAC_FRAME_FILTER_RA 0x80000000 /* Receive all mode */ +/* GMII ADDR defines */ +#define GMAC_MII_ADDR_WRITE 0x00000002 /* MII Write */ +#define GMAC_MII_ADDR_BUSY 0x00000001 /* MII Busy */ +/* GMAC FLOW CTRL defines */ +#define GMAC_FLOW_CTRL_PT_MASK 0xffff0000 /* Pause Time Mask */ +#define GMAC_FLOW_CTRL_PT_SHIFT 16 +#define GMAC_FLOW_CTRL_RFE 0x00000004 /* Rx Flow Control Enable */ +#define GMAC_FLOW_CTRL_TFE 0x00000002 /* Tx Flow Control Enable */ +#define GMAC_FLOW_CTRL_FCB_BPA 0x00000001 /* Flow Control Busy ... */ + +/*--- DMA BLOCK defines ---*/ +/* DMA Bus Mode register defines */ +#define DMA_BUS_MODE_SFT_RESET 0x00000001 /* Software Reset */ +#define DMA_BUS_MODE_DA 0x00000002 /* Arbitration scheme */ +#define DMA_BUS_MODE_DSL_MASK 0x0000007c /* Descriptor Skip Length */ +#define DMA_BUS_MODE_DSL_SHIFT 2 /* (in DWORDS) */ +/* Programmable burst length (passed thorugh platform)*/ +#define DMA_BUS_MODE_PBL_MASK 0x00003f00 /* Programmable Burst Len */ +#define DMA_BUS_MODE_PBL_SHIFT 8 + +enum rx_tx_priority_ratio { + double_ratio = 0x00004000, /*2:1 */ + triple_ratio = 0x00008000, /*3:1 */ + quadruple_ratio = 0x0000c000, /*4:1 */ +}; + +#define DMA_BUS_MODE_FB 0x00010000 /* Fixed burst */ +#define DMA_BUS_MODE_RPBL_MASK 0x003e0000 /* Rx-Programmable Burst Len */ +#define DMA_BUS_MODE_RPBL_SHIFT 17 +#define DMA_BUS_MODE_USP 0x00800000 +#define DMA_BUS_MODE_4PBL 0x01000000 +#define DMA_BUS_MODE_AAL 0x02000000 + +/* DMA CRS Control and Status Register Mapping */ +#define DMA_HOST_TX_DESC 0x00001048 /* Current Host Tx descriptor */ +#define DMA_HOST_RX_DESC 0x0000104c /* Current Host Rx descriptor */ +/* DMA Bus Mode register defines */ +#define DMA_BUS_PR_RATIO_MASK 0x0000c000 /* Rx/Tx priority ratio */ +#define DMA_BUS_PR_RATIO_SHIFT 14 +#define DMA_BUS_FB 0x00010000 /* Fixed Burst */ + +/* DMA operation mode defines (start/stop tx/rx are placed in common header)*/ +#define DMA_CONTROL_DT 0x04000000 /* Disable Drop TCP/IP csum error */ +#define DMA_CONTROL_RSF 0x02000000 /* Receive Store and Forward */ +#define DMA_CONTROL_DFF 0x01000000 /* Disaable flushing */ +/* Theshold for Activating the FC */ +enum rfa { + act_full_minus_1 = 0x00800000, + act_full_minus_2 = 0x00800200, + act_full_minus_3 = 0x00800400, + act_full_minus_4 = 0x00800600, +}; +/* Theshold for Deactivating the FC */ +enum rfd { + deac_full_minus_1 = 0x00400000, + deac_full_minus_2 = 0x00400800, + deac_full_minus_3 = 0x00401000, + deac_full_minus_4 = 0x00401800, +}; +#define DMA_CONTROL_TSF 0x00200000 /* Transmit Store and Forward */ +#define DMA_CONTROL_FTF 0x00100000 /* Flush transmit FIFO */ + +enum ttc_control { + DMA_CONTROL_TTC_64 = 0x00000000, + DMA_CONTROL_TTC_128 = 0x00004000, + DMA_CONTROL_TTC_192 = 0x00008000, + DMA_CONTROL_TTC_256 = 0x0000c000, + DMA_CONTROL_TTC_40 = 0x00010000, + DMA_CONTROL_TTC_32 = 0x00014000, + DMA_CONTROL_TTC_24 = 0x00018000, + DMA_CONTROL_TTC_16 = 0x0001c000, +}; +#define DMA_CONTROL_TC_TX_MASK 0xfffe3fff + +#define DMA_CONTROL_EFC 0x00000100 +#define DMA_CONTROL_FEF 0x00000080 +#define DMA_CONTROL_FUF 0x00000040 + +enum rtc_control { + DMA_CONTROL_RTC_64 = 0x00000000, + DMA_CONTROL_RTC_32 = 0x00000008, + DMA_CONTROL_RTC_96 = 0x00000010, + DMA_CONTROL_RTC_128 = 0x00000018, +}; +#define DMA_CONTROL_TC_RX_MASK 0xffffffe7 + +#define DMA_CONTROL_OSF 0x00000004 /* Operate on second frame */ + +/* MMC registers offset */ +#define GMAC_MMC_CTRL 0x100 +#define GMAC_MMC_RX_INTR 0x104 +#define GMAC_MMC_TX_INTR 0x108 +#define GMAC_MMC_RX_CSUM_OFFLOAD 0x208 diff --git a/drivers/net/stmmac/mac100.c b/drivers/net/stmmac/mac100.c new file mode 100644 index 0000000..625171b --- /dev/null +++ b/drivers/net/stmmac/mac100.c @@ -0,0 +1,517 @@ +/******************************************************************************* + This is the driver for the MAC 10/100 on-chip Ethernet controller + currently tested on all the ST boards based on STb7109 and stx7200 SoCs. + + DWC Ether MAC 10/100 Universal version 4.0 has been used for developing + this code. + + Copyright (C) 2007-2009 STMicroelectronics Ltd + + This program is free software; you can redistribute it and/or modify it + under the terms and conditions of the GNU General Public License, + version 2, as published by the Free Software Foundation. + + This program is distributed in the hope it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + + The full GNU General Public License is included in this distribution in + the file called "COPYING". + + Author: Giuseppe Cavallaro <peppe.cavallaro@st.com> +*******************************************************************************/ + +#include <linux/netdevice.h> +#include <linux/crc32.h> +#include <linux/mii.h> +#include <linux/phy.h> + +#include "common.h" +#include "mac100.h" + +#undef MAC100_DEBUG +/*#define MAC100_DEBUG*/ +#ifdef MAC100_DEBUG +#define DBG(fmt, args...) printk(fmt, ## args) +#else +#define DBG(fmt, args...) do { } while (0) +#endif + +static void mac100_core_init(unsigned long ioaddr) +{ + u32 value = readl(ioaddr + MAC_CONTROL); + + writel((value | MAC_CORE_INIT), ioaddr + MAC_CONTROL); + +#ifdef STMMAC_VLAN_TAG_USED + writel(ETH_P_8021Q, ioaddr + MAC_VLAN1); +#endif + return; +} + +static void mac100_dump_mac_regs(unsigned long ioaddr) +{ + pr_info("\t----------------------------------------------\n" + "\t MAC100 CSR (base addr = 0x%8x)\n" + "\t----------------------------------------------\n", + (unsigned int)ioaddr); + pr_info("\tcontrol reg (offset 0x%x): 0x%08x\n", MAC_CONTROL, + readl(ioaddr + MAC_CONTROL)); + pr_info("\taddr HI (offset 0x%x): 0x%08x\n ", MAC_ADDR_HIGH, + readl(ioaddr + MAC_ADDR_HIGH)); + pr_info("\taddr LO (offset 0x%x): 0x%08x\n", MAC_ADDR_LOW, + readl(ioaddr + MAC_ADDR_LOW)); + pr_info("\tmulticast hash HI (offset 0x%x): 0x%08x\n", + MAC_HASH_HIGH, readl(ioaddr + MAC_HASH_HIGH)); + pr_info("\tmulticast hash LO (offset 0x%x): 0x%08x\n", + MAC_HASH_LOW, readl(ioaddr + MAC_HASH_LOW)); + pr_info("\tflow control (offset 0x%x): 0x%08x\n", + MAC_FLOW_CTRL, readl(ioaddr + MAC_FLOW_CTRL)); + pr_info("\tVLAN1 tag (offset 0x%x): 0x%08x\n", MAC_VLAN1, + readl(ioaddr + MAC_VLAN1)); + pr_info("\tVLAN2 tag (offset 0x%x): 0x%08x\n", MAC_VLAN2, + readl(ioaddr + MAC_VLAN2)); + pr_info("\n\tMAC management counter registers\n"); + pr_info("\t MMC crtl (offset 0x%x): 0x%08x\n", + MMC_CONTROL, readl(ioaddr + MMC_CONTROL)); + pr_info("\t MMC High Interrupt (offset 0x%x): 0x%08x\n", + MMC_HIGH_INTR, readl(ioaddr + MMC_HIGH_INTR)); + pr_info("\t MMC Low Interrupt (offset 0x%x): 0x%08x\n", + MMC_LOW_INTR, readl(ioaddr + MMC_LOW_INTR)); + pr_info("\t MMC High Interrupt Mask (offset 0x%x): 0x%08x\n", + MMC_HIGH_INTR_MASK, readl(ioaddr + MMC_HIGH_INTR_MASK)); + pr_info("\t MMC Low Interrupt Mask (offset 0x%x): 0x%08x\n", + MMC_LOW_INTR_MASK, readl(ioaddr + MMC_LOW_INTR_MASK)); + return; +} + +static int mac100_dma_init(unsigned long ioaddr, int pbl, u32 dma_tx, + u32 dma_rx) +{ + u32 value = readl(ioaddr + DMA_BUS_MODE); + /* DMA SW reset */ + value |= DMA_BUS_MODE_SFT_RESET; + writel(value, ioaddr + DMA_BUS_MODE); + do {} while ((readl(ioaddr + DMA_BUS_MODE) & DMA_BUS_MODE_SFT_RESET)); + + /* Enable Application Access by writing to DMA CSR0 */ + writel(DMA_BUS_MODE_DEFAULT | (pbl << DMA_BUS_MODE_PBL_SHIFT), + ioaddr + DMA_BUS_MODE); + + /* Mask interrupts by writing to CSR7 */ + writel(DMA_INTR_DEFAULT_MASK, ioaddr + DMA_INTR_ENA); + + /* The base address of the RX/TX descriptor lists must be written into + * DMA CSR3 and CSR4, respectively. */ + writel(dma_tx, ioaddr + DMA_TX_BASE_ADDR); + writel(dma_rx, ioaddr + DMA_RCV_BASE_ADDR); + + return 0; +} + +/* Store and Forward capability is not used at all.. + * The transmit threshold can be programmed by + * setting the TTC bits in the DMA control register.*/ +static void mac100_dma_operation_mode(unsigned long ioaddr, int txmode, + int rxmode) +{ + u32 csr6 = readl(ioaddr + DMA_CONTROL); + + if (txmode <= 32) + csr6 |= DMA_CONTROL_TTC_32; + else if (txmode <= 64) + csr6 |= DMA_CONTROL_TTC_64; + else + csr6 |= DMA_CONTROL_TTC_128; + + writel(csr6, ioaddr + DMA_CONTROL); + + return; +} + +static void mac100_dump_dma_regs(unsigned long ioaddr) +{ + int i; + + DBG(KERN_DEBUG "MAC100 DMA CSR \n"); + for (i = 0; i < 9; i++) + pr_debug("\t CSR%d (offset 0x%x): 0x%08x\n", i, + (DMA_BUS_MODE + i * 4), + readl(ioaddr + DMA_BUS_MODE + i * 4)); + DBG(KERN_DEBUG "\t CSR20 (offset 0x%x): 0x%08x\n", + DMA_CUR_TX_BUF_ADDR, readl(ioaddr + DMA_CUR_TX_BUF_ADDR)); + DBG(KERN_DEBUG "\t CSR21 (offset 0x%x): 0x%08x\n", + DMA_CUR_RX_BUF_ADDR, readl(ioaddr + DMA_CUR_RX_BUF_ADDR)); + return; +} + +/* DMA controller has two counters to track the number of + the receive missed frames. */ +static void mac100_dma_diagnostic_fr(void *data, struct stmmac_extra_stats *x, + unsigned long ioaddr) +{ + struct net_device_stats *stats = (struct net_device_stats *)data; + u32 csr8 = readl(ioaddr + DMA_MISSED_FRAME_CTR); + + if (unlikely(csr8)) { + if (csr8 & DMA_MISSED_FRAME_OVE) { + stats->rx_over_errors += 0x800; + x->rx_overflow_cntr += 0x800; + } else { + unsigned int ove_cntr; + ove_cntr = ((csr8 & DMA_MISSED_FRAME_OVE_CNTR) >> 17); + stats->rx_over_errors += ove_cntr; + x->rx_overflow_cntr += ove_cntr; + } + + if (csr8 & DMA_MISSED_FRAME_OVE_M) { + stats->rx_missed_errors += 0xffff; + x->rx_missed_cntr += 0xffff; + } else { + unsigned int miss_f = (csr8 & DMA_MISSED_FRAME_M_CNTR); + stats->rx_missed_errors += miss_f; + x->rx_missed_cntr += miss_f; + } + } + return; +} + +static int mac100_get_tx_frame_status(void *data, struct stmmac_extra_stats *x, + struct dma_desc *p, unsigned long ioaddr) +{ + int ret = 0; + struct net_device_stats *stats = (struct net_device_stats *)data; + + if (unlikely(p->des01.tx.error_summary)) { + if (unlikely(p->des01.tx.underflow_error)) { + x->tx_underflow++; + stats->tx_fifo_errors++; + } + if (unlikely(p->des01.tx.no_carrier)) { + x->tx_carrier++; + stats->tx_carrier_errors++; + } + if (unlikely(p->des01.tx.loss_carrier)) { + x->tx_losscarrier++; + stats->tx_carrier_errors++; + } + if (unlikely((p->des01.tx.excessive_deferral) || + (p->des01.tx.excessive_collisions) || + (p->des01.tx.late_collision))) + stats->collisions += p->des01.tx.collision_count; + ret = -1; + } + if (unlikely(p->des01.tx.heartbeat_fail)) { + x->tx_heartbeat++; + stats->tx_heartbeat_errors++; + ret = -1; + } + if (unlikely(p->des01.tx.deferred)) + x->tx_deferred++; + + return ret; +} + +static int mac100_get_tx_len(struct dma_desc *p) +{ + return p->des01.tx.buffer1_size; +} + +/* This function verifies if each incoming frame has some errors + * and, if required, updates the multicast statistics. + * In case of success, it returns csum_none becasue the device + * is not able to compute the csum in HW. */ +static int mac100_get_rx_frame_status(void *data, struct stmmac_extra_stats *x, + struct dma_desc *p) +{ + int ret = csum_none; + struct net_device_stats *stats = (struct net_device_stats *)data; + + if (unlikely(p->des01.rx.last_descriptor == 0)) { + pr_warning("mac100 Error: Oversized Ethernet " + "frame spanned multiple buffers\n"); + stats->rx_length_errors++; + return discard_frame; + } + + if (unlikely(p->des01.rx.error_summary)) { + if (unlikely(p->des01.rx.descriptor_error)) + x->rx_desc++; + if (unlikely(p->des01.rx.partial_frame_error)) + x->rx_partial++; + if (unlikely(p->des01.rx.run_frame)) + x->rx_runt++; + if (unlikely(p->des01.rx.frame_too_long)) + x->rx_toolong++; + if (unlikely(p->des01.rx.collision)) { + x->rx_collision++; + stats->collisions++; + } + if (unlikely(p->des01.rx.crc_error)) { + x->rx_crc++; + stats->rx_crc_errors++; + } + ret = discard_frame; + } + if (unlikely(p->des01.rx.dribbling)) + ret = discard_frame; + + if (unlikely(p->des01.rx.length_error)) { + x->rx_lenght++; + ret = discard_frame; + } + if (unlikely(p->des01.rx.mii_error)) { + x->rx_mii++; + ret = discard_frame; + } + if (p->des01.rx.multicast_frame) { + x->rx_multicast++; + stats->multicast++; + } + return ret; +} + +static void mac100_irq_status(unsigned long ioaddr) +{ + return; +} + +static void mac100_set_umac_addr(unsigned long ioaddr, unsigned char *addr, + unsigned int reg_n) +{ + stmmac_set_mac_addr(ioaddr, addr, MAC_ADDR_HIGH, MAC_ADDR_LOW); +} + +static void mac100_get_umac_addr(unsigned long ioaddr, unsigned char *addr, + unsigned int reg_n) +{ + stmmac_get_mac_addr(ioaddr, addr, MAC_ADDR_HIGH, MAC_ADDR_LOW); +} + +static void mac100_set_filter(struct net_device *dev) +{ + unsigned long ioaddr = dev->base_addr; + u32 value = readl(ioaddr + MAC_CONTROL); + + if (dev->flags & IFF_PROMISC) { + value |= MAC_CONTROL_PR; + value &= ~(MAC_CONTROL_PM | MAC_CONTROL_IF | MAC_CONTROL_HO | + MAC_CONTROL_HP); + } else if ((dev->mc_count > HASH_TABLE_SIZE) + || (dev->flags & IFF_ALLMULTI)) { + value |= MAC_CONTROL_PM; + value &= ~(MAC_CONTROL_PR | MAC_CONTROL_IF | MAC_CONTROL_HO); + writel(0xffffffff, ioaddr + MAC_HASH_HIGH); + writel(0xffffffff, ioaddr + MAC_HASH_LOW); + } else if (dev->mc_count == 0) { /* no multicast */ + value &= ~(MAC_CONTROL_PM | MAC_CONTROL_PR | MAC_CONTROL_IF | + MAC_CONTROL_HO | MAC_CONTROL_HP); + } else { + int i; + u32 mc_filter[2]; + struct dev_mc_list *mclist; + + /* Perfect filter mode for physical address and Hash + filter for multicast */ + value |= MAC_CONTROL_HP; + value &= ~(MAC_CONTROL_PM | MAC_CONTROL_PR | MAC_CONTROL_IF + | MAC_CONTROL_HO); + + memset(mc_filter, 0, sizeof(mc_filter)); + for (i = 0, mclist = dev->mc_list; + mclist && i < dev->mc_count; i++, mclist = mclist->next) { + /* The upper 6 bits of the calculated CRC are used to + * index the contens of the hash table */ + int bit_nr = + ether_crc(ETH_ALEN, mclist->dmi_addr) >> 26; + /* The most significant bit determines the register to + * use (H/L) while the other 5 bits determine the bit + * within the register. */ + mc_filter[bit_nr >> 5] |= 1 << (bit_nr & 31); + } + writel(mc_filter[0], ioaddr + MAC_HASH_LOW); + writel(mc_filter[1], ioaddr + MAC_HASH_HIGH); + } + + writel(value, ioaddr + MAC_CONTROL); + + DBG(KERN_INFO "%s: CTRL reg: 0x%08x Hash regs: " + "HI 0x%08x, LO 0x%08x\n", + __func__, readl(ioaddr + MAC_CONTROL), + readl(ioaddr + MAC_HASH_HIGH), readl(ioaddr + MAC_HASH_LOW)); + return; +} + +static void mac100_flow_ctrl(unsigned long ioaddr, unsigned int duplex, + unsigned int fc, unsigned int pause_time) +{ + unsigned int flow = MAC_FLOW_CTRL_ENABLE; + + if (duplex) + flow |= (pause_time << MAC_FLOW_CTRL_PT_SHIFT); + writel(flow, ioaddr + MAC_FLOW_CTRL); + + return; +} + +/* No PMT module supported in our SoC for the Ethernet Controller. */ +static void mac100_pmt(unsigned long ioaddr, unsigned long mode) +{ + return; +} + +static void mac100_init_rx_desc(struct dma_desc *p, unsigned int ring_size, + int disable_rx_ic) +{ + int i; + for (i = 0; i < ring_size; i++) { + p->des01.rx.own = 1; + p->des01.rx.buffer1_size = BUF_SIZE_2KiB - 1; + if (i == ring_size - 1) + p->des01.rx.end_ring = 1; + if (disable_rx_ic) + p->des01.rx.disable_ic = 1; + p++; + } + return; +} + +static void mac100_init_tx_desc(struct dma_desc *p, unsigned int ring_size) +{ + int i; + for (i = 0; i < ring_size; i++) { + p->des01.tx.own = 0; + if (i == ring_size - 1) + p->des01.tx.end_ring = 1; + p++; + } + return; +} + +static int mac100_get_tx_owner(struct dma_desc *p) +{ + return p->des01.tx.own; +} + +static int mac100_get_rx_owner(struct dma_desc *p) +{ + return p->des01.rx.own; +} + +static void mac100_set_tx_owner(struct dma_desc *p) +{ + p->des01.tx.own = 1; +} + +static void mac100_set_rx_owner(struct dma_desc *p) +{ + p->des01.rx.own = 1; +} + +static int mac100_get_tx_ls(struct dma_desc *p) +{ + return p->des01.tx.last_segment; +} + +static void mac100_release_tx_desc(struct dma_desc *p) +{ + int ter = p->des01.tx.end_ring; + + /* clean field used within the xmit */ + p->des01.tx.first_segment = 0; + p->des01.tx.last_segment = 0; + p->des01.tx.buffer1_size = 0; + + /* clean status reported */ + p->des01.tx.error_summary = 0; + p->des01.tx.underflow_error = 0; + p->des01.tx.no_carrier = 0; + p->des01.tx.loss_carrier = 0; + p->des01.tx.excessive_deferral = 0; + p->des01.tx.excessive_collisions = 0; + p->des01.tx.late_collision = 0; + p->des01.tx.heartbeat_fail = 0; + p->des01.tx.deferred = 0; + + /* set termination field */ + p->des01.tx.end_ring = ter; + + return; +} + +static void mac100_prepare_tx_desc(struct dma_desc *p, int is_fs, int len, + int csum_flag) +{ + p->des01.tx.first_segment = is_fs; + p->des01.tx.buffer1_size = len; +} + +static void mac100_clear_tx_ic(struct dma_desc *p) +{ + p->des01.tx.interrupt = 0; +} + +static void mac100_close_tx_desc(struct dma_desc *p) +{ + p->des01.tx.last_segment = 1; + p->des01.tx.interrupt = 1; +} + +static int mac100_get_rx_frame_len(struct dma_desc *p) +{ + return p->des01.rx.frame_length; +} + +struct stmmac_ops mac100_driver = { + .core_init = mac100_core_init, + .dump_mac_regs = mac100_dump_mac_regs, + .dma_init = mac100_dma_init, + .dump_dma_regs = mac100_dump_dma_regs, + .dma_mode = mac100_dma_operation_mode, + .dma_diagnostic_fr = mac100_dma_diagnostic_fr, + .tx_status = mac100_get_tx_frame_status, + .rx_status = mac100_get_rx_frame_status, + .get_tx_len = mac100_get_tx_len, + .set_filter = mac100_set_filter, + .flow_ctrl = mac100_flow_ctrl, + .pmt = mac100_pmt, + .init_rx_desc = mac100_init_rx_desc, + .init_tx_desc = mac100_init_tx_desc, + .get_tx_owner = mac100_get_tx_owner, + .get_rx_owner = mac100_get_rx_owner, + .release_tx_desc = mac100_release_tx_desc, + .prepare_tx_desc = mac100_prepare_tx_desc, + .clear_tx_ic = mac100_clear_tx_ic, + .close_tx_desc = mac100_close_tx_desc, + .get_tx_ls = mac100_get_tx_ls, + .set_tx_owner = mac100_set_tx_owner, + .set_rx_owner = mac100_set_rx_owner, + .get_rx_frame_len = mac100_get_rx_frame_len, + .host_irq_status = mac100_irq_status, + .set_umac_addr = mac100_set_umac_addr, + .get_umac_addr = mac100_get_umac_addr, +}; + +struct mac_device_info *mac100_setup(unsigned long ioaddr) +{ + struct mac_device_info *mac; + + mac = kzalloc(sizeof(const struct mac_device_info), GFP_KERNEL); + + pr_info("\tMAC 10/100\n"); + + mac->ops = &mac100_driver; + mac->hw.pmt = PMT_NOT_SUPPORTED; + mac->hw.link.port = MAC_CONTROL_PS; + mac->hw.link.duplex = MAC_CONTROL_F; + mac->hw.link.speed = 0; + mac->hw.mii.addr = MAC_MII_ADDR; + mac->hw.mii.data = MAC_MII_DATA; + + return mac; +} diff --git a/drivers/net/stmmac/mac100.h b/drivers/net/stmmac/mac100.h new file mode 100644 index 0000000..0f8f110 --- /dev/null +++ b/drivers/net/stmmac/mac100.h @@ -0,0 +1,116 @@ +/******************************************************************************* + MAC 10/100 Header File + + Copyright (C) 2007-2009 STMicroelectronics Ltd + + This program is free software; you can redistribute it and/or modify it + under the terms and conditions of the GNU General Public License, + version 2, as published by the Free Software Foundation. + + This program is distributed in the hope it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + + The full GNU General Public License is included in this distribution in + the file called "COPYING". + + Author: Giuseppe Cavallaro <peppe.cavallaro@st.com> +*******************************************************************************/ + +/*---------------------------------------------------------------------------- + * MAC BLOCK defines + *---------------------------------------------------------------------------*/ +/* MAC CSR offset */ +#define MAC_CONTROL 0x00000000 /* MAC Control */ +#define MAC_ADDR_HIGH 0x00000004 /* MAC Address High */ +#define MAC_ADDR_LOW 0x00000008 /* MAC Address Low */ +#define MAC_HASH_HIGH 0x0000000c /* Multicast Hash Table High */ +#define MAC_HASH_LOW 0x00000010 /* Multicast Hash Table Low */ +#define MAC_MII_ADDR 0x00000014 /* MII Address */ +#define MAC_MII_DATA 0x00000018 /* MII Data */ +#define MAC_FLOW_CTRL 0x0000001c /* Flow Control */ +#define MAC_VLAN1 0x00000020 /* VLAN1 Tag */ +#define MAC_VLAN2 0x00000024 /* VLAN2 Tag */ + +/* MAC CTRL defines */ +#define MAC_CONTROL_RA 0x80000000 /* Receive All Mode */ +#define MAC_CONTROL_BLE 0x40000000 /* Endian Mode */ +#define MAC_CONTROL_HBD 0x10000000 /* Heartbeat Disable */ +#define MAC_CONTROL_PS 0x08000000 /* Port Select */ +#define MAC_CONTROL_DRO 0x00800000 /* Disable Receive Own */ +#define MAC_CONTROL_EXT_LOOPBACK 0x00400000 /* Reserved (ext loopback?) */ +#define MAC_CONTROL_OM 0x00200000 /* Loopback Operating Mode */ +#define MAC_CONTROL_F 0x00100000 /* Full Duplex Mode */ +#define MAC_CONTROL_PM 0x00080000 /* Pass All Multicast */ +#define MAC_CONTROL_PR 0x00040000 /* Promiscuous Mode */ +#define MAC_CONTROL_IF 0x00020000 /* Inverse Filtering */ +#define MAC_CONTROL_PB 0x00010000 /* Pass Bad Frames */ +#define MAC_CONTROL_HO 0x00008000 /* Hash Only Filtering Mode */ +#define MAC_CONTROL_HP 0x00002000 /* Hash/Perfect Filtering Mode */ +#define MAC_CONTROL_LCC 0x00001000 /* Late Collision Control */ +#define MAC_CONTROL_DBF 0x00000800 /* Disable Broadcast Frames */ +#define MAC_CONTROL_DRTY 0x00000400 /* Disable Retry */ +#define MAC_CONTROL_ASTP 0x00000100 /* Automatic Pad Stripping */ +#define MAC_CONTROL_BOLMT_10 0x00000000 /* Back Off Limit 10 */ +#define MAC_CONTROL_BOLMT_8 0x00000040 /* Back Off Limit 8 */ +#define MAC_CONTROL_BOLMT_4 0x00000080 /* Back Off Limit 4 */ +#define MAC_CONTROL_BOLMT_1 0x000000c0 /* Back Off Limit 1 */ +#define MAC_CONTROL_DC 0x00000020 /* Deferral Check */ +#define MAC_CONTROL_TE 0x00000008 /* Transmitter Enable */ +#define MAC_CONTROL_RE 0x00000004 /* Receiver Enable */ + +#define MAC_CORE_INIT (MAC_CONTROL_HBD | MAC_CONTROL_ASTP) + +/* MAC FLOW CTRL defines */ +#define MAC_FLOW_CTRL_PT_MASK 0xffff0000 /* Pause Time Mask */ +#define MAC_FLOW_CTRL_PT_SHIFT 16 +#define MAC_FLOW_CTRL_PASS 0x00000004 /* Pass Control Frames */ +#define MAC_FLOW_CTRL_ENABLE 0x00000002 /* Flow Control Enable */ +#define MAC_FLOW_CTRL_PAUSE 0x00000001 /* Flow Control Busy ... */ + +/* MII ADDR defines */ +#define MAC_MII_ADDR_WRITE 0x00000002 /* MII Write */ +#define MAC_MII_ADDR_BUSY 0x00000001 /* MII Busy */ + +/*---------------------------------------------------------------------------- + * DMA BLOCK defines + *---------------------------------------------------------------------------*/ + +/* DMA Bus Mode register defines */ +#define DMA_BUS_MODE_DBO 0x00100000 /* Descriptor Byte Ordering */ +#define DMA_BUS_MODE_BLE 0x00000080 /* Big Endian/Little Endian */ +#define DMA_BUS_MODE_PBL_MASK 0x00003f00 /* Programmable Burst Len */ +#define DMA_BUS_MODE_PBL_SHIFT 8 +#define DMA_BUS_MODE_DSL_MASK 0x0000007c /* Descriptor Skip Length */ +#define DMA_BUS_MODE_DSL_SHIFT 2 /* (in DWORDS) */ +#define DMA_BUS_MODE_BAR_BUS 0x00000002 /* Bar-Bus Arbitration */ +#define DMA_BUS_MODE_SFT_RESET 0x00000001 /* Software Reset */ +#define DMA_BUS_MODE_DEFAULT 0x00000000 + +/* DMA Control register defines */ +#define DMA_CONTROL_SF 0x00200000 /* Store And Forward */ + +/* Transmit Threshold Control */ +enum ttc_control { + DMA_CONTROL_TTC_DEFAULT = 0x00000000, /* Threshold is 32 DWORDS */ + DMA_CONTROL_TTC_64 = 0x00004000, /* Threshold is 64 DWORDS */ + DMA_CONTROL_TTC_128 = 0x00008000, /* Threshold is 128 DWORDS */ + DMA_CONTROL_TTC_256 = 0x0000c000, /* Threshold is 256 DWORDS */ + DMA_CONTROL_TTC_18 = 0x00400000, /* Threshold is 18 DWORDS */ + DMA_CONTROL_TTC_24 = 0x00404000, /* Threshold is 24 DWORDS */ + DMA_CONTROL_TTC_32 = 0x00408000, /* Threshold is 32 DWORDS */ + DMA_CONTROL_TTC_40 = 0x0040c000, /* Threshold is 40 DWORDS */ + DMA_CONTROL_SE = 0x00000008, /* Stop On Empty */ + DMA_CONTROL_OSF = 0x00000004, /* Operate On 2nd Frame */ +}; + +/* STMAC110 DMA Missed Frame Counter register defines */ +#define DMA_MISSED_FRAME_OVE 0x10000000 /* FIFO Overflow Overflow */ +#define DMA_MISSED_FRAME_OVE_CNTR 0x0ffe0000 /* Overflow Frame Counter */ +#define DMA_MISSED_FRAME_OVE_M 0x00010000 /* Missed Frame Overflow */ +#define DMA_MISSED_FRAME_M_CNTR 0x0000ffff /* Missed Frame Couinter */ diff --git a/drivers/net/stmmac/stmmac.h b/drivers/net/stmmac/stmmac.h new file mode 100644 index 0000000..6d2eae3 --- /dev/null +++ b/drivers/net/stmmac/stmmac.h @@ -0,0 +1,98 @@ +/******************************************************************************* + Copyright (C) 2007-2009 STMicroelectronics Ltd + + This program is free software; you can redistribute it and/or modify it + under the terms and conditions of the GNU General Public License, + version 2, as published by the Free Software Foundation. + + This program is distributed in the hope it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + + The full GNU General Public License is included in this distribution in + the file called "COPYING". + + Author: Giuseppe Cavallaro <peppe.cavallaro@st.com> +*******************************************************************************/ + +#define DRV_MODULE_VERSION "Oct_09" + +#if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE) +#define STMMAC_VLAN_TAG_USED +#include <linux/if_vlan.h> +#endif + +#include "common.h" +#ifdef CONFIG_STMMAC_TIMER +#include "stmmac_timer.h" +#endif + +struct stmmac_priv { + /* Frequently used values are kept adjacent for cache effect */ + struct dma_desc *dma_tx ____cacheline_aligned; + dma_addr_t dma_tx_phy; + struct sk_buff **tx_skbuff; + unsigned int cur_tx; + unsigned int dirty_tx; + unsigned int dma_tx_size; + int tx_coe; + int tx_coalesce; + + struct dma_desc *dma_rx ; + unsigned int cur_rx; + unsigned int dirty_rx; + struct sk_buff **rx_skbuff; + dma_addr_t *rx_skbuff_dma; + struct sk_buff_head rx_recycle; + + struct net_device *dev; + int is_gmac; + dma_addr_t dma_rx_phy; + unsigned int dma_rx_size; + int rx_csum; + unsigned int dma_buf_sz; + struct device *device; + struct mac_device_info *mac_type; + + struct stmmac_extra_stats xstats; + struct napi_struct napi; + + phy_interface_t phy_interface; + int pbl; + int bus_id; + int phy_addr; + int phy_mask; + int (*phy_reset) (void *priv); + void (*fix_mac_speed) (void *priv, unsigned int speed); + void *bsp_priv; + + int phy_irq; + struct phy_device *phydev; + int oldlink; + int speed; + int oldduplex; + unsigned int flow_ctrl; + unsigned int pause; + struct mii_bus *mii; + + u32 msg_enable; + spinlock_t lock; + int wolopts; + int wolenabled; + int shutdown; +#ifdef CONFIG_STMMAC_TIMER + struct stmmac_timer *tm; +#endif +#ifdef STMMAC_VLAN_TAG_USED + struct vlan_group *vlgrp; +#endif +}; + +extern int stmmac_mdio_unregister(struct net_device *ndev); +extern int stmmac_mdio_register(struct net_device *ndev); +extern void stmmac_set_ethtool_ops(struct net_device *netdev); diff --git a/drivers/net/stmmac/stmmac_ethtool.c b/drivers/net/stmmac/stmmac_ethtool.c new file mode 100644 index 0000000..694ebe6 --- /dev/null +++ b/drivers/net/stmmac/stmmac_ethtool.c @@ -0,0 +1,395 @@ +/******************************************************************************* + STMMAC Ethtool support + + Copyright (C) 2007-2009 STMicroelectronics Ltd + + This program is free software; you can redistribute it and/or modify it + under the terms and conditions of the GNU General Public License, + version 2, as published by the Free Software Foundation. + + This program is distributed in the hope it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + + The full GNU General Public License is included in this distribution in + the file called "COPYING". + + Author: Giuseppe Cavallaro <peppe.cavallaro@st.com> +*******************************************************************************/ + +#include <linux/etherdevice.h> +#include <linux/ethtool.h> +#include <linux/mii.h> +#include <linux/phy.h> + +#include "stmmac.h" + +#define REG_SPACE_SIZE 0x1054 +#define MAC100_ETHTOOL_NAME "st_mac100" +#define GMAC_ETHTOOL_NAME "st_gmac" + +struct stmmac_stats { + char stat_string[ETH_GSTRING_LEN]; + int sizeof_stat; + int stat_offset; +}; + +#define STMMAC_STAT(m) \ + { #m, FIELD_SIZEOF(struct stmmac_extra_stats, m), \ + offsetof(struct stmmac_priv, xstats.m)} + +static const struct stmmac_stats stmmac_gstrings_stats[] = { + STMMAC_STAT(tx_underflow), + STMMAC_STAT(tx_carrier), + STMMAC_STAT(tx_losscarrier), + STMMAC_STAT(tx_heartbeat), + STMMAC_STAT(tx_deferred), + STMMAC_STAT(tx_vlan), + STMMAC_STAT(rx_vlan), + STMMAC_STAT(tx_jabber), + STMMAC_STAT(tx_frame_flushed), + STMMAC_STAT(tx_payload_error), + STMMAC_STAT(tx_ip_header_error), + STMMAC_STAT(rx_desc), + STMMAC_STAT(rx_partial), + STMMAC_STAT(rx_runt), + STMMAC_STAT(rx_toolong), + STMMAC_STAT(rx_collision), + STMMAC_STAT(rx_crc), + STMMAC_STAT(rx_lenght), + STMMAC_STAT(rx_mii), + STMMAC_STAT(rx_multicast), + STMMAC_STAT(rx_gmac_overflow), + STMMAC_STAT(rx_watchdog), + STMMAC_STAT(da_rx_filter_fail), + STMMAC_STAT(sa_rx_filter_fail), + STMMAC_STAT(rx_missed_cntr), + STMMAC_STAT(rx_overflow_cntr), + STMMAC_STAT(tx_undeflow_irq), + STMMAC_STAT(tx_process_stopped_irq), + STMMAC_STAT(tx_jabber_irq), + STMMAC_STAT(rx_overflow_irq), + STMMAC_STAT(rx_buf_unav_irq), + STMMAC_STAT(rx_process_stopped_irq), + STMMAC_STAT(rx_watchdog_irq), + STMMAC_STAT(tx_early_irq), + STMMAC_STAT(fatal_bus_error_irq), + STMMAC_STAT(threshold), + STMMAC_STAT(tx_pkt_n), + STMMAC_STAT(rx_pkt_n), + STMMAC_STAT(poll_n), + STMMAC_STAT(sched_timer_n), + STMMAC_STAT(normal_irq_n), +}; +#define STMMAC_STATS_LEN ARRAY_SIZE(stmmac_gstrings_stats) + +void stmmac_ethtool_getdrvinfo(struct net_device *dev, + struct ethtool_drvinfo *info) +{ + struct stmmac_priv *priv = netdev_priv(dev); + + if (!priv->is_gmac) + strcpy(info->driver, MAC100_ETHTOOL_NAME); + else + strcpy(info->driver, GMAC_ETHTOOL_NAME); + + strcpy(info->version, DRV_MODULE_VERSION); + info->fw_version[0] = '\0'; + info->n_stats = STMMAC_STATS_LEN; + return; +} + +int stmmac_ethtool_getsettings(struct net_device *dev, struct ethtool_cmd *cmd) +{ + struct stmmac_priv *priv = netdev_priv(dev); + struct phy_device *phy = priv->phydev; + int rc; + if (phy == NULL) { + pr_err("%s: %s: PHY is not registered\n", + __func__, dev->name); + return -ENODEV; + } + if (!netif_running(dev)) { + pr_err("%s: interface is disabled: we cannot track " + "link speed / duplex setting\n", dev->name); + return -EBUSY; + } + cmd->transceiver = XCVR_INTERNAL; + spin_lock_irq(&priv->lock); + rc = phy_ethtool_gset(phy, cmd); + spin_unlock_irq(&priv->lock); + return rc; +} + +int stmmac_ethtool_setsettings(struct net_device *dev, struct ethtool_cmd *cmd) +{ + struct stmmac_priv *priv = netdev_priv(dev); + struct phy_device *phy = priv->phydev; + int rc; + + spin_lock(&priv->lock); + rc = phy_ethtool_sset(phy, cmd); + spin_unlock(&priv->lock); + + return rc; +} + +u32 stmmac_ethtool_getmsglevel(struct net_device *dev) +{ + struct stmmac_priv *priv = netdev_priv(dev); + return priv->msg_enable; +} + +void stmmac_ethtool_setmsglevel(struct net_device *dev, u32 level) +{ + struct stmmac_priv *priv = netdev_priv(dev); + priv->msg_enable = level; + +} + +int stmmac_check_if_running(struct net_device *dev) +{ + if (!netif_running(dev)) + return -EBUSY; + return 0; +} + +int stmmac_ethtool_get_regs_len(struct net_device *dev) +{ + return REG_SPACE_SIZE; +} + +void stmmac_ethtool_gregs(struct net_device *dev, + struct ethtool_regs *regs, void *space) +{ + int i; + u32 *reg_space = (u32 *) space; + + struct stmmac_priv *priv = netdev_priv(dev); + + memset(reg_space, 0x0, REG_SPACE_SIZE); + + if (!priv->is_gmac) { + /* MAC registers */ + for (i = 0; i < 12; i++) + reg_space[i] = readl(dev->base_addr + (i * 4)); + /* DMA registers */ + for (i = 0; i < 9; i++) + reg_space[i + 12] = + readl(dev->base_addr + (DMA_BUS_MODE + (i * 4))); + reg_space[22] = readl(dev->base_addr + DMA_CUR_TX_BUF_ADDR); + reg_space[23] = readl(dev->base_addr + DMA_CUR_RX_BUF_ADDR); + } else { + /* MAC registers */ + for (i = 0; i < 55; i++) + reg_space[i] = readl(dev->base_addr + (i * 4)); + /* DMA registers */ + for (i = 0; i < 22; i++) + reg_space[i + 55] = + readl(dev->base_addr + (DMA_BUS_MODE + (i * 4))); + } + + return; +} + +int stmmac_ethtool_set_tx_csum(struct net_device *netdev, u32 data) +{ + if (data) + netdev->features |= NETIF_F_HW_CSUM; + else + netdev->features &= ~NETIF_F_HW_CSUM; + + return 0; +} + +u32 stmmac_ethtool_get_rx_csum(struct net_device *dev) +{ + struct stmmac_priv *priv = netdev_priv(dev); + + return priv->rx_csum; +} + +static void +stmmac_get_pauseparam(struct net_device *netdev, + struct ethtool_pauseparam *pause) +{ + struct stmmac_priv *priv = netdev_priv(netdev); + + spin_lock(&priv->lock); + + pause->rx_pause = 0; + pause->tx_pause = 0; + pause->autoneg = priv->phydev->autoneg; + + if (priv->flow_ctrl & FLOW_RX) + pause->rx_pause = 1; + if (priv->flow_ctrl & FLOW_TX) + pause->tx_pause = 1; + + spin_unlock(&priv->lock); + return; +} + +static int +stmmac_set_pauseparam(struct net_device *netdev, + struct ethtool_pauseparam *pause) +{ + struct stmmac_priv *priv = netdev_priv(netdev); + struct phy_device *phy = priv->phydev; + int new_pause = FLOW_OFF; + int ret = 0; + + spin_lock(&priv->lock); + + if (pause->rx_pause) + new_pause |= FLOW_RX; + if (pause->tx_pause) + new_pause |= FLOW_TX; + + priv->flow_ctrl = new_pause; + + if (phy->autoneg) { + if (netif_running(netdev)) { + struct ethtool_cmd cmd; + /* auto-negotiation automatically restarted */ + cmd.cmd = ETHTOOL_NWAY_RST; + cmd.supported = phy->supported; + cmd.advertising = phy->advertising; + cmd.autoneg = phy->autoneg; + cmd.speed = phy->speed; + cmd.duplex = phy->duplex; + cmd.phy_address = phy->addr; + ret = phy_ethtool_sset(phy, &cmd); + } + } else { + unsigned long ioaddr = netdev->base_addr; + priv->mac_type->ops->flow_ctrl(ioaddr, phy->duplex, + priv->flow_ctrl, priv->pause); + } + spin_unlock(&priv->lock); + return ret; +} + +static void stmmac_get_ethtool_stats(struct net_device *dev, + struct ethtool_stats *dummy, u64 *data) +{ + struct stmmac_priv *priv = netdev_priv(dev); + unsigned long ioaddr = dev->base_addr; + int i; + + /* Update HW stats if supported */ + priv->mac_type->ops->dma_diagnostic_fr(&dev->stats, &priv->xstats, + ioaddr); + + for (i = 0; i < STMMAC_STATS_LEN; i++) { + char *p = (char *)priv + stmmac_gstrings_stats[i].stat_offset; + data[i] = (stmmac_gstrings_stats[i].sizeof_stat == + sizeof(u64)) ? (*(u64 *)p) : (*(u32 *)p); + } + + return; +} + +static int stmmac_get_sset_count(struct net_device *netdev, int sset) +{ + switch (sset) { + case ETH_SS_STATS: + return STMMAC_STATS_LEN; + default: + return -EOPNOTSUPP; + } +} + +static void stmmac_get_strings(struct net_device *dev, u32 stringset, u8 *data) +{ + int i; + u8 *p = data; + + switch (stringset) { + case ETH_SS_STATS: + for (i = 0; i < STMMAC_STATS_LEN; i++) { + memcpy(p, stmmac_gstrings_stats[i].stat_string, + ETH_GSTRING_LEN); + p += ETH_GSTRING_LEN; + } + break; + default: + WARN_ON(1); + break; + } + return; +} + +/* Currently only support WOL through Magic packet. */ +static void stmmac_get_wol(struct net_device *dev, struct ethtool_wolinfo *wol) +{ + struct stmmac_priv *priv = netdev_priv(dev); + + spin_lock_irq(&priv->lock); + if (priv->wolenabled == PMT_SUPPORTED) { + wol->supported = WAKE_MAGIC; + wol->wolopts = priv->wolopts; + } + spin_unlock_irq(&priv->lock); +} + +static int stmmac_set_wol(struct net_device *dev, struct ethtool_wolinfo *wol) +{ + struct stmmac_priv *priv = netdev_priv(dev); + u32 support = WAKE_MAGIC; + + if (priv->wolenabled == PMT_NOT_SUPPORTED) + return -EINVAL; + + if (wol->wolopts & ~support) + return -EINVAL; + + if (wol->wolopts == 0) + device_set_wakeup_enable(priv->device, 0); + else + device_set_wakeup_enable(priv->device, 1); + + spin_lock_irq(&priv->lock); + priv->wolopts = wol->wolopts; + spin_unlock_irq(&priv->lock); + + return 0; +} + +static struct ethtool_ops stmmac_ethtool_ops = { + .begin = stmmac_check_if_running, + .get_drvinfo = stmmac_ethtool_getdrvinfo, + .get_settings = stmmac_ethtool_getsettings, + .set_settings = stmmac_ethtool_setsettings, + .get_msglevel = stmmac_ethtool_getmsglevel, + .set_msglevel = stmmac_ethtool_setmsglevel, + .get_regs = stmmac_ethtool_gregs, + .get_regs_len = stmmac_ethtool_get_regs_len, + .get_link = ethtool_op_get_link, + .get_rx_csum = stmmac_ethtool_get_rx_csum, + .get_tx_csum = ethtool_op_get_tx_csum, + .set_tx_csum = stmmac_ethtool_set_tx_csum, + .get_sg = ethtool_op_get_sg, + .set_sg = ethtool_op_set_sg, + .get_pauseparam = stmmac_get_pauseparam, + .set_pauseparam = stmmac_set_pauseparam, + .get_ethtool_stats = stmmac_get_ethtool_stats, + .get_strings = stmmac_get_strings, + .get_wol = stmmac_get_wol, + .set_wol = stmmac_set_wol, + .get_sset_count = stmmac_get_sset_count, +#ifdef NETIF_F_TSO + .get_tso = ethtool_op_get_tso, + .set_tso = ethtool_op_set_tso, +#endif +}; + +void stmmac_set_ethtool_ops(struct net_device *netdev) +{ + SET_ETHTOOL_OPS(netdev, &stmmac_ethtool_ops); +} diff --git a/drivers/net/stmmac/stmmac_main.c b/drivers/net/stmmac/stmmac_main.c new file mode 100644 index 0000000..c2f14dc --- /dev/null +++ b/drivers/net/stmmac/stmmac_main.c @@ -0,0 +1,2204 @@ +/******************************************************************************* + This is the driver for the ST MAC 10/100/1000 on-chip Ethernet controllers. + ST Ethernet IPs are built around a Synopsys IP Core. + + Copyright (C) 2007-2009 STMicroelectronics Ltd + + This program is free software; you can redistribute it and/or modify it + under the terms and conditions of the GNU General Public License, + version 2, as published by the Free Software Foundation. + + This program is distributed in the hope it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + + The full GNU General Public License is included in this distribution in + the file called "COPYING". + + Author: Giuseppe Cavallaro <peppe.cavallaro@st.com> + + Documentation available at: + http://www.stlinux.com + Support available at: + https://bugzilla.stlinux.com/ +*******************************************************************************/ + +#include <linux/module.h> +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/interrupt.h> +#include <linux/netdevice.h> +#include <linux/etherdevice.h> +#include <linux/platform_device.h> +#include <linux/ip.h> +#include <linux/tcp.h> +#include <linux/skbuff.h> +#include <linux/ethtool.h> +#include <linux/if_ether.h> +#include <linux/crc32.h> +#include <linux/mii.h> +#include <linux/phy.h> +#include <linux/if_vlan.h> +#include <linux/dma-mapping.h> +#include <linux/stm/soc.h> +#include "stmmac.h" + +#define STMMAC_RESOURCE_NAME "stmmaceth" +#define PHY_RESOURCE_NAME "stmmacphy" + +#undef STMMAC_DEBUG +/*#define STMMAC_DEBUG*/ +#ifdef STMMAC_DEBUG +#define DBG(nlevel, klevel, fmt, args...) \ + ((void)(netif_msg_##nlevel(priv) && \ + printk(KERN_##klevel fmt, ## args))) +#else +#define DBG(nlevel, klevel, fmt, args...) do { } while (0) +#endif + +#undef STMMAC_RX_DEBUG +/*#define STMMAC_RX_DEBUG*/ +#ifdef STMMAC_RX_DEBUG +#define RX_DBG(fmt, args...) printk(fmt, ## args) +#else +#define RX_DBG(fmt, args...) do { } while (0) +#endif + +#undef STMMAC_XMIT_DEBUG +/*#define STMMAC_XMIT_DEBUG*/ +#ifdef STMMAC_TX_DEBUG +#define TX_DBG(fmt, args...) printk(fmt, ## args) +#else +#define TX_DBG(fmt, args...) do { } while (0) +#endif + +#define STMMAC_ALIGN(x) L1_CACHE_ALIGN(x) +#define JUMBO_LEN 9000 + +/* Module parameters */ +#define TX_TIMEO 5000 /* default 5 seconds */ +static int watchdog = TX_TIMEO; +module_param(watchdog, int, S_IRUGO | S_IWUSR); +MODULE_PARM_DESC(watchdog, "Transmit timeout in milliseconds"); + +static int debug = -1; /* -1: default, 0: no output, 16: all */ +module_param(debug, int, S_IRUGO | S_IWUSR); +MODULE_PARM_DESC(debug, "Message Level (0: no output, 16: all)"); + +static int phyaddr = -1; +module_param(phyaddr, int, S_IRUGO); +MODULE_PARM_DESC(phyaddr, "Physical device address"); + +#define DMA_TX_SIZE 256 +static int dma_txsize = DMA_TX_SIZE; +module_param(dma_txsize, int, S_IRUGO | S_IWUSR); +MODULE_PARM_DESC(dma_txsize, "Number of descriptors in the TX list"); + +#define DMA_RX_SIZE 256 +static int dma_rxsize = DMA_RX_SIZE; +module_param(dma_rxsize, int, S_IRUGO | S_IWUSR); +MODULE_PARM_DESC(dma_rxsize, "Number of descriptors in the RX list"); + +static int flow_ctrl = FLOW_OFF; +module_param(flow_ctrl, int, S_IRUGO | S_IWUSR); +MODULE_PARM_DESC(flow_ctrl, "Flow control ability [on/off]"); + +static int pause = PAUSE_TIME; +module_param(pause, int, S_IRUGO | S_IWUSR); +MODULE_PARM_DESC(pause, "Flow Control Pause Time"); + +#define TC_DEFAULT 64 +static int tc = TC_DEFAULT; +module_param(tc, int, S_IRUGO | S_IWUSR); +MODULE_PARM_DESC(tc, "DMA threshold control value"); + +#define RX_NO_COALESCE 1 /* Always interrupt on completion */ +#define TX_NO_COALESCE -1 /* No moderation by default */ + +/* Pay attention to tune this parameter; take care of both + * hardware capability and network stabitily/performance impact. + * Many tests showed that ~4ms latency seems to be good enough. */ +#ifdef CONFIG_STMMAC_TIMER +#define DEFAULT_PERIODIC_RATE 256 +static int tmrate = DEFAULT_PERIODIC_RATE; +module_param(tmrate, int, S_IRUGO | S_IWUSR); +MODULE_PARM_DESC(tmrate, "External timer freq. (default: 256Hz)"); +#endif + +#define DMA_BUFFER_SIZE BUF_SIZE_2KiB +static int buf_sz = DMA_BUFFER_SIZE; +module_param(buf_sz, int, S_IRUGO | S_IWUSR); +MODULE_PARM_DESC(buf_sz, "DMA buffer size"); + +/* In case of Giga ETH, we can enable/disable the COE for the + * transmit HW checksum computation. + * Note that, if tx csum is off in HW, SG will be still supported. */ +static int tx_coe = HW_CSUM; +module_param(tx_coe, int, S_IRUGO | S_IWUSR); +MODULE_PARM_DESC(tx_coe, "GMAC COE type 2 [on/off]"); + +static const u32 default_msg_level = (NETIF_MSG_DRV | NETIF_MSG_PROBE | + NETIF_MSG_LINK | NETIF_MSG_IFUP | + NETIF_MSG_IFDOWN | NETIF_MSG_TIMER); + +static irqreturn_t stmmac_interrupt(int irq, void *dev_id); +static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev); + +/** + * stmmac_verify_args - verify the driver parameters. + * Description: it verifies if some wrong parameter is passed to the driver. + * Note that wrong parameters are replaced with the default values. + */ +static void stmmac_verify_args(void) +{ + if (unlikely(watchdog < 0)) + watchdog = TX_TIMEO; + if (unlikely(dma_rxsize < 0)) + dma_rxsize = DMA_RX_SIZE; + if (unlikely(dma_txsize < 0)) + dma_txsize = DMA_TX_SIZE; + if (unlikely((buf_sz < DMA_BUFFER_SIZE) || (buf_sz > BUF_SIZE_16KiB))) + buf_sz = DMA_BUFFER_SIZE; + if (unlikely(flow_ctrl > 1)) + flow_ctrl = FLOW_AUTO; + else if (likely(flow_ctrl < 0)) + flow_ctrl = FLOW_OFF; + if (unlikely((pause < 0) || (pause > 0xffff))) + pause = PAUSE_TIME; + + return; +} + +#if defined(STMMAC_XMIT_DEBUG) || defined(STMMAC_RX_DEBUG) +static void print_pkt(unsigned char *buf, int len) +{ + int j; + pr_info("len = %d byte, buf addr: 0x%p", len, buf); + for (j = 0; j < len; j++) { + if ((j % 16) == 0) + pr_info("\n %03x:", j); + pr_info(" %02x", buf[j]); + } + pr_info("\n"); + return; +} +#endif + +/* minimum number of free TX descriptors required to wake up TX process */ +#define STMMAC_TX_THRESH(x) (x->dma_tx_size/4) + +static inline u32 stmmac_tx_avail(struct stmmac_priv *priv) +{ + return priv->dirty_tx + priv->dma_tx_size - priv->cur_tx - 1; +} + +/** + * stmmac_adjust_link + * @dev: net device structure + * Description: it adjusts the link parameters. + */ +static void stmmac_adjust_link(struct net_device *dev) +{ + struct stmmac_priv *priv = netdev_priv(dev); + struct phy_device *phydev = priv->phydev; + unsigned long ioaddr = dev->base_addr; + unsigned long flags; + int new_state = 0; + unsigned int fc = priv->flow_ctrl, pause_time = priv->pause; + + if (phydev == NULL) + return; + + DBG(probe, DEBUG, "stmmac_adjust_link: called. address %d link %d\n", + phydev->addr, phydev->link); + + spin_lock_irqsave(&priv->lock, flags); + if (phydev->link) { + u32 ctrl = readl(ioaddr + MAC_CTRL_REG); + + /* Now we make sure that we can be in full duplex mode. + * If not, we operate in half-duplex mode. */ + if (phydev->duplex != priv->oldduplex) { + new_state = 1; + if (!(phydev->duplex)) + ctrl &= ~priv->mac_type->hw.link.duplex; + else + ctrl |= priv->mac_type->hw.link.duplex; + priv->oldduplex = phydev->duplex; + } + /* Flow Control operation */ + if (phydev->pause) + priv->mac_type->ops->flow_ctrl(ioaddr, phydev->duplex, + fc, pause_time); + + if (phydev->speed != priv->speed) { + new_state = 1; + switch (phydev->speed) { + case 1000: + if (likely(priv->is_gmac)) + ctrl &= ~priv->mac_type->hw.link.port; + break; + case 100: + case 10: + if (priv->is_gmac) { + ctrl |= priv->mac_type->hw.link.port; + if (phydev->speed == SPEED_100) { + ctrl |= + priv->mac_type->hw.link. + speed; + } else { + ctrl &= + ~(priv->mac_type->hw. + link.speed); + } + } else { + ctrl &= ~priv->mac_type->hw.link.port; + } + priv->fix_mac_speed(priv->bsp_priv, + phydev->speed); + break; + default: + if (netif_msg_link(priv)) + pr_warning("%s: Speed (%d) is not 10" + " or 100!\n", dev->name, phydev->speed); + break; + } + + priv->speed = phydev->speed; + } + + writel(ctrl, ioaddr + MAC_CTRL_REG); + + if (!priv->oldlink) { + new_state = 1; + priv->oldlink = 1; + } + } else if (priv->oldlink) { + new_state = 1; + priv->oldlink = 0; + priv->speed = 0; + priv->oldduplex = -1; + } + + if (new_state && netif_msg_link(priv)) + phy_print_status(phydev); + + spin_unlock_irqrestore(&priv->lock, flags); + + DBG(probe, DEBUG, "stmmac_adjust_link: exiting\n"); +} + +/** + * stmmac_init_phy - PHY initialization + * @dev: net device structure + * Description: it initializes the driver's PHY state, and attaches the PHY + * to the mac driver. + * Return value: + * 0 on success + */ +static int stmmac_init_phy(struct net_device *dev) +{ + struct stmmac_priv *priv = netdev_priv(dev); + struct phy_device *phydev; + char phy_id[BUS_ID_SIZE]; /* PHY to connect */ + char bus_id[BUS_ID_SIZE]; + + priv->oldlink = 0; + priv->speed = 0; + priv->oldduplex = -1; + + if (priv->phy_addr == -1) { + /* We don't have a PHY, so do nothing */ + return 0; + } + + snprintf(bus_id, MII_BUS_ID_SIZE, "%x", priv->bus_id); + snprintf(phy_id, BUS_ID_SIZE, PHY_ID_FMT, bus_id, priv->phy_addr); + pr_debug("stmmac_init_phy: trying to attach to %s\n", phy_id); + + phydev = phy_connect(dev, phy_id, &stmmac_adjust_link, 0, + priv->phy_interface); + + if (IS_ERR(phydev)) { + pr_err("%s: Could not attach to PHY\n", dev->name); + return PTR_ERR(phydev); + } + + /* + * Broken HW is sometimes missing the pull-up resistor on the + * MDIO line, which results in reads to non-existent devices returning + * 0 rather than 0xffff. Catch this here and treat 0 as a non-existent + * device as well. + * Note: phydev->phy_id is the result of reading the UID PHY registers. + */ + if (phydev->phy_id == 0) { + phy_disconnect(phydev); + return -ENODEV; + } + pr_debug("stmmac_init_phy: %s: attached to PHY (UID 0x%x)" + " Link = %d\n", dev->name, phydev->phy_id, phydev->link); + + priv->phydev = phydev; + + return 0; +} + +static inline void stmmac_mac_enable_rx(unsigned long ioaddr) +{ + u32 value = readl(ioaddr + MAC_CTRL_REG); + value |= MAC_RNABLE_RX; + /* Set the RE (receive enable bit into the MAC CTRL register). */ + writel(value, ioaddr + MAC_CTRL_REG); +} + +static inline void stmmac_mac_enable_tx(unsigned long ioaddr) +{ + u32 value = readl(ioaddr + MAC_CTRL_REG); + value |= MAC_ENABLE_TX; + /* Set the TE (transmit enable bit into the MAC CTRL register). */ + writel(value, ioaddr + MAC_CTRL_REG); +} + +static inline void stmmac_mac_disable_rx(unsigned long ioaddr) +{ + u32 value = readl(ioaddr + MAC_CTRL_REG); + value &= ~MAC_RNABLE_RX; + writel(value, ioaddr + MAC_CTRL_REG); +} + +static inline void stmmac_mac_disable_tx(unsigned long ioaddr) +{ + u32 value = readl(ioaddr + MAC_CTRL_REG); + value &= ~MAC_ENABLE_TX; + writel(value, ioaddr + MAC_CTRL_REG); +} + +/** + * display_ring + * @p: pointer to the ring. + * @size: size of the ring. + * Description: display all the descriptors within the ring. + */ +static void display_ring(struct dma_desc *p, int size) +{ + struct tmp_s { + u64 a; + unsigned int b; + unsigned int c; + }; + int i; + for (i = 0; i < size; i++) { + struct tmp_s *x = (struct tmp_s *)(p + i); + pr_info("\t%d [0x%x]: DES0=0x%x DES1=0x%x BUF1=0x%x BUF2=0x%x", + i, (unsigned int)virt_to_phys(&p[i]), + (unsigned int)(x->a), (unsigned int)((x->a) >> 32), + x->b, x->c); + pr_info("\n"); + } +} + +/** + * init_dma_desc_rings - init the RX/TX descriptor rings + * @dev: net device structure + * Description: this function initializes the DMA RX/TX descriptors + * and allocates the socket buffers. + */ +static void init_dma_desc_rings(struct net_device *dev) +{ + int i; + struct stmmac_priv *priv = netdev_priv(dev); + struct sk_buff *skb; + unsigned int txsize = priv->dma_tx_size; + unsigned int rxsize = priv->dma_rx_size; + unsigned int bfsize = priv->dma_buf_sz; + int buff2_needed = 0; + int dis_ic = 0; + +#ifdef CONFIG_STMMAC_TIMER + /* Using Timers disable interrupts on completion for the reception */ + dis_ic = 1; +#endif + /* Set the Buffer size according to the MTU; + * indeed, in case of jumbo we need to bump-up the buffer sizes. + */ + if (unlikely(dev->mtu >= BUF_SIZE_8KiB)) + bfsize = BUF_SIZE_16KiB; + else if (unlikely(dev->mtu >= BUF_SIZE_4KiB)) + bfsize = BUF_SIZE_8KiB; + else if (unlikely(dev->mtu >= BUF_SIZE_2KiB)) + bfsize = BUF_SIZE_4KiB; + else if (unlikely(dev->mtu >= DMA_BUFFER_SIZE)) + bfsize = BUF_SIZE_2KiB; + else + bfsize = DMA_BUFFER_SIZE; + + /* If the MTU exceeds 8k so use the second buffer in the chain */ + if (bfsize >= BUF_SIZE_8KiB) + buff2_needed = 1; + + DBG(probe, INFO, "stmmac: txsize %d, rxsize %d, bfsize %d\n", + txsize, rxsize, bfsize); + + priv->rx_skbuff_dma = kmalloc(rxsize * sizeof(dma_addr_t), GFP_KERNEL); + priv->rx_skbuff = + kmalloc(sizeof(struct sk_buff *) * rxsize, GFP_KERNEL); + priv->dma_rx = + (struct dma_desc *)dma_alloc_coherent(priv->device, + rxsize * + sizeof(struct dma_desc), + &priv->dma_rx_phy, + GFP_KERNEL); + priv->tx_skbuff = kmalloc(sizeof(struct sk_buff *) * txsize, + GFP_KERNEL); + priv->dma_tx = + (struct dma_desc *)dma_alloc_coherent(priv->device, + txsize * + sizeof(struct dma_desc), + &priv->dma_tx_phy, + GFP_KERNEL); + + if ((priv->dma_rx == NULL) || (priv->dma_tx == NULL)) { + pr_err("%s:ERROR allocating the DMA Tx/Rx desc\n", __func__); + return; + } + + DBG(probe, INFO, "stmmac (%s) DMA desc rings: virt addr (Rx %p, " + "Tx %p)\n\tDMA phy addr (Rx 0x%08x, Tx 0x%08x)\n", + dev->name, priv->dma_rx, priv->dma_tx, + (unsigned int)priv->dma_rx_phy, (unsigned int)priv->dma_tx_phy); + + /* RX INITIALIZATION */ + DBG(probe, INFO, "stmmac: SKB addresses:\n" + "skb\t\tskb data\tdma data\n"); + + for (i = 0; i < rxsize; i++) { + struct dma_desc *p = priv->dma_rx + i; + + skb = netdev_alloc_skb_ip_align(dev, bfsize); + if (unlikely(skb == NULL)) { + pr_err("%s: Rx init fails; skb is NULL\n", __func__); + break; + } + priv->rx_skbuff[i] = skb; + priv->rx_skbuff_dma[i] = dma_map_single(priv->device, skb->data, + bfsize, DMA_FROM_DEVICE); + + p->des2 = priv->rx_skbuff_dma[i]; + if (unlikely(buff2_needed)) + p->des3 = p->des2 + BUF_SIZE_8KiB; + DBG(probe, INFO, "[%p]\t[%p]\t[%x]\n", priv->rx_skbuff[i], + priv->rx_skbuff[i]->data, priv->rx_skbuff_dma[i]); + } + priv->cur_rx = 0; + priv->dirty_rx = (unsigned int)(i - rxsize); + priv->dma_buf_sz = bfsize; + buf_sz = bfsize; + + /* TX INITIALIZATION */ + for (i = 0; i < txsize; i++) { + priv->tx_skbuff[i] = NULL; + priv->dma_tx[i].des2 = 0; + } + priv->dirty_tx = 0; + priv->cur_tx = 0; + + /* Clear the Rx/Tx descriptors */ + priv->mac_type->ops->init_rx_desc(priv->dma_rx, rxsize, dis_ic); + priv->mac_type->ops->init_tx_desc(priv->dma_tx, txsize); + + if (netif_msg_hw(priv)) { + pr_info("RX descriptor ring:\n"); + display_ring(priv->dma_rx, rxsize); + pr_info("TX descriptor ring:\n"); + display_ring(priv->dma_tx, txsize); + } + return; +} + +static void dma_free_rx_skbufs(struct stmmac_priv *priv) +{ + int i; + + for (i = 0; i < priv->dma_rx_size; i++) { + if (priv->rx_skbuff[i]) { + dma_unmap_single(priv->device, priv->rx_skbuff_dma[i], + priv->dma_buf_sz, DMA_FROM_DEVICE); + dev_kfree_skb_any(priv->rx_skbuff[i]); + } + priv->rx_skbuff[i] = NULL; + } + return; +} + +static void dma_free_tx_skbufs(struct stmmac_priv *priv) +{ + int i; + + for (i = 0; i < priv->dma_tx_size; i++) { + if (priv->tx_skbuff[i] != NULL) { + struct dma_desc *p = priv->dma_tx + i; + if (p->des2) + dma_unmap_single(priv->device, p->des2, + priv->mac_type->ops->get_tx_len(p), + DMA_TO_DEVICE); + dev_kfree_skb_any(priv->tx_skbuff[i]); + priv->tx_skbuff[i] = NULL; + } + } + return; +} + +static void free_dma_desc_resources(struct stmmac_priv *priv) +{ + /* Release the DMA TX/RX socket buffers */ + dma_free_rx_skbufs(priv); + dma_free_tx_skbufs(priv); + + /* Free the region of consistent memory previously allocated for + * the DMA */ + dma_free_coherent(priv->device, + priv->dma_tx_size * sizeof(struct dma_desc), + priv->dma_tx, priv->dma_tx_phy); + dma_free_coherent(priv->device, + priv->dma_rx_size * sizeof(struct dma_desc), + priv->dma_rx, priv->dma_rx_phy); + kfree(priv->rx_skbuff_dma); + kfree(priv->rx_skbuff); + kfree(priv->tx_skbuff); + + return; +} + +/** + * stmmac_dma_start_tx + * @ioaddr: device I/O address + * Description: this function starts the DMA tx process. + */ +static void stmmac_dma_start_tx(unsigned long ioaddr) +{ + u32 value = readl(ioaddr + DMA_CONTROL); + value |= DMA_CONTROL_ST; + writel(value, ioaddr + DMA_CONTROL); + return; +} + +static void stmmac_dma_stop_tx(unsigned long ioaddr) +{ + u32 value = readl(ioaddr + DMA_CONTROL); + value &= ~DMA_CONTROL_ST; + writel(value, ioaddr + DMA_CONTROL); + return; +} + +/** + * stmmac_dma_start_rx + * @ioaddr: device I/O address + * Description: this function starts the DMA rx process. + */ +static void stmmac_dma_start_rx(unsigned long ioaddr) +{ + u32 value = readl(ioaddr + DMA_CONTROL); + value |= DMA_CONTROL_SR; + writel(value, ioaddr + DMA_CONTROL); + + return; +} + +static void stmmac_dma_stop_rx(unsigned long ioaddr) +{ + u32 value = readl(ioaddr + DMA_CONTROL); + value &= ~DMA_CONTROL_SR; + writel(value, ioaddr + DMA_CONTROL); + + return; +} + +/** + * stmmac_dma_operation_mode - HW DMA operation mode + * @priv : pointer to the private device structure. + * Description: it sets the DMA operation mode: tx/rx DMA thresholds + * or Store-And-Forward capability. It also verifies the COE for the + * transmission in case of Giga ETH. + */ +static void stmmac_dma_operation_mode(struct stmmac_priv *priv) +{ + if (!priv->is_gmac) { + /* MAC 10/100 */ + priv->mac_type->ops->dma_mode(priv->dev->base_addr, tc, 0); + priv->tx_coe = NO_HW_CSUM; + } else { + if ((priv->dev->mtu <= ETH_DATA_LEN) && (tx_coe)) { + priv->mac_type->ops->dma_mode(priv->dev->base_addr, + SF_DMA_MODE, SF_DMA_MODE); + tc = SF_DMA_MODE; + priv->tx_coe = HW_CSUM; + } else { + /* Checksum computation is performed in software. */ + priv->mac_type->ops->dma_mode(priv->dev->base_addr, tc, + SF_DMA_MODE); + priv->tx_coe = NO_HW_CSUM; + } + } + tx_coe = priv->tx_coe; + + return; +} + +#ifdef STMMAC_DEBUG +/** + * show_tx_process_state + * @status: tx descriptor status field + * Description: it shows the Transmit Process State for CSR5[22:20] + */ +static void show_tx_process_state(unsigned int status) +{ + unsigned int state; + state = (status & DMA_STATUS_TS_MASK) >> DMA_STATUS_TS_SHIFT; + + switch (state) { + case 0: + pr_info("- TX (Stopped): Reset or Stop command\n"); + break; + case 1: + pr_info("- TX (Running):Fetching the Tx desc\n"); + break; + case 2: + pr_info("- TX (Running): Waiting for end of tx\n"); + break; + case 3: + pr_info("- TX (Running): Reading the data " + "and queuing the data into the Tx buf\n"); + break; + case 6: + pr_info("- TX (Suspended): Tx Buff Underflow " + "or an unavailable Transmit descriptor\n"); + break; + case 7: + pr_info("- TX (Running): Closing Tx descriptor\n"); + break; + default: + break; + } + return; +} + +/** + * show_rx_process_state + * @status: rx descriptor status field + * Description: it shows the Receive Process State for CSR5[19:17] + */ +static void show_rx_process_state(unsigned int status) +{ + unsigned int state; + state = (status & DMA_STATUS_RS_MASK) >> DMA_STATUS_RS_SHIFT; + + switch (state) { + case 0: + pr_info("- RX (Stopped): Reset or Stop command\n"); + break; + case 1: + pr_info("- RX (Running): Fetching the Rx desc\n"); + break; + case 2: + pr_info("- RX (Running):Checking for end of pkt\n"); + break; + case 3: + pr_info("- RX (Running): Waiting for Rx pkt\n"); + break; + case 4: + pr_info("- RX (Suspended): Unavailable Rx buf\n"); + break; + case 5: + pr_info("- RX (Running): Closing Rx descriptor\n"); + break; + case 6: + pr_info("- RX(Running): Flushing the current frame" + " from the Rx buf\n"); + break; + case 7: + pr_info("- RX (Running): Queuing the Rx frame" + " from the Rx buf into memory\n"); + break; + default: + break; + } + return; +} +#endif + +/** + * stmmac_tx: + * @priv: private driver structure + * Description: it reclaims resources after transmission completes. + */ +static void stmmac_tx(struct stmmac_priv *priv) +{ + unsigned int txsize = priv->dma_tx_size; + unsigned long ioaddr = priv->dev->base_addr; + + while (priv->dirty_tx != priv->cur_tx) { + int last; + unsigned int entry = priv->dirty_tx % txsize; + struct sk_buff *skb = priv->tx_skbuff[entry]; + struct dma_desc *p = priv->dma_tx + entry; + + /* Check if the descriptor is owned by the DMA. */ + if (priv->mac_type->ops->get_tx_owner(p)) + break; + + /* Verify tx error by looking at the last segment */ + last = priv->mac_type->ops->get_tx_ls(p); + if (likely(last)) { + int tx_error = + priv->mac_type->ops->tx_status(&priv->dev->stats, + &priv->xstats, + p, ioaddr); + if (likely(tx_error == 0)) { + priv->dev->stats.tx_packets++; + priv->xstats.tx_pkt_n++; + } else + priv->dev->stats.tx_errors++; + } + TX_DBG("%s: curr %d, dirty %d\n", __func__, + priv->cur_tx, priv->dirty_tx); + + if (likely(p->des2)) + dma_unmap_single(priv->device, p->des2, + priv->mac_type->ops->get_tx_len(p), + DMA_TO_DEVICE); + if (unlikely(p->des3)) + p->des3 = 0; + + if (likely(skb != NULL)) { + /* + * If there's room in the queue (limit it to size) + * we add this skb back into the pool, + * if it's the right size. + */ + if ((skb_queue_len(&priv->rx_recycle) < + priv->dma_rx_size) && + skb_recycle_check(skb, priv->dma_buf_sz)) + __skb_queue_head(&priv->rx_recycle, skb); + else + dev_kfree_skb(skb); + + priv->tx_skbuff[entry] = NULL; + } + + priv->mac_type->ops->release_tx_desc(p); + + entry = (++priv->dirty_tx) % txsize; + } + if (unlikely(netif_queue_stopped(priv->dev) && + stmmac_tx_avail(priv) > STMMAC_TX_THRESH(priv))) { + netif_tx_lock(priv->dev); + if (netif_queue_stopped(priv->dev) && + stmmac_tx_avail(priv) > STMMAC_TX_THRESH(priv)) { + TX_DBG("%s: restart transmit\n", __func__); + netif_wake_queue(priv->dev); + } + netif_tx_unlock(priv->dev); + } + return; +} + +static inline void stmmac_enable_irq(struct stmmac_priv *priv) +{ +#ifndef CONFIG_STMMAC_TIMER + writel(DMA_INTR_DEFAULT_MASK, priv->dev->base_addr + DMA_INTR_ENA); +#else + priv->tm->timer_start(tmrate); +#endif +} + +static inline void stmmac_disable_irq(struct stmmac_priv *priv) +{ +#ifndef CONFIG_STMMAC_TIMER + writel(0, priv->dev->base_addr + DMA_INTR_ENA); +#else + priv->tm->timer_stop(); +#endif +} + +static int stmmac_has_work(struct stmmac_priv *priv) +{ + unsigned int has_work = 0; + int rxret, tx_work = 0; + + rxret = priv->mac_type->ops->get_rx_owner(priv->dma_rx + + (priv->cur_rx % priv->dma_rx_size)); + + if (priv->dirty_tx != priv->cur_tx) + tx_work = 1; + + if (likely(!rxret || tx_work)) + has_work = 1; + + return has_work; +} + +static inline void _stmmac_schedule(struct stmmac_priv *priv) +{ + if (likely(stmmac_has_work(priv))) { + stmmac_disable_irq(priv); + napi_schedule(&priv->napi); + } +} + +#ifdef CONFIG_STMMAC_TIMER +void stmmac_schedule(struct net_device *dev) +{ + struct stmmac_priv *priv = netdev_priv(dev); + + priv->xstats.sched_timer_n++; + + _stmmac_schedule(priv); + + return; +} + +static void stmmac_no_timer_started(unsigned int x) +{; +}; + +static void stmmac_no_timer_stopped(void) +{; +}; +#endif + +/** + * stmmac_tx_err: + * @priv: pointer to the private device structure + * Description: it cleans the descriptors and restarts the transmission + * in case of errors. + */ +static void stmmac_tx_err(struct stmmac_priv *priv) +{ + netif_stop_queue(priv->dev); + + stmmac_dma_stop_tx(priv->dev->base_addr); + dma_free_tx_skbufs(priv); + priv->mac_type->ops->init_tx_desc(priv->dma_tx, priv->dma_tx_size); + priv->dirty_tx = 0; + priv->cur_tx = 0; + stmmac_dma_start_tx(priv->dev->base_addr); + + priv->dev->stats.tx_errors++; + netif_wake_queue(priv->dev); + + return; +} + +/** + * stmmac_dma_interrupt - Interrupt handler for the driver + * @dev: net device structure + * Description: Interrupt handler for the driver (DMA). + */ +static void stmmac_dma_interrupt(struct net_device *dev) +{ + unsigned long ioaddr = dev->base_addr; + struct stmmac_priv *priv = netdev_priv(dev); + /* read the status register (CSR5) */ + u32 intr_status = readl(ioaddr + DMA_STATUS); + + DBG(intr, INFO, "%s: [CSR5: 0x%08x]\n", __func__, intr_status); + +#ifdef STMMAC_DEBUG + /* It displays the DMA transmit process state (CSR5 register) */ + if (netif_msg_tx_done(priv)) + show_tx_process_state(intr_status); + if (netif_msg_rx_status(priv)) + show_rx_process_state(intr_status); +#endif + /* ABNORMAL interrupts */ + if (unlikely(intr_status & DMA_STATUS_AIS)) { + DBG(intr, INFO, "CSR5[15] DMA ABNORMAL IRQ: "); + if (unlikely(intr_status & DMA_STATUS_UNF)) { + DBG(intr, INFO, "transmit underflow\n"); + if (unlikely(tc != SF_DMA_MODE) + && (tc <= 256)) { + /* Try to bump up the threshold */ + tc += 64; + priv->mac_type->ops->dma_mode(ioaddr, tc, + SF_DMA_MODE); + priv->xstats.threshold = tc; + } + stmmac_tx_err(priv); + priv->xstats.tx_undeflow_irq++; + } + if (unlikely(intr_status & DMA_STATUS_TJT)) { + DBG(intr, INFO, "transmit jabber\n"); + priv->xstats.tx_jabber_irq++; + } + if (unlikely(intr_status & DMA_STATUS_OVF)) { + DBG(intr, INFO, "recv overflow\n"); + priv->xstats.rx_overflow_irq++; + } + if (unlikely(intr_status & DMA_STATUS_RU)) { + DBG(intr, INFO, "receive buffer unavailable\n"); + priv->xstats.rx_buf_unav_irq++; + } + if (unlikely(intr_status & DMA_STATUS_RPS)) { + DBG(intr, INFO, "receive process stopped\n"); + priv->xstats.rx_process_stopped_irq++; + } + if (unlikely(intr_status & DMA_STATUS_RWT)) { + DBG(intr, INFO, "receive watchdog\n"); + priv->xstats.rx_watchdog_irq++; + } + if (unlikely(intr_status & DMA_STATUS_ETI)) { + DBG(intr, INFO, "transmit early interrupt\n"); + priv->xstats.tx_early_irq++; + } + if (unlikely(intr_status & DMA_STATUS_TPS)) { + DBG(intr, INFO, "transmit process stopped\n"); + priv->xstats.tx_process_stopped_irq++; + stmmac_tx_err(priv); + } + if (unlikely(intr_status & DMA_STATUS_FBI)) { + DBG(intr, INFO, "fatal bus error\n"); + priv->xstats.fatal_bus_error_irq++; + stmmac_tx_err(priv); + } + } + + /* TX/RX NORMAL interrupts */ + if (intr_status & DMA_STATUS_NIS) { + priv->xstats.normal_irq_n++; + if (likely((intr_status & DMA_STATUS_RI) || + (intr_status & (DMA_STATUS_TI)))) + _stmmac_schedule(priv); + } + + /* Optional hardware blocks, interrupts should be disabled */ + if (unlikely(intr_status & + (DMA_STATUS_GPI | DMA_STATUS_GMI | DMA_STATUS_GLI))) + pr_info("%s: unexpected status %08x\n", __func__, intr_status); + + /* Clear the interrupt by writing a logic 1 to the CSR5[15-0] */ + writel((intr_status & 0x1ffff), ioaddr + DMA_STATUS); + + DBG(intr, INFO, "\n\n"); + + return; +} + +/** + * stmmac_open - open entry point of the driver + * @dev : pointer to the device structure. + * Description: + * This function is the open entry point of the driver. + * Return value: + * 0 on success and an appropriate (-)ve integer as defined in errno.h + * file on failure. + */ +static int stmmac_open(struct net_device *dev) +{ + struct stmmac_priv *priv = netdev_priv(dev); + unsigned long ioaddr = dev->base_addr; + int ret; + + /* Check that the MAC address is valid. If its not, refuse + * to bring the device up. The user must specify an + * address using the following linux command: + * ifconfig eth0 hw ether xx:xx:xx:xx:xx:xx */ + if (!is_valid_ether_addr(dev->dev_addr)) { + random_ether_addr(dev->dev_addr); + pr_warning("%s: generated random MAC address %pM\n", dev->name, + dev->dev_addr); + } + + stmmac_verify_args(); + + ret = stmmac_init_phy(dev); + if (unlikely(ret)) { + pr_err("%s: Cannot attach to PHY (error: %d)\n", __func__, ret); + return ret; + } + + /* Request the IRQ lines */ + ret = request_irq(dev->irq, &stmmac_interrupt, + IRQF_SHARED, dev->name, dev); + if (unlikely(ret < 0)) { + pr_err("%s: ERROR: allocating the IRQ %d (error: %d)\n", + __func__, dev->irq, ret); + return ret; + } + +#ifdef CONFIG_STMMAC_TIMER + priv->tm = kmalloc(sizeof(struct stmmac_timer *), GFP_KERNEL); + if (unlikely(priv->tm == NULL)) { + pr_err("%s: ERROR: timer memory alloc failed \n", __func__); + return -ENOMEM; + } + priv->tm->freq = tmrate; + + /* Test if the HW timer can be actually used. + * In case of failure continue with no timer. */ + if (unlikely((stmmac_open_ext_timer(dev, priv->tm)) < 0)) { + pr_warning("stmmaceth: cannot attach the HW timer\n"); + tmrate = 0; + priv->tm->freq = 0; + priv->tm->timer_start = stmmac_no_timer_started; + priv->tm->timer_stop = stmmac_no_timer_stopped; + } +#endif + + /* Create and initialize the TX/RX descriptors chains. */ + priv->dma_tx_size = STMMAC_ALIGN(dma_txsize); + priv->dma_rx_size = STMMAC_ALIGN(dma_rxsize); + priv->dma_buf_sz = STMMAC_ALIGN(buf_sz); + init_dma_desc_rings(dev); + + /* DMA initialization and SW reset */ + if (unlikely(priv->mac_type->ops->dma_init(ioaddr, + priv->pbl, priv->dma_tx_phy, priv->dma_rx_phy) < 0)) { + + pr_err("%s: DMA initialization failed\n", __func__); + return -1; + } + + /* Copy the MAC addr into the HW */ + priv->mac_type->ops->set_umac_addr(ioaddr, dev->dev_addr, 0); + /* Initialize the MAC Core */ + priv->mac_type->ops->core_init(ioaddr); + + priv->shutdown = 0; + + /* Initialise the MMC (if present) to disable all interrupts. */ + writel(0xffffffff, ioaddr + MMC_HIGH_INTR_MASK); + writel(0xffffffff, ioaddr + MMC_LOW_INTR_MASK); + + /* Enable the MAC Rx/Tx */ + stmmac_mac_enable_rx(ioaddr); + stmmac_mac_enable_tx(ioaddr); + + /* Set the HW DMA mode and the COE */ + stmmac_dma_operation_mode(priv); + + /* Extra statistics */ + memset(&priv->xstats, 0, sizeof(struct stmmac_extra_stats)); + priv->xstats.threshold = tc; + + /* Start the ball rolling... */ + DBG(probe, DEBUG, "%s: DMA RX/TX processes started...\n", dev->name); + stmmac_dma_start_tx(ioaddr); + stmmac_dma_start_rx(ioaddr); + +#ifdef CONFIG_STMMAC_TIMER + priv->tm->timer_start(tmrate); +#endif + /* Dump DMA/MAC registers */ + if (netif_msg_hw(priv)) { + priv->mac_type->ops->dump_mac_regs(ioaddr); + priv->mac_type->ops->dump_dma_regs(ioaddr); + } + + if (priv->phydev) + phy_start(priv->phydev); + + napi_enable(&priv->napi); + skb_queue_head_init(&priv->rx_recycle); + netif_start_queue(dev); + return 0; +} + +/** + * stmmac_release - close entry point of the driver + * @dev : device pointer. + * Description: + * This is the stop entry point of the driver. + */ +static int stmmac_release(struct net_device *dev) +{ + struct stmmac_priv *priv = netdev_priv(dev); + + /* Stop and disconnect the PHY */ + if (priv->phydev) { + phy_stop(priv->phydev); + phy_disconnect(priv->phydev); + priv->phydev = NULL; + } + + netif_stop_queue(dev); + +#ifdef CONFIG_STMMAC_TIMER + /* Stop and release the timer */ + stmmac_close_ext_timer(); + if (priv->tm != NULL) + kfree(priv->tm); +#endif + napi_disable(&priv->napi); + skb_queue_purge(&priv->rx_recycle); + + /* Free the IRQ lines */ + free_irq(dev->irq, dev); + + /* Stop TX/RX DMA and clear the descriptors */ + stmmac_dma_stop_tx(dev->base_addr); + stmmac_dma_stop_rx(dev->base_addr); + + /* Release and free the Rx/Tx resources */ + free_dma_desc_resources(priv); + + /* Disable the MAC core */ + stmmac_mac_disable_tx(dev->base_addr); + stmmac_mac_disable_rx(dev->base_addr); + + netif_carrier_off(dev); + + return 0; +} + +/* + * To perform emulated hardware segmentation on skb. + */ +static int stmmac_sw_tso(struct stmmac_priv *priv, struct sk_buff *skb) +{ + struct sk_buff *segs, *curr_skb; + int gso_segs = skb_shinfo(skb)->gso_segs; + + /* Estimate the number of fragments in the worst case */ + if (unlikely(stmmac_tx_avail(priv) < gso_segs)) { + netif_stop_queue(priv->dev); + TX_DBG(KERN_ERR "%s: TSO BUG! Tx Ring full when queue awake\n", + __func__); + if (stmmac_tx_avail(priv) < gso_segs) + return NETDEV_TX_BUSY; + + netif_wake_queue(priv->dev); + } + TX_DBG("\tstmmac_sw_tso: segmenting: skb %p (len %d)\n", + skb, skb->len); + + segs = skb_gso_segment(skb, priv->dev->features & ~NETIF_F_TSO); + if (unlikely(IS_ERR(segs))) + goto sw_tso_end; + + do { + curr_skb = segs; + segs = segs->next; + TX_DBG("\t\tcurrent skb->len: %d, *curr %p," + "*next %p\n", curr_skb->len, curr_skb, segs); + curr_skb->next = NULL; + stmmac_xmit(curr_skb, priv->dev); + } while (segs); + +sw_tso_end: + dev_kfree_skb(skb); + + return NETDEV_TX_OK; +} + +static unsigned int stmmac_handle_jumbo_frames(struct sk_buff *skb, + struct net_device *dev, + int csum_insertion) +{ + struct stmmac_priv *priv = netdev_priv(dev); + unsigned int nopaged_len = skb_headlen(skb); + unsigned int txsize = priv->dma_tx_size; + unsigned int entry = priv->cur_tx % txsize; + struct dma_desc *desc = priv->dma_tx + entry; + + if (nopaged_len > BUF_SIZE_8KiB) { + + int buf2_size = nopaged_len - BUF_SIZE_8KiB; + + desc->des2 = dma_map_single(priv->device, skb->data, + BUF_SIZE_8KiB, DMA_TO_DEVICE); + desc->des3 = desc->des2 + BUF_SIZE_4KiB; + priv->mac_type->ops->prepare_tx_desc(desc, 1, BUF_SIZE_8KiB, + csum_insertion); + + entry = (++priv->cur_tx) % txsize; + desc = priv->dma_tx + entry; + + desc->des2 = dma_map_single(priv->device, + skb->data + BUF_SIZE_8KiB, + buf2_size, DMA_TO_DEVICE); + desc->des3 = desc->des2 + BUF_SIZE_4KiB; + priv->mac_type->ops->prepare_tx_desc(desc, 0, + buf2_size, csum_insertion); + priv->mac_type->ops->set_tx_owner(desc); + priv->tx_skbuff[entry] = NULL; + } else { + desc->des2 = dma_map_single(priv->device, skb->data, + nopaged_len, DMA_TO_DEVICE); + desc->des3 = desc->des2 + BUF_SIZE_4KiB; + priv->mac_type->ops->prepare_tx_desc(desc, 1, nopaged_len, + csum_insertion); + } + return entry; +} + +/** + * stmmac_xmit: + * @skb : the socket buffer + * @dev : device pointer + * Description : Tx entry point of the driver. + */ +static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev) +{ + struct stmmac_priv *priv = netdev_priv(dev); + unsigned int txsize = priv->dma_tx_size; + unsigned int entry; + int i, csum_insertion = 0; + int nfrags = skb_shinfo(skb)->nr_frags; + struct dma_desc *desc, *first; + + if (unlikely(stmmac_tx_avail(priv) < nfrags + 1)) { + if (!netif_queue_stopped(dev)) { + netif_stop_queue(dev); + /* This is a hard error, log it. */ + pr_err("%s: BUG! Tx Ring full when queue awake\n", + __func__); + } + return NETDEV_TX_BUSY; + } + + entry = priv->cur_tx % txsize; + +#ifdef STMMAC_XMIT_DEBUG + if ((skb->len > ETH_FRAME_LEN) || nfrags) + pr_info("stmmac xmit:\n" + "\tskb addr %p - len: %d - nopaged_len: %d\n" + "\tn_frags: %d - ip_summed: %d - %s gso\n", + skb, skb->len, skb_headlen(skb), nfrags, skb->ip_summed, + !skb_is_gso(skb) ? "isn't" : "is"); +#endif + + if (unlikely(skb_is_gso(skb))) + return stmmac_sw_tso(priv, skb); + + if (likely((skb->ip_summed == CHECKSUM_PARTIAL))) { + if (likely(priv->tx_coe == NO_HW_CSUM)) + skb_checksum_help(skb); + else + csum_insertion = 1; + } + + desc = priv->dma_tx + entry; + first = desc; + +#ifdef STMMAC_XMIT_DEBUG + if ((nfrags > 0) || (skb->len > ETH_FRAME_LEN)) + pr_debug("stmmac xmit: skb len: %d, nopaged_len: %d,\n" + "\t\tn_frags: %d, ip_summed: %d\n", + skb->len, skb_headlen(skb), nfrags, skb->ip_summed); +#endif + priv->tx_skbuff[entry] = skb; + if (unlikely(skb->len >= BUF_SIZE_4KiB)) { + entry = stmmac_handle_jumbo_frames(skb, dev, csum_insertion); + desc = priv->dma_tx + entry; + } else { + unsigned int nopaged_len = skb_headlen(skb); + desc->des2 = dma_map_single(priv->device, skb->data, + nopaged_len, DMA_TO_DEVICE); + priv->mac_type->ops->prepare_tx_desc(desc, 1, nopaged_len, + csum_insertion); + } + + for (i = 0; i < nfrags; i++) { + skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; + int len = frag->size; + + entry = (++priv->cur_tx) % txsize; + desc = priv->dma_tx + entry; + + TX_DBG("\t[entry %d] segment len: %d\n", entry, len); + desc->des2 = dma_map_page(priv->device, frag->page, + frag->page_offset, + len, DMA_TO_DEVICE); + priv->tx_skbuff[entry] = NULL; + priv->mac_type->ops->prepare_tx_desc(desc, 0, len, + csum_insertion); + priv->mac_type->ops->set_tx_owner(desc); + } + + /* Interrupt on completition only for the latest segment */ + priv->mac_type->ops->close_tx_desc(desc); +#ifdef CONFIG_STMMAC_TIMER + /* Clean IC while using timers */ + priv->mac_type->ops->clear_tx_ic(desc); +#endif + /* To avoid raise condition */ + priv->mac_type->ops->set_tx_owner(first); + + priv->cur_tx++; + +#ifdef STMMAC_XMIT_DEBUG + if (netif_msg_pktdata(priv)) { + pr_info("stmmac xmit: current=%d, dirty=%d, entry=%d, " + "first=%p, nfrags=%d\n", + (priv->cur_tx % txsize), (priv->dirty_tx % txsize), + entry, first, nfrags); + display_ring(priv->dma_tx, txsize); + pr_info(">>> frame to be transmitted: "); + print_pkt(skb->data, skb->len); + } +#endif + if (unlikely(stmmac_tx_avail(priv) <= (MAX_SKB_FRAGS + 1))) { + TX_DBG("%s: stop transmitted packets\n", __func__); + netif_stop_queue(dev); + } + + dev->stats.tx_bytes += skb->len; + + /* CSR1 enables the transmit DMA to check for new descriptor */ + writel(1, dev->base_addr + DMA_XMT_POLL_DEMAND); + + return NETDEV_TX_OK; +} + +static inline void stmmac_rx_refill(struct stmmac_priv *priv) +{ + unsigned int rxsize = priv->dma_rx_size; + int bfsize = priv->dma_buf_sz; + struct dma_desc *p = priv->dma_rx; + + for (; priv->cur_rx - priv->dirty_rx > 0; priv->dirty_rx++) { + unsigned int entry = priv->dirty_rx % rxsize; + if (likely(priv->rx_skbuff[entry] == NULL)) { + struct sk_buff *skb; + + skb = __skb_dequeue(&priv->rx_recycle); + if (skb == NULL) + skb = netdev_alloc_skb_ip_align(priv->dev, + bfsize); + + if (unlikely(skb == NULL)) + break; + + priv->rx_skbuff[entry] = skb; + priv->rx_skbuff_dma[entry] = + dma_map_single(priv->device, skb->data, bfsize, + DMA_FROM_DEVICE); + + (p + entry)->des2 = priv->rx_skbuff_dma[entry]; + if (unlikely(priv->is_gmac)) { + if (bfsize >= BUF_SIZE_8KiB) + (p + entry)->des3 = + (p + entry)->des2 + BUF_SIZE_8KiB; + } + RX_DBG(KERN_INFO "\trefill entry #%d\n", entry); + } + priv->mac_type->ops->set_rx_owner(p + entry); + } + return; +} + +static int stmmac_rx(struct stmmac_priv *priv, int limit) +{ + unsigned int rxsize = priv->dma_rx_size; + unsigned int entry = priv->cur_rx % rxsize; + unsigned int next_entry; + unsigned int count = 0; + struct dma_desc *p = priv->dma_rx + entry; + struct dma_desc *p_next; + +#ifdef STMMAC_RX_DEBUG + if (netif_msg_hw(priv)) { + pr_debug(">>> stmmac_rx: descriptor ring:\n"); + display_ring(priv->dma_rx, rxsize); + } +#endif + count = 0; + while (!priv->mac_type->ops->get_rx_owner(p)) { + int status; + + if (count >= limit) + break; + + count++; + + next_entry = (++priv->cur_rx) % rxsize; + p_next = priv->dma_rx + next_entry; + prefetch(p_next); + + /* read the status of the incoming frame */ + status = (priv->mac_type->ops->rx_status(&priv->dev->stats, + &priv->xstats, p)); + if (unlikely(status == discard_frame)) + priv->dev->stats.rx_errors++; + else { + struct sk_buff *skb; + /* Length should omit the CRC */ + int frame_len = + priv->mac_type->ops->get_rx_frame_len(p) - 4; + +#ifdef STMMAC_RX_DEBUG + if (frame_len > ETH_FRAME_LEN) + pr_debug("\tRX frame size %d, COE status: %d\n", + frame_len, status); + + if (netif_msg_hw(priv)) + pr_debug("\tdesc: %p [entry %d] buff=0x%x\n", + p, entry, p->des2); +#endif + skb = priv->rx_skbuff[entry]; + if (unlikely(!skb)) { + pr_err("%s: Inconsistent Rx descriptor chain\n", + priv->dev->name); + priv->dev->stats.rx_dropped++; + break; + } + prefetch(skb->data - NET_IP_ALIGN); + priv->rx_skbuff[entry] = NULL; + + skb_put(skb, frame_len); + dma_unmap_single(priv->device, + priv->rx_skbuff_dma[entry], + priv->dma_buf_sz, DMA_FROM_DEVICE); +#ifdef STMMAC_RX_DEBUG + if (netif_msg_pktdata(priv)) { + pr_info(" frame received (%dbytes)", frame_len); + print_pkt(skb->data, frame_len); + } +#endif + skb->protocol = eth_type_trans(skb, priv->dev); + + if (unlikely(status == csum_none)) { + /* always for the old mac 10/100 */ + skb->ip_summed = CHECKSUM_NONE; + netif_receive_skb(skb); + } else { + skb->ip_summed = CHECKSUM_UNNECESSARY; + napi_gro_receive(&priv->napi, skb); + } + + priv->dev->stats.rx_packets++; + priv->dev->stats.rx_bytes += frame_len; + priv->dev->last_rx = jiffies; + } + entry = next_entry; + p = p_next; /* use prefetched values */ + } + + stmmac_rx_refill(priv); + + priv->xstats.rx_pkt_n += count; + + return count; +} + +/** + * stmmac_poll - stmmac poll method (NAPI) + * @napi : pointer to the napi structure. + * @budget : maximum number of packets that the current CPU can receive from + * all interfaces. + * Description : + * This function implements the the reception process. + * Also it runs the TX completion thread + */ +static int stmmac_poll(struct napi_struct *napi, int budget) +{ + struct stmmac_priv *priv = container_of(napi, struct stmmac_priv, napi); + int work_done = 0; + + priv->xstats.poll_n++; + stmmac_tx(priv); + work_done = stmmac_rx(priv, budget); + + if (work_done < budget) { + napi_complete(napi); + stmmac_enable_irq(priv); + } + return work_done; +} + +/** + * stmmac_tx_timeout + * @dev : Pointer to net device structure + * Description: this function is called when a packet transmission fails to + * complete within a reasonable tmrate. The driver will mark the error in the + * netdev structure and arrange for the device to be reset to a sane state + * in order to transmit a new packet. + */ +static void stmmac_tx_timeout(struct net_device *dev) +{ + struct stmmac_priv *priv = netdev_priv(dev); + + /* Clear Tx resources and restart transmitting again */ + stmmac_tx_err(priv); + return; +} + +/* Configuration changes (passed on by ifconfig) */ +static int stmmac_config(struct net_device *dev, struct ifmap *map) +{ + if (dev->flags & IFF_UP) /* can't act on a running interface */ + return -EBUSY; + + /* Don't allow changing the I/O address */ + if (map->base_addr != dev->base_addr) { + pr_warning("%s: can't change I/O address\n", dev->name); + return -EOPNOTSUPP; + } + + /* Don't allow changing the IRQ */ + if (map->irq != dev->irq) { + pr_warning("%s: can't change IRQ number %d\n", + dev->name, dev->irq); + return -EOPNOTSUPP; + } + + /* ignore other fields */ + return 0; +} + +/** + * stmmac_multicast_list - entry point for multicast addressing + * @dev : pointer to the device structure + * Description: + * This function is a driver entry point which gets called by the kernel + * whenever multicast addresses must be enabled/disabled. + * Return value: + * void. + */ +static void stmmac_multicast_list(struct net_device *dev) +{ + struct stmmac_priv *priv = netdev_priv(dev); + + spin_lock(&priv->lock); + priv->mac_type->ops->set_filter(dev); + spin_unlock(&priv->lock); + return; +} + +/** + * stmmac_change_mtu - entry point to change MTU size for the device. + * @dev : device pointer. + * @new_mtu : the new MTU size for the device. + * Description: the Maximum Transfer Unit (MTU) is used by the network layer + * to drive packet transmission. Ethernet has an MTU of 1500 octets + * (ETH_DATA_LEN). This value can be changed with ifconfig. + * Return value: + * 0 on success and an appropriate (-)ve integer as defined in errno.h + * file on failure. + */ +static int stmmac_change_mtu(struct net_device *dev, int new_mtu) +{ + struct stmmac_priv *priv = netdev_priv(dev); + int max_mtu; + + if (netif_running(dev)) { + pr_err("%s: must be stopped to change its MTU\n", dev->name); + return -EBUSY; + } + + if (priv->is_gmac) + max_mtu = JUMBO_LEN; + else + max_mtu = ETH_DATA_LEN; + + if ((new_mtu < 46) || (new_mtu > max_mtu)) { + pr_err("%s: invalid MTU, max MTU is: %d\n", dev->name, max_mtu); + return -EINVAL; + } + + dev->mtu = new_mtu; + + return 0; +} + +static irqreturn_t stmmac_interrupt(int irq, void *dev_id) +{ + struct net_device *dev = (struct net_device *)dev_id; + struct stmmac_priv *priv = netdev_priv(dev); + + if (unlikely(!dev)) { + pr_err("%s: invalid dev pointer\n", __func__); + return IRQ_NONE; + } + + if (priv->is_gmac) { + unsigned long ioaddr = dev->base_addr; + /* To handle GMAC own interrupts */ + priv->mac_type->ops->host_irq_status(ioaddr); + } + stmmac_dma_interrupt(dev); + + return IRQ_HANDLED; +} + +#ifdef CONFIG_NET_POLL_CONTROLLER +/* Polling receive - used by NETCONSOLE and other diagnostic tools + * to allow network I/O with interrupts disabled. */ +static void stmmac_poll_controller(struct net_device *dev) +{ + disable_irq(dev->irq); + stmmac_interrupt(dev->irq, dev); + enable_irq(dev->irq); +} +#endif + +/** + * stmmac_ioctl - Entry point for the Ioctl + * @dev: Device pointer. + * @rq: An IOCTL specefic structure, that can contain a pointer to + * a proprietary structure used to pass information to the driver. + * @cmd: IOCTL command + * Description: + * Currently there are no special functionality supported in IOCTL, just the + * phy_mii_ioctl(...) can be invoked. + */ +static int stmmac_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) +{ + struct stmmac_priv *priv = netdev_priv(dev); + int ret = -EOPNOTSUPP; + + if (!netif_running(dev)) + return -EINVAL; + + switch (cmd) { + case SIOCGMIIPHY: + case SIOCGMIIREG: + case SIOCSMIIREG: + if (!priv->phydev) + return -EINVAL; + + spin_lock(&priv->lock); + ret = phy_mii_ioctl(priv->phydev, if_mii(rq), cmd); + spin_unlock(&priv->lock); + default: + break; + } + return ret; +} + +#ifdef STMMAC_VLAN_TAG_USED +static void stmmac_vlan_rx_register(struct net_device *dev, + struct vlan_group *grp) +{ + struct stmmac_priv *priv = netdev_priv(dev); + + DBG(probe, INFO, "%s: Setting vlgrp to %p\n", dev->name, grp); + + spin_lock(&priv->lock); + priv->vlgrp = grp; + spin_unlock(&priv->lock); + + return; +} +#endif + +static const struct net_device_ops stmmac_netdev_ops = { + .ndo_open = stmmac_open, + .ndo_start_xmit = stmmac_xmit, + .ndo_stop = stmmac_release, + .ndo_change_mtu = stmmac_change_mtu, + .ndo_set_multicast_list = stmmac_multicast_list, + .ndo_tx_timeout = stmmac_tx_timeout, + .ndo_do_ioctl = stmmac_ioctl, + .ndo_set_config = stmmac_config, +#ifdef STMMAC_VLAN_TAG_USED + .ndo_vlan_rx_register = stmmac_vlan_rx_register, +#endif +#ifdef CONFIG_NET_POLL_CONTROLLER + .ndo_poll_controller = stmmac_poll_controller, +#endif + .ndo_set_mac_address = eth_mac_addr, +}; + +/** + * stmmac_probe - Initialization of the adapter . + * @dev : device pointer + * Description: The function initializes the network device structure for + * the STMMAC driver. It also calls the low level routines + * in order to init the HW (i.e. the DMA engine) + */ +static int stmmac_probe(struct net_device *dev) +{ + int ret = 0; + struct stmmac_priv *priv = netdev_priv(dev); + + ether_setup(dev); + + dev->netdev_ops = &stmmac_netdev_ops; + stmmac_set_ethtool_ops(dev); + + dev->features |= (NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_HIGHDMA); + dev->watchdog_timeo = msecs_to_jiffies(watchdog); +#ifdef STMMAC_VLAN_TAG_USED + /* Both mac100 and gmac support receive VLAN tag detection */ + dev->features |= NETIF_F_HW_VLAN_RX; +#endif + priv->msg_enable = netif_msg_init(debug, default_msg_level); + + if (priv->is_gmac) + priv->rx_csum = 1; + + if (flow_ctrl) + priv->flow_ctrl = FLOW_AUTO; /* RX/TX pause on */ + + priv->pause = pause; + netif_napi_add(dev, &priv->napi, stmmac_poll, 64); + + /* Get the MAC address */ + priv->mac_type->ops->get_umac_addr(dev->base_addr, dev->dev_addr, 0); + + if (!is_valid_ether_addr(dev->dev_addr)) + pr_warning("\tno valid MAC address;" + "please, use ifconfig or nwhwconfig!\n"); + + ret = register_netdev(dev); + if (ret) { + pr_err("%s: ERROR %i registering the device\n", + __func__, ret); + return -ENODEV; + } + + DBG(probe, DEBUG, "%s: Scatter/Gather: %s - HW checksums: %s\n", + dev->name, (dev->features & NETIF_F_SG) ? "on" : "off", + (dev->features & NETIF_F_HW_CSUM) ? "on" : "off"); + + spin_lock_init(&priv->lock); + + return ret; +} + +/** + * stmmac_mac_device_setup + * @dev : device pointer + * Description: select and initialise the mac device (mac100 or Gmac). + */ +static int stmmac_mac_device_setup(struct net_device *dev) +{ + struct stmmac_priv *priv = netdev_priv(dev); + unsigned long ioaddr = dev->base_addr; + + struct mac_device_info *device; + + if (priv->is_gmac) + device = gmac_setup(ioaddr); + else + device = mac100_setup(ioaddr); + + if (!device) + return -ENOMEM; + + priv->mac_type = device; + + priv->wolenabled = priv->mac_type->hw.pmt; /* PMT supported */ + if (priv->wolenabled == PMT_SUPPORTED) + priv->wolopts = WAKE_MAGIC; /* Magic Frame */ + + return 0; +} + +static int stmmacphy_dvr_probe(struct platform_device *pdev) +{ + struct plat_stmmacphy_data *plat_dat; + plat_dat = (struct plat_stmmacphy_data *)((pdev->dev).platform_data); + + pr_debug("stmmacphy_dvr_probe: added phy for bus %d\n", + plat_dat->bus_id); + + return 0; +} + +static int stmmacphy_dvr_remove(struct platform_device *pdev) +{ + return 0; +} + +static struct platform_driver stmmacphy_driver = { + .driver = { + .name = PHY_RESOURCE_NAME, + }, + .probe = stmmacphy_dvr_probe, + .remove = stmmacphy_dvr_remove, +}; + +/** + * stmmac_associate_phy + * @dev: pointer to device structure + * @data: points to the private structure. + * Description: Scans through all the PHYs we have registered and checks if + * any are associated with our MAC. If so, then just fill in + * the blanks in our local context structure + */ +static int stmmac_associate_phy(struct device *dev, void *data) +{ + struct stmmac_priv *priv = (struct stmmac_priv *)data; + struct plat_stmmacphy_data *plat_dat; + + plat_dat = (struct plat_stmmacphy_data *)(dev->platform_data); + + DBG(probe, DEBUG, "%s: checking phy for bus %d\n", __func__, + plat_dat->bus_id); + + /* Check that this phy is for the MAC being initialised */ + if (priv->bus_id != plat_dat->bus_id) + return 0; + + /* OK, this PHY is connected to the MAC. + Go ahead and get the parameters */ + DBG(probe, DEBUG, "%s: OK. Found PHY config\n", __func__); + priv->phy_irq = + platform_get_irq_byname(to_platform_device(dev), "phyirq"); + DBG(probe, DEBUG, "%s: PHY irq on bus %d is %d\n", __func__, + plat_dat->bus_id, priv->phy_irq); + + /* Override with kernel parameters if supplied XXX CRS XXX + * this needs to have multiple instances */ + if ((phyaddr >= 0) && (phyaddr <= 31)) + plat_dat->phy_addr = phyaddr; + + priv->phy_addr = plat_dat->phy_addr; + priv->phy_mask = plat_dat->phy_mask; + priv->phy_interface = plat_dat->interface; + priv->phy_reset = plat_dat->phy_reset; + + DBG(probe, DEBUG, "%s: exiting\n", __func__); + return 1; /* forces exit of driver_for_each_device() */ +} + +/** + * stmmac_dvr_probe + * @pdev: platform device pointer + * Description: the driver is initialized through platform_device. + */ +static int stmmac_dvr_probe(struct platform_device *pdev) +{ + int ret = 0; + struct resource *res; + unsigned int *addr = NULL; + struct net_device *ndev = NULL; + struct stmmac_priv *priv; + struct plat_stmmacenet_data *plat_dat; + + pr_info("STMMAC driver:\n\tplatform registration... "); + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!res) { + ret = -ENODEV; + goto out; + } + pr_info("done!\n"); + + if (!request_mem_region(res->start, (res->end - res->start), + pdev->name)) { + pr_err("%s: ERROR: memory allocation failed" + "cannot get the I/O addr 0x%x\n", + __func__, (unsigned int)res->start); + ret = -EBUSY; + goto out; + } + + addr = ioremap(res->start, (res->end - res->start)); + if (!addr) { + pr_err("%s: ERROR: memory mapping failed \n", __func__); + ret = -ENOMEM; + goto out; + } + + ndev = alloc_etherdev(sizeof(struct stmmac_priv)); + if (!ndev) { + pr_err("%s: ERROR: allocating the device\n", __func__); + ret = -ENOMEM; + goto out; + } + + SET_NETDEV_DEV(ndev, &pdev->dev); + + /* Get the MAC information */ + ndev->irq = platform_get_irq_byname(pdev, "macirq"); + if (ndev->irq == -ENXIO) { + pr_err("%s: ERROR: MAC IRQ configuration " + "information not found\n", __func__); + ret = -ENODEV; + goto out; + } + + priv = netdev_priv(ndev); + priv->device = &(pdev->dev); + priv->dev = ndev; + plat_dat = (struct plat_stmmacenet_data *)((pdev->dev).platform_data); + priv->bus_id = plat_dat->bus_id; + priv->pbl = plat_dat->pbl; /* TLI */ + priv->is_gmac = plat_dat->has_gmac; /* GMAC is on board */ + + platform_set_drvdata(pdev, ndev); + + /* Set the I/O base addr */ + ndev->base_addr = (unsigned long)addr; + + /* MAC HW revice detection */ + ret = stmmac_mac_device_setup(ndev); + if (ret < 0) + goto out; + + /* Network Device Registration */ + ret = stmmac_probe(ndev); + if (ret < 0) + goto out; + + /* associate a PHY - it is provided by another platform bus */ + if (!driver_for_each_device + (&(stmmacphy_driver.driver), NULL, (void *)priv, + stmmac_associate_phy)) { + pr_err("No PHY device is associated with this MAC!\n"); + ret = -ENODEV; + goto out; + } + + priv->fix_mac_speed = plat_dat->fix_mac_speed; + priv->bsp_priv = plat_dat->bsp_priv; + + pr_info("\t%s - (dev. name: %s - id: %d, IRQ #%d\n" + "\tIO base addr: 0x%08x)\n", ndev->name, pdev->name, + pdev->id, ndev->irq, (unsigned int)addr); + + /* MDIO bus Registration */ + pr_debug("\tMDIO bus (id: %d)...", priv->bus_id); + ret = stmmac_mdio_register(ndev); + if (ret < 0) + goto out; + pr_debug("registered!\n"); + +out: + if (ret < 0) { + platform_set_drvdata(pdev, NULL); + release_mem_region(res->start, (res->end - res->start)); + if (addr != NULL) + iounmap(addr); + } + + return ret; +} + +/** + * stmmac_dvr_remove + * @pdev: platform device pointer + * Description: this function resets the TX/RX processes, disables the MAC RX/TX + * changes the link status, releases the DMA descriptor rings, + * unregisters the MDIO bus and unmaps the allocated memory. + */ +static int stmmac_dvr_remove(struct platform_device *pdev) +{ + struct net_device *ndev = platform_get_drvdata(pdev); + struct resource *res; + + pr_info("%s:\n\tremoving driver", __func__); + + stmmac_dma_stop_rx(ndev->base_addr); + stmmac_dma_stop_tx(ndev->base_addr); + + stmmac_mac_disable_rx(ndev->base_addr); + stmmac_mac_disable_tx(ndev->base_addr); + + netif_carrier_off(ndev); + + stmmac_mdio_unregister(ndev); + + platform_set_drvdata(pdev, NULL); + unregister_netdev(ndev); + + iounmap((void *)ndev->base_addr); + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + release_mem_region(res->start, (res->end - res->start)); + + free_netdev(ndev); + + return 0; +} + +#ifdef CONFIG_PM +static int stmmac_suspend(struct platform_device *pdev, pm_message_t state) +{ + struct net_device *dev = platform_get_drvdata(pdev); + struct stmmac_priv *priv = netdev_priv(dev); + int dis_ic = 0; + + if (!dev || !netif_running(dev)) + return 0; + + spin_lock(&priv->lock); + + if (state.event == PM_EVENT_SUSPEND) { + netif_device_detach(dev); + netif_stop_queue(dev); + if (priv->phydev) + phy_stop(priv->phydev); + +#ifdef CONFIG_STMMAC_TIMER + priv->tm->timer_stop(); + dis_ic = 1; +#endif + napi_disable(&priv->napi); + + /* Stop TX/RX DMA */ + stmmac_dma_stop_tx(dev->base_addr); + stmmac_dma_stop_rx(dev->base_addr); + /* Clear the Rx/Tx descriptors */ + priv->mac_type->ops->init_rx_desc(priv->dma_rx, + priv->dma_rx_size, dis_ic); + priv->mac_type->ops->init_tx_desc(priv->dma_tx, + priv->dma_tx_size); + + stmmac_mac_disable_tx(dev->base_addr); + + if (device_may_wakeup(&(pdev->dev))) { + /* Enable Power down mode by programming the PMT regs */ + if (priv->wolenabled == PMT_SUPPORTED) + priv->mac_type->ops->pmt(dev->base_addr, + priv->wolopts); + } else { + stmmac_mac_disable_rx(dev->base_addr); + } + } else { + priv->shutdown = 1; + /* Although this can appear slightly redundant it actually + * makes fast the standby operation and guarantees the driver + * working if hibernation is on media. */ + stmmac_release(dev); + } + + spin_unlock(&priv->lock); + return 0; +} + +static int stmmac_resume(struct platform_device *pdev) +{ + struct net_device *dev = platform_get_drvdata(pdev); + struct stmmac_priv *priv = netdev_priv(dev); + unsigned long ioaddr = dev->base_addr; + + if (!netif_running(dev)) + return 0; + + spin_lock(&priv->lock); + + if (priv->shutdown) { + /* Re-open the interface and re-init the MAC/DMA + and the rings. */ + stmmac_open(dev); + goto out_resume; + } + + /* Power Down bit, into the PM register, is cleared + * automatically as soon as a magic packet or a Wake-up frame + * is received. Anyway, it's better to manually clear + * this bit because it can generate problems while resuming + * from another devices (e.g. serial console). */ + if (device_may_wakeup(&(pdev->dev))) + if (priv->wolenabled == PMT_SUPPORTED) + priv->mac_type->ops->pmt(dev->base_addr, 0); + + netif_device_attach(dev); + + /* Enable the MAC and DMA */ + stmmac_mac_enable_rx(ioaddr); + stmmac_mac_enable_tx(ioaddr); + stmmac_dma_start_tx(ioaddr); + stmmac_dma_start_rx(ioaddr); + +#ifdef CONFIG_STMMAC_TIMER + priv->tm->timer_start(tmrate); +#endif + napi_enable(&priv->napi); + + if (priv->phydev) + phy_start(priv->phydev); + + netif_start_queue(dev); + +out_resume: + spin_unlock(&priv->lock); + return 0; +} +#endif + +static struct platform_driver stmmac_driver = { + .driver = { + .name = STMMAC_RESOURCE_NAME, + }, + .probe = stmmac_dvr_probe, + .remove = stmmac_dvr_remove, +#ifdef CONFIG_PM + .suspend = stmmac_suspend, + .resume = stmmac_resume, +#endif + +}; + +/** + * stmmac_init_module - Entry point for the driver + * Description: This function is the entry point for the driver. + */ +static int __init stmmac_init_module(void) +{ + int ret; + + if (platform_driver_register(&stmmacphy_driver)) { + pr_err("No PHY devices registered!\n"); + return -ENODEV; + } + + ret = platform_driver_register(&stmmac_driver); + return ret; +} + +/** + * stmmac_cleanup_module - Cleanup routine for the driver + * Description: This function is the cleanup routine for the driver. + */ +static void __exit stmmac_cleanup_module(void) +{ + platform_driver_unregister(&stmmacphy_driver); + platform_driver_unregister(&stmmac_driver); +} + +#ifndef MODULE +static int __init stmmac_cmdline_opt(char *str) +{ + char *opt; + + if (!str || !*str) + return -EINVAL; + while ((opt = strsep(&str, ",")) != NULL) { + if (!strncmp(opt, "debug:", 6)) + strict_strtoul(opt + 6, 0, (unsigned long *)&debug); + else if (!strncmp(opt, "phyaddr:", 8)) + strict_strtoul(opt + 8, 0, (unsigned long *)&phyaddr); + else if (!strncmp(opt, "dma_txsize:", 11)) + strict_strtoul(opt + 11, 0, + (unsigned long *)&dma_txsize); + else if (!strncmp(opt, "dma_rxsize:", 11)) + strict_strtoul(opt + 11, 0, + (unsigned long *)&dma_rxsize); + else if (!strncmp(opt, "buf_sz:", 7)) + strict_strtoul(opt + 7, 0, (unsigned long *)&buf_sz); + else if (!strncmp(opt, "tc:", 3)) + strict_strtoul(opt + 3, 0, (unsigned long *)&tc); + else if (!strncmp(opt, "tx_coe:", 7)) + strict_strtoul(opt + 7, 0, (unsigned long *)&tx_coe); + else if (!strncmp(opt, "watchdog:", 9)) + strict_strtoul(opt + 9, 0, (unsigned long *)&watchdog); + else if (!strncmp(opt, "flow_ctrl:", 10)) + strict_strtoul(opt + 10, 0, + (unsigned long *)&flow_ctrl); + else if (!strncmp(opt, "pause:", 6)) + strict_strtoul(opt + 6, 0, (unsigned long *)&pause); +#ifdef CONFIG_STMMAC_TIMER + else if (!strncmp(opt, "tmrate:", 7)) + strict_strtoul(opt + 7, 0, (unsigned long *)&tmrate); +#endif + } + return 0; +} + +__setup("stmmaceth=", stmmac_cmdline_opt); +#endif + +module_init(stmmac_init_module); +module_exit(stmmac_cleanup_module); + +MODULE_DESCRIPTION("STMMAC 10/100/1000 Ethernet driver"); +MODULE_AUTHOR("Giuseppe Cavallaro <peppe.cavallaro@st.com>"); +MODULE_LICENSE("GPL"); diff --git a/drivers/net/stmmac/stmmac_mdio.c b/drivers/net/stmmac/stmmac_mdio.c new file mode 100644 index 0000000..8498552 --- /dev/null +++ b/drivers/net/stmmac/stmmac_mdio.c @@ -0,0 +1,217 @@ +/******************************************************************************* + STMMAC Ethernet Driver -- MDIO bus implementation + Provides Bus interface for MII registers + + Copyright (C) 2007-2009 STMicroelectronics Ltd + + This program is free software; you can redistribute it and/or modify it + under the terms and conditions of the GNU General Public License, + version 2, as published by the Free Software Foundation. + + This program is distributed in the hope it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + + The full GNU General Public License is included in this distribution in + the file called "COPYING". + + Author: Carl Shaw <carl.shaw@st.com> + Maintainer: Giuseppe Cavallaro <peppe.cavallaro@st.com> +*******************************************************************************/ + +#include <linux/netdevice.h> +#include <linux/mii.h> +#include <linux/phy.h> + +#include "stmmac.h" + +#define MII_BUSY 0x00000001 +#define MII_WRITE 0x00000002 + +/** + * stmmac_mdio_read + * @bus: points to the mii_bus structure + * @phyaddr: MII addr reg bits 15-11 + * @phyreg: MII addr reg bits 10-6 + * Description: it reads data from the MII register from within the phy device. + * For the 7111 GMAC, we must set the bit 0 in the MII address register while + * accessing the PHY registers. + * Fortunately, it seems this has no drawback for the 7109 MAC. + */ +static int stmmac_mdio_read(struct mii_bus *bus, int phyaddr, int phyreg) +{ + struct net_device *ndev = bus->priv; + struct stmmac_priv *priv = netdev_priv(ndev); + unsigned long ioaddr = ndev->base_addr; + unsigned int mii_address = priv->mac_type->hw.mii.addr; + unsigned int mii_data = priv->mac_type->hw.mii.data; + + int data; + u16 regValue = (((phyaddr << 11) & (0x0000F800)) | + ((phyreg << 6) & (0x000007C0))); + regValue |= MII_BUSY; /* in case of GMAC */ + + do {} while (((readl(ioaddr + mii_address)) & MII_BUSY) == 1); + writel(regValue, ioaddr + mii_address); + do {} while (((readl(ioaddr + mii_address)) & MII_BUSY) == 1); + + /* Read the data from the MII data register */ + data = (int)readl(ioaddr + mii_data); + + return data; +} + +/** + * stmmac_mdio_write + * @bus: points to the mii_bus structure + * @phyaddr: MII addr reg bits 15-11 + * @phyreg: MII addr reg bits 10-6 + * @phydata: phy data + * Description: it writes the data into the MII register from within the device. + */ +static int stmmac_mdio_write(struct mii_bus *bus, int phyaddr, int phyreg, + u16 phydata) +{ + struct net_device *ndev = bus->priv; + struct stmmac_priv *priv = netdev_priv(ndev); + unsigned long ioaddr = ndev->base_addr; + unsigned int mii_address = priv->mac_type->hw.mii.addr; + unsigned int mii_data = priv->mac_type->hw.mii.data; + + u16 value = + (((phyaddr << 11) & (0x0000F800)) | ((phyreg << 6) & (0x000007C0))) + | MII_WRITE; + + value |= MII_BUSY; + + /* Wait until any existing MII operation is complete */ + do {} while (((readl(ioaddr + mii_address)) & MII_BUSY) == 1); + + /* Set the MII address register to write */ + writel(phydata, ioaddr + mii_data); + writel(value, ioaddr + mii_address); + + /* Wait until any existing MII operation is complete */ + do {} while (((readl(ioaddr + mii_address)) & MII_BUSY) == 1); + + return 0; +} + +/** + * stmmac_mdio_reset + * @bus: points to the mii_bus structure + * Description: reset the MII bus + */ +static int stmmac_mdio_reset(struct mii_bus *bus) +{ + struct net_device *ndev = bus->priv; + struct stmmac_priv *priv = netdev_priv(ndev); + unsigned long ioaddr = ndev->base_addr; + unsigned int mii_address = priv->mac_type->hw.mii.addr; + + if (priv->phy_reset) { + pr_debug("stmmac_mdio_reset: calling phy_reset\n"); + priv->phy_reset(priv->bsp_priv); + } + + /* This is a workaround for problems with the STE101P PHY. + * It doesn't complete its reset until at least one clock cycle + * on MDC, so perform a dummy mdio read. + */ + writel(0, ioaddr + mii_address); + + return 0; +} + +/** + * stmmac_mdio_register + * @ndev: net device structure + * Description: it registers the MII bus + */ +int stmmac_mdio_register(struct net_device *ndev) +{ + int err = 0; + struct mii_bus *new_bus; + int *irqlist; + struct stmmac_priv *priv = netdev_priv(ndev); + int addr, found; + + new_bus = mdiobus_alloc(); + if (new_bus == NULL) + return -ENOMEM; + + irqlist = kzalloc(sizeof(int) * PHY_MAX_ADDR, GFP_KERNEL); + if (irqlist == NULL) { + err = -ENOMEM; + goto irqlist_alloc_fail; + } + + /* Assign IRQ to phy at address phy_addr */ + if (priv->phy_addr != -1) + irqlist[priv->phy_addr] = priv->phy_irq; + + new_bus->name = "STMMAC MII Bus"; + new_bus->read = &stmmac_mdio_read; + new_bus->write = &stmmac_mdio_write; + new_bus->reset = &stmmac_mdio_reset; + snprintf(new_bus->id, MII_BUS_ID_SIZE, "%x", priv->bus_id); + new_bus->priv = ndev; + new_bus->irq = irqlist; + new_bus->phy_mask = priv->phy_mask; + new_bus->parent = priv->device; + err = mdiobus_register(new_bus); + if (err != 0) { + pr_err("%s: Cannot register as MDIO bus\n", new_bus->name); + goto bus_register_fail; + } + + priv->mii = new_bus; + + found = 0; + for (addr = 0; addr < 32; addr++) { + struct phy_device *phydev = new_bus->phy_map[addr]; + if (phydev) { + if (priv->phy_addr == -1) { + priv->phy_addr = addr; + phydev->irq = priv->phy_irq; + irqlist[addr] = priv->phy_irq; + } + pr_info("%s: PHY ID %08x at %d IRQ %d (%s)%s\n", + ndev->name, phydev->phy_id, addr, + phydev->irq, dev_name(&phydev->dev), + (addr == priv->phy_addr) ? " active" : ""); + found = 1; + } + } + + if (!found) + pr_warning("%s: No PHY found\n", ndev->name); + + return 0; +bus_register_fail: + kfree(irqlist); +irqlist_alloc_fail: + kfree(new_bus); + return err; +} + +/** + * stmmac_mdio_unregister + * @ndev: net device structure + * Description: it unregisters the MII bus + */ +int stmmac_mdio_unregister(struct net_device *ndev) +{ + struct stmmac_priv *priv = netdev_priv(ndev); + + mdiobus_unregister(priv->mii); + priv->mii->priv = NULL; + kfree(priv->mii); + + return 0; +} diff --git a/drivers/net/stmmac/stmmac_timer.c b/drivers/net/stmmac/stmmac_timer.c new file mode 100644 index 0000000..b838c65 --- /dev/null +++ b/drivers/net/stmmac/stmmac_timer.c @@ -0,0 +1,140 @@ +/******************************************************************************* + STMMAC external timer support. + + Copyright (C) 2007-2009 STMicroelectronics Ltd + + This program is free software; you can redistribute it and/or modify it + under the terms and conditions of the GNU General Public License, + version 2, as published by the Free Software Foundation. + + This program is distributed in the hope it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + + The full GNU General Public License is included in this distribution in + the file called "COPYING". + + Author: Giuseppe Cavallaro <peppe.cavallaro@st.com> +*******************************************************************************/ + +#include <linux/kernel.h> +#include <linux/etherdevice.h> +#include "stmmac_timer.h" + +static void stmmac_timer_handler(void *data) +{ + struct net_device *dev = (struct net_device *)data; + + stmmac_schedule(dev); + + return; +} + +#define STMMAC_TIMER_MSG(timer, freq) \ +printk(KERN_INFO "stmmac_timer: %s Timer ON (freq %dHz)\n", timer, freq); + +#if defined(CONFIG_STMMAC_RTC_TIMER) +#include <linux/rtc.h> +static struct rtc_device *stmmac_rtc; +static rtc_task_t stmmac_task; + +static void stmmac_rtc_start(unsigned int new_freq) +{ + rtc_irq_set_freq(stmmac_rtc, &stmmac_task, new_freq); + rtc_irq_set_state(stmmac_rtc, &stmmac_task, 1); + return; +} + +static void stmmac_rtc_stop(void) +{ + rtc_irq_set_state(stmmac_rtc, &stmmac_task, 0); + return; +} + +int stmmac_open_ext_timer(struct net_device *dev, struct stmmac_timer *tm) +{ + stmmac_task.private_data = dev; + stmmac_task.func = stmmac_timer_handler; + + stmmac_rtc = rtc_class_open(CONFIG_RTC_HCTOSYS_DEVICE); + if (stmmac_rtc == NULL) { + pr_error("open rtc device failed\n"); + return -ENODEV; + } + + rtc_irq_register(stmmac_rtc, &stmmac_task); + + /* Periodic mode is not supported */ + if ((rtc_irq_set_freq(stmmac_rtc, &stmmac_task, tm->freq) < 0)) { + pr_error("set periodic failed\n"); + rtc_irq_unregister(stmmac_rtc, &stmmac_task); + rtc_class_close(stmmac_rtc); + return -1; + } + + STMMAC_TIMER_MSG(CONFIG_RTC_HCTOSYS_DEVICE, tm->freq); + + tm->timer_start = stmmac_rtc_start; + tm->timer_stop = stmmac_rtc_stop; + + return 0; +} + +int stmmac_close_ext_timer(void) +{ + rtc_irq_set_state(stmmac_rtc, &stmmac_task, 0); + rtc_irq_unregister(stmmac_rtc, &stmmac_task); + rtc_class_close(stmmac_rtc); + return 0; +} + +#elif defined(CONFIG_STMMAC_TMU_TIMER) +#include <linux/clk.h> +#define TMU_CHANNEL "tmu2_clk" +static struct clk *timer_clock; + +static void stmmac_tmu_start(unsigned int new_freq) +{ + clk_set_rate(timer_clock, new_freq); + clk_enable(timer_clock); + return; +} + +static void stmmac_tmu_stop(void) +{ + clk_disable(timer_clock); + return; +} + +int stmmac_open_ext_timer(struct net_device *dev, struct stmmac_timer *tm) +{ + timer_clock = clk_get(NULL, TMU_CHANNEL); + + if (timer_clock == NULL) + return -1; + + if (tmu2_register_user(stmmac_timer_handler, (void *)dev) < 0) { + timer_clock = NULL; + return -1; + } + + STMMAC_TIMER_MSG("TMU2", tm->freq); + tm->timer_start = stmmac_tmu_start; + tm->timer_stop = stmmac_tmu_stop; + + return 0; +} + +int stmmac_close_ext_timer(void) +{ + clk_disable(timer_clock); + tmu2_unregister_user(); + clk_put(timer_clock); + return 0; +} +#endif diff --git a/drivers/net/stmmac/stmmac_timer.h b/drivers/net/stmmac/stmmac_timer.h new file mode 100644 index 0000000..f795cae --- /dev/null +++ b/drivers/net/stmmac/stmmac_timer.h @@ -0,0 +1,41 @@ +/******************************************************************************* + STMMAC external timer Header File. + + Copyright (C) 2007-2009 STMicroelectronics Ltd + + This program is free software; you can redistribute it and/or modify it + under the terms and conditions of the GNU General Public License, + version 2, as published by the Free Software Foundation. + + This program is distributed in the hope it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + + The full GNU General Public License is included in this distribution in + the file called "COPYING". + + Author: Giuseppe Cavallaro <peppe.cavallaro@st.com> +*******************************************************************************/ + +struct stmmac_timer { + void (*timer_start) (unsigned int new_freq); + void (*timer_stop) (void); + unsigned int freq; +}; + +/* Open the HW timer device and return 0 in case of success */ +int stmmac_open_ext_timer(struct net_device *dev, struct stmmac_timer *tm); +/* Stop the timer and release it */ +int stmmac_close_ext_timer(void); +/* Function used for scheduling task within the stmmac */ +void stmmac_schedule(struct net_device *dev); + +#if defined(CONFIG_STMMAC_TMU_TIMER) +extern int tmu2_register_user(void *fnt, void *data); +extern void tmu2_unregister_user(void); +#endif diff --git a/drivers/net/usb/pegasus.c b/drivers/net/usb/pegasus.c index 6fdaba8..ed4a508 100644 --- a/drivers/net/usb/pegasus.c +++ b/drivers/net/usb/pegasus.c @@ -62,8 +62,11 @@ static char *devid=NULL; static struct usb_eth_dev usb_dev_id[] = { #define PEGASUS_DEV(pn, vid, pid, flags) \ {.name = pn, .vendor = vid, .device = pid, .private = flags}, +#define PEGASUS_DEV_CLASS(pn, vid, pid, dclass, flags) \ + PEGASUS_DEV(pn, vid, pid, flags) #include "pegasus.h" #undef PEGASUS_DEV +#undef PEGASUS_DEV_CLASS {NULL, 0, 0, 0}, {NULL, 0, 0, 0} }; @@ -71,8 +74,18 @@ static struct usb_eth_dev usb_dev_id[] = { static struct usb_device_id pegasus_ids[] = { #define PEGASUS_DEV(pn, vid, pid, flags) \ {.match_flags = USB_DEVICE_ID_MATCH_DEVICE, .idVendor = vid, .idProduct = pid}, +/* + * The Belkin F8T012xx1 bluetooth adaptor has the same vendor and product + * IDs as the Belkin F5D5050, so we need to teach the pegasus driver to + * ignore adaptors belonging to the "Wireless" class 0xE0. For this one + * case anyway, seeing as the pegasus is for "Wired" adaptors. + */ +#define PEGASUS_DEV_CLASS(pn, vid, pid, dclass, flags) \ + {.match_flags = (USB_DEVICE_ID_MATCH_DEVICE | USB_DEVICE_ID_MATCH_DEV_CLASS), \ + .idVendor = vid, .idProduct = pid, .bDeviceClass = dclass}, #include "pegasus.h" #undef PEGASUS_DEV +#undef PEGASUS_DEV_CLASS {}, {} }; diff --git a/drivers/net/usb/pegasus.h b/drivers/net/usb/pegasus.h index f968c83..5d02f02 100644 --- a/drivers/net/usb/pegasus.h +++ b/drivers/net/usb/pegasus.h @@ -202,7 +202,11 @@ PEGASUS_DEV( "AEI USB Fast Ethernet Adapter", VENDOR_AEILAB, 0x1701, DEFAULT_GPIO_RESET | PEGASUS_II ) PEGASUS_DEV( "Allied Telesyn Int. AT-USB100", VENDOR_ALLIEDTEL, 0xb100, DEFAULT_GPIO_RESET | PEGASUS_II ) -PEGASUS_DEV( "Belkin F5D5050 USB Ethernet", VENDOR_BELKIN, 0x0121, +/* + * Distinguish between this Belkin adaptor and the Belkin bluetooth adaptors + * with the same product IDs by checking the device class too. + */ +PEGASUS_DEV_CLASS( "Belkin F5D5050 USB Ethernet", VENDOR_BELKIN, 0x0121, 0x00, DEFAULT_GPIO_RESET | PEGASUS_II ) PEGASUS_DEV( "Billionton USB-100", VENDOR_BILLIONTON, 0x0986, DEFAULT_GPIO_RESET ) diff --git a/drivers/net/vmxnet3/Makefile b/drivers/net/vmxnet3/Makefile new file mode 100644 index 0000000..880f509 --- /dev/null +++ b/drivers/net/vmxnet3/Makefile @@ -0,0 +1,35 @@ +################################################################################ +# +# Linux driver for VMware's vmxnet3 ethernet NIC. +# +# Copyright (C) 2007-2009, VMware, Inc. All Rights Reserved. +# +# This program is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by the +# Free Software Foundation; version 2 of the License and no later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or +# NON INFRINGEMENT. See the GNU General Public License for more +# details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. +# +# The full GNU General Public License is included in this distribution in +# the file called "COPYING". +# +# Maintained by: Shreyas Bhatewara <pv-drivers@vmware.com> +# +# +################################################################################ + +# +# Makefile for the VMware vmxnet3 ethernet NIC driver +# + +obj-$(CONFIG_VMXNET3) += vmxnet3.o + +vmxnet3-objs := vmxnet3_drv.o vmxnet3_ethtool.o diff --git a/drivers/net/vmxnet3/upt1_defs.h b/drivers/net/vmxnet3/upt1_defs.h new file mode 100644 index 0000000..37108fb --- /dev/null +++ b/drivers/net/vmxnet3/upt1_defs.h @@ -0,0 +1,96 @@ +/* + * Linux driver for VMware's vmxnet3 ethernet NIC. + * + * Copyright (C) 2008-2009, VMware, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; version 2 of the License and no later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * The full GNU General Public License is included in this distribution in + * the file called "COPYING". + * + * Maintained by: Shreyas Bhatewara <pv-drivers@vmware.com> + * + */ + +#ifndef _UPT1_DEFS_H +#define _UPT1_DEFS_H + +struct UPT1_TxStats { + u64 TSOPktsTxOK; /* TSO pkts post-segmentation */ + u64 TSOBytesTxOK; + u64 ucastPktsTxOK; + u64 ucastBytesTxOK; + u64 mcastPktsTxOK; + u64 mcastBytesTxOK; + u64 bcastPktsTxOK; + u64 bcastBytesTxOK; + u64 pktsTxError; + u64 pktsTxDiscard; +}; + +struct UPT1_RxStats { + u64 LROPktsRxOK; /* LRO pkts */ + u64 LROBytesRxOK; /* bytes from LRO pkts */ + /* the following counters are for pkts from the wire, i.e., pre-LRO */ + u64 ucastPktsRxOK; + u64 ucastBytesRxOK; + u64 mcastPktsRxOK; + u64 mcastBytesRxOK; + u64 bcastPktsRxOK; + u64 bcastBytesRxOK; + u64 pktsRxOutOfBuf; + u64 pktsRxError; +}; + +/* interrupt moderation level */ +enum { + UPT1_IML_NONE = 0, /* no interrupt moderation */ + UPT1_IML_HIGHEST = 7, /* least intr generated */ + UPT1_IML_ADAPTIVE = 8, /* adpative intr moderation */ +}; +/* values for UPT1_RSSConf.hashFunc */ +enum { + UPT1_RSS_HASH_TYPE_NONE = 0x0, + UPT1_RSS_HASH_TYPE_IPV4 = 0x01, + UPT1_RSS_HASH_TYPE_TCP_IPV4 = 0x02, + UPT1_RSS_HASH_TYPE_IPV6 = 0x04, + UPT1_RSS_HASH_TYPE_TCP_IPV6 = 0x08, +}; + +enum { + UPT1_RSS_HASH_FUNC_NONE = 0x0, + UPT1_RSS_HASH_FUNC_TOEPLITZ = 0x01, +}; + +#define UPT1_RSS_MAX_KEY_SIZE 40 +#define UPT1_RSS_MAX_IND_TABLE_SIZE 128 + +struct UPT1_RSSConf { + u16 hashType; + u16 hashFunc; + u16 hashKeySize; + u16 indTableSize; + u8 hashKey[UPT1_RSS_MAX_KEY_SIZE]; + u8 indTable[UPT1_RSS_MAX_IND_TABLE_SIZE]; +}; + +/* features */ +enum { + UPT1_F_RXCSUM = 0x0001, /* rx csum verification */ + UPT1_F_RSS = 0x0002, + UPT1_F_RXVLAN = 0x0004, /* VLAN tag stripping */ + UPT1_F_LRO = 0x0008, +}; +#endif diff --git a/drivers/net/vmxnet3/vmxnet3_defs.h b/drivers/net/vmxnet3/vmxnet3_defs.h new file mode 100644 index 0000000..dc8ee44 --- /dev/null +++ b/drivers/net/vmxnet3/vmxnet3_defs.h @@ -0,0 +1,535 @@ +/* + * Linux driver for VMware's vmxnet3 ethernet NIC. + * + * Copyright (C) 2008-2009, VMware, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; version 2 of the License and no later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * The full GNU General Public License is included in this distribution in + * the file called "COPYING". + * + * Maintained by: Shreyas Bhatewara <pv-drivers@vmware.com> + * + */ + +#ifndef _VMXNET3_DEFS_H_ +#define _VMXNET3_DEFS_H_ + +#include "upt1_defs.h" + +/* all registers are 32 bit wide */ +/* BAR 1 */ +enum { + VMXNET3_REG_VRRS = 0x0, /* Vmxnet3 Revision Report Selection */ + VMXNET3_REG_UVRS = 0x8, /* UPT Version Report Selection */ + VMXNET3_REG_DSAL = 0x10, /* Driver Shared Address Low */ + VMXNET3_REG_DSAH = 0x18, /* Driver Shared Address High */ + VMXNET3_REG_CMD = 0x20, /* Command */ + VMXNET3_REG_MACL = 0x28, /* MAC Address Low */ + VMXNET3_REG_MACH = 0x30, /* MAC Address High */ + VMXNET3_REG_ICR = 0x38, /* Interrupt Cause Register */ + VMXNET3_REG_ECR = 0x40 /* Event Cause Register */ +}; + +/* BAR 0 */ +enum { + VMXNET3_REG_IMR = 0x0, /* Interrupt Mask Register */ + VMXNET3_REG_TXPROD = 0x600, /* Tx Producer Index */ + VMXNET3_REG_RXPROD = 0x800, /* Rx Producer Index for ring 1 */ + VMXNET3_REG_RXPROD2 = 0xA00 /* Rx Producer Index for ring 2 */ +}; + +#define VMXNET3_PT_REG_SIZE 4096 /* BAR 0 */ +#define VMXNET3_VD_REG_SIZE 4096 /* BAR 1 */ + +#define VMXNET3_REG_ALIGN 8 /* All registers are 8-byte aligned. */ +#define VMXNET3_REG_ALIGN_MASK 0x7 + +/* I/O Mapped access to registers */ +#define VMXNET3_IO_TYPE_PT 0 +#define VMXNET3_IO_TYPE_VD 1 +#define VMXNET3_IO_ADDR(type, reg) (((type) << 24) | ((reg) & 0xFFFFFF)) +#define VMXNET3_IO_TYPE(addr) ((addr) >> 24) +#define VMXNET3_IO_REG(addr) ((addr) & 0xFFFFFF) + +enum { + VMXNET3_CMD_FIRST_SET = 0xCAFE0000, + VMXNET3_CMD_ACTIVATE_DEV = VMXNET3_CMD_FIRST_SET, + VMXNET3_CMD_QUIESCE_DEV, + VMXNET3_CMD_RESET_DEV, + VMXNET3_CMD_UPDATE_RX_MODE, + VMXNET3_CMD_UPDATE_MAC_FILTERS, + VMXNET3_CMD_UPDATE_VLAN_FILTERS, + VMXNET3_CMD_UPDATE_RSSIDT, + VMXNET3_CMD_UPDATE_IML, + VMXNET3_CMD_UPDATE_PMCFG, + VMXNET3_CMD_UPDATE_FEATURE, + VMXNET3_CMD_LOAD_PLUGIN, + + VMXNET3_CMD_FIRST_GET = 0xF00D0000, + VMXNET3_CMD_GET_QUEUE_STATUS = VMXNET3_CMD_FIRST_GET, + VMXNET3_CMD_GET_STATS, + VMXNET3_CMD_GET_LINK, + VMXNET3_CMD_GET_PERM_MAC_LO, + VMXNET3_CMD_GET_PERM_MAC_HI, + VMXNET3_CMD_GET_DID_LO, + VMXNET3_CMD_GET_DID_HI, + VMXNET3_CMD_GET_DEV_EXTRA_INFO, + VMXNET3_CMD_GET_CONF_INTR +}; + +struct Vmxnet3_TxDesc { + u64 addr; + + u32 len:14; + u32 gen:1; /* generation bit */ + u32 rsvd:1; + u32 dtype:1; /* descriptor type */ + u32 ext1:1; + u32 msscof:14; /* MSS, checksum offset, flags */ + + u32 hlen:10; /* header len */ + u32 om:2; /* offload mode */ + u32 eop:1; /* End Of Packet */ + u32 cq:1; /* completion request */ + u32 ext2:1; + u32 ti:1; /* VLAN Tag Insertion */ + u32 tci:16; /* Tag to Insert */ +}; + +/* TxDesc.OM values */ +#define VMXNET3_OM_NONE 0 +#define VMXNET3_OM_CSUM 2 +#define VMXNET3_OM_TSO 3 + +/* fields in TxDesc we access w/o using bit fields */ +#define VMXNET3_TXD_EOP_SHIFT 12 +#define VMXNET3_TXD_CQ_SHIFT 13 +#define VMXNET3_TXD_GEN_SHIFT 14 + +#define VMXNET3_TXD_CQ (1 << VMXNET3_TXD_CQ_SHIFT) +#define VMXNET3_TXD_EOP (1 << VMXNET3_TXD_EOP_SHIFT) +#define VMXNET3_TXD_GEN (1 << VMXNET3_TXD_GEN_SHIFT) + +#define VMXNET3_HDR_COPY_SIZE 128 + + +struct Vmxnet3_TxDataDesc { + u8 data[VMXNET3_HDR_COPY_SIZE]; +}; + + +struct Vmxnet3_TxCompDesc { + u32 txdIdx:12; /* Index of the EOP TxDesc */ + u32 ext1:20; + + u32 ext2; + u32 ext3; + + u32 rsvd:24; + u32 type:7; /* completion type */ + u32 gen:1; /* generation bit */ +}; + + +struct Vmxnet3_RxDesc { + u64 addr; + + u32 len:14; + u32 btype:1; /* Buffer Type */ + u32 dtype:1; /* Descriptor type */ + u32 rsvd:15; + u32 gen:1; /* Generation bit */ + + u32 ext1; +}; + +/* values of RXD.BTYPE */ +#define VMXNET3_RXD_BTYPE_HEAD 0 /* head only */ +#define VMXNET3_RXD_BTYPE_BODY 1 /* body only */ + +/* fields in RxDesc we access w/o using bit fields */ +#define VMXNET3_RXD_BTYPE_SHIFT 14 +#define VMXNET3_RXD_GEN_SHIFT 31 + + +struct Vmxnet3_RxCompDesc { + u32 rxdIdx:12; /* Index of the RxDesc */ + u32 ext1:2; + u32 eop:1; /* End of Packet */ + u32 sop:1; /* Start of Packet */ + u32 rqID:10; /* rx queue/ring ID */ + u32 rssType:4; /* RSS hash type used */ + u32 cnc:1; /* Checksum Not Calculated */ + u32 ext2:1; + + u32 rssHash; /* RSS hash value */ + + u32 len:14; /* data length */ + u32 err:1; /* Error */ + u32 ts:1; /* Tag is stripped */ + u32 tci:16; /* Tag stripped */ + + u32 csum:16; + u32 tuc:1; /* TCP/UDP Checksum Correct */ + u32 udp:1; /* UDP packet */ + u32 tcp:1; /* TCP packet */ + u32 ipc:1; /* IP Checksum Correct */ + u32 v6:1; /* IPv6 */ + u32 v4:1; /* IPv4 */ + u32 frg:1; /* IP Fragment */ + u32 fcs:1; /* Frame CRC correct */ + u32 type:7; /* completion type */ + u32 gen:1; /* generation bit */ +}; + +/* fields in RxCompDesc we access via Vmxnet3_GenericDesc.dword[3] */ +#define VMXNET3_RCD_TUC_SHIFT 16 +#define VMXNET3_RCD_IPC_SHIFT 19 + +/* fields in RxCompDesc we access via Vmxnet3_GenericDesc.qword[1] */ +#define VMXNET3_RCD_TYPE_SHIFT 56 +#define VMXNET3_RCD_GEN_SHIFT 63 + +/* csum OK for TCP/UDP pkts over IP */ +#define VMXNET3_RCD_CSUM_OK (1 << VMXNET3_RCD_TUC_SHIFT | \ + 1 << VMXNET3_RCD_IPC_SHIFT) + +/* value of RxCompDesc.rssType */ +enum { + VMXNET3_RCD_RSS_TYPE_NONE = 0, + VMXNET3_RCD_RSS_TYPE_IPV4 = 1, + VMXNET3_RCD_RSS_TYPE_TCPIPV4 = 2, + VMXNET3_RCD_RSS_TYPE_IPV6 = 3, + VMXNET3_RCD_RSS_TYPE_TCPIPV6 = 4, +}; + + +/* a union for accessing all cmd/completion descriptors */ +union Vmxnet3_GenericDesc { + u64 qword[2]; + u32 dword[4]; + u16 word[8]; + struct Vmxnet3_TxDesc txd; + struct Vmxnet3_RxDesc rxd; + struct Vmxnet3_TxCompDesc tcd; + struct Vmxnet3_RxCompDesc rcd; +}; + +#define VMXNET3_INIT_GEN 1 + +/* Max size of a single tx buffer */ +#define VMXNET3_MAX_TX_BUF_SIZE (1 << 14) + +/* # of tx desc needed for a tx buffer size */ +#define VMXNET3_TXD_NEEDED(size) (((size) + VMXNET3_MAX_TX_BUF_SIZE - 1) / \ + VMXNET3_MAX_TX_BUF_SIZE) + +/* max # of tx descs for a non-tso pkt */ +#define VMXNET3_MAX_TXD_PER_PKT 16 + +/* Max size of a single rx buffer */ +#define VMXNET3_MAX_RX_BUF_SIZE ((1 << 14) - 1) +/* Minimum size of a type 0 buffer */ +#define VMXNET3_MIN_T0_BUF_SIZE 128 +#define VMXNET3_MAX_CSUM_OFFSET 1024 + +/* Ring base address alignment */ +#define VMXNET3_RING_BA_ALIGN 512 +#define VMXNET3_RING_BA_MASK (VMXNET3_RING_BA_ALIGN - 1) + +/* Ring size must be a multiple of 32 */ +#define VMXNET3_RING_SIZE_ALIGN 32 +#define VMXNET3_RING_SIZE_MASK (VMXNET3_RING_SIZE_ALIGN - 1) + +/* Max ring size */ +#define VMXNET3_TX_RING_MAX_SIZE 4096 +#define VMXNET3_TC_RING_MAX_SIZE 4096 +#define VMXNET3_RX_RING_MAX_SIZE 4096 +#define VMXNET3_RC_RING_MAX_SIZE 8192 + +/* a list of reasons for queue stop */ + +enum { + VMXNET3_ERR_NOEOP = 0x80000000, /* cannot find the EOP desc of a pkt */ + VMXNET3_ERR_TXD_REUSE = 0x80000001, /* reuse TxDesc before tx completion */ + VMXNET3_ERR_BIG_PKT = 0x80000002, /* too many TxDesc for a pkt */ + VMXNET3_ERR_DESC_NOT_SPT = 0x80000003, /* descriptor type not supported */ + VMXNET3_ERR_SMALL_BUF = 0x80000004, /* type 0 buffer too small */ + VMXNET3_ERR_STRESS = 0x80000005, /* stress option firing in vmkernel */ + VMXNET3_ERR_SWITCH = 0x80000006, /* mode switch failure */ + VMXNET3_ERR_TXD_INVALID = 0x80000007, /* invalid TxDesc */ +}; + +/* completion descriptor types */ +#define VMXNET3_CDTYPE_TXCOMP 0 /* Tx Completion Descriptor */ +#define VMXNET3_CDTYPE_RXCOMP 3 /* Rx Completion Descriptor */ + +enum { + VMXNET3_GOS_BITS_UNK = 0, /* unknown */ + VMXNET3_GOS_BITS_32 = 1, + VMXNET3_GOS_BITS_64 = 2, +}; + +#define VMXNET3_GOS_TYPE_LINUX 1 + + +struct Vmxnet3_GOSInfo { + u32 gosBits:2; /* 32-bit or 64-bit? */ + u32 gosType:4; /* which guest */ + u32 gosVer:16; /* gos version */ + u32 gosMisc:10; /* other info about gos */ +}; + + +struct Vmxnet3_DriverInfo { + u32 version; + struct Vmxnet3_GOSInfo gos; + u32 vmxnet3RevSpt; + u32 uptVerSpt; +}; + + +#define VMXNET3_REV1_MAGIC 0xbabefee1 + +/* + * QueueDescPA must be 128 bytes aligned. It points to an array of + * Vmxnet3_TxQueueDesc followed by an array of Vmxnet3_RxQueueDesc. + * The number of Vmxnet3_TxQueueDesc/Vmxnet3_RxQueueDesc are specified by + * Vmxnet3_MiscConf.numTxQueues/numRxQueues, respectively. + */ +#define VMXNET3_QUEUE_DESC_ALIGN 128 + + +struct Vmxnet3_MiscConf { + struct Vmxnet3_DriverInfo driverInfo; + u64 uptFeatures; + u64 ddPA; /* driver data PA */ + u64 queueDescPA; /* queue descriptor table PA */ + u32 ddLen; /* driver data len */ + u32 queueDescLen; /* queue desc. table len in bytes */ + u32 mtu; + u16 maxNumRxSG; + u8 numTxQueues; + u8 numRxQueues; + u32 reserved[4]; +}; + + +struct Vmxnet3_TxQueueConf { + u64 txRingBasePA; + u64 dataRingBasePA; + u64 compRingBasePA; + u64 ddPA; /* driver data */ + u64 reserved; + u32 txRingSize; /* # of tx desc */ + u32 dataRingSize; /* # of data desc */ + u32 compRingSize; /* # of comp desc */ + u32 ddLen; /* size of driver data */ + u8 intrIdx; + u8 _pad[7]; +}; + + +struct Vmxnet3_RxQueueConf { + u64 rxRingBasePA[2]; + u64 compRingBasePA; + u64 ddPA; /* driver data */ + u64 reserved; + u32 rxRingSize[2]; /* # of rx desc */ + u32 compRingSize; /* # of rx comp desc */ + u32 ddLen; /* size of driver data */ + u8 intrIdx; + u8 _pad[7]; +}; + + +enum vmxnet3_intr_mask_mode { + VMXNET3_IMM_AUTO = 0, + VMXNET3_IMM_ACTIVE = 1, + VMXNET3_IMM_LAZY = 2 +}; + +enum vmxnet3_intr_type { + VMXNET3_IT_AUTO = 0, + VMXNET3_IT_INTX = 1, + VMXNET3_IT_MSI = 2, + VMXNET3_IT_MSIX = 3 +}; + +#define VMXNET3_MAX_TX_QUEUES 8 +#define VMXNET3_MAX_RX_QUEUES 16 +/* addition 1 for events */ +#define VMXNET3_MAX_INTRS 25 + + +struct Vmxnet3_IntrConf { + bool autoMask; + u8 numIntrs; /* # of interrupts */ + u8 eventIntrIdx; + u8 modLevels[VMXNET3_MAX_INTRS]; /* moderation level for + * each intr */ + u32 reserved[3]; +}; + +/* one bit per VLAN ID, the size is in the units of u32 */ +#define VMXNET3_VFT_SIZE (4096 / (sizeof(u32) * 8)) + + +struct Vmxnet3_QueueStatus { + bool stopped; + u8 _pad[3]; + u32 error; +}; + + +struct Vmxnet3_TxQueueCtrl { + u32 txNumDeferred; + u32 txThreshold; + u64 reserved; +}; + + +struct Vmxnet3_RxQueueCtrl { + bool updateRxProd; + u8 _pad[7]; + u64 reserved; +}; + +enum { + VMXNET3_RXM_UCAST = 0x01, /* unicast only */ + VMXNET3_RXM_MCAST = 0x02, /* multicast passing the filters */ + VMXNET3_RXM_BCAST = 0x04, /* broadcast only */ + VMXNET3_RXM_ALL_MULTI = 0x08, /* all multicast */ + VMXNET3_RXM_PROMISC = 0x10 /* promiscuous */ +}; + +struct Vmxnet3_RxFilterConf { + u32 rxMode; /* VMXNET3_RXM_xxx */ + u16 mfTableLen; /* size of the multicast filter table */ + u16 _pad1; + u64 mfTablePA; /* PA of the multicast filters table */ + u32 vfTable[VMXNET3_VFT_SIZE]; /* vlan filter */ +}; + + +#define VMXNET3_PM_MAX_FILTERS 6 +#define VMXNET3_PM_MAX_PATTERN_SIZE 128 +#define VMXNET3_PM_MAX_MASK_SIZE (VMXNET3_PM_MAX_PATTERN_SIZE / 8) + +#define VMXNET3_PM_WAKEUP_MAGIC 0x01 /* wake up on magic pkts */ +#define VMXNET3_PM_WAKEUP_FILTER 0x02 /* wake up on pkts matching + * filters */ + + +struct Vmxnet3_PM_PktFilter { + u8 maskSize; + u8 patternSize; + u8 mask[VMXNET3_PM_MAX_MASK_SIZE]; + u8 pattern[VMXNET3_PM_MAX_PATTERN_SIZE]; + u8 pad[6]; +}; + + +struct Vmxnet3_PMConf { + u16 wakeUpEvents; /* VMXNET3_PM_WAKEUP_xxx */ + u8 numFilters; + u8 pad[5]; + struct Vmxnet3_PM_PktFilter filters[VMXNET3_PM_MAX_FILTERS]; +}; + + +struct Vmxnet3_VariableLenConfDesc { + u32 confVer; + u32 confLen; + u64 confPA; +}; + + +struct Vmxnet3_TxQueueDesc { + struct Vmxnet3_TxQueueCtrl ctrl; + struct Vmxnet3_TxQueueConf conf; + + /* Driver read after a GET command */ + struct Vmxnet3_QueueStatus status; + struct UPT1_TxStats stats; + u8 _pad[88]; /* 128 aligned */ +}; + + +struct Vmxnet3_RxQueueDesc { + struct Vmxnet3_RxQueueCtrl ctrl; + struct Vmxnet3_RxQueueConf conf; + /* Driver read after a GET commad */ + struct Vmxnet3_QueueStatus status; + struct UPT1_RxStats stats; + u8 __pad[88]; /* 128 aligned */ +}; + + +struct Vmxnet3_DSDevRead { + /* read-only region for device, read by dev in response to a SET cmd */ + struct Vmxnet3_MiscConf misc; + struct Vmxnet3_IntrConf intrConf; + struct Vmxnet3_RxFilterConf rxFilterConf; + struct Vmxnet3_VariableLenConfDesc rssConfDesc; + struct Vmxnet3_VariableLenConfDesc pmConfDesc; + struct Vmxnet3_VariableLenConfDesc pluginConfDesc; +}; + +/* All structures in DriverShared are padded to multiples of 8 bytes */ +struct Vmxnet3_DriverShared { + u32 magic; + /* make devRead start at 64bit boundaries */ + u32 pad; + struct Vmxnet3_DSDevRead devRead; + u32 ecr; + u32 reserved[5]; +}; + + +#define VMXNET3_ECR_RQERR (1 << 0) +#define VMXNET3_ECR_TQERR (1 << 1) +#define VMXNET3_ECR_LINK (1 << 2) +#define VMXNET3_ECR_DIC (1 << 3) +#define VMXNET3_ECR_DEBUG (1 << 4) + +/* flip the gen bit of a ring */ +#define VMXNET3_FLIP_RING_GEN(gen) ((gen) = (gen) ^ 0x1) + +/* only use this if moving the idx won't affect the gen bit */ +#define VMXNET3_INC_RING_IDX_ONLY(idx, ring_size) \ + do {\ + (idx)++;\ + if (unlikely((idx) == (ring_size))) {\ + (idx) = 0;\ + } \ + } while (0) + +#define VMXNET3_SET_VFTABLE_ENTRY(vfTable, vid) \ + (vfTable[vid >> 5] |= (1 << (vid & 31))) +#define VMXNET3_CLEAR_VFTABLE_ENTRY(vfTable, vid) \ + (vfTable[vid >> 5] &= ~(1 << (vid & 31))) + +#define VMXNET3_VFTABLE_ENTRY_IS_SET(vfTable, vid) \ + ((vfTable[vid >> 5] & (1 << (vid & 31))) != 0) + +#define VMXNET3_MAX_MTU 9000 +#define VMXNET3_MIN_MTU 60 + +#define VMXNET3_LINK_UP (10000 << 16 | 1) /* 10 Gbps, up */ +#define VMXNET3_LINK_DOWN 0 + +#endif /* _VMXNET3_DEFS_H_ */ diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c new file mode 100644 index 0000000..6a16f76 --- /dev/null +++ b/drivers/net/vmxnet3/vmxnet3_drv.c @@ -0,0 +1,2565 @@ +/* + * Linux driver for VMware's vmxnet3 ethernet NIC. + * + * Copyright (C) 2008-2009, VMware, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; version 2 of the License and no later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * The full GNU General Public License is included in this distribution in + * the file called "COPYING". + * + * Maintained by: Shreyas Bhatewara <pv-drivers@vmware.com> + * + */ + +#include "vmxnet3_int.h" + +char vmxnet3_driver_name[] = "vmxnet3"; +#define VMXNET3_DRIVER_DESC "VMware vmxnet3 virtual NIC driver" + + +/* + * PCI Device ID Table + * Last entry must be all 0s + */ +static const struct pci_device_id vmxnet3_pciid_table[] = { + {PCI_VDEVICE(VMWARE, PCI_DEVICE_ID_VMWARE_VMXNET3)}, + {0} +}; + +MODULE_DEVICE_TABLE(pci, vmxnet3_pciid_table); + +static atomic_t devices_found; + + +/* + * Enable/Disable the given intr + */ +static void +vmxnet3_enable_intr(struct vmxnet3_adapter *adapter, unsigned intr_idx) +{ + VMXNET3_WRITE_BAR0_REG(adapter, VMXNET3_REG_IMR + intr_idx * 8, 0); +} + + +static void +vmxnet3_disable_intr(struct vmxnet3_adapter *adapter, unsigned intr_idx) +{ + VMXNET3_WRITE_BAR0_REG(adapter, VMXNET3_REG_IMR + intr_idx * 8, 1); +} + + +/* + * Enable/Disable all intrs used by the device + */ +static void +vmxnet3_enable_all_intrs(struct vmxnet3_adapter *adapter) +{ + int i; + + for (i = 0; i < adapter->intr.num_intrs; i++) + vmxnet3_enable_intr(adapter, i); +} + + +static void +vmxnet3_disable_all_intrs(struct vmxnet3_adapter *adapter) +{ + int i; + + for (i = 0; i < adapter->intr.num_intrs; i++) + vmxnet3_disable_intr(adapter, i); +} + + +static void +vmxnet3_ack_events(struct vmxnet3_adapter *adapter, u32 events) +{ + VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_ECR, events); +} + + +static bool +vmxnet3_tq_stopped(struct vmxnet3_tx_queue *tq, struct vmxnet3_adapter *adapter) +{ + return netif_queue_stopped(adapter->netdev); +} + + +static void +vmxnet3_tq_start(struct vmxnet3_tx_queue *tq, struct vmxnet3_adapter *adapter) +{ + tq->stopped = false; + netif_start_queue(adapter->netdev); +} + + +static void +vmxnet3_tq_wake(struct vmxnet3_tx_queue *tq, struct vmxnet3_adapter *adapter) +{ + tq->stopped = false; + netif_wake_queue(adapter->netdev); +} + + +static void +vmxnet3_tq_stop(struct vmxnet3_tx_queue *tq, struct vmxnet3_adapter *adapter) +{ + tq->stopped = true; + tq->num_stop++; + netif_stop_queue(adapter->netdev); +} + + +/* + * Check the link state. This may start or stop the tx queue. + */ +static void +vmxnet3_check_link(struct vmxnet3_adapter *adapter) +{ + u32 ret; + + VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD, VMXNET3_CMD_GET_LINK); + ret = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_CMD); + adapter->link_speed = ret >> 16; + if (ret & 1) { /* Link is up. */ + printk(KERN_INFO "%s: NIC Link is Up %d Mbps\n", + adapter->netdev->name, adapter->link_speed); + if (!netif_carrier_ok(adapter->netdev)) + netif_carrier_on(adapter->netdev); + + vmxnet3_tq_start(&adapter->tx_queue, adapter); + } else { + printk(KERN_INFO "%s: NIC Link is Down\n", + adapter->netdev->name); + if (netif_carrier_ok(adapter->netdev)) + netif_carrier_off(adapter->netdev); + + vmxnet3_tq_stop(&adapter->tx_queue, adapter); + } +} + + +static void +vmxnet3_process_events(struct vmxnet3_adapter *adapter) +{ + u32 events = adapter->shared->ecr; + if (!events) + return; + + vmxnet3_ack_events(adapter, events); + + /* Check if link state has changed */ + if (events & VMXNET3_ECR_LINK) + vmxnet3_check_link(adapter); + + /* Check if there is an error on xmit/recv queues */ + if (events & (VMXNET3_ECR_TQERR | VMXNET3_ECR_RQERR)) { + VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD, + VMXNET3_CMD_GET_QUEUE_STATUS); + + if (adapter->tqd_start->status.stopped) { + printk(KERN_ERR "%s: tq error 0x%x\n", + adapter->netdev->name, + adapter->tqd_start->status.error); + } + if (adapter->rqd_start->status.stopped) { + printk(KERN_ERR "%s: rq error 0x%x\n", + adapter->netdev->name, + adapter->rqd_start->status.error); + } + + schedule_work(&adapter->work); + } +} + + +static void +vmxnet3_unmap_tx_buf(struct vmxnet3_tx_buf_info *tbi, + struct pci_dev *pdev) +{ + if (tbi->map_type == VMXNET3_MAP_SINGLE) + pci_unmap_single(pdev, tbi->dma_addr, tbi->len, + PCI_DMA_TODEVICE); + else if (tbi->map_type == VMXNET3_MAP_PAGE) + pci_unmap_page(pdev, tbi->dma_addr, tbi->len, + PCI_DMA_TODEVICE); + else + BUG_ON(tbi->map_type != VMXNET3_MAP_NONE); + + tbi->map_type = VMXNET3_MAP_NONE; /* to help debugging */ +} + + +static int +vmxnet3_unmap_pkt(u32 eop_idx, struct vmxnet3_tx_queue *tq, + struct pci_dev *pdev, struct vmxnet3_adapter *adapter) +{ + struct sk_buff *skb; + int entries = 0; + + /* no out of order completion */ + BUG_ON(tq->buf_info[eop_idx].sop_idx != tq->tx_ring.next2comp); + BUG_ON(tq->tx_ring.base[eop_idx].txd.eop != 1); + + skb = tq->buf_info[eop_idx].skb; + BUG_ON(skb == NULL); + tq->buf_info[eop_idx].skb = NULL; + + VMXNET3_INC_RING_IDX_ONLY(eop_idx, tq->tx_ring.size); + + while (tq->tx_ring.next2comp != eop_idx) { + vmxnet3_unmap_tx_buf(tq->buf_info + tq->tx_ring.next2comp, + pdev); + + /* update next2comp w/o tx_lock. Since we are marking more, + * instead of less, tx ring entries avail, the worst case is + * that the tx routine incorrectly re-queues a pkt due to + * insufficient tx ring entries. + */ + vmxnet3_cmd_ring_adv_next2comp(&tq->tx_ring); + entries++; + } + + dev_kfree_skb_any(skb); + return entries; +} + + +static int +vmxnet3_tq_tx_complete(struct vmxnet3_tx_queue *tq, + struct vmxnet3_adapter *adapter) +{ + int completed = 0; + union Vmxnet3_GenericDesc *gdesc; + + gdesc = tq->comp_ring.base + tq->comp_ring.next2proc; + while (gdesc->tcd.gen == tq->comp_ring.gen) { + completed += vmxnet3_unmap_pkt(gdesc->tcd.txdIdx, tq, + adapter->pdev, adapter); + + vmxnet3_comp_ring_adv_next2proc(&tq->comp_ring); + gdesc = tq->comp_ring.base + tq->comp_ring.next2proc; + } + + if (completed) { + spin_lock(&tq->tx_lock); + if (unlikely(vmxnet3_tq_stopped(tq, adapter) && + vmxnet3_cmd_ring_desc_avail(&tq->tx_ring) > + VMXNET3_WAKE_QUEUE_THRESHOLD(tq) && + netif_carrier_ok(adapter->netdev))) { + vmxnet3_tq_wake(tq, adapter); + } + spin_unlock(&tq->tx_lock); + } + return completed; +} + + +static void +vmxnet3_tq_cleanup(struct vmxnet3_tx_queue *tq, + struct vmxnet3_adapter *adapter) +{ + int i; + + while (tq->tx_ring.next2comp != tq->tx_ring.next2fill) { + struct vmxnet3_tx_buf_info *tbi; + union Vmxnet3_GenericDesc *gdesc; + + tbi = tq->buf_info + tq->tx_ring.next2comp; + gdesc = tq->tx_ring.base + tq->tx_ring.next2comp; + + vmxnet3_unmap_tx_buf(tbi, adapter->pdev); + if (tbi->skb) { + dev_kfree_skb_any(tbi->skb); + tbi->skb = NULL; + } + vmxnet3_cmd_ring_adv_next2comp(&tq->tx_ring); + } + + /* sanity check, verify all buffers are indeed unmapped and freed */ + for (i = 0; i < tq->tx_ring.size; i++) { + BUG_ON(tq->buf_info[i].skb != NULL || + tq->buf_info[i].map_type != VMXNET3_MAP_NONE); + } + + tq->tx_ring.gen = VMXNET3_INIT_GEN; + tq->tx_ring.next2fill = tq->tx_ring.next2comp = 0; + + tq->comp_ring.gen = VMXNET3_INIT_GEN; + tq->comp_ring.next2proc = 0; +} + + +void +vmxnet3_tq_destroy(struct vmxnet3_tx_queue *tq, + struct vmxnet3_adapter *adapter) +{ + if (tq->tx_ring.base) { + pci_free_consistent(adapter->pdev, tq->tx_ring.size * + sizeof(struct Vmxnet3_TxDesc), + tq->tx_ring.base, tq->tx_ring.basePA); + tq->tx_ring.base = NULL; + } + if (tq->data_ring.base) { + pci_free_consistent(adapter->pdev, tq->data_ring.size * + sizeof(struct Vmxnet3_TxDataDesc), + tq->data_ring.base, tq->data_ring.basePA); + tq->data_ring.base = NULL; + } + if (tq->comp_ring.base) { + pci_free_consistent(adapter->pdev, tq->comp_ring.size * + sizeof(struct Vmxnet3_TxCompDesc), + tq->comp_ring.base, tq->comp_ring.basePA); + tq->comp_ring.base = NULL; + } + kfree(tq->buf_info); + tq->buf_info = NULL; +} + + +static void +vmxnet3_tq_init(struct vmxnet3_tx_queue *tq, + struct vmxnet3_adapter *adapter) +{ + int i; + + /* reset the tx ring contents to 0 and reset the tx ring states */ + memset(tq->tx_ring.base, 0, tq->tx_ring.size * + sizeof(struct Vmxnet3_TxDesc)); + tq->tx_ring.next2fill = tq->tx_ring.next2comp = 0; + tq->tx_ring.gen = VMXNET3_INIT_GEN; + + memset(tq->data_ring.base, 0, tq->data_ring.size * + sizeof(struct Vmxnet3_TxDataDesc)); + + /* reset the tx comp ring contents to 0 and reset comp ring states */ + memset(tq->comp_ring.base, 0, tq->comp_ring.size * + sizeof(struct Vmxnet3_TxCompDesc)); + tq->comp_ring.next2proc = 0; + tq->comp_ring.gen = VMXNET3_INIT_GEN; + + /* reset the bookkeeping data */ + memset(tq->buf_info, 0, sizeof(tq->buf_info[0]) * tq->tx_ring.size); + for (i = 0; i < tq->tx_ring.size; i++) + tq->buf_info[i].map_type = VMXNET3_MAP_NONE; + + /* stats are not reset */ +} + + +static int +vmxnet3_tq_create(struct vmxnet3_tx_queue *tq, + struct vmxnet3_adapter *adapter) +{ + BUG_ON(tq->tx_ring.base || tq->data_ring.base || + tq->comp_ring.base || tq->buf_info); + + tq->tx_ring.base = pci_alloc_consistent(adapter->pdev, tq->tx_ring.size + * sizeof(struct Vmxnet3_TxDesc), + &tq->tx_ring.basePA); + if (!tq->tx_ring.base) { + printk(KERN_ERR "%s: failed to allocate tx ring\n", + adapter->netdev->name); + goto err; + } + + tq->data_ring.base = pci_alloc_consistent(adapter->pdev, + tq->data_ring.size * + sizeof(struct Vmxnet3_TxDataDesc), + &tq->data_ring.basePA); + if (!tq->data_ring.base) { + printk(KERN_ERR "%s: failed to allocate data ring\n", + adapter->netdev->name); + goto err; + } + + tq->comp_ring.base = pci_alloc_consistent(adapter->pdev, + tq->comp_ring.size * + sizeof(struct Vmxnet3_TxCompDesc), + &tq->comp_ring.basePA); + if (!tq->comp_ring.base) { + printk(KERN_ERR "%s: failed to allocate tx comp ring\n", + adapter->netdev->name); + goto err; + } + + tq->buf_info = kcalloc(tq->tx_ring.size, sizeof(tq->buf_info[0]), + GFP_KERNEL); + if (!tq->buf_info) { + printk(KERN_ERR "%s: failed to allocate tx bufinfo\n", + adapter->netdev->name); + goto err; + } + + return 0; + +err: + vmxnet3_tq_destroy(tq, adapter); + return -ENOMEM; +} + + +/* + * starting from ring->next2fill, allocate rx buffers for the given ring + * of the rx queue and update the rx desc. stop after @num_to_alloc buffers + * are allocated or allocation fails + */ + +static int +vmxnet3_rq_alloc_rx_buf(struct vmxnet3_rx_queue *rq, u32 ring_idx, + int num_to_alloc, struct vmxnet3_adapter *adapter) +{ + int num_allocated = 0; + struct vmxnet3_rx_buf_info *rbi_base = rq->buf_info[ring_idx]; + struct vmxnet3_cmd_ring *ring = &rq->rx_ring[ring_idx]; + u32 val; + + while (num_allocated < num_to_alloc) { + struct vmxnet3_rx_buf_info *rbi; + union Vmxnet3_GenericDesc *gd; + + rbi = rbi_base + ring->next2fill; + gd = ring->base + ring->next2fill; + + if (rbi->buf_type == VMXNET3_RX_BUF_SKB) { + if (rbi->skb == NULL) { + rbi->skb = dev_alloc_skb(rbi->len + + NET_IP_ALIGN); + if (unlikely(rbi->skb == NULL)) { + rq->stats.rx_buf_alloc_failure++; + break; + } + rbi->skb->dev = adapter->netdev; + + skb_reserve(rbi->skb, NET_IP_ALIGN); + rbi->dma_addr = pci_map_single(adapter->pdev, + rbi->skb->data, rbi->len, + PCI_DMA_FROMDEVICE); + } else { + /* rx buffer skipped by the device */ + } + val = VMXNET3_RXD_BTYPE_HEAD << VMXNET3_RXD_BTYPE_SHIFT; + } else { + BUG_ON(rbi->buf_type != VMXNET3_RX_BUF_PAGE || + rbi->len != PAGE_SIZE); + + if (rbi->page == NULL) { + rbi->page = alloc_page(GFP_ATOMIC); + if (unlikely(rbi->page == NULL)) { + rq->stats.rx_buf_alloc_failure++; + break; + } + rbi->dma_addr = pci_map_page(adapter->pdev, + rbi->page, 0, PAGE_SIZE, + PCI_DMA_FROMDEVICE); + } else { + /* rx buffers skipped by the device */ + } + val = VMXNET3_RXD_BTYPE_BODY << VMXNET3_RXD_BTYPE_SHIFT; + } + + BUG_ON(rbi->dma_addr == 0); + gd->rxd.addr = rbi->dma_addr; + gd->dword[2] = (ring->gen << VMXNET3_RXD_GEN_SHIFT) | val | + rbi->len; + + num_allocated++; + vmxnet3_cmd_ring_adv_next2fill(ring); + } + rq->uncommitted[ring_idx] += num_allocated; + + dprintk(KERN_ERR "alloc_rx_buf: %d allocated, next2fill %u, next2comp " + "%u, uncommited %u\n", num_allocated, ring->next2fill, + ring->next2comp, rq->uncommitted[ring_idx]); + + /* so that the device can distinguish a full ring and an empty ring */ + BUG_ON(num_allocated != 0 && ring->next2fill == ring->next2comp); + + return num_allocated; +} + + +static void +vmxnet3_append_frag(struct sk_buff *skb, struct Vmxnet3_RxCompDesc *rcd, + struct vmxnet3_rx_buf_info *rbi) +{ + struct skb_frag_struct *frag = skb_shinfo(skb)->frags + + skb_shinfo(skb)->nr_frags; + + BUG_ON(skb_shinfo(skb)->nr_frags >= MAX_SKB_FRAGS); + + frag->page = rbi->page; + frag->page_offset = 0; + frag->size = rcd->len; + skb->data_len += frag->size; + skb_shinfo(skb)->nr_frags++; +} + + +static void +vmxnet3_map_pkt(struct sk_buff *skb, struct vmxnet3_tx_ctx *ctx, + struct vmxnet3_tx_queue *tq, struct pci_dev *pdev, + struct vmxnet3_adapter *adapter) +{ + u32 dw2, len; + unsigned long buf_offset; + int i; + union Vmxnet3_GenericDesc *gdesc; + struct vmxnet3_tx_buf_info *tbi = NULL; + + BUG_ON(ctx->copy_size > skb_headlen(skb)); + + /* use the previous gen bit for the SOP desc */ + dw2 = (tq->tx_ring.gen ^ 0x1) << VMXNET3_TXD_GEN_SHIFT; + + ctx->sop_txd = tq->tx_ring.base + tq->tx_ring.next2fill; + gdesc = ctx->sop_txd; /* both loops below can be skipped */ + + /* no need to map the buffer if headers are copied */ + if (ctx->copy_size) { + ctx->sop_txd->txd.addr = tq->data_ring.basePA + + tq->tx_ring.next2fill * + sizeof(struct Vmxnet3_TxDataDesc); + ctx->sop_txd->dword[2] = dw2 | ctx->copy_size; + ctx->sop_txd->dword[3] = 0; + + tbi = tq->buf_info + tq->tx_ring.next2fill; + tbi->map_type = VMXNET3_MAP_NONE; + + dprintk(KERN_ERR "txd[%u]: 0x%Lx 0x%x 0x%x\n", + tq->tx_ring.next2fill, ctx->sop_txd->txd.addr, + ctx->sop_txd->dword[2], ctx->sop_txd->dword[3]); + vmxnet3_cmd_ring_adv_next2fill(&tq->tx_ring); + + /* use the right gen for non-SOP desc */ + dw2 = tq->tx_ring.gen << VMXNET3_TXD_GEN_SHIFT; + } + + /* linear part can use multiple tx desc if it's big */ + len = skb_headlen(skb) - ctx->copy_size; + buf_offset = ctx->copy_size; + while (len) { + u32 buf_size; + + buf_size = len > VMXNET3_MAX_TX_BUF_SIZE ? + VMXNET3_MAX_TX_BUF_SIZE : len; + + tbi = tq->buf_info + tq->tx_ring.next2fill; + tbi->map_type = VMXNET3_MAP_SINGLE; + tbi->dma_addr = pci_map_single(adapter->pdev, + skb->data + buf_offset, buf_size, + PCI_DMA_TODEVICE); + + tbi->len = buf_size; /* this automatically convert 2^14 to 0 */ + + gdesc = tq->tx_ring.base + tq->tx_ring.next2fill; + BUG_ON(gdesc->txd.gen == tq->tx_ring.gen); + + gdesc->txd.addr = tbi->dma_addr; + gdesc->dword[2] = dw2 | buf_size; + gdesc->dword[3] = 0; + + dprintk(KERN_ERR "txd[%u]: 0x%Lx 0x%x 0x%x\n", + tq->tx_ring.next2fill, gdesc->txd.addr, + gdesc->dword[2], gdesc->dword[3]); + vmxnet3_cmd_ring_adv_next2fill(&tq->tx_ring); + dw2 = tq->tx_ring.gen << VMXNET3_TXD_GEN_SHIFT; + + len -= buf_size; + buf_offset += buf_size; + } + + for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { + struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i]; + + tbi = tq->buf_info + tq->tx_ring.next2fill; + tbi->map_type = VMXNET3_MAP_PAGE; + tbi->dma_addr = pci_map_page(adapter->pdev, frag->page, + frag->page_offset, frag->size, + PCI_DMA_TODEVICE); + + tbi->len = frag->size; + + gdesc = tq->tx_ring.base + tq->tx_ring.next2fill; + BUG_ON(gdesc->txd.gen == tq->tx_ring.gen); + + gdesc->txd.addr = tbi->dma_addr; + gdesc->dword[2] = dw2 | frag->size; + gdesc->dword[3] = 0; + + dprintk(KERN_ERR "txd[%u]: 0x%llu %u %u\n", + tq->tx_ring.next2fill, gdesc->txd.addr, + gdesc->dword[2], gdesc->dword[3]); + vmxnet3_cmd_ring_adv_next2fill(&tq->tx_ring); + dw2 = tq->tx_ring.gen << VMXNET3_TXD_GEN_SHIFT; + } + + ctx->eop_txd = gdesc; + + /* set the last buf_info for the pkt */ + tbi->skb = skb; + tbi->sop_idx = ctx->sop_txd - tq->tx_ring.base; +} + + +/* + * parse and copy relevant protocol headers: + * For a tso pkt, relevant headers are L2/3/4 including options + * For a pkt requesting csum offloading, they are L2/3 and may include L4 + * if it's a TCP/UDP pkt + * + * Returns: + * -1: error happens during parsing + * 0: protocol headers parsed, but too big to be copied + * 1: protocol headers parsed and copied + * + * Other effects: + * 1. related *ctx fields are updated. + * 2. ctx->copy_size is # of bytes copied + * 3. the portion copied is guaranteed to be in the linear part + * + */ +static int +vmxnet3_parse_and_copy_hdr(struct sk_buff *skb, struct vmxnet3_tx_queue *tq, + struct vmxnet3_tx_ctx *ctx, + struct vmxnet3_adapter *adapter) +{ + struct Vmxnet3_TxDataDesc *tdd; + + if (ctx->mss) { + ctx->eth_ip_hdr_size = skb_transport_offset(skb); + ctx->l4_hdr_size = ((struct tcphdr *) + skb_transport_header(skb))->doff * 4; + ctx->copy_size = ctx->eth_ip_hdr_size + ctx->l4_hdr_size; + } else { + unsigned int pull_size; + + if (skb->ip_summed == CHECKSUM_PARTIAL) { + ctx->eth_ip_hdr_size = skb_transport_offset(skb); + + if (ctx->ipv4) { + struct iphdr *iph = (struct iphdr *) + skb_network_header(skb); + if (iph->protocol == IPPROTO_TCP) { + pull_size = ctx->eth_ip_hdr_size + + sizeof(struct tcphdr); + + if (unlikely(!pskb_may_pull(skb, + pull_size))) { + goto err; + } + ctx->l4_hdr_size = ((struct tcphdr *) + skb_transport_header(skb))->doff * 4; + } else if (iph->protocol == IPPROTO_UDP) { + ctx->l4_hdr_size = + sizeof(struct udphdr); + } else { + ctx->l4_hdr_size = 0; + } + } else { + /* for simplicity, don't copy L4 headers */ + ctx->l4_hdr_size = 0; + } + ctx->copy_size = ctx->eth_ip_hdr_size + + ctx->l4_hdr_size; + } else { + ctx->eth_ip_hdr_size = 0; + ctx->l4_hdr_size = 0; + /* copy as much as allowed */ + ctx->copy_size = min((unsigned int)VMXNET3_HDR_COPY_SIZE + , skb_headlen(skb)); + } + + /* make sure headers are accessible directly */ + if (unlikely(!pskb_may_pull(skb, ctx->copy_size))) + goto err; + } + + if (unlikely(ctx->copy_size > VMXNET3_HDR_COPY_SIZE)) { + tq->stats.oversized_hdr++; + ctx->copy_size = 0; + return 0; + } + + tdd = tq->data_ring.base + tq->tx_ring.next2fill; + + memcpy(tdd->data, skb->data, ctx->copy_size); + dprintk(KERN_ERR "copy %u bytes to dataRing[%u]\n", + ctx->copy_size, tq->tx_ring.next2fill); + return 1; + +err: + return -1; +} + + +static void +vmxnet3_prepare_tso(struct sk_buff *skb, + struct vmxnet3_tx_ctx *ctx) +{ + struct tcphdr *tcph = (struct tcphdr *)skb_transport_header(skb); + if (ctx->ipv4) { + struct iphdr *iph = (struct iphdr *)skb_network_header(skb); + iph->check = 0; + tcph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, 0, + IPPROTO_TCP, 0); + } else { + struct ipv6hdr *iph = (struct ipv6hdr *)skb_network_header(skb); + tcph->check = ~csum_ipv6_magic(&iph->saddr, &iph->daddr, 0, + IPPROTO_TCP, 0); + } +} + + +/* + * Transmits a pkt thru a given tq + * Returns: + * NETDEV_TX_OK: descriptors are setup successfully + * NETDEV_TX_OK: error occured, the pkt is dropped + * NETDEV_TX_BUSY: tx ring is full, queue is stopped + * + * Side-effects: + * 1. tx ring may be changed + * 2. tq stats may be updated accordingly + * 3. shared->txNumDeferred may be updated + */ + +static int +vmxnet3_tq_xmit(struct sk_buff *skb, struct vmxnet3_tx_queue *tq, + struct vmxnet3_adapter *adapter, struct net_device *netdev) +{ + int ret; + u32 count; + unsigned long flags; + struct vmxnet3_tx_ctx ctx; + union Vmxnet3_GenericDesc *gdesc; + + /* conservatively estimate # of descriptors to use */ + count = VMXNET3_TXD_NEEDED(skb_headlen(skb)) + + skb_shinfo(skb)->nr_frags + 1; + + ctx.ipv4 = (skb->protocol == __constant_ntohs(ETH_P_IP)); + + ctx.mss = skb_shinfo(skb)->gso_size; + if (ctx.mss) { + if (skb_header_cloned(skb)) { + if (unlikely(pskb_expand_head(skb, 0, 0, + GFP_ATOMIC) != 0)) { + tq->stats.drop_tso++; + goto drop_pkt; + } + tq->stats.copy_skb_header++; + } + vmxnet3_prepare_tso(skb, &ctx); + } else { + if (unlikely(count > VMXNET3_MAX_TXD_PER_PKT)) { + + /* non-tso pkts must not use more than + * VMXNET3_MAX_TXD_PER_PKT entries + */ + if (skb_linearize(skb) != 0) { + tq->stats.drop_too_many_frags++; + goto drop_pkt; + } + tq->stats.linearized++; + + /* recalculate the # of descriptors to use */ + count = VMXNET3_TXD_NEEDED(skb_headlen(skb)) + 1; + } + } + + ret = vmxnet3_parse_and_copy_hdr(skb, tq, &ctx, adapter); + if (ret >= 0) { + BUG_ON(ret <= 0 && ctx.copy_size != 0); + /* hdrs parsed, check against other limits */ + if (ctx.mss) { + if (unlikely(ctx.eth_ip_hdr_size + ctx.l4_hdr_size > + VMXNET3_MAX_TX_BUF_SIZE)) { + goto hdr_too_big; + } + } else { + if (skb->ip_summed == CHECKSUM_PARTIAL) { + if (unlikely(ctx.eth_ip_hdr_size + + skb->csum_offset > + VMXNET3_MAX_CSUM_OFFSET)) { + goto hdr_too_big; + } + } + } + } else { + tq->stats.drop_hdr_inspect_err++; + goto drop_pkt; + } + + spin_lock_irqsave(&tq->tx_lock, flags); + + if (count > vmxnet3_cmd_ring_desc_avail(&tq->tx_ring)) { + tq->stats.tx_ring_full++; + dprintk(KERN_ERR "tx queue stopped on %s, next2comp %u" + " next2fill %u\n", adapter->netdev->name, + tq->tx_ring.next2comp, tq->tx_ring.next2fill); + + vmxnet3_tq_stop(tq, adapter); + spin_unlock_irqrestore(&tq->tx_lock, flags); + return NETDEV_TX_BUSY; + } + + /* fill tx descs related to addr & len */ + vmxnet3_map_pkt(skb, &ctx, tq, adapter->pdev, adapter); + + /* setup the EOP desc */ + ctx.eop_txd->dword[3] = VMXNET3_TXD_CQ | VMXNET3_TXD_EOP; + + /* setup the SOP desc */ + gdesc = ctx.sop_txd; + if (ctx.mss) { + gdesc->txd.hlen = ctx.eth_ip_hdr_size + ctx.l4_hdr_size; + gdesc->txd.om = VMXNET3_OM_TSO; + gdesc->txd.msscof = ctx.mss; + tq->shared->txNumDeferred += (skb->len - gdesc->txd.hlen + + ctx.mss - 1) / ctx.mss; + } else { + if (skb->ip_summed == CHECKSUM_PARTIAL) { + gdesc->txd.hlen = ctx.eth_ip_hdr_size; + gdesc->txd.om = VMXNET3_OM_CSUM; + gdesc->txd.msscof = ctx.eth_ip_hdr_size + + skb->csum_offset; + } else { + gdesc->txd.om = 0; + gdesc->txd.msscof = 0; + } + tq->shared->txNumDeferred++; + } + + if (vlan_tx_tag_present(skb)) { + gdesc->txd.ti = 1; + gdesc->txd.tci = vlan_tx_tag_get(skb); + } + + wmb(); + + /* finally flips the GEN bit of the SOP desc */ + gdesc->dword[2] ^= VMXNET3_TXD_GEN; + dprintk(KERN_ERR "txd[%u]: SOP 0x%Lx 0x%x 0x%x\n", + (u32)((union Vmxnet3_GenericDesc *)ctx.sop_txd - + tq->tx_ring.base), gdesc->txd.addr, gdesc->dword[2], + gdesc->dword[3]); + + spin_unlock_irqrestore(&tq->tx_lock, flags); + + if (tq->shared->txNumDeferred >= tq->shared->txThreshold) { + tq->shared->txNumDeferred = 0; + VMXNET3_WRITE_BAR0_REG(adapter, VMXNET3_REG_TXPROD, + tq->tx_ring.next2fill); + } + netdev->trans_start = jiffies; + + return NETDEV_TX_OK; + +hdr_too_big: + tq->stats.drop_oversized_hdr++; +drop_pkt: + tq->stats.drop_total++; + dev_kfree_skb(skb); + return NETDEV_TX_OK; +} + + +static netdev_tx_t +vmxnet3_xmit_frame(struct sk_buff *skb, struct net_device *netdev) +{ + struct vmxnet3_adapter *adapter = netdev_priv(netdev); + struct vmxnet3_tx_queue *tq = &adapter->tx_queue; + + return vmxnet3_tq_xmit(skb, tq, adapter, netdev); +} + + +static void +vmxnet3_rx_csum(struct vmxnet3_adapter *adapter, + struct sk_buff *skb, + union Vmxnet3_GenericDesc *gdesc) +{ + if (!gdesc->rcd.cnc && adapter->rxcsum) { + /* typical case: TCP/UDP over IP and both csums are correct */ + if ((gdesc->dword[3] & VMXNET3_RCD_CSUM_OK) == + VMXNET3_RCD_CSUM_OK) { + skb->ip_summed = CHECKSUM_UNNECESSARY; + BUG_ON(!(gdesc->rcd.tcp || gdesc->rcd.udp)); + BUG_ON(!(gdesc->rcd.v4 || gdesc->rcd.v6)); + BUG_ON(gdesc->rcd.frg); + } else { + if (gdesc->rcd.csum) { + skb->csum = htons(gdesc->rcd.csum); + skb->ip_summed = CHECKSUM_PARTIAL; + } else { + skb->ip_summed = CHECKSUM_NONE; + } + } + } else { + skb->ip_summed = CHECKSUM_NONE; + } +} + + +static void +vmxnet3_rx_error(struct vmxnet3_rx_queue *rq, struct Vmxnet3_RxCompDesc *rcd, + struct vmxnet3_rx_ctx *ctx, struct vmxnet3_adapter *adapter) +{ + rq->stats.drop_err++; + if (!rcd->fcs) + rq->stats.drop_fcs++; + + rq->stats.drop_total++; + + /* + * We do not unmap and chain the rx buffer to the skb. + * We basically pretend this buffer is not used and will be recycled + * by vmxnet3_rq_alloc_rx_buf() + */ + + /* + * ctx->skb may be NULL if this is the first and the only one + * desc for the pkt + */ + if (ctx->skb) + dev_kfree_skb_irq(ctx->skb); + + ctx->skb = NULL; +} + + +static int +vmxnet3_rq_rx_complete(struct vmxnet3_rx_queue *rq, + struct vmxnet3_adapter *adapter, int quota) +{ + static u32 rxprod_reg[2] = {VMXNET3_REG_RXPROD, VMXNET3_REG_RXPROD2}; + u32 num_rxd = 0; + struct Vmxnet3_RxCompDesc *rcd; + struct vmxnet3_rx_ctx *ctx = &rq->rx_ctx; + + rcd = &rq->comp_ring.base[rq->comp_ring.next2proc].rcd; + while (rcd->gen == rq->comp_ring.gen) { + struct vmxnet3_rx_buf_info *rbi; + struct sk_buff *skb; + int num_to_alloc; + struct Vmxnet3_RxDesc *rxd; + u32 idx, ring_idx; + + if (num_rxd >= quota) { + /* we may stop even before we see the EOP desc of + * the current pkt + */ + break; + } + num_rxd++; + + idx = rcd->rxdIdx; + ring_idx = rcd->rqID == rq->qid ? 0 : 1; + + rxd = &rq->rx_ring[ring_idx].base[idx].rxd; + rbi = rq->buf_info[ring_idx] + idx; + + BUG_ON(rxd->addr != rbi->dma_addr || rxd->len != rbi->len); + + if (unlikely(rcd->eop && rcd->err)) { + vmxnet3_rx_error(rq, rcd, ctx, adapter); + goto rcd_done; + } + + if (rcd->sop) { /* first buf of the pkt */ + BUG_ON(rxd->btype != VMXNET3_RXD_BTYPE_HEAD || + rcd->rqID != rq->qid); + + BUG_ON(rbi->buf_type != VMXNET3_RX_BUF_SKB); + BUG_ON(ctx->skb != NULL || rbi->skb == NULL); + + if (unlikely(rcd->len == 0)) { + /* Pretend the rx buffer is skipped. */ + BUG_ON(!(rcd->sop && rcd->eop)); + dprintk(KERN_ERR "rxRing[%u][%u] 0 length\n", + ring_idx, idx); + goto rcd_done; + } + + ctx->skb = rbi->skb; + rbi->skb = NULL; + + pci_unmap_single(adapter->pdev, rbi->dma_addr, rbi->len, + PCI_DMA_FROMDEVICE); + + skb_put(ctx->skb, rcd->len); + } else { + BUG_ON(ctx->skb == NULL); + /* non SOP buffer must be type 1 in most cases */ + if (rbi->buf_type == VMXNET3_RX_BUF_PAGE) { + BUG_ON(rxd->btype != VMXNET3_RXD_BTYPE_BODY); + + if (rcd->len) { + pci_unmap_page(adapter->pdev, + rbi->dma_addr, rbi->len, + PCI_DMA_FROMDEVICE); + + vmxnet3_append_frag(ctx->skb, rcd, rbi); + rbi->page = NULL; + } + } else { + /* + * The only time a non-SOP buffer is type 0 is + * when it's EOP and error flag is raised, which + * has already been handled. + */ + BUG_ON(true); + } + } + + skb = ctx->skb; + if (rcd->eop) { + skb->len += skb->data_len; + skb->truesize += skb->data_len; + + vmxnet3_rx_csum(adapter, skb, + (union Vmxnet3_GenericDesc *)rcd); + skb->protocol = eth_type_trans(skb, adapter->netdev); + + if (unlikely(adapter->vlan_grp && rcd->ts)) { + vlan_hwaccel_receive_skb(skb, + adapter->vlan_grp, rcd->tci); + } else { + netif_receive_skb(skb); + } + + adapter->netdev->last_rx = jiffies; + ctx->skb = NULL; + } + +rcd_done: + /* device may skip some rx descs */ + rq->rx_ring[ring_idx].next2comp = idx; + VMXNET3_INC_RING_IDX_ONLY(rq->rx_ring[ring_idx].next2comp, + rq->rx_ring[ring_idx].size); + + /* refill rx buffers frequently to avoid starving the h/w */ + num_to_alloc = vmxnet3_cmd_ring_desc_avail(rq->rx_ring + + ring_idx); + if (unlikely(num_to_alloc > VMXNET3_RX_ALLOC_THRESHOLD(rq, + ring_idx, adapter))) { + vmxnet3_rq_alloc_rx_buf(rq, ring_idx, num_to_alloc, + adapter); + + /* if needed, update the register */ + if (unlikely(rq->shared->updateRxProd)) { + VMXNET3_WRITE_BAR0_REG(adapter, + rxprod_reg[ring_idx] + rq->qid * 8, + rq->rx_ring[ring_idx].next2fill); + rq->uncommitted[ring_idx] = 0; + } + } + + vmxnet3_comp_ring_adv_next2proc(&rq->comp_ring); + rcd = &rq->comp_ring.base[rq->comp_ring.next2proc].rcd; + } + + return num_rxd; +} + + +static void +vmxnet3_rq_cleanup(struct vmxnet3_rx_queue *rq, + struct vmxnet3_adapter *adapter) +{ + u32 i, ring_idx; + struct Vmxnet3_RxDesc *rxd; + + for (ring_idx = 0; ring_idx < 2; ring_idx++) { + for (i = 0; i < rq->rx_ring[ring_idx].size; i++) { + rxd = &rq->rx_ring[ring_idx].base[i].rxd; + + if (rxd->btype == VMXNET3_RXD_BTYPE_HEAD && + rq->buf_info[ring_idx][i].skb) { + pci_unmap_single(adapter->pdev, rxd->addr, + rxd->len, PCI_DMA_FROMDEVICE); + dev_kfree_skb(rq->buf_info[ring_idx][i].skb); + rq->buf_info[ring_idx][i].skb = NULL; + } else if (rxd->btype == VMXNET3_RXD_BTYPE_BODY && + rq->buf_info[ring_idx][i].page) { + pci_unmap_page(adapter->pdev, rxd->addr, + rxd->len, PCI_DMA_FROMDEVICE); + put_page(rq->buf_info[ring_idx][i].page); + rq->buf_info[ring_idx][i].page = NULL; + } + } + + rq->rx_ring[ring_idx].gen = VMXNET3_INIT_GEN; + rq->rx_ring[ring_idx].next2fill = + rq->rx_ring[ring_idx].next2comp = 0; + rq->uncommitted[ring_idx] = 0; + } + + rq->comp_ring.gen = VMXNET3_INIT_GEN; + rq->comp_ring.next2proc = 0; +} + + +void vmxnet3_rq_destroy(struct vmxnet3_rx_queue *rq, + struct vmxnet3_adapter *adapter) +{ + int i; + int j; + + /* all rx buffers must have already been freed */ + for (i = 0; i < 2; i++) { + if (rq->buf_info[i]) { + for (j = 0; j < rq->rx_ring[i].size; j++) + BUG_ON(rq->buf_info[i][j].page != NULL); + } + } + + + kfree(rq->buf_info[0]); + + for (i = 0; i < 2; i++) { + if (rq->rx_ring[i].base) { + pci_free_consistent(adapter->pdev, rq->rx_ring[i].size + * sizeof(struct Vmxnet3_RxDesc), + rq->rx_ring[i].base, + rq->rx_ring[i].basePA); + rq->rx_ring[i].base = NULL; + } + rq->buf_info[i] = NULL; + } + + if (rq->comp_ring.base) { + pci_free_consistent(adapter->pdev, rq->comp_ring.size * + sizeof(struct Vmxnet3_RxCompDesc), + rq->comp_ring.base, rq->comp_ring.basePA); + rq->comp_ring.base = NULL; + } +} + + +static int +vmxnet3_rq_init(struct vmxnet3_rx_queue *rq, + struct vmxnet3_adapter *adapter) +{ + int i; + + /* initialize buf_info */ + for (i = 0; i < rq->rx_ring[0].size; i++) { + + /* 1st buf for a pkt is skbuff */ + if (i % adapter->rx_buf_per_pkt == 0) { + rq->buf_info[0][i].buf_type = VMXNET3_RX_BUF_SKB; + rq->buf_info[0][i].len = adapter->skb_buf_size; + } else { /* subsequent bufs for a pkt is frag */ + rq->buf_info[0][i].buf_type = VMXNET3_RX_BUF_PAGE; + rq->buf_info[0][i].len = PAGE_SIZE; + } + } + for (i = 0; i < rq->rx_ring[1].size; i++) { + rq->buf_info[1][i].buf_type = VMXNET3_RX_BUF_PAGE; + rq->buf_info[1][i].len = PAGE_SIZE; + } + + /* reset internal state and allocate buffers for both rings */ + for (i = 0; i < 2; i++) { + rq->rx_ring[i].next2fill = rq->rx_ring[i].next2comp = 0; + rq->uncommitted[i] = 0; + + memset(rq->rx_ring[i].base, 0, rq->rx_ring[i].size * + sizeof(struct Vmxnet3_RxDesc)); + rq->rx_ring[i].gen = VMXNET3_INIT_GEN; + } + if (vmxnet3_rq_alloc_rx_buf(rq, 0, rq->rx_ring[0].size - 1, + adapter) == 0) { + /* at least has 1 rx buffer for the 1st ring */ + return -ENOMEM; + } + vmxnet3_rq_alloc_rx_buf(rq, 1, rq->rx_ring[1].size - 1, adapter); + + /* reset the comp ring */ + rq->comp_ring.next2proc = 0; + memset(rq->comp_ring.base, 0, rq->comp_ring.size * + sizeof(struct Vmxnet3_RxCompDesc)); + rq->comp_ring.gen = VMXNET3_INIT_GEN; + + /* reset rxctx */ + rq->rx_ctx.skb = NULL; + + /* stats are not reset */ + return 0; +} + + +static int +vmxnet3_rq_create(struct vmxnet3_rx_queue *rq, struct vmxnet3_adapter *adapter) +{ + int i; + size_t sz; + struct vmxnet3_rx_buf_info *bi; + + for (i = 0; i < 2; i++) { + + sz = rq->rx_ring[i].size * sizeof(struct Vmxnet3_RxDesc); + rq->rx_ring[i].base = pci_alloc_consistent(adapter->pdev, sz, + &rq->rx_ring[i].basePA); + if (!rq->rx_ring[i].base) { + printk(KERN_ERR "%s: failed to allocate rx ring %d\n", + adapter->netdev->name, i); + goto err; + } + } + + sz = rq->comp_ring.size * sizeof(struct Vmxnet3_RxCompDesc); + rq->comp_ring.base = pci_alloc_consistent(adapter->pdev, sz, + &rq->comp_ring.basePA); + if (!rq->comp_ring.base) { + printk(KERN_ERR "%s: failed to allocate rx comp ring\n", + adapter->netdev->name); + goto err; + } + + sz = sizeof(struct vmxnet3_rx_buf_info) * (rq->rx_ring[0].size + + rq->rx_ring[1].size); + bi = kmalloc(sz, GFP_KERNEL); + if (!bi) { + printk(KERN_ERR "%s: failed to allocate rx bufinfo\n", + adapter->netdev->name); + goto err; + } + memset(bi, 0, sz); + rq->buf_info[0] = bi; + rq->buf_info[1] = bi + rq->rx_ring[0].size; + + return 0; + +err: + vmxnet3_rq_destroy(rq, adapter); + return -ENOMEM; +} + + +static int +vmxnet3_do_poll(struct vmxnet3_adapter *adapter, int budget) +{ + if (unlikely(adapter->shared->ecr)) + vmxnet3_process_events(adapter); + + vmxnet3_tq_tx_complete(&adapter->tx_queue, adapter); + return vmxnet3_rq_rx_complete(&adapter->rx_queue, adapter, budget); +} + + +static int +vmxnet3_poll(struct napi_struct *napi, int budget) +{ + struct vmxnet3_adapter *adapter = container_of(napi, + struct vmxnet3_adapter, napi); + int rxd_done; + + rxd_done = vmxnet3_do_poll(adapter, budget); + + if (rxd_done < budget) { + napi_complete(napi); + vmxnet3_enable_intr(adapter, 0); + } + return rxd_done; +} + + +/* Interrupt handler for vmxnet3 */ +static irqreturn_t +vmxnet3_intr(int irq, void *dev_id) +{ + struct net_device *dev = dev_id; + struct vmxnet3_adapter *adapter = netdev_priv(dev); + + if (unlikely(adapter->intr.type == VMXNET3_IT_INTX)) { + u32 icr = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_ICR); + if (unlikely(icr == 0)) + /* not ours */ + return IRQ_NONE; + } + + + /* disable intr if needed */ + if (adapter->intr.mask_mode == VMXNET3_IMM_ACTIVE) + vmxnet3_disable_intr(adapter, 0); + + napi_schedule(&adapter->napi); + + return IRQ_HANDLED; +} + +#ifdef CONFIG_NET_POLL_CONTROLLER + + +/* netpoll callback. */ +static void +vmxnet3_netpoll(struct net_device *netdev) +{ + struct vmxnet3_adapter *adapter = netdev_priv(netdev); + int irq; + +#ifdef CONFIG_PCI_MSI + if (adapter->intr.type == VMXNET3_IT_MSIX) + irq = adapter->intr.msix_entries[0].vector; + else +#endif + irq = adapter->pdev->irq; + + disable_irq(irq); + vmxnet3_intr(irq, netdev); + enable_irq(irq); +} +#endif + +static int +vmxnet3_request_irqs(struct vmxnet3_adapter *adapter) +{ + int err; + +#ifdef CONFIG_PCI_MSI + if (adapter->intr.type == VMXNET3_IT_MSIX) { + /* we only use 1 MSI-X vector */ + err = request_irq(adapter->intr.msix_entries[0].vector, + vmxnet3_intr, 0, adapter->netdev->name, + adapter->netdev); + } else +#endif + if (adapter->intr.type == VMXNET3_IT_MSI) { + err = request_irq(adapter->pdev->irq, vmxnet3_intr, 0, + adapter->netdev->name, adapter->netdev); + } else { + err = request_irq(adapter->pdev->irq, vmxnet3_intr, + IRQF_SHARED, adapter->netdev->name, + adapter->netdev); + } + + if (err) + printk(KERN_ERR "Failed to request irq %s (intr type:%d), error" + ":%d\n", adapter->netdev->name, adapter->intr.type, err); + + + if (!err) { + int i; + /* init our intr settings */ + for (i = 0; i < adapter->intr.num_intrs; i++) + adapter->intr.mod_levels[i] = UPT1_IML_ADAPTIVE; + + /* next setup intr index for all intr sources */ + adapter->tx_queue.comp_ring.intr_idx = 0; + adapter->rx_queue.comp_ring.intr_idx = 0; + adapter->intr.event_intr_idx = 0; + + printk(KERN_INFO "%s: intr type %u, mode %u, %u vectors " + "allocated\n", adapter->netdev->name, adapter->intr.type, + adapter->intr.mask_mode, adapter->intr.num_intrs); + } + + return err; +} + + +static void +vmxnet3_free_irqs(struct vmxnet3_adapter *adapter) +{ + BUG_ON(adapter->intr.type == VMXNET3_IT_AUTO || + adapter->intr.num_intrs <= 0); + + switch (adapter->intr.type) { +#ifdef CONFIG_PCI_MSI + case VMXNET3_IT_MSIX: + { + int i; + + for (i = 0; i < adapter->intr.num_intrs; i++) + free_irq(adapter->intr.msix_entries[i].vector, + adapter->netdev); + break; + } +#endif + case VMXNET3_IT_MSI: + free_irq(adapter->pdev->irq, adapter->netdev); + break; + case VMXNET3_IT_INTX: + free_irq(adapter->pdev->irq, adapter->netdev); + break; + default: + BUG_ON(true); + } +} + + +static void +vmxnet3_vlan_rx_register(struct net_device *netdev, struct vlan_group *grp) +{ + struct vmxnet3_adapter *adapter = netdev_priv(netdev); + struct Vmxnet3_DriverShared *shared = adapter->shared; + u32 *vfTable = adapter->shared->devRead.rxFilterConf.vfTable; + + if (grp) { + /* add vlan rx stripping. */ + if (adapter->netdev->features & NETIF_F_HW_VLAN_RX) { + int i; + struct Vmxnet3_DSDevRead *devRead = &shared->devRead; + adapter->vlan_grp = grp; + + /* update FEATURES to device */ + devRead->misc.uptFeatures |= UPT1_F_RXVLAN; + VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD, + VMXNET3_CMD_UPDATE_FEATURE); + /* + * Clear entire vfTable; then enable untagged pkts. + * Note: setting one entry in vfTable to non-zero turns + * on VLAN rx filtering. + */ + for (i = 0; i < VMXNET3_VFT_SIZE; i++) + vfTable[i] = 0; + + VMXNET3_SET_VFTABLE_ENTRY(vfTable, 0); + VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD, + VMXNET3_CMD_UPDATE_VLAN_FILTERS); + } else { + printk(KERN_ERR "%s: vlan_rx_register when device has " + "no NETIF_F_HW_VLAN_RX\n", netdev->name); + } + } else { + /* remove vlan rx stripping. */ + struct Vmxnet3_DSDevRead *devRead = &shared->devRead; + adapter->vlan_grp = NULL; + + if (devRead->misc.uptFeatures & UPT1_F_RXVLAN) { + int i; + + for (i = 0; i < VMXNET3_VFT_SIZE; i++) { + /* clear entire vfTable; this also disables + * VLAN rx filtering + */ + vfTable[i] = 0; + } + VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD, + VMXNET3_CMD_UPDATE_VLAN_FILTERS); + + /* update FEATURES to device */ + devRead->misc.uptFeatures &= ~UPT1_F_RXVLAN; + VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD, + VMXNET3_CMD_UPDATE_FEATURE); + } + } +} + + +static void +vmxnet3_restore_vlan(struct vmxnet3_adapter *adapter) +{ + if (adapter->vlan_grp) { + u16 vid; + u32 *vfTable = adapter->shared->devRead.rxFilterConf.vfTable; + bool activeVlan = false; + + for (vid = 0; vid < VLAN_GROUP_ARRAY_LEN; vid++) { + if (vlan_group_get_device(adapter->vlan_grp, vid)) { + VMXNET3_SET_VFTABLE_ENTRY(vfTable, vid); + activeVlan = true; + } + } + if (activeVlan) { + /* continue to allow untagged pkts */ + VMXNET3_SET_VFTABLE_ENTRY(vfTable, 0); + } + } +} + + +static void +vmxnet3_vlan_rx_add_vid(struct net_device *netdev, u16 vid) +{ + struct vmxnet3_adapter *adapter = netdev_priv(netdev); + u32 *vfTable = adapter->shared->devRead.rxFilterConf.vfTable; + + VMXNET3_SET_VFTABLE_ENTRY(vfTable, vid); + VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD, + VMXNET3_CMD_UPDATE_VLAN_FILTERS); +} + + +static void +vmxnet3_vlan_rx_kill_vid(struct net_device *netdev, u16 vid) +{ + struct vmxnet3_adapter *adapter = netdev_priv(netdev); + u32 *vfTable = adapter->shared->devRead.rxFilterConf.vfTable; + + VMXNET3_CLEAR_VFTABLE_ENTRY(vfTable, vid); + VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD, + VMXNET3_CMD_UPDATE_VLAN_FILTERS); +} + + +static u8 * +vmxnet3_copy_mc(struct net_device *netdev) +{ + u8 *buf = NULL; + u32 sz = netdev->mc_count * ETH_ALEN; + + /* struct Vmxnet3_RxFilterConf.mfTableLen is u16. */ + if (sz <= 0xffff) { + /* We may be called with BH disabled */ + buf = kmalloc(sz, GFP_ATOMIC); + if (buf) { + int i; + struct dev_mc_list *mc = netdev->mc_list; + + for (i = 0; i < netdev->mc_count; i++) { + BUG_ON(!mc); + memcpy(buf + i * ETH_ALEN, mc->dmi_addr, + ETH_ALEN); + mc = mc->next; + } + } + } + return buf; +} + + +static void +vmxnet3_set_mc(struct net_device *netdev) +{ + struct vmxnet3_adapter *adapter = netdev_priv(netdev); + struct Vmxnet3_RxFilterConf *rxConf = + &adapter->shared->devRead.rxFilterConf; + u8 *new_table = NULL; + u32 new_mode = VMXNET3_RXM_UCAST; + + if (netdev->flags & IFF_PROMISC) + new_mode |= VMXNET3_RXM_PROMISC; + + if (netdev->flags & IFF_BROADCAST) + new_mode |= VMXNET3_RXM_BCAST; + + if (netdev->flags & IFF_ALLMULTI) + new_mode |= VMXNET3_RXM_ALL_MULTI; + else + if (netdev->mc_count > 0) { + new_table = vmxnet3_copy_mc(netdev); + if (new_table) { + new_mode |= VMXNET3_RXM_MCAST; + rxConf->mfTableLen = netdev->mc_count * + ETH_ALEN; + rxConf->mfTablePA = virt_to_phys(new_table); + } else { + printk(KERN_INFO "%s: failed to copy mcast list" + ", setting ALL_MULTI\n", netdev->name); + new_mode |= VMXNET3_RXM_ALL_MULTI; + } + } + + + if (!(new_mode & VMXNET3_RXM_MCAST)) { + rxConf->mfTableLen = 0; + rxConf->mfTablePA = 0; + } + + if (new_mode != rxConf->rxMode) { + rxConf->rxMode = new_mode; + VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD, + VMXNET3_CMD_UPDATE_RX_MODE); + } + + VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD, + VMXNET3_CMD_UPDATE_MAC_FILTERS); + + kfree(new_table); +} + + +/* + * Set up driver_shared based on settings in adapter. + */ + +static void +vmxnet3_setup_driver_shared(struct vmxnet3_adapter *adapter) +{ + struct Vmxnet3_DriverShared *shared = adapter->shared; + struct Vmxnet3_DSDevRead *devRead = &shared->devRead; + struct Vmxnet3_TxQueueConf *tqc; + struct Vmxnet3_RxQueueConf *rqc; + int i; + + memset(shared, 0, sizeof(*shared)); + + /* driver settings */ + shared->magic = VMXNET3_REV1_MAGIC; + devRead->misc.driverInfo.version = VMXNET3_DRIVER_VERSION_NUM; + devRead->misc.driverInfo.gos.gosBits = (sizeof(void *) == 4 ? + VMXNET3_GOS_BITS_32 : VMXNET3_GOS_BITS_64); + devRead->misc.driverInfo.gos.gosType = VMXNET3_GOS_TYPE_LINUX; + devRead->misc.driverInfo.vmxnet3RevSpt = 1; + devRead->misc.driverInfo.uptVerSpt = 1; + + devRead->misc.ddPA = virt_to_phys(adapter); + devRead->misc.ddLen = sizeof(struct vmxnet3_adapter); + + /* set up feature flags */ + if (adapter->rxcsum) + devRead->misc.uptFeatures |= UPT1_F_RXCSUM; + + if (adapter->lro) { + devRead->misc.uptFeatures |= UPT1_F_LRO; + devRead->misc.maxNumRxSG = 1 + MAX_SKB_FRAGS; + } + if ((adapter->netdev->features & NETIF_F_HW_VLAN_RX) + && adapter->vlan_grp) { + devRead->misc.uptFeatures |= UPT1_F_RXVLAN; + } + + devRead->misc.mtu = adapter->netdev->mtu; + devRead->misc.queueDescPA = adapter->queue_desc_pa; + devRead->misc.queueDescLen = sizeof(struct Vmxnet3_TxQueueDesc) + + sizeof(struct Vmxnet3_RxQueueDesc); + + /* tx queue settings */ + BUG_ON(adapter->tx_queue.tx_ring.base == NULL); + + devRead->misc.numTxQueues = 1; + tqc = &adapter->tqd_start->conf; + tqc->txRingBasePA = adapter->tx_queue.tx_ring.basePA; + tqc->dataRingBasePA = adapter->tx_queue.data_ring.basePA; + tqc->compRingBasePA = adapter->tx_queue.comp_ring.basePA; + tqc->ddPA = virt_to_phys(adapter->tx_queue.buf_info); + tqc->txRingSize = adapter->tx_queue.tx_ring.size; + tqc->dataRingSize = adapter->tx_queue.data_ring.size; + tqc->compRingSize = adapter->tx_queue.comp_ring.size; + tqc->ddLen = sizeof(struct vmxnet3_tx_buf_info) * + tqc->txRingSize; + tqc->intrIdx = adapter->tx_queue.comp_ring.intr_idx; + + /* rx queue settings */ + devRead->misc.numRxQueues = 1; + rqc = &adapter->rqd_start->conf; + rqc->rxRingBasePA[0] = adapter->rx_queue.rx_ring[0].basePA; + rqc->rxRingBasePA[1] = adapter->rx_queue.rx_ring[1].basePA; + rqc->compRingBasePA = adapter->rx_queue.comp_ring.basePA; + rqc->ddPA = virt_to_phys(adapter->rx_queue.buf_info); + rqc->rxRingSize[0] = adapter->rx_queue.rx_ring[0].size; + rqc->rxRingSize[1] = adapter->rx_queue.rx_ring[1].size; + rqc->compRingSize = adapter->rx_queue.comp_ring.size; + rqc->ddLen = sizeof(struct vmxnet3_rx_buf_info) * + (rqc->rxRingSize[0] + rqc->rxRingSize[1]); + rqc->intrIdx = adapter->rx_queue.comp_ring.intr_idx; + + /* intr settings */ + devRead->intrConf.autoMask = adapter->intr.mask_mode == + VMXNET3_IMM_AUTO; + devRead->intrConf.numIntrs = adapter->intr.num_intrs; + for (i = 0; i < adapter->intr.num_intrs; i++) + devRead->intrConf.modLevels[i] = adapter->intr.mod_levels[i]; + + devRead->intrConf.eventIntrIdx = adapter->intr.event_intr_idx; + + /* rx filter settings */ + devRead->rxFilterConf.rxMode = 0; + vmxnet3_restore_vlan(adapter); + /* the rest are already zeroed */ +} + + +int +vmxnet3_activate_dev(struct vmxnet3_adapter *adapter) +{ + int err; + u32 ret; + + dprintk(KERN_ERR "%s: skb_buf_size %d, rx_buf_per_pkt %d, ring sizes" + " %u %u %u\n", adapter->netdev->name, adapter->skb_buf_size, + adapter->rx_buf_per_pkt, adapter->tx_queue.tx_ring.size, + adapter->rx_queue.rx_ring[0].size, + adapter->rx_queue.rx_ring[1].size); + + vmxnet3_tq_init(&adapter->tx_queue, adapter); + err = vmxnet3_rq_init(&adapter->rx_queue, adapter); + if (err) { + printk(KERN_ERR "Failed to init rx queue for %s: error %d\n", + adapter->netdev->name, err); + goto rq_err; + } + + err = vmxnet3_request_irqs(adapter); + if (err) { + printk(KERN_ERR "Failed to setup irq for %s: error %d\n", + adapter->netdev->name, err); + goto irq_err; + } + + vmxnet3_setup_driver_shared(adapter); + + VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_DSAL, + VMXNET3_GET_ADDR_LO(adapter->shared_pa)); + VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_DSAH, + VMXNET3_GET_ADDR_HI(adapter->shared_pa)); + + VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD, + VMXNET3_CMD_ACTIVATE_DEV); + ret = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_CMD); + + if (ret != 0) { + printk(KERN_ERR "Failed to activate dev %s: error %u\n", + adapter->netdev->name, ret); + err = -EINVAL; + goto activate_err; + } + VMXNET3_WRITE_BAR0_REG(adapter, VMXNET3_REG_RXPROD, + adapter->rx_queue.rx_ring[0].next2fill); + VMXNET3_WRITE_BAR0_REG(adapter, VMXNET3_REG_RXPROD2, + adapter->rx_queue.rx_ring[1].next2fill); + + /* Apply the rx filter settins last. */ + vmxnet3_set_mc(adapter->netdev); + + /* + * Check link state when first activating device. It will start the + * tx queue if the link is up. + */ + vmxnet3_check_link(adapter); + + napi_enable(&adapter->napi); + vmxnet3_enable_all_intrs(adapter); + clear_bit(VMXNET3_STATE_BIT_QUIESCED, &adapter->state); + return 0; + +activate_err: + VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_DSAL, 0); + VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_DSAH, 0); + vmxnet3_free_irqs(adapter); +irq_err: +rq_err: + /* free up buffers we allocated */ + vmxnet3_rq_cleanup(&adapter->rx_queue, adapter); + return err; +} + + +void +vmxnet3_reset_dev(struct vmxnet3_adapter *adapter) +{ + VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD, VMXNET3_CMD_RESET_DEV); +} + + +int +vmxnet3_quiesce_dev(struct vmxnet3_adapter *adapter) +{ + if (test_and_set_bit(VMXNET3_STATE_BIT_QUIESCED, &adapter->state)) + return 0; + + + VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD, + VMXNET3_CMD_QUIESCE_DEV); + vmxnet3_disable_all_intrs(adapter); + + napi_disable(&adapter->napi); + netif_tx_disable(adapter->netdev); + adapter->link_speed = 0; + netif_carrier_off(adapter->netdev); + + vmxnet3_tq_cleanup(&adapter->tx_queue, adapter); + vmxnet3_rq_cleanup(&adapter->rx_queue, adapter); + vmxnet3_free_irqs(adapter); + return 0; +} + + +static void +vmxnet3_write_mac_addr(struct vmxnet3_adapter *adapter, u8 *mac) +{ + u32 tmp; + + tmp = *(u32 *)mac; + VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_MACL, tmp); + + tmp = (mac[5] << 8) | mac[4]; + VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_MACH, tmp); +} + + +static int +vmxnet3_set_mac_addr(struct net_device *netdev, void *p) +{ + struct sockaddr *addr = p; + struct vmxnet3_adapter *adapter = netdev_priv(netdev); + + memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len); + vmxnet3_write_mac_addr(adapter, addr->sa_data); + + return 0; +} + + +/* ==================== initialization and cleanup routines ============ */ + +static int +vmxnet3_alloc_pci_resources(struct vmxnet3_adapter *adapter, bool *dma64) +{ + int err; + unsigned long mmio_start, mmio_len; + struct pci_dev *pdev = adapter->pdev; + + err = pci_enable_device(pdev); + if (err) { + printk(KERN_ERR "Failed to enable adapter %s: error %d\n", + pci_name(pdev), err); + return err; + } + + if (pci_set_dma_mask(pdev, DMA_BIT_MASK(64)) == 0) { + if (pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)) != 0) { + printk(KERN_ERR "pci_set_consistent_dma_mask failed " + "for adapter %s\n", pci_name(pdev)); + err = -EIO; + goto err_set_mask; + } + *dma64 = true; + } else { + if (pci_set_dma_mask(pdev, DMA_BIT_MASK(32)) != 0) { + printk(KERN_ERR "pci_set_dma_mask failed for adapter " + "%s\n", pci_name(pdev)); + err = -EIO; + goto err_set_mask; + } + *dma64 = false; + } + + err = pci_request_selected_regions(pdev, (1 << 2) - 1, + vmxnet3_driver_name); + if (err) { + printk(KERN_ERR "Failed to request region for adapter %s: " + "error %d\n", pci_name(pdev), err); + goto err_set_mask; + } + + pci_set_master(pdev); + + mmio_start = pci_resource_start(pdev, 0); + mmio_len = pci_resource_len(pdev, 0); + adapter->hw_addr0 = ioremap(mmio_start, mmio_len); + if (!adapter->hw_addr0) { + printk(KERN_ERR "Failed to map bar0 for adapter %s\n", + pci_name(pdev)); + err = -EIO; + goto err_ioremap; + } + + mmio_start = pci_resource_start(pdev, 1); + mmio_len = pci_resource_len(pdev, 1); + adapter->hw_addr1 = ioremap(mmio_start, mmio_len); + if (!adapter->hw_addr1) { + printk(KERN_ERR "Failed to map bar1 for adapter %s\n", + pci_name(pdev)); + err = -EIO; + goto err_bar1; + } + return 0; + +err_bar1: + iounmap(adapter->hw_addr0); +err_ioremap: + pci_release_selected_regions(pdev, (1 << 2) - 1); +err_set_mask: + pci_disable_device(pdev); + return err; +} + + +static void +vmxnet3_free_pci_resources(struct vmxnet3_adapter *adapter) +{ + BUG_ON(!adapter->pdev); + + iounmap(adapter->hw_addr0); + iounmap(adapter->hw_addr1); + pci_release_selected_regions(adapter->pdev, (1 << 2) - 1); + pci_disable_device(adapter->pdev); +} + + +static void +vmxnet3_adjust_rx_ring_size(struct vmxnet3_adapter *adapter) +{ + size_t sz; + + if (adapter->netdev->mtu <= VMXNET3_MAX_SKB_BUF_SIZE - + VMXNET3_MAX_ETH_HDR_SIZE) { + adapter->skb_buf_size = adapter->netdev->mtu + + VMXNET3_MAX_ETH_HDR_SIZE; + if (adapter->skb_buf_size < VMXNET3_MIN_T0_BUF_SIZE) + adapter->skb_buf_size = VMXNET3_MIN_T0_BUF_SIZE; + + adapter->rx_buf_per_pkt = 1; + } else { + adapter->skb_buf_size = VMXNET3_MAX_SKB_BUF_SIZE; + sz = adapter->netdev->mtu - VMXNET3_MAX_SKB_BUF_SIZE + + VMXNET3_MAX_ETH_HDR_SIZE; + adapter->rx_buf_per_pkt = 1 + (sz + PAGE_SIZE - 1) / PAGE_SIZE; + } + + /* + * for simplicity, force the ring0 size to be a multiple of + * rx_buf_per_pkt * VMXNET3_RING_SIZE_ALIGN + */ + sz = adapter->rx_buf_per_pkt * VMXNET3_RING_SIZE_ALIGN; + adapter->rx_queue.rx_ring[0].size = (adapter->rx_queue.rx_ring[0].size + + sz - 1) / sz * sz; + adapter->rx_queue.rx_ring[0].size = min_t(u32, + adapter->rx_queue.rx_ring[0].size, + VMXNET3_RX_RING_MAX_SIZE / sz * sz); +} + + +int +vmxnet3_create_queues(struct vmxnet3_adapter *adapter, u32 tx_ring_size, + u32 rx_ring_size, u32 rx_ring2_size) +{ + int err; + + adapter->tx_queue.tx_ring.size = tx_ring_size; + adapter->tx_queue.data_ring.size = tx_ring_size; + adapter->tx_queue.comp_ring.size = tx_ring_size; + adapter->tx_queue.shared = &adapter->tqd_start->ctrl; + adapter->tx_queue.stopped = true; + err = vmxnet3_tq_create(&adapter->tx_queue, adapter); + if (err) + return err; + + adapter->rx_queue.rx_ring[0].size = rx_ring_size; + adapter->rx_queue.rx_ring[1].size = rx_ring2_size; + vmxnet3_adjust_rx_ring_size(adapter); + adapter->rx_queue.comp_ring.size = adapter->rx_queue.rx_ring[0].size + + adapter->rx_queue.rx_ring[1].size; + adapter->rx_queue.qid = 0; + adapter->rx_queue.qid2 = 1; + adapter->rx_queue.shared = &adapter->rqd_start->ctrl; + err = vmxnet3_rq_create(&adapter->rx_queue, adapter); + if (err) + vmxnet3_tq_destroy(&adapter->tx_queue, adapter); + + return err; +} + +static int +vmxnet3_open(struct net_device *netdev) +{ + struct vmxnet3_adapter *adapter; + int err; + + adapter = netdev_priv(netdev); + + spin_lock_init(&adapter->tx_queue.tx_lock); + + err = vmxnet3_create_queues(adapter, VMXNET3_DEF_TX_RING_SIZE, + VMXNET3_DEF_RX_RING_SIZE, + VMXNET3_DEF_RX_RING_SIZE); + if (err) + goto queue_err; + + err = vmxnet3_activate_dev(adapter); + if (err) + goto activate_err; + + return 0; + +activate_err: + vmxnet3_rq_destroy(&adapter->rx_queue, adapter); + vmxnet3_tq_destroy(&adapter->tx_queue, adapter); +queue_err: + return err; +} + + +static int +vmxnet3_close(struct net_device *netdev) +{ + struct vmxnet3_adapter *adapter = netdev_priv(netdev); + + /* + * Reset_work may be in the middle of resetting the device, wait for its + * completion. + */ + while (test_and_set_bit(VMXNET3_STATE_BIT_RESETTING, &adapter->state)) + msleep(1); + + vmxnet3_quiesce_dev(adapter); + + vmxnet3_rq_destroy(&adapter->rx_queue, adapter); + vmxnet3_tq_destroy(&adapter->tx_queue, adapter); + + clear_bit(VMXNET3_STATE_BIT_RESETTING, &adapter->state); + + + return 0; +} + + +void +vmxnet3_force_close(struct vmxnet3_adapter *adapter) +{ + /* + * we must clear VMXNET3_STATE_BIT_RESETTING, otherwise + * vmxnet3_close() will deadlock. + */ + BUG_ON(test_bit(VMXNET3_STATE_BIT_RESETTING, &adapter->state)); + + /* we need to enable NAPI, otherwise dev_close will deadlock */ + napi_enable(&adapter->napi); + dev_close(adapter->netdev); +} + + +static int +vmxnet3_change_mtu(struct net_device *netdev, int new_mtu) +{ + struct vmxnet3_adapter *adapter = netdev_priv(netdev); + int err = 0; + + if (new_mtu < VMXNET3_MIN_MTU || new_mtu > VMXNET3_MAX_MTU) + return -EINVAL; + + if (new_mtu > 1500 && !adapter->jumbo_frame) + return -EINVAL; + + netdev->mtu = new_mtu; + + /* + * Reset_work may be in the middle of resetting the device, wait for its + * completion. + */ + while (test_and_set_bit(VMXNET3_STATE_BIT_RESETTING, &adapter->state)) + msleep(1); + + if (netif_running(netdev)) { + vmxnet3_quiesce_dev(adapter); + vmxnet3_reset_dev(adapter); + + /* we need to re-create the rx queue based on the new mtu */ + vmxnet3_rq_destroy(&adapter->rx_queue, adapter); + vmxnet3_adjust_rx_ring_size(adapter); + adapter->rx_queue.comp_ring.size = + adapter->rx_queue.rx_ring[0].size + + adapter->rx_queue.rx_ring[1].size; + err = vmxnet3_rq_create(&adapter->rx_queue, adapter); + if (err) { + printk(KERN_ERR "%s: failed to re-create rx queue," + " error %d. Closing it.\n", netdev->name, err); + goto out; + } + + err = vmxnet3_activate_dev(adapter); + if (err) { + printk(KERN_ERR "%s: failed to re-activate, error %d. " + "Closing it\n", netdev->name, err); + goto out; + } + } + +out: + clear_bit(VMXNET3_STATE_BIT_RESETTING, &adapter->state); + if (err) + vmxnet3_force_close(adapter); + + return err; +} + + +static void +vmxnet3_declare_features(struct vmxnet3_adapter *adapter, bool dma64) +{ + struct net_device *netdev = adapter->netdev; + + netdev->features = NETIF_F_SG | + NETIF_F_HW_CSUM | + NETIF_F_HW_VLAN_TX | + NETIF_F_HW_VLAN_RX | + NETIF_F_HW_VLAN_FILTER | + NETIF_F_TSO | + NETIF_F_TSO6 | + NETIF_F_LRO; + + printk(KERN_INFO "features: sg csum vlan jf tso tsoIPv6 lro"); + + adapter->rxcsum = true; + adapter->jumbo_frame = true; + adapter->lro = true; + + if (dma64) { + netdev->features |= NETIF_F_HIGHDMA; + printk(" highDMA"); + } + + netdev->vlan_features = netdev->features; + printk("\n"); +} + + +static void +vmxnet3_read_mac_addr(struct vmxnet3_adapter *adapter, u8 *mac) +{ + u32 tmp; + + tmp = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_MACL); + *(u32 *)mac = tmp; + + tmp = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_MACH); + mac[4] = tmp & 0xff; + mac[5] = (tmp >> 8) & 0xff; +} + + +static void +vmxnet3_alloc_intr_resources(struct vmxnet3_adapter *adapter) +{ + u32 cfg; + + /* intr settings */ + VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD, + VMXNET3_CMD_GET_CONF_INTR); + cfg = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_CMD); + adapter->intr.type = cfg & 0x3; + adapter->intr.mask_mode = (cfg >> 2) & 0x3; + + if (adapter->intr.type == VMXNET3_IT_AUTO) { + int err; + +#ifdef CONFIG_PCI_MSI + adapter->intr.msix_entries[0].entry = 0; + err = pci_enable_msix(adapter->pdev, adapter->intr.msix_entries, + VMXNET3_LINUX_MAX_MSIX_VECT); + if (!err) { + adapter->intr.num_intrs = 1; + adapter->intr.type = VMXNET3_IT_MSIX; + return; + } +#endif + + err = pci_enable_msi(adapter->pdev); + if (!err) { + adapter->intr.num_intrs = 1; + adapter->intr.type = VMXNET3_IT_MSI; + return; + } + } + + adapter->intr.type = VMXNET3_IT_INTX; + + /* INT-X related setting */ + adapter->intr.num_intrs = 1; +} + + +static void +vmxnet3_free_intr_resources(struct vmxnet3_adapter *adapter) +{ + if (adapter->intr.type == VMXNET3_IT_MSIX) + pci_disable_msix(adapter->pdev); + else if (adapter->intr.type == VMXNET3_IT_MSI) + pci_disable_msi(adapter->pdev); + else + BUG_ON(adapter->intr.type != VMXNET3_IT_INTX); +} + + +static void +vmxnet3_tx_timeout(struct net_device *netdev) +{ + struct vmxnet3_adapter *adapter = netdev_priv(netdev); + adapter->tx_timeout_count++; + + printk(KERN_ERR "%s: tx hang\n", adapter->netdev->name); + schedule_work(&adapter->work); +} + + +static void +vmxnet3_reset_work(struct work_struct *data) +{ + struct vmxnet3_adapter *adapter; + + adapter = container_of(data, struct vmxnet3_adapter, work); + + /* if another thread is resetting the device, no need to proceed */ + if (test_and_set_bit(VMXNET3_STATE_BIT_RESETTING, &adapter->state)) + return; + + /* if the device is closed, we must leave it alone */ + if (netif_running(adapter->netdev)) { + printk(KERN_INFO "%s: resetting\n", adapter->netdev->name); + vmxnet3_quiesce_dev(adapter); + vmxnet3_reset_dev(adapter); + vmxnet3_activate_dev(adapter); + } else { + printk(KERN_INFO "%s: already closed\n", adapter->netdev->name); + } + + clear_bit(VMXNET3_STATE_BIT_RESETTING, &adapter->state); +} + + +static int __devinit +vmxnet3_probe_device(struct pci_dev *pdev, + const struct pci_device_id *id) +{ + static const struct net_device_ops vmxnet3_netdev_ops = { + .ndo_open = vmxnet3_open, + .ndo_stop = vmxnet3_close, + .ndo_start_xmit = vmxnet3_xmit_frame, + .ndo_set_mac_address = vmxnet3_set_mac_addr, + .ndo_change_mtu = vmxnet3_change_mtu, + .ndo_get_stats = vmxnet3_get_stats, + .ndo_tx_timeout = vmxnet3_tx_timeout, + .ndo_set_multicast_list = vmxnet3_set_mc, + .ndo_vlan_rx_register = vmxnet3_vlan_rx_register, + .ndo_vlan_rx_add_vid = vmxnet3_vlan_rx_add_vid, + .ndo_vlan_rx_kill_vid = vmxnet3_vlan_rx_kill_vid, +#ifdef CONFIG_NET_POLL_CONTROLLER + .ndo_poll_controller = vmxnet3_netpoll, +#endif + }; + int err; + bool dma64 = false; /* stupid gcc */ + u32 ver; + struct net_device *netdev; + struct vmxnet3_adapter *adapter; + u8 mac[ETH_ALEN]; + + netdev = alloc_etherdev(sizeof(struct vmxnet3_adapter)); + if (!netdev) { + printk(KERN_ERR "Failed to alloc ethernet device for adapter " + "%s\n", pci_name(pdev)); + return -ENOMEM; + } + + pci_set_drvdata(pdev, netdev); + adapter = netdev_priv(netdev); + adapter->netdev = netdev; + adapter->pdev = pdev; + + adapter->shared = pci_alloc_consistent(adapter->pdev, + sizeof(struct Vmxnet3_DriverShared), + &adapter->shared_pa); + if (!adapter->shared) { + printk(KERN_ERR "Failed to allocate memory for %s\n", + pci_name(pdev)); + err = -ENOMEM; + goto err_alloc_shared; + } + + adapter->tqd_start = pci_alloc_consistent(adapter->pdev, + sizeof(struct Vmxnet3_TxQueueDesc) + + sizeof(struct Vmxnet3_RxQueueDesc), + &adapter->queue_desc_pa); + + if (!adapter->tqd_start) { + printk(KERN_ERR "Failed to allocate memory for %s\n", + pci_name(pdev)); + err = -ENOMEM; + goto err_alloc_queue_desc; + } + adapter->rqd_start = (struct Vmxnet3_RxQueueDesc *)(adapter->tqd_start + + 1); + + adapter->pm_conf = kmalloc(sizeof(struct Vmxnet3_PMConf), GFP_KERNEL); + if (adapter->pm_conf == NULL) { + printk(KERN_ERR "Failed to allocate memory for %s\n", + pci_name(pdev)); + err = -ENOMEM; + goto err_alloc_pm; + } + + err = vmxnet3_alloc_pci_resources(adapter, &dma64); + if (err < 0) + goto err_alloc_pci; + + ver = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_VRRS); + if (ver & 1) { + VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_VRRS, 1); + } else { + printk(KERN_ERR "Incompatible h/w version (0x%x) for adapter" + " %s\n", ver, pci_name(pdev)); + err = -EBUSY; + goto err_ver; + } + + ver = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_UVRS); + if (ver & 1) { + VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_UVRS, 1); + } else { + printk(KERN_ERR "Incompatible upt version (0x%x) for " + "adapter %s\n", ver, pci_name(pdev)); + err = -EBUSY; + goto err_ver; + } + + vmxnet3_declare_features(adapter, dma64); + + adapter->dev_number = atomic_read(&devices_found); + vmxnet3_alloc_intr_resources(adapter); + + vmxnet3_read_mac_addr(adapter, mac); + memcpy(netdev->dev_addr, mac, netdev->addr_len); + + netdev->netdev_ops = &vmxnet3_netdev_ops; + netdev->watchdog_timeo = 5 * HZ; + vmxnet3_set_ethtool_ops(netdev); + + INIT_WORK(&adapter->work, vmxnet3_reset_work); + + netif_napi_add(netdev, &adapter->napi, vmxnet3_poll, 64); + SET_NETDEV_DEV(netdev, &pdev->dev); + err = register_netdev(netdev); + + if (err) { + printk(KERN_ERR "Failed to register adapter %s\n", + pci_name(pdev)); + goto err_register; + } + + set_bit(VMXNET3_STATE_BIT_QUIESCED, &adapter->state); + atomic_inc(&devices_found); + return 0; + +err_register: + vmxnet3_free_intr_resources(adapter); +err_ver: + vmxnet3_free_pci_resources(adapter); +err_alloc_pci: + kfree(adapter->pm_conf); +err_alloc_pm: + pci_free_consistent(adapter->pdev, sizeof(struct Vmxnet3_TxQueueDesc) + + sizeof(struct Vmxnet3_RxQueueDesc), + adapter->tqd_start, adapter->queue_desc_pa); +err_alloc_queue_desc: + pci_free_consistent(adapter->pdev, sizeof(struct Vmxnet3_DriverShared), + adapter->shared, adapter->shared_pa); +err_alloc_shared: + pci_set_drvdata(pdev, NULL); + free_netdev(netdev); + return err; +} + + +static void __devexit +vmxnet3_remove_device(struct pci_dev *pdev) +{ + struct net_device *netdev = pci_get_drvdata(pdev); + struct vmxnet3_adapter *adapter = netdev_priv(netdev); + + flush_scheduled_work(); + + unregister_netdev(netdev); + + vmxnet3_free_intr_resources(adapter); + vmxnet3_free_pci_resources(adapter); + kfree(adapter->pm_conf); + pci_free_consistent(adapter->pdev, sizeof(struct Vmxnet3_TxQueueDesc) + + sizeof(struct Vmxnet3_RxQueueDesc), + adapter->tqd_start, adapter->queue_desc_pa); + pci_free_consistent(adapter->pdev, sizeof(struct Vmxnet3_DriverShared), + adapter->shared, adapter->shared_pa); + free_netdev(netdev); +} + + +#ifdef CONFIG_PM + +static int +vmxnet3_suspend(struct device *device) +{ + struct pci_dev *pdev = to_pci_dev(device); + struct net_device *netdev = pci_get_drvdata(pdev); + struct vmxnet3_adapter *adapter = netdev_priv(netdev); + struct Vmxnet3_PMConf *pmConf; + struct ethhdr *ehdr; + struct arphdr *ahdr; + u8 *arpreq; + struct in_device *in_dev; + struct in_ifaddr *ifa; + int i = 0; + + if (!netif_running(netdev)) + return 0; + + vmxnet3_disable_all_intrs(adapter); + vmxnet3_free_irqs(adapter); + vmxnet3_free_intr_resources(adapter); + + netif_device_detach(netdev); + netif_stop_queue(netdev); + + /* Create wake-up filters. */ + pmConf = adapter->pm_conf; + memset(pmConf, 0, sizeof(*pmConf)); + + if (adapter->wol & WAKE_UCAST) { + pmConf->filters[i].patternSize = ETH_ALEN; + pmConf->filters[i].maskSize = 1; + memcpy(pmConf->filters[i].pattern, netdev->dev_addr, ETH_ALEN); + pmConf->filters[i].mask[0] = 0x3F; /* LSB ETH_ALEN bits */ + + pmConf->wakeUpEvents |= VMXNET3_PM_WAKEUP_FILTER; + i++; + } + + if (adapter->wol & WAKE_ARP) { + in_dev = in_dev_get(netdev); + if (!in_dev) + goto skip_arp; + + ifa = (struct in_ifaddr *)in_dev->ifa_list; + if (!ifa) + goto skip_arp; + + pmConf->filters[i].patternSize = ETH_HLEN + /* Ethernet header*/ + sizeof(struct arphdr) + /* ARP header */ + 2 * ETH_ALEN + /* 2 Ethernet addresses*/ + 2 * sizeof(u32); /*2 IPv4 addresses */ + pmConf->filters[i].maskSize = + (pmConf->filters[i].patternSize - 1) / 8 + 1; + + /* ETH_P_ARP in Ethernet header. */ + ehdr = (struct ethhdr *)pmConf->filters[i].pattern; + ehdr->h_proto = htons(ETH_P_ARP); + + /* ARPOP_REQUEST in ARP header. */ + ahdr = (struct arphdr *)&pmConf->filters[i].pattern[ETH_HLEN]; + ahdr->ar_op = htons(ARPOP_REQUEST); + arpreq = (u8 *)(ahdr + 1); + + /* The Unicast IPv4 address in 'tip' field. */ + arpreq += 2 * ETH_ALEN + sizeof(u32); + *(u32 *)arpreq = ifa->ifa_address; + + /* The mask for the relevant bits. */ + pmConf->filters[i].mask[0] = 0x00; + pmConf->filters[i].mask[1] = 0x30; /* ETH_P_ARP */ + pmConf->filters[i].mask[2] = 0x30; /* ARPOP_REQUEST */ + pmConf->filters[i].mask[3] = 0x00; + pmConf->filters[i].mask[4] = 0xC0; /* IPv4 TIP */ + pmConf->filters[i].mask[5] = 0x03; /* IPv4 TIP */ + in_dev_put(in_dev); + + pmConf->wakeUpEvents |= VMXNET3_PM_WAKEUP_FILTER; + i++; + } + +skip_arp: + if (adapter->wol & WAKE_MAGIC) + pmConf->wakeUpEvents |= VMXNET3_PM_WAKEUP_MAGIC; + + pmConf->numFilters = i; + + adapter->shared->devRead.pmConfDesc.confVer = 1; + adapter->shared->devRead.pmConfDesc.confLen = sizeof(*pmConf); + adapter->shared->devRead.pmConfDesc.confPA = virt_to_phys(pmConf); + + VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD, + VMXNET3_CMD_UPDATE_PMCFG); + + pci_save_state(pdev); + pci_enable_wake(pdev, pci_choose_state(pdev, PMSG_SUSPEND), + adapter->wol); + pci_disable_device(pdev); + pci_set_power_state(pdev, pci_choose_state(pdev, PMSG_SUSPEND)); + + return 0; +} + + +static int +vmxnet3_resume(struct device *device) +{ + int err; + struct pci_dev *pdev = to_pci_dev(device); + struct net_device *netdev = pci_get_drvdata(pdev); + struct vmxnet3_adapter *adapter = netdev_priv(netdev); + struct Vmxnet3_PMConf *pmConf; + + if (!netif_running(netdev)) + return 0; + + /* Destroy wake-up filters. */ + pmConf = adapter->pm_conf; + memset(pmConf, 0, sizeof(*pmConf)); + + adapter->shared->devRead.pmConfDesc.confVer = 1; + adapter->shared->devRead.pmConfDesc.confLen = sizeof(*pmConf); + adapter->shared->devRead.pmConfDesc.confPA = virt_to_phys(pmConf); + + netif_device_attach(netdev); + pci_set_power_state(pdev, PCI_D0); + pci_restore_state(pdev); + err = pci_enable_device_mem(pdev); + if (err != 0) + return err; + + pci_enable_wake(pdev, PCI_D0, 0); + + VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD, + VMXNET3_CMD_UPDATE_PMCFG); + vmxnet3_alloc_intr_resources(adapter); + vmxnet3_request_irqs(adapter); + vmxnet3_enable_all_intrs(adapter); + + return 0; +} + +static struct dev_pm_ops vmxnet3_pm_ops = { + .suspend = vmxnet3_suspend, + .resume = vmxnet3_resume, +}; +#endif + +static struct pci_driver vmxnet3_driver = { + .name = vmxnet3_driver_name, + .id_table = vmxnet3_pciid_table, + .probe = vmxnet3_probe_device, + .remove = __devexit_p(vmxnet3_remove_device), +#ifdef CONFIG_PM + .driver.pm = &vmxnet3_pm_ops, +#endif +}; + + +static int __init +vmxnet3_init_module(void) +{ + printk(KERN_INFO "%s - version %s\n", VMXNET3_DRIVER_DESC, + VMXNET3_DRIVER_VERSION_REPORT); + return pci_register_driver(&vmxnet3_driver); +} + +module_init(vmxnet3_init_module); + + +static void +vmxnet3_exit_module(void) +{ + pci_unregister_driver(&vmxnet3_driver); +} + +module_exit(vmxnet3_exit_module); + +MODULE_AUTHOR("VMware, Inc."); +MODULE_DESCRIPTION(VMXNET3_DRIVER_DESC); +MODULE_LICENSE("GPL v2"); +MODULE_VERSION(VMXNET3_DRIVER_VERSION_STRING); diff --git a/drivers/net/vmxnet3/vmxnet3_ethtool.c b/drivers/net/vmxnet3/vmxnet3_ethtool.c new file mode 100644 index 0000000..c2c15e4 --- /dev/null +++ b/drivers/net/vmxnet3/vmxnet3_ethtool.c @@ -0,0 +1,566 @@ +/* + * Linux driver for VMware's vmxnet3 ethernet NIC. + * + * Copyright (C) 2008-2009, VMware, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; version 2 of the License and no later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * The full GNU General Public License is included in this distribution in + * the file called "COPYING". + * + * Maintained by: Shreyas Bhatewara <pv-drivers@vmware.com> + * + */ + + +#include "vmxnet3_int.h" + +struct vmxnet3_stat_desc { + char desc[ETH_GSTRING_LEN]; + int offset; +}; + + +static u32 +vmxnet3_get_rx_csum(struct net_device *netdev) +{ + struct vmxnet3_adapter *adapter = netdev_priv(netdev); + return adapter->rxcsum; +} + + +static int +vmxnet3_set_rx_csum(struct net_device *netdev, u32 val) +{ + struct vmxnet3_adapter *adapter = netdev_priv(netdev); + + if (adapter->rxcsum != val) { + adapter->rxcsum = val; + if (netif_running(netdev)) { + if (val) + adapter->shared->devRead.misc.uptFeatures |= + UPT1_F_RXCSUM; + else + adapter->shared->devRead.misc.uptFeatures &= + ~UPT1_F_RXCSUM; + + VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD, + VMXNET3_CMD_UPDATE_FEATURE); + } + } + return 0; +} + + +/* per tq stats maintained by the device */ +static const struct vmxnet3_stat_desc +vmxnet3_tq_dev_stats[] = { + /* description, offset */ + { "TSO pkts tx", offsetof(struct UPT1_TxStats, TSOPktsTxOK) }, + { "TSO bytes tx", offsetof(struct UPT1_TxStats, TSOBytesTxOK) }, + { "ucast pkts tx", offsetof(struct UPT1_TxStats, ucastPktsTxOK) }, + { "ucast bytes tx", offsetof(struct UPT1_TxStats, ucastBytesTxOK) }, + { "mcast pkts tx", offsetof(struct UPT1_TxStats, mcastPktsTxOK) }, + { "mcast bytes tx", offsetof(struct UPT1_TxStats, mcastBytesTxOK) }, + { "bcast pkts tx", offsetof(struct UPT1_TxStats, bcastPktsTxOK) }, + { "bcast bytes tx", offsetof(struct UPT1_TxStats, bcastBytesTxOK) }, + { "pkts tx err", offsetof(struct UPT1_TxStats, pktsTxError) }, + { "pkts tx discard", offsetof(struct UPT1_TxStats, pktsTxDiscard) }, +}; + +/* per tq stats maintained by the driver */ +static const struct vmxnet3_stat_desc +vmxnet3_tq_driver_stats[] = { + /* description, offset */ + {"drv dropped tx total", offsetof(struct vmxnet3_tq_driver_stats, + drop_total) }, + { " too many frags", offsetof(struct vmxnet3_tq_driver_stats, + drop_too_many_frags) }, + { " giant hdr", offsetof(struct vmxnet3_tq_driver_stats, + drop_oversized_hdr) }, + { " hdr err", offsetof(struct vmxnet3_tq_driver_stats, + drop_hdr_inspect_err) }, + { " tso", offsetof(struct vmxnet3_tq_driver_stats, + drop_tso) }, + { "ring full", offsetof(struct vmxnet3_tq_driver_stats, + tx_ring_full) }, + { "pkts linearized", offsetof(struct vmxnet3_tq_driver_stats, + linearized) }, + { "hdr cloned", offsetof(struct vmxnet3_tq_driver_stats, + copy_skb_header) }, + { "giant hdr", offsetof(struct vmxnet3_tq_driver_stats, + oversized_hdr) }, +}; + +/* per rq stats maintained by the device */ +static const struct vmxnet3_stat_desc +vmxnet3_rq_dev_stats[] = { + { "LRO pkts rx", offsetof(struct UPT1_RxStats, LROPktsRxOK) }, + { "LRO byte rx", offsetof(struct UPT1_RxStats, LROBytesRxOK) }, + { "ucast pkts rx", offsetof(struct UPT1_RxStats, ucastPktsRxOK) }, + { "ucast bytes rx", offsetof(struct UPT1_RxStats, ucastBytesRxOK) }, + { "mcast pkts rx", offsetof(struct UPT1_RxStats, mcastPktsRxOK) }, + { "mcast bytes rx", offsetof(struct UPT1_RxStats, mcastBytesRxOK) }, + { "bcast pkts rx", offsetof(struct UPT1_RxStats, bcastPktsRxOK) }, + { "bcast bytes rx", offsetof(struct UPT1_RxStats, bcastBytesRxOK) }, + { "pkts rx out of buf", offsetof(struct UPT1_RxStats, pktsRxOutOfBuf) }, + { "pkts rx err", offsetof(struct UPT1_RxStats, pktsRxError) }, +}; + +/* per rq stats maintained by the driver */ +static const struct vmxnet3_stat_desc +vmxnet3_rq_driver_stats[] = { + /* description, offset */ + { "drv dropped rx total", offsetof(struct vmxnet3_rq_driver_stats, + drop_total) }, + { " err", offsetof(struct vmxnet3_rq_driver_stats, + drop_err) }, + { " fcs", offsetof(struct vmxnet3_rq_driver_stats, + drop_fcs) }, + { "rx buf alloc fail", offsetof(struct vmxnet3_rq_driver_stats, + rx_buf_alloc_failure) }, +}; + +/* gloabl stats maintained by the driver */ +static const struct vmxnet3_stat_desc +vmxnet3_global_stats[] = { + /* description, offset */ + { "tx timeout count", offsetof(struct vmxnet3_adapter, + tx_timeout_count) } +}; + + +struct net_device_stats * +vmxnet3_get_stats(struct net_device *netdev) +{ + struct vmxnet3_adapter *adapter; + struct vmxnet3_tq_driver_stats *drvTxStats; + struct vmxnet3_rq_driver_stats *drvRxStats; + struct UPT1_TxStats *devTxStats; + struct UPT1_RxStats *devRxStats; + struct net_device_stats *net_stats = &netdev->stats; + + adapter = netdev_priv(netdev); + + /* Collect the dev stats into the shared area */ + VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD, VMXNET3_CMD_GET_STATS); + + /* Assuming that we have a single queue device */ + devTxStats = &adapter->tqd_start->stats; + devRxStats = &adapter->rqd_start->stats; + + /* Get access to the driver stats per queue */ + drvTxStats = &adapter->tx_queue.stats; + drvRxStats = &adapter->rx_queue.stats; + + memset(net_stats, 0, sizeof(*net_stats)); + + net_stats->rx_packets = devRxStats->ucastPktsRxOK + + devRxStats->mcastPktsRxOK + + devRxStats->bcastPktsRxOK; + + net_stats->tx_packets = devTxStats->ucastPktsTxOK + + devTxStats->mcastPktsTxOK + + devTxStats->bcastPktsTxOK; + + net_stats->rx_bytes = devRxStats->ucastBytesRxOK + + devRxStats->mcastBytesRxOK + + devRxStats->bcastBytesRxOK; + + net_stats->tx_bytes = devTxStats->ucastBytesTxOK + + devTxStats->mcastBytesTxOK + + devTxStats->bcastBytesTxOK; + + net_stats->rx_errors = devRxStats->pktsRxError; + net_stats->tx_errors = devTxStats->pktsTxError; + net_stats->rx_dropped = drvRxStats->drop_total; + net_stats->tx_dropped = drvTxStats->drop_total; + net_stats->multicast = devRxStats->mcastPktsRxOK; + + return net_stats; +} + +static int +vmxnet3_get_sset_count(struct net_device *netdev, int sset) +{ + switch (sset) { + case ETH_SS_STATS: + return ARRAY_SIZE(vmxnet3_tq_dev_stats) + + ARRAY_SIZE(vmxnet3_tq_driver_stats) + + ARRAY_SIZE(vmxnet3_rq_dev_stats) + + ARRAY_SIZE(vmxnet3_rq_driver_stats) + + ARRAY_SIZE(vmxnet3_global_stats); + default: + return -EOPNOTSUPP; + } +} + + +static int +vmxnet3_get_regs_len(struct net_device *netdev) +{ + return 20 * sizeof(u32); +} + + +static void +vmxnet3_get_drvinfo(struct net_device *netdev, struct ethtool_drvinfo *drvinfo) +{ + struct vmxnet3_adapter *adapter = netdev_priv(netdev); + + strlcpy(drvinfo->driver, vmxnet3_driver_name, sizeof(drvinfo->driver)); + drvinfo->driver[sizeof(drvinfo->driver) - 1] = '\0'; + + strlcpy(drvinfo->version, VMXNET3_DRIVER_VERSION_REPORT, + sizeof(drvinfo->version)); + drvinfo->driver[sizeof(drvinfo->version) - 1] = '\0'; + + strlcpy(drvinfo->fw_version, "N/A", sizeof(drvinfo->fw_version)); + drvinfo->fw_version[sizeof(drvinfo->fw_version) - 1] = '\0'; + + strlcpy(drvinfo->bus_info, pci_name(adapter->pdev), + ETHTOOL_BUSINFO_LEN); + drvinfo->n_stats = vmxnet3_get_sset_count(netdev, ETH_SS_STATS); + drvinfo->testinfo_len = 0; + drvinfo->eedump_len = 0; + drvinfo->regdump_len = vmxnet3_get_regs_len(netdev); +} + + +static void +vmxnet3_get_strings(struct net_device *netdev, u32 stringset, u8 *buf) +{ + if (stringset == ETH_SS_STATS) { + int i; + + for (i = 0; i < ARRAY_SIZE(vmxnet3_tq_dev_stats); i++) { + memcpy(buf, vmxnet3_tq_dev_stats[i].desc, + ETH_GSTRING_LEN); + buf += ETH_GSTRING_LEN; + } + for (i = 0; i < ARRAY_SIZE(vmxnet3_tq_driver_stats); i++) { + memcpy(buf, vmxnet3_tq_driver_stats[i].desc, + ETH_GSTRING_LEN); + buf += ETH_GSTRING_LEN; + } + for (i = 0; i < ARRAY_SIZE(vmxnet3_rq_dev_stats); i++) { + memcpy(buf, vmxnet3_rq_dev_stats[i].desc, + ETH_GSTRING_LEN); + buf += ETH_GSTRING_LEN; + } + for (i = 0; i < ARRAY_SIZE(vmxnet3_rq_driver_stats); i++) { + memcpy(buf, vmxnet3_rq_driver_stats[i].desc, + ETH_GSTRING_LEN); + buf += ETH_GSTRING_LEN; + } + for (i = 0; i < ARRAY_SIZE(vmxnet3_global_stats); i++) { + memcpy(buf, vmxnet3_global_stats[i].desc, + ETH_GSTRING_LEN); + buf += ETH_GSTRING_LEN; + } + } +} + +static u32 +vmxnet3_get_flags(struct net_device *netdev) { + return netdev->features; +} + +static int +vmxnet3_set_flags(struct net_device *netdev, u32 data) { + struct vmxnet3_adapter *adapter = netdev_priv(netdev); + u8 lro_requested = (data & ETH_FLAG_LRO) == 0 ? 0 : 1; + u8 lro_present = (netdev->features & NETIF_F_LRO) == 0 ? 0 : 1; + + if (lro_requested ^ lro_present) { + /* toggle the LRO feature*/ + netdev->features ^= NETIF_F_LRO; + + /* update harware LRO capability accordingly */ + if (lro_requested) + adapter->shared->devRead.misc.uptFeatures &= UPT1_F_LRO; + else + adapter->shared->devRead.misc.uptFeatures &= + ~UPT1_F_LRO; + VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD, + VMXNET3_CMD_UPDATE_FEATURE); + } + return 0; +} + +static void +vmxnet3_get_ethtool_stats(struct net_device *netdev, + struct ethtool_stats *stats, u64 *buf) +{ + struct vmxnet3_adapter *adapter = netdev_priv(netdev); + u8 *base; + int i; + + VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD, VMXNET3_CMD_GET_STATS); + + /* this does assume each counter is 64-bit wide */ + + base = (u8 *)&adapter->tqd_start->stats; + for (i = 0; i < ARRAY_SIZE(vmxnet3_tq_dev_stats); i++) + *buf++ = *(u64 *)(base + vmxnet3_tq_dev_stats[i].offset); + + base = (u8 *)&adapter->tx_queue.stats; + for (i = 0; i < ARRAY_SIZE(vmxnet3_tq_driver_stats); i++) + *buf++ = *(u64 *)(base + vmxnet3_tq_driver_stats[i].offset); + + base = (u8 *)&adapter->rqd_start->stats; + for (i = 0; i < ARRAY_SIZE(vmxnet3_rq_dev_stats); i++) + *buf++ = *(u64 *)(base + vmxnet3_rq_dev_stats[i].offset); + + base = (u8 *)&adapter->rx_queue.stats; + for (i = 0; i < ARRAY_SIZE(vmxnet3_rq_driver_stats); i++) + *buf++ = *(u64 *)(base + vmxnet3_rq_driver_stats[i].offset); + + base = (u8 *)adapter; + for (i = 0; i < ARRAY_SIZE(vmxnet3_global_stats); i++) + *buf++ = *(u64 *)(base + vmxnet3_global_stats[i].offset); +} + + +static void +vmxnet3_get_regs(struct net_device *netdev, struct ethtool_regs *regs, void *p) +{ + struct vmxnet3_adapter *adapter = netdev_priv(netdev); + u32 *buf = p; + + memset(p, 0, vmxnet3_get_regs_len(netdev)); + + regs->version = 1; + + /* Update vmxnet3_get_regs_len if we want to dump more registers */ + + /* make each ring use multiple of 16 bytes */ + buf[0] = adapter->tx_queue.tx_ring.next2fill; + buf[1] = adapter->tx_queue.tx_ring.next2comp; + buf[2] = adapter->tx_queue.tx_ring.gen; + buf[3] = 0; + + buf[4] = adapter->tx_queue.comp_ring.next2proc; + buf[5] = adapter->tx_queue.comp_ring.gen; + buf[6] = adapter->tx_queue.stopped; + buf[7] = 0; + + buf[8] = adapter->rx_queue.rx_ring[0].next2fill; + buf[9] = adapter->rx_queue.rx_ring[0].next2comp; + buf[10] = adapter->rx_queue.rx_ring[0].gen; + buf[11] = 0; + + buf[12] = adapter->rx_queue.rx_ring[1].next2fill; + buf[13] = adapter->rx_queue.rx_ring[1].next2comp; + buf[14] = adapter->rx_queue.rx_ring[1].gen; + buf[15] = 0; + + buf[16] = adapter->rx_queue.comp_ring.next2proc; + buf[17] = adapter->rx_queue.comp_ring.gen; + buf[18] = 0; + buf[19] = 0; +} + + +static void +vmxnet3_get_wol(struct net_device *netdev, struct ethtool_wolinfo *wol) +{ + struct vmxnet3_adapter *adapter = netdev_priv(netdev); + + wol->supported = WAKE_UCAST | WAKE_ARP | WAKE_MAGIC; + wol->wolopts = adapter->wol; +} + + +static int +vmxnet3_set_wol(struct net_device *netdev, struct ethtool_wolinfo *wol) +{ + struct vmxnet3_adapter *adapter = netdev_priv(netdev); + + if (wol->wolopts & (WAKE_PHY | WAKE_MCAST | WAKE_BCAST | + WAKE_MAGICSECURE)) { + return -EOPNOTSUPP; + } + + adapter->wol = wol->wolopts; + + device_set_wakeup_enable(&adapter->pdev->dev, adapter->wol); + + return 0; +} + + +static int +vmxnet3_get_settings(struct net_device *netdev, struct ethtool_cmd *ecmd) +{ + struct vmxnet3_adapter *adapter = netdev_priv(netdev); + + ecmd->supported = SUPPORTED_10000baseT_Full | SUPPORTED_1000baseT_Full | + SUPPORTED_TP; + ecmd->advertising = ADVERTISED_TP; + ecmd->port = PORT_TP; + ecmd->transceiver = XCVR_INTERNAL; + + if (adapter->link_speed) { + ecmd->speed = adapter->link_speed; + ecmd->duplex = DUPLEX_FULL; + } else { + ecmd->speed = -1; + ecmd->duplex = -1; + } + return 0; +} + + +static void +vmxnet3_get_ringparam(struct net_device *netdev, + struct ethtool_ringparam *param) +{ + struct vmxnet3_adapter *adapter = netdev_priv(netdev); + + param->rx_max_pending = VMXNET3_RX_RING_MAX_SIZE; + param->tx_max_pending = VMXNET3_TX_RING_MAX_SIZE; + param->rx_mini_max_pending = 0; + param->rx_jumbo_max_pending = 0; + + param->rx_pending = adapter->rx_queue.rx_ring[0].size; + param->tx_pending = adapter->tx_queue.tx_ring.size; + param->rx_mini_pending = 0; + param->rx_jumbo_pending = 0; +} + + +static int +vmxnet3_set_ringparam(struct net_device *netdev, + struct ethtool_ringparam *param) +{ + struct vmxnet3_adapter *adapter = netdev_priv(netdev); + u32 new_tx_ring_size, new_rx_ring_size; + u32 sz; + int err = 0; + + if (param->tx_pending == 0 || param->tx_pending > + VMXNET3_TX_RING_MAX_SIZE) + return -EINVAL; + + if (param->rx_pending == 0 || param->rx_pending > + VMXNET3_RX_RING_MAX_SIZE) + return -EINVAL; + + + /* round it up to a multiple of VMXNET3_RING_SIZE_ALIGN */ + new_tx_ring_size = (param->tx_pending + VMXNET3_RING_SIZE_MASK) & + ~VMXNET3_RING_SIZE_MASK; + new_tx_ring_size = min_t(u32, new_tx_ring_size, + VMXNET3_TX_RING_MAX_SIZE); + if (new_tx_ring_size > VMXNET3_TX_RING_MAX_SIZE || (new_tx_ring_size % + VMXNET3_RING_SIZE_ALIGN) != 0) + return -EINVAL; + + /* ring0 has to be a multiple of + * rx_buf_per_pkt * VMXNET3_RING_SIZE_ALIGN + */ + sz = adapter->rx_buf_per_pkt * VMXNET3_RING_SIZE_ALIGN; + new_rx_ring_size = (param->rx_pending + sz - 1) / sz * sz; + new_rx_ring_size = min_t(u32, new_rx_ring_size, + VMXNET3_RX_RING_MAX_SIZE / sz * sz); + if (new_rx_ring_size > VMXNET3_RX_RING_MAX_SIZE || (new_rx_ring_size % + sz) != 0) + return -EINVAL; + + if (new_tx_ring_size == adapter->tx_queue.tx_ring.size && + new_rx_ring_size == adapter->rx_queue.rx_ring[0].size) { + return 0; + } + + /* + * Reset_work may be in the middle of resetting the device, wait for its + * completion. + */ + while (test_and_set_bit(VMXNET3_STATE_BIT_RESETTING, &adapter->state)) + msleep(1); + + if (netif_running(netdev)) { + vmxnet3_quiesce_dev(adapter); + vmxnet3_reset_dev(adapter); + + /* recreate the rx queue and the tx queue based on the + * new sizes */ + vmxnet3_tq_destroy(&adapter->tx_queue, adapter); + vmxnet3_rq_destroy(&adapter->rx_queue, adapter); + + err = vmxnet3_create_queues(adapter, new_tx_ring_size, + new_rx_ring_size, VMXNET3_DEF_RX_RING_SIZE); + if (err) { + /* failed, most likely because of OOM, try default + * size */ + printk(KERN_ERR "%s: failed to apply new sizes, try the" + " default ones\n", netdev->name); + err = vmxnet3_create_queues(adapter, + VMXNET3_DEF_TX_RING_SIZE, + VMXNET3_DEF_RX_RING_SIZE, + VMXNET3_DEF_RX_RING_SIZE); + if (err) { + printk(KERN_ERR "%s: failed to create queues " + "with default sizes. Closing it\n", + netdev->name); + goto out; + } + } + + err = vmxnet3_activate_dev(adapter); + if (err) + printk(KERN_ERR "%s: failed to re-activate, error %d." + " Closing it\n", netdev->name, err); + } + +out: + clear_bit(VMXNET3_STATE_BIT_RESETTING, &adapter->state); + if (err) + vmxnet3_force_close(adapter); + + return err; +} + + +static struct ethtool_ops vmxnet3_ethtool_ops = { + .get_settings = vmxnet3_get_settings, + .get_drvinfo = vmxnet3_get_drvinfo, + .get_regs_len = vmxnet3_get_regs_len, + .get_regs = vmxnet3_get_regs, + .get_wol = vmxnet3_get_wol, + .set_wol = vmxnet3_set_wol, + .get_link = ethtool_op_get_link, + .get_rx_csum = vmxnet3_get_rx_csum, + .set_rx_csum = vmxnet3_set_rx_csum, + .get_tx_csum = ethtool_op_get_tx_csum, + .set_tx_csum = ethtool_op_set_tx_hw_csum, + .get_sg = ethtool_op_get_sg, + .set_sg = ethtool_op_set_sg, + .get_tso = ethtool_op_get_tso, + .set_tso = ethtool_op_set_tso, + .get_strings = vmxnet3_get_strings, + .get_flags = vmxnet3_get_flags, + .set_flags = vmxnet3_set_flags, + .get_sset_count = vmxnet3_get_sset_count, + .get_ethtool_stats = vmxnet3_get_ethtool_stats, + .get_ringparam = vmxnet3_get_ringparam, + .set_ringparam = vmxnet3_set_ringparam, +}; + +void vmxnet3_set_ethtool_ops(struct net_device *netdev) +{ + SET_ETHTOOL_OPS(netdev, &vmxnet3_ethtool_ops); +} diff --git a/drivers/net/vmxnet3/vmxnet3_int.h b/drivers/net/vmxnet3/vmxnet3_int.h new file mode 100644 index 0000000..6bb9157 --- /dev/null +++ b/drivers/net/vmxnet3/vmxnet3_int.h @@ -0,0 +1,389 @@ +/* + * Linux driver for VMware's vmxnet3 ethernet NIC. + * + * Copyright (C) 2008-2009, VMware, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; version 2 of the License and no later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * The full GNU General Public License is included in this distribution in + * the file called "COPYING". + * + * Maintained by: Shreyas Bhatewara <pv-drivers@vmware.com> + * + */ + +#ifndef _VMXNET3_INT_H +#define _VMXNET3_INT_H + +#include <linux/types.h> +#include <linux/ethtool.h> +#include <linux/delay.h> +#include <linux/netdevice.h> +#include <linux/pci.h> +#include <linux/ethtool.h> +#include <linux/compiler.h> +#include <linux/module.h> +#include <linux/moduleparam.h> +#include <linux/slab.h> +#include <linux/spinlock.h> +#include <linux/ioport.h> +#include <linux/highmem.h> +#include <linux/init.h> +#include <linux/timer.h> +#include <linux/skbuff.h> +#include <linux/interrupt.h> +#include <linux/workqueue.h> +#include <linux/uaccess.h> +#include <asm/dma.h> +#include <asm/page.h> + +#include <linux/tcp.h> +#include <linux/udp.h> +#include <linux/ip.h> +#include <linux/ipv6.h> +#include <linux/in.h> +#include <linux/etherdevice.h> +#include <asm/checksum.h> +#include <linux/if_vlan.h> +#include <linux/if_arp.h> +#include <linux/inetdevice.h> +#include <linux/dst.h> + +#include "vmxnet3_defs.h" + +#ifdef DEBUG +# define VMXNET3_DRIVER_VERSION_REPORT VMXNET3_DRIVER_VERSION_STRING"-NAPI(debug)" +#else +# define VMXNET3_DRIVER_VERSION_REPORT VMXNET3_DRIVER_VERSION_STRING"-NAPI" +#endif + + +/* + * Version numbers + */ +#define VMXNET3_DRIVER_VERSION_STRING "1.0.5.0-k" + +/* a 32-bit int, each byte encode a verion number in VMXNET3_DRIVER_VERSION */ +#define VMXNET3_DRIVER_VERSION_NUM 0x01000500 + + +/* + * Capabilities + */ + +enum { + VMNET_CAP_SG = 0x0001, /* Can do scatter-gather transmits. */ + VMNET_CAP_IP4_CSUM = 0x0002, /* Can checksum only TCP/UDP over + * IPv4 */ + VMNET_CAP_HW_CSUM = 0x0004, /* Can checksum all packets. */ + VMNET_CAP_HIGH_DMA = 0x0008, /* Can DMA to high memory. */ + VMNET_CAP_TOE = 0x0010, /* Supports TCP/IP offload. */ + VMNET_CAP_TSO = 0x0020, /* Supports TCP Segmentation + * offload */ + VMNET_CAP_SW_TSO = 0x0040, /* Supports SW TCP Segmentation */ + VMNET_CAP_VMXNET_APROM = 0x0080, /* Vmxnet APROM support */ + VMNET_CAP_HW_TX_VLAN = 0x0100, /* Can we do VLAN tagging in HW */ + VMNET_CAP_HW_RX_VLAN = 0x0200, /* Can we do VLAN untagging in HW */ + VMNET_CAP_SW_VLAN = 0x0400, /* VLAN tagging/untagging in SW */ + VMNET_CAP_WAKE_PCKT_RCV = 0x0800, /* Can wake on network packet recv? */ + VMNET_CAP_ENABLE_INT_INLINE = 0x1000, /* Enable Interrupt Inline */ + VMNET_CAP_ENABLE_HEADER_COPY = 0x2000, /* copy header for vmkernel */ + VMNET_CAP_TX_CHAIN = 0x4000, /* Guest can use multiple tx entries + * for a pkt */ + VMNET_CAP_RX_CHAIN = 0x8000, /* pkt can span multiple rx entries */ + VMNET_CAP_LPD = 0x10000, /* large pkt delivery */ + VMNET_CAP_BPF = 0x20000, /* BPF Support in VMXNET Virtual HW*/ + VMNET_CAP_SG_SPAN_PAGES = 0x40000, /* Scatter-gather can span multiple*/ + /* pages transmits */ + VMNET_CAP_IP6_CSUM = 0x80000, /* Can do IPv6 csum offload. */ + VMNET_CAP_TSO6 = 0x100000, /* TSO seg. offload for IPv6 pkts. */ + VMNET_CAP_TSO256k = 0x200000, /* Can do TSO seg offload for */ + /* pkts up to 256kB. */ + VMNET_CAP_UPT = 0x400000 /* Support UPT */ +}; + +/* + * PCI vendor and device IDs. + */ +#define PCI_VENDOR_ID_VMWARE 0x15AD +#define PCI_DEVICE_ID_VMWARE_VMXNET3 0x07B0 +#define MAX_ETHERNET_CARDS 10 +#define MAX_PCI_PASSTHRU_DEVICE 6 + +struct vmxnet3_cmd_ring { + union Vmxnet3_GenericDesc *base; + u32 size; + u32 next2fill; + u32 next2comp; + u8 gen; + dma_addr_t basePA; +}; + +static inline void +vmxnet3_cmd_ring_adv_next2fill(struct vmxnet3_cmd_ring *ring) +{ + ring->next2fill++; + if (unlikely(ring->next2fill == ring->size)) { + ring->next2fill = 0; + VMXNET3_FLIP_RING_GEN(ring->gen); + } +} + +static inline void +vmxnet3_cmd_ring_adv_next2comp(struct vmxnet3_cmd_ring *ring) +{ + VMXNET3_INC_RING_IDX_ONLY(ring->next2comp, ring->size); +} + +static inline int +vmxnet3_cmd_ring_desc_avail(struct vmxnet3_cmd_ring *ring) +{ + return (ring->next2comp > ring->next2fill ? 0 : ring->size) + + ring->next2comp - ring->next2fill - 1; +} + +struct vmxnet3_comp_ring { + union Vmxnet3_GenericDesc *base; + u32 size; + u32 next2proc; + u8 gen; + u8 intr_idx; + dma_addr_t basePA; +}; + +static inline void +vmxnet3_comp_ring_adv_next2proc(struct vmxnet3_comp_ring *ring) +{ + ring->next2proc++; + if (unlikely(ring->next2proc == ring->size)) { + ring->next2proc = 0; + VMXNET3_FLIP_RING_GEN(ring->gen); + } +} + +struct vmxnet3_tx_data_ring { + struct Vmxnet3_TxDataDesc *base; + u32 size; + dma_addr_t basePA; +}; + +enum vmxnet3_buf_map_type { + VMXNET3_MAP_INVALID = 0, + VMXNET3_MAP_NONE, + VMXNET3_MAP_SINGLE, + VMXNET3_MAP_PAGE, +}; + +struct vmxnet3_tx_buf_info { + u32 map_type; + u16 len; + u16 sop_idx; + dma_addr_t dma_addr; + struct sk_buff *skb; +}; + +struct vmxnet3_tq_driver_stats { + u64 drop_total; /* # of pkts dropped by the driver, the + * counters below track droppings due to + * different reasons + */ + u64 drop_too_many_frags; + u64 drop_oversized_hdr; + u64 drop_hdr_inspect_err; + u64 drop_tso; + + u64 tx_ring_full; + u64 linearized; /* # of pkts linearized */ + u64 copy_skb_header; /* # of times we have to copy skb header */ + u64 oversized_hdr; +}; + +struct vmxnet3_tx_ctx { + bool ipv4; + u16 mss; + u32 eth_ip_hdr_size; /* only valid for pkts requesting tso or csum + * offloading + */ + u32 l4_hdr_size; /* only valid if mss != 0 */ + u32 copy_size; /* # of bytes copied into the data ring */ + union Vmxnet3_GenericDesc *sop_txd; + union Vmxnet3_GenericDesc *eop_txd; +}; + +struct vmxnet3_tx_queue { + spinlock_t tx_lock; + struct vmxnet3_cmd_ring tx_ring; + struct vmxnet3_tx_buf_info *buf_info; + struct vmxnet3_tx_data_ring data_ring; + struct vmxnet3_comp_ring comp_ring; + struct Vmxnet3_TxQueueCtrl *shared; + struct vmxnet3_tq_driver_stats stats; + bool stopped; + int num_stop; /* # of times the queue is + * stopped */ +} __attribute__((__aligned__(SMP_CACHE_BYTES))); + +enum vmxnet3_rx_buf_type { + VMXNET3_RX_BUF_NONE = 0, + VMXNET3_RX_BUF_SKB = 1, + VMXNET3_RX_BUF_PAGE = 2 +}; + +struct vmxnet3_rx_buf_info { + enum vmxnet3_rx_buf_type buf_type; + u16 len; + union { + struct sk_buff *skb; + struct page *page; + }; + dma_addr_t dma_addr; +}; + +struct vmxnet3_rx_ctx { + struct sk_buff *skb; + u32 sop_idx; +}; + +struct vmxnet3_rq_driver_stats { + u64 drop_total; + u64 drop_err; + u64 drop_fcs; + u64 rx_buf_alloc_failure; +}; + +struct vmxnet3_rx_queue { + struct vmxnet3_cmd_ring rx_ring[2]; + struct vmxnet3_comp_ring comp_ring; + struct vmxnet3_rx_ctx rx_ctx; + u32 qid; /* rqID in RCD for buffer from 1st ring */ + u32 qid2; /* rqID in RCD for buffer from 2nd ring */ + u32 uncommitted[2]; /* # of buffers allocated since last RXPROD + * update */ + struct vmxnet3_rx_buf_info *buf_info[2]; + struct Vmxnet3_RxQueueCtrl *shared; + struct vmxnet3_rq_driver_stats stats; +} __attribute__((__aligned__(SMP_CACHE_BYTES))); + +#define VMXNET3_LINUX_MAX_MSIX_VECT 1 + +struct vmxnet3_intr { + enum vmxnet3_intr_mask_mode mask_mode; + enum vmxnet3_intr_type type; /* MSI-X, MSI, or INTx? */ + u8 num_intrs; /* # of intr vectors */ + u8 event_intr_idx; /* idx of the intr vector for event */ + u8 mod_levels[VMXNET3_LINUX_MAX_MSIX_VECT]; /* moderation level */ +#ifdef CONFIG_PCI_MSI + struct msix_entry msix_entries[VMXNET3_LINUX_MAX_MSIX_VECT]; +#endif +}; + +#define VMXNET3_STATE_BIT_RESETTING 0 +#define VMXNET3_STATE_BIT_QUIESCED 1 +struct vmxnet3_adapter { + struct vmxnet3_tx_queue tx_queue; + struct vmxnet3_rx_queue rx_queue; + struct napi_struct napi; + struct vlan_group *vlan_grp; + + struct vmxnet3_intr intr; + + struct Vmxnet3_DriverShared *shared; + struct Vmxnet3_PMConf *pm_conf; + struct Vmxnet3_TxQueueDesc *tqd_start; /* first tx queue desc */ + struct Vmxnet3_RxQueueDesc *rqd_start; /* first rx queue desc */ + struct net_device *netdev; + struct pci_dev *pdev; + + u8 *hw_addr0; /* for BAR 0 */ + u8 *hw_addr1; /* for BAR 1 */ + + /* feature control */ + bool rxcsum; + bool lro; + bool jumbo_frame; + + /* rx buffer related */ + unsigned skb_buf_size; + int rx_buf_per_pkt; /* only apply to the 1st ring */ + dma_addr_t shared_pa; + dma_addr_t queue_desc_pa; + + /* Wake-on-LAN */ + u32 wol; + + /* Link speed */ + u32 link_speed; /* in mbps */ + + u64 tx_timeout_count; + struct work_struct work; + + unsigned long state; /* VMXNET3_STATE_BIT_xxx */ + + int dev_number; +}; + +#define VMXNET3_WRITE_BAR0_REG(adapter, reg, val) \ + writel((val), (adapter)->hw_addr0 + (reg)) +#define VMXNET3_READ_BAR0_REG(adapter, reg) \ + readl((adapter)->hw_addr0 + (reg)) + +#define VMXNET3_WRITE_BAR1_REG(adapter, reg, val) \ + writel((val), (adapter)->hw_addr1 + (reg)) +#define VMXNET3_READ_BAR1_REG(adapter, reg) \ + readl((adapter)->hw_addr1 + (reg)) + +#define VMXNET3_WAKE_QUEUE_THRESHOLD(tq) (5) +#define VMXNET3_RX_ALLOC_THRESHOLD(rq, ring_idx, adapter) \ + ((rq)->rx_ring[ring_idx].size >> 3) + +#define VMXNET3_GET_ADDR_LO(dma) ((u32)(dma)) +#define VMXNET3_GET_ADDR_HI(dma) ((u32)(((u64)(dma)) >> 32)) + +/* must be a multiple of VMXNET3_RING_SIZE_ALIGN */ +#define VMXNET3_DEF_TX_RING_SIZE 512 +#define VMXNET3_DEF_RX_RING_SIZE 256 + +#define VMXNET3_MAX_ETH_HDR_SIZE 22 +#define VMXNET3_MAX_SKB_BUF_SIZE (3*1024) + +int +vmxnet3_quiesce_dev(struct vmxnet3_adapter *adapter); + +int +vmxnet3_activate_dev(struct vmxnet3_adapter *adapter); + +void +vmxnet3_force_close(struct vmxnet3_adapter *adapter); + +void +vmxnet3_reset_dev(struct vmxnet3_adapter *adapter); + +void +vmxnet3_tq_destroy(struct vmxnet3_tx_queue *tq, + struct vmxnet3_adapter *adapter); + +void +vmxnet3_rq_destroy(struct vmxnet3_rx_queue *rq, + struct vmxnet3_adapter *adapter); + +int +vmxnet3_create_queues(struct vmxnet3_adapter *adapter, + u32 tx_ring_size, u32 rx_ring_size, u32 rx_ring2_size); + +extern void vmxnet3_set_ethtool_ops(struct net_device *netdev); +extern struct net_device_stats *vmxnet3_get_stats(struct net_device *netdev); + +extern char vmxnet3_driver_name[]; +#endif diff --git a/drivers/net/wan/hdlc_cisco.c b/drivers/net/wan/hdlc_cisco.c index cf5fd17..f1bff98 100644 --- a/drivers/net/wan/hdlc_cisco.c +++ b/drivers/net/wan/hdlc_cisco.c @@ -58,8 +58,7 @@ struct cisco_state { spinlock_t lock; unsigned long last_poll; int up; - int request_sent; - u32 txseq; /* TX sequence number */ + u32 txseq; /* TX sequence number, 0 = none */ u32 rxseq; /* RX sequence number */ }; @@ -163,6 +162,7 @@ static int cisco_rx(struct sk_buff *skb) struct cisco_packet *cisco_data; struct in_device *in_dev; __be32 addr, mask; + u32 ack; if (skb->len < sizeof(struct hdlc_header)) goto rx_error; @@ -223,8 +223,10 @@ static int cisco_rx(struct sk_buff *skb) case CISCO_KEEPALIVE_REQ: spin_lock(&st->lock); st->rxseq = ntohl(cisco_data->par1); - if (st->request_sent && - ntohl(cisco_data->par2) == st->txseq) { + ack = ntohl(cisco_data->par2); + if (ack && (ack == st->txseq || + /* our current REQ may be in transit */ + ack == st->txseq - 1)) { st->last_poll = jiffies; if (!st->up) { u32 sec, min, hrs, days; @@ -275,7 +277,6 @@ static void cisco_timer(unsigned long arg) cisco_keepalive_send(dev, CISCO_KEEPALIVE_REQ, htonl(++st->txseq), htonl(st->rxseq)); - st->request_sent = 1; spin_unlock(&st->lock); st->timer.expires = jiffies + st->settings.interval * HZ; @@ -293,9 +294,7 @@ static void cisco_start(struct net_device *dev) unsigned long flags; spin_lock_irqsave(&st->lock, flags); - st->up = 0; - st->request_sent = 0; - st->txseq = st->rxseq = 0; + st->up = st->txseq = st->rxseq = 0; spin_unlock_irqrestore(&st->lock, flags); init_timer(&st->timer); @@ -317,8 +316,7 @@ static void cisco_stop(struct net_device *dev) spin_lock_irqsave(&st->lock, flags); netif_dormant_on(dev); - st->up = 0; - st->request_sent = 0; + st->up = st->txseq = 0; spin_unlock_irqrestore(&st->lock, flags); } diff --git a/drivers/net/wireless/adm8211.h b/drivers/net/wireless/adm8211.h index 4f6ab13..b07e4d3 100644 --- a/drivers/net/wireless/adm8211.h +++ b/drivers/net/wireless/adm8211.h @@ -266,7 +266,7 @@ do { \ #define ADM8211_SYNCTL_CS1 (1 << 28) #define ADM8211_SYNCTL_CAL (1 << 27) #define ADM8211_SYNCTL_SELCAL (1 << 26) -#define ADM8211_SYNCTL_RFtype ((1 << 24) || (1 << 23) || (1 << 22)) +#define ADM8211_SYNCTL_RFtype ((1 << 24) | (1 << 23) | (1 << 22)) #define ADM8211_SYNCTL_RFMD (1 << 22) #define ADM8211_SYNCTL_GENERAL (0x7 << 22) /* SYNCTL 21:0 Data (Si4126: 18-bit data, 4-bit address) */ diff --git a/drivers/net/wireless/b43/b43.h b/drivers/net/wireless/b43/b43.h index fa1549a..6607162 100644 --- a/drivers/net/wireless/b43/b43.h +++ b/drivers/net/wireless/b43/b43.h @@ -607,82 +607,7 @@ struct b43_qos_params { struct ieee80211_tx_queue_params p; }; -struct b43_wldev; - -/* Data structure for the WLAN parts (802.11 cores) of the b43 chip. */ -struct b43_wl { - /* Pointer to the active wireless device on this chip */ - struct b43_wldev *current_dev; - /* Pointer to the ieee80211 hardware data structure */ - struct ieee80211_hw *hw; - - /* Global driver mutex. Every operation must run with this mutex locked. */ - struct mutex mutex; - /* Hard-IRQ spinlock. This lock protects things used in the hard-IRQ - * handler, only. This basically is just the IRQ mask register. */ - spinlock_t hardirq_lock; - - /* The number of queues that were registered with the mac80211 subsystem - * initially. This is a backup copy of hw->queues in case hw->queues has - * to be dynamically lowered at runtime (Firmware does not support QoS). - * hw->queues has to be restored to the original value before unregistering - * from the mac80211 subsystem. */ - u16 mac80211_initially_registered_queues; - - /* We can only have one operating interface (802.11 core) - * at a time. General information about this interface follows. - */ - - struct ieee80211_vif *vif; - /* The MAC address of the operating interface. */ - u8 mac_addr[ETH_ALEN]; - /* Current BSSID */ - u8 bssid[ETH_ALEN]; - /* Interface type. (NL80211_IFTYPE_XXX) */ - int if_type; - /* Is the card operating in AP, STA or IBSS mode? */ - bool operating; - /* filter flags */ - unsigned int filter_flags; - /* Stats about the wireless interface */ - struct ieee80211_low_level_stats ieee_stats; - -#ifdef CONFIG_B43_HWRNG - struct hwrng rng; - bool rng_initialized; - char rng_name[30 + 1]; -#endif /* CONFIG_B43_HWRNG */ - - /* List of all wireless devices on this chip */ - struct list_head devlist; - u8 nr_devs; - - bool radiotap_enabled; - bool radio_enabled; - - /* The beacon we are currently using (AP or IBSS mode). */ - struct sk_buff *current_beacon; - bool beacon0_uploaded; - bool beacon1_uploaded; - bool beacon_templates_virgin; /* Never wrote the templates? */ - struct work_struct beacon_update_trigger; - - /* The current QOS parameters for the 4 queues. */ - struct b43_qos_params qos_params[4]; - - /* Work for adjustment of the transmission power. - * This is scheduled when we determine that the actual TX output - * power doesn't match what we want. */ - struct work_struct txpower_adjust_work; - - /* Packet transmit work */ - struct work_struct tx_work; - /* Queue of packets to be transmitted. */ - struct sk_buff_head tx_queue; - - /* The device LEDs. */ - struct b43_leds leds; -}; +struct b43_wl; /* The type of the firmware file. */ enum b43_firmware_file_type { @@ -824,6 +749,97 @@ struct b43_wldev { #endif }; +/* + * Include goes here to avoid a dependency problem. + * A better fix would be to integrate xmit.h into b43.h. + */ +#include "xmit.h" + +/* Data structure for the WLAN parts (802.11 cores) of the b43 chip. */ +struct b43_wl { + /* Pointer to the active wireless device on this chip */ + struct b43_wldev *current_dev; + /* Pointer to the ieee80211 hardware data structure */ + struct ieee80211_hw *hw; + + /* Global driver mutex. Every operation must run with this mutex locked. */ + struct mutex mutex; + /* Hard-IRQ spinlock. This lock protects things used in the hard-IRQ + * handler, only. This basically is just the IRQ mask register. */ + spinlock_t hardirq_lock; + + /* The number of queues that were registered with the mac80211 subsystem + * initially. This is a backup copy of hw->queues in case hw->queues has + * to be dynamically lowered at runtime (Firmware does not support QoS). + * hw->queues has to be restored to the original value before unregistering + * from the mac80211 subsystem. */ + u16 mac80211_initially_registered_queues; + + /* We can only have one operating interface (802.11 core) + * at a time. General information about this interface follows. + */ + + struct ieee80211_vif *vif; + /* The MAC address of the operating interface. */ + u8 mac_addr[ETH_ALEN]; + /* Current BSSID */ + u8 bssid[ETH_ALEN]; + /* Interface type. (NL80211_IFTYPE_XXX) */ + int if_type; + /* Is the card operating in AP, STA or IBSS mode? */ + bool operating; + /* filter flags */ + unsigned int filter_flags; + /* Stats about the wireless interface */ + struct ieee80211_low_level_stats ieee_stats; + +#ifdef CONFIG_B43_HWRNG + struct hwrng rng; + bool rng_initialized; + char rng_name[30 + 1]; +#endif /* CONFIG_B43_HWRNG */ + + /* List of all wireless devices on this chip */ + struct list_head devlist; + u8 nr_devs; + + bool radiotap_enabled; + bool radio_enabled; + + /* The beacon we are currently using (AP or IBSS mode). */ + struct sk_buff *current_beacon; + bool beacon0_uploaded; + bool beacon1_uploaded; + bool beacon_templates_virgin; /* Never wrote the templates? */ + struct work_struct beacon_update_trigger; + + /* The current QOS parameters for the 4 queues. */ + struct b43_qos_params qos_params[4]; + + /* Work for adjustment of the transmission power. + * This is scheduled when we determine that the actual TX output + * power doesn't match what we want. */ + struct work_struct txpower_adjust_work; + + /* Packet transmit work */ + struct work_struct tx_work; + /* Queue of packets to be transmitted. */ + struct sk_buff_head tx_queue; + + /* The device LEDs. */ + struct b43_leds leds; + +#ifdef CONFIG_B43_PIO + /* + * RX/TX header/tail buffers used by the frame transmit functions. + */ + struct b43_rxhdr_fw4 rxhdr; + struct b43_txhdr txhdr; + u8 rx_tail[4]; + u8 tx_tail[4]; +#endif /* CONFIG_B43_PIO */ +}; + static inline struct b43_wl *hw_to_b43_wl(struct ieee80211_hw *hw) { return hw->priv; diff --git a/drivers/net/wireless/b43/leds.c b/drivers/net/wireless/b43/leds.c index fbe3d4f..1e8dba4 100644 --- a/drivers/net/wireless/b43/leds.c +++ b/drivers/net/wireless/b43/leds.c @@ -348,9 +348,9 @@ void b43_leds_register(struct b43_wldev *dev) } } -void b43_leds_unregister(struct b43_wldev *dev) +void b43_leds_unregister(struct b43_wl *wl) { - struct b43_leds *leds = &dev->wl->leds; + struct b43_leds *leds = &wl->leds; b43_unregister_led(&leds->led_tx); b43_unregister_led(&leds->led_rx); diff --git a/drivers/net/wireless/b43/leds.h b/drivers/net/wireless/b43/leds.h index 9592e4c..4c56187 100644 --- a/drivers/net/wireless/b43/leds.h +++ b/drivers/net/wireless/b43/leds.h @@ -60,7 +60,7 @@ enum b43_led_behaviour { }; void b43_leds_register(struct b43_wldev *dev); -void b43_leds_unregister(struct b43_wldev *dev); +void b43_leds_unregister(struct b43_wl *wl); void b43_leds_init(struct b43_wldev *dev); void b43_leds_exit(struct b43_wldev *dev); void b43_leds_stop(struct b43_wldev *dev); @@ -76,7 +76,7 @@ struct b43_leds { static inline void b43_leds_register(struct b43_wldev *dev) { } -static inline void b43_leds_unregister(struct b43_wldev *dev) +static inline void b43_leds_unregister(struct b43_wl *wl) { } static inline void b43_leds_init(struct b43_wldev *dev) diff --git a/drivers/net/wireless/b43/main.c b/drivers/net/wireless/b43/main.c index 9b907a3..df6b26a 100644 --- a/drivers/net/wireless/b43/main.c +++ b/drivers/net/wireless/b43/main.c @@ -3874,6 +3874,7 @@ static struct b43_wldev * b43_wireless_core_stop(struct b43_wldev *dev) { struct b43_wl *wl = dev->wl; struct b43_wldev *orig_dev; + u32 mask; redo: if (!dev || b43_status(dev) < B43_STAT_STARTED) @@ -3920,7 +3921,8 @@ redo: goto redo; return dev; } - B43_WARN_ON(b43_read32(dev, B43_MMIO_GEN_IRQ_MASK)); + mask = b43_read32(dev, B43_MMIO_GEN_IRQ_MASK); + B43_WARN_ON(mask != 0xFFFFFFFF && mask); /* Drain the TX queue */ while (skb_queue_len(&wl->tx_queue)) @@ -4499,6 +4501,7 @@ static void b43_op_stop(struct ieee80211_hw *hw) cancel_work_sync(&(wl->beacon_update_trigger)); + wiphy_rfkill_stop_polling(hw->wiphy); mutex_lock(&wl->mutex); if (b43_status(dev) >= B43_STAT_STARTED) { dev = b43_wireless_core_stop(dev); @@ -4997,7 +5000,7 @@ static void b43_remove(struct ssb_device *dev) if (list_empty(&wl->devlist)) { b43_rng_exit(wl); - b43_leds_unregister(wldev); + b43_leds_unregister(wl); /* Last core on the chip unregistered. * We can destroy common struct b43_wl. */ diff --git a/drivers/net/wireless/b43/pio.c b/drivers/net/wireless/b43/pio.c index 5e87650..9b90444 100644 --- a/drivers/net/wireless/b43/pio.c +++ b/drivers/net/wireless/b43/pio.c @@ -332,6 +332,7 @@ static u16 tx_write_2byte_queue(struct b43_pio_txqueue *q, unsigned int data_len) { struct b43_wldev *dev = q->dev; + struct b43_wl *wl = dev->wl; const u8 *data = _data; ctl |= B43_PIO_TXCTL_WRITELO | B43_PIO_TXCTL_WRITEHI; @@ -341,13 +342,12 @@ static u16 tx_write_2byte_queue(struct b43_pio_txqueue *q, q->mmio_base + B43_PIO_TXDATA, sizeof(u16)); if (data_len & 1) { - u8 tail[2] = { 0, }; - /* Write the last byte. */ ctl &= ~B43_PIO_TXCTL_WRITEHI; b43_piotx_write16(q, B43_PIO_TXCTL, ctl); - tail[0] = data[data_len - 1]; - ssb_block_write(dev->dev, tail, 2, + wl->tx_tail[0] = data[data_len - 1]; + wl->tx_tail[1] = 0; + ssb_block_write(dev->dev, wl->tx_tail, 2, q->mmio_base + B43_PIO_TXDATA, sizeof(u16)); } @@ -382,6 +382,7 @@ static u32 tx_write_4byte_queue(struct b43_pio_txqueue *q, unsigned int data_len) { struct b43_wldev *dev = q->dev; + struct b43_wl *wl = dev->wl; const u8 *data = _data; ctl |= B43_PIO8_TXCTL_0_7 | B43_PIO8_TXCTL_8_15 | @@ -392,29 +393,31 @@ static u32 tx_write_4byte_queue(struct b43_pio_txqueue *q, q->mmio_base + B43_PIO8_TXDATA, sizeof(u32)); if (data_len & 3) { - u8 tail[4] = { 0, }; - + wl->tx_tail[3] = 0; /* Write the last few bytes. */ ctl &= ~(B43_PIO8_TXCTL_8_15 | B43_PIO8_TXCTL_16_23 | B43_PIO8_TXCTL_24_31); switch (data_len & 3) { case 3: ctl |= B43_PIO8_TXCTL_16_23 | B43_PIO8_TXCTL_8_15; - tail[0] = data[data_len - 3]; - tail[1] = data[data_len - 2]; - tail[2] = data[data_len - 1]; + wl->tx_tail[0] = data[data_len - 3]; + wl->tx_tail[1] = data[data_len - 2]; + wl->tx_tail[2] = data[data_len - 1]; break; case 2: ctl |= B43_PIO8_TXCTL_8_15; - tail[0] = data[data_len - 2]; - tail[1] = data[data_len - 1]; + wl->tx_tail[0] = data[data_len - 2]; + wl->tx_tail[1] = data[data_len - 1]; + wl->tx_tail[2] = 0; break; case 1: - tail[0] = data[data_len - 1]; + wl->tx_tail[0] = data[data_len - 1]; + wl->tx_tail[1] = 0; + wl->tx_tail[2] = 0; break; } b43_piotx_write32(q, B43_PIO8_TXCTL, ctl); - ssb_block_write(dev->dev, tail, 4, + ssb_block_write(dev->dev, wl->tx_tail, 4, q->mmio_base + B43_PIO8_TXDATA, sizeof(u32)); } @@ -446,8 +449,9 @@ static void pio_tx_frame_4byte_queue(struct b43_pio_txpacket *pack, static int pio_tx_frame(struct b43_pio_txqueue *q, struct sk_buff *skb) { + struct b43_wldev *dev = q->dev; + struct b43_wl *wl = dev->wl; struct b43_pio_txpacket *pack; - struct b43_txhdr txhdr; u16 cookie; int err; unsigned int hdrlen; @@ -458,8 +462,8 @@ static int pio_tx_frame(struct b43_pio_txqueue *q, struct b43_pio_txpacket, list); cookie = generate_cookie(q, pack); - hdrlen = b43_txhdr_size(q->dev); - err = b43_generate_txhdr(q->dev, (u8 *)&txhdr, skb, + hdrlen = b43_txhdr_size(dev); + err = b43_generate_txhdr(dev, (u8 *)&wl->txhdr, skb, info, cookie); if (err) return err; @@ -467,15 +471,15 @@ static int pio_tx_frame(struct b43_pio_txqueue *q, if (info->flags & IEEE80211_TX_CTL_SEND_AFTER_DTIM) { /* Tell the firmware about the cookie of the last * mcast frame, so it can clear the more-data bit in it. */ - b43_shm_write16(q->dev, B43_SHM_SHARED, + b43_shm_write16(dev, B43_SHM_SHARED, B43_SHM_SH_MCASTCOOKIE, cookie); } pack->skb = skb; if (q->rev >= 8) - pio_tx_frame_4byte_queue(pack, (const u8 *)&txhdr, hdrlen); + pio_tx_frame_4byte_queue(pack, (const u8 *)&wl->txhdr, hdrlen); else - pio_tx_frame_2byte_queue(pack, (const u8 *)&txhdr, hdrlen); + pio_tx_frame_2byte_queue(pack, (const u8 *)&wl->txhdr, hdrlen); /* Remove it from the list of available packet slots. * It will be put back when we receive the status report. */ @@ -615,14 +619,14 @@ void b43_pio_get_tx_stats(struct b43_wldev *dev, static bool pio_rx_frame(struct b43_pio_rxqueue *q) { struct b43_wldev *dev = q->dev; - struct b43_rxhdr_fw4 rxhdr; + struct b43_wl *wl = dev->wl; u16 len; u32 macstat; unsigned int i, padding; struct sk_buff *skb; const char *err_msg = NULL; - memset(&rxhdr, 0, sizeof(rxhdr)); + memset(&wl->rxhdr, 0, sizeof(wl->rxhdr)); /* Check if we have data and wait for it to get ready. */ if (q->rev >= 8) { @@ -660,16 +664,16 @@ data_ready: /* Get the preamble (RX header) */ if (q->rev >= 8) { - ssb_block_read(dev->dev, &rxhdr, sizeof(rxhdr), + ssb_block_read(dev->dev, &wl->rxhdr, sizeof(wl->rxhdr), q->mmio_base + B43_PIO8_RXDATA, sizeof(u32)); } else { - ssb_block_read(dev->dev, &rxhdr, sizeof(rxhdr), + ssb_block_read(dev->dev, &wl->rxhdr, sizeof(wl->rxhdr), q->mmio_base + B43_PIO_RXDATA, sizeof(u16)); } /* Sanity checks. */ - len = le16_to_cpu(rxhdr.frame_len); + len = le16_to_cpu(wl->rxhdr.frame_len); if (unlikely(len > 0x700)) { err_msg = "len > 0x700"; goto rx_error; @@ -679,7 +683,7 @@ data_ready: goto rx_error; } - macstat = le32_to_cpu(rxhdr.mac_status); + macstat = le32_to_cpu(wl->rxhdr.mac_status); if (macstat & B43_RX_MAC_FCSERR) { if (!(q->dev->wl->filter_flags & FIF_FCSFAIL)) { /* Drop frames with failed FCS. */ @@ -704,24 +708,22 @@ data_ready: q->mmio_base + B43_PIO8_RXDATA, sizeof(u32)); if (len & 3) { - u8 tail[4] = { 0, }; - /* Read the last few bytes. */ - ssb_block_read(dev->dev, tail, 4, + ssb_block_read(dev->dev, wl->rx_tail, 4, q->mmio_base + B43_PIO8_RXDATA, sizeof(u32)); switch (len & 3) { case 3: - skb->data[len + padding - 3] = tail[0]; - skb->data[len + padding - 2] = tail[1]; - skb->data[len + padding - 1] = tail[2]; + skb->data[len + padding - 3] = wl->rx_tail[0]; + skb->data[len + padding - 2] = wl->rx_tail[1]; + skb->data[len + padding - 1] = wl->rx_tail[2]; break; case 2: - skb->data[len + padding - 2] = tail[0]; - skb->data[len + padding - 1] = tail[1]; + skb->data[len + padding - 2] = wl->rx_tail[0]; + skb->data[len + padding - 1] = wl->rx_tail[1]; break; case 1: - skb->data[len + padding - 1] = tail[0]; + skb->data[len + padding - 1] = wl->rx_tail[0]; break; } } @@ -730,17 +732,15 @@ data_ready: q->mmio_base + B43_PIO_RXDATA, sizeof(u16)); if (len & 1) { - u8 tail[2] = { 0, }; - /* Read the last byte. */ - ssb_block_read(dev->dev, tail, 2, + ssb_block_read(dev->dev, wl->rx_tail, 2, q->mmio_base + B43_PIO_RXDATA, sizeof(u16)); - skb->data[len + padding - 1] = tail[0]; + skb->data[len + padding - 1] = wl->rx_tail[0]; } } - b43_rx(q->dev, skb, &rxhdr); + b43_rx(q->dev, skb, &wl->rxhdr); return 1; diff --git a/drivers/net/wireless/b43/xmit.c b/drivers/net/wireless/b43/xmit.c index ac9f600..f4e9695 100644 --- a/drivers/net/wireless/b43/xmit.c +++ b/drivers/net/wireless/b43/xmit.c @@ -27,7 +27,7 @@ */ -#include "xmit.h" +#include "b43.h" #include "phy_common.h" #include "dma.h" #include "pio.h" @@ -690,7 +690,10 @@ void b43_rx(struct b43_wldev *dev, struct sk_buff *skb, const void *_rxhdr) } memcpy(IEEE80211_SKB_RXCB(skb), &status, sizeof(status)); + + local_bh_disable(); ieee80211_rx(dev->wl->hw, skb); + local_bh_enable(); #if B43_DEBUG dev->rx_count++; diff --git a/drivers/net/wireless/iwlwifi/iwl-3945-rs.c b/drivers/net/wireless/iwlwifi/iwl-3945-rs.c index a16bd41..cbb0585 100644 --- a/drivers/net/wireless/iwlwifi/iwl-3945-rs.c +++ b/drivers/net/wireless/iwlwifi/iwl-3945-rs.c @@ -702,7 +702,7 @@ static void rs_get_rate(void *priv_r, struct ieee80211_sta *sta, u8 sta_id = iwl_find_station(priv, hdr->addr1); if (sta_id == IWL_INVALID_STATION) { - IWL_DEBUG_RATE(priv, "LQ: ADD station %pm\n", + IWL_DEBUG_RATE(priv, "LQ: ADD station %pM\n", hdr->addr1); sta_id = iwl_add_station(priv, hdr->addr1, false, CMD_ASYNC, NULL); diff --git a/drivers/net/wireless/iwlwifi/iwl-3945.c b/drivers/net/wireless/iwlwifi/iwl-3945.c index 6813617..f059b49 100644 --- a/drivers/net/wireless/iwlwifi/iwl-3945.c +++ b/drivers/net/wireless/iwlwifi/iwl-3945.c @@ -611,7 +611,7 @@ static void iwl3945_rx_reply_rx(struct iwl_priv *priv, if (rx_status.band == IEEE80211_BAND_5GHZ) rx_status.rate_idx -= IWL_FIRST_OFDM_RATE; - rx_status.antenna = le16_to_cpu(rx_hdr->phy_flags & + rx_status.antenna = (le16_to_cpu(rx_hdr->phy_flags) & RX_RES_PHY_FLAGS_ANTENNA_MSK) >> 4; /* set the preamble flag if appropriate */ diff --git a/drivers/net/wireless/iwlwifi/iwl-5000.c b/drivers/net/wireless/iwlwifi/iwl-5000.c index d6bc0e0..6e6f516 100644 --- a/drivers/net/wireless/iwlwifi/iwl-5000.c +++ b/drivers/net/wireless/iwlwifi/iwl-5000.c @@ -318,7 +318,7 @@ static void iwl5000_gain_computation(struct iwl_priv *priv, (s32)average_noise[i])) / 1500; /* bound gain by 2 bits value max, 3rd bit is sign */ data->delta_gain_code[i] = - min(abs(delta_g), CHAIN_NOISE_MAX_DELTA_GAIN_CODE); + min(abs(delta_g), (long) CHAIN_NOISE_MAX_DELTA_GAIN_CODE); if (delta_g < 0) /* set negative sign */ diff --git a/drivers/net/wireless/iwlwifi/iwl-agn.c b/drivers/net/wireless/iwlwifi/iwl-agn.c index 313d3e5..eaafae0 100644 --- a/drivers/net/wireless/iwlwifi/iwl-agn.c +++ b/drivers/net/wireless/iwlwifi/iwl-agn.c @@ -3106,8 +3106,8 @@ static int iwl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) out_pci_disable_device: pci_disable_device(pdev); out_ieee80211_free_hw: - ieee80211_free_hw(priv->hw); iwl_free_traffic_mem(priv); + ieee80211_free_hw(priv->hw); out: return err; } diff --git a/drivers/net/wireless/iwlwifi/iwl-commands.h b/drivers/net/wireless/iwlwifi/iwl-commands.h index 2c5c88f..4afaf77 100644 --- a/drivers/net/wireless/iwlwifi/iwl-commands.h +++ b/drivers/net/wireless/iwlwifi/iwl-commands.h @@ -1154,7 +1154,7 @@ struct iwl_wep_cmd { #define RX_RES_PHY_FLAGS_MOD_CCK_MSK cpu_to_le16(1 << 1) #define RX_RES_PHY_FLAGS_SHORT_PREAMBLE_MSK cpu_to_le16(1 << 2) #define RX_RES_PHY_FLAGS_NARROW_BAND_MSK cpu_to_le16(1 << 3) -#define RX_RES_PHY_FLAGS_ANTENNA_MSK cpu_to_le16(0xf0) +#define RX_RES_PHY_FLAGS_ANTENNA_MSK 0xf0 #define RX_RES_PHY_FLAGS_ANTENNA_POS 4 #define RX_RES_STATUS_SEC_TYPE_MSK (0x7 << 8) diff --git a/drivers/net/wireless/iwlwifi/iwl-eeprom.c b/drivers/net/wireless/iwlwifi/iwl-eeprom.c index 3d2b93a..e14c995 100644 --- a/drivers/net/wireless/iwlwifi/iwl-eeprom.c +++ b/drivers/net/wireless/iwlwifi/iwl-eeprom.c @@ -410,7 +410,6 @@ static int iwl_find_otp_image(struct iwl_priv *priv, u16 *validblockaddr) { u16 next_link_addr = 0, link_value = 0, valid_addr; - int ret = 0; int usedblocks = 0; /* set addressing mode to absolute to traverse the link list */ @@ -430,29 +429,29 @@ static int iwl_find_otp_image(struct iwl_priv *priv, * check for more block on the link list */ valid_addr = next_link_addr; - next_link_addr = link_value; + next_link_addr = link_value * sizeof(u16); IWL_DEBUG_INFO(priv, "OTP blocks %d addr 0x%x\n", usedblocks, next_link_addr); if (iwl_read_otp_word(priv, next_link_addr, &link_value)) return -EINVAL; if (!link_value) { /* - * reach the end of link list, + * reach the end of link list, return success and * set address point to the starting address * of the image */ - goto done; + *validblockaddr = valid_addr; + /* skip first 2 bytes (link list pointer) */ + *validblockaddr += 2; + return 0; } /* more in the link list, continue */ usedblocks++; - } while (usedblocks < priv->cfg->max_ll_items); - /* OTP full, use last block */ - IWL_DEBUG_INFO(priv, "OTP is full, use last block\n"); -done: - *validblockaddr = valid_addr; - /* skip first 2 bytes (link list pointer) */ - *validblockaddr += 2; - return ret; + } while (usedblocks <= priv->cfg->max_ll_items); + + /* OTP has no valid blocks */ + IWL_DEBUG_INFO(priv, "OTP has no valid blocks\n"); + return -EINVAL; } /** diff --git a/drivers/net/wireless/iwlwifi/iwl-eeprom.h b/drivers/net/wireless/iwlwifi/iwl-eeprom.h index 6b68db7..80b9e45 100644 --- a/drivers/net/wireless/iwlwifi/iwl-eeprom.h +++ b/drivers/net/wireless/iwlwifi/iwl-eeprom.h @@ -220,35 +220,35 @@ struct iwl_eeprom_enhanced_txpwr { * Section 10: 2.4 GHz 40MHz channels: 132, 44 (_above_) */ /* 2.4 GHz band: CCK */ -#define EEPROM_LB_CCK_20_COMMON ((0xAA)\ +#define EEPROM_LB_CCK_20_COMMON ((0xA8)\ | INDIRECT_ADDRESS | INDIRECT_REGULATORY) /* 8 bytes */ /* 2.4 GHz band: 20MHz-Legacy, 20MHz-HT, 40MHz-HT */ -#define EEPROM_LB_OFDM_COMMON ((0xB2)\ +#define EEPROM_LB_OFDM_COMMON ((0xB0)\ | INDIRECT_ADDRESS | INDIRECT_REGULATORY) /* 24 bytes */ /* 5.2 GHz band: 20MHz-Legacy, 20MHz-HT, 40MHz-HT */ -#define EEPROM_HB_OFDM_COMMON ((0xCA)\ +#define EEPROM_HB_OFDM_COMMON ((0xC8)\ | INDIRECT_ADDRESS | INDIRECT_REGULATORY) /* 24 bytes */ /* 2.4GHz band channels: * 1Legacy, 1HT, 2Legacy, 2HT, 10Legacy, 10HT, 11Legacy, 11HT */ -#define EEPROM_LB_OFDM_20_BAND ((0xE2)\ +#define EEPROM_LB_OFDM_20_BAND ((0xE0)\ | INDIRECT_ADDRESS | INDIRECT_REGULATORY) /* 64 bytes */ /* 2.4 GHz band HT40 channels: (1,+1) (2,+1) (6,+1) (7,+1) (9,+1) */ -#define EEPROM_LB_OFDM_HT40_BAND ((0x122)\ +#define EEPROM_LB_OFDM_HT40_BAND ((0x120)\ | INDIRECT_ADDRESS | INDIRECT_REGULATORY) /* 40 bytes */ /* 5.2GHz band channels: 36Legacy, 36HT, 64Legacy, 64HT, 100Legacy, 100HT */ -#define EEPROM_HB_OFDM_20_BAND ((0x14A)\ +#define EEPROM_HB_OFDM_20_BAND ((0x148)\ | INDIRECT_ADDRESS | INDIRECT_REGULATORY) /* 48 bytes */ /* 5.2 GHz band HT40 channels: (36,+1) (60,+1) (100,+1) */ -#define EEPROM_HB_OFDM_HT40_BAND ((0x17A)\ +#define EEPROM_HB_OFDM_HT40_BAND ((0x178)\ | INDIRECT_ADDRESS | INDIRECT_REGULATORY) /* 24 bytes */ /* 2.4 GHz band, channnel 13: Legacy, HT */ -#define EEPROM_LB_OFDM_20_CHANNEL_13 ((0x192)\ +#define EEPROM_LB_OFDM_20_CHANNEL_13 ((0x190)\ | INDIRECT_ADDRESS | INDIRECT_REGULATORY) /* 16 bytes */ /* 5.2 GHz band, channnel 140: Legacy, HT */ -#define EEPROM_HB_OFDM_20_CHANNEL_140 ((0x1A2)\ +#define EEPROM_HB_OFDM_20_CHANNEL_140 ((0x1A0)\ | INDIRECT_ADDRESS | INDIRECT_REGULATORY) /* 16 bytes */ /* 5.2 GHz band, HT40 channnels (132,+1) (44,+1) */ -#define EEPROM_HB_OFDM_HT40_BAND_1 ((0x1B2)\ +#define EEPROM_HB_OFDM_HT40_BAND_1 ((0x1B0)\ | INDIRECT_ADDRESS | INDIRECT_REGULATORY) /* 16 bytes */ diff --git a/drivers/net/wireless/iwlwifi/iwl-rx.c b/drivers/net/wireless/iwlwifi/iwl-rx.c index 8e1bb53..493626b 100644 --- a/drivers/net/wireless/iwlwifi/iwl-rx.c +++ b/drivers/net/wireless/iwlwifi/iwl-rx.c @@ -1044,7 +1044,7 @@ void iwl_rx_reply_rx(struct iwl_priv *priv, * as a bitmask. */ rx_status.antenna = - le16_to_cpu(phy_res->phy_flags & RX_RES_PHY_FLAGS_ANTENNA_MSK) + (le16_to_cpu(phy_res->phy_flags) & RX_RES_PHY_FLAGS_ANTENNA_MSK) >> RX_RES_PHY_FLAGS_ANTENNA_POS; /* set the preamble flag if appropriate */ diff --git a/drivers/net/wireless/iwlwifi/iwl3945-base.c b/drivers/net/wireless/iwlwifi/iwl3945-base.c index aa49230..d00a803 100644 --- a/drivers/net/wireless/iwlwifi/iwl3945-base.c +++ b/drivers/net/wireless/iwlwifi/iwl3945-base.c @@ -4097,8 +4097,8 @@ static int iwl3945_pci_probe(struct pci_dev *pdev, const struct pci_device_id *e pci_set_drvdata(pdev, NULL); pci_disable_device(pdev); out_ieee80211_free_hw: - ieee80211_free_hw(priv->hw); iwl_free_traffic_mem(priv); + ieee80211_free_hw(priv->hw); out: return err; } diff --git a/drivers/net/wireless/libertas/cmdresp.c b/drivers/net/wireless/libertas/cmdresp.c index c42d3fa..23f6843 100644 --- a/drivers/net/wireless/libertas/cmdresp.c +++ b/drivers/net/wireless/libertas/cmdresp.c @@ -3,6 +3,7 @@ * responses as well as events generated by firmware. */ #include <linux/delay.h> +#include <linux/sched.h> #include <linux/if_arp.h> #include <linux/netdevice.h> #include <asm/unaligned.h> diff --git a/drivers/net/znet.c b/drivers/net/znet.c index a0384b6..b423473 100644 --- a/drivers/net/znet.c +++ b/drivers/net/znet.c @@ -169,7 +169,6 @@ static void znet_tx_timeout (struct net_device *dev); static int znet_request_resources (struct net_device *dev) { struct znet_private *znet = netdev_priv(dev); - unsigned long flags; if (request_irq (dev->irq, &znet_interrupt, 0, "ZNet", dev)) goto failed; @@ -187,13 +186,9 @@ static int znet_request_resources (struct net_device *dev) free_sia: release_region (znet->sia_base, znet->sia_size); free_tx_dma: - flags = claim_dma_lock(); free_dma (znet->tx_dma); - release_dma_lock (flags); free_rx_dma: - flags = claim_dma_lock(); free_dma (znet->rx_dma); - release_dma_lock (flags); free_irq: free_irq (dev->irq, dev); failed: @@ -203,14 +198,11 @@ static int znet_request_resources (struct net_device *dev) static void znet_release_resources (struct net_device *dev) { struct znet_private *znet = netdev_priv(dev); - unsigned long flags; release_region (znet->sia_base, znet->sia_size); release_region (dev->base_addr, znet->io_size); - flags = claim_dma_lock(); free_dma (znet->tx_dma); free_dma (znet->rx_dma); - release_dma_lock (flags); free_irq (dev->irq, dev); } diff --git a/drivers/platform/x86/eeepc-laptop.c b/drivers/platform/x86/eeepc-laptop.c index 749e210..d379e74 100644 --- a/drivers/platform/x86/eeepc-laptop.c +++ b/drivers/platform/x86/eeepc-laptop.c @@ -150,6 +150,8 @@ struct eeepc_hotk { /* The actual device the driver binds to */ static struct eeepc_hotk *ehotk; +static void eeepc_rfkill_hotplug(bool real); + /* Platform device/driver */ static int eeepc_hotk_thaw(struct device *device); static int eeepc_hotk_restore(struct device *device); @@ -343,14 +345,23 @@ static bool eeepc_wlan_rfkill_blocked(void) static int eeepc_rfkill_set(void *data, bool blocked) { unsigned long asl = (unsigned long)data; - return set_acpi(asl, !blocked); + int ret; + + if (asl != CM_ASL_WLAN) + return set_acpi(asl, !blocked); + + /* hack to avoid panic with rt2860sta */ + if (blocked) + eeepc_rfkill_hotplug(false); + ret = set_acpi(asl, !blocked); + return ret; } static const struct rfkill_ops eeepc_rfkill_ops = { .set_block = eeepc_rfkill_set, }; -static void __init eeepc_enable_camera(void) +static void __devinit eeepc_enable_camera(void) { /* * If the following call to set_acpi() fails, it's because there's no @@ -643,13 +654,13 @@ static int eeepc_get_adapter_status(struct hotplug_slot *hotplug_slot, return 0; } -static void eeepc_rfkill_hotplug(void) +static void eeepc_rfkill_hotplug(bool real) { struct pci_dev *dev; struct pci_bus *bus; - bool blocked = eeepc_wlan_rfkill_blocked(); + bool blocked = real ? eeepc_wlan_rfkill_blocked() : true; - if (ehotk->wlan_rfkill) + if (real && ehotk->wlan_rfkill) rfkill_set_sw_state(ehotk->wlan_rfkill, blocked); mutex_lock(&ehotk->hotplug_lock); @@ -692,7 +703,7 @@ static void eeepc_rfkill_notify(acpi_handle handle, u32 event, void *data) if (event != ACPI_NOTIFY_BUS_CHECK) return; - eeepc_rfkill_hotplug(); + eeepc_rfkill_hotplug(true); } static void eeepc_hotk_notify(struct acpi_device *device, u32 event) @@ -850,7 +861,7 @@ static int eeepc_hotk_restore(struct device *device) { /* Refresh both wlan rfkill state and pci hotplug */ if (ehotk->wlan_rfkill) - eeepc_rfkill_hotplug(); + eeepc_rfkill_hotplug(true); if (ehotk->bluetooth_rfkill) rfkill_set_sw_state(ehotk->bluetooth_rfkill, @@ -993,7 +1004,7 @@ static void eeepc_rfkill_exit(void) * Refresh pci hotplug in case the rfkill state was changed after * eeepc_unregister_rfkill_notifier() */ - eeepc_rfkill_hotplug(); + eeepc_rfkill_hotplug(true); if (ehotk->hotplug_slot) pci_hp_deregister(ehotk->hotplug_slot); @@ -1109,7 +1120,7 @@ static int eeepc_rfkill_init(struct device *dev) * Refresh pci hotplug in case the rfkill state was changed during * setup. */ - eeepc_rfkill_hotplug(); + eeepc_rfkill_hotplug(true); exit: if (result && result != -ENODEV) @@ -1189,7 +1200,7 @@ static int eeepc_input_init(struct device *dev) return 0; } -static int eeepc_hotk_add(struct acpi_device *device) +static int __devinit eeepc_hotk_add(struct acpi_device *device) { struct device *dev; int result; diff --git a/drivers/platform/x86/fujitsu-laptop.c b/drivers/platform/x86/fujitsu-laptop.c index f35aee5..bcd4ba8 100644 --- a/drivers/platform/x86/fujitsu-laptop.c +++ b/drivers/platform/x86/fujitsu-laptop.c @@ -944,7 +944,7 @@ static int acpi_fujitsu_hotkey_remove(struct acpi_device *device, int type) struct fujitsu_hotkey_t *fujitsu_hotkey = acpi_driver_data(device); struct input_dev *input = fujitsu_hotkey->input; -#ifdef CONFIG_LEDS_CLASS +#if defined(CONFIG_LEDS_CLASS) || defined(CONFIG_LEDS_CLASS_MODULE) if (fujitsu_hotkey->logolamp_registered) led_classdev_unregister(&logolamp_led); diff --git a/drivers/staging/b3dfg/b3dfg.c b/drivers/staging/b3dfg/b3dfg.c index 94c5d27..cda26bb4 100644 --- a/drivers/staging/b3dfg/b3dfg.c +++ b/drivers/staging/b3dfg/b3dfg.c @@ -36,6 +36,7 @@ #include <linux/wait.h> #include <linux/mm.h> #include <linux/uaccess.h> +#include <linux/sched.h> static unsigned int b3dfg_nbuf = 2; diff --git a/drivers/staging/comedi/drivers/me_daq.c b/drivers/staging/comedi/drivers/me_daq.c index 2cda7ad..80e192d 100644 --- a/drivers/staging/comedi/drivers/me_daq.c +++ b/drivers/staging/comedi/drivers/me_daq.c @@ -51,6 +51,7 @@ from http://www.comedi.org */ #include <linux/interrupt.h> +#include <linux/sched.h> #include "../comedidev.h" #include "comedi_pci.h" diff --git a/drivers/staging/comedi/drivers/ni_mio_common.c b/drivers/staging/comedi/drivers/ni_mio_common.c index e3ffb06..753ee05 100644 --- a/drivers/staging/comedi/drivers/ni_mio_common.c +++ b/drivers/staging/comedi/drivers/ni_mio_common.c @@ -62,6 +62,7 @@ /* #define DEBUG_STATUS_B */ #include <linux/interrupt.h> +#include <linux/sched.h> #include "8255.h" #include "mite.h" #include "comedi_fc.h" diff --git a/drivers/staging/comedi/drivers/ni_pcidio.c b/drivers/staging/comedi/drivers/ni_pcidio.c index 52b2eca..d544698 100644 --- a/drivers/staging/comedi/drivers/ni_pcidio.c +++ b/drivers/staging/comedi/drivers/ni_pcidio.c @@ -70,6 +70,7 @@ comedi_nonfree_firmware tarball available from http://www.comedi.org /* #define DEBUG_FLAGS */ #include <linux/interrupt.h> +#include <linux/sched.h> #include "../comedidev.h" #include "mite.h" diff --git a/drivers/staging/et131x/et1310_address_map.h b/drivers/staging/et131x/et1310_address_map.h index 6294d38..2c3d65a 100644 --- a/drivers/staging/et131x/et1310_address_map.h +++ b/drivers/staging/et131x/et1310_address_map.h @@ -223,7 +223,7 @@ typedef union _TXDMA_PR_NUM_DES_t { extern inline void add_10bit(u32 *v, int n) { - *v = INDEX10(*v + n); + *v = INDEX10(*v + n) | (*v & ET_DMA10_WRAP); } /* diff --git a/drivers/staging/et131x/et1310_rx.c b/drivers/staging/et131x/et1310_rx.c index 8f2e91f..10e21db 100644 --- a/drivers/staging/et131x/et1310_rx.c +++ b/drivers/staging/et131x/et1310_rx.c @@ -1177,12 +1177,20 @@ void et131x_handle_recv_interrupt(struct et131x_adapter *etdev) static inline u32 bump_fbr(u32 *fbr, u32 limit) { - u32 v = *fbr; - add_10bit(&v, 1); - if (v > limit) - v = (*fbr & ~ET_DMA10_MASK) ^ ET_DMA10_WRAP; - *fbr = v; - return v; + u32 v = *fbr; + v++; + /* This works for all cases where limit < 1024. The 1023 case + works because 1023++ is 1024 which means the if condition is not + taken but the carry of the bit into the wrap bit toggles the wrap + value correctly */ + if ((v & ET_DMA10_MASK) > limit) { + v &= ~ET_DMA10_MASK; + v ^= ET_DMA10_WRAP; + } + /* For the 1023 case */ + v &= (ET_DMA10_MASK|ET_DMA10_WRAP); + *fbr = v; + return v; } /** diff --git a/drivers/staging/hv/osd.c b/drivers/staging/hv/osd.c index 8fe543b..3a4793a 100644 --- a/drivers/staging/hv/osd.c +++ b/drivers/staging/hv/osd.c @@ -30,6 +30,7 @@ #include <linux/ioport.h> #include <linux/irq.h> #include <linux/interrupt.h> +#include <linux/sched.h> #include <linux/wait.h> #include <linux/spinlock.h> #include <linux/workqueue.h> diff --git a/drivers/staging/iio/industrialio-core.c b/drivers/staging/iio/industrialio-core.c index 1fa18f2..768f448 100644 --- a/drivers/staging/iio/industrialio-core.c +++ b/drivers/staging/iio/industrialio-core.c @@ -18,6 +18,8 @@ #include <linux/fs.h> #include <linux/interrupt.h> #include <linux/poll.h> +#include <linux/sched.h> +#include <linux/wait.h> #include <linux/cdev.h> #include "iio.h" #include "trigger_consumer.h" diff --git a/drivers/staging/poch/poch.c b/drivers/staging/poch/poch.c index 0d111dd..2eb8e3d 100644 --- a/drivers/staging/poch/poch.c +++ b/drivers/staging/poch/poch.c @@ -20,6 +20,7 @@ #include <linux/init.h> #include <linux/ioctl.h> #include <linux/io.h> +#include <linux/sched.h> #include "poch.h" diff --git a/drivers/staging/rt2860/common/cmm_data_2860.c b/drivers/staging/rt2860/common/cmm_data_2860.c index fb17355..857ff45 100644 --- a/drivers/staging/rt2860/common/cmm_data_2860.c +++ b/drivers/staging/rt2860/common/cmm_data_2860.c @@ -363,6 +363,8 @@ int RtmpPCIMgmtKickOut( ULONG SwIdx = pAd->MgmtRing.TxCpuIdx; pTxD = (PTXD_STRUC) pAd->MgmtRing.Cell[SwIdx].AllocVa; + if (!pTxD) + return 0; pAd->MgmtRing.Cell[SwIdx].pNdisPacket = pPacket; pAd->MgmtRing.Cell[SwIdx].pNextNdisPacket = NULL; diff --git a/drivers/staging/rt2860/common/cmm_info.c b/drivers/staging/rt2860/common/cmm_info.c index 9d589c2..019cc44 100644 --- a/drivers/staging/rt2860/common/cmm_info.c +++ b/drivers/staging/rt2860/common/cmm_info.c @@ -25,6 +25,7 @@ ************************************************************************* */ +#include <linux/sched.h> #include "../rt_config.h" INT Show_SSID_Proc( diff --git a/drivers/staging/rt2860/rt_linux.c b/drivers/staging/rt2860/rt_linux.c index b396a9b..ed27b85 100644 --- a/drivers/staging/rt2860/rt_linux.c +++ b/drivers/staging/rt2860/rt_linux.c @@ -25,6 +25,7 @@ ************************************************************************* */ +#include <linux/sched.h> #include "rt_config.h" ULONG RTDebugLevel = RT_DEBUG_ERROR; diff --git a/drivers/staging/rt3090/common/cmm_info.c b/drivers/staging/rt3090/common/cmm_info.c index 5be0714..3e51e98 100644 --- a/drivers/staging/rt3090/common/cmm_info.c +++ b/drivers/staging/rt3090/common/cmm_info.c @@ -34,6 +34,7 @@ --------- ---------- ---------------------------------------------- */ +#include <linux/sched.h> #include "../rt_config.h" diff --git a/drivers/staging/rt3090/rt_linux.c b/drivers/staging/rt3090/rt_linux.c index d2241ec..9b94aa6 100644 --- a/drivers/staging/rt3090/rt_linux.c +++ b/drivers/staging/rt3090/rt_linux.c @@ -25,6 +25,7 @@ ************************************************************************* */ +#include <linux/sched.h> #include "rt_config.h" ULONG RTDebugLevel = RT_DEBUG_ERROR; diff --git a/drivers/staging/sep/sep_driver.c b/drivers/staging/sep/sep_driver.c index 87f8a11..f890a16 100644 --- a/drivers/staging/sep/sep_driver.c +++ b/drivers/staging/sep/sep_driver.c @@ -38,6 +38,7 @@ #include <linux/mm.h> #include <linux/poll.h> #include <linux/wait.h> +#include <linux/sched.h> #include <linux/pci.h> #include <linux/firmware.h> #include <asm/ioctl.h> diff --git a/drivers/staging/vme/bridges/vme_ca91cx42.c b/drivers/staging/vme/bridges/vme_ca91cx42.c index 3d2a84c..e139eae 100644 --- a/drivers/staging/vme/bridges/vme_ca91cx42.c +++ b/drivers/staging/vme/bridges/vme_ca91cx42.c @@ -25,6 +25,7 @@ #include <linux/poll.h> #include <linux/interrupt.h> #include <linux/spinlock.h> +#include <linux/sched.h> #include <asm/time.h> #include <asm/io.h> #include <asm/uaccess.h> diff --git a/drivers/staging/vme/bridges/vme_tsi148.c b/drivers/staging/vme/bridges/vme_tsi148.c index 8960fa9..00fe080 100644 --- a/drivers/staging/vme/bridges/vme_tsi148.c +++ b/drivers/staging/vme/bridges/vme_tsi148.c @@ -25,6 +25,7 @@ #include <linux/dma-mapping.h> #include <linux/interrupt.h> #include <linux/spinlock.h> +#include <linux/sched.h> #include <asm/time.h> #include <asm/io.h> #include <asm/uaccess.h> diff --git a/drivers/usb/gadget/ether.c b/drivers/usb/gadget/ether.c index f37de28..167cb2a 100644 --- a/drivers/usb/gadget/ether.c +++ b/drivers/usb/gadget/ether.c @@ -61,11 +61,6 @@ * simpler, Microsoft pushes their own approach: RNDIS. The published * RNDIS specs are ambiguous and appear to be incomplete, and are also * needlessly complex. They borrow more from CDC ACM than CDC ECM. - * - * While CDC ECM, CDC Subset, and RNDIS are designed to extend the ethernet - * interface to the target, CDC EEM was designed to use ethernet over the USB - * link between the host and target. CDC EEM is implemented as an alternative - * to those other protocols when that communication model is more appropriate */ #define DRIVER_DESC "Ethernet Gadget" @@ -157,8 +152,8 @@ static inline bool has_rndis(void) #define RNDIS_PRODUCT_NUM 0xa4a2 /* Ethernet/RNDIS Gadget */ /* For EEM gadgets */ -#define EEM_VENDOR_NUM 0x0525 /* INVALID - NEEDS TO BE ALLOCATED */ -#define EEM_PRODUCT_NUM 0xa4a1 /* INVALID - NEEDS TO BE ALLOCATED */ +#define EEM_VENDOR_NUM 0x1d6b /* Linux Foundation */ +#define EEM_PRODUCT_NUM 0x0102 /* EEM Gadget */ /*-------------------------------------------------------------------------*/ diff --git a/drivers/usb/host/ehci-sched.c b/drivers/usb/host/ehci-sched.c index 3efa59b..b25cdea 100644 --- a/drivers/usb/host/ehci-sched.c +++ b/drivers/usb/host/ehci-sched.c @@ -1400,6 +1400,10 @@ iso_stream_schedule ( goto fail; } + period = urb->interval; + if (!stream->highspeed) + period <<= 3; + now = ehci_readl(ehci, &ehci->regs->frame_index) % mod; /* when's the last uframe this urb could start? */ @@ -1417,8 +1421,8 @@ iso_stream_schedule ( /* Fell behind (by up to twice the slop amount)? */ if (start >= max - 2 * 8 * SCHEDULE_SLOP) - start += stream->interval * DIV_ROUND_UP( - max - start, stream->interval) - mod; + start += period * DIV_ROUND_UP( + max - start, period) - mod; /* Tried to schedule too far into the future? */ if (unlikely((start + sched->span) >= max)) { @@ -1441,10 +1445,6 @@ iso_stream_schedule ( /* NOTE: assumes URB_ISO_ASAP, to limit complexity/bugs */ - period = urb->interval; - if (!stream->highspeed) - period <<= 3; - /* find a uframe slot with enough bandwidth */ for (; start < (stream->next_uframe + period); start++) { int enough_space; diff --git a/drivers/usb/host/whci/asl.c b/drivers/usb/host/whci/asl.c index c632437..562eba1 100644 --- a/drivers/usb/host/whci/asl.c +++ b/drivers/usb/host/whci/asl.c @@ -115,6 +115,10 @@ static uint32_t process_qset(struct whc *whc, struct whc_qset *qset) if (status & QTD_STS_HALTED) { /* Ug, an error. */ process_halted_qtd(whc, qset, td); + /* A halted qTD always triggers an update + because the qset was either removed or + reactivated. */ + update |= WHC_UPDATE_UPDATED; goto done; } @@ -305,6 +309,7 @@ int asl_urb_dequeue(struct whc *whc, struct urb *urb, int status) struct whc_urb *wurb = urb->hcpriv; struct whc_qset *qset = wurb->qset; struct whc_std *std, *t; + bool has_qtd = false; int ret; unsigned long flags; @@ -315,17 +320,21 @@ int asl_urb_dequeue(struct whc *whc, struct urb *urb, int status) goto out; list_for_each_entry_safe(std, t, &qset->stds, list_node) { - if (std->urb == urb) + if (std->urb == urb) { + if (std->qtd) + has_qtd = true; qset_free_std(whc, std); - else + } else std->qtd = NULL; /* so this std is re-added when the qset is */ } - asl_qset_remove(whc, qset); - wurb->status = status; - wurb->is_async = true; - queue_work(whc->workqueue, &wurb->dequeue_work); - + if (has_qtd) { + asl_qset_remove(whc, qset); + wurb->status = status; + wurb->is_async = true; + queue_work(whc->workqueue, &wurb->dequeue_work); + } else + qset_remove_urb(whc, qset, urb, status); out: spin_unlock_irqrestore(&whc->lock, flags); diff --git a/drivers/usb/host/whci/pzl.c b/drivers/usb/host/whci/pzl.c index a9e05ba..0db3fb2 100644 --- a/drivers/usb/host/whci/pzl.c +++ b/drivers/usb/host/whci/pzl.c @@ -121,6 +121,10 @@ static enum whc_update pzl_process_qset(struct whc *whc, struct whc_qset *qset) if (status & QTD_STS_HALTED) { /* Ug, an error. */ process_halted_qtd(whc, qset, td); + /* A halted qTD always triggers an update + because the qset was either removed or + reactivated. */ + update |= WHC_UPDATE_UPDATED; goto done; } @@ -333,6 +337,7 @@ int pzl_urb_dequeue(struct whc *whc, struct urb *urb, int status) struct whc_urb *wurb = urb->hcpriv; struct whc_qset *qset = wurb->qset; struct whc_std *std, *t; + bool has_qtd = false; int ret; unsigned long flags; @@ -343,17 +348,22 @@ int pzl_urb_dequeue(struct whc *whc, struct urb *urb, int status) goto out; list_for_each_entry_safe(std, t, &qset->stds, list_node) { - if (std->urb == urb) + if (std->urb == urb) { + if (std->qtd) + has_qtd = true; qset_free_std(whc, std); - else + } else std->qtd = NULL; /* so this std is re-added when the qset is */ } - pzl_qset_remove(whc, qset); - wurb->status = status; - wurb->is_async = false; - queue_work(whc->workqueue, &wurb->dequeue_work); - + if (has_qtd) { + pzl_qset_remove(whc, qset); + update_pzl_hw_view(whc); + wurb->status = status; + wurb->is_async = false; + queue_work(whc->workqueue, &wurb->dequeue_work); + } else + qset_remove_urb(whc, qset, urb, status); out: spin_unlock_irqrestore(&whc->lock, flags); diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 43c2270..65d96b2 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -328,6 +328,9 @@ static int option_resume(struct usb_serial *serial); #define ALCATEL_VENDOR_ID 0x1bbb #define ALCATEL_PRODUCT_X060S 0x0000 +/* Airplus products */ +#define AIRPLUS_VENDOR_ID 0x1011 +#define AIRPLUS_PRODUCT_MCD650 0x3198 static struct usb_device_id option_ids[] = { { USB_DEVICE(OPTION_VENDOR_ID, OPTION_PRODUCT_COLT) }, @@ -589,6 +592,7 @@ static struct usb_device_id option_ids[] = { { USB_DEVICE(ALINK_VENDOR_ID, 0x9000) }, { USB_DEVICE_AND_INTERFACE_INFO(ALINK_VENDOR_ID, ALINK_PRODUCT_3GU, 0xff, 0xff, 0xff) }, { USB_DEVICE(ALCATEL_VENDOR_ID, ALCATEL_PRODUCT_X060S) }, + { USB_DEVICE(AIRPLUS_VENDOR_ID, AIRPLUS_PRODUCT_MCD650) }, { } /* Terminating entry */ }; MODULE_DEVICE_TABLE(usb, option_ids); diff --git a/drivers/usb/storage/transport.c b/drivers/usb/storage/transport.c index 3a4fb02..589f6b4 100644 --- a/drivers/usb/storage/transport.c +++ b/drivers/usb/storage/transport.c @@ -696,7 +696,7 @@ void usb_stor_invoke_transport(struct scsi_cmnd *srb, struct us_data *us) /* device supports and needs bigger sense buffer */ if (us->fflags & US_FL_SANE_SENSE) sense_size = ~0; - +Retry_Sense: US_DEBUGP("Issuing auto-REQUEST_SENSE\n"); scsi_eh_prep_cmnd(srb, &ses, NULL, 0, sense_size); @@ -720,6 +720,21 @@ void usb_stor_invoke_transport(struct scsi_cmnd *srb, struct us_data *us) srb->result = DID_ABORT << 16; goto Handle_Errors; } + + /* Some devices claim to support larger sense but fail when + * trying to request it. When a transport failure happens + * using US_FS_SANE_SENSE, we always retry with a standard + * (small) sense request. This fixes some USB GSM modems + */ + if (temp_result == USB_STOR_TRANSPORT_FAILED && + (us->fflags & US_FL_SANE_SENSE) && + sense_size != US_SENSE_SIZE) { + US_DEBUGP("-- auto-sense failure, retry small sense\n"); + sense_size = US_SENSE_SIZE; + goto Retry_Sense; + } + + /* Other failures */ if (temp_result != USB_STOR_TRANSPORT_GOOD) { US_DEBUGP("-- auto-sense failure\n"); diff --git a/drivers/usb/wusbcore/security.c b/drivers/usb/wusbcore/security.c index b2f149f..4516c36 100644 --- a/drivers/usb/wusbcore/security.c +++ b/drivers/usb/wusbcore/security.c @@ -200,35 +200,40 @@ int wusb_dev_sec_add(struct wusbhc *wusbhc, { int result, bytes, secd_size; struct device *dev = &usb_dev->dev; - struct usb_security_descriptor secd; + struct usb_security_descriptor *secd; const struct usb_encryption_descriptor *etd, *ccm1_etd = NULL; - void *secd_buf; const void *itr, *top; char buf[64]; + secd = kmalloc(sizeof(struct usb_security_descriptor), GFP_KERNEL); + if (secd == NULL) { + result = -ENOMEM; + goto out; + } + result = usb_get_descriptor(usb_dev, USB_DT_SECURITY, - 0, &secd, sizeof(secd)); + 0, secd, sizeof(struct usb_security_descriptor)); if (result < sizeof(secd)) { dev_err(dev, "Can't read security descriptor or " "not enough data: %d\n", result); - goto error_secd; + goto out; } - secd_size = le16_to_cpu(secd.wTotalLength); - secd_buf = kmalloc(secd_size, GFP_KERNEL); - if (secd_buf == NULL) { + secd_size = le16_to_cpu(secd->wTotalLength); + secd = krealloc(secd, secd_size, GFP_KERNEL); + if (secd == NULL) { dev_err(dev, "Can't allocate space for security descriptors\n"); - goto error_secd_alloc; + goto out; } result = usb_get_descriptor(usb_dev, USB_DT_SECURITY, - 0, secd_buf, secd_size); + 0, secd, secd_size); if (result < secd_size) { dev_err(dev, "Can't read security descriptor or " "not enough data: %d\n", result); - goto error_secd_all; + goto out; } bytes = 0; - itr = secd_buf + sizeof(secd); - top = secd_buf + result; + itr = &secd[1]; + top = (void *)secd + result; while (itr < top) { etd = itr; if (top - itr < sizeof(*etd)) { @@ -259,24 +264,16 @@ int wusb_dev_sec_add(struct wusbhc *wusbhc, dev_err(dev, "WUSB device doesn't support CCM1 encryption, " "can't use!\n"); result = -EINVAL; - goto error_no_ccm1; + goto out; } wusb_dev->ccm1_etd = *ccm1_etd; dev_dbg(dev, "supported encryption: %s; using %s (0x%02x/%02x)\n", buf, wusb_et_name(ccm1_etd->bEncryptionType), ccm1_etd->bEncryptionValue, ccm1_etd->bAuthKeyIndex); result = 0; - kfree(secd_buf); out: + kfree(secd); return result; - - -error_no_ccm1: -error_secd_all: - kfree(secd_buf); -error_secd_alloc: -error_secd: - goto out; } void wusb_dev_sec_rm(struct wusb_dev *wusb_dev) diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c index 69b355a..3616042 100644 --- a/fs/btrfs/acl.c +++ b/fs/btrfs/acl.c @@ -27,7 +27,7 @@ #include "btrfs_inode.h" #include "xattr.h" -#ifdef CONFIG_BTRFS_POSIX_ACL +#ifdef CONFIG_BTRFS_FS_POSIX_ACL static struct posix_acl *btrfs_get_acl(struct inode *inode, int type) { @@ -313,7 +313,7 @@ struct xattr_handler btrfs_xattr_acl_access_handler = { .set = btrfs_xattr_acl_access_set, }; -#else /* CONFIG_BTRFS_POSIX_ACL */ +#else /* CONFIG_BTRFS_FS_POSIX_ACL */ int btrfs_acl_chmod(struct inode *inode) { @@ -325,4 +325,4 @@ int btrfs_init_acl(struct inode *inode, struct inode *dir) return 0; } -#endif /* CONFIG_BTRFS_POSIX_ACL */ +#endif /* CONFIG_BTRFS_FS_POSIX_ACL */ diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index c71abec..f6783a4 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h @@ -86,6 +86,12 @@ struct btrfs_inode { * transid of the trans_handle that last modified this inode */ u64 last_trans; + + /* + * log transid when this inode was last modified + */ + u64 last_sub_trans; + /* * transid that last logged this inode */ diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 1bb897e..444b3e9 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1009,6 +1009,7 @@ struct btrfs_root { atomic_t log_writers; atomic_t log_commit[2]; unsigned long log_transid; + unsigned long last_log_commit; unsigned long log_batch; pid_t log_start_pid; bool log_multiple_pids; @@ -1152,6 +1153,7 @@ struct btrfs_root { #define BTRFS_MOUNT_FLUSHONCOMMIT (1 << 7) #define BTRFS_MOUNT_SSD_SPREAD (1 << 8) #define BTRFS_MOUNT_NOSSD (1 << 9) +#define BTRFS_MOUNT_DISCARD (1 << 10) #define btrfs_clear_opt(o, opt) ((o) &= ~BTRFS_MOUNT_##opt) #define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt) @@ -2373,7 +2375,7 @@ int btrfs_parse_options(struct btrfs_root *root, char *options); int btrfs_sync_fs(struct super_block *sb, int wait); /* acl.c */ -#ifdef CONFIG_BTRFS_POSIX_ACL +#ifdef CONFIG_BTRFS_FS_POSIX_ACL int btrfs_check_acl(struct inode *inode, int mask); #else #define btrfs_check_acl NULL diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 100551a..02b6afb 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -917,6 +917,7 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, atomic_set(&root->log_writers, 0); root->log_batch = 0; root->log_transid = 0; + root->last_log_commit = 0; extent_io_tree_init(&root->dirty_log_pages, fs_info->btree_inode->i_mapping, GFP_NOFS); @@ -1087,6 +1088,7 @@ int btrfs_add_log_tree(struct btrfs_trans_handle *trans, WARN_ON(root->log_root); root->log_root = log_root; root->log_transid = 0; + root->last_log_commit = 0; return 0; } diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index d0c4d58..e238a0c 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1568,23 +1568,23 @@ static int remove_extent_backref(struct btrfs_trans_handle *trans, return ret; } -#ifdef BIO_RW_DISCARD static void btrfs_issue_discard(struct block_device *bdev, u64 start, u64 len) { blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_KERNEL, DISCARD_FL_BARRIER); } -#endif static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr, u64 num_bytes) { -#ifdef BIO_RW_DISCARD int ret; u64 map_length = num_bytes; struct btrfs_multi_bio *multi = NULL; + if (!btrfs_test_opt(root, DISCARD)) + return 0; + /* Tell the block device(s) that the sectors can be discarded */ ret = btrfs_map_block(&root->fs_info->mapping_tree, READ, bytenr, &map_length, &multi, 0); @@ -1604,9 +1604,6 @@ static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr, } return ret; -#else - return 0; -#endif } int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, @@ -3690,6 +3687,14 @@ static int pin_down_bytes(struct btrfs_trans_handle *trans, if (is_data) goto pinit; + /* + * discard is sloooow, and so triggering discards on + * individual btree blocks isn't a good plan. Just + * pin everything in discard mode. + */ + if (btrfs_test_opt(root, DISCARD)) + goto pinit; + buf = btrfs_find_tree_block(root, bytenr, num_bytes); if (!buf) goto pinit; diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 2d623aa..06550af 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1086,8 +1086,10 @@ out_nolock: btrfs_end_transaction(trans, root); else btrfs_commit_transaction(trans, root); - } else { + } else if (ret != BTRFS_NO_LOG_SYNC) { btrfs_commit_transaction(trans, root); + } else { + btrfs_end_transaction(trans, root); } } if (file->f_flags & O_DIRECT) { @@ -1137,6 +1139,13 @@ int btrfs_sync_file(struct file *file, struct dentry *dentry, int datasync) int ret = 0; struct btrfs_trans_handle *trans; + + /* we wait first, since the writeback may change the inode */ + root->log_batch++; + /* the VFS called filemap_fdatawrite for us */ + btrfs_wait_ordered_range(inode, 0, (u64)-1); + root->log_batch++; + /* * check the transaction that last modified this inode * and see if its already been committed @@ -1144,6 +1153,11 @@ int btrfs_sync_file(struct file *file, struct dentry *dentry, int datasync) if (!BTRFS_I(inode)->last_trans) goto out; + /* + * if the last transaction that changed this file was before + * the current transaction, we can bail out now without any + * syncing + */ mutex_lock(&root->fs_info->trans_mutex); if (BTRFS_I(inode)->last_trans <= root->fs_info->last_trans_committed) { @@ -1153,13 +1167,6 @@ int btrfs_sync_file(struct file *file, struct dentry *dentry, int datasync) } mutex_unlock(&root->fs_info->trans_mutex); - root->log_batch++; - filemap_fdatawrite(inode->i_mapping); - btrfs_wait_ordered_range(inode, 0, (u64)-1); - root->log_batch++; - - if (datasync && !(inode->i_state & I_DIRTY_PAGES)) - goto out; /* * ok we haven't committed the transaction yet, lets do a commit */ @@ -1188,14 +1195,18 @@ int btrfs_sync_file(struct file *file, struct dentry *dentry, int datasync) */ mutex_unlock(&dentry->d_inode->i_mutex); - if (ret > 0) { - ret = btrfs_commit_transaction(trans, root); - } else { - ret = btrfs_sync_log(trans, root); - if (ret == 0) - ret = btrfs_end_transaction(trans, root); - else + if (ret != BTRFS_NO_LOG_SYNC) { + if (ret > 0) { ret = btrfs_commit_transaction(trans, root); + } else { + ret = btrfs_sync_log(trans, root); + if (ret == 0) + ret = btrfs_end_transaction(trans, root); + else + ret = btrfs_commit_transaction(trans, root); + } + } else { + ret = btrfs_end_transaction(trans, root); } mutex_lock(&dentry->d_inode->i_mutex); out: diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 9e138b7..dae12dc 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -3032,12 +3032,22 @@ static int btrfs_truncate_page(struct address_space *mapping, loff_t from) if ((offset & (blocksize - 1)) == 0) goto out; + ret = btrfs_check_data_free_space(root, inode, PAGE_CACHE_SIZE); + if (ret) + goto out; + + ret = btrfs_reserve_metadata_for_delalloc(root, inode, 1); + if (ret) + goto out; ret = -ENOMEM; again: page = grab_cache_page(mapping, index); - if (!page) + if (!page) { + btrfs_free_reserved_data_space(root, inode, PAGE_CACHE_SIZE); + btrfs_unreserve_metadata_for_delalloc(root, inode, 1); goto out; + } page_start = page_offset(page); page_end = page_start + PAGE_CACHE_SIZE - 1; @@ -3070,6 +3080,10 @@ again: goto again; } + clear_extent_bits(&BTRFS_I(inode)->io_tree, page_start, page_end, + EXTENT_DIRTY | EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING, + GFP_NOFS); + ret = btrfs_set_extent_delalloc(inode, page_start, page_end); if (ret) { unlock_extent(io_tree, page_start, page_end, GFP_NOFS); @@ -3088,6 +3102,9 @@ again: unlock_extent(io_tree, page_start, page_end, GFP_NOFS); out_unlock: + if (ret) + btrfs_free_reserved_data_space(root, inode, PAGE_CACHE_SIZE); + btrfs_unreserve_metadata_for_delalloc(root, inode, 1); unlock_page(page); page_cache_release(page); out: @@ -3111,7 +3128,9 @@ int btrfs_cont_expand(struct inode *inode, loff_t size) if (size <= hole_start) return 0; - btrfs_truncate_page(inode->i_mapping, inode->i_size); + err = btrfs_truncate_page(inode->i_mapping, inode->i_size); + if (err) + return err; while (1) { struct btrfs_ordered_extent *ordered; @@ -3480,6 +3499,7 @@ static noinline void init_btrfs_i(struct inode *inode) bi->generation = 0; bi->sequence = 0; bi->last_trans = 0; + bi->last_sub_trans = 0; bi->logged_trans = 0; bi->delalloc_bytes = 0; bi->reserved_bytes = 0; @@ -4980,7 +5000,9 @@ again: set_page_dirty(page); SetPageUptodate(page); - BTRFS_I(inode)->last_trans = root->fs_info->generation + 1; + BTRFS_I(inode)->last_trans = root->fs_info->generation; + BTRFS_I(inode)->last_sub_trans = BTRFS_I(inode)->root->log_transid; + unlock_extent(io_tree, page_start, page_end, GFP_NOFS); out_unlock: @@ -5005,7 +5027,9 @@ static void btrfs_truncate(struct inode *inode) if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) return; - btrfs_truncate_page(inode->i_mapping, inode->i_size); + ret = btrfs_truncate_page(inode->i_mapping, inode->i_size); + if (ret) + return; btrfs_wait_ordered_range(inode, inode->i_size & (~mask), (u64)-1); trans = btrfs_start_transaction(root, 1); @@ -5100,6 +5124,7 @@ struct inode *btrfs_alloc_inode(struct super_block *sb) if (!ei) return NULL; ei->last_trans = 0; + ei->last_sub_trans = 0; ei->logged_trans = 0; ei->outstanding_extents = 0; ei->reserved_extents = 0; diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 9de9b22..752a546 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -66,7 +66,8 @@ enum { Opt_degraded, Opt_subvol, Opt_device, Opt_nodatasum, Opt_nodatacow, Opt_max_extent, Opt_max_inline, Opt_alloc_start, Opt_nobarrier, Opt_ssd, Opt_nossd, Opt_ssd_spread, Opt_thread_pool, Opt_noacl, - Opt_compress, Opt_notreelog, Opt_ratio, Opt_flushoncommit, Opt_err, + Opt_compress, Opt_notreelog, Opt_ratio, Opt_flushoncommit, + Opt_discard, Opt_err, }; static match_table_t tokens = { @@ -88,6 +89,7 @@ static match_table_t tokens = { {Opt_notreelog, "notreelog"}, {Opt_flushoncommit, "flushoncommit"}, {Opt_ratio, "metadata_ratio=%d"}, + {Opt_discard, "discard"}, {Opt_err, NULL}, }; @@ -257,6 +259,9 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) info->metadata_ratio); } break; + case Opt_discard: + btrfs_set_opt(info->mount_opt, DISCARD); + break; default: break; } @@ -344,7 +349,7 @@ static int btrfs_fill_super(struct super_block *sb, sb->s_export_op = &btrfs_export_ops; sb->s_xattr = btrfs_xattr_handlers; sb->s_time_gran = 1; -#ifdef CONFIG_BTRFS_POSIX_ACL +#ifdef CONFIG_BTRFS_FS_POSIX_ACL sb->s_flags |= MS_POSIXACL; #endif diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 0b8f36d..bca82a4 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -344,10 +344,10 @@ int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans, /* * when btree blocks are allocated, they have some corresponding bits set for * them in one of two extent_io trees. This is used to make sure all of - * those extents are on disk for transaction or log commit + * those extents are sent to disk but does not wait on them */ -int btrfs_write_and_wait_marked_extents(struct btrfs_root *root, - struct extent_io_tree *dirty_pages) +int btrfs_write_marked_extents(struct btrfs_root *root, + struct extent_io_tree *dirty_pages) { int ret; int err = 0; @@ -394,6 +394,29 @@ int btrfs_write_and_wait_marked_extents(struct btrfs_root *root, page_cache_release(page); } } + if (err) + werr = err; + return werr; +} + +/* + * when btree blocks are allocated, they have some corresponding bits set for + * them in one of two extent_io trees. This is used to make sure all of + * those extents are on disk for transaction or log commit. We wait + * on all the pages and clear them from the dirty pages state tree + */ +int btrfs_wait_marked_extents(struct btrfs_root *root, + struct extent_io_tree *dirty_pages) +{ + int ret; + int err = 0; + int werr = 0; + struct page *page; + struct inode *btree_inode = root->fs_info->btree_inode; + u64 start = 0; + u64 end; + unsigned long index; + while (1) { ret = find_first_extent_bit(dirty_pages, 0, &start, &end, EXTENT_DIRTY); @@ -424,6 +447,22 @@ int btrfs_write_and_wait_marked_extents(struct btrfs_root *root, return werr; } +/* + * when btree blocks are allocated, they have some corresponding bits set for + * them in one of two extent_io trees. This is used to make sure all of + * those extents are on disk for transaction or log commit + */ +int btrfs_write_and_wait_marked_extents(struct btrfs_root *root, + struct extent_io_tree *dirty_pages) +{ + int ret; + int ret2; + + ret = btrfs_write_marked_extents(root, dirty_pages); + ret2 = btrfs_wait_marked_extents(root, dirty_pages); + return ret || ret2; +} + int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans, struct btrfs_root *root) { diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index 663c674..d4e3e7a 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -79,6 +79,7 @@ static inline void btrfs_set_inode_last_trans(struct btrfs_trans_handle *trans, struct inode *inode) { BTRFS_I(inode)->last_trans = trans->transaction->transid; + BTRFS_I(inode)->last_sub_trans = BTRFS_I(inode)->root->log_transid; } int btrfs_end_transaction(struct btrfs_trans_handle *trans, @@ -107,5 +108,9 @@ int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans, struct btrfs_root *root); int btrfs_write_and_wait_marked_extents(struct btrfs_root *root, struct extent_io_tree *dirty_pages); +int btrfs_write_marked_extents(struct btrfs_root *root, + struct extent_io_tree *dirty_pages); +int btrfs_wait_marked_extents(struct btrfs_root *root, + struct extent_io_tree *dirty_pages); int btrfs_transaction_in_commit(struct btrfs_fs_info *info); #endif diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 4edfdc2..741666a 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -1980,6 +1980,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, int ret; struct btrfs_root *log = root->log_root; struct btrfs_root *log_root_tree = root->fs_info->log_root_tree; + u64 log_transid = 0; mutex_lock(&root->log_mutex); index1 = root->log_transid % 2; @@ -1994,12 +1995,13 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, if (atomic_read(&root->log_commit[(index1 + 1) % 2])) wait_log_commit(trans, root, root->log_transid - 1); - while (root->log_multiple_pids) { + while (1) { unsigned long batch = root->log_batch; - mutex_unlock(&root->log_mutex); - schedule_timeout_uninterruptible(1); - mutex_lock(&root->log_mutex); - + if (root->log_multiple_pids) { + mutex_unlock(&root->log_mutex); + schedule_timeout_uninterruptible(1); + mutex_lock(&root->log_mutex); + } wait_for_writer(trans, root); if (batch == root->log_batch) break; @@ -2012,12 +2014,16 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, goto out; } - ret = btrfs_write_and_wait_marked_extents(log, &log->dirty_log_pages); + /* we start IO on all the marked extents here, but we don't actually + * wait for them until later. + */ + ret = btrfs_write_marked_extents(log, &log->dirty_log_pages); BUG_ON(ret); btrfs_set_root_node(&log->root_item, log->node); root->log_batch = 0; + log_transid = root->log_transid; root->log_transid++; log->log_transid = root->log_transid; root->log_start_pid = 0; @@ -2046,6 +2052,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, index2 = log_root_tree->log_transid % 2; if (atomic_read(&log_root_tree->log_commit[index2])) { + btrfs_wait_marked_extents(log, &log->dirty_log_pages); wait_log_commit(trans, log_root_tree, log_root_tree->log_transid); mutex_unlock(&log_root_tree->log_mutex); @@ -2065,6 +2072,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, * check the full commit flag again */ if (root->fs_info->last_trans_log_full_commit == trans->transid) { + btrfs_wait_marked_extents(log, &log->dirty_log_pages); mutex_unlock(&log_root_tree->log_mutex); ret = -EAGAIN; goto out_wake_log_root; @@ -2073,6 +2081,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, ret = btrfs_write_and_wait_marked_extents(log_root_tree, &log_root_tree->dirty_log_pages); BUG_ON(ret); + btrfs_wait_marked_extents(log, &log->dirty_log_pages); btrfs_set_super_log_root(&root->fs_info->super_for_commit, log_root_tree->node->start); @@ -2092,9 +2101,14 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, * the running transaction open, so a full commit can't hop * in and cause problems either. */ - write_ctree_super(trans, root->fs_info->tree_root, 2); + write_ctree_super(trans, root->fs_info->tree_root, 1); ret = 0; + mutex_lock(&root->log_mutex); + if (root->last_log_commit < log_transid) + root->last_log_commit = log_transid; + mutex_unlock(&root->log_mutex); + out_wake_log_root: atomic_set(&log_root_tree->log_commit[index2], 0); smp_mb(); @@ -2862,6 +2876,21 @@ out: return ret; } +static int inode_in_log(struct btrfs_trans_handle *trans, + struct inode *inode) +{ + struct btrfs_root *root = BTRFS_I(inode)->root; + int ret = 0; + + mutex_lock(&root->log_mutex); + if (BTRFS_I(inode)->logged_trans == trans->transid && + BTRFS_I(inode)->last_sub_trans <= root->last_log_commit) + ret = 1; + mutex_unlock(&root->log_mutex); + return ret; +} + + /* * helper function around btrfs_log_inode to make sure newly created * parent directories also end up in the log. A minimal inode and backref @@ -2901,6 +2930,11 @@ int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, if (ret) goto end_no_trans; + if (inode_in_log(trans, inode)) { + ret = BTRFS_NO_LOG_SYNC; + goto end_no_trans; + } + start_log_trans(trans, root); ret = btrfs_log_inode(trans, root, inode, inode_only); diff --git a/fs/btrfs/tree-log.h b/fs/btrfs/tree-log.h index d09c760..0776eac 100644 --- a/fs/btrfs/tree-log.h +++ b/fs/btrfs/tree-log.h @@ -19,6 +19,9 @@ #ifndef __TREE_LOG_ #define __TREE_LOG_ +/* return value for btrfs_log_dentry_safe that means we don't need to log it at all */ +#define BTRFS_NO_LOG_SYNC 256 + int btrfs_sync_log(struct btrfs_trans_handle *trans, struct btrfs_root *root); int btrfs_free_log(struct btrfs_trans_handle *trans, struct btrfs_root *root); diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c index b0fc93f..b6dd596 100644 --- a/fs/btrfs/xattr.c +++ b/fs/btrfs/xattr.c @@ -260,7 +260,7 @@ err: * attributes are handled directly. */ struct xattr_handler *btrfs_xattr_handlers[] = { -#ifdef CONFIG_BTRFS_POSIX_ACL +#ifdef CONFIG_BTRFS_FS_POSIX_ACL &btrfs_xattr_acl_access_handler, &btrfs_xattr_acl_default_handler, #endif diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c index 240cef1..70736eb 100644 --- a/fs/dlm/lowcomms.c +++ b/fs/dlm/lowcomms.c @@ -316,6 +316,10 @@ int dlm_lowcomms_connect_node(int nodeid) { struct connection *con; + /* with sctp there's no connecting without sending */ + if (dlm_config.ci_protocol != 0) + return 0; + if (nodeid == dlm_our_nodeid()) return 0; @@ -455,9 +459,9 @@ static void process_sctp_notification(struct connection *con, int prim_len, ret; int addr_len; struct connection *new_con; - struct file *file; sctp_peeloff_arg_t parg; int parglen = sizeof(parg); + int err; /* * We get this before any data for an association. @@ -512,19 +516,22 @@ static void process_sctp_notification(struct connection *con, ret = kernel_getsockopt(con->sock, IPPROTO_SCTP, SCTP_SOCKOPT_PEELOFF, (void *)&parg, &parglen); - if (ret) { + if (ret < 0) { log_print("Can't peel off a socket for " - "connection %d to node %d: err=%d\n", + "connection %d to node %d: err=%d", parg.associd, nodeid, ret); + return; + } + new_con->sock = sockfd_lookup(parg.sd, &err); + if (!new_con->sock) { + log_print("sockfd_lookup error %d", err); + return; } - file = fget(parg.sd); - new_con->sock = SOCKET_I(file->f_dentry->d_inode); add_sock(new_con->sock, new_con); - fput(file); - put_unused_fd(parg.sd); + sockfd_put(new_con->sock); - log_print("got new/restarted association %d nodeid %d", - (int)sn->sn_assoc_change.sac_assoc_id, nodeid); + log_print("connecting to %d sctp association %d", + nodeid, (int)sn->sn_assoc_change.sac_assoc_id); /* Send any pending writes */ clear_bit(CF_CONNECT_PENDING, &new_con->flags); @@ -837,8 +844,6 @@ static void sctp_init_assoc(struct connection *con) if (con->retries++ > MAX_CONNECT_RETRIES) return; - log_print("Initiating association with node %d", con->nodeid); - if (nodeid_to_addr(con->nodeid, (struct sockaddr *)&rem_addr)) { log_print("no address for nodeid %d", con->nodeid); return; @@ -855,11 +860,14 @@ static void sctp_init_assoc(struct connection *con) outmessage.msg_flags = MSG_EOR; spin_lock(&con->writequeue_lock); - e = list_entry(con->writequeue.next, struct writequeue_entry, - list); - BUG_ON((struct list_head *) e == &con->writequeue); + if (list_empty(&con->writequeue)) { + spin_unlock(&con->writequeue_lock); + log_print("writequeue empty for nodeid %d", con->nodeid); + return; + } + e = list_first_entry(&con->writequeue, struct writequeue_entry, list); len = e->len; offset = e->offset; spin_unlock(&con->writequeue_lock); diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c index 0050fc4..5fad489 100644 --- a/fs/sysfs/dir.c +++ b/fs/sysfs/dir.c @@ -894,7 +894,8 @@ int sysfs_move_dir(struct kobject *kobj, struct kobject *new_parent_kobj) mutex_lock(&sysfs_rename_mutex); BUG_ON(!sd->s_parent); - new_parent_sd = new_parent_kobj->sd ? new_parent_kobj->sd : &sysfs_root; + new_parent_sd = (new_parent_kobj && new_parent_kobj->sd) ? + new_parent_kobj->sd : &sysfs_root; error = 0; if (sd->s_parent == new_parent_sd) diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c index 561a9c0..f5ea468 100644 --- a/fs/sysfs/file.c +++ b/fs/sysfs/file.c @@ -268,7 +268,7 @@ static int sysfs_get_open_dirent(struct sysfs_dirent *sd, struct sysfs_open_dirent *od, *new_od = NULL; retry: - spin_lock(&sysfs_open_dirent_lock); + spin_lock_irq(&sysfs_open_dirent_lock); if (!sd->s_attr.open && new_od) { sd->s_attr.open = new_od; @@ -281,7 +281,7 @@ static int sysfs_get_open_dirent(struct sysfs_dirent *sd, list_add_tail(&buffer->list, &od->buffers); } - spin_unlock(&sysfs_open_dirent_lock); + spin_unlock_irq(&sysfs_open_dirent_lock); if (od) { kfree(new_od); @@ -315,8 +315,9 @@ static void sysfs_put_open_dirent(struct sysfs_dirent *sd, struct sysfs_buffer *buffer) { struct sysfs_open_dirent *od = sd->s_attr.open; + unsigned long flags; - spin_lock(&sysfs_open_dirent_lock); + spin_lock_irqsave(&sysfs_open_dirent_lock, flags); list_del(&buffer->list); if (atomic_dec_and_test(&od->refcnt)) @@ -324,7 +325,7 @@ static void sysfs_put_open_dirent(struct sysfs_dirent *sd, else od = NULL; - spin_unlock(&sysfs_open_dirent_lock); + spin_unlock_irqrestore(&sysfs_open_dirent_lock, flags); kfree(od); } @@ -456,8 +457,9 @@ static unsigned int sysfs_poll(struct file *filp, poll_table *wait) void sysfs_notify_dirent(struct sysfs_dirent *sd) { struct sysfs_open_dirent *od; + unsigned long flags; - spin_lock(&sysfs_open_dirent_lock); + spin_lock_irqsave(&sysfs_open_dirent_lock, flags); od = sd->s_attr.open; if (od) { @@ -465,7 +467,7 @@ void sysfs_notify_dirent(struct sysfs_dirent *sd) wake_up_interruptible(&od->poll); } - spin_unlock(&sysfs_open_dirent_lock); + spin_unlock_irqrestore(&sysfs_open_dirent_lock, flags); } EXPORT_SYMBOL_GPL(sysfs_notify_dirent); diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index d117704..f7b47c3 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -117,12 +117,12 @@ struct ftrace_event_call { struct dentry *dir; struct trace_event *event; int enabled; - int (*regfunc)(void *); - void (*unregfunc)(void *); + int (*regfunc)(struct ftrace_event_call *); + void (*unregfunc)(struct ftrace_event_call *); int id; - int (*raw_init)(void); - int (*show_format)(struct ftrace_event_call *call, - struct trace_seq *s); + int (*raw_init)(struct ftrace_event_call *); + int (*show_format)(struct ftrace_event_call *, + struct trace_seq *); int (*define_fields)(struct ftrace_event_call *); struct list_head fields; int filter_active; @@ -131,8 +131,8 @@ struct ftrace_event_call { void *data; atomic_t profile_count; - int (*profile_enable)(void); - void (*profile_disable)(void); + int (*profile_enable)(struct ftrace_event_call *); + void (*profile_disable)(struct ftrace_event_call *); }; #define FTRACE_MAX_PROFILE_SIZE 2048 @@ -157,11 +157,12 @@ enum { FILTER_PTR_STRING, }; -extern int trace_define_field(struct ftrace_event_call *call, - const char *type, const char *name, - int offset, int size, int is_signed, - int filter_type); extern int trace_define_common_fields(struct ftrace_event_call *call); +extern int trace_define_field(struct ftrace_event_call *call, const char *type, + const char *name, int offset, int size, + int is_signed, int filter_type); +extern int trace_add_event_call(struct ftrace_event_call *call); +extern void trace_remove_event_call(struct ftrace_event_call *call); #define is_signed_type(type) (((type)(-1)) < 0) diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index 3a46b7b..1b672f7 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -296,6 +296,8 @@ void recycle_rp_inst(struct kretprobe_instance *ri, struct hlist_head *head); int disable_kprobe(struct kprobe *kp); int enable_kprobe(struct kprobe *kp); +void dump_kprobe(struct kprobe *kp); + #else /* !CONFIG_KPROBES: */ static inline int kprobes_built_in(void) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 94958c1..812a5f3 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -557,7 +557,7 @@ struct netdev_queue { * Callback uses when the transmitter has not made any progress * for dev->watchdog ticks. * - * struct net_device_stats* (*get_stats)(struct net_device *dev); + * struct net_device_stats* (*ndo_get_stats)(struct net_device *dev); * Called when a user wants to get the network device usage * statistics. If not defined, the counters in dev->stats will * be used. diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index a990ace..b50974a 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -100,23 +100,23 @@ struct perf_event_attr; #ifdef CONFIG_EVENT_PROFILE #define TRACE_SYS_ENTER_PROFILE(sname) \ -static int prof_sysenter_enable_##sname(void) \ +static int prof_sysenter_enable_##sname(struct ftrace_event_call *unused) \ { \ return reg_prof_syscall_enter("sys"#sname); \ } \ \ -static void prof_sysenter_disable_##sname(void) \ +static void prof_sysenter_disable_##sname(struct ftrace_event_call *unused) \ { \ unreg_prof_syscall_enter("sys"#sname); \ } #define TRACE_SYS_EXIT_PROFILE(sname) \ -static int prof_sysexit_enable_##sname(void) \ +static int prof_sysexit_enable_##sname(struct ftrace_event_call *unused) \ { \ return reg_prof_syscall_exit("sys"#sname); \ } \ \ -static void prof_sysexit_disable_##sname(void) \ +static void prof_sysexit_disable_##sname(struct ftrace_event_call *unused) \ { \ unreg_prof_syscall_exit("sys"#sname); \ } @@ -157,7 +157,7 @@ static void prof_sysexit_disable_##sname(void) \ struct trace_event enter_syscall_print_##sname = { \ .trace = print_syscall_enter, \ }; \ - static int init_enter_##sname(void) \ + static int init_enter_##sname(struct ftrace_event_call *call) \ { \ int num, id; \ num = syscall_name_to_nr("sys"#sname); \ @@ -193,7 +193,7 @@ static void prof_sysexit_disable_##sname(void) \ struct trace_event exit_syscall_print_##sname = { \ .trace = print_syscall_exit, \ }; \ - static int init_exit_##sname(void) \ + static int init_exit_##sname(struct ftrace_event_call *call) \ { \ int num, id; \ num = syscall_name_to_nr("sys"#sname); \ diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 466859b..c75b960 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1669,6 +1669,8 @@ void ieee80211_restart_hw(struct ieee80211_hw *hw); * to this function and ieee80211_rx_irqsafe() may not be mixed for a * single hardware. * + * Note that right now, this function must be called with softirqs disabled. + * * @hw: the hardware this frame came in on * @skb: the buffer to receive, owned by mac80211 after this call */ diff --git a/include/net/sock.h b/include/net/sock.h index 1621935..9f96394 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -226,12 +226,12 @@ struct sock { #define sk_prot __sk_common.skc_prot #define sk_net __sk_common.skc_net kmemcheck_bitfield_begin(flags); - unsigned char sk_shutdown : 2, - sk_no_check : 2, - sk_userlocks : 4; + unsigned int sk_shutdown : 2, + sk_no_check : 2, + sk_userlocks : 4, + sk_protocol : 8, + sk_type : 16; kmemcheck_bitfield_end(flags); - unsigned char sk_protocol; - unsigned short sk_type; int sk_rcvbuf; socket_lock_t sk_lock; /* diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index c9bbcab..a7f9460 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -402,12 +402,12 @@ static inline int ftrace_get_offsets_##call( \ \ static void ftrace_profile_##call(proto); \ \ -static int ftrace_profile_enable_##call(void) \ +static int ftrace_profile_enable_##call(struct ftrace_event_call *unused)\ { \ return register_trace_##call(ftrace_profile_##call); \ } \ \ -static void ftrace_profile_disable_##call(void) \ +static void ftrace_profile_disable_##call(struct ftrace_event_call *unused)\ { \ unregister_trace_##call(ftrace_profile_##call); \ } @@ -426,7 +426,7 @@ static void ftrace_profile_disable_##call(void) \ * event_trace_printk(_RET_IP_, "<call>: " <fmt>); * } * - * static int ftrace_reg_event_<call>(void) + * static int ftrace_reg_event_<call>(struct ftrace_event_call *unused) * { * int ret; * @@ -437,7 +437,7 @@ static void ftrace_profile_disable_##call(void) \ * return ret; * } * - * static void ftrace_unreg_event_<call>(void) + * static void ftrace_unreg_event_<call>(struct ftrace_event_call *unused) * { * unregister_trace_<call>(ftrace_event_<call>); * } @@ -472,7 +472,7 @@ static void ftrace_profile_disable_##call(void) \ * trace_current_buffer_unlock_commit(buffer, event, irq_flags, pc); * } * - * static int ftrace_raw_reg_event_<call>(void) + * static int ftrace_raw_reg_event_<call>(struct ftrace_event_call *unused) * { * int ret; * @@ -483,7 +483,7 @@ static void ftrace_profile_disable_##call(void) \ * return ret; * } * - * static void ftrace_unreg_event_<call>(void) + * static void ftrace_unreg_event_<call>(struct ftrace_event_call *unused) * { * unregister_trace_<call>(ftrace_raw_event_<call>); * } @@ -492,7 +492,7 @@ static void ftrace_profile_disable_##call(void) \ * .trace = ftrace_raw_output_<call>, <-- stage 2 * }; * - * static int ftrace_raw_init_event_<call>(void) + * static int ftrace_raw_init_event_<call>(struct ftrace_event_call *unused) * { * int id; * @@ -589,7 +589,7 @@ static void ftrace_raw_event_##call(proto) \ event, irq_flags, pc); \ } \ \ -static int ftrace_raw_reg_event_##call(void *ptr) \ +static int ftrace_raw_reg_event_##call(struct ftrace_event_call *unused)\ { \ int ret; \ \ @@ -600,7 +600,7 @@ static int ftrace_raw_reg_event_##call(void *ptr) \ return ret; \ } \ \ -static void ftrace_raw_unreg_event_##call(void *ptr) \ +static void ftrace_raw_unreg_event_##call(struct ftrace_event_call *unused)\ { \ unregister_trace_##call(ftrace_raw_event_##call); \ } \ @@ -609,7 +609,7 @@ static struct trace_event ftrace_event_type_##call = { \ .trace = ftrace_raw_output_##call, \ }; \ \ -static int ftrace_raw_init_event_##call(void) \ +static int ftrace_raw_init_event_##call(struct ftrace_event_call *unused)\ { \ int id; \ \ diff --git a/include/trace/syscall.h b/include/trace/syscall.h index e972f0a..51ee17d 100644 --- a/include/trace/syscall.h +++ b/include/trace/syscall.h @@ -39,16 +39,19 @@ void set_syscall_enter_id(int num, int id); void set_syscall_exit_id(int num, int id); extern struct trace_event event_syscall_enter; extern struct trace_event event_syscall_exit; -extern int reg_event_syscall_enter(void *ptr); -extern void unreg_event_syscall_enter(void *ptr); -extern int reg_event_syscall_exit(void *ptr); -extern void unreg_event_syscall_exit(void *ptr); + extern int syscall_enter_format(struct ftrace_event_call *call, struct trace_seq *s); extern int syscall_exit_format(struct ftrace_event_call *call, struct trace_seq *s); extern int syscall_enter_define_fields(struct ftrace_event_call *call); extern int syscall_exit_define_fields(struct ftrace_event_call *call); +extern int reg_event_syscall_enter(struct ftrace_event_call *call); +extern void unreg_event_syscall_enter(struct ftrace_event_call *call); +extern int reg_event_syscall_exit(struct ftrace_event_call *call); +extern void unreg_event_syscall_exit(struct ftrace_event_call *call); +extern int +ftrace_format_syscall(struct ftrace_event_call *call, struct trace_seq *s); enum print_line_t print_syscall_enter(struct trace_iterator *iter, int flags); enum print_line_t print_syscall_exit(struct trace_iterator *iter, int flags); #endif diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 5240d75..8449595 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -90,6 +90,9 @@ static spinlock_t *kretprobe_table_lock_ptr(unsigned long hash) */ static struct kprobe_blackpoint kprobe_blacklist[] = { {"preempt_schedule",}, + {"native_get_debugreg",}, + {"irq_entries_start",}, + {"common_interrupt",}, {NULL} /* Terminator */ }; @@ -673,6 +676,40 @@ static kprobe_opcode_t __kprobes *kprobe_addr(struct kprobe *p) return (kprobe_opcode_t *)(((char *)addr) + p->offset); } +/* Check passed kprobe is valid and return kprobe in kprobe_table. */ +static struct kprobe * __kprobes __get_valid_kprobe(struct kprobe *p) +{ + struct kprobe *old_p, *list_p; + + old_p = get_kprobe(p->addr); + if (unlikely(!old_p)) + return NULL; + + if (p != old_p) { + list_for_each_entry_rcu(list_p, &old_p->list, list) + if (list_p == p) + /* kprobe p is a valid probe */ + goto valid; + return NULL; + } +valid: + return old_p; +} + +/* Return error if the kprobe is being re-registered */ +static inline int check_kprobe_rereg(struct kprobe *p) +{ + int ret = 0; + struct kprobe *old_p; + + mutex_lock(&kprobe_mutex); + old_p = __get_valid_kprobe(p); + if (old_p) + ret = -EINVAL; + mutex_unlock(&kprobe_mutex); + return ret; +} + int __kprobes register_kprobe(struct kprobe *p) { int ret = 0; @@ -685,6 +722,10 @@ int __kprobes register_kprobe(struct kprobe *p) return -EINVAL; p->addr = addr; + ret = check_kprobe_rereg(p); + if (ret) + return ret; + preempt_disable(); if (!kernel_text_address((unsigned long) p->addr) || in_kprobes_functions((unsigned long) p->addr)) { @@ -754,26 +795,6 @@ out: } EXPORT_SYMBOL_GPL(register_kprobe); -/* Check passed kprobe is valid and return kprobe in kprobe_table. */ -static struct kprobe * __kprobes __get_valid_kprobe(struct kprobe *p) -{ - struct kprobe *old_p, *list_p; - - old_p = get_kprobe(p->addr); - if (unlikely(!old_p)) - return NULL; - - if (p != old_p) { - list_for_each_entry_rcu(list_p, &old_p->list, list) - if (list_p == p) - /* kprobe p is a valid probe */ - goto valid; - return NULL; - } -valid: - return old_p; -} - /* * Unregister a kprobe without a scheduler synchronization. */ @@ -1141,6 +1162,13 @@ static void __kprobes kill_kprobe(struct kprobe *p) arch_remove_kprobe(p); } +void __kprobes dump_kprobe(struct kprobe *kp) +{ + printk(KERN_WARNING "Dumping kprobe:\n"); + printk(KERN_WARNING "Name: %s\nAddress: %p\nOffset: %x\n", + kp->symbol_name, kp->addr, kp->offset); +} + /* Module notifier call back, checking kprobes on the module */ static int __kprobes kprobes_module_callback(struct notifier_block *nb, unsigned long val, void *data) diff --git a/kernel/notifier.c b/kernel/notifier.c index 61d5aa5..acd24e7 100644 --- a/kernel/notifier.c +++ b/kernel/notifier.c @@ -558,7 +558,7 @@ EXPORT_SYMBOL(unregister_reboot_notifier); static ATOMIC_NOTIFIER_HEAD(die_chain); -int notrace notify_die(enum die_val val, const char *str, +int notrace __kprobes notify_die(enum die_val val, const char *str, struct pt_regs *regs, long err, int trap, int sig) { struct die_args args = { diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index b416512..15372a9 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -428,6 +428,18 @@ config BLK_DEV_IO_TRACE If unsure, say N. +config KPROBE_TRACER + depends on KPROBES + depends on X86 + bool "Trace kprobes" + select TRACING + select GENERIC_TRACER + help + This tracer probes everywhere where kprobes can probe it, and + records various registers and memories specified by user. + This also allows you to trace kprobe probe points as a dynamic + defined events. It provides per-probe event filtering interface. + config DYNAMIC_FTRACE bool "enable/disable ftrace tracepoints dynamically" depends on FUNCTION_TRACER diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile index 26f03ac..c8cb75d 100644 --- a/kernel/trace/Makefile +++ b/kernel/trace/Makefile @@ -53,6 +53,7 @@ obj-$(CONFIG_EVENT_TRACING) += trace_export.o obj-$(CONFIG_FTRACE_SYSCALLS) += trace_syscalls.o obj-$(CONFIG_EVENT_PROFILE) += trace_event_profile.o obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o +obj-$(CONFIG_KPROBE_TRACER) += trace_kprobe.o obj-$(CONFIG_EVENT_TRACING) += power-traces.o libftrace-y := ftrace.o diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 4959ada..b4e4212 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -101,6 +101,29 @@ struct syscall_trace_exit { unsigned long ret; }; +struct kprobe_trace_entry { + struct trace_entry ent; + unsigned long ip; + int nargs; + unsigned long args[]; +}; + +#define SIZEOF_KPROBE_TRACE_ENTRY(n) \ + (offsetof(struct kprobe_trace_entry, args) + \ + (sizeof(unsigned long) * (n))) + +struct kretprobe_trace_entry { + struct trace_entry ent; + unsigned long func; + unsigned long ret_ip; + int nargs; + unsigned long args[]; +}; + +#define SIZEOF_KRETPROBE_TRACE_ENTRY(n) \ + (offsetof(struct kretprobe_trace_entry, args) + \ + (sizeof(unsigned long) * (n))) + /* * trace_flag_type is an enumeration that holds different * states when a trace occurs. These are: diff --git a/kernel/trace/trace_event_profile.c b/kernel/trace/trace_event_profile.c index 8d5c171..c9f687a 100644 --- a/kernel/trace/trace_event_profile.c +++ b/kernel/trace/trace_event_profile.c @@ -45,7 +45,7 @@ static int ftrace_profile_enable_event(struct ftrace_event_call *event) rcu_assign_pointer(trace_profile_buf_nmi, buf); } - ret = event->profile_enable(); + ret = event->profile_enable(event); if (!ret) { total_profile_count++; return 0; @@ -89,7 +89,7 @@ static void ftrace_profile_disable_event(struct ftrace_event_call *event) if (!atomic_add_negative(-1, &event->profile_count)) return; - event->profile_disable(); + event->profile_disable(event); if (!--total_profile_count) { buf = trace_profile_buf; diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 7c18d15..1d18315 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -93,9 +93,7 @@ int trace_define_common_fields(struct ftrace_event_call *call) } EXPORT_SYMBOL_GPL(trace_define_common_fields); -#ifdef CONFIG_MODULES - -static void trace_destroy_fields(struct ftrace_event_call *call) +void trace_destroy_fields(struct ftrace_event_call *call) { struct ftrace_event_field *field, *next; @@ -107,8 +105,6 @@ static void trace_destroy_fields(struct ftrace_event_call *call) } } -#endif /* CONFIG_MODULES */ - static void ftrace_event_enable_disable(struct ftrace_event_call *call, int enable) { @@ -117,14 +113,14 @@ static void ftrace_event_enable_disable(struct ftrace_event_call *call, if (call->enabled) { call->enabled = 0; tracing_stop_cmdline_record(); - call->unregfunc(call->data); + call->unregfunc(call); } break; case 1: if (!call->enabled) { call->enabled = 1; tracing_start_cmdline_record(); - call->regfunc(call->data); + call->regfunc(call); } break; } @@ -937,27 +933,46 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events, return 0; } -#define for_each_event(event, start, end) \ - for (event = start; \ - (unsigned long)event < (unsigned long)end; \ - event++) +static int __trace_add_event_call(struct ftrace_event_call *call) +{ + struct dentry *d_events; + int ret; -#ifdef CONFIG_MODULES + if (!call->name) + return -EINVAL; -static LIST_HEAD(ftrace_module_file_list); + if (call->raw_init) { + ret = call->raw_init(call); + if (ret < 0) { + if (ret != -ENOSYS) + pr_warning("Could not initialize trace " + "events/%s\n", call->name); + return ret; + } + } -/* - * Modules must own their file_operations to keep up with - * reference counting. - */ -struct ftrace_module_file_ops { - struct list_head list; - struct module *mod; - struct file_operations id; - struct file_operations enable; - struct file_operations format; - struct file_operations filter; -}; + d_events = event_trace_events_dir(); + if (!d_events) + return -ENOENT; + + ret = event_create_dir(call, d_events, &ftrace_event_id_fops, + &ftrace_enable_fops, &ftrace_event_filter_fops, + &ftrace_event_format_fops); + if (!ret) + list_add(&call->list, &ftrace_events); + + return ret; +} + +/* Add an additional event_call dynamically */ +int trace_add_event_call(struct ftrace_event_call *call) +{ + int ret; + mutex_lock(&event_mutex); + ret = __trace_add_event_call(call); + mutex_unlock(&event_mutex); + return ret; +} static void remove_subsystem_dir(const char *name) { @@ -985,6 +1000,53 @@ static void remove_subsystem_dir(const char *name) } } +/* + * Must be called under locking both of event_mutex and trace_event_mutex. + */ +static void __trace_remove_event_call(struct ftrace_event_call *call) +{ + ftrace_event_enable_disable(call, 0); + if (call->event) + __unregister_ftrace_event(call->event); + debugfs_remove_recursive(call->dir); + list_del(&call->list); + trace_destroy_fields(call); + destroy_preds(call); + remove_subsystem_dir(call->system); +} + +/* Remove an event_call */ +void trace_remove_event_call(struct ftrace_event_call *call) +{ + mutex_lock(&event_mutex); + down_write(&trace_event_mutex); + __trace_remove_event_call(call); + up_write(&trace_event_mutex); + mutex_unlock(&event_mutex); +} + +#define for_each_event(event, start, end) \ + for (event = start; \ + (unsigned long)event < (unsigned long)end; \ + event++) + +#ifdef CONFIG_MODULES + +static LIST_HEAD(ftrace_module_file_list); + +/* + * Modules must own their file_operations to keep up with + * reference counting. + */ +struct ftrace_module_file_ops { + struct list_head list; + struct module *mod; + struct file_operations id; + struct file_operations enable; + struct file_operations format; + struct file_operations filter; +}; + static struct ftrace_module_file_ops * trace_create_file_ops(struct module *mod) { @@ -1042,7 +1104,7 @@ static void trace_module_add_events(struct module *mod) if (!call->name) continue; if (call->raw_init) { - ret = call->raw_init(); + ret = call->raw_init(call); if (ret < 0) { if (ret != -ENOSYS) pr_warning("Could not initialize trace " @@ -1060,10 +1122,11 @@ static void trace_module_add_events(struct module *mod) return; } call->mod = mod; - list_add(&call->list, &ftrace_events); - event_create_dir(call, d_events, - &file_ops->id, &file_ops->enable, - &file_ops->filter, &file_ops->format); + ret = event_create_dir(call, d_events, + &file_ops->id, &file_ops->enable, + &file_ops->filter, &file_ops->format); + if (!ret) + list_add(&call->list, &ftrace_events); } } @@ -1077,14 +1140,7 @@ static void trace_module_remove_events(struct module *mod) list_for_each_entry_safe(call, p, &ftrace_events, list) { if (call->mod == mod) { found = true; - ftrace_event_enable_disable(call, 0); - if (call->event) - __unregister_ftrace_event(call->event); - debugfs_remove_recursive(call->dir); - list_del(&call->list); - trace_destroy_fields(call); - destroy_preds(call); - remove_subsystem_dir(call->system); + __trace_remove_event_call(call); } } @@ -1202,7 +1258,7 @@ static __init int event_trace_init(void) if (!call->name) continue; if (call->raw_init) { - ret = call->raw_init(); + ret = call->raw_init(call); if (ret < 0) { if (ret != -ENOSYS) pr_warning("Could not initialize trace " @@ -1210,10 +1266,12 @@ static __init int event_trace_init(void) continue; } } - list_add(&call->list, &ftrace_events); - event_create_dir(call, d_events, &ftrace_event_id_fops, - &ftrace_enable_fops, &ftrace_event_filter_fops, - &ftrace_event_format_fops); + ret = event_create_dir(call, d_events, &ftrace_event_id_fops, + &ftrace_enable_fops, + &ftrace_event_filter_fops, + &ftrace_event_format_fops); + if (!ret) + list_add(&call->list, &ftrace_events); } while (true) { diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c index 31da218..934d81f 100644 --- a/kernel/trace/trace_export.c +++ b/kernel/trace/trace_export.c @@ -134,7 +134,6 @@ ftrace_format_##name(struct ftrace_event_call *unused, \ #include "trace_entries.h" - #undef __field #define __field(type, item) \ ret = trace_define_field(event_call, #type, #item, \ @@ -196,6 +195,11 @@ ftrace_define_fields_##name(struct ftrace_event_call *event_call) \ #include "trace_entries.h" +static int ftrace_raw_init_event(struct ftrace_event_call *call) +{ + INIT_LIST_HEAD(&call->fields); + return 0; +} #undef __field #define __field(type, item) @@ -214,7 +218,6 @@ ftrace_define_fields_##name(struct ftrace_event_call *event_call) \ #undef FTRACE_ENTRY #define FTRACE_ENTRY(call, struct_name, type, tstruct, print) \ -static int ftrace_raw_init_event_##call(void); \ \ struct ftrace_event_call __used \ __attribute__((__aligned__(4))) \ @@ -222,14 +225,9 @@ __attribute__((section("_ftrace_events"))) event_##call = { \ .name = #call, \ .id = type, \ .system = __stringify(TRACE_SYSTEM), \ - .raw_init = ftrace_raw_init_event_##call, \ + .raw_init = ftrace_raw_init_event, \ .show_format = ftrace_format_##call, \ .define_fields = ftrace_define_fields_##call, \ }; \ -static int ftrace_raw_init_event_##call(void) \ -{ \ - INIT_LIST_HEAD(&event_##call.fields); \ - return 0; \ -} \ #include "trace_entries.h" diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c new file mode 100644 index 0000000..b8ef707 --- /dev/null +++ b/kernel/trace/trace_kprobe.c @@ -0,0 +1,1479 @@ +/* + * kprobe based kernel tracer + * + * Created by Masami Hiramatsu <mhiramat@redhat.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <linux/module.h> +#include <linux/uaccess.h> +#include <linux/kprobes.h> +#include <linux/seq_file.h> +#include <linux/slab.h> +#include <linux/smp.h> +#include <linux/debugfs.h> +#include <linux/types.h> +#include <linux/string.h> +#include <linux/ctype.h> +#include <linux/ptrace.h> +#include <linux/perf_event.h> + +#include "trace.h" +#include "trace_output.h" + +#define MAX_TRACE_ARGS 128 +#define MAX_ARGSTR_LEN 63 +#define MAX_EVENT_NAME_LEN 64 +#define KPROBE_EVENT_SYSTEM "kprobes" + +/* Reserved field names */ +#define FIELD_STRING_IP "__probe_ip" +#define FIELD_STRING_NARGS "__probe_nargs" +#define FIELD_STRING_RETIP "__probe_ret_ip" +#define FIELD_STRING_FUNC "__probe_func" + +const char *reserved_field_names[] = { + "common_type", + "common_flags", + "common_preempt_count", + "common_pid", + "common_tgid", + "common_lock_depth", + FIELD_STRING_IP, + FIELD_STRING_NARGS, + FIELD_STRING_RETIP, + FIELD_STRING_FUNC, +}; + +/* currently, trace_kprobe only supports X86. */ + +struct fetch_func { + unsigned long (*func)(struct pt_regs *, void *); + void *data; +}; + +static __kprobes unsigned long call_fetch(struct fetch_func *f, + struct pt_regs *regs) +{ + return f->func(regs, f->data); +} + +/* fetch handlers */ +static __kprobes unsigned long fetch_register(struct pt_regs *regs, + void *offset) +{ + return regs_get_register(regs, (unsigned int)((unsigned long)offset)); +} + +static __kprobes unsigned long fetch_stack(struct pt_regs *regs, + void *num) +{ + return regs_get_kernel_stack_nth(regs, + (unsigned int)((unsigned long)num)); +} + +static __kprobes unsigned long fetch_memory(struct pt_regs *regs, void *addr) +{ + unsigned long retval; + + if (probe_kernel_address(addr, retval)) + return 0; + return retval; +} + +static __kprobes unsigned long fetch_argument(struct pt_regs *regs, void *num) +{ + return regs_get_argument_nth(regs, (unsigned int)((unsigned long)num)); +} + +static __kprobes unsigned long fetch_retvalue(struct pt_regs *regs, + void *dummy) +{ + return regs_return_value(regs); +} + +static __kprobes unsigned long fetch_stack_address(struct pt_regs *regs, + void *dummy) +{ + return kernel_stack_pointer(regs); +} + +/* Memory fetching by symbol */ +struct symbol_cache { + char *symbol; + long offset; + unsigned long addr; +}; + +static unsigned long update_symbol_cache(struct symbol_cache *sc) +{ + sc->addr = (unsigned long)kallsyms_lookup_name(sc->symbol); + if (sc->addr) + sc->addr += sc->offset; + return sc->addr; +} + +static void free_symbol_cache(struct symbol_cache *sc) +{ + kfree(sc->symbol); + kfree(sc); +} + +static struct symbol_cache *alloc_symbol_cache(const char *sym, long offset) +{ + struct symbol_cache *sc; + + if (!sym || strlen(sym) == 0) + return NULL; + sc = kzalloc(sizeof(struct symbol_cache), GFP_KERNEL); + if (!sc) + return NULL; + + sc->symbol = kstrdup(sym, GFP_KERNEL); + if (!sc->symbol) { + kfree(sc); + return NULL; + } + sc->offset = offset; + + update_symbol_cache(sc); + return sc; +} + +static __kprobes unsigned long fetch_symbol(struct pt_regs *regs, void *data) +{ + struct symbol_cache *sc = data; + + if (sc->addr) + return fetch_memory(regs, (void *)sc->addr); + else + return 0; +} + +/* Special indirect memory access interface */ +struct indirect_fetch_data { + struct fetch_func orig; + long offset; +}; + +static __kprobes unsigned long fetch_indirect(struct pt_regs *regs, void *data) +{ + struct indirect_fetch_data *ind = data; + unsigned long addr; + + addr = call_fetch(&ind->orig, regs); + if (addr) { + addr += ind->offset; + return fetch_memory(regs, (void *)addr); + } else + return 0; +} + +static __kprobes void free_indirect_fetch_data(struct indirect_fetch_data *data) +{ + if (data->orig.func == fetch_indirect) + free_indirect_fetch_data(data->orig.data); + else if (data->orig.func == fetch_symbol) + free_symbol_cache(data->orig.data); + kfree(data); +} + +/** + * Kprobe tracer core functions + */ + +struct probe_arg { + struct fetch_func fetch; + const char *name; +}; + +/* Flags for trace_probe */ +#define TP_FLAG_TRACE 1 +#define TP_FLAG_PROFILE 2 + +struct trace_probe { + struct list_head list; + struct kretprobe rp; /* Use rp.kp for kprobe use */ + unsigned long nhit; + unsigned int flags; /* For TP_FLAG_* */ + const char *symbol; /* symbol name */ + struct ftrace_event_call call; + struct trace_event event; + unsigned int nr_args; + struct probe_arg args[]; +}; + +#define SIZEOF_TRACE_PROBE(n) \ + (offsetof(struct trace_probe, args) + \ + (sizeof(struct probe_arg) * (n))) + +static __kprobes int probe_is_return(struct trace_probe *tp) +{ + return tp->rp.handler != NULL; +} + +static __kprobes const char *probe_symbol(struct trace_probe *tp) +{ + return tp->symbol ? tp->symbol : "unknown"; +} + +static int probe_arg_string(char *buf, size_t n, struct fetch_func *ff) +{ + int ret = -EINVAL; + + if (ff->func == fetch_argument) + ret = snprintf(buf, n, "$arg%lu", (unsigned long)ff->data); + else if (ff->func == fetch_register) { + const char *name; + name = regs_query_register_name((unsigned int)((long)ff->data)); + ret = snprintf(buf, n, "%%%s", name); + } else if (ff->func == fetch_stack) + ret = snprintf(buf, n, "$stack%lu", (unsigned long)ff->data); + else if (ff->func == fetch_memory) + ret = snprintf(buf, n, "@0x%p", ff->data); + else if (ff->func == fetch_symbol) { + struct symbol_cache *sc = ff->data; + ret = snprintf(buf, n, "@%s%+ld", sc->symbol, sc->offset); + } else if (ff->func == fetch_retvalue) + ret = snprintf(buf, n, "$retval"); + else if (ff->func == fetch_stack_address) + ret = snprintf(buf, n, "$stack"); + else if (ff->func == fetch_indirect) { + struct indirect_fetch_data *id = ff->data; + size_t l = 0; + ret = snprintf(buf, n, "%+ld(", id->offset); + if (ret >= n) + goto end; + l += ret; + ret = probe_arg_string(buf + l, n - l, &id->orig); + if (ret < 0) + goto end; + l += ret; + ret = snprintf(buf + l, n - l, ")"); + ret += l; + } +end: + if (ret >= n) + return -ENOSPC; + return ret; +} + +static int register_probe_event(struct trace_probe *tp); +static void unregister_probe_event(struct trace_probe *tp); + +static DEFINE_MUTEX(probe_lock); +static LIST_HEAD(probe_list); + +static int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs); +static int kretprobe_dispatcher(struct kretprobe_instance *ri, + struct pt_regs *regs); + +/* + * Allocate new trace_probe and initialize it (including kprobes). + */ +static struct trace_probe *alloc_trace_probe(const char *group, + const char *event, + void *addr, + const char *symbol, + unsigned long offs, + int nargs, int is_return) +{ + struct trace_probe *tp; + + tp = kzalloc(SIZEOF_TRACE_PROBE(nargs), GFP_KERNEL); + if (!tp) + return ERR_PTR(-ENOMEM); + + if (symbol) { + tp->symbol = kstrdup(symbol, GFP_KERNEL); + if (!tp->symbol) + goto error; + tp->rp.kp.symbol_name = tp->symbol; + tp->rp.kp.offset = offs; + } else + tp->rp.kp.addr = addr; + + if (is_return) + tp->rp.handler = kretprobe_dispatcher; + else + tp->rp.kp.pre_handler = kprobe_dispatcher; + + if (!event) + goto error; + tp->call.name = kstrdup(event, GFP_KERNEL); + if (!tp->call.name) + goto error; + + if (!group) + goto error; + tp->call.system = kstrdup(group, GFP_KERNEL); + if (!tp->call.system) + goto error; + + INIT_LIST_HEAD(&tp->list); + return tp; +error: + kfree(tp->call.name); + kfree(tp->symbol); + kfree(tp); + return ERR_PTR(-ENOMEM); +} + +static void free_probe_arg(struct probe_arg *arg) +{ + if (arg->fetch.func == fetch_symbol) + free_symbol_cache(arg->fetch.data); + else if (arg->fetch.func == fetch_indirect) + free_indirect_fetch_data(arg->fetch.data); + kfree(arg->name); +} + +static void free_trace_probe(struct trace_probe *tp) +{ + int i; + + for (i = 0; i < tp->nr_args; i++) + free_probe_arg(&tp->args[i]); + + kfree(tp->call.system); + kfree(tp->call.name); + kfree(tp->symbol); + kfree(tp); +} + +static struct trace_probe *find_probe_event(const char *event) +{ + struct trace_probe *tp; + + list_for_each_entry(tp, &probe_list, list) + if (!strcmp(tp->call.name, event)) + return tp; + return NULL; +} + +/* Unregister a trace_probe and probe_event: call with locking probe_lock */ +static void unregister_trace_probe(struct trace_probe *tp) +{ + if (probe_is_return(tp)) + unregister_kretprobe(&tp->rp); + else + unregister_kprobe(&tp->rp.kp); + list_del(&tp->list); + unregister_probe_event(tp); +} + +/* Register a trace_probe and probe_event */ +static int register_trace_probe(struct trace_probe *tp) +{ + struct trace_probe *old_tp; + int ret; + + mutex_lock(&probe_lock); + + /* register as an event */ + old_tp = find_probe_event(tp->call.name); + if (old_tp) { + /* delete old event */ + unregister_trace_probe(old_tp); + free_trace_probe(old_tp); + } + ret = register_probe_event(tp); + if (ret) { + pr_warning("Faild to register probe event(%d)\n", ret); + goto end; + } + + tp->rp.kp.flags |= KPROBE_FLAG_DISABLED; + if (probe_is_return(tp)) + ret = register_kretprobe(&tp->rp); + else + ret = register_kprobe(&tp->rp.kp); + + if (ret) { + pr_warning("Could not insert probe(%d)\n", ret); + if (ret == -EILSEQ) { + pr_warning("Probing address(0x%p) is not an " + "instruction boundary.\n", + tp->rp.kp.addr); + ret = -EINVAL; + } + unregister_probe_event(tp); + } else + list_add_tail(&tp->list, &probe_list); +end: + mutex_unlock(&probe_lock); + return ret; +} + +/* Split symbol and offset. */ +static int split_symbol_offset(char *symbol, unsigned long *offset) +{ + char *tmp; + int ret; + + if (!offset) + return -EINVAL; + + tmp = strchr(symbol, '+'); + if (tmp) { + /* skip sign because strict_strtol doesn't accept '+' */ + ret = strict_strtoul(tmp + 1, 0, offset); + if (ret) + return ret; + *tmp = '\0'; + } else + *offset = 0; + return 0; +} + +#define PARAM_MAX_ARGS 16 +#define PARAM_MAX_STACK (THREAD_SIZE / sizeof(unsigned long)) + +static int parse_probe_vars(char *arg, struct fetch_func *ff, int is_return) +{ + int ret = 0; + unsigned long param; + + if (strcmp(arg, "retval") == 0) { + if (is_return) { + ff->func = fetch_retvalue; + ff->data = NULL; + } else + ret = -EINVAL; + } else if (strncmp(arg, "stack", 5) == 0) { + if (arg[5] == '\0') { + ff->func = fetch_stack_address; + ff->data = NULL; + } else if (isdigit(arg[5])) { + ret = strict_strtoul(arg + 5, 10, ¶m); + if (ret || param > PARAM_MAX_STACK) + ret = -EINVAL; + else { + ff->func = fetch_stack; + ff->data = (void *)param; + } + } else + ret = -EINVAL; + } else if (strncmp(arg, "arg", 3) == 0 && isdigit(arg[3])) { + ret = strict_strtoul(arg + 3, 10, ¶m); + if (ret || param > PARAM_MAX_ARGS) + ret = -EINVAL; + else { + ff->func = fetch_argument; + ff->data = (void *)param; + } + } else + ret = -EINVAL; + return ret; +} + +static int parse_probe_arg(char *arg, struct fetch_func *ff, int is_return) +{ + int ret = 0; + unsigned long param; + long offset; + char *tmp; + + switch (arg[0]) { + case '$': + ret = parse_probe_vars(arg + 1, ff, is_return); + break; + case '%': /* named register */ + ret = regs_query_register_offset(arg + 1); + if (ret >= 0) { + ff->func = fetch_register; + ff->data = (void *)(unsigned long)ret; + ret = 0; + } + break; + case '@': /* memory or symbol */ + if (isdigit(arg[1])) { + ret = strict_strtoul(arg + 1, 0, ¶m); + if (ret) + break; + ff->func = fetch_memory; + ff->data = (void *)param; + } else { + ret = split_symbol_offset(arg + 1, &offset); + if (ret) + break; + ff->data = alloc_symbol_cache(arg + 1, offset); + if (ff->data) + ff->func = fetch_symbol; + else + ret = -EINVAL; + } + break; + case '+': /* indirect memory */ + case '-': + tmp = strchr(arg, '('); + if (!tmp) { + ret = -EINVAL; + break; + } + *tmp = '\0'; + ret = strict_strtol(arg + 1, 0, &offset); + if (ret) + break; + if (arg[0] == '-') + offset = -offset; + arg = tmp + 1; + tmp = strrchr(arg, ')'); + if (tmp) { + struct indirect_fetch_data *id; + *tmp = '\0'; + id = kzalloc(sizeof(struct indirect_fetch_data), + GFP_KERNEL); + if (!id) + return -ENOMEM; + id->offset = offset; + ret = parse_probe_arg(arg, &id->orig, is_return); + if (ret) + kfree(id); + else { + ff->func = fetch_indirect; + ff->data = (void *)id; + } + } else + ret = -EINVAL; + break; + default: + /* TODO: support custom handler */ + ret = -EINVAL; + } + return ret; +} + +/* Return 1 if name is reserved or already used by another argument */ +static int conflict_field_name(const char *name, + struct probe_arg *args, int narg) +{ + int i; + for (i = 0; i < ARRAY_SIZE(reserved_field_names); i++) + if (strcmp(reserved_field_names[i], name) == 0) + return 1; + for (i = 0; i < narg; i++) + if (strcmp(args[i].name, name) == 0) + return 1; + return 0; +} + +static int create_trace_probe(int argc, char **argv) +{ + /* + * Argument syntax: + * - Add kprobe: p[:[GRP/]EVENT] KSYM[+OFFS]|KADDR [FETCHARGS] + * - Add kretprobe: r[:[GRP/]EVENT] KSYM[+0] [FETCHARGS] + * Fetch args: + * $argN : fetch Nth of function argument. (N:0-) + * $retval : fetch return value + * $stack : fetch stack address + * $stackN : fetch Nth of stack (N:0-) + * @ADDR : fetch memory at ADDR (ADDR should be in kernel) + * @SYM[+|-offs] : fetch memory at SYM +|- offs (SYM is a data symbol) + * %REG : fetch register REG + * Indirect memory fetch: + * +|-offs(ARG) : fetch memory at ARG +|- offs address. + * Alias name of args: + * NAME=FETCHARG : set NAME as alias of FETCHARG. + */ + struct trace_probe *tp; + int i, ret = 0; + int is_return = 0; + char *symbol = NULL, *event = NULL, *arg = NULL, *group = NULL; + unsigned long offset = 0; + void *addr = NULL; + char buf[MAX_EVENT_NAME_LEN]; + + if (argc < 2) { + pr_info("Probe point is not specified.\n"); + return -EINVAL; + } + + if (argv[0][0] == 'p') + is_return = 0; + else if (argv[0][0] == 'r') + is_return = 1; + else { + pr_info("Probe definition must be started with 'p' or 'r'.\n"); + return -EINVAL; + } + + if (argv[0][1] == ':') { + event = &argv[0][2]; + if (strchr(event, '/')) { + group = event; + event = strchr(group, '/') + 1; + event[-1] = '\0'; + if (strlen(group) == 0) { + pr_info("Group name is not specifiled\n"); + return -EINVAL; + } + } + if (strlen(event) == 0) { + pr_info("Event name is not specifiled\n"); + return -EINVAL; + } + } + + if (isdigit(argv[1][0])) { + if (is_return) { + pr_info("Return probe point must be a symbol.\n"); + return -EINVAL; + } + /* an address specified */ + ret = strict_strtoul(&argv[0][2], 0, (unsigned long *)&addr); + if (ret) { + pr_info("Failed to parse address.\n"); + return ret; + } + } else { + /* a symbol specified */ + symbol = argv[1]; + /* TODO: support .init module functions */ + ret = split_symbol_offset(symbol, &offset); + if (ret) { + pr_info("Failed to parse symbol.\n"); + return ret; + } + if (offset && is_return) { + pr_info("Return probe must be used without offset.\n"); + return -EINVAL; + } + } + argc -= 2; argv += 2; + + /* setup a probe */ + if (!group) + group = KPROBE_EVENT_SYSTEM; + if (!event) { + /* Make a new event name */ + if (symbol) + snprintf(buf, MAX_EVENT_NAME_LEN, "%c@%s%+ld", + is_return ? 'r' : 'p', symbol, offset); + else + snprintf(buf, MAX_EVENT_NAME_LEN, "%c@0x%p", + is_return ? 'r' : 'p', addr); + event = buf; + } + tp = alloc_trace_probe(group, event, addr, symbol, offset, argc, + is_return); + if (IS_ERR(tp)) { + pr_info("Failed to allocate trace_probe.(%d)\n", + (int)PTR_ERR(tp)); + return PTR_ERR(tp); + } + + /* parse arguments */ + ret = 0; + for (i = 0; i < argc && i < MAX_TRACE_ARGS; i++) { + /* Parse argument name */ + arg = strchr(argv[i], '='); + if (arg) + *arg++ = '\0'; + else + arg = argv[i]; + + if (conflict_field_name(argv[i], tp->args, i)) { + pr_info("Argument%d name '%s' conflicts with " + "another field.\n", i, argv[i]); + ret = -EINVAL; + goto error; + } + + tp->args[i].name = kstrdup(argv[i], GFP_KERNEL); + + /* Parse fetch argument */ + if (strlen(arg) > MAX_ARGSTR_LEN) { + pr_info("Argument%d(%s) is too long.\n", i, arg); + ret = -ENOSPC; + goto error; + } + ret = parse_probe_arg(arg, &tp->args[i].fetch, is_return); + if (ret) { + pr_info("Parse error at argument%d. (%d)\n", i, ret); + goto error; + } + } + tp->nr_args = i; + + ret = register_trace_probe(tp); + if (ret) + goto error; + return 0; + +error: + free_trace_probe(tp); + return ret; +} + +static void cleanup_all_probes(void) +{ + struct trace_probe *tp; + + mutex_lock(&probe_lock); + /* TODO: Use batch unregistration */ + while (!list_empty(&probe_list)) { + tp = list_entry(probe_list.next, struct trace_probe, list); + unregister_trace_probe(tp); + free_trace_probe(tp); + } + mutex_unlock(&probe_lock); +} + + +/* Probes listing interfaces */ +static void *probes_seq_start(struct seq_file *m, loff_t *pos) +{ + mutex_lock(&probe_lock); + return seq_list_start(&probe_list, *pos); +} + +static void *probes_seq_next(struct seq_file *m, void *v, loff_t *pos) +{ + return seq_list_next(v, &probe_list, pos); +} + +static void probes_seq_stop(struct seq_file *m, void *v) +{ + mutex_unlock(&probe_lock); +} + +static int probes_seq_show(struct seq_file *m, void *v) +{ + struct trace_probe *tp = v; + int i, ret; + char buf[MAX_ARGSTR_LEN + 1]; + + seq_printf(m, "%c", probe_is_return(tp) ? 'r' : 'p'); + seq_printf(m, ":%s", tp->call.name); + + if (tp->symbol) + seq_printf(m, " %s+%u", probe_symbol(tp), tp->rp.kp.offset); + else + seq_printf(m, " 0x%p", tp->rp.kp.addr); + + for (i = 0; i < tp->nr_args; i++) { + ret = probe_arg_string(buf, MAX_ARGSTR_LEN, &tp->args[i].fetch); + if (ret < 0) { + pr_warning("Argument%d decoding error(%d).\n", i, ret); + return ret; + } + seq_printf(m, " %s=%s", tp->args[i].name, buf); + } + seq_printf(m, "\n"); + return 0; +} + +static const struct seq_operations probes_seq_op = { + .start = probes_seq_start, + .next = probes_seq_next, + .stop = probes_seq_stop, + .show = probes_seq_show +}; + +static int probes_open(struct inode *inode, struct file *file) +{ + if ((file->f_mode & FMODE_WRITE) && + (file->f_flags & O_TRUNC)) + cleanup_all_probes(); + + return seq_open(file, &probes_seq_op); +} + +static int command_trace_probe(const char *buf) +{ + char **argv; + int argc = 0, ret = 0; + + argv = argv_split(GFP_KERNEL, buf, &argc); + if (!argv) + return -ENOMEM; + + if (argc) + ret = create_trace_probe(argc, argv); + + argv_free(argv); + return ret; +} + +#define WRITE_BUFSIZE 128 + +static ssize_t probes_write(struct file *file, const char __user *buffer, + size_t count, loff_t *ppos) +{ + char *kbuf, *tmp; + int ret; + size_t done; + size_t size; + + kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL); + if (!kbuf) + return -ENOMEM; + + ret = done = 0; + while (done < count) { + size = count - done; + if (size >= WRITE_BUFSIZE) + size = WRITE_BUFSIZE - 1; + if (copy_from_user(kbuf, buffer + done, size)) { + ret = -EFAULT; + goto out; + } + kbuf[size] = '\0'; + tmp = strchr(kbuf, '\n'); + if (tmp) { + *tmp = '\0'; + size = tmp - kbuf + 1; + } else if (done + size < count) { + pr_warning("Line length is too long: " + "Should be less than %d.", WRITE_BUFSIZE); + ret = -EINVAL; + goto out; + } + done += size; + /* Remove comments */ + tmp = strchr(kbuf, '#'); + if (tmp) + *tmp = '\0'; + + ret = command_trace_probe(kbuf); + if (ret) + goto out; + } + ret = done; +out: + kfree(kbuf); + return ret; +} + +static const struct file_operations kprobe_events_ops = { + .owner = THIS_MODULE, + .open = probes_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, + .write = probes_write, +}; + +/* Probes profiling interfaces */ +static int probes_profile_seq_show(struct seq_file *m, void *v) +{ + struct trace_probe *tp = v; + + seq_printf(m, " %-44s %15lu %15lu\n", tp->call.name, tp->nhit, + tp->rp.kp.nmissed); + + return 0; +} + +static const struct seq_operations profile_seq_op = { + .start = probes_seq_start, + .next = probes_seq_next, + .stop = probes_seq_stop, + .show = probes_profile_seq_show +}; + +static int profile_open(struct inode *inode, struct file *file) +{ + return seq_open(file, &profile_seq_op); +} + +static const struct file_operations kprobe_profile_ops = { + .owner = THIS_MODULE, + .open = profile_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; + +/* Kprobe handler */ +static __kprobes int kprobe_trace_func(struct kprobe *kp, struct pt_regs *regs) +{ + struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp); + struct kprobe_trace_entry *entry; + struct ring_buffer_event *event; + struct ring_buffer *buffer; + int size, i, pc; + unsigned long irq_flags; + struct ftrace_event_call *call = &tp->call; + + tp->nhit++; + + local_save_flags(irq_flags); + pc = preempt_count(); + + size = SIZEOF_KPROBE_TRACE_ENTRY(tp->nr_args); + + event = trace_current_buffer_lock_reserve(&buffer, call->id, size, + irq_flags, pc); + if (!event) + return 0; + + entry = ring_buffer_event_data(event); + entry->nargs = tp->nr_args; + entry->ip = (unsigned long)kp->addr; + for (i = 0; i < tp->nr_args; i++) + entry->args[i] = call_fetch(&tp->args[i].fetch, regs); + + if (!filter_current_check_discard(buffer, call, entry, event)) + trace_nowake_buffer_unlock_commit(buffer, event, irq_flags, pc); + return 0; +} + +/* Kretprobe handler */ +static __kprobes int kretprobe_trace_func(struct kretprobe_instance *ri, + struct pt_regs *regs) +{ + struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp); + struct kretprobe_trace_entry *entry; + struct ring_buffer_event *event; + struct ring_buffer *buffer; + int size, i, pc; + unsigned long irq_flags; + struct ftrace_event_call *call = &tp->call; + + local_save_flags(irq_flags); + pc = preempt_count(); + + size = SIZEOF_KRETPROBE_TRACE_ENTRY(tp->nr_args); + + event = trace_current_buffer_lock_reserve(&buffer, call->id, size, + irq_flags, pc); + if (!event) + return 0; + + entry = ring_buffer_event_data(event); + entry->nargs = tp->nr_args; + entry->func = (unsigned long)tp->rp.kp.addr; + entry->ret_ip = (unsigned long)ri->ret_addr; + for (i = 0; i < tp->nr_args; i++) + entry->args[i] = call_fetch(&tp->args[i].fetch, regs); + + if (!filter_current_check_discard(buffer, call, entry, event)) + trace_nowake_buffer_unlock_commit(buffer, event, irq_flags, pc); + + return 0; +} + +/* Event entry printers */ +enum print_line_t +print_kprobe_event(struct trace_iterator *iter, int flags) +{ + struct kprobe_trace_entry *field; + struct trace_seq *s = &iter->seq; + struct trace_event *event; + struct trace_probe *tp; + int i; + + field = (struct kprobe_trace_entry *)iter->ent; + event = ftrace_find_event(field->ent.type); + tp = container_of(event, struct trace_probe, event); + + if (!trace_seq_printf(s, "%s: (", tp->call.name)) + goto partial; + + if (!seq_print_ip_sym(s, field->ip, flags | TRACE_ITER_SYM_OFFSET)) + goto partial; + + if (!trace_seq_puts(s, ")")) + goto partial; + + for (i = 0; i < field->nargs; i++) + if (!trace_seq_printf(s, " %s=%lx", + tp->args[i].name, field->args[i])) + goto partial; + + if (!trace_seq_puts(s, "\n")) + goto partial; + + return TRACE_TYPE_HANDLED; +partial: + return TRACE_TYPE_PARTIAL_LINE; +} + +enum print_line_t +print_kretprobe_event(struct trace_iterator *iter, int flags) +{ + struct kretprobe_trace_entry *field; + struct trace_seq *s = &iter->seq; + struct trace_event *event; + struct trace_probe *tp; + int i; + + field = (struct kretprobe_trace_entry *)iter->ent; + event = ftrace_find_event(field->ent.type); + tp = container_of(event, struct trace_probe, event); + + if (!trace_seq_printf(s, "%s: (", tp->call.name)) + goto partial; + + if (!seq_print_ip_sym(s, field->ret_ip, flags | TRACE_ITER_SYM_OFFSET)) + goto partial; + + if (!trace_seq_puts(s, " <- ")) + goto partial; + + if (!seq_print_ip_sym(s, field->func, flags & ~TRACE_ITER_SYM_OFFSET)) + goto partial; + + if (!trace_seq_puts(s, ")")) + goto partial; + + for (i = 0; i < field->nargs; i++) + if (!trace_seq_printf(s, " %s=%lx", + tp->args[i].name, field->args[i])) + goto partial; + + if (!trace_seq_puts(s, "\n")) + goto partial; + + return TRACE_TYPE_HANDLED; +partial: + return TRACE_TYPE_PARTIAL_LINE; +} + +static int probe_event_enable(struct ftrace_event_call *call) +{ + struct trace_probe *tp = (struct trace_probe *)call->data; + + tp->flags |= TP_FLAG_TRACE; + if (probe_is_return(tp)) + return enable_kretprobe(&tp->rp); + else + return enable_kprobe(&tp->rp.kp); +} + +static void probe_event_disable(struct ftrace_event_call *call) +{ + struct trace_probe *tp = (struct trace_probe *)call->data; + + tp->flags &= ~TP_FLAG_TRACE; + if (!(tp->flags & (TP_FLAG_TRACE | TP_FLAG_PROFILE))) { + if (probe_is_return(tp)) + disable_kretprobe(&tp->rp); + else + disable_kprobe(&tp->rp.kp); + } +} + +static int probe_event_raw_init(struct ftrace_event_call *event_call) +{ + INIT_LIST_HEAD(&event_call->fields); + + return 0; +} + +#undef DEFINE_FIELD +#define DEFINE_FIELD(type, item, name, is_signed) \ + do { \ + ret = trace_define_field(event_call, #type, name, \ + offsetof(typeof(field), item), \ + sizeof(field.item), is_signed, \ + FILTER_OTHER); \ + if (ret) \ + return ret; \ + } while (0) + +static int kprobe_event_define_fields(struct ftrace_event_call *event_call) +{ + int ret, i; + struct kprobe_trace_entry field; + struct trace_probe *tp = (struct trace_probe *)event_call->data; + + ret = trace_define_common_fields(event_call); + if (!ret) + return ret; + + DEFINE_FIELD(unsigned long, ip, FIELD_STRING_IP, 0); + DEFINE_FIELD(int, nargs, FIELD_STRING_NARGS, 1); + /* Set argument names as fields */ + for (i = 0; i < tp->nr_args; i++) + DEFINE_FIELD(unsigned long, args[i], tp->args[i].name, 0); + return 0; +} + +static int kretprobe_event_define_fields(struct ftrace_event_call *event_call) +{ + int ret, i; + struct kretprobe_trace_entry field; + struct trace_probe *tp = (struct trace_probe *)event_call->data; + + ret = trace_define_common_fields(event_call); + if (!ret) + return ret; + + DEFINE_FIELD(unsigned long, func, FIELD_STRING_FUNC, 0); + DEFINE_FIELD(unsigned long, ret_ip, FIELD_STRING_RETIP, 0); + DEFINE_FIELD(int, nargs, FIELD_STRING_NARGS, 1); + /* Set argument names as fields */ + for (i = 0; i < tp->nr_args; i++) + DEFINE_FIELD(unsigned long, args[i], tp->args[i].name, 0); + return 0; +} + +static int __probe_event_show_format(struct trace_seq *s, + struct trace_probe *tp, const char *fmt, + const char *arg) +{ + int i; + + /* Show format */ + if (!trace_seq_printf(s, "\nprint fmt: \"%s", fmt)) + return 0; + + for (i = 0; i < tp->nr_args; i++) + if (!trace_seq_printf(s, " %s=%%lx", tp->args[i].name)) + return 0; + + if (!trace_seq_printf(s, "\", %s", arg)) + return 0; + + for (i = 0; i < tp->nr_args; i++) + if (!trace_seq_printf(s, ", REC->%s", tp->args[i].name)) + return 0; + + return trace_seq_puts(s, "\n"); +} + +#undef SHOW_FIELD +#define SHOW_FIELD(type, item, name) \ + do { \ + ret = trace_seq_printf(s, "\tfield: " #type " %s;\t" \ + "offset:%u;\tsize:%u;\n", name, \ + (unsigned int)offsetof(typeof(field), item),\ + (unsigned int)sizeof(type)); \ + if (!ret) \ + return 0; \ + } while (0) + +static int kprobe_event_show_format(struct ftrace_event_call *call, + struct trace_seq *s) +{ + struct kprobe_trace_entry field __attribute__((unused)); + int ret, i; + struct trace_probe *tp = (struct trace_probe *)call->data; + + SHOW_FIELD(unsigned long, ip, FIELD_STRING_IP); + SHOW_FIELD(int, nargs, FIELD_STRING_NARGS); + + /* Show fields */ + for (i = 0; i < tp->nr_args; i++) + SHOW_FIELD(unsigned long, args[i], tp->args[i].name); + trace_seq_puts(s, "\n"); + + return __probe_event_show_format(s, tp, "(%lx)", + "REC->" FIELD_STRING_IP); +} + +static int kretprobe_event_show_format(struct ftrace_event_call *call, + struct trace_seq *s) +{ + struct kretprobe_trace_entry field __attribute__((unused)); + int ret, i; + struct trace_probe *tp = (struct trace_probe *)call->data; + + SHOW_FIELD(unsigned long, func, FIELD_STRING_FUNC); + SHOW_FIELD(unsigned long, ret_ip, FIELD_STRING_RETIP); + SHOW_FIELD(int, nargs, FIELD_STRING_NARGS); + + /* Show fields */ + for (i = 0; i < tp->nr_args; i++) + SHOW_FIELD(unsigned long, args[i], tp->args[i].name); + trace_seq_puts(s, "\n"); + + return __probe_event_show_format(s, tp, "(%lx <- %lx)", + "REC->" FIELD_STRING_FUNC + ", REC->" FIELD_STRING_RETIP); +} + +#ifdef CONFIG_EVENT_PROFILE + +/* Kprobe profile handler */ +static __kprobes int kprobe_profile_func(struct kprobe *kp, + struct pt_regs *regs) +{ + struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp); + struct ftrace_event_call *call = &tp->call; + struct kprobe_trace_entry *entry; + struct trace_entry *ent; + int size, __size, i, pc, __cpu; + unsigned long irq_flags; + char *raw_data; + + pc = preempt_count(); + __size = SIZEOF_KPROBE_TRACE_ENTRY(tp->nr_args); + size = ALIGN(__size + sizeof(u32), sizeof(u64)); + size -= sizeof(u32); + if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE, + "profile buffer not large enough")) + return 0; + + /* + * Protect the non nmi buffer + * This also protects the rcu read side + */ + local_irq_save(irq_flags); + __cpu = smp_processor_id(); + + if (in_nmi()) + raw_data = rcu_dereference(trace_profile_buf_nmi); + else + raw_data = rcu_dereference(trace_profile_buf); + + if (!raw_data) + goto end; + + raw_data = per_cpu_ptr(raw_data, __cpu); + /* Zero dead bytes from alignment to avoid buffer leak to userspace */ + *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL; + entry = (struct kprobe_trace_entry *)raw_data; + ent = &entry->ent; + + tracing_generic_entry_update(ent, irq_flags, pc); + ent->type = call->id; + entry->nargs = tp->nr_args; + entry->ip = (unsigned long)kp->addr; + for (i = 0; i < tp->nr_args; i++) + entry->args[i] = call_fetch(&tp->args[i].fetch, regs); + perf_tp_event(call->id, entry->ip, 1, entry, size); +end: + local_irq_restore(irq_flags); + return 0; +} + +/* Kretprobe profile handler */ +static __kprobes int kretprobe_profile_func(struct kretprobe_instance *ri, + struct pt_regs *regs) +{ + struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp); + struct ftrace_event_call *call = &tp->call; + struct kretprobe_trace_entry *entry; + struct trace_entry *ent; + int size, __size, i, pc, __cpu; + unsigned long irq_flags; + char *raw_data; + + pc = preempt_count(); + __size = SIZEOF_KRETPROBE_TRACE_ENTRY(tp->nr_args); + size = ALIGN(__size + sizeof(u32), sizeof(u64)); + size -= sizeof(u32); + if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE, + "profile buffer not large enough")) + return 0; + + /* + * Protect the non nmi buffer + * This also protects the rcu read side + */ + local_irq_save(irq_flags); + __cpu = smp_processor_id(); + + if (in_nmi()) + raw_data = rcu_dereference(trace_profile_buf_nmi); + else + raw_data = rcu_dereference(trace_profile_buf); + + if (!raw_data) + goto end; + + raw_data = per_cpu_ptr(raw_data, __cpu); + /* Zero dead bytes from alignment to avoid buffer leak to userspace */ + *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL; + entry = (struct kretprobe_trace_entry *)raw_data; + ent = &entry->ent; + + tracing_generic_entry_update(ent, irq_flags, pc); + ent->type = call->id; + entry->nargs = tp->nr_args; + entry->func = (unsigned long)tp->rp.kp.addr; + entry->ret_ip = (unsigned long)ri->ret_addr; + for (i = 0; i < tp->nr_args; i++) + entry->args[i] = call_fetch(&tp->args[i].fetch, regs); + perf_tp_event(call->id, entry->ret_ip, 1, entry, size); +end: + local_irq_restore(irq_flags); + return 0; +} + +static int probe_profile_enable(struct ftrace_event_call *call) +{ + struct trace_probe *tp = (struct trace_probe *)call->data; + + tp->flags |= TP_FLAG_PROFILE; + + if (probe_is_return(tp)) + return enable_kretprobe(&tp->rp); + else + return enable_kprobe(&tp->rp.kp); +} + +static void probe_profile_disable(struct ftrace_event_call *call) +{ + struct trace_probe *tp = (struct trace_probe *)call->data; + + tp->flags &= ~TP_FLAG_PROFILE; + + if (!(tp->flags & TP_FLAG_TRACE)) { + if (probe_is_return(tp)) + disable_kretprobe(&tp->rp); + else + disable_kprobe(&tp->rp.kp); + } +} +#endif /* CONFIG_EVENT_PROFILE */ + + +static __kprobes +int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs) +{ + struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp); + + if (tp->flags & TP_FLAG_TRACE) + kprobe_trace_func(kp, regs); +#ifdef CONFIG_EVENT_PROFILE + if (tp->flags & TP_FLAG_PROFILE) + kprobe_profile_func(kp, regs); +#endif /* CONFIG_EVENT_PROFILE */ + return 0; /* We don't tweek kernel, so just return 0 */ +} + +static __kprobes +int kretprobe_dispatcher(struct kretprobe_instance *ri, struct pt_regs *regs) +{ + struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp); + + if (tp->flags & TP_FLAG_TRACE) + kretprobe_trace_func(ri, regs); +#ifdef CONFIG_EVENT_PROFILE + if (tp->flags & TP_FLAG_PROFILE) + kretprobe_profile_func(ri, regs); +#endif /* CONFIG_EVENT_PROFILE */ + return 0; /* We don't tweek kernel, so just return 0 */ +} + +static int register_probe_event(struct trace_probe *tp) +{ + struct ftrace_event_call *call = &tp->call; + int ret; + + /* Initialize ftrace_event_call */ + if (probe_is_return(tp)) { + tp->event.trace = print_kretprobe_event; + call->raw_init = probe_event_raw_init; + call->show_format = kretprobe_event_show_format; + call->define_fields = kretprobe_event_define_fields; + } else { + tp->event.trace = print_kprobe_event; + call->raw_init = probe_event_raw_init; + call->show_format = kprobe_event_show_format; + call->define_fields = kprobe_event_define_fields; + } + call->event = &tp->event; + call->id = register_ftrace_event(&tp->event); + if (!call->id) + return -ENODEV; + call->enabled = 0; + call->regfunc = probe_event_enable; + call->unregfunc = probe_event_disable; + +#ifdef CONFIG_EVENT_PROFILE + atomic_set(&call->profile_count, -1); + call->profile_enable = probe_profile_enable; + call->profile_disable = probe_profile_disable; +#endif + call->data = tp; + ret = trace_add_event_call(call); + if (ret) { + pr_info("Failed to register kprobe event: %s\n", call->name); + unregister_ftrace_event(&tp->event); + } + return ret; +} + +static void unregister_probe_event(struct trace_probe *tp) +{ + /* tp->event is unregistered in trace_remove_event_call() */ + trace_remove_event_call(&tp->call); +} + +/* Make a debugfs interface for controling probe points */ +static __init int init_kprobe_trace(void) +{ + struct dentry *d_tracer; + struct dentry *entry; + + d_tracer = tracing_init_dentry(); + if (!d_tracer) + return 0; + + entry = debugfs_create_file("kprobe_events", 0644, d_tracer, + NULL, &kprobe_events_ops); + + /* Event list interface */ + if (!entry) + pr_warning("Could not create debugfs " + "'kprobe_events' entry\n"); + + /* Profile interface */ + entry = debugfs_create_file("kprobe_profile", 0444, d_tracer, + NULL, &kprobe_profile_ops); + + if (!entry) + pr_warning("Could not create debugfs " + "'kprobe_profile' entry\n"); + return 0; +} +fs_initcall(init_kprobe_trace); + + +#ifdef CONFIG_FTRACE_STARTUP_TEST + +static int kprobe_trace_selftest_target(int a1, int a2, int a3, + int a4, int a5, int a6) +{ + return a1 + a2 + a3 + a4 + a5 + a6; +} + +static __init int kprobe_trace_self_tests_init(void) +{ + int ret; + int (*target)(int, int, int, int, int, int); + + target = kprobe_trace_selftest_target; + + pr_info("Testing kprobe tracing: "); + + ret = command_trace_probe("p:testprobe kprobe_trace_selftest_target " + "$arg1 $arg2 $arg3 $arg4 $stack $stack0"); + if (WARN_ON_ONCE(ret)) + pr_warning("error enabling function entry\n"); + + ret = command_trace_probe("r:testprobe2 kprobe_trace_selftest_target " + "$retval"); + if (WARN_ON_ONCE(ret)) + pr_warning("error enabling function return\n"); + + ret = target(1, 2, 3, 4, 5, 6); + + cleanup_all_probes(); + + pr_cont("OK\n"); + return 0; +} + +late_initcall(kprobe_trace_self_tests_init); + +#endif diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index d00d1a8..58b8e53 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -354,13 +354,13 @@ void ftrace_syscall_exit(struct pt_regs *regs, long ret) trace_current_buffer_unlock_commit(buffer, event, 0, 0); } -int reg_event_syscall_enter(void *ptr) +int reg_event_syscall_enter(struct ftrace_event_call *call) { int ret = 0; int num; char *name; - name = (char *)ptr; + name = (char *)call->data; num = syscall_name_to_nr(name); if (num < 0 || num >= NR_syscalls) return -ENOSYS; @@ -378,12 +378,12 @@ int reg_event_syscall_enter(void *ptr) return ret; } -void unreg_event_syscall_enter(void *ptr) +void unreg_event_syscall_enter(struct ftrace_event_call *call) { int num; char *name; - name = (char *)ptr; + name = (char *)call->data; num = syscall_name_to_nr(name); if (num < 0 || num >= NR_syscalls) return; @@ -395,13 +395,13 @@ void unreg_event_syscall_enter(void *ptr) mutex_unlock(&syscall_trace_lock); } -int reg_event_syscall_exit(void *ptr) +int reg_event_syscall_exit(struct ftrace_event_call *call) { int ret = 0; int num; char *name; - name = (char *)ptr; + name = call->data; num = syscall_name_to_nr(name); if (num < 0 || num >= NR_syscalls) return -ENOSYS; @@ -419,12 +419,12 @@ int reg_event_syscall_exit(void *ptr) return ret; } -void unreg_event_syscall_exit(void *ptr) +void unreg_event_syscall_exit(struct ftrace_event_call *call) { int num; char *name; - name = (char *)ptr; + name = call->data; num = syscall_name_to_nr(name); if (num < 0 || num >= NR_syscalls) return; diff --git a/kernel/workqueue.c b/kernel/workqueue.c index ccefe57..47cdd7e 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -647,7 +647,7 @@ EXPORT_SYMBOL(schedule_delayed_work); */ void flush_delayed_work(struct delayed_work *dwork) { - if (del_timer(&dwork->timer)) { + if (del_timer_sync(&dwork->timer)) { struct cpu_workqueue_struct *cwq; cwq = wq_per_cpu(keventd_wq, get_cpu()); __queue_work(cwq, &dwork->work); diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 624c3c9..e320afe 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -644,6 +644,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, /* If TCP_DEFER_ACCEPT is set, drop bare ACK. */ if (inet_csk(sk)->icsk_accept_queue.rskq_defer_accept && TCP_SKB_CB(skb)->end_seq == tcp_rsk(req)->rcv_isn + 1) { + inet_csk(sk)->icsk_accept_queue.rskq_defer_accept--; inet_rsk(req)->acked = 1; return NULL; } diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 6ec6a8a..d0d436d 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -841,6 +841,42 @@ out: return ret; } + +/** + * first_packet_length - return length of first packet in receive queue + * @sk: socket + * + * Drops all bad checksum frames, until a valid one is found. + * Returns the length of found skb, or 0 if none is found. + */ +static unsigned int first_packet_length(struct sock *sk) +{ + struct sk_buff_head list_kill, *rcvq = &sk->sk_receive_queue; + struct sk_buff *skb; + unsigned int res; + + __skb_queue_head_init(&list_kill); + + spin_lock_bh(&rcvq->lock); + while ((skb = skb_peek(rcvq)) != NULL && + udp_lib_checksum_complete(skb)) { + UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, + IS_UDPLITE(sk)); + __skb_unlink(skb, rcvq); + __skb_queue_tail(&list_kill, skb); + } + res = skb ? skb->len : 0; + spin_unlock_bh(&rcvq->lock); + + if (!skb_queue_empty(&list_kill)) { + lock_sock(sk); + __skb_queue_purge(&list_kill); + sk_mem_reclaim_partial(sk); + release_sock(sk); + } + return res; +} + /* * IOCTL requests applicable to the UDP protocol */ @@ -857,21 +893,16 @@ int udp_ioctl(struct sock *sk, int cmd, unsigned long arg) case SIOCINQ: { - struct sk_buff *skb; - unsigned long amount; + unsigned int amount = first_packet_length(sk); - amount = 0; - spin_lock_bh(&sk->sk_receive_queue.lock); - skb = skb_peek(&sk->sk_receive_queue); - if (skb != NULL) { + if (amount) /* * We will only return the amount * of this packet since that is all * that will be read. */ - amount = skb->len - sizeof(struct udphdr); - } - spin_unlock_bh(&sk->sk_receive_queue.lock); + amount -= sizeof(struct udphdr); + return put_user(amount, (int __user *)arg); } @@ -1540,29 +1571,11 @@ unsigned int udp_poll(struct file *file, struct socket *sock, poll_table *wait) { unsigned int mask = datagram_poll(file, sock, wait); struct sock *sk = sock->sk; - int is_lite = IS_UDPLITE(sk); /* Check for false positives due to checksum errors */ - if ((mask & POLLRDNORM) && - !(file->f_flags & O_NONBLOCK) && - !(sk->sk_shutdown & RCV_SHUTDOWN)) { - struct sk_buff_head *rcvq = &sk->sk_receive_queue; - struct sk_buff *skb; - - spin_lock_bh(&rcvq->lock); - while ((skb = skb_peek(rcvq)) != NULL && - udp_lib_checksum_complete(skb)) { - UDP_INC_STATS_BH(sock_net(sk), - UDP_MIB_INERRORS, is_lite); - __skb_unlink(skb, rcvq); - kfree_skb(skb); - } - spin_unlock_bh(&rcvq->lock); - - /* nothing to see, move along */ - if (skb == NULL) - mask &= ~(POLLIN | POLLRDNORM); - } + if ((mask & POLLRDNORM) && !(file->f_flags & O_NONBLOCK) && + !(sk->sk_shutdown & RCV_SHUTDOWN) && !first_packet_length(sk)) + mask &= ~(POLLIN | POLLRDNORM); return mask; diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index 920ec87..6eaf698 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -544,7 +544,7 @@ static void ieee80211_sta_find_ibss(struct ieee80211_sub_if_data *sdata) "%pM\n", bss->cbss.bssid, ifibss->bssid); #endif /* CONFIG_MAC80211_IBSS_DEBUG */ - if (bss && memcmp(ifibss->bssid, bss->cbss.bssid, ETH_ALEN)) { + if (bss && !memcmp(ifibss->bssid, bss->cbss.bssid, ETH_ALEN)) { printk(KERN_DEBUG "%s: Selected IBSS BSSID %pM" " based on configured SSID\n", sdata->dev->name, bss->cbss.bssid); @@ -829,7 +829,7 @@ void ieee80211_ibss_notify_scan_completed(struct ieee80211_local *local) if (!sdata->u.ibss.ssid_len) continue; sdata->u.ibss.last_scan_completed = jiffies; - ieee80211_sta_find_ibss(sdata); + mod_timer(&sdata->u.ibss.timer, 0); } mutex_unlock(&local->iflist_mtx); } diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index c01588f..7170bf4 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -2164,11 +2164,17 @@ static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw, skb = rx.skb; - list_for_each_entry_rcu(sdata, &local->interfaces, list) { + if (rx.sdata && ieee80211_is_data(hdr->frame_control)) { + rx.flags |= IEEE80211_RX_RA_MATCH; + prepares = prepare_for_handlers(rx.sdata, &rx, hdr); + if (prepares) + prev = rx.sdata; + } else list_for_each_entry_rcu(sdata, &local->interfaces, list) { if (!netif_running(sdata->dev)) continue; - if (sdata->vif.type == NL80211_IFTYPE_MONITOR) + if (sdata->vif.type == NL80211_IFTYPE_MONITOR || + sdata->vif.type == NL80211_IFTYPE_AP_VLAN) continue; rx.flags |= IEEE80211_RX_RA_MATCH; @@ -2447,6 +2453,8 @@ void ieee80211_rx(struct ieee80211_hw *hw, struct sk_buff *skb) struct ieee80211_supported_band *sband; struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb); + WARN_ON_ONCE(softirq_count() == 0); + if (WARN_ON(status->band < 0 || status->band >= IEEE80211_NUM_BANDS)) goto drop; diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index eec0014..594f231 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -361,6 +361,7 @@ int sta_info_insert(struct sta_info *sta) u.ap); drv_sta_notify(local, &sdata->vif, STA_NOTIFY_ADD, &sta->sta); + sdata = sta->sdata; } #ifdef CONFIG_MAC80211_VERBOSE_DEBUG @@ -496,6 +497,7 @@ static void __sta_info_unlink(struct sta_info **sta) drv_sta_notify(local, &sdata->vif, STA_NOTIFY_REMOVE, &(*sta)->sta); + sdata = (*sta)->sdata; } if (ieee80211_vif_is_mesh(&sdata->vif)) { diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index fd40282..db4bda6 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -1704,7 +1704,8 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb, if (!is_multicast_ether_addr(hdr.addr1)) { rcu_read_lock(); sta = sta_info_get(local, hdr.addr1); - if (sta) + /* XXX: in the future, use sdata to look up the sta */ + if (sta && sta->sdata == sdata) sta_flags = get_sta_flags(sta); rcu_read_unlock(); } diff --git a/net/mac80211/util.c b/net/mac80211/util.c index dd65643..aeb65b3 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -339,7 +339,7 @@ void ieee80211_add_pending_skb(struct ieee80211_local *local, struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); if (WARN_ON(!info->control.vif)) { - kfree(skb); + kfree_skb(skb); return; } @@ -367,7 +367,7 @@ int ieee80211_add_pending_skbs(struct ieee80211_local *local, struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); if (WARN_ON(!info->control.vif)) { - kfree(skb); + kfree_skb(skb); continue; } diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c index 96c0ed1..6b0359a 100644 --- a/net/sched/act_pedit.c +++ b/net/sched/act_pedit.c @@ -34,7 +34,7 @@ static struct tcf_hashinfo pedit_hash_info = { }; static const struct nla_policy pedit_policy[TCA_PEDIT_MAX + 1] = { - [TCA_PEDIT_PARMS] = { .len = sizeof(struct tcf_pedit) }, + [TCA_PEDIT_PARMS] = { .len = sizeof(struct tc_pedit) }, }; static int tcf_pedit_init(struct nlattr *nla, struct nlattr *est, diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 6a53694..7cf6c0f 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -350,7 +350,7 @@ static int tcf_fill_node(struct sk_buff *skb, struct tcf_proto *tp, tcm = NLMSG_DATA(nlh); tcm->tcm_family = AF_UNSPEC; tcm->tcm__pad1 = 0; - tcm->tcm__pad1 = 0; + tcm->tcm__pad2 = 0; tcm->tcm_ifindex = qdisc_dev(tp->q)->ifindex; tcm->tcm_parent = tp->classid; tcm->tcm_info = TC_H_MAKE(tp->prio, tp->protocol); diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index eddab09..ca3c92a 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -4029,7 +4029,7 @@ static int nl80211_wiphy_netns(struct sk_buff *skb, struct genl_info *info) rdev = cfg80211_get_dev_from_info(info); if (IS_ERR(rdev)) { err = PTR_ERR(rdev); - goto out; + goto out_rtnl; } net = get_net_ns_by_pid(pid); @@ -4049,6 +4049,7 @@ static int nl80211_wiphy_netns(struct sk_buff *skb, struct genl_info *info) put_net(net); out: cfg80211_unlock_rdev(rdev); + out_rtnl: rtnl_unlock(); return err; } diff --git a/security/keys/keyctl.c b/security/keys/keyctl.c index 2fb28ef..06ec722 100644 --- a/security/keys/keyctl.c +++ b/security/keys/keyctl.c @@ -873,7 +873,7 @@ static long get_instantiation_keyring(key_serial_t ringid, /* otherwise specify the destination keyring recorded in the * authorisation key (any KEY_SPEC_*_KEYRING) */ if (ringid >= KEY_SPEC_REQUESTOR_KEYRING) { - *_dest_keyring = rka->dest_keyring; + *_dest_keyring = key_get(rka->dest_keyring); return 0; } diff --git a/tools/perf/Documentation/perf-probe.txt b/tools/perf/Documentation/perf-probe.txt new file mode 100644 index 0000000..6b6c6ae --- /dev/null +++ b/tools/perf/Documentation/perf-probe.txt @@ -0,0 +1,48 @@ +perf-probe(1) +============= + +NAME +---- +perf-probe - Define new dynamic tracepoints + +SYNOPSIS +-------- +[verse] +'perf probe' [-k <file>] -P 'PROBE' [-P 'PROBE' ...] + + +DESCRIPTION +----------- +This command defines dynamic tracepoint events, by symbol and registers +without debuginfo, or by C expressions (C line numbers, C function names, +and C local variables) with debuginfo. + + +OPTIONS +------- +-k:: +--vmlinux:: + Specify vmlinux path which has debuginfo (Dwarf binary). + +-v:: +--verbose:: + Be more verbose (show parsed arguments, etc). + +-P:: +--probe:: + Define a probe point (see PROBE SYNTAX for detail) + +PROBE SYNTAX +------------ +Probe points are defined by following syntax. + + "TYPE:[GRP/]NAME FUNC[+OFFS][@SRC]|@SRC:LINE [ARG ...]" + +'TYPE' specifies the type of probe point, you can use either "p" (kprobe) or "r" (kretprobe) for 'TYPE'. 'GRP' specifies the group name of this probe, and 'NAME' specifies the event name. If 'GRP' is omitted, "kprobes" is used for its group name. +'FUNC' and 'OFFS' specifies function and offset (in byte) where probe will be put. In addition, 'SRC' specifies a source file which has that function (this is mainly for inline functions). +You can specify a probe point by the source line number by using '@SRC:LINE' syntax, where 'SRC' is the source file path and 'LINE' is the line number. +'ARG' specifies the arguments of this probe point. You can use the name of local variable, or kprobe-tracer argument format (e.g. $retval, %ax, etc). + +SEE ALSO +-------- +linkperf:perf-trace[1], linkperf:perf-record[1] diff --git a/tools/perf/Makefile b/tools/perf/Makefile index 0a40c29..147e3cf 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -335,6 +335,7 @@ LIB_FILE=libperf.a LIB_H += ../../include/linux/perf_event.h LIB_H += ../../include/linux/rbtree.h LIB_H += ../../include/linux/list.h +LIB_H += ../../include/linux/stringify.h LIB_H += util/include/linux/bitmap.h LIB_H += util/include/linux/bitops.h LIB_H += util/include/linux/compiler.h @@ -425,6 +426,7 @@ BUILTIN_OBJS += builtin-stat.o BUILTIN_OBJS += builtin-timechart.o BUILTIN_OBJS += builtin-top.o BUILTIN_OBJS += builtin-trace.o +BUILTIN_OBJS += builtin-probe.o PERFLIBS = $(LIB_FILE) @@ -463,6 +465,15 @@ ifneq ($(shell sh -c "(echo '\#include <libelf.h>'; echo 'int main(void) { Elf * msg := $(error No libelf.h/libelf found, please install libelf-dev/elfutils-libelf-devel); endif +ifneq ($(shell sh -c "(echo '\#include <libdwarf/dwarf.h>'; echo '\#include <libdwarf/libdwarf.h>'; echo 'int main(void) { Dwarf_Debug dbg; Dwarf_Error err; Dwarf_Ranges *rng; dwarf_init(0, DW_DLC_READ, 0, 0, &dbg, &err); dwarf_get_ranges(dbg, 0, &rng, 0, 0, &err); return (long)dbg; }') | $(CC) -x c - $(ALL_CFLAGS) -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -ldwarf -lelf -o /dev/null $(ALL_LDFLAGS) > /dev/null 2>&1 && echo y"), y) + msg := $(warning No libdwarf.h found or old libdwarf.h found, disables dwarf support. Please install libdwarf-dev/libdwarf-devel >= 20081231); + BASIC_CFLAGS += -DNO_LIBDWARF +else + EXTLIBS += -lelf -ldwarf + LIB_H += util/probe-finder.h + LIB_OBJS += util/probe-finder.o +endif + ifdef NO_DEMANGLE BASIC_CFLAGS += -DNO_DEMANGLE else diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c new file mode 100644 index 0000000..b5ad86a --- /dev/null +++ b/tools/perf/builtin-probe.c @@ -0,0 +1,372 @@ +/* + * builtin-probe.c + * + * Builtin probe command: Set up probe events by C expression + * + * Written by Masami Hiramatsu <mhiramat@redhat.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + */ +#define _GNU_SOURCE +#include <sys/utsname.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <errno.h> +#include <stdio.h> +#include <unistd.h> +#include <stdlib.h> +#include <string.h> + +#undef _GNU_SOURCE +#include "perf.h" +#include "builtin.h" +#include "util/util.h" +#include "util/event.h" +#include "util/debug.h" +#include "util/parse-options.h" +#include "util/parse-events.h" /* For debugfs_path */ +#include "util/probe-finder.h" + +/* Default vmlinux search paths */ +#define NR_SEARCH_PATH 3 +const char *default_search_path[NR_SEARCH_PATH] = { +"/lib/modules/%s/build/vmlinux", /* Custom build kernel */ +"/usr/lib/debug/lib/modules/%s/vmlinux", /* Red Hat debuginfo */ +"/boot/vmlinux-debug-%s", /* Ubuntu */ +}; + +#define MAX_PATH_LEN 256 +#define MAX_PROBES 128 +#define MAX_PROBE_ARGS 128 + +/* Session management structure */ +static struct { + char *vmlinux; + char *release; + int need_dwarf; + int nr_probe; + struct probe_point probes[MAX_PROBES]; + char *events[MAX_PROBES]; +} session; + +#define semantic_error(msg ...) die("Semantic error :" msg) + +static int parse_probepoint(const struct option *opt __used, + const char *str, int unset __used) +{ + char *argv[MAX_PROBE_ARGS + 2]; /* Event + probe + args */ + int argc, i; + char *arg, *ptr; + struct probe_point *pp = &session.probes[session.nr_probe]; + char **event = &session.events[session.nr_probe]; + int retp = 0; + + if (!str) /* The end of probe points */ + return 0; + + eprintf("probe-definition(%d): %s\n", session.nr_probe, str); + if (++session.nr_probe == MAX_PROBES) + semantic_error("Too many probes"); + + /* Separate arguments, similar to argv_split */ + argc = 0; + do { + /* Skip separators */ + while (isspace(*str)) + str++; + + /* Add an argument */ + if (*str != '\0') { + const char *s = str; + + /* Skip the argument */ + while (!isspace(*str) && *str != '\0') + str++; + + /* Duplicate the argument */ + argv[argc] = strndup(s, str - s); + if (argv[argc] == NULL) + die("strndup"); + if (++argc == MAX_PROBE_ARGS) + semantic_error("Too many arguments"); + eprintf("argv[%d]=%s\n", argc, argv[argc - 1]); + } + } while (*str != '\0'); + if (argc < 2) + semantic_error("Need event-name and probe-point at least."); + + /* Parse the event name */ + if (argv[0][0] == 'r') + retp = 1; + else if (argv[0][0] != 'p') + semantic_error("You must specify 'p'(kprobe) or" + " 'r'(kretprobe) first."); + /* TODO: check event name */ + *event = argv[0]; + + /* Parse probe point */ + arg = argv[1]; + if (arg[0] == '@') { + /* Source Line */ + arg++; + ptr = strchr(arg, ':'); + if (!ptr || !isdigit(ptr[1])) + semantic_error("Line number is required."); + *ptr++ = '\0'; + if (strlen(arg) == 0) + semantic_error("No file name."); + pp->file = strdup(arg); + pp->line = atoi(ptr); + if (!pp->file || !pp->line) + semantic_error("Failed to parse line."); + eprintf("file:%s line:%d\n", pp->file, pp->line); + } else { + /* Function name */ + ptr = strchr(arg, '+'); + if (ptr) { + if (!isdigit(ptr[1])) + semantic_error("Offset is required."); + *ptr++ = '\0'; + pp->offset = atoi(ptr); + } else + ptr = arg; + ptr = strchr(ptr, '@'); + if (ptr) { + *ptr++ = '\0'; + pp->file = strdup(ptr); + } + pp->function = strdup(arg); + eprintf("symbol:%s file:%s offset:%d\n", + pp->function, pp->file, pp->offset); + } + free(argv[1]); + if (pp->file) + session.need_dwarf = 1; + + /* Copy arguments */ + pp->nr_args = argc - 2; + if (pp->nr_args > 0) { + pp->args = (char **)malloc(sizeof(char *) * pp->nr_args); + if (!pp->args) + die("malloc"); + memcpy(pp->args, &argv[2], sizeof(char *) * pp->nr_args); + } + + /* Ensure return probe has no C argument */ + for (i = 0; i < pp->nr_args; i++) + if (is_c_varname(pp->args[i])) { + if (retp) + semantic_error("You can't specify local" + " variable for kretprobe"); + session.need_dwarf = 1; + } + + eprintf("%d arguments\n", pp->nr_args); + return 0; +} + +#ifndef NO_LIBDWARF +static int open_default_vmlinux(void) +{ + struct utsname uts; + char fname[MAX_PATH_LEN]; + int fd, ret, i; + + ret = uname(&uts); + if (ret) { + eprintf("uname() failed.\n"); + return -errno; + } + session.release = uts.release; + for (i = 0; i < NR_SEARCH_PATH; i++) { + ret = snprintf(fname, MAX_PATH_LEN, + default_search_path[i], session.release); + if (ret >= MAX_PATH_LEN || ret < 0) { + eprintf("Filename(%d,%s) is too long.\n", i, + uts.release); + errno = E2BIG; + return -E2BIG; + } + eprintf("try to open %s\n", fname); + fd = open(fname, O_RDONLY); + if (fd >= 0) + break; + } + return fd; +} +#endif + +static const char * const probe_usage[] = { + "perf probe [<options>] -P 'PROBEDEF' [-P 'PROBEDEF' ...]", + NULL +}; + +static const struct option options[] = { + OPT_BOOLEAN('v', "verbose", &verbose, + "be more verbose (show parsed arguments, etc)"), +#ifndef NO_LIBDWARF + OPT_STRING('k', "vmlinux", &session.vmlinux, "file", + "vmlinux/module pathname"), +#endif + OPT_CALLBACK('P', "probe", NULL, +#ifdef NO_LIBDWARF + "p|r:[GRP/]NAME FUNC[+OFFS] [ARG ...]", +#else + "p|r:[GRP/]NAME FUNC[+OFFS][@SRC]|@SRC:LINE [ARG ...]", +#endif + "probe point definition, where\n" + "\t\tp:\tkprobe probe\n" + "\t\tr:\tkretprobe probe\n" + "\t\tGRP:\tGroup name (optional)\n" + "\t\tNAME:\tEvent name\n" + "\t\tFUNC:\tFunction name\n" + "\t\tOFFS:\tOffset from function entry (in byte)\n" +#ifdef NO_LIBDWARF + "\t\tARG:\tProbe argument (only \n" +#else + "\t\tSRC:\tSource code path\n" + "\t\tLINE:\tLine number\n" + "\t\tARG:\tProbe argument (local variable name or\n" +#endif + "\t\t\tkprobe-tracer argument format is supported.)\n", + parse_probepoint), + OPT_END() +}; + +static int write_new_event(int fd, const char *buf) +{ + int ret; + + printf("Adding new event: %s\n", buf); + ret = write(fd, buf, strlen(buf)); + if (ret <= 0) + die("failed to create event."); + + return ret; +} + +#define MAX_CMDLEN 256 + +static int synthesize_probepoint(struct probe_point *pp) +{ + char *buf; + int i, len, ret; + pp->probes[0] = buf = (char *)calloc(MAX_CMDLEN, sizeof(char)); + if (!buf) + die("calloc"); + ret = snprintf(buf, MAX_CMDLEN, "%s+%d", pp->function, pp->offset); + if (ret <= 0 || ret >= MAX_CMDLEN) + goto error; + len = ret; + + for (i = 0; i < pp->nr_args; i++) { + ret = snprintf(&buf[len], MAX_CMDLEN - len, " %s", + pp->args[i]); + if (ret <= 0 || ret >= MAX_CMDLEN - len) + goto error; + len += ret; + } + pp->found = 1; + return pp->found; +error: + free(pp->probes[0]); + if (ret > 0) + ret = -E2BIG; + return ret; +} + +int cmd_probe(int argc, const char **argv, const char *prefix __used) +{ + int i, j, fd, ret; + struct probe_point *pp; + char buf[MAX_CMDLEN]; + + argc = parse_options(argc, argv, options, probe_usage, + PARSE_OPT_STOP_AT_NON_OPTION); + if (argc || session.nr_probe == 0) + usage_with_options(probe_usage, options); + +#ifdef NO_LIBDWARF + if (session.need_dwarf) + semantic_error("Dwarf-analysis is not supported"); +#endif + + /* Synthesize probes without dwarf */ + for (j = 0; j < session.nr_probe; j++) { +#ifndef NO_LIBDWARF + if (session.events[j][0] != 'r') { + session.need_dwarf = 1; + continue; + } +#endif + ret = synthesize_probepoint(&session.probes[j]); + if (ret == -E2BIG) + semantic_error("probe point is too long."); + else if (ret < 0) + die("snprintf"); + } + +#ifndef NO_LIBDWARF + if (!session.need_dwarf) + goto setup_probes; + + if (session.vmlinux) + fd = open(session.vmlinux, O_RDONLY); + else + fd = open_default_vmlinux(); + if (fd < 0) + die("vmlinux/module file open"); + + /* Searching probe points */ + for (j = 0; j < session.nr_probe; j++) { + pp = &session.probes[j]; + if (pp->found) + continue; + + lseek(fd, SEEK_SET, 0); + ret = find_probepoint(fd, pp); + if (ret <= 0) + die("No probe point found.\n"); + eprintf("probe event %s found\n", session.events[j]); + } + close(fd); + +setup_probes: +#endif /* !NO_LIBDWARF */ + + /* Settng up probe points */ + snprintf(buf, MAX_CMDLEN, "%s/../kprobe_events", debugfs_path); + fd = open(buf, O_WRONLY, O_APPEND); + if (fd < 0) + die("kprobe_events open"); + for (j = 0; j < session.nr_probe; j++) { + pp = &session.probes[j]; + if (pp->found == 1) { + snprintf(buf, MAX_CMDLEN, "%s %s\n", + session.events[j], pp->probes[0]); + write_new_event(fd, buf); + } else + for (i = 0; i < pp->found; i++) { + snprintf(buf, MAX_CMDLEN, "%s%d %s\n", + session.events[j], i, pp->probes[i]); + write_new_event(fd, buf); + } + } + close(fd); + return 0; +} + diff --git a/tools/perf/builtin.h b/tools/perf/builtin.h index e11d8d2..ad5f0f4 100644 --- a/tools/perf/builtin.h +++ b/tools/perf/builtin.h @@ -25,5 +25,6 @@ extern int cmd_timechart(int argc, const char **argv, const char *prefix); extern int cmd_top(int argc, const char **argv, const char *prefix); extern int cmd_trace(int argc, const char **argv, const char *prefix); extern int cmd_version(int argc, const char **argv, const char *prefix); +extern int cmd_probe(int argc, const char **argv, const char *prefix); #endif diff --git a/tools/perf/command-list.txt b/tools/perf/command-list.txt index 00326e2..6475db4 100644 --- a/tools/perf/command-list.txt +++ b/tools/perf/command-list.txt @@ -11,3 +11,4 @@ perf-stat mainporcelain common perf-timechart mainporcelain common perf-top mainporcelain common perf-trace mainporcelain common +perf-probe mainporcelain common diff --git a/tools/perf/perf.c b/tools/perf/perf.c index 624e62d..9cafe54 100644 --- a/tools/perf/perf.c +++ b/tools/perf/perf.c @@ -295,6 +295,7 @@ static void handle_internal_command(int argc, const char **argv) { "version", cmd_version, 0 }, { "trace", cmd_trace, 0 }, { "sched", cmd_sched, 0 }, + { "probe", cmd_probe, 0 }, }; unsigned int i; static const char ext[] = STRIP_EXTENSION; diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c new file mode 100644 index 0000000..be997ab --- /dev/null +++ b/tools/perf/util/probe-finder.c @@ -0,0 +1,677 @@ +/* + * probe-finder.c : C expression to kprobe event converter + * + * Written by Masami Hiramatsu <mhiramat@redhat.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + */ + +#include <sys/utsname.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <errno.h> +#include <stdio.h> +#include <unistd.h> +#include <getopt.h> +#include <stdlib.h> +#include <string.h> +#include <stdarg.h> +#include <ctype.h> + +#include "event.h" +#include "debug.h" +#include "util.h" +#include "probe-finder.h" + + +/* Dwarf_Die Linkage to parent Die */ +struct die_link { + struct die_link *parent; /* Parent die */ + Dwarf_Die die; /* Current die */ +}; + +static Dwarf_Debug __dw_debug; +static Dwarf_Error __dw_error; + +/* + * Generic dwarf analysis helpers + */ + +#define X86_32_MAX_REGS 8 +const char *x86_32_regs_table[X86_32_MAX_REGS] = { + "%ax", + "%cx", + "%dx", + "%bx", + "$stack", /* Stack address instead of %sp */ + "%bp", + "%si", + "%di", +}; + +#define X86_64_MAX_REGS 16 +const char *x86_64_regs_table[X86_64_MAX_REGS] = { + "%ax", + "%dx", + "%cx", + "%bx", + "%si", + "%di", + "%bp", + "%sp", + "%r8", + "%r9", + "%r10", + "%r11", + "%r12", + "%r13", + "%r14", + "%r15", +}; + +/* TODO: switching by dwarf address size */ +#ifdef __x86_64__ +#define ARCH_MAX_REGS X86_64_MAX_REGS +#define arch_regs_table x86_64_regs_table +#else +#define ARCH_MAX_REGS X86_32_MAX_REGS +#define arch_regs_table x86_32_regs_table +#endif + +/* Return architecture dependent register string (for kprobe-tracer) */ +static const char *get_arch_regstr(unsigned int n) +{ + return (n <= ARCH_MAX_REGS) ? arch_regs_table[n] : NULL; +} + +/* + * Compare the tail of two strings. + * Return 0 if whole of either string is same as another's tail part. + */ +static int strtailcmp(const char *s1, const char *s2) +{ + int i1 = strlen(s1); + int i2 = strlen(s2); + while (--i1 > 0 && --i2 > 0) { + if (s1[i1] != s2[i2]) + return s1[i1] - s2[i2]; + } + return 0; +} + +/* Find the fileno of the target file. */ +static Dwarf_Unsigned die_get_fileno(Dwarf_Die cu_die, const char *fname) +{ + Dwarf_Signed cnt, i; + Dwarf_Unsigned found = 0; + char **srcs; + int ret; + + if (!fname) + return 0; + + ret = dwarf_srcfiles(cu_die, &srcs, &cnt, &__dw_error); + if (ret == DW_DLV_OK) { + for (i = 0; i < cnt && !found; i++) { + if (strtailcmp(srcs[i], fname) == 0) + found = i + 1; + dwarf_dealloc(__dw_debug, srcs[i], DW_DLA_STRING); + } + for (; i < cnt; i++) + dwarf_dealloc(__dw_debug, srcs[i], DW_DLA_STRING); + dwarf_dealloc(__dw_debug, srcs, DW_DLA_LIST); + } + if (found) + eprintf("found fno: %d\n", (int)found); + return found; +} + +/* Compare diename and tname */ +static int die_compare_name(Dwarf_Die dw_die, const char *tname) +{ + char *name; + int ret; + ret = dwarf_diename(dw_die, &name, &__dw_error); + DIE_IF(ret == DW_DLV_ERROR); + if (ret == DW_DLV_OK) { + ret = strcmp(tname, name); + dwarf_dealloc(__dw_debug, name, DW_DLA_STRING); + } else + ret = -1; + return ret; +} + +/* Check the address is in the subprogram(function). */ +static int die_within_subprogram(Dwarf_Die sp_die, Dwarf_Addr addr, + Dwarf_Signed *offs) +{ + Dwarf_Addr lopc, hipc; + int ret; + + /* TODO: check ranges */ + ret = dwarf_lowpc(sp_die, &lopc, &__dw_error); + DIE_IF(ret == DW_DLV_ERROR); + if (ret == DW_DLV_NO_ENTRY) + return 0; + ret = dwarf_highpc(sp_die, &hipc, &__dw_error); + DIE_IF(ret != DW_DLV_OK); + if (lopc <= addr && addr < hipc) { + *offs = addr - lopc; + return 1; + } else + return 0; +} + +/* Check the die is inlined function */ +static Dwarf_Bool die_inlined_subprogram(Dwarf_Die dw_die) +{ + /* TODO: check strictly */ + Dwarf_Bool inl; + int ret; + + ret = dwarf_hasattr(dw_die, DW_AT_inline, &inl, &__dw_error); + DIE_IF(ret == DW_DLV_ERROR); + return inl; +} + +/* Get the offset of abstruct_origin */ +static Dwarf_Off die_get_abstract_origin(Dwarf_Die dw_die) +{ + Dwarf_Attribute attr; + Dwarf_Off cu_offs; + int ret; + + ret = dwarf_attr(dw_die, DW_AT_abstract_origin, &attr, &__dw_error); + DIE_IF(ret != DW_DLV_OK); + ret = dwarf_formref(attr, &cu_offs, &__dw_error); + DIE_IF(ret != DW_DLV_OK); + dwarf_dealloc(__dw_debug, attr, DW_DLA_ATTR); + return cu_offs; +} + +/* Get entry pc(or low pc, 1st entry of ranges) of the die */ +static Dwarf_Addr die_get_entrypc(Dwarf_Die dw_die) +{ + Dwarf_Attribute attr; + Dwarf_Addr addr; + Dwarf_Off offs; + Dwarf_Ranges *ranges; + Dwarf_Signed cnt; + int ret; + + /* Try to get entry pc */ + ret = dwarf_attr(dw_die, DW_AT_entry_pc, &attr, &__dw_error); + DIE_IF(ret == DW_DLV_ERROR); + if (ret == DW_DLV_OK) { + ret = dwarf_formaddr(attr, &addr, &__dw_error); + DIE_IF(ret != DW_DLV_OK); + dwarf_dealloc(__dw_debug, attr, DW_DLA_ATTR); + return addr; + } + + /* Try to get low pc */ + ret = dwarf_lowpc(dw_die, &addr, &__dw_error); + DIE_IF(ret == DW_DLV_ERROR); + if (ret == DW_DLV_OK) + return addr; + + /* Try to get ranges */ + ret = dwarf_attr(dw_die, DW_AT_ranges, &attr, &__dw_error); + DIE_IF(ret != DW_DLV_OK); + ret = dwarf_formref(attr, &offs, &__dw_error); + DIE_IF(ret != DW_DLV_OK); + ret = dwarf_get_ranges(__dw_debug, offs, &ranges, &cnt, NULL, + &__dw_error); + DIE_IF(ret != DW_DLV_OK); + addr = ranges[0].dwr_addr1; + dwarf_ranges_dealloc(__dw_debug, ranges, cnt); + return addr; +} + +/* + * Search a Die from Die tree. + * Note: cur_link->die should be deallocated in this function. + */ +static int __search_die_tree(struct die_link *cur_link, + int (*die_cb)(struct die_link *, void *), + void *data) +{ + Dwarf_Die new_die; + struct die_link new_link; + int ret; + + if (!die_cb) + return 0; + + /* Check current die */ + while (!(ret = die_cb(cur_link, data))) { + /* Check child die */ + ret = dwarf_child(cur_link->die, &new_die, &__dw_error); + DIE_IF(ret == DW_DLV_ERROR); + if (ret == DW_DLV_OK) { + new_link.parent = cur_link; + new_link.die = new_die; + ret = __search_die_tree(&new_link, die_cb, data); + if (ret) + break; + } + + /* Move to next sibling */ + ret = dwarf_siblingof(__dw_debug, cur_link->die, &new_die, + &__dw_error); + DIE_IF(ret == DW_DLV_ERROR); + dwarf_dealloc(__dw_debug, cur_link->die, DW_DLA_DIE); + cur_link->die = new_die; + if (ret == DW_DLV_NO_ENTRY) + return 0; + } + dwarf_dealloc(__dw_debug, cur_link->die, DW_DLA_DIE); + return ret; +} + +/* Search a die in its children's die tree */ +static int search_die_from_children(Dwarf_Die parent_die, + int (*die_cb)(struct die_link *, void *), + void *data) +{ + struct die_link new_link; + int ret; + + new_link.parent = NULL; + ret = dwarf_child(parent_die, &new_link.die, &__dw_error); + DIE_IF(ret == DW_DLV_ERROR); + if (ret == DW_DLV_OK) + return __search_die_tree(&new_link, die_cb, data); + else + return 0; +} + +/* Find a locdesc corresponding to the address */ +static int attr_get_locdesc(Dwarf_Attribute attr, Dwarf_Locdesc *desc, + Dwarf_Addr addr) +{ + Dwarf_Signed lcnt; + Dwarf_Locdesc **llbuf; + int ret, i; + + ret = dwarf_loclist_n(attr, &llbuf, &lcnt, &__dw_error); + DIE_IF(ret != DW_DLV_OK); + ret = DW_DLV_NO_ENTRY; + for (i = 0; i < lcnt; ++i) { + if (llbuf[i]->ld_lopc <= addr && + llbuf[i]->ld_hipc > addr) { + memcpy(desc, llbuf[i], sizeof(Dwarf_Locdesc)); + desc->ld_s = + malloc(sizeof(Dwarf_Loc) * llbuf[i]->ld_cents); + DIE_IF(desc->ld_s == NULL); + memcpy(desc->ld_s, llbuf[i]->ld_s, + sizeof(Dwarf_Loc) * llbuf[i]->ld_cents); + ret = DW_DLV_OK; + break; + } + dwarf_dealloc(__dw_debug, llbuf[i]->ld_s, DW_DLA_LOC_BLOCK); + dwarf_dealloc(__dw_debug, llbuf[i], DW_DLA_LOCDESC); + } + /* Releasing loop */ + for (; i < lcnt; ++i) { + dwarf_dealloc(__dw_debug, llbuf[i]->ld_s, DW_DLA_LOC_BLOCK); + dwarf_dealloc(__dw_debug, llbuf[i], DW_DLA_LOCDESC); + } + dwarf_dealloc(__dw_debug, llbuf, DW_DLA_LIST); + return ret; +} + +/* + * Probe finder related functions + */ + +/* Show a location */ +static void show_location(Dwarf_Loc *loc, struct probe_finder *pf) +{ + Dwarf_Small op; + Dwarf_Unsigned regn; + Dwarf_Signed offs; + int deref = 0, ret; + const char *regs; + + op = loc->lr_atom; + + /* If this is based on frame buffer, set the offset */ + if (op == DW_OP_fbreg) { + deref = 1; + offs = (Dwarf_Signed)loc->lr_number; + op = pf->fbloc.ld_s[0].lr_atom; + loc = &pf->fbloc.ld_s[0]; + } else + offs = 0; + + if (op >= DW_OP_breg0 && op <= DW_OP_breg31) { + regn = op - DW_OP_breg0; + offs += (Dwarf_Signed)loc->lr_number; + deref = 1; + } else if (op >= DW_OP_reg0 && op <= DW_OP_reg31) { + regn = op - DW_OP_reg0; + } else if (op == DW_OP_bregx) { + regn = loc->lr_number; + offs += (Dwarf_Signed)loc->lr_number2; + deref = 1; + } else if (op == DW_OP_regx) { + regn = loc->lr_number; + } else + die("Dwarf_OP %d is not supported.\n", op); + + regs = get_arch_regstr(regn); + if (!regs) + die("%lld exceeds max register number.\n", regn); + + if (deref) + ret = snprintf(pf->buf, pf->len, + " %s=%+lld(%s)", pf->var, offs, regs); + else + ret = snprintf(pf->buf, pf->len, " %s=%s", pf->var, regs); + DIE_IF(ret < 0); + DIE_IF(ret >= pf->len); +} + +/* Show a variables in kprobe event format */ +static void show_variable(Dwarf_Die vr_die, struct probe_finder *pf) +{ + Dwarf_Attribute attr; + Dwarf_Locdesc ld; + int ret; + + ret = dwarf_attr(vr_die, DW_AT_location, &attr, &__dw_error); + if (ret != DW_DLV_OK) + goto error; + ret = attr_get_locdesc(attr, &ld, (pf->addr - pf->cu_base)); + if (ret != DW_DLV_OK) + goto error; + /* TODO? */ + DIE_IF(ld.ld_cents != 1); + show_location(&ld.ld_s[0], pf); + free(ld.ld_s); + dwarf_dealloc(__dw_debug, attr, DW_DLA_ATTR); + return ; +error: + die("Failed to find the location of %s at this address.\n" + " Perhaps, it has been optimized out.\n", pf->var); +} + +static int variable_callback(struct die_link *dlink, void *data) +{ + struct probe_finder *pf = (struct probe_finder *)data; + Dwarf_Half tag; + int ret; + + ret = dwarf_tag(dlink->die, &tag, &__dw_error); + DIE_IF(ret == DW_DLV_ERROR); + if ((tag == DW_TAG_formal_parameter || + tag == DW_TAG_variable) && + (die_compare_name(dlink->die, pf->var) == 0)) { + show_variable(dlink->die, pf); + return 1; + } + /* TODO: Support struct members and arrays */ + return 0; +} + +/* Find a variable in a subprogram die */ +static void find_variable(Dwarf_Die sp_die, struct probe_finder *pf) +{ + int ret; + + if (!is_c_varname(pf->var)) { + /* Output raw parameters */ + ret = snprintf(pf->buf, pf->len, " %s", pf->var); + DIE_IF(ret < 0); + DIE_IF(ret >= pf->len); + return ; + } + + eprintf("Searching '%s' variable in context.\n", pf->var); + /* Search child die for local variables and parameters. */ + ret = search_die_from_children(sp_die, variable_callback, pf); + if (!ret) + die("Failed to find '%s' in this function.\n", pf->var); +} + +/* Get a frame base on the address */ +static void get_current_frame_base(Dwarf_Die sp_die, struct probe_finder *pf) +{ + Dwarf_Attribute attr; + int ret; + + ret = dwarf_attr(sp_die, DW_AT_frame_base, &attr, &__dw_error); + DIE_IF(ret != DW_DLV_OK); + ret = attr_get_locdesc(attr, &pf->fbloc, (pf->addr - pf->cu_base)); + DIE_IF(ret != DW_DLV_OK); + dwarf_dealloc(__dw_debug, attr, DW_DLA_ATTR); +} + +static void free_current_frame_base(struct probe_finder *pf) +{ + free(pf->fbloc.ld_s); + memset(&pf->fbloc, 0, sizeof(Dwarf_Locdesc)); +} + +/* Show a probe point to output buffer */ +static void show_probepoint(Dwarf_Die sp_die, Dwarf_Signed offs, + struct probe_finder *pf) +{ + struct probe_point *pp = pf->pp; + char *name; + char tmp[MAX_PROBE_BUFFER]; + int ret, i, len; + + /* Output name of probe point */ + ret = dwarf_diename(sp_die, &name, &__dw_error); + DIE_IF(ret == DW_DLV_ERROR); + if (ret == DW_DLV_OK) { + ret = snprintf(tmp, MAX_PROBE_BUFFER, "%s+%u", name, + (unsigned int)offs); + dwarf_dealloc(__dw_debug, name, DW_DLA_STRING); + } else { + /* This function has no name. */ + ret = snprintf(tmp, MAX_PROBE_BUFFER, "0x%llx", pf->addr); + } + DIE_IF(ret < 0); + DIE_IF(ret >= MAX_PROBE_BUFFER); + len = ret; + + /* Find each argument */ + get_current_frame_base(sp_die, pf); + for (i = 0; i < pp->nr_args; i++) { + pf->var = pp->args[i]; + pf->buf = &tmp[len]; + pf->len = MAX_PROBE_BUFFER - len; + find_variable(sp_die, pf); + len += strlen(pf->buf); + } + free_current_frame_base(pf); + + pp->probes[pp->found] = strdup(tmp); + pp->found++; +} + +static int probeaddr_callback(struct die_link *dlink, void *data) +{ + struct probe_finder *pf = (struct probe_finder *)data; + Dwarf_Half tag; + Dwarf_Signed offs; + int ret; + + ret = dwarf_tag(dlink->die, &tag, &__dw_error); + DIE_IF(ret == DW_DLV_ERROR); + /* Check the address is in this subprogram */ + if (tag == DW_TAG_subprogram && + die_within_subprogram(dlink->die, pf->addr, &offs)) { + show_probepoint(dlink->die, offs, pf); + return 1; + } + return 0; +} + +/* Find probe point from its line number */ +static void find_by_line(Dwarf_Die cu_die, struct probe_finder *pf) +{ + struct probe_point *pp = pf->pp; + Dwarf_Signed cnt, i; + Dwarf_Line *lines; + Dwarf_Unsigned lineno = 0; + Dwarf_Addr addr; + Dwarf_Unsigned fno; + int ret; + + ret = dwarf_srclines(cu_die, &lines, &cnt, &__dw_error); + DIE_IF(ret != DW_DLV_OK); + + for (i = 0; i < cnt; i++) { + ret = dwarf_line_srcfileno(lines[i], &fno, &__dw_error); + DIE_IF(ret != DW_DLV_OK); + if (fno != pf->fno) + continue; + + ret = dwarf_lineno(lines[i], &lineno, &__dw_error); + DIE_IF(ret != DW_DLV_OK); + if (lineno != (Dwarf_Unsigned)pp->line) + continue; + + ret = dwarf_lineaddr(lines[i], &addr, &__dw_error); + DIE_IF(ret != DW_DLV_OK); + eprintf("Probe point found: 0x%llx\n", addr); + pf->addr = addr; + /* Search a real subprogram including this line, */ + ret = search_die_from_children(cu_die, probeaddr_callback, pf); + if (ret == 0) + die("Probe point is not found in subprograms.\n"); + /* Continuing, because target line might be inlined. */ + } + dwarf_srclines_dealloc(__dw_debug, lines, cnt); +} + +/* Search function from function name */ +static int probefunc_callback(struct die_link *dlink, void *data) +{ + struct probe_finder *pf = (struct probe_finder *)data; + struct probe_point *pp = pf->pp; + struct die_link *lk; + Dwarf_Signed offs; + Dwarf_Half tag; + int ret; + + ret = dwarf_tag(dlink->die, &tag, &__dw_error); + DIE_IF(ret == DW_DLV_ERROR); + if (tag == DW_TAG_subprogram) { + if (die_compare_name(dlink->die, pp->function) == 0) { + if (die_inlined_subprogram(dlink->die)) { + /* Inlined function, save it. */ + ret = dwarf_die_CU_offset(dlink->die, + &pf->inl_offs, + &__dw_error); + DIE_IF(ret != DW_DLV_OK); + eprintf("inline definition offset %lld\n", + pf->inl_offs); + return 0; + } + /* Get probe address */ + pf->addr = die_get_entrypc(dlink->die); + pf->addr += pp->offset; + /* TODO: Check the address in this function */ + show_probepoint(dlink->die, pp->offset, pf); + /* Continue to search */ + } + } else if (tag == DW_TAG_inlined_subroutine && pf->inl_offs) { + if (die_get_abstract_origin(dlink->die) == pf->inl_offs) { + /* Get probe address */ + pf->addr = die_get_entrypc(dlink->die); + pf->addr += pp->offset; + eprintf("found inline addr: 0x%llx\n", pf->addr); + /* Inlined function. Get a real subprogram */ + for (lk = dlink->parent; lk != NULL; lk = lk->parent) { + tag = 0; + dwarf_tag(lk->die, &tag, &__dw_error); + DIE_IF(ret == DW_DLV_ERROR); + if (tag == DW_TAG_subprogram && + !die_inlined_subprogram(lk->die)) + goto found; + } + die("Failed to find real subprogram.\n"); +found: + /* Get offset from subprogram */ + ret = die_within_subprogram(lk->die, pf->addr, &offs); + DIE_IF(!ret); + show_probepoint(lk->die, offs, pf); + /* Continue to search */ + } + } + return 0; +} + +static void find_by_func(Dwarf_Die cu_die, struct probe_finder *pf) +{ + search_die_from_children(cu_die, probefunc_callback, pf); +} + +/* Find a probe point */ +int find_probepoint(int fd, struct probe_point *pp) +{ + Dwarf_Half addr_size = 0; + Dwarf_Unsigned next_cuh = 0; + Dwarf_Die cu_die = 0; + int cu_number = 0, ret; + struct probe_finder pf = {.pp = pp}; + + ret = dwarf_init(fd, DW_DLC_READ, 0, 0, &__dw_debug, &__dw_error); + if (ret != DW_DLV_OK) + die("Failed to call dwarf_init(). Maybe, not a dwarf file.\n"); + + pp->found = 0; + while (++cu_number) { + /* Search CU (Compilation Unit) */ + ret = dwarf_next_cu_header(__dw_debug, NULL, NULL, NULL, + &addr_size, &next_cuh, &__dw_error); + DIE_IF(ret == DW_DLV_ERROR); + if (ret == DW_DLV_NO_ENTRY) + break; + + /* Get the DIE(Debugging Information Entry) of this CU */ + ret = dwarf_siblingof(__dw_debug, 0, &cu_die, &__dw_error); + DIE_IF(ret != DW_DLV_OK); + + /* Check if target file is included. */ + if (pp->file) + pf.fno = die_get_fileno(cu_die, pp->file); + + if (!pp->file || pf.fno) { + /* Save CU base address (for frame_base) */ + ret = dwarf_lowpc(cu_die, &pf.cu_base, &__dw_error); + DIE_IF(ret == DW_DLV_ERROR); + if (ret == DW_DLV_NO_ENTRY) + pf.cu_base = 0; + if (pp->line) + find_by_line(cu_die, &pf); + if (pp->function) + find_by_func(cu_die, &pf); + } + dwarf_dealloc(__dw_debug, cu_die, DW_DLA_DIE); + } + ret = dwarf_finish(__dw_debug, &__dw_error); + DIE_IF(ret != DW_DLV_OK); + + return pp->found; +} + diff --git a/tools/perf/util/probe-finder.h b/tools/perf/util/probe-finder.h new file mode 100644 index 0000000..d17fafc --- /dev/null +++ b/tools/perf/util/probe-finder.h @@ -0,0 +1,53 @@ +#ifndef _PROBE_FINDER_H +#define _PROBE_FINDER_H + +#define MAX_PATH_LEN 256 +#define MAX_PROBE_BUFFER 1024 +#define MAX_PROBES 128 + +static inline int is_c_varname(const char *name) +{ + /* TODO */ + return isalpha(name[0]) || name[0] == '_'; +} + +struct probe_point { + /* Inputs */ + char *file; /* File name */ + int line; /* Line number */ + + char *function; /* Function name */ + int offset; /* Offset bytes */ + + int nr_args; /* Number of arguments */ + char **args; /* Arguments */ + + /* Output */ + int found; /* Number of found probe points */ + char *probes[MAX_PROBES]; /* Output buffers (will be allocated)*/ +}; + +#ifndef NO_LIBDWARF +extern int find_probepoint(int fd, struct probe_point *pp); + +#include <libdwarf/dwarf.h> +#include <libdwarf/libdwarf.h> + +struct probe_finder { + struct probe_point *pp; /* Target probe point */ + + /* For function searching */ + Dwarf_Addr addr; /* Address */ + Dwarf_Unsigned fno; /* File number */ + Dwarf_Off inl_offs; /* Inline offset */ + + /* For variable searching */ + Dwarf_Addr cu_base; /* Current CU base address */ + Dwarf_Locdesc fbloc; /* Location of Current Frame Base */ + const char *var; /* Current variable name */ + char *buf; /* Current output buffer */ + int len; /* Length of output buffer */ +}; +#endif /* NO_LIBDWARF */ + +#endif /*_PROBE_FINDER_H */ diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index 9de2329..0daa341 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -134,6 +134,15 @@ extern void die(const char *err, ...) NORETURN __attribute__((format (printf, 1, extern int error(const char *err, ...) __attribute__((format (printf, 1, 2))); extern void warning(const char *err, ...) __attribute__((format (printf, 1, 2))); +#include "../../../include/linux/stringify.h" + +#define DIE_IF(cnd) \ + do { if (cnd) \ + die(" at (" __FILE__ ":" __stringify(__LINE__) "): " \ + __stringify(cnd) "\n"); \ + } while (0) + + extern void set_die_routine(void (*routine)(const char *err, va_list params) NORETURN); extern int prefixcmp(const char *str, const char *prefix); |