From a2e5d188aad31f7177cbd6d9ddaf8cc9aa4affe0 Mon Sep 17 00:00:00 2001 From: Anton Vorontsov Date: Thu, 6 Nov 2014 14:36:40 +0000 Subject: kdb: Remove currently unused kdbtab_t->cmd_flags The struct member is never used in the code, so we can remove it. We will introduce real flags soon by renaming cmd_repeat to cmd_flags. Signed-off-by: Anton Vorontsov Signed-off-by: John Stultz Signed-off-by: Daniel Thompson Cc: Jason Wessel Signed-off-by: Jason Wessel diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c index 379650b..cc02aa2 100644 --- a/kernel/debug/kdb/kdb_main.c +++ b/kernel/debug/kdb/kdb_main.c @@ -2694,7 +2694,6 @@ int kdb_register_repeat(char *cmd, kp->cmd_func = func; kp->cmd_usage = usage; kp->cmd_help = help; - kp->cmd_flags = 0; kp->cmd_minlen = minlen; kp->cmd_repeat = repeat; diff --git a/kernel/debug/kdb/kdb_private.h b/kernel/debug/kdb/kdb_private.h index 7afd3c8..c4c46c7 100644 --- a/kernel/debug/kdb/kdb_private.h +++ b/kernel/debug/kdb/kdb_private.h @@ -172,7 +172,6 @@ typedef struct _kdbtab { kdb_func_t cmd_func; /* Function to execute command */ char *cmd_usage; /* Usage String for this command */ char *cmd_help; /* Help message for this command */ - short cmd_flags; /* Parsing flags */ short cmd_minlen; /* Minimum legal # command * chars required */ kdb_repeat_t cmd_repeat; /* Does command auto repeat on enter? */ -- cgit v0.10.2 From 15a42a9bc9ffcff4315a7154313db08c6bf9ef11 Mon Sep 17 00:00:00 2001 From: Anton Vorontsov Date: Thu, 6 Nov 2014 14:36:41 +0000 Subject: kdb: Rename kdb_repeat_t to kdb_cmdflags_t, cmd_repeat to cmd_flags We're about to add more options for command behaviour, so let's expand the meaning of kdb_repeat_t. So far we just do various renames, there should be no functional changes. Signed-off-by: Anton Vorontsov Signed-off-by: John Stultz Signed-off-by: Daniel Thompson Cc: Jason Wessel Signed-off-by: Jason Wessel diff --git a/include/linux/kdb.h b/include/linux/kdb.h index 290db12..e650f79 100644 --- a/include/linux/kdb.h +++ b/include/linux/kdb.h @@ -17,7 +17,7 @@ typedef enum { KDB_REPEAT_NONE = 0, /* Do not repeat this command */ KDB_REPEAT_NO_ARGS, /* Repeat the command without arguments */ KDB_REPEAT_WITH_ARGS, /* Repeat the command including its arguments */ -} kdb_repeat_t; +} kdb_cmdflags_t; typedef int (*kdb_func_t)(int, const char **); @@ -147,7 +147,7 @@ static inline const char *kdb_walk_kallsyms(loff_t *pos) /* Dynamic kdb shell command registration */ extern int kdb_register(char *, kdb_func_t, char *, char *, short); extern int kdb_register_repeat(char *, kdb_func_t, char *, char *, - short, kdb_repeat_t); + short, kdb_cmdflags_t); extern int kdb_unregister(char *); #else /* ! CONFIG_KGDB_KDB */ static inline __printf(1, 2) int kdb_printf(const char *fmt, ...) { return 0; } @@ -156,7 +156,7 @@ static inline int kdb_register(char *cmd, kdb_func_t func, char *usage, char *help, short minlen) { return 0; } static inline int kdb_register_repeat(char *cmd, kdb_func_t func, char *usage, char *help, short minlen, - kdb_repeat_t repeat) { return 0; } + kdb_cmdflags_t flags) { return 0; } static inline int kdb_unregister(char *cmd) { return 0; } #endif /* CONFIG_KGDB_KDB */ enum { diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c index cc02aa2..41966b5 100644 --- a/kernel/debug/kdb/kdb_main.c +++ b/kernel/debug/kdb/kdb_main.c @@ -1008,7 +1008,7 @@ int kdb_parse(const char *cmdstr) if (result && ignore_errors && result > KDB_CMD_GO) result = 0; KDB_STATE_CLEAR(CMD); - switch (tp->cmd_repeat) { + switch (tp->cmd_flags) { case KDB_REPEAT_NONE: argc = 0; if (argv[0]) @@ -2646,7 +2646,7 @@ int kdb_register_repeat(char *cmd, char *usage, char *help, short minlen, - kdb_repeat_t repeat) + kdb_cmdflags_t flags) { int i; kdbtab_t *kp; @@ -2695,7 +2695,7 @@ int kdb_register_repeat(char *cmd, kp->cmd_usage = usage; kp->cmd_help = help; kp->cmd_minlen = minlen; - kp->cmd_repeat = repeat; + kp->cmd_flags = flags; return 0; } diff --git a/kernel/debug/kdb/kdb_private.h b/kernel/debug/kdb/kdb_private.h index c4c46c7..eaacd16 100644 --- a/kernel/debug/kdb/kdb_private.h +++ b/kernel/debug/kdb/kdb_private.h @@ -174,7 +174,7 @@ typedef struct _kdbtab { char *cmd_help; /* Help message for this command */ short cmd_minlen; /* Minimum legal # command * chars required */ - kdb_repeat_t cmd_repeat; /* Does command auto repeat on enter? */ + kdb_cmdflags_t cmd_flags; /* Command behaviour flags */ } kdbtab_t; extern int kdb_bt(int, const char **); /* KDB display back trace */ -- cgit v0.10.2 From 42c884c10b775ce04f8aabe488820134625c893e Mon Sep 17 00:00:00 2001 From: Anton Vorontsov Date: Thu, 6 Nov 2014 14:36:42 +0000 Subject: kdb: Rename kdb_register_repeat() to kdb_register_flags() We're about to add more options for commands behaviour, so let's give a more generic name to the low-level kdb command registration function. There are just various renames, no functional changes. Signed-off-by: Anton Vorontsov Signed-off-by: John Stultz Signed-off-by: Daniel Thompson Cc: Jason Wessel Signed-off-by: Jason Wessel diff --git a/include/linux/kdb.h b/include/linux/kdb.h index e650f79..32d2f40 100644 --- a/include/linux/kdb.h +++ b/include/linux/kdb.h @@ -146,17 +146,17 @@ static inline const char *kdb_walk_kallsyms(loff_t *pos) /* Dynamic kdb shell command registration */ extern int kdb_register(char *, kdb_func_t, char *, char *, short); -extern int kdb_register_repeat(char *, kdb_func_t, char *, char *, - short, kdb_cmdflags_t); +extern int kdb_register_flags(char *, kdb_func_t, char *, char *, + short, kdb_cmdflags_t); extern int kdb_unregister(char *); #else /* ! CONFIG_KGDB_KDB */ static inline __printf(1, 2) int kdb_printf(const char *fmt, ...) { return 0; } static inline void kdb_init(int level) {} static inline int kdb_register(char *cmd, kdb_func_t func, char *usage, char *help, short minlen) { return 0; } -static inline int kdb_register_repeat(char *cmd, kdb_func_t func, char *usage, - char *help, short minlen, - kdb_cmdflags_t flags) { return 0; } +static inline int kdb_register_flags(char *cmd, kdb_func_t func, char *usage, + char *help, short minlen, + kdb_cmdflags_t flags) { return 0; } static inline int kdb_unregister(char *cmd) { return 0; } #endif /* CONFIG_KGDB_KDB */ enum { diff --git a/kernel/debug/kdb/kdb_bp.c b/kernel/debug/kdb/kdb_bp.c index b20d544..5953666 100644 --- a/kernel/debug/kdb/kdb_bp.c +++ b/kernel/debug/kdb/kdb_bp.c @@ -531,21 +531,21 @@ void __init kdb_initbptab(void) for (i = 0, bp = kdb_breakpoints; i < KDB_MAXBPT; i++, bp++) bp->bp_free = 1; - kdb_register_repeat("bp", kdb_bp, "[]", + kdb_register_flags("bp", kdb_bp, "[]", "Set/Display breakpoints", 0, KDB_REPEAT_NO_ARGS); - kdb_register_repeat("bl", kdb_bp, "[]", + kdb_register_flags("bl", kdb_bp, "[]", "Display breakpoints", 0, KDB_REPEAT_NO_ARGS); if (arch_kgdb_ops.flags & KGDB_HW_BREAKPOINT) - kdb_register_repeat("bph", kdb_bp, "[]", + kdb_register_flags("bph", kdb_bp, "[]", "[datar [length]|dataw [length]] Set hw brk", 0, KDB_REPEAT_NO_ARGS); - kdb_register_repeat("bc", kdb_bc, "", + kdb_register_flags("bc", kdb_bc, "", "Clear Breakpoint", 0, KDB_REPEAT_NONE); - kdb_register_repeat("be", kdb_bc, "", + kdb_register_flags("be", kdb_bc, "", "Enable Breakpoint", 0, KDB_REPEAT_NONE); - kdb_register_repeat("bd", kdb_bc, "", + kdb_register_flags("bd", kdb_bc, "", "Disable Breakpoint", 0, KDB_REPEAT_NONE); - kdb_register_repeat("ss", kdb_ss, "", + kdb_register_flags("ss", kdb_ss, "", "Single Step", 1, KDB_REPEAT_NO_ARGS); /* * Architecture dependent initialization. diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c index 41966b5..070f1ff 100644 --- a/kernel/debug/kdb/kdb_main.c +++ b/kernel/debug/kdb/kdb_main.c @@ -2629,7 +2629,7 @@ static int kdb_grep_help(int argc, const char **argv) } /* - * kdb_register_repeat - This function is used to register a kernel + * kdb_register_flags - This function is used to register a kernel * debugger command. * Inputs: * cmd Command name @@ -2641,12 +2641,12 @@ static int kdb_grep_help(int argc, const char **argv) * zero for success, one if a duplicate command. */ #define kdb_command_extend 50 /* arbitrary */ -int kdb_register_repeat(char *cmd, - kdb_func_t func, - char *usage, - char *help, - short minlen, - kdb_cmdflags_t flags) +int kdb_register_flags(char *cmd, + kdb_func_t func, + char *usage, + char *help, + short minlen, + kdb_cmdflags_t flags) { int i; kdbtab_t *kp; @@ -2699,13 +2699,13 @@ int kdb_register_repeat(char *cmd, return 0; } -EXPORT_SYMBOL_GPL(kdb_register_repeat); +EXPORT_SYMBOL_GPL(kdb_register_flags); /* * kdb_register - Compatibility register function for commands that do * not need to specify a repeat state. Equivalent to - * kdb_register_repeat with KDB_REPEAT_NONE. + * kdb_register_flags with KDB_REPEAT_NONE. * Inputs: * cmd Command name * func Function to execute the command @@ -2720,8 +2720,8 @@ int kdb_register(char *cmd, char *help, short minlen) { - return kdb_register_repeat(cmd, func, usage, help, minlen, - KDB_REPEAT_NONE); + return kdb_register_flags(cmd, func, usage, help, minlen, + KDB_REPEAT_NONE); } EXPORT_SYMBOL_GPL(kdb_register); @@ -2763,79 +2763,79 @@ static void __init kdb_inittab(void) for_each_kdbcmd(kp, i) kp->cmd_name = NULL; - kdb_register_repeat("md", kdb_md, "", + kdb_register_flags("md", kdb_md, "", "Display Memory Contents, also mdWcN, e.g. md8c1", 1, KDB_REPEAT_NO_ARGS); - kdb_register_repeat("mdr", kdb_md, " ", + kdb_register_flags("mdr", kdb_md, " ", "Display Raw Memory", 0, KDB_REPEAT_NO_ARGS); - kdb_register_repeat("mdp", kdb_md, " ", + kdb_register_flags("mdp", kdb_md, " ", "Display Physical Memory", 0, KDB_REPEAT_NO_ARGS); - kdb_register_repeat("mds", kdb_md, "", + kdb_register_flags("mds", kdb_md, "", "Display Memory Symbolically", 0, KDB_REPEAT_NO_ARGS); - kdb_register_repeat("mm", kdb_mm, " ", + kdb_register_flags("mm", kdb_mm, " ", "Modify Memory Contents", 0, KDB_REPEAT_NO_ARGS); - kdb_register_repeat("go", kdb_go, "[]", + kdb_register_flags("go", kdb_go, "[]", "Continue Execution", 1, KDB_REPEAT_NONE); - kdb_register_repeat("rd", kdb_rd, "", + kdb_register_flags("rd", kdb_rd, "", "Display Registers", 0, KDB_REPEAT_NONE); - kdb_register_repeat("rm", kdb_rm, " ", + kdb_register_flags("rm", kdb_rm, " ", "Modify Registers", 0, KDB_REPEAT_NONE); - kdb_register_repeat("ef", kdb_ef, "", + kdb_register_flags("ef", kdb_ef, "", "Display exception frame", 0, KDB_REPEAT_NONE); - kdb_register_repeat("bt", kdb_bt, "[]", + kdb_register_flags("bt", kdb_bt, "[]", "Stack traceback", 1, KDB_REPEAT_NONE); - kdb_register_repeat("btp", kdb_bt, "", + kdb_register_flags("btp", kdb_bt, "", "Display stack for process ", 0, KDB_REPEAT_NONE); - kdb_register_repeat("bta", kdb_bt, "[D|R|S|T|C|Z|E|U|I|M|A]", + kdb_register_flags("bta", kdb_bt, "[D|R|S|T|C|Z|E|U|I|M|A]", "Backtrace all processes matching state flag", 0, KDB_REPEAT_NONE); - kdb_register_repeat("btc", kdb_bt, "", + kdb_register_flags("btc", kdb_bt, "", "Backtrace current process on each cpu", 0, KDB_REPEAT_NONE); - kdb_register_repeat("btt", kdb_bt, "", + kdb_register_flags("btt", kdb_bt, "", "Backtrace process given its struct task address", 0, KDB_REPEAT_NONE); - kdb_register_repeat("env", kdb_env, "", + kdb_register_flags("env", kdb_env, "", "Show environment variables", 0, KDB_REPEAT_NONE); - kdb_register_repeat("set", kdb_set, "", + kdb_register_flags("set", kdb_set, "", "Set environment variables", 0, KDB_REPEAT_NONE); - kdb_register_repeat("help", kdb_help, "", + kdb_register_flags("help", kdb_help, "", "Display Help Message", 1, KDB_REPEAT_NONE); - kdb_register_repeat("?", kdb_help, "", + kdb_register_flags("?", kdb_help, "", "Display Help Message", 0, KDB_REPEAT_NONE); - kdb_register_repeat("cpu", kdb_cpu, "", + kdb_register_flags("cpu", kdb_cpu, "", "Switch to new cpu", 0, KDB_REPEAT_NONE); - kdb_register_repeat("kgdb", kdb_kgdb, "", + kdb_register_flags("kgdb", kdb_kgdb, "", "Enter kgdb mode", 0, KDB_REPEAT_NONE); - kdb_register_repeat("ps", kdb_ps, "[|A]", + kdb_register_flags("ps", kdb_ps, "[|A]", "Display active task list", 0, KDB_REPEAT_NONE); - kdb_register_repeat("pid", kdb_pid, "", + kdb_register_flags("pid", kdb_pid, "", "Switch to another task", 0, KDB_REPEAT_NONE); - kdb_register_repeat("reboot", kdb_reboot, "", + kdb_register_flags("reboot", kdb_reboot, "", "Reboot the machine immediately", 0, KDB_REPEAT_NONE); #if defined(CONFIG_MODULES) - kdb_register_repeat("lsmod", kdb_lsmod, "", + kdb_register_flags("lsmod", kdb_lsmod, "", "List loaded kernel modules", 0, KDB_REPEAT_NONE); #endif #if defined(CONFIG_MAGIC_SYSRQ) - kdb_register_repeat("sr", kdb_sr, "", + kdb_register_flags("sr", kdb_sr, "", "Magic SysRq key", 0, KDB_REPEAT_NONE); #endif #if defined(CONFIG_PRINTK) - kdb_register_repeat("dmesg", kdb_dmesg, "[lines]", + kdb_register_flags("dmesg", kdb_dmesg, "[lines]", "Display syslog buffer", 0, KDB_REPEAT_NONE); #endif if (arch_kgdb_ops.enable_nmi) { - kdb_register_repeat("disable_nmi", kdb_disable_nmi, "", + kdb_register_flags("disable_nmi", kdb_disable_nmi, "", "Disable NMI entry to KDB", 0, KDB_REPEAT_NONE); } - kdb_register_repeat("defcmd", kdb_defcmd, "name \"usage\" \"help\"", + kdb_register_flags("defcmd", kdb_defcmd, "name \"usage\" \"help\"", "Define a set of commands, down to endefcmd", 0, KDB_REPEAT_NONE); - kdb_register_repeat("kill", kdb_kill, "<-signal> ", + kdb_register_flags("kill", kdb_kill, "<-signal> ", "Send a signal to a process", 0, KDB_REPEAT_NONE); - kdb_register_repeat("summary", kdb_summary, "", + kdb_register_flags("summary", kdb_summary, "", "Summarize the system", 4, KDB_REPEAT_NONE); - kdb_register_repeat("per_cpu", kdb_per_cpu, " [] []", + kdb_register_flags("per_cpu", kdb_per_cpu, " [] []", "Display per_cpu variables", 3, KDB_REPEAT_NONE); - kdb_register_repeat("grephelp", kdb_grep_help, "", + kdb_register_flags("grephelp", kdb_grep_help, "", "Display help on | grep", 0, KDB_REPEAT_NONE); } diff --git a/kernel/trace/trace_kdb.c b/kernel/trace/trace_kdb.c index bd90e1b..1e3b36c 100644 --- a/kernel/trace/trace_kdb.c +++ b/kernel/trace/trace_kdb.c @@ -127,7 +127,7 @@ static int kdb_ftdump(int argc, const char **argv) static __init int kdb_ftrace_register(void) { - kdb_register_repeat("ftdump", kdb_ftdump, "[skip_#lines] [cpu]", + kdb_register_flags("ftdump", kdb_ftdump, "[skip_#lines] [cpu]", "Dump ftrace log", 0, KDB_REPEAT_NONE); return 0; } -- cgit v0.10.2 From 04bb171e7aa99dee0c92e772e4f66f8d5c1b4081 Mon Sep 17 00:00:00 2001 From: Anton Vorontsov Date: Thu, 6 Nov 2014 14:36:43 +0000 Subject: kdb: Use KDB_REPEAT_* values as flags The actual values of KDB_REPEAT_* enum values and overall logic stayed the same, but we now treat the values as flags. This makes it possible to add other flags and combine them, plus makes the code a lot simpler and shorter. But functionality-wise, there should be no changes. Signed-off-by: Anton Vorontsov Signed-off-by: John Stultz Signed-off-by: Daniel Thompson Cc: Jason Wessel Signed-off-by: Jason Wessel diff --git a/include/linux/kdb.h b/include/linux/kdb.h index 32d2f40..90aed7c 100644 --- a/include/linux/kdb.h +++ b/include/linux/kdb.h @@ -15,8 +15,8 @@ typedef enum { KDB_REPEAT_NONE = 0, /* Do not repeat this command */ - KDB_REPEAT_NO_ARGS, /* Repeat the command without arguments */ - KDB_REPEAT_WITH_ARGS, /* Repeat the command including its arguments */ + KDB_REPEAT_NO_ARGS = 0x1, /* Repeat the command w/o arguments */ + KDB_REPEAT_WITH_ARGS = 0x2, /* Repeat the command w/ its arguments */ } kdb_cmdflags_t; typedef int (*kdb_func_t)(int, const char **); diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c index 070f1ff..cbacae2 100644 --- a/kernel/debug/kdb/kdb_main.c +++ b/kernel/debug/kdb/kdb_main.c @@ -1008,20 +1008,13 @@ int kdb_parse(const char *cmdstr) if (result && ignore_errors && result > KDB_CMD_GO) result = 0; KDB_STATE_CLEAR(CMD); - switch (tp->cmd_flags) { - case KDB_REPEAT_NONE: - argc = 0; - if (argv[0]) - *(argv[0]) = '\0'; - break; - case KDB_REPEAT_NO_ARGS: - argc = 1; - if (argv[1]) - *(argv[1]) = '\0'; - break; - case KDB_REPEAT_WITH_ARGS: - break; - } + + if (tp->cmd_flags & KDB_REPEAT_WITH_ARGS) + return result; + + argc = tp->cmd_flags & KDB_REPEAT_NO_ARGS ? 1 : 0; + if (argv[argc]) + *(argv[argc]) = '\0'; return result; } -- cgit v0.10.2 From e8ab24d9b0173ada3eeed31d7d7f982228efc2c5 Mon Sep 17 00:00:00 2001 From: Anton Vorontsov Date: Thu, 6 Nov 2014 14:36:44 +0000 Subject: kdb: Remove KDB_REPEAT_NONE flag Since we now treat KDB_REPEAT_* as flags, there is no need to pass KDB_REPEAT_NONE. It's just the default behaviour when no flags are specified. Signed-off-by: Anton Vorontsov Signed-off-by: John Stultz Signed-off-by: Daniel Thompson Cc: Jason Wessel Signed-off-by: Jason Wessel diff --git a/include/linux/kdb.h b/include/linux/kdb.h index 90aed7c..39b44b3 100644 --- a/include/linux/kdb.h +++ b/include/linux/kdb.h @@ -14,7 +14,6 @@ */ typedef enum { - KDB_REPEAT_NONE = 0, /* Do not repeat this command */ KDB_REPEAT_NO_ARGS = 0x1, /* Repeat the command w/o arguments */ KDB_REPEAT_WITH_ARGS = 0x2, /* Repeat the command w/ its arguments */ } kdb_cmdflags_t; diff --git a/kernel/debug/kdb/kdb_bp.c b/kernel/debug/kdb/kdb_bp.c index 5953666..f8844fb 100644 --- a/kernel/debug/kdb/kdb_bp.c +++ b/kernel/debug/kdb/kdb_bp.c @@ -539,11 +539,11 @@ void __init kdb_initbptab(void) kdb_register_flags("bph", kdb_bp, "[]", "[datar [length]|dataw [length]] Set hw brk", 0, KDB_REPEAT_NO_ARGS); kdb_register_flags("bc", kdb_bc, "", - "Clear Breakpoint", 0, KDB_REPEAT_NONE); + "Clear Breakpoint", 0, 0); kdb_register_flags("be", kdb_bc, "", - "Enable Breakpoint", 0, KDB_REPEAT_NONE); + "Enable Breakpoint", 0, 0); kdb_register_flags("bd", kdb_bc, "", - "Disable Breakpoint", 0, KDB_REPEAT_NONE); + "Disable Breakpoint", 0, 0); kdb_register_flags("ss", kdb_ss, "", "Single Step", 1, KDB_REPEAT_NO_ARGS); diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c index cbacae2..538bf1d 100644 --- a/kernel/debug/kdb/kdb_main.c +++ b/kernel/debug/kdb/kdb_main.c @@ -2698,7 +2698,7 @@ EXPORT_SYMBOL_GPL(kdb_register_flags); /* * kdb_register - Compatibility register function for commands that do * not need to specify a repeat state. Equivalent to - * kdb_register_flags with KDB_REPEAT_NONE. + * kdb_register_flags with flags set to 0. * Inputs: * cmd Command name * func Function to execute the command @@ -2713,8 +2713,7 @@ int kdb_register(char *cmd, char *help, short minlen) { - return kdb_register_flags(cmd, func, usage, help, minlen, - KDB_REPEAT_NONE); + return kdb_register_flags(cmd, func, usage, help, minlen, 0); } EXPORT_SYMBOL_GPL(kdb_register); @@ -2768,68 +2767,68 @@ static void __init kdb_inittab(void) kdb_register_flags("mm", kdb_mm, " ", "Modify Memory Contents", 0, KDB_REPEAT_NO_ARGS); kdb_register_flags("go", kdb_go, "[]", - "Continue Execution", 1, KDB_REPEAT_NONE); + "Continue Execution", 1, 0); kdb_register_flags("rd", kdb_rd, "", - "Display Registers", 0, KDB_REPEAT_NONE); + "Display Registers", 0, 0); kdb_register_flags("rm", kdb_rm, " ", - "Modify Registers", 0, KDB_REPEAT_NONE); + "Modify Registers", 0, 0); kdb_register_flags("ef", kdb_ef, "", - "Display exception frame", 0, KDB_REPEAT_NONE); + "Display exception frame", 0, 0); kdb_register_flags("bt", kdb_bt, "[]", - "Stack traceback", 1, KDB_REPEAT_NONE); + "Stack traceback", 1, 0); kdb_register_flags("btp", kdb_bt, "", - "Display stack for process ", 0, KDB_REPEAT_NONE); + "Display stack for process ", 0, 0); kdb_register_flags("bta", kdb_bt, "[D|R|S|T|C|Z|E|U|I|M|A]", - "Backtrace all processes matching state flag", 0, KDB_REPEAT_NONE); + "Backtrace all processes matching state flag", 0, 0); kdb_register_flags("btc", kdb_bt, "", - "Backtrace current process on each cpu", 0, KDB_REPEAT_NONE); + "Backtrace current process on each cpu", 0, 0); kdb_register_flags("btt", kdb_bt, "", "Backtrace process given its struct task address", 0, - KDB_REPEAT_NONE); + 0); kdb_register_flags("env", kdb_env, "", - "Show environment variables", 0, KDB_REPEAT_NONE); + "Show environment variables", 0, 0); kdb_register_flags("set", kdb_set, "", - "Set environment variables", 0, KDB_REPEAT_NONE); + "Set environment variables", 0, 0); kdb_register_flags("help", kdb_help, "", - "Display Help Message", 1, KDB_REPEAT_NONE); + "Display Help Message", 1, 0); kdb_register_flags("?", kdb_help, "", - "Display Help Message", 0, KDB_REPEAT_NONE); + "Display Help Message", 0, 0); kdb_register_flags("cpu", kdb_cpu, "", - "Switch to new cpu", 0, KDB_REPEAT_NONE); + "Switch to new cpu", 0, 0); kdb_register_flags("kgdb", kdb_kgdb, "", - "Enter kgdb mode", 0, KDB_REPEAT_NONE); + "Enter kgdb mode", 0, 0); kdb_register_flags("ps", kdb_ps, "[|A]", - "Display active task list", 0, KDB_REPEAT_NONE); + "Display active task list", 0, 0); kdb_register_flags("pid", kdb_pid, "", - "Switch to another task", 0, KDB_REPEAT_NONE); + "Switch to another task", 0, 0); kdb_register_flags("reboot", kdb_reboot, "", - "Reboot the machine immediately", 0, KDB_REPEAT_NONE); + "Reboot the machine immediately", 0, 0); #if defined(CONFIG_MODULES) kdb_register_flags("lsmod", kdb_lsmod, "", - "List loaded kernel modules", 0, KDB_REPEAT_NONE); + "List loaded kernel modules", 0, 0); #endif #if defined(CONFIG_MAGIC_SYSRQ) kdb_register_flags("sr", kdb_sr, "", - "Magic SysRq key", 0, KDB_REPEAT_NONE); + "Magic SysRq key", 0, 0); #endif #if defined(CONFIG_PRINTK) kdb_register_flags("dmesg", kdb_dmesg, "[lines]", - "Display syslog buffer", 0, KDB_REPEAT_NONE); + "Display syslog buffer", 0, 0); #endif if (arch_kgdb_ops.enable_nmi) { kdb_register_flags("disable_nmi", kdb_disable_nmi, "", - "Disable NMI entry to KDB", 0, KDB_REPEAT_NONE); + "Disable NMI entry to KDB", 0, 0); } kdb_register_flags("defcmd", kdb_defcmd, "name \"usage\" \"help\"", - "Define a set of commands, down to endefcmd", 0, KDB_REPEAT_NONE); + "Define a set of commands, down to endefcmd", 0, 0); kdb_register_flags("kill", kdb_kill, "<-signal> ", - "Send a signal to a process", 0, KDB_REPEAT_NONE); + "Send a signal to a process", 0, 0); kdb_register_flags("summary", kdb_summary, "", - "Summarize the system", 4, KDB_REPEAT_NONE); + "Summarize the system", 4, 0); kdb_register_flags("per_cpu", kdb_per_cpu, " [] []", - "Display per_cpu variables", 3, KDB_REPEAT_NONE); + "Display per_cpu variables", 3, 0); kdb_register_flags("grephelp", kdb_grep_help, "", - "Display help on | grep", 0, KDB_REPEAT_NONE); + "Display help on | grep", 0, 0); } /* Execute any commands defined in kdb_cmds. */ diff --git a/kernel/trace/trace_kdb.c b/kernel/trace/trace_kdb.c index 1e3b36c..3da7e30 100644 --- a/kernel/trace/trace_kdb.c +++ b/kernel/trace/trace_kdb.c @@ -128,7 +128,7 @@ static int kdb_ftdump(int argc, const char **argv) static __init int kdb_ftrace_register(void) { kdb_register_flags("ftdump", kdb_ftdump, "[skip_#lines] [cpu]", - "Dump ftrace log", 0, KDB_REPEAT_NONE); + "Dump ftrace log", 0, 0); return 0; } -- cgit v0.10.2 From 9452e977ac17caf9f98a91b33d5e3c3357258c64 Mon Sep 17 00:00:00 2001 From: Daniel Thompson Date: Thu, 6 Nov 2014 14:36:45 +0000 Subject: kdb: Categorize kdb commands (similar to SysRq categorization) This patch introduces several new flags to collect kdb commands into groups (later allowing them to be optionally disabled). This follows similar prior art to enable/disable magic sysrq commands. The commands have been categorized as follows: Always on: go (w/o args), env, set, help, ?, cpu (w/o args), sr, dmesg, disable_nmi, defcmd, summary, grephelp Mem read: md, mdr, mdp, mds, ef, bt (with args), per_cpu Mem write: mm Reg read: rd Reg write: go (with args), rm Inspect: bt (w/o args), btp, bta, btc, btt, ps, pid, lsmod Flow ctrl: bp, bl, bph, bc, be, bd, ss Signal: kill Reboot: reboot All: cpu, kgdb, (and all of the above), nmi_console Signed-off-by: Daniel Thompson Cc: Jason Wessel Signed-off-by: Jason Wessel diff --git a/include/linux/kdb.h b/include/linux/kdb.h index 39b44b3..f1fe361 100644 --- a/include/linux/kdb.h +++ b/include/linux/kdb.h @@ -13,9 +13,53 @@ * Copyright (C) 2009 Jason Wessel */ +/* Shifted versions of the command enable bits are be used if the command + * has no arguments (see kdb_check_flags). This allows commands, such as + * go, to have different permissions depending upon whether it is called + * with an argument. + */ +#define KDB_ENABLE_NO_ARGS_SHIFT 10 + typedef enum { - KDB_REPEAT_NO_ARGS = 0x1, /* Repeat the command w/o arguments */ - KDB_REPEAT_WITH_ARGS = 0x2, /* Repeat the command w/ its arguments */ + KDB_ENABLE_ALL = (1 << 0), /* Enable everything */ + KDB_ENABLE_MEM_READ = (1 << 1), + KDB_ENABLE_MEM_WRITE = (1 << 2), + KDB_ENABLE_REG_READ = (1 << 3), + KDB_ENABLE_REG_WRITE = (1 << 4), + KDB_ENABLE_INSPECT = (1 << 5), + KDB_ENABLE_FLOW_CTRL = (1 << 6), + KDB_ENABLE_SIGNAL = (1 << 7), + KDB_ENABLE_REBOOT = (1 << 8), + /* User exposed values stop here, all remaining flags are + * exclusively used to describe a commands behaviour. + */ + + KDB_ENABLE_ALWAYS_SAFE = (1 << 9), + KDB_ENABLE_MASK = (1 << KDB_ENABLE_NO_ARGS_SHIFT) - 1, + + KDB_ENABLE_ALL_NO_ARGS = KDB_ENABLE_ALL << KDB_ENABLE_NO_ARGS_SHIFT, + KDB_ENABLE_MEM_READ_NO_ARGS = KDB_ENABLE_MEM_READ + << KDB_ENABLE_NO_ARGS_SHIFT, + KDB_ENABLE_MEM_WRITE_NO_ARGS = KDB_ENABLE_MEM_WRITE + << KDB_ENABLE_NO_ARGS_SHIFT, + KDB_ENABLE_REG_READ_NO_ARGS = KDB_ENABLE_REG_READ + << KDB_ENABLE_NO_ARGS_SHIFT, + KDB_ENABLE_REG_WRITE_NO_ARGS = KDB_ENABLE_REG_WRITE + << KDB_ENABLE_NO_ARGS_SHIFT, + KDB_ENABLE_INSPECT_NO_ARGS = KDB_ENABLE_INSPECT + << KDB_ENABLE_NO_ARGS_SHIFT, + KDB_ENABLE_FLOW_CTRL_NO_ARGS = KDB_ENABLE_FLOW_CTRL + << KDB_ENABLE_NO_ARGS_SHIFT, + KDB_ENABLE_SIGNAL_NO_ARGS = KDB_ENABLE_SIGNAL + << KDB_ENABLE_NO_ARGS_SHIFT, + KDB_ENABLE_REBOOT_NO_ARGS = KDB_ENABLE_REBOOT + << KDB_ENABLE_NO_ARGS_SHIFT, + KDB_ENABLE_ALWAYS_SAFE_NO_ARGS = KDB_ENABLE_ALWAYS_SAFE + << KDB_ENABLE_NO_ARGS_SHIFT, + KDB_ENABLE_MASK_NO_ARGS = KDB_ENABLE_MASK << KDB_ENABLE_NO_ARGS_SHIFT, + + KDB_REPEAT_NO_ARGS = 0x40000000, /* Repeat the command w/o arguments */ + KDB_REPEAT_WITH_ARGS = 0x80000000, /* Repeat the command with args */ } kdb_cmdflags_t; typedef int (*kdb_func_t)(int, const char **); diff --git a/kernel/debug/kdb/kdb_bp.c b/kernel/debug/kdb/kdb_bp.c index f8844fb..e1dbf4a 100644 --- a/kernel/debug/kdb/kdb_bp.c +++ b/kernel/debug/kdb/kdb_bp.c @@ -532,21 +532,28 @@ void __init kdb_initbptab(void) bp->bp_free = 1; kdb_register_flags("bp", kdb_bp, "[]", - "Set/Display breakpoints", 0, KDB_REPEAT_NO_ARGS); + "Set/Display breakpoints", 0, + KDB_ENABLE_FLOW_CTRL | KDB_REPEAT_NO_ARGS); kdb_register_flags("bl", kdb_bp, "[]", - "Display breakpoints", 0, KDB_REPEAT_NO_ARGS); + "Display breakpoints", 0, + KDB_ENABLE_FLOW_CTRL | KDB_REPEAT_NO_ARGS); if (arch_kgdb_ops.flags & KGDB_HW_BREAKPOINT) kdb_register_flags("bph", kdb_bp, "[]", - "[datar [length]|dataw [length]] Set hw brk", 0, KDB_REPEAT_NO_ARGS); + "[datar [length]|dataw [length]] Set hw brk", 0, + KDB_ENABLE_FLOW_CTRL | KDB_REPEAT_NO_ARGS); kdb_register_flags("bc", kdb_bc, "", - "Clear Breakpoint", 0, 0); + "Clear Breakpoint", 0, + KDB_ENABLE_FLOW_CTRL); kdb_register_flags("be", kdb_bc, "", - "Enable Breakpoint", 0, 0); + "Enable Breakpoint", 0, + KDB_ENABLE_FLOW_CTRL); kdb_register_flags("bd", kdb_bc, "", - "Disable Breakpoint", 0, 0); + "Disable Breakpoint", 0, + KDB_ENABLE_FLOW_CTRL); kdb_register_flags("ss", kdb_ss, "", - "Single Step", 1, KDB_REPEAT_NO_ARGS); + "Single Step", 1, + KDB_ENABLE_FLOW_CTRL | KDB_REPEAT_NO_ARGS); /* * Architecture dependent initialization. */ diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c index 538bf1d..fae1fc3 100644 --- a/kernel/debug/kdb/kdb_main.c +++ b/kernel/debug/kdb/kdb_main.c @@ -188,6 +188,26 @@ struct task_struct *kdb_curr_task(int cpu) } /* + * Check whether the flags of the current command and the permissions + * of the kdb console has allow a command to be run. + */ +static inline bool kdb_check_flags(kdb_cmdflags_t flags, int permissions, + bool no_args) +{ + /* permissions comes from userspace so needs massaging slightly */ + permissions &= KDB_ENABLE_MASK; + permissions |= KDB_ENABLE_ALWAYS_SAFE; + + /* some commands change group when launched with no arguments */ + if (no_args) + permissions |= permissions << KDB_ENABLE_NO_ARGS_SHIFT; + + flags |= KDB_ENABLE_ALL; + + return permissions & flags; +} + +/* * kdbgetenv - This function will return the character string value of * an environment variable. * Parameters: @@ -641,8 +661,13 @@ static int kdb_defcmd2(const char *cmdstr, const char *argv0) if (!s->count) s->usable = 0; if (s->usable) - kdb_register(s->name, kdb_exec_defcmd, - s->usage, s->help, 0); + /* macros are always safe because when executed each + * internal command re-enters kdb_parse() and is + * safety checked individually. + */ + kdb_register_flags(s->name, kdb_exec_defcmd, s->usage, + s->help, 0, + KDB_ENABLE_ALWAYS_SAFE); return 0; } if (!s->usable) @@ -2757,78 +2782,107 @@ static void __init kdb_inittab(void) kdb_register_flags("md", kdb_md, "", "Display Memory Contents, also mdWcN, e.g. md8c1", 1, - KDB_REPEAT_NO_ARGS); + KDB_ENABLE_MEM_READ | KDB_REPEAT_NO_ARGS); kdb_register_flags("mdr", kdb_md, " ", - "Display Raw Memory", 0, KDB_REPEAT_NO_ARGS); + "Display Raw Memory", 0, + KDB_ENABLE_MEM_READ | KDB_REPEAT_NO_ARGS); kdb_register_flags("mdp", kdb_md, " ", - "Display Physical Memory", 0, KDB_REPEAT_NO_ARGS); + "Display Physical Memory", 0, + KDB_ENABLE_MEM_READ | KDB_REPEAT_NO_ARGS); kdb_register_flags("mds", kdb_md, "", - "Display Memory Symbolically", 0, KDB_REPEAT_NO_ARGS); + "Display Memory Symbolically", 0, + KDB_ENABLE_MEM_READ | KDB_REPEAT_NO_ARGS); kdb_register_flags("mm", kdb_mm, " ", - "Modify Memory Contents", 0, KDB_REPEAT_NO_ARGS); + "Modify Memory Contents", 0, + KDB_ENABLE_MEM_WRITE | KDB_REPEAT_NO_ARGS); kdb_register_flags("go", kdb_go, "[]", - "Continue Execution", 1, 0); + "Continue Execution", 1, + KDB_ENABLE_REG_WRITE | KDB_ENABLE_ALWAYS_SAFE_NO_ARGS); kdb_register_flags("rd", kdb_rd, "", - "Display Registers", 0, 0); + "Display Registers", 0, + KDB_ENABLE_REG_READ); kdb_register_flags("rm", kdb_rm, " ", - "Modify Registers", 0, 0); + "Modify Registers", 0, + KDB_ENABLE_REG_WRITE); kdb_register_flags("ef", kdb_ef, "", - "Display exception frame", 0, 0); + "Display exception frame", 0, + KDB_ENABLE_MEM_READ); kdb_register_flags("bt", kdb_bt, "[]", - "Stack traceback", 1, 0); + "Stack traceback", 1, + KDB_ENABLE_MEM_READ | KDB_ENABLE_INSPECT_NO_ARGS); kdb_register_flags("btp", kdb_bt, "", - "Display stack for process ", 0, 0); + "Display stack for process ", 0, + KDB_ENABLE_INSPECT); kdb_register_flags("bta", kdb_bt, "[D|R|S|T|C|Z|E|U|I|M|A]", - "Backtrace all processes matching state flag", 0, 0); + "Backtrace all processes matching state flag", 0, + KDB_ENABLE_INSPECT); kdb_register_flags("btc", kdb_bt, "", - "Backtrace current process on each cpu", 0, 0); + "Backtrace current process on each cpu", 0, + KDB_ENABLE_INSPECT); kdb_register_flags("btt", kdb_bt, "", "Backtrace process given its struct task address", 0, - 0); + KDB_ENABLE_MEM_READ | KDB_ENABLE_INSPECT_NO_ARGS); kdb_register_flags("env", kdb_env, "", - "Show environment variables", 0, 0); + "Show environment variables", 0, + KDB_ENABLE_ALWAYS_SAFE); kdb_register_flags("set", kdb_set, "", - "Set environment variables", 0, 0); + "Set environment variables", 0, + KDB_ENABLE_ALWAYS_SAFE); kdb_register_flags("help", kdb_help, "", - "Display Help Message", 1, 0); + "Display Help Message", 1, + KDB_ENABLE_ALWAYS_SAFE); kdb_register_flags("?", kdb_help, "", - "Display Help Message", 0, 0); + "Display Help Message", 0, + KDB_ENABLE_ALWAYS_SAFE); kdb_register_flags("cpu", kdb_cpu, "", - "Switch to new cpu", 0, 0); + "Switch to new cpu", 0, + KDB_ENABLE_ALWAYS_SAFE_NO_ARGS); kdb_register_flags("kgdb", kdb_kgdb, "", "Enter kgdb mode", 0, 0); kdb_register_flags("ps", kdb_ps, "[|A]", - "Display active task list", 0, 0); + "Display active task list", 0, + KDB_ENABLE_INSPECT); kdb_register_flags("pid", kdb_pid, "", - "Switch to another task", 0, 0); + "Switch to another task", 0, + KDB_ENABLE_INSPECT); kdb_register_flags("reboot", kdb_reboot, "", - "Reboot the machine immediately", 0, 0); + "Reboot the machine immediately", 0, + KDB_ENABLE_REBOOT); #if defined(CONFIG_MODULES) kdb_register_flags("lsmod", kdb_lsmod, "", - "List loaded kernel modules", 0, 0); + "List loaded kernel modules", 0, + KDB_ENABLE_INSPECT); #endif #if defined(CONFIG_MAGIC_SYSRQ) kdb_register_flags("sr", kdb_sr, "", - "Magic SysRq key", 0, 0); + "Magic SysRq key", 0, + KDB_ENABLE_ALWAYS_SAFE); #endif #if defined(CONFIG_PRINTK) kdb_register_flags("dmesg", kdb_dmesg, "[lines]", - "Display syslog buffer", 0, 0); + "Display syslog buffer", 0, + KDB_ENABLE_ALWAYS_SAFE); #endif if (arch_kgdb_ops.enable_nmi) { kdb_register_flags("disable_nmi", kdb_disable_nmi, "", - "Disable NMI entry to KDB", 0, 0); + "Disable NMI entry to KDB", 0, + KDB_ENABLE_ALWAYS_SAFE); } kdb_register_flags("defcmd", kdb_defcmd, "name \"usage\" \"help\"", - "Define a set of commands, down to endefcmd", 0, 0); + "Define a set of commands, down to endefcmd", 0, + KDB_ENABLE_ALWAYS_SAFE); kdb_register_flags("kill", kdb_kill, "<-signal> ", - "Send a signal to a process", 0, 0); + "Send a signal to a process", 0, + KDB_ENABLE_SIGNAL); kdb_register_flags("summary", kdb_summary, "", - "Summarize the system", 4, 0); + "Summarize the system", 4, + KDB_ENABLE_ALWAYS_SAFE); kdb_register_flags("per_cpu", kdb_per_cpu, " [] []", - "Display per_cpu variables", 3, 0); + "Display per_cpu variables", 3, + KDB_ENABLE_MEM_READ); kdb_register_flags("grephelp", kdb_grep_help, "", - "Display help on | grep", 0, 0); + "Display help on | grep", 0, + KDB_ENABLE_ALWAYS_SAFE); } /* Execute any commands defined in kdb_cmds. */ diff --git a/kernel/trace/trace_kdb.c b/kernel/trace/trace_kdb.c index 3da7e30..1058f6b 100644 --- a/kernel/trace/trace_kdb.c +++ b/kernel/trace/trace_kdb.c @@ -128,7 +128,7 @@ static int kdb_ftdump(int argc, const char **argv) static __init int kdb_ftrace_register(void) { kdb_register_flags("ftdump", kdb_ftdump, "[skip_#lines] [cpu]", - "Dump ftrace log", 0, 0); + "Dump ftrace log", 0, KDB_ENABLE_ALWAYS_SAFE); return 0; } -- cgit v0.10.2 From 420c2b1b0df84f5956036b5185cc1e11d247817d Mon Sep 17 00:00:00 2001 From: Anton Vorontsov Date: Thu, 6 Nov 2014 14:36:46 +0000 Subject: kdb: Add enable mask for groups of commands Currently all kdb commands are enabled whenever kdb is deployed. This makes it difficult to deploy kdb to help debug certain types of systems. Android phones provide one example; the FIQ debugger found on some Android devices has a deliberately weak set of commands to allow the debugger to enabled very late in the production cycle. Certain kiosk environments offer another interesting case where an engineer might wish to probe the system state using passive inspection commands without providing sufficient power for a passer by to root it. Without any restrictions, obtaining the root rights via KDB is a matter of a few commands, and works everywhere. For example, log in as a normal user: cbou:~$ id uid=1001(cbou) gid=1001(cbou) groups=1001(cbou) Now enter KDB (for example via sysrq): Entering kdb (current=0xffff8800065bc740, pid 920) due to Keyboard Entry kdb> ps 23 sleeping system daemon (state M) processes suppressed, use 'ps A' to see all. Task Addr Pid Parent [*] cpu State Thread Command 0xffff8800065bc740 920 919 1 0 R 0xffff8800065bca20 *bash 0xffff880007078000 1 0 0 0 S 0xffff8800070782e0 init [...snip...] 0xffff8800065be3c0 918 1 0 0 S 0xffff8800065be6a0 getty 0xffff8800065b9c80 919 1 0 0 S 0xffff8800065b9f60 login 0xffff8800065bc740 920 919 1 0 R 0xffff8800065bca20 *bash All we need is the offset of cred pointers. We can look up the offset in the distro's kernel source, but it is unnecessary. We can just start dumping init's task_struct, until we see the process name: kdb> md 0xffff880007078000 0xffff880007078000 0000000000000001 ffff88000703c000 ................ 0xffff880007078010 0040210000000002 0000000000000000 .....!@......... [...snip...] 0xffff8800070782b0 ffff8800073e0580 ffff8800073e0580 ..>.......>..... 0xffff8800070782c0 0000000074696e69 0000000000000000 init............ ^ Here, 'init'. Creds are just above it, so the offset is 0x02b0. Now we set up init's creds for our non-privileged shell: kdb> mm 0xffff8800065bc740+0x02b0 0xffff8800073e0580 0xffff8800065bc9f0 = 0xffff8800073e0580 kdb> mm 0xffff8800065bc740+0x02b8 0xffff8800073e0580 0xffff8800065bc9f8 = 0xffff8800073e0580 And thus gaining the root: kdb> go cbou:~$ id uid=0(root) gid=0(root) groups=0(root) cbou:~$ bash root:~# p.s. No distro enables kdb by default (although, with a nice KDB-over-KMS feature availability, I would expect at least some would enable it), so it's not actually some kind of a major issue. Signed-off-by: Anton Vorontsov Signed-off-by: John Stultz Signed-off-by: Daniel Thompson Cc: Jason Wessel Signed-off-by: Jason Wessel diff --git a/include/linux/kdb.h b/include/linux/kdb.h index f1fe361..75ae2e2 100644 --- a/include/linux/kdb.h +++ b/include/linux/kdb.h @@ -105,6 +105,7 @@ extern atomic_t kdb_event; #define KDB_BADLENGTH (-19) #define KDB_NOBP (-20) #define KDB_BADADDR (-21) +#define KDB_NOPERM (-22) /* * kdb_diemsg diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c index fae1fc3..fe1ac56 100644 --- a/kernel/debug/kdb/kdb_main.c +++ b/kernel/debug/kdb/kdb_main.c @@ -12,6 +12,7 @@ */ #include +#include #include #include #include @@ -23,6 +24,7 @@ #include #include #include +#include #include #include #include @@ -42,6 +44,12 @@ #include #include "kdb_private.h" +#undef MODULE_PARAM_PREFIX +#define MODULE_PARAM_PREFIX "kdb." + +static int kdb_cmd_enabled; +module_param_named(cmd_enable, kdb_cmd_enabled, int, 0600); + #define GREP_LEN 256 char kdb_grep_string[GREP_LEN]; int kdb_grepping_flag; @@ -121,6 +129,7 @@ static kdbmsg_t kdbmsgs[] = { KDBMSG(BADLENGTH, "Invalid length field"), KDBMSG(NOBP, "No Breakpoint exists"), KDBMSG(BADADDR, "Invalid address"), + KDBMSG(NOPERM, "Permission denied"), }; #undef KDBMSG @@ -496,6 +505,15 @@ int kdbgetaddrarg(int argc, const char **argv, int *nextarg, kdb_symtab_t symtab; /* + * If the enable flags prohibit both arbitrary memory access + * and flow control then there are no reasonable grounds to + * provide symbol lookup. + */ + if (!kdb_check_flags(KDB_ENABLE_MEM_READ | KDB_ENABLE_FLOW_CTRL, + kdb_cmd_enabled, false)) + return KDB_NOPERM; + + /* * Process arguments which follow the following syntax: * * symbol | numeric-address [+/- numeric-offset] @@ -1028,6 +1046,10 @@ int kdb_parse(const char *cmdstr) if (i < kdb_max_commands) { int result; + + if (!kdb_check_flags(tp->cmd_flags, kdb_cmd_enabled, argc <= 1)) + return KDB_NOPERM; + KDB_STATE_SET(CMD); result = (*tp->cmd_func)(argc-1, (const char **)argv); if (result && ignore_errors && result > KDB_CMD_GO) @@ -1939,10 +1961,14 @@ static int kdb_rm(int argc, const char **argv) */ static int kdb_sr(int argc, const char **argv) { + bool check_mask = + !kdb_check_flags(KDB_ENABLE_ALL, kdb_cmd_enabled, false); + if (argc != 1) return KDB_ARGCOUNT; + kdb_trap_printk++; - __handle_sysrq(*argv[1], false); + __handle_sysrq(*argv[1], check_mask); kdb_trap_printk--; return 0; @@ -2393,6 +2419,8 @@ static int kdb_help(int argc, const char **argv) return 0; if (!kt->cmd_name) continue; + if (!kdb_check_flags(kt->cmd_flags, kdb_cmd_enabled, true)) + continue; if (strlen(kt->cmd_usage) > 20) space = "\n "; kdb_printf("%-15.15s %-20s%s%s\n", kt->cmd_name, -- cgit v0.10.2 From b8017177cdfd46b0222b3b74b206780f52f22f3d Mon Sep 17 00:00:00 2001 From: Daniel Thompson Date: Thu, 6 Nov 2014 14:36:47 +0000 Subject: kdb: Allow access to sensitive commands to be restricted by default Currently kiosk mode must be explicitly requested by the bootloader or userspace. It is convenient to be able to change the default value in a similar manner to CONFIG_MAGIC_SYSRQ_DEFAULT_MASK. Signed-off-by: Daniel Thompson Cc: Jason Wessel Signed-off-by: Jason Wessel diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c index fe1ac56..8d84979 100644 --- a/kernel/debug/kdb/kdb_main.c +++ b/kernel/debug/kdb/kdb_main.c @@ -47,7 +47,7 @@ #undef MODULE_PARAM_PREFIX #define MODULE_PARAM_PREFIX "kdb." -static int kdb_cmd_enabled; +static int kdb_cmd_enabled = CONFIG_KDB_DEFAULT_ENABLE; module_param_named(cmd_enable, kdb_cmd_enabled, int, 0600); #define GREP_LEN 256 diff --git a/lib/Kconfig.kgdb b/lib/Kconfig.kgdb index 358eb81..c635a10 100644 --- a/lib/Kconfig.kgdb +++ b/lib/Kconfig.kgdb @@ -73,6 +73,31 @@ config KGDB_KDB help KDB frontend for kernel +config KDB_DEFAULT_ENABLE + hex "KDB: Select kdb command functions to be enabled by default" + depends on KGDB_KDB + default 0x1 + help + Specifiers which kdb commands are enabled by default. This may + be set to 1 or 0 to enable all commands or disable almost all + commands. + + Alternatively the following bitmask applies: + + 0x0002 - allow arbitrary reads from memory and symbol lookup + 0x0004 - allow arbitrary writes to memory + 0x0008 - allow current register state to be inspected + 0x0010 - allow current register state to be modified + 0x0020 - allow passive inspection (backtrace, process list, lsmod) + 0x0040 - allow flow control management (breakpoint, single step) + 0x0080 - enable signalling of processes + 0x0100 - allow machine to be rebooted + + The config option merely sets the default at boot time. Both + issuing 'echo X > /sys/module/kdb/parameters/cmd_enable' or + setting with kdb.cmd_enable=X kernel command line option will + override the default settings. + config KDB_KEYBOARD bool "KGDB_KDB: keyboard as input device" depends on VT && KGDB_KDB -- cgit v0.10.2 From a1465d2f396e416a0049332b20fca5977384b9f5 Mon Sep 17 00:00:00 2001 From: Daniel Thompson Date: Tue, 11 Nov 2014 09:31:53 -0600 Subject: kgdb: timeout if secondary CPUs ignore the roundup Currently if an active CPU fails to respond to a roundup request the CPU that requested the roundup will become stuck. This needlessly reduces the robustness of the debugger. This patch introduces a timeout allowing the system state to be examined even when the system contains unresponsive processors. It also modifies kdb's cpu command to make it censor attempts to switch to unresponsive processors and to report their state as (D)ead. Signed-off-by: Daniel Thompson Cc: Jason Wessel Signed-off-by: Andrew Morton Signed-off-by: Jason Wessel diff --git a/kernel/debug/debug_core.c b/kernel/debug/debug_core.c index 1adf62b..acd7497 100644 --- a/kernel/debug/debug_core.c +++ b/kernel/debug/debug_core.c @@ -471,6 +471,7 @@ static int kgdb_cpu_enter(struct kgdb_state *ks, struct pt_regs *regs, int cpu; int trace_on = 0; int online_cpus = num_online_cpus(); + u64 time_left; kgdb_info[ks->cpu].enter_kgdb++; kgdb_info[ks->cpu].exception_state |= exception_state; @@ -595,9 +596,13 @@ return_normal: /* * Wait for the other CPUs to be notified and be waiting for us: */ - while (kgdb_do_roundup && (atomic_read(&masters_in_kgdb) + - atomic_read(&slaves_in_kgdb)) != online_cpus) + time_left = loops_per_jiffy * HZ; + while (kgdb_do_roundup && --time_left && + (atomic_read(&masters_in_kgdb) + atomic_read(&slaves_in_kgdb)) != + online_cpus) cpu_relax(); + if (!time_left) + pr_crit("KGDB: Timed out waiting for secondary CPUs.\n"); /* * At this point the primary processor is completely diff --git a/kernel/debug/kdb/kdb_debugger.c b/kernel/debug/kdb/kdb_debugger.c index 8859ca3..15e1a7a 100644 --- a/kernel/debug/kdb/kdb_debugger.c +++ b/kernel/debug/kdb/kdb_debugger.c @@ -129,6 +129,10 @@ int kdb_stub(struct kgdb_state *ks) ks->pass_exception = 1; KDB_FLAG_SET(CATASTROPHIC); } + /* set CATASTROPHIC if the system contains unresponsive processors */ + for_each_online_cpu(i) + if (!kgdb_info[i].enter_kgdb) + KDB_FLAG_SET(CATASTROPHIC); if (KDB_STATE(SSBPT) && reason == KDB_REASON_SSTEP) { KDB_STATE_CLEAR(SSBPT); KDB_STATE_CLEAR(DOING_SS); diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c index 8d84979..f191bdd 100644 --- a/kernel/debug/kdb/kdb_main.c +++ b/kernel/debug/kdb/kdb_main.c @@ -2201,6 +2201,8 @@ static void kdb_cpu_status(void) for (start_cpu = -1, i = 0; i < NR_CPUS; i++) { if (!cpu_online(i)) { state = 'F'; /* cpu is offline */ + } else if (!kgdb_info[i].enter_kgdb) { + state = 'D'; /* cpu is online but unresponsive */ } else { state = ' '; /* cpu is responding to kdb */ if (kdb_task_state_char(KDB_TSK(i)) == 'I') @@ -2254,7 +2256,7 @@ static int kdb_cpu(int argc, const char **argv) /* * Validate cpunum */ - if ((cpunum > NR_CPUS) || !cpu_online(cpunum)) + if ((cpunum > NR_CPUS) || !kgdb_info[cpunum].enter_kgdb) return KDB_BADCPUNUM; dbg_switch_cpu = cpunum; -- cgit v0.10.2 From 0f16996cf2ed7c368dd95b4c517ce572b96a10f5 Mon Sep 17 00:00:00 2001 From: Fabian Frederick Date: Thu, 12 Jun 2014 21:30:11 +0200 Subject: kernel/debug/debug_core.c: Logging clean-up -Convert printk( to pr_foo() -Add pr_fmt -Coalesce formats Cc: Jason Wessel Cc: Andrew Morton Cc: Joe Perches Signed-off-by: Fabian Frederick Signed-off-by: Jason Wessel diff --git a/kernel/debug/debug_core.c b/kernel/debug/debug_core.c index acd7497..07ce18c 100644 --- a/kernel/debug/debug_core.c +++ b/kernel/debug/debug_core.c @@ -27,6 +27,9 @@ * version 2. This program is licensed "as is" without any warranty of any * kind, whether express or implied. */ + +#define pr_fmt(fmt) "KGDB: " fmt + #include #include #include @@ -196,8 +199,8 @@ int __weak kgdb_validate_break_address(unsigned long addr) return err; err = kgdb_arch_remove_breakpoint(&tmp); if (err) - printk(KERN_ERR "KGDB: Critical breakpoint error, kernel " - "memory destroyed at: %lx", addr); + pr_err("Critical breakpoint error, kernel memory destroyed at: %lx\n", + addr); return err; } @@ -256,8 +259,8 @@ int dbg_activate_sw_breakpoints(void) error = kgdb_arch_set_breakpoint(&kgdb_break[i]); if (error) { ret = error; - printk(KERN_INFO "KGDB: BP install failed: %lx", - kgdb_break[i].bpt_addr); + pr_info("BP install failed: %lx\n", + kgdb_break[i].bpt_addr); continue; } @@ -319,8 +322,8 @@ int dbg_deactivate_sw_breakpoints(void) continue; error = kgdb_arch_remove_breakpoint(&kgdb_break[i]); if (error) { - printk(KERN_INFO "KGDB: BP remove failed: %lx\n", - kgdb_break[i].bpt_addr); + pr_info("BP remove failed: %lx\n", + kgdb_break[i].bpt_addr); ret = error; } @@ -367,7 +370,7 @@ int dbg_remove_all_break(void) goto setundefined; error = kgdb_arch_remove_breakpoint(&kgdb_break[i]); if (error) - printk(KERN_ERR "KGDB: breakpoint remove failed: %lx\n", + pr_err("breakpoint remove failed: %lx\n", kgdb_break[i].bpt_addr); setundefined: kgdb_break[i].state = BP_UNDEFINED; @@ -400,9 +403,9 @@ static int kgdb_io_ready(int print_wait) if (print_wait) { #ifdef CONFIG_KGDB_KDB if (!dbg_kdb_mode) - printk(KERN_CRIT "KGDB: waiting... or $3#33 for KDB\n"); + pr_crit("waiting... or $3#33 for KDB\n"); #else - printk(KERN_CRIT "KGDB: Waiting for remote debugger\n"); + pr_crit("Waiting for remote debugger\n"); #endif } return 1; @@ -430,8 +433,7 @@ static int kgdb_reenter_check(struct kgdb_state *ks) exception_level = 0; kgdb_skipexception(ks->ex_vector, ks->linux_regs); dbg_activate_sw_breakpoints(); - printk(KERN_CRIT "KGDB: re-enter error: breakpoint removed %lx\n", - addr); + pr_crit("re-enter error: breakpoint removed %lx\n", addr); WARN_ON_ONCE(1); return 1; @@ -444,7 +446,7 @@ static int kgdb_reenter_check(struct kgdb_state *ks) panic("Recursive entry to debugger"); } - printk(KERN_CRIT "KGDB: re-enter exception: ALL breakpoints killed\n"); + pr_crit("re-enter exception: ALL breakpoints killed\n"); #ifdef CONFIG_KGDB_KDB /* Allow kdb to debug itself one level */ return 0; @@ -800,15 +802,15 @@ static struct console kgdbcons = { static void sysrq_handle_dbg(int key) { if (!dbg_io_ops) { - printk(KERN_CRIT "ERROR: No KGDB I/O module available\n"); + pr_crit("ERROR: No KGDB I/O module available\n"); return; } if (!kgdb_connected) { #ifdef CONFIG_KGDB_KDB if (!dbg_kdb_mode) - printk(KERN_CRIT "KGDB or $3#33 for KDB\n"); + pr_crit("KGDB or $3#33 for KDB\n"); #else - printk(KERN_CRIT "Entering KGDB\n"); + pr_crit("Entering KGDB\n"); #endif } @@ -950,7 +952,7 @@ static void kgdb_initial_breakpoint(void) { kgdb_break_asap = 0; - printk(KERN_CRIT "kgdb: Waiting for connection from remote gdb...\n"); + pr_crit("Waiting for connection from remote gdb...\n"); kgdb_breakpoint(); } @@ -969,8 +971,7 @@ int kgdb_register_io_module(struct kgdb_io *new_dbg_io_ops) if (dbg_io_ops) { spin_unlock(&kgdb_registration_lock); - printk(KERN_ERR "kgdb: Another I/O driver is already " - "registered with KGDB.\n"); + pr_err("Another I/O driver is already registered with KGDB\n"); return -EBUSY; } @@ -986,8 +987,7 @@ int kgdb_register_io_module(struct kgdb_io *new_dbg_io_ops) spin_unlock(&kgdb_registration_lock); - printk(KERN_INFO "kgdb: Registered I/O driver %s.\n", - new_dbg_io_ops->name); + pr_info("Registered I/O driver %s\n", new_dbg_io_ops->name); /* Arm KGDB now. */ kgdb_register_callbacks(); @@ -1022,8 +1022,7 @@ void kgdb_unregister_io_module(struct kgdb_io *old_dbg_io_ops) spin_unlock(&kgdb_registration_lock); - printk(KERN_INFO - "kgdb: Unregistered I/O driver %s, debugger disabled.\n", + pr_info("Unregistered I/O driver %s, debugger disabled\n", old_dbg_io_ops->name); } EXPORT_SYMBOL_GPL(kgdb_unregister_io_module); -- cgit v0.10.2 From b6ffbab813cfc535db5c3815b4eb16dd7d97197c Mon Sep 17 00:00:00 2001 From: Oded Gabbay Date: Sun, 7 Dec 2014 22:27:24 +0200 Subject: amdkfd: Fix accounting of device queues This patch fixes a device QCM bug, where the number of queues were not counted correctly for the operation of update queue. The count was incorrect as there was no regard to the previous state of the queue. Signed-off-by: Oded Gabbay diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index 924e90c..f44d673 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -320,6 +320,7 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q) { int retval; struct mqd_manager *mqd; + bool prev_active = false; BUG_ON(!dqm || !q || !q->mqd); @@ -330,10 +331,18 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q) return -ENOMEM; } - retval = mqd->update_mqd(mqd, q->mqd, &q->properties); if (q->properties.is_active == true) + prev_active = true; + + /* + * + * check active state vs. the previous state + * and modify counter accordingly + */ + retval = mqd->update_mqd(mqd, q->mqd, &q->properties); + if ((q->properties.is_active == true) && (prev_active == false)) dqm->queue_count++; - else + else if ((q->properties.is_active == false) && (prev_active == true)) dqm->queue_count--; if (sched_policy != KFD_SCHED_POLICY_NO_HWS) -- cgit v0.10.2 From 8dfead6c2836621bd878626ae841fc55259b2523 Mon Sep 17 00:00:00 2001 From: Ben Goz Date: Tue, 2 Dec 2014 16:41:08 +0200 Subject: amdkfd: Fixing topology bug in building sysfs nodes Original code sent always 0 as the index number of the node. This patch fixes this bug by sending a variable which is incremented per node. Signed-off-by: Ben Goz Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c index b11792d..cca1708 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c @@ -921,7 +921,7 @@ static int kfd_build_sysfs_node_tree(void) uint32_t i = 0; list_for_each_entry(dev, &topology_device_list, list) { - ret = kfd_build_sysfs_node_entry(dev, 0); + ret = kfd_build_sysfs_node_entry(dev, i); if (ret < 0) return ret; i++; -- cgit v0.10.2 From 0cb989c0c6e62f235b14e48604f909085494026b Mon Sep 17 00:00:00 2001 From: Oded Gabbay Date: Fri, 5 Dec 2014 10:40:34 +0200 Subject: amdkfd: Remove duplicate include Signed-off-by: Oded Gabbay diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index 7d4974b..fe5c543 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -31,7 +31,6 @@ #include #include #include -#include #include #include #include "kfd_priv.h" -- cgit v0.10.2 From b64b8afcca9dea38cfde090af76ea935627ce1d5 Mon Sep 17 00:00:00 2001 From: Ben Goz Date: Tue, 9 Dec 2014 12:00:09 +0200 Subject: drm/amd: Fixing typos in kfd<->kgd interface Signed-off-by: Ben Goz Signed-off-by: Oded Gabbay diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c index adc3147..4c3828c 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c @@ -184,7 +184,7 @@ static bool is_occupied(struct mqd_manager *mm, void *mqd, uint32_t queue_id) { - return kfd2kgd->hqd_is_occupies(mm->dev->kgd, queue_address, + return kfd2kgd->hqd_is_occupied(mm->dev->kgd, queue_address, pipe_id, queue_id); } diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h index 47b5519..96a5122 100644 --- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h +++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h @@ -183,7 +183,7 @@ struct kfd2kgd_calls { int (*hqd_load)(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, uint32_t queue_id, uint32_t __user *wptr); - bool (*hqd_is_occupies)(struct kgd_dev *kgd, uint64_t queue_address, + bool (*hqd_is_occupied)(struct kgd_dev *kgd, uint64_t queue_address, uint32_t pipe_id, uint32_t queue_id); int (*hqd_destroy)(struct kgd_dev *kgd, uint32_t reset_type, diff --git a/drivers/gpu/drm/radeon/radeon_kfd.c b/drivers/gpu/drm/radeon/radeon_kfd.c index d3e78b4..12c43df 100644 --- a/drivers/gpu/drm/radeon/radeon_kfd.c +++ b/drivers/gpu/drm/radeon/radeon_kfd.c @@ -72,7 +72,7 @@ static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id, static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, uint32_t queue_id, uint32_t __user *wptr); -static bool kgd_hqd_is_occupies(struct kgd_dev *kgd, uint64_t queue_address, +static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address, uint32_t pipe_id, uint32_t queue_id); static int kgd_hqd_destroy(struct kgd_dev *kgd, uint32_t reset_type, @@ -92,7 +92,7 @@ static const struct kfd2kgd_calls kfd2kgd = { .init_memory = kgd_init_memory, .init_pipeline = kgd_init_pipeline, .hqd_load = kgd_hqd_load, - .hqd_is_occupies = kgd_hqd_is_occupies, + .hqd_is_occupied = kgd_hqd_is_occupied, .hqd_destroy = kgd_hqd_destroy, .get_fw_version = get_fw_version }; @@ -529,7 +529,7 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, return 0; } -static bool kgd_hqd_is_occupies(struct kgd_dev *kgd, uint64_t queue_address, +static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address, uint32_t pipe_id, uint32_t queue_id) { uint32_t act; -- cgit v0.10.2 From f254ae938ea479739572790a4e9b0ca86d16249f Mon Sep 17 00:00:00 2001 From: Peter Wu Date: Tue, 16 Dec 2014 16:55:21 +0100 Subject: HID: logitech-dj: check report length Malicious USB devices can send bogus reports smaller than the expected buffer size. Ensure that the length is valid to avoid reading out of bounds. Signed-off-by: Peter Wu Reviewed-by: Benjamin Tissoires Signed-off-by: Jiri Kosina diff --git a/drivers/hid/hid-logitech-dj.c b/drivers/hid/hid-logitech-dj.c index c917ab6..5bc6d80 100644 --- a/drivers/hid/hid-logitech-dj.c +++ b/drivers/hid/hid-logitech-dj.c @@ -962,10 +962,24 @@ static int logi_dj_raw_event(struct hid_device *hdev, switch (data[0]) { case REPORT_ID_DJ_SHORT: + if (size != DJREPORT_SHORT_LENGTH) { + dev_err(&hdev->dev, "DJ report of bad size (%d)", size); + return false; + } return logi_dj_dj_event(hdev, report, data, size); case REPORT_ID_HIDPP_SHORT: - /* intentional fallthrough */ + if (size != HIDPP_REPORT_SHORT_LENGTH) { + dev_err(&hdev->dev, + "Short HID++ report of bad size (%d)", size); + return false; + } + return logi_dj_hidpp_event(hdev, report, data, size); case REPORT_ID_HIDPP_LONG: + if (size != HIDPP_REPORT_LONG_LENGTH) { + dev_err(&hdev->dev, + "Long HID++ report of bad size (%d)", size); + return false; + } return logi_dj_hidpp_event(hdev, report, data, size); } -- cgit v0.10.2 From 0b3f6569a560aa68c9c50cae0e1bc401f7ee699f Mon Sep 17 00:00:00 2001 From: Peter Wu Date: Tue, 16 Dec 2014 16:55:22 +0100 Subject: HID: logitech-hidpp: check WTP report length Malicious USB devices can send bogus reports smaller than the expected buffer size. Ensure that the length for WTP reports is valid to avoid reading out of bounds. Signed-off-by: Peter Wu Reviewed-by: Benjamin Tissoires Signed-off-by: Jiri Kosina diff --git a/drivers/hid/hid-logitech-hidpp.c b/drivers/hid/hid-logitech-hidpp.c index 2f420c0..dd3c21b 100644 --- a/drivers/hid/hid-logitech-hidpp.c +++ b/drivers/hid/hid-logitech-hidpp.c @@ -794,6 +794,11 @@ static int wtp_raw_event(struct hid_device *hdev, u8 *data, int size) switch (data[0]) { case 0x02: + if (size < 2) { + hid_err(hdev, "Received HID report of bad size (%d)", + size); + return 1; + } if (hidpp->quirks & HIDPP_QUIRK_WTP_PHYSICAL_BUTTONS) { input_event(wd->input, EV_KEY, BTN_LEFT, !!(data[1] & 0x01)); @@ -806,6 +811,7 @@ static int wtp_raw_event(struct hid_device *hdev, u8 *data, int size) return wtp_mouse_raw_xy_event(hidpp, &data[7]); } case REPORT_ID_HIDPP_LONG: + /* size is already checked in hidpp_raw_event. */ if ((report->fap.feature_index != wd->mt_feature_index) || (report->fap.funcindex_clientid != EVENT_TOUCHPAD_RAW_XY)) return 1; -- cgit v0.10.2 From da940db41dcf8c04166f711646df2f35376010aa Mon Sep 17 00:00:00 2001 From: Karl Relton Date: Tue, 16 Dec 2014 15:37:22 +0000 Subject: HID: add battery quirk for USB_DEVICE_ID_APPLE_ALU_WIRELESS_2011_ISO keyboard Apple bluetooth wireless keyboard (sold in UK) has always reported zero for battery strength no matter what condition the batteries are actually in. With this patch applied (applying same quirk as other Apple keyboards), the battery strength is now correctly reported. Signed-off-by: Karl Relton Signed-off-by: Jiri Kosina diff --git a/drivers/hid/hid-input.c b/drivers/hid/hid-input.c index e0a0f06..9505605 100644 --- a/drivers/hid/hid-input.c +++ b/drivers/hid/hid-input.c @@ -312,6 +312,9 @@ static const struct hid_device_id hid_battery_quirks[] = { USB_DEVICE_ID_APPLE_ALU_WIRELESS_2011_ANSI), HID_BATTERY_QUIRK_PERCENT | HID_BATTERY_QUIRK_FEATURE }, { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, + USB_DEVICE_ID_APPLE_ALU_WIRELESS_2011_ISO), + HID_BATTERY_QUIRK_PERCENT | HID_BATTERY_QUIRK_FEATURE }, + { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_WIRELESS_ANSI), HID_BATTERY_QUIRK_PERCENT | HID_BATTERY_QUIRK_FEATURE }, {} -- cgit v0.10.2 From 5b44c53aeb791757072be4a267255cedfff594fd Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Fri, 12 Dec 2014 14:01:49 +0200 Subject: HID: i2c-hid: Do not free buffers in i2c_hid_stop() When a hid driver that uses i2c-hid as transport is unloaded, the hid core will call i2c_hid_stop() which releases all the buffers associated with the device. This includes also the command buffer. Now, when the i2c-hid driver itself is unloaded it tries to power down the device by sending it PWR_SLEEP command. Since the command buffer is already released we get following crash: [ 79.691459] BUG: unable to handle kernel NULL pointer dereference at (null) [ 79.691532] IP: [] __i2c_hid_command+0x49/0x310 [i2c_hid] ... [ 79.693467] Call Trace: [ 79.693494] [] ? __unmask_ioapic+0x21/0x30 [ 79.693537] [] ? unmask_ioapic+0x25/0x40 [ 79.693581] [] ? i2c_hid_set_power+0x4b/0xa0 [i2c_hid] [ 79.693632] [] ? i2c_hid_runtime_resume+0x1f/0x30 [i2c_hid] [ 79.693689] [] ? __rpm_callback+0x2b/0x70 [ 79.693733] [] ? rpm_callback+0x21/0x90 [ 79.693776] [] ? rpm_resume+0x41c/0x600 [ 79.693820] [] ? __pm_runtime_resume+0x4c/0x80 [ 79.693868] [] ? __device_release_driver+0x28/0x100 [ 79.693917] [] ? driver_detach+0xa0/0xb0 [ 79.693959] [] ? bus_remove_driver+0x4c/0xb0 [ 79.694006] [] ? SyS_delete_module+0x11d/0x1d0 [ 79.694054] [] ? int_signal+0x12/0x17 [ 79.694095] [] ? system_call_fastpath+0x12/0x17 Fix this so that we only free buffers when the i2c-hid driver itself is removed. Fixes: 34f439e4afcd ("HID: i2c-hid: add runtime PM support") Reported-by: Gabriele Mazzotta Signed-off-by: Mika Westerberg Reviewed-by: Benjamin Tissoires Signed-off-by: Jiri Kosina diff --git a/drivers/hid/i2c-hid/i2c-hid.c b/drivers/hid/i2c-hid/i2c-hid.c index d32037c..d43e967 100644 --- a/drivers/hid/i2c-hid/i2c-hid.c +++ b/drivers/hid/i2c-hid/i2c-hid.c @@ -706,12 +706,7 @@ static int i2c_hid_start(struct hid_device *hid) static void i2c_hid_stop(struct hid_device *hid) { - struct i2c_client *client = hid->driver_data; - struct i2c_hid *ihid = i2c_get_clientdata(client); - hid->claimed = 0; - - i2c_hid_free_buffers(ihid); } static int i2c_hid_open(struct hid_device *hid) -- cgit v0.10.2 From 030e416b4f7782b22e8985095be0ea641aa20bf8 Mon Sep 17 00:00:00 2001 From: Ben Goz Date: Sun, 4 Jan 2015 21:46:44 +0200 Subject: drm/amdkfd: Load mqd to hqd in non-HWS mode This patch fixes a bug in DQM, where the MQD of a newly created compute queue is not loaded to an HQD slot. As a result, the CP never reads packets from this queue. This bug happens only in non-HWS (hardware scheduling) mode. In HWS mode, the CP is responsible of loading MQDs to HQDs slots. Signed-off-by: Ben Goz Signed-off-by: Oded Gabbay Acked-by: Alex Deucher diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index f44d673..3b08ed6 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -272,6 +272,18 @@ static int create_compute_queue_nocpsch(struct device_queue_manager *dqm, return retval; } + pr_debug("kfd: loading mqd to hqd on pipe (%d) queue (%d)\n", + q->pipe, + q->queue); + + retval = mqd->load_mqd(mqd, q->mqd, q->pipe, + q->queue, q->properties.write_ptr); + if (retval != 0) { + deallocate_hqd(dqm, q); + mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj); + return retval; + } + return 0; } -- cgit v0.10.2 From 4c18442e536ec24cc5ca23c5bbaa80bd53020b95 Mon Sep 17 00:00:00 2001 From: Alexey Khoroshilov Date: Sun, 4 Jan 2015 02:31:20 +0300 Subject: drm/radeon: do not leave queue acquired if timeout happens in kgd_hqd_destroy() If timeout happens, kgd_hqd_destroy() just returns -ETIME leaving queue acquired. It may cause a deadlock, so the patch proposes to release queue before return. Found by Linux Driver Verification project (linuxtesting.org). Signed-off-by: Alexey Khoroshilov Signed-off-by: Oded Gabbay diff --git a/drivers/gpu/drm/radeon/radeon_kfd.c b/drivers/gpu/drm/radeon/radeon_kfd.c index 12c43df..a55afba 100644 --- a/drivers/gpu/drm/radeon/radeon_kfd.c +++ b/drivers/gpu/drm/radeon/radeon_kfd.c @@ -568,6 +568,7 @@ static int kgd_hqd_destroy(struct kgd_dev *kgd, uint32_t reset_type, if (timeout == 0) { pr_err("kfd: cp queue preemption time out (%dms)\n", temp); + release_queue(kgd); return -ETIME; } msleep(20); -- cgit v0.10.2 From fec77bb50fd0b94cb502cd3745e8b5c4a6e9c404 Mon Sep 17 00:00:00 2001 From: Ben Goz Date: Wed, 17 Dec 2014 14:09:10 +0200 Subject: drm/radeon: Assign VMID to PASID for IH in non-HWS mode This patch fixes a bug in kgd_set_pasid_vmid_mapping(), where the function only updated the ATC registers (IOMMU) with the new VMID <--> PASID mapping, but didn't update the IH (Interrupt) registers. The bug only occurs when using non-HWS mode. In HWS mode, the CP automatically does the VMID <--> PASID mapping. Signed-off-by: Ben Goz Signed-off-by: Oded Gabbay Acked-by: Alex Deucher diff --git a/drivers/gpu/drm/radeon/cikd.h b/drivers/gpu/drm/radeon/cikd.h index ba85986..03003f8 100644 --- a/drivers/gpu/drm/radeon/cikd.h +++ b/drivers/gpu/drm/radeon/cikd.h @@ -2156,4 +2156,6 @@ #define ATC_VM_APERTURE1_HIGH_ADDR 0x330Cu #define ATC_VM_APERTURE1_LOW_ADDR 0x3304u +#define IH_VMID_0_LUT 0x3D40u + #endif diff --git a/drivers/gpu/drm/radeon/radeon_kfd.c b/drivers/gpu/drm/radeon/radeon_kfd.c index a55afba..8bf87f1 100644 --- a/drivers/gpu/drm/radeon/radeon_kfd.c +++ b/drivers/gpu/drm/radeon/radeon_kfd.c @@ -390,6 +390,10 @@ static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid, cpu_relax(); write_register(kgd, ATC_VMID_PASID_MAPPING_UPDATE_STATUS, 1U << vmid); + /* Mapping vmid to pasid also for IH block */ + write_register(kgd, IH_VMID_0_LUT + vmid * sizeof(uint32_t), + pasid_mapping); + return 0; } -- cgit v0.10.2 From 060c998251728a7d183092da5e8a318398e56e76 Mon Sep 17 00:00:00 2001 From: Daniel Nicoletti Date: Wed, 17 Dec 2014 11:53:54 -0200 Subject: HID: Allow HID_BATTERY_STRENGTH to be enabled Allows for HID_BATTERY_STRENGTH config be enabled without the need for HID module to be built-in, prior to this HID and POWER_SUPPLY had to be equal, and now we only select POWER_SUPPLY and depend on the HID module. Signed-off-by: Daniel Nicoletti Signed-off-by: Jiri Kosina diff --git a/drivers/hid/Kconfig b/drivers/hid/Kconfig index 230b6f8..dfdc269 100644 --- a/drivers/hid/Kconfig +++ b/drivers/hid/Kconfig @@ -27,7 +27,8 @@ if HID config HID_BATTERY_STRENGTH bool "Battery level reporting for HID devices" - depends on HID && POWER_SUPPLY && HID = POWER_SUPPLY + depends on HID + select POWER_SUPPLY default n ---help--- This option adds support of reporting battery strength (for HID devices -- cgit v0.10.2 From 7f1241ed1a06b4846ad7a2a57eb088b757e58e16 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Thu, 18 Dec 2014 11:44:06 +0200 Subject: drm/i915: Kill check_power_well() calls MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit pps_{lock,unlock}() call intel_display_power_{get,put}() outside pps_mutes to avoid deadlocks with the power_domain mutex. In theory during aux transfers we should usually have the relevant power domain references already held by some higher level code, so this should not result in much overhead (exception being userspace i2c-dev access). However thanks to the check_power_well() calls in intel_display_power_{get/put}() we end up doing a few Punit reads for each aux transfer. Obviously doing this for each byte transferred via i2c-over-aux is not a good idea. I can't think of a good way to keep check_power_well() while eliminating the overhead, so let's just remove check_power_well() entirely. Fixes a driver init time regression introduced by: commit 773538e86081d146e0020435d614f4b96996c1f9 Author: Ville Syrjälä Date: Thu Sep 4 14:54:56 2014 +0300 drm/i915: Reset power sequencer pipe tracking when disp2d is off Credit goes to Jani for figuring this out. v2: Add the regression note in the commit message. Cc: stable@vger.kernel.org (v3.18+) Cc: Egbert Eich Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=86201 Tested-by: Wendy Wang Signed-off-by: Ville Syrjälä Signed-off-by: Jani Nikula diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c index f5a78d5..ac6da71 100644 --- a/drivers/gpu/drm/i915/intel_runtime_pm.c +++ b/drivers/gpu/drm/i915/intel_runtime_pm.c @@ -615,29 +615,6 @@ static void chv_pipe_power_well_disable(struct drm_i915_private *dev_priv, vlv_power_sequencer_reset(dev_priv); } -static void check_power_well_state(struct drm_i915_private *dev_priv, - struct i915_power_well *power_well) -{ - bool enabled = power_well->ops->is_enabled(dev_priv, power_well); - - if (power_well->always_on || !i915.disable_power_well) { - if (!enabled) - goto mismatch; - - return; - } - - if (enabled != (power_well->count > 0)) - goto mismatch; - - return; - -mismatch: - WARN(1, "state mismatch for '%s' (always_on %d hw state %d use-count %d disable_power_well %d\n", - power_well->name, power_well->always_on, enabled, - power_well->count, i915.disable_power_well); -} - /** * intel_display_power_get - grab a power domain reference * @dev_priv: i915 device instance @@ -669,8 +646,6 @@ void intel_display_power_get(struct drm_i915_private *dev_priv, power_well->ops->enable(dev_priv, power_well); power_well->hw_enabled = true; } - - check_power_well_state(dev_priv, power_well); } power_domains->domain_use_count[domain]++; @@ -709,8 +684,6 @@ void intel_display_power_put(struct drm_i915_private *dev_priv, power_well->hw_enabled = false; power_well->ops->disable(dev_priv, power_well); } - - check_power_well_state(dev_priv, power_well); } mutex_unlock(&power_domains->lock); -- cgit v0.10.2 From 7d47559ee84b3ac206aa9e675606fafcd7c0b500 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Wed, 17 Dec 2014 23:08:03 +0200 Subject: drm/i915: Don't call intel_prepare_page_flip() multiple times on gen2-4 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The flip stall detector kicks in when pending>=INTEL_FLIP_COMPLETE. That means if we first call intel_prepare_page_flip() but don't call intel_finish_page_flip(), the next stall check will erroneosly think the page flip was somehow stuck. With enough debug spew emitted from the interrupt handler my 830 hangs when this happens. My theory is that the previous vblank interrupt gets sufficiently delayed that the handler will see the pending bit set in IIR, but ISR still has the bit set as well (ie. the flip was processed by CS but didn't complete yet). In this case the handler will proceed to call intel_check_page_flip() immediately after intel_prepare_page_flip(). It then tries to print a backtrace for the stuck flip WARN, which apparetly results in way too much debug spew delaying interrupt processing further. That then seems to cause an endless loop in the interrupt handler, and the machine is dead until the watchdog kicks in and reboots. At least limiting the number of iterations of the loop in the interrupt handler also prevented the hang. So it seems better to not call intel_prepare_page_flip() without immediately calling intel_finish_page_flip(). The IIR/ISR trickery avoids races here so this is a perfectly safe thing to do. v2: Fix typo in commit message (checkpatch) Cc: stable@vger.kernel.org Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=88381 Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=85888 Reviewed-by: Chris Wilson Signed-off-by: Ville Syrjälä Signed-off-by: Jani Nikula diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 996c293..d0d3dfb 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -3725,8 +3725,6 @@ static bool i8xx_handle_vblank(struct drm_device *dev, if ((iir & flip_pending) == 0) goto check_page_flip; - intel_prepare_page_flip(dev, plane); - /* We detect FlipDone by looking for the change in PendingFlip from '1' * to '0' on the following vblank, i.e. IIR has the Pendingflip * asserted following the MI_DISPLAY_FLIP, but ISR is deasserted, hence @@ -3736,6 +3734,7 @@ static bool i8xx_handle_vblank(struct drm_device *dev, if (I915_READ16(ISR) & flip_pending) goto check_page_flip; + intel_prepare_page_flip(dev, plane); intel_finish_page_flip(dev, pipe); return true; @@ -3907,8 +3906,6 @@ static bool i915_handle_vblank(struct drm_device *dev, if ((iir & flip_pending) == 0) goto check_page_flip; - intel_prepare_page_flip(dev, plane); - /* We detect FlipDone by looking for the change in PendingFlip from '1' * to '0' on the following vblank, i.e. IIR has the Pendingflip * asserted following the MI_DISPLAY_FLIP, but ISR is deasserted, hence @@ -3918,6 +3915,7 @@ static bool i915_handle_vblank(struct drm_device *dev, if (I915_READ(ISR) & flip_pending) goto check_page_flip; + intel_prepare_page_flip(dev, plane); intel_finish_page_flip(dev, pipe); return true; -- cgit v0.10.2 From 01f5a6261cea395f72877aeb7c2fe2d42e1b1e00 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 16 Dec 2014 18:38:37 +0200 Subject: Revert "drm/i915: Preserve VGACNTR bits from the BIOS" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The VGA_2X_MODE bit apparently affects the display even when the VGA plane is disabled. The bit will set by the BIOS when the panel width is at least 1280 pixels. So by preserving the bit from the BIOS we end up with corrupted display on machines with such high res panels. I only have 1024x768 panels on my gen2 machines so never ran into this problem. The original reason for preserving the VGACNTR register was to make my 830 survive S3 with acpi_sleep=s3_bios option. However after further 830 fixes that option is no longer needed to make S3 work and preserving VGACNTR doesn't seem to be necessary without it, so we can just revert the entire patch. This reverts commit 69769f9a422bfc62e17399da3590c5e31ac37f24 Author: Ville Syrjälä Date: Fri Aug 15 01:22:08 2014 +0300 drm/i915: Preserve VGACNTR bits from the BIOS Cc: Bruno Prémont Cc: stable@vger.kernel.org Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=87171 Signed-off-by: Ville Syrjälä Signed-off-by: Jani Nikula diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 63bcda5..3c710bb 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1756,8 +1756,6 @@ struct drm_i915_private { */ struct workqueue_struct *dp_wq; - uint32_t bios_vgacntr; - /* Abstract the submission mechanism (legacy ringbuffer or execlists) away */ struct { int (*do_execbuf)(struct drm_device *dev, struct drm_file *file, diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 03d0b0c..97c5513 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -13060,11 +13060,7 @@ static void i915_disable_vga(struct drm_device *dev) vga_put(dev->pdev, VGA_RSRC_LEGACY_IO); udelay(300); - /* - * Fujitsu-Siemens Lifebook S6010 (830) has problems resuming - * from S3 without preserving (some of?) the other bits. - */ - I915_WRITE(vga_reg, dev_priv->bios_vgacntr | VGA_DISP_DISABLE); + I915_WRITE(vga_reg, VGA_DISP_DISABLE); POSTING_READ(vga_reg); } @@ -13149,8 +13145,6 @@ void intel_modeset_init(struct drm_device *dev) intel_shared_dpll_init(dev); - /* save the BIOS value before clobbering it */ - dev_priv->bios_vgacntr = I915_READ(i915_vgacntrl_reg(dev)); /* Just disable it once at startup */ i915_disable_vga(dev); intel_setup_outputs(dev); -- cgit v0.10.2 From 5d77d9c5e177d2182df5d9fd61ba986facb64415 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Wed, 12 Nov 2014 16:40:35 +0200 Subject: drm/i915: add missing rpm ref to i915_gem_pwrite_ioctl Without this RPM ref we can hit the device suspended WARN via: i915_gem_object_pin()->ggtt_bind_vma->gen6_ggtt_insert_entries(). I noticed this on my BYT while keeping the i915 device in runtime suspended state for a while. I chose this place to take the ref to avoid the possible deadlock via the mutex_lock taken both later in this function and in the runtime suspend handler. This can happen if an RPM suspend event is queued and need to be flushed before taking the RPM ref. Testcase: igt/pm_rpm/gem-evict-pwrite Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=87363 Signed-off-by: Imre Deak Signed-off-by: Jani Nikula diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 4a9faea6..18f802d 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1050,6 +1050,7 @@ int i915_gem_pwrite_ioctl(struct drm_device *dev, void *data, struct drm_file *file) { + struct drm_i915_private *dev_priv = dev->dev_private; struct drm_i915_gem_pwrite *args = data; struct drm_i915_gem_object *obj; int ret; @@ -1069,9 +1070,11 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data, return -EFAULT; } + intel_runtime_pm_get(dev_priv); + ret = i915_mutex_lock_interruptible(dev); if (ret) - return ret; + goto put_rpm; obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); if (&obj->base == NULL) { @@ -1123,6 +1126,9 @@ out: drm_gem_object_unreference(&obj->base); unlock: mutex_unlock(&dev->struct_mutex); +put_rpm: + intel_runtime_pm_put(dev_priv); + return ret; } -- cgit v0.10.2 From 8abd820503e4befb28d0722f8712bdbec57e63a8 Mon Sep 17 00:00:00 2001 From: Peter Wu Date: Tue, 16 Dec 2014 01:50:16 +0100 Subject: HID: logitech-hidpp: avoid unintended fall-through Add a return to avoid a fall-through. Introduced in commit 57ac86cf52e903d9e3e0f12b34c814cce6b65550 ("HID: logitech-hidpp: add support of the first Logitech Wireless Touchpad"). Signed-off-by: Peter Wu Reviewed-by: Benjamin Tissoires Signed-off-by: Jiri Kosina diff --git a/drivers/hid/hid-logitech-hidpp.c b/drivers/hid/hid-logitech-hidpp.c index dd3c21b..66cb1b5 100644 --- a/drivers/hid/hid-logitech-hidpp.c +++ b/drivers/hid/hid-logitech-hidpp.c @@ -805,6 +805,7 @@ static int wtp_raw_event(struct hid_device *hdev, u8 *data, int size) input_event(wd->input, EV_KEY, BTN_RIGHT, !!(data[1] & 0x02)); input_sync(wd->input); + return 0; } else { if (size < 21) return 1; -- cgit v0.10.2 From a0e625f8b7fd3fbffcb3af36fb1f88ea059ee6a6 Mon Sep 17 00:00:00 2001 From: Benjamin Tissoires Date: Thu, 11 Dec 2014 17:39:59 -0500 Subject: HID: logitech-hidpp: prefix the name with "Logitech" Current names are reported as "K750", "M705", and it can be misleading for the users when they look at their input device list. Prefixing the names with "Logitech " makes things better. Signed-off-by: Benjamin Tissoires Reviewed-by: Peter Wu Signed-off-by: Jiri Kosina diff --git a/drivers/hid/hid-logitech-hidpp.c b/drivers/hid/hid-logitech-hidpp.c index 66cb1b5..a93cefe 100644 --- a/drivers/hid/hid-logitech-hidpp.c +++ b/drivers/hid/hid-logitech-hidpp.c @@ -282,6 +282,33 @@ static inline bool hidpp_report_is_connect_event(struct hidpp_report *report) (report->rap.sub_id == 0x41); } +/** + * hidpp_prefix_name() prefixes the current given name with "Logitech ". + */ +static void hidpp_prefix_name(char **name, int name_length) +{ +#define PREFIX_LENGTH 9 /* "Logitech " */ + + int new_length; + char *new_name; + + if (name_length > PREFIX_LENGTH && + strncmp(*name, "Logitech ", PREFIX_LENGTH) == 0) + /* The prefix has is already in the name */ + return; + + new_length = PREFIX_LENGTH + name_length; + new_name = kzalloc(new_length, GFP_KERNEL); + if (!new_name) + return; + + snprintf(new_name, new_length, "Logitech %s", *name); + + kfree(*name); + + *name = new_name; +} + /* -------------------------------------------------------------------------- */ /* HIDP++ 1.0 commands */ /* -------------------------------------------------------------------------- */ @@ -321,6 +348,10 @@ static char *hidpp_get_unifying_name(struct hidpp_device *hidpp_dev) return NULL; memcpy(name, &response.rap.params[2], len); + + /* include the terminating '\0' */ + hidpp_prefix_name(&name, len + 1); + return name; } @@ -498,6 +529,9 @@ static char *hidpp_get_device_name(struct hidpp_device *hidpp) index += ret; } + /* include the terminating '\0' */ + hidpp_prefix_name(&name, __name_length + 1); + return name; } -- cgit v0.10.2 From 95bfdf23e4d59099340c01064f39e7f6875c02bc Mon Sep 17 00:00:00 2001 From: Kirill Smelkov Date: Mon, 8 Dec 2014 14:07:59 +0300 Subject: tools/liblockdep: Fix debug_check thinko in mutex destroy In mutex destroy code currently we pass to debug_check_no_locks_freed() [mem_from, mem_end) address region. But debug_check_no_locks_freed() accepts mem_from, mem_*len* i.e. second parameter is region length, not end address. And it was always so, starting from 2006 (fbb9ce95 "lockdep: core"). Fix it, or else on a mutex destroy we wrongly check much-wider-than-mutex region and can find not-yet-released other locks there and wrongly report BUGs on them. Signed-off-by: Kirill Smelkov Signed-off-by: Sasha Levin diff --git a/tools/lib/lockdep/preload.c b/tools/lib/lockdep/preload.c index 6f80360..0b0112c 100644 --- a/tools/lib/lockdep/preload.c +++ b/tools/lib/lockdep/preload.c @@ -317,7 +317,7 @@ int pthread_mutex_destroy(pthread_mutex_t *mutex) * * TODO: Hook into free() and add that check there as well. */ - debug_check_no_locks_freed(mutex, mutex + sizeof(*mutex)); + debug_check_no_locks_freed(mutex, sizeof(*mutex)); __del_lock(__get_lock(mutex)); return ll_pthread_mutex_destroy(mutex); } @@ -341,7 +341,7 @@ int pthread_rwlock_destroy(pthread_rwlock_t *rwlock) { try_init_preload(); - debug_check_no_locks_freed(rwlock, rwlock + sizeof(*rwlock)); + debug_check_no_locks_freed(rwlock, sizeof(*rwlock)); __del_lock(__get_lock(rwlock)); return ll_pthread_rwlock_destroy(rwlock); } -- cgit v0.10.2 From 394f56fe480140877304d342dec46d50dc823d46 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Fri, 19 Dec 2014 16:04:11 -0800 Subject: x86_64, vdso: Fix the vdso address randomization algorithm The theory behind vdso randomization is that it's mapped at a random offset above the top of the stack. To avoid wasting a page of memory for an extra page table, the vdso isn't supposed to extend past the lowest PMD into which it can fit. Other than that, the address should be a uniformly distributed address that meets all of the alignment requirements. The current algorithm is buggy: the vdso has about a 50% probability of being at the very end of a PMD. The current algorithm also has a decent chance of failing outright due to incorrect handling of the case where the top of the stack is near the top of its PMD. This fixes the implementation. The paxtest estimate of vdso "randomisation" improves from 11 bits to 18 bits. (Disclaimer: I don't know what the paxtest code is actually calculating.) It's worth noting that this algorithm is inherently biased: the vdso is more likely to end up near the end of its PMD than near the beginning. Ideally we would either nix the PMD sharing requirement or jointly randomize the vdso and the stack to reduce the bias. In the mean time, this is a considerable improvement with basically no risk of compatibility issues, since the allowed outputs of the algorithm are unchanged. As an easy test, doing this: for i in `seq 10000` do grep -P vdso /proc/self/maps |cut -d- -f1 done |sort |uniq -d used to produce lots of output (1445 lines on my most recent run). A tiny subset looks like this: 7fffdfffe000 7fffe01fe000 7fffe05fe000 7fffe07fe000 7fffe09fe000 7fffe0bfe000 7fffe0dfe000 Note the suspicious fe000 endings. With the fix, I get a much more palatable 76 repeated addresses. Reviewed-by: Kees Cook Cc: stable@vger.kernel.org Signed-off-by: Andy Lutomirski diff --git a/arch/x86/vdso/vma.c b/arch/x86/vdso/vma.c index 009495b..1c9f750 100644 --- a/arch/x86/vdso/vma.c +++ b/arch/x86/vdso/vma.c @@ -41,12 +41,17 @@ void __init init_vdso_image(const struct vdso_image *image) struct linux_binprm; -/* Put the vdso above the (randomized) stack with another randomized offset. - This way there is no hole in the middle of address space. - To save memory make sure it is still in the same PTE as the stack top. - This doesn't give that many random bits. - - Only used for the 64-bit and x32 vdsos. */ +/* + * Put the vdso above the (randomized) stack with another randomized + * offset. This way there is no hole in the middle of address space. + * To save memory make sure it is still in the same PTE as the stack + * top. This doesn't give that many random bits. + * + * Note that this algorithm is imperfect: the distribution of the vdso + * start address within a PMD is biased toward the end. + * + * Only used for the 64-bit and x32 vdsos. + */ static unsigned long vdso_addr(unsigned long start, unsigned len) { #ifdef CONFIG_X86_32 @@ -54,22 +59,30 @@ static unsigned long vdso_addr(unsigned long start, unsigned len) #else unsigned long addr, end; unsigned offset; - end = (start + PMD_SIZE - 1) & PMD_MASK; + + /* + * Round up the start address. It can start out unaligned as a result + * of stack start randomization. + */ + start = PAGE_ALIGN(start); + + /* Round the lowest possible end address up to a PMD boundary. */ + end = (start + len + PMD_SIZE - 1) & PMD_MASK; if (end >= TASK_SIZE_MAX) end = TASK_SIZE_MAX; end -= len; - /* This loses some more bits than a modulo, but is cheaper */ - offset = get_random_int() & (PTRS_PER_PTE - 1); - addr = start + (offset << PAGE_SHIFT); - if (addr >= end) - addr = end; + + if (end > start) { + offset = get_random_int() % (((end - start) >> PAGE_SHIFT) + 1); + addr = start + (offset << PAGE_SHIFT); + } else { + addr = start; + } /* - * page-align it here so that get_unmapped_area doesn't - * align it wrongfully again to the next page. addr can come in 4K - * unaligned here as a result of stack start randomization. + * Forcibly align the final address in case we have a hardware + * issue that requires alignment for performance reasons. */ - addr = PAGE_ALIGN(addr); addr = align_vdso_addr(addr); return addr; -- cgit v0.10.2 From 1bacc894c227fad8a727eb99728df708eba57654 Mon Sep 17 00:00:00 2001 From: Oded Gabbay Date: Mon, 22 Dec 2014 11:47:37 +0200 Subject: drivers: Move iommu/ before gpu/ in Makefile MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit AMD GPU devices are dependent on AMD IOMMU controller functionality to allow the GPU to access a process's virtual memory address space, without the need for pinning the memory. This patch changes the order in the drivers makefile, so iommu/ subsystem is linked before gpu/ subsystem. That way, if the gpu and iommu drivers are compiled inside the kernel image (not as modules), the correct order of device loading is still maintained (iommu module is loaded before gpu module). Signed-off-by: Oded Gabbay Reviewed-by: Christian König diff --git a/drivers/Makefile b/drivers/Makefile index 67d2334..527a6da 100644 --- a/drivers/Makefile +++ b/drivers/Makefile @@ -50,7 +50,10 @@ obj-$(CONFIG_RESET_CONTROLLER) += reset/ obj-y += tty/ obj-y += char/ -# gpu/ comes after char for AGP vs DRM startup +# iommu/ comes before gpu as gpu are using iommu controllers +obj-$(CONFIG_IOMMU_SUPPORT) += iommu/ + +# gpu/ comes after char for AGP vs DRM startup and after iommu obj-y += gpu/ obj-$(CONFIG_CONNECTOR) += connector/ @@ -141,7 +144,6 @@ obj-y += clk/ obj-$(CONFIG_MAILBOX) += mailbox/ obj-$(CONFIG_HWSPINLOCK) += hwspinlock/ -obj-$(CONFIG_IOMMU_SUPPORT) += iommu/ obj-$(CONFIG_REMOTEPROC) += remoteproc/ obj-$(CONFIG_RPMSG) += rpmsg/ -- cgit v0.10.2 From 611a03d764b151190ba7a388b00be72b23aee2bc Mon Sep 17 00:00:00 2001 From: Oded Gabbay Date: Sun, 21 Dec 2014 15:21:41 +0200 Subject: drm: Put amdkfd before radeon in drm Makefile MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When amdkfd and radeon are compiled inside the kernel image (not as modules), radeon will load before amdkfd, which will cause a bug when radeon will probe the GPUs. When the two drivers are compiled as modules, amdkfd is loaded after radeon is loaded but before radeon starts probing the GPUs. This is done because radeon loads the amdkfd module through symbol_request function. This patch makes amdkfd load before radeon when they are both compiled inside the kernel image, which makes the behavior similar to the case when they are modules, and prevents the kernel bug. Signed-off-by: Oded Gabbay Reviewed-by: Christian König diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile index 66e4039..e620807 100644 --- a/drivers/gpu/drm/Makefile +++ b/drivers/gpu/drm/Makefile @@ -37,6 +37,7 @@ obj-$(CONFIG_DRM_MIPI_DSI) += drm_mipi_dsi.o obj-$(CONFIG_DRM_TTM) += ttm/ obj-$(CONFIG_DRM_TDFX) += tdfx/ obj-$(CONFIG_DRM_R128) += r128/ +obj-$(CONFIG_HSA_AMD) += amd/amdkfd/ obj-$(CONFIG_DRM_RADEON)+= radeon/ obj-$(CONFIG_DRM_MGA) += mga/ obj-$(CONFIG_DRM_I810) += i810/ @@ -67,4 +68,3 @@ obj-$(CONFIG_DRM_IMX) += imx/ obj-y += i2c/ obj-y += panel/ obj-y += bridge/ -obj-$(CONFIG_HSA_AMD) += amd/amdkfd/ -- cgit v0.10.2 From c7e873f85fb60b1af589ac1b0c62353cfe0bbb29 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Wed, 3 Dec 2014 16:16:52 +1000 Subject: drm/nouveau/bios: fix oops on pre-nv50 chipsets Signed-off-by: Ben Skeggs diff --git a/drivers/gpu/drm/nouveau/core/subdev/bios/shadowramin.c b/drivers/gpu/drm/nouveau/core/subdev/bios/shadowramin.c index 5e58bba..a7a890f 100644 --- a/drivers/gpu/drm/nouveau/core/subdev/bios/shadowramin.c +++ b/drivers/gpu/drm/nouveau/core/subdev/bios/shadowramin.c @@ -44,8 +44,10 @@ static void pramin_fini(void *data) { struct priv *priv = data; - nv_wr32(priv->bios, 0x001700, priv->bar0); - kfree(priv); + if (priv) { + nv_wr32(priv->bios, 0x001700, priv->bar0); + kfree(priv); + } } static void * -- cgit v0.10.2 From dcccdc143ffa832674a81070cfe4c9a7eb6c8aa1 Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Thu, 11 Dec 2014 03:09:10 +0900 Subject: drm/nouveau: fix missing return statement in nouveau_ttm_tt_unpopulate nouveau_ttm_tt_unpopulate() is supposed to return right after calling ttm_dma_unpopulate() in the case of a coherent buffer. The return statement was omitted, leading to the pages being unmapped twice. Fix this. Signed-off-by: Alexandre Courbot Signed-off-by: Ben Skeggs diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c index 21ec561..bba2960 100644 --- a/drivers/gpu/drm/nouveau/nouveau_bo.c +++ b/drivers/gpu/drm/nouveau/nouveau_bo.c @@ -1572,8 +1572,10 @@ nouveau_ttm_tt_unpopulate(struct ttm_tt *ttm) * so use the DMA API for them. */ if (!nv_device_is_cpu_coherent(device) && - ttm->caching_state == tt_uncached) + ttm->caching_state == tt_uncached) { ttm_dma_unpopulate(ttm_dma, dev->dev); + return; + } #if __OS_HAS_AGP if (drm->agp.stat == ENABLED) { -- cgit v0.10.2 From 8d5e3af15c798af93ee8bf5f504fa0511b85c627 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sven=20K=C3=B6hler?= Date: Wed, 3 Dec 2014 14:15:54 +0000 Subject: drm/nouveau/device: Add support for GK208B, resolves bug 86935 Signed-off-by: Ben Skeggs diff --git a/drivers/gpu/drm/nouveau/core/engine/device/nve0.c b/drivers/gpu/drm/nouveau/core/engine/device/nve0.c index 674da1f..7329226 100644 --- a/drivers/gpu/drm/nouveau/core/engine/device/nve0.c +++ b/drivers/gpu/drm/nouveau/core/engine/device/nve0.c @@ -249,6 +249,39 @@ nve0_identify(struct nouveau_device *device) device->oclass[NVDEV_ENGINE_PPP ] = &nvc0_ppp_oclass; device->oclass[NVDEV_ENGINE_PERFMON] = &nvf0_perfmon_oclass; break; + case 0x106: + device->cname = "GK208B"; + device->oclass[NVDEV_SUBDEV_VBIOS ] = &nouveau_bios_oclass; + device->oclass[NVDEV_SUBDEV_GPIO ] = nve0_gpio_oclass; + device->oclass[NVDEV_SUBDEV_I2C ] = nve0_i2c_oclass; + device->oclass[NVDEV_SUBDEV_FUSE ] = &gf100_fuse_oclass; + device->oclass[NVDEV_SUBDEV_CLOCK ] = &nve0_clock_oclass; + device->oclass[NVDEV_SUBDEV_THERM ] = &nvd0_therm_oclass; + device->oclass[NVDEV_SUBDEV_MXM ] = &nv50_mxm_oclass; + device->oclass[NVDEV_SUBDEV_DEVINIT] = nvc0_devinit_oclass; + device->oclass[NVDEV_SUBDEV_MC ] = gk20a_mc_oclass; + device->oclass[NVDEV_SUBDEV_BUS ] = nvc0_bus_oclass; + device->oclass[NVDEV_SUBDEV_TIMER ] = &nv04_timer_oclass; + device->oclass[NVDEV_SUBDEV_FB ] = nve0_fb_oclass; + device->oclass[NVDEV_SUBDEV_LTC ] = gk104_ltc_oclass; + device->oclass[NVDEV_SUBDEV_IBUS ] = &nve0_ibus_oclass; + device->oclass[NVDEV_SUBDEV_INSTMEM] = nv50_instmem_oclass; + device->oclass[NVDEV_SUBDEV_VM ] = &nvc0_vmmgr_oclass; + device->oclass[NVDEV_SUBDEV_BAR ] = &nvc0_bar_oclass; + device->oclass[NVDEV_SUBDEV_PWR ] = nv108_pwr_oclass; + device->oclass[NVDEV_SUBDEV_VOLT ] = &nv40_volt_oclass; + device->oclass[NVDEV_ENGINE_DMAOBJ ] = nvd0_dmaeng_oclass; + device->oclass[NVDEV_ENGINE_FIFO ] = nv108_fifo_oclass; + device->oclass[NVDEV_ENGINE_SW ] = nvc0_software_oclass; + device->oclass[NVDEV_ENGINE_GR ] = nv108_graph_oclass; + device->oclass[NVDEV_ENGINE_DISP ] = nvf0_disp_oclass; + device->oclass[NVDEV_ENGINE_COPY0 ] = &nve0_copy0_oclass; + device->oclass[NVDEV_ENGINE_COPY1 ] = &nve0_copy1_oclass; + device->oclass[NVDEV_ENGINE_COPY2 ] = &nve0_copy2_oclass; + device->oclass[NVDEV_ENGINE_BSP ] = &nve0_bsp_oclass; + device->oclass[NVDEV_ENGINE_VP ] = &nve0_vp_oclass; + device->oclass[NVDEV_ENGINE_PPP ] = &nvc0_ppp_oclass; + break; case 0x108: device->cname = "GK208"; device->oclass[NVDEV_SUBDEV_VBIOS ] = &nouveau_bios_oclass; -- cgit v0.10.2 From 5cc8d536c21a17e301e6f3e8c70a678b5f4b419f Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Thu, 11 Dec 2014 10:05:00 +1000 Subject: drm/nouveau: wake up the card if necessary during gem callbacks The failure paths if we fail to wake the card are less than desirable, but there's not really a graceful way to handle this case currently. I'll keep this situation in mind when I get to fixing other vm-related issues. Signed-off-by: Ben Skeggs diff --git a/drivers/gpu/drm/nouveau/nouveau_gem.c b/drivers/gpu/drm/nouveau/nouveau_gem.c index 28d51a2..5922d2e 100644 --- a/drivers/gpu/drm/nouveau/nouveau_gem.c +++ b/drivers/gpu/drm/nouveau/nouveau_gem.c @@ -36,7 +36,14 @@ void nouveau_gem_object_del(struct drm_gem_object *gem) { struct nouveau_bo *nvbo = nouveau_gem_object(gem); + struct nouveau_drm *drm = nouveau_bdev(nvbo->bo.bdev); struct ttm_buffer_object *bo = &nvbo->bo; + struct device *dev = drm->dev->dev; + int ret; + + ret = pm_runtime_get_sync(dev); + if (WARN_ON(ret < 0 && ret != -EACCES)) + return; if (gem->import_attach) drm_prime_gem_destroy(gem, nvbo->bo.sg); @@ -46,6 +53,9 @@ nouveau_gem_object_del(struct drm_gem_object *gem) /* reset filp so nouveau_bo_del_ttm() can test for it */ gem->filp = NULL; ttm_bo_unref(&bo); + + pm_runtime_mark_last_busy(dev); + pm_runtime_put_autosuspend(dev); } int @@ -53,7 +63,9 @@ nouveau_gem_object_open(struct drm_gem_object *gem, struct drm_file *file_priv) { struct nouveau_cli *cli = nouveau_cli(file_priv); struct nouveau_bo *nvbo = nouveau_gem_object(gem); + struct nouveau_drm *drm = nouveau_bdev(nvbo->bo.bdev); struct nouveau_vma *vma; + struct device *dev = drm->dev->dev; int ret; if (!cli->vm) @@ -71,11 +83,16 @@ nouveau_gem_object_open(struct drm_gem_object *gem, struct drm_file *file_priv) goto out; } + ret = pm_runtime_get_sync(dev); + if (ret < 0 && ret != -EACCES) + goto out; + ret = nouveau_bo_vma_add(nvbo, cli->vm, vma); - if (ret) { + if (ret) kfree(vma); - goto out; - } + + pm_runtime_mark_last_busy(dev); + pm_runtime_put_autosuspend(dev); } else { vma->refcount++; } @@ -129,6 +146,8 @@ nouveau_gem_object_close(struct drm_gem_object *gem, struct drm_file *file_priv) { struct nouveau_cli *cli = nouveau_cli(file_priv); struct nouveau_bo *nvbo = nouveau_gem_object(gem); + struct nouveau_drm *drm = nouveau_bdev(nvbo->bo.bdev); + struct device *dev = drm->dev->dev; struct nouveau_vma *vma; int ret; @@ -141,8 +160,14 @@ nouveau_gem_object_close(struct drm_gem_object *gem, struct drm_file *file_priv) vma = nouveau_bo_vma_find(nvbo, cli->vm); if (vma) { - if (--vma->refcount == 0) - nouveau_gem_object_unmap(nvbo, vma); + if (--vma->refcount == 0) { + ret = pm_runtime_get_sync(dev); + if (!WARN_ON(ret < 0 && ret != -EACCES)) { + nouveau_gem_object_unmap(nvbo, vma); + pm_runtime_mark_last_busy(dev); + pm_runtime_put_autosuspend(dev); + } + } } ttm_bo_unreserve(&nvbo->bo); } -- cgit v0.10.2 From 0b428011fa2b2f41d3f82ddf2c141fdf936dbaee Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Thu, 11 Dec 2014 13:19:31 +1000 Subject: drm/nouveau/fb/ram/mcp77: subclass nouveau_ram Signed-off-by: Ben Skeggs diff --git a/drivers/gpu/drm/nouveau/core/subdev/fb/ramnvaa.c b/drivers/gpu/drm/nouveau/core/subdev/fb/ramnvaa.c index 00f2ca7..8ee3d37 100644 --- a/drivers/gpu/drm/nouveau/core/subdev/fb/ramnvaa.c +++ b/drivers/gpu/drm/nouveau/core/subdev/fb/ramnvaa.c @@ -24,6 +24,10 @@ #include "nv50.h" +struct nvaa_ram_priv { + struct nouveau_ram base; +}; + static int nvaa_ram_ctor(struct nouveau_object *parent, struct nouveau_object *engine, struct nouveau_oclass *oclass, void *data, u32 datasize, @@ -32,26 +36,26 @@ nvaa_ram_ctor(struct nouveau_object *parent, struct nouveau_object *engine, const u32 rsvd_head = ( 256 * 1024) >> 12; /* vga memory */ const u32 rsvd_tail = (1024 * 1024) >> 12; /* vbios etc */ struct nouveau_fb *pfb = nouveau_fb(parent); - struct nouveau_ram *ram; + struct nvaa_ram_priv *priv; int ret; - ret = nouveau_ram_create(parent, engine, oclass, &ram); - *pobject = nv_object(ram); + ret = nouveau_ram_create(parent, engine, oclass, &priv); + *pobject = nv_object(priv); if (ret) return ret; - ram->size = nv_rd32(pfb, 0x10020c); - ram->size = (ram->size & 0xffffff00) | ((ram->size & 0x000000ff) << 32); + priv->base.size = nv_rd32(pfb, 0x10020c); + priv->base.size = (priv->base.size & 0xffffff00) | ((priv->base.size & 0x000000ff) << 32); - ret = nouveau_mm_init(&pfb->vram, rsvd_head, (ram->size >> 12) - + ret = nouveau_mm_init(&pfb->vram, rsvd_head, (priv->base.size >> 12) - (rsvd_head + rsvd_tail), 1); if (ret) return ret; - ram->type = NV_MEM_TYPE_STOLEN; - ram->stolen = (u64)nv_rd32(pfb, 0x100e10) << 12; - ram->get = nv50_ram_get; - ram->put = nv50_ram_put; + priv->base.type = NV_MEM_TYPE_STOLEN; + priv->base.stolen = (u64)nv_rd32(pfb, 0x100e10) << 12; + priv->base.get = nv50_ram_get; + priv->base.put = nv50_ram_put; return 0; } -- cgit v0.10.2 From 5f3ac299c0b76addcf3706cb6cc9d04dad26dab0 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Thu, 11 Dec 2014 13:21:24 +1000 Subject: drm/nouveau/fb/ram/mcp77: use carveout reg to determine size Signed-off-by: Ben Skeggs diff --git a/drivers/gpu/drm/nouveau/core/subdev/fb/ramnvaa.c b/drivers/gpu/drm/nouveau/core/subdev/fb/ramnvaa.c index 8ee3d37..f56ee55 100644 --- a/drivers/gpu/drm/nouveau/core/subdev/fb/ramnvaa.c +++ b/drivers/gpu/drm/nouveau/core/subdev/fb/ramnvaa.c @@ -44,16 +44,15 @@ nvaa_ram_ctor(struct nouveau_object *parent, struct nouveau_object *engine, if (ret) return ret; - priv->base.size = nv_rd32(pfb, 0x10020c); - priv->base.size = (priv->base.size & 0xffffff00) | ((priv->base.size & 0x000000ff) << 32); + priv->base.type = NV_MEM_TYPE_STOLEN; + priv->base.stolen = (u64)nv_rd32(pfb, 0x100e10) << 12; + priv->base.size = (u64)nv_rd32(pfb, 0x100e14) << 12; ret = nouveau_mm_init(&pfb->vram, rsvd_head, (priv->base.size >> 12) - (rsvd_head + rsvd_tail), 1); if (ret) return ret; - priv->base.type = NV_MEM_TYPE_STOLEN; - priv->base.stolen = (u64)nv_rd32(pfb, 0x100e10) << 12; priv->base.get = nv50_ram_get; priv->base.put = nv50_ram_put; return 0; -- cgit v0.10.2 From e9d91238990d89421315a556a3ba4dbbae35ffbf Mon Sep 17 00:00:00 2001 From: Pierre Moreau Date: Tue, 16 Dec 2014 09:47:40 +1000 Subject: drm/nouveau/fb/ram/mcp77: enable NISO poller Signed-off-by: Pierre Moreau Signed-off-by: Ben Skeggs diff --git a/drivers/gpu/drm/nouveau/core/subdev/fb/ramnvaa.c b/drivers/gpu/drm/nouveau/core/subdev/fb/ramnvaa.c index f56ee55..033a8e9 100644 --- a/drivers/gpu/drm/nouveau/core/subdev/fb/ramnvaa.c +++ b/drivers/gpu/drm/nouveau/core/subdev/fb/ramnvaa.c @@ -26,6 +26,7 @@ struct nvaa_ram_priv { struct nouveau_ram base; + u64 poller_base; }; static int @@ -33,8 +34,8 @@ nvaa_ram_ctor(struct nouveau_object *parent, struct nouveau_object *engine, struct nouveau_oclass *oclass, void *data, u32 datasize, struct nouveau_object **pobject) { - const u32 rsvd_head = ( 256 * 1024) >> 12; /* vga memory */ - const u32 rsvd_tail = (1024 * 1024) >> 12; /* vbios etc */ + u32 rsvd_head = ( 256 * 1024); /* vga memory */ + u32 rsvd_tail = (1024 * 1024); /* vbios etc */ struct nouveau_fb *pfb = nouveau_fb(parent); struct nvaa_ram_priv *priv; int ret; @@ -48,8 +49,12 @@ nvaa_ram_ctor(struct nouveau_object *parent, struct nouveau_object *engine, priv->base.stolen = (u64)nv_rd32(pfb, 0x100e10) << 12; priv->base.size = (u64)nv_rd32(pfb, 0x100e14) << 12; - ret = nouveau_mm_init(&pfb->vram, rsvd_head, (priv->base.size >> 12) - - (rsvd_head + rsvd_tail), 1); + rsvd_tail += 0x1000; + priv->poller_base = priv->base.size - rsvd_tail; + + ret = nouveau_mm_init(&pfb->vram, rsvd_head >> 12, + (priv->base.size - (rsvd_head + rsvd_tail)) >> 12, + 1); if (ret) return ret; @@ -58,12 +63,41 @@ nvaa_ram_ctor(struct nouveau_object *parent, struct nouveau_object *engine, return 0; } +static int +nvaa_ram_init(struct nouveau_object *object) +{ + struct nouveau_fb *pfb = nouveau_fb(object); + struct nvaa_ram_priv *priv = (void *)object; + int ret; + u64 dniso, hostnb, flush; + + ret = nouveau_ram_init(&priv->base); + if (ret) + return ret; + + dniso = ((priv->base.size - (priv->poller_base + 0x00)) >> 5) - 1; + hostnb = ((priv->base.size - (priv->poller_base + 0x20)) >> 5) - 1; + flush = ((priv->base.size - (priv->poller_base + 0x40)) >> 5) - 1; + + /* Enable NISO poller for various clients and set their associated + * read address, only for MCP77/78 and MCP79/7A. (fd#25701) + */ + nv_wr32(pfb, 0x100c18, dniso); + nv_mask(pfb, 0x100c14, 0x00000000, 0x00000001); + nv_wr32(pfb, 0x100c1c, hostnb); + nv_mask(pfb, 0x100c14, 0x00000000, 0x00000002); + nv_wr32(pfb, 0x100c24, flush); + nv_mask(pfb, 0x100c14, 0x00000000, 0x00010000); + + return 0; +} + struct nouveau_oclass nvaa_ram_oclass = { .ofuncs = &(struct nouveau_ofuncs) { .ctor = nvaa_ram_ctor, .dtor = _nouveau_ram_dtor, - .init = _nouveau_ram_init, + .init = nvaa_ram_init, .fini = _nouveau_ram_fini, }, }; -- cgit v0.10.2 From 4761703bd04bbdf56396d264903cc5a1fdcb3c01 Mon Sep 17 00:00:00 2001 From: Ilia Mirkin Date: Tue, 16 Dec 2014 13:55:38 -0500 Subject: drm/nv4c/mc: disable msi Several users have, over time, reported issues with MSI on these IGPs. They're old, rarely available, and MSI doesn't provide such huge advantages on them. Just disable. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=87361 Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=74492 Fixes: fa8c9ac72fe ("drm/nv4c/mc: nv4x igp's have a different msi rearm register") Cc: stable@vger.kernel.org Signed-off-by: Ilia Mirkin Signed-off-by: Ben Skeggs diff --git a/drivers/gpu/drm/nouveau/core/subdev/mc/nv4c.c b/drivers/gpu/drm/nouveau/core/subdev/mc/nv4c.c index a75c35c..165401c 100644 --- a/drivers/gpu/drm/nouveau/core/subdev/mc/nv4c.c +++ b/drivers/gpu/drm/nouveau/core/subdev/mc/nv4c.c @@ -24,13 +24,6 @@ #include "nv04.h" -static void -nv4c_mc_msi_rearm(struct nouveau_mc *pmc) -{ - struct nv04_mc_priv *priv = (void *)pmc; - nv_wr08(priv, 0x088050, 0xff); -} - struct nouveau_oclass * nv4c_mc_oclass = &(struct nouveau_mc_oclass) { .base.handle = NV_SUBDEV(MC, 0x4c), @@ -41,5 +34,4 @@ nv4c_mc_oclass = &(struct nouveau_mc_oclass) { .fini = _nouveau_mc_fini, }, .intr = nv04_mc_intr, - .msi_rearm = nv4c_mc_msi_rearm, }.base; -- cgit v0.10.2 From ff4c0d5213b015e60aa87c1352604f10ba9c3e12 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bruno=20Pr=C3=A9mont?= Date: Sun, 21 Dec 2014 17:43:31 +0100 Subject: drm/nouveau/nouveau: Do not BUG_ON(!spin_is_locked()) on UP MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On !SMP systems spinlocks do not exist. Thus checking of they are active will always fail. Use assert_spin_locked(lock); instead of BUG_ON(!spin_is_locked(lock)); to not BUG() on all UP systems. Signed-off-by: Bruno Prémont Signed-off-by: Ben Skeggs diff --git a/drivers/gpu/drm/nouveau/core/core/event.c b/drivers/gpu/drm/nouveau/core/core/event.c index ff2b434..760947e 100644 --- a/drivers/gpu/drm/nouveau/core/core/event.c +++ b/drivers/gpu/drm/nouveau/core/core/event.c @@ -26,7 +26,7 @@ void nvkm_event_put(struct nvkm_event *event, u32 types, int index) { - BUG_ON(!spin_is_locked(&event->refs_lock)); + assert_spin_locked(&event->refs_lock); while (types) { int type = __ffs(types); types &= ~(1 << type); if (--event->refs[index * event->types_nr + type] == 0) { @@ -39,7 +39,7 @@ nvkm_event_put(struct nvkm_event *event, u32 types, int index) void nvkm_event_get(struct nvkm_event *event, u32 types, int index) { - BUG_ON(!spin_is_locked(&event->refs_lock)); + assert_spin_locked(&event->refs_lock); while (types) { int type = __ffs(types); types &= ~(1 << type); if (++event->refs[index * event->types_nr + type] == 1) { diff --git a/drivers/gpu/drm/nouveau/core/core/notify.c b/drivers/gpu/drm/nouveau/core/core/notify.c index d1bcde5..839a325 100644 --- a/drivers/gpu/drm/nouveau/core/core/notify.c +++ b/drivers/gpu/drm/nouveau/core/core/notify.c @@ -98,7 +98,7 @@ nvkm_notify_send(struct nvkm_notify *notify, void *data, u32 size) struct nvkm_event *event = notify->event; unsigned long flags; - BUG_ON(!spin_is_locked(&event->list_lock)); + assert_spin_locked(&event->list_lock); BUG_ON(size != notify->size); spin_lock_irqsave(&event->refs_lock, flags); -- cgit v0.10.2 From 8b8cd8a3673dd766428aee8222fa8fd3fdd26d58 Mon Sep 17 00:00:00 2001 From: Boris Ostrovsky Date: Mon, 22 Dec 2014 13:33:10 -0500 Subject: x86/xen: Remove unnecessary BUG_ON(preemptible()) in xen_setup_timer() There is no reason for having it and, with commit 250a1ac685f1 ("x86, smpboot: Remove pointless preempt_disable() in native_smp_prepare_cpus()"), it prevents HVM guests from booting. Signed-off-by: Boris Ostrovsky Signed-off-by: David Vrabel diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c index f473d26..23019b4 100644 --- a/arch/x86/xen/time.c +++ b/arch/x86/xen/time.c @@ -458,8 +458,6 @@ void xen_setup_timer(int cpu) void xen_setup_cpu_clockevents(void) { - BUG_ON(preemptible()); - clockevents_register_device(this_cpu_ptr(&xen_clock_events.evt)); } -- cgit v0.10.2 From 132978b94e66f8ad7d20790f8332f0e9c1426029 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Fri, 19 Dec 2014 16:10:54 +0000 Subject: x86: Fix step size adjustment during initial memory mapping The old scheme can lead to failure in certain cases - the problem is that after bumping step_size the next (non-final) iteration is only guaranteed to make available a memory block the size of what step_size was before. E.g. for a memory block [0,3004600000) we'd have: iter start end step amount 1 3004400000 30045fffff 2M 2M 2 3004000000 30043fffff 64M 4M 3 3000000000 3003ffffff 2G 64M 4 2000000000 2fffffffff 64G 64G Yet to map 64G with 4k pages (as happens e.g. under PV Xen) we need slightly over 128M, but the first three iterations made only about 70M available. The condition (new_mapped_ram_size > mapped_ram_size) for bumping step_size is just not suitable. Instead we want to bump it when we know we have enough memory available to cover a block of the new step_size. And rather than making that condition more complicated than needed, simply adjust step_size by the largest possible factor we know we can cover at that point - which is shifting it left by one less than the difference between page table level shifts. (Interestingly the original STEP_SIZE_SHIFT definition had a comment hinting at that having been the intention, just that it should have been PUD_SHIFT-PMD_SHIFT-1 instead of (PUD_SHIFT-PMD_SHIFT)/2, and of course for non-PAE 32-bit we can't really use these two constants as they're equal there.) Furthermore the comment in get_new_step_size() didn't get updated when the bottom-down mapping logic got added. Yet while an overflow (flushing step_size to zero) of the shift doesn't matter for the top-down method, it does for bottom-up because round_up(x, 0) = 0, and an upper range boundary of zero can't really work well. Signed-off-by: Jan Beulich Acked-by: Yinghai Lu Link: http://lkml.kernel.org/r/54945C1E020000780005114E@mail.emea.novell.com Signed-off-by: Ingo Molnar diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index a97ee08..08a7d31 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -438,20 +438,20 @@ static unsigned long __init init_range_memory_mapping( static unsigned long __init get_new_step_size(unsigned long step_size) { /* - * Explain why we shift by 5 and why we don't have to worry about - * 'step_size << 5' overflowing: - * - * initial mapped size is PMD_SIZE (2M). + * Initial mapped size is PMD_SIZE (2M). * We can not set step_size to be PUD_SIZE (1G) yet. * In worse case, when we cross the 1G boundary, and * PG_LEVEL_2M is not set, we will need 1+1+512 pages (2M + 8k) - * to map 1G range with PTE. Use 5 as shift for now. + * to map 1G range with PTE. Hence we use one less than the + * difference of page table level shifts. * - * Don't need to worry about overflow, on 32bit, when step_size - * is 0, round_down() returns 0 for start, and that turns it - * into 0x100000000ULL. + * Don't need to worry about overflow in the top-down case, on 32bit, + * when step_size is 0, round_down() returns 0 for start, and that + * turns it into 0x100000000ULL. + * In the bottom-up case, round_up(x, 0) returns 0 though too, which + * needs to be taken into consideration by the code below. */ - return step_size << 5; + return step_size << (PMD_SHIFT - PAGE_SHIFT - 1); } /** @@ -471,7 +471,6 @@ static void __init memory_map_top_down(unsigned long map_start, unsigned long step_size; unsigned long addr; unsigned long mapped_ram_size = 0; - unsigned long new_mapped_ram_size; /* xen has big range in reserved near end of ram, skip it at first.*/ addr = memblock_find_in_range(map_start, map_end, PMD_SIZE, PMD_SIZE); @@ -496,14 +495,12 @@ static void __init memory_map_top_down(unsigned long map_start, start = map_start; } else start = map_start; - new_mapped_ram_size = init_range_memory_mapping(start, + mapped_ram_size += init_range_memory_mapping(start, last_start); last_start = start; min_pfn_mapped = last_start >> PAGE_SHIFT; - /* only increase step_size after big range get mapped */ - if (new_mapped_ram_size > mapped_ram_size) + if (mapped_ram_size >= step_size) step_size = get_new_step_size(step_size); - mapped_ram_size += new_mapped_ram_size; } if (real_end < map_end) @@ -524,7 +521,7 @@ static void __init memory_map_top_down(unsigned long map_start, static void __init memory_map_bottom_up(unsigned long map_start, unsigned long map_end) { - unsigned long next, new_mapped_ram_size, start; + unsigned long next, start; unsigned long mapped_ram_size = 0; /* step_size need to be small so pgt_buf from BRK could cover it */ unsigned long step_size = PMD_SIZE; @@ -539,19 +536,19 @@ static void __init memory_map_bottom_up(unsigned long map_start, * for page table. */ while (start < map_end) { - if (map_end - start > step_size) { + if (step_size && map_end - start > step_size) { next = round_up(start + 1, step_size); if (next > map_end) next = map_end; - } else + } else { next = map_end; + } - new_mapped_ram_size = init_range_memory_mapping(start, next); + mapped_ram_size += init_range_memory_mapping(start, next); start = next; - if (new_mapped_ram_size > mapped_ram_size) + if (mapped_ram_size >= step_size) step_size = get_new_step_size(step_size); - mapped_ram_size += new_mapped_ram_size; } } -- cgit v0.10.2 From b74e6278fd6db5848163ccdc6e9d8eb6efdee9bd Mon Sep 17 00:00:00 2001 From: Alex Thorlton Date: Thu, 18 Dec 2014 12:44:30 -0600 Subject: sched: Fix KMALLOC_MAX_SIZE overflow during cpumask allocation When allocating space for load_balance_mask, in sched_init, when CPUMASK_OFFSTACK is set, we've managed to spill over KMALLOC_MAX_SIZE on our 6144 core machine. The patch below breaks up the allocations so that they don't overflow the max alloc size. It also allocates the masks on the the node from which they'll most commonly be accessed, to minimize remote accesses on NUMA machines. Suggested-by: George Beshers Signed-off-by: Alex Thorlton Cc: George Beshers Cc: Russ Anderson Cc: Peter Zijlstra Cc: Linus Torvalds Link: http://lkml.kernel.org/r/1418928270-148543-1-git-send-email-athorlton@sgi.com Signed-off-by: Ingo Molnar diff --git a/kernel/sched/core.c b/kernel/sched/core.c index b5797b7..c0accc0 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -7113,9 +7113,6 @@ void __init sched_init(void) #ifdef CONFIG_RT_GROUP_SCHED alloc_size += 2 * nr_cpu_ids * sizeof(void **); #endif -#ifdef CONFIG_CPUMASK_OFFSTACK - alloc_size += num_possible_cpus() * cpumask_size(); -#endif if (alloc_size) { ptr = (unsigned long)kzalloc(alloc_size, GFP_NOWAIT); @@ -7135,13 +7132,13 @@ void __init sched_init(void) ptr += nr_cpu_ids * sizeof(void **); #endif /* CONFIG_RT_GROUP_SCHED */ + } #ifdef CONFIG_CPUMASK_OFFSTACK - for_each_possible_cpu(i) { - per_cpu(load_balance_mask, i) = (void *)ptr; - ptr += cpumask_size(); - } -#endif /* CONFIG_CPUMASK_OFFSTACK */ + for_each_possible_cpu(i) { + per_cpu(load_balance_mask, i) = (cpumask_var_t)kzalloc_node( + cpumask_size(), GFP_KERNEL, cpu_to_node(i)); } +#endif /* CONFIG_CPUMASK_OFFSTACK */ init_rt_bandwidth(&def_rt_bandwidth, global_rt_period(), global_rt_runtime()); -- cgit v0.10.2 From ea174f4c4f6135e30a4e1e8c4511980338238b16 Mon Sep 17 00:00:00 2001 From: Sylvain BERTRAND Date: Tue, 23 Dec 2014 13:39:12 +0100 Subject: x86: Fix mkcapflags.sh bash-ism Chocked while compiling linux with dash shell instead of bash shell. See: http://pubs.opengroup.org/onlinepubs/9699919799/utilities/test.html Signed-off-by: Sylvain BERTRAND Link: http://lkml.kernel.org/r/20141223123912.GA1386@localhost.localdomain Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/mkcapflags.sh b/arch/x86/kernel/cpu/mkcapflags.sh index e2b22df..36d99a3 100644 --- a/arch/x86/kernel/cpu/mkcapflags.sh +++ b/arch/x86/kernel/cpu/mkcapflags.sh @@ -28,7 +28,7 @@ function dump_array() # If the /* comment */ starts with a quote string, grab that. VALUE="$(echo "$i" | sed -n 's@.*/\* *\("[^"]*"\).*\*/@\1@p')" [ -z "$VALUE" ] && VALUE="\"$NAME\"" - [ "$VALUE" == '""' ] && continue + [ "$VALUE" = '""' ] && continue # Name is uppercase, VALUE is all lowercase VALUE="$(echo "$VALUE" | tr A-Z a-z)" -- cgit v0.10.2 From 280dbc572357eb50184663fc9e4aaf09c8141e9b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Mork?= Date: Tue, 23 Dec 2014 12:57:43 +0100 Subject: x86/build: Clean auto-generated processor feature files MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 9def39be4e96 ("x86: Support compiling out human-friendly processor feature names") made two source file targets conditional. Such conditional targets will not be cleaned automatically by make mrproper. Fix by adding explicit clean-files targets for the two files. Fixes: 9def39be4e96 ("x86: Support compiling out human-friendly processor feature names") Signed-off-by: Bjørn Mork Cc: Josh Triplett Link: http://lkml.kernel.org/r/1419335863-10608-1-git-send-email-bjorn@mork.no Signed-off-by: Ingo Molnar diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile index 5b016e2..3db07f3 100644 --- a/arch/x86/boot/Makefile +++ b/arch/x86/boot/Makefile @@ -51,6 +51,7 @@ targets += cpustr.h $(obj)/cpustr.h: $(obj)/mkcpustr FORCE $(call if_changed,cpustr) endif +clean-files += cpustr.h # --------------------------------------------------------------------------- diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index e27b49d..80091ae 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -66,3 +66,4 @@ targets += capflags.c $(obj)/capflags.c: $(cpufeature) $(src)/mkcapflags.sh FORCE $(call if_changed,mkcapflags) endif +clean-files += capflags.c -- cgit v0.10.2 From 740b97f9509ac5a015278940747178af4eb0900d Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 22 Dec 2014 13:44:10 +0900 Subject: perf report: Show progress bar for output resorting Sometimes it takes a long time to resort hist entries for output in case of a large data file. Show a progress bar window and inform user. Signed-off-by: Namhyung Kim Cc: Adrian Hunter Cc: David Ahern Cc: Ingo Molnar Cc: Jiri Olsa Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1419223455-4362-3-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index e7417fe..747f861 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -232,7 +232,7 @@ static int __cmd_annotate(struct perf_annotate *ann) if (nr_samples > 0) { total_nr_samples += nr_samples; hists__collapse_resort(hists, NULL); - hists__output_resort(hists); + hists__output_resort(hists, NULL); if (symbol_conf.event_group && !perf_evsel__is_group_leader(pos)) diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c index 1ce425d..303c1e1 100644 --- a/tools/perf/builtin-diff.c +++ b/tools/perf/builtin-diff.c @@ -605,7 +605,7 @@ static void hists__process(struct hists *hists) hists__precompute(hists); hists__compute_resort(hists); } else { - hists__output_resort(hists); + hists__output_resort(hists, NULL); } hists__fprintf(hists, true, 0, 0, 0, stdout); diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 3936760..072ae8a 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -457,6 +457,19 @@ static void report__collapse_hists(struct report *rep) ui_progress__finish(); } +static void report__output_resort(struct report *rep) +{ + struct ui_progress prog; + struct perf_evsel *pos; + + ui_progress__init(&prog, rep->nr_entries, "Sorting events for output..."); + + evlist__for_each(rep->session->evlist, pos) + hists__output_resort(evsel__hists(pos), &prog); + + ui_progress__finish(); +} + static int __cmd_report(struct report *rep) { int ret; @@ -505,13 +518,20 @@ static int __cmd_report(struct report *rep) if (session_done()) return 0; + /* + * recalculate number of entries after collapsing since it + * might be changed during the collapse phase. + */ + rep->nr_entries = 0; + evlist__for_each(session->evlist, pos) + rep->nr_entries += evsel__hists(pos)->nr_entries; + if (rep->nr_entries == 0) { ui__error("The %s file has no samples!\n", file->path); return 0; } - evlist__for_each(session->evlist, pos) - hists__output_resort(evsel__hists(pos)); + report__output_resort(rep); return report__browse_hists(rep); } diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 0aa7747..961cea1 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -285,7 +285,7 @@ static void perf_top__print_sym_table(struct perf_top *top) } hists__collapse_resort(hists, NULL); - hists__output_resort(hists); + hists__output_resort(hists, NULL); hists__output_recalc_col_len(hists, top->print_entries - printed); putchar('\n'); @@ -554,7 +554,7 @@ static void perf_top__sort_new_samples(void *arg) } hists__collapse_resort(hists, NULL); - hists__output_resort(hists); + hists__output_resort(hists, NULL); } static void *display_thread_tui(void *arg) diff --git a/tools/perf/tests/hists_cumulate.c b/tools/perf/tests/hists_cumulate.c index 614d5c4..4b8226e 100644 --- a/tools/perf/tests/hists_cumulate.c +++ b/tools/perf/tests/hists_cumulate.c @@ -187,7 +187,7 @@ static int do_test(struct hists *hists, struct result *expected, size_t nr_expec * function since TEST_ASSERT_VAL() returns in case of failure. */ hists__collapse_resort(hists, NULL); - hists__output_resort(hists); + hists__output_resort(hists, NULL); if (verbose > 2) { pr_info("use callchain: %d, cumulate callchain: %d\n", diff --git a/tools/perf/tests/hists_filter.c b/tools/perf/tests/hists_filter.c index 74f257a..59e53db 100644 --- a/tools/perf/tests/hists_filter.c +++ b/tools/perf/tests/hists_filter.c @@ -138,7 +138,7 @@ int test__hists_filter(void) struct hists *hists = evsel__hists(evsel); hists__collapse_resort(hists, NULL); - hists__output_resort(hists); + hists__output_resort(hists, NULL); if (verbose > 2) { pr_info("Normal histogram\n"); diff --git a/tools/perf/tests/hists_output.c b/tools/perf/tests/hists_output.c index a748f2b..f554761 100644 --- a/tools/perf/tests/hists_output.c +++ b/tools/perf/tests/hists_output.c @@ -152,7 +152,7 @@ static int test1(struct perf_evsel *evsel, struct machine *machine) goto out; hists__collapse_resort(hists, NULL); - hists__output_resort(hists); + hists__output_resort(hists, NULL); if (verbose > 2) { pr_info("[fields = %s, sort = %s]\n", field_order, sort_order); @@ -252,7 +252,7 @@ static int test2(struct perf_evsel *evsel, struct machine *machine) goto out; hists__collapse_resort(hists, NULL); - hists__output_resort(hists); + hists__output_resort(hists, NULL); if (verbose > 2) { pr_info("[fields = %s, sort = %s]\n", field_order, sort_order); @@ -306,7 +306,7 @@ static int test3(struct perf_evsel *evsel, struct machine *machine) goto out; hists__collapse_resort(hists, NULL); - hists__output_resort(hists); + hists__output_resort(hists, NULL); if (verbose > 2) { pr_info("[fields = %s, sort = %s]\n", field_order, sort_order); @@ -384,7 +384,7 @@ static int test4(struct perf_evsel *evsel, struct machine *machine) goto out; hists__collapse_resort(hists, NULL); - hists__output_resort(hists); + hists__output_resort(hists, NULL); if (verbose > 2) { pr_info("[fields = %s, sort = %s]\n", field_order, sort_order); @@ -487,7 +487,7 @@ static int test5(struct perf_evsel *evsel, struct machine *machine) goto out; hists__collapse_resort(hists, NULL); - hists__output_resort(hists); + hists__output_resort(hists, NULL); if (verbose > 2) { pr_info("[fields = %s, sort = %s]\n", field_order, sort_order); diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index 6e88b9e..1cc6ea4 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -6,6 +6,7 @@ #include "evlist.h" #include "evsel.h" #include "annotate.h" +#include "ui/progress.h" #include static bool hists__filter_entry_by_dso(struct hists *hists, @@ -987,6 +988,7 @@ static bool hists__collapse_insert_entry(struct hists *hists __maybe_unused, else p = &(*p)->rb_right; } + hists->nr_entries++; rb_link_node(&he->rb_node_in, parent, p); rb_insert_color(&he->rb_node_in, root); @@ -1024,7 +1026,10 @@ void hists__collapse_resort(struct hists *hists, struct ui_progress *prog) if (!sort__need_collapse) return; + hists->nr_entries = 0; + root = hists__get_rotate_entries_in(hists); + next = rb_first(root); while (next) { @@ -1119,7 +1124,7 @@ static void __hists__insert_output_entry(struct rb_root *entries, rb_insert_color(&he->rb_node, entries); } -void hists__output_resort(struct hists *hists) +void hists__output_resort(struct hists *hists, struct ui_progress *prog) { struct rb_root *root; struct rb_node *next; @@ -1148,6 +1153,9 @@ void hists__output_resort(struct hists *hists) if (!n->filtered) hists__calc_col_len(hists, n); + + if (prog) + ui_progress__update(prog, 1); } } diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index d0ef9a1..46bd503 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -121,7 +121,7 @@ int hist_entry__sort_snprintf(struct hist_entry *he, char *bf, size_t size, struct hists *hists); void hist_entry__free(struct hist_entry *); -void hists__output_resort(struct hists *hists); +void hists__output_resort(struct hists *hists, struct ui_progress *prog); void hists__collapse_resort(struct hists *hists, struct ui_progress *prog); void hists__decay_entries(struct hists *hists, bool zap_user, bool zap_kernel); -- cgit v0.10.2 From b11bc8e28f4829f693ef6c0178fe1811386ac828 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 22 Dec 2014 13:44:11 +0900 Subject: perf ui/tui: Print backtrace symbols when segfault occurs The output will look like below. (I added an error into ui__init() for the test). $ perf report perf: Segmentation fault -------- backtrace -------- perf[0x503781] /usr/lib/libc.so.6(+0x33b20)[0x7f1a14f04b20] perf(ui__init+0xd5)[0x503645] perf(setup_browser+0x97)[0x4ce4e7] perf(cmd_report+0xcea)[0x4392ba] perf[0x428493] perf(main+0x60a)[0x427c0a] /usr/lib/libc.so.6(__libc_start_main+0xf0)[0x7f1a14ef1040] perf[0x427d29] [0x0] Signed-off-by: Namhyung Kim Cc: Adrian Hunter Cc: David Ahern Cc: Ingo Molnar Cc: Jiri Olsa Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1419223455-4362-4-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/ui/tui/setup.c b/tools/perf/ui/tui/setup.c index 2f61256..3c38f25 100644 --- a/tools/perf/ui/tui/setup.c +++ b/tools/perf/ui/tui/setup.c @@ -1,5 +1,8 @@ #include #include +#ifdef HAVE_BACKTRACE_SUPPORT +#include +#endif #include "../../util/cache.h" #include "../../util/debug.h" @@ -88,6 +91,25 @@ int ui__getch(int delay_secs) return SLkp_getkey(); } +#ifdef HAVE_BACKTRACE_SUPPORT +static void ui__signal_backtrace(int sig) +{ + void *stackdump[32]; + size_t size; + + ui__exit(false); + psignal(sig, "perf"); + + printf("-------- backtrace --------\n"); + size = backtrace(stackdump, ARRAY_SIZE(stackdump)); + backtrace_symbols_fd(stackdump, size, STDOUT_FILENO); + + exit(0); +} +#else +# define ui__signal_backtrace ui__signal +#endif + static void ui__signal(int sig) { ui__exit(false); @@ -122,8 +144,8 @@ int ui__init(void) ui_browser__init(); tui_progress__init(); - signal(SIGSEGV, ui__signal); - signal(SIGFPE, ui__signal); + signal(SIGSEGV, ui__signal_backtrace); + signal(SIGFPE, ui__signal_backtrace); signal(SIGINT, ui__signal); signal(SIGQUIT, ui__signal); signal(SIGTERM, ui__signal); -- cgit v0.10.2 From 82aa019e0098a1e0801df94345c0297448323126 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 22 Dec 2014 13:44:14 +0900 Subject: perf callchain: Append callchains only when requested The perf report --children can be called with callchain disabled so no need to append callchains. Actually the root of callchain tree is not initialized properly in this case. Signed-off-by: Namhyung Kim Cc: Adrian Hunter Cc: David Ahern Cc: Ingo Molnar Cc: Jiri Olsa Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1419223455-4362-7-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index 1cc6ea4..0ced178 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -304,7 +304,7 @@ static struct hist_entry *hist_entry__new(struct hist_entry *template, size_t callchain_size = 0; struct hist_entry *he; - if (symbol_conf.use_callchain || symbol_conf.cumulate_callchain) + if (symbol_conf.use_callchain) callchain_size = sizeof(struct callchain_root); he = zalloc(sizeof(*he) + callchain_size); @@ -737,7 +737,7 @@ iter_add_single_cumulative_entry(struct hist_entry_iter *iter, iter->he = he; he_cache[iter->curr++] = he; - callchain_append(he->callchain, &callchain_cursor, sample->period); + hist_entry__append_callchain(he, sample); /* * We need to re-initialize the cursor since callchain_append() @@ -810,7 +810,8 @@ iter_add_next_cumulative_entry(struct hist_entry_iter *iter, iter->he = he; he_cache[iter->curr++] = he; - callchain_append(he->callchain, &cursor, sample->period); + if (symbol_conf.use_callchain) + callchain_append(he->callchain, &cursor, sample->period); return 0; } -- cgit v0.10.2 From 1ddf0b1b11aa8a90cef6706e935fc31c75c406ba Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Sun, 21 Dec 2014 08:57:46 -0800 Subject: x86, vdso: Use asm volatile in __getcpu In Linux 3.18 and below, GCC hoists the lsl instructions in the pvclock code all the way to the beginning of __vdso_clock_gettime, slowing the non-paravirt case significantly. For unknown reasons, presumably related to the removal of a branch, the performance issue is gone as of e76b027e6408 x86,vdso: Use LSL unconditionally for vgetcpu but I don't trust GCC enough to expect the problem to stay fixed. There should be no correctness issue, because the __getcpu calls in __vdso_vlock_gettime were never necessary in the first place. Note to stable maintainers: In 3.18 and below, depending on configuration, gcc 4.9.2 generates code like this: 9c3: 44 0f 03 e8 lsl %ax,%r13d 9c7: 45 89 eb mov %r13d,%r11d 9ca: 0f 03 d8 lsl %ax,%ebx This patch won't apply as is to any released kernel, but I'll send a trivial backported version if needed. Fixes: 51c19b4f5927 x86: vdso: pvclock gettime support Cc: stable@vger.kernel.org # 3.8+ Cc: Marcelo Tosatti Acked-by: Paolo Bonzini Signed-off-by: Andy Lutomirski diff --git a/arch/x86/include/asm/vgtod.h b/arch/x86/include/asm/vgtod.h index e7e9682..f556c48 100644 --- a/arch/x86/include/asm/vgtod.h +++ b/arch/x86/include/asm/vgtod.h @@ -80,9 +80,11 @@ static inline unsigned int __getcpu(void) /* * Load per CPU data from GDT. LSL is faster than RDTSCP and - * works on all CPUs. + * works on all CPUs. This is volatile so that it orders + * correctly wrt barrier() and to keep gcc from cleverly + * hoisting it out of the calling function. */ - asm("lsl %1,%0" : "=r" (p) : "r" (__PER_CPU_SEG)); + asm volatile ("lsl %1,%0" : "=r" (p) : "r" (__PER_CPU_SEG)); return p; } -- cgit v0.10.2 From 011fa99404bea3f5d897c4983f6bd51170e3b18f Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Fri, 26 Dec 2014 23:58:21 -0500 Subject: ext4: prevent online resize with backup superblock Prevent BUG or corrupted file systems after the following: mkfs.ext4 /dev/vdc 100M mount -t ext4 -o sb=40961 /dev/vdc /vdc resize2fs /dev/vdc We previously prevented online resizing using the old resize ioctl. Move the code to ext4_resize_begin(), so the check applies for all of the resize ioctl's. Reported-by: Maxim Malkov Signed-off-by: Theodore Ts'o diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index bf76f40..8a8ec62 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -24,6 +24,18 @@ int ext4_resize_begin(struct super_block *sb) return -EPERM; /* + * If we are not using the primary superblock/GDT copy don't resize, + * because the user tools have no way of handling this. Probably a + * bad time to do it anyways. + */ + if (EXT4_SB(sb)->s_sbh->b_blocknr != + le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block)) { + ext4_warning(sb, "won't resize using backup superblock at %llu", + (unsigned long long)EXT4_SB(sb)->s_sbh->b_blocknr); + return -EPERM; + } + + /* * We are not allowed to do online-resizing on a filesystem mounted * with error, because it can destroy the filesystem easily. */ @@ -758,18 +770,6 @@ static int add_new_gdb(handle_t *handle, struct inode *inode, "EXT4-fs: ext4_add_new_gdb: adding group block %lu\n", gdb_num); - /* - * If we are not using the primary superblock/GDT copy don't resize, - * because the user tools have no way of handling this. Probably a - * bad time to do it anyways. - */ - if (EXT4_SB(sb)->s_sbh->b_blocknr != - le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block)) { - ext4_warning(sb, "won't resize using backup superblock at %llu", - (unsigned long long)EXT4_SB(sb)->s_sbh->b_blocknr); - return -EPERM; - } - gdb_bh = sb_bread(sb, gdblock); if (!gdb_bh) return -EIO; -- cgit v0.10.2 From 68d0cb49f8db5ddce7cb6cbd7781e232c42c472a Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Sun, 28 Dec 2014 11:44:37 -0500 Subject: amdkfd: actually allocate longs for the pasid bitmask Commit "amdkfd: use sizeof(long) granularity for the pasid bitmask" calculated the number of longs it will need, but ended up allocating that number of bytes rather than longs. Fix that silly error and allocate the amount of data really required. Signed-off-by: Sasha Levin Signed-off-by: Oded Gabbay diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_pasid.c b/drivers/gpu/drm/amd/amdkfd/kfd_pasid.c index 71699ad..4c25ef5 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_pasid.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_pasid.c @@ -32,7 +32,7 @@ int kfd_pasid_init(void) { pasid_limit = max_num_of_processes; - pasid_bitmap = kzalloc(BITS_TO_LONGS(pasid_limit), GFP_KERNEL); + pasid_bitmap = kcalloc(BITS_TO_LONGS(pasid_limit), sizeof(long), GFP_KERNEL); if (!pasid_bitmap) return -ENOMEM; -- cgit v0.10.2 From 1e5d0fdb5b30827141843d69eaddbb4c607fc679 Mon Sep 17 00:00:00 2001 From: Pranith Kumar Date: Sun, 21 Dec 2014 08:59:32 -0500 Subject: powerpc: Wire up sys_execveat() syscall Wire up sys_execveat(). This passes the selftests for the system call. Check success of execveat(3, '../execveat', 0)... [OK] Check success of execveat(5, 'execveat', 0)... [OK] Check success of execveat(6, 'execveat', 0)... [OK] Check success of execveat(-100, '/home/pranith/linux/...ftests/exec/execveat', 0)... [OK] Check success of execveat(99, '/home/pranith/linux/...ftests/exec/execveat', 0)... [OK] Check success of execveat(8, '', 4096)... [OK] Check success of execveat(17, '', 4096)... [OK] Check success of execveat(9, '', 4096)... [OK] Check success of execveat(14, '', 4096)... [OK] Check success of execveat(14, '', 4096)... [OK] Check success of execveat(15, '', 4096)... [OK] Check failure of execveat(8, '', 0) with ENOENT... [OK] Check failure of execveat(8, '(null)', 4096) with EFAULT... [OK] Check success of execveat(5, 'execveat.symlink', 0)... [OK] Check success of execveat(6, 'execveat.symlink', 0)... [OK] Check success of execveat(-100, '/home/pranith/linux/...xec/execveat.symlink', 0)... [OK] Check success of execveat(10, '', 4096)... [OK] Check success of execveat(10, '', 4352)... [OK] Check failure of execveat(5, 'execveat.symlink', 256) with ELOOP... [OK] Check failure of execveat(6, 'execveat.symlink', 256) with ELOOP... [OK] Check failure of execveat(-100, '/home/pranith/linux/tools/testing/selftests/exec/execveat.symlink', 256) with ELOOP... [OK] Check success of execveat(3, '../script', 0)... [OK] Check success of execveat(5, 'script', 0)... [OK] Check success of execveat(6, 'script', 0)... [OK] Check success of execveat(-100, '/home/pranith/linux/...elftests/exec/script', 0)... [OK] Check success of execveat(13, '', 4096)... [OK] Check success of execveat(13, '', 4352)... [OK] Check failure of execveat(18, '', 4096) with ENOENT... [OK] Check failure of execveat(7, 'script', 0) with ENOENT... [OK] Check success of execveat(16, '', 4096)... [OK] Check success of execveat(16, '', 4096)... [OK] Check success of execveat(4, '../script', 0)... [OK] Check success of execveat(4, 'script', 0)... [OK] Check success of execveat(4, '../script', 0)... [OK] Check failure of execveat(4, 'script', 0) with ENOENT... [OK] Check failure of execveat(5, 'execveat', 65535) with EINVAL... [OK] Check failure of execveat(5, 'no-such-file', 0) with ENOENT... [OK] Check failure of execveat(6, 'no-such-file', 0) with ENOENT... [OK] Check failure of execveat(-100, 'no-such-file', 0) with ENOENT... [OK] Check failure of execveat(5, '', 4096) with EACCES... [OK] Check failure of execveat(5, 'Makefile', 0) with EACCES... [OK] Check failure of execveat(11, '', 4096) with EACCES... [OK] Check failure of execveat(12, '', 4096) with EACCES... [OK] Check failure of execveat(99, '', 4096) with EBADF... [OK] Check failure of execveat(99, 'execveat', 0) with EBADF... [OK] Check failure of execveat(8, 'execveat', 0) with ENOTDIR... [OK] Invoke copy of 'execveat' via filename of length 4093: Check success of execveat(19, '', 4096)... [OK] Check success of execveat(5, 'xxxxxxxxxxxxxxxxxxxx...yyyyyyyyyyyyyyyyyyyy', 0)... [OK] Invoke copy of 'script' via filename of length 4093: Check success of execveat(20, '', 4096)... [OK] /bin/sh: 0: Can't open /dev/fd/5/xxxxxxx(... a long line of x's and y's, 0)... [OK] Check success of execveat(5, 'xxxxxxxxxxxxxxxxxxxx...yyyyyyyyyyyyyyyyyyyy', 0)... [OK] Tested on a 32-bit powerpc system. Signed-off-by: Pranith Kumar Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/include/asm/systbl.h b/arch/powerpc/include/asm/systbl.h index ce9577d..91062ee 100644 --- a/arch/powerpc/include/asm/systbl.h +++ b/arch/powerpc/include/asm/systbl.h @@ -366,3 +366,4 @@ SYSCALL_SPU(seccomp) SYSCALL_SPU(getrandom) SYSCALL_SPU(memfd_create) SYSCALL_SPU(bpf) +COMPAT_SYS(execveat) diff --git a/arch/powerpc/include/asm/unistd.h b/arch/powerpc/include/asm/unistd.h index e0da021..36b79c3 100644 --- a/arch/powerpc/include/asm/unistd.h +++ b/arch/powerpc/include/asm/unistd.h @@ -12,7 +12,7 @@ #include -#define __NR_syscalls 362 +#define __NR_syscalls 363 #define __NR__exit __NR_exit #define NR_syscalls __NR_syscalls diff --git a/arch/powerpc/include/uapi/asm/unistd.h b/arch/powerpc/include/uapi/asm/unistd.h index f55351f..ef5b5b1 100644 --- a/arch/powerpc/include/uapi/asm/unistd.h +++ b/arch/powerpc/include/uapi/asm/unistd.h @@ -384,5 +384,6 @@ #define __NR_getrandom 359 #define __NR_memfd_create 360 #define __NR_bpf 361 +#define __NR_execveat 362 #endif /* _UAPI_ASM_POWERPC_UNISTD_H_ */ -- cgit v0.10.2 From c1caae3de46a072d0855729aed6e793e536a4a55 Mon Sep 17 00:00:00 2001 From: Hari Bathini Date: Thu, 18 Dec 2014 23:36:55 +0530 Subject: powerpc/kdump: Ignore failure in enabling big endian exception during crash In LE kernel, we currently have a hack for kexec that resets the exception endian before starting a new kernel as the kernel that is loaded could be a big endian or a little endian kernel. In kdump case, resetting exception endian fails when one or more cpus is disabled. But we can ignore the failure and still go ahead, as in most cases crashkernel will be of same endianess as primary kernel and reseting endianess is not even needed in those cases. This patch adds a new inline function to say if this is kdump path. This function is used at places where such a check is needed. Signed-off-by: Hari Bathini [mpe: Rename to kdump_in_progress(), use bool, and edit comment] Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/include/asm/kexec.h b/arch/powerpc/include/asm/kexec.h index 19c36cb..a46f5f4 100644 --- a/arch/powerpc/include/asm/kexec.h +++ b/arch/powerpc/include/asm/kexec.h @@ -86,6 +86,11 @@ extern int overlaps_crashkernel(unsigned long start, unsigned long size); extern void reserve_crashkernel(void); extern void machine_kexec_mask_interrupts(void); +static inline bool kdump_in_progress(void) +{ + return crashing_cpu >= 0; +} + #else /* !CONFIG_KEXEC */ static inline void crash_kexec_secondary(struct pt_regs *regs) { } @@ -106,6 +111,11 @@ static inline int crash_shutdown_unregister(crash_shutdown_t handler) return 0; } +static inline bool kdump_in_progress(void) +{ + return false; +} + #endif /* CONFIG_KEXEC */ #endif /* ! __ASSEMBLY__ */ #endif /* __KERNEL__ */ diff --git a/arch/powerpc/kernel/machine_kexec_64.c b/arch/powerpc/kernel/machine_kexec_64.c index 879b3aa..f96d1ec 100644 --- a/arch/powerpc/kernel/machine_kexec_64.c +++ b/arch/powerpc/kernel/machine_kexec_64.c @@ -330,7 +330,7 @@ void default_machine_kexec(struct kimage *image) * using debugger IPI. */ - if (crashing_cpu == -1) + if (!kdump_in_progress()) kexec_prepare_cpus(); pr_debug("kexec: Starting switchover sequence.\n"); diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index 469751d..b5682fd 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c @@ -43,6 +43,7 @@ #include #include #include +#include #include #include "pseries.h" @@ -267,8 +268,13 @@ static void pSeries_lpar_hptab_clear(void) * out to the user, but at least this will stop us from * continuing on further and creating an even more * difficult to debug situation. + * + * There is a known problem when kdump'ing, if cpus are offline + * the above call will fail. Rather than panicking again, keep + * going and hope the kdump kernel is also little endian, which + * it usually is. */ - if (rc) + if (rc && !kdump_in_progress()) panic("Could not enable big endian exceptions"); } #endif -- cgit v0.10.2 From 1be6f10f6f9caade3a053938cb80a2eed237e262 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Mon, 29 Dec 2014 15:47:05 +1100 Subject: Revert "powerpc: Secondary CPUs must set cpu_callin_map after setting active and online" This reverts commit 7c5c92ed56d932b2c19c3f8aea86369509407d33. Although this did fix the bug it was aimed at, it also broke secondary startup on platforms that use give/take_timebase(). Unfortunately we didn't detect that while it was in next. Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 8ec017c..8b2d2dc 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -700,6 +700,7 @@ void start_secondary(void *unused) smp_store_cpu_info(cpu); set_dec(tb_ticks_per_jiffy); preempt_disable(); + cpu_callin_map[cpu] = 1; if (smp_ops->setup_cpu) smp_ops->setup_cpu(cpu); @@ -738,14 +739,6 @@ void start_secondary(void *unused) notify_cpu_starting(cpu); set_cpu_online(cpu, true); - /* - * CPU must be marked active and online before we signal back to the - * master, because the scheduler needs to see the cpu_online and - * cpu_active bits set. - */ - smp_wmb(); - cpu_callin_map[cpu] = 1; - local_irq_enable(); cpu_startup_entry(CPUHP_ONLINE); -- cgit v0.10.2 From 2bacedada682d5485424f5227f27a3d5d6eb551c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Giedrius=20Statkevi=C4=8Dius?= Date: Sat, 27 Dec 2014 00:28:30 +0200 Subject: HID: Add a new id 0x501a for Genius MousePen i608X MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit New Genius MousePen i608X devices have a new id 0x501a instead of the old 0x5011 so add a new #define with "_2" appended and change required places. The remaining two checkpatch warnings about line length being over 80 characters are present in the original files too and this patch was made in the same style (no line break). Just adding a new id and changing the required places should make the new device work without any issues according to the bug report in the following url. This patch was made according to and fixes: https://bugzilla.kernel.org/show_bug.cgi?id=67111 Signed-off-by: Giedrius Statkevičius Signed-off-by: Jiri Kosina diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c index c3d0ac1..8b63879 100644 --- a/drivers/hid/hid-core.c +++ b/drivers/hid/hid-core.c @@ -1805,6 +1805,7 @@ static const struct hid_device_id hid_have_special_driver[] = { { HID_USB_DEVICE(USB_VENDOR_ID_KYE, USB_DEVICE_ID_KYE_ERGO_525V) }, { HID_USB_DEVICE(USB_VENDOR_ID_KYE, USB_DEVICE_ID_KYE_EASYPEN_I405X) }, { HID_USB_DEVICE(USB_VENDOR_ID_KYE, USB_DEVICE_ID_KYE_MOUSEPEN_I608X) }, + { HID_USB_DEVICE(USB_VENDOR_ID_KYE, USB_DEVICE_ID_KYE_MOUSEPEN_I608X_2) }, { HID_USB_DEVICE(USB_VENDOR_ID_KYE, USB_DEVICE_ID_KYE_EASYPEN_M610X) }, { HID_USB_DEVICE(USB_VENDOR_ID_LABTEC, USB_DEVICE_ID_LABTEC_WIRELESS_KEYBOARD) }, { HID_USB_DEVICE(USB_VENDOR_ID_LCPOWER, USB_DEVICE_ID_LCPOWER_LC1000 ) }, diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index 7460f34..9243359 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -526,6 +526,7 @@ #define USB_DEVICE_ID_KYE_GPEN_560 0x5003 #define USB_DEVICE_ID_KYE_EASYPEN_I405X 0x5010 #define USB_DEVICE_ID_KYE_MOUSEPEN_I608X 0x5011 +#define USB_DEVICE_ID_KYE_MOUSEPEN_I608X_2 0x501a #define USB_DEVICE_ID_KYE_EASYPEN_M610X 0x5013 #define USB_VENDOR_ID_LABTEC 0x1020 diff --git a/drivers/hid/hid-kye.c b/drivers/hid/hid-kye.c index b92bf01..158fcf5 100644 --- a/drivers/hid/hid-kye.c +++ b/drivers/hid/hid-kye.c @@ -323,6 +323,7 @@ static __u8 *kye_report_fixup(struct hid_device *hdev, __u8 *rdesc, } break; case USB_DEVICE_ID_KYE_MOUSEPEN_I608X: + case USB_DEVICE_ID_KYE_MOUSEPEN_I608X_2: if (*rsize == MOUSEPEN_I608X_RDESC_ORIG_SIZE) { rdesc = mousepen_i608x_rdesc_fixed; *rsize = sizeof(mousepen_i608x_rdesc_fixed); @@ -415,6 +416,7 @@ static int kye_probe(struct hid_device *hdev, const struct hid_device_id *id) switch (id->product) { case USB_DEVICE_ID_KYE_EASYPEN_I405X: case USB_DEVICE_ID_KYE_MOUSEPEN_I608X: + case USB_DEVICE_ID_KYE_MOUSEPEN_I608X_2: case USB_DEVICE_ID_KYE_EASYPEN_M610X: ret = kye_tablet_enable(hdev); if (ret) { @@ -446,6 +448,8 @@ static const struct hid_device_id kye_devices[] = { { HID_USB_DEVICE(USB_VENDOR_ID_KYE, USB_DEVICE_ID_KYE_MOUSEPEN_I608X) }, { HID_USB_DEVICE(USB_VENDOR_ID_KYE, + USB_DEVICE_ID_KYE_MOUSEPEN_I608X_2) }, + { HID_USB_DEVICE(USB_VENDOR_ID_KYE, USB_DEVICE_ID_KYE_EASYPEN_M610X) }, { HID_USB_DEVICE(USB_VENDOR_ID_KYE, USB_DEVICE_ID_GENIUS_GILA_GAMING_MOUSE) }, diff --git a/drivers/hid/usbhid/hid-quirks.c b/drivers/hid/usbhid/hid-quirks.c index dc89be9..b27b3d3 100644 --- a/drivers/hid/usbhid/hid-quirks.c +++ b/drivers/hid/usbhid/hid-quirks.c @@ -124,6 +124,7 @@ static const struct hid_blacklist { { USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_WIRELESS, HID_QUIRK_MULTI_INPUT }, { USB_VENDOR_ID_SIGMA_MICRO, USB_DEVICE_ID_SIGMA_MICRO_KEYBOARD, HID_QUIRK_NO_INIT_REPORTS }, { USB_VENDOR_ID_KYE, USB_DEVICE_ID_KYE_MOUSEPEN_I608X, HID_QUIRK_MULTI_INPUT }, + { USB_VENDOR_ID_KYE, USB_DEVICE_ID_KYE_MOUSEPEN_I608X_2, HID_QUIRK_MULTI_INPUT }, { USB_VENDOR_ID_KYE, USB_DEVICE_ID_KYE_EASYPEN_M610X, HID_QUIRK_MULTI_INPUT }, { USB_VENDOR_ID_NTRIG, USB_DEVICE_ID_NTRIG_DUOSENSE, HID_QUIRK_NO_INIT_REPORTS }, { USB_VENDOR_ID_SEMICO, USB_DEVICE_ID_SEMICO_USB_KEYKOARD, HID_QUIRK_NO_INIT_REPORTS }, -- cgit v0.10.2 From 38c2adfb00db045a876dd667040abc01b788ad61 Mon Sep 17 00:00:00 2001 From: Oded Gabbay Date: Mon, 22 Dec 2014 11:19:23 +0200 Subject: drm/radeon: Init amdkfd only if it was compiled MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch changes the radeon_kfd_init(), which is used to initialize the interface between radeon and amdkfd, so the interface will be initialized only if amdkfd was build, either as module or inside the kernel image. In the modules case, the symbol_request() will be used (same as old code). In the in-image compilation case, a direct call to kgd2kfd_init() will be done. For other cases, radeon_kfd_init() will just return false. This patch is necessary because in case of the following specific configuration: kernel 32-bit, no modules support, random kernel base and no hibernation, the symbol_request() doesn't work as expected - it doesn't return NULL if the symbol doesn't exists - which makes the kernel panic. Reviewed-by: Christian König Signed-off-by: Oded Gabbay diff --git a/drivers/gpu/drm/radeon/radeon_kfd.c b/drivers/gpu/drm/radeon/radeon_kfd.c index 242fd8b..d3e78b4 100644 --- a/drivers/gpu/drm/radeon/radeon_kfd.c +++ b/drivers/gpu/drm/radeon/radeon_kfd.c @@ -101,6 +101,7 @@ static const struct kgd2kfd_calls *kgd2kfd; bool radeon_kfd_init(void) { +#if defined(CONFIG_HSA_AMD_MODULE) bool (*kgd2kfd_init_p)(unsigned, const struct kfd2kgd_calls*, const struct kgd2kfd_calls**); @@ -117,6 +118,17 @@ bool radeon_kfd_init(void) } return true; +#elif defined(CONFIG_HSA_AMD) + if (!kgd2kfd_init(KFD_INTERFACE_VERSION, &kfd2kgd, &kgd2kfd)) { + kgd2kfd = NULL; + + return false; + } + + return true; +#else + return false; +#endif } void radeon_kfd_fini(void) -- cgit v0.10.2 From 68bda47c57c9d671820672badc1cb62211ec4700 Mon Sep 17 00:00:00 2001 From: Doug Anderson Date: Wed, 19 Nov 2014 14:51:32 -0800 Subject: pinctrl: rockchip: Handle wakeup pins The rockchip pinctrl driver was using irq_gc_set_wake() as its implementation of irq_set_wake() but was totally ignoring everything that irq_gc_set_wake() did (which is to upkeep gc->wake_active). Let's fix that by setting gc->wake_active as GPIO_INTEN at suspend time and restoring GPIO_INTEN at resume time. NOTE a few quirks when thinking about this patch: - Rockchip pinctrl hardware supports both "disable/enable" and "mask/unmask". Right now we only use "disable/enable" and present those to Linux as "mask/unmask". This should be OK because enable/disable is optional and Linux will implement it in terms of mask/unmask. At the moment we always tell hardware all interrupts are unmasked (the boot default). - At suspend time Linux tries to call "disable" on all interrupts and also enables wakeup on all wakeup interrupts. One would think that since "disable" is implemented as "mask" when "disable" isn't provided and that since we were ignoring gc->wake_active that nothing would have woken us up. That's not the case since Linux "optimizes" things and just leaves interrutps unmasked, assuming it could mask them later when they go off. That meant that at suspend time all interrupts were actually being left enabled. With this patch random non-wakeup interrupts no longer wake the system up. Wakeup interrupts still wake the system up. Signed-off-by: Doug Anderson Reviewed-by: Dmitry Torokhov Reviewed-by: Heiko Stuebner Signed-off-by: Linus Walleij diff --git a/drivers/pinctrl/pinctrl-rockchip.c b/drivers/pinctrl/pinctrl-rockchip.c index ba74f0a..e91e845 100644 --- a/drivers/pinctrl/pinctrl-rockchip.c +++ b/drivers/pinctrl/pinctrl-rockchip.c @@ -89,6 +89,7 @@ struct rockchip_iomux { * @reg_pull: optional separate register for additional pull settings * @clk: clock of the gpio bank * @irq: interrupt of the gpio bank + * @saved_enables: Saved content of GPIO_INTEN at suspend time. * @pin_base: first pin number * @nr_pins: number of pins in this bank * @name: name of the bank @@ -107,6 +108,7 @@ struct rockchip_pin_bank { struct regmap *regmap_pull; struct clk *clk; int irq; + u32 saved_enables; u32 pin_base; u8 nr_pins; char *name; @@ -1543,6 +1545,23 @@ static int rockchip_irq_set_type(struct irq_data *d, unsigned int type) return 0; } +static void rockchip_irq_suspend(struct irq_data *d) +{ + struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d); + struct rockchip_pin_bank *bank = gc->private; + + bank->saved_enables = irq_reg_readl(gc, GPIO_INTEN); + irq_reg_writel(gc, gc->wake_active, GPIO_INTEN); +} + +static void rockchip_irq_resume(struct irq_data *d) +{ + struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d); + struct rockchip_pin_bank *bank = gc->private; + + irq_reg_writel(gc, bank->saved_enables, GPIO_INTEN); +} + static int rockchip_interrupts_register(struct platform_device *pdev, struct rockchip_pinctrl *info) { @@ -1587,6 +1606,8 @@ static int rockchip_interrupts_register(struct platform_device *pdev, gc->chip_types[0].chip.irq_mask = irq_gc_mask_clr_bit; gc->chip_types[0].chip.irq_unmask = irq_gc_mask_set_bit; gc->chip_types[0].chip.irq_set_wake = irq_gc_set_wake; + gc->chip_types[0].chip.irq_suspend = rockchip_irq_suspend; + gc->chip_types[0].chip.irq_resume = rockchip_irq_resume; gc->chip_types[0].chip.irq_set_type = rockchip_irq_set_type; gc->wake_enabled = IRQ_MSK(bank->nr_pins); -- cgit v0.10.2 From f2dd028c2632d107c26b1daed543d9efd4f0decd Mon Sep 17 00:00:00 2001 From: Doug Anderson Date: Wed, 19 Nov 2014 14:51:33 -0800 Subject: pinctrl: rockchip: Fix enable/disable/mask/unmask The Rockchip pinctrl driver was only implementing the "mask" and "unmask" operations though the hardware actually has two distinct things: enable/disable and mask/unmask. It was implementing the "mask" operations as a hardware enable/disable and always leaving all interrupts unmasked. I believe that the old system had some downsides, specifically: - (Untested) if an interrupt went off while interrupts were "masked" it would be lost. Now it will be kept track of. - If someone wanted to change an interrupt back into a GPIO (is such a thing sensible?) by calling irq_disable() it wouldn't actually take effect. That's because Linux does some extra optimizations when there's no true "disable" function: it does a lazy mask. Let's actually implement enable/disable/mask/unmask properly. Signed-off-by: Doug Anderson Reviewed-by: Dmitry Torokhov Reviewed-by: Heiko Stuebner Signed-off-by: Linus Walleij diff --git a/drivers/pinctrl/pinctrl-rockchip.c b/drivers/pinctrl/pinctrl-rockchip.c index e91e845..3c22dbe 100644 --- a/drivers/pinctrl/pinctrl-rockchip.c +++ b/drivers/pinctrl/pinctrl-rockchip.c @@ -1562,6 +1562,34 @@ static void rockchip_irq_resume(struct irq_data *d) irq_reg_writel(gc, bank->saved_enables, GPIO_INTEN); } +static void rockchip_irq_disable(struct irq_data *d) +{ + struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d); + u32 val; + + irq_gc_lock(gc); + + val = irq_reg_readl(gc, GPIO_INTEN); + val &= ~d->mask; + irq_reg_writel(gc, val, GPIO_INTEN); + + irq_gc_unlock(gc); +} + +static void rockchip_irq_enable(struct irq_data *d) +{ + struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d); + u32 val; + + irq_gc_lock(gc); + + val = irq_reg_readl(gc, GPIO_INTEN); + val |= d->mask; + irq_reg_writel(gc, val, GPIO_INTEN); + + irq_gc_unlock(gc); +} + static int rockchip_interrupts_register(struct platform_device *pdev, struct rockchip_pinctrl *info) { @@ -1600,11 +1628,13 @@ static int rockchip_interrupts_register(struct platform_device *pdev, gc = irq_get_domain_generic_chip(bank->domain, 0); gc->reg_base = bank->reg_base; gc->private = bank; - gc->chip_types[0].regs.mask = GPIO_INTEN; + gc->chip_types[0].regs.mask = GPIO_INTMASK; gc->chip_types[0].regs.ack = GPIO_PORTS_EOI; gc->chip_types[0].chip.irq_ack = irq_gc_ack_set_bit; - gc->chip_types[0].chip.irq_mask = irq_gc_mask_clr_bit; - gc->chip_types[0].chip.irq_unmask = irq_gc_mask_set_bit; + gc->chip_types[0].chip.irq_mask = irq_gc_mask_set_bit; + gc->chip_types[0].chip.irq_unmask = irq_gc_mask_clr_bit; + gc->chip_types[0].chip.irq_enable = rockchip_irq_enable; + gc->chip_types[0].chip.irq_disable = rockchip_irq_disable; gc->chip_types[0].chip.irq_set_wake = irq_gc_set_wake; gc->chip_types[0].chip.irq_suspend = rockchip_irq_suspend; gc->chip_types[0].chip.irq_resume = rockchip_irq_resume; -- cgit v0.10.2 From 1421c935df159c8b893e82dd81b329b8977c4e86 Mon Sep 17 00:00:00 2001 From: Corey Minyard Date: Tue, 30 Dec 2014 13:31:45 -0600 Subject: ipmi: Fix compile warning with tv_usec It's not a long int on all arches. Signed-off-by: Corey Minyard diff --git a/drivers/char/ipmi/ipmi_ssif.c b/drivers/char/ipmi/ipmi_ssif.c index fd5a5e8..982b963 100644 --- a/drivers/char/ipmi/ipmi_ssif.c +++ b/drivers/char/ipmi/ipmi_ssif.c @@ -969,7 +969,8 @@ static void sender(void *send_info, do_gettimeofday(&t); pr_info("**Enqueue %02x %02x: %ld.%6.6ld\n", - msg->data[0], msg->data[1], t.tv_sec, t.tv_usec); + msg->data[0], msg->data[1], + (long) t.tv_sec, (long) t.tv_usec); } } -- cgit v0.10.2 From a3566b5290c146f51f8129893b957faea1700b84 Mon Sep 17 00:00:00 2001 From: "John W. Linville" Date: Sat, 15 Nov 2014 01:19:53 +0000 Subject: e100: fix typo in MDI/MDI-X eeprom check in e100_phy_init Although it doesn't explicitly say so, commit 60ffa478759f39a2 ("e100: Fix MDIO/MDIO-X") appears to be intended to revert the earlier commit 648951451e6d2d53 ("e100: fixed e100 MDI/MDI-X issues"). However, careful examination reveals that the attempted revert actually _inverted_ the test for eeprom_mdix_enabled. That is bound to program a few PHYs incorrectly... https://bugzilla.redhat.com/show_bug.cgi?id=1156417 Signed-off-by: "John W. Linville" Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher diff --git a/drivers/net/ethernet/intel/e100.c b/drivers/net/ethernet/intel/e100.c index 781065e..e9c3a87 100644 --- a/drivers/net/ethernet/intel/e100.c +++ b/drivers/net/ethernet/intel/e100.c @@ -1543,7 +1543,7 @@ static int e100_phy_init(struct nic *nic) mdio_write(netdev, nic->mii.phy_id, MII_BMCR, bmcr); } else if ((nic->mac >= mac_82550_D102) || ((nic->flags & ich) && (mdio_read(netdev, nic->mii.phy_id, MII_TPISTATUS) & 0x8000) && - !(nic->eeprom[eeprom_cnfg_mdix] & eeprom_mdix_enabled))) { + (nic->eeprom[eeprom_cnfg_mdix] & eeprom_mdix_enabled))) { /* enable/disable MDI/MDI-X auto-switching. */ mdio_write(netdev, nic->mii.phy_id, MII_NCONFIG, nic->mii.force_media ? 0 : NCONFIG_AUTO_SWITCH); -- cgit v0.10.2 From 2184aa3d0f6f952ca55b9daeaeb9d1e3d6b74a83 Mon Sep 17 00:00:00 2001 From: Todd Fujinaka Date: Thu, 27 Nov 2014 01:00:02 +0000 Subject: igb: Remove unneeded FIXME Remove a FIXME comment that was missed in a commit on 1/2007. Signed-off-by: Todd Fujinaka Reported-by: nick Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher diff --git a/drivers/net/ethernet/intel/igb/e1000_82575.c b/drivers/net/ethernet/intel/igb/e1000_82575.c index 051ea94..0f69ef8 100644 --- a/drivers/net/ethernet/intel/igb/e1000_82575.c +++ b/drivers/net/ethernet/intel/igb/e1000_82575.c @@ -1125,7 +1125,7 @@ static s32 igb_acquire_swfw_sync_82575(struct e1000_hw *hw, u16 mask) u32 swmask = mask; u32 fwmask = mask << 16; s32 ret_val = 0; - s32 i = 0, timeout = 200; /* FIXME: find real value to use here */ + s32 i = 0, timeout = 200; while (i < timeout) { if (igb_get_hw_semaphore(hw)) { -- cgit v0.10.2 From e3fe44c75913076b06ca0d0b79c21ce326ce3ef8 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Mon, 8 Dec 2014 04:28:39 +0000 Subject: i40e: Fix possible memory leak in i40e_dbg_dump_desc I didn't notice that return in the code, fix it by adding a goto out instead to free the memory. Fixes: > New smatch warnings: > drivers/net/ethernet/intel/i40e/i40e_debugfs.c:832 i40e_dbg_dump_desc() warn: possible memory leak of 'ring' Reported-by: Dan Carpenter Signed-off-by: Joe Perches Tested-by: Jim Young Signed-off-by: Jeff Kirsher diff --git a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c index 433a558..cb0de45 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c +++ b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c @@ -829,7 +829,7 @@ static void i40e_dbg_dump_desc(int cnt, int vsi_seid, int ring_id, int desc_n, if (desc_n >= ring->count || desc_n < 0) { dev_info(&pf->pdev->dev, "descriptor %d not found\n", desc_n); - return; + goto out; } if (!is_rx_ring) { txd = I40E_TX_DESC(ring, desc_n); @@ -855,6 +855,8 @@ static void i40e_dbg_dump_desc(int cnt, int vsi_seid, int ring_id, int desc_n, } else { dev_info(&pf->pdev->dev, "dump desc rx/tx []\n"); } + +out: kfree(ring); } -- cgit v0.10.2 From 8a0a1f840f6cc09c20963b1938cb3c976378783d Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Mon, 29 Dec 2014 18:04:36 +0100 Subject: net: Xilinx: fix error return code Return a negative error code on failure. A simplified version of the semantic match that finds this problem is as follows: (http://coccinelle.lip6.fr/) // @@ identifier ret; expression e1,e2; @@ ( if (\(ret < 0\|ret != 0\)) { ... return ret; } | ret = 0 ) ... when != ret = e1 when != &ret *if(...) { ... when != ret = e2 when forall return ret; } // Signed-off-by: Julia Lawall Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/xilinx/ll_temac_main.c b/drivers/net/ethernet/xilinx/ll_temac_main.c index 9c2d91e..dbcbf0c 100644 --- a/drivers/net/ethernet/xilinx/ll_temac_main.c +++ b/drivers/net/ethernet/xilinx/ll_temac_main.c @@ -1043,6 +1043,7 @@ static int temac_of_probe(struct platform_device *op) lp->regs = of_iomap(op->dev.of_node, 0); if (!lp->regs) { dev_err(&op->dev, "could not map temac regs.\n"); + rc = -ENOMEM; goto nodev; } @@ -1062,6 +1063,7 @@ static int temac_of_probe(struct platform_device *op) np = of_parse_phandle(op->dev.of_node, "llink-connected", 0); if (!np) { dev_err(&op->dev, "could not find DMA node\n"); + rc = -ENODEV; goto err_iounmap; } diff --git a/drivers/net/ethernet/xilinx/xilinx_emaclite.c b/drivers/net/ethernet/xilinx/xilinx_emaclite.c index 2485879..9d4ce38 100644 --- a/drivers/net/ethernet/xilinx/xilinx_emaclite.c +++ b/drivers/net/ethernet/xilinx/xilinx_emaclite.c @@ -1109,6 +1109,7 @@ static int xemaclite_of_probe(struct platform_device *ofdev) res = platform_get_resource(ofdev, IORESOURCE_IRQ, 0); if (!res) { dev_err(dev, "no IRQ found\n"); + rc = -ENXIO; goto error; } -- cgit v0.10.2 From f12e77caf6c21588d6b24b5f1d8a6036f9ff9378 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Mon, 29 Dec 2014 18:04:37 +0100 Subject: myri10ge: fix error return code Return a negative error code on failure. A simplified version of the semantic match that finds this problem is as follows: (http://coccinelle.lip6.fr/) // @@ identifier ret; expression e1,e2; @@ ( if (\(ret < 0\|ret != 0\)) { ... return ret; } | ret = 0 ) ... when != ret = e1 when != &ret *if(...) { ... when != ret = e2 when forall return ret; } // The patch also modifies the test of mgp->cmd to satisfy checkpatch. Signed-off-by: Julia Lawall Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/myricom/myri10ge/myri10ge.c b/drivers/net/ethernet/myricom/myri10ge/myri10ge.c index af09905..71af98b 100644 --- a/drivers/net/ethernet/myricom/myri10ge/myri10ge.c +++ b/drivers/net/ethernet/myricom/myri10ge/myri10ge.c @@ -4033,8 +4033,10 @@ static int myri10ge_probe(struct pci_dev *pdev, const struct pci_device_id *ent) (void)pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)); mgp->cmd = dma_alloc_coherent(&pdev->dev, sizeof(*mgp->cmd), &mgp->cmd_bus, GFP_KERNEL); - if (mgp->cmd == NULL) + if (!mgp->cmd) { + status = -ENOMEM; goto abort_with_enabled; + } mgp->board_span = pci_resource_len(pdev, 0); mgp->iomem_base = pci_resource_start(pdev, 0); -- cgit v0.10.2 From 3d2232f54dc6cf1512a707dbf32c94f6f6a1be87 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Mon, 29 Dec 2014 18:04:40 +0100 Subject: net: sun4i-emac: fix error return code Return a negative error code on failure. A simplified version of the semantic match that finds this problem is as follows: (http://coccinelle.lip6.fr/) // @@ identifier ret; expression e1,e2; @@ ( if (\(ret < 0\|ret != 0\)) { ... return ret; } | ret = 0 ) ... when != ret = e1 when != &ret *if(...) { ... when != ret = e2 when forall return ret; } // Signed-off-by: Julia Lawall Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/allwinner/sun4i-emac.c b/drivers/net/ethernet/allwinner/sun4i-emac.c index 1fcd556..f3470d9 100644 --- a/drivers/net/ethernet/allwinner/sun4i-emac.c +++ b/drivers/net/ethernet/allwinner/sun4i-emac.c @@ -850,8 +850,10 @@ static int emac_probe(struct platform_device *pdev) } db->clk = devm_clk_get(&pdev->dev, NULL); - if (IS_ERR(db->clk)) + if (IS_ERR(db->clk)) { + ret = PTR_ERR(db->clk); goto out; + } clk_prepare_enable(db->clk); -- cgit v0.10.2 From 0f113b81172705f38a1cb94b1644a339e6bdf884 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Mon, 29 Dec 2014 18:04:42 +0100 Subject: net: axienet: fix error return code Return a negative error code on failure. A simplified version of the semantic match that finds this problem is as follows: (http://coccinelle.lip6.fr/) // @@ identifier ret; expression e1,e2; @@ ( if (\(ret < 0\|ret != 0\)) { ... return ret; } | ret = 0 ) ... when != ret = e1 when != &ret *if(...) { ... when != ret = e2 when forall return ret; } // Signed-off-by: Julia Lawall Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c index c18a0c6..a6d2860 100644 --- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c +++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c @@ -1501,6 +1501,7 @@ static int axienet_of_probe(struct platform_device *op) lp->regs = of_iomap(op->dev.of_node, 0); if (!lp->regs) { dev_err(&op->dev, "could not map Axi Ethernet regs.\n"); + ret = -ENOMEM; goto nodev; } /* Setup checksum offload, but default to off if not specified */ @@ -1563,6 +1564,7 @@ static int axienet_of_probe(struct platform_device *op) np = of_parse_phandle(op->dev.of_node, "axistream-connected", 0); if (!np) { dev_err(&op->dev, "could not find DMA node\n"); + ret = -ENODEV; goto err_iounmap; } lp->dma_regs = of_iomap(np, 0); -- cgit v0.10.2 From 7824acd92494cf21229ea5313e525fa20927ba26 Mon Sep 17 00:00:00 2001 From: Yongjian Xu Date: Tue, 30 Dec 2014 16:03:46 +0800 Subject: qlcnic: Fix return value in qlcnic_probe() If the check of adapter fails and goes into the 'else' branch, the return value 'err' should not still be zero. Signed-off-by: Yongjian Xu Acked-by: Shahed Shaikh Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c index 9929b97..2528c3f 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c @@ -2605,6 +2605,7 @@ qlcnic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) } else { dev_err(&pdev->dev, "%s: failed. Please Reboot\n", __func__); + err = -ENODEV; goto err_out_free_hw; } -- cgit v0.10.2 From a16c5f99a28c9945165c46da27fff8e6f26f8736 Mon Sep 17 00:00:00 2001 From: Michal Marek Date: Wed, 31 Dec 2014 16:29:35 +0100 Subject: kbuild: Fix removal of the debian/ directory scripts/Makefile.clean treats absolute path specially, but $(objtree)/debian is no longer an absolute path since 7e1c0477 (kbuild: Use relative path for $(objtree). Work around this by checking if the path starts with $(objtree)/. Reported-and-tested-by: Sedat Dilek Fixes: 7e1c0477 (kbuild: Use relative path for $(objtree) Signed-off-by: Michal Marek diff --git a/scripts/Makefile.clean b/scripts/Makefile.clean index 1bca180..627f8cb 100644 --- a/scripts/Makefile.clean +++ b/scripts/Makefile.clean @@ -42,19 +42,19 @@ __clean-files := $(extra-y) $(extra-m) $(extra-) \ __clean-files := $(filter-out $(no-clean-files), $(__clean-files)) -# as clean-files is given relative to the current directory, this adds -# a $(obj) prefix, except for absolute paths +# clean-files is given relative to the current directory, unless it +# starts with $(objtree)/ (which means "./", so do not add "./" unless +# you want to delete a file from the toplevel object directory). __clean-files := $(wildcard \ - $(addprefix $(obj)/, $(filter-out /%, $(__clean-files))) \ - $(filter /%, $(__clean-files))) + $(addprefix $(obj)/, $(filter-out $(objtree)/%, $(__clean-files))) \ + $(filter $(objtree)/%, $(__clean-files))) -# as clean-dirs is given relative to the current directory, this adds -# a $(obj) prefix, except for absolute paths +# same as clean-files __clean-dirs := $(wildcard \ - $(addprefix $(obj)/, $(filter-out /%, $(clean-dirs))) \ - $(filter /%, $(clean-dirs))) + $(addprefix $(obj)/, $(filter-out $(objtree)/%, $(clean-dirs))) \ + $(filter $(objtree)/%, $(clean-dirs))) # ========================================================================== -- cgit v0.10.2 From 906451b98b6774042b707a5dcebb6a93dbd14c85 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Wed, 31 Dec 2014 15:27:47 +0900 Subject: perf probe: Fix to fall back to find probe point in symbols Fix to fall back to find a probe point in symbols if perf fails to find it in debuginfo. This can happen when the target function is an alias of another function. Such alias doesn't have an entry in debuginfo but in symbols. David Ahern reported this problem in https://lkml.org/lkml/2014/12/29/355 I ensured the problem and deeper investigation discovers it. ----- eu-readelf --debug-dump=info /usr/lib/debug/lib/x86_64-linux-gnu/libc-2.19.so | grep \"malloc\" -A6 name (strp) "malloc" decl_file (data1) 25 decl_line (data2) 466 prototyped (flag_present) type (ref4) [ 81b5] declaration (flag_present) [ 8f58] formal_parameter -- name (strp) "malloc" decl_file (data1) 23 decl_line (data2) 466 prototyped (flag_present) type (ref4) [ 9f4a] declaration (flag_present) sibling (ref4) [ bb29] ... ----- All these entires have no instances (all of them are declarations) This is why the perf probe failed to find it in debuginfo. However, there are some malloc instances in symbols. ----- eu-readelf --symbols /usr/lib/debug/lib/x86_64-linux-gnu/libc-2.19.so | grep malloc$ 1181: 0000000000080700 5332 FUNC LOCAL DEFAULT 12 _int_malloc 4537: 00000000000831d0 339 FUNC LOCAL DEFAULT 12 __GI___libc_malloc 5545: 00000000000831d0 339 FUNC LOCAL DEFAULT 12 __malloc 6063: 00000000000831d0 339 FUNC GLOBAL DEFAULT 12 malloc 7302: 00000000000831d0 339 FUNC GLOBAL DEFAULT 12 __libc_malloc ----- As you an see, malloc and __libc_malloc have same address, and actually __libc_malloc has an entry in debuginfo. So you can set up a probe on __libc_malloc. To fix this problem shortly, perf probe simply falls back to find probe point(malloc) in symbols if it is not found in debuginfo. Reported-by: David Ahern Signed-off-by: Masami Hiramatsu Acked-by: Namhyung Kim Cc: David Ahern Cc: Ingo Molnar Cc: Namhyung Kim Cc: yrl.pp-manager.tt@hitachi.com Link: http://lkml.kernel.org/r/20141231062747.2087.80961.stgit@localhost.localdomain Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index 28eb141..7f9b863 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -495,9 +495,11 @@ static int try_to_find_probe_trace_events(struct perf_probe_event *pev, } if (ntevs == 0) { /* No error but failed to find probe point. */ - pr_warning("Probe point '%s' not found.\n", + pr_warning("Probe point '%s' not found in debuginfo.\n", synthesize_perf_probe_point(&pev->point)); - return -ENOENT; + if (need_dwarf) + return -ENOENT; + return 0; } /* Error path : ntevs < 0 */ pr_debug("An error occurred in debuginfo analysis (%d).\n", ntevs); -- cgit v0.10.2 From 4093325f829746b88eaf02a5ae3b88a325ea8b75 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 30 Dec 2014 17:47:47 +0900 Subject: perf probe: Fix crash in dwarf_getcfi_elf David reported that perf can segfault when adding an uprobe event like this: $ perf probe -x /lib64/libc-2.14.90.so -a 'malloc size=%di' (gdb) bt #0 parse_eh_frame_hdr (hdr=0x0, hdr_size=2596, hdr_vaddr=71788, ehdr=0x7fffffffd390, eh_frame_vaddr= 0x7fffffffd378, table_entries=0x8808d8, table_encoding=0x8808e0 "") at dwarf_getcfi_elf.c:79 #1 0x000000385f81615a in getcfi_scn_eh_frame (hdr_vaddr=71788, hdr_scn=0x8839b0, shdr=0x7fffffffd2f0, scn=, ehdr=0x7fffffffd390, elf=0x882b30) at dwarf_getcfi_elf.c:231 #2 getcfi_shdr (ehdr=0x7fffffffd390, elf=0x882b30) at dwarf_getcfi_elf.c:283 #3 dwarf_getcfi_elf (elf=0x882b30) at dwarf_getcfi_elf.c:309 #4 0x00000000004d5bac in debuginfo__find_probes (pf=0x7fffffffd4f0, dbg=Unhandled dwarf expression opcode 0xfa) at util/probe-finder.c:993 #5 0x00000000004d634a in debuginfo__find_trace_events (dbg=0x880840, pev=, tevs=0x880f88, max_tevs=) at util/probe-finder.c:1200 #6 0x00000000004aed6b in try_to_find_probe_trace_events (target=0x881b20 "/lib64/libpthread-2.14.90.so", max_tevs=128, tevs=0x880f88, pev=0x859b30) at util/probe-event.c:482 #7 convert_to_probe_trace_events (target=0x881b20 "/lib64/libpthread-2.14.90.so", max_tevs=128, tevs=0x880f88, pev=0x859b30) at util/probe-event.c:2356 #8 add_perf_probe_events (pevs=, npevs=1, max_tevs=128, target=0x881b20 "/lib64/libpthread-2.14.90.so", force_add=false) at util/probe-event.c:2391 #9 0x000000000044014f in __cmd_probe (argc=, argv=0x7fffffffe2f0, prefix=Unhandled dwarf expression opcode 0xfa) at at builtin-probe.c:488 #10 0x0000000000440313 in cmd_probe (argc=5, argv=0x7fffffffe2f0, prefix=) at builtin-probe.c:506 #11 0x000000000041d133 in run_builtin (p=0x805680, argc=5, argv=0x7fffffffe2f0) at perf.c:341 #12 0x000000000041c8b2 in handle_internal_command (argv=, argc=) at perf.c:400 #13 run_argv (argv=, argcp=) at perf.c:444 #14 main (argc=5, argv=0x7fffffffe2f0) at perf.c:559 And I found a related commit (5704c8c4fa71 "getcfi_scn_eh_frame: Don't crash and burn when .eh_frame bits aren't there.") in elfutils that can lead to a unexpected crash like this. To safely use the function, it needs to check the .eh_frame section is a PROGBITS type. Reported-by: David Ahern Tested-by: David Ahern Signed-off-by: Namhyung Kim Acked-by: Masami Hiramatsu Cc: David Ahern Cc: Mark Wielaard Cc: Masami Hiramatsu Link: http://lkml.kernel.org/r/20141230090533.GH6081@sejong Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c index c7918f8..b5247d7 100644 --- a/tools/perf/util/probe-finder.c +++ b/tools/perf/util/probe-finder.c @@ -989,8 +989,24 @@ static int debuginfo__find_probes(struct debuginfo *dbg, int ret = 0; #if _ELFUTILS_PREREQ(0, 142) + Elf *elf; + GElf_Ehdr ehdr; + GElf_Shdr shdr; + /* Get the call frame information from this dwarf */ - pf->cfi = dwarf_getcfi_elf(dwarf_getelf(dbg->dbg)); + elf = dwarf_getelf(dbg->dbg); + if (elf == NULL) + return -EINVAL; + + if (gelf_getehdr(elf, &ehdr) == NULL) + return -EINVAL; + + if (elf_section_by_name(elf, &ehdr, &shdr, ".eh_frame", NULL) && + shdr.sh_type == SHT_PROGBITS) { + pf->cfi = dwarf_getcfi_elf(elf); + } else { + pf->cfi = dwarf_getcfi(dbg->dbg); + } #endif off = 0; -- cgit v0.10.2 From 4e0c4a47d723c6bfdf24323cf539bd08edff7d31 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 30 Dec 2014 09:00:02 -0800 Subject: Btrfs: add more maintainers I'm lucky to have a huge amount of help on Btrfs, and want to thank everyone that sends patches, does review and helps track down bugs. Dave Sterba is a long time reviewer and contributor, and adding him to the maintainers file reflects the excellent work he has been doing for years. Signed-off-by: Chris Mason diff --git a/MAINTAINERS b/MAINTAINERS index 0ff630d..189c7c5 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2189,6 +2189,7 @@ F: drivers/gpio/gpio-bt8xx.c BTRFS FILE SYSTEM M: Chris Mason M: Josef Bacik +M: David Sterba L: linux-btrfs@vger.kernel.org W: http://btrfs.wiki.kernel.org/ Q: http://patchwork.kernel.org/project/linux-btrfs/list/ -- cgit v0.10.2 From 6b6d24b38991f72fe974215c96e0fdfe409ea50e Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 12 Dec 2014 22:30:00 +0300 Subject: Btrfs, scrub: uninitialized variable in scrub_extent_for_parity() The only way that "ret" is set is when we call scrub_pages_for_parity() so the skip to "if (ret) " test doesn't make sense and causes a static checker warning. Signed-off-by: Dan Carpenter Signed-off-by: Chris Mason diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index f2bb13a..9e1569f 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -2607,9 +2607,9 @@ static int scrub_extent_for_parity(struct scrub_parity *sparity, ret = scrub_pages_for_parity(sparity, logical, l, physical, dev, flags, gen, mirror_num, have_csum ? csum : NULL); -skip: if (ret) return ret; +skip: len -= l; logical += l; physical += l; -- cgit v0.10.2 From df95e7f0d93e2fa776b168eac798a16a1e361022 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 12 Dec 2014 16:02:20 -0500 Subject: Btrfs: abort transaction if we don't find the block group We shouldn't BUG_ON() if there is corruption. I hit this while testing my block group patch and the abort worked properly. Thanks, Signed-off-by: Josef Bacik Signed-off-by: Chris Mason diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index a80b971..1511658 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -3139,9 +3139,11 @@ static int write_one_cache_group(struct btrfs_trans_handle *trans, struct extent_buffer *leaf; ret = btrfs_search_slot(trans, extent_root, &cache->key, path, 0, 1); - if (ret < 0) + if (ret) { + if (ret > 0) + ret = -ENOENT; goto fail; - BUG_ON(ret); /* Corruption */ + } leaf = path->nodes[0]; bi = btrfs_item_ptr_offset(leaf, path->slots[0]); @@ -3149,11 +3151,9 @@ static int write_one_cache_group(struct btrfs_trans_handle *trans, btrfs_mark_buffer_dirty(leaf); btrfs_release_path(path); fail: - if (ret) { + if (ret) btrfs_abort_transaction(trans, root, ret); - return ret; - } - return 0; + return ret; } -- cgit v0.10.2 From c7cfb8a5405a34777d670f7a5441bb2c7ca9730f Mon Sep 17 00:00:00 2001 From: Wang Shilong Date: Wed, 24 Dec 2014 14:45:30 +0800 Subject: Btrfs: call inode_dec_link_count() on mkdir error path In btrfs_mkdir(), if it fails to create dir, we should clean up existed items, setting inode's link properly to make sure it could be cleaned up properly. Signed-off-by: Wang Shilong Signed-off-by: Chris Mason diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 8de2335..8a036ed 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -6255,8 +6255,10 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) out_fail: btrfs_end_transaction(trans, root); - if (drop_on_err) + if (drop_on_err) { + inode_dec_link_count(inode); iput(inode); + } btrfs_balance_delayed_items(root); btrfs_btree_balance_dirty(root); return err; -- cgit v0.10.2 From a1317f455ab936a9447f17b08e3e874c27742870 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Mon, 15 Dec 2014 16:04:42 +0000 Subject: Btrfs: correctly get tree level in tree_backref_for_extent If we are using skinny metadata, the block's tree level is in the offset of the key and not in a btrfs_tree_block_info structure following the extent item (it doesn't exist). Therefore fix it. Besides returning the correct level in the tree, this also prevents reading past the leaf's end in the case where the extent item is the last item in the leaf (eb) and it has only 1 inline reference - this is because sizeof(struct btrfs_tree_block_info) is greater than sizeof(struct btrfs_extent_inline_ref). Got it while running a scrub which produced the following warning: BTRFS: checksum error at logical 42123264 on dev /dev/sde, sector 15840: metadata node (level 24) in tree 5 Signed-off-by: Filipe Manana Reviewed-by: Satoru Takeuchi Signed-off-by: Chris Mason diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index 2d3e32e..8729cf6 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -1552,7 +1552,6 @@ int tree_backref_for_extent(unsigned long *ptr, struct extent_buffer *eb, { int ret; int type; - struct btrfs_tree_block_info *info; struct btrfs_extent_inline_ref *eiref; if (*ptr == (unsigned long)-1) @@ -1573,9 +1572,17 @@ int tree_backref_for_extent(unsigned long *ptr, struct extent_buffer *eb, } /* we can treat both ref types equally here */ - info = (struct btrfs_tree_block_info *)(ei + 1); *out_root = btrfs_extent_inline_ref_offset(eb, eiref); - *out_level = btrfs_tree_block_level(eb, info); + + if (key->type == BTRFS_EXTENT_ITEM_KEY) { + struct btrfs_tree_block_info *info; + + info = (struct btrfs_tree_block_info *)(ei + 1); + *out_level = btrfs_tree_block_level(eb, info); + } else { + ASSERT(key->type == BTRFS_METADATA_ITEM_KEY); + *out_level = (u8)key->offset; + } if (ret == 1) *ptr = (unsigned long)-1; -- cgit v0.10.2 From 6f8960541b1eb6054a642da48daae2320fddba93 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 31 Dec 2014 12:18:29 -0500 Subject: Btrfs: don't delay inode ref updates during log replay Commit 1d52c78afbb (Btrfs: try not to ENOSPC on log replay) added a check to skip delayed inode updates during log replay because it confuses the enospc code. But the delayed processing will end up ignoring delayed refs from log replay because the inode itself wasn't put through the delayed code. This can end up triggering a warning at commit time: WARNING: CPU: 2 PID: 778 at fs/btrfs/delayed-inode.c:1410 btrfs_assert_delayed_root_empty+0x32/0x34() Which is repeated for each commit because we never process the delayed inode ref update. The fix used here is to change btrfs_delayed_delete_inode_ref to return an error if we're currently in log replay. The caller will do the ref deletion immediately and everything will work properly. Signed-off-by: Chris Mason cc: stable@vger.kernel.org # v3.18 and any stable series that picked 1d52c78afbbf80b58299e076a159617d6b42fe3c diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c index 054577b..de4e70f 100644 --- a/fs/btrfs/delayed-inode.c +++ b/fs/btrfs/delayed-inode.c @@ -1857,6 +1857,14 @@ int btrfs_delayed_delete_inode_ref(struct inode *inode) { struct btrfs_delayed_node *delayed_node; + /* + * we don't do delayed inode updates during log recovery because it + * leads to enospc problems. This means we also can't do + * delayed inode refs + */ + if (BTRFS_I(inode)->root->fs_info->log_root_recovering) + return -EAGAIN; + delayed_node = btrfs_get_or_create_delayed_node(inode); if (IS_ERR(delayed_node)) return PTR_ERR(delayed_node); -- cgit v0.10.2 From ad7fefb109b0418bb4f16fc1176fd082f986698b Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Fri, 2 Jan 2015 15:16:00 -0500 Subject: Revert "ext4: fix suboptimal seek_{data,hole} extents traversial" This reverts commit 14516bb7bb6ffbd49f35389f9ece3b2045ba5815. This was causing regression test failures with generic/285 with an ext3 filesystem using CONFIG_EXT4_USE_FOR_EXT23. Signed-off-by: Theodore Ts'o diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index e5d3ead..bed4308 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -5166,8 +5166,8 @@ int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, /* fallback to generic here if not in extents fmt */ if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) - return __generic_block_fiemap(inode, fieinfo, start, len, - ext4_get_block); + return generic_block_fiemap(inode, fieinfo, start, len, + ext4_get_block); if (fiemap_check_flags(fieinfo, EXT4_FIEMAP_FLAGS)) return -EBADR; diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 513c12c..8131be8 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -273,19 +273,24 @@ static int ext4_file_open(struct inode * inode, struct file * filp) * we determine this extent as a data or a hole according to whether the * page cache has data or not. */ -static int ext4_find_unwritten_pgoff(struct inode *inode, int whence, - loff_t endoff, loff_t *offset) +static int ext4_find_unwritten_pgoff(struct inode *inode, + int whence, + struct ext4_map_blocks *map, + loff_t *offset) { struct pagevec pvec; + unsigned int blkbits; pgoff_t index; pgoff_t end; + loff_t endoff; loff_t startoff; loff_t lastoff; int found = 0; + blkbits = inode->i_sb->s_blocksize_bits; startoff = *offset; lastoff = startoff; - + endoff = (loff_t)(map->m_lblk + map->m_len) << blkbits; index = startoff >> PAGE_CACHE_SHIFT; end = endoff >> PAGE_CACHE_SHIFT; @@ -403,144 +408,147 @@ out: static loff_t ext4_seek_data(struct file *file, loff_t offset, loff_t maxsize) { struct inode *inode = file->f_mapping->host; - struct fiemap_extent_info fie; - struct fiemap_extent ext[2]; - loff_t next; - int i, ret = 0; + struct ext4_map_blocks map; + struct extent_status es; + ext4_lblk_t start, last, end; + loff_t dataoff, isize; + int blkbits; + int ret = 0; mutex_lock(&inode->i_mutex); - if (offset >= inode->i_size) { + + isize = i_size_read(inode); + if (offset >= isize) { mutex_unlock(&inode->i_mutex); return -ENXIO; } - fie.fi_flags = 0; - fie.fi_extents_max = 2; - fie.fi_extents_start = (struct fiemap_extent __user *) &ext; - while (1) { - mm_segment_t old_fs = get_fs(); - - fie.fi_extents_mapped = 0; - memset(ext, 0, sizeof(*ext) * fie.fi_extents_max); - - set_fs(get_ds()); - ret = ext4_fiemap(inode, &fie, offset, maxsize - offset); - set_fs(old_fs); - if (ret) + + blkbits = inode->i_sb->s_blocksize_bits; + start = offset >> blkbits; + last = start; + end = isize >> blkbits; + dataoff = offset; + + do { + map.m_lblk = last; + map.m_len = end - last + 1; + ret = ext4_map_blocks(NULL, inode, &map, 0); + if (ret > 0 && !(map.m_flags & EXT4_MAP_UNWRITTEN)) { + if (last != start) + dataoff = (loff_t)last << blkbits; break; + } - /* No extents found, EOF */ - if (!fie.fi_extents_mapped) { - ret = -ENXIO; + /* + * If there is a delay extent at this offset, + * it will be as a data. + */ + ext4_es_find_delayed_extent_range(inode, last, last, &es); + if (es.es_len != 0 && in_range(last, es.es_lblk, es.es_len)) { + if (last != start) + dataoff = (loff_t)last << blkbits; break; } - for (i = 0; i < fie.fi_extents_mapped; i++) { - next = (loff_t)(ext[i].fe_length + ext[i].fe_logical); - if (offset < (loff_t)ext[i].fe_logical) - offset = (loff_t)ext[i].fe_logical; - /* - * If extent is not unwritten, then it contains valid - * data, mapped or delayed. - */ - if (!(ext[i].fe_flags & FIEMAP_EXTENT_UNWRITTEN)) - goto out; + /* + * If there is a unwritten extent at this offset, + * it will be as a data or a hole according to page + * cache that has data or not. + */ + if (map.m_flags & EXT4_MAP_UNWRITTEN) { + int unwritten; + unwritten = ext4_find_unwritten_pgoff(inode, SEEK_DATA, + &map, &dataoff); + if (unwritten) + break; + } - /* - * If there is a unwritten extent at this offset, - * it will be as a data or a hole according to page - * cache that has data or not. - */ - if (ext4_find_unwritten_pgoff(inode, SEEK_DATA, - next, &offset)) - goto out; + last++; + dataoff = (loff_t)last << blkbits; + } while (last <= end); - if (ext[i].fe_flags & FIEMAP_EXTENT_LAST) { - ret = -ENXIO; - goto out; - } - offset = next; - } - } - if (offset > inode->i_size) - offset = inode->i_size; -out: mutex_unlock(&inode->i_mutex); - if (ret) - return ret; - return vfs_setpos(file, offset, maxsize); + if (dataoff > isize) + return -ENXIO; + + return vfs_setpos(file, dataoff, maxsize); } /* - * ext4_seek_hole() retrieves the offset for SEEK_HOLE + * ext4_seek_hole() retrieves the offset for SEEK_HOLE. */ static loff_t ext4_seek_hole(struct file *file, loff_t offset, loff_t maxsize) { struct inode *inode = file->f_mapping->host; - struct fiemap_extent_info fie; - struct fiemap_extent ext[2]; - loff_t next; - int i, ret = 0; + struct ext4_map_blocks map; + struct extent_status es; + ext4_lblk_t start, last, end; + loff_t holeoff, isize; + int blkbits; + int ret = 0; mutex_lock(&inode->i_mutex); - if (offset >= inode->i_size) { + + isize = i_size_read(inode); + if (offset >= isize) { mutex_unlock(&inode->i_mutex); return -ENXIO; } - fie.fi_flags = 0; - fie.fi_extents_max = 2; - fie.fi_extents_start = (struct fiemap_extent __user *)&ext; - while (1) { - mm_segment_t old_fs = get_fs(); - - fie.fi_extents_mapped = 0; - memset(ext, 0, sizeof(*ext)); + blkbits = inode->i_sb->s_blocksize_bits; + start = offset >> blkbits; + last = start; + end = isize >> blkbits; + holeoff = offset; - set_fs(get_ds()); - ret = ext4_fiemap(inode, &fie, offset, maxsize - offset); - set_fs(old_fs); - if (ret) - break; + do { + map.m_lblk = last; + map.m_len = end - last + 1; + ret = ext4_map_blocks(NULL, inode, &map, 0); + if (ret > 0 && !(map.m_flags & EXT4_MAP_UNWRITTEN)) { + last += ret; + holeoff = (loff_t)last << blkbits; + continue; + } - /* No extents found */ - if (!fie.fi_extents_mapped) - break; + /* + * If there is a delay extent at this offset, + * we will skip this extent. + */ + ext4_es_find_delayed_extent_range(inode, last, last, &es); + if (es.es_len != 0 && in_range(last, es.es_lblk, es.es_len)) { + last = es.es_lblk + es.es_len; + holeoff = (loff_t)last << blkbits; + continue; + } - for (i = 0; i < fie.fi_extents_mapped; i++) { - next = (loff_t)(ext[i].fe_logical + ext[i].fe_length); - /* - * If extent is not unwritten, then it contains valid - * data, mapped or delayed. - */ - if (!(ext[i].fe_flags & FIEMAP_EXTENT_UNWRITTEN)) { - if (offset < (loff_t)ext[i].fe_logical) - goto out; - offset = next; + /* + * If there is a unwritten extent at this offset, + * it will be as a data or a hole according to page + * cache that has data or not. + */ + if (map.m_flags & EXT4_MAP_UNWRITTEN) { + int unwritten; + unwritten = ext4_find_unwritten_pgoff(inode, SEEK_HOLE, + &map, &holeoff); + if (!unwritten) { + last += ret; + holeoff = (loff_t)last << blkbits; continue; } - /* - * If there is a unwritten extent at this offset, - * it will be as a data or a hole according to page - * cache that has data or not. - */ - if (ext4_find_unwritten_pgoff(inode, SEEK_HOLE, - next, &offset)) - goto out; - - offset = next; - if (ext[i].fe_flags & FIEMAP_EXTENT_LAST) - goto out; } - } - if (offset > inode->i_size) - offset = inode->i_size; -out: + + /* find a hole */ + break; + } while (last <= end); + mutex_unlock(&inode->i_mutex); - if (ret) - return ret; - return vfs_setpos(file, offset, maxsize); + if (holeoff > isize) + holeoff = isize; + + return vfs_setpos(file, holeoff, maxsize); } /* -- cgit v0.10.2 From 363307e6e561cde78572fd22e7fd00cd3e5adb02 Mon Sep 17 00:00:00 2001 From: Jakub Wilk Date: Fri, 2 Jan 2015 15:31:14 -0500 Subject: ext4: remove spurious KERN_INFO from ext4_warning call Signed-off-by: Jakub Wilk Signed-off-by: Theodore Ts'o diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 4fca81c..65b9c9e 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -3475,7 +3475,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_METADATA_CSUM) && EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) - ext4_warning(sb, KERN_INFO "metadata_csum and uninit_bg are " + ext4_warning(sb, "metadata_csum and uninit_bg are " "redundant flags; please run fsck."); /* Check for a known checksum algorithm */ -- cgit v0.10.2 From a51e0df4c1e06afd7aba84496c14238e6b363caa Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Tue, 30 Dec 2014 11:59:49 +0200 Subject: net/mlx4_core: Correcly update the mtt's offset in the MR re-reg flow Previously, mlx4_mt_rereg_write filled the MPT's entity_size with the old MTT's page shift, which could result in using an incorrect offset. Fix the initialization to be after we calculate the new MTT offset. In addition, assign mtt order to -1 after calling mlx4_mtt_cleanup. This is necessary in order to mark the MTT as invalid and avoid freeing it later. Fixes: e630664 ('mlx4_core: Add helper functions to support MR re-registration') Signed-off-by: Maor Gottlieb Signed-off-by: Matan Barak Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/mellanox/mlx4/mr.c b/drivers/net/ethernet/mellanox/mlx4/mr.c index d6f5496..7094a9c 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mr.c +++ b/drivers/net/ethernet/mellanox/mlx4/mr.c @@ -584,6 +584,7 @@ EXPORT_SYMBOL_GPL(mlx4_mr_free); void mlx4_mr_rereg_mem_cleanup(struct mlx4_dev *dev, struct mlx4_mr *mr) { mlx4_mtt_cleanup(dev, &mr->mtt); + mr->mtt.order = -1; } EXPORT_SYMBOL_GPL(mlx4_mr_rereg_mem_cleanup); @@ -593,14 +594,14 @@ int mlx4_mr_rereg_mem_write(struct mlx4_dev *dev, struct mlx4_mr *mr, { int err; - mpt_entry->start = cpu_to_be64(iova); - mpt_entry->length = cpu_to_be64(size); - mpt_entry->entity_size = cpu_to_be32(page_shift); - err = mlx4_mtt_init(dev, npages, page_shift, &mr->mtt); if (err) return err; + mpt_entry->start = cpu_to_be64(mr->iova); + mpt_entry->length = cpu_to_be64(mr->size); + mpt_entry->entity_size = cpu_to_be32(mr->mtt.page_shift); + mpt_entry->pd_flags &= cpu_to_be32(MLX4_MPT_PD_MASK | MLX4_MPT_PD_FLAG_EN_INV); mpt_entry->flags &= cpu_to_be32(MLX4_MPT_FLAG_FREE | -- cgit v0.10.2 From d0d012509f2ecee9af1857ccacac33b2266c6ce8 Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Tue, 30 Dec 2014 11:59:50 +0200 Subject: net/mlx4_core: Fix error flow in mlx4_init_hca() We shouldn't call UNMAP_FA here, this is done in mlx4_load_one. If mlx4_query_func fails, we need to invoke CLOSE_HCA for both native and master. Fixes: a0eacca948d2 ('net/mlx4_core: Refactor mlx4_load_one') Signed-off-by: Jack Morgenstein Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index 943cbd4..03e9eb0 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -1829,7 +1829,7 @@ static int mlx4_init_hca(struct mlx4_dev *dev) err = mlx4_dev_cap(dev, &dev_cap); if (err) { mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting\n"); - goto err_stop_fw; + return err; } choose_steering_mode(dev, &dev_cap); @@ -1860,7 +1860,7 @@ static int mlx4_init_hca(struct mlx4_dev *dev) &init_hca); if ((long long) icm_size < 0) { err = icm_size; - goto err_stop_fw; + return err; } dev->caps.max_fmr_maps = (1 << (32 - ilog2(dev->caps.num_mpts))) - 1; @@ -1874,7 +1874,7 @@ static int mlx4_init_hca(struct mlx4_dev *dev) err = mlx4_init_icm(dev, &dev_cap, &init_hca, icm_size); if (err) - goto err_stop_fw; + return err; err = mlx4_INIT_HCA(dev, &init_hca); if (err) { @@ -1886,7 +1886,7 @@ static int mlx4_init_hca(struct mlx4_dev *dev) err = mlx4_query_func(dev, &dev_cap); if (err < 0) { mlx4_err(dev, "QUERY_FUNC command failed, aborting.\n"); - goto err_stop_fw; + goto err_close; } else if (err & MLX4_QUERY_FUNC_NUM_SYS_EQS) { dev->caps.num_eqs = dev_cap.max_eqs; dev->caps.reserved_eqs = dev_cap.reserved_eqs; @@ -2006,11 +2006,6 @@ err_free_icm: if (!mlx4_is_slave(dev)) mlx4_free_icms(dev); -err_stop_fw: - if (!mlx4_is_slave(dev)) { - mlx4_UNMAP_FA(dev); - mlx4_free_icm(dev, priv->fw.fw_icm, 0); - } return err; } -- cgit v0.10.2 From c484994eb317f24693fbed6084cf1b4d984ffd3b Mon Sep 17 00:00:00 2001 From: Kostya Belezko Date: Tue, 30 Dec 2014 12:27:09 -0500 Subject: Altera TSE: Add missing phydev Altera network device doesn't come up after ifconfig eth0 down ifconfig eth0 up The reason behind is clearing priv->phydev during tse_shutdown(). The phydev is not restored back at tse_open(). Resubmiting as to follow Tobias Klauser suggestion. phy_start/phy_stop are called on each ifup/ifdown and phy_disconnect is called once during the module removal. Signed-off-by: Kostya Belezko Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/altera/altera_tse_main.c b/drivers/net/ethernet/altera/altera_tse_main.c index 3498760..760c72c 100644 --- a/drivers/net/ethernet/altera/altera_tse_main.c +++ b/drivers/net/ethernet/altera/altera_tse_main.c @@ -1170,10 +1170,6 @@ tx_request_irq_error: init_error: free_skbufs(dev); alloc_skbuf_error: - if (priv->phydev) { - phy_disconnect(priv->phydev); - priv->phydev = NULL; - } phy_error: return ret; } @@ -1186,12 +1182,9 @@ static int tse_shutdown(struct net_device *dev) int ret; unsigned long int flags; - /* Stop and disconnect the PHY */ - if (priv->phydev) { + /* Stop the PHY */ + if (priv->phydev) phy_stop(priv->phydev); - phy_disconnect(priv->phydev); - priv->phydev = NULL; - } netif_stop_queue(dev); napi_disable(&priv->napi); @@ -1525,6 +1518,10 @@ err_free_netdev: static int altera_tse_remove(struct platform_device *pdev) { struct net_device *ndev = platform_get_drvdata(pdev); + struct altera_tse_private *priv = netdev_priv(ndev); + + if (priv->phydev) + phy_disconnect(priv->phydev); platform_set_drvdata(pdev, NULL); altera_tse_mdio_destroy(ndev); -- cgit v0.10.2 From 843925f33fcc293d80acf2c5c8a78adf3344d49b Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 1 Jan 2015 00:39:23 +1100 Subject: tcp: Do not apply TSO segment limit to non-TSO packets Thomas Jarosch reported IPsec TCP stalls when a PMTU event occurs. In fact the problem was completely unrelated to IPsec. The bug is also reproducible if you just disable TSO/GSO. The problem is that when the MSS goes down, existing queued packet on the TX queue that have not been transmitted yet all look like TSO packets and get treated as such. This then triggers a bug where tcp_mss_split_point tells us to generate a zero-sized packet on the TX queue. Once that happens we're screwed because the zero-sized packet can never be removed by ACKs. Fixes: 1485348d242 ("tcp: Apply device TSO segment limit earlier") Reported-by: Thomas Jarosch Signed-off-by: Herbert Xu Cheers, Signed-off-by: David S. Miller diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 7f18262..65caf8b 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2019,7 +2019,7 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, if (unlikely(!tcp_snd_wnd_test(tp, skb, mss_now))) break; - if (tso_segs == 1) { + if (tso_segs == 1 || !max_segs) { if (unlikely(!tcp_nagle_test(tp, skb, mss_now, (tcp_skb_is_last(sk, skb) ? nonagle : TCP_NAGLE_PUSH)))) @@ -2032,7 +2032,7 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, } limit = mss_now; - if (tso_segs > 1 && !tcp_urg_mode(tp)) + if (tso_segs > 1 && max_segs && !tcp_urg_mode(tp)) limit = tcp_mss_split_point(sk, skb, mss_now, min_t(unsigned int, cwnd_quota, -- cgit v0.10.2 From 24cc59d1ebaac54d933dc0b30abcd8bd86193eef Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Wed, 31 Dec 2014 08:45:46 -0800 Subject: openvswitch: Consistently include VLAN header in flow and port stats. Until now, when VLAN acceleration was in use, the bytes of the VLAN header were not included in port or flow byte counters. They were however included when VLAN acceleration was not used. This commit corrects the inconsistency, by always including the VLAN header in byte counters. Previous discussion at http://openvswitch.org/pipermail/dev/2014-December/049521.html Reported-by: Motonori Shindo Signed-off-by: Ben Pfaff Reviewed-by: Flavio Leitner Acked-by: Pravin B Shelar Signed-off-by: David S. Miller diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index 70bef2a..da2fae0 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -70,6 +70,7 @@ void ovs_flow_stats_update(struct sw_flow *flow, __be16 tcp_flags, { struct flow_stats *stats; int node = numa_node_id(); + int len = skb->len + (vlan_tx_tag_present(skb) ? VLAN_HLEN : 0); stats = rcu_dereference(flow->stats[node]); @@ -105,7 +106,7 @@ void ovs_flow_stats_update(struct sw_flow *flow, __be16 tcp_flags, if (likely(new_stats)) { new_stats->used = jiffies; new_stats->packet_count = 1; - new_stats->byte_count = skb->len; + new_stats->byte_count = len; new_stats->tcp_flags = tcp_flags; spin_lock_init(&new_stats->lock); @@ -120,7 +121,7 @@ void ovs_flow_stats_update(struct sw_flow *flow, __be16 tcp_flags, stats->used = jiffies; stats->packet_count++; - stats->byte_count += skb->len; + stats->byte_count += len; stats->tcp_flags |= tcp_flags; unlock: spin_unlock(&stats->lock); diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c index 53f3ebb..2034c6d 100644 --- a/net/openvswitch/vport.c +++ b/net/openvswitch/vport.c @@ -480,7 +480,7 @@ void ovs_vport_receive(struct vport *vport, struct sk_buff *skb, stats = this_cpu_ptr(vport->percpu_stats); u64_stats_update_begin(&stats->syncp); stats->rx_packets++; - stats->rx_bytes += skb->len; + stats->rx_bytes += skb->len + (vlan_tx_tag_present(skb) ? VLAN_HLEN : 0); u64_stats_update_end(&stats->syncp); OVS_CB(skb)->input_vport = vport; -- cgit v0.10.2 From 531ad4282e5105db984f1706e1a21799157655a3 Mon Sep 17 00:00:00 2001 From: Kristian Evensen Date: Fri, 2 Jan 2015 16:21:45 +0100 Subject: qmi_wwan: Set random MAC on devices with buggy fw Some buggy firmwares export an incorrect MAC address (00:a0:c6:00:00:00). This makes for example checking devices for random MAC addresses tricky, and you might end up with multiple network interfaces with the same address. This patch tries to fix, or at least improve, the situation by setting the MAC address of devices with this firmware bug to a random address. I tested the patch with two devices that has this firmware bug (Huawei E398 and E392), and network traffic worked fine after changing the address. Signed-off-by: Kristian Evensen Signed-off-by: David S. Miller diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index b8a82b8..602dc66 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -56,6 +56,8 @@ struct qmi_wwan_state { /* default ethernet address used by the modem */ static const u8 default_modem_addr[ETH_ALEN] = {0x02, 0x50, 0xf3}; +static const u8 buggy_fw_addr[ETH_ALEN] = {0x00, 0xa0, 0xc6, 0x00, 0x00, 0x00}; + /* Make up an ethernet header if the packet doesn't have one. * * A firmware bug common among several devices cause them to send raw @@ -332,10 +334,12 @@ next_desc: usb_driver_release_interface(driver, info->data); } - /* Never use the same address on both ends of the link, even - * if the buggy firmware told us to. + /* Never use the same address on both ends of the link, even if the + * buggy firmware told us to. Or, if device is assigned the well-known + * buggy firmware MAC address, replace it with a random address, */ - if (ether_addr_equal(dev->net->dev_addr, default_modem_addr)) + if (ether_addr_equal(dev->net->dev_addr, default_modem_addr) || + ether_addr_equal(dev->net->dev_addr, buggy_fw_addr)) eth_hw_addr_random(dev->net); /* make MAC addr easily distinguishable from an IP header */ -- cgit v0.10.2 From 9dac6232e2ee2bc85dc71f464f19f047afc9422c Mon Sep 17 00:00:00 2001 From: Govindarajulu Varadarajan <_govind@gmx.com> Date: Fri, 2 Jan 2015 20:53:27 +0530 Subject: enic: free all rq buffs when allocation fails When allocation of all RQs fail, we do not free previously allocated buffers, before returning error. This causes memory leak. This patch fixes this by calling vnic_rq_clean(), which frees all the rq buffers. Signed-off-by: Govindarajulu Varadarajan <_govind@gmx.com> Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/cisco/enic/enic_main.c b/drivers/net/ethernet/cisco/enic/enic_main.c index 705f334..b29e027 100644 --- a/drivers/net/ethernet/cisco/enic/enic_main.c +++ b/drivers/net/ethernet/cisco/enic/enic_main.c @@ -1616,7 +1616,7 @@ static int enic_open(struct net_device *netdev) if (vnic_rq_desc_used(&enic->rq[i]) == 0) { netdev_err(netdev, "Unable to alloc receive buffers\n"); err = -ENOMEM; - goto err_out_notify_unset; + goto err_out_free_rq; } } @@ -1649,7 +1649,9 @@ static int enic_open(struct net_device *netdev) return 0; -err_out_notify_unset: +err_out_free_rq: + for (i = 0; i < enic->rq_count; i++) + vnic_rq_clean(&enic->rq[i], enic_free_rq_buf); enic_dev_notify_unset(enic); err_out_free_intr: enic_free_intr(enic); -- cgit v0.10.2 From b3505208804f3b59150cd77719f01c8b0023a865 Mon Sep 17 00:00:00 2001 From: Taesoo Kim Date: Tue, 30 Dec 2014 22:36:55 -0500 Subject: perf list: Fix --raw-dump option Currently, 'perf list --raw-dump' requires extra arguments (e.g., hw) to invoke, which breaks bash/zsh completion (perf-completion.sh). $ perf list --raw-dump Error: unknown option `raw-dump' usage: perf list [hw|sw|cache|tracepoint|pmu|event_glob] After, $ perf list --raw-dump cpu-cycles instructions cache-references cache-misses ... Signed-off-by: Taesoo Kim Acked-by: Namhyung Kim Cc: Ingo Molnar Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Taesoo kim Link: http://lkml.kernel.org/r/1419997015-11071-1-git-send-email-tsgatesv@gmail.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-list.c b/tools/perf/builtin-list.c index 011195e..198f3c3 100644 --- a/tools/perf/builtin-list.c +++ b/tools/perf/builtin-list.c @@ -19,7 +19,9 @@ int cmd_list(int argc, const char **argv, const char *prefix __maybe_unused) { int i; - const struct option list_options[] = { + bool raw_dump = false; + struct option list_options[] = { + OPT_BOOLEAN(0, "raw-dump", &raw_dump, "Dump raw events"), OPT_END() }; const char * const list_usage[] = { @@ -27,11 +29,18 @@ int cmd_list(int argc, const char **argv, const char *prefix __maybe_unused) NULL }; + set_option_flag(list_options, 0, "raw-dump", PARSE_OPT_HIDDEN); + argc = parse_options(argc, argv, list_options, list_usage, PARSE_OPT_STOP_AT_NON_OPTION); setup_pager(); + if (raw_dump) { + print_events(NULL, true); + return 0; + } + if (argc == 0) { print_events(NULL, false); return 0; @@ -53,8 +62,6 @@ int cmd_list(int argc, const char **argv, const char *prefix __maybe_unused) print_hwcache_events(NULL, false); else if (strcmp(argv[i], "pmu") == 0) print_pmu_events(NULL, false); - else if (strcmp(argv[i], "--raw-dump") == 0) - print_events(NULL, true); else { char *sep = strchr(argv[i], ':'), *s; int sep_idx; -- cgit v0.10.2 From e7024fc3783317608b8e07048116a72a7d1cd26d Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Sat, 27 Dec 2014 14:06:29 +0900 Subject: perf diff: Fix to sort by baseline field by default The currently perf diff didn't add the baseline and delta (or other compute) fields to the sort list so output will be sorted by other fields like alphabetical order of DSO or symbol as below example. Fix it by adding hpp formats for the fields and provides default compare functions. Before: $ perf diff # Event 'cycles' # # Baseline Delta Shared Object Symbol # ........ ....... .................. ............................... # [bridge] [k] ip_sabotage_in [btrfs] [k] __etree_search.constprop.47 0.01% [btrfs] [k] btrfs_file_mmap 0.01% -0.01% [btrfs] [k] btrfs_getattr [e1000e] [k] e1000_watchdog 0.00% [kernel.vmlinux] [k] PageHuge 0.00% [kernel.vmlinux] [k] __acct_update_integrals 0.00% [kernel.vmlinux] [k] __activate_page [kernel.vmlinux] [k] __alloc_fd 0.02% +0.02% [kernel.vmlinux] [k] __alloc_pages_nodemask ... After: # Baseline Delta Shared Object Symbol # ........ ....... .................. ................................ # 24.73% -4.62% perf [.] append_chain_children 7.96% -1.29% perf [.] dso__find_symbol 6.97% -2.07% libc-2.20.so [.] vfprintf 4.61% +0.88% libc-2.20.so [.] __fprintf_chk 4.41% +2.43% perf [.] sort__comm_cmp 4.10% -0.16% perf [.] comm__str 4.03% -0.93% perf [.] machine__findnew_thread_time 3.82% +3.09% perf [.] __hists__add_entry 2.95% -0.18% perf [.] sort__dso_cmp ... Signed-off-by: Namhyung Kim Cc: Ingo Molnar Cc: Jiri Olsa Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1419656793-32756-1-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c index 303c1e1..1fd96c1 100644 --- a/tools/perf/builtin-diff.c +++ b/tools/perf/builtin-diff.c @@ -545,6 +545,42 @@ hist_entry__cmp_compute(struct hist_entry *left, struct hist_entry *right, return __hist_entry__cmp_compute(p_left, p_right, c); } +static int64_t +hist_entry__cmp_nop(struct hist_entry *left __maybe_unused, + struct hist_entry *right __maybe_unused) +{ + return 0; +} + +static int64_t +hist_entry__cmp_baseline(struct hist_entry *left, struct hist_entry *right) +{ + if (sort_compute) + return 0; + + if (left->stat.period == right->stat.period) + return 0; + return left->stat.period > right->stat.period ? 1 : -1; +} + +static int64_t +hist_entry__cmp_delta(struct hist_entry *left, struct hist_entry *right) +{ + return hist_entry__cmp_compute(right, left, COMPUTE_DELTA); +} + +static int64_t +hist_entry__cmp_ratio(struct hist_entry *left, struct hist_entry *right) +{ + return hist_entry__cmp_compute(right, left, COMPUTE_RATIO); +} + +static int64_t +hist_entry__cmp_wdiff(struct hist_entry *left, struct hist_entry *right) +{ + return hist_entry__cmp_compute(right, left, COMPUTE_WEIGHTED_DIFF); +} + static void insert_hist_entry_by_compute(struct rb_root *root, struct hist_entry *he, int c) @@ -1038,27 +1074,35 @@ static void data__hpp_register(struct data__file *d, int idx) fmt->header = hpp__header; fmt->width = hpp__width; fmt->entry = hpp__entry_global; + fmt->cmp = hist_entry__cmp_nop; + fmt->collapse = hist_entry__cmp_nop; /* TODO more colors */ switch (idx) { case PERF_HPP_DIFF__BASELINE: fmt->color = hpp__color_baseline; + fmt->sort = hist_entry__cmp_baseline; break; case PERF_HPP_DIFF__DELTA: fmt->color = hpp__color_delta; + fmt->sort = hist_entry__cmp_delta; break; case PERF_HPP_DIFF__RATIO: fmt->color = hpp__color_ratio; + fmt->sort = hist_entry__cmp_ratio; break; case PERF_HPP_DIFF__WEIGHTED_DIFF: fmt->color = hpp__color_wdiff; + fmt->sort = hist_entry__cmp_wdiff; break; default: + fmt->sort = hist_entry__cmp_nop; break; } init_header(d, dfmt); perf_hpp__column_register(fmt); + perf_hpp__register_sort_field(fmt); } static void ui_init(void) -- cgit v0.10.2 From 7ce67a38f799d1fb332f672b117efbadedaa5352 Mon Sep 17 00:00:00 2001 From: Felipe Balbi Date: Fri, 2 Jan 2015 16:15:59 -0600 Subject: net: ethernet: cpsw: fix hangs with interrupts The CPSW IP implements pulse-signaled interrupts. Due to that we must write a correct, pre-defined value to the CPDMA_MACEOIVECTOR register so the controller generates a pulse on the correct IRQ line to signal the End Of Interrupt. The way the driver is written today, all four IRQ lines are requested using the same IRQ handler and, because of that, we could fall into situations where a TX IRQ fires but we tell the controller that we ended an RX IRQ (or vice-versa). This situation triggers an IRQ storm on the reserved IRQ 127 of INTC which will in turn call ack_bad_irq() which will, then, print a ton of: unexpected IRQ trap at vector 00 In order to fix the problem, we are moving all calls to cpdma_ctlr_eoi() inside the IRQ handler and making sure we *always* write the correct value to the CPDMA_MACEOIVECTOR register. Note that the algorithm assumes that IRQ numbers and value-to-be-written-to-EOI are proportional, meaning that a write of value 0 would trigger an EOI pulse for the RX_THRESHOLD Interrupt and that's the IRQ number sitting in the 0-th index of our irqs_table array. This, however, is safe at least for current implementations of CPSW so we will refrain from making the check smarter (and, as a side-effect, slower) until we actually have a platform where IRQ lines are swapped. This patch has been tested for several days with AM335x- and AM437x-based platforms. AM57x was left out because there are still pending patches to enable ethernet in mainline for that platform. A read of the TRM confirms the statement on previous paragraph. Reported-by: Yegor Yefremov Fixes: 510a1e7 (drivers: net: davinci_cpdma: acknowledge interrupt properly) Cc: # v3.9+ Signed-off-by: Felipe Balbi Acked-by: Tony Lindgren Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index c560f9a..e61ee83 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -757,6 +757,14 @@ requeue: static irqreturn_t cpsw_interrupt(int irq, void *dev_id) { struct cpsw_priv *priv = dev_id; + int value = irq - priv->irqs_table[0]; + + /* NOTICE: Ending IRQ here. The trick with the 'value' variable above + * is to make sure we will always write the correct value to the EOI + * register. Namely 0 for RX_THRESH Interrupt, 1 for RX Interrupt, 2 + * for TX Interrupt and 3 for MISC Interrupt. + */ + cpdma_ctlr_eoi(priv->dma, value); cpsw_intr_disable(priv); if (priv->irq_enabled == true) { @@ -786,8 +794,6 @@ static int cpsw_poll(struct napi_struct *napi, int budget) int num_tx, num_rx; num_tx = cpdma_chan_process(priv->txch, 128); - if (num_tx) - cpdma_ctlr_eoi(priv->dma, CPDMA_EOI_TX); num_rx = cpdma_chan_process(priv->rxch, budget); if (num_rx < budget) { @@ -795,7 +801,6 @@ static int cpsw_poll(struct napi_struct *napi, int budget) napi_complete(napi); cpsw_intr_enable(priv); - cpdma_ctlr_eoi(priv->dma, CPDMA_EOI_RX); prim_cpsw = cpsw_get_slave_priv(priv, 0); if (prim_cpsw->irq_enabled == false) { prim_cpsw->irq_enabled = true; @@ -1310,8 +1315,6 @@ static int cpsw_ndo_open(struct net_device *ndev) napi_enable(&priv->napi); cpdma_ctlr_start(priv->dma); cpsw_intr_enable(priv); - cpdma_ctlr_eoi(priv->dma, CPDMA_EOI_RX); - cpdma_ctlr_eoi(priv->dma, CPDMA_EOI_TX); prim_cpsw = cpsw_get_slave_priv(priv, 0); if (prim_cpsw->irq_enabled == false) { @@ -1578,9 +1581,6 @@ static void cpsw_ndo_tx_timeout(struct net_device *ndev) cpdma_chan_start(priv->txch); cpdma_ctlr_int_ctrl(priv->dma, true); cpsw_intr_enable(priv); - cpdma_ctlr_eoi(priv->dma, CPDMA_EOI_RX); - cpdma_ctlr_eoi(priv->dma, CPDMA_EOI_TX); - } static int cpsw_ndo_set_mac_address(struct net_device *ndev, void *p) @@ -1620,9 +1620,6 @@ static void cpsw_ndo_poll_controller(struct net_device *ndev) cpsw_interrupt(ndev->irq, priv); cpdma_ctlr_int_ctrl(priv->dma, true); cpsw_intr_enable(priv); - cpdma_ctlr_eoi(priv->dma, CPDMA_EOI_RX); - cpdma_ctlr_eoi(priv->dma, CPDMA_EOI_TX); - } #endif -- cgit v0.10.2 From 49cdd5b641933fda6324fc901eaf856924ba6a27 Mon Sep 17 00:00:00 2001 From: Daniel Mack Date: Sun, 4 Jan 2015 19:59:29 +0100 Subject: ALSA: snd-usb-caiaq: fix stream count check Commit 897c329bc ("ALSA: usb: caiaq: check for cdev->n_streams > 1") introduced a safety check to protect against bogus data provided by devices. However, the n_streams variable is already divided by CHANNELS_PER_STREAM, so the correct check is 'n_streams > 0'. Fix this to un-break support for stereo devices. Signed-off-by: Daniel Mack Cc: stable@kernel.org [v3.18+] Signed-off-by: Takashi Iwai diff --git a/sound/usb/caiaq/audio.c b/sound/usb/caiaq/audio.c index 2728447..327f864 100644 --- a/sound/usb/caiaq/audio.c +++ b/sound/usb/caiaq/audio.c @@ -816,7 +816,7 @@ int snd_usb_caiaq_audio_init(struct snd_usb_caiaqdev *cdev) return -EINVAL; } - if (cdev->n_streams < 2) { + if (cdev->n_streams < 1) { dev_err(dev, "bogus number of streams: %d\n", cdev->n_streams); return -EINVAL; } -- cgit v0.10.2 From 1e359a5de861a57aa04d92bb620f52a5c1d7f8b1 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 5 Jan 2015 10:28:49 +0100 Subject: Revert "mac80211: Fix accounting of the tailroom-needed counter" This reverts commit ca34e3b5c808385b175650605faa29e71e91991b. It turns out that the p54 and cw2100 drivers assume that there's tailroom even when they don't say they really need it. However, there's currently no way for them to explicitly say they do need it, so for now revert this. This fixes https://bugzilla.kernel.org/show_bug.cgi?id=90331. Cc: stable@vger.kernel.org Fixes: ca34e3b5c808 ("mac80211: Fix accounting of the tailroom-needed counter") Reported-by: Christopher Chavez Bisected-by: Larry Finger Debugged-by: Christian Lamparter Signed-off-by: Johannes Berg diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 58d719d..29c7be8 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1270,8 +1270,7 @@ struct ieee80211_vif *wdev_to_ieee80211_vif(struct wireless_dev *wdev); * * @IEEE80211_KEY_FLAG_GENERATE_IV: This flag should be set by the * driver to indicate that it requires IV generation for this - * particular key. Setting this flag does not necessarily mean that SKBs - * will have sufficient tailroom for ICV or MIC. + * particular key. * @IEEE80211_KEY_FLAG_GENERATE_MMIC: This flag should be set by * the driver for a TKIP key if it requires Michael MIC * generation in software. @@ -1283,9 +1282,7 @@ struct ieee80211_vif *wdev_to_ieee80211_vif(struct wireless_dev *wdev); * @IEEE80211_KEY_FLAG_PUT_IV_SPACE: This flag should be set by the driver * if space should be prepared for the IV, but the IV * itself should not be generated. Do not set together with - * @IEEE80211_KEY_FLAG_GENERATE_IV on the same key. Setting this flag does - * not necessarily mean that SKBs will have sufficient tailroom for ICV or - * MIC. + * @IEEE80211_KEY_FLAG_GENERATE_IV on the same key. * @IEEE80211_KEY_FLAG_RX_MGMT: This key will be used to decrypt received * management frames. The flag can help drivers that have a hardware * crypto implementation that doesn't deal with management frames diff --git a/net/mac80211/key.c b/net/mac80211/key.c index 0bb7038..bd4e46e 100644 --- a/net/mac80211/key.c +++ b/net/mac80211/key.c @@ -140,7 +140,9 @@ static int ieee80211_key_enable_hw_accel(struct ieee80211_key *key) if (!ret) { key->flags |= KEY_FLAG_UPLOADED_TO_HARDWARE; - if (!(key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_MMIC)) + if (!((key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_MMIC) || + (key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_IV) || + (key->conf.flags & IEEE80211_KEY_FLAG_PUT_IV_SPACE))) sdata->crypto_tx_tailroom_needed_cnt--; WARN_ON((key->conf.flags & IEEE80211_KEY_FLAG_PUT_IV_SPACE) && @@ -188,7 +190,9 @@ static void ieee80211_key_disable_hw_accel(struct ieee80211_key *key) sta = key->sta; sdata = key->sdata; - if (!(key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_MMIC)) + if (!((key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_MMIC) || + (key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_IV) || + (key->conf.flags & IEEE80211_KEY_FLAG_PUT_IV_SPACE))) increment_tailroom_need_count(sdata); ret = drv_set_key(key->local, DISABLE_KEY, sdata, @@ -884,7 +888,9 @@ void ieee80211_remove_key(struct ieee80211_key_conf *keyconf) if (key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE) { key->flags &= ~KEY_FLAG_UPLOADED_TO_HARDWARE; - if (!(key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_MMIC)) + if (!((key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_MMIC) || + (key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_IV) || + (key->conf.flags & IEEE80211_KEY_FLAG_PUT_IV_SPACE))) increment_tailroom_need_count(key->sdata); } -- cgit v0.10.2 From 0b1e95b2fa0934c3a08db483979c70d3b287f50e Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Tue, 30 Dec 2014 22:50:54 +0100 Subject: crypto: aesni - fix "by8" variant for 128 bit keys The "by8" counter mode optimization is broken for 128 bit keys with input data longer than 128 bytes. It uses the wrong key material for en- and decryption. The key registers xkey0, xkey4, xkey8 and xkey12 need to be preserved in case we're handling more than 128 bytes of input data -- they won't get reloaded after the initial load. They must therefore be (a) loaded on the first iteration and (b) be preserved for the latter ones. The implementation for 128 bit keys does not comply with (a) nor (b). Fix this by bringing the implementation back to its original source and correctly load the key registers and preserve their values by *not* re-using the registers for other purposes. Kudos to James for reporting the issue and providing a test case showing the discrepancies. Reported-by: James Yonan Cc: Chandramouli Narayanan Cc: # v3.18 Signed-off-by: Mathias Krause Signed-off-by: Herbert Xu diff --git a/arch/x86/crypto/aes_ctrby8_avx-x86_64.S b/arch/x86/crypto/aes_ctrby8_avx-x86_64.S index 2df2a02..a916c4a 100644 --- a/arch/x86/crypto/aes_ctrby8_avx-x86_64.S +++ b/arch/x86/crypto/aes_ctrby8_avx-x86_64.S @@ -208,7 +208,7 @@ ddq_add_8: .if (klen == KEY_128) .if (load_keys) - vmovdqa 3*16(p_keys), xkeyA + vmovdqa 3*16(p_keys), xkey4 .endif .else vmovdqa 3*16(p_keys), xkeyA @@ -224,7 +224,7 @@ ddq_add_8: add $(16*by), p_in .if (klen == KEY_128) - vmovdqa 4*16(p_keys), xkey4 + vmovdqa 4*16(p_keys), xkeyB .else .if (load_keys) vmovdqa 4*16(p_keys), xkey4 @@ -234,7 +234,12 @@ ddq_add_8: .set i, 0 .rept by club XDATA, i - vaesenc xkeyA, var_xdata, var_xdata /* key 3 */ + /* key 3 */ + .if (klen == KEY_128) + vaesenc xkey4, var_xdata, var_xdata + .else + vaesenc xkeyA, var_xdata, var_xdata + .endif .set i, (i +1) .endr @@ -243,13 +248,18 @@ ddq_add_8: .set i, 0 .rept by club XDATA, i - vaesenc xkey4, var_xdata, var_xdata /* key 4 */ + /* key 4 */ + .if (klen == KEY_128) + vaesenc xkeyB, var_xdata, var_xdata + .else + vaesenc xkey4, var_xdata, var_xdata + .endif .set i, (i +1) .endr .if (klen == KEY_128) .if (load_keys) - vmovdqa 6*16(p_keys), xkeyB + vmovdqa 6*16(p_keys), xkey8 .endif .else vmovdqa 6*16(p_keys), xkeyB @@ -267,12 +277,17 @@ ddq_add_8: .set i, 0 .rept by club XDATA, i - vaesenc xkeyB, var_xdata, var_xdata /* key 6 */ + /* key 6 */ + .if (klen == KEY_128) + vaesenc xkey8, var_xdata, var_xdata + .else + vaesenc xkeyB, var_xdata, var_xdata + .endif .set i, (i +1) .endr .if (klen == KEY_128) - vmovdqa 8*16(p_keys), xkey8 + vmovdqa 8*16(p_keys), xkeyB .else .if (load_keys) vmovdqa 8*16(p_keys), xkey8 @@ -288,7 +303,7 @@ ddq_add_8: .if (klen == KEY_128) .if (load_keys) - vmovdqa 9*16(p_keys), xkeyA + vmovdqa 9*16(p_keys), xkey12 .endif .else vmovdqa 9*16(p_keys), xkeyA @@ -297,7 +312,12 @@ ddq_add_8: .set i, 0 .rept by club XDATA, i - vaesenc xkey8, var_xdata, var_xdata /* key 8 */ + /* key 8 */ + .if (klen == KEY_128) + vaesenc xkeyB, var_xdata, var_xdata + .else + vaesenc xkey8, var_xdata, var_xdata + .endif .set i, (i +1) .endr @@ -306,7 +326,12 @@ ddq_add_8: .set i, 0 .rept by club XDATA, i - vaesenc xkeyA, var_xdata, var_xdata /* key 9 */ + /* key 9 */ + .if (klen == KEY_128) + vaesenc xkey12, var_xdata, var_xdata + .else + vaesenc xkeyA, var_xdata, var_xdata + .endif .set i, (i +1) .endr @@ -412,7 +437,6 @@ ddq_add_8: /* main body of aes ctr load */ .macro do_aes_ctrmain key_len - cmp $16, num_bytes jb .Ldo_return2\key_len -- cgit v0.10.2 From 0b8c960cf6defc56b3aa1a71b5af95872b6dff2b Mon Sep 17 00:00:00 2001 From: Vinson Lee Date: Mon, 29 Dec 2014 16:20:39 -0800 Subject: crypto: sha-mb - Add avx2_supported check. This patch fixes this allyesconfig target build error with older binutils. LD arch/x86/crypto/built-in.o ld: arch/x86/crypto/sha-mb/built-in.o: No such file: No such file or directory Cc: stable@vger.kernel.org # 3.18+ Signed-off-by: Vinson Lee Signed-off-by: Herbert Xu diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile index fd0f848..5a4a089 100644 --- a/arch/x86/crypto/Makefile +++ b/arch/x86/crypto/Makefile @@ -26,7 +26,6 @@ obj-$(CONFIG_CRYPTO_GHASH_CLMUL_NI_INTEL) += ghash-clmulni-intel.o obj-$(CONFIG_CRYPTO_CRC32C_INTEL) += crc32c-intel.o obj-$(CONFIG_CRYPTO_SHA1_SSSE3) += sha1-ssse3.o -obj-$(CONFIG_CRYPTO_SHA1_MB) += sha-mb/ obj-$(CONFIG_CRYPTO_CRC32_PCLMUL) += crc32-pclmul.o obj-$(CONFIG_CRYPTO_SHA256_SSSE3) += sha256-ssse3.o obj-$(CONFIG_CRYPTO_SHA512_SSSE3) += sha512-ssse3.o @@ -46,6 +45,7 @@ endif ifeq ($(avx2_supported),yes) obj-$(CONFIG_CRYPTO_CAMELLIA_AESNI_AVX2_X86_64) += camellia-aesni-avx2.o obj-$(CONFIG_CRYPTO_SERPENT_AVX2_X86_64) += serpent-avx2.o + obj-$(CONFIG_CRYPTO_SHA1_MB) += sha-mb/ endif aes-i586-y := aes-i586-asm_32.o aes_glue.o -- cgit v0.10.2 From 04561ca5c7703738c1fab64078f44175940cc6d8 Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Mon, 15 Dec 2014 20:41:13 +0200 Subject: iommu/ipmmu-vmsa: Change IOMMU_EXEC to IOMMU_NOEXEC Commit a720b41c41f5a7e4 ("iommu/arm-smmu: change IOMMU_EXEC to IOMMU_NOEXEC") has inverted and replaced the IOMMU_EXEC flag with IOMMU_NOEXEC. Update the driver accordingly. Signed-off-by: Laurent Pinchart Signed-off-by: Joerg Roedel diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c index 68dfb0f..7486931 100644 --- a/drivers/iommu/ipmmu-vmsa.c +++ b/drivers/iommu/ipmmu-vmsa.c @@ -558,7 +558,7 @@ static pmd_t *ipmmu_alloc_pmd(struct ipmmu_vmsa_device *mmu, pgd_t *pgd, static u64 ipmmu_page_prot(unsigned int prot, u64 type) { - u64 pgprot = ARM_VMSA_PTE_XN | ARM_VMSA_PTE_nG | ARM_VMSA_PTE_AF + u64 pgprot = ARM_VMSA_PTE_nG | ARM_VMSA_PTE_AF | ARM_VMSA_PTE_SH_IS | ARM_VMSA_PTE_AP_UNPRIV | ARM_VMSA_PTE_NS | type; @@ -568,8 +568,8 @@ static u64 ipmmu_page_prot(unsigned int prot, u64 type) if (prot & IOMMU_CACHE) pgprot |= IMMAIR_ATTR_IDX_WBRWA << ARM_VMSA_PTE_ATTRINDX_SHIFT; - if (prot & IOMMU_EXEC) - pgprot &= ~ARM_VMSA_PTE_XN; + if (prot & IOMMU_NOEXEC) + pgprot |= ARM_VMSA_PTE_XN; else if (!(prot & (IOMMU_READ | IOMMU_WRITE))) /* If no access create a faulting entry to avoid TLB fills. */ pgprot &= ~ARM_VMSA_PTE_PAGE; -- cgit v0.10.2 From 62c22167dd70b730f61c2b88f950e98154a87980 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 9 Dec 2014 12:56:45 +0100 Subject: iommu/vt-d: Fix dmar_domain leak in iommu_attach_device Since commit 1196c2f a domain is only destroyed in the notifier path if it is hot-unplugged. This caused a domain leakage in iommu_attach_device when a driver was unbound from the device and bound to VFIO. In this case the device is attached to a new domain and unlinked from the old domain. At this point nothing points to the old domain anymore and its memory is leaked. Fix this by explicitly freeing the old domain in iommu_attach_domain. Fixes: 1196c2f (iommu/vt-d: Fix dmar_domain leak in iommu_attach_device) Cc: stable@vger.kernel.org # v3.18 Tested-by: Jerry Hoemann Signed-off-by: Joerg Roedel diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 1232336..7610121 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -4428,6 +4428,10 @@ static int intel_iommu_attach_device(struct iommu_domain *domain, domain_remove_one_dev_info(old_domain, dev); else domain_remove_dev_info(old_domain); + + if (!domain_type_is_vm_or_si(old_domain) && + list_empty(&old_domain->devices)) + domain_exit(old_domain); } } -- cgit v0.10.2 From 6d1b9cc9ee10374290583f69270e97de38128fb6 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 9 Dec 2014 13:18:22 +0100 Subject: iommu/vt-d: Remove dead code in device_notifier This code only runs when action == BUS_NOTIFY_REMOVED_DEVICE, so it can't be BUS_NOTIFY_DEL_DEVICE. Signed-off-by: Joerg Roedel diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 7610121..40dfbc0 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -4029,14 +4029,6 @@ static int device_notifier(struct notifier_block *nb, if (action != BUS_NOTIFY_REMOVED_DEVICE) return 0; - /* - * If the device is still attached to a device driver we can't - * tear down the domain yet as DMA mappings may still be in use. - * Wait for the BUS_NOTIFY_UNBOUND_DRIVER event to do that. - */ - if (action == BUS_NOTIFY_DEL_DEVICE && dev->driver != NULL) - return 0; - domain = find_domain(dev); if (!domain) return 0; -- cgit v0.10.2 From 2c0ee8b85aae427fa8432788ddf05c8a87510657 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Sun, 21 Dec 2014 22:14:30 +0100 Subject: iommu/rockchip: Drop owner assignment from platform_drivers This platform_driver does not need to set an owner, it will be populated by the driver core. Signed-off-by: Wolfram Sang Signed-off-by: Joerg Roedel diff --git a/drivers/iommu/rockchip-iommu.c b/drivers/iommu/rockchip-iommu.c index b2023af..6a8b1ec 100644 --- a/drivers/iommu/rockchip-iommu.c +++ b/drivers/iommu/rockchip-iommu.c @@ -1009,7 +1009,6 @@ static struct platform_driver rk_iommu_driver = { .remove = rk_iommu_remove, .driver = { .name = "rk_iommu", - .owner = THIS_MODULE, .of_match_table = of_match_ptr(rk_iommu_dt_ids), }, }; -- cgit v0.10.2 From c507de88f6a336bd7296c9ec0073b2d4af8b4f5e Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 5 Jan 2015 13:27:33 +0100 Subject: ALSA: hda - Fix wrong gpio_dir & gpio_mask hint setups for IDT/STAC codecs stac_store_hints() does utterly wrong for masking the values for gpio_dir and gpio_data, likely due to copy&paste errors. Fortunately, this feature is used very rarely, so the impact must be really small. Reported-by: Rasmus Villemoes Cc: Signed-off-by: Takashi Iwai diff --git a/sound/pci/hda/patch_sigmatel.c b/sound/pci/hda/patch_sigmatel.c index 4f6413e..605d140 100644 --- a/sound/pci/hda/patch_sigmatel.c +++ b/sound/pci/hda/patch_sigmatel.c @@ -568,9 +568,9 @@ static void stac_store_hints(struct hda_codec *codec) spec->gpio_mask; } if (get_int_hint(codec, "gpio_dir", &spec->gpio_dir)) - spec->gpio_mask &= spec->gpio_mask; - if (get_int_hint(codec, "gpio_data", &spec->gpio_data)) spec->gpio_dir &= spec->gpio_mask; + if (get_int_hint(codec, "gpio_data", &spec->gpio_data)) + spec->gpio_data &= spec->gpio_mask; if (get_int_hint(codec, "eapd_mask", &spec->eapd_mask)) spec->eapd_mask &= spec->gpio_mask; if (get_int_hint(codec, "gpio_mute", &spec->gpio_mute)) -- cgit v0.10.2 From 2030664b709caa769f2b6a1d2e71d8cb343c6884 Mon Sep 17 00:00:00 2001 From: Ben Goz Date: Mon, 5 Jan 2015 15:48:28 +0200 Subject: drm/amdkfd: unmap VMID<-->PASID when relesing VMID (non-HWS) This patch fixes a bug where deallocate_vmid() didn't actually unmap the VMID<-->PASID mapping (in the registers). That can cause undefined behavior. This bug only occurs in non-HWS mode. Signed-off-by: Ben Goz Signed-off-by: Oded Gabbay Acked-by: Alex Deucher diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index 3b08ed6..9c8961d 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -161,6 +161,9 @@ static void deallocate_vmid(struct device_queue_manager *dqm, { int bit = qpd->vmid - KFD_VMID_START_OFFSET; + /* Release the vmid mapping */ + set_pasid_vmid_mapping(dqm, 0, qpd->vmid); + set_bit(bit, (unsigned long *)&dqm->vmid_bitmap); qpd->vmid = 0; q->properties.vmid = 0; -- cgit v0.10.2 From a3a8784454692dd72e5d5d34dcdab17b4420e74c Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Mon, 29 Dec 2014 09:39:01 -0500 Subject: KEYS: close race between key lookup and freeing When a key is being garbage collected, it's key->user would get put before the ->destroy() callback is called, where the key is removed from it's respective tracking structures. This leaves a key hanging in a semi-invalid state which leaves a window open for a different task to try an access key->user. An example is find_keyring_by_name() which would dereference key->user for a key that is in the process of being garbage collected (where key->user was freed but ->destroy() wasn't called yet - so it's still present in the linked list). This would cause either a panic, or corrupt memory. Fixes CVE-2014-9529. Signed-off-by: Sasha Levin Signed-off-by: David Howells diff --git a/security/keys/gc.c b/security/keys/gc.c index 9609a7f..c795237 100644 --- a/security/keys/gc.c +++ b/security/keys/gc.c @@ -148,12 +148,12 @@ static noinline void key_gc_unused_keys(struct list_head *keys) if (test_bit(KEY_FLAG_INSTANTIATED, &key->flags)) atomic_dec(&key->user->nikeys); - key_user_put(key->user); - /* now throw away the key memory */ if (key->type->destroy) key->type->destroy(key); + key_user_put(key->user); + kfree(key->description); #ifdef KEY_DEBUGGING -- cgit v0.10.2 From fbedf1c3fc3a1e9f249c2efe2f4553d8df9d86d3 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 5 Dec 2014 13:46:07 -0500 Subject: drm/radeon: KV has three PPLLs (v2) Enable all three in the driver. Early documentation indicated the 3rd one was used for something else, but that is not the case. v2: handle disable as well Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org diff --git a/drivers/gpu/drm/radeon/atombios_crtc.c b/drivers/gpu/drm/radeon/atombios_crtc.c index d59ec49..ed644a4 100644 --- a/drivers/gpu/drm/radeon/atombios_crtc.c +++ b/drivers/gpu/drm/radeon/atombios_crtc.c @@ -1851,10 +1851,9 @@ static int radeon_atom_pick_pll(struct drm_crtc *crtc) return pll; } /* otherwise, pick one of the plls */ - if ((rdev->family == CHIP_KAVERI) || - (rdev->family == CHIP_KABINI) || + if ((rdev->family == CHIP_KABINI) || (rdev->family == CHIP_MULLINS)) { - /* KB/KV/ML has PPLL1 and PPLL2 */ + /* KB/ML has PPLL1 and PPLL2 */ pll_in_use = radeon_get_pll_use_mask(crtc); if (!(pll_in_use & (1 << ATOM_PPLL2))) return ATOM_PPLL2; @@ -1863,7 +1862,7 @@ static int radeon_atom_pick_pll(struct drm_crtc *crtc) DRM_ERROR("unable to allocate a PPLL\n"); return ATOM_PPLL_INVALID; } else { - /* CI has PPLL0, PPLL1, and PPLL2 */ + /* CI/KV has PPLL0, PPLL1, and PPLL2 */ pll_in_use = radeon_get_pll_use_mask(crtc); if (!(pll_in_use & (1 << ATOM_PPLL2))) return ATOM_PPLL2; @@ -2155,6 +2154,7 @@ static void atombios_crtc_disable(struct drm_crtc *crtc) case ATOM_PPLL0: /* disable the ppll */ if ((rdev->family == CHIP_ARUBA) || + (rdev->family == CHIP_KAVERI) || (rdev->family == CHIP_BONAIRE) || (rdev->family == CHIP_HAWAII)) atombios_crtc_program_pll(crtc, radeon_crtc->crtc_id, radeon_crtc->pll_id, -- cgit v0.10.2 From 5665c3ebe5ee8a2c516925461f7214ba59c2e6d7 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 9 Dec 2014 10:04:01 -0500 Subject: drm/radeon: fix sad_count check for dce3 Make it consistent with the sad code for other asics to deal with monitors that don't report sads. bug: https://bugzilla.kernel.org/show_bug.cgi?id=89461 Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org diff --git a/drivers/gpu/drm/radeon/dce3_1_afmt.c b/drivers/gpu/drm/radeon/dce3_1_afmt.c index 2fe8cfc..bafdf92 100644 --- a/drivers/gpu/drm/radeon/dce3_1_afmt.c +++ b/drivers/gpu/drm/radeon/dce3_1_afmt.c @@ -103,7 +103,7 @@ static void dce3_2_afmt_write_sad_regs(struct drm_encoder *encoder) } sad_count = drm_edid_to_sad(radeon_connector->edid, &sads); - if (sad_count < 0) { + if (sad_count <= 0) { DRM_ERROR("Couldn't read SADs: %d\n", sad_count); return; } -- cgit v0.10.2 From 410cce2a6b82299b46ff316c6384e789ce275ecb Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 10 Dec 2014 09:42:10 -0500 Subject: drm/radeon: properly filter DP1.2 4k modes on non-DP1.2 hw The check was already in place in the dp mode_valid check, but radeon_dp_get_dp_link_clock() never returned the high clock mode_valid was checking for because that function clipped the clock based on the hw capabilities. Add an explicit check in the mode_valid function. bug: https://bugs.freedesktop.org/show_bug.cgi?id=87172 Signed-off-by: Alex Deucher Cc:stable@vge.kernel.org diff --git a/drivers/gpu/drm/radeon/atombios_dp.c b/drivers/gpu/drm/radeon/atombios_dp.c index 11ba9d2..db42a67 100644 --- a/drivers/gpu/drm/radeon/atombios_dp.c +++ b/drivers/gpu/drm/radeon/atombios_dp.c @@ -492,6 +492,10 @@ int radeon_dp_mode_valid_helper(struct drm_connector *connector, struct radeon_connector_atom_dig *dig_connector; int dp_clock; + if ((mode->clock > 340000) && + (!radeon_connector_is_dp12_capable(connector))) + return MODE_CLOCK_HIGH; + if (!radeon_connector->con_priv) return MODE_CLOCK_HIGH; dig_connector = radeon_connector->con_priv; -- cgit v0.10.2 From 02ae7af53a451a1b0a51022c4693f5b339133e79 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 15 Dec 2014 17:24:19 -0500 Subject: drm/radeon: adjust default bapm settings for KV Enabling bapm seems to cause clocking problems on some KV configurations. Disable it by default for now. Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org diff --git a/drivers/gpu/drm/radeon/kv_dpm.c b/drivers/gpu/drm/radeon/kv_dpm.c index 9b42001..e3e9c10 100644 --- a/drivers/gpu/drm/radeon/kv_dpm.c +++ b/drivers/gpu/drm/radeon/kv_dpm.c @@ -2745,13 +2745,11 @@ int kv_dpm_init(struct radeon_device *rdev) pi->enable_auto_thermal_throttling = true; pi->disable_nb_ps3_in_battery = false; if (radeon_bapm == -1) { - /* There are stability issues reported on with - * bapm enabled on an asrock system. - */ - if (rdev->pdev->subsystem_vendor == 0x1849) - pi->bapm_enable = false; - else + /* only enable bapm on KB, ML by default */ + if (rdev->family == CHIP_KABINI || rdev->family == CHIP_MULLINS) pi->bapm_enable = true; + else + pi->bapm_enable = false; } else if (radeon_bapm == 0) { pi->bapm_enable = false; } else { -- cgit v0.10.2 From dd5a74f2f982193620cfa1ef609df1ee805781d4 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 23 Dec 2014 12:56:49 +0300 Subject: drm/radeon: integer underflow in radeon_cp_dispatch_texture() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The test: if (size > RADEON_MAX_TEXTURE_SIZE) { "size" is an integer and it's controled by the user so it can be negative and the test can underflow. Later we use "size" in: dwords = size / 4; ... RADEON_COPY_MT(buffer, data, (int)(dwords * sizeof(u32))); It causes memory corruption to copy a negative size buffer. Signed-off-by: Dan Carpenter Reviewed-by: Christian König Signed-off-by: Alex Deucher diff --git a/drivers/gpu/drm/radeon/radeon_state.c b/drivers/gpu/drm/radeon/radeon_state.c index 535403e..15aee72 100644 --- a/drivers/gpu/drm/radeon/radeon_state.c +++ b/drivers/gpu/drm/radeon/radeon_state.c @@ -1703,7 +1703,7 @@ static int radeon_cp_dispatch_texture(struct drm_device * dev, u32 format; u32 *buffer; const u8 __user *data; - int size, dwords, tex_width, blit_width, spitch; + unsigned int size, dwords, tex_width, blit_width, spitch; u32 height; int i; u32 texpitch, microtile; -- cgit v0.10.2 From 975f7d467480a11864d71a10dee908b83c5e682b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bruno=20Pr=C3=A9mont?= Date: Fri, 19 Dec 2014 10:29:16 +0100 Subject: qla2xxx: fix busy wait regression MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit e05fe29248 (qla2xxx: Honor FCP_RSP retry delay timer field.) causes systems to busy-wait for about 3 minutes after boot prior to detecting SAN disks. During this wait period one kworker is running full-time (though /proc//stack has no useful data). Another kworker is waiting for IO to complete during that whole time period. Looking at drivers/scsi/qla2xxx/qla_os.c, fcport->retry_delay_timestamp has a special value of 0 though that 0 value forces system to wait when jiffies is very large value (e.g. 4294952605 - "negative" value when signed on 32bit systems). Signed-off-by: Bruno Prémont Acked-by: Chad Dupuis Signed-off-by: Christoph Hellwig diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index 12ca291..cce1cbc 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -734,7 +734,9 @@ qla2xxx_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd) * Return target busy if we've received a non-zero retry_delay_timer * in a FCP_RSP. */ - if (time_after(jiffies, fcport->retry_delay_timestamp)) + if (fcport->retry_delay_timestamp == 0) { + /* retry delay not set */ + } else if (time_after(jiffies, fcport->retry_delay_timestamp)) fcport->retry_delay_timestamp = 0; else goto qc24_target_busy; -- cgit v0.10.2 From b739896dd262ca523497913b0ea232da56d6ee69 Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Mon, 5 Jan 2015 11:25:19 -0800 Subject: [IA64] Enable execveat syscall for ia64 See commit 51f39a1f0cea1cacf8c787f652f26dfee9611874 syscalls: implement execveat() system call Signed-off-by: Tony Luck diff --git a/arch/ia64/include/asm/unistd.h b/arch/ia64/include/asm/unistd.h index f3b51b5..95c39b9 100644 --- a/arch/ia64/include/asm/unistd.h +++ b/arch/ia64/include/asm/unistd.h @@ -11,7 +11,7 @@ -#define NR_syscalls 318 /* length of syscall table */ +#define NR_syscalls 319 /* length of syscall table */ /* * The following defines stop scripts/checksyscalls.sh from complaining about diff --git a/arch/ia64/include/uapi/asm/unistd.h b/arch/ia64/include/uapi/asm/unistd.h index 4c2240c..4610795 100644 --- a/arch/ia64/include/uapi/asm/unistd.h +++ b/arch/ia64/include/uapi/asm/unistd.h @@ -331,5 +331,6 @@ #define __NR_getrandom 1339 #define __NR_memfd_create 1340 #define __NR_bpf 1341 +#define __NR_execveat 1342 #endif /* _UAPI_ASM_IA64_UNISTD_H */ diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S index f5e96df..fcf8b8c 100644 --- a/arch/ia64/kernel/entry.S +++ b/arch/ia64/kernel/entry.S @@ -1779,6 +1779,7 @@ sys_call_table: data8 sys_getrandom data8 sys_memfd_create // 1340 data8 sys_bpf + data8 sys_execveat .org sys_call_table + 8*NR_syscalls // guard against failures to increase NR_syscalls #endif /* __IA64_ASM_PARAVIRTUALIZED_NATIVE */ -- cgit v0.10.2 From 1b1f3e1699a9886f1070f94171097ab4ccdbfc95 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 1 Jan 2015 23:38:28 +0100 Subject: ACPI / PM: Fix PM initialization for devices that are not present If an ACPI device object whose _STA returns 0 (not present and not functional) has _PR0 or _PS0, its power_manageable flag will be set and acpi_bus_init_power() will return 0 for it. Consequently, if such a device object is passed to the ACPI device PM functions, they will attempt to carry out the requested operation on the device, although they should not do that for devices that are not present. To fix that problem make acpi_bus_init_power() return an error code for devices that are not present which will cause power_manageable to be cleared for them as appropriate in acpi_bus_get_power_flags(). However, the lists of power resources should not be freed for the device in that case, so modify acpi_bus_get_power_flags() to keep those lists even if acpi_bus_init_power() returns an error. Accordingly, when deciding whether or not the lists of power resources need to be freed, acpi_free_power_resources_lists() should check the power.flags.power_resources flag instead of flags.power_manageable, so make that change too. Furthermore, if acpi_bus_attach() sees that flags.initialized is unset for the given device, it should reset the power management settings of the device and re-initialize them from scratch instead of relying on the previous settings (the device may have appeared after being not present previously, for example), so make it use the 'valid' flag of the D0 power state as the initial value of flags.power_manageable for it and call acpi_bus_init_power() to discover its current power state. Signed-off-by: Rafael J. Wysocki Reviewed-by: Mika Westerberg Cc: 3.10+ # 3.10+ diff --git a/drivers/acpi/device_pm.c b/drivers/acpi/device_pm.c index c2daa85..c0d44d3 100644 --- a/drivers/acpi/device_pm.c +++ b/drivers/acpi/device_pm.c @@ -257,7 +257,7 @@ int acpi_bus_init_power(struct acpi_device *device) device->power.state = ACPI_STATE_UNKNOWN; if (!acpi_device_is_present(device)) - return 0; + return -ENXIO; result = acpi_device_get_power(device, &state); if (result) diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c index 16914cc..dc4d896 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c @@ -1001,7 +1001,7 @@ static void acpi_free_power_resources_lists(struct acpi_device *device) if (device->wakeup.flags.valid) acpi_power_resources_list_free(&device->wakeup.resources); - if (!device->flags.power_manageable) + if (!device->power.flags.power_resources) return; for (i = ACPI_STATE_D0; i <= ACPI_STATE_D3_HOT; i++) { @@ -1744,10 +1744,8 @@ static void acpi_bus_get_power_flags(struct acpi_device *device) device->power.flags.power_resources) device->power.states[ACPI_STATE_D3_COLD].flags.os_accessible = 1; - if (acpi_bus_init_power(device)) { - acpi_free_power_resources_lists(device); + if (acpi_bus_init_power(device)) device->flags.power_manageable = 0; - } } static void acpi_bus_get_flags(struct acpi_device *device) @@ -2371,13 +2369,18 @@ static void acpi_bus_attach(struct acpi_device *device) /* Skip devices that are not present. */ if (!acpi_device_is_present(device)) { device->flags.visited = false; + device->flags.power_manageable = 0; return; } if (device->handler) goto ok; if (!device->flags.initialized) { - acpi_bus_update_power(device, NULL); + device->flags.power_manageable = + device->power.states[ACPI_STATE_D0].flags.valid; + if (acpi_bus_init_power(device)) + device->flags.power_manageable = 0; + device->flags.initialized = true; } device->flags.visited = false; -- cgit v0.10.2 From af8f3f514d193eb353f9b6cea503c55d074e6153 Mon Sep 17 00:00:00 2001 From: Hanjun Guo Date: Sun, 4 Jan 2015 18:55:02 +0800 Subject: ACPI / processor: Convert apic_id to phys_id to make it arch agnostic apic_id in MADT table is the CPU hardware id which identify it self in the system for x86 and ia64, OSPM will use it for SMP init to map APIC ID to logical cpu number in the early boot, when the DSDT/SSDT (ACPI namespace) is scanned later, the ACPI processor driver is probed and the driver will use acpi_id in DSDT to get the apic_id, then map to the logical cpu number which is needed by the processor driver. Before ACPI 5.0, only x86 and ia64 were supported in ACPI spec, so apic_id is used both in arch code and ACPI core which is pretty fine. Since ACPI 5.0, ARM is supported by ACPI and APIC is not available on ARM, this will confuse people when apic_id is both used by x86 and ARM in one function. So convert apic_id to phys_id (which is the original meaning) in ACPI processor dirver to make it arch agnostic, but leave the arch dependent code unchanged, no functional change. Signed-off-by: Hanjun Guo Signed-off-by: Rafael J. Wysocki diff --git a/drivers/acpi/acpi_processor.c b/drivers/acpi/acpi_processor.c index 1fdf5e0..f02b29e 100644 --- a/drivers/acpi/acpi_processor.c +++ b/drivers/acpi/acpi_processor.c @@ -170,7 +170,7 @@ static int acpi_processor_hotadd_init(struct acpi_processor *pr) acpi_status status; int ret; - if (pr->apic_id == -1) + if (pr->phys_id == -1) return -ENODEV; status = acpi_evaluate_integer(pr->handle, "_STA", NULL, &sta); @@ -180,7 +180,7 @@ static int acpi_processor_hotadd_init(struct acpi_processor *pr) cpu_maps_update_begin(); cpu_hotplug_begin(); - ret = acpi_map_lsapic(pr->handle, pr->apic_id, &pr->id); + ret = acpi_map_lsapic(pr->handle, pr->phys_id, &pr->id); if (ret) goto out; @@ -215,7 +215,7 @@ static int acpi_processor_get_info(struct acpi_device *device) union acpi_object object = { 0 }; struct acpi_buffer buffer = { sizeof(union acpi_object), &object }; struct acpi_processor *pr = acpi_driver_data(device); - int apic_id, cpu_index, device_declaration = 0; + int phys_id, cpu_index, device_declaration = 0; acpi_status status = AE_OK; static int cpu0_initialized; unsigned long long value; @@ -262,15 +262,18 @@ static int acpi_processor_get_info(struct acpi_device *device) pr->acpi_id = value; } - apic_id = acpi_get_apicid(pr->handle, device_declaration, pr->acpi_id); - if (apic_id < 0) - acpi_handle_debug(pr->handle, "failed to get CPU APIC ID.\n"); - pr->apic_id = apic_id; + phys_id = acpi_get_phys_id(pr->handle, device_declaration, pr->acpi_id); + if (phys_id < 0) + acpi_handle_debug(pr->handle, "failed to get CPU physical ID.\n"); + pr->phys_id = phys_id; - cpu_index = acpi_map_cpuid(pr->apic_id, pr->acpi_id); + cpu_index = acpi_map_cpuid(pr->phys_id, pr->acpi_id); if (!cpu0_initialized && !acpi_has_cpu_in_madt()) { cpu0_initialized = 1; - /* Handle UP system running SMP kernel, with no LAPIC in MADT */ + /* + * Handle UP system running SMP kernel, with no CPU + * entry in MADT + */ if ((cpu_index == -1) && (num_online_cpus() == 1)) cpu_index = 0; } diff --git a/drivers/acpi/processor_core.c b/drivers/acpi/processor_core.c index 342942f..02e4839 100644 --- a/drivers/acpi/processor_core.c +++ b/drivers/acpi/processor_core.c @@ -69,7 +69,7 @@ static int map_madt_entry(int type, u32 acpi_id) unsigned long madt_end, entry; static struct acpi_table_madt *madt; static int read_madt; - int apic_id = -1; + int phys_id = -1; /* CPU hardware ID */ if (!read_madt) { if (ACPI_FAILURE(acpi_get_table(ACPI_SIG_MADT, 0, @@ -79,7 +79,7 @@ static int map_madt_entry(int type, u32 acpi_id) } if (!madt) - return apic_id; + return phys_id; entry = (unsigned long)madt; madt_end = entry + madt->header.length; @@ -91,18 +91,18 @@ static int map_madt_entry(int type, u32 acpi_id) struct acpi_subtable_header *header = (struct acpi_subtable_header *)entry; if (header->type == ACPI_MADT_TYPE_LOCAL_APIC) { - if (!map_lapic_id(header, acpi_id, &apic_id)) + if (!map_lapic_id(header, acpi_id, &phys_id)) break; } else if (header->type == ACPI_MADT_TYPE_LOCAL_X2APIC) { - if (!map_x2apic_id(header, type, acpi_id, &apic_id)) + if (!map_x2apic_id(header, type, acpi_id, &phys_id)) break; } else if (header->type == ACPI_MADT_TYPE_LOCAL_SAPIC) { - if (!map_lsapic_id(header, type, acpi_id, &apic_id)) + if (!map_lsapic_id(header, type, acpi_id, &phys_id)) break; } entry += header->length; } - return apic_id; + return phys_id; } static int map_mat_entry(acpi_handle handle, int type, u32 acpi_id) @@ -110,7 +110,7 @@ static int map_mat_entry(acpi_handle handle, int type, u32 acpi_id) struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL }; union acpi_object *obj; struct acpi_subtable_header *header; - int apic_id = -1; + int phys_id = -1; if (ACPI_FAILURE(acpi_evaluate_object(handle, "_MAT", NULL, &buffer))) goto exit; @@ -126,38 +126,38 @@ static int map_mat_entry(acpi_handle handle, int type, u32 acpi_id) header = (struct acpi_subtable_header *)obj->buffer.pointer; if (header->type == ACPI_MADT_TYPE_LOCAL_APIC) - map_lapic_id(header, acpi_id, &apic_id); + map_lapic_id(header, acpi_id, &phys_id); else if (header->type == ACPI_MADT_TYPE_LOCAL_SAPIC) - map_lsapic_id(header, type, acpi_id, &apic_id); + map_lsapic_id(header, type, acpi_id, &phys_id); else if (header->type == ACPI_MADT_TYPE_LOCAL_X2APIC) - map_x2apic_id(header, type, acpi_id, &apic_id); + map_x2apic_id(header, type, acpi_id, &phys_id); exit: kfree(buffer.pointer); - return apic_id; + return phys_id; } -int acpi_get_apicid(acpi_handle handle, int type, u32 acpi_id) +int acpi_get_phys_id(acpi_handle handle, int type, u32 acpi_id) { - int apic_id; + int phys_id; - apic_id = map_mat_entry(handle, type, acpi_id); - if (apic_id == -1) - apic_id = map_madt_entry(type, acpi_id); + phys_id = map_mat_entry(handle, type, acpi_id); + if (phys_id == -1) + phys_id = map_madt_entry(type, acpi_id); - return apic_id; + return phys_id; } -int acpi_map_cpuid(int apic_id, u32 acpi_id) +int acpi_map_cpuid(int phys_id, u32 acpi_id) { #ifdef CONFIG_SMP int i; #endif - if (apic_id == -1) { + if (phys_id == -1) { /* * On UP processor, there is no _MAT or MADT table. - * So above apic_id is always set to -1. + * So above phys_id is always set to -1. * * BIOS may define multiple CPU handles even for UP processor. * For example, @@ -170,7 +170,7 @@ int acpi_map_cpuid(int apic_id, u32 acpi_id) * Processor (CPU3, 0x03, 0x00000410, 0x06) {} * } * - * Ignores apic_id and always returns 0 for the processor + * Ignores phys_id and always returns 0 for the processor * handle with acpi id 0 if nr_cpu_ids is 1. * This should be the case if SMP tables are not found. * Return -1 for other CPU's handle. @@ -178,28 +178,28 @@ int acpi_map_cpuid(int apic_id, u32 acpi_id) if (nr_cpu_ids <= 1 && acpi_id == 0) return acpi_id; else - return apic_id; + return phys_id; } #ifdef CONFIG_SMP for_each_possible_cpu(i) { - if (cpu_physical_id(i) == apic_id) + if (cpu_physical_id(i) == phys_id) return i; } #else /* In UP kernel, only processor 0 is valid */ - if (apic_id == 0) - return apic_id; + if (phys_id == 0) + return phys_id; #endif return -1; } int acpi_get_cpuid(acpi_handle handle, int type, u32 acpi_id) { - int apic_id; + int phys_id; - apic_id = acpi_get_apicid(handle, type, acpi_id); + phys_id = acpi_get_phys_id(handle, type, acpi_id); - return acpi_map_cpuid(apic_id, acpi_id); + return acpi_map_cpuid(phys_id, acpi_id); } EXPORT_SYMBOL_GPL(acpi_get_cpuid); diff --git a/include/acpi/processor.h b/include/acpi/processor.h index 3ca9b75..b95dc32 100644 --- a/include/acpi/processor.h +++ b/include/acpi/processor.h @@ -196,8 +196,8 @@ struct acpi_processor_flags { struct acpi_processor { acpi_handle handle; u32 acpi_id; - u32 apic_id; - u32 id; + u32 phys_id; /* CPU hardware ID such as APIC ID for x86 */ + u32 id; /* CPU logical ID allocated by OS */ u32 pblk; int performance_platform_limit; int throttling_platform_limit; @@ -310,8 +310,8 @@ static inline int acpi_processor_get_bios_limit(int cpu, unsigned int *limit) #endif /* CONFIG_CPU_FREQ */ /* in processor_core.c */ -int acpi_get_apicid(acpi_handle, int type, u32 acpi_id); -int acpi_map_cpuid(int apic_id, u32 acpi_id); +int acpi_get_phys_id(acpi_handle, int type, u32 acpi_id); +int acpi_map_cpuid(int phys_id, u32 acpi_id); int acpi_get_cpuid(acpi_handle, int type, u32 acpi_id); /* in processor_pdc.c */ -- cgit v0.10.2 From d02dc27db0dc74683efc4a2b36f55f5594451f38 Mon Sep 17 00:00:00 2001 From: Hanjun Guo Date: Sun, 4 Jan 2015 18:55:03 +0800 Subject: ACPI / processor: Rename acpi_(un)map_lsapic() to acpi_(un)map_cpu() acpi_map_lsapic() will allocate a logical CPU number and map it to physical CPU id (such as APIC id) for the hot-added CPU, it will also do some mapping for NUMA node id and etc, acpi_unmap_lsapic() will do the reverse. We can see that the name of the function is a little bit confusing and arch (IA64) dependent so rename them as acpi_(un)map_cpu() to make arch agnostic and explicit. Signed-off-by: Hanjun Guo Signed-off-by: Rafael J. Wysocki diff --git a/arch/ia64/kernel/acpi.c b/arch/ia64/kernel/acpi.c index 615ef81..e795cb8 100644 --- a/arch/ia64/kernel/acpi.c +++ b/arch/ia64/kernel/acpi.c @@ -893,13 +893,13 @@ static int _acpi_map_lsapic(acpi_handle handle, int physid, int *pcpu) } /* wrapper to silence section mismatch warning */ -int __ref acpi_map_lsapic(acpi_handle handle, int physid, int *pcpu) +int __ref acpi_map_cpu(acpi_handle handle, int physid, int *pcpu) { return _acpi_map_lsapic(handle, physid, pcpu); } -EXPORT_SYMBOL(acpi_map_lsapic); +EXPORT_SYMBOL(acpi_map_cpu); -int acpi_unmap_lsapic(int cpu) +int acpi_unmap_cpu(int cpu) { ia64_cpu_to_sapicid[cpu] = -1; set_cpu_present(cpu, false); @@ -910,8 +910,7 @@ int acpi_unmap_lsapic(int cpu) return (0); } - -EXPORT_SYMBOL(acpi_unmap_lsapic); +EXPORT_SYMBOL(acpi_unmap_cpu); #endif /* CONFIG_ACPI_HOTPLUG_CPU */ #ifdef CONFIG_ACPI_NUMA diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 4433a4b..d162636 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -750,13 +750,13 @@ static int _acpi_map_lsapic(acpi_handle handle, int physid, int *pcpu) } /* wrapper to silence section mismatch warning */ -int __ref acpi_map_lsapic(acpi_handle handle, int physid, int *pcpu) +int __ref acpi_map_cpu(acpi_handle handle, int physid, int *pcpu) { return _acpi_map_lsapic(handle, physid, pcpu); } -EXPORT_SYMBOL(acpi_map_lsapic); +EXPORT_SYMBOL(acpi_map_cpu); -int acpi_unmap_lsapic(int cpu) +int acpi_unmap_cpu(int cpu) { #ifdef CONFIG_ACPI_NUMA set_apicid_to_node(per_cpu(x86_cpu_to_apicid, cpu), NUMA_NO_NODE); @@ -768,8 +768,7 @@ int acpi_unmap_lsapic(int cpu) return (0); } - -EXPORT_SYMBOL(acpi_unmap_lsapic); +EXPORT_SYMBOL(acpi_unmap_cpu); #endif /* CONFIG_ACPI_HOTPLUG_CPU */ int acpi_register_ioapic(acpi_handle handle, u64 phys_addr, u32 gsi_base) diff --git a/drivers/acpi/acpi_processor.c b/drivers/acpi/acpi_processor.c index f02b29e..1020b1b 100644 --- a/drivers/acpi/acpi_processor.c +++ b/drivers/acpi/acpi_processor.c @@ -180,13 +180,13 @@ static int acpi_processor_hotadd_init(struct acpi_processor *pr) cpu_maps_update_begin(); cpu_hotplug_begin(); - ret = acpi_map_lsapic(pr->handle, pr->phys_id, &pr->id); + ret = acpi_map_cpu(pr->handle, pr->phys_id, &pr->id); if (ret) goto out; ret = arch_register_cpu(pr->id); if (ret) { - acpi_unmap_lsapic(pr->id); + acpi_unmap_cpu(pr->id); goto out; } @@ -461,7 +461,7 @@ static void acpi_processor_remove(struct acpi_device *device) /* Remove the CPU. */ arch_unregister_cpu(pr->id); - acpi_unmap_lsapic(pr->id); + acpi_unmap_cpu(pr->id); cpu_hotplug_done(); cpu_maps_update_done(); diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 856d381..d459cd1 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -147,8 +147,8 @@ void acpi_numa_arch_fixup(void); #ifdef CONFIG_ACPI_HOTPLUG_CPU /* Arch dependent functions for cpu hotplug support */ -int acpi_map_lsapic(acpi_handle handle, int physid, int *pcpu); -int acpi_unmap_lsapic(int cpu); +int acpi_map_cpu(acpi_handle handle, int physid, int *pcpu); +int acpi_unmap_cpu(int cpu); #endif /* CONFIG_ACPI_HOTPLUG_CPU */ int acpi_register_ioapic(acpi_handle handle, u64 phys_addr, u32 gsi_base); -- cgit v0.10.2 From 48628e4015dbd77b676c5a9b5bd145a284f8bb93 Mon Sep 17 00:00:00 2001 From: Zhang Rui Date: Sun, 4 Jan 2015 14:24:23 +0800 Subject: ACPI/int340x_thermal: enumerate INT340X devices even if they're not in _ART/_TRT For some INT340X thermal devices, even if they are not referred in _TRT/_ART table, they still can be used by userspace for thermal control. Thus change the code to enumerated all the INT340X devices, no matter if they're referred in _TRT/_ART or not. Signed-off-by: Zhang Rui Tested-by: Srinivas Pandruvada diff --git a/drivers/acpi/int340x_thermal.c b/drivers/acpi/int340x_thermal.c index a27d31d..dc3553d 100644 --- a/drivers/acpi/int340x_thermal.c +++ b/drivers/acpi/int340x_thermal.c @@ -14,9 +14,8 @@ #include "internal.h" -#define DO_ENUMERATION 0x01 static const struct acpi_device_id int340x_thermal_device_ids[] = { - {"INT3400", DO_ENUMERATION }, + {"INT3400"}, {"INT3401"}, {"INT3402"}, {"INT3403"}, @@ -34,8 +33,7 @@ static int int340x_thermal_handler_attach(struct acpi_device *adev, const struct acpi_device_id *id) { #if defined(CONFIG_INT340X_THERMAL) || defined(CONFIG_INT340X_THERMAL_MODULE) - if (id->driver_data == DO_ENUMERATION) - acpi_create_platform_device(adev); + acpi_create_platform_device(adev); #endif return 1; } diff --git a/drivers/thermal/int340x_thermal/acpi_thermal_rel.c b/drivers/thermal/int340x_thermal/acpi_thermal_rel.c index 231cabc..2c2ec76 100644 --- a/drivers/thermal/int340x_thermal/acpi_thermal_rel.c +++ b/drivers/thermal/int340x_thermal/acpi_thermal_rel.c @@ -119,15 +119,11 @@ int acpi_parse_trt(acpi_handle handle, int *trt_count, struct trt **trtp, continue; result = acpi_bus_get_device(trt->source, &adev); - if (!result) - acpi_create_platform_device(adev); - else + if (result) pr_warn("Failed to get source ACPI device\n"); result = acpi_bus_get_device(trt->target, &adev); - if (!result) - acpi_create_platform_device(adev); - else + if (result) pr_warn("Failed to get target ACPI device\n"); } @@ -206,16 +202,12 @@ int acpi_parse_art(acpi_handle handle, int *art_count, struct art **artp, if (art->source) { result = acpi_bus_get_device(art->source, &adev); - if (!result) - acpi_create_platform_device(adev); - else + if (result) pr_warn("Failed to get source ACPI device\n"); } if (art->target) { result = acpi_bus_get_device(art->target, &adev); - if (!result) - acpi_create_platform_device(adev); - else + if (result) pr_warn("Failed to get source ACPI device\n"); } } -- cgit v0.10.2 From 014d9d5d0cc1da79bbe48fbc5e1068c5616238d2 Mon Sep 17 00:00:00 2001 From: Zhang Rui Date: Sun, 4 Jan 2015 14:24:24 +0800 Subject: ACPI/int340x_thermal: enumerate INT3401 for Intel SoC DTS thermal driver Intel SoC DTS thermal driver on Baytrail platform uses IRQ 86 for critical overheating notification. But this IRQ 86 is described in the _CRS control method of INT3401 device, thus we should enumerate INT3401 to set the IRQ descriptor when Intel SoC DTS thermal driver is built. Signed-off-by: Zhang Rui Tested-by: Srinivas Pandruvada diff --git a/drivers/acpi/int340x_thermal.c b/drivers/acpi/int340x_thermal.c index dc3553d..9dcf836 100644 --- a/drivers/acpi/int340x_thermal.c +++ b/drivers/acpi/int340x_thermal.c @@ -14,9 +14,10 @@ #include "internal.h" +#define INT3401_DEVICE 0X01 static const struct acpi_device_id int340x_thermal_device_ids[] = { {"INT3400"}, - {"INT3401"}, + {"INT3401", INT3401_DEVICE}, {"INT3402"}, {"INT3403"}, {"INT3404"}, @@ -34,6 +35,10 @@ static int int340x_thermal_handler_attach(struct acpi_device *adev, { #if defined(CONFIG_INT340X_THERMAL) || defined(CONFIG_INT340X_THERMAL_MODULE) acpi_create_platform_device(adev); +#elif defined(INTEL_SOC_DTS_THERMAL) || defined(INTEL_SOC_DTS_THERMAL_MODULE) + /* Intel SoC DTS thermal driver needs INT3401 to set IRQ descriptor */ + if (id->driver_data == INT3401_DEVICE) + acpi_create_platform_device(adev); #endif return 1; } -- cgit v0.10.2 From ad0f409051a67dde8d454990e09b6db41f675876 Mon Sep 17 00:00:00 2001 From: Zhang Rui Date: Sun, 4 Jan 2015 16:35:59 +0800 Subject: int340x_thermal/processor_thermal_device: return failure when there is no ACPI device object processor_thermal_device driver needs ACPI support to work. Thus, the driver probing should fail when there is no ACPI device object asscociated. This fixes a NULL pointer dereference when the driver is loaded with INT340X feature disabled in BIOS. Reported-by: Chen Yu Signed-off-by: Zhang Rui Tested-by: Chen Yu diff --git a/drivers/thermal/int340x_thermal/processor_thermal_device.c b/drivers/thermal/int340x_thermal/processor_thermal_device.c index 31bb553..0fe5dbb 100644 --- a/drivers/thermal/int340x_thermal/processor_thermal_device.c +++ b/drivers/thermal/int340x_thermal/processor_thermal_device.c @@ -130,6 +130,8 @@ static int proc_thermal_add(struct device *dev, int ret; adev = ACPI_COMPANION(dev); + if (!adev) + return -ENODEV; status = acpi_evaluate_object(adev->handle, "PPCC", NULL, &buf); if (ACPI_FAILURE(status)) -- cgit v0.10.2 From b1940cd21c0f4abdce101253e860feff547291b0 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 5 Jan 2015 17:05:20 -0800 Subject: Linux 3.19-rc3 diff --git a/Makefile b/Makefile index ef748e1..eb4eca5 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ VERSION = 3 PATCHLEVEL = 19 SUBLEVEL = 0 -EXTRAVERSION = -rc2 +EXTRAVERSION = -rc3 NAME = Diseased Newt # *DOCUMENTATION* -- cgit v0.10.2 From 329887ad13a3f3d26837ea9fce5a8305a7c983e2 Mon Sep 17 00:00:00 2001 From: Simon Wunderlich Date: Wed, 13 Aug 2014 14:26:56 +0200 Subject: batman-adv: fix and simplify condition when bonding should be used The current condition actually does NOT consider bonding when the interface the packet came in from is the soft interface, which is the opposite of what it should do (and the comment describes). Fix that and slightly simplify the condition. Reported-by: Ray Gibson Signed-off-by: Simon Wunderlich Signed-off-by: Marek Lindner Signed-off-by: Antonio Quartulli diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c index 35f76f2..6648f32 100644 --- a/net/batman-adv/routing.c +++ b/net/batman-adv/routing.c @@ -443,11 +443,13 @@ batadv_find_router(struct batadv_priv *bat_priv, router = batadv_orig_router_get(orig_node, recv_if); + if (!router) + return router; + /* only consider bonding for recv_if == BATADV_IF_DEFAULT (first hop) * and if activated. */ - if (recv_if == BATADV_IF_DEFAULT || !atomic_read(&bat_priv->bonding) || - !router) + if (!(recv_if == BATADV_IF_DEFAULT && atomic_read(&bat_priv->bonding))) return router; /* bonding: loop through the list of possible routers found -- cgit v0.10.2 From 2c667a339c7a6f23e1b7e23eed06f2564cff0c91 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20L=C3=BCssing?= Date: Thu, 30 Oct 2014 06:23:40 +0100 Subject: batman-adv: fix delayed foreign originator recognition MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently it can happen that the reception of an OGM from a new originator is not being accepted. More precisely it can happen that an originator struct gets allocated and initialized (batadv_orig_node_new()), even the TQ gets calculated and set correctly (batadv_iv_ogm_calc_tq()) but still the periodic orig_node purging thread will decide to delete it if it has a chance to jump between these two function calls. This is because batadv_orig_node_new() initializes the last_seen value to zero and its caller (batadv_iv_ogm_orig_get()) makes it visible to other threads by adding it to the hash table already. batadv_iv_ogm_calc_tq() will set the last_seen variable to the correct, current time a few lines later but if the purging thread jumps in between that it will think that the orig_node timed out and will wrongly schedule it for deletion already. If the purging interval is the same as the originator interval (which is the default: 1 second), then this game can continue for several rounds until the random OGM jitter added enough difference between these two (in tests, two to about four rounds seemed common). Fixing this by initializing the last_seen variable of an orig_node to the current time before adding it to the hash table. Signed-off-by: Linus Lüssing Signed-off-by: Marek Lindner Signed-off-by: Antonio Quartulli diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c index 6a48451..648bdba 100644 --- a/net/batman-adv/originator.c +++ b/net/batman-adv/originator.c @@ -678,6 +678,7 @@ struct batadv_orig_node *batadv_orig_node_new(struct batadv_priv *bat_priv, atomic_set(&orig_node->last_ttvn, 0); orig_node->tt_buff = NULL; orig_node->tt_buff_len = 0; + orig_node->last_seen = jiffies; reset_time = jiffies - 1 - msecs_to_jiffies(BATADV_RESET_PROTECTION_MS); orig_node->bcast_seqno_reset = reset_time; #ifdef CONFIG_BATMAN_ADV_MCAST -- cgit v0.10.2 From f44d54077a69b1c990aeef49cf9b813fb274cfee Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Hundeb=C3=B8ll?= Date: Tue, 11 Nov 2014 16:22:23 +0100 Subject: batman-adv: fix lock class for decoding hash in network-coding.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit batadv_has_set_lock_class() is called with the wrong hash table as first argument (probably due to a copy-paste error), which leads to false positives when running with lockdep. Introduced-by: 612d2b4fe0a1ff2f8389462a6f8be34e54124c05 ("batman-adv: network coding - save overheard and tx packets for decoding") Signed-off-by: Martin Hundebøll Signed-off-by: Marek Lindner Signed-off-by: Antonio Quartulli diff --git a/net/batman-adv/network-coding.c b/net/batman-adv/network-coding.c index 8d04d17..fab47f1 100644 --- a/net/batman-adv/network-coding.c +++ b/net/batman-adv/network-coding.c @@ -133,7 +133,7 @@ int batadv_nc_mesh_init(struct batadv_priv *bat_priv) if (!bat_priv->nc.decoding_hash) goto err; - batadv_hash_set_lock_class(bat_priv->nc.coding_hash, + batadv_hash_set_lock_class(bat_priv->nc.decoding_hash, &batadv_nc_decoding_hash_lock_class_key); INIT_DELAYED_WORK(&bat_priv->nc.work, batadv_nc_worker); -- cgit v0.10.2 From e8829f007e982a9a8fb4023109233d5f344d4657 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20L=C3=BCssing?= Date: Thu, 30 Oct 2014 05:40:46 +0100 Subject: batman-adv: fix counter for multicast supporting nodes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A miscounting of nodes having multicast optimizations enabled can lead to multicast packet loss in the following scenario: If the first OGM a node receives from another one has no multicast optimizations support (no multicast tvlv) then we are missing to increase the counter. This potentially leads to the wrong assumption that we could safely use multicast optimizations. Fixings this by increasing the counter if the initial OGM has the multicast TVLV unset, too. Introduced by 60432d756cf06e597ef9da511402dd059b112447 ("batman-adv: Announce new capability via multicast TVLV") Reported-by: Tobias Hachmer Signed-off-by: Linus Lüssing Signed-off-by: Marek Lindner Signed-off-by: Antonio Quartulli diff --git a/net/batman-adv/multicast.c b/net/batman-adv/multicast.c index ab6bb2a..d3503fb 100644 --- a/net/batman-adv/multicast.c +++ b/net/batman-adv/multicast.c @@ -685,11 +685,13 @@ static void batadv_mcast_tvlv_ogm_handler_v1(struct batadv_priv *bat_priv, if (orig_initialized) atomic_dec(&bat_priv->mcast.num_disabled); orig->capabilities |= BATADV_ORIG_CAPA_HAS_MCAST; - /* If mcast support is being switched off increase the disabled - * mcast node counter. + /* If mcast support is being switched off or if this is an initial + * OGM without mcast support then increase the disabled mcast + * node counter. */ } else if (!orig_mcast_enabled && - orig->capabilities & BATADV_ORIG_CAPA_HAS_MCAST) { + (orig->capabilities & BATADV_ORIG_CAPA_HAS_MCAST || + !orig_initialized)) { atomic_inc(&bat_priv->mcast.num_disabled); orig->capabilities &= ~BATADV_ORIG_CAPA_HAS_MCAST; } -- cgit v0.10.2 From a5164886b0bdadd662f9715a7541432c4d1a0d99 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20L=C3=BCssing?= Date: Thu, 30 Oct 2014 05:40:47 +0100 Subject: batman-adv: fix multicast counter when purging originators MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When purging an orig_node we should only decrease counter tracking the number of nodes without multicast optimizations support if it was increased through this orig_node before. A not yet quite initialized orig_node (meaning it did not have its turn in the mcast-tvlv handler so far) which gets purged would not adhere to this and will lead to a counter imbalance. Fixing this by adding a check whether the orig_node is mcast-initalized before decreasing the counter in the mcast-orig_node-purging routine. Introduced by 60432d756cf06e597ef9da511402dd059b112447 ("batman-adv: Announce new capability via multicast TVLV") Reported-by: Tobias Hachmer Signed-off-by: Linus Lüssing Signed-off-by: Marek Lindner Signed-off-by: Antonio Quartulli diff --git a/net/batman-adv/multicast.c b/net/batman-adv/multicast.c index d3503fb..b24e4bb 100644 --- a/net/batman-adv/multicast.c +++ b/net/batman-adv/multicast.c @@ -740,7 +740,8 @@ void batadv_mcast_purge_orig(struct batadv_orig_node *orig) { struct batadv_priv *bat_priv = orig->bat_priv; - if (!(orig->capabilities & BATADV_ORIG_CAPA_HAS_MCAST)) + if (!(orig->capabilities & BATADV_ORIG_CAPA_HAS_MCAST) && + orig->capa_initialized & BATADV_ORIG_CAPA_HAS_MCAST) atomic_dec(&bat_priv->mcast.num_disabled); batadv_mcast_want_unsnoop_update(bat_priv, orig, BATADV_NO_FLAGS); -- cgit v0.10.2 From 9d31b3ce81683ce3c9fd10afa70892e373b21067 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20L=C3=BCssing?= Date: Sat, 13 Dec 2014 23:32:15 +0100 Subject: batman-adv: fix potential TT client + orig-node memory leak MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch fixes a potential memory leak which can occur once an originator times out. On timeout the according global translation table entry might not get purged correctly. Furthermore, the non purged TT entry will cause its orig-node to leak, too. Which additionally can lead to the new multicast optimization feature not kicking in because of a therefore bogus counter. In detail: The batadv_tt_global_entry->orig_list holds the reference to the orig-node. Usually this reference is released after BATADV_PURGE_TIMEOUT through: _batadv_purge_orig()-> batadv_purge_orig_node()->batadv_update_route()->_batadv_update_route()-> batadv_tt_global_del_orig() which purges this global tt entry and releases the reference to the orig-node. However, if between two batadv_purge_orig_node() calls the orig-node timeout grew to 2*BATADV_PURGE_TIMEOUT then this call path isn't reached. Instead the according orig-node is removed from the originator hash in _batadv_purge_orig(), the batadv_update_route() part is skipped and won't be reached anymore. Fixing the issue by moving batadv_tt_global_del_orig() out of the rcu callback. Signed-off-by: Linus Lüssing Acked-by: Antonio Quartulli Signed-off-by: Marek Lindner Signed-off-by: Antonio Quartulli diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c index 648bdba..bea8198 100644 --- a/net/batman-adv/originator.c +++ b/net/batman-adv/originator.c @@ -570,9 +570,6 @@ static void batadv_orig_node_free_rcu(struct rcu_head *rcu) batadv_frag_purge_orig(orig_node, NULL); - batadv_tt_global_del_orig(orig_node->bat_priv, orig_node, -1, - "originator timed out"); - if (orig_node->bat_priv->bat_algo_ops->bat_orig_free) orig_node->bat_priv->bat_algo_ops->bat_orig_free(orig_node); @@ -978,6 +975,9 @@ static void _batadv_purge_orig(struct batadv_priv *bat_priv) if (batadv_purge_orig_node(bat_priv, orig_node)) { batadv_gw_node_delete(bat_priv, orig_node); hlist_del_rcu(&orig_node->hash_entry); + batadv_tt_global_del_orig(orig_node->bat_priv, + orig_node, -1, + "originator timed out"); batadv_orig_node_free_ref(orig_node); continue; } -- cgit v0.10.2 From 80e9541f7987f60471268b751aaa9b6800513fe9 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Sun, 4 Jan 2015 17:21:58 +0200 Subject: virtio: make del_vqs idempotent Our code calls del_vqs multiple times, assuming it's idempotent. commit 3ec7a77bb3089bb01032fdbd958eb5c29da58b49 virtio_pci: free up vq->priv broke this assumption, by adding kfree there, so multiple calls cause double free. Fix it up. Fixes: 3ec7a77bb3089bb01032fdbd958eb5c29da58b49 Reported-by: Sasha Levin Signed-off-by: Michael S. Tsirkin diff --git a/drivers/virtio/virtio_pci_common.c b/drivers/virtio/virtio_pci_common.c index 2ef9529..5243868 100644 --- a/drivers/virtio/virtio_pci_common.c +++ b/drivers/virtio/virtio_pci_common.c @@ -282,6 +282,7 @@ void vp_del_vqs(struct virtio_device *vdev) vp_free_vectors(vdev); kfree(vp_dev->vqs); + vp_dev->vqs = NULL; } static int vp_try_to_find_vqs(struct virtio_device *vdev, unsigned nvqs, -- cgit v0.10.2 From 945399a8c78ac225cdbaece0f94c0d8741b4e1d8 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Sun, 4 Jan 2015 13:25:30 +0200 Subject: virtio_pci: device-specific release callback It turns out we need to add device-specific code in release callback. Move it to virtio_pci_legacy.c. Signed-off-by: Michael S. Tsirkin diff --git a/drivers/virtio/virtio_pci_common.c b/drivers/virtio/virtio_pci_common.c index 5243868..9756f21 100644 --- a/drivers/virtio/virtio_pci_common.c +++ b/drivers/virtio/virtio_pci_common.c @@ -422,15 +422,6 @@ int vp_set_vq_affinity(struct virtqueue *vq, int cpu) return 0; } -void virtio_pci_release_dev(struct device *_d) -{ - /* - * No need for a release method as we allocate/free - * all devices together with the pci devices. - * Provide an empty one to avoid getting a warning from core. - */ -} - #ifdef CONFIG_PM_SLEEP static int virtio_pci_freeze(struct device *dev) { diff --git a/drivers/virtio/virtio_pci_common.h b/drivers/virtio/virtio_pci_common.h index adddb64..5a49728 100644 --- a/drivers/virtio/virtio_pci_common.h +++ b/drivers/virtio/virtio_pci_common.h @@ -126,7 +126,6 @@ const char *vp_bus_name(struct virtio_device *vdev); * - ignore the affinity request if we're using INTX */ int vp_set_vq_affinity(struct virtqueue *vq, int cpu); -void virtio_pci_release_dev(struct device *); int virtio_pci_legacy_probe(struct pci_dev *pci_dev, const struct pci_device_id *id); diff --git a/drivers/virtio/virtio_pci_legacy.c b/drivers/virtio/virtio_pci_legacy.c index 6c76f0f..08d1915 100644 --- a/drivers/virtio/virtio_pci_legacy.c +++ b/drivers/virtio/virtio_pci_legacy.c @@ -211,6 +211,15 @@ static const struct virtio_config_ops virtio_pci_config_ops = { .set_vq_affinity = vp_set_vq_affinity, }; +static void virtio_pci_release_dev(struct device *_d) +{ + /* + * No need for a release method as we allocate/free + * all devices together with the pci devices. + * Provide an empty one to avoid getting a warning from core. + */ +} + /* the PCI probing function */ int virtio_pci_legacy_probe(struct pci_dev *pci_dev, const struct pci_device_id *id) -- cgit v0.10.2 From 63bd62a08ca45a0c804c3c89777edc7f76a2d6da Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Fri, 2 Jan 2015 14:47:40 -0500 Subject: virtio_pci: defer kfree until release callback A struct device which has just been unregistered can live on past the point at which a driver decides to drop it's initial reference to the kobject gained on allocation. This implies that when releasing a virtio device, we can't free a struct virtio_device until the underlying struct device has been released, which might not happen immediately on device_unregister(). Unfortunately, this is exactly what virtio pci does: it has an empty release callback, and frees memory immediately after unregistering the device. This causes an easy to reproduce crash if CONFIG_DEBUG_KOBJECT_RELEASE it enabled. To fix, free the memory only once we know the device is gone in the release callback. Cc: stable@vger.kernel.org Signed-off-by: Sasha Levin Signed-off-by: Michael S. Tsirkin diff --git a/drivers/virtio/virtio_pci_legacy.c b/drivers/virtio/virtio_pci_legacy.c index 08d1915..4beaee3 100644 --- a/drivers/virtio/virtio_pci_legacy.c +++ b/drivers/virtio/virtio_pci_legacy.c @@ -213,11 +213,10 @@ static const struct virtio_config_ops virtio_pci_config_ops = { static void virtio_pci_release_dev(struct device *_d) { - /* - * No need for a release method as we allocate/free - * all devices together with the pci devices. - * Provide an empty one to avoid getting a warning from core. - */ + struct virtio_device *vdev = dev_to_virtio(_d); + struct virtio_pci_device *vp_dev = to_vp_device(vdev); + + kfree(vp_dev); } /* the PCI probing function */ @@ -311,5 +310,4 @@ void virtio_pci_legacy_remove(struct pci_dev *pci_dev) pci_iounmap(pci_dev, vp_dev->ioaddr); pci_release_regions(pci_dev); pci_disable_device(pci_dev); - kfree(vp_dev); } -- cgit v0.10.2 From a1eb03f546d651a8f39c7d0692b1f7f5b4e7e3cd Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Sun, 4 Jan 2015 17:28:27 +0200 Subject: virtio_pci: document why we defer kfree The reason we defer kfree until release function is because it's a general rule for kobjects: kfree of the reference counter itself is only legal in the release function. Previous patch didn't make this clear, document this in code. Cc: stable@vger.kernel.org Signed-off-by: Michael S. Tsirkin diff --git a/drivers/virtio/virtio_pci_legacy.c b/drivers/virtio/virtio_pci_legacy.c index 4beaee3..a5486e6 100644 --- a/drivers/virtio/virtio_pci_legacy.c +++ b/drivers/virtio/virtio_pci_legacy.c @@ -216,6 +216,9 @@ static void virtio_pci_release_dev(struct device *_d) struct virtio_device *vdev = dev_to_virtio(_d); struct virtio_pci_device *vp_dev = to_vp_device(vdev); + /* As struct device is a kobject, it's not safe to + * free the memory (including the reference counter itself) + * until it's release callback. */ kfree(vp_dev); } -- cgit v0.10.2 From 524a640444ae19593dd4e9e80075041c1ed831bd Mon Sep 17 00:00:00 2001 From: Oded Gabbay Date: Mon, 29 Dec 2014 13:52:22 +0200 Subject: drm/amdkfd: Do copy_to/from_user in general kfd_ioctl() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch moves the copy_to_user() and copy_from_user() calls from the different ioctl functions in amdkfd to the general kfd_ioctl() function, as this is a common code for all ioctls. This was done according to example taken from drm_ioctl.c Signed-off-by: Oded Gabbay Reviewed-by: Christian König diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index fe5c543..249f492 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -126,17 +126,14 @@ static int kfd_open(struct inode *inode, struct file *filep) return 0; } -static long kfd_ioctl_get_version(struct file *filep, struct kfd_process *p, - void __user *arg) +static int kfd_ioctl_get_version(struct file *filep, struct kfd_process *p, + void *data) { - struct kfd_ioctl_get_version_args args; + struct kfd_ioctl_get_version_args *args = data; int err = 0; - args.major_version = KFD_IOCTL_MAJOR_VERSION; - args.minor_version = KFD_IOCTL_MINOR_VERSION; - - if (copy_to_user(arg, &args, sizeof(args))) - err = -EFAULT; + args->major_version = KFD_IOCTL_MAJOR_VERSION; + args->minor_version = KFD_IOCTL_MINOR_VERSION; return err; } @@ -220,10 +217,10 @@ static int set_queue_properties_from_user(struct queue_properties *q_properties, return 0; } -static long kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p, - void __user *arg) +static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p, + void *data) { - struct kfd_ioctl_create_queue_args args; + struct kfd_ioctl_create_queue_args *args = data; struct kfd_dev *dev; int err = 0; unsigned int queue_id; @@ -232,16 +229,13 @@ static long kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p, memset(&q_properties, 0, sizeof(struct queue_properties)); - if (copy_from_user(&args, arg, sizeof(args))) - return -EFAULT; - pr_debug("kfd: creating queue ioctl\n"); - err = set_queue_properties_from_user(&q_properties, &args); + err = set_queue_properties_from_user(&q_properties, args); if (err) return err; - dev = kfd_device_by_id(args.gpu_id); + dev = kfd_device_by_id(args->gpu_id); if (dev == NULL) return -EINVAL; @@ -249,7 +243,7 @@ static long kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p, pdd = kfd_bind_process_to_device(dev, p); if (IS_ERR(pdd)) { - err = PTR_ERR(pdd); + err = -ESRCH; goto err_bind_process; } @@ -262,33 +256,26 @@ static long kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p, if (err != 0) goto err_create_queue; - args.queue_id = queue_id; + args->queue_id = queue_id; /* Return gpu_id as doorbell offset for mmap usage */ - args.doorbell_offset = args.gpu_id << PAGE_SHIFT; - - if (copy_to_user(arg, &args, sizeof(args))) { - err = -EFAULT; - goto err_copy_args_out; - } + args->doorbell_offset = args->gpu_id << PAGE_SHIFT; mutex_unlock(&p->mutex); - pr_debug("kfd: queue id %d was created successfully\n", args.queue_id); + pr_debug("kfd: queue id %d was created successfully\n", args->queue_id); pr_debug("ring buffer address == 0x%016llX\n", - args.ring_base_address); + args->ring_base_address); pr_debug("read ptr address == 0x%016llX\n", - args.read_pointer_address); + args->read_pointer_address); pr_debug("write ptr address == 0x%016llX\n", - args.write_pointer_address); + args->write_pointer_address); return 0; -err_copy_args_out: - pqm_destroy_queue(&p->pqm, queue_id); err_create_queue: err_bind_process: mutex_unlock(&p->mutex); @@ -296,99 +283,90 @@ err_bind_process: } static int kfd_ioctl_destroy_queue(struct file *filp, struct kfd_process *p, - void __user *arg) + void *data) { int retval; - struct kfd_ioctl_destroy_queue_args args; - - if (copy_from_user(&args, arg, sizeof(args))) - return -EFAULT; + struct kfd_ioctl_destroy_queue_args *args = data; pr_debug("kfd: destroying queue id %d for PASID %d\n", - args.queue_id, + args->queue_id, p->pasid); mutex_lock(&p->mutex); - retval = pqm_destroy_queue(&p->pqm, args.queue_id); + retval = pqm_destroy_queue(&p->pqm, args->queue_id); mutex_unlock(&p->mutex); return retval; } static int kfd_ioctl_update_queue(struct file *filp, struct kfd_process *p, - void __user *arg) + void *data) { int retval; - struct kfd_ioctl_update_queue_args args; + struct kfd_ioctl_update_queue_args *args = data; struct queue_properties properties; - if (copy_from_user(&args, arg, sizeof(args))) - return -EFAULT; - - if (args.queue_percentage > KFD_MAX_QUEUE_PERCENTAGE) { + if (args->queue_percentage > KFD_MAX_QUEUE_PERCENTAGE) { pr_err("kfd: queue percentage must be between 0 to KFD_MAX_QUEUE_PERCENTAGE\n"); return -EINVAL; } - if (args.queue_priority > KFD_MAX_QUEUE_PRIORITY) { + if (args->queue_priority > KFD_MAX_QUEUE_PRIORITY) { pr_err("kfd: queue priority must be between 0 to KFD_MAX_QUEUE_PRIORITY\n"); return -EINVAL; } - if ((args.ring_base_address) && + if ((args->ring_base_address) && (!access_ok(VERIFY_WRITE, - (const void __user *) args.ring_base_address, + (const void __user *) args->ring_base_address, sizeof(uint64_t)))) { pr_err("kfd: can't access ring base address\n"); return -EFAULT; } - if (!is_power_of_2(args.ring_size) && (args.ring_size != 0)) { + if (!is_power_of_2(args->ring_size) && (args->ring_size != 0)) { pr_err("kfd: ring size must be a power of 2 or 0\n"); return -EINVAL; } - properties.queue_address = args.ring_base_address; - properties.queue_size = args.ring_size; - properties.queue_percent = args.queue_percentage; - properties.priority = args.queue_priority; + properties.queue_address = args->ring_base_address; + properties.queue_size = args->ring_size; + properties.queue_percent = args->queue_percentage; + properties.priority = args->queue_priority; pr_debug("kfd: updating queue id %d for PASID %d\n", - args.queue_id, p->pasid); + args->queue_id, p->pasid); mutex_lock(&p->mutex); - retval = pqm_update_queue(&p->pqm, args.queue_id, &properties); + retval = pqm_update_queue(&p->pqm, args->queue_id, &properties); mutex_unlock(&p->mutex); return retval; } -static long kfd_ioctl_set_memory_policy(struct file *filep, - struct kfd_process *p, void __user *arg) +static int kfd_ioctl_set_memory_policy(struct file *filep, + struct kfd_process *p, void *data) { - struct kfd_ioctl_set_memory_policy_args args; + struct kfd_ioctl_set_memory_policy_args *args = data; struct kfd_dev *dev; int err = 0; struct kfd_process_device *pdd; enum cache_policy default_policy, alternate_policy; - if (copy_from_user(&args, arg, sizeof(args))) - return -EFAULT; - - if (args.default_policy != KFD_IOC_CACHE_POLICY_COHERENT - && args.default_policy != KFD_IOC_CACHE_POLICY_NONCOHERENT) { + if (args->default_policy != KFD_IOC_CACHE_POLICY_COHERENT + && args->default_policy != KFD_IOC_CACHE_POLICY_NONCOHERENT) { return -EINVAL; } - if (args.alternate_policy != KFD_IOC_CACHE_POLICY_COHERENT - && args.alternate_policy != KFD_IOC_CACHE_POLICY_NONCOHERENT) { + if (args->alternate_policy != KFD_IOC_CACHE_POLICY_COHERENT + && args->alternate_policy != KFD_IOC_CACHE_POLICY_NONCOHERENT) { return -EINVAL; } - dev = kfd_device_by_id(args.gpu_id); + dev = kfd_device_by_id(args->gpu_id); if (dev == NULL) return -EINVAL; @@ -396,23 +374,23 @@ static long kfd_ioctl_set_memory_policy(struct file *filep, pdd = kfd_bind_process_to_device(dev, p); if (IS_ERR(pdd)) { - err = PTR_ERR(pdd); + err = -ESRCH; goto out; } - default_policy = (args.default_policy == KFD_IOC_CACHE_POLICY_COHERENT) + default_policy = (args->default_policy == KFD_IOC_CACHE_POLICY_COHERENT) ? cache_policy_coherent : cache_policy_noncoherent; alternate_policy = - (args.alternate_policy == KFD_IOC_CACHE_POLICY_COHERENT) + (args->alternate_policy == KFD_IOC_CACHE_POLICY_COHERENT) ? cache_policy_coherent : cache_policy_noncoherent; if (!dev->dqm->set_cache_memory_policy(dev->dqm, &pdd->qpd, default_policy, alternate_policy, - (void __user *)args.alternate_aperture_base, - args.alternate_aperture_size)) + (void __user *)args->alternate_aperture_base, + args->alternate_aperture_size)) err = -EINVAL; out: @@ -421,53 +399,44 @@ out: return err; } -static long kfd_ioctl_get_clock_counters(struct file *filep, - struct kfd_process *p, void __user *arg) +static int kfd_ioctl_get_clock_counters(struct file *filep, + struct kfd_process *p, void *data) { - struct kfd_ioctl_get_clock_counters_args args; + struct kfd_ioctl_get_clock_counters_args *args = data; struct kfd_dev *dev; struct timespec time; - if (copy_from_user(&args, arg, sizeof(args))) - return -EFAULT; - - dev = kfd_device_by_id(args.gpu_id); + dev = kfd_device_by_id(args->gpu_id); if (dev == NULL) return -EINVAL; /* Reading GPU clock counter from KGD */ - args.gpu_clock_counter = kfd2kgd->get_gpu_clock_counter(dev->kgd); + args->gpu_clock_counter = kfd2kgd->get_gpu_clock_counter(dev->kgd); /* No access to rdtsc. Using raw monotonic time */ getrawmonotonic(&time); - args.cpu_clock_counter = (uint64_t)timespec_to_ns(&time); + args->cpu_clock_counter = (uint64_t)timespec_to_ns(&time); get_monotonic_boottime(&time); - args.system_clock_counter = (uint64_t)timespec_to_ns(&time); + args->system_clock_counter = (uint64_t)timespec_to_ns(&time); /* Since the counter is in nano-seconds we use 1GHz frequency */ - args.system_clock_freq = 1000000000; - - if (copy_to_user(arg, &args, sizeof(args))) - return -EFAULT; + args->system_clock_freq = 1000000000; return 0; } static int kfd_ioctl_get_process_apertures(struct file *filp, - struct kfd_process *p, void __user *arg) + struct kfd_process *p, void *data) { - struct kfd_ioctl_get_process_apertures_args args; + struct kfd_ioctl_get_process_apertures_args *args = data; struct kfd_process_device_apertures *pAperture; struct kfd_process_device *pdd; dev_dbg(kfd_device, "get apertures for PASID %d", p->pasid); - if (copy_from_user(&args, arg, sizeof(args))) - return -EFAULT; - - args.num_of_nodes = 0; + args->num_of_nodes = 0; mutex_lock(&p->mutex); @@ -476,7 +445,8 @@ static int kfd_ioctl_get_process_apertures(struct file *filp, /* Run over all pdd of the process */ pdd = kfd_get_first_process_device_data(p); do { - pAperture = &args.process_apertures[args.num_of_nodes]; + pAperture = + &args->process_apertures[args->num_of_nodes]; pAperture->gpu_id = pdd->dev->id; pAperture->lds_base = pdd->lds_base; pAperture->lds_limit = pdd->lds_limit; @@ -486,7 +456,7 @@ static int kfd_ioctl_get_process_apertures(struct file *filp, pAperture->scratch_limit = pdd->scratch_limit; dev_dbg(kfd_device, - "node id %u\n", args.num_of_nodes); + "node id %u\n", args->num_of_nodes); dev_dbg(kfd_device, "gpu id %u\n", pdd->dev->id); dev_dbg(kfd_device, @@ -502,23 +472,23 @@ static int kfd_ioctl_get_process_apertures(struct file *filp, dev_dbg(kfd_device, "scratch_limit %llX\n", pdd->scratch_limit); - args.num_of_nodes++; + args->num_of_nodes++; } while ((pdd = kfd_get_next_process_device_data(p, pdd)) != NULL && - (args.num_of_nodes < NUM_OF_SUPPORTED_GPUS)); + (args->num_of_nodes < NUM_OF_SUPPORTED_GPUS)); } mutex_unlock(&p->mutex); - if (copy_to_user(arg, &args, sizeof(args))) - return -EFAULT; - return 0; } static long kfd_ioctl(struct file *filep, unsigned int cmd, unsigned long arg) { struct kfd_process *process; - long err = -EINVAL; + char stack_kdata[128]; + char *kdata = NULL; + unsigned int usize, asize; + int retcode = -EINVAL; dev_dbg(kfd_device, "ioctl cmd 0x%x (#%d), arg 0x%lx\n", @@ -528,54 +498,84 @@ static long kfd_ioctl(struct file *filep, unsigned int cmd, unsigned long arg) if (IS_ERR(process)) return PTR_ERR(process); + if (cmd & (IOC_IN | IOC_OUT)) { + if (asize <= sizeof(stack_kdata)) { + kdata = stack_kdata; + } else { + kdata = kmalloc(asize, GFP_KERNEL); + if (!kdata) { + retcode = -ENOMEM; + goto err_i1; + } + } + if (asize > usize) + memset(kdata + usize, 0, asize - usize); + } + + if (cmd & IOC_IN) { + if (copy_from_user(kdata, (void __user *)arg, usize) != 0) { + retcode = -EFAULT; + goto err_i1; + } + } else if (cmd & IOC_OUT) { + memset(kdata, 0, usize); + } + + switch (cmd) { case KFD_IOC_GET_VERSION: - err = kfd_ioctl_get_version(filep, process, (void __user *)arg); + retcode = kfd_ioctl_get_version(filep, process, kdata); break; case KFD_IOC_CREATE_QUEUE: - err = kfd_ioctl_create_queue(filep, process, - (void __user *)arg); + retcode = kfd_ioctl_create_queue(filep, process, + kdata); break; case KFD_IOC_DESTROY_QUEUE: - err = kfd_ioctl_destroy_queue(filep, process, - (void __user *)arg); + retcode = kfd_ioctl_destroy_queue(filep, process, + kdata); break; case KFD_IOC_SET_MEMORY_POLICY: - err = kfd_ioctl_set_memory_policy(filep, process, - (void __user *)arg); + retcode = kfd_ioctl_set_memory_policy(filep, process, + kdata); break; case KFD_IOC_GET_CLOCK_COUNTERS: - err = kfd_ioctl_get_clock_counters(filep, process, - (void __user *)arg); + retcode = kfd_ioctl_get_clock_counters(filep, process, + kdata); break; case KFD_IOC_GET_PROCESS_APERTURES: - err = kfd_ioctl_get_process_apertures(filep, process, - (void __user *)arg); + retcode = kfd_ioctl_get_process_apertures(filep, process, + kdata); break; case KFD_IOC_UPDATE_QUEUE: - err = kfd_ioctl_update_queue(filep, process, - (void __user *)arg); + retcode = kfd_ioctl_update_queue(filep, process, + kdata); break; default: - dev_err(kfd_device, + dev_dbg(kfd_device, "unknown ioctl cmd 0x%x, arg 0x%lx)\n", cmd, arg); - err = -EINVAL; + retcode = -EINVAL; break; } - if (err < 0) - dev_err(kfd_device, - "ioctl error %ld for ioctl cmd 0x%x (#%d)\n", - err, cmd, _IOC_NR(cmd)); + if (cmd & IOC_OUT) + if (copy_to_user((void __user *)arg, kdata, usize) != 0) + retcode = -EFAULT; - return err; +err_i1: + if (kdata != stack_kdata) + kfree(kdata); + + if (retcode) + dev_dbg(kfd_device, "ret = %d\n", retcode); + + return retcode; } static int kfd_mmap(struct file *filp, struct vm_area_struct *vma) -- cgit v0.10.2 From b81c55db1053805866a242cd0bfbfb0c60c499b3 Mon Sep 17 00:00:00 2001 From: Oded Gabbay Date: Mon, 29 Dec 2014 15:24:25 +0200 Subject: drm/amdkfd: reformat IOCTL definitions to drm-style MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch reformats the ioctl definitions in kfd_ioctl.h to be similar to the drm ioctls definition style. v2: Renamed KFD_COMMAND_(START|END) to AMDKFD_... Signed-off-by: Oded Gabbay Acked-by: Christian König diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index 249f492..6fbde9e 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -523,35 +523,36 @@ static long kfd_ioctl(struct file *filep, unsigned int cmd, unsigned long arg) switch (cmd) { - case KFD_IOC_GET_VERSION: + case AMDKFD_IOC_GET_VERSION: retcode = kfd_ioctl_get_version(filep, process, kdata); break; - case KFD_IOC_CREATE_QUEUE: + + case AMDKFD_IOC_CREATE_QUEUE: retcode = kfd_ioctl_create_queue(filep, process, kdata); break; - case KFD_IOC_DESTROY_QUEUE: + case AMDKFD_IOC_DESTROY_QUEUE: retcode = kfd_ioctl_destroy_queue(filep, process, kdata); break; - case KFD_IOC_SET_MEMORY_POLICY: + case AMDKFD_IOC_SET_MEMORY_POLICY: retcode = kfd_ioctl_set_memory_policy(filep, process, kdata); break; - case KFD_IOC_GET_CLOCK_COUNTERS: + case AMDKFD_IOC_GET_CLOCK_COUNTERS: retcode = kfd_ioctl_get_clock_counters(filep, process, kdata); break; - case KFD_IOC_GET_PROCESS_APERTURES: + case AMDKFD_IOC_GET_PROCESS_APERTURES: retcode = kfd_ioctl_get_process_apertures(filep, process, kdata); break; - case KFD_IOC_UPDATE_QUEUE: + case AMDKFD_IOC_UPDATE_QUEUE: retcode = kfd_ioctl_update_queue(filep, process, kdata); break; diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index 7acef41..af94f31 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -128,27 +128,34 @@ struct kfd_ioctl_get_process_apertures_args { uint32_t pad; }; -#define KFD_IOC_MAGIC 'K' +#define AMDKFD_IOCTL_BASE 'K' +#define AMDKFD_IO(nr) _IO(AMDKFD_IOCTL_BASE, nr) +#define AMDKFD_IOR(nr, type) _IOR(AMDKFD_IOCTL_BASE, nr, type) +#define AMDKFD_IOW(nr, type) _IOW(AMDKFD_IOCTL_BASE, nr, type) +#define AMDKFD_IOWR(nr, type) _IOWR(AMDKFD_IOCTL_BASE, nr, type) -#define KFD_IOC_GET_VERSION \ - _IOR(KFD_IOC_MAGIC, 1, struct kfd_ioctl_get_version_args) +#define AMDKFD_IOC_GET_VERSION \ + AMDKFD_IOR(0x01, struct kfd_ioctl_get_version_args) -#define KFD_IOC_CREATE_QUEUE \ - _IOWR(KFD_IOC_MAGIC, 2, struct kfd_ioctl_create_queue_args) +#define AMDKFD_IOC_CREATE_QUEUE \ + AMDKFD_IOWR(0x02, struct kfd_ioctl_create_queue_args) -#define KFD_IOC_DESTROY_QUEUE \ - _IOWR(KFD_IOC_MAGIC, 3, struct kfd_ioctl_destroy_queue_args) +#define AMDKFD_IOC_DESTROY_QUEUE \ + AMDKFD_IOWR(0x03, struct kfd_ioctl_destroy_queue_args) -#define KFD_IOC_SET_MEMORY_POLICY \ - _IOW(KFD_IOC_MAGIC, 4, struct kfd_ioctl_set_memory_policy_args) +#define AMDKFD_IOC_SET_MEMORY_POLICY \ + AMDKFD_IOW(0x04, struct kfd_ioctl_set_memory_policy_args) -#define KFD_IOC_GET_CLOCK_COUNTERS \ - _IOWR(KFD_IOC_MAGIC, 5, struct kfd_ioctl_get_clock_counters_args) +#define AMDKFD_IOC_GET_CLOCK_COUNTERS \ + AMDKFD_IOWR(0x05, struct kfd_ioctl_get_clock_counters_args) -#define KFD_IOC_GET_PROCESS_APERTURES \ - _IOR(KFD_IOC_MAGIC, 6, struct kfd_ioctl_get_process_apertures_args) +#define AMDKFD_IOC_GET_PROCESS_APERTURES \ + AMDKFD_IOR(0x06, struct kfd_ioctl_get_process_apertures_args) -#define KFD_IOC_UPDATE_QUEUE \ - _IOW(KFD_IOC_MAGIC, 7, struct kfd_ioctl_update_queue_args) +#define AMDKFD_IOC_UPDATE_QUEUE \ + AMDKFD_IOW(0x07, struct kfd_ioctl_update_queue_args) + +#define AMDKFD_COMMAND_START 0x01 +#define AMDKFD_COMMAND_END 0x08 #endif -- cgit v0.10.2 From 76baee6c733bfef30fcf86cbd121e336b839e408 Mon Sep 17 00:00:00 2001 From: Oded Gabbay Date: Mon, 29 Dec 2014 14:20:05 +0200 Subject: drm/amdkfd: rewrite kfd_ioctl() according to drm_ioctl() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch changes kfd_ioctl() to be very similar to drm_ioctl(). The patch defines an array of amdkfd_ioctls, which maps IOCTL definition to the ioctl function. The kfd_ioctl() uses that mapping to call the appropriate ioctl function, through a function pointer. This patch also declares a new typedef for the ioctl function pointer. v2: Renamed KFD_COMMAND_(START|END) to AMDKFD_... Signed-off-by: Oded Gabbay Acked-by: Christian König diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index 6fbde9e..fcfdf23 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -482,21 +482,79 @@ static int kfd_ioctl_get_process_apertures(struct file *filp, return 0; } +#define AMDKFD_IOCTL_DEF(ioctl, _func, _flags) \ + [_IOC_NR(ioctl)] = {.cmd = ioctl, .func = _func, .flags = _flags, .cmd_drv = 0, .name = #ioctl} + +/** Ioctl table */ +static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = { + AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_VERSION, + kfd_ioctl_get_version, 0), + + AMDKFD_IOCTL_DEF(AMDKFD_IOC_CREATE_QUEUE, + kfd_ioctl_create_queue, 0), + + AMDKFD_IOCTL_DEF(AMDKFD_IOC_DESTROY_QUEUE, + kfd_ioctl_destroy_queue, 0), + + AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_MEMORY_POLICY, + kfd_ioctl_set_memory_policy, 0), + + AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_CLOCK_COUNTERS, + kfd_ioctl_get_clock_counters, 0), + + AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_PROCESS_APERTURES, + kfd_ioctl_get_process_apertures, 0), + + AMDKFD_IOCTL_DEF(AMDKFD_IOC_UPDATE_QUEUE, + kfd_ioctl_update_queue, 0), +}; + +#define AMDKFD_CORE_IOCTL_COUNT ARRAY_SIZE(amdkfd_ioctls) + static long kfd_ioctl(struct file *filep, unsigned int cmd, unsigned long arg) { struct kfd_process *process; + amdkfd_ioctl_t *func; + const struct amdkfd_ioctl_desc *ioctl = NULL; + unsigned int nr = _IOC_NR(cmd); char stack_kdata[128]; char *kdata = NULL; unsigned int usize, asize; int retcode = -EINVAL; - dev_dbg(kfd_device, - "ioctl cmd 0x%x (#%d), arg 0x%lx\n", - cmd, _IOC_NR(cmd), arg); + if (nr >= AMDKFD_CORE_IOCTL_COUNT) + goto err_i1; + + if ((nr >= AMDKFD_COMMAND_START) && (nr < AMDKFD_COMMAND_END)) { + u32 amdkfd_size; + + ioctl = &amdkfd_ioctls[nr]; + + amdkfd_size = _IOC_SIZE(ioctl->cmd); + usize = asize = _IOC_SIZE(cmd); + if (amdkfd_size > asize) + asize = amdkfd_size; + + cmd = ioctl->cmd; + } else + goto err_i1; + + dev_dbg(kfd_device, "ioctl cmd 0x%x (#%d), arg 0x%lx\n", cmd, nr, arg); process = kfd_get_process(current); - if (IS_ERR(process)) - return PTR_ERR(process); + if (IS_ERR(process)) { + dev_dbg(kfd_device, "no process\n"); + goto err_i1; + } + + /* Do not trust userspace, use our own definition */ + func = ioctl->func; + + if (unlikely(!func)) { + dev_dbg(kfd_device, "no function\n"); + retcode = -EINVAL; + goto err_i1; + } if (cmd & (IOC_IN | IOC_OUT)) { if (asize <= sizeof(stack_kdata)) { @@ -521,55 +579,17 @@ static long kfd_ioctl(struct file *filep, unsigned int cmd, unsigned long arg) memset(kdata, 0, usize); } - - switch (cmd) { - case AMDKFD_IOC_GET_VERSION: - retcode = kfd_ioctl_get_version(filep, process, kdata); - break; - - case AMDKFD_IOC_CREATE_QUEUE: - retcode = kfd_ioctl_create_queue(filep, process, - kdata); - break; - - case AMDKFD_IOC_DESTROY_QUEUE: - retcode = kfd_ioctl_destroy_queue(filep, process, - kdata); - break; - - case AMDKFD_IOC_SET_MEMORY_POLICY: - retcode = kfd_ioctl_set_memory_policy(filep, process, - kdata); - break; - - case AMDKFD_IOC_GET_CLOCK_COUNTERS: - retcode = kfd_ioctl_get_clock_counters(filep, process, - kdata); - break; - - case AMDKFD_IOC_GET_PROCESS_APERTURES: - retcode = kfd_ioctl_get_process_apertures(filep, process, - kdata); - break; - - case AMDKFD_IOC_UPDATE_QUEUE: - retcode = kfd_ioctl_update_queue(filep, process, - kdata); - break; - - default: - dev_dbg(kfd_device, - "unknown ioctl cmd 0x%x, arg 0x%lx)\n", - cmd, arg); - retcode = -EINVAL; - break; - } + retcode = func(filep, process, kdata); if (cmd & IOC_OUT) if (copy_to_user((void __user *)arg, kdata, usize) != 0) retcode = -EFAULT; err_i1: + if (!ioctl) + dev_dbg(kfd_device, "invalid ioctl: pid=%d, cmd=0x%02x, nr=0x%02x\n", + task_pid_nr(current), cmd, nr); + if (kdata != stack_kdata) kfree(kdata); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index f9fb81e3..a5edb29 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -463,6 +463,24 @@ struct kfd_process { bool is_32bit_user_mode; }; +/** + * Ioctl function type. + * + * \param filep pointer to file structure. + * \param p amdkfd process pointer. + * \param data pointer to arg that was copied from user. + */ +typedef int amdkfd_ioctl_t(struct file *filep, struct kfd_process *p, + void *data); + +struct amdkfd_ioctl_desc { + unsigned int cmd; + int flags; + amdkfd_ioctl_t *func; + unsigned int cmd_drv; + const char *name; +}; + void kfd_process_create_wq(void); void kfd_process_destroy_wq(void); struct kfd_process *kfd_create_process(const struct task_struct *); -- cgit v0.10.2 From a5e31255e02a0797259f210c3bb92c6326a98a9c Mon Sep 17 00:00:00 2001 From: hayeswang Date: Tue, 6 Jan 2015 17:41:58 +0800 Subject: r8152: support ndo_features_check Support ndo_features_check to avoid: - the transport offset is more than the hw limitation when using hw checksum. - the skb->len of a GSO packet is more than the limitation. Signed-off-by: Hayes Wang Signed-off-by: David S. Miller diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index 2d1c77e..57ec23e 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -1897,6 +1897,22 @@ static void _rtl8152_set_rx_mode(struct net_device *netdev) netif_wake_queue(netdev); } +static netdev_features_t +rtl8152_features_check(struct sk_buff *skb, struct net_device *dev, + netdev_features_t features) +{ + u32 mss = skb_shinfo(skb)->gso_size; + int max_offset = mss ? GTTCPHO_MAX : TCPHO_MAX; + int offset = skb_transport_offset(skb); + + if ((mss || skb->ip_summed == CHECKSUM_PARTIAL) && offset > max_offset) + features &= ~(NETIF_F_ALL_CSUM | NETIF_F_GSO_MASK); + else if ((skb->len + sizeof(struct tx_desc)) > agg_buf_sz) + features &= ~NETIF_F_GSO_MASK; + + return features; +} + static netdev_tx_t rtl8152_start_xmit(struct sk_buff *skb, struct net_device *netdev) { @@ -3706,6 +3722,7 @@ static const struct net_device_ops rtl8152_netdev_ops = { .ndo_set_mac_address = rtl8152_set_mac_address, .ndo_change_mtu = rtl8152_change_mtu, .ndo_validate_addr = eth_validate_addr, + .ndo_features_check = rtl8152_features_check, }; static void r8152b_get_version(struct r8152 *tp) -- cgit v0.10.2 From 07ff890daeda31cf23173865edf50bcb03e100c3 Mon Sep 17 00:00:00 2001 From: "Palik, Imre" Date: Tue, 6 Jan 2015 16:44:44 +0100 Subject: xen-netback: fixing the propagation of the transmit shaper timeout Since e9ce7cb6b107 ("xen-netback: Factor queue-specific data into queue struct"), the transimt shaper timeout is always set to 0. The value the user sets via xenbus is never propagated to the transmit shaper. This patch fixes the issue. Cc: Anthony Liguori Signed-off-by: Imre Palik Acked-by: Ian Campbell Signed-off-by: David S. Miller diff --git a/drivers/net/xen-netback/xenbus.c b/drivers/net/xen-netback/xenbus.c index efbaf2a..794204e 100644 --- a/drivers/net/xen-netback/xenbus.c +++ b/drivers/net/xen-netback/xenbus.c @@ -737,6 +737,7 @@ static void connect(struct backend_info *be) } queue->remaining_credit = credit_bytes; + queue->credit_usec = credit_usec; err = connect_rings(be, queue); if (err) { -- cgit v0.10.2 From cd805f367633f6540aa287751c270973b7e4cdc9 Mon Sep 17 00:00:00 2001 From: David Drysdale Date: Tue, 6 Jan 2015 08:23:56 +0000 Subject: selftests/exec: allow shell return code of 126 When the shell fails to invoke a script because its path name is too long (ENAMETOOLONG), most shells return 127 to indicate command not found. However, some systems report 126 (which POSIX suggests should indicate a non-executable file) for this case, so allow that too. Reported-by: Geert Uytterhoeven Signed-off-by: David Drysdale Tested-by: Geert Uytterhoeven Signed-off-by: Shuah Khan diff --git a/tools/testing/selftests/exec/execveat.c b/tools/testing/selftests/exec/execveat.c index d273624..e238c95 100644 --- a/tools/testing/selftests/exec/execveat.c +++ b/tools/testing/selftests/exec/execveat.c @@ -62,7 +62,7 @@ static int _check_execveat_fail(int fd, const char *path, int flags, } static int check_execveat_invoked_rc(int fd, const char *path, int flags, - int expected_rc) + int expected_rc, int expected_rc2) { int status; int rc; @@ -98,9 +98,10 @@ static int check_execveat_invoked_rc(int fd, const char *path, int flags, child, status); return 1; } - if (WEXITSTATUS(status) != expected_rc) { - printf("[FAIL] (child %d exited with %d not %d)\n", - child, WEXITSTATUS(status), expected_rc); + if ((WEXITSTATUS(status) != expected_rc) && + (WEXITSTATUS(status) != expected_rc2)) { + printf("[FAIL] (child %d exited with %d not %d nor %d)\n", + child, WEXITSTATUS(status), expected_rc, expected_rc2); return 1; } printf("[OK]\n"); @@ -109,7 +110,7 @@ static int check_execveat_invoked_rc(int fd, const char *path, int flags, static int check_execveat(int fd, const char *path, int flags) { - return check_execveat_invoked_rc(fd, path, flags, 99); + return check_execveat_invoked_rc(fd, path, flags, 99, 99); } static char *concat(const char *left, const char *right) @@ -192,9 +193,15 @@ static int check_execveat_pathmax(int dot_dfd, const char *src, int is_script) * Execute as a long pathname relative to ".". If this is a script, * the interpreter will launch but fail to open the script because its * name ("/dev/fd/5/xxx....") is bigger than PATH_MAX. + * + * The failure code is usually 127 (POSIX: "If a command is not found, + * the exit status shall be 127."), but some systems give 126 (POSIX: + * "If the command name is found, but it is not an executable utility, + * the exit status shall be 126."), so allow either. */ if (is_script) - fail += check_execveat_invoked_rc(dot_dfd, longpath, 0, 127); + fail += check_execveat_invoked_rc(dot_dfd, longpath, 0, + 127, 126); else fail += check_execveat(dot_dfd, longpath, 0); -- cgit v0.10.2 From 13e634de0955a2da84a7bf8584d103805b455cbb Mon Sep 17 00:00:00 2001 From: dann frazier Date: Mon, 5 Jan 2015 19:54:40 -0700 Subject: tools: testing: selftests: mq_perf_tests: Fix infinite loop on ARM We can't use a char type to check for a negative return value since char isn't guaranteed to be signed. Indeed, the char type tends to be unsigned on ARM. Signed-off-by: dann frazier Signed-off-by: Shuah Khan diff --git a/tools/testing/selftests/mqueue/mq_perf_tests.c b/tools/testing/selftests/mqueue/mq_perf_tests.c index 94dae65..8519e9e 100644 --- a/tools/testing/selftests/mqueue/mq_perf_tests.c +++ b/tools/testing/selftests/mqueue/mq_perf_tests.c @@ -536,10 +536,9 @@ int main(int argc, char *argv[]) { struct mq_attr attr; char *option, *next_option; - int i, cpu; + int i, cpu, rc; struct sigaction sa; poptContext popt_context; - char rc; void *retval; main_thread = pthread_self(); -- cgit v0.10.2 From fee7e49d45149fba60156f5b59014f764d3e3728 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Tue, 6 Jan 2015 13:00:05 -0800 Subject: mm: propagate error from stack expansion even for guard page Jay Foad reports that the address sanitizer test (asan) sometimes gets confused by a stack pointer that ends up being outside the stack vma that is reported by /proc/maps. This happens due to an interaction between RLIMIT_STACK and the guard page: when we do the guard page check, we ignore the potential error from the stack expansion, which effectively results in a missing guard page, since the expected stack expansion won't have been done. And since /proc/maps explicitly ignores the guard page (commit d7824370e263: "mm: fix up some user-visible effects of the stack guard page"), the stack pointer ends up being outside the reported stack area. This is the minimal patch: it just propagates the error. It also effectively makes the guard page part of the stack limit, which in turn measn that the actual real stack is one page less than the stack limit. Let's see if anybody notices. We could teach acct_stack_growth() to allow an extra page for a grow-up/grow-down stack in the rlimit test, but I don't want to add more complexity if it isn't needed. Reported-and-tested-by: Jay Foad Cc: stable@vger.kernel.org Signed-off-by: Linus Torvalds diff --git a/include/linux/mm.h b/include/linux/mm.h index f80d019..80fc92a 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1952,7 +1952,7 @@ extern int expand_downwards(struct vm_area_struct *vma, #if VM_GROWSUP extern int expand_upwards(struct vm_area_struct *vma, unsigned long address); #else - #define expand_upwards(vma, address) do { } while (0) + #define expand_upwards(vma, address) (0) #endif /* Look up the first VMA which satisfies addr < vm_end, NULL if none. */ diff --git a/mm/memory.c b/mm/memory.c index ca920d1..d7e497e 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -2593,7 +2593,7 @@ static inline int check_stack_guard_page(struct vm_area_struct *vma, unsigned lo if (prev && prev->vm_end == address) return prev->vm_flags & VM_GROWSDOWN ? 0 : -ENOMEM; - expand_downwards(vma, address - PAGE_SIZE); + return expand_downwards(vma, address - PAGE_SIZE); } if ((vma->vm_flags & VM_GROWSUP) && address + PAGE_SIZE == vma->vm_end) { struct vm_area_struct *next = vma->vm_next; @@ -2602,7 +2602,7 @@ static inline int check_stack_guard_page(struct vm_area_struct *vma, unsigned lo if (next && next->vm_start == address + PAGE_SIZE) return next->vm_flags & VM_GROWSUP ? 0 : -ENOMEM; - expand_upwards(vma, address + PAGE_SIZE); + return expand_upwards(vma, address + PAGE_SIZE); } return 0; } -- cgit v0.10.2 From 46243860806bdc2756f3ce8ac86b4d7c616bcd6c Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Sun, 21 Dec 2014 10:42:08 -0800 Subject: vhost-scsi: Add missing virtio-scsi -> TCM attribute conversion While looking at hch's recent conversion to drop the MSG_*_TAG definitions, I noticed a long standing bug in vhost-scsi where the VIRTIO_SCSI_S_* attribute definitions where incorrectly being passed directly into target_submit_cmd_map_sgls(). This patch adds the missing virtio-scsi to TCM/SAM task attribute conversion. Cc: Christoph Hellwig Cc: Michael S. Tsirkin Cc: Paolo Bonzini Cc: stable@vger.kernel.org # 3.5 Signed-off-by: Nicholas Bellinger diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c index 01c01cb..d695b16 100644 --- a/drivers/vhost/scsi.c +++ b/drivers/vhost/scsi.c @@ -911,6 +911,23 @@ vhost_scsi_map_iov_to_prot(struct tcm_vhost_cmd *cmd, return 0; } +static int vhost_scsi_to_tcm_attr(int attr) +{ + switch (attr) { + case VIRTIO_SCSI_S_SIMPLE: + return TCM_SIMPLE_TAG; + case VIRTIO_SCSI_S_ORDERED: + return TCM_ORDERED_TAG; + case VIRTIO_SCSI_S_HEAD: + return TCM_HEAD_TAG; + case VIRTIO_SCSI_S_ACA: + return TCM_ACA_TAG; + default: + break; + } + return TCM_SIMPLE_TAG; +} + static void tcm_vhost_submission_work(struct work_struct *work) { struct tcm_vhost_cmd *cmd = @@ -936,9 +953,10 @@ static void tcm_vhost_submission_work(struct work_struct *work) rc = target_submit_cmd_map_sgls(se_cmd, tv_nexus->tvn_se_sess, cmd->tvc_cdb, &cmd->tvc_sense_buf[0], cmd->tvc_lun, cmd->tvc_exp_data_len, - cmd->tvc_task_attr, cmd->tvc_data_direction, - TARGET_SCF_ACK_KREF, sg_ptr, cmd->tvc_sgl_count, - NULL, 0, sg_prot_ptr, cmd->tvc_prot_sgl_count); + vhost_scsi_to_tcm_attr(cmd->tvc_task_attr), + cmd->tvc_data_direction, TARGET_SCF_ACK_KREF, + sg_ptr, cmd->tvc_sgl_count, NULL, 0, sg_prot_ptr, + cmd->tvc_prot_sgl_count); if (rc < 0) { transport_send_check_condition_and_sense(se_cmd, TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE, 0); -- cgit v0.10.2 From 67e51daa5029417db86f1833f1b7b2212c454fe9 Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Tue, 23 Dec 2014 00:32:21 -0800 Subject: Documentation/target: Update fabric_ops to latest code This patch updates tcm_mod_builder.py to add/drop a handful of struct target_core_fabric_ops functions to sync up with the latest requirements to function in target_fabric_tf_ops_check(). Signed-off-by: Nicholas Bellinger diff --git a/Documentation/target/tcm_mod_builder.py b/Documentation/target/tcm_mod_builder.py index 230ce71..2b47704 100755 --- a/Documentation/target/tcm_mod_builder.py +++ b/Documentation/target/tcm_mod_builder.py @@ -389,9 +389,6 @@ def tcm_mod_build_configfs(proto_ident, fabric_mod_dir_var, fabric_mod_name): buf += " .release_cmd = " + fabric_mod_name + "_release_cmd,\n" buf += " .shutdown_session = " + fabric_mod_name + "_shutdown_session,\n" buf += " .close_session = " + fabric_mod_name + "_close_session,\n" - buf += " .stop_session = " + fabric_mod_name + "_stop_session,\n" - buf += " .fall_back_to_erl0 = " + fabric_mod_name + "_reset_nexus,\n" - buf += " .sess_logged_in = " + fabric_mod_name + "_sess_logged_in,\n" buf += " .sess_get_index = " + fabric_mod_name + "_sess_get_index,\n" buf += " .sess_get_initiator_sid = NULL,\n" buf += " .write_pending = " + fabric_mod_name + "_write_pending,\n" @@ -402,7 +399,7 @@ def tcm_mod_build_configfs(proto_ident, fabric_mod_dir_var, fabric_mod_name): buf += " .queue_data_in = " + fabric_mod_name + "_queue_data_in,\n" buf += " .queue_status = " + fabric_mod_name + "_queue_status,\n" buf += " .queue_tm_rsp = " + fabric_mod_name + "_queue_tm_rsp,\n" - buf += " .is_state_remove = " + fabric_mod_name + "_is_state_remove,\n" + buf += " .aborted_task = " + fabric_mod_name + "_aborted_task,\n" buf += " /*\n" buf += " * Setup function pointers for generic logic in target_core_fabric_configfs.c\n" buf += " */\n" @@ -428,7 +425,7 @@ def tcm_mod_build_configfs(proto_ident, fabric_mod_dir_var, fabric_mod_name): buf += " /*\n" buf += " * Register the top level struct config_item_type with TCM core\n" buf += " */\n" - buf += " fabric = target_fabric_configfs_init(THIS_MODULE, \"" + fabric_mod_name[4:] + "\");\n" + buf += " fabric = target_fabric_configfs_init(THIS_MODULE, \"" + fabric_mod_name + "\");\n" buf += " if (IS_ERR(fabric)) {\n" buf += " printk(KERN_ERR \"target_fabric_configfs_init() failed\\n\");\n" buf += " return PTR_ERR(fabric);\n" @@ -595,7 +592,7 @@ def tcm_mod_dump_fabric_ops(proto_ident, fabric_mod_dir_var, fabric_mod_name): if re.search('get_fabric_name', fo): buf += "char *" + fabric_mod_name + "_get_fabric_name(void)\n" buf += "{\n" - buf += " return \"" + fabric_mod_name[4:] + "\";\n" + buf += " return \"" + fabric_mod_name + "\";\n" buf += "}\n\n" bufi += "char *" + fabric_mod_name + "_get_fabric_name(void);\n" continue @@ -820,27 +817,6 @@ def tcm_mod_dump_fabric_ops(proto_ident, fabric_mod_dir_var, fabric_mod_name): buf += "}\n\n" bufi += "void " + fabric_mod_name + "_close_session(struct se_session *);\n" - if re.search('stop_session\)\(', fo): - buf += "void " + fabric_mod_name + "_stop_session(struct se_session *se_sess, int sess_sleep , int conn_sleep)\n" - buf += "{\n" - buf += " return;\n" - buf += "}\n\n" - bufi += "void " + fabric_mod_name + "_stop_session(struct se_session *, int, int);\n" - - if re.search('fall_back_to_erl0\)\(', fo): - buf += "void " + fabric_mod_name + "_reset_nexus(struct se_session *se_sess)\n" - buf += "{\n" - buf += " return;\n" - buf += "}\n\n" - bufi += "void " + fabric_mod_name + "_reset_nexus(struct se_session *);\n" - - if re.search('sess_logged_in\)\(', fo): - buf += "int " + fabric_mod_name + "_sess_logged_in(struct se_session *se_sess)\n" - buf += "{\n" - buf += " return 0;\n" - buf += "}\n\n" - bufi += "int " + fabric_mod_name + "_sess_logged_in(struct se_session *);\n" - if re.search('sess_get_index\)\(', fo): buf += "u32 " + fabric_mod_name + "_sess_get_index(struct se_session *se_sess)\n" buf += "{\n" @@ -898,19 +874,18 @@ def tcm_mod_dump_fabric_ops(proto_ident, fabric_mod_dir_var, fabric_mod_name): bufi += "int " + fabric_mod_name + "_queue_status(struct se_cmd *);\n" if re.search('queue_tm_rsp\)\(', fo): - buf += "int " + fabric_mod_name + "_queue_tm_rsp(struct se_cmd *se_cmd)\n" + buf += "void " + fabric_mod_name + "_queue_tm_rsp(struct se_cmd *se_cmd)\n" buf += "{\n" - buf += " return 0;\n" + buf += " return;\n" buf += "}\n\n" - bufi += "int " + fabric_mod_name + "_queue_tm_rsp(struct se_cmd *);\n" + bufi += "void " + fabric_mod_name + "_queue_tm_rsp(struct se_cmd *);\n" - if re.search('is_state_remove\)\(', fo): - buf += "int " + fabric_mod_name + "_is_state_remove(struct se_cmd *se_cmd)\n" + if re.search('aborted_task\)\(', fo): + buf += "void " + fabric_mod_name + "_aborted_task(struct se_cmd *se_cmd)\n" buf += "{\n" - buf += " return 0;\n" + buf += " return;\n" buf += "}\n\n" - bufi += "int " + fabric_mod_name + "_is_state_remove(struct se_cmd *);\n" - + bufi += "void " + fabric_mod_name + "_aborted_task(struct se_cmd *);\n" ret = p.write(buf) if ret: @@ -1018,11 +993,11 @@ def main(modname, proto_ident): tcm_mod_build_kbuild(fabric_mod_dir, fabric_mod_name) tcm_mod_build_kconfig(fabric_mod_dir, fabric_mod_name) - input = raw_input("Would you like to add " + fabric_mod_name + "to drivers/target/Makefile..? [yes,no]: ") + input = raw_input("Would you like to add " + fabric_mod_name + " to drivers/target/Makefile..? [yes,no]: ") if input == "yes" or input == "y": tcm_mod_add_kbuild(tcm_dir, fabric_mod_name) - input = raw_input("Would you like to add " + fabric_mod_name + "to drivers/target/Kconfig..? [yes,no]: ") + input = raw_input("Would you like to add " + fabric_mod_name + " to drivers/target/Kconfig..? [yes,no]: ") if input == "yes" or input == "y": tcm_mod_add_kconfig(tcm_dir, fabric_mod_name) -- cgit v0.10.2 From 6a3ef10bacb08860805e9053f919786dc34760ba Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Mon, 5 Jan 2015 08:57:04 +0100 Subject: ACPI / video: Add disable_native_backlight quirk for Dell XPS15 L521X The L521X variant of the Dell XPS15 has integrated nvidia graphics, and backlight control does not work properly when using the native interfaces. Link: https://bugzilla.redhat.com/show_bug.cgi?id=1163574 Signed-off-by: Hans de Goede Signed-off-by: Rafael J. Wysocki diff --git a/drivers/acpi/video.c b/drivers/acpi/video.c index c72e79d2c5..032db45 100644 --- a/drivers/acpi/video.c +++ b/drivers/acpi/video.c @@ -522,6 +522,16 @@ static struct dmi_system_id video_dmi_table[] __initdata = { DMI_MATCH(DMI_PRODUCT_NAME, "370R4E/370R4V/370R5E/3570RE/370R5V"), }, }, + + { + /* https://bugzilla.redhat.com/show_bug.cgi?id=1163574 */ + .callback = video_disable_native_backlight, + .ident = "Dell XPS15 L521X", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "XPS L521X"), + }, + }, {} }; -- cgit v0.10.2 From 2abad79afa700e837cb4feed170141292e0720c0 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 6 Jan 2015 23:17:53 +0200 Subject: qla3xxx: don't allow never end busy loop The counter variable wasn't increased at all which may stuck under certain circumstances. Signed-off-by: Andy Shevchenko Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/qlogic/qla3xxx.c b/drivers/net/ethernet/qlogic/qla3xxx.c index c2f09af..4847713 100644 --- a/drivers/net/ethernet/qlogic/qla3xxx.c +++ b/drivers/net/ethernet/qlogic/qla3xxx.c @@ -146,10 +146,7 @@ static int ql_wait_for_drvr_lock(struct ql3_adapter *qdev) { int i = 0; - while (i < 10) { - if (i) - ssleep(1); - + do { if (ql_sem_lock(qdev, QL_DRVR_SEM_MASK, (QL_RESOURCE_BITS_BASE_CODE | (qdev->mac_index) @@ -158,7 +155,8 @@ static int ql_wait_for_drvr_lock(struct ql3_adapter *qdev) "driver lock acquired\n"); return 1; } - } + ssleep(1); + } while (++i < 10); netdev_err(qdev->ndev, "Timed out waiting for driver lock...\n"); return 0; -- cgit v0.10.2 From 60834b73a9c2bbc2f514122ddc626f3350fb40cd Mon Sep 17 00:00:00 2001 From: Aaron Plattner Date: Tue, 6 Jan 2015 13:40:14 -0800 Subject: ALSA: hda - Add new GPU codec ID 0x10de0072 to snd-hda Vendor ID 0x10de0072 is used by a yet-to-be-named GPU chip. Signed-off-by: Aaron Plattner Cc: Signed-off-by: Takashi Iwai diff --git a/sound/pci/hda/patch_hdmi.c b/sound/pci/hda/patch_hdmi.c index 5f13d2d..b422e40 100644 --- a/sound/pci/hda/patch_hdmi.c +++ b/sound/pci/hda/patch_hdmi.c @@ -3353,6 +3353,7 @@ static const struct hda_codec_preset snd_hda_preset_hdmi[] = { { .id = 0x10de0067, .name = "MCP67 HDMI", .patch = patch_nvhdmi_2ch }, { .id = 0x10de0070, .name = "GPU 70 HDMI/DP", .patch = patch_nvhdmi }, { .id = 0x10de0071, .name = "GPU 71 HDMI/DP", .patch = patch_nvhdmi }, +{ .id = 0x10de0072, .name = "GPU 72 HDMI/DP", .patch = patch_nvhdmi }, { .id = 0x10de8001, .name = "MCP73 HDMI", .patch = patch_nvhdmi_2ch }, { .id = 0x11069f80, .name = "VX900 HDMI/DP", .patch = patch_via_hdmi }, { .id = 0x11069f81, .name = "VX900 HDMI/DP", .patch = patch_via_hdmi }, @@ -3413,6 +3414,7 @@ MODULE_ALIAS("snd-hda-codec-id:10de0060"); MODULE_ALIAS("snd-hda-codec-id:10de0067"); MODULE_ALIAS("snd-hda-codec-id:10de0070"); MODULE_ALIAS("snd-hda-codec-id:10de0071"); +MODULE_ALIAS("snd-hda-codec-id:10de0072"); MODULE_ALIAS("snd-hda-codec-id:10de8001"); MODULE_ALIAS("snd-hda-codec-id:11069f80"); MODULE_ALIAS("snd-hda-codec-id:11069f81"); -- cgit v0.10.2 From 0acb0e712b57b4bcabc5fdcb3e445cfce0f80340 Mon Sep 17 00:00:00 2001 From: Octavian Purdila Date: Tue, 16 Dec 2014 17:57:12 +0200 Subject: gpio: dln2: fix issue when an IRQ is unmasked then enabled As noticed during suspend/resume operations, the IRQ can be unmasked then disabled in suspend and eventually enabled in resume, but without being unmasked. The current implementation does not take into account interactions between mask/unmask and enable/disable interrupts, and thus in the above scenarios the IRQs remain unactive. To fix this we removed the enable/disable operations as they fallback to mask/unmask anyway. We also remove the pending bitmaks as it is already done in irq_data (i.e. IRQS_PENDING). Signed-off-by: Octavian Purdila Acked-by: Alexandre Courbot Signed-off-by: Linus Walleij diff --git a/drivers/gpio/gpio-dln2.c b/drivers/gpio/gpio-dln2.c index 978b51e..28a62c4 100644 --- a/drivers/gpio/gpio-dln2.c +++ b/drivers/gpio/gpio-dln2.c @@ -64,9 +64,8 @@ struct dln2_gpio { */ DECLARE_BITMAP(output_enabled, DLN2_GPIO_MAX_PINS); - DECLARE_BITMAP(irqs_masked, DLN2_GPIO_MAX_PINS); - DECLARE_BITMAP(irqs_enabled, DLN2_GPIO_MAX_PINS); - DECLARE_BITMAP(irqs_pending, DLN2_GPIO_MAX_PINS); + /* active IRQs - not synced to hardware */ + DECLARE_BITMAP(unmasked_irqs, DLN2_GPIO_MAX_PINS); struct dln2_irq_work *irq_work; }; @@ -303,29 +302,19 @@ static void dln2_irq_work(struct work_struct *w) struct dln2_gpio *dln2 = iw->dln2; u8 type = iw->type & DLN2_GPIO_EVENT_MASK; - if (test_bit(iw->pin, dln2->irqs_enabled)) + if (test_bit(iw->pin, dln2->unmasked_irqs)) dln2_gpio_set_event_cfg(dln2, iw->pin, type, 0); else dln2_gpio_set_event_cfg(dln2, iw->pin, DLN2_GPIO_EVENT_NONE, 0); } -static void dln2_irq_enable(struct irq_data *irqd) -{ - struct gpio_chip *gc = irq_data_get_irq_chip_data(irqd); - struct dln2_gpio *dln2 = container_of(gc, struct dln2_gpio, gpio); - int pin = irqd_to_hwirq(irqd); - - set_bit(pin, dln2->irqs_enabled); - schedule_work(&dln2->irq_work[pin].work); -} - -static void dln2_irq_disable(struct irq_data *irqd) +static void dln2_irq_unmask(struct irq_data *irqd) { struct gpio_chip *gc = irq_data_get_irq_chip_data(irqd); struct dln2_gpio *dln2 = container_of(gc, struct dln2_gpio, gpio); int pin = irqd_to_hwirq(irqd); - clear_bit(pin, dln2->irqs_enabled); + set_bit(pin, dln2->unmasked_irqs); schedule_work(&dln2->irq_work[pin].work); } @@ -335,27 +324,8 @@ static void dln2_irq_mask(struct irq_data *irqd) struct dln2_gpio *dln2 = container_of(gc, struct dln2_gpio, gpio); int pin = irqd_to_hwirq(irqd); - set_bit(pin, dln2->irqs_masked); -} - -static void dln2_irq_unmask(struct irq_data *irqd) -{ - struct gpio_chip *gc = irq_data_get_irq_chip_data(irqd); - struct dln2_gpio *dln2 = container_of(gc, struct dln2_gpio, gpio); - struct device *dev = dln2->gpio.dev; - int pin = irqd_to_hwirq(irqd); - - if (test_and_clear_bit(pin, dln2->irqs_pending)) { - int irq; - - irq = irq_find_mapping(dln2->gpio.irqdomain, pin); - if (!irq) { - dev_err(dev, "pin %d not mapped to IRQ\n", pin); - return; - } - - generic_handle_irq(irq); - } + clear_bit(pin, dln2->unmasked_irqs); + schedule_work(&dln2->irq_work[pin].work); } static int dln2_irq_set_type(struct irq_data *irqd, unsigned type) @@ -389,8 +359,6 @@ static int dln2_irq_set_type(struct irq_data *irqd, unsigned type) static struct irq_chip dln2_gpio_irqchip = { .name = "dln2-irq", - .irq_enable = dln2_irq_enable, - .irq_disable = dln2_irq_disable, .irq_mask = dln2_irq_mask, .irq_unmask = dln2_irq_unmask, .irq_set_type = dln2_irq_set_type, @@ -425,13 +393,6 @@ static void dln2_gpio_event(struct platform_device *pdev, u16 echo, return; } - if (!test_bit(pin, dln2->irqs_enabled)) - return; - if (test_bit(pin, dln2->irqs_masked)) { - set_bit(pin, dln2->irqs_pending); - return; - } - switch (dln2->irq_work[pin].type) { case DLN2_GPIO_EVENT_CHANGE_RISING: if (event->value) -- cgit v0.10.2 From 5afb287a06ce869827563cbd2b0a035800dd6f5d Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Wed, 17 Dec 2014 17:47:14 +0800 Subject: gpio: dln2: Fix gpio output value in dln2_gpio_direction_output() dln2_gpio_direction_output() ignored the state passed into it. Fix it. Also make dln2_gpio_pin_set_out_val return int, so we can check the error value. Signed-off-by: Axel Lin Tested-by: Daniel Baluta Acked-by: Alexandre Courbot Reviewed-by: Octavian Purdila Signed-off-by: Linus Walleij diff --git a/drivers/gpio/gpio-dln2.c b/drivers/gpio/gpio-dln2.c index 28a62c4..2ff4619 100644 --- a/drivers/gpio/gpio-dln2.c +++ b/drivers/gpio/gpio-dln2.c @@ -140,16 +140,16 @@ static int dln2_gpio_pin_get_out_val(struct dln2_gpio *dln2, unsigned int pin) return !!ret; } -static void dln2_gpio_pin_set_out_val(struct dln2_gpio *dln2, - unsigned int pin, int value) +static int dln2_gpio_pin_set_out_val(struct dln2_gpio *dln2, + unsigned int pin, int value) { struct dln2_gpio_pin_val req = { .pin = cpu_to_le16(pin), .value = value, }; - dln2_transfer_tx(dln2->pdev, DLN2_GPIO_PIN_SET_OUT_VAL, &req, - sizeof(req)); + return dln2_transfer_tx(dln2->pdev, DLN2_GPIO_PIN_SET_OUT_VAL, &req, + sizeof(req)); } #define DLN2_GPIO_DIRECTION_IN 0 @@ -266,6 +266,13 @@ static int dln2_gpio_direction_input(struct gpio_chip *chip, unsigned offset) static int dln2_gpio_direction_output(struct gpio_chip *chip, unsigned offset, int value) { + struct dln2_gpio *dln2 = container_of(chip, struct dln2_gpio, gpio); + int ret; + + ret = dln2_gpio_pin_set_out_val(dln2, offset, value); + if (ret < 0) + return ret; + return dln2_gpio_set_direction(chip, offset, DLN2_GPIO_DIRECTION_OUT); } -- cgit v0.10.2 From 96d16c3088a924a44175d2923cc0eafa0d8bbecc Mon Sep 17 00:00:00 2001 From: Francesco VIRLINZI Date: Mon, 5 Jan 2015 11:04:13 +0100 Subject: pinctrl: st: avoid multiple mutex lock Using the sysfs inteface to inspect the pins configuration the system can walk around a path which acquires the same mutex twice. On STiH407 platform, for example : cat /sys/kernel/debug/pinctrl/920f080.pin-controller-front0/pinconf-pins hangs the kernel and never returns. With this patch the mutex is temporary freed. Signed-off-by: Francesco Virlinzi Signed-off-by: Patrice Chotard Signed-off-by: Linus Walleij diff --git a/drivers/pinctrl/pinctrl-st.c b/drivers/pinctrl/pinctrl-st.c index 7c9d513..87570e6 100644 --- a/drivers/pinctrl/pinctrl-st.c +++ b/drivers/pinctrl/pinctrl-st.c @@ -1012,8 +1012,10 @@ static void st_pinconf_dbg_show(struct pinctrl_dev *pctldev, struct seq_file *s, unsigned pin_id) { unsigned long config; - st_pinconf_get(pctldev, pin_id, &config); + mutex_unlock(&pctldev->mutex); + st_pinconf_get(pctldev, pin_id, &config); + mutex_lock(&pctldev->mutex); seq_printf(s, "[OE:%ld,PU:%ld,OD:%ld]\n" "\t\t[retime:%ld,invclk:%ld,clknotdat:%ld," "de:%ld,rt-clk:%ld,rt-delay:%ld]", -- cgit v0.10.2 From fce7fcc785998d91a4e2293ec6c0fa7a4677999b Mon Sep 17 00:00:00 2001 From: Patrice CHOTARD Date: Mon, 5 Jan 2015 11:04:14 +0100 Subject: pinctrl: st: Add irq_disable hook to st_gpio_irqchip Currently disable_irq() doesn't work for pinctrl-st driver, due to missing irq_disable hook in the driver. disable_irq() is required only for level-triggered interrupts, which is not the case normally. Signed-off-by: Pankaj Dev Signed-off-by: Patrice Chotard Signed-off-by: Linus Walleij diff --git a/drivers/pinctrl/pinctrl-st.c b/drivers/pinctrl/pinctrl-st.c index 87570e6..9e5ec00 100644 --- a/drivers/pinctrl/pinctrl-st.c +++ b/drivers/pinctrl/pinctrl-st.c @@ -1445,6 +1445,7 @@ static struct gpio_chip st_gpio_template = { static struct irq_chip st_gpio_irqchip = { .name = "GPIO", + .irq_disable = st_gpio_irq_mask, .irq_mask = st_gpio_irq_mask, .irq_unmask = st_gpio_irq_unmask, .irq_set_type = st_gpio_irq_set_type, -- cgit v0.10.2 From 99975cc6ada0d5f2675e83abecae05aba5f437d2 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Wed, 7 Jan 2015 10:51:00 +0200 Subject: vhost/net: length miscalculation commit 8b38694a2dc8b18374310df50174f1e4376d6824 vhost/net: virtio 1.0 byte swap had this chunk: - heads[headcount - 1].len += datalen; + heads[headcount - 1].len = cpu_to_vhost32(vq, len - datalen); This adds datalen with the wrong sign, causing guest panics. Fixes: 8b38694a2dc8b18374310df50174f1e4376d6824 Reported-by: Alex Williamson Suggested-by: Greg Kurz Signed-off-by: Michael S. Tsirkin diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index 14419a8..d415d69 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -538,7 +538,7 @@ static int get_rx_bufs(struct vhost_virtqueue *vq, ++headcount; seg += in; } - heads[headcount - 1].len = cpu_to_vhost32(vq, len - datalen); + heads[headcount - 1].len = cpu_to_vhost32(vq, len + datalen); *iovcount = seg; if (unlikely(log)) *log_num = nlogs; -- cgit v0.10.2 From 0f9132ceab112ab75d34865f5aeddf0bac234896 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 5 Jan 2015 12:24:47 +0000 Subject: arm64: Correct __NR_compat_syscalls for bpf Commit 97b56be10352a70c (arm64: compat: Enable bpf syscall) made the usual mistake of forgetting to update __NR_compat_syscalls. Due to this, when el0_sync_compat calls el0_svc_naked, the test against sc_nr (__NR_compat_syscalls) will fail, and we'll call ni_sys, returning -ENOSYS to userspace. This patch bumps __NR_compat_syscalls appropriately, enabling the use of the bpf syscall from compat tasks. Due to the reorganisation of unistd{,32}.h as part of commit f3e5c847ec3d12b4 (arm64: Add __NR_* definitions for compat syscalls) it is not currently possible to include both headers and sanity-check the value of __NR_compat_syscalls at build-time to prevent this from happening again. Additional rework is required to make such niceties a possibility. Cc: Will Deacon Acked-by: Catalin Marinas Signed-off-by: Mark Rutland Signed-off-by: Will Deacon diff --git a/arch/arm64/include/asm/unistd.h b/arch/arm64/include/asm/unistd.h index 49c9aef..b780c6c 100644 --- a/arch/arm64/include/asm/unistd.h +++ b/arch/arm64/include/asm/unistd.h @@ -44,7 +44,7 @@ #define __ARM_NR_compat_cacheflush (__ARM_NR_COMPAT_BASE+2) #define __ARM_NR_compat_set_tls (__ARM_NR_COMPAT_BASE+5) -#define __NR_compat_syscalls 386 +#define __NR_compat_syscalls 387 #endif #define __ARCH_WANT_SYS_CLONE -- cgit v0.10.2 From 3efcb7a44bb75bd94d889245ba82e2195a7ab0a2 Mon Sep 17 00:00:00 2001 From: Tobias Klauser Date: Mon, 5 Jan 2015 14:23:48 +0000 Subject: arm64: Remove unused prepare_to_copy() prepare_to_copy() was removed from all architectures supported at that time in commit 55ccf3fe3f9a ("fork: move the real prepare_to_copy() users to arch_dup_task_struct()"). Remove it from arm64 as well. Signed-off-by: Tobias Klauser Signed-off-by: Will Deacon diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index 286b1be..f131a98 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -123,9 +123,6 @@ struct task_struct; /* Free all resources held by a thread. */ extern void release_thread(struct task_struct *); -/* Prepare to copy thread state - unlazy all lazy status */ -#define prepare_to_copy(tsk) do { } while (0) - unsigned long get_wchan(struct task_struct *p); #define cpu_relax() barrier() -- cgit v0.10.2 From 80639d4a79aac5081d43ed64a0801ef42de3aba3 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 7 Jan 2015 10:31:56 +0000 Subject: arm64: sanity checks: add missing AArch32 registers We don't currently check a number of registers exposed to AArch32 guests (MVFR{0,1,2}_EL1 and ID_DFR0_EL1), despite the fact these describe AArch32 feature support exposed to userspace and KVM guests similarly to AArch64 registers which we do check. We do not expect these registers to vary across a set of CPUs. This patch adds said registers to the cpuinfo framework and sanity checks. No sanity check failures have been observed on a current ARMv8 big.LITTLE platform (Juno). Cc: Catalin Marinas Reported-by: Suzuki K. Poulose Signed-off-by: Suzuki K. Poulose Signed-off-by: Mark Rutland Signed-off-by: Will Deacon diff --git a/arch/arm64/include/asm/cpu.h b/arch/arm64/include/asm/cpu.h index ace7068..8e797b2 100644 --- a/arch/arm64/include/asm/cpu.h +++ b/arch/arm64/include/asm/cpu.h @@ -39,6 +39,7 @@ struct cpuinfo_arm64 { u64 reg_id_aa64pfr0; u64 reg_id_aa64pfr1; + u32 reg_id_dfr0; u32 reg_id_isar0; u32 reg_id_isar1; u32 reg_id_isar2; @@ -51,6 +52,10 @@ struct cpuinfo_arm64 { u32 reg_id_mmfr3; u32 reg_id_pfr0; u32 reg_id_pfr1; + + u32 reg_mvfr0; + u32 reg_mvfr1; + u32 reg_mvfr2; }; DECLARE_PER_CPU(struct cpuinfo_arm64, cpu_data); diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c index 57b6417..07d435c 100644 --- a/arch/arm64/kernel/cpuinfo.c +++ b/arch/arm64/kernel/cpuinfo.c @@ -147,6 +147,7 @@ static void cpuinfo_sanity_check(struct cpuinfo_arm64 *cur) * If we have AArch32, we care about 32-bit features for compat. These * registers should be RES0 otherwise. */ + diff |= CHECK(id_dfr0, boot, cur, cpu); diff |= CHECK(id_isar0, boot, cur, cpu); diff |= CHECK(id_isar1, boot, cur, cpu); diff |= CHECK(id_isar2, boot, cur, cpu); @@ -165,6 +166,10 @@ static void cpuinfo_sanity_check(struct cpuinfo_arm64 *cur) diff |= CHECK(id_pfr0, boot, cur, cpu); diff |= CHECK(id_pfr1, boot, cur, cpu); + diff |= CHECK(mvfr0, boot, cur, cpu); + diff |= CHECK(mvfr1, boot, cur, cpu); + diff |= CHECK(mvfr2, boot, cur, cpu); + /* * Mismatched CPU features are a recipe for disaster. Don't even * pretend to support them. @@ -189,6 +194,7 @@ static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info) info->reg_id_aa64pfr0 = read_cpuid(ID_AA64PFR0_EL1); info->reg_id_aa64pfr1 = read_cpuid(ID_AA64PFR1_EL1); + info->reg_id_dfr0 = read_cpuid(ID_DFR0_EL1); info->reg_id_isar0 = read_cpuid(ID_ISAR0_EL1); info->reg_id_isar1 = read_cpuid(ID_ISAR1_EL1); info->reg_id_isar2 = read_cpuid(ID_ISAR2_EL1); @@ -202,6 +208,10 @@ static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info) info->reg_id_pfr0 = read_cpuid(ID_PFR0_EL1); info->reg_id_pfr1 = read_cpuid(ID_PFR1_EL1); + info->reg_mvfr0 = read_cpuid(MVFR0_EL1); + info->reg_mvfr1 = read_cpuid(MVFR1_EL1); + info->reg_mvfr2 = read_cpuid(MVFR2_EL1); + cpuinfo_detect_icache_policy(info); check_local_cpu_errata(); -- cgit v0.10.2 From 2ec4560b7c73e6c9febc4fb2a3e6af257c904979 Mon Sep 17 00:00:00 2001 From: Paul Walmsley Date: Mon, 5 Jan 2015 17:38:41 -0700 Subject: arm64: fix missing asm/pgtable-hwdef.h include in asm/processor.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On next-20150105, defconfig compilation breaks with: ./arch/arm64/include/asm/processor.h:47:32: error: ‘PHYS_MASK’ undeclared (first use in this function) Fix by including asm/pgtable-hwdef.h, where PHYS_MASK is defined. This second version incorporates a comment from Mark Rutland to keep the includes in alphabetical order by filename. Signed-off-by: Paul Walmsley Cc: Paul Walmsley Cc: Catalin Marinas Cc: Will Deacon Acked-by: Mark Rutland Signed-off-by: Will Deacon diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index f131a98..f9be30e 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -31,6 +31,7 @@ #include #include +#include #include #include -- cgit v0.10.2 From 082471a8efe1a91d4e44abec202d9e3067dcec91 Mon Sep 17 00:00:00 2001 From: Paul Walmsley Date: Mon, 5 Jan 2015 17:38:41 -0700 Subject: arm64: fix missing linux/bug.h include in asm/arch_timer.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On next-20150105, defconfig compilation breaks with: ./arch/arm64/include/asm/arch_timer.h:112:2: error: implicit declaration of function ‘BUG’ [-Werror=implicit-function-declaration] Fix by including linux/bug.h, where the BUG macro is defined. This second version incorporates a comment from Mark Rutland to keep the includes in alphabetical order by filename. Signed-off-by: Paul Walmsley Cc: Paul Walmsley Cc: Catalin Marinas Cc: Will Deacon Acked-by: Mark Rutland Signed-off-by: Will Deacon diff --git a/arch/arm64/include/asm/arch_timer.h b/arch/arm64/include/asm/arch_timer.h index b1fa4e6..fbe0ca3 100644 --- a/arch/arm64/include/asm/arch_timer.h +++ b/arch/arm64/include/asm/arch_timer.h @@ -21,6 +21,7 @@ #include +#include #include #include -- cgit v0.10.2 From 2c2b282d001e9934adeac93c10eb037b81d532f5 Mon Sep 17 00:00:00 2001 From: Paul Walmsley Date: Mon, 5 Jan 2015 17:38:41 -0700 Subject: arm64: fix missing asm/alternative.h include in kernel/module.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On next-20150105, defconfig compilation breaks with: arch/arm64/kernel/module.c:408:4: error: implicit declaration of function ‘apply_alternatives’ [-Werror=implicit-function-declaration] Fix by including asm/alternative.h, where the apply_alternatives() prototype is declared. This second version incorporates a comment from Mark Rutland to keep the includes in alphabetical order by filename. Signed-off-by: Paul Walmsley Cc: Paul Walmsley Cc: Catalin Marinas Cc: Will Deacon Acked-by: Mark Rutland Signed-off-by: Will Deacon diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c index fd027b1..9b6f71d 100644 --- a/arch/arm64/kernel/module.c +++ b/arch/arm64/kernel/module.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include -- cgit v0.10.2 From 59c68329a00eee7759568bc7a5383407d0d40be1 Mon Sep 17 00:00:00 2001 From: Paul Walmsley Date: Mon, 5 Jan 2015 17:38:42 -0700 Subject: arm64: fix missing asm/io.h include in kernel/smp_spin_table.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On next-20150105, defconfig compilation breaks with: arch/arm64/kernel/smp_spin_table.c:80:2: error: implicit declaration of function ‘ioremap_cache’ [-Werror=implicit-function-declaration] arch/arm64/kernel/smp_spin_table.c:92:2: error: implicit declaration of function ‘writeq_relaxed’ [-Werror=implicit-function-declaration] arch/arm64/kernel/smp_spin_table.c:101:2: error: implicit declaration of function ‘iounmap’ [-Werror=implicit-function-declaration] Fix by including asm/io.h, which contains definitions or prototypes for these macros or functions. This second version incorporates a comment from Mark Rutland to keep the includes in alphabetical order by filename. Signed-off-by: Paul Walmsley Cc: Paul Walmsley Cc: Catalin Marinas Cc: Will Deacon Acked-by: Mark Rutland Signed-off-by: Will Deacon diff --git a/arch/arm64/kernel/smp_spin_table.c b/arch/arm64/kernel/smp_spin_table.c index 4f93c67..14944e5 100644 --- a/arch/arm64/kernel/smp_spin_table.c +++ b/arch/arm64/kernel/smp_spin_table.c @@ -25,6 +25,7 @@ #include #include #include +#include #include extern void secondary_holding_pen(void); -- cgit v0.10.2 From 92cb46584e104e2f4b14a44959109ffe13524a26 Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Thu, 8 Jan 2015 00:31:16 +0900 Subject: ALSA: fireworks: fix an endianness bug for transaction length Although the 't->length' is a big-endian value, it's used without any conversion. This means that the driver always uses 'length' parameter. Fixes: 555e8a8f7f14("ALSA: fireworks: Add command/response functionality into hwdep interface") Reported-by: Clemens Ladisch Signed-off-by: Takashi Sakamoto Signed-off-by: Takashi Iwai diff --git a/sound/firewire/fireworks/fireworks_transaction.c b/sound/firewire/fireworks/fireworks_transaction.c index 255dabc..2a85e42 100644 --- a/sound/firewire/fireworks/fireworks_transaction.c +++ b/sound/firewire/fireworks/fireworks_transaction.c @@ -124,7 +124,7 @@ copy_resp_to_buf(struct snd_efw *efw, void *data, size_t length, int *rcode) spin_lock_irq(&efw->lock); t = (struct snd_efw_transaction *)data; - length = min_t(size_t, t->length * sizeof(t->length), length); + length = min_t(size_t, be32_to_cpu(t->length) * sizeof(u32), length); if (efw->push_ptr < efw->pull_ptr) capacity = (unsigned int)(efw->pull_ptr - efw->push_ptr); -- cgit v0.10.2 From 990428b8ead311c68a850ead7ec8557a10b8893a Mon Sep 17 00:00:00 2001 From: Pranith Kumar Date: Tue, 30 Dec 2014 00:46:21 -0500 Subject: assoc_array: Include rcupdate.h for call_rcu() definition Include rcupdate.h header to provide call_rcu() definition. This was implicitly being provided by slab.h file which include srcu.h somewhere in its include hierarchy which in-turn included rcupdate.h. Lately, tinification effort added support to remove srcu entirely because of which we are encountering build errors like lib/assoc_array.c: In function 'assoc_array_apply_edit': lib/assoc_array.c:1426:2: error: implicit declaration of function 'call_rcu' [-Werror=implicit-function-declaration] cc1: some warnings being treated as errors Fix these by including rcupdate.h explicitly. Signed-off-by: Pranith Kumar Reported-by: Scott Wood diff --git a/lib/assoc_array.c b/lib/assoc_array.c index 2404d03..03dd576 100644 --- a/lib/assoc_array.c +++ b/lib/assoc_array.c @@ -11,6 +11,7 @@ * 2 of the Licence, or (at your option) any later version. */ //#define DEBUG +#include #include #include #include -- cgit v0.10.2 From 7c2e211f3c95b91912a92a8c6736343690042e2e Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Wed, 7 Jan 2015 10:29:11 -0700 Subject: vfio-pci: Fix the check on pci device type in vfio_pci_probe() Current vfio-pci just supports normal pci device, so vfio_pci_probe() will return if the pci device is not a normal device. While current code makes a mistake. PCI_HEADER_TYPE is the offset in configuration space of the device type, but we use this value to mask the type value. This patch fixs this by do the check directly on the pci_dev->hdr_type. Signed-off-by: Wei Yang Signed-off-by: Alex Williamson Cc: stable@vger.kernel.org # v3.6+ diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c index 255201f..7cc0122 100644 --- a/drivers/vfio/pci/vfio_pci.c +++ b/drivers/vfio/pci/vfio_pci.c @@ -840,13 +840,11 @@ static const struct vfio_device_ops vfio_pci_ops = { static int vfio_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) { - u8 type; struct vfio_pci_device *vdev; struct iommu_group *group; int ret; - pci_read_config_byte(pdev, PCI_HEADER_TYPE, &type); - if ((type & PCI_HEADER_TYPE) != PCI_HEADER_TYPE_NORMAL) + if (pdev->hdr_type != PCI_HEADER_TYPE_NORMAL) return -EINVAL; group = iommu_group_get(&pdev->dev); -- cgit v0.10.2 From 94ae1db226a5bcbb48372d81161f084c9e283fd8 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Sat, 13 Dec 2014 09:11:39 -0500 Subject: nfsd: fix fi_delegees leak when fi_had_conflict returns true Currently, nfs4_set_delegation takes a reference to an existing delegation and then checks to see if there is a conflict. If there is one, then it doesn't release that reference. Change the code to take the reference after the check and only if there is no conflict. Signed-off-by: Jeff Layton Cc: stable@vger.kernel.org Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 3550a9c..c06a1ba 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -3897,11 +3897,11 @@ nfs4_set_delegation(struct nfs4_client *clp, struct svc_fh *fh, status = nfs4_setlease(dp); goto out; } - atomic_inc(&fp->fi_delegees); if (fp->fi_had_conflict) { status = -EAGAIN; goto out_unlock; } + atomic_inc(&fp->fi_delegees); hash_delegation_locked(dp, fp); status = 0; out_unlock: -- cgit v0.10.2 From 49a068f82a1d30eb585d7804b05948376be6cf9a Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 22 Dec 2014 16:14:51 -0500 Subject: rpc: fix xdr_truncate_encode to handle buffer ending on page boundary MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A struct xdr_stream at a page boundary might point to the end of one page or the beginning of the next, but xdr_truncate_encode isn't prepared to handle the former. This can cause corruption of NFSv4 READDIR replies in the case that a readdir entry that would have exceeded the client's dircount/maxcount limit would have ended exactly on a 4k page boundary. You're more likely to hit this case on large directories. Other xdr_truncate_encode callers are probably also affected. Reported-by: Holger Hoffstätte Tested-by: Holger Hoffstätte Fixes: 3e19ce762b53 "rpc: xdr_truncate_encode" Cc: stable@vger.kernel.org Signed-off-by: J. Bruce Fields diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c index 1cb6124..4439ac4 100644 --- a/net/sunrpc/xdr.c +++ b/net/sunrpc/xdr.c @@ -606,7 +606,7 @@ void xdr_truncate_encode(struct xdr_stream *xdr, size_t len) struct kvec *head = buf->head; struct kvec *tail = buf->tail; int fraglen; - int new, old; + int new; if (len > buf->len) { WARN_ON_ONCE(1); @@ -629,8 +629,8 @@ void xdr_truncate_encode(struct xdr_stream *xdr, size_t len) buf->len -= fraglen; new = buf->page_base + buf->page_len; - old = new + fraglen; - xdr->page_ptr -= (old >> PAGE_SHIFT) - (new >> PAGE_SHIFT); + + xdr->page_ptr = buf->pages + (new >> PAGE_SHIFT); if (buf->page_len) { xdr->p = page_address(*xdr->page_ptr); -- cgit v0.10.2 From 841ee230253f2ceb647f89a218e6e0575d961435 Mon Sep 17 00:00:00 2001 From: Russell King Date: Thu, 18 Dec 2014 19:43:36 +0000 Subject: ARM: wire up execveat syscall Signed-off-by: Russell King diff --git a/arch/arm/include/uapi/asm/unistd.h b/arch/arm/include/uapi/asm/unistd.h index 705bb76..0c3f5a0 100644 --- a/arch/arm/include/uapi/asm/unistd.h +++ b/arch/arm/include/uapi/asm/unistd.h @@ -413,6 +413,7 @@ #define __NR_getrandom (__NR_SYSCALL_BASE+384) #define __NR_memfd_create (__NR_SYSCALL_BASE+385) #define __NR_bpf (__NR_SYSCALL_BASE+386) +#define __NR_execveat (__NR_SYSCALL_BASE+387) /* * The following SWIs are ARM private. diff --git a/arch/arm/kernel/calls.S b/arch/arm/kernel/calls.S index e51833f..05745eb 100644 --- a/arch/arm/kernel/calls.S +++ b/arch/arm/kernel/calls.S @@ -396,6 +396,7 @@ CALL(sys_getrandom) /* 385 */ CALL(sys_memfd_create) CALL(sys_bpf) + CALL(sys_execveat) #ifndef syscalls_counted .equ syscalls_padding, ((NR_syscalls + 3) & ~3) - NR_syscalls #define syscalls_counted -- cgit v0.10.2 From cca547e9aa3a6d561fe65e75a4bb2c18d80c541a Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 17 Dec 2014 17:57:38 +0100 Subject: ARM: 8249/1: mm: dump: don't skip regions Currently the arm page table dumping code starts dumping page tables from USER_PGTABLES_CEILING. This is unnecessary for skipping any entries related to userspace as the swapper_pg_dir does not contain such entries, and results in a couple of unfortuante side effects. Firstly, any kernel mappings which might exist below USER_PGTABLES_CEILING will not be accounted in the dump output. This masks any entries erroneously created below this address. Secondly, if the final page table entry walked is part of a valid mapping the page table dumping code will not log the region this entry is part of, as the final note_page call in walk_pgd will trigger an early return when 0 < USER_PGTABLES_CEILING. Luckily this isn't seen on contemporary systems as they typically don't have enough RAM to extend the linear mapping right to the end of the address space. Due to the way addr is constructed in the walk_* functions, it can never be less than USER_PGTABLES_CEILING when walking the page tables, so it is not necessary to avoid dereferencing invalid table addresses. The existing checks for st->current_prot and st->marker[1].start_address are sufficient to ensure we will not print and/or dereference garbage when trying to log information. This patch removes both problematic uses of USER_PGTABLES_CEILING from the arm page table dumping code, preventing both of these issues. We will now report any low mappings, and the final note_page call will not return early, ensuring all regions are logged. Signed-off-by: Mark Rutland Cc: Steve Capper Cc: Kees Cook Cc: Will Deacon Signed-off-by: Russell King diff --git a/arch/arm/mm/dump.c b/arch/arm/mm/dump.c index 5942493..9fe8e24 100644 --- a/arch/arm/mm/dump.c +++ b/arch/arm/mm/dump.c @@ -220,9 +220,6 @@ static void note_page(struct pg_state *st, unsigned long addr, unsigned level, u static const char units[] = "KMGTPE"; u64 prot = val & pg_level[level].mask; - if (addr < USER_PGTABLES_CEILING) - return; - if (!st->level) { st->level = level; st->current_prot = prot; @@ -308,15 +305,13 @@ static void walk_pgd(struct seq_file *m) pgd_t *pgd = swapper_pg_dir; struct pg_state st; unsigned long addr; - unsigned i, pgdoff = USER_PGTABLES_CEILING / PGDIR_SIZE; + unsigned i; memset(&st, 0, sizeof(st)); st.seq = m; st.marker = address_markers; - pgd += pgdoff; - - for (i = pgdoff; i < PTRS_PER_PGD; i++, pgd++) { + for (i = 0; i < PTRS_PER_PGD; i++, pgd++) { addr = i * PGDIR_SIZE; if (!pgd_none(*pgd)) { walk_pud(&st, pgd, addr); -- cgit v0.10.2 From ac08468867e99bc02b22baf4e58bc3537e9d852c Mon Sep 17 00:00:00 2001 From: Grygorii Strashko Date: Tue, 23 Dec 2014 19:36:55 +0100 Subject: ARM: 8253/1: mm: use phys_addr_t type in map_lowmem() for kernel mem region Now local variables kernel_x_start and kernel_x_end defined using 'unsigned long' type which is wrong because they represent physical memory range and will be calculated wrongly if LPAE is enabled. As result, all following code in map_lowmem() will not work correctly. For example, Keystone 2 boot is broken because kernel_x_start == 0x0000 0000 kernel_x_end == 0x0080 0000 instead of kernel_x_start == 0x0000 0008 0000 0000 kernel_x_end == 0x0000 0008 0080 0000 and as result whole low memory will be mapped with MT_MEMORY_RW permissions by code (start > kernel_x_end): } else if (start >= kernel_x_end) { map.pfn = __phys_to_pfn(start); map.virtual = __phys_to_virt(start); map.length = end - start; map.type = MT_MEMORY_RW; create_mapping(&map); } Hence, fix it by using phys_addr_t type for variables kernel_x_start and kernel_x_end. Tested-by: Murali Karicheri Signed-off-by: Grygorii Strashko Signed-off-by: Russell King diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c index cda7c40..4e6ef89 100644 --- a/arch/arm/mm/mmu.c +++ b/arch/arm/mm/mmu.c @@ -1329,8 +1329,8 @@ static void __init kmap_init(void) static void __init map_lowmem(void) { struct memblock_region *reg; - unsigned long kernel_x_start = round_down(__pa(_stext), SECTION_SIZE); - unsigned long kernel_x_end = round_up(__pa(__init_end), SECTION_SIZE); + phys_addr_t kernel_x_start = round_down(__pa(_stext), SECTION_SIZE); + phys_addr_t kernel_x_end = round_up(__pa(__init_end), SECTION_SIZE); /* Map all the lowmem memory banks. */ for_each_memblock(memory, reg) { -- cgit v0.10.2 From 0e63ea48b4d8035dd0e91a3fa6fb79458b47adfb Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Thu, 8 Jan 2015 09:54:58 +0000 Subject: arm64/efi: add missing call to early_ioremap_reset() The early ioremap support introduced by patch bf4b558eba92 ("arm64: add early_ioremap support") failed to add a call to early_ioremap_reset() at an appropriate time. Without this call, invocations of early_ioremap etc. that are done too late will go unnoticed and may cause corruption. This is exactly what happened when the first user of this feature was added in patch f84d02755f5a ("arm64: add EFI runtime services"). The early mapping of the EFI memory map is unmapped during an early initcall, at which time the early ioremap support is long gone. Fix by adding the missing call to early_ioremap_reset() to setup_arch(), and move the offending early_memunmap() to right after the point where the early mapping of the EFI memory map is last used. Fixes: f84d02755f5a ("arm64: add EFI runtime services") Cc: Signed-off-by: Leif Lindholm Signed-off-by: Ard Biesheuvel Signed-off-by: Will Deacon diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c index 6fac253..2bb4347 100644 --- a/arch/arm64/kernel/efi.c +++ b/arch/arm64/kernel/efi.c @@ -326,6 +326,7 @@ void __init efi_idmap_init(void) /* boot time idmap_pg_dir is incomplete, so fill in missing parts */ efi_setup_idmap(); + early_memunmap(memmap.map, memmap.map_end - memmap.map); } static int __init remap_region(efi_memory_desc_t *md, void **new) @@ -380,7 +381,6 @@ static int __init arm64_enter_virtual_mode(void) } mapsize = memmap.map_end - memmap.map; - early_memunmap(memmap.map, mapsize); if (efi_runtime_disabled()) { pr_info("EFI runtime services will be disabled.\n"); diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index b809911..20fe293 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -402,6 +402,7 @@ void __init setup_arch(char **cmdline_p) request_standard_resources(); efi_idmap_init(); + early_ioremap_reset(); unflatten_device_tree(); -- cgit v0.10.2 From 701a261ad6c4c1915861673b7e8ab9fee5cef69a Mon Sep 17 00:00:00 2001 From: Boris Ostrovsky Date: Wed, 7 Jan 2015 09:08:54 -0500 Subject: x86/xen: Free bootmem in free_p2m_page() during early boot With recent changes in p2m we now have legitimate cases when p2m memory needs to be freed during early boot (i.e. before slab is initialized). Signed-off-by: Boris Ostrovsky Reviewed-by: Juergen Gross Signed-off-by: David Vrabel diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c index edbc7a6..cab221d 100644 --- a/arch/x86/xen/p2m.c +++ b/arch/x86/xen/p2m.c @@ -167,10 +167,13 @@ static void * __ref alloc_p2m_page(void) return (void *)__get_free_page(GFP_KERNEL | __GFP_REPEAT); } -/* Only to be called in case of a race for a page just allocated! */ -static void free_p2m_page(void *p) +static void __ref free_p2m_page(void *p) { - BUG_ON(!slab_is_available()); + if (unlikely(!slab_is_available())) { + free_bootmem((unsigned long)p, PAGE_SIZE); + return; + } + free_page((unsigned long)p); } -- cgit v0.10.2 From bc7142cf798ae77628ae8c29bfdf6aa6dd2378e9 Mon Sep 17 00:00:00 2001 From: David Vrabel Date: Wed, 7 Jan 2015 11:01:08 +0000 Subject: x86/xen: don't count how many PFNs are identity mapped This accounting is just used to print a diagnostic message that isn't very useful. Signed-off-by: David Vrabel Reviewed-by: Juergen Gross diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index dfd77de..664dffc 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@ -229,15 +229,14 @@ static int __init xen_free_mfn(unsigned long mfn) * as a fallback if the remapping fails. */ static void __init xen_set_identity_and_release_chunk(unsigned long start_pfn, - unsigned long end_pfn, unsigned long nr_pages, unsigned long *identity, - unsigned long *released) + unsigned long end_pfn, unsigned long nr_pages, unsigned long *released) { - unsigned long len = 0; unsigned long pfn, end; int ret; WARN_ON(start_pfn > end_pfn); + /* Release pages first. */ end = min(end_pfn, nr_pages); for (pfn = start_pfn; pfn < end; pfn++) { unsigned long mfn = pfn_to_mfn(pfn); @@ -250,16 +249,14 @@ static void __init xen_set_identity_and_release_chunk(unsigned long start_pfn, WARN(ret != 1, "Failed to release pfn %lx err=%d\n", pfn, ret); if (ret == 1) { + (*released)++; if (!__set_phys_to_machine(pfn, INVALID_P2M_ENTRY)) break; - len++; } else break; } - /* Need to release pages first */ - *released += len; - *identity += set_phys_range_identity(start_pfn, end_pfn); + set_phys_range_identity(start_pfn, end_pfn); } /* @@ -318,7 +315,6 @@ static void __init xen_do_set_identity_and_remap_chunk( unsigned long ident_pfn_iter, remap_pfn_iter; unsigned long ident_end_pfn = start_pfn + size; unsigned long left = size; - unsigned long ident_cnt = 0; unsigned int i, chunk; WARN_ON(size == 0); @@ -347,8 +343,7 @@ static void __init xen_do_set_identity_and_remap_chunk( xen_remap_mfn = mfn; /* Set identity map */ - ident_cnt += set_phys_range_identity(ident_pfn_iter, - ident_pfn_iter + chunk); + set_phys_range_identity(ident_pfn_iter, ident_pfn_iter + chunk); left -= chunk; } @@ -371,7 +366,7 @@ static void __init xen_do_set_identity_and_remap_chunk( static unsigned long __init xen_set_identity_and_remap_chunk( const struct e820entry *list, size_t map_size, unsigned long start_pfn, unsigned long end_pfn, unsigned long nr_pages, unsigned long remap_pfn, - unsigned long *identity, unsigned long *released) + unsigned long *released) { unsigned long pfn; unsigned long i = 0; @@ -386,8 +381,7 @@ static unsigned long __init xen_set_identity_and_remap_chunk( /* Do not remap pages beyond the current allocation */ if (cur_pfn >= nr_pages) { /* Identity map remaining pages */ - *identity += set_phys_range_identity(cur_pfn, - cur_pfn + size); + set_phys_range_identity(cur_pfn, cur_pfn + size); break; } if (cur_pfn + size > nr_pages) @@ -398,7 +392,7 @@ static unsigned long __init xen_set_identity_and_remap_chunk( if (!remap_range_size) { pr_warning("Unable to find available pfn range, not remapping identity pages\n"); xen_set_identity_and_release_chunk(cur_pfn, - cur_pfn + left, nr_pages, identity, released); + cur_pfn + left, nr_pages, released); break; } /* Adjust size to fit in current e820 RAM region */ @@ -410,7 +404,6 @@ static unsigned long __init xen_set_identity_and_remap_chunk( /* Update variables to reflect new mappings. */ i += size; remap_pfn += size; - *identity += size; } /* @@ -430,7 +423,6 @@ static void __init xen_set_identity_and_remap( unsigned long *released) { phys_addr_t start = 0; - unsigned long identity = 0; unsigned long last_pfn = nr_pages; const struct e820entry *entry; unsigned long num_released = 0; @@ -460,14 +452,13 @@ static void __init xen_set_identity_and_remap( last_pfn = xen_set_identity_and_remap_chunk( list, map_size, start_pfn, end_pfn, nr_pages, last_pfn, - &identity, &num_released); + &num_released); start = end; } } *released = num_released; - pr_info("Set %ld page(s) to 1-1 mapping\n", identity); pr_info("Released %ld page(s)\n", num_released); } -- cgit v0.10.2 From a97dae1a2e92e728d28515e88e8eda151f5796f5 Mon Sep 17 00:00:00 2001 From: David Vrabel Date: Wed, 7 Jan 2015 11:21:50 +0000 Subject: x86/xen: add extra memory for remapped frames during setup If the non-RAM regions in the e820 memory map are larger than the size of the initial balloon, a BUG was triggered as the frames are remaped beyond the limit of the linear p2m. The frames are remapped into the initial balloon area (xen_extra_mem) but not enough of this is available. Ensure enough extra memory regions are added for these remapped frames. Signed-off-by: David Vrabel Reviewed-by: Juergen Gross diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index 664dffc..feb6d86 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@ -366,7 +366,7 @@ static void __init xen_do_set_identity_and_remap_chunk( static unsigned long __init xen_set_identity_and_remap_chunk( const struct e820entry *list, size_t map_size, unsigned long start_pfn, unsigned long end_pfn, unsigned long nr_pages, unsigned long remap_pfn, - unsigned long *released) + unsigned long *released, unsigned long *remapped) { unsigned long pfn; unsigned long i = 0; @@ -404,6 +404,7 @@ static unsigned long __init xen_set_identity_and_remap_chunk( /* Update variables to reflect new mappings. */ i += size; remap_pfn += size; + *remapped += size; } /* @@ -420,12 +421,13 @@ static unsigned long __init xen_set_identity_and_remap_chunk( static void __init xen_set_identity_and_remap( const struct e820entry *list, size_t map_size, unsigned long nr_pages, - unsigned long *released) + unsigned long *released, unsigned long *remapped) { phys_addr_t start = 0; unsigned long last_pfn = nr_pages; const struct e820entry *entry; unsigned long num_released = 0; + unsigned long num_remapped = 0; int i; /* @@ -452,12 +454,13 @@ static void __init xen_set_identity_and_remap( last_pfn = xen_set_identity_and_remap_chunk( list, map_size, start_pfn, end_pfn, nr_pages, last_pfn, - &num_released); + &num_released, &num_remapped); start = end; } } *released = num_released; + *remapped = num_remapped; pr_info("Released %ld page(s)\n", num_released); } @@ -577,6 +580,7 @@ char * __init xen_memory_setup(void) struct xen_memory_map memmap; unsigned long max_pages; unsigned long extra_pages = 0; + unsigned long remapped_pages; int i; int op; @@ -626,9 +630,10 @@ char * __init xen_memory_setup(void) * underlying RAM. */ xen_set_identity_and_remap(map, memmap.nr_entries, max_pfn, - &xen_released_pages); + &xen_released_pages, &remapped_pages); extra_pages += xen_released_pages; + extra_pages += remapped_pages; /* * Clamp the amount of extra memory to a EXTRA_MEM_RATIO -- cgit v0.10.2 From 7be0772d19103b3eac3c2e9ac325df2563273fdc Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Mon, 5 Jan 2015 16:27:51 +0100 Subject: x86/xen: avoid freeing static 'name' when kasprintf() fails In case kasprintf() fails in xen_setup_timer() we assign name to the static string "". We, however, don't check that fact before issuing kfree() in xen_teardown_timer(), kernel is supposed to crash with 'kernel BUG at mm/slub.c:3341!' Solve the issue by making name a fixed length string inside struct xen_clock_event_device. 16 bytes should be enough. Suggested-by: Laszlo Ersek Signed-off-by: Vitaly Kuznetsov Signed-off-by: David Vrabel diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c index 23019b4..6908734 100644 --- a/arch/x86/xen/time.c +++ b/arch/x86/xen/time.c @@ -391,7 +391,7 @@ static const struct clock_event_device *xen_clockevent = struct xen_clock_event_device { struct clock_event_device evt; - char *name; + char name[16]; }; static DEFINE_PER_CPU(struct xen_clock_event_device, xen_clock_events) = { .evt.irq = -1 }; @@ -420,39 +420,33 @@ void xen_teardown_timer(int cpu) if (evt->irq >= 0) { unbind_from_irqhandler(evt->irq, NULL); evt->irq = -1; - kfree(per_cpu(xen_clock_events, cpu).name); - per_cpu(xen_clock_events, cpu).name = NULL; } } void xen_setup_timer(int cpu) { - char *name; - struct clock_event_device *evt; + struct xen_clock_event_device *xevt = &per_cpu(xen_clock_events, cpu); + struct clock_event_device *evt = &xevt->evt; int irq; - evt = &per_cpu(xen_clock_events, cpu).evt; WARN(evt->irq >= 0, "IRQ%d for CPU%d is already allocated\n", evt->irq, cpu); if (evt->irq >= 0) xen_teardown_timer(cpu); printk(KERN_INFO "installing Xen timer for CPU %d\n", cpu); - name = kasprintf(GFP_KERNEL, "timer%d", cpu); - if (!name) - name = ""; + snprintf(xevt->name, sizeof(xevt->name), "timer%d", cpu); irq = bind_virq_to_irqhandler(VIRQ_TIMER, cpu, xen_timer_interrupt, IRQF_PERCPU|IRQF_NOBALANCING|IRQF_TIMER| IRQF_FORCE_RESUME|IRQF_EARLY_RESUME, - name, NULL); + xevt->name, NULL); (void)xen_set_irq_priority(irq, XEN_IRQ_PRIORITY_MAX); memcpy(evt, xen_clockevent, sizeof(*evt)); evt->cpumask = cpumask_of(cpu); evt->irq = irq; - per_cpu(xen_clock_events, cpu).name = name; } -- cgit v0.10.2 From 5ca8271022da8583f0d618aeda5b2bae785e7882 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 23 Dec 2014 13:36:21 +0900 Subject: perf hists: Fix children sort key behavior When perf report --children resorts output fields, it tries to put caller above the callee. But this was only meaningful for a same thread and doing this requires callchain enabled. So fix its check before comparing the callchain depth. This also changes the hist accumulation tests: In test 3, xmalloc in bash thread should be above than other perf threads due to alphabetical order of comm string. Also it's under page_fault in bash thread since alphabetical order of dso name. The sys_perf_event_open in perf thread is put on the last line since it's self overhead is 0. In test 4, the sys_perf_event_open is put above other perf entries that have same children overhead since its callchain depth is smaller. Signed-off-by: Namhyung Kim Cc: Adrian Hunter Cc: David Ahern Cc: Ingo Molnar Cc: Jiri Olsa Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1419309381-2593-1-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/tests/hists_cumulate.c b/tools/perf/tests/hists_cumulate.c index 4b8226e..8d110de 100644 --- a/tools/perf/tests/hists_cumulate.c +++ b/tools/perf/tests/hists_cumulate.c @@ -454,12 +454,12 @@ static int test3(struct perf_evsel *evsel, struct machine *machine) * 30.00% 10.00% perf perf [.] cmd_record * 20.00% 0.00% bash libc [.] malloc * 10.00% 10.00% bash [kernel] [k] page_fault - * 10.00% 10.00% perf [kernel] [k] schedule - * 10.00% 0.00% perf [kernel] [k] sys_perf_event_open + * 10.00% 10.00% bash bash [.] xmalloc * 10.00% 10.00% perf [kernel] [k] page_fault - * 10.00% 10.00% perf libc [.] free * 10.00% 10.00% perf libc [.] malloc - * 10.00% 10.00% bash bash [.] xmalloc + * 10.00% 10.00% perf [kernel] [k] schedule + * 10.00% 10.00% perf libc [.] free + * 10.00% 0.00% perf [kernel] [k] sys_perf_event_open */ struct result expected[] = { { 7000, 2000, "perf", "perf", "main" }, @@ -468,12 +468,12 @@ static int test3(struct perf_evsel *evsel, struct machine *machine) { 3000, 1000, "perf", "perf", "cmd_record" }, { 2000, 0, "bash", "libc", "malloc" }, { 1000, 1000, "bash", "[kernel]", "page_fault" }, - { 1000, 1000, "perf", "[kernel]", "schedule" }, - { 1000, 0, "perf", "[kernel]", "sys_perf_event_open" }, + { 1000, 1000, "bash", "bash", "xmalloc" }, { 1000, 1000, "perf", "[kernel]", "page_fault" }, + { 1000, 1000, "perf", "[kernel]", "schedule" }, { 1000, 1000, "perf", "libc", "free" }, { 1000, 1000, "perf", "libc", "malloc" }, - { 1000, 1000, "bash", "bash", "xmalloc" }, + { 1000, 0, "perf", "[kernel]", "sys_perf_event_open" }, }; symbol_conf.use_callchain = false; @@ -537,10 +537,13 @@ static int test4(struct perf_evsel *evsel, struct machine *machine) * malloc * main * - * 10.00% 10.00% perf [kernel] [k] schedule + * 10.00% 10.00% bash bash [.] xmalloc * | - * --- schedule - * run_command + * --- xmalloc + * malloc + * xmalloc <--- NOTE: there's a cycle + * malloc + * xmalloc * main * * 10.00% 0.00% perf [kernel] [k] sys_perf_event_open @@ -556,6 +559,12 @@ static int test4(struct perf_evsel *evsel, struct machine *machine) * run_command * main * + * 10.00% 10.00% perf [kernel] [k] schedule + * | + * --- schedule + * run_command + * main + * * 10.00% 10.00% perf libc [.] free * | * --- free @@ -570,15 +579,6 @@ static int test4(struct perf_evsel *evsel, struct machine *machine) * run_command * main * - * 10.00% 10.00% bash bash [.] xmalloc - * | - * --- xmalloc - * malloc - * xmalloc <--- NOTE: there's a cycle - * malloc - * xmalloc - * main - * */ struct result expected[] = { { 7000, 2000, "perf", "perf", "main" }, @@ -587,12 +587,12 @@ static int test4(struct perf_evsel *evsel, struct machine *machine) { 3000, 1000, "perf", "perf", "cmd_record" }, { 2000, 0, "bash", "libc", "malloc" }, { 1000, 1000, "bash", "[kernel]", "page_fault" }, - { 1000, 1000, "perf", "[kernel]", "schedule" }, + { 1000, 1000, "bash", "bash", "xmalloc" }, { 1000, 0, "perf", "[kernel]", "sys_perf_event_open" }, { 1000, 1000, "perf", "[kernel]", "page_fault" }, + { 1000, 1000, "perf", "[kernel]", "schedule" }, { 1000, 1000, "perf", "libc", "free" }, { 1000, 1000, "perf", "libc", "malloc" }, - { 1000, 1000, "bash", "bash", "xmalloc" }, }; struct callchain_result expected_callchain[] = { { @@ -622,9 +622,12 @@ static int test4(struct perf_evsel *evsel, struct machine *machine) { "bash", "main" }, }, }, { - 3, { { "[kernel]", "schedule" }, - { "perf", "run_command" }, - { "perf", "main" }, }, + 6, { { "bash", "xmalloc" }, + { "libc", "malloc" }, + { "bash", "xmalloc" }, + { "libc", "malloc" }, + { "bash", "xmalloc" }, + { "bash", "main" }, }, }, { 3, { { "[kernel]", "sys_perf_event_open" }, @@ -638,6 +641,11 @@ static int test4(struct perf_evsel *evsel, struct machine *machine) { "perf", "main" }, }, }, { + 3, { { "[kernel]", "schedule" }, + { "perf", "run_command" }, + { "perf", "main" }, }, + }, + { 4, { { "libc", "free" }, { "perf", "cmd_record" }, { "perf", "run_command" }, @@ -649,14 +657,6 @@ static int test4(struct perf_evsel *evsel, struct machine *machine) { "perf", "run_command" }, { "perf", "main" }, }, }, - { - 6, { { "bash", "xmalloc" }, - { "libc", "malloc" }, - { "bash", "xmalloc" }, - { "libc", "malloc" }, - { "bash", "xmalloc" }, - { "bash", "main" }, }, - }, }; symbol_conf.use_callchain = true; diff --git a/tools/perf/ui/hist.c b/tools/perf/ui/hist.c index dc0d095..482adae 100644 --- a/tools/perf/ui/hist.c +++ b/tools/perf/ui/hist.c @@ -204,6 +204,9 @@ static int __hpp__sort_acc(struct hist_entry *a, struct hist_entry *b, if (ret) return ret; + if (a->thread != b->thread || !symbol_conf.use_callchain) + return 0; + ret = b->callchain->max_depth - a->callchain->max_depth; } return ret; -- cgit v0.10.2 From d114960c488b5a95705a04bba305f931cef0efd6 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 30 Dec 2014 14:38:13 +0900 Subject: perf callchain: Free callchains when hist entries are deleted Markus reported that "perf top -g" can leak ~300MB per second on his machine. This is partly because it missed to free callchains when hist entries are deleted. Fix it. Reported-by: Markus Trippelsdorf Signed-off-by: Namhyung Kim Cc: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Markus Trippelsdorf Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20141230053813.GD6081@sejong Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c index 64b377e..14e7a12 100644 --- a/tools/perf/util/callchain.c +++ b/tools/perf/util/callchain.c @@ -841,3 +841,33 @@ char *callchain_list__sym_name(struct callchain_list *cl, return bf; } + +static void free_callchain_node(struct callchain_node *node) +{ + struct callchain_list *list, *tmp; + struct callchain_node *child; + struct rb_node *n; + + list_for_each_entry_safe(list, tmp, &node->val, list) { + list_del(&list->list); + free(list); + } + + n = rb_first(&node->rb_root_in); + while (n) { + child = container_of(n, struct callchain_node, rb_node_in); + n = rb_next(n); + rb_erase(&child->rb_node_in, &node->rb_root_in); + + free_callchain_node(child); + free(child); + } +} + +void free_callchain(struct callchain_root *root) +{ + if (!symbol_conf.use_callchain) + return; + + free_callchain_node(&root->node); +} diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h index dbc08cf..c0ec1ac 100644 --- a/tools/perf/util/callchain.h +++ b/tools/perf/util/callchain.h @@ -198,4 +198,6 @@ static inline int arch_skip_callchain_idx(struct thread *thread __maybe_unused, char *callchain_list__sym_name(struct callchain_list *cl, char *bf, size_t bfsize, bool show_dso); +void free_callchain(struct callchain_root *root); + #endif /* __PERF_CALLCHAIN_H */ diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index 0ced178..1823955 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -947,6 +947,7 @@ void hist_entry__free(struct hist_entry *he) zfree(&he->mem_info); zfree(&he->stat_acc); free_srcline(he->srcline); + free_callchain(he->callchain); free(he); } -- cgit v0.10.2 From c09e31cc128cf1aec3a6cd47203508fbf0082873 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 24 Dec 2014 15:04:36 +0900 Subject: perf hists browser: Fix segfault when showing callchain When perf report on TUI shows callchain it checks first node has siblings to determine whether it needs to print percentage value. But it missed a case that first node is NULL. So sometimes it segfaults like below: $ perf top -g perf: Segmentation fault -------- backtrace -------- perf[0x4fcefb] /usr/lib/libc.so.6(+0x33b20)[0x7f2a35839b20] perf(rb_next+0x8)[0x47d3d8] perf[0x4f6058] perf[0x4f833b] perf[0x4f8610] perf[0x4f209e] perf(ui_browser__run+0x3a)[0x4f2e6a] perf[0x4f94ee] perf(perf_evlist__tui_browse_hists+0x94)[0x4fbbf4] perf[0x444d10] /usr/lib/libpthread.so.0(+0x7314)[0x7f2a37070314] /usr/lib/libc.so.6(clone+0x6d)[0x7f2a358ee5bd] $ addr2line -e `which perf` 0x4f6058 /home/namhyung/project/linux/tools/perf/ui/browsers/hists.c:553 I don't know why the backtrace didn't print some symbols.. Signed-off-by: Namhyung Kim Fixes: 4087d11cd945 ("perf hists browser: Print overhead percent value for first-level callchain") Cc: Adrian Hunter Cc: Andi Kleen Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Jiri Olsa Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1419401076-21700-1-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index e6bb04b..788506e 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -550,7 +550,7 @@ static int hist_browser__show_callchain(struct hist_browser *browser, bool need_percent; node = rb_first(root); - need_percent = !!rb_next(node); + need_percent = node && rb_next(node); while (node) { struct callchain_node *child = rb_entry(node, struct callchain_node, rb_node); -- cgit v0.10.2 From f5db310d77ef1742e40bfc303b8625584c55f9e3 Mon Sep 17 00:00:00 2001 From: Andrey Skvortsov Date: Wed, 7 Jan 2015 21:35:54 +0300 Subject: selftests/vm: fix link error for transhuge-stress test add -lrt to fix undefined reference to `clock_gettime' error seen when the test is compiled using gcc 4.6.4. Signed-off-by: Andrey Skvortsov Signed-off-by: Shuah Khan diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile index 4c4b1f6..077828c 100644 --- a/tools/testing/selftests/vm/Makefile +++ b/tools/testing/selftests/vm/Makefile @@ -7,7 +7,7 @@ BINARIES += transhuge-stress all: $(BINARIES) %: %.c - $(CC) $(CFLAGS) -o $@ $^ + $(CC) $(CFLAGS) -o $@ $^ -lrt run_tests: all @/bin/sh ./run_vmtests || (echo "vmtests: [FAIL]"; exit 1) -- cgit v0.10.2 From 0668ff52e2fbca869c579025612e9bcfc4edd40e Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Fri, 19 Dec 2014 13:10:10 +0300 Subject: ceph: use %zu for len in ceph_fill_inline_data() len is size_t, should be printed with %zu. Signed-off-by: Ilya Dryomov diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index f5013d9..c81c0e0 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -1416,7 +1416,7 @@ void ceph_fill_inline_data(struct inode *inode, struct page *locked_page, } } - dout("fill_inline_data %p %llx.%llx len %lu locked_page %p\n", + dout("fill_inline_data %p %llx.%llx len %zu locked_page %p\n", inode, ceph_vinop(inode), len, locked_page); if (len > 0) { -- cgit v0.10.2 From d7d5a007b1c64c617ce3ee30c973ed0bb93443d9 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Fri, 19 Dec 2014 14:00:41 +0300 Subject: libceph: fix sparse endianness warnings The only real issue is the one in auth_x.c and it came with 3.19-rc1 merge. Signed-off-by: Ilya Dryomov diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index 5d86416..61b19c4 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -87,8 +87,8 @@ struct ceph_osd_req_op { struct ceph_osd_data osd_data; } extent; struct { - __le32 name_len; - __le32 value_len; + u32 name_len; + u32 value_len; __u8 cmp_op; /* CEPH_OSD_CMPXATTR_OP_* */ __u8 cmp_mode; /* CEPH_OSD_CMPXATTR_MODE_* */ struct ceph_osd_data osd_data; diff --git a/net/ceph/auth_x.c b/net/ceph/auth_x.c index 1584581..ba6eb17 100644 --- a/net/ceph/auth_x.c +++ b/net/ceph/auth_x.c @@ -676,7 +676,7 @@ static int calcu_signature(struct ceph_x_authorizer *au, int ret; char tmp_enc[40]; __le32 tmp[5] = { - 16u, msg->hdr.crc, msg->footer.front_crc, + cpu_to_le32(16), msg->hdr.crc, msg->footer.front_crc, msg->footer.middle_crc, msg->footer.data_crc, }; ret = ceph_x_encrypt(&au->session_key, &tmp, sizeof(tmp), diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c index a83062c..f2148e2 100644 --- a/net/ceph/mon_client.c +++ b/net/ceph/mon_client.c @@ -717,7 +717,7 @@ static int get_poolop_reply_buf(const char *src, size_t src_len, if (src_len != sizeof(u32) + dst_len) return -EINVAL; - buf_len = le32_to_cpu(*(u32 *)src); + buf_len = le32_to_cpu(*(__le32 *)src); if (buf_len != dst_len) return -EINVAL; -- cgit v0.10.2 From 879828c6ade916d13a5187eb02546899c2cd496c Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Sat, 20 Dec 2014 22:47:07 +0800 Subject: gpio: grgpio: Avoid potential NULL pointer dereference irqmap is optional property, so priv->domain can be NULL if !irqmap. Thus add NULL test for priv->domain before calling irq_domain_remove() to prevent NULL pointer dereference. Signed-off-by: Axel Lin Signed-off-by: Linus Walleij diff --git a/drivers/gpio/gpio-grgpio.c b/drivers/gpio/gpio-grgpio.c index 09daaf2..3a5a710 100644 --- a/drivers/gpio/gpio-grgpio.c +++ b/drivers/gpio/gpio-grgpio.c @@ -441,7 +441,8 @@ static int grgpio_probe(struct platform_device *ofdev) err = gpiochip_add(gc); if (err) { dev_err(&ofdev->dev, "Could not add gpiochip\n"); - irq_domain_remove(priv->domain); + if (priv->domain) + irq_domain_remove(priv->domain); return err; } -- cgit v0.10.2 From 90ac086bca100c654e84637d9990567f436cab23 Mon Sep 17 00:00:00 2001 From: Michal Marek Date: Thu, 8 Jan 2015 16:16:39 +0100 Subject: Makefile: include arch/*/include/generated/uapi before .../generated The introduction of the uapi directories in v3.7-rc1 moved some of the generated headers from arch/*/include/generated to the uapi directory, keeping the #include directives intact. This creates a problem when bisecting, because the unversioned files are not cleaned automatically by git and the compiler might include stale headers as a result. Instead of cleaning them in the Makefiles, promote arch/*/include/generated/uapi in the search path. Under normal circumstances, there is no overlap between this uapi subdirectory and its parent, so the include choices remain the same. We keep arch/*/include/generated/uapi in the USERINCLUDE variable so that it is usable standalone. Note that we cannot completely swap the order of the uapi and kernel-only directories, since the headers in include/uapi/asm-generic are meant to be wrapped by their include/asm-generic counterparts when building kernel code. Reported-by: "Nicholas A. Bellinger" Reported-by: David Drysdale Signed-off-by: Michal Marek Signed-off-by: Linus Torvalds diff --git a/Makefile b/Makefile index eb4eca5..87f0c05 100644 --- a/Makefile +++ b/Makefile @@ -391,6 +391,7 @@ USERINCLUDE := \ # Needed to be compatible with the O= option LINUXINCLUDE := \ -I$(srctree)/arch/$(hdr-arch)/include \ + -Iarch/$(hdr-arch)/include/generated/uapi \ -Iarch/$(hdr-arch)/include/generated \ $(if $(KBUILD_SRC), -I$(srctree)/include) \ -Iinclude \ -- cgit v0.10.2 From eb4f73b4ca6c04f31af6f1ff1bf11b5020a1216f Mon Sep 17 00:00:00 2001 From: Joseph Qi Date: Thu, 8 Jan 2015 14:32:09 -0800 Subject: ocfs2: remove bogus check in dlm_process_recovery_data In dlm_process_recovery_data, only when dlm_new_lock failed the ret will be set to -ENOMEM. And in this case, newlock is definitely NULL. So test newlock is meaningless, remove it. Signed-off-by: Joseph Qi Reviewed-by: Alex Chen Reviewed-by: Mark Fasheh Cc: Joel Becker Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c index 79b5af5..cecd875 100644 --- a/fs/ocfs2/dlm/dlmrecovery.c +++ b/fs/ocfs2/dlm/dlmrecovery.c @@ -2023,11 +2023,8 @@ leave: dlm_lockres_drop_inflight_ref(dlm, res); spin_unlock(&res->spinlock); - if (ret < 0) { + if (ret < 0) mlog_errno(ret); - if (newlock) - dlm_lock_put(newlock); - } return ret; } -- cgit v0.10.2 From 3245d6acab981a2388ffb877c7ecc97e763c59d4 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Thu, 8 Jan 2015 14:32:12 -0800 Subject: exit: fix race between wait_consider_task() and wait_task_zombie() wait_consider_task() checks EXIT_ZOMBIE after EXIT_DEAD/EXIT_TRACE and both checks can fail if we race with EXIT_ZOMBIE -> EXIT_DEAD/EXIT_TRACE change in between, gcc needs to reload p->exit_state after security_task_wait(). In this case ->notask_error will be wrongly cleared and do_wait() can hang forever if it was the last eligible child. Many thanks to Arne who carefully investigated the problem. Note: this bug is very old but it was pure theoretical until commit b3ab03160dfa ("wait: completely ignore the EXIT_DEAD tasks"). Before this commit "-O2" was probably enough to guarantee that compiler won't read ->exit_state twice. Signed-off-by: Oleg Nesterov Reported-by: Arne Goedeke Tested-by: Arne Goedeke Cc: [3.15+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/kernel/exit.c b/kernel/exit.c index 1ea4369..6806c55 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -1287,9 +1287,15 @@ static int wait_task_continued(struct wait_opts *wo, struct task_struct *p) static int wait_consider_task(struct wait_opts *wo, int ptrace, struct task_struct *p) { + /* + * We can race with wait_task_zombie() from another thread. + * Ensure that EXIT_ZOMBIE -> EXIT_DEAD/EXIT_TRACE transition + * can't confuse the checks below. + */ + int exit_state = ACCESS_ONCE(p->exit_state); int ret; - if (unlikely(p->exit_state == EXIT_DEAD)) + if (unlikely(exit_state == EXIT_DEAD)) return 0; ret = eligible_child(wo, p); @@ -1310,7 +1316,7 @@ static int wait_consider_task(struct wait_opts *wo, int ptrace, return 0; } - if (unlikely(p->exit_state == EXIT_TRACE)) { + if (unlikely(exit_state == EXIT_TRACE)) { /* * ptrace == 0 means we are the natural parent. In this case * we should clear notask_error, debugger will notify us. @@ -1337,7 +1343,7 @@ static int wait_consider_task(struct wait_opts *wo, int ptrace, } /* slay zombie? */ - if (p->exit_state == EXIT_ZOMBIE) { + if (exit_state == EXIT_ZOMBIE) { /* we don't reap group leaders with subthreads */ if (!delay_group_leader(p)) { /* -- cgit v0.10.2 From 7a3ef208e662f4b63d43a23f61a64a129c525bbc Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Thu, 8 Jan 2015 14:32:15 -0800 Subject: mm: prevent endless growth of anon_vma hierarchy Constantly forking task causes unlimited grow of anon_vma chain. Each next child allocates new level of anon_vmas and links vma to all previous levels because pages might be inherited from any level. This patch adds heuristic which decides to reuse existing anon_vma instead of forking new one. It adds counter anon_vma->degree which counts linked vmas and directly descending anon_vmas and reuses anon_vma if counter is lower than two. As a result each anon_vma has either vma or at least two descending anon_vmas. In such trees half of nodes are leafs with alive vmas, thus count of anon_vmas is no more than two times bigger than count of vmas. This heuristic reuses anon_vmas as few as possible because each reuse adds false aliasing among vmas and rmap walker ought to scan more ptes when it searches where page is might be mapped. Link: http://lkml.kernel.org/r/20120816024610.GA5350@evergreen.ssec.wisc.edu Fixes: 5beb49305251 ("mm: change anon_vma linking to fix multi-process server scalability issue") [akpm@linux-foundation.org: fix typo, per Rik] Signed-off-by: Konstantin Khlebnikov Reported-by: Daniel Forrest Tested-by: Michal Hocko Tested-by: Jerome Marchand Reviewed-by: Michal Hocko Reviewed-by: Rik van Riel Cc: [2.6.34+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/include/linux/rmap.h b/include/linux/rmap.h index c0c2bce..d9d7e7e 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -37,6 +37,16 @@ struct anon_vma { atomic_t refcount; /* + * Count of child anon_vmas and VMAs which points to this anon_vma. + * + * This counter is used for making decision about reusing anon_vma + * instead of forking new one. See comments in function anon_vma_clone. + */ + unsigned degree; + + struct anon_vma *parent; /* Parent of this anon_vma */ + + /* * NOTE: the LSB of the rb_root.rb_node is set by * mm_take_all_locks() _after_ taking the above lock. So the * rb_root must only be read/written after taking the above lock diff --git a/mm/rmap.c b/mm/rmap.c index c5bc241..71cd5bd 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -72,6 +72,8 @@ static inline struct anon_vma *anon_vma_alloc(void) anon_vma = kmem_cache_alloc(anon_vma_cachep, GFP_KERNEL); if (anon_vma) { atomic_set(&anon_vma->refcount, 1); + anon_vma->degree = 1; /* Reference for first vma */ + anon_vma->parent = anon_vma; /* * Initialise the anon_vma root to point to itself. If called * from fork, the root will be reset to the parents anon_vma. @@ -188,6 +190,8 @@ int anon_vma_prepare(struct vm_area_struct *vma) if (likely(!vma->anon_vma)) { vma->anon_vma = anon_vma; anon_vma_chain_link(vma, avc, anon_vma); + /* vma reference or self-parent link for new root */ + anon_vma->degree++; allocated = NULL; avc = NULL; } @@ -236,6 +240,14 @@ static inline void unlock_anon_vma_root(struct anon_vma *root) /* * Attach the anon_vmas from src to dst. * Returns 0 on success, -ENOMEM on failure. + * + * If dst->anon_vma is NULL this function tries to find and reuse existing + * anon_vma which has no vmas and only one child anon_vma. This prevents + * degradation of anon_vma hierarchy to endless linear chain in case of + * constantly forking task. On the other hand, an anon_vma with more than one + * child isn't reused even if there was no alive vma, thus rmap walker has a + * good chance of avoiding scanning the whole hierarchy when it searches where + * page is mapped. */ int anon_vma_clone(struct vm_area_struct *dst, struct vm_area_struct *src) { @@ -256,7 +268,21 @@ int anon_vma_clone(struct vm_area_struct *dst, struct vm_area_struct *src) anon_vma = pavc->anon_vma; root = lock_anon_vma_root(root, anon_vma); anon_vma_chain_link(dst, avc, anon_vma); + + /* + * Reuse existing anon_vma if its degree lower than two, + * that means it has no vma and only one anon_vma child. + * + * Do not chose parent anon_vma, otherwise first child + * will always reuse it. Root anon_vma is never reused: + * it has self-parent reference and at least one child. + */ + if (!dst->anon_vma && anon_vma != src->anon_vma && + anon_vma->degree < 2) + dst->anon_vma = anon_vma; } + if (dst->anon_vma) + dst->anon_vma->degree++; unlock_anon_vma_root(root); return 0; @@ -280,6 +306,9 @@ int anon_vma_fork(struct vm_area_struct *vma, struct vm_area_struct *pvma) if (!pvma->anon_vma) return 0; + /* Drop inherited anon_vma, we'll reuse existing or allocate new. */ + vma->anon_vma = NULL; + /* * First, attach the new VMA to the parent VMA's anon_vmas, * so rmap can find non-COWed pages in child processes. @@ -288,6 +317,10 @@ int anon_vma_fork(struct vm_area_struct *vma, struct vm_area_struct *pvma) if (error) return error; + /* An existing anon_vma has been reused, all done then. */ + if (vma->anon_vma) + return 0; + /* Then add our own anon_vma. */ anon_vma = anon_vma_alloc(); if (!anon_vma) @@ -301,6 +334,7 @@ int anon_vma_fork(struct vm_area_struct *vma, struct vm_area_struct *pvma) * lock any of the anon_vmas in this anon_vma tree. */ anon_vma->root = pvma->anon_vma->root; + anon_vma->parent = pvma->anon_vma; /* * With refcounts, an anon_vma can stay around longer than the * process it belongs to. The root anon_vma needs to be pinned until @@ -311,6 +345,7 @@ int anon_vma_fork(struct vm_area_struct *vma, struct vm_area_struct *pvma) vma->anon_vma = anon_vma; anon_vma_lock_write(anon_vma); anon_vma_chain_link(vma, avc, anon_vma); + anon_vma->parent->degree++; anon_vma_unlock_write(anon_vma); return 0; @@ -341,12 +376,16 @@ void unlink_anon_vmas(struct vm_area_struct *vma) * Leave empty anon_vmas on the list - we'll need * to free them outside the lock. */ - if (RB_EMPTY_ROOT(&anon_vma->rb_root)) + if (RB_EMPTY_ROOT(&anon_vma->rb_root)) { + anon_vma->parent->degree--; continue; + } list_del(&avc->same_vma); anon_vma_chain_free(avc); } + if (vma->anon_vma) + vma->anon_vma->degree--; unlock_anon_vma_root(root); /* @@ -357,6 +396,7 @@ void unlink_anon_vmas(struct vm_area_struct *vma) list_for_each_entry_safe(avc, next, &vma->anon_vma_chain, same_vma) { struct anon_vma *anon_vma = avc->anon_vma; + BUG_ON(anon_vma->degree); put_anon_vma(anon_vma); list_del(&avc->same_vma); -- cgit v0.10.2 From 2d6d7f98284648c5ed113fe22a132148950b140f Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Thu, 8 Jan 2015 14:32:18 -0800 Subject: mm: protect set_page_dirty() from ongoing truncation Tejun, while reviewing the code, spotted the following race condition between the dirtying and truncation of a page: __set_page_dirty_nobuffers() __delete_from_page_cache() if (TestSetPageDirty(page)) page->mapping = NULL if (PageDirty()) dec_zone_page_state(page, NR_FILE_DIRTY); dec_bdi_stat(mapping->backing_dev_info, BDI_RECLAIMABLE); if (page->mapping) account_page_dirtied(page) __inc_zone_page_state(page, NR_FILE_DIRTY); __inc_bdi_stat(mapping->backing_dev_info, BDI_RECLAIMABLE); which results in an imbalance of NR_FILE_DIRTY and BDI_RECLAIMABLE. Dirtiers usually lock out truncation, either by holding the page lock directly, or in case of zap_pte_range(), by pinning the mapcount with the page table lock held. The notable exception to this rule, though, is do_wp_page(), for which this race exists. However, do_wp_page() already waits for a locked page to unlock before setting the dirty bit, in order to prevent a race where clear_page_dirty() misses the page bit in the presence of dirty ptes. Upgrade that wait to a fully locked set_page_dirty() to also cover the situation explained above. Afterwards, the code in set_page_dirty() dealing with a truncation race is no longer needed. Remove it. Reported-by: Tejun Heo Signed-off-by: Johannes Weiner Acked-by: Kirill A. Shutemov Reviewed-by: Jan Kara Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/include/linux/writeback.h b/include/linux/writeback.h index a219be9..0004833 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -177,7 +177,6 @@ int write_cache_pages(struct address_space *mapping, struct writeback_control *wbc, writepage_t writepage, void *data); int do_writepages(struct address_space *mapping, struct writeback_control *wbc); -void set_page_dirty_balance(struct page *page); void writeback_set_ratelimit(void); void tag_pages_for_writeback(struct address_space *mapping, pgoff_t start, pgoff_t end); diff --git a/mm/memory.c b/mm/memory.c index d7e497e..c6565f0 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -2137,17 +2137,24 @@ reuse: if (!dirty_page) return ret; - /* - * Yes, Virginia, this is actually required to prevent a race - * with clear_page_dirty_for_io() from clearing the page dirty - * bit after it clear all dirty ptes, but before a racing - * do_wp_page installs a dirty pte. - * - * do_shared_fault is protected similarly. - */ if (!page_mkwrite) { - wait_on_page_locked(dirty_page); - set_page_dirty_balance(dirty_page); + struct address_space *mapping; + int dirtied; + + lock_page(dirty_page); + dirtied = set_page_dirty(dirty_page); + VM_BUG_ON_PAGE(PageAnon(dirty_page), dirty_page); + mapping = dirty_page->mapping; + unlock_page(dirty_page); + + if (dirtied && mapping) { + /* + * Some device drivers do not set page.mapping + * but still dirty their pages + */ + balance_dirty_pages_ratelimited(mapping); + } + /* file_update_time outside page_lock */ if (vma->vm_file) file_update_time(vma->vm_file); diff --git a/mm/page-writeback.c b/mm/page-writeback.c index d5d81f5..6f43352 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -1541,16 +1541,6 @@ pause: bdi_start_background_writeback(bdi); } -void set_page_dirty_balance(struct page *page) -{ - if (set_page_dirty(page)) { - struct address_space *mapping = page_mapping(page); - - if (mapping) - balance_dirty_pages_ratelimited(mapping); - } -} - static DEFINE_PER_CPU(int, bdp_ratelimits); /* @@ -2123,32 +2113,25 @@ EXPORT_SYMBOL(account_page_dirtied); * page dirty in that case, but not all the buffers. This is a "bottom-up" * dirtying, whereas __set_page_dirty_buffers() is a "top-down" dirtying. * - * Most callers have locked the page, which pins the address_space in memory. - * But zap_pte_range() does not lock the page, however in that case the - * mapping is pinned by the vma's ->vm_file reference. - * - * We take care to handle the case where the page was truncated from the - * mapping by re-checking page_mapping() inside tree_lock. + * The caller must ensure this doesn't race with truncation. Most will simply + * hold the page lock, but e.g. zap_pte_range() calls with the page mapped and + * the pte lock held, which also locks out truncation. */ int __set_page_dirty_nobuffers(struct page *page) { if (!TestSetPageDirty(page)) { struct address_space *mapping = page_mapping(page); - struct address_space *mapping2; unsigned long flags; if (!mapping) return 1; spin_lock_irqsave(&mapping->tree_lock, flags); - mapping2 = page_mapping(page); - if (mapping2) { /* Race with truncate? */ - BUG_ON(mapping2 != mapping); - WARN_ON_ONCE(!PagePrivate(page) && !PageUptodate(page)); - account_page_dirtied(page, mapping); - radix_tree_tag_set(&mapping->page_tree, - page_index(page), PAGECACHE_TAG_DIRTY); - } + BUG_ON(page_mapping(page) != mapping); + WARN_ON_ONCE(!PagePrivate(page) && !PageUptodate(page)); + account_page_dirtied(page, mapping); + radix_tree_tag_set(&mapping->page_tree, page_index(page), + PAGECACHE_TAG_DIRTY); spin_unlock_irqrestore(&mapping->tree_lock, flags); if (mapping->host) { /* !PageAnon && !swapper_space */ @@ -2305,12 +2288,10 @@ int clear_page_dirty_for_io(struct page *page) /* * We carefully synchronise fault handlers against * installing a dirty pte and marking the page dirty - * at this point. We do this by having them hold the - * page lock at some point after installing their - * pte, but before marking the page dirty. - * Pages are always locked coming in here, so we get - * the desired exclusion. See mm/memory.c:do_wp_page() - * for more comments. + * at this point. We do this by having them hold the + * page lock while dirtying the page, and pages are + * always locked coming in here, so we get the desired + * exclusion. */ if (TestClearPageDirty(page)) { dec_zone_page_state(page, NR_FILE_DIRTY); -- cgit v0.10.2 From 75dd112aac25713cd686cb4bfa78cf907519c504 Mon Sep 17 00:00:00 2001 From: Henrik Rydberg Date: Thu, 8 Jan 2015 14:32:21 -0800 Subject: MAINTAINERS: update rydberg's addresses My ISP finally gave up on the old mail address, so I am moving things over to bitmath.org instead. Also change the status fields to better reflect reality. Signed-off-by: Henrik Rydberg Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/.mailmap b/.mailmap index ada8ad6..d357e1b 100644 --- a/.mailmap +++ b/.mailmap @@ -51,6 +51,7 @@ Greg Kroah-Hartman Greg Kroah-Hartman Henk Vergonet Henrik Kretzschmar +Henrik Rydberg Herbert Xu Jacob Shin James Bottomley diff --git a/MAINTAINERS b/MAINTAINERS index ddb9ac8..79b2e4b 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -724,15 +724,15 @@ F: include/uapi/linux/apm_bios.h F: drivers/char/apm-emulation.c APPLE BCM5974 MULTITOUCH DRIVER -M: Henrik Rydberg +M: Henrik Rydberg L: linux-input@vger.kernel.org -S: Maintained +S: Odd fixes F: drivers/input/mouse/bcm5974.c APPLE SMC DRIVER -M: Henrik Rydberg +M: Henrik Rydberg L: lm-sensors@lm-sensors.org -S: Maintained +S: Odd fixes F: drivers/hwmon/applesmc.c APPLETALK NETWORK LAYER @@ -4940,10 +4940,10 @@ F: include/uapi/linux/input.h F: include/linux/input/ INPUT MULTITOUCH (MT) PROTOCOL -M: Henrik Rydberg +M: Henrik Rydberg L: linux-input@vger.kernel.org T: git git://git.kernel.org/pub/scm/linux/kernel/git/rydberg/input-mt.git -S: Maintained +S: Odd fixes F: Documentation/input/multi-touch-protocol.txt F: drivers/input/input-mt.c K: \b(ABS|SYN)_MT_ -- cgit v0.10.2 From 53dc20b9a3d928b0744dad5aee65b610de1cc85d Mon Sep 17 00:00:00 2001 From: Xue jiufei Date: Thu, 8 Jan 2015 14:32:23 -0800 Subject: ocfs2: fix the wrong directory passed to ocfs2_lookup_ino_from_name() when link file In ocfs2_link(), the parent directory inode passed to function ocfs2_lookup_ino_from_name() is wrong. Parameter dir is the parent of new_dentry not old_dentry. We should get old_dir from old_dentry and lookup old_dentry in old_dir in case another node remove the old dentry. With this change, hard linking works again, when paths are relative with at least one subdirectory. This is how the problem was reproducable: # mkdir a # mkdir b # touch a/test # ln a/test b/test ln: failed to create hard link `b/test' => `a/test': No such file or directory However when creating links in the same dir, it worked well. Now the link gets created. Fixes: 0e048316ff57 ("ocfs2: check existence of old dentry in ocfs2_link()") Signed-off-by: joyce.xue Reported-by: Szabo Aron - UBIT Cc: Mark Fasheh Cc: Joel Becker Tested-by: Aron Szabo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c index b931e04..914c121 100644 --- a/fs/ocfs2/namei.c +++ b/fs/ocfs2/namei.c @@ -94,6 +94,14 @@ static int ocfs2_create_symlink_data(struct ocfs2_super *osb, struct inode *inode, const char *symname); +static int ocfs2_double_lock(struct ocfs2_super *osb, + struct buffer_head **bh1, + struct inode *inode1, + struct buffer_head **bh2, + struct inode *inode2, + int rename); + +static void ocfs2_double_unlock(struct inode *inode1, struct inode *inode2); /* An orphan dir name is an 8 byte value, printed as a hex string */ #define OCFS2_ORPHAN_NAMELEN ((int)(2 * sizeof(u64))) @@ -678,8 +686,10 @@ static int ocfs2_link(struct dentry *old_dentry, { handle_t *handle; struct inode *inode = old_dentry->d_inode; + struct inode *old_dir = old_dentry->d_parent->d_inode; int err; struct buffer_head *fe_bh = NULL; + struct buffer_head *old_dir_bh = NULL; struct buffer_head *parent_fe_bh = NULL; struct ocfs2_dinode *fe = NULL; struct ocfs2_super *osb = OCFS2_SB(dir->i_sb); @@ -696,19 +706,33 @@ static int ocfs2_link(struct dentry *old_dentry, dquot_initialize(dir); - err = ocfs2_inode_lock_nested(dir, &parent_fe_bh, 1, OI_LS_PARENT); + err = ocfs2_double_lock(osb, &old_dir_bh, old_dir, + &parent_fe_bh, dir, 0); if (err < 0) { if (err != -ENOENT) mlog_errno(err); return err; } + /* make sure both dirs have bhs + * get an extra ref on old_dir_bh if old==new */ + if (!parent_fe_bh) { + if (old_dir_bh) { + parent_fe_bh = old_dir_bh; + get_bh(parent_fe_bh); + } else { + mlog(ML_ERROR, "%s: no old_dir_bh!\n", osb->uuid_str); + err = -EIO; + goto out; + } + } + if (!dir->i_nlink) { err = -ENOENT; goto out; } - err = ocfs2_lookup_ino_from_name(dir, old_dentry->d_name.name, + err = ocfs2_lookup_ino_from_name(old_dir, old_dentry->d_name.name, old_dentry->d_name.len, &old_de_ino); if (err) { err = -ENOENT; @@ -801,10 +825,11 @@ out_unlock_inode: ocfs2_inode_unlock(inode, 1); out: - ocfs2_inode_unlock(dir, 1); + ocfs2_double_unlock(old_dir, dir); brelse(fe_bh); brelse(parent_fe_bh); + brelse(old_dir_bh); ocfs2_free_dir_lookup_result(&lookup); @@ -1072,14 +1097,15 @@ static int ocfs2_check_if_ancestor(struct ocfs2_super *osb, } /* - * The only place this should be used is rename! + * The only place this should be used is rename and link! * if they have the same id, then the 1st one is the only one locked. */ static int ocfs2_double_lock(struct ocfs2_super *osb, struct buffer_head **bh1, struct inode *inode1, struct buffer_head **bh2, - struct inode *inode2) + struct inode *inode2, + int rename) { int status; int inode1_is_ancestor, inode2_is_ancestor; @@ -1127,7 +1153,7 @@ static int ocfs2_double_lock(struct ocfs2_super *osb, } /* lock id2 */ status = ocfs2_inode_lock_nested(inode2, bh2, 1, - OI_LS_RENAME1); + rename == 1 ? OI_LS_RENAME1 : OI_LS_PARENT); if (status < 0) { if (status != -ENOENT) mlog_errno(status); @@ -1136,7 +1162,8 @@ static int ocfs2_double_lock(struct ocfs2_super *osb, } /* lock id1 */ - status = ocfs2_inode_lock_nested(inode1, bh1, 1, OI_LS_RENAME2); + status = ocfs2_inode_lock_nested(inode1, bh1, 1, + rename == 1 ? OI_LS_RENAME2 : OI_LS_PARENT); if (status < 0) { /* * An error return must mean that no cluster locks @@ -1252,7 +1279,7 @@ static int ocfs2_rename(struct inode *old_dir, /* if old and new are the same, this'll just do one lock. */ status = ocfs2_double_lock(osb, &old_dir_bh, old_dir, - &new_dir_bh, new_dir); + &new_dir_bh, new_dir, 1); if (status < 0) { mlog_errno(status); goto bail; -- cgit v0.10.2 From 9de93e7873f5f6c4d0768649d404703a62a51610 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Thu, 8 Jan 2015 14:32:26 -0800 Subject: arch/blackfin/mach-bf533/boards/stamp.c: add linux/delay.h build error arch/blackfin/mach-bf533/boards/stamp.c:834:2: error: implicit declaration of function 'mdelay' Signed-off-by: Oleg Nesterov Reported-by: Wu Fengguang Acked-by: Mike Frysinger Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/blackfin/mach-bf533/boards/stamp.c b/arch/blackfin/mach-bf533/boards/stamp.c index 6f4bac9..23eada7 100644 --- a/arch/blackfin/mach-bf533/boards/stamp.c +++ b/arch/blackfin/mach-bf533/boards/stamp.c @@ -7,6 +7,7 @@ */ #include +#include #include #include #include -- cgit v0.10.2 From 75069f2b5bfb5164beafaf3da597279c25b5535a Mon Sep 17 00:00:00 2001 From: David Drysdale Date: Thu, 8 Jan 2015 14:32:29 -0800 Subject: vfs: renumber FMODE_NONOTIFY and add to uniqueness check Fix clashing values for O_PATH and FMODE_NONOTIFY on sparc. The clashing O_PATH value was added in commit 5229645bdc35 ("vfs: add nonconflicting values for O_PATH") but this can't be changed as it is user-visible. FMODE_NONOTIFY is only used internally in the kernel, but it is in the same numbering space as the other O_* flags, as indicated by the comment at the top of include/uapi/asm-generic/fcntl.h (and its use in fs/notify/fanotify/fanotify_user.c). So renumber it to avoid the clash. All of this has happened before (commit 12ed2e36c98a: "fanotify: FMODE_NONOTIFY and __O_SYNC in sparc conflict"), and all of this will happen again -- so update the uniqueness check in fcntl_init() to include __FMODE_NONOTIFY. Signed-off-by: David Drysdale Acked-by: David S. Miller Acked-by: Jan Kara Cc: Heinrich Schuchardt Cc: Alexander Viro Cc: Arnd Bergmann Cc: Stephen Rothwell Cc: Eric Paris Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/fs/fcntl.c b/fs/fcntl.c index 99d440a..ee85cd4 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c @@ -740,14 +740,15 @@ static int __init fcntl_init(void) * Exceptions: O_NONBLOCK is a two bit define on parisc; O_NDELAY * is defined as O_NONBLOCK on some platforms and not on others. */ - BUILD_BUG_ON(20 - 1 /* for O_RDONLY being 0 */ != HWEIGHT32( + BUILD_BUG_ON(21 - 1 /* for O_RDONLY being 0 */ != HWEIGHT32( O_RDONLY | O_WRONLY | O_RDWR | O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC | O_APPEND | /* O_NONBLOCK | */ __O_SYNC | O_DSYNC | FASYNC | O_DIRECT | O_LARGEFILE | O_DIRECTORY | O_NOFOLLOW | O_NOATIME | O_CLOEXEC | - __FMODE_EXEC | O_PATH | __O_TMPFILE + __FMODE_EXEC | O_PATH | __O_TMPFILE | + __FMODE_NONOTIFY )); fasync_cache = kmem_cache_create("fasync_cache", diff --git a/include/linux/fs.h b/include/linux/fs.h index f90c028..42efe13 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -135,7 +135,7 @@ typedef void (dio_iodone_t)(struct kiocb *iocb, loff_t offset, #define FMODE_CAN_WRITE ((__force fmode_t)0x40000) /* File was opened by fanotify and shouldn't generate fanotify events */ -#define FMODE_NONOTIFY ((__force fmode_t)0x1000000) +#define FMODE_NONOTIFY ((__force fmode_t)0x4000000) /* * Flag for rw_copy_check_uvector and compat_rw_copy_check_uvector diff --git a/include/uapi/asm-generic/fcntl.h b/include/uapi/asm-generic/fcntl.h index 7543b3e..e063eff 100644 --- a/include/uapi/asm-generic/fcntl.h +++ b/include/uapi/asm-generic/fcntl.h @@ -5,7 +5,7 @@ /* * FMODE_EXEC is 0x20 - * FMODE_NONOTIFY is 0x1000000 + * FMODE_NONOTIFY is 0x4000000 * These cannot be used by userspace O_* until internal and external open * flags are split. * -Eric Paris -- cgit v0.10.2 From 70ecb3cb033f37fd5024448f809ea028fb85c28b Mon Sep 17 00:00:00 2001 From: Joonsoo Kim Date: Thu, 8 Jan 2015 14:32:32 -0800 Subject: mm/debug_pagealloc: remove obsolete Kconfig options These are obsolete since commit e30825f1869a ("mm/debug-pagealloc: prepare boottime configurable") was merged. So remove them. [pebolle@tiscali.nl: find obsolete Kconfig options] Signed-off-by: Joonsoo Kim Cc: Paul Bolle Cc: Mel Gorman Cc: Johannes Weiner Cc: Minchan Kim Cc: Dave Hansen Cc: Michal Nazarewicz Cc: Jungsoo Son Acked-by: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/mm/Kconfig.debug b/mm/Kconfig.debug index 56badfc..957d3da 100644 --- a/mm/Kconfig.debug +++ b/mm/Kconfig.debug @@ -14,7 +14,6 @@ config DEBUG_PAGEALLOC depends on !KMEMCHECK select PAGE_EXTENSION select PAGE_POISONING if !ARCH_SUPPORTS_DEBUG_PAGEALLOC - select PAGE_GUARD if ARCH_SUPPORTS_DEBUG_PAGEALLOC ---help--- Unmap pages from the kernel linear mapping after free_pages(). This results in a large slowdown, but helps to find certain types @@ -27,13 +26,5 @@ config DEBUG_PAGEALLOC that would result in incorrect warnings of memory corruption after a resume because free pages are not saved to the suspend image. -config WANT_PAGE_DEBUG_FLAGS - bool - config PAGE_POISONING bool - select WANT_PAGE_DEBUG_FLAGS - -config PAGE_GUARD - bool - select WANT_PAGE_DEBUG_FLAGS -- cgit v0.10.2 From 24d404dc10b903da271e943a0f6b032dcbd177d8 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Thu, 8 Jan 2015 14:32:35 -0800 Subject: mm: memcontrol: switch soft limit default back to infinity Commit 3e32cb2e0a12 ("mm: memcontrol: lockless page counters") accidentally switched the soft limit default from infinity to zero, which turns all memcgs with even a single page into soft limit excessors and engages soft limit reclaim on all of them during global memory pressure. This makes global reclaim generally more aggressive, but also inverts the meaning of existing soft limit configurations where unset soft limits are usually more generous than set ones. Signed-off-by: Johannes Weiner Acked-by: Michal Hocko Acked-by: Vladimir Davydov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/mm/memcontrol.c b/mm/memcontrol.c index ef91e85..b7104a5 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -4679,6 +4679,7 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css) if (parent_css == NULL) { root_mem_cgroup = memcg; page_counter_init(&memcg->memory, NULL); + memcg->soft_limit = PAGE_COUNTER_MAX; page_counter_init(&memcg->memsw, NULL); page_counter_init(&memcg->kmem, NULL); } @@ -4724,6 +4725,7 @@ mem_cgroup_css_online(struct cgroup_subsys_state *css) if (parent->use_hierarchy) { page_counter_init(&memcg->memory, &parent->memory); + memcg->soft_limit = PAGE_COUNTER_MAX; page_counter_init(&memcg->memsw, &parent->memsw); page_counter_init(&memcg->kmem, &parent->kmem); @@ -4733,6 +4735,7 @@ mem_cgroup_css_online(struct cgroup_subsys_state *css) */ } else { page_counter_init(&memcg->memory, NULL); + memcg->soft_limit = PAGE_COUNTER_MAX; page_counter_init(&memcg->memsw, NULL); page_counter_init(&memcg->kmem, NULL); /* @@ -4807,7 +4810,7 @@ static void mem_cgroup_css_reset(struct cgroup_subsys_state *css) mem_cgroup_resize_limit(memcg, PAGE_COUNTER_MAX); mem_cgroup_resize_memsw_limit(memcg, PAGE_COUNTER_MAX); memcg_update_kmem_limit(memcg, PAGE_COUNTER_MAX); - memcg->soft_limit = 0; + memcg->soft_limit = PAGE_COUNTER_MAX; } #ifdef CONFIG_MMU -- cgit v0.10.2 From 4bdfc1c4a943ce8707675ad510ea1076c9e8e528 Mon Sep 17 00:00:00 2001 From: Vladimir Davydov Date: Thu, 8 Jan 2015 14:32:37 -0800 Subject: memcg: fix destination cgroup leak on task charges migration We are supposed to take one css reference per each memory page and per each swap entry accounted to a memory cgroup. However, during task charges migration we take a reference to the destination cgroup twice per each swap entry: first in mem_cgroup_do_precharge()->try_charge() and then in mem_cgroup_move_swap_account(), permanently leaking the destination cgroup. The hunk taking the second reference seems to be a leftover from the pre-00501b531c472 ("mm: memcontrol: rewrite charge API") era. Remove it to fix the leak. Fixes: e8ea14cc6ead (mm: memcontrol: take a css reference for each charged page) Signed-off-by: Vladimir Davydov Cc: Johannes Weiner Acked-by: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/mm/memcontrol.c b/mm/memcontrol.c index b7104a5..851924f 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -3043,18 +3043,6 @@ static int mem_cgroup_move_swap_account(swp_entry_t entry, if (swap_cgroup_cmpxchg(entry, old_id, new_id) == old_id) { mem_cgroup_swap_statistics(from, false); mem_cgroup_swap_statistics(to, true); - /* - * This function is only called from task migration context now. - * It postpones page_counter and refcount handling till the end - * of task migration(mem_cgroup_clear_mc()) for performance - * improvement. But we cannot postpone css_get(to) because if - * the process that has been moved to @to does swap-in, the - * refcount of @to might be decreased to 0. - * - * We are in attach() phase, so the cgroup is guaranteed to be - * alive, so we can just call css_get(). - */ - css_get(&to->css); return 0; } return -EINVAL; -- cgit v0.10.2 From 9e5e3661727eaf960d3480213f8e87c8d67b6956 Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Thu, 8 Jan 2015 14:32:40 -0800 Subject: mm, vmscan: prevent kswapd livelock due to pfmemalloc-throttled process being killed Charles Shirron and Paul Cassella from Cray Inc have reported kswapd stuck in a busy loop with nothing left to balance, but kswapd_try_to_sleep() failing to sleep. Their analysis found the cause to be a combination of several factors: 1. A process is waiting in throttle_direct_reclaim() on pgdat->pfmemalloc_wait 2. The process has been killed (by OOM in this case), but has not yet been scheduled to remove itself from the waitqueue and die. 3. kswapd checks for throttled processes in prepare_kswapd_sleep(): if (waitqueue_active(&pgdat->pfmemalloc_wait)) { wake_up(&pgdat->pfmemalloc_wait); return false; // kswapd will not go to sleep } However, for a process that was already killed, wake_up() does not remove the process from the waitqueue, since try_to_wake_up() checks its state first and returns false when the process is no longer waiting. 4. kswapd is running on the same CPU as the only CPU that the process is allowed to run on (through cpus_allowed, or possibly single-cpu system). 5. CONFIG_PREEMPT_NONE=y kernel is used. If there's nothing to balance, kswapd encounters no voluntary preemption points and repeatedly fails prepare_kswapd_sleep(), blocking the process from running and removing itself from the waitqueue, which would let kswapd sleep. So, the source of the problem is that we prevent kswapd from going to sleep until there are processes waiting on the pfmemalloc_wait queue, and a process waiting on a queue is guaranteed to be removed from the queue only when it gets scheduled. This was done to make sure that no process is left sleeping on pfmemalloc_wait when kswapd itself goes to sleep. However, it isn't necessary to postpone kswapd sleep until the pfmemalloc_wait queue actually empties. To prevent processes from being left sleeping, it's actually enough to guarantee that all processes waiting on pfmemalloc_wait queue have been woken up by the time we put kswapd to sleep. This patch therefore fixes this issue by substituting 'wake_up' with 'wake_up_all' and removing 'return false' in the code snippet from prepare_kswapd_sleep() above. Note that if any process puts itself in the queue after this waitqueue_active() check, or after the wake up itself, it means that the process will also wake up kswapd - and since we are under prepare_to_wait(), the wake up won't be missed. Also we update the comment prepare_kswapd_sleep() to hopefully more clearly describe the races it is preventing. Fixes: 5515061d22f0 ("mm: throttle direct reclaimers if PF_MEMALLOC reserves are low and swap is backed by network storage") Signed-off-by: Vlastimil Babka Signed-off-by: Vladimir Davydov Cc: Mel Gorman Cc: Johannes Weiner Acked-by: Michal Hocko Acked-by: Rik van Riel Cc: [3.6+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/mm/vmscan.c b/mm/vmscan.c index bd9a72b..ab2505c 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -2921,18 +2921,20 @@ static bool prepare_kswapd_sleep(pg_data_t *pgdat, int order, long remaining, return false; /* - * There is a potential race between when kswapd checks its watermarks - * and a process gets throttled. There is also a potential race if - * processes get throttled, kswapd wakes, a large process exits therby - * balancing the zones that causes kswapd to miss a wakeup. If kswapd - * is going to sleep, no process should be sleeping on pfmemalloc_wait - * so wake them now if necessary. If necessary, processes will wake - * kswapd and get throttled again + * The throttled processes are normally woken up in balance_pgdat() as + * soon as pfmemalloc_watermark_ok() is true. But there is a potential + * race between when kswapd checks the watermarks and a process gets + * throttled. There is also a potential race if processes get + * throttled, kswapd wakes, a large process exits thereby balancing the + * zones, which causes kswapd to exit balance_pgdat() before reaching + * the wake up checks. If kswapd is going to sleep, no process should + * be sleeping on pfmemalloc_wait, so wake them now if necessary. If + * the wake up is premature, processes will wake kswapd and get + * throttled again. The difference from wake ups in balance_pgdat() is + * that here we are under prepare_to_wait(). */ - if (waitqueue_active(&pgdat->pfmemalloc_wait)) { - wake_up(&pgdat->pfmemalloc_wait); - return false; - } + if (waitqueue_active(&pgdat->pfmemalloc_wait)) + wake_up_all(&pgdat->pfmemalloc_wait); return pgdat_balanced(pgdat, order, classzone_idx); } -- cgit v0.10.2 From 96b932b84409a5c1037827b4432a2b53adc09a61 Mon Sep 17 00:00:00 2001 From: Octavian Purdila Date: Thu, 11 Dec 2014 15:02:30 +0200 Subject: gpio: dln2: use bus_sync_unlock instead of scheduling work Use the irq_chip bus_sync_unlock method to update hardware registers instead of scheduling work from the mask/unmask methods. This simplifies a bit the driver and make it more uniform with the other GPIO IRQ drivers. Signed-off-by: Octavian Purdila Signed-off-by: Linus Walleij diff --git a/drivers/gpio/gpio-dln2.c b/drivers/gpio/gpio-dln2.c index 2ff4619..ce3c155 100644 --- a/drivers/gpio/gpio-dln2.c +++ b/drivers/gpio/gpio-dln2.c @@ -47,13 +47,6 @@ #define DLN2_GPIO_MAX_PINS 32 -struct dln2_irq_work { - struct work_struct work; - struct dln2_gpio *dln2; - int pin; - int type; -}; - struct dln2_gpio { struct platform_device *pdev; struct gpio_chip gpio; @@ -66,7 +59,10 @@ struct dln2_gpio { /* active IRQs - not synced to hardware */ DECLARE_BITMAP(unmasked_irqs, DLN2_GPIO_MAX_PINS); - struct dln2_irq_work *irq_work; + /* active IRQS - synced to hardware */ + DECLARE_BITMAP(enabled_irqs, DLN2_GPIO_MAX_PINS); + int irq_type[DLN2_GPIO_MAX_PINS]; + struct mutex irq_lock; }; struct dln2_gpio_pin { @@ -303,18 +299,6 @@ static int dln2_gpio_set_event_cfg(struct dln2_gpio *dln2, unsigned pin, &req, sizeof(req)); } -static void dln2_irq_work(struct work_struct *w) -{ - struct dln2_irq_work *iw = container_of(w, struct dln2_irq_work, work); - struct dln2_gpio *dln2 = iw->dln2; - u8 type = iw->type & DLN2_GPIO_EVENT_MASK; - - if (test_bit(iw->pin, dln2->unmasked_irqs)) - dln2_gpio_set_event_cfg(dln2, iw->pin, type, 0); - else - dln2_gpio_set_event_cfg(dln2, iw->pin, DLN2_GPIO_EVENT_NONE, 0); -} - static void dln2_irq_unmask(struct irq_data *irqd) { struct gpio_chip *gc = irq_data_get_irq_chip_data(irqd); @@ -322,7 +306,6 @@ static void dln2_irq_unmask(struct irq_data *irqd) int pin = irqd_to_hwirq(irqd); set_bit(pin, dln2->unmasked_irqs); - schedule_work(&dln2->irq_work[pin].work); } static void dln2_irq_mask(struct irq_data *irqd) @@ -332,7 +315,6 @@ static void dln2_irq_mask(struct irq_data *irqd) int pin = irqd_to_hwirq(irqd); clear_bit(pin, dln2->unmasked_irqs); - schedule_work(&dln2->irq_work[pin].work); } static int dln2_irq_set_type(struct irq_data *irqd, unsigned type) @@ -343,19 +325,19 @@ static int dln2_irq_set_type(struct irq_data *irqd, unsigned type) switch (type) { case IRQ_TYPE_LEVEL_HIGH: - dln2->irq_work[pin].type = DLN2_GPIO_EVENT_LVL_HIGH; + dln2->irq_type[pin] = DLN2_GPIO_EVENT_LVL_HIGH; break; case IRQ_TYPE_LEVEL_LOW: - dln2->irq_work[pin].type = DLN2_GPIO_EVENT_LVL_LOW; + dln2->irq_type[pin] = DLN2_GPIO_EVENT_LVL_LOW; break; case IRQ_TYPE_EDGE_BOTH: - dln2->irq_work[pin].type = DLN2_GPIO_EVENT_CHANGE; + dln2->irq_type[pin] = DLN2_GPIO_EVENT_CHANGE; break; case IRQ_TYPE_EDGE_RISING: - dln2->irq_work[pin].type = DLN2_GPIO_EVENT_CHANGE_RISING; + dln2->irq_type[pin] = DLN2_GPIO_EVENT_CHANGE_RISING; break; case IRQ_TYPE_EDGE_FALLING: - dln2->irq_work[pin].type = DLN2_GPIO_EVENT_CHANGE_FALLING; + dln2->irq_type[pin] = DLN2_GPIO_EVENT_CHANGE_FALLING; break; default: return -EINVAL; @@ -364,11 +346,50 @@ static int dln2_irq_set_type(struct irq_data *irqd, unsigned type) return 0; } +static void dln2_irq_bus_lock(struct irq_data *irqd) +{ + struct gpio_chip *gc = irq_data_get_irq_chip_data(irqd); + struct dln2_gpio *dln2 = container_of(gc, struct dln2_gpio, gpio); + + mutex_lock(&dln2->irq_lock); +} + +static void dln2_irq_bus_unlock(struct irq_data *irqd) +{ + struct gpio_chip *gc = irq_data_get_irq_chip_data(irqd); + struct dln2_gpio *dln2 = container_of(gc, struct dln2_gpio, gpio); + int pin = irqd_to_hwirq(irqd); + int enabled, unmasked; + unsigned type; + int ret; + + enabled = test_bit(pin, dln2->enabled_irqs); + unmasked = test_bit(pin, dln2->unmasked_irqs); + + if (enabled != unmasked) { + if (unmasked) { + type = dln2->irq_type[pin] & DLN2_GPIO_EVENT_MASK; + set_bit(pin, dln2->enabled_irqs); + } else { + type = DLN2_GPIO_EVENT_NONE; + clear_bit(pin, dln2->enabled_irqs); + } + + ret = dln2_gpio_set_event_cfg(dln2, pin, type, 0); + if (ret) + dev_err(dln2->gpio.dev, "failed to set event\n"); + } + + mutex_unlock(&dln2->irq_lock); +} + static struct irq_chip dln2_gpio_irqchip = { .name = "dln2-irq", .irq_mask = dln2_irq_mask, .irq_unmask = dln2_irq_unmask, .irq_set_type = dln2_irq_set_type, + .irq_bus_lock = dln2_irq_bus_lock, + .irq_bus_sync_unlock = dln2_irq_bus_unlock, }; static void dln2_gpio_event(struct platform_device *pdev, u16 echo, @@ -400,7 +421,7 @@ static void dln2_gpio_event(struct platform_device *pdev, u16 echo, return; } - switch (dln2->irq_work[pin].type) { + switch (dln2->irq_type[pin]) { case DLN2_GPIO_EVENT_CHANGE_RISING: if (event->value) generic_handle_irq(irq); @@ -419,7 +440,7 @@ static int dln2_gpio_probe(struct platform_device *pdev) struct dln2_gpio *dln2; struct device *dev = &pdev->dev; int pins; - int i, ret; + int ret; pins = dln2_gpio_get_pin_count(pdev); if (pins < 0) { @@ -435,15 +456,7 @@ static int dln2_gpio_probe(struct platform_device *pdev) if (!dln2) return -ENOMEM; - dln2->irq_work = devm_kcalloc(&pdev->dev, pins, - sizeof(struct dln2_irq_work), GFP_KERNEL); - if (!dln2->irq_work) - return -ENOMEM; - for (i = 0; i < pins; i++) { - INIT_WORK(&dln2->irq_work[i].work, dln2_irq_work); - dln2->irq_work[i].pin = i; - dln2->irq_work[i].dln2 = dln2; - } + mutex_init(&dln2->irq_lock); dln2->pdev = pdev; @@ -497,11 +510,8 @@ out: static int dln2_gpio_remove(struct platform_device *pdev) { struct dln2_gpio *dln2 = platform_get_drvdata(pdev); - int i; dln2_unregister_event_cb(pdev, DLN2_GPIO_CONDITION_MET_EV); - for (i = 0; i < dln2->gpio.ngpio; i++) - flush_work(&dln2->irq_work[i].work); gpiochip_remove(&dln2->gpio); return 0; -- cgit v0.10.2 From 0f363b250b15af0f218bb2876d101fe5cd413f8b Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 16 Dec 2014 11:46:14 +0100 Subject: x86: Fix off-by-one in instruction decoder Stephane reported that the PEBS fixup was broken by the recent commit to the instruction decoder. The thing had an off-by-one which resulted in not being able to decode the last instruction and always bail. Reported-by: Stephane Eranian Fixes: 6ba48ff46f76 ("x86: Remove arbitrary instruction size limit in instruction decoder") Signed-off-by: Peter Zijlstra (Intel) Cc: stable@vger.kernel.org # 3.18 Cc: Cc: Jiri Olsa Cc: Liang Kan Cc: Arnaldo Carvalho de Melo Cc: Dave Hansen Cc: Jim Keniston Cc: Linus Torvalds Cc: Masami Hiramatsu Link: http://lkml.kernel.org/r/20141216104614.GV3337@twins.programming.kicks-ass.net Signed-off-by: Ingo Molnar diff --git a/arch/x86/lib/insn.c b/arch/x86/lib/insn.c index 2480978b..1313ae6 100644 --- a/arch/x86/lib/insn.c +++ b/arch/x86/lib/insn.c @@ -28,7 +28,7 @@ /* Verify next sizeof(t) bytes can be on the same instruction */ #define validate_next(t, insn, n) \ - ((insn)->next_byte + sizeof(t) + n < (insn)->end_kaddr) + ((insn)->next_byte + sizeof(t) + n <= (insn)->end_kaddr) #define __get_next(t, insn) \ ({ t r = *(t*)insn->next_byte; insn->next_byte += sizeof(t); r; }) -- cgit v0.10.2 From 88a7c26af8dab2f2d69f5a6067eb670694ec38c0 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Sun, 4 Jan 2015 10:36:19 -0800 Subject: perf: Move task_pt_regs sampling into arch code On x86_64, at least, task_pt_regs may be only partially initialized in many contexts, so x86_64 should not use it without extra care from interrupt context, let alone NMI context. This will allow x86_64 to override the logic and will supply some scratch space to use to make a cleaner copy of user regs. Tested-by: Jiri Olsa Signed-off-by: Andy Lutomirski Signed-off-by: Peter Zijlstra (Intel) Cc: Stephane Eranian Cc: chenggang.qcg@taobao.com Cc: Wu Fengguang Cc: Namhyung Kim Cc: Mike Galbraith Cc: Arjan van de Ven Cc: David Ahern Cc: Arnaldo Carvalho de Melo Cc: Catalin Marinas Cc: Jean Pihet Cc: Linus Torvalds Cc: Mark Salter Cc: Russell King Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Link: http://lkml.kernel.org/r/e431cd4c18c2e1c44c774f10758527fb2d1025c4.1420396372.git.luto@amacapital.net Signed-off-by: Ingo Molnar diff --git a/arch/arm/kernel/perf_regs.c b/arch/arm/kernel/perf_regs.c index 6e4379c..592dda3 100644 --- a/arch/arm/kernel/perf_regs.c +++ b/arch/arm/kernel/perf_regs.c @@ -28,3 +28,11 @@ u64 perf_reg_abi(struct task_struct *task) { return PERF_SAMPLE_REGS_ABI_32; } + +void perf_get_regs_user(struct perf_regs *regs_user, + struct pt_regs *regs, + struct pt_regs *regs_user_copy) +{ + regs_user->regs = task_pt_regs(current); + regs_user->abi = perf_reg_abi(current); +} diff --git a/arch/arm64/kernel/perf_regs.c b/arch/arm64/kernel/perf_regs.c index 6762ad7..3f62b35 100644 --- a/arch/arm64/kernel/perf_regs.c +++ b/arch/arm64/kernel/perf_regs.c @@ -50,3 +50,11 @@ u64 perf_reg_abi(struct task_struct *task) else return PERF_SAMPLE_REGS_ABI_64; } + +void perf_get_regs_user(struct perf_regs *regs_user, + struct pt_regs *regs, + struct pt_regs *regs_user_copy) +{ + regs_user->regs = task_pt_regs(current); + regs_user->abi = perf_reg_abi(current); +} diff --git a/arch/x86/kernel/perf_regs.c b/arch/x86/kernel/perf_regs.c index e309cc5..3bbbb1a 100644 --- a/arch/x86/kernel/perf_regs.c +++ b/arch/x86/kernel/perf_regs.c @@ -78,6 +78,14 @@ u64 perf_reg_abi(struct task_struct *task) { return PERF_SAMPLE_REGS_ABI_32; } + +void perf_get_regs_user(struct perf_regs *regs_user, + struct pt_regs *regs, + struct pt_regs *regs_user_copy) +{ + regs_user->regs = task_pt_regs(current); + regs_user->abi = perf_reg_abi(current); +} #else /* CONFIG_X86_64 */ #define REG_NOSUPPORT ((1ULL << PERF_REG_X86_DS) | \ (1ULL << PERF_REG_X86_ES) | \ @@ -102,4 +110,12 @@ u64 perf_reg_abi(struct task_struct *task) else return PERF_SAMPLE_REGS_ABI_64; } + +void perf_get_regs_user(struct perf_regs *regs_user, + struct pt_regs *regs, + struct pt_regs *regs_user_copy) +{ + regs_user->regs = task_pt_regs(current); + regs_user->abi = perf_reg_abi(current); +} #endif /* CONFIG_X86_32 */ diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 486e84c..4f7a61c 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -79,11 +79,6 @@ struct perf_branch_stack { struct perf_branch_entry entries[0]; }; -struct perf_regs { - __u64 abi; - struct pt_regs *regs; -}; - struct task_struct; /* @@ -610,7 +605,14 @@ struct perf_sample_data { u32 reserved; } cpu_entry; struct perf_callchain_entry *callchain; + + /* + * regs_user may point to task_pt_regs or to regs_user_copy, depending + * on arch details. + */ struct perf_regs regs_user; + struct pt_regs regs_user_copy; + struct perf_regs regs_intr; u64 stack_user_size; } ____cacheline_aligned; diff --git a/include/linux/perf_regs.h b/include/linux/perf_regs.h index 3c73d5f..a5f98d5 100644 --- a/include/linux/perf_regs.h +++ b/include/linux/perf_regs.h @@ -1,11 +1,19 @@ #ifndef _LINUX_PERF_REGS_H #define _LINUX_PERF_REGS_H +struct perf_regs { + __u64 abi; + struct pt_regs *regs; +}; + #ifdef CONFIG_HAVE_PERF_REGS #include u64 perf_reg_value(struct pt_regs *regs, int idx); int perf_reg_validate(u64 mask); u64 perf_reg_abi(struct task_struct *task); +void perf_get_regs_user(struct perf_regs *regs_user, + struct pt_regs *regs, + struct pt_regs *regs_user_copy); #else static inline u64 perf_reg_value(struct pt_regs *regs, int idx) { @@ -21,5 +29,13 @@ static inline u64 perf_reg_abi(struct task_struct *task) { return PERF_SAMPLE_REGS_ABI_NONE; } + +static inline void perf_get_regs_user(struct perf_regs *regs_user, + struct pt_regs *regs, + struct pt_regs *regs_user_copy) +{ + regs_user->regs = task_pt_regs(current); + regs_user->abi = perf_reg_abi(current); +} #endif /* CONFIG_HAVE_PERF_REGS */ #endif /* _LINUX_PERF_REGS_H */ diff --git a/kernel/events/core.c b/kernel/events/core.c index 4c1ee7f..882f835 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -4461,18 +4461,14 @@ perf_output_sample_regs(struct perf_output_handle *handle, } static void perf_sample_regs_user(struct perf_regs *regs_user, - struct pt_regs *regs) + struct pt_regs *regs, + struct pt_regs *regs_user_copy) { - if (!user_mode(regs)) { - if (current->mm) - regs = task_pt_regs(current); - else - regs = NULL; - } - - if (regs) { - regs_user->abi = perf_reg_abi(current); + if (user_mode(regs)) { + regs_user->abi = perf_reg_abi(current); regs_user->regs = regs; + } else if (current->mm) { + perf_get_regs_user(regs_user, regs, regs_user_copy); } else { regs_user->abi = PERF_SAMPLE_REGS_ABI_NONE; regs_user->regs = NULL; @@ -4951,7 +4947,8 @@ void perf_prepare_sample(struct perf_event_header *header, } if (sample_type & (PERF_SAMPLE_REGS_USER | PERF_SAMPLE_STACK_USER)) - perf_sample_regs_user(&data->regs_user, regs); + perf_sample_regs_user(&data->regs_user, regs, + &data->regs_user_copy); if (sample_type & PERF_SAMPLE_REGS_USER) { /* regs dump ABI info */ -- cgit v0.10.2 From 86c269fea37334687b1c0789e6444be0d750e8a6 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Sun, 4 Jan 2015 10:36:20 -0800 Subject: perf/x86_64: Improve user regs sampling Perf reports user regs for kernel-mode samples so that samples can be backtraced through user code. The old code was very broken in syscall context, resulting in useless backtraces. The new code, in contrast, is still dangerously racy, but it should at least work most of the time. Tested-by: Jiri Olsa Signed-off-by: Andy Lutomirski Signed-off-by: Peter Zijlstra (Intel) Cc: Stephane Eranian Cc: Andrew Morton Cc: chenggang.qcg@taobao.com Cc: Wu Fengguang Cc: Namhyung Kim Cc: Mike Galbraith Cc: Arjan van de Ven Cc: David Ahern Cc: Linus Torvalds Link: http://lkml.kernel.org/r/243560c26ff0f739978e2459e203f6515367634d.1420396372.git.luto@amacapital.net Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/perf_regs.c b/arch/x86/kernel/perf_regs.c index 3bbbb1a..781861c 100644 --- a/arch/x86/kernel/perf_regs.c +++ b/arch/x86/kernel/perf_regs.c @@ -115,7 +115,81 @@ void perf_get_regs_user(struct perf_regs *regs_user, struct pt_regs *regs, struct pt_regs *regs_user_copy) { - regs_user->regs = task_pt_regs(current); - regs_user->abi = perf_reg_abi(current); + struct pt_regs *user_regs = task_pt_regs(current); + + /* + * If we're in an NMI that interrupted task_pt_regs setup, then + * we can't sample user regs at all. This check isn't really + * sufficient, though, as we could be in an NMI inside an interrupt + * that happened during task_pt_regs setup. + */ + if (regs->sp > (unsigned long)&user_regs->r11 && + regs->sp <= (unsigned long)(user_regs + 1)) { + regs_user->abi = PERF_SAMPLE_REGS_ABI_NONE; + regs_user->regs = NULL; + return; + } + + /* + * RIP, flags, and the argument registers are usually saved. + * orig_ax is probably okay, too. + */ + regs_user_copy->ip = user_regs->ip; + regs_user_copy->cx = user_regs->cx; + regs_user_copy->dx = user_regs->dx; + regs_user_copy->si = user_regs->si; + regs_user_copy->di = user_regs->di; + regs_user_copy->r8 = user_regs->r8; + regs_user_copy->r9 = user_regs->r9; + regs_user_copy->r10 = user_regs->r10; + regs_user_copy->r11 = user_regs->r11; + regs_user_copy->orig_ax = user_regs->orig_ax; + regs_user_copy->flags = user_regs->flags; + + /* + * Don't even try to report the "rest" regs. + */ + regs_user_copy->bx = -1; + regs_user_copy->bp = -1; + regs_user_copy->r12 = -1; + regs_user_copy->r13 = -1; + regs_user_copy->r14 = -1; + regs_user_copy->r15 = -1; + + /* + * For this to be at all useful, we need a reasonable guess for + * sp and the ABI. Be careful: we're in NMI context, and we're + * considering current to be the current task, so we should + * be careful not to look at any other percpu variables that might + * change during context switches. + */ + if (IS_ENABLED(CONFIG_IA32_EMULATION) && + task_thread_info(current)->status & TS_COMPAT) { + /* Easy case: we're in a compat syscall. */ + regs_user->abi = PERF_SAMPLE_REGS_ABI_32; + regs_user_copy->sp = user_regs->sp; + regs_user_copy->cs = user_regs->cs; + regs_user_copy->ss = user_regs->ss; + } else if (user_regs->orig_ax != -1) { + /* + * We're probably in a 64-bit syscall. + * Warning: this code is severely racy. At least it's better + * than just blindly copying user_regs. + */ + regs_user->abi = PERF_SAMPLE_REGS_ABI_64; + regs_user_copy->sp = this_cpu_read(old_rsp); + regs_user_copy->cs = __USER_CS; + regs_user_copy->ss = __USER_DS; + regs_user_copy->cx = -1; /* usually contains garbage */ + } else { + /* We're probably in an interrupt or exception. */ + regs_user->abi = user_64bit_mode(user_regs) ? + PERF_SAMPLE_REGS_ABI_64 : PERF_SAMPLE_REGS_ABI_32; + regs_user_copy->sp = user_regs->sp; + regs_user_copy->cs = user_regs->cs; + regs_user_copy->ss = user_regs->ss; + } + + regs_user->regs = regs_user_copy; } #endif /* CONFIG_X86_32 */ -- cgit v0.10.2 From 5306c31c5733cb4a79cc002e0c3ad256fd439614 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Tue, 6 Jan 2015 14:34:35 -0800 Subject: perf/x86/uncore/hsw-ep: Handle systems with only two SBOXes There was another report of a boot failure with a #GP fault in the uncore SBOX initialization. The earlier work around was not enough for this system. The boot was failing while trying to initialize the third SBOX. This patch detects parts with only two SBOXes and limits the number of SBOX units to two there. Stable material, as it affects boot problems on 3.18. Tested-by: Andreas Oehler Signed-off-by: Andi Kleen Signed-off-by: Peter Zijlstra (Intel) Cc: Cc: Arnaldo Carvalho de Melo Cc: Stephane Eranian Cc: Yan, Zheng Link: http://lkml.kernel.org/r/1420583675-9163-1-git-send-email-andi@firstfloor.org Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.h b/arch/x86/kernel/cpu/perf_event_intel_uncore.h index 18eb78b..863d9b0 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore.h +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.h @@ -17,7 +17,7 @@ #define UNCORE_PCI_DEV_TYPE(data) ((data >> 8) & 0xff) #define UNCORE_PCI_DEV_IDX(data) (data & 0xff) #define UNCORE_EXTRA_PCI_DEV 0xff -#define UNCORE_EXTRA_PCI_DEV_MAX 2 +#define UNCORE_EXTRA_PCI_DEV_MAX 3 /* support up to 8 sockets */ #define UNCORE_SOCKET_MAX 8 diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c b/arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c index 745b158..21af6149e 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c @@ -891,6 +891,7 @@ void snbep_uncore_cpu_init(void) enum { SNBEP_PCI_QPI_PORT0_FILTER, SNBEP_PCI_QPI_PORT1_FILTER, + HSWEP_PCI_PCU_3, }; static int snbep_qpi_hw_config(struct intel_uncore_box *box, struct perf_event *event) @@ -2026,6 +2027,17 @@ void hswep_uncore_cpu_init(void) { if (hswep_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores) hswep_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores; + + /* Detect 6-8 core systems with only two SBOXes */ + if (uncore_extra_pci_dev[0][HSWEP_PCI_PCU_3]) { + u32 capid4; + + pci_read_config_dword(uncore_extra_pci_dev[0][HSWEP_PCI_PCU_3], + 0x94, &capid4); + if (((capid4 >> 6) & 0x3) == 0) + hswep_uncore_sbox.num_boxes = 2; + } + uncore_msr_uncores = hswep_msr_uncores; } @@ -2287,6 +2299,11 @@ static DEFINE_PCI_DEVICE_TABLE(hswep_uncore_pci_ids) = { .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV, SNBEP_PCI_QPI_PORT1_FILTER), }, + { /* PCU.3 (for Capability registers) */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2fc0), + .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV, + HSWEP_PCI_PCU_3), + }, { /* end: all zeroes */ } }; -- cgit v0.10.2 From 536ebe9ca999f6d0903d91698678ccc1742e8dd9 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 16 Dec 2014 16:28:38 +0100 Subject: sched, fanotify: Deal with nested sleeps As per e23738a7300a ("sched, inotify: Deal with nested sleeps"). fanotify_read is a wait loop with sleeps in. Wait loops rely on task_struct::state and sleeps do too, since that's the only means of actually sleeping. Therefore the nested sleeps destroy the wait loop state and the wait loop breaks the sleep functions that assume TASK_RUNNING (mutex_lock). Fix this by using the new woken_wake_function and wait_woken() stuff, which registers wakeups in wait and thereby allows shrinking the task_state::state changes to the actual sleep part. Reported-by: Yuanhan Liu Reported-by: Sedat Dilek Signed-off-by: Peter Zijlstra (Intel) Cc: Takashi Iwai Cc: Al Viro Cc: Eric Paris Cc: Linus Torvalds Cc: Eric Paris Link: http://lkml.kernel.org/r/20141216152838.GZ3337@twins.programming.kicks-ass.net Signed-off-by: Ingo Molnar diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index c991616..bff8567 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -259,16 +259,15 @@ static ssize_t fanotify_read(struct file *file, char __user *buf, struct fsnotify_event *kevent; char __user *start; int ret; - DEFINE_WAIT(wait); + DEFINE_WAIT_FUNC(wait, woken_wake_function); start = buf; group = file->private_data; pr_debug("%s: group=%p\n", __func__, group); + add_wait_queue(&group->notification_waitq, &wait); while (1) { - prepare_to_wait(&group->notification_waitq, &wait, TASK_INTERRUPTIBLE); - mutex_lock(&group->notification_mutex); kevent = get_one_event(group, count); mutex_unlock(&group->notification_mutex); @@ -289,7 +288,8 @@ static ssize_t fanotify_read(struct file *file, char __user *buf, if (start != buf) break; - schedule(); + + wait_woken(&wait, TASK_INTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT); continue; } @@ -318,8 +318,8 @@ static ssize_t fanotify_read(struct file *file, char __user *buf, buf += ret; count -= ret; } + remove_wait_queue(&group->notification_waitq, &wait); - finish_wait(&group->notification_waitq, &wait); if (start != buf && ret != -EFAULT) ret = buf - start; return ret; -- cgit v0.10.2 From 32a8df4e0b33fccc9715213b382160415b5c4008 Mon Sep 17 00:00:00 2001 From: Yuyang Du Date: Fri, 19 Dec 2014 08:29:56 +0800 Subject: sched: Fix odd values in effective_load() calculations In effective_load, we have (long w * unsigned long tg->shares) / long W, when w is negative, it is cast to unsigned long and hence the product is insanely large. Fix this by casting tg->shares to long. Reported-by: Sasha Levin Signed-off-by: Yuyang Du Signed-off-by: Peter Zijlstra (Intel) Cc: Dave Jones Cc: Andrey Ryabinin Cc: Linus Torvalds Link: http://lkml.kernel.org/r/20141219002956.GA25405@intel.com Signed-off-by: Ingo Molnar diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index df2cdf7..6b99659 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -4424,7 +4424,7 @@ static long effective_load(struct task_group *tg, int cpu, long wl, long wg) * wl = S * s'_i; see (2) */ if (W > 0 && w < W) - wl = (w * tg->shares) / W; + wl = (w * (long)tg->shares) / W; else wl = tg->shares; -- cgit v0.10.2 From 6a503c3be937d275113b702e0421e5b0720abe8a Mon Sep 17 00:00:00 2001 From: Luca Abeni Date: Wed, 17 Dec 2014 11:50:31 +0100 Subject: sched/deadline: Fix migration of SCHED_DEADLINE tasks According to global EDF, tasks should be migrated between runqueues without checking if their scheduling deadlines and runtimes are valid. However, SCHED_DEADLINE currently performs such a check: a migration happens doing: deactivate_task(rq, next_task, 0); set_task_cpu(next_task, later_rq->cpu); activate_task(later_rq, next_task, 0); which ends up calling dequeue_task_dl(), setting the new CPU, and then calling enqueue_task_dl(). enqueue_task_dl() then calls enqueue_dl_entity(), which calls update_dl_entity(), which can modify scheduling deadline and runtime, breaking global EDF scheduling. As a result, some of the properties of global EDF are not respected: for example, a taskset {(30, 80), (40, 80), (120, 170)} scheduled on two cores can have unbounded response times for the third task even if 30/80+40/80+120/170 = 1.5809 < 2 This can be fixed by invoking update_dl_entity() only in case of wakeup, or if this is a new SCHED_DEADLINE task. Signed-off-by: Luca Abeni Signed-off-by: Peter Zijlstra (Intel) Acked-by: Juri Lelli Cc: Cc: Dario Faggioli Cc: Linus Torvalds Link: http://lkml.kernel.org/r/1418813432-20797-2-git-send-email-luca.abeni@unitn.it Signed-off-by: Ingo Molnar diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index e5db8c6..55af498 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -826,10 +826,10 @@ enqueue_dl_entity(struct sched_dl_entity *dl_se, * parameters of the task might need updating. Otherwise, * we want a replenishment of its runtime. */ - if (!dl_se->dl_new && flags & ENQUEUE_REPLENISH) - replenish_dl_entity(dl_se, pi_se); - else + if (dl_se->dl_new || flags & ENQUEUE_WAKEUP) update_dl_entity(dl_se, pi_se); + else if (flags & ENQUEUE_REPLENISH) + replenish_dl_entity(dl_se, pi_se); __enqueue_dl_entity(dl_se); } -- cgit v0.10.2 From 269ad8015a6b2bb1cf9e684da4921eb6fa0a0c88 Mon Sep 17 00:00:00 2001 From: Luca Abeni Date: Wed, 17 Dec 2014 11:50:32 +0100 Subject: sched/deadline: Avoid double-accounting in case of missed deadlines The dl_runtime_exceeded() function is supposed to ckeck if a SCHED_DEADLINE task must be throttled, by checking if its current runtime is <= 0. However, it also checks if the scheduling deadline has been missed (the current time is larger than the current scheduling deadline), further decreasing the runtime if this happens. This "double accounting" is wrong: - In case of partitioned scheduling (or single CPU), this happens if task_tick_dl() has been called later than expected (due to small HZ values). In this case, the current runtime is also negative, and replenish_dl_entity() can take care of the deadline miss by recharging the current runtime to a value smaller than dl_runtime - In case of global scheduling on multiple CPUs, scheduling deadlines can be missed even if the task did not consume more runtime than expected, hence penalizing the task is wrong This patch fix this problem by throttling a SCHED_DEADLINE task only when its runtime becomes negative, and not modifying the runtime Signed-off-by: Luca Abeni Signed-off-by: Peter Zijlstra (Intel) Acked-by: Juri Lelli Cc: Cc: Dario Faggioli Cc: Linus Torvalds Link: http://lkml.kernel.org/r/1418813432-20797-3-git-send-email-luca.abeni@unitn.it Signed-off-by: Ingo Molnar diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index 55af498..b52092f 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -570,24 +570,7 @@ void init_dl_task_timer(struct sched_dl_entity *dl_se) static int dl_runtime_exceeded(struct rq *rq, struct sched_dl_entity *dl_se) { - int dmiss = dl_time_before(dl_se->deadline, rq_clock(rq)); - int rorun = dl_se->runtime <= 0; - - if (!rorun && !dmiss) - return 0; - - /* - * If we are beyond our current deadline and we are still - * executing, then we have already used some of the runtime of - * the next instance. Thus, if we do not account that, we are - * stealing bandwidth from the system at each deadline miss! - */ - if (dmiss) { - dl_se->runtime = rorun ? dl_se->runtime : 0; - dl_se->runtime -= rq_clock(rq) - dl_se->deadline; - } - - return 1; + return (dl_se->runtime <= 0); } extern bool sched_rt_bandwidth_account(struct rt_rq *rt_rq); -- cgit v0.10.2 From 7f1a169b88f513e32a432ca0f85bfd282d117bd6 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Thu, 25 Dec 2014 15:51:21 +0900 Subject: sched/fair: Fix RCU stall upon -ENOMEM in sched_create_group() When alloc_fair_sched_group() in sched_create_group() fails, free_sched_group() is called, and free_fair_sched_group() is called by free_sched_group(). Since destroy_cfs_bandwidth() is called by free_fair_sched_group() without calling init_cfs_bandwidth(), RCU stall occurs at hrtimer_cancel(): INFO: rcu_sched self-detected stall on CPU { 1} (t=60000 jiffies g=13074 c=13073 q=0) Task dump for CPU 1: (fprintd) R running task 0 6249 1 0x00000088 ... Call Trace: [] sched_show_task+0xa8/0x110 [] dump_cpu_task+0x3d/0x50 [] rcu_dump_cpu_stacks+0x90/0xd0 [] rcu_check_callbacks+0x491/0x700 [] update_process_times+0x4b/0x80 [] tick_sched_handle.isra.20+0x36/0x50 [] tick_sched_timer+0x42/0x70 [] __run_hrtimer+0x69/0x1a0 [] ? tick_sched_handle.isra.20+0x50/0x50 [] hrtimer_interrupt+0xef/0x230 [] local_apic_timer_interrupt+0x3b/0x70 [] smp_apic_timer_interrupt+0x45/0x60 [] apic_timer_interrupt+0x6d/0x80 [] ? lock_hrtimer_base.isra.23+0x18/0x50 [] ? __kmalloc+0x211/0x230 [] hrtimer_try_to_cancel+0x22/0xd0 [] ? __kmalloc+0x211/0x230 [] hrtimer_cancel+0x22/0x30 [] free_fair_sched_group+0x25/0xd0 [] free_sched_group+0x16/0x40 [] sched_create_group+0x4b/0x80 [] sched_autogroup_create_attach+0x43/0x1c0 [] sys_setsid+0x7c/0x110 [] system_call_fastpath+0x12/0x17 Check whether init_cfs_bandwidth() was called before calling destroy_cfs_bandwidth(). Signed-off-by: Tetsuo Handa [ Move the check into destroy_cfs_bandwidth() to aid compilability. ] Signed-off-by: Peter Zijlstra (Intel) Cc: Paul Turner Cc: Ben Segall Cc: Linus Torvalds Link: http://lkml.kernel.org/r/201412252210.GCC30204.SOMVFFOtQJFLOH@I-love.SAKURA.ne.jp Signed-off-by: Ingo Molnar diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 6b99659..40667cb 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -4005,6 +4005,10 @@ void __start_cfs_bandwidth(struct cfs_bandwidth *cfs_b, bool force) static void destroy_cfs_bandwidth(struct cfs_bandwidth *cfs_b) { + /* init_cfs_bandwidth() was not called */ + if (!cfs_b->throttled_cfs_rq.next) + return; + hrtimer_cancel(&cfs_b->period_timer); hrtimer_cancel(&cfs_b->slack_timer); } -- cgit v0.10.2 From a63b03e2d2477586440741677ecac45bcf28d7b1 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 6 Jan 2015 10:29:35 +0000 Subject: mutex: Always clear owner field upon mutex_unlock() Currently if DEBUG_MUTEXES is enabled, the mutex->owner field is only cleared iff debug_locks is active. This exposes a race to other users of the field where the mutex->owner may be still set to a stale value, potentially upsetting mutex_spin_on_owner() among others. References: https://bugs.freedesktop.org/show_bug.cgi?id=87955 Signed-off-by: Chris Wilson Signed-off-by: Peter Zijlstra (Intel) Acked-by: Davidlohr Bueso Cc: Daniel Vetter Cc: Linus Torvalds Link: http://lkml.kernel.org/r/1420540175-30204-1-git-send-email-chris@chris-wilson.co.uk Signed-off-by: Ingo Molnar diff --git a/kernel/locking/mutex-debug.c b/kernel/locking/mutex-debug.c index 5cf6731..3ef3736 100644 --- a/kernel/locking/mutex-debug.c +++ b/kernel/locking/mutex-debug.c @@ -80,13 +80,13 @@ void debug_mutex_unlock(struct mutex *lock) DEBUG_LOCKS_WARN_ON(lock->owner != current); DEBUG_LOCKS_WARN_ON(!lock->wait_list.prev && !lock->wait_list.next); - mutex_clear_owner(lock); } /* * __mutex_slowpath_needs_to_unlock() is explicitly 0 for debug * mutexes so that we can do it here after we've verified state. */ + mutex_clear_owner(lock); atomic_set(&lock->count, 1); } -- cgit v0.10.2 From 606185b20caf4c57d7e41e5a5ea4aff460aef2ab Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 9 Jan 2015 15:32:31 +0300 Subject: HID: roccat: potential out of bounds in pyra_sysfs_write_settings() This is a static checker fix. We write some binary settings to the sysfs file. One of the settings is the "->startup_profile". There isn't any checking to make sure it fits into the pyra->profile_settings[] array in the profile_activated() function. I added a check to pyra_sysfs_write_settings() in both places because I wasn't positive that the other callers were correct. Cc: Signed-off-by: Dan Carpenter Signed-off-by: Jiri Kosina diff --git a/drivers/hid/hid-roccat-pyra.c b/drivers/hid/hid-roccat-pyra.c index 1a07e07..47d7e74 100644 --- a/drivers/hid/hid-roccat-pyra.c +++ b/drivers/hid/hid-roccat-pyra.c @@ -35,6 +35,8 @@ static struct class *pyra_class; static void profile_activated(struct pyra_device *pyra, unsigned int new_profile) { + if (new_profile >= ARRAY_SIZE(pyra->profile_settings)) + return; pyra->actual_profile = new_profile; pyra->actual_cpi = pyra->profile_settings[pyra->actual_profile].y_cpi; } @@ -257,9 +259,11 @@ static ssize_t pyra_sysfs_write_settings(struct file *fp, if (off != 0 || count != PYRA_SIZE_SETTINGS) return -EINVAL; - mutex_lock(&pyra->pyra_lock); - settings = (struct pyra_settings const *)buf; + if (settings->startup_profile >= ARRAY_SIZE(pyra->profile_settings)) + return -EINVAL; + + mutex_lock(&pyra->pyra_lock); retval = pyra_set_settings(usb_dev, settings); if (retval) { -- cgit v0.10.2 From 1e3479225acbb7ae048ac30fb7c6090fa7f0df02 Mon Sep 17 00:00:00 2001 From: Victor Kamensky Date: Fri, 9 Jan 2015 18:55:45 +0100 Subject: ARM: 8275/1: mm: fix PMD_SECT_RDONLY undeclared compile error MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In v3.19-rc3 tree when CONFIG_ARM_LPAE and CONFIG_DEBUG_RODATA are enabled image failed to compile with the following error: arch/arm/mm/init.c:661:14: error: ‘PMD_SECT_RDONLY’ undeclared here (not in a function) It seems that '80d6b0c ARM: mm: allow text and rodata sections to be read-only' and 'ded9477 ARM: 8109/1: mm: Modify pte_write and pmd_write logic for LPAE' commits crossed. 80d6b0c uses PMD_SECT_RDONLY macro but ded9477 renames it and uses software bits L_PMD_SECT_RDONLY instead. Fix is to use L_PMD_SECT_RDONLY instead PMD_SECT_RDONLY as ded9477 does in another places. Signed-off-by: Victor Kamensky Acked-by: Will Deacon Signed-off-by: Russell King diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c index 98ad9c7..2495c8c 100644 --- a/arch/arm/mm/init.c +++ b/arch/arm/mm/init.c @@ -658,8 +658,8 @@ static struct section_perm ro_perms[] = { .start = (unsigned long)_stext, .end = (unsigned long)__init_begin, #ifdef CONFIG_ARM_LPAE - .mask = ~PMD_SECT_RDONLY, - .prot = PMD_SECT_RDONLY, + .mask = ~L_PMD_SECT_RDONLY, + .prot = L_PMD_SECT_RDONLY, #else .mask = ~(PMD_SECT_APX | PMD_SECT_AP_WRITE), .prot = PMD_SECT_APX | PMD_SECT_AP_WRITE, -- cgit v0.10.2 From 046ba64285a4389ae5e9a7dfa253c6bff3d7c341 Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Tue, 6 Jan 2015 16:10:37 -0800 Subject: target: Drop arbitrary maximum I/O size limit This patch drops the arbitrary maximum I/O size limit in sbc_parse_cdb(), which currently for fabric_max_sectors is hardcoded to 8192 (4 MB for 512 byte sector devices), and for hw_max_sectors is a backend driver dependent value. This limit is problematic because Linux initiators have only recently started to honor block limits MAXIMUM TRANSFER LENGTH, and other non-Linux based initiators (eg: MSFT Fibre Channel) can also generate I/Os larger than 4 MB in size. Currently when this happens, the following message will appear on the target resulting in I/Os being returned with non recoverable status: SCSI OP 28h with too big sectors 16384 exceeds fabric_max_sectors: 8192 Instead, drop both [fabric,hw]_max_sector checks in sbc_parse_cdb(), and convert the existing hw_max_sectors into a purely informational attribute used to represent the granuality that backend driver and/or subsystem code is splitting I/Os upon. Also, update FILEIO with an explicit FD_MAX_BYTES check in fd_execute_rw() to deal with the one special iovec limitiation case. v2 changes: - Drop hw_max_sectors check in sbc_parse_cdb() Reported-by: Lance Gropper Reported-by: Stefan Priebe Cc: Christoph Hellwig Cc: Martin K. Petersen Cc: Roland Dreier Cc: stable@vger.kernel.org # 3.4 Signed-off-by: Nicholas Bellinger diff --git a/drivers/target/target_core_device.c b/drivers/target/target_core_device.c index 7653cfb..ee4b89f 100644 --- a/drivers/target/target_core_device.c +++ b/drivers/target/target_core_device.c @@ -1156,10 +1156,10 @@ int se_dev_set_optimal_sectors(struct se_device *dev, u32 optimal_sectors) dev, dev->export_count); return -EINVAL; } - if (optimal_sectors > dev->dev_attrib.fabric_max_sectors) { + if (optimal_sectors > dev->dev_attrib.hw_max_sectors) { pr_err("dev[%p]: Passed optimal_sectors %u cannot be" - " greater than fabric_max_sectors: %u\n", dev, - optimal_sectors, dev->dev_attrib.fabric_max_sectors); + " greater than hw_max_sectors: %u\n", dev, + optimal_sectors, dev->dev_attrib.hw_max_sectors); return -EINVAL; } @@ -1554,7 +1554,6 @@ struct se_device *target_alloc_device(struct se_hba *hba, const char *name) DA_UNMAP_GRANULARITY_ALIGNMENT_DEFAULT; dev->dev_attrib.max_write_same_len = DA_MAX_WRITE_SAME_LEN; dev->dev_attrib.fabric_max_sectors = DA_FABRIC_MAX_SECTORS; - dev->dev_attrib.optimal_sectors = DA_FABRIC_MAX_SECTORS; xcopy_lun = &dev->xcopy_lun; xcopy_lun->lun_se_dev = dev; @@ -1595,6 +1594,7 @@ int target_configure_device(struct se_device *dev) dev->dev_attrib.hw_max_sectors = se_dev_align_max_sectors(dev->dev_attrib.hw_max_sectors, dev->dev_attrib.hw_block_size); + dev->dev_attrib.optimal_sectors = dev->dev_attrib.hw_max_sectors; dev->dev_index = scsi_get_new_index(SCSI_DEVICE_INDEX); dev->creation_time = get_jiffies_64(); diff --git a/drivers/target/target_core_file.c b/drivers/target/target_core_file.c index c2aea09..b090211 100644 --- a/drivers/target/target_core_file.c +++ b/drivers/target/target_core_file.c @@ -621,7 +621,16 @@ fd_execute_rw(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents, struct fd_prot fd_prot; sense_reason_t rc; int ret = 0; - + /* + * We are currently limited by the number of iovecs (2048) per + * single vfs_[writev,readv] call. + */ + if (cmd->data_length > FD_MAX_BYTES) { + pr_err("FILEIO: Not able to process I/O of %u bytes due to" + "FD_MAX_BYTES: %u iovec count limitiation\n", + cmd->data_length, FD_MAX_BYTES); + return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; + } /* * Call vectorized fileio functions to map struct scatterlist * physical memory addresses to struct iovec virtual memory. diff --git a/drivers/target/target_core_iblock.c b/drivers/target/target_core_iblock.c index 3efff94..5795cd8 100644 --- a/drivers/target/target_core_iblock.c +++ b/drivers/target/target_core_iblock.c @@ -124,7 +124,7 @@ static int iblock_configure_device(struct se_device *dev) q = bdev_get_queue(bd); dev->dev_attrib.hw_block_size = bdev_logical_block_size(bd); - dev->dev_attrib.hw_max_sectors = UINT_MAX; + dev->dev_attrib.hw_max_sectors = queue_max_hw_sectors(q); dev->dev_attrib.hw_queue_depth = q->nr_requests; /* diff --git a/drivers/target/target_core_sbc.c b/drivers/target/target_core_sbc.c index 11bea19..cd4bed7 100644 --- a/drivers/target/target_core_sbc.c +++ b/drivers/target/target_core_sbc.c @@ -953,21 +953,6 @@ sbc_parse_cdb(struct se_cmd *cmd, struct sbc_ops *ops) if (cmd->se_cmd_flags & SCF_SCSI_DATA_CDB) { unsigned long long end_lba; - - if (sectors > dev->dev_attrib.fabric_max_sectors) { - printk_ratelimited(KERN_ERR "SCSI OP %02xh with too" - " big sectors %u exceeds fabric_max_sectors:" - " %u\n", cdb[0], sectors, - dev->dev_attrib.fabric_max_sectors); - return TCM_INVALID_CDB_FIELD; - } - if (sectors > dev->dev_attrib.hw_max_sectors) { - printk_ratelimited(KERN_ERR "SCSI OP %02xh with too" - " big sectors %u exceeds backend hw_max_sectors:" - " %u\n", cdb[0], sectors, - dev->dev_attrib.hw_max_sectors); - return TCM_INVALID_CDB_FIELD; - } check_lba: end_lba = dev->transport->get_blocks(dev) + 1; if (cmd->t_task_lba + sectors > end_lba) { diff --git a/drivers/target/target_core_spc.c b/drivers/target/target_core_spc.c index 1307600..4c71657 100644 --- a/drivers/target/target_core_spc.c +++ b/drivers/target/target_core_spc.c @@ -505,7 +505,6 @@ static sense_reason_t spc_emulate_evpd_b0(struct se_cmd *cmd, unsigned char *buf) { struct se_device *dev = cmd->se_dev; - u32 max_sectors; int have_tp = 0; int opt, min; @@ -539,9 +538,7 @@ spc_emulate_evpd_b0(struct se_cmd *cmd, unsigned char *buf) /* * Set MAXIMUM TRANSFER LENGTH */ - max_sectors = min(dev->dev_attrib.fabric_max_sectors, - dev->dev_attrib.hw_max_sectors); - put_unaligned_be32(max_sectors, &buf[8]); + put_unaligned_be32(dev->dev_attrib.hw_max_sectors, &buf[8]); /* * Set OPTIMAL TRANSFER LENGTH -- cgit v0.10.2 From 7216dc077dfcd46e2e0143f57711c8dd2eb99e68 Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Tue, 6 Jan 2015 16:15:01 -0800 Subject: target: Drop left-over fabric_max_sectors attribute Now that fabric_max_sectors is no longer used to enforce the maximum I/O size, go ahead and drop it's left-over usage in target-core and associated backend drivers. Cc: Christoph Hellwig Cc: Martin K. Petersen Cc: Roland Dreier Signed-off-by: Nicholas Bellinger diff --git a/drivers/target/target_core_device.c b/drivers/target/target_core_device.c index ee4b89f..58f49ff 100644 --- a/drivers/target/target_core_device.c +++ b/drivers/target/target_core_device.c @@ -1103,51 +1103,6 @@ int se_dev_set_queue_depth(struct se_device *dev, u32 queue_depth) } EXPORT_SYMBOL(se_dev_set_queue_depth); -int se_dev_set_fabric_max_sectors(struct se_device *dev, u32 fabric_max_sectors) -{ - int block_size = dev->dev_attrib.block_size; - - if (dev->export_count) { - pr_err("dev[%p]: Unable to change SE Device" - " fabric_max_sectors while export_count is %d\n", - dev, dev->export_count); - return -EINVAL; - } - if (!fabric_max_sectors) { - pr_err("dev[%p]: Illegal ZERO value for" - " fabric_max_sectors\n", dev); - return -EINVAL; - } - if (fabric_max_sectors < DA_STATUS_MAX_SECTORS_MIN) { - pr_err("dev[%p]: Passed fabric_max_sectors: %u less than" - " DA_STATUS_MAX_SECTORS_MIN: %u\n", dev, fabric_max_sectors, - DA_STATUS_MAX_SECTORS_MIN); - return -EINVAL; - } - if (fabric_max_sectors > DA_STATUS_MAX_SECTORS_MAX) { - pr_err("dev[%p]: Passed fabric_max_sectors: %u" - " greater than DA_STATUS_MAX_SECTORS_MAX:" - " %u\n", dev, fabric_max_sectors, - DA_STATUS_MAX_SECTORS_MAX); - return -EINVAL; - } - /* - * Align max_sectors down to PAGE_SIZE to follow transport_allocate_data_tasks() - */ - if (!block_size) { - block_size = 512; - pr_warn("Defaulting to 512 for zero block_size\n"); - } - fabric_max_sectors = se_dev_align_max_sectors(fabric_max_sectors, - block_size); - - dev->dev_attrib.fabric_max_sectors = fabric_max_sectors; - pr_debug("dev[%p]: SE Device max_sectors changed to %u\n", - dev, fabric_max_sectors); - return 0; -} -EXPORT_SYMBOL(se_dev_set_fabric_max_sectors); - int se_dev_set_optimal_sectors(struct se_device *dev, u32 optimal_sectors) { if (dev->export_count) { @@ -1553,7 +1508,6 @@ struct se_device *target_alloc_device(struct se_hba *hba, const char *name) dev->dev_attrib.unmap_granularity_alignment = DA_UNMAP_GRANULARITY_ALIGNMENT_DEFAULT; dev->dev_attrib.max_write_same_len = DA_MAX_WRITE_SAME_LEN; - dev->dev_attrib.fabric_max_sectors = DA_FABRIC_MAX_SECTORS; xcopy_lun = &dev->xcopy_lun; xcopy_lun->lun_se_dev = dev; diff --git a/drivers/target/target_core_file.c b/drivers/target/target_core_file.c index b090211..d836de2 100644 --- a/drivers/target/target_core_file.c +++ b/drivers/target/target_core_file.c @@ -968,7 +968,6 @@ static struct configfs_attribute *fileio_backend_dev_attrs[] = { &fileio_dev_attrib_hw_block_size.attr, &fileio_dev_attrib_block_size.attr, &fileio_dev_attrib_hw_max_sectors.attr, - &fileio_dev_attrib_fabric_max_sectors.attr, &fileio_dev_attrib_optimal_sectors.attr, &fileio_dev_attrib_hw_queue_depth.attr, &fileio_dev_attrib_queue_depth.attr, diff --git a/drivers/target/target_core_iblock.c b/drivers/target/target_core_iblock.c index 5795cd8..78346b8 100644 --- a/drivers/target/target_core_iblock.c +++ b/drivers/target/target_core_iblock.c @@ -883,7 +883,6 @@ static struct configfs_attribute *iblock_backend_dev_attrs[] = { &iblock_dev_attrib_hw_block_size.attr, &iblock_dev_attrib_block_size.attr, &iblock_dev_attrib_hw_max_sectors.attr, - &iblock_dev_attrib_fabric_max_sectors.attr, &iblock_dev_attrib_optimal_sectors.attr, &iblock_dev_attrib_hw_queue_depth.attr, &iblock_dev_attrib_queue_depth.attr, diff --git a/drivers/target/target_core_rd.c b/drivers/target/target_core_rd.c index 60ebd17..98e83ac 100644 --- a/drivers/target/target_core_rd.c +++ b/drivers/target/target_core_rd.c @@ -657,7 +657,6 @@ static struct configfs_attribute *rd_mcp_backend_dev_attrs[] = { &rd_mcp_dev_attrib_hw_block_size.attr, &rd_mcp_dev_attrib_block_size.attr, &rd_mcp_dev_attrib_hw_max_sectors.attr, - &rd_mcp_dev_attrib_fabric_max_sectors.attr, &rd_mcp_dev_attrib_optimal_sectors.attr, &rd_mcp_dev_attrib_hw_queue_depth.attr, &rd_mcp_dev_attrib_queue_depth.attr, diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c index 8bfa61c..1157b55 100644 --- a/drivers/target/target_core_user.c +++ b/drivers/target/target_core_user.c @@ -1118,7 +1118,6 @@ static struct configfs_attribute *tcmu_backend_dev_attrs[] = { &tcmu_dev_attrib_hw_block_size.attr, &tcmu_dev_attrib_block_size.attr, &tcmu_dev_attrib_hw_max_sectors.attr, - &tcmu_dev_attrib_fabric_max_sectors.attr, &tcmu_dev_attrib_optimal_sectors.attr, &tcmu_dev_attrib_hw_queue_depth.attr, &tcmu_dev_attrib_queue_depth.attr, diff --git a/include/target/target_core_backend.h b/include/target/target_core_backend.h index 430cfaf..db81c65 100644 --- a/include/target/target_core_backend.h +++ b/include/target/target_core_backend.h @@ -135,7 +135,6 @@ int se_dev_set_is_nonrot(struct se_device *, int); int se_dev_set_emulate_rest_reord(struct se_device *dev, int); int se_dev_set_queue_depth(struct se_device *, u32); int se_dev_set_max_sectors(struct se_device *, u32); -int se_dev_set_fabric_max_sectors(struct se_device *, u32); int se_dev_set_optimal_sectors(struct se_device *, u32); int se_dev_set_block_size(struct se_device *, u32); diff --git a/include/target/target_core_backend_configfs.h b/include/target/target_core_backend_configfs.h index 3247d75..186f7a9 100644 --- a/include/target/target_core_backend_configfs.h +++ b/include/target/target_core_backend_configfs.h @@ -98,8 +98,6 @@ static struct target_backend_dev_attrib_attribute _backend##_dev_attrib_##_name TB_DEV_ATTR(_backend, block_size, S_IRUGO | S_IWUSR); \ DEF_TB_DEV_ATTRIB_RO(_backend, hw_max_sectors); \ TB_DEV_ATTR_RO(_backend, hw_max_sectors); \ - DEF_TB_DEV_ATTRIB(_backend, fabric_max_sectors); \ - TB_DEV_ATTR(_backend, fabric_max_sectors, S_IRUGO | S_IWUSR); \ DEF_TB_DEV_ATTRIB(_backend, optimal_sectors); \ TB_DEV_ATTR(_backend, optimal_sectors, S_IRUGO | S_IWUSR); \ DEF_TB_DEV_ATTRIB_RO(_backend, hw_queue_depth); \ diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h index 397fb63..4a8795a 100644 --- a/include/target/target_core_base.h +++ b/include/target/target_core_base.h @@ -77,8 +77,6 @@ #define DA_UNMAP_GRANULARITY_ALIGNMENT_DEFAULT 0 /* Default max_write_same_len, disabled by default */ #define DA_MAX_WRITE_SAME_LEN 0 -/* Default max transfer length */ -#define DA_FABRIC_MAX_SECTORS 8192 /* Use a model alias based on the configfs backend device name */ #define DA_EMULATE_MODEL_ALIAS 0 /* Emulation for Direct Page Out */ @@ -694,7 +692,6 @@ struct se_dev_attrib { u32 hw_block_size; u32 block_size; u32 hw_max_sectors; - u32 fabric_max_sectors; u32 optimal_sectors; u32 hw_queue_depth; u32 queue_depth; -- cgit v0.10.2 From 1ecc7586922662e3ca2f3f0c3f17fec8749fc621 Mon Sep 17 00:00:00 2001 From: Lee Duncan Date: Mon, 5 Jan 2015 10:49:44 -0800 Subject: target: Allow Write Exclusive non-reservation holders to READ For PGR reservation of type Write Exclusive Access, allow all non reservation holding I_T nexuses with active registrations to READ from the device. This addresses a bug where active registrations that attempted to READ would result in an reservation conflict. Signed-off-by: Lee Duncan Signed-off-by: Nicholas Bellinger diff --git a/drivers/target/target_core_pr.c b/drivers/target/target_core_pr.c index d56f2aa..283cf78 100644 --- a/drivers/target/target_core_pr.c +++ b/drivers/target/target_core_pr.c @@ -528,6 +528,18 @@ static int core_scsi3_pr_seq_non_holder( return 0; } + } else if (we && registered_nexus) { + /* + * Reads are allowed for Write Exclusive locks + * from all registrants. + */ + if (cmd->data_direction == DMA_FROM_DEVICE) { + pr_debug("Allowing READ CDB: 0x%02x for %s" + " reservation\n", cdb[0], + core_scsi3_pr_dump_type(pr_reg_type)); + + return 0; + } } pr_debug("%s Conflict for %sregistered nexus %s CDB: 0x%2x" " for %s reservation\n", transport_dump_cmd_direction(cmd), -- cgit v0.10.2 From 2b70e5fda9d569f1d45735884a9871c7fb1cedf6 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Wed, 7 Jan 2015 12:54:44 +0200 Subject: MAINTAINERS: Add entry for iSER target driver iSCSI extensions for RDMA - Target mode. Signed-off-by: Sagi Grimberg Signed-off-by: Nicholas Bellinger diff --git a/MAINTAINERS b/MAINTAINERS index ddb9ac8..6b989d3 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5279,6 +5279,15 @@ W: www.open-iscsi.org Q: http://patchwork.kernel.org/project/linux-rdma/list/ F: drivers/infiniband/ulp/iser/ +ISCSI EXTENSIONS FOR RDMA (ISER) TARGET +M: Sagi Grimberg +T: git git://git.kernel.org/pub/scm/linux/kernel/git/nab/target-pending.git master +L: linux-rdma@vger.kernel.org +L: target-devel@vger.kernel.org +S: Supported +W: http://www.linux-iscsi.org +F: drivers/infiniband/ulp/isert + ISDN SUBSYSTEM M: Karsten Keil L: isdn4linux@listserv.isdn4linux.de (subscribers-only) -- cgit v0.10.2 From 8060b8dd4526bd4897e7a4207016a30a8fd99293 Mon Sep 17 00:00:00 2001 From: Andy Grover Date: Fri, 9 Jan 2015 15:13:08 -0800 Subject: iscsi-target: Fix typos in enum cmd_flags_table Everything else starts with ICF so the last two should as well. Fix places they are used to match. Signed-off-by: Andy Grover Signed-off-by: Nicholas Bellinger diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c index 55f6774..aebde32 100644 --- a/drivers/target/iscsi/iscsi_target.c +++ b/drivers/target/iscsi/iscsi_target.c @@ -2027,10 +2027,10 @@ iscsit_process_text_cmd(struct iscsi_conn *conn, struct iscsi_cmd *cmd, goto reject; } if (!strncmp("=All", text_ptr, 4)) { - cmd->cmd_flags |= IFC_SENDTARGETS_ALL; + cmd->cmd_flags |= ICF_SENDTARGETS_ALL; } else if (!strncmp("=iqn.", text_ptr, 5) || !strncmp("=eui.", text_ptr, 5)) { - cmd->cmd_flags |= IFC_SENDTARGETS_SINGLE; + cmd->cmd_flags |= ICF_SENDTARGETS_SINGLE; } else { pr_err("Unable to locate valid SendTargets=%s value\n", text_ptr); goto reject; @@ -3415,10 +3415,10 @@ iscsit_build_sendtargets_response(struct iscsi_cmd *cmd, return -ENOMEM; } /* - * Locate pointer to iqn./eui. string for IFC_SENDTARGETS_SINGLE + * Locate pointer to iqn./eui. string for ICF_SENDTARGETS_SINGLE * explicit case.. */ - if (cmd->cmd_flags & IFC_SENDTARGETS_SINGLE) { + if (cmd->cmd_flags & ICF_SENDTARGETS_SINGLE) { text_ptr = strchr(text_in, '='); if (!text_ptr) { pr_err("Unable to locate '=' string in text_in:" @@ -3434,7 +3434,7 @@ iscsit_build_sendtargets_response(struct iscsi_cmd *cmd, spin_lock(&tiqn_lock); list_for_each_entry(tiqn, &g_tiqn_list, tiqn_list) { - if ((cmd->cmd_flags & IFC_SENDTARGETS_SINGLE) && + if ((cmd->cmd_flags & ICF_SENDTARGETS_SINGLE) && strcmp(tiqn->tiqn, text_ptr)) { continue; } @@ -3512,7 +3512,7 @@ eob: if (end_of_buf) break; - if (cmd->cmd_flags & IFC_SENDTARGETS_SINGLE) + if (cmd->cmd_flags & ICF_SENDTARGETS_SINGLE) break; } spin_unlock(&tiqn_lock); diff --git a/drivers/target/iscsi/iscsi_target_core.h b/drivers/target/iscsi/iscsi_target_core.h index 09a522b..cbcff38 100644 --- a/drivers/target/iscsi/iscsi_target_core.h +++ b/drivers/target/iscsi/iscsi_target_core.h @@ -135,8 +135,8 @@ enum cmd_flags_table { ICF_CONTIG_MEMORY = 0x00000020, ICF_ATTACHED_TO_RQUEUE = 0x00000040, ICF_OOO_CMDSN = 0x00000080, - IFC_SENDTARGETS_ALL = 0x00000100, - IFC_SENDTARGETS_SINGLE = 0x00000200, + ICF_SENDTARGETS_ALL = 0x00000100, + ICF_SENDTARGETS_SINGLE = 0x00000200, }; /* struct iscsi_cmd->i_state */ -- cgit v0.10.2 From 690eac53daff34169a4d74fc7bfbd388c4896abb Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 11 Jan 2015 11:33:57 -0800 Subject: mm: Don't count the stack guard page towards RLIMIT_STACK Commit fee7e49d4514 ("mm: propagate error from stack expansion even for guard page") made sure that we return the error properly for stack growth conditions. It also theorized that counting the guard page towards the stack limit might break something, but also said "Let's see if anybody notices". Somebody did notice. Apparently android-x86 sets the stack limit very close to the limit indeed, and including the guard page in the rlimit check causes the android 'zygote' process problems. So this adds the (fairly trivial) code to make the stack rlimit check be against the actual real stack size, rather than the size of the vma that includes the guard page. Reported-and-tested-by: Chih-Wei Huang Cc: Jay Foad Cc: stable@kernel.org # to match back-porting of fee7e49d4514 Signed-off-by: Linus Torvalds diff --git a/mm/mmap.c b/mm/mmap.c index 7b36aa7..0bb74ca 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -2099,14 +2099,17 @@ static int acct_stack_growth(struct vm_area_struct *vma, unsigned long size, uns { struct mm_struct *mm = vma->vm_mm; struct rlimit *rlim = current->signal->rlim; - unsigned long new_start; + unsigned long new_start, actual_size; /* address space limit tests */ if (!may_expand_vm(mm, grow)) return -ENOMEM; /* Stack limit test */ - if (size > ACCESS_ONCE(rlim[RLIMIT_STACK].rlim_cur)) + actual_size = size; + if (size && (vma->vm_flags & (VM_GROWSUP | VM_GROWSDOWN))) + actual_size -= PAGE_SIZE; + if (actual_size > ACCESS_ONCE(rlim[RLIMIT_STACK].rlim_cur)) return -ENOMEM; /* mlock limit tests */ -- cgit v0.10.2 From b800c91a0517071156e772d4fb329ad33590da62 Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Sun, 11 Jan 2015 16:54:06 +0300 Subject: mm: fix corner case in anon_vma endless growing prevention Fix for BUG_ON(anon_vma->degree) splashes in unlink_anon_vmas() ("kernel BUG at mm/rmap.c:399!") caused by commit 7a3ef208e662 ("mm: prevent endless growth of anon_vma hierarchy") Anon_vma_clone() is usually called for a copy of source vma in destination argument. If source vma has anon_vma it should be already in dst->anon_vma. NULL in dst->anon_vma is used as a sign that it's called from anon_vma_fork(). In this case anon_vma_clone() finds anon_vma for reusing. Vma_adjust() calls it differently and this breaks anon_vma reusing logic: anon_vma_clone() links vma to old anon_vma and updates degree counters but vma_adjust() overrides vma->anon_vma right after that. As a result final unlink_anon_vmas() decrements degree for wrong anon_vma. This patch assigns ->anon_vma before calling anon_vma_clone(). Signed-off-by: Konstantin Khlebnikov Reported-and-tested-by: Chris Clayton Reported-and-tested-by: Oded Gabbay Reported-and-tested-by: Chih-Wei Huang Acked-by: Rik van Riel Acked-by: Vlastimil Babka Cc: Daniel Forrest Cc: Michal Hocko Cc: stable@vger.kernel.org # to match back-porting of 7a3ef208e662 Signed-off-by: Linus Torvalds diff --git a/mm/mmap.c b/mm/mmap.c index 0bb74ca..7f684d5 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -778,10 +778,12 @@ again: remove_next = 1 + (end > next->vm_end); if (exporter && exporter->anon_vma && !importer->anon_vma) { int error; + importer->anon_vma = exporter->anon_vma; error = anon_vma_clone(importer, exporter); - if (error) + if (error) { + importer->anon_vma = NULL; return error; - importer->anon_vma = exporter->anon_vma; + } } } -- cgit v0.10.2 From eaa27f34e91a14cdceed26ed6c6793ec1d186115 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 11 Jan 2015 12:44:53 -0800 Subject: linux 3.19-rc4 diff --git a/Makefile b/Makefile index 87f0c05..e41a335 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ VERSION = 3 PATCHLEVEL = 19 SUBLEVEL = 0 -EXTRAVERSION = -rc3 +EXTRAVERSION = -rc4 NAME = Diseased Newt # *DOCUMENTATION* -- cgit v0.10.2 From ac00531d9f4be487366573e2bb943e68e9e523ab Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 5 Dec 2014 19:25:28 +0200 Subject: mmc: sdhci: Tuning should not change max_blk_count Re-tuning requires that the maximum data length is limited to 4MiB. The code currently changes max_blk_count in an attempt to achieve that. This is wrong because max_blk_count is a different limit, but it is also un-necessary because max_req_size is 512KiB anyway. Consequently, the changes to max_blk_count are removed and the comment for max_req_size adjusted accordingly. The comment is also tweaked to show that the 512KiB limit is a SDMA limit not an ADMA limit. Signed-off-by: Adrian Hunter Reviewed-by: Aaron Lu Signed-off-by: Ulf Hansson diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c index cbb245b..040158d 100644 --- a/drivers/mmc/host/sdhci.c +++ b/drivers/mmc/host/sdhci.c @@ -259,8 +259,6 @@ static void sdhci_reinit(struct sdhci_host *host) del_timer_sync(&host->tuning_timer); host->flags &= ~SDHCI_NEEDS_RETUNING; - host->mmc->max_blk_count = - (host->quirks & SDHCI_QUIRK_NO_MULTIBLOCK) ? 1 : 65535; } sdhci_enable_card_detection(host); } @@ -2048,8 +2046,6 @@ out: host->flags |= SDHCI_USING_RETUNING_TIMER; mod_timer(&host->tuning_timer, jiffies + host->tuning_count * HZ); - /* Tuning mode 1 limits the maximum data length to 4MB */ - mmc->max_blk_count = (4 * 1024 * 1024) / mmc->max_blk_size; } else if (host->flags & SDHCI_USING_RETUNING_TIMER) { host->flags &= ~SDHCI_NEEDS_RETUNING; /* Reload the new initial value for timer */ @@ -3260,8 +3256,9 @@ int sdhci_add_host(struct sdhci_host *host) mmc->max_segs = SDHCI_MAX_SEGS; /* - * Maximum number of sectors in one transfer. Limited by DMA boundary - * size (512KiB). + * Maximum number of sectors in one transfer. Limited by SDMA boundary + * size (512KiB). Note some tuning modes impose a 4MiB limit, but this + * is less anyway. */ mmc->max_req_size = 524288; -- cgit v0.10.2 From d519c863fc198f43c979e9d6a74b53d9c53d55aa Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 5 Dec 2014 19:25:29 +0200 Subject: mmc: sdhci: Add out_unlock to sdhci_execute_tuning A 'goto' can be used to save duplicating unlocking and returning. Signed-off-by: Adrian Hunter Reviewed-by: Aaron Lu Signed-off-by: Ulf Hansson diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c index 040158d..c45beaf 100644 --- a/drivers/mmc/host/sdhci.c +++ b/drivers/mmc/host/sdhci.c @@ -1909,9 +1909,7 @@ static int sdhci_execute_tuning(struct mmc_host *mmc, u32 opcode) /* FALLTHROUGH */ default: - spin_unlock_irqrestore(&host->lock, flags); - sdhci_runtime_pm_put(host); - return 0; + goto out_unlock; } if (host->ops->platform_execute_tuning) { @@ -2066,6 +2064,7 @@ out: sdhci_writel(host, host->ier, SDHCI_INT_ENABLE); sdhci_writel(host, host->ier, SDHCI_SIGNAL_ENABLE); +out_unlock: spin_unlock_irqrestore(&host->lock, flags); sdhci_runtime_pm_put(host); -- cgit v0.10.2 From 38e40bf5db9d5b8cbaeca9a78ee17f38473dd78d Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 5 Dec 2014 19:25:30 +0200 Subject: mmc: sdhci: Simplify use of tuning timer The tuning timer is always used if the tuning mode is 1 and there is a tuning count, irrespective of whether this is the first call, or any subsequent call. Consequently the logic to start the timer can be simplified. Signed-off-by: Adrian Hunter Reviewed-by: Aaron Lu Signed-off-by: Ulf Hansson diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c index c45beaf..5c28c19 100644 --- a/drivers/mmc/host/sdhci.c +++ b/drivers/mmc/host/sdhci.c @@ -1885,10 +1885,14 @@ static int sdhci_execute_tuning(struct mmc_host *mmc, u32 opcode) int tuning_loop_counter = MAX_TUNING_LOOP; int err = 0; unsigned long flags; + unsigned int tuning_count = 0; sdhci_runtime_pm_get(host); spin_lock_irqsave(&host->lock, flags); + if (host->tuning_mode == SDHCI_TUNING_MODE_1) + tuning_count = host->tuning_count; + /* * The Host Controller needs tuning only in case of SDR104 mode * and for SDR50 mode when Use Tuning for SDR50 is set in the @@ -2033,22 +2037,11 @@ static int sdhci_execute_tuning(struct mmc_host *mmc, u32 opcode) } out: - /* - * If this is the very first time we are here, we start the retuning - * timer. Since only during the first time, SDHCI_NEEDS_RETUNING - * flag won't be set, we check this condition before actually starting - * the timer. - */ - if (!(host->flags & SDHCI_NEEDS_RETUNING) && host->tuning_count && - (host->tuning_mode == SDHCI_TUNING_MODE_1)) { + host->flags &= ~SDHCI_NEEDS_RETUNING; + + if (tuning_count) { host->flags |= SDHCI_USING_RETUNING_TIMER; - mod_timer(&host->tuning_timer, jiffies + - host->tuning_count * HZ); - } else if (host->flags & SDHCI_USING_RETUNING_TIMER) { - host->flags &= ~SDHCI_NEEDS_RETUNING; - /* Reload the new initial value for timer */ - mod_timer(&host->tuning_timer, jiffies + - host->tuning_count * HZ); + mod_timer(&host->tuning_timer, jiffies + tuning_count * HZ); } /* -- cgit v0.10.2 From b5540ce1512eede3bed68ab1e9949df9ad556091 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 5 Dec 2014 19:25:31 +0200 Subject: mmc: sdhci: Disable re-tuning for HS400 Re-tuning for HS400 mode must be done in HS200 mode. Currently there is no support for that. That needs to be reflected in the code. Specifically, if tuning is executed in HS400 mode then return an error, and do not start the tuning timer if HS200 tuning is being done prior to switching to HS400. Note that periodic re-tuning is not expected to be needed for HS400 but re-tuning is still needed after the host controller has lost power. In the case of suspend/resume that is not necessary because the card is fully re-initialised. That just leaves runtime suspend/resume with no support for HS400 re-tuning. Signed-off-by: Adrian Hunter Signed-off-by: Ulf Hansson diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c index 5c28c19..398e9e4 100644 --- a/drivers/mmc/host/sdhci.c +++ b/drivers/mmc/host/sdhci.c @@ -1878,6 +1878,18 @@ static int sdhci_card_busy(struct mmc_host *mmc) return !(present_state & SDHCI_DATA_LVL_MASK); } +static int sdhci_prepare_hs400_tuning(struct mmc_host *mmc, struct mmc_ios *ios) +{ + struct sdhci_host *host = mmc_priv(mmc); + unsigned long flags; + + spin_lock_irqsave(&host->lock, flags); + host->flags |= SDHCI_HS400_TUNING; + spin_unlock_irqrestore(&host->lock, flags); + + return 0; +} + static int sdhci_execute_tuning(struct mmc_host *mmc, u32 opcode) { struct sdhci_host *host = mmc_priv(mmc); @@ -1886,10 +1898,14 @@ static int sdhci_execute_tuning(struct mmc_host *mmc, u32 opcode) int err = 0; unsigned long flags; unsigned int tuning_count = 0; + bool hs400_tuning; sdhci_runtime_pm_get(host); spin_lock_irqsave(&host->lock, flags); + hs400_tuning = host->flags & SDHCI_HS400_TUNING; + host->flags &= ~SDHCI_HS400_TUNING; + if (host->tuning_mode == SDHCI_TUNING_MODE_1) tuning_count = host->tuning_count; @@ -1901,8 +1917,20 @@ static int sdhci_execute_tuning(struct mmc_host *mmc, u32 opcode) * tuning function has to be executed. */ switch (host->timing) { + /* HS400 tuning is done in HS200 mode */ case MMC_TIMING_MMC_HS400: + err = -EINVAL; + goto out_unlock; + case MMC_TIMING_MMC_HS200: + /* + * Periodic re-tuning for HS400 is not expected to be needed, so + * disable it here. + */ + if (hs400_tuning) + tuning_count = 0; + break; + case MMC_TIMING_UHS_SDR104: break; @@ -2130,6 +2158,7 @@ static const struct mmc_host_ops sdhci_ops = { .hw_reset = sdhci_hw_reset, .enable_sdio_irq = sdhci_enable_sdio_irq, .start_signal_voltage_switch = sdhci_start_signal_voltage_switch, + .prepare_hs400_tuning = sdhci_prepare_hs400_tuning, .execute_tuning = sdhci_execute_tuning, .card_event = sdhci_card_event, .card_busy = sdhci_card_busy, diff --git a/include/linux/mmc/sdhci.h b/include/linux/mmc/sdhci.h index 375af80..f767a0d 100644 --- a/include/linux/mmc/sdhci.h +++ b/include/linux/mmc/sdhci.h @@ -137,6 +137,7 @@ struct sdhci_host { #define SDHCI_SDR104_NEEDS_TUNING (1<<10) /* SDR104/HS200 needs tuning */ #define SDHCI_USING_RETUNING_TIMER (1<<11) /* Host is using a retuning timer for the card */ #define SDHCI_USE_64_BIT_DMA (1<<12) /* Use 64-bit DMA */ +#define SDHCI_HS400_TUNING (1<<13) /* Tuning for HS400 */ unsigned int version; /* SDHCI spec. version */ -- cgit v0.10.2 From aa8165f914420f143476305a01894b017d3abe6b Mon Sep 17 00:00:00 2001 From: Thomas Petazzoni Date: Wed, 31 Dec 2014 11:54:10 +0100 Subject: mmc: sdhci-pxav3: do the mbus window configuration after enabling clocks In commit 5491ce3f79ee ("mmc: sdhci-pxav3: add support for the Armada 38x SDHCI controller"), the sdhci-pxav3 driver was extended to include support for the SDHCI controller found in the Armada 38x processor. This mainly involved adding some MBus window related configuration. However, this configuration is currently done too early in ->probe(): it is done before clocks are enabled, while this configuration involves touching the registers of the controller, which will hang the SoC if the clock is disabled. It wasn't noticed until now because the bootloader typically leaves gatable clocks enabled, but in situations where we have a deferred probe (due to a CD GPIO that cannot be taken, for example), then the probe will be re-tried later, after a clock disable has been done in the exit path of the failed probe attempt of the device. This second probe() will hang the system due to the clock being disabled. This can for example be produced on Armada 385 GP, which has a CD GPIO connected to an I2C PCA9555. If the driver for the PCA9555 is not compiled into the kernel, then we will have the following sequence of events: 1. The SDHCI probes 2. It does the MBus configuration (which works, because the clock is left enabled by the bootloader) 3. It enables the clock 4. It tries to get the CD GPIO, which fails due to the driver being missing, so -EPROBE_DEFER is returned. 5. Before returning -EPROBE_DEFER, the driver cleans up what was done, which includes disabling the clock. 6. Later on, the SDHCI probe is tried again. 7. It does the MBus configuration, which hangs because the clock is no longer enabled. This commit does the obvious fix of doing the MBus configuration after the clock has been enabled by the driver. Fixes: 5491ce3f79ee ("mmc: sdhci-pxav3: add support for the Armada 38x SDHCI controller") Cc: # v3.15+ Signed-off-by: Thomas Petazzoni Signed-off-by: Ulf Hansson diff --git a/drivers/mmc/host/sdhci-pxav3.c b/drivers/mmc/host/sdhci-pxav3.c index 4523887..ca3424e 100644 --- a/drivers/mmc/host/sdhci-pxav3.c +++ b/drivers/mmc/host/sdhci-pxav3.c @@ -300,13 +300,6 @@ static int sdhci_pxav3_probe(struct platform_device *pdev) if (IS_ERR(host)) return PTR_ERR(host); - if (of_device_is_compatible(np, "marvell,armada-380-sdhci")) { - ret = mv_conf_mbus_windows(pdev, mv_mbus_dram_info()); - if (ret < 0) - goto err_mbus_win; - } - - pltfm_host = sdhci_priv(host); pltfm_host->priv = pxa; @@ -325,6 +318,12 @@ static int sdhci_pxav3_probe(struct platform_device *pdev) if (!IS_ERR(pxa->clk_core)) clk_prepare_enable(pxa->clk_core); + if (of_device_is_compatible(np, "marvell,armada-380-sdhci")) { + ret = mv_conf_mbus_windows(pdev, mv_mbus_dram_info()); + if (ret < 0) + goto err_mbus_win; + } + /* enable 1/8V DDR capable */ host->mmc->caps |= MMC_CAP_1_8V_DDR; @@ -396,11 +395,11 @@ err_add_host: pm_runtime_disable(&pdev->dev); err_of_parse: err_cd_req: +err_mbus_win: clk_disable_unprepare(pxa->clk_io); if (!IS_ERR(pxa->clk_core)) clk_disable_unprepare(pxa->clk_core); err_clk_get: -err_mbus_win: sdhci_pltfm_free(pdev); return ret; } -- cgit v0.10.2 From 2836766a9d0bd02c66073f8dd44796e6cc23848d Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Mon, 5 Jan 2015 10:50:15 +0100 Subject: mmc: sdhci: Fix sleep in atomic after inserting SD card Sleep in atomic context happened on Trats2 board after inserting or removing SD card because mmc_gpio_get_cd() was called under spin lock. Fix this by moving card detection earlier, before acquiring spin lock. The mmc_gpio_get_cd() call does not have to be protected by spin lock because it does not access any sdhci internal data. The sdhci_do_get_cd() call access host flags (SDHCI_DEVICE_DEAD). After moving it out side of spin lock it could theoretically race with driver removal but still there is no actual protection against manual card eject. Dmesg after inserting SD card: [ 41.663414] BUG: sleeping function called from invalid context at drivers/gpio/gpiolib.c:1511 [ 41.670469] in_atomic(): 1, irqs_disabled(): 128, pid: 30, name: kworker/u8:1 [ 41.677580] INFO: lockdep is turned off. [ 41.681486] irq event stamp: 61972 [ 41.684872] hardirqs last enabled at (61971): [] _raw_spin_unlock_irq+0x24/0x5c [ 41.693118] hardirqs last disabled at (61972): [] _raw_spin_lock_irq+0x18/0x54 [ 41.701190] softirqs last enabled at (61648): [] __do_softirq+0x234/0x2c8 [ 41.708914] softirqs last disabled at (61631): [] irq_exit+0xd0/0x114 [ 41.716206] Preemption disabled at:[< (null)>] (null) [ 41.721500] [ 41.722985] CPU: 3 PID: 30 Comm: kworker/u8:1 Tainted: G W 3.18.0-rc5-next-20141121 #883 [ 41.732111] Workqueue: kmmcd mmc_rescan [ 41.735945] [] (unwind_backtrace) from [] (show_stack+0x10/0x14) [ 41.743661] [] (show_stack) from [] (dump_stack+0x70/0xbc) [ 41.750867] [] (dump_stack) from [] (gpiod_get_raw_value_cansleep+0x18/0x30) [ 41.759628] [] (gpiod_get_raw_value_cansleep) from [] (mmc_gpio_get_cd+0x38/0x58) [ 41.768821] [] (mmc_gpio_get_cd) from [] (sdhci_request+0x50/0x1a4) [ 41.776808] [] (sdhci_request) from [] (mmc_start_request+0x138/0x268) [ 41.785051] [] (mmc_start_request) from [] (mmc_wait_for_req+0x58/0x1a0) [ 41.793469] [] (mmc_wait_for_req) from [] (mmc_wait_for_cmd+0x58/0x78) [ 41.801714] [] (mmc_wait_for_cmd) from [] (mmc_io_rw_direct_host+0x98/0x124) [ 41.810480] [] (mmc_io_rw_direct_host) from [] (sdio_reset+0x2c/0x64) [ 41.818641] [] (sdio_reset) from [] (mmc_rescan+0x254/0x2e4) [ 41.826028] [] (mmc_rescan) from [] (process_one_work+0x180/0x3f4) [ 41.833920] [] (process_one_work) from [] (worker_thread+0x34/0x4b0) [ 41.841991] [] (worker_thread) from [] (kthread+0xe4/0x104) [ 41.849285] [] (kthread) from [] (ret_from_fork+0x14/0x2c) [ 42.038276] mmc0: new high speed SDHC card at address 1234 Signed-off-by: Krzysztof Kozlowski Fixes: 94144a465dd0 ("mmc: sdhci: add get_cd() implementation") Cc: Signed-off-by: Ulf Hansson diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c index 398e9e4..1453cd1 100644 --- a/drivers/mmc/host/sdhci.c +++ b/drivers/mmc/host/sdhci.c @@ -1351,6 +1351,8 @@ static void sdhci_request(struct mmc_host *mmc, struct mmc_request *mrq) sdhci_runtime_pm_get(host); + present = mmc_gpio_get_cd(host->mmc); + spin_lock_irqsave(&host->lock, flags); WARN_ON(host->mrq != NULL); @@ -1379,7 +1381,6 @@ static void sdhci_request(struct mmc_host *mmc, struct mmc_request *mrq) * zero: cd-gpio is used, and card is removed * one: cd-gpio is used, and card is present */ - present = mmc_gpio_get_cd(host->mmc); if (present < 0) { /* If polling, assume that the card is always present. */ if (host->quirks & SDHCI_QUIRK_BROKEN_CARD_DETECTION) @@ -2126,15 +2127,18 @@ static void sdhci_card_event(struct mmc_host *mmc) { struct sdhci_host *host = mmc_priv(mmc); unsigned long flags; + int present; /* First check if client has provided their own card event */ if (host->ops->card_event) host->ops->card_event(host); + present = sdhci_do_get_cd(host); + spin_lock_irqsave(&host->lock, flags); /* Check host->mrq first in case we are runtime suspended */ - if (host->mrq && !sdhci_do_get_cd(host)) { + if (host->mrq && !present) { pr_err("%s: Card removed during transfer!\n", mmc_hostname(host->mmc)); pr_err("%s: Resetting controller.\n", -- cgit v0.10.2 From d0ed8e6b0ab149421cd1532e7c5ebb0992ad25d0 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Mon, 5 Jan 2015 14:47:57 +0200 Subject: mmc: sdhci-acpi: Add ACPI HID INT344D Add ACPI HID INT344D for an Intel SDIO host controller. Signed-off-by: Adrian Hunter Signed-off-by: Ulf Hansson diff --git a/drivers/mmc/host/sdhci-acpi.c b/drivers/mmc/host/sdhci-acpi.c index e3e56d3..970314e 100644 --- a/drivers/mmc/host/sdhci-acpi.c +++ b/drivers/mmc/host/sdhci-acpi.c @@ -247,6 +247,7 @@ static const struct sdhci_acpi_uid_slot sdhci_acpi_uids[] = { { "INT33BB" , "3" , &sdhci_acpi_slot_int_sd }, { "INT33C6" , NULL, &sdhci_acpi_slot_int_sdio }, { "INT3436" , NULL, &sdhci_acpi_slot_int_sdio }, + { "INT344D" , NULL, &sdhci_acpi_slot_int_sdio }, { "PNP0D40" }, { }, }; @@ -257,6 +258,7 @@ static const struct acpi_device_id sdhci_acpi_ids[] = { { "INT33BB" }, { "INT33C6" }, { "INT3436" }, + { "INT344D" }, { "PNP0D40" }, { }, }; -- cgit v0.10.2 From 1f7f26528fb159e71f081df1d1050c218ff1d74d Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Mon, 5 Jan 2015 14:47:58 +0200 Subject: mmc: sdhci-pci: Add support for Intel SPT Add PCI IDs for SPT eMMC, SDIO and SD card. Signed-off-by: Adrian Hunter Signed-off-by: Ulf Hansson diff --git a/drivers/mmc/host/sdhci-pci.c b/drivers/mmc/host/sdhci-pci.c index 0342775..4f38554 100644 --- a/drivers/mmc/host/sdhci-pci.c +++ b/drivers/mmc/host/sdhci-pci.c @@ -993,6 +993,31 @@ static const struct pci_device_id pci_ids[] = { .subdevice = PCI_ANY_ID, .driver_data = (kernel_ulong_t)&sdhci_intel_mrfl_mmc, }, + + { + .vendor = PCI_VENDOR_ID_INTEL, + .device = PCI_DEVICE_ID_INTEL_SPT_EMMC, + .subvendor = PCI_ANY_ID, + .subdevice = PCI_ANY_ID, + .driver_data = (kernel_ulong_t)&sdhci_intel_byt_emmc, + }, + + { + .vendor = PCI_VENDOR_ID_INTEL, + .device = PCI_DEVICE_ID_INTEL_SPT_SDIO, + .subvendor = PCI_ANY_ID, + .subdevice = PCI_ANY_ID, + .driver_data = (kernel_ulong_t)&sdhci_intel_byt_sdio, + }, + + { + .vendor = PCI_VENDOR_ID_INTEL, + .device = PCI_DEVICE_ID_INTEL_SPT_SD, + .subvendor = PCI_ANY_ID, + .subdevice = PCI_ANY_ID, + .driver_data = (kernel_ulong_t)&sdhci_intel_byt_sd, + }, + { .vendor = PCI_VENDOR_ID_O2, .device = PCI_DEVICE_ID_O2_8120, diff --git a/drivers/mmc/host/sdhci-pci.h b/drivers/mmc/host/sdhci-pci.h index d57c3d1..1ec684d 100644 --- a/drivers/mmc/host/sdhci-pci.h +++ b/drivers/mmc/host/sdhci-pci.h @@ -21,6 +21,9 @@ #define PCI_DEVICE_ID_INTEL_CLV_EMMC0 0x08e5 #define PCI_DEVICE_ID_INTEL_CLV_EMMC1 0x08e6 #define PCI_DEVICE_ID_INTEL_QRK_SD 0x08A7 +#define PCI_DEVICE_ID_INTEL_SPT_EMMC 0x9d2b +#define PCI_DEVICE_ID_INTEL_SPT_SDIO 0x9d2c +#define PCI_DEVICE_ID_INTEL_SPT_SD 0x9d2d /* * PCI registers -- cgit v0.10.2 From 82c92ed1357bca22a5d637fbb93dab3eb18a8e8c Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Mon, 12 Jan 2015 06:05:07 +0100 Subject: xen: correct error for building p2m list on 32 bits In xen_rebuild_p2m_list() for large areas of invalid or identity mapped memory the pmd entries on 32 bit systems are initialized wrong. Correct this error. Suggested-by: Boris Ostrovsky Signed-off-by: Juergen Gross Signed-off-by: David Vrabel diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c index cab221d..ff4ebd8 100644 --- a/arch/x86/xen/p2m.c +++ b/arch/x86/xen/p2m.c @@ -378,7 +378,7 @@ static void __init xen_rebuild_p2m_list(unsigned long *p2m) p2m_missing_pte : p2m_identity_pte; for (i = 0; i < PMDS_PER_MID_PAGE; i++) { pmdp = populate_extra_pmd( - (unsigned long)(p2m + pfn + i * PTRS_PER_PTE)); + (unsigned long)(p2m + pfn) + i * PMD_SIZE); set_pmd(pmdp, __pmd(__pa(ptep) | _KERNPG_TABLE)); } } -- cgit v0.10.2 From f241b0b891c903da2465b7a98eaf650784e666da Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Mon, 12 Jan 2015 06:05:08 +0100 Subject: xen: correct race in alloc_p2m_pmd() When allocating a new pmd for the linear mapped p2m list a check is done for not introducing another pmd when this just happened on another cpu. In this case the old pte pointer was returned which points to the p2m_missing or p2m_identity page. The correct value would be the pointer to the found new page. Signed-off-by: Juergen Gross Signed-off-by: David Vrabel diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c index ff4ebd8..70fb507 100644 --- a/arch/x86/xen/p2m.c +++ b/arch/x86/xen/p2m.c @@ -439,10 +439,9 @@ EXPORT_SYMBOL_GPL(get_phys_to_machine); * a new pmd is to replace p2m_missing_pte or p2m_identity_pte by a individual * pmd. In case of PAE/x86-32 there are multiple pmds to allocate! */ -static pte_t *alloc_p2m_pmd(unsigned long addr, pte_t *ptep, pte_t *pte_pg) +static pte_t *alloc_p2m_pmd(unsigned long addr, pte_t *pte_pg) { pte_t *ptechk; - pte_t *pteret = ptep; pte_t *pte_newpg[PMDS_PER_MID_PAGE]; pmd_t *pmdp; unsigned int level; @@ -476,8 +475,6 @@ static pte_t *alloc_p2m_pmd(unsigned long addr, pte_t *ptep, pte_t *pte_pg) if (ptechk == pte_pg) { set_pmd(pmdp, __pmd(__pa(pte_newpg[i]) | _KERNPG_TABLE)); - if (vaddr == (addr & ~(PMD_SIZE - 1))) - pteret = pte_offset_kernel(pmdp, addr); pte_newpg[i] = NULL; } @@ -491,7 +488,7 @@ static pte_t *alloc_p2m_pmd(unsigned long addr, pte_t *ptep, pte_t *pte_pg) vaddr += PMD_SIZE; } - return pteret; + return lookup_address(addr, &level); } /* @@ -520,7 +517,7 @@ static bool alloc_p2m(unsigned long pfn) if (pte_pg == p2m_missing_pte || pte_pg == p2m_identity_pte) { /* PMD level is missing, allocate a new one */ - ptep = alloc_p2m_pmd(addr, ptep, pte_pg); + ptep = alloc_p2m_pmd(addr, pte_pg); if (!ptep) return false; } -- cgit v0.10.2 From e86f949667127509d95b6c678fdd928b93128d9d Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Mon, 12 Jan 2015 06:05:09 +0100 Subject: xen: use correct type for physical addresses When converting a pfn to a physical address be sure to use 64 bit wide types or convert the physical address to a pfn if possible. Signed-off-by: Juergen Gross Tested-by: Boris Ostrovsky Signed-off-by: David Vrabel diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index feb6d86..410210f 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@ -140,7 +140,7 @@ static void __init xen_del_extra_mem(u64 start, u64 size) unsigned long __ref xen_chk_extra_mem(unsigned long pfn) { int i; - unsigned long addr = PFN_PHYS(pfn); + phys_addr_t addr = PFN_PHYS(pfn); for (i = 0; i < XEN_EXTRA_MEM_MAX_REGIONS; i++) { if (addr >= xen_extra_mem[i].start && @@ -284,7 +284,7 @@ static void __init xen_update_mem_tables(unsigned long pfn, unsigned long mfn) } /* Update kernel mapping, but not for highmem. */ - if ((pfn << PAGE_SHIFT) >= __pa(high_memory)) + if (pfn >= PFN_UP(__pa(high_memory - 1))) return; if (HYPERVISOR_update_va_mapping((unsigned long)__va(pfn << PAGE_SHIFT), -- cgit v0.10.2 From 9a17ad7f3db17db0c6375de96672f16ab1aa51ae Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Mon, 12 Jan 2015 06:05:10 +0100 Subject: xen: check for zero sized area when invalidating memory With the introduction of the linear mapped p2m list setting memory areas to "invalid" had to be delayed. When doing the invalidation make sure no zero sized areas are processed. Signed-off-by: Juegren Gross Signed-off-by: David Vrabel diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index 410210f..865e56c 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@ -160,6 +160,8 @@ void __init xen_inv_extra_mem(void) int i; for (i = 0; i < XEN_EXTRA_MEM_MAX_REGIONS; i++) { + if (!xen_extra_mem[i].size) + continue; pfn_s = PFN_DOWN(xen_extra_mem[i].start); pfn_e = PFN_UP(xen_extra_mem[i].start + xen_extra_mem[i].size); for (pfn = pfn_s; pfn < pfn_e; pfn++) -- cgit v0.10.2 From 721c21c17ab958abf19a8fc611c3bd4743680e38 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 12 Jan 2015 19:10:55 +0000 Subject: mm: mmu_gather: use tlb->end != 0 only for TLB invalidation When batching up address ranges for TLB invalidation, we check tlb->end != 0 to indicate that some pages have actually been unmapped. As of commit f045bbb9fa1b ("mmu_gather: fix over-eager tlb_flush_mmu_free() calling"), we use the same check for freeing these pages in order to avoid a performance regression where we call free_pages_and_swap_cache even when no pages are actually queued up. Unfortunately, the range could have been reset (tlb->end = 0) by tlb_end_vma, which has been shown to cause memory leaks on arm64. Furthermore, investigation into these leaks revealed that the fullmm case on task exit no longer invalidates the TLB, by virtue of tlb->end == 0 (in 3.18, need_flush would have been set). This patch resolves the problem by reverting commit f045bbb9fa1b, using instead tlb->local.nr as the predicate for page freeing in tlb_flush_mmu_free and ensuring that tlb->end is initialised to a non-zero value in the fullmm case. Tested-by: Mark Langsdorf Tested-by: Dave Hansen Signed-off-by: Will Deacon Signed-off-by: Linus Torvalds diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h index 0884805..db284bf 100644 --- a/include/asm-generic/tlb.h +++ b/include/asm-generic/tlb.h @@ -136,8 +136,12 @@ static inline void __tlb_adjust_range(struct mmu_gather *tlb, static inline void __tlb_reset_range(struct mmu_gather *tlb) { - tlb->start = TASK_SIZE; - tlb->end = 0; + if (tlb->fullmm) { + tlb->start = tlb->end = ~0; + } else { + tlb->start = TASK_SIZE; + tlb->end = 0; + } } /* diff --git a/mm/memory.c b/mm/memory.c index c6565f0..54f3a9b 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -235,6 +235,9 @@ void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned long static void tlb_flush_mmu_tlbonly(struct mmu_gather *tlb) { + if (!tlb->end) + return; + tlb_flush(tlb); mmu_notifier_invalidate_range(tlb->mm, tlb->start, tlb->end); #ifdef CONFIG_HAVE_RCU_TABLE_FREE @@ -247,7 +250,7 @@ static void tlb_flush_mmu_free(struct mmu_gather *tlb) { struct mmu_gather_batch *batch; - for (batch = &tlb->local; batch; batch = batch->next) { + for (batch = &tlb->local; batch && batch->nr; batch = batch->next) { free_pages_and_swap_cache(batch->pages, batch->nr); batch->nr = 0; } @@ -256,9 +259,6 @@ static void tlb_flush_mmu_free(struct mmu_gather *tlb) void tlb_flush_mmu(struct mmu_gather *tlb) { - if (!tlb->end) - return; - tlb_flush_mmu_tlbonly(tlb); tlb_flush_mmu_free(tlb); } -- cgit v0.10.2 From f221b04fe07eb56c39935e31bb8e9ddacc00612f Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Tue, 13 Jan 2015 07:40:05 +0000 Subject: x86/xen: properly retrieve NMI reason Using the native code here can't work properly, as the hypervisor would normally have cleared the two reason bits by the time Dom0 gets to see the NMI (if passed to it at all). There's a shared info field for this, and there's an existing hook to use - just fit the two together. This is particularly relevant so that NMIs intended to be handled by APEI / GHES actually make it to the respective handler. Note that the hook can (and should) be used irrespective of whether being in Dom0, as accessing port 0x61 in a DomU would be even worse, while the shared info field would just hold zero all the time. Note further that hardware NMI handling for PVH doesn't currently work anyway due to missing code in the hypervisor (but it is expected to work the native rather than the PV way). Signed-off-by: Jan Beulich Reviewed-by: Boris Ostrovsky Signed-off-by: David Vrabel diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index fac5e4f..1150163 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -40,6 +40,7 @@ #include #include #include +#include #include #include #include @@ -66,6 +67,7 @@ #include #include #include +#include #include #include #include @@ -1357,6 +1359,21 @@ static const struct machine_ops xen_machine_ops __initconst = { .emergency_restart = xen_emergency_restart, }; +static unsigned char xen_get_nmi_reason(void) +{ + unsigned char reason = 0; + + /* Construct a value which looks like it came from port 0x61. */ + if (test_bit(_XEN_NMIREASON_io_error, + &HYPERVISOR_shared_info->arch.nmi_reason)) + reason |= NMI_REASON_IOCHK; + if (test_bit(_XEN_NMIREASON_pci_serr, + &HYPERVISOR_shared_info->arch.nmi_reason)) + reason |= NMI_REASON_SERR; + + return reason; +} + static void __init xen_boot_params_init_edd(void) { #if IS_ENABLED(CONFIG_EDD) @@ -1541,9 +1558,12 @@ asmlinkage __visible void __init xen_start_kernel(void) pv_info = xen_info; pv_init_ops = xen_init_ops; pv_apic_ops = xen_apic_ops; - if (!xen_pvh_domain()) + if (!xen_pvh_domain()) { pv_cpu_ops = xen_cpu_ops; + x86_platform.get_nmi_reason = xen_get_nmi_reason; + } + if (xen_feature(XENFEAT_auto_translated_physmap)) x86_init.resources.memory_setup = xen_auto_xlated_memory_setup; else diff --git a/include/xen/interface/nmi.h b/include/xen/interface/nmi.h new file mode 100644 index 0000000..b47d9d0 --- /dev/null +++ b/include/xen/interface/nmi.h @@ -0,0 +1,51 @@ +/****************************************************************************** + * nmi.h + * + * NMI callback registration and reason codes. + * + * Copyright (c) 2005, Keir Fraser + */ + +#ifndef __XEN_PUBLIC_NMI_H__ +#define __XEN_PUBLIC_NMI_H__ + +#include + +/* + * NMI reason codes: + * Currently these are x86-specific, stored in arch_shared_info.nmi_reason. + */ + /* I/O-check error reported via ISA port 0x61, bit 6. */ +#define _XEN_NMIREASON_io_error 0 +#define XEN_NMIREASON_io_error (1UL << _XEN_NMIREASON_io_error) + /* PCI SERR reported via ISA port 0x61, bit 7. */ +#define _XEN_NMIREASON_pci_serr 1 +#define XEN_NMIREASON_pci_serr (1UL << _XEN_NMIREASON_pci_serr) + /* Unknown hardware-generated NMI. */ +#define _XEN_NMIREASON_unknown 2 +#define XEN_NMIREASON_unknown (1UL << _XEN_NMIREASON_unknown) + +/* + * long nmi_op(unsigned int cmd, void *arg) + * NB. All ops return zero on success, else a negative error code. + */ + +/* + * Register NMI callback for this (calling) VCPU. Currently this only makes + * sense for domain 0, vcpu 0. All other callers will be returned EINVAL. + * arg == pointer to xennmi_callback structure. + */ +#define XENNMI_register_callback 0 +struct xennmi_callback { + unsigned long handler_address; + unsigned long pad; +}; +DEFINE_GUEST_HANDLE_STRUCT(xennmi_callback); + +/* + * Deregister NMI callback for this (calling) VCPU. + * arg == NULL. + */ +#define XENNMI_unregister_callback 1 + +#endif /* __XEN_PUBLIC_NMI_H__ */ -- cgit v0.10.2 From 43239cbe79fc369f5d2160bd7f69e28b5c50a58c Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Tue, 13 Jan 2015 10:46:42 +0100 Subject: kernel: Change ASSIGN_ONCE(val, x) to WRITE_ONCE(x, val) Feedback has shown that WRITE_ONCE(x, val) is easier to use than ASSIGN_ONCE(val,x). There are no in-tree users yet, so lets change it for 3.19. Signed-off-by: Christian Borntraeger Acked-by: Peter Zijlstra Acked-by: Davidlohr Bueso Acked-by: Paul E. McKenney diff --git a/include/linux/compiler.h b/include/linux/compiler.h index a1c81f8..33063f8 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -215,7 +215,7 @@ static __always_inline void __read_once_size(volatile void *p, void *res, int si } } -static __always_inline void __assign_once_size(volatile void *p, void *res, int size) +static __always_inline void __write_once_size(volatile void *p, void *res, int size) { switch (size) { case 1: *(volatile __u8 *)p = *(__u8 *)res; break; @@ -235,15 +235,15 @@ static __always_inline void __assign_once_size(volatile void *p, void *res, int /* * Prevent the compiler from merging or refetching reads or writes. The * compiler is also forbidden from reordering successive instances of - * READ_ONCE, ASSIGN_ONCE and ACCESS_ONCE (see below), but only when the + * READ_ONCE, WRITE_ONCE and ACCESS_ONCE (see below), but only when the * compiler is aware of some particular ordering. One way to make the * compiler aware of ordering is to put the two invocations of READ_ONCE, - * ASSIGN_ONCE or ACCESS_ONCE() in different C statements. + * WRITE_ONCE or ACCESS_ONCE() in different C statements. * * In contrast to ACCESS_ONCE these two macros will also work on aggregate * data types like structs or unions. If the size of the accessed data * type exceeds the word size of the machine (e.g., 32 bits or 64 bits) - * READ_ONCE() and ASSIGN_ONCE() will fall back to memcpy and print a + * READ_ONCE() and WRITE_ONCE() will fall back to memcpy and print a * compile-time warning. * * Their two major use cases are: (1) Mediating communication between @@ -257,8 +257,8 @@ static __always_inline void __assign_once_size(volatile void *p, void *res, int #define READ_ONCE(x) \ ({ typeof(x) __val; __read_once_size(&x, &__val, sizeof(__val)); __val; }) -#define ASSIGN_ONCE(val, x) \ - ({ typeof(x) __val; __val = val; __assign_once_size(&x, &__val, sizeof(__val)); __val; }) +#define WRITE_ONCE(x, val) \ + ({ typeof(x) __val; __val = val; __write_once_size(&x, &__val, sizeof(__val)); __val; }) #endif /* __KERNEL__ */ -- cgit v0.10.2 From 0c86ac2c50647451d6a70dc900f8bb0ee3d485d9 Mon Sep 17 00:00:00 2001 From: Simon Guinot Date: Tue, 2 Dec 2014 07:32:10 -0800 Subject: leds: netxbig: fix oops at probe time This patch fixes a NULL pointer dereference on led_dat->mode_val. Due to this bug, a kernel oops can be observed at probe time on the LaCie 2Big and 5Big v2 boards: Unable to handle kernel NULL pointer dereference at virtual address 00000008 [...] [] (netxbig_led_probe) from [] (platform_drv_probe+0x4c/0x9c) [] (platform_drv_probe) from [] (driver_probe_device+0x98/0x25c) [] (driver_probe_device) from [] (__driver_attach+0x8c/0x90) [] (__driver_attach) from [] (bus_for_each_dev+0x68/0x94) [] (bus_for_each_dev) from [] (bus_add_driver+0x124/0x1dc) [] (bus_add_driver) from [] (driver_register+0x78/0xf8) [] (driver_register) from [] (do_one_initcall+0x80/0x1cc) [] (do_one_initcall) from [] (kernel_init_freeable+0xe4/0x1b4) [] (kernel_init_freeable) from [] (kernel_init+0xc/0xec) [] (kernel_init) from [] (ret_from_fork+0x14/0x24) [...] This bug was introduced by commit 588a6a99286ae30afb1339d8bc2163517b1b7dd1 ("leds: netxbig: fix attribute-creation race"). Signed-off-by: Simon Guinot Cc: # 3.17+ Acked-by: Johan Hovold Signed-off-by: Bryan Wu diff --git a/drivers/leds/leds-netxbig.c b/drivers/leds/leds-netxbig.c index 26515c2..25e4197 100644 --- a/drivers/leds/leds-netxbig.c +++ b/drivers/leds/leds-netxbig.c @@ -330,18 +330,18 @@ create_netxbig_led(struct platform_device *pdev, led_dat->sata = 0; led_dat->cdev.brightness = LED_OFF; led_dat->cdev.flags |= LED_CORE_SUSPENDRESUME; - /* - * If available, expose the SATA activity blink capability through - * a "sata" sysfs attribute. - */ - if (led_dat->mode_val[NETXBIG_LED_SATA] != NETXBIG_LED_INVALID_MODE) - led_dat->cdev.groups = netxbig_led_groups; led_dat->mode_addr = template->mode_addr; led_dat->mode_val = template->mode_val; led_dat->bright_addr = template->bright_addr; led_dat->bright_max = (1 << pdata->gpio_ext->num_data) - 1; led_dat->timer = pdata->timer; led_dat->num_timer = pdata->num_timer; + /* + * If available, expose the SATA activity blink capability through + * a "sata" sysfs attribute. + */ + if (led_dat->mode_val[NETXBIG_LED_SATA] != NETXBIG_LED_INVALID_MODE) + led_dat->cdev.groups = netxbig_led_groups; return led_classdev_register(&pdev->dev, &led_dat->cdev); } -- cgit v0.10.2 From b07ef35244424cbeda9844198607c7077099c82c Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 28 Jan 2015 08:38:20 +0100 Subject: udf: Release preallocation on last writeable close Commit 6fb1ca92a640 "udf: Fix race between write(2) and close(2)" changed the condition when preallocation is released. The idea was that we don't want to release the preallocation for an inode on close when there are other writeable file descriptors for the inode. However the condition was written in the opposite way so we released preallocation only if there were other writeable file descriptors. Fix the problem by changing the condition properly. CC: stable@vger.kernel.org Fixes: 6fb1ca92a6409a9d5b0696447cd4997bc9aaf5a2 Reported-by: Fabian Frederick Signed-off-by: Jan Kara diff --git a/fs/udf/file.c b/fs/udf/file.c index bb15771..08f3555 100644 --- a/fs/udf/file.c +++ b/fs/udf/file.c @@ -224,7 +224,7 @@ out: static int udf_release_file(struct inode *inode, struct file *filp) { if (filp->f_mode & FMODE_WRITE && - atomic_read(&inode->i_writecount) > 1) { + atomic_read(&inode->i_writecount) == 1) { /* * Grab i_mutex to avoid races with writes changing i_size * while we are running. -- cgit v0.10.2 From 14bf61ffe6ac54afcd1e888a4407fe16054483db Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 9 Oct 2014 16:03:13 +0200 Subject: quota: Switch ->get_dqblk() and ->set_dqblk() to use bytes as space units Currently ->get_dqblk() and ->set_dqblk() use struct fs_disk_quota which tracks space limits and usage in 512-byte blocks. However VFS quotas track usage in bytes (as some filesystems require that) and we need to somehow pass this information. Upto now it wasn't a problem because we didn't do any unit conversion (thus VFS quota routines happily stuck number of bytes into d_bcount field of struct fd_disk_quota). Only if you tried to use Q_XGETQUOTA or Q_XSETQLIM for VFS quotas (or Q_GETQUOTA / Q_SETQUOTA for XFS quotas), you got bogus results. Hardly anyone tried this but reportedly some Samba users hit the problem in practice. So when we want interfaces compatible we need to fix this. We bite the bullet and define another quota structure used for passing information from/to ->get_dqblk()/->set_dqblk. It's somewhat sad we have to have more conversion routines in fs/quota/quota.c and another copying of quota structure slows down getting of quota information by about 2% but it seems cleaner than overloading e.g. units of d_bcount to bytes. CC: stable@vger.kernel.org Reviewed-by: Christoph Hellwig Signed-off-by: Jan Kara diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index c8b148b..3e193cb 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c @@ -667,7 +667,7 @@ static void do_qc(struct gfs2_quota_data *qd, s64 change) static int gfs2_adjust_quota(struct gfs2_inode *ip, loff_t loc, s64 change, struct gfs2_quota_data *qd, - struct fs_disk_quota *fdq) + struct qc_dqblk *fdq) { struct inode *inode = &ip->i_inode; struct gfs2_sbd *sdp = GFS2_SB(inode); @@ -697,16 +697,16 @@ static int gfs2_adjust_quota(struct gfs2_inode *ip, loff_t loc, be64_add_cpu(&q.qu_value, change); qd->qd_qb.qb_value = q.qu_value; if (fdq) { - if (fdq->d_fieldmask & FS_DQ_BSOFT) { - q.qu_warn = cpu_to_be64(fdq->d_blk_softlimit >> sdp->sd_fsb2bb_shift); + if (fdq->d_fieldmask & QC_SPC_SOFT) { + q.qu_warn = cpu_to_be64(fdq->d_spc_softlimit >> sdp->sd_sb.sb_bsize_shift); qd->qd_qb.qb_warn = q.qu_warn; } - if (fdq->d_fieldmask & FS_DQ_BHARD) { - q.qu_limit = cpu_to_be64(fdq->d_blk_hardlimit >> sdp->sd_fsb2bb_shift); + if (fdq->d_fieldmask & QC_SPC_HARD) { + q.qu_limit = cpu_to_be64(fdq->d_spc_hardlimit >> sdp->sd_sb.sb_bsize_shift); qd->qd_qb.qb_limit = q.qu_limit; } - if (fdq->d_fieldmask & FS_DQ_BCOUNT) { - q.qu_value = cpu_to_be64(fdq->d_bcount >> sdp->sd_fsb2bb_shift); + if (fdq->d_fieldmask & QC_SPACE) { + q.qu_value = cpu_to_be64(fdq->d_space >> sdp->sd_sb.sb_bsize_shift); qd->qd_qb.qb_value = q.qu_value; } } @@ -1497,7 +1497,7 @@ static int gfs2_quota_get_xstate(struct super_block *sb, } static int gfs2_get_dqblk(struct super_block *sb, struct kqid qid, - struct fs_disk_quota *fdq) + struct qc_dqblk *fdq) { struct gfs2_sbd *sdp = sb->s_fs_info; struct gfs2_quota_lvb *qlvb; @@ -1505,7 +1505,7 @@ static int gfs2_get_dqblk(struct super_block *sb, struct kqid qid, struct gfs2_holder q_gh; int error; - memset(fdq, 0, sizeof(struct fs_disk_quota)); + memset(fdq, 0, sizeof(*fdq)); if (sdp->sd_args.ar_quota == GFS2_QUOTA_OFF) return -ESRCH; /* Crazy XFS error code */ @@ -1522,12 +1522,9 @@ static int gfs2_get_dqblk(struct super_block *sb, struct kqid qid, goto out; qlvb = (struct gfs2_quota_lvb *)qd->qd_gl->gl_lksb.sb_lvbptr; - fdq->d_version = FS_DQUOT_VERSION; - fdq->d_flags = (qid.type == USRQUOTA) ? FS_USER_QUOTA : FS_GROUP_QUOTA; - fdq->d_id = from_kqid_munged(current_user_ns(), qid); - fdq->d_blk_hardlimit = be64_to_cpu(qlvb->qb_limit) << sdp->sd_fsb2bb_shift; - fdq->d_blk_softlimit = be64_to_cpu(qlvb->qb_warn) << sdp->sd_fsb2bb_shift; - fdq->d_bcount = be64_to_cpu(qlvb->qb_value) << sdp->sd_fsb2bb_shift; + fdq->d_spc_hardlimit = be64_to_cpu(qlvb->qb_limit) << sdp->sd_sb.sb_bsize_shift; + fdq->d_spc_softlimit = be64_to_cpu(qlvb->qb_warn) << sdp->sd_sb.sb_bsize_shift; + fdq->d_space = be64_to_cpu(qlvb->qb_value) << sdp->sd_sb.sb_bsize_shift; gfs2_glock_dq_uninit(&q_gh); out: @@ -1536,10 +1533,10 @@ out: } /* GFS2 only supports a subset of the XFS fields */ -#define GFS2_FIELDMASK (FS_DQ_BSOFT|FS_DQ_BHARD|FS_DQ_BCOUNT) +#define GFS2_FIELDMASK (QC_SPC_SOFT|QC_SPC_HARD|QC_SPACE) static int gfs2_set_dqblk(struct super_block *sb, struct kqid qid, - struct fs_disk_quota *fdq) + struct qc_dqblk *fdq) { struct gfs2_sbd *sdp = sb->s_fs_info; struct gfs2_inode *ip = GFS2_I(sdp->sd_quota_inode); @@ -1583,17 +1580,17 @@ static int gfs2_set_dqblk(struct super_block *sb, struct kqid qid, goto out_i; /* If nothing has changed, this is a no-op */ - if ((fdq->d_fieldmask & FS_DQ_BSOFT) && - ((fdq->d_blk_softlimit >> sdp->sd_fsb2bb_shift) == be64_to_cpu(qd->qd_qb.qb_warn))) - fdq->d_fieldmask ^= FS_DQ_BSOFT; + if ((fdq->d_fieldmask & QC_SPC_SOFT) && + ((fdq->d_spc_softlimit >> sdp->sd_sb.sb_bsize_shift) == be64_to_cpu(qd->qd_qb.qb_warn))) + fdq->d_fieldmask ^= QC_SPC_SOFT; - if ((fdq->d_fieldmask & FS_DQ_BHARD) && - ((fdq->d_blk_hardlimit >> sdp->sd_fsb2bb_shift) == be64_to_cpu(qd->qd_qb.qb_limit))) - fdq->d_fieldmask ^= FS_DQ_BHARD; + if ((fdq->d_fieldmask & QC_SPC_HARD) && + ((fdq->d_spc_hardlimit >> sdp->sd_sb.sb_bsize_shift) == be64_to_cpu(qd->qd_qb.qb_limit))) + fdq->d_fieldmask ^= QC_SPC_HARD; - if ((fdq->d_fieldmask & FS_DQ_BCOUNT) && - ((fdq->d_bcount >> sdp->sd_fsb2bb_shift) == be64_to_cpu(qd->qd_qb.qb_value))) - fdq->d_fieldmask ^= FS_DQ_BCOUNT; + if ((fdq->d_fieldmask & QC_SPACE) && + ((fdq->d_space >> sdp->sd_sb.sb_bsize_shift) == be64_to_cpu(qd->qd_qb.qb_value))) + fdq->d_fieldmask ^= QC_SPACE; if (fdq->d_fieldmask == 0) goto out_i; diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index 8f0acef..69df5b2 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -2396,30 +2396,25 @@ static inline qsize_t stoqb(qsize_t space) } /* Generic routine for getting common part of quota structure */ -static void do_get_dqblk(struct dquot *dquot, struct fs_disk_quota *di) +static void do_get_dqblk(struct dquot *dquot, struct qc_dqblk *di) { struct mem_dqblk *dm = &dquot->dq_dqb; memset(di, 0, sizeof(*di)); - di->d_version = FS_DQUOT_VERSION; - di->d_flags = dquot->dq_id.type == USRQUOTA ? - FS_USER_QUOTA : FS_GROUP_QUOTA; - di->d_id = from_kqid_munged(current_user_ns(), dquot->dq_id); - spin_lock(&dq_data_lock); - di->d_blk_hardlimit = stoqb(dm->dqb_bhardlimit); - di->d_blk_softlimit = stoqb(dm->dqb_bsoftlimit); + di->d_spc_hardlimit = dm->dqb_bhardlimit; + di->d_spc_softlimit = dm->dqb_bsoftlimit; di->d_ino_hardlimit = dm->dqb_ihardlimit; di->d_ino_softlimit = dm->dqb_isoftlimit; - di->d_bcount = dm->dqb_curspace + dm->dqb_rsvspace; - di->d_icount = dm->dqb_curinodes; - di->d_btimer = dm->dqb_btime; - di->d_itimer = dm->dqb_itime; + di->d_space = dm->dqb_curspace + dm->dqb_rsvspace; + di->d_ino_count = dm->dqb_curinodes; + di->d_spc_timer = dm->dqb_btime; + di->d_ino_timer = dm->dqb_itime; spin_unlock(&dq_data_lock); } int dquot_get_dqblk(struct super_block *sb, struct kqid qid, - struct fs_disk_quota *di) + struct qc_dqblk *di) { struct dquot *dquot; @@ -2433,70 +2428,70 @@ int dquot_get_dqblk(struct super_block *sb, struct kqid qid, } EXPORT_SYMBOL(dquot_get_dqblk); -#define VFS_FS_DQ_MASK \ - (FS_DQ_BCOUNT | FS_DQ_BSOFT | FS_DQ_BHARD | \ - FS_DQ_ICOUNT | FS_DQ_ISOFT | FS_DQ_IHARD | \ - FS_DQ_BTIMER | FS_DQ_ITIMER) +#define VFS_QC_MASK \ + (QC_SPACE | QC_SPC_SOFT | QC_SPC_HARD | \ + QC_INO_COUNT | QC_INO_SOFT | QC_INO_HARD | \ + QC_SPC_TIMER | QC_INO_TIMER) /* Generic routine for setting common part of quota structure */ -static int do_set_dqblk(struct dquot *dquot, struct fs_disk_quota *di) +static int do_set_dqblk(struct dquot *dquot, struct qc_dqblk *di) { struct mem_dqblk *dm = &dquot->dq_dqb; int check_blim = 0, check_ilim = 0; struct mem_dqinfo *dqi = &sb_dqopt(dquot->dq_sb)->info[dquot->dq_id.type]; - if (di->d_fieldmask & ~VFS_FS_DQ_MASK) + if (di->d_fieldmask & ~VFS_QC_MASK) return -EINVAL; - if (((di->d_fieldmask & FS_DQ_BSOFT) && - (di->d_blk_softlimit > dqi->dqi_maxblimit)) || - ((di->d_fieldmask & FS_DQ_BHARD) && - (di->d_blk_hardlimit > dqi->dqi_maxblimit)) || - ((di->d_fieldmask & FS_DQ_ISOFT) && + if (((di->d_fieldmask & QC_SPC_SOFT) && + stoqb(di->d_spc_softlimit) > dqi->dqi_maxblimit) || + ((di->d_fieldmask & QC_SPC_HARD) && + stoqb(di->d_spc_hardlimit) > dqi->dqi_maxblimit) || + ((di->d_fieldmask & QC_INO_SOFT) && (di->d_ino_softlimit > dqi->dqi_maxilimit)) || - ((di->d_fieldmask & FS_DQ_IHARD) && + ((di->d_fieldmask & QC_INO_HARD) && (di->d_ino_hardlimit > dqi->dqi_maxilimit))) return -ERANGE; spin_lock(&dq_data_lock); - if (di->d_fieldmask & FS_DQ_BCOUNT) { - dm->dqb_curspace = di->d_bcount - dm->dqb_rsvspace; + if (di->d_fieldmask & QC_SPACE) { + dm->dqb_curspace = di->d_space - dm->dqb_rsvspace; check_blim = 1; set_bit(DQ_LASTSET_B + QIF_SPACE_B, &dquot->dq_flags); } - if (di->d_fieldmask & FS_DQ_BSOFT) - dm->dqb_bsoftlimit = qbtos(di->d_blk_softlimit); - if (di->d_fieldmask & FS_DQ_BHARD) - dm->dqb_bhardlimit = qbtos(di->d_blk_hardlimit); - if (di->d_fieldmask & (FS_DQ_BSOFT | FS_DQ_BHARD)) { + if (di->d_fieldmask & QC_SPC_SOFT) + dm->dqb_bsoftlimit = di->d_spc_softlimit; + if (di->d_fieldmask & QC_SPC_HARD) + dm->dqb_bhardlimit = di->d_spc_hardlimit; + if (di->d_fieldmask & (QC_SPC_SOFT | QC_SPC_HARD)) { check_blim = 1; set_bit(DQ_LASTSET_B + QIF_BLIMITS_B, &dquot->dq_flags); } - if (di->d_fieldmask & FS_DQ_ICOUNT) { - dm->dqb_curinodes = di->d_icount; + if (di->d_fieldmask & QC_INO_COUNT) { + dm->dqb_curinodes = di->d_ino_count; check_ilim = 1; set_bit(DQ_LASTSET_B + QIF_INODES_B, &dquot->dq_flags); } - if (di->d_fieldmask & FS_DQ_ISOFT) + if (di->d_fieldmask & QC_INO_SOFT) dm->dqb_isoftlimit = di->d_ino_softlimit; - if (di->d_fieldmask & FS_DQ_IHARD) + if (di->d_fieldmask & QC_INO_HARD) dm->dqb_ihardlimit = di->d_ino_hardlimit; - if (di->d_fieldmask & (FS_DQ_ISOFT | FS_DQ_IHARD)) { + if (di->d_fieldmask & (QC_INO_SOFT | QC_INO_HARD)) { check_ilim = 1; set_bit(DQ_LASTSET_B + QIF_ILIMITS_B, &dquot->dq_flags); } - if (di->d_fieldmask & FS_DQ_BTIMER) { - dm->dqb_btime = di->d_btimer; + if (di->d_fieldmask & QC_SPC_TIMER) { + dm->dqb_btime = di->d_spc_timer; check_blim = 1; set_bit(DQ_LASTSET_B + QIF_BTIME_B, &dquot->dq_flags); } - if (di->d_fieldmask & FS_DQ_ITIMER) { - dm->dqb_itime = di->d_itimer; + if (di->d_fieldmask & QC_INO_TIMER) { + dm->dqb_itime = di->d_ino_timer; check_ilim = 1; set_bit(DQ_LASTSET_B + QIF_ITIME_B, &dquot->dq_flags); } @@ -2506,7 +2501,7 @@ static int do_set_dqblk(struct dquot *dquot, struct fs_disk_quota *di) dm->dqb_curspace < dm->dqb_bsoftlimit) { dm->dqb_btime = 0; clear_bit(DQ_BLKS_B, &dquot->dq_flags); - } else if (!(di->d_fieldmask & FS_DQ_BTIMER)) + } else if (!(di->d_fieldmask & QC_SPC_TIMER)) /* Set grace only if user hasn't provided his own... */ dm->dqb_btime = get_seconds() + dqi->dqi_bgrace; } @@ -2515,7 +2510,7 @@ static int do_set_dqblk(struct dquot *dquot, struct fs_disk_quota *di) dm->dqb_curinodes < dm->dqb_isoftlimit) { dm->dqb_itime = 0; clear_bit(DQ_INODES_B, &dquot->dq_flags); - } else if (!(di->d_fieldmask & FS_DQ_ITIMER)) + } else if (!(di->d_fieldmask & QC_INO_TIMER)) /* Set grace only if user hasn't provided his own... */ dm->dqb_itime = get_seconds() + dqi->dqi_igrace; } @@ -2531,7 +2526,7 @@ static int do_set_dqblk(struct dquot *dquot, struct fs_disk_quota *di) } int dquot_set_dqblk(struct super_block *sb, struct kqid qid, - struct fs_disk_quota *di) + struct qc_dqblk *di) { struct dquot *dquot; int rc; diff --git a/fs/quota/quota.c b/fs/quota/quota.c index 2aa4151..6f38563 100644 --- a/fs/quota/quota.c +++ b/fs/quota/quota.c @@ -118,17 +118,27 @@ static int quota_setinfo(struct super_block *sb, int type, void __user *addr) return sb->s_qcop->set_info(sb, type, &info); } -static void copy_to_if_dqblk(struct if_dqblk *dst, struct fs_disk_quota *src) +static inline qsize_t qbtos(qsize_t blocks) +{ + return blocks << QIF_DQBLKSIZE_BITS; +} + +static inline qsize_t stoqb(qsize_t space) +{ + return (space + QIF_DQBLKSIZE - 1) >> QIF_DQBLKSIZE_BITS; +} + +static void copy_to_if_dqblk(struct if_dqblk *dst, struct qc_dqblk *src) { memset(dst, 0, sizeof(*dst)); - dst->dqb_bhardlimit = src->d_blk_hardlimit; - dst->dqb_bsoftlimit = src->d_blk_softlimit; - dst->dqb_curspace = src->d_bcount; + dst->dqb_bhardlimit = stoqb(src->d_spc_hardlimit); + dst->dqb_bsoftlimit = stoqb(src->d_spc_softlimit); + dst->dqb_curspace = src->d_space; dst->dqb_ihardlimit = src->d_ino_hardlimit; dst->dqb_isoftlimit = src->d_ino_softlimit; - dst->dqb_curinodes = src->d_icount; - dst->dqb_btime = src->d_btimer; - dst->dqb_itime = src->d_itimer; + dst->dqb_curinodes = src->d_ino_count; + dst->dqb_btime = src->d_spc_timer; + dst->dqb_itime = src->d_ino_timer; dst->dqb_valid = QIF_ALL; } @@ -136,7 +146,7 @@ static int quota_getquota(struct super_block *sb, int type, qid_t id, void __user *addr) { struct kqid qid; - struct fs_disk_quota fdq; + struct qc_dqblk fdq; struct if_dqblk idq; int ret; @@ -154,36 +164,36 @@ static int quota_getquota(struct super_block *sb, int type, qid_t id, return 0; } -static void copy_from_if_dqblk(struct fs_disk_quota *dst, struct if_dqblk *src) +static void copy_from_if_dqblk(struct qc_dqblk *dst, struct if_dqblk *src) { - dst->d_blk_hardlimit = src->dqb_bhardlimit; - dst->d_blk_softlimit = src->dqb_bsoftlimit; - dst->d_bcount = src->dqb_curspace; + dst->d_spc_hardlimit = qbtos(src->dqb_bhardlimit); + dst->d_spc_softlimit = qbtos(src->dqb_bsoftlimit); + dst->d_space = src->dqb_curspace; dst->d_ino_hardlimit = src->dqb_ihardlimit; dst->d_ino_softlimit = src->dqb_isoftlimit; - dst->d_icount = src->dqb_curinodes; - dst->d_btimer = src->dqb_btime; - dst->d_itimer = src->dqb_itime; + dst->d_ino_count = src->dqb_curinodes; + dst->d_spc_timer = src->dqb_btime; + dst->d_ino_timer = src->dqb_itime; dst->d_fieldmask = 0; if (src->dqb_valid & QIF_BLIMITS) - dst->d_fieldmask |= FS_DQ_BSOFT | FS_DQ_BHARD; + dst->d_fieldmask |= QC_SPC_SOFT | QC_SPC_HARD; if (src->dqb_valid & QIF_SPACE) - dst->d_fieldmask |= FS_DQ_BCOUNT; + dst->d_fieldmask |= QC_SPACE; if (src->dqb_valid & QIF_ILIMITS) - dst->d_fieldmask |= FS_DQ_ISOFT | FS_DQ_IHARD; + dst->d_fieldmask |= QC_INO_SOFT | QC_INO_HARD; if (src->dqb_valid & QIF_INODES) - dst->d_fieldmask |= FS_DQ_ICOUNT; + dst->d_fieldmask |= QC_INO_COUNT; if (src->dqb_valid & QIF_BTIME) - dst->d_fieldmask |= FS_DQ_BTIMER; + dst->d_fieldmask |= QC_SPC_TIMER; if (src->dqb_valid & QIF_ITIME) - dst->d_fieldmask |= FS_DQ_ITIMER; + dst->d_fieldmask |= QC_INO_TIMER; } static int quota_setquota(struct super_block *sb, int type, qid_t id, void __user *addr) { - struct fs_disk_quota fdq; + struct qc_dqblk fdq; struct if_dqblk idq; struct kqid qid; @@ -247,10 +257,78 @@ static int quota_getxstatev(struct super_block *sb, void __user *addr) return ret; } +/* + * XFS defines BBTOB and BTOBB macros inside fs/xfs/ and we cannot move them + * out of there as xfsprogs rely on definitions being in that header file. So + * just define same functions here for quota purposes. + */ +#define XFS_BB_SHIFT 9 + +static inline u64 quota_bbtob(u64 blocks) +{ + return blocks << XFS_BB_SHIFT; +} + +static inline u64 quota_btobb(u64 bytes) +{ + return (bytes + (1 << XFS_BB_SHIFT) - 1) >> XFS_BB_SHIFT; +} + +static void copy_from_xfs_dqblk(struct qc_dqblk *dst, struct fs_disk_quota *src) +{ + dst->d_spc_hardlimit = quota_bbtob(src->d_blk_hardlimit); + dst->d_spc_softlimit = quota_bbtob(src->d_blk_softlimit); + dst->d_ino_hardlimit = src->d_ino_hardlimit; + dst->d_ino_softlimit = src->d_ino_softlimit; + dst->d_space = quota_bbtob(src->d_bcount); + dst->d_ino_count = src->d_icount; + dst->d_ino_timer = src->d_itimer; + dst->d_spc_timer = src->d_btimer; + dst->d_ino_warns = src->d_iwarns; + dst->d_spc_warns = src->d_bwarns; + dst->d_rt_spc_hardlimit = quota_bbtob(src->d_rtb_hardlimit); + dst->d_rt_spc_softlimit = quota_bbtob(src->d_rtb_softlimit); + dst->d_rt_space = quota_bbtob(src->d_rtbcount); + dst->d_rt_spc_timer = src->d_rtbtimer; + dst->d_rt_spc_warns = src->d_rtbwarns; + dst->d_fieldmask = 0; + if (src->d_fieldmask & FS_DQ_ISOFT) + dst->d_fieldmask |= QC_INO_SOFT; + if (src->d_fieldmask & FS_DQ_IHARD) + dst->d_fieldmask |= QC_INO_HARD; + if (src->d_fieldmask & FS_DQ_BSOFT) + dst->d_fieldmask |= QC_SPC_SOFT; + if (src->d_fieldmask & FS_DQ_BHARD) + dst->d_fieldmask |= QC_SPC_HARD; + if (src->d_fieldmask & FS_DQ_RTBSOFT) + dst->d_fieldmask |= QC_RT_SPC_SOFT; + if (src->d_fieldmask & FS_DQ_RTBHARD) + dst->d_fieldmask |= QC_RT_SPC_HARD; + if (src->d_fieldmask & FS_DQ_BTIMER) + dst->d_fieldmask |= QC_SPC_TIMER; + if (src->d_fieldmask & FS_DQ_ITIMER) + dst->d_fieldmask |= QC_INO_TIMER; + if (src->d_fieldmask & FS_DQ_RTBTIMER) + dst->d_fieldmask |= QC_RT_SPC_TIMER; + if (src->d_fieldmask & FS_DQ_BWARNS) + dst->d_fieldmask |= QC_SPC_WARNS; + if (src->d_fieldmask & FS_DQ_IWARNS) + dst->d_fieldmask |= QC_INO_WARNS; + if (src->d_fieldmask & FS_DQ_RTBWARNS) + dst->d_fieldmask |= QC_RT_SPC_WARNS; + if (src->d_fieldmask & FS_DQ_BCOUNT) + dst->d_fieldmask |= QC_SPACE; + if (src->d_fieldmask & FS_DQ_ICOUNT) + dst->d_fieldmask |= QC_INO_COUNT; + if (src->d_fieldmask & FS_DQ_RTBCOUNT) + dst->d_fieldmask |= QC_RT_SPACE; +} + static int quota_setxquota(struct super_block *sb, int type, qid_t id, void __user *addr) { struct fs_disk_quota fdq; + struct qc_dqblk qdq; struct kqid qid; if (copy_from_user(&fdq, addr, sizeof(fdq))) @@ -260,13 +338,44 @@ static int quota_setxquota(struct super_block *sb, int type, qid_t id, qid = make_kqid(current_user_ns(), type, id); if (!qid_valid(qid)) return -EINVAL; - return sb->s_qcop->set_dqblk(sb, qid, &fdq); + copy_from_xfs_dqblk(&qdq, &fdq); + return sb->s_qcop->set_dqblk(sb, qid, &qdq); +} + +static void copy_to_xfs_dqblk(struct fs_disk_quota *dst, struct qc_dqblk *src, + int type, qid_t id) +{ + memset(dst, 0, sizeof(*dst)); + dst->d_version = FS_DQUOT_VERSION; + dst->d_id = id; + if (type == USRQUOTA) + dst->d_flags = FS_USER_QUOTA; + else if (type == PRJQUOTA) + dst->d_flags = FS_PROJ_QUOTA; + else + dst->d_flags = FS_GROUP_QUOTA; + dst->d_blk_hardlimit = quota_btobb(src->d_spc_hardlimit); + dst->d_blk_softlimit = quota_btobb(src->d_spc_softlimit); + dst->d_ino_hardlimit = src->d_ino_hardlimit; + dst->d_ino_softlimit = src->d_ino_softlimit; + dst->d_bcount = quota_btobb(src->d_space); + dst->d_icount = src->d_ino_count; + dst->d_itimer = src->d_ino_timer; + dst->d_btimer = src->d_spc_timer; + dst->d_iwarns = src->d_ino_warns; + dst->d_bwarns = src->d_spc_warns; + dst->d_rtb_hardlimit = quota_btobb(src->d_rt_spc_hardlimit); + dst->d_rtb_softlimit = quota_btobb(src->d_rt_spc_softlimit); + dst->d_rtbcount = quota_btobb(src->d_rt_space); + dst->d_rtbtimer = src->d_rt_spc_timer; + dst->d_rtbwarns = src->d_rt_spc_warns; } static int quota_getxquota(struct super_block *sb, int type, qid_t id, void __user *addr) { struct fs_disk_quota fdq; + struct qc_dqblk qdq; struct kqid qid; int ret; @@ -275,8 +384,11 @@ static int quota_getxquota(struct super_block *sb, int type, qid_t id, qid = make_kqid(current_user_ns(), type, id); if (!qid_valid(qid)) return -EINVAL; - ret = sb->s_qcop->get_dqblk(sb, qid, &fdq); - if (!ret && copy_to_user(addr, &fdq, sizeof(fdq))) + ret = sb->s_qcop->get_dqblk(sb, qid, &qdq); + if (ret) + return ret; + copy_to_xfs_dqblk(&fdq, &qdq, type, id); + if (copy_to_user(addr, &fdq, sizeof(fdq))) return -EFAULT; return ret; } diff --git a/fs/xfs/xfs_qm.h b/fs/xfs/xfs_qm.h index 3a07a93..41f6c0b 100644 --- a/fs/xfs/xfs_qm.h +++ b/fs/xfs/xfs_qm.h @@ -166,9 +166,9 @@ extern void xfs_qm_dqrele_all_inodes(struct xfs_mount *, uint); /* quota ops */ extern int xfs_qm_scall_trunc_qfiles(struct xfs_mount *, uint); extern int xfs_qm_scall_getquota(struct xfs_mount *, xfs_dqid_t, - uint, struct fs_disk_quota *); + uint, struct qc_dqblk *); extern int xfs_qm_scall_setqlim(struct xfs_mount *, xfs_dqid_t, uint, - struct fs_disk_quota *); + struct qc_dqblk *); extern int xfs_qm_scall_getqstat(struct xfs_mount *, struct fs_quota_stat *); extern int xfs_qm_scall_getqstatv(struct xfs_mount *, diff --git a/fs/xfs/xfs_qm_syscalls.c b/fs/xfs/xfs_qm_syscalls.c index 74fca68..cb6168e 100644 --- a/fs/xfs/xfs_qm_syscalls.c +++ b/fs/xfs/xfs_qm_syscalls.c @@ -39,7 +39,6 @@ STATIC int xfs_qm_log_quotaoff(xfs_mount_t *, xfs_qoff_logitem_t **, uint); STATIC int xfs_qm_log_quotaoff_end(xfs_mount_t *, xfs_qoff_logitem_t *, uint); STATIC uint xfs_qm_export_flags(uint); -STATIC uint xfs_qm_export_qtype_flags(uint); /* * Turn off quota accounting and/or enforcement for all udquots and/or @@ -573,8 +572,8 @@ xfs_qm_scall_getqstatv( return 0; } -#define XFS_DQ_MASK \ - (FS_DQ_LIMIT_MASK | FS_DQ_TIMER_MASK | FS_DQ_WARNS_MASK) +#define XFS_QC_MASK \ + (QC_LIMIT_MASK | QC_TIMER_MASK | QC_WARNS_MASK) /* * Adjust quota limits, and start/stop timers accordingly. @@ -584,7 +583,7 @@ xfs_qm_scall_setqlim( struct xfs_mount *mp, xfs_dqid_t id, uint type, - fs_disk_quota_t *newlim) + struct qc_dqblk *newlim) { struct xfs_quotainfo *q = mp->m_quotainfo; struct xfs_disk_dquot *ddq; @@ -593,9 +592,9 @@ xfs_qm_scall_setqlim( int error; xfs_qcnt_t hard, soft; - if (newlim->d_fieldmask & ~XFS_DQ_MASK) + if (newlim->d_fieldmask & ~XFS_QC_MASK) return -EINVAL; - if ((newlim->d_fieldmask & XFS_DQ_MASK) == 0) + if ((newlim->d_fieldmask & XFS_QC_MASK) == 0) return 0; /* @@ -633,11 +632,11 @@ xfs_qm_scall_setqlim( /* * Make sure that hardlimits are >= soft limits before changing. */ - hard = (newlim->d_fieldmask & FS_DQ_BHARD) ? - (xfs_qcnt_t) XFS_BB_TO_FSB(mp, newlim->d_blk_hardlimit) : + hard = (newlim->d_fieldmask & QC_SPC_HARD) ? + (xfs_qcnt_t) XFS_B_TO_FSB(mp, newlim->d_spc_hardlimit) : be64_to_cpu(ddq->d_blk_hardlimit); - soft = (newlim->d_fieldmask & FS_DQ_BSOFT) ? - (xfs_qcnt_t) XFS_BB_TO_FSB(mp, newlim->d_blk_softlimit) : + soft = (newlim->d_fieldmask & QC_SPC_SOFT) ? + (xfs_qcnt_t) XFS_B_TO_FSB(mp, newlim->d_spc_softlimit) : be64_to_cpu(ddq->d_blk_softlimit); if (hard == 0 || hard >= soft) { ddq->d_blk_hardlimit = cpu_to_be64(hard); @@ -650,11 +649,11 @@ xfs_qm_scall_setqlim( } else { xfs_debug(mp, "blkhard %Ld < blksoft %Ld", hard, soft); } - hard = (newlim->d_fieldmask & FS_DQ_RTBHARD) ? - (xfs_qcnt_t) XFS_BB_TO_FSB(mp, newlim->d_rtb_hardlimit) : + hard = (newlim->d_fieldmask & QC_RT_SPC_HARD) ? + (xfs_qcnt_t) XFS_B_TO_FSB(mp, newlim->d_rt_spc_hardlimit) : be64_to_cpu(ddq->d_rtb_hardlimit); - soft = (newlim->d_fieldmask & FS_DQ_RTBSOFT) ? - (xfs_qcnt_t) XFS_BB_TO_FSB(mp, newlim->d_rtb_softlimit) : + soft = (newlim->d_fieldmask & QC_RT_SPC_SOFT) ? + (xfs_qcnt_t) XFS_B_TO_FSB(mp, newlim->d_rt_spc_softlimit) : be64_to_cpu(ddq->d_rtb_softlimit); if (hard == 0 || hard >= soft) { ddq->d_rtb_hardlimit = cpu_to_be64(hard); @@ -667,10 +666,10 @@ xfs_qm_scall_setqlim( xfs_debug(mp, "rtbhard %Ld < rtbsoft %Ld", hard, soft); } - hard = (newlim->d_fieldmask & FS_DQ_IHARD) ? + hard = (newlim->d_fieldmask & QC_INO_HARD) ? (xfs_qcnt_t) newlim->d_ino_hardlimit : be64_to_cpu(ddq->d_ino_hardlimit); - soft = (newlim->d_fieldmask & FS_DQ_ISOFT) ? + soft = (newlim->d_fieldmask & QC_INO_SOFT) ? (xfs_qcnt_t) newlim->d_ino_softlimit : be64_to_cpu(ddq->d_ino_softlimit); if (hard == 0 || hard >= soft) { @@ -687,12 +686,12 @@ xfs_qm_scall_setqlim( /* * Update warnings counter(s) if requested */ - if (newlim->d_fieldmask & FS_DQ_BWARNS) - ddq->d_bwarns = cpu_to_be16(newlim->d_bwarns); - if (newlim->d_fieldmask & FS_DQ_IWARNS) - ddq->d_iwarns = cpu_to_be16(newlim->d_iwarns); - if (newlim->d_fieldmask & FS_DQ_RTBWARNS) - ddq->d_rtbwarns = cpu_to_be16(newlim->d_rtbwarns); + if (newlim->d_fieldmask & QC_SPC_WARNS) + ddq->d_bwarns = cpu_to_be16(newlim->d_spc_warns); + if (newlim->d_fieldmask & QC_INO_WARNS) + ddq->d_iwarns = cpu_to_be16(newlim->d_ino_warns); + if (newlim->d_fieldmask & QC_RT_SPC_WARNS) + ddq->d_rtbwarns = cpu_to_be16(newlim->d_rt_spc_warns); if (id == 0) { /* @@ -702,24 +701,24 @@ xfs_qm_scall_setqlim( * soft and hard limit values (already done, above), and * for warnings. */ - if (newlim->d_fieldmask & FS_DQ_BTIMER) { - q->qi_btimelimit = newlim->d_btimer; - ddq->d_btimer = cpu_to_be32(newlim->d_btimer); + if (newlim->d_fieldmask & QC_SPC_TIMER) { + q->qi_btimelimit = newlim->d_spc_timer; + ddq->d_btimer = cpu_to_be32(newlim->d_spc_timer); } - if (newlim->d_fieldmask & FS_DQ_ITIMER) { - q->qi_itimelimit = newlim->d_itimer; - ddq->d_itimer = cpu_to_be32(newlim->d_itimer); + if (newlim->d_fieldmask & QC_INO_TIMER) { + q->qi_itimelimit = newlim->d_ino_timer; + ddq->d_itimer = cpu_to_be32(newlim->d_ino_timer); } - if (newlim->d_fieldmask & FS_DQ_RTBTIMER) { - q->qi_rtbtimelimit = newlim->d_rtbtimer; - ddq->d_rtbtimer = cpu_to_be32(newlim->d_rtbtimer); + if (newlim->d_fieldmask & QC_RT_SPC_TIMER) { + q->qi_rtbtimelimit = newlim->d_rt_spc_timer; + ddq->d_rtbtimer = cpu_to_be32(newlim->d_rt_spc_timer); } - if (newlim->d_fieldmask & FS_DQ_BWARNS) - q->qi_bwarnlimit = newlim->d_bwarns; - if (newlim->d_fieldmask & FS_DQ_IWARNS) - q->qi_iwarnlimit = newlim->d_iwarns; - if (newlim->d_fieldmask & FS_DQ_RTBWARNS) - q->qi_rtbwarnlimit = newlim->d_rtbwarns; + if (newlim->d_fieldmask & QC_SPC_WARNS) + q->qi_bwarnlimit = newlim->d_spc_warns; + if (newlim->d_fieldmask & QC_INO_WARNS) + q->qi_iwarnlimit = newlim->d_ino_warns; + if (newlim->d_fieldmask & QC_RT_SPC_WARNS) + q->qi_rtbwarnlimit = newlim->d_rt_spc_warns; } else { /* * If the user is now over quota, start the timelimit. @@ -824,7 +823,7 @@ xfs_qm_scall_getquota( struct xfs_mount *mp, xfs_dqid_t id, uint type, - struct fs_disk_quota *dst) + struct qc_dqblk *dst) { struct xfs_dquot *dqp; int error; @@ -848,28 +847,25 @@ xfs_qm_scall_getquota( } memset(dst, 0, sizeof(*dst)); - dst->d_version = FS_DQUOT_VERSION; - dst->d_flags = xfs_qm_export_qtype_flags(dqp->q_core.d_flags); - dst->d_id = be32_to_cpu(dqp->q_core.d_id); - dst->d_blk_hardlimit = - XFS_FSB_TO_BB(mp, be64_to_cpu(dqp->q_core.d_blk_hardlimit)); - dst->d_blk_softlimit = - XFS_FSB_TO_BB(mp, be64_to_cpu(dqp->q_core.d_blk_softlimit)); + dst->d_spc_hardlimit = + XFS_FSB_TO_B(mp, be64_to_cpu(dqp->q_core.d_blk_hardlimit)); + dst->d_spc_softlimit = + XFS_FSB_TO_B(mp, be64_to_cpu(dqp->q_core.d_blk_softlimit)); dst->d_ino_hardlimit = be64_to_cpu(dqp->q_core.d_ino_hardlimit); dst->d_ino_softlimit = be64_to_cpu(dqp->q_core.d_ino_softlimit); - dst->d_bcount = XFS_FSB_TO_BB(mp, dqp->q_res_bcount); - dst->d_icount = dqp->q_res_icount; - dst->d_btimer = be32_to_cpu(dqp->q_core.d_btimer); - dst->d_itimer = be32_to_cpu(dqp->q_core.d_itimer); - dst->d_iwarns = be16_to_cpu(dqp->q_core.d_iwarns); - dst->d_bwarns = be16_to_cpu(dqp->q_core.d_bwarns); - dst->d_rtb_hardlimit = - XFS_FSB_TO_BB(mp, be64_to_cpu(dqp->q_core.d_rtb_hardlimit)); - dst->d_rtb_softlimit = - XFS_FSB_TO_BB(mp, be64_to_cpu(dqp->q_core.d_rtb_softlimit)); - dst->d_rtbcount = XFS_FSB_TO_BB(mp, dqp->q_res_rtbcount); - dst->d_rtbtimer = be32_to_cpu(dqp->q_core.d_rtbtimer); - dst->d_rtbwarns = be16_to_cpu(dqp->q_core.d_rtbwarns); + dst->d_space = XFS_FSB_TO_B(mp, dqp->q_res_bcount); + dst->d_ino_count = dqp->q_res_icount; + dst->d_spc_timer = be32_to_cpu(dqp->q_core.d_btimer); + dst->d_ino_timer = be32_to_cpu(dqp->q_core.d_itimer); + dst->d_ino_warns = be16_to_cpu(dqp->q_core.d_iwarns); + dst->d_spc_warns = be16_to_cpu(dqp->q_core.d_bwarns); + dst->d_rt_spc_hardlimit = + XFS_FSB_TO_B(mp, be64_to_cpu(dqp->q_core.d_rtb_hardlimit)); + dst->d_rt_spc_softlimit = + XFS_FSB_TO_B(mp, be64_to_cpu(dqp->q_core.d_rtb_softlimit)); + dst->d_rt_space = XFS_FSB_TO_B(mp, dqp->q_res_rtbcount); + dst->d_rt_spc_timer = be32_to_cpu(dqp->q_core.d_rtbtimer); + dst->d_rt_spc_warns = be16_to_cpu(dqp->q_core.d_rtbwarns); /* * Internally, we don't reset all the timers when quota enforcement @@ -882,23 +878,23 @@ xfs_qm_scall_getquota( dqp->q_core.d_flags == XFS_DQ_GROUP) || (!XFS_IS_PQUOTA_ENFORCED(mp) && dqp->q_core.d_flags == XFS_DQ_PROJ)) { - dst->d_btimer = 0; - dst->d_itimer = 0; - dst->d_rtbtimer = 0; + dst->d_spc_timer = 0; + dst->d_ino_timer = 0; + dst->d_rt_spc_timer = 0; } #ifdef DEBUG - if (((XFS_IS_UQUOTA_ENFORCED(mp) && dst->d_flags == FS_USER_QUOTA) || - (XFS_IS_GQUOTA_ENFORCED(mp) && dst->d_flags == FS_GROUP_QUOTA) || - (XFS_IS_PQUOTA_ENFORCED(mp) && dst->d_flags == FS_PROJ_QUOTA)) && - dst->d_id != 0) { - if ((dst->d_bcount > dst->d_blk_softlimit) && - (dst->d_blk_softlimit > 0)) { - ASSERT(dst->d_btimer != 0); + if (((XFS_IS_UQUOTA_ENFORCED(mp) && type == XFS_DQ_USER) || + (XFS_IS_GQUOTA_ENFORCED(mp) && type == XFS_DQ_GROUP) || + (XFS_IS_PQUOTA_ENFORCED(mp) && type == XFS_DQ_PROJ)) && + id != 0) { + if ((dst->d_space > dst->d_spc_softlimit) && + (dst->d_spc_softlimit > 0)) { + ASSERT(dst->d_spc_timer != 0); } - if ((dst->d_icount > dst->d_ino_softlimit) && + if ((dst->d_ino_count > dst->d_ino_softlimit) && (dst->d_ino_softlimit > 0)) { - ASSERT(dst->d_itimer != 0); + ASSERT(dst->d_ino_timer != 0); } } #endif @@ -908,26 +904,6 @@ out_put: } STATIC uint -xfs_qm_export_qtype_flags( - uint flags) -{ - /* - * Can't be more than one, or none. - */ - ASSERT((flags & (FS_PROJ_QUOTA | FS_USER_QUOTA)) != - (FS_PROJ_QUOTA | FS_USER_QUOTA)); - ASSERT((flags & (FS_PROJ_QUOTA | FS_GROUP_QUOTA)) != - (FS_PROJ_QUOTA | FS_GROUP_QUOTA)); - ASSERT((flags & (FS_USER_QUOTA | FS_GROUP_QUOTA)) != - (FS_USER_QUOTA | FS_GROUP_QUOTA)); - ASSERT((flags & (FS_PROJ_QUOTA|FS_USER_QUOTA|FS_GROUP_QUOTA)) != 0); - - return (flags & XFS_DQ_USER) ? - FS_USER_QUOTA : (flags & XFS_DQ_PROJ) ? - FS_PROJ_QUOTA : FS_GROUP_QUOTA; -} - -STATIC uint xfs_qm_export_flags( uint flags) { diff --git a/fs/xfs/xfs_quotaops.c b/fs/xfs/xfs_quotaops.c index 7542bbe..801a84c 100644 --- a/fs/xfs/xfs_quotaops.c +++ b/fs/xfs/xfs_quotaops.c @@ -131,7 +131,7 @@ STATIC int xfs_fs_get_dqblk( struct super_block *sb, struct kqid qid, - struct fs_disk_quota *fdq) + struct qc_dqblk *qdq) { struct xfs_mount *mp = XFS_M(sb); @@ -141,14 +141,14 @@ xfs_fs_get_dqblk( return -ESRCH; return xfs_qm_scall_getquota(mp, from_kqid(&init_user_ns, qid), - xfs_quota_type(qid.type), fdq); + xfs_quota_type(qid.type), qdq); } STATIC int xfs_fs_set_dqblk( struct super_block *sb, struct kqid qid, - struct fs_disk_quota *fdq) + struct qc_dqblk *qdq) { struct xfs_mount *mp = XFS_M(sb); @@ -160,7 +160,7 @@ xfs_fs_set_dqblk( return -ESRCH; return xfs_qm_scall_setqlim(mp, from_kqid(&init_user_ns, qid), - xfs_quota_type(qid.type), fdq); + xfs_quota_type(qid.type), qdq); } const struct quotactl_ops xfs_quotactl_operations = { diff --git a/include/linux/quota.h b/include/linux/quota.h index 50978b7..097d7eb2 100644 --- a/include/linux/quota.h +++ b/include/linux/quota.h @@ -321,6 +321,49 @@ struct dquot_operations { struct path; +/* Structure for communicating via ->get_dqblk() & ->set_dqblk() */ +struct qc_dqblk { + int d_fieldmask; /* mask of fields to change in ->set_dqblk() */ + u64 d_spc_hardlimit; /* absolute limit on used space */ + u64 d_spc_softlimit; /* preferred limit on used space */ + u64 d_ino_hardlimit; /* maximum # allocated inodes */ + u64 d_ino_softlimit; /* preferred inode limit */ + u64 d_space; /* Space owned by the user */ + u64 d_ino_count; /* # inodes owned by the user */ + s64 d_ino_timer; /* zero if within inode limits */ + /* if not, we refuse service */ + s64 d_spc_timer; /* similar to above; for space */ + int d_ino_warns; /* # warnings issued wrt num inodes */ + int d_spc_warns; /* # warnings issued wrt used space */ + u64 d_rt_spc_hardlimit; /* absolute limit on realtime space */ + u64 d_rt_spc_softlimit; /* preferred limit on RT space */ + u64 d_rt_space; /* realtime space owned */ + s64 d_rt_spc_timer; /* similar to above; for RT space */ + int d_rt_spc_warns; /* # warnings issued wrt RT space */ +}; + +/* Field specifiers for ->set_dqblk() in struct qc_dqblk */ +#define QC_INO_SOFT (1<<0) +#define QC_INO_HARD (1<<1) +#define QC_SPC_SOFT (1<<2) +#define QC_SPC_HARD (1<<3) +#define QC_RT_SPC_SOFT (1<<4) +#define QC_RT_SPC_HARD (1<<5) +#define QC_LIMIT_MASK (QC_INO_SOFT | QC_INO_HARD | QC_SPC_SOFT | QC_SPC_HARD | \ + QC_RT_SPC_SOFT | QC_RT_SPC_HARD) +#define QC_SPC_TIMER (1<<6) +#define QC_INO_TIMER (1<<7) +#define QC_RT_SPC_TIMER (1<<8) +#define QC_TIMER_MASK (QC_SPC_TIMER | QC_INO_TIMER | QC_RT_SPC_TIMER) +#define QC_SPC_WARNS (1<<9) +#define QC_INO_WARNS (1<<10) +#define QC_RT_SPC_WARNS (1<<11) +#define QC_WARNS_MASK (QC_SPC_WARNS | QC_INO_WARNS | QC_RT_SPC_WARNS) +#define QC_SPACE (1<<12) +#define QC_INO_COUNT (1<<13) +#define QC_RT_SPACE (1<<14) +#define QC_ACCT_MASK (QC_SPACE | QC_INO_COUNT | QC_RT_SPACE) + /* Operations handling requests from userspace */ struct quotactl_ops { int (*quota_on)(struct super_block *, int, int, struct path *); @@ -329,8 +372,8 @@ struct quotactl_ops { int (*quota_sync)(struct super_block *, int); int (*get_info)(struct super_block *, int, struct if_dqinfo *); int (*set_info)(struct super_block *, int, struct if_dqinfo *); - int (*get_dqblk)(struct super_block *, struct kqid, struct fs_disk_quota *); - int (*set_dqblk)(struct super_block *, struct kqid, struct fs_disk_quota *); + int (*get_dqblk)(struct super_block *, struct kqid, struct qc_dqblk *); + int (*set_dqblk)(struct super_block *, struct kqid, struct qc_dqblk *); int (*get_xstate)(struct super_block *, struct fs_quota_stat *); int (*set_xstate)(struct super_block *, unsigned int, int); int (*get_xstatev)(struct super_block *, struct fs_quota_statv *); diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h index f23538a..29e3455 100644 --- a/include/linux/quotaops.h +++ b/include/linux/quotaops.h @@ -98,9 +98,9 @@ int dquot_quota_sync(struct super_block *sb, int type); int dquot_get_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii); int dquot_set_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii); int dquot_get_dqblk(struct super_block *sb, struct kqid id, - struct fs_disk_quota *di); + struct qc_dqblk *di); int dquot_set_dqblk(struct super_block *sb, struct kqid id, - struct fs_disk_quota *di); + struct qc_dqblk *di); int __dquot_transfer(struct inode *inode, struct dquot **transfer_to); int dquot_transfer(struct inode *inode, struct iattr *iattr); -- cgit v0.10.2