From 6dda9266913ad57e09afc1a10d6473f10c806a63 Mon Sep 17 00:00:00 2001 From: "Luck, Tony" Date: Thu, 11 Aug 2011 15:14:39 -0700 Subject: pstore: defer inserting OOPS entries into pstore Life is simple for all the kernel terminating types of kmsg_dump call backs - pstore just saves the tail end of the console log. But for "oops" the situation is more complex - the kernel may carry on running (possibly for ever). So we'd like to make the logged copy of the oops appear in the pstore filesystem - so that the user has a handle to clear the entry from the persistent backing store (if we don't, the store may fill with "oops" entries (that are also safely stashed in /var/log/messages) leaving no space for real errors. Current code calls pstore_mkfile() immediately. But this may not be safe. The oops could have happened with arbitrary locks held, or in interrupt or NMI context. So allocating memory and calling into generic filesystem code seems unwise. This patch defers making the entry appear. At the time of the oops, we merely set a flag "pstore_new_entry" noting that a new entry has been added. A periodic timer checks once a minute to see if the flag is set - if so, it schedules a work queue to rescan the backing store and make all new entries appear in the pstore filesystem. Signed-off-by: Tony Luck diff --git a/fs/pstore/inode.c b/fs/pstore/inode.c index 893b961..379a02d 100644 --- a/fs/pstore/inode.c +++ b/fs/pstore/inode.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include @@ -32,13 +33,18 @@ #include #include #include +#include #include #include "internal.h" #define PSTORE_NAMELEN 64 +static DEFINE_SPINLOCK(allpstore_lock); +static LIST_HEAD(allpstore); + struct pstore_private { + struct list_head list; struct pstore_info *psi; enum pstore_type_id type; u64 id; @@ -81,8 +87,16 @@ static int pstore_unlink(struct inode *dir, struct dentry *dentry) static void pstore_evict_inode(struct inode *inode) { + struct pstore_private *p = inode->i_private; + unsigned long flags; + end_writeback(inode); - kfree(inode->i_private); + if (p) { + spin_lock_irqsave(&allpstore_lock, flags); + list_del(&p->list); + spin_unlock_irqrestore(&allpstore_lock, flags); + kfree(p); + } } static const struct inode_operations pstore_dir_inode_operations = { @@ -182,9 +196,23 @@ int pstore_mkfile(enum pstore_type_id type, char *psname, u64 id, struct dentry *root = pstore_sb->s_root; struct dentry *dentry; struct inode *inode; - int rc; + int rc = 0; char name[PSTORE_NAMELEN]; - struct pstore_private *private; + struct pstore_private *private, *pos; + unsigned long flags; + + spin_lock_irqsave(&allpstore_lock, flags); + list_for_each_entry(pos, &allpstore, list) { + if (pos->type == type && + pos->id == id && + pos->psi == psi) { + rc = -EEXIST; + break; + } + } + spin_unlock_irqrestore(&allpstore_lock, flags); + if (rc) + return rc; rc = -ENOMEM; inode = pstore_get_inode(pstore_sb, root->d_inode, S_IFREG | 0444, 0); @@ -229,6 +257,10 @@ int pstore_mkfile(enum pstore_type_id type, char *psname, u64 id, d_add(dentry, inode); + spin_lock_irqsave(&allpstore_lock, flags); + list_add(&private->list, &allpstore); + spin_unlock_irqrestore(&allpstore_lock, flags); + mutex_unlock(&root->d_inode->i_mutex); return 0; @@ -277,7 +309,7 @@ int pstore_fill_super(struct super_block *sb, void *data, int silent) goto fail; } - pstore_get_records(); + pstore_get_records(0); return 0; fail: diff --git a/fs/pstore/internal.h b/fs/pstore/internal.h index 611c1b3..3bde461 100644 --- a/fs/pstore/internal.h +++ b/fs/pstore/internal.h @@ -1,5 +1,5 @@ extern void pstore_set_kmsg_bytes(int); -extern void pstore_get_records(void); +extern void pstore_get_records(int); extern int pstore_mkfile(enum pstore_type_id, char *psname, u64 id, char *data, size_t size, struct timespec time, struct pstore_info *psi); diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c index c5300ec..ca60ebc 100644 --- a/fs/pstore/platform.c +++ b/fs/pstore/platform.c @@ -25,12 +25,29 @@ #include #include #include +#include #include #include +#include #include "internal.h" /* + * We defer making "oops" entries appear in pstore - see + * whether the system is actually still running well enough + * to let someone see the entry + */ +#define PSTORE_INTERVAL (60 * HZ) + +static int pstore_new_entry; + +static void pstore_timefunc(unsigned long); +static DEFINE_TIMER(pstore_timer, pstore_timefunc, 0, 0); + +static void pstore_dowork(struct work_struct *); +static DECLARE_WORK(pstore_work, pstore_dowork); + +/* * pstore_lock just protects "psinfo" during * calls to pstore_register() */ @@ -100,9 +117,7 @@ static void pstore_dump(struct kmsg_dumper *dumper, id = psinfo->write(PSTORE_TYPE_DMESG, part, hsize + l1_cpy + l2_cpy, psinfo); if (reason == KMSG_DUMP_OOPS && pstore_is_mounted()) - pstore_mkfile(PSTORE_TYPE_DMESG, psinfo->name, id, - psinfo->buf, hsize + l1_cpy + l2_cpy, - CURRENT_TIME, psinfo); + pstore_new_entry = 1; l1 -= l1_cpy; l2 -= l2_cpy; total += l1_cpy + l2_cpy; @@ -148,19 +163,24 @@ int pstore_register(struct pstore_info *psi) } if (pstore_is_mounted()) - pstore_get_records(); + pstore_get_records(0); kmsg_dump_register(&pstore_dumper); + pstore_timer.expires = jiffies + PSTORE_INTERVAL; + add_timer(&pstore_timer); + return 0; } EXPORT_SYMBOL_GPL(pstore_register); /* - * Read all the records from the persistent store. Create and - * file files in our filesystem. + * Read all the records from the persistent store. Create + * files in our filesystem. Don't warn about -EEXIST errors + * when we are re-scanning the backing store looking to add new + * error records. */ -void pstore_get_records(void) +void pstore_get_records(int quiet) { struct pstore_info *psi = psinfo; ssize_t size; @@ -178,8 +198,9 @@ void pstore_get_records(void) goto out; while ((size = psi->read(&id, &type, &time, psi)) > 0) { - if (pstore_mkfile(type, psi->name, id, psi->buf, (size_t)size, - time, psi)) + rc = pstore_mkfile(type, psi->name, id, psi->buf, (size_t)size, + time, psi); + if (rc && (rc != -EEXIST || !quiet)) failed++; } psi->close(psi); @@ -191,6 +212,21 @@ out: failed, psi->name); } +static void pstore_dowork(struct work_struct *work) +{ + pstore_get_records(1); +} + +static void pstore_timefunc(unsigned long dummy) +{ + if (pstore_new_entry) { + pstore_new_entry = 0; + schedule_work(&pstore_work); + } + + mod_timer(&pstore_timer, jiffies + PSTORE_INTERVAL); +} + /* * Call platform driver to write a record to the * persistent store. -- cgit v0.10.2 From abd4d5587be911f63592537284dad78766d97d62 Mon Sep 17 00:00:00 2001 From: Don Zickus Date: Fri, 12 Aug 2011 10:54:51 -0700 Subject: pstore: change mutex locking to spin_locks pstore was using mutex locking to protect read/write access to the backend plug-ins. This causes problems when pstore is executed in an NMI context through panic() -> kmsg_dump(). This patch changes the mutex to a spin_lock_irqsave then also checks to see if we are in an NMI context. If we are in an NMI and can't get the lock, just print a message stating that and blow by the locking. All this is probably a hack around the bigger locking problem but it solves my current situation of trying to sleep in an NMI context. Tested by loading the lkdtm module and executing a HARDLOCKUP which will cause the machine to panic inside the nmi handler. Signed-off-by: Don Zickus Acked-by: Matthew Garrett Signed-off-by: Tony Luck diff --git a/drivers/acpi/apei/erst.c b/drivers/acpi/apei/erst.c index 2ca59dc..5e820ea 100644 --- a/drivers/acpi/apei/erst.c +++ b/drivers/acpi/apei/erst.c @@ -1165,7 +1165,7 @@ static int __init erst_init(void) goto err_release_erange; buf = kmalloc(erst_erange.size, GFP_KERNEL); - mutex_init(&erst_info.buf_mutex); + spin_lock_init(&erst_info.buf_lock); if (buf) { erst_info.buf = buf + sizeof(struct cper_pstore_record); erst_info.bufsize = erst_erange.size - diff --git a/drivers/firmware/efivars.c b/drivers/firmware/efivars.c index eb80b54..be8bcb0 100644 --- a/drivers/firmware/efivars.c +++ b/drivers/firmware/efivars.c @@ -978,7 +978,7 @@ int register_efivars(struct efivars *efivars, if (efivars->efi_pstore_info.buf) { efivars->efi_pstore_info.bufsize = 1024; efivars->efi_pstore_info.data = efivars; - mutex_init(&efivars->efi_pstore_info.buf_mutex); + spin_lock_init(&efivars->efi_pstore_info.buf_lock); pstore_register(&efivars->efi_pstore_info); } diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c index ca60ebc..0472924 100644 --- a/fs/pstore/platform.c +++ b/fs/pstore/platform.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include "internal.h" @@ -88,13 +89,20 @@ static void pstore_dump(struct kmsg_dumper *dumper, u64 id; int hsize; unsigned int part = 1; + unsigned long flags = 0; + int is_locked = 0; if (reason < ARRAY_SIZE(reason_str)) why = reason_str[reason]; else why = "Unknown"; - mutex_lock(&psinfo->buf_mutex); + if (in_nmi()) { + is_locked = spin_trylock(&psinfo->buf_lock); + if (!is_locked) + pr_err("pstore dump routine blocked in NMI, may corrupt error record\n"); + } else + spin_lock_irqsave(&psinfo->buf_lock, flags); oopscount++; while (total < kmsg_bytes) { dst = psinfo->buf; @@ -123,7 +131,11 @@ static void pstore_dump(struct kmsg_dumper *dumper, total += l1_cpy + l2_cpy; part++; } - mutex_unlock(&psinfo->buf_mutex); + if (in_nmi()) { + if (is_locked) + spin_unlock(&psinfo->buf_lock); + } else + spin_unlock_irqrestore(&psinfo->buf_lock, flags); } static struct kmsg_dumper pstore_dumper = { @@ -188,11 +200,12 @@ void pstore_get_records(int quiet) enum pstore_type_id type; struct timespec time; int failed = 0, rc; + unsigned long flags; if (!psi) return; - mutex_lock(&psinfo->buf_mutex); + spin_lock_irqsave(&psinfo->buf_lock, flags); rc = psi->open(psi); if (rc) goto out; @@ -205,7 +218,7 @@ void pstore_get_records(int quiet) } psi->close(psi); out: - mutex_unlock(&psinfo->buf_mutex); + spin_unlock_irqrestore(&psinfo->buf_lock, flags); if (failed) printk(KERN_WARNING "pstore: failed to load %d record(s) from '%s'\n", @@ -233,7 +246,8 @@ static void pstore_timefunc(unsigned long dummy) */ int pstore_write(enum pstore_type_id type, char *buf, size_t size) { - u64 id; + u64 id; + unsigned long flags; if (!psinfo) return -ENODEV; @@ -241,13 +255,13 @@ int pstore_write(enum pstore_type_id type, char *buf, size_t size) if (size > psinfo->bufsize) return -EFBIG; - mutex_lock(&psinfo->buf_mutex); + spin_lock_irqsave(&psinfo->buf_lock, flags); memcpy(psinfo->buf, buf, size); id = psinfo->write(type, 0, size, psinfo); if (pstore_is_mounted()) pstore_mkfile(PSTORE_TYPE_DMESG, psinfo->name, id, psinfo->buf, size, CURRENT_TIME, psinfo); - mutex_unlock(&psinfo->buf_mutex); + spin_unlock_irqrestore(&psinfo->buf_lock, flags); return 0; } diff --git a/include/linux/pstore.h b/include/linux/pstore.h index cc03bbf..b91440e 100644 --- a/include/linux/pstore.h +++ b/include/linux/pstore.h @@ -32,7 +32,7 @@ enum pstore_type_id { struct pstore_info { struct module *owner; char *name; - struct mutex buf_mutex; /* serialize access to 'buf' */ + spinlock_t buf_lock; /* serialize access to 'buf' */ char *buf; size_t bufsize; int (*open)(struct pstore_info *psi); -- cgit v0.10.2 From b238b8fa93353ab50c9a2b1e2fa47a0ab01c37cd Mon Sep 17 00:00:00 2001 From: Chen Gong Date: Wed, 12 Oct 2011 09:17:24 -0700 Subject: pstore: make pstore write function return normal success/fail value Currently pstore write interface employs record id as return value, but it is not enough because it can't tell caller if the write operation is successful. Pass the record id back via an argument pointer and return zero for success, non-zero for failure. Signed-off-by: Chen Gong Signed-off-by: Tony Luck diff --git a/drivers/acpi/apei/erst.c b/drivers/acpi/apei/erst.c index 5e820ea..1274080 100644 --- a/drivers/acpi/apei/erst.c +++ b/drivers/acpi/apei/erst.c @@ -933,7 +933,7 @@ static int erst_open_pstore(struct pstore_info *psi); static int erst_close_pstore(struct pstore_info *psi); static ssize_t erst_reader(u64 *id, enum pstore_type_id *type, struct timespec *time, struct pstore_info *psi); -static u64 erst_writer(enum pstore_type_id type, unsigned int part, +static int erst_writer(enum pstore_type_id type, u64 *id, unsigned int part, size_t size, struct pstore_info *psi); static int erst_clearer(enum pstore_type_id type, u64 id, struct pstore_info *psi); @@ -1040,11 +1040,12 @@ out: return (rc < 0) ? rc : (len - sizeof(*rcd)); } -static u64 erst_writer(enum pstore_type_id type, unsigned int part, +static int erst_writer(enum pstore_type_id type, u64 *id, unsigned int part, size_t size, struct pstore_info *psi) { struct cper_pstore_record *rcd = (struct cper_pstore_record *) (erst_info.buf - sizeof(*rcd)); + int ret; memset(rcd, 0, sizeof(*rcd)); memcpy(rcd->hdr.signature, CPER_SIG_RECORD, CPER_SIG_SIZE); @@ -1079,9 +1080,10 @@ static u64 erst_writer(enum pstore_type_id type, unsigned int part, } rcd->sec_hdr.section_severity = CPER_SEV_FATAL; - erst_write(&rcd->hdr); + ret = erst_write(&rcd->hdr); + *id = rcd->hdr.record_id; - return rcd->hdr.record_id; + return ret; } static int erst_clearer(enum pstore_type_id type, u64 id, diff --git a/drivers/firmware/efivars.c b/drivers/firmware/efivars.c index be8bcb0..8370f72 100644 --- a/drivers/firmware/efivars.c +++ b/drivers/firmware/efivars.c @@ -490,8 +490,8 @@ static ssize_t efi_pstore_read(u64 *id, enum pstore_type_id *type, return 0; } -static u64 efi_pstore_write(enum pstore_type_id type, unsigned int part, - size_t size, struct pstore_info *psi) +static int efi_pstore_write(enum pstore_type_id type, u64 *id, + unsigned int part, size_t size, struct pstore_info *psi) { char name[DUMP_NAME_LEN]; char stub_name[DUMP_NAME_LEN]; @@ -499,7 +499,7 @@ static u64 efi_pstore_write(enum pstore_type_id type, unsigned int part, efi_guid_t vendor = LINUX_EFI_CRASH_GUID; struct efivars *efivars = psi->data; struct efivar_entry *entry, *found = NULL; - int i; + int i, ret = 0; sprintf(stub_name, "dump-type%u-%u-", type, part); sprintf(name, "%s%lu", stub_name, get_seconds()); @@ -548,18 +548,19 @@ static u64 efi_pstore_write(enum pstore_type_id type, unsigned int part, efivar_unregister(found); if (size) - efivar_create_sysfs_entry(efivars, + ret = efivar_create_sysfs_entry(efivars, utf16_strsize(efi_name, DUMP_NAME_LEN * 2), efi_name, &vendor); - return part; + *id = part; + return ret; }; static int efi_pstore_erase(enum pstore_type_id type, u64 id, struct pstore_info *psi) { - efi_pstore_write(type, id, 0, psi); + efi_pstore_write(type, &id, (unsigned int)id, 0, psi); return 0; } @@ -580,8 +581,8 @@ static ssize_t efi_pstore_read(u64 *id, enum pstore_type_id *type, return -1; } -static u64 efi_pstore_write(enum pstore_type_id type, unsigned int part, - size_t size, struct pstore_info *psi) +static int efi_pstore_write(enum pstore_type_id type, u64 *id, + unsigned int part, size_t size, struct pstore_info *psi) { return 0; } diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c index 0472924..2bd620f 100644 --- a/fs/pstore/platform.c +++ b/fs/pstore/platform.c @@ -87,7 +87,7 @@ static void pstore_dump(struct kmsg_dumper *dumper, unsigned long size, total = 0; char *dst, *why; u64 id; - int hsize; + int hsize, ret; unsigned int part = 1; unsigned long flags = 0; int is_locked = 0; @@ -122,9 +122,9 @@ static void pstore_dump(struct kmsg_dumper *dumper, memcpy(dst, s1 + s1_start, l1_cpy); memcpy(dst + l1_cpy, s2 + s2_start, l2_cpy); - id = psinfo->write(PSTORE_TYPE_DMESG, part, + ret = psinfo->write(PSTORE_TYPE_DMESG, &id, part, hsize + l1_cpy + l2_cpy, psinfo); - if (reason == KMSG_DUMP_OOPS && pstore_is_mounted()) + if (ret == 0 && reason == KMSG_DUMP_OOPS && pstore_is_mounted()) pstore_new_entry = 1; l1 -= l1_cpy; l2 -= l2_cpy; @@ -247,6 +247,7 @@ static void pstore_timefunc(unsigned long dummy) int pstore_write(enum pstore_type_id type, char *buf, size_t size) { u64 id; + int ret; unsigned long flags; if (!psinfo) @@ -257,8 +258,8 @@ int pstore_write(enum pstore_type_id type, char *buf, size_t size) spin_lock_irqsave(&psinfo->buf_lock, flags); memcpy(psinfo->buf, buf, size); - id = psinfo->write(type, 0, size, psinfo); - if (pstore_is_mounted()) + ret = psinfo->write(type, &id, 0, size, psinfo); + if (ret == 0 && pstore_is_mounted()) pstore_mkfile(PSTORE_TYPE_DMESG, psinfo->name, id, psinfo->buf, size, CURRENT_TIME, psinfo); spin_unlock_irqrestore(&psinfo->buf_lock, flags); diff --git a/include/linux/pstore.h b/include/linux/pstore.h index b91440e..ea56732 100644 --- a/include/linux/pstore.h +++ b/include/linux/pstore.h @@ -39,8 +39,8 @@ struct pstore_info { int (*close)(struct pstore_info *psi); ssize_t (*read)(u64 *id, enum pstore_type_id *type, struct timespec *time, struct pstore_info *psi); - u64 (*write)(enum pstore_type_id type, unsigned int part, - size_t size, struct pstore_info *psi); + int (*write)(enum pstore_type_id type, u64 *id, + unsigned int part, size_t size, struct pstore_info *psi); int (*erase)(enum pstore_type_id type, u64 id, struct pstore_info *psi); void *data; -- cgit v0.10.2