From b842e240f27678aa5d71611cddc8d17a93fb0caf Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Thu, 10 May 2007 19:02:07 -0400 Subject: locks: reverse order of posix_locks_conflict() arguments The first argument to posix_locks_conflict() is meant to be a lock request, and the second a lock from an inode's lock request. It doesn't really make a difference which order you call them in, since the only asymmetric test in posix_lock_conflict() is the check whether the second argument is a posix lock--and every caller already does that check for some reason. But may as well fix posix_test_lock() to call posix_locks_conflict() with the arguments in the same order as everywhere else. Signed-off-by: "J. Bruce Fields" diff --git a/fs/locks.c b/fs/locks.c index c795eaa..51bae622 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -668,7 +668,7 @@ posix_test_lock(struct file *filp, struct file_lock *fl) for (cfl = filp->f_path.dentry->d_inode->i_flock; cfl; cfl = cfl->fl_next) { if (!IS_POSIX(cfl)) continue; - if (posix_locks_conflict(cfl, fl)) + if (posix_locks_conflict(fl, cfl)) break; } if (cfl) -- cgit v0.10.2 From 526985b9dd6ef7716b87f5fe6f0e2438ea3a89c7 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Tue, 14 Nov 2006 16:54:36 -0500 Subject: locks: kill redundant local variable There's no need for another variable local to this loop; we can use the variable (of the same name!) already declared at the top of the function, and not used till later (at which point it's initialized, so this is safe). Signed-off-by: J. Bruce Fields diff --git a/fs/locks.c b/fs/locks.c index 51bae622..efe1aff 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -819,7 +819,7 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str lock_kernel(); if (request->fl_type != F_UNLCK) { for_each_lock(inode, before) { - struct file_lock *fl = *before; + fl = *before; if (!IS_POSIX(fl)) continue; if (!posix_locks_conflict(request, fl)) -- cgit v0.10.2 From 84d535ade62b6f8ce852745731ad6200c46b977c Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Tue, 11 Sep 2007 16:38:13 +0400 Subject: Memory shortage can result in inconsistent flocks state When the flock_lock_file() is called to change the flock from F_RDLCK to F_WRLCK or vice versa the existing flock can be removed without appropriate warning. Look: for_each_lock(inode, before) { struct file_lock *fl = *before; if (IS_POSIX(fl)) break; if (IS_LEASE(fl)) continue; if (filp != fl->fl_file) continue; if (request->fl_type == fl->fl_type) goto out; found = 1; locks_delete_lock(before); <<<<<< ! break; } if after this point the subsequent locks_alloc_lock() will fail the return code will be -ENOMEM, but the existing lock is already removed. This is a known feature that such "re-locking" is not atomic, but in the racy case the file should stay locked (although by some other process), but in this case the file will be unlocked. The proposal is to prepare the lock in advance keeping no chance to fail in the future code. Found during making the flocks pid-namespaces aware. (Note: Thanks to Reuben Farrelly for finding a bug in an earlier version of this patch.) Signed-off-by: Pavel Emelyanov Signed-off-by: J. Bruce Fields Cc: Reuben Farrelly diff --git a/fs/locks.c b/fs/locks.c index efe1aff..6e22c81 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -733,6 +733,15 @@ static int flock_lock_file(struct file *filp, struct file_lock *request) lock_kernel(); if (request->fl_flags & FL_ACCESS) goto find_conflict; + + if (request->fl_type != F_UNLCK) { + error = -ENOMEM; + new_fl = locks_alloc_lock(); + if (new_fl == NULL) + goto out; + error = 0; + } + for_each_lock(inode, before) { struct file_lock *fl = *before; if (IS_POSIX(fl)) @@ -754,10 +763,6 @@ static int flock_lock_file(struct file *filp, struct file_lock *request) goto out; } - error = -ENOMEM; - new_fl = locks_alloc_lock(); - if (new_fl == NULL) - goto out; /* * If a higher-priority process was blocked on the old file lock, * give it the opportunity to lock the file. -- cgit v0.10.2 From 02888f41e9d7fa95d1f5b2f76e0f0af6ea8198cc Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Wed, 12 Sep 2007 15:45:07 -0400 Subject: locks: fix flock_lock_file() comment This comment wasn't updated when lease support was added, and it makes essentially the same mistake that the code made before a recent bugfix. Signed-off-by: J. Bruce Fields diff --git a/fs/locks.c b/fs/locks.c index 6e22c81..c7c69d2 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -715,8 +715,7 @@ next_task: } /* Try to create a FLOCK lock on filp. We always insert new FLOCK locks - * at the head of the list, but that's secret knowledge known only to - * flock_lock_file and posix_lock_file. + * after any leases, but before any posix locks. * * Note that if called with an FL_EXISTS argument, the caller may determine * whether or not a lock was successfully freed by testing the return -- cgit v0.10.2 From f0c1cd0eaf0b127356c2c09e40305453bc361b0f Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Wed, 19 Sep 2007 16:44:07 +0400 Subject: Use list_first_entry in locks_wake_up_blocks This routine deletes all the elements from the list with the "while (!list_empty())" loop, and we already have a list_first_entry() macro to help it look nicer :) Signed-off-by: Pavel Emelyanov diff --git a/fs/locks.c b/fs/locks.c index c7c69d2..282b6c1 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -534,7 +534,9 @@ static void locks_insert_block(struct file_lock *blocker, static void locks_wake_up_blocks(struct file_lock *blocker) { while (!list_empty(&blocker->fl_block)) { - struct file_lock *waiter = list_entry(blocker->fl_block.next, + struct file_lock *waiter; + + waiter = list_first_entry(&blocker->fl_block, struct file_lock, fl_block); __locks_delete_block(waiter); if (waiter->fl_lmops && waiter->fl_lmops->fl_notify) -- cgit v0.10.2 From 85c59580b30c82aa771aa33b37217a6b6851bc14 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Thu, 20 Sep 2007 12:45:02 +0400 Subject: locks: Fix potential OOPS in generic_setlease() This code is run under lock_kernel(), which is dropped during sleeping operations, so the following race is possible: CPU1: CPU2: vfs_setlease(); vfs_setlease(); lock_kernel(); lock_kernel(); /* spin */ generic_setlease(): ... for (before = ...) /* here we found some lease after * which we will insert the new one */ fl = locks_alloc_lock(); /* go to sleep in this allocation and * drop the BKL */ generic_setlease(): ... for (before = ...) /* here we find the "before" pointing * at the one we found on CPU1 */ ->fl_change(my_before, arg); lease_modify(); locks_free_lock(); /* and we freed it */ ... unlock_kernel(); locks_insert_lock(before, fl); /* OOPS! We have just tried to add the lease * at the tail of already removed one */ The similar races are already handled in other code - all the allocations are performed before any checks/updates. Thanks to Kamalesh Babulal for testing and for a bug report on an earlier version. Signed-off-by: Pavel Emelyanov Signed-off-by: J. Bruce Fields Cc: Kamalesh Babulal diff --git a/fs/locks.c b/fs/locks.c index 282b6c1..43dbc7f 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -1343,6 +1343,7 @@ int fcntl_getlease(struct file *filp) int generic_setlease(struct file *filp, long arg, struct file_lock **flp) { struct file_lock *fl, **before, **my_before = NULL, *lease; + struct file_lock *new_fl = NULL; struct dentry *dentry = filp->f_path.dentry; struct inode *inode = dentry->d_inode; int error, rdlease_count = 0, wrlease_count = 0; @@ -1369,6 +1370,11 @@ int generic_setlease(struct file *filp, long arg, struct file_lock **flp) || (atomic_read(&inode->i_count) > 1))) goto out; + error = -ENOMEM; + new_fl = locks_alloc_lock(); + if (new_fl == NULL) + goto out; + /* * At this point, we know that if there is an exclusive * lease on this file, then we hold it on this filp @@ -1411,18 +1417,15 @@ int generic_setlease(struct file *filp, long arg, struct file_lock **flp) if (!leases_enable) goto out; - error = -ENOMEM; - fl = locks_alloc_lock(); - if (fl == NULL) - goto out; - - locks_copy_lock(fl, lease); + locks_copy_lock(new_fl, lease); + locks_insert_lock(before, new_fl); - locks_insert_lock(before, fl); + *flp = new_fl; + return 0; - *flp = fl; - error = 0; out: + if (new_fl != NULL) + locks_free_lock(new_fl); return error; } EXPORT_SYMBOL(generic_setlease); -- cgit v0.10.2 From 4f3b19ca41fbe572e3d44caf516c215b286fe2a6 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 24 Sep 2007 18:52:09 -0400 Subject: Documentation: move mandatory locking documentation to filesystems/ Shouldn't this mandatory-locking documentation be in the Documentation/filesystems directory? Give it a more descriptive name while we're at it, and update 00-INDEX with a more inclusive description of Documentation/filesystems (which has already talked about more than just individual filesystems). Signed-off-by: J. Bruce Fields Acked-by: Randy Dunlap diff --git a/Documentation/00-INDEX b/Documentation/00-INDEX index 43e89b1..910473c 100644 --- a/Documentation/00-INDEX +++ b/Documentation/00-INDEX @@ -145,7 +145,7 @@ fb/ feature-removal-schedule.txt - list of files and features that are going to be removed. filesystems/ - - directory with info on the various filesystems that Linux supports. + - info on the vfs and the various filesystems that Linux supports. firmware_class/ - request_firmware() hotplug interface info. floppy.txt @@ -240,8 +240,6 @@ m68k/ - directory with info about Linux on Motorola 68k architecture. magic-number.txt - list of magic numbers used to mark/protect kernel data structures. -mandatory.txt - - info on the Linux implementation of Sys V mandatory file locking. mca.txt - info on supporting Micro Channel Architecture (e.g. PS/2) systems. md.txt diff --git a/Documentation/filesystems/00-INDEX b/Documentation/filesystems/00-INDEX index 59db1bc..e801076 100644 --- a/Documentation/filesystems/00-INDEX +++ b/Documentation/filesystems/00-INDEX @@ -52,6 +52,8 @@ isofs.txt - info and mount options for the ISO 9660 (CDROM) filesystem. jfs.txt - info and mount options for the JFS filesystem. +mandatory-locking.txt + - info on the Linux implementation of Sys V mandatory file locking. ncpfs.txt - info on Novell Netware(tm) filesystem using NCP protocol. ntfs.txt diff --git a/Documentation/filesystems/mandatory-locking.txt b/Documentation/filesystems/mandatory-locking.txt new file mode 100644 index 0000000..bc449d4 --- /dev/null +++ b/Documentation/filesystems/mandatory-locking.txt @@ -0,0 +1,152 @@ + Mandatory File Locking For The Linux Operating System + + Andy Walker + + 15 April 1996 + + +1. What is mandatory locking? +------------------------------ + +Mandatory locking is kernel enforced file locking, as opposed to the more usual +cooperative file locking used to guarantee sequential access to files among +processes. File locks are applied using the flock() and fcntl() system calls +(and the lockf() library routine which is a wrapper around fcntl().) It is +normally a process' responsibility to check for locks on a file it wishes to +update, before applying its own lock, updating the file and unlocking it again. +The most commonly used example of this (and in the case of sendmail, the most +troublesome) is access to a user's mailbox. The mail user agent and the mail +transfer agent must guard against updating the mailbox at the same time, and +prevent reading the mailbox while it is being updated. + +In a perfect world all processes would use and honour a cooperative, or +"advisory" locking scheme. However, the world isn't perfect, and there's +a lot of poorly written code out there. + +In trying to address this problem, the designers of System V UNIX came up +with a "mandatory" locking scheme, whereby the operating system kernel would +block attempts by a process to write to a file that another process holds a +"read" -or- "shared" lock on, and block attempts to both read and write to a +file that a process holds a "write " -or- "exclusive" lock on. + +The System V mandatory locking scheme was intended to have as little impact as +possible on existing user code. The scheme is based on marking individual files +as candidates for mandatory locking, and using the existing fcntl()/lockf() +interface for applying locks just as if they were normal, advisory locks. + +Note 1: In saying "file" in the paragraphs above I am actually not telling +the whole truth. System V locking is based on fcntl(). The granularity of +fcntl() is such that it allows the locking of byte ranges in files, in addition +to entire files, so the mandatory locking rules also have byte level +granularity. + +Note 2: POSIX.1 does not specify any scheme for mandatory locking, despite +borrowing the fcntl() locking scheme from System V. The mandatory locking +scheme is defined by the System V Interface Definition (SVID) Version 3. + +2. Marking a file for mandatory locking +--------------------------------------- + +A file is marked as a candidate for mandatory locking by setting the group-id +bit in its file mode but removing the group-execute bit. This is an otherwise +meaningless combination, and was chosen by the System V implementors so as not +to break existing user programs. + +Note that the group-id bit is usually automatically cleared by the kernel when +a setgid file is written to. This is a security measure. The kernel has been +modified to recognize the special case of a mandatory lock candidate and to +refrain from clearing this bit. Similarly the kernel has been modified not +to run mandatory lock candidates with setgid privileges. + +3. Available implementations +---------------------------- + +I have considered the implementations of mandatory locking available with +SunOS 4.1.x, Solaris 2.x and HP-UX 9.x. + +Generally I have tried to make the most sense out of the behaviour exhibited +by these three reference systems. There are many anomalies. + +All the reference systems reject all calls to open() for a file on which +another process has outstanding mandatory locks. This is in direct +contravention of SVID 3, which states that only calls to open() with the +O_TRUNC flag set should be rejected. The Linux implementation follows the SVID +definition, which is the "Right Thing", since only calls with O_TRUNC can +modify the contents of the file. + +HP-UX even disallows open() with O_TRUNC for a file with advisory locks, not +just mandatory locks. That would appear to contravene POSIX.1. + +mmap() is another interesting case. All the operating systems mentioned +prevent mandatory locks from being applied to an mmap()'ed file, but HP-UX +also disallows advisory locks for such a file. SVID actually specifies the +paranoid HP-UX behaviour. + +In my opinion only MAP_SHARED mappings should be immune from locking, and then +only from mandatory locks - that is what is currently implemented. + +SunOS is so hopeless that it doesn't even honour the O_NONBLOCK flag for +mandatory locks, so reads and writes to locked files always block when they +should return EAGAIN. + +I'm afraid that this is such an esoteric area that the semantics described +below are just as valid as any others, so long as the main points seem to +agree. + +4. Semantics +------------ + +1. Mandatory locks can only be applied via the fcntl()/lockf() locking + interface - in other words the System V/POSIX interface. BSD style + locks using flock() never result in a mandatory lock. + +2. If a process has locked a region of a file with a mandatory read lock, then + other processes are permitted to read from that region. If any of these + processes attempts to write to the region it will block until the lock is + released, unless the process has opened the file with the O_NONBLOCK + flag in which case the system call will return immediately with the error + status EAGAIN. + +3. If a process has locked a region of a file with a mandatory write lock, all + attempts to read or write to that region block until the lock is released, + unless a process has opened the file with the O_NONBLOCK flag in which case + the system call will return immediately with the error status EAGAIN. + +4. Calls to open() with O_TRUNC, or to creat(), on a existing file that has + any mandatory locks owned by other processes will be rejected with the + error status EAGAIN. + +5. Attempts to apply a mandatory lock to a file that is memory mapped and + shared (via mmap() with MAP_SHARED) will be rejected with the error status + EAGAIN. + +6. Attempts to create a shared memory map of a file (via mmap() with MAP_SHARED) + that has any mandatory locks in effect will be rejected with the error status + EAGAIN. + +5. Which system calls are affected? +----------------------------------- + +Those which modify a file's contents, not just the inode. That gives read(), +write(), readv(), writev(), open(), creat(), mmap(), truncate() and +ftruncate(). truncate() and ftruncate() are considered to be "write" actions +for the purposes of mandatory locking. + +The affected region is usually defined as stretching from the current position +for the total number of bytes read or written. For the truncate calls it is +defined as the bytes of a file removed or added (we must also consider bytes +added, as a lock can specify just "the whole file", rather than a specific +range of bytes.) + +Note 3: I may have overlooked some system calls that need mandatory lock +checking in my eagerness to get this code out the door. Please let me know, or +better still fix the system calls yourself and submit a patch to me or Linus. + +6. Warning! +----------- + +Not even root can override a mandatory lock, so runaway processes can wreak +havoc if they lock crucial files. The way around it is to change the file +permissions (remove the setgid bit) before trying to read or write to it. +Of course, that might be a bit tricky if the system is hung :-( + diff --git a/Documentation/locks.txt b/Documentation/locks.txt index e3b402e..fab857a 100644 --- a/Documentation/locks.txt +++ b/Documentation/locks.txt @@ -53,11 +53,11 @@ fcntl(), with all the problems that implies. 1.3 Mandatory Locking As A Mount Option --------------------------------------- -Mandatory locking, as described in 'Documentation/mandatory.txt' was prior -to this release a general configuration option that was valid for all -mounted filesystems. This had a number of inherent dangers, not the least -of which was the ability to freeze an NFS server by asking it to read a -file for which a mandatory lock existed. +Mandatory locking, as described in 'Documentation/filesystems/mandatory.txt' +was prior to this release a general configuration option that was valid for +all mounted filesystems. This had a number of inherent dangers, not the +least of which was the ability to freeze an NFS server by asking it to read +a file for which a mandatory lock existed. From this release of the kernel, mandatory locking can be turned on and off on a per-filesystem basis, using the mount options 'mand' and 'nomand'. diff --git a/Documentation/mandatory.txt b/Documentation/mandatory.txt deleted file mode 100644 index bc449d4..0000000 --- a/Documentation/mandatory.txt +++ /dev/null @@ -1,152 +0,0 @@ - Mandatory File Locking For The Linux Operating System - - Andy Walker - - 15 April 1996 - - -1. What is mandatory locking? ------------------------------- - -Mandatory locking is kernel enforced file locking, as opposed to the more usual -cooperative file locking used to guarantee sequential access to files among -processes. File locks are applied using the flock() and fcntl() system calls -(and the lockf() library routine which is a wrapper around fcntl().) It is -normally a process' responsibility to check for locks on a file it wishes to -update, before applying its own lock, updating the file and unlocking it again. -The most commonly used example of this (and in the case of sendmail, the most -troublesome) is access to a user's mailbox. The mail user agent and the mail -transfer agent must guard against updating the mailbox at the same time, and -prevent reading the mailbox while it is being updated. - -In a perfect world all processes would use and honour a cooperative, or -"advisory" locking scheme. However, the world isn't perfect, and there's -a lot of poorly written code out there. - -In trying to address this problem, the designers of System V UNIX came up -with a "mandatory" locking scheme, whereby the operating system kernel would -block attempts by a process to write to a file that another process holds a -"read" -or- "shared" lock on, and block attempts to both read and write to a -file that a process holds a "write " -or- "exclusive" lock on. - -The System V mandatory locking scheme was intended to have as little impact as -possible on existing user code. The scheme is based on marking individual files -as candidates for mandatory locking, and using the existing fcntl()/lockf() -interface for applying locks just as if they were normal, advisory locks. - -Note 1: In saying "file" in the paragraphs above I am actually not telling -the whole truth. System V locking is based on fcntl(). The granularity of -fcntl() is such that it allows the locking of byte ranges in files, in addition -to entire files, so the mandatory locking rules also have byte level -granularity. - -Note 2: POSIX.1 does not specify any scheme for mandatory locking, despite -borrowing the fcntl() locking scheme from System V. The mandatory locking -scheme is defined by the System V Interface Definition (SVID) Version 3. - -2. Marking a file for mandatory locking ---------------------------------------- - -A file is marked as a candidate for mandatory locking by setting the group-id -bit in its file mode but removing the group-execute bit. This is an otherwise -meaningless combination, and was chosen by the System V implementors so as not -to break existing user programs. - -Note that the group-id bit is usually automatically cleared by the kernel when -a setgid file is written to. This is a security measure. The kernel has been -modified to recognize the special case of a mandatory lock candidate and to -refrain from clearing this bit. Similarly the kernel has been modified not -to run mandatory lock candidates with setgid privileges. - -3. Available implementations ----------------------------- - -I have considered the implementations of mandatory locking available with -SunOS 4.1.x, Solaris 2.x and HP-UX 9.x. - -Generally I have tried to make the most sense out of the behaviour exhibited -by these three reference systems. There are many anomalies. - -All the reference systems reject all calls to open() for a file on which -another process has outstanding mandatory locks. This is in direct -contravention of SVID 3, which states that only calls to open() with the -O_TRUNC flag set should be rejected. The Linux implementation follows the SVID -definition, which is the "Right Thing", since only calls with O_TRUNC can -modify the contents of the file. - -HP-UX even disallows open() with O_TRUNC for a file with advisory locks, not -just mandatory locks. That would appear to contravene POSIX.1. - -mmap() is another interesting case. All the operating systems mentioned -prevent mandatory locks from being applied to an mmap()'ed file, but HP-UX -also disallows advisory locks for such a file. SVID actually specifies the -paranoid HP-UX behaviour. - -In my opinion only MAP_SHARED mappings should be immune from locking, and then -only from mandatory locks - that is what is currently implemented. - -SunOS is so hopeless that it doesn't even honour the O_NONBLOCK flag for -mandatory locks, so reads and writes to locked files always block when they -should return EAGAIN. - -I'm afraid that this is such an esoteric area that the semantics described -below are just as valid as any others, so long as the main points seem to -agree. - -4. Semantics ------------- - -1. Mandatory locks can only be applied via the fcntl()/lockf() locking - interface - in other words the System V/POSIX interface. BSD style - locks using flock() never result in a mandatory lock. - -2. If a process has locked a region of a file with a mandatory read lock, then - other processes are permitted to read from that region. If any of these - processes attempts to write to the region it will block until the lock is - released, unless the process has opened the file with the O_NONBLOCK - flag in which case the system call will return immediately with the error - status EAGAIN. - -3. If a process has locked a region of a file with a mandatory write lock, all - attempts to read or write to that region block until the lock is released, - unless a process has opened the file with the O_NONBLOCK flag in which case - the system call will return immediately with the error status EAGAIN. - -4. Calls to open() with O_TRUNC, or to creat(), on a existing file that has - any mandatory locks owned by other processes will be rejected with the - error status EAGAIN. - -5. Attempts to apply a mandatory lock to a file that is memory mapped and - shared (via mmap() with MAP_SHARED) will be rejected with the error status - EAGAIN. - -6. Attempts to create a shared memory map of a file (via mmap() with MAP_SHARED) - that has any mandatory locks in effect will be rejected with the error status - EAGAIN. - -5. Which system calls are affected? ------------------------------------ - -Those which modify a file's contents, not just the inode. That gives read(), -write(), readv(), writev(), open(), creat(), mmap(), truncate() and -ftruncate(). truncate() and ftruncate() are considered to be "write" actions -for the purposes of mandatory locking. - -The affected region is usually defined as stretching from the current position -for the total number of bytes read or written. For the truncate calls it is -defined as the bytes of a file removed or added (we must also consider bytes -added, as a lock can specify just "the whole file", rather than a specific -range of bytes.) - -Note 3: I may have overlooked some system calls that need mandatory lock -checking in my eagerness to get this code out the door. Please let me know, or -better still fix the system calls yourself and submit a patch to me or Linus. - -6. Warning! ------------ - -Not even root can override a mandatory lock, so runaway processes can wreak -havoc if they lock crucial files. The way around it is to change the file -permissions (remove the setgid bit) before trying to read or write to it. -Of course, that might be a bit tricky if the system is hung :-( - -- cgit v0.10.2 From 9efa68ed079af97f4e9477eadef567ffe64f7afc Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Tue, 25 Sep 2007 11:57:19 -0400 Subject: locks: add warning about mandatory locking races The mandatory file locking implementation has long-standing races that probably render it useless. I know of no plans to fix them. Till we do, we should at least warn people. Signed-off-by: J. Bruce Fields diff --git a/Documentation/filesystems/mandatory-locking.txt b/Documentation/filesystems/mandatory-locking.txt index bc449d4..0979d1d 100644 --- a/Documentation/filesystems/mandatory-locking.txt +++ b/Documentation/filesystems/mandatory-locking.txt @@ -3,7 +3,26 @@ Andy Walker 15 April 1996 - + (Updated September 2007) + +0. Why you should avoid mandatory locking +----------------------------------------- + +The Linux implementation is prey to a number of difficult-to-fix race +conditions which in practice make it not dependable: + + - The write system call checks for a mandatory lock only once + at its start. It is therefore possible for a lock request to + be granted after this check but before the data is modified. + A process may then see file data change even while a mandatory + lock was held. + - Similarly, an exclusive lock may be granted on a file after + the kernel has decided to proceed with a read, but before the + read has actually completed, and the reading process may see + the file data in a state which should not have been visible + to it. + - Similar races make the claimed mutual exclusion between lock + and mmap similarly unreliable. 1. What is mandatory locking? ------------------------------ -- cgit v0.10.2 From 98257af5a2ad0c5b502ebd07094d9fd8ce87acef Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Sun, 30 Sep 2007 22:18:55 -0400 Subject: Documentation: move locks.txt in filesystems/ This documentation (about file locking) belongs in filesystems/. Signed-off-by: J. Bruce Fields diff --git a/Documentation/00-INDEX b/Documentation/00-INDEX index 910473c..cc10ce7 100644 --- a/Documentation/00-INDEX +++ b/Documentation/00-INDEX @@ -230,8 +230,6 @@ local_ops.txt - semantics and behavior of local atomic operations. lockdep-design.txt - documentation on the runtime locking correctness validator. -locks.txt - - info on file locking implementations, flock() vs. fcntl(), etc. logo.gif - full colour GIF image of Linux logo (penguin - Tux). logo.txt diff --git a/Documentation/filesystems/00-INDEX b/Documentation/filesystems/00-INDEX index e801076..599593a 100644 --- a/Documentation/filesystems/00-INDEX +++ b/Documentation/filesystems/00-INDEX @@ -52,6 +52,8 @@ isofs.txt - info and mount options for the ISO 9660 (CDROM) filesystem. jfs.txt - info and mount options for the JFS filesystem. +locks.txt + - info on file locking implementations, flock() vs. fcntl(), etc. mandatory-locking.txt - info on the Linux implementation of Sys V mandatory file locking. ncpfs.txt diff --git a/Documentation/filesystems/locks.txt b/Documentation/filesystems/locks.txt new file mode 100644 index 0000000..fab857a --- /dev/null +++ b/Documentation/filesystems/locks.txt @@ -0,0 +1,67 @@ + File Locking Release Notes + + Andy Walker + + 12 May 1997 + + +1. What's New? +-------------- + +1.1 Broken Flock Emulation +-------------------------- + +The old flock(2) emulation in the kernel was swapped for proper BSD +compatible flock(2) support in the 1.3.x series of kernels. With the +release of the 2.1.x kernel series, support for the old emulation has +been totally removed, so that we don't need to carry this baggage +forever. + +This should not cause problems for anybody, since everybody using a +2.1.x kernel should have updated their C library to a suitable version +anyway (see the file "Documentation/Changes".) + +1.2 Allow Mixed Locks Again +--------------------------- + +1.2.1 Typical Problems - Sendmail +--------------------------------- +Because sendmail was unable to use the old flock() emulation, many sendmail +installations use fcntl() instead of flock(). This is true of Slackware 3.0 +for example. This gave rise to some other subtle problems if sendmail was +configured to rebuild the alias file. Sendmail tried to lock the aliases.dir +file with fcntl() at the same time as the GDBM routines tried to lock this +file with flock(). With pre 1.3.96 kernels this could result in deadlocks that, +over time, or under a very heavy mail load, would eventually cause the kernel +to lock solid with deadlocked processes. + + +1.2.2 The Solution +------------------ +The solution I have chosen, after much experimentation and discussion, +is to make flock() and fcntl() locks oblivious to each other. Both can +exists, and neither will have any effect on the other. + +I wanted the two lock styles to be cooperative, but there were so many +race and deadlock conditions that the current solution was the only +practical one. It puts us in the same position as, for example, SunOS +4.1.x and several other commercial Unices. The only OS's that support +cooperative flock()/fcntl() are those that emulate flock() using +fcntl(), with all the problems that implies. + + +1.3 Mandatory Locking As A Mount Option +--------------------------------------- + +Mandatory locking, as described in 'Documentation/filesystems/mandatory.txt' +was prior to this release a general configuration option that was valid for +all mounted filesystems. This had a number of inherent dangers, not the +least of which was the ability to freeze an NFS server by asking it to read +a file for which a mandatory lock existed. + +From this release of the kernel, mandatory locking can be turned on and off +on a per-filesystem basis, using the mount options 'mand' and 'nomand'. +The default is to disallow mandatory locking. The intention is that +mandatory locking only be enabled on a local filesystem as the specific need +arises. + diff --git a/Documentation/locks.txt b/Documentation/locks.txt deleted file mode 100644 index fab857a..0000000 --- a/Documentation/locks.txt +++ /dev/null @@ -1,67 +0,0 @@ - File Locking Release Notes - - Andy Walker - - 12 May 1997 - - -1. What's New? --------------- - -1.1 Broken Flock Emulation --------------------------- - -The old flock(2) emulation in the kernel was swapped for proper BSD -compatible flock(2) support in the 1.3.x series of kernels. With the -release of the 2.1.x kernel series, support for the old emulation has -been totally removed, so that we don't need to carry this baggage -forever. - -This should not cause problems for anybody, since everybody using a -2.1.x kernel should have updated their C library to a suitable version -anyway (see the file "Documentation/Changes".) - -1.2 Allow Mixed Locks Again ---------------------------- - -1.2.1 Typical Problems - Sendmail ---------------------------------- -Because sendmail was unable to use the old flock() emulation, many sendmail -installations use fcntl() instead of flock(). This is true of Slackware 3.0 -for example. This gave rise to some other subtle problems if sendmail was -configured to rebuild the alias file. Sendmail tried to lock the aliases.dir -file with fcntl() at the same time as the GDBM routines tried to lock this -file with flock(). With pre 1.3.96 kernels this could result in deadlocks that, -over time, or under a very heavy mail load, would eventually cause the kernel -to lock solid with deadlocked processes. - - -1.2.2 The Solution ------------------- -The solution I have chosen, after much experimentation and discussion, -is to make flock() and fcntl() locks oblivious to each other. Both can -exists, and neither will have any effect on the other. - -I wanted the two lock styles to be cooperative, but there were so many -race and deadlock conditions that the current solution was the only -practical one. It puts us in the same position as, for example, SunOS -4.1.x and several other commercial Unices. The only OS's that support -cooperative flock()/fcntl() are those that emulate flock() using -fcntl(), with all the problems that implies. - - -1.3 Mandatory Locking As A Mount Option ---------------------------------------- - -Mandatory locking, as described in 'Documentation/filesystems/mandatory.txt' -was prior to this release a general configuration option that was valid for -all mounted filesystems. This had a number of inherent dangers, not the -least of which was the ability to freeze an NFS server by asking it to read -a file for which a mandatory lock existed. - -From this release of the kernel, mandatory locking can be turned on and off -on a per-filesystem basis, using the mount options 'mand' and 'nomand'. -The default is to disallow mandatory locking. The intention is that -mandatory locking only be enabled on a local filesystem as the specific need -arises. - -- cgit v0.10.2 From a16877ca9cec211708a161057a7cbfbf2cbc3a53 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Mon, 1 Oct 2007 14:41:11 -0700 Subject: Cleanup macros for distinguishing mandatory locks The combination of S_ISGID bit set and S_IXGRP bit unset is used to mark the inode as "mandatory lockable" and there's a macro for this check called MANDATORY_LOCK(inode). However, fs/locks.c and some filesystems still perform the explicit i_mode checking. Besides, Andrew pointed out, that this macro is buggy itself, as it dereferences the inode arg twice. Convert this macro into static inline function and switch its users to it, making the code shorter and more readable. The __mandatory_lock() helper is to be used in places where the IS_MANDLOCK() for superblock is already known to be true. Signed-off-by: Pavel Emelyanov Cc: Trond Myklebust Cc: "J. Bruce Fields" Cc: David Howells Cc: Eric Van Hensbergen Cc: Ron Minnich Cc: Latchesar Ionkov Cc: Steven Whitehouse Signed-off-by: Andrew Morton diff --git a/fs/locks.c b/fs/locks.c index 43dbc7f..9a3fe0d 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -1119,7 +1119,7 @@ int locks_mandatory_area(int read_write, struct inode *inode, * If we've been sleeping someone might have * changed the permissions behind our back. */ - if ((inode->i_mode & (S_ISGID | S_IXGRP)) == S_ISGID) + if (__mandatory_lock(inode)) continue; } @@ -1761,9 +1761,7 @@ int fcntl_setlk(unsigned int fd, struct file *filp, unsigned int cmd, /* Don't allow mandatory locks on files that may be memory mapped * and shared. */ - if (IS_MANDLOCK(inode) && - (inode->i_mode & (S_ISGID | S_IXGRP)) == S_ISGID && - mapping_writably_mapped(filp->f_mapping)) { + if (mandatory_lock(inode) && mapping_writably_mapped(filp->f_mapping)) { error = -EAGAIN; goto out; } @@ -1887,9 +1885,7 @@ int fcntl_setlk64(unsigned int fd, struct file *filp, unsigned int cmd, /* Don't allow mandatory locks on files that may be memory mapped * and shared. */ - if (IS_MANDLOCK(inode) && - (inode->i_mode & (S_ISGID | S_IXGRP)) == S_ISGID && - mapping_writably_mapped(filp->f_mapping)) { + if (mandatory_lock(inode) && mapping_writably_mapped(filp->f_mapping)) { error = -EAGAIN; goto out; } @@ -2083,9 +2079,7 @@ static void lock_get_status(char* out, struct file_lock *fl, int id, char *pfx) out += sprintf(out, "%6s %s ", (fl->fl_flags & FL_ACCESS) ? "ACCESS" : "POSIX ", (inode == NULL) ? "*NOINODE*" : - (IS_MANDLOCK(inode) && - (inode->i_mode & (S_IXGRP | S_ISGID)) == S_ISGID) ? - "MANDATORY" : "ADVISORY "); + mandatory_lock(inode) ? "MANDATORY" : "ADVISORY "); } else if (IS_FLOCK(fl)) { if (fl->fl_type & LOCK_MAND) { out += sprintf(out, "FLOCK MSNFS "); diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 3f55970..3c028b9 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -2044,7 +2044,7 @@ static inline int io_during_grace_disallowed(struct inode *inode, int flags) { return nfs4_in_grace() && (flags & (RD_STATE | WR_STATE)) - && MANDATORY_LOCK(inode); + && mandatory_lock(inode); } /* diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 7867151..9152f87 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -65,7 +65,7 @@ * locks on them because there is no way to know if the accesser has * the lock. */ -#define IS_ISMNDLK(i) (S_ISREG((i)->i_mode) && MANDATORY_LOCK(i)) +#define IS_ISMNDLK(i) (S_ISREG((i)->i_mode) && mandatory_lock(i)) /* * This is a cache of readahead params that help us choose the proper diff --git a/fs/read_write.c b/fs/read_write.c index 507ddff..124693e 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -205,7 +205,7 @@ int rw_verify_area(int read_write, struct file *file, loff_t *ppos, size_t count if (unlikely((pos < 0) || (loff_t) (pos + count) < 0)) goto Einval; - if (unlikely(inode->i_flock && MANDATORY_LOCK(inode))) { + if (unlikely(inode->i_flock && mandatory_lock(inode))) { int retval = locks_mandatory_area( read_write == READ ? FLOCK_VERIFY_READ : FLOCK_VERIFY_WRITE, inode, file, pos, count); diff --git a/include/linux/fs.h b/include/linux/fs.h index 16421f6..f5075e0 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1369,12 +1369,25 @@ extern int locks_mandatory_area(int, struct inode *, struct file *, loff_t, size * Candidates for mandatory locking have the setgid bit set * but no group execute bit - an otherwise meaningless combination. */ -#define MANDATORY_LOCK(inode) \ - (IS_MANDLOCK(inode) && ((inode)->i_mode & (S_ISGID | S_IXGRP)) == S_ISGID) + +static inline int __mandatory_lock(struct inode *ino) +{ + return (ino->i_mode & (S_ISGID | S_IXGRP)) == S_ISGID; +} + +/* + * ... and these candidates should be on MS_MANDLOCK mounted fs, + * otherwise these will be advisory locks + */ + +static inline int mandatory_lock(struct inode *ino) +{ + return IS_MANDLOCK(ino) && __mandatory_lock(ino); +} static inline int locks_verify_locked(struct inode *inode) { - if (MANDATORY_LOCK(inode)) + if (mandatory_lock(inode)) return locks_mandatory_locked(inode); return 0; } @@ -1385,7 +1398,7 @@ static inline int locks_verify_truncate(struct inode *inode, struct file *filp, loff_t size) { - if (inode->i_flock && MANDATORY_LOCK(inode)) + if (inode->i_flock && mandatory_lock(inode)) return locks_mandatory_area( FLOCK_VERIFY_WRITE, inode, filp, size < inode->i_size ? size : inode->i_size, -- cgit v0.10.2 From 7afaac6202782ec28f2039503bdaef666834d60c Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Mon, 1 Oct 2007 14:41:13 -0700 Subject: GFS2: clean up explicit check for mandatory locks The __mandatory_lock(inode) function makes the same check, but makes the code more readable. Signed-off-by: Pavel Emelyanov Cc: Steven Whitehouse Signed-off-by: Andrew Morton diff --git a/fs/gfs2/ops_file.c b/fs/gfs2/ops_file.c index 94d76ac..28773ca 100644 --- a/fs/gfs2/ops_file.c +++ b/fs/gfs2/ops_file.c @@ -535,7 +535,7 @@ static int gfs2_lock(struct file *file, int cmd, struct file_lock *fl) if (!(fl->fl_flags & FL_POSIX)) return -ENOLCK; - if ((ip->i_inode.i_mode & (S_ISGID | S_IXGRP)) == S_ISGID) + if (__mandatory_lock(&ip->i_inode)) return -ENOLCK; if (sdp->sd_args.ar_localflocks) { @@ -637,7 +637,7 @@ static int gfs2_flock(struct file *file, int cmd, struct file_lock *fl) if (!(fl->fl_flags & FL_FLOCK)) return -ENOLCK; - if ((ip->i_inode.i_mode & (S_ISGID | S_IXGRP)) == S_ISGID) + if (__mandatory_lock(&ip->i_inode)) return -ENOLCK; if (sdp->sd_args.ar_localflocks) -- cgit v0.10.2 From 66abe5f257e719547744fdb8691cf5d20603f051 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Mon, 1 Oct 2007 14:41:13 -0700 Subject: 9PFS: clean up explicit check for mandatory locks The __mandatory_lock(inode) macro makes the same check, but makes the code more readable. Signed-off-by: Pavel Emelyanov Cc: Eric Van Hensbergen Cc: Ron Minnich Cc: Latchesar Ionkov Signed-off-by: Andrew Morton diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c index 2a40c29..7166916 100644 --- a/fs/9p/vfs_file.c +++ b/fs/9p/vfs_file.c @@ -105,7 +105,7 @@ static int v9fs_file_lock(struct file *filp, int cmd, struct file_lock *fl) P9_DPRINTK(P9_DEBUG_VFS, "filp: %p lock: %p\n", filp, fl); /* No mandatory locks */ - if ((inode->i_mode & (S_ISGID | S_IXGRP)) == S_ISGID) + if (__mandatory_lock(inode)) return -ENOLCK; if ((IS_SETLK(cmd) || IS_SETLKW(cmd)) && fl->fl_type != F_UNLCK) { -- cgit v0.10.2 From fc5846e555177c2ae01bcded7fddf60cb10dcfd0 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Mon, 1 Oct 2007 14:41:14 -0700 Subject: AFS: clean up explicit check for mandatory locks The __mandatory_lock(inode) macro makes the same check, but makes the code more readable. Signed-off-by: Pavel Emelyanov Cc: David Howells Signed-off-by: Andrew Morton diff --git a/fs/afs/flock.c b/fs/afs/flock.c index af6952e..210acaf 100644 --- a/fs/afs/flock.c +++ b/fs/afs/flock.c @@ -524,8 +524,7 @@ int afs_lock(struct file *file, int cmd, struct file_lock *fl) (long long) fl->fl_start, (long long) fl->fl_end); /* AFS doesn't support mandatory locks */ - if ((vnode->vfs_inode.i_mode & (S_ISGID | S_IXGRP)) == S_ISGID && - fl->fl_type != F_UNLCK) + if (__mandatory_lock(&vnode->vfs_inode) && fl->fl_type != F_UNLCK) return -ENOLCK; if (IS_GETLK(cmd)) -- cgit v0.10.2 From dfad9441be82f1eadc3fa3f1bbc93f93d48d1bdf Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Mon, 1 Oct 2007 14:41:15 -0700 Subject: NFS: clean up explicit check for mandatory locks The __mandatory_lock(inode) macro makes the same check, but makes the code more readable. Signed-off-by: Pavel Emelyanov Cc: Trond Myklebust Cc: "J. Bruce Fields" Signed-off-by: Andrew Morton diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 579cf8a..9c98ccb 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -522,8 +522,7 @@ static int nfs_lock(struct file *filp, int cmd, struct file_lock *fl) nfs_inc_stats(inode, NFSIOS_VFSLOCK); /* No mandatory locks over NFS */ - if ((inode->i_mode & (S_ISGID | S_IXGRP)) == S_ISGID && - fl->fl_type != F_UNLCK) + if (__mandatory_lock(inode) && fl->fl_type != F_UNLCK) return -ENOLCK; if (IS_GETLK(cmd)) -- cgit v0.10.2 From 094f2825218fec1b240cb8537d2d0a10edf5ddc9 Mon Sep 17 00:00:00 2001 From: Matthias Kaehlcke Date: Tue, 2 Oct 2007 11:21:34 -0700 Subject: fs/locks.c: use list_for_each_entry() instead of list_for_each() fs/locks.c: use list_for_each_entry() instead of list_for_each() in posix_locks_deadlock() and get_locks_status() Signed-off-by: Matthias Kaehlcke Signed-off-by: Andrew Morton diff --git a/fs/locks.c b/fs/locks.c index 9a3fe0d..c17bc00 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -700,13 +700,12 @@ EXPORT_SYMBOL(posix_test_lock); static int posix_locks_deadlock(struct file_lock *caller_fl, struct file_lock *block_fl) { - struct list_head *tmp; + struct file_lock *fl; next_task: if (posix_same_owner(caller_fl, block_fl)) return 1; - list_for_each(tmp, &blocked_list) { - struct file_lock *fl = list_entry(tmp, struct file_lock, fl_link); + list_for_each_entry(fl, &blocked_list, fl_link) { if (posix_same_owner(fl, block_fl)) { fl = fl->fl_next; block_fl = fl; @@ -2164,24 +2163,22 @@ static void move_lock_status(char **p, off_t* pos, off_t offset) int get_locks_status(char *buffer, char **start, off_t offset, int length) { - struct list_head *tmp; + struct file_lock *fl; char *q = buffer; off_t pos = 0; int i = 0; lock_kernel(); - list_for_each(tmp, &file_lock_list) { - struct list_head *btmp; - struct file_lock *fl = list_entry(tmp, struct file_lock, fl_link); + list_for_each_entry(fl, &file_lock_list, fl_link) { + struct file_lock *bfl; + lock_get_status(q, fl, ++i, ""); move_lock_status(&q, &pos, offset); if(pos >= offset+length) goto done; - list_for_each(btmp, &fl->fl_block) { - struct file_lock *bfl = list_entry(btmp, - struct file_lock, fl_block); + list_for_each_entry(bfl, &fl->fl_block, fl_block) { lock_get_status(q, bfl, i, " ->"); move_lock_status(&q, &pos, offset); -- cgit v0.10.2 From 7f8ada98d9edd83d6ebd01e431e15b024a4a3dc4 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Mon, 1 Oct 2007 14:41:15 -0700 Subject: Rework /proc/locks via seq_files and seq_list helpers Currently /proc/locks is shown with a proc_read function, but its behavior is rather complex as it has to manually handle current offset and buffer length. On the other hand, files that show objects from lists can be easily reimplemented using the sequential files and the seq_list_XXX() helpers. This saves (as usually) 16 lines of code and more than 200 from the .text section. [akpm@linux-foundation.org: no externs in C] [akpm@linux-foundation.org: warning fixes] Signed-off-by: Pavel Emelyanov Cc: "J. Bruce Fields" Cc: Trond Myklebust Signed-off-by: Andrew Morton diff --git a/fs/locks.c b/fs/locks.c index c17bc00..7f9a3ea 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -2066,134 +2066,114 @@ int vfs_cancel_lock(struct file *filp, struct file_lock *fl) EXPORT_SYMBOL_GPL(vfs_cancel_lock); -static void lock_get_status(char* out, struct file_lock *fl, int id, char *pfx) +#ifdef CONFIG_PROC_FS +#include + +static void lock_get_status(struct seq_file *f, struct file_lock *fl, + int id, char *pfx) { struct inode *inode = NULL; if (fl->fl_file != NULL) inode = fl->fl_file->f_path.dentry->d_inode; - out += sprintf(out, "%d:%s ", id, pfx); + seq_printf(f, "%d:%s ", id, pfx); if (IS_POSIX(fl)) { - out += sprintf(out, "%6s %s ", + seq_printf(f, "%6s %s ", (fl->fl_flags & FL_ACCESS) ? "ACCESS" : "POSIX ", (inode == NULL) ? "*NOINODE*" : mandatory_lock(inode) ? "MANDATORY" : "ADVISORY "); } else if (IS_FLOCK(fl)) { if (fl->fl_type & LOCK_MAND) { - out += sprintf(out, "FLOCK MSNFS "); + seq_printf(f, "FLOCK MSNFS "); } else { - out += sprintf(out, "FLOCK ADVISORY "); + seq_printf(f, "FLOCK ADVISORY "); } } else if (IS_LEASE(fl)) { - out += sprintf(out, "LEASE "); + seq_printf(f, "LEASE "); if (fl->fl_type & F_INPROGRESS) - out += sprintf(out, "BREAKING "); + seq_printf(f, "BREAKING "); else if (fl->fl_file) - out += sprintf(out, "ACTIVE "); + seq_printf(f, "ACTIVE "); else - out += sprintf(out, "BREAKER "); + seq_printf(f, "BREAKER "); } else { - out += sprintf(out, "UNKNOWN UNKNOWN "); + seq_printf(f, "UNKNOWN UNKNOWN "); } if (fl->fl_type & LOCK_MAND) { - out += sprintf(out, "%s ", + seq_printf(f, "%s ", (fl->fl_type & LOCK_READ) ? (fl->fl_type & LOCK_WRITE) ? "RW " : "READ " : (fl->fl_type & LOCK_WRITE) ? "WRITE" : "NONE "); } else { - out += sprintf(out, "%s ", + seq_printf(f, "%s ", (fl->fl_type & F_INPROGRESS) ? (fl->fl_type & F_UNLCK) ? "UNLCK" : "READ " : (fl->fl_type & F_WRLCK) ? "WRITE" : "READ "); } if (inode) { #ifdef WE_CAN_BREAK_LSLK_NOW - out += sprintf(out, "%d %s:%ld ", fl->fl_pid, + seq_printf(f, "%d %s:%ld ", fl->fl_pid, inode->i_sb->s_id, inode->i_ino); #else /* userspace relies on this representation of dev_t ;-( */ - out += sprintf(out, "%d %02x:%02x:%ld ", fl->fl_pid, + seq_printf(f, "%d %02x:%02x:%ld ", fl->fl_pid, MAJOR(inode->i_sb->s_dev), MINOR(inode->i_sb->s_dev), inode->i_ino); #endif } else { - out += sprintf(out, "%d :0 ", fl->fl_pid); + seq_printf(f, "%d :0 ", fl->fl_pid); } if (IS_POSIX(fl)) { if (fl->fl_end == OFFSET_MAX) - out += sprintf(out, "%Ld EOF\n", fl->fl_start); + seq_printf(f, "%Ld EOF\n", fl->fl_start); else - out += sprintf(out, "%Ld %Ld\n", fl->fl_start, - fl->fl_end); + seq_printf(f, "%Ld %Ld\n", fl->fl_start, fl->fl_end); } else { - out += sprintf(out, "0 EOF\n"); + seq_printf(f, "0 EOF\n"); } } -static void move_lock_status(char **p, off_t* pos, off_t offset) +static int locks_show(struct seq_file *f, void *v) { - int len; - len = strlen(*p); - if(*pos >= offset) { - /* the complete line is valid */ - *p += len; - *pos += len; - return; - } - if(*pos+len > offset) { - /* use the second part of the line */ - int i = offset-*pos; - memmove(*p,*p+i,len-i); - *p += len-i; - *pos += len; - return; - } - /* discard the complete line */ - *pos += len; -} + struct file_lock *fl, *bfl; -/** - * get_locks_status - reports lock usage in /proc/locks - * @buffer: address in userspace to write into - * @start: ? - * @offset: how far we are through the buffer - * @length: how much to read - */ + fl = list_entry(v, struct file_lock, fl_link); -int get_locks_status(char *buffer, char **start, off_t offset, int length) -{ - struct file_lock *fl; - char *q = buffer; - off_t pos = 0; - int i = 0; + lock_get_status(f, fl, (long)f->private, ""); - lock_kernel(); - list_for_each_entry(fl, &file_lock_list, fl_link) { - struct file_lock *bfl; + list_for_each_entry(bfl, &fl->fl_block, fl_block) + lock_get_status(f, bfl, (long)f->private, " ->"); - lock_get_status(q, fl, ++i, ""); - move_lock_status(&q, &pos, offset); + f->private++; + return 0; +} - if(pos >= offset+length) - goto done; +static void *locks_start(struct seq_file *f, loff_t *pos) +{ + lock_kernel(); + f->private = (void *)1; + return seq_list_start(&file_lock_list, *pos); +} - list_for_each_entry(bfl, &fl->fl_block, fl_block) { - lock_get_status(q, bfl, i, " ->"); - move_lock_status(&q, &pos, offset); +static void *locks_next(struct seq_file *f, void *v, loff_t *pos) +{ + return seq_list_next(v, &file_lock_list, pos); +} - if(pos >= offset+length) - goto done; - } - } -done: +static void locks_stop(struct seq_file *f, void *v) +{ unlock_kernel(); - *start = buffer; - if(q-buffer < length) - return (q-buffer); - return length; } +struct seq_operations locks_seq_operations = { + .start = locks_start, + .next = locks_next, + .stop = locks_stop, + .show = locks_show, +}; +#endif + /** * lock_may_read - checks that the region is free of locks * @inode: the inode that is being read diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c index bee251c..c9d6d5f 100644 --- a/fs/proc/proc_misc.c +++ b/fs/proc/proc_misc.c @@ -66,7 +66,6 @@ extern int get_stram_list(char *); extern int get_filesystem_list(char *); extern int get_exec_domain_list(char *); extern int get_dma_list(char *); -extern int get_locks_status (char *, char **, off_t, int); static int proc_calc_metrics(char *page, char **start, off_t off, int count, int *eof, int len) @@ -617,16 +616,18 @@ static int cmdline_read_proc(char *page, char **start, off_t off, return proc_calc_metrics(page, start, off, count, eof, len); } -static int locks_read_proc(char *page, char **start, off_t off, - int count, int *eof, void *data) +static int locks_open(struct inode *inode, struct file *filp) { - int len = get_locks_status(page, start, off, count); - - if (len < count) - *eof = 1; - return len; + return seq_open(filp, &locks_seq_operations); } +static const struct file_operations proc_locks_operations = { + .open = locks_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; + static int execdomains_read_proc(char *page, char **start, off_t off, int count, int *eof, void *data) { @@ -684,7 +685,6 @@ void __init proc_misc_init(void) #endif {"filesystems", filesystems_read_proc}, {"cmdline", cmdline_read_proc}, - {"locks", locks_read_proc}, {"execdomains", execdomains_read_proc}, {NULL,} }; @@ -702,6 +702,7 @@ void __init proc_misc_init(void) entry->proc_fops = &proc_kmsg_operations; } #endif + create_seq_entry("locks", 0, &proc_locks_operations); create_seq_entry("devices", 0, &proc_devinfo_operations); create_seq_entry("cpuinfo", 0, &proc_cpuinfo_operations); #ifdef CONFIG_BLOCK diff --git a/include/linux/fs.h b/include/linux/fs.h index f5075e0..4f1e8ce 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -883,6 +883,7 @@ extern int vfs_setlease(struct file *, long, struct file_lock **); extern int lease_modify(struct file_lock **, int); extern int lock_may_read(struct inode *, loff_t start, unsigned long count); extern int lock_may_write(struct inode *, loff_t start, unsigned long count); +extern struct seq_operations locks_seq_operations; struct fasync_struct { int magic; -- cgit v0.10.2 From 5e7fc436426b1f9e106f511a049de91c82ec2c53 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Tue, 2 Oct 2007 14:18:12 -0400 Subject: nfsd: remove IS_ISMNDLCK macro This macro is only used in one place; in this place it seems simpler to put open-code it and move the comment to where it's used. Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 9152f87..085ded6 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -61,12 +61,6 @@ #define NFSDDBG_FACILITY NFSDDBG_FILEOP -/* We must ignore files (but only files) which might have mandatory - * locks on them because there is no way to know if the accesser has - * the lock. - */ -#define IS_ISMNDLK(i) (S_ISREG((i)->i_mode) && mandatory_lock(i)) - /* * This is a cache of readahead params that help us choose the proper * readahead strategy. Initially, we set all readahead parameters to 0 @@ -680,7 +674,12 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, err = nfserr_perm; if (IS_APPEND(inode) && (access & MAY_WRITE)) goto out; - if (IS_ISMNDLK(inode)) + /* + * We must ignore files (but only files) which might have mandatory + * locks on them because there is no way to know if the accesser has + * the lock. + */ + if (S_ISREG((inode)->i_mode) && mandatory_lock(inode)) goto out; if (!inode->i_fop) -- cgit v0.10.2