summaryrefslogtreecommitdiff
path: root/Documentation/filesystems
diff options
context:
space:
mode:
Diffstat (limited to 'Documentation/filesystems')
-rw-r--r--Documentation/filesystems/Locking13
-rw-r--r--Documentation/filesystems/configfs/configfs.txt57
-rw-r--r--Documentation/filesystems/configfs/configfs_example.c8
-rw-r--r--Documentation/filesystems/ecryptfs.txt77
-rw-r--r--Documentation/filesystems/proc.txt125
-rw-r--r--Documentation/filesystems/tmpfs.txt10
-rw-r--r--Documentation/filesystems/vfs.txt51
7 files changed, 292 insertions, 49 deletions
diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking
index d866551..f0f8258 100644
--- a/Documentation/filesystems/Locking
+++ b/Documentation/filesystems/Locking
@@ -510,13 +510,24 @@ More details about quota locking can be found in fs/dquot.c.
prototypes:
void (*open)(struct vm_area_struct*);
void (*close)(struct vm_area_struct*);
+ int (*fault)(struct vm_area_struct*, struct vm_fault *);
struct page *(*nopage)(struct vm_area_struct*, unsigned long, int *);
+ int (*page_mkwrite)(struct vm_area_struct *, struct page *);
locking rules:
- BKL mmap_sem
+ BKL mmap_sem PageLocked(page)
open: no yes
close: no yes
+fault: no yes
nopage: no yes
+page_mkwrite: no yes no
+
+ ->page_mkwrite() is called when a previously read-only page is
+about to become writeable. The file system is responsible for
+protecting against truncate races. Once appropriate action has been
+taking to lock out truncate, the page range should be verified to be
+within i_size. The page mapping should also be checked that it is not
+NULL.
================================================================================
Dubious stuff
diff --git a/Documentation/filesystems/configfs/configfs.txt b/Documentation/filesystems/configfs/configfs.txt
index b34cdb5..d1b9825 100644
--- a/Documentation/filesystems/configfs/configfs.txt
+++ b/Documentation/filesystems/configfs/configfs.txt
@@ -238,6 +238,8 @@ config_item_type.
struct config_group *(*make_group)(struct config_group *group,
const char *name);
int (*commit_item)(struct config_item *item);
+ void (*disconnect_notify)(struct config_group *group,
+ struct config_item *item);
void (*drop_item)(struct config_group *group,
struct config_item *item);
};
@@ -268,6 +270,16 @@ the item in other threads, the memory is safe. It may take some time
for the item to actually disappear from the subsystem's usage. But it
is gone from configfs.
+When drop_item() is called, the item's linkage has already been torn
+down. It no longer has a reference on its parent and has no place in
+the item hierarchy. If a client needs to do some cleanup before this
+teardown happens, the subsystem can implement the
+ct_group_ops->disconnect_notify() method. The method is called after
+configfs has removed the item from the filesystem view but before the
+item is removed from its parent group. Like drop_item(),
+disconnect_notify() is void and cannot fail. Client subsystems should
+not drop any references here, as they still must do it in drop_item().
+
A config_group cannot be removed while it still has child items. This
is implemented in the configfs rmdir(2) code. ->drop_item() will not be
called, as the item has not been dropped. rmdir(2) will fail, as the
@@ -280,18 +292,18 @@ tells configfs to make the subsystem appear in the file tree.
struct configfs_subsystem {
struct config_group su_group;
- struct semaphore su_sem;
+ struct mutex su_mutex;
};
int configfs_register_subsystem(struct configfs_subsystem *subsys);
void configfs_unregister_subsystem(struct configfs_subsystem *subsys);
- A subsystem consists of a toplevel config_group and a semaphore.
+ A subsystem consists of a toplevel config_group and a mutex.
The group is where child config_items are created. For a subsystem,
this group is usually defined statically. Before calling
configfs_register_subsystem(), the subsystem must have initialized the
group via the usual group _init() functions, and it must also have
-initialized the semaphore.
+initialized the mutex.
When the register call returns, the subsystem is live, and it
will be visible via configfs. At that point, mkdir(2) can be called and
the subsystem must be ready for it.
@@ -303,7 +315,7 @@ subsystem/group and the simple_child item in configfs_example.c It
shows a trivial object displaying and storing an attribute, and a simple
group creating and destroying these children.
-[Hierarchy Navigation and the Subsystem Semaphore]
+[Hierarchy Navigation and the Subsystem Mutex]
There is an extra bonus that configfs provides. The config_groups and
config_items are arranged in a hierarchy due to the fact that they
@@ -314,19 +326,19 @@ and config_item->ci_parent structure members.
A subsystem can navigate the cg_children list and the ci_parent pointer
to see the tree created by the subsystem. This can race with configfs'
-management of the hierarchy, so configfs uses the subsystem semaphore to
+management of the hierarchy, so configfs uses the subsystem mutex to
protect modifications. Whenever a subsystem wants to navigate the
hierarchy, it must do so under the protection of the subsystem
-semaphore.
+mutex.
-A subsystem will be prevented from acquiring the semaphore while a newly
+A subsystem will be prevented from acquiring the mutex while a newly
allocated item has not been linked into this hierarchy. Similarly, it
-will not be able to acquire the semaphore while a dropping item has not
+will not be able to acquire the mutex while a dropping item has not
yet been unlinked. This means that an item's ci_parent pointer will
never be NULL while the item is in configfs, and that an item will only
be in its parent's cg_children list for the same duration. This allows
a subsystem to trust ci_parent and cg_children while they hold the
-semaphore.
+mutex.
[Item Aggregation Via symlink(2)]
@@ -386,6 +398,33 @@ As a consequence of this, default_groups cannot be removed directly via
rmdir(2). They also are not considered when rmdir(2) on the parent
group is checking for children.
+[Dependant Subsystems]
+
+Sometimes other drivers depend on particular configfs items. For
+example, ocfs2 mounts depend on a heartbeat region item. If that
+region item is removed with rmdir(2), the ocfs2 mount must BUG or go
+readonly. Not happy.
+
+configfs provides two additional API calls: configfs_depend_item() and
+configfs_undepend_item(). A client driver can call
+configfs_depend_item() on an existing item to tell configfs that it is
+depended on. configfs will then return -EBUSY from rmdir(2) for that
+item. When the item is no longer depended on, the client driver calls
+configfs_undepend_item() on it.
+
+These API cannot be called underneath any configfs callbacks, as
+they will conflict. They can block and allocate. A client driver
+probably shouldn't calling them of its own gumption. Rather it should
+be providing an API that external subsystems call.
+
+How does this work? Imagine the ocfs2 mount process. When it mounts,
+it asks for a heartbeat region item. This is done via a call into the
+heartbeat code. Inside the heartbeat code, the region item is looked
+up. Here, the heartbeat code calls configfs_depend_item(). If it
+succeeds, then heartbeat knows the region is safe to give to ocfs2.
+If it fails, it was being torn down anyway, and heartbeat can gracefully
+pass up an error.
+
[Committable Items]
NOTE: Committable items are currently unimplemented.
diff --git a/Documentation/filesystems/configfs/configfs_example.c b/Documentation/filesystems/configfs/configfs_example.c
index 2d6a14a..25151fd 100644
--- a/Documentation/filesystems/configfs/configfs_example.c
+++ b/Documentation/filesystems/configfs/configfs_example.c
@@ -277,11 +277,10 @@ static struct config_item *simple_children_make_item(struct config_group *group,
{
struct simple_child *simple_child;
- simple_child = kmalloc(sizeof(struct simple_child), GFP_KERNEL);
+ simple_child = kzalloc(sizeof(struct simple_child), GFP_KERNEL);
if (!simple_child)
return NULL;
- memset(simple_child, 0, sizeof(struct simple_child));
config_item_init_type_name(&simple_child->item, name,
&simple_child_type);
@@ -364,12 +363,11 @@ static struct config_group *group_children_make_group(struct config_group *group
{
struct simple_children *simple_children;
- simple_children = kmalloc(sizeof(struct simple_children),
+ simple_children = kzalloc(sizeof(struct simple_children),
GFP_KERNEL);
if (!simple_children)
return NULL;
- memset(simple_children, 0, sizeof(struct simple_children));
config_group_init_type_name(&simple_children->group, name,
&simple_children_type);
@@ -453,7 +451,7 @@ static int __init configfs_example_init(void)
subsys = example_subsys[i];
config_group_init(&subsys->su_group);
- init_MUTEX(&subsys->su_sem);
+ mutex_init(&subsys->su_mutex);
ret = configfs_register_subsystem(subsys);
if (ret) {
printk(KERN_ERR "Error %d while registering subsystem %s\n",
diff --git a/Documentation/filesystems/ecryptfs.txt b/Documentation/filesystems/ecryptfs.txt
new file mode 100644
index 0000000..01d8a08
--- /dev/null
+++ b/Documentation/filesystems/ecryptfs.txt
@@ -0,0 +1,77 @@
+eCryptfs: A stacked cryptographic filesystem for Linux
+
+eCryptfs is free software. Please see the file COPYING for details.
+For documentation, please see the files in the doc/ subdirectory. For
+building and installation instructions please see the INSTALL file.
+
+Maintainer: Phillip Hellewell
+Lead developer: Michael A. Halcrow <mhalcrow@us.ibm.com>
+Developers: Michael C. Thompson
+ Kent Yoder
+Web Site: http://ecryptfs.sf.net
+
+This software is currently undergoing development. Make sure to
+maintain a backup copy of any data you write into eCryptfs.
+
+eCryptfs requires the userspace tools downloadable from the
+SourceForge site:
+
+http://sourceforge.net/projects/ecryptfs/
+
+Userspace requirements include:
+ - David Howells' userspace keyring headers and libraries (version
+ 1.0 or higher), obtainable from
+ http://people.redhat.com/~dhowells/keyutils/
+ - Libgcrypt
+
+
+NOTES
+
+In the beta/experimental releases of eCryptfs, when you upgrade
+eCryptfs, you should copy the files to an unencrypted location and
+then copy the files back into the new eCryptfs mount to migrate the
+files.
+
+
+MOUNT-WIDE PASSPHRASE
+
+Create a new directory into which eCryptfs will write its encrypted
+files (i.e., /root/crypt). Then, create the mount point directory
+(i.e., /mnt/crypt). Now it's time to mount eCryptfs:
+
+mount -t ecryptfs /root/crypt /mnt/crypt
+
+You should be prompted for a passphrase and a salt (the salt may be
+blank).
+
+Try writing a new file:
+
+echo "Hello, World" > /mnt/crypt/hello.txt
+
+The operation will complete. Notice that there is a new file in
+/root/crypt that is at least 12288 bytes in size (depending on your
+host page size). This is the encrypted underlying file for what you
+just wrote. To test reading, from start to finish, you need to clear
+the user session keyring:
+
+keyctl clear @u
+
+Then umount /mnt/crypt and mount again per the instructions given
+above.
+
+cat /mnt/crypt/hello.txt
+
+
+NOTES
+
+eCryptfs version 0.1 should only be mounted on (1) empty directories
+or (2) directories containing files only created by eCryptfs. If you
+mount a directory that has pre-existing files not created by eCryptfs,
+then behavior is undefined. Do not run eCryptfs in higher verbosity
+levels unless you are doing so for the sole purpose of debugging or
+development, since secret values will be written out to the system log
+in that case.
+
+
+Mike Halcrow
+mhalcrow@us.ibm.com
diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt
index 8756a07..4a37e25 100644
--- a/Documentation/filesystems/proc.txt
+++ b/Documentation/filesystems/proc.txt
@@ -42,6 +42,7 @@ Table of Contents
2.12 /proc/<pid>/oom_adj - Adjust the oom-killer score
2.13 /proc/<pid>/oom_score - Display current oom-killer score
2.14 /proc/<pid>/io - Display the IO accounting fields
+ 2.15 /proc/<pid>/coredump_filter - Core dump filtering settings
------------------------------------------------------------------------------
Preface
@@ -171,7 +172,9 @@ read the file /proc/PID/status:
This shows you nearly the same information you would get if you viewed it with
the ps command. In fact, ps uses the proc file system to obtain its
information. The statm file contains more detailed information about the
-process memory usage. Its seven fields are explained in Table 1-2.
+process memory usage. Its seven fields are explained in Table 1-2. The stat
+file contains details information about the process itself. Its fields are
+explained in Table 1-3.
Table 1-2: Contents of the statm files (as of 2.6.8-rc3)
@@ -188,16 +191,65 @@ Table 1-2: Contents of the statm files (as of 2.6.8-rc3)
dt number of dirty pages (always 0 on 2.6)
..............................................................................
+
+Table 1-3: Contents of the stat files (as of 2.6.22-rc3)
+..............................................................................
+ Field Content
+ pid process id
+ tcomm filename of the executable
+ state state (R is running, S is sleeping, D is sleeping in an
+ uninterruptible wait, Z is zombie, T is traced or stopped)
+ ppid process id of the parent process
+ pgrp pgrp of the process
+ sid session id
+ tty_nr tty the process uses
+ tty_pgrp pgrp of the tty
+ flags task flags
+ min_flt number of minor faults
+ cmin_flt number of minor faults with child's
+ maj_flt number of major faults
+ cmaj_flt number of major faults with child's
+ utime user mode jiffies
+ stime kernel mode jiffies
+ cutime user mode jiffies with child's
+ cstime kernel mode jiffies with child's
+ priority priority level
+ nice nice level
+ num_threads number of threads
+ start_time time the process started after system boot
+ vsize virtual memory size
+ rss resident set memory size
+ rsslim current limit in bytes on the rss
+ start_code address above which program text can run
+ end_code address below which program text can run
+ start_stack address of the start of the stack
+ esp current value of ESP
+ eip current value of EIP
+ pending bitmap of pending signals (obsolete)
+ blocked bitmap of blocked signals (obsolete)
+ sigign bitmap of ignored signals (obsolete)
+ sigcatch bitmap of catched signals (obsolete)
+ wchan address where process went to sleep
+ 0 (place holder)
+ 0 (place holder)
+ exit_signal signal to send to parent thread on exit
+ task_cpu which CPU the task is scheduled on
+ rt_priority realtime priority
+ policy scheduling policy (man sched_setscheduler)
+ blkio_ticks time spent waiting for block IO
+..............................................................................
+
+
1.2 Kernel data
---------------
Similar to the process entries, the kernel data files give information about
the running kernel. The files used to obtain this information are contained in
-/proc and are listed in Table 1-3. Not all of these will be present in your
+/proc and are listed in Table 1-4. Not all of these will be present in your
system. It depends on the kernel configuration and the loaded modules, which
files are there, and which are missing.
-Table 1-3: Kernel info in /proc
+Table 1-4: Kernel info in /proc
..............................................................................
File Content
apm Advanced power management info
@@ -473,10 +525,10 @@ IDE devices:
More detailed information can be found in the controller specific
subdirectories. These are named ide0, ide1 and so on. Each of these
-directories contains the files shown in table 1-4.
+directories contains the files shown in table 1-5.
-Table 1-4: IDE controller info in /proc/ide/ide?
+Table 1-5: IDE controller info in /proc/ide/ide?
..............................................................................
File Content
channel IDE channel (0 or 1)
@@ -486,11 +538,11 @@ Table 1-4: IDE controller info in /proc/ide/ide?
..............................................................................
Each device connected to a controller has a separate subdirectory in the
-controllers directory. The files listed in table 1-5 are contained in these
+controllers directory. The files listed in table 1-6 are contained in these
directories.
-Table 1-5: IDE device information
+Table 1-6: IDE device information
..............................................................................
File Content
cache The cache
@@ -1014,6 +1066,13 @@ check the amount of free space (value is in seconds). Default settings are: 4,
resume it if we have a value of 3 or more percent; consider information about
the amount of free space valid for 30 seconds
+audit_argv_kb
+-------------
+
+The file contains a single value denoting the limit on the argv array size
+for execve (in KiB). This limit is only applied when system call auditing for
+execve is enabled, otherwise the value is ignored.
+
ctrl-alt-del
------------
@@ -1297,6 +1356,21 @@ nr_hugepages configures number of hugetlb page reserved for the system.
hugetlb_shm_group contains group id that is allowed to create SysV shared
memory segment using hugetlb page.
+hugepages_treat_as_movable
+--------------------------
+
+This parameter is only useful when kernelcore= is specified at boot time to
+create ZONE_MOVABLE for pages that may be reclaimed or migrated. Huge pages
+are not movable so are not normally allocated from ZONE_MOVABLE. A non-zero
+value written to hugepages_treat_as_movable allows huge pages to be allocated
+from ZONE_MOVABLE.
+
+Once enabled, the ZONE_MOVABLE is treated as an area of memory the huge
+pages pool can easily grow or shrink within. Assuming that applications are
+not running that mlock() a lot of memory, it is likely the huge pages pool
+can grow to the size of ZONE_MOVABLE by repeatedly entering the desired value
+into nr_hugepages and triggering page reclaim.
+
laptop_mode
-----------
@@ -2111,4 +2185,41 @@ those 64-bit counters, process A could see an intermediate result.
More information about this can be found within the taskstats documentation in
Documentation/accounting.
+2.15 /proc/<pid>/coredump_filter - Core dump filtering settings
+---------------------------------------------------------------
+When a process is dumped, all anonymous memory is written to a core file as
+long as the size of the core file isn't limited. But sometimes we don't want
+to dump some memory segments, for example, huge shared memory. Conversely,
+sometimes we want to save file-backed memory segments into a core file, not
+only the individual files.
+
+/proc/<pid>/coredump_filter allows you to customize which memory segments
+will be dumped when the <pid> process is dumped. coredump_filter is a bitmask
+of memory types. If a bit of the bitmask is set, memory segments of the
+corresponding memory type are dumped, otherwise they are not dumped.
+
+The following 4 memory types are supported:
+ - (bit 0) anonymous private memory
+ - (bit 1) anonymous shared memory
+ - (bit 2) file-backed private memory
+ - (bit 3) file-backed shared memory
+
+ Note that MMIO pages such as frame buffer are never dumped and vDSO pages
+ are always dumped regardless of the bitmask status.
+
+Default value of coredump_filter is 0x3; this means all anonymous memory
+segments are dumped.
+
+If you don't want to dump all shared memory segments attached to pid 1234,
+write 1 to the process's proc file.
+
+ $ echo 0x1 > /proc/1234/coredump_filter
+
+When a new process is created, the process inherits the bitmask status from its
+parent. It is useful to set up coredump_filter before the program runs.
+For example:
+
+ $ echo 0x7 > /proc/self/coredump_filter
+ $ ./some_program
+
------------------------------------------------------------------------------
diff --git a/Documentation/filesystems/tmpfs.txt b/Documentation/filesystems/tmpfs.txt
index 6dd0508..145e440 100644
--- a/Documentation/filesystems/tmpfs.txt
+++ b/Documentation/filesystems/tmpfs.txt
@@ -94,10 +94,10 @@ largest node numbers in the range. For example, mpol=bind:0-3,5,7,9-15
Note that trying to mount a tmpfs with an mpol option will fail if the
running kernel does not support NUMA; and will fail if its nodelist
-specifies a node >= MAX_NUMNODES. If your system relies on that tmpfs
-being mounted, but from time to time runs a kernel built without NUMA
-capability (perhaps a safe recovery kernel), or configured to support
-fewer nodes, then it is advisable to omit the mpol option from automatic
+specifies a node which is not online. If your system relies on that
+tmpfs being mounted, but from time to time runs a kernel built without
+NUMA capability (perhaps a safe recovery kernel), or with fewer nodes
+online, then it is advisable to omit the mpol option from automatic
mount options. It can be added later, when the tmpfs is already mounted
on MountPoint, by 'mount -o remount,mpol=Policy:NodeList MountPoint'.
@@ -121,4 +121,4 @@ RAM/SWAP in 10240 inodes and it is only accessible by root.
Author:
Christoph Rohland <cr@sap.com>, 1.12.01
Updated:
- Hugh Dickins <hugh@veritas.com>, 19 February 2006
+ Hugh Dickins <hugh@veritas.com>, 4 June 2007
diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt
index a47cc81..045f3e0 100644
--- a/Documentation/filesystems/vfs.txt
+++ b/Documentation/filesystems/vfs.txt
@@ -3,7 +3,7 @@
Original author: Richard Gooch <rgooch@atnf.csiro.au>
- Last updated on October 28, 2005
+ Last updated on June 24, 2007.
Copyright (C) 1999 Richard Gooch
Copyright (C) 2005 Pekka Enberg
@@ -107,7 +107,7 @@ file /proc/filesystems.
struct file_system_type
-----------------------
-This describes the filesystem. As of kernel 2.6.13, the following
+This describes the filesystem. As of kernel 2.6.22, the following
members are defined:
struct file_system_type {
@@ -119,6 +119,8 @@ struct file_system_type {
struct module *owner;
struct file_system_type * next;
struct list_head fs_supers;
+ struct lock_class_key s_lock_key;
+ struct lock_class_key s_umount_key;
};
name: the name of the filesystem type, such as "ext2", "iso9660",
@@ -137,11 +139,12 @@ struct file_system_type {
next: for internal VFS use: you should initialize this to NULL
+ s_lock_key, s_umount_key: lockdep-specific
+
The get_sb() method has the following arguments:
- struct super_block *sb: the superblock structure. This is partially
- initialized by the VFS and the rest must be initialized by the
- get_sb() method
+ struct file_system_type *fs_type: decribes the filesystem, partly initialized
+ by the specific filesystem code
int flags: mount flags
@@ -150,12 +153,13 @@ The get_sb() method has the following arguments:
void *data: arbitrary mount options, usually comes as an ASCII
string
- int silent: whether or not to be silent on error
+ struct vfsmount *mnt: a vfs-internal representation of a mount point
The get_sb() method must determine if the block device specified
-in the superblock contains a filesystem of the type the method
-supports. On success the method returns the superblock pointer, on
-failure it returns NULL.
+in the dev_name and fs_type contains a filesystem of the type the method
+supports. If it succeeds in opening the named block device, it initializes a
+struct super_block descriptor for the filesystem contained by the block device.
+On failure it returns an error.
The most interesting member of the superblock structure that the
get_sb() method fills in is the "s_op" field. This is a pointer to
@@ -193,7 +197,7 @@ struct super_operations
-----------------------
This describes how the VFS can manipulate the superblock of your
-filesystem. As of kernel 2.6.13, the following members are defined:
+filesystem. As of kernel 2.6.22, the following members are defined:
struct super_operations {
struct inode *(*alloc_inode)(struct super_block *sb);
@@ -216,8 +220,6 @@ struct super_operations {
void (*clear_inode) (struct inode *);
void (*umount_begin) (struct super_block *);
- void (*sync_inodes) (struct super_block *sb,
- struct writeback_control *wbc);
int (*show_options)(struct seq_file *, struct vfsmount *);
ssize_t (*quota_read)(struct super_block *, int, char *, size_t, loff_t);
@@ -300,9 +302,6 @@ or bottom half).
umount_begin: called when the VFS is unmounting a filesystem.
- sync_inodes: called when the VFS is writing out dirty data associated with
- a superblock.
-
show_options: called by the VFS to show mount options for /proc/<pid>/mounts.
quota_read: called by the VFS to read from filesystem quota file.
@@ -324,7 +323,7 @@ struct inode_operations
-----------------------
This describes how the VFS can manipulate an inode in your
-filesystem. As of kernel 2.6.13, the following members are defined:
+filesystem. As of kernel 2.6.22, the following members are defined:
struct inode_operations {
int (*create) (struct inode *,struct dentry *,int, struct nameidata *);
@@ -348,6 +347,7 @@ struct inode_operations {
ssize_t (*getxattr) (struct dentry *, const char *, void *, size_t);
ssize_t (*listxattr) (struct dentry *, char *, size_t);
int (*removexattr) (struct dentry *, const char *);
+ void (*truncate_range)(struct inode *, loff_t, loff_t);
};
Again, all methods are called without any locks being held, unless
@@ -444,6 +444,9 @@ otherwise noted.
removexattr: called by the VFS to remove an extended attribute from
a file. This method is called by removexattr(2) system call.
+ truncate_range: a method provided by the underlying filesystem to truncate a
+ range of blocks , i.e. punch a hole somewhere in a file.
+
The Address Space Object
========================
@@ -522,7 +525,7 @@ struct address_space_operations
-------------------------------
This describes how the VFS can manipulate mapping of a file to page cache in
-your filesystem. As of kernel 2.6.16, the following members are defined:
+your filesystem. As of kernel 2.6.22, the following members are defined:
struct address_space_operations {
int (*writepage)(struct page *page, struct writeback_control *wbc);
@@ -543,6 +546,7 @@ struct address_space_operations {
int);
/* migrate the contents of a page to the specified target */
int (*migratepage) (struct page *, struct page *);
+ int (*launder_page) (struct page *);
};
writepage: called by the VM to write a dirty page to backing store.
@@ -689,6 +693,10 @@ struct address_space_operations {
transfer any private data across and update any references
that it has to the page.
+ launder_page: Called before freeing a page - it writes back the dirty page. To
+ prevent redirtying the page, it is kept locked during the whole
+ operation.
+
The File Object
===============
@@ -699,9 +707,10 @@ struct file_operations
----------------------
This describes how the VFS can manipulate an open file. As of kernel
-2.6.17, the following members are defined:
+2.6.22, the following members are defined:
struct file_operations {
+ struct module *owner;
loff_t (*llseek) (struct file *, loff_t, int);
ssize_t (*read) (struct file *, char __user *, size_t, loff_t *);
ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *);
@@ -728,10 +737,8 @@ struct file_operations {
int (*check_flags)(int);
int (*dir_notify)(struct file *filp, unsigned long arg);
int (*flock) (struct file *, int, struct file_lock *);
- ssize_t (*splice_write)(struct pipe_inode_info *, struct file *, size_t, unsigned
-int);
- ssize_t (*splice_read)(struct file *, struct pipe_inode_info *, size_t, unsigned
-int);
+ ssize_t (*splice_write)(struct pipe_inode_info *, struct file *, size_t, unsigned int);
+ ssize_t (*splice_read)(struct file *, struct pipe_inode_info *, size_t, unsigned int);
};
Again, all methods are called without any locks being held, unless