summaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/Makefile2
-rw-r--r--fs/cifs/cifsproto.h7
-rw-r--r--fs/cifs/cifssmb.c6
-rw-r--r--fs/cifs/dir.c11
-rw-r--r--fs/cifs/inode.c6
-rw-r--r--fs/cifs/link.c26
-rw-r--r--fs/dcache.c7
-rw-r--r--fs/eventpoll.c4
-rw-r--r--fs/ext2/super.c1
-rw-r--r--fs/ext4/ext4.h10
-rw-r--r--fs/ext4/ext4_jbd2.c9
-rw-r--r--fs/ext4/extents.c45
-rw-r--r--fs/ext4/inode.c12
-rw-r--r--fs/ext4/mballoc.c17
-rw-r--r--fs/ext4/super.c21
-rw-r--r--fs/fs-writeback.c15
-rw-r--r--fs/gfs2/aops.c30
-rw-r--r--fs/gfs2/glock.c2
-rw-r--r--fs/gfs2/glops.c10
-rw-r--r--fs/gfs2/log.c4
-rw-r--r--fs/gfs2/meta_io.c5
-rw-r--r--fs/gfs2/ops_fstype.c12
-rw-r--r--fs/jbd2/journal.c18
-rw-r--r--fs/jbd2/recovery.c2
-rw-r--r--fs/jbd2/transaction.c16
-rw-r--r--fs/kernfs/Makefile5
-rw-r--r--fs/kernfs/dir.c1073
-rw-r--r--fs/kernfs/file.c867
-rw-r--r--fs/kernfs/inode.c377
-rw-r--r--fs/kernfs/kernfs-internal.h122
-rw-r--r--fs/kernfs/mount.c165
-rw-r--r--fs/kernfs/symlink.c151
-rw-r--r--fs/namespace.c4
-rw-r--r--fs/nilfs2/segment.c10
-rw-r--r--fs/sysfs/Makefile2
-rw-r--r--fs/sysfs/dir.c1075
-rw-r--r--fs/sysfs/file.c961
-rw-r--r--fs/sysfs/group.c102
-rw-r--r--fs/sysfs/inode.c331
-rw-r--r--fs/sysfs/mount.c184
-rw-r--r--fs/sysfs/symlink.c219
-rw-r--r--fs/sysfs/sysfs.h236
-rw-r--r--fs/xfs/xfs_attr_remote.c2
-rw-r--r--fs/xfs/xfs_bmap_util.c1
44 files changed, 3318 insertions, 2867 deletions
diff --git a/fs/Makefile b/fs/Makefile
index 4fe6df3..39a824f 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -53,7 +53,7 @@ obj-$(CONFIG_FHANDLE) += fhandle.o
obj-y += quota/
obj-$(CONFIG_PROC_FS) += proc/
-obj-$(CONFIG_SYSFS) += sysfs/
+obj-$(CONFIG_SYSFS) += sysfs/ kernfs/
obj-$(CONFIG_CONFIGFS_FS) += configfs/
obj-y += devpts/
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index aa33976..2c29db6 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -477,9 +477,10 @@ extern int CIFSGetExtAttr(const unsigned int xid, struct cifs_tcon *tcon,
const int netfid, __u64 *pExtAttrBits, __u64 *pMask);
extern void cifs_autodisable_serverino(struct cifs_sb_info *cifs_sb);
extern bool CIFSCouldBeMFSymlink(const struct cifs_fattr *fattr);
-extern int CIFSCheckMFSymlink(struct cifs_fattr *fattr,
- const unsigned char *path,
- struct cifs_sb_info *cifs_sb, unsigned int xid);
+extern int CIFSCheckMFSymlink(unsigned int xid, struct cifs_tcon *tcon,
+ struct cifs_sb_info *cifs_sb,
+ struct cifs_fattr *fattr,
+ const unsigned char *path);
extern int mdfour(unsigned char *, unsigned char *, int);
extern int E_md4hash(const unsigned char *passwd, unsigned char *p16,
const struct nls_table *codepage);
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 124aa02..d707edb 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -4010,7 +4010,7 @@ QFileInfoRetry:
rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
(struct smb_hdr *) pSMBr, &bytes_returned, 0);
if (rc) {
- cifs_dbg(FYI, "Send error in QPathInfo = %d\n", rc);
+ cifs_dbg(FYI, "Send error in QFileInfo = %d", rc);
} else { /* decode response */
rc = validate_t2((struct smb_t2_rsp *)pSMBr);
@@ -4179,7 +4179,7 @@ UnixQFileInfoRetry:
rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
(struct smb_hdr *) pSMBr, &bytes_returned, 0);
if (rc) {
- cifs_dbg(FYI, "Send error in QPathInfo = %d\n", rc);
+ cifs_dbg(FYI, "Send error in UnixQFileInfo = %d", rc);
} else { /* decode response */
rc = validate_t2((struct smb_t2_rsp *)pSMBr);
@@ -4263,7 +4263,7 @@ UnixQPathInfoRetry:
rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
(struct smb_hdr *) pSMBr, &bytes_returned, 0);
if (rc) {
- cifs_dbg(FYI, "Send error in QPathInfo = %d\n", rc);
+ cifs_dbg(FYI, "Send error in UnixQPathInfo = %d", rc);
} else { /* decode response */
rc = validate_t2((struct smb_t2_rsp *)pSMBr);
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index 11ff5f1..a514e0a 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -193,7 +193,7 @@ check_name(struct dentry *direntry)
static int
cifs_do_create(struct inode *inode, struct dentry *direntry, unsigned int xid,
struct tcon_link *tlink, unsigned oflags, umode_t mode,
- __u32 *oplock, struct cifs_fid *fid, int *created)
+ __u32 *oplock, struct cifs_fid *fid)
{
int rc = -ENOENT;
int create_options = CREATE_NOT_DIR;
@@ -349,7 +349,6 @@ cifs_do_create(struct inode *inode, struct dentry *direntry, unsigned int xid,
.device = 0,
};
- *created |= FILE_CREATED;
if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) {
args.uid = current_fsuid();
if (inode->i_mode & S_ISGID)
@@ -480,13 +479,16 @@ cifs_atomic_open(struct inode *inode, struct dentry *direntry,
cifs_add_pending_open(&fid, tlink, &open);
rc = cifs_do_create(inode, direntry, xid, tlink, oflags, mode,
- &oplock, &fid, opened);
+ &oplock, &fid);
if (rc) {
cifs_del_pending_open(&open);
goto out;
}
+ if ((oflags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
+ *opened |= FILE_CREATED;
+
rc = finish_open(file, direntry, generic_file_open, opened);
if (rc) {
if (server->ops->close)
@@ -529,7 +531,6 @@ int cifs_create(struct inode *inode, struct dentry *direntry, umode_t mode,
struct TCP_Server_Info *server;
struct cifs_fid fid;
__u32 oplock;
- int created = FILE_CREATED;
cifs_dbg(FYI, "cifs_create parent inode = 0x%p name is: %s and dentry = 0x%p\n",
inode, direntry->d_name.name, direntry);
@@ -546,7 +547,7 @@ int cifs_create(struct inode *inode, struct dentry *direntry, umode_t mode,
server->ops->new_lease_key(&fid);
rc = cifs_do_create(inode, direntry, xid, tlink, oflags, mode,
- &oplock, &fid, &created);
+ &oplock, &fid);
if (!rc && server->ops->close)
server->ops->close(xid, tcon, &fid);
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 36f9ebb..49719b8 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -383,7 +383,8 @@ int cifs_get_inode_info_unix(struct inode **pinode,
/* check for Minshall+French symlinks */
if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MF_SYMLINKS) {
- int tmprc = CIFSCheckMFSymlink(&fattr, full_path, cifs_sb, xid);
+ int tmprc = CIFSCheckMFSymlink(xid, tcon, cifs_sb, &fattr,
+ full_path);
if (tmprc)
cifs_dbg(FYI, "CIFSCheckMFSymlink: %d\n", tmprc);
}
@@ -799,7 +800,8 @@ cifs_get_inode_info(struct inode **inode, const char *full_path,
/* check for Minshall+French symlinks */
if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MF_SYMLINKS) {
- tmprc = CIFSCheckMFSymlink(&fattr, full_path, cifs_sb, xid);
+ tmprc = CIFSCheckMFSymlink(xid, tcon, cifs_sb, &fattr,
+ full_path);
if (tmprc)
cifs_dbg(FYI, "CIFSCheckMFSymlink: %d\n", tmprc);
}
diff --git a/fs/cifs/link.c b/fs/cifs/link.c
index cc023471..92aee08 100644
--- a/fs/cifs/link.c
+++ b/fs/cifs/link.c
@@ -354,34 +354,30 @@ open_query_close_cifs_symlink(const unsigned char *path, char *pbuf,
int
-CIFSCheckMFSymlink(struct cifs_fattr *fattr,
- const unsigned char *path,
- struct cifs_sb_info *cifs_sb, unsigned int xid)
+CIFSCheckMFSymlink(unsigned int xid, struct cifs_tcon *tcon,
+ struct cifs_sb_info *cifs_sb, struct cifs_fattr *fattr,
+ const unsigned char *path)
{
- int rc = 0;
+ int rc;
u8 *buf = NULL;
unsigned int link_len = 0;
unsigned int bytes_read = 0;
- struct cifs_tcon *ptcon;
if (!CIFSCouldBeMFSymlink(fattr))
/* it's not a symlink */
return 0;
buf = kmalloc(CIFS_MF_SYMLINK_FILE_SIZE, GFP_KERNEL);
- if (!buf) {
- rc = -ENOMEM;
- goto out;
- }
+ if (!buf)
+ return -ENOMEM;
- ptcon = tlink_tcon(cifs_sb_tlink(cifs_sb));
- if ((ptcon->ses) && (ptcon->ses->server->ops->query_mf_symlink))
- rc = ptcon->ses->server->ops->query_mf_symlink(path, buf,
- &bytes_read, cifs_sb, xid);
+ if (tcon->ses->server->ops->query_mf_symlink)
+ rc = tcon->ses->server->ops->query_mf_symlink(path, buf,
+ &bytes_read, cifs_sb, xid);
else
- goto out;
+ rc = -ENOSYS;
- if (rc != 0)
+ if (rc)
goto out;
if (bytes_read == 0) /* not a symlink */
diff --git a/fs/dcache.c b/fs/dcache.c
index 6055d61..cb4a106 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -3061,8 +3061,13 @@ char *d_path(const struct path *path, char *buf, int buflen)
* thus don't need to be hashed. They also don't need a name until a
* user wants to identify the object in /proc/pid/fd/. The little hack
* below allows us to generate a name for these objects on demand:
+ *
+ * Some pseudo inodes are mountable. When they are mounted
+ * path->dentry == path->mnt->mnt_root. In that case don't call d_dname
+ * and instead have d_path return the mounted path.
*/
- if (path->dentry->d_op && path->dentry->d_op->d_dname)
+ if (path->dentry->d_op && path->dentry->d_op->d_dname &&
+ (!IS_ROOT(path->dentry) || path->dentry != path->mnt->mnt_root))
return path->dentry->d_op->d_dname(path->dentry, buf, buflen);
rcu_read_lock();
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 8b5e258..af90312 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -1907,10 +1907,6 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
}
}
}
- if (op == EPOLL_CTL_DEL && is_file_epoll(tf.file)) {
- tep = tf.file->private_data;
- mutex_lock_nested(&tep->mtx, 1);
- }
/*
* Try to lookup the file inside our RB tree, Since we grabbed "mtx"
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 2885349..20d6697 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -1493,6 +1493,7 @@ static ssize_t ext2_quota_write(struct super_block *sb, int type,
sb->s_blocksize - offset : towrite;
tmp_bh.b_state = 0;
+ tmp_bh.b_size = sb->s_blocksize;
err = ext2_get_block(inode, blk, &tmp_bh, 1);
if (err < 0)
goto out;
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index e618503..ece5556 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -268,6 +268,16 @@ struct ext4_io_submit {
/* Translate # of blks to # of clusters */
#define EXT4_NUM_B2C(sbi, blks) (((blks) + (sbi)->s_cluster_ratio - 1) >> \
(sbi)->s_cluster_bits)
+/* Mask out the low bits to get the starting block of the cluster */
+#define EXT4_PBLK_CMASK(s, pblk) ((pblk) & \
+ ~((ext4_fsblk_t) (s)->s_cluster_ratio - 1))
+#define EXT4_LBLK_CMASK(s, lblk) ((lblk) & \
+ ~((ext4_lblk_t) (s)->s_cluster_ratio - 1))
+/* Get the cluster offset */
+#define EXT4_PBLK_COFF(s, pblk) ((pblk) & \
+ ((ext4_fsblk_t) (s)->s_cluster_ratio - 1))
+#define EXT4_LBLK_COFF(s, lblk) ((lblk) & \
+ ((ext4_lblk_t) (s)->s_cluster_ratio - 1))
/*
* Structure of a blocks group descriptor
diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c
index 17ac112..3fe29de 100644
--- a/fs/ext4/ext4_jbd2.c
+++ b/fs/ext4/ext4_jbd2.c
@@ -259,6 +259,15 @@ int __ext4_handle_dirty_metadata(const char *where, unsigned int line,
if (WARN_ON_ONCE(err)) {
ext4_journal_abort_handle(where, line, __func__, bh,
handle, err);
+ ext4_error_inode(inode, where, line,
+ bh->b_blocknr,
+ "journal_dirty_metadata failed: "
+ "handle type %u started at line %u, "
+ "credits %u/%u, errcode %d",
+ handle->h_type,
+ handle->h_line_no,
+ handle->h_requested_credits,
+ handle->h_buffer_credits, err);
}
} else {
if (inode)
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 35f65cf..3384dc4 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -360,8 +360,10 @@ static int ext4_valid_extent(struct inode *inode, struct ext4_extent *ext)
{
ext4_fsblk_t block = ext4_ext_pblock(ext);
int len = ext4_ext_get_actual_len(ext);
+ ext4_lblk_t lblock = le32_to_cpu(ext->ee_block);
+ ext4_lblk_t last = lblock + len - 1;
- if (len == 0)
+ if (lblock > last)
return 0;
return ext4_data_block_valid(EXT4_SB(inode->i_sb), block, len);
}
@@ -387,11 +389,26 @@ static int ext4_valid_extent_entries(struct inode *inode,
if (depth == 0) {
/* leaf entries */
struct ext4_extent *ext = EXT_FIRST_EXTENT(eh);
+ struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es;
+ ext4_fsblk_t pblock = 0;
+ ext4_lblk_t lblock = 0;
+ ext4_lblk_t prev = 0;
+ int len = 0;
while (entries) {
if (!ext4_valid_extent(inode, ext))
return 0;
+
+ /* Check for overlapping extents */
+ lblock = le32_to_cpu(ext->ee_block);
+ len = ext4_ext_get_actual_len(ext);
+ if ((lblock <= prev) && prev) {
+ pblock = ext4_ext_pblock(ext);
+ es->s_last_error_block = cpu_to_le64(pblock);
+ return 0;
+ }
ext++;
entries--;
+ prev = lblock + len - 1;
}
} else {
struct ext4_extent_idx *ext_idx = EXT_FIRST_INDEX(eh);
@@ -1834,8 +1851,7 @@ static unsigned int ext4_ext_check_overlap(struct ext4_sb_info *sbi,
depth = ext_depth(inode);
if (!path[depth].p_ext)
goto out;
- b2 = le32_to_cpu(path[depth].p_ext->ee_block);
- b2 &= ~(sbi->s_cluster_ratio - 1);
+ b2 = EXT4_LBLK_CMASK(sbi, le32_to_cpu(path[depth].p_ext->ee_block));
/*
* get the next allocated block if the extent in the path
@@ -1845,7 +1861,7 @@ static unsigned int ext4_ext_check_overlap(struct ext4_sb_info *sbi,
b2 = ext4_ext_next_allocated_block(path);
if (b2 == EXT_MAX_BLOCKS)
goto out;
- b2 &= ~(sbi->s_cluster_ratio - 1);
+ b2 = EXT4_LBLK_CMASK(sbi, b2);
}
/* check for wrap through zero on extent logical start block*/
@@ -2504,7 +2520,7 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
* extent, we have to mark the cluster as used (store negative
* cluster number in partial_cluster).
*/
- unaligned = pblk & (sbi->s_cluster_ratio - 1);
+ unaligned = EXT4_PBLK_COFF(sbi, pblk);
if (unaligned && (ee_len == num) &&
(*partial_cluster != -((long long)EXT4_B2C(sbi, pblk))))
*partial_cluster = EXT4_B2C(sbi, pblk);
@@ -2598,7 +2614,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
* accidentally freeing it later on
*/
pblk = ext4_ext_pblock(ex);
- if (pblk & (sbi->s_cluster_ratio - 1))
+ if (EXT4_PBLK_COFF(sbi, pblk))
*partial_cluster =
-((long long)EXT4_B2C(sbi, pblk));
ex--;
@@ -3753,7 +3769,7 @@ int ext4_find_delalloc_cluster(struct inode *inode, ext4_lblk_t lblk)
{
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
ext4_lblk_t lblk_start, lblk_end;
- lblk_start = lblk & (~(sbi->s_cluster_ratio - 1));
+ lblk_start = EXT4_LBLK_CMASK(sbi, lblk);
lblk_end = lblk_start + sbi->s_cluster_ratio - 1;
return ext4_find_delalloc_range(inode, lblk_start, lblk_end);
@@ -3812,9 +3828,9 @@ get_reserved_cluster_alloc(struct inode *inode, ext4_lblk_t lblk_start,
trace_ext4_get_reserved_cluster_alloc(inode, lblk_start, num_blks);
/* Check towards left side */
- c_offset = lblk_start & (sbi->s_cluster_ratio - 1);
+ c_offset = EXT4_LBLK_COFF(sbi, lblk_start);
if (c_offset) {
- lblk_from = lblk_start & (~(sbi->s_cluster_ratio - 1));
+ lblk_from = EXT4_LBLK_CMASK(sbi, lblk_start);
lblk_to = lblk_from + c_offset - 1;
if (ext4_find_delalloc_range(inode, lblk_from, lblk_to))
@@ -3822,7 +3838,7 @@ get_reserved_cluster_alloc(struct inode *inode, ext4_lblk_t lblk_start,
}
/* Now check towards right. */
- c_offset = (lblk_start + num_blks) & (sbi->s_cluster_ratio - 1);
+ c_offset = EXT4_LBLK_COFF(sbi, lblk_start + num_blks);
if (allocated_clusters && c_offset) {
lblk_from = lblk_start + num_blks;
lblk_to = lblk_from + (sbi->s_cluster_ratio - c_offset) - 1;
@@ -4030,7 +4046,7 @@ static int get_implied_cluster_alloc(struct super_block *sb,
struct ext4_ext_path *path)
{
struct ext4_sb_info *sbi = EXT4_SB(sb);
- ext4_lblk_t c_offset = map->m_lblk & (sbi->s_cluster_ratio-1);
+ ext4_lblk_t c_offset = EXT4_LBLK_COFF(sbi, map->m_lblk);
ext4_lblk_t ex_cluster_start, ex_cluster_end;
ext4_lblk_t rr_cluster_start;
ext4_lblk_t ee_block = le32_to_cpu(ex->ee_block);
@@ -4048,8 +4064,7 @@ static int get_implied_cluster_alloc(struct super_block *sb,
(rr_cluster_start == ex_cluster_start)) {
if (rr_cluster_start == ex_cluster_end)
ee_start += ee_len - 1;
- map->m_pblk = (ee_start & ~(sbi->s_cluster_ratio - 1)) +
- c_offset;
+ map->m_pblk = EXT4_PBLK_CMASK(sbi, ee_start) + c_offset;
map->m_len = min(map->m_len,
(unsigned) sbi->s_cluster_ratio - c_offset);
/*
@@ -4203,7 +4218,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
*/
map->m_flags &= ~EXT4_MAP_FROM_CLUSTER;
newex.ee_block = cpu_to_le32(map->m_lblk);
- cluster_offset = map->m_lblk & (sbi->s_cluster_ratio-1);
+ cluster_offset = EXT4_LBLK_COFF(sbi, map->m_lblk);
/*
* If we are doing bigalloc, check to see if the extent returned
@@ -4271,7 +4286,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
* needed so that future calls to get_implied_cluster_alloc()
* work correctly.
*/
- offset = map->m_lblk & (sbi->s_cluster_ratio - 1);
+ offset = EXT4_LBLK_COFF(sbi, map->m_lblk);
ar.len = EXT4_NUM_B2C(sbi, offset+allocated);
ar.goal -= offset;
ar.logical -= offset;
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 0757634..61d49ff 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1206,7 +1206,6 @@ static int ext4_journalled_write_end(struct file *file,
*/
static int ext4_da_reserve_metadata(struct inode *inode, ext4_lblk_t lblock)
{
- int retries = 0;
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
struct ext4_inode_info *ei = EXT4_I(inode);
unsigned int md_needed;
@@ -1218,7 +1217,6 @@ static int ext4_da_reserve_metadata(struct inode *inode, ext4_lblk_t lblock)
* in order to allocate nrblocks
* worse case is one extent per block
*/
-repeat:
spin_lock(&ei->i_block_reservation_lock);
/*
* ext4_calc_metadata_amount() has side effects, which we have
@@ -1238,10 +1236,6 @@ repeat:
ei->i_da_metadata_calc_len = save_len;
ei->i_da_metadata_calc_last_lblock = save_last_lblock;
spin_unlock(&ei->i_block_reservation_lock);
- if (ext4_should_retry_alloc(inode->i_sb, &retries)) {
- cond_resched();
- goto repeat;
- }
return -ENOSPC;
}
ei->i_reserved_meta_blocks += md_needed;
@@ -1255,7 +1249,6 @@ repeat:
*/
static int ext4_da_reserve_space(struct inode *inode, ext4_lblk_t lblock)
{
- int retries = 0;
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
struct ext4_inode_info *ei = EXT4_I(inode);
unsigned int md_needed;
@@ -1277,7 +1270,6 @@ static int ext4_da_reserve_space(struct inode *inode, ext4_lblk_t lblock)
* in order to allocate nrblocks
* worse case is one extent per block
*/
-repeat:
spin_lock(&ei->i_block_reservation_lock);
/*
* ext4_calc_metadata_amount() has side effects, which we have
@@ -1297,10 +1289,6 @@ repeat:
ei->i_da_metadata_calc_len = save_len;
ei->i_da_metadata_calc_last_lblock = save_last_lblock;
spin_unlock(&ei->i_block_reservation_lock);
- if (ext4_should_retry_alloc(inode->i_sb, &retries)) {
- cond_resched();
- goto repeat;
- }
dquot_release_reservation_block(inode, EXT4_C2B(sbi, 1));
return -ENOSPC;
}
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 4d113ef..04a5c75 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -3442,6 +3442,9 @@ static void ext4_mb_pa_callback(struct rcu_head *head)
{
struct ext4_prealloc_space *pa;
pa = container_of(head, struct ext4_prealloc_space, u.pa_rcu);
+
+ BUG_ON(atomic_read(&pa->pa_count));
+ BUG_ON(pa->pa_deleted == 0);
kmem_cache_free(ext4_pspace_cachep, pa);
}
@@ -3455,11 +3458,13 @@ static void ext4_mb_put_pa(struct ext4_allocation_context *ac,
ext4_group_t grp;
ext4_fsblk_t grp_blk;
- if (!atomic_dec_and_test(&pa->pa_count) || pa->pa_free != 0)
- return;
-
/* in this short window concurrent discard can set pa_deleted */
spin_lock(&pa->pa_lock);
+ if (!atomic_dec_and_test(&pa->pa_count) || pa->pa_free != 0) {
+ spin_unlock(&pa->pa_lock);
+ return;
+ }
+
if (pa->pa_deleted == 1) {
spin_unlock(&pa->pa_lock);
return;
@@ -4121,7 +4126,7 @@ ext4_mb_initialize_context(struct ext4_allocation_context *ac,
ext4_get_group_no_and_offset(sb, goal, &group, &block);
/* set up allocation goals */
- ac->ac_b_ex.fe_logical = ar->logical & ~(sbi->s_cluster_ratio - 1);
+ ac->ac_b_ex.fe_logical = EXT4_LBLK_CMASK(sbi, ar->logical);
ac->ac_status = AC_STATUS_CONTINUE;
ac->ac_sb = sb;
ac->ac_inode = ar->inode;
@@ -4663,7 +4668,7 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode,
* blocks at the beginning or the end unless we are explicitly
* requested to avoid doing so.
*/
- overflow = block & (sbi->s_cluster_ratio - 1);
+ overflow = EXT4_PBLK_COFF(sbi, block);
if (overflow) {
if (flags & EXT4_FREE_BLOCKS_NOFREE_FIRST_CLUSTER) {
overflow = sbi->s_cluster_ratio - overflow;
@@ -4677,7 +4682,7 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode,
count += overflow;
}
}
- overflow = count & (sbi->s_cluster_ratio - 1);
+ overflow = EXT4_LBLK_COFF(sbi, count);
if (overflow) {
if (flags & EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER) {
if (count > overflow)
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index c977f4e..1f7784d 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -792,7 +792,7 @@ static void ext4_put_super(struct super_block *sb)
}
ext4_es_unregister_shrinker(sbi);
- del_timer(&sbi->s_err_report);
+ del_timer_sync(&sbi->s_err_report);
ext4_release_system_zone(sb);
ext4_mb_release(sb);
ext4_ext_release(sb);
@@ -3316,11 +3316,19 @@ int ext4_calculate_overhead(struct super_block *sb)
}
-static ext4_fsblk_t ext4_calculate_resv_clusters(struct ext4_sb_info *sbi)
+static ext4_fsblk_t ext4_calculate_resv_clusters(struct super_block *sb)
{
ext4_fsblk_t resv_clusters;
/*
+ * There's no need to reserve anything when we aren't using extents.
+ * The space estimates are exact, there are no unwritten extents,
+ * hole punching doesn't need new metadata... This is needed especially
+ * to keep ext2/3 backward compatibility.
+ */
+ if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS))
+ return 0;
+ /*
* By default we reserve 2% or 4096 clusters, whichever is smaller.
* This should cover the situations where we can not afford to run
* out of space like for example punch hole, or converting
@@ -3328,7 +3336,8 @@ static ext4_fsblk_t ext4_calculate_resv_clusters(struct ext4_sb_info *sbi)
* allocation would require 1, or 2 blocks, higher numbers are
* very rare.
*/
- resv_clusters = ext4_blocks_count(sbi->s_es) >> sbi->s_cluster_bits;
+ resv_clusters = ext4_blocks_count(EXT4_SB(sb)->s_es) >>
+ EXT4_SB(sb)->s_cluster_bits;
do_div(resv_clusters, 50);
resv_clusters = min_t(ext4_fsblk_t, resv_clusters, 4096);
@@ -4071,10 +4080,10 @@ no_journal:
"available");
}
- err = ext4_reserve_clusters(sbi, ext4_calculate_resv_clusters(sbi));
+ err = ext4_reserve_clusters(sbi, ext4_calculate_resv_clusters(sb));
if (err) {
ext4_msg(sb, KERN_ERR, "failed to reserve %llu clusters for "
- "reserved pool", ext4_calculate_resv_clusters(sbi));
+ "reserved pool", ext4_calculate_resv_clusters(sb));
goto failed_mount4a;
}
@@ -4184,7 +4193,7 @@ failed_mount_wq:
}
failed_mount3:
ext4_es_unregister_shrinker(sbi);
- del_timer(&sbi->s_err_report);
+ del_timer_sync(&sbi->s_err_report);
if (sbi->s_flex_groups)
ext4_kvfree(sbi->s_flex_groups);
percpu_counter_destroy(&sbi->s_freeclusters_counter);
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 1f4a10e..e0259a1 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -516,13 +516,16 @@ writeback_single_inode(struct inode *inode, struct bdi_writeback *wb,
}
WARN_ON(inode->i_state & I_SYNC);
/*
- * Skip inode if it is clean. We don't want to mess with writeback
- * lists in this function since flusher thread may be doing for example
- * sync in parallel and if we move the inode, it could get skipped. So
- * here we make sure inode is on some writeback list and leave it there
- * unless we have completely cleaned the inode.
+ * Skip inode if it is clean and we have no outstanding writeback in
+ * WB_SYNC_ALL mode. We don't want to mess with writeback lists in this
+ * function since flusher thread may be doing for example sync in
+ * parallel and if we move the inode, it could get skipped. So here we
+ * make sure inode is on some writeback list and leave it there unless
+ * we have completely cleaned the inode.
*/
- if (!(inode->i_state & I_DIRTY))
+ if (!(inode->i_state & I_DIRTY) &&
+ (wbc->sync_mode != WB_SYNC_ALL ||
+ !mapping_tagged(inode->i_mapping, PAGECACHE_TAG_WRITEBACK)))
goto out;
inode->i_state |= I_SYNC;
spin_unlock(&inode->i_lock);
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index b7fc035..73f3e4e 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -986,6 +986,7 @@ static ssize_t gfs2_direct_IO(int rw, struct kiocb *iocb,
{
struct file *file = iocb->ki_filp;
struct inode *inode = file->f_mapping->host;
+ struct address_space *mapping = inode->i_mapping;
struct gfs2_inode *ip = GFS2_I(inode);
struct gfs2_holder gh;
int rv;
@@ -1006,6 +1007,35 @@ static ssize_t gfs2_direct_IO(int rw, struct kiocb *iocb,
if (rv != 1)
goto out; /* dio not valid, fall back to buffered i/o */
+ /*
+ * Now since we are holding a deferred (CW) lock at this point, you
+ * might be wondering why this is ever needed. There is a case however
+ * where we've granted a deferred local lock against a cached exclusive
+ * glock. That is ok provided all granted local locks are deferred, but
+ * it also means that it is possible to encounter pages which are
+ * cached and possibly also mapped. So here we check for that and sort
+ * them out ahead of the dio. The glock state machine will take care of
+ * everything else.
+ *
+ * If in fact the cached glock state (gl->gl_state) is deferred (CW) in
+ * the first place, mapping->nr_pages will always be zero.
+ */
+ if (mapping->nrpages) {
+ loff_t lstart = offset & (PAGE_CACHE_SIZE - 1);
+ loff_t len = iov_length(iov, nr_segs);
+ loff_t end = PAGE_ALIGN(offset + len) - 1;
+
+ rv = 0;
+ if (len == 0)
+ goto out;
+ if (test_and_clear_bit(GIF_SW_PAGED, &ip->i_flags))
+ unmap_shared_mapping_range(ip->i_inode.i_mapping, offset, len);
+ rv = filemap_write_and_wait_range(mapping, lstart, end);
+ if (rv)
+ return rv;
+ truncate_inode_pages_range(mapping, lstart, end);
+ }
+
rv = __blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov,
offset, nr_segs, gfs2_get_block_direct,
NULL, NULL, 0);
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index c8420f7..6f7a47c 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -1655,6 +1655,7 @@ static int dump_holder(struct seq_file *seq, const struct gfs2_holder *gh)
struct task_struct *gh_owner = NULL;
char flags_buf[32];
+ rcu_read_lock();
if (gh->gh_owner_pid)
gh_owner = pid_task(gh->gh_owner_pid, PIDTYPE_PID);
gfs2_print_dbg(seq, " H: s:%s f:%s e:%d p:%ld [%s] %pS\n",
@@ -1664,6 +1665,7 @@ static int dump_holder(struct seq_file *seq, const struct gfs2_holder *gh)
gh->gh_owner_pid ? (long)pid_nr(gh->gh_owner_pid) : -1,
gh_owner ? gh_owner->comm : "(ended)",
(void *)gh->gh_ip);
+ rcu_read_unlock();
return 0;
}
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index db908f6..f88dcd9 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -192,8 +192,11 @@ static void inode_go_sync(struct gfs2_glock *gl)
if (ip && !S_ISREG(ip->i_inode.i_mode))
ip = NULL;
- if (ip && test_and_clear_bit(GIF_SW_PAGED, &ip->i_flags))
- unmap_shared_mapping_range(ip->i_inode.i_mapping, 0, 0);
+ if (ip) {
+ if (test_and_clear_bit(GIF_SW_PAGED, &ip->i_flags))
+ unmap_shared_mapping_range(ip->i_inode.i_mapping, 0, 0);
+ inode_dio_wait(&ip->i_inode);
+ }
if (!test_and_clear_bit(GLF_DIRTY, &gl->gl_flags))
return;
@@ -410,6 +413,9 @@ static int inode_go_lock(struct gfs2_holder *gh)
return error;
}
+ if (gh->gh_state != LM_ST_DEFERRED)
+ inode_dio_wait(&ip->i_inode);
+
if ((ip->i_diskflags & GFS2_DIF_TRUNC_IN_PROG) &&
(gl->gl_state == LM_ST_EXCLUSIVE) &&
(gh->gh_state == LM_ST_EXCLUSIVE)) {
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index 610613f..9dcb977 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -551,10 +551,10 @@ void gfs2_add_revoke(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd)
struct buffer_head *bh = bd->bd_bh;
struct gfs2_glock *gl = bd->bd_gl;
- gfs2_remove_from_ail(bd);
- bd->bd_bh = NULL;
bh->b_private = NULL;
bd->bd_blkno = bh->b_blocknr;
+ gfs2_remove_from_ail(bd); /* drops ref on bh */
+ bd->bd_bh = NULL;
bd->bd_ops = &gfs2_revoke_lops;
sdp->sd_log_num_revoke++;
atomic_inc(&gl->gl_revokes);
diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c
index 9324150..52f177b 100644
--- a/fs/gfs2/meta_io.c
+++ b/fs/gfs2/meta_io.c
@@ -258,6 +258,7 @@ void gfs2_remove_from_journal(struct buffer_head *bh, struct gfs2_trans *tr, int
struct address_space *mapping = bh->b_page->mapping;
struct gfs2_sbd *sdp = gfs2_mapping2sbd(mapping);
struct gfs2_bufdata *bd = bh->b_private;
+ int was_pinned = 0;
if (test_clear_buffer_pinned(bh)) {
trace_gfs2_pin(bd, 0);
@@ -273,12 +274,16 @@ void gfs2_remove_from_journal(struct buffer_head *bh, struct gfs2_trans *tr, int
tr->tr_num_databuf_rm++;
}
tr->tr_touched = 1;
+ was_pinned = 1;
brelse(bh);
}
if (bd) {
spin_lock(&sdp->sd_ail_lock);
if (bd->bd_tr) {
gfs2_trans_add_revoke(sdp, bd);
+ } else if (was_pinned) {
+ bh->b_private = NULL;
+ kmem_cache_free(gfs2_bufdata_cachep, bd);
}
spin_unlock(&sdp->sd_ail_lock);
}
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index 82303b4..52fa883 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -1366,8 +1366,18 @@ static struct dentry *gfs2_mount(struct file_system_type *fs_type, int flags,
if (IS_ERR(s))
goto error_bdev;
- if (s->s_root)
+ if (s->s_root) {
+ /*
+ * s_umount nests inside bd_mutex during
+ * __invalidate_device(). blkdev_put() acquires
+ * bd_mutex and can't be called under s_umount. Drop
+ * s_umount temporarily. This is safe as we're
+ * holding an active reference.
+ */
+ up_write(&s->s_umount);
blkdev_put(bdev, mode);
+ down_write(&s->s_umount);
+ }
memset(&args, 0, sizeof(args));
args.ar_quota = GFS2_QUOTA_DEFAULT;
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 5203264..5fa344a 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -702,7 +702,7 @@ int jbd2_log_wait_commit(journal_t *journal, tid_t tid)
read_lock(&journal->j_state_lock);
#ifdef CONFIG_JBD2_DEBUG
if (!tid_geq(journal->j_commit_request, tid)) {
- printk(KERN_EMERG
+ printk(KERN_ERR
"%s: error: j_commit_request=%d, tid=%d\n",
__func__, journal->j_commit_request, tid);
}
@@ -718,10 +718,8 @@ int jbd2_log_wait_commit(journal_t *journal, tid_t tid)
}
read_unlock(&journal->j_state_lock);
- if (unlikely(is_journal_aborted(journal))) {
- printk(KERN_EMERG "journal commit I/O error\n");
+ if (unlikely(is_journal_aborted(journal)))
err = -EIO;
- }
return err;
}
@@ -1527,13 +1525,13 @@ static int journal_get_superblock(journal_t *journal)
if (JBD2_HAS_COMPAT_FEATURE(journal, JBD2_FEATURE_COMPAT_CHECKSUM) &&
JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2)) {
/* Can't have checksum v1 and v2 on at the same time! */
- printk(KERN_ERR "JBD: Can't enable checksumming v1 and v2 "
+ printk(KERN_ERR "JBD2: Can't enable checksumming v1 and v2 "
"at the same time!\n");
goto out;
}
if (!jbd2_verify_csum_type(journal, sb)) {
- printk(KERN_ERR "JBD: Unknown checksum type\n");
+ printk(KERN_ERR "JBD2: Unknown checksum type\n");
goto out;
}
@@ -1541,7 +1539,7 @@ static int journal_get_superblock(journal_t *journal)
if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2)) {
journal->j_chksum_driver = crypto_alloc_shash("crc32c", 0, 0);
if (IS_ERR(journal->j_chksum_driver)) {
- printk(KERN_ERR "JBD: Cannot load crc32c driver.\n");
+ printk(KERN_ERR "JBD2: Cannot load crc32c driver.\n");
err = PTR_ERR(journal->j_chksum_driver);
journal->j_chksum_driver = NULL;
goto out;
@@ -1550,7 +1548,7 @@ static int journal_get_superblock(journal_t *journal)
/* Check superblock checksum */
if (!jbd2_superblock_csum_verify(journal, sb)) {
- printk(KERN_ERR "JBD: journal checksum error\n");
+ printk(KERN_ERR "JBD2: journal checksum error\n");
goto out;
}
@@ -1836,7 +1834,7 @@ int jbd2_journal_set_features (journal_t *journal, unsigned long compat,
journal->j_chksum_driver = crypto_alloc_shash("crc32c",
0, 0);
if (IS_ERR(journal->j_chksum_driver)) {
- printk(KERN_ERR "JBD: Cannot load crc32c "
+ printk(KERN_ERR "JBD2: Cannot load crc32c "
"driver.\n");
journal->j_chksum_driver = NULL;
return 0;
@@ -2645,7 +2643,7 @@ static void __exit journal_exit(void)
#ifdef CONFIG_JBD2_DEBUG
int n = atomic_read(&nr_journal_heads);
if (n)
- printk(KERN_EMERG "JBD2: leaked %d journal_heads!\n", n);
+ printk(KERN_ERR "JBD2: leaked %d journal_heads!\n", n);
#endif
jbd2_remove_jbd_stats_proc_entry();
jbd2_journal_destroy_caches();
diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c
index 3929c50..3b6bb19 100644
--- a/fs/jbd2/recovery.c
+++ b/fs/jbd2/recovery.c
@@ -594,7 +594,7 @@ static int do_one_pass(journal_t *journal,
be32_to_cpu(tmp->h_sequence))) {
brelse(obh);
success = -EIO;
- printk(KERN_ERR "JBD: Invalid "
+ printk(KERN_ERR "JBD2: Invalid "
"checksum recovering "
"block %llu in log\n",
blocknr);
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index 7aa9a32..8360674 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -932,7 +932,7 @@ repeat:
jbd2_alloc(jh2bh(jh)->b_size,
GFP_NOFS);
if (!frozen_buffer) {
- printk(KERN_EMERG
+ printk(KERN_ERR
"%s: OOM for frozen_buffer\n",
__func__);
JBUFFER_TRACE(jh, "oom!");
@@ -1166,7 +1166,7 @@ repeat:
if (!jh->b_committed_data) {
committed_data = jbd2_alloc(jh2bh(jh)->b_size, GFP_NOFS);
if (!committed_data) {
- printk(KERN_EMERG "%s: No memory for committed data\n",
+ printk(KERN_ERR "%s: No memory for committed data\n",
__func__);
err = -ENOMEM;
goto out;
@@ -1290,7 +1290,10 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
* once a transaction -bzzz
*/
jh->b_modified = 1;
- J_ASSERT_JH(jh, handle->h_buffer_credits > 0);
+ if (handle->h_buffer_credits <= 0) {
+ ret = -ENOSPC;
+ goto out_unlock_bh;
+ }
handle->h_buffer_credits--;
}
@@ -1305,7 +1308,7 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
JBUFFER_TRACE(jh, "fastpath");
if (unlikely(jh->b_transaction !=
journal->j_running_transaction)) {
- printk(KERN_EMERG "JBD: %s: "
+ printk(KERN_ERR "JBD2: %s: "
"jh->b_transaction (%llu, %p, %u) != "
"journal->j_running_transaction (%p, %u)",
journal->j_devname,
@@ -1332,7 +1335,7 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
JBUFFER_TRACE(jh, "already on other transaction");
if (unlikely(jh->b_transaction !=
journal->j_committing_transaction)) {
- printk(KERN_EMERG "JBD: %s: "
+ printk(KERN_ERR "JBD2: %s: "
"jh->b_transaction (%llu, %p, %u) != "
"journal->j_committing_transaction (%p, %u)",
journal->j_devname,
@@ -1345,7 +1348,7 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
ret = -EINVAL;
}
if (unlikely(jh->b_next_transaction != transaction)) {
- printk(KERN_EMERG "JBD: %s: "
+ printk(KERN_ERR "JBD2: %s: "
"jh->b_next_transaction (%llu, %p, %u) != "
"transaction (%p, %u)",
journal->j_devname,
@@ -1373,7 +1376,6 @@ out_unlock_bh:
jbd2_journal_put_journal_head(jh);
out:
JBUFFER_TRACE(jh, "exit");
- WARN_ON(ret); /* All errors are bugs, so dump the stack */
return ret;
}
diff --git a/fs/kernfs/Makefile b/fs/kernfs/Makefile
new file mode 100644
index 0000000..674337c
--- /dev/null
+++ b/fs/kernfs/Makefile
@@ -0,0 +1,5 @@
+#
+# Makefile for the kernfs pseudo filesystem
+#
+
+obj-y := mount.o inode.o dir.o file.o symlink.o
diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c
new file mode 100644
index 0000000..5104cf5
--- /dev/null
+++ b/fs/kernfs/dir.c
@@ -0,0 +1,1073 @@
+/*
+ * fs/kernfs/dir.c - kernfs directory implementation
+ *
+ * Copyright (c) 2001-3 Patrick Mochel
+ * Copyright (c) 2007 SUSE Linux Products GmbH
+ * Copyright (c) 2007, 2013 Tejun Heo <tj@kernel.org>
+ *
+ * This file is released under the GPLv2.
+ */
+
+#include <linux/fs.h>
+#include <linux/namei.h>
+#include <linux/idr.h>
+#include <linux/slab.h>
+#include <linux/security.h>
+#include <linux/hash.h>
+
+#include "kernfs-internal.h"
+
+DEFINE_MUTEX(kernfs_mutex);
+
+#define rb_to_kn(X) rb_entry((X), struct kernfs_node, rb)
+
+/**
+ * kernfs_name_hash
+ * @name: Null terminated string to hash
+ * @ns: Namespace tag to hash
+ *
+ * Returns 31 bit hash of ns + name (so it fits in an off_t )
+ */
+static unsigned int kernfs_name_hash(const char *name, const void *ns)
+{
+ unsigned long hash = init_name_hash();
+ unsigned int len = strlen(name);
+ while (len--)
+ hash = partial_name_hash(*name++, hash);
+ hash = (end_name_hash(hash) ^ hash_ptr((void *)ns, 31));
+ hash &= 0x7fffffffU;
+ /* Reserve hash numbers 0, 1 and INT_MAX for magic directory entries */
+ if (hash < 1)
+ hash += 2;
+ if (hash >= INT_MAX)
+ hash = INT_MAX - 1;
+ return hash;
+}
+
+static int kernfs_name_compare(unsigned int hash, const char *name,
+ const void *ns, const struct kernfs_node *kn)
+{
+ if (hash != kn->hash)
+ return hash - kn->hash;
+ if (ns != kn->ns)
+ return ns - kn->ns;
+ return strcmp(name, kn->name);
+}
+
+static int kernfs_sd_compare(const struct kernfs_node *left,
+ const struct kernfs_node *right)
+{
+ return kernfs_name_compare(left->hash, left->name, left->ns, right);
+}
+
+/**
+ * kernfs_link_sibling - link kernfs_node into sibling rbtree
+ * @kn: kernfs_node of interest
+ *
+ * Link @kn into its sibling rbtree which starts from
+ * @kn->parent->dir.children.
+ *
+ * Locking:
+ * mutex_lock(kernfs_mutex)
+ *
+ * RETURNS:
+ * 0 on susccess -EEXIST on failure.
+ */
+static int kernfs_link_sibling(struct kernfs_node *kn)
+{
+ struct rb_node **node = &kn->parent->dir.children.rb_node;
+ struct rb_node *parent = NULL;
+
+ if (kernfs_type(kn) == KERNFS_DIR)
+ kn->parent->dir.subdirs++;
+
+ while (*node) {
+ struct kernfs_node *pos;
+ int result;
+
+ pos = rb_to_kn(*node);
+ parent = *node;
+ result = kernfs_sd_compare(kn, pos);
+ if (result < 0)
+ node = &pos->rb.rb_left;
+ else if (result > 0)
+ node = &pos->rb.rb_right;
+ else
+ return -EEXIST;
+ }
+ /* add new node and rebalance the tree */
+ rb_link_node(&kn->rb, parent, node);
+ rb_insert_color(&kn->rb, &kn->parent->dir.children);
+ return 0;
+}
+
+/**
+ * kernfs_unlink_sibling - unlink kernfs_node from sibling rbtree
+ * @kn: kernfs_node of interest
+ *
+ * Unlink @kn from its sibling rbtree which starts from
+ * kn->parent->dir.children.
+ *
+ * Locking:
+ * mutex_lock(kernfs_mutex)
+ */
+static void kernfs_unlink_sibling(struct kernfs_node *kn)
+{
+ if (kernfs_type(kn) == KERNFS_DIR)
+ kn->parent->dir.subdirs--;
+
+ rb_erase(&kn->rb, &kn->parent->dir.children);
+}
+
+/**
+ * kernfs_get_active - get an active reference to kernfs_node
+ * @kn: kernfs_node to get an active reference to
+ *
+ * Get an active reference of @kn. This function is noop if @kn
+ * is NULL.
+ *
+ * RETURNS:
+ * Pointer to @kn on success, NULL on failure.
+ */
+struct kernfs_node *kernfs_get_active(struct kernfs_node *kn)
+{
+ if (unlikely(!kn))
+ return NULL;
+
+ if (!atomic_inc_unless_negative(&kn->active))
+ return NULL;
+
+ if (kn->flags & KERNFS_LOCKDEP)
+ rwsem_acquire_read(&kn->dep_map, 0, 1, _RET_IP_);
+ return kn;
+}
+
+/**
+ * kernfs_put_active - put an active reference to kernfs_node
+ * @kn: kernfs_node to put an active reference to
+ *
+ * Put an active reference to @kn. This function is noop if @kn
+ * is NULL.
+ */
+void kernfs_put_active(struct kernfs_node *kn)
+{
+ int v;
+
+ if (unlikely(!kn))
+ return;
+
+ if (kn->flags & KERNFS_LOCKDEP)
+ rwsem_release(&kn->dep_map, 1, _RET_IP_);
+ v = atomic_dec_return(&kn->active);
+ if (likely(v != KN_DEACTIVATED_BIAS))
+ return;
+
+ /*
+ * atomic_dec_return() is a mb(), we'll always see the updated
+ * kn->u.completion.
+ */
+ complete(kn->u.completion);
+}
+
+/**
+ * kernfs_deactivate - deactivate kernfs_node
+ * @kn: kernfs_node to deactivate
+ *
+ * Deny new active references and drain existing ones.
+ */
+static void kernfs_deactivate(struct kernfs_node *kn)
+{
+ DECLARE_COMPLETION_ONSTACK(wait);
+ int v;
+
+ BUG_ON(!(kn->flags & KERNFS_REMOVED));
+
+ if (!(kernfs_type(kn) & KERNFS_ACTIVE_REF))
+ return;
+
+ kn->u.completion = (void *)&wait;
+
+ rwsem_acquire(&kn->dep_map, 0, 0, _RET_IP_);
+ /* atomic_add_return() is a mb(), put_active() will always see
+ * the updated kn->u.completion.
+ */
+ v = atomic_add_return(KN_DEACTIVATED_BIAS, &kn->active);
+
+ if (v != KN_DEACTIVATED_BIAS) {
+ lock_contended(&kn->dep_map, _RET_IP_);
+ wait_for_completion(&wait);
+ }
+
+ lock_acquired(&kn->dep_map, _RET_IP_);
+ rwsem_release(&kn->dep_map, 1, _RET_IP_);
+}
+
+/**
+ * kernfs_get - get a reference count on a kernfs_node
+ * @kn: the target kernfs_node
+ */
+void kernfs_get(struct kernfs_node *kn)
+{
+ if (kn) {
+ WARN_ON(!atomic_read(&kn->count));
+ atomic_inc(&kn->count);
+ }
+}
+EXPORT_SYMBOL_GPL(kernfs_get);
+
+/**
+ * kernfs_put - put a reference count on a kernfs_node
+ * @kn: the target kernfs_node
+ *
+ * Put a reference count of @kn and destroy it if it reached zero.
+ */
+void kernfs_put(struct kernfs_node *kn)
+{
+ struct kernfs_node *parent;
+ struct kernfs_root *root;
+
+ if (!kn || !atomic_dec_and_test(&kn->count))
+ return;
+ root = kernfs_root(kn);
+ repeat:
+ /* Moving/renaming is always done while holding reference.
+ * kn->parent won't change beneath us.
+ */
+ parent = kn->parent;
+
+ WARN(!(kn->flags & KERNFS_REMOVED), "kernfs: free using entry: %s/%s\n",
+ parent ? parent->name : "", kn->name);
+
+ if (kernfs_type(kn) == KERNFS_LINK)
+ kernfs_put(kn->symlink.target_kn);
+ if (!(kn->flags & KERNFS_STATIC_NAME))
+ kfree(kn->name);
+ if (kn->iattr) {
+ if (kn->iattr->ia_secdata)
+ security_release_secctx(kn->iattr->ia_secdata,
+ kn->iattr->ia_secdata_len);
+ simple_xattrs_free(&kn->iattr->xattrs);
+ }
+ kfree(kn->iattr);
+ ida_simple_remove(&root->ino_ida, kn->ino);
+ kmem_cache_free(kernfs_node_cache, kn);
+
+ kn = parent;
+ if (kn) {
+ if (atomic_dec_and_test(&kn->count))
+ goto repeat;
+ } else {
+ /* just released the root kn, free @root too */
+ ida_destroy(&root->ino_ida);
+ kfree(root);
+ }
+}
+EXPORT_SYMBOL_GPL(kernfs_put);
+
+static int kernfs_dop_revalidate(struct dentry *dentry, unsigned int flags)
+{
+ struct kernfs_node *kn;
+
+ if (flags & LOOKUP_RCU)
+ return -ECHILD;
+
+ /* Always perform fresh lookup for negatives */
+ if (!dentry->d_inode)
+ goto out_bad_unlocked;
+
+ kn = dentry->d_fsdata;
+ mutex_lock(&kernfs_mutex);
+
+ /* The kernfs node has been deleted */
+ if (kn->flags & KERNFS_REMOVED)
+ goto out_bad;
+
+ /* The kernfs node has been moved? */
+ if (dentry->d_parent->d_fsdata != kn->parent)
+ goto out_bad;
+
+ /* The kernfs node has been renamed */
+ if (strcmp(dentry->d_name.name, kn->name) != 0)
+ goto out_bad;
+
+ /* The kernfs node has been moved to a different namespace */
+ if (kn->parent && kernfs_ns_enabled(kn->parent) &&
+ kernfs_info(dentry->d_sb)->ns != kn->ns)
+ goto out_bad;
+
+ mutex_unlock(&kernfs_mutex);
+out_valid:
+ return 1;
+out_bad:
+ mutex_unlock(&kernfs_mutex);
+out_bad_unlocked:
+ /*
+ * @dentry doesn't match the underlying kernfs node, drop the
+ * dentry and force lookup. If we have submounts we must allow the
+ * vfs caches to lie about the state of the filesystem to prevent
+ * leaks and other nasty things, so use check_submounts_and_drop()
+ * instead of d_drop().
+ */
+ if (check_submounts_and_drop(dentry) != 0)
+ goto out_valid;
+
+ return 0;
+}
+
+static void kernfs_dop_release(struct dentry *dentry)
+{
+ kernfs_put(dentry->d_fsdata);
+}
+
+const struct dentry_operations kernfs_dops = {
+ .d_revalidate = kernfs_dop_revalidate,
+ .d_release = kernfs_dop_release,
+};
+
+static struct kernfs_node *__kernfs_new_node(struct kernfs_root *root,
+ const char *name, umode_t mode,
+ unsigned flags)
+{
+ char *dup_name = NULL;
+ struct kernfs_node *kn;
+ int ret;
+
+ if (!(flags & KERNFS_STATIC_NAME)) {
+ name = dup_name = kstrdup(name, GFP_KERNEL);
+ if (!name)
+ return NULL;
+ }
+
+ kn = kmem_cache_zalloc(kernfs_node_cache, GFP_KERNEL);
+ if (!kn)
+ goto err_out1;
+
+ ret = ida_simple_get(&root->ino_ida, 1, 0, GFP_KERNEL);
+ if (ret < 0)
+ goto err_out2;
+ kn->ino = ret;
+
+ atomic_set(&kn->count, 1);
+ atomic_set(&kn->active, 0);
+
+ kn->name = name;
+ kn->mode = mode;
+ kn->flags = flags | KERNFS_REMOVED;
+
+ return kn;
+
+ err_out2:
+ kmem_cache_free(kernfs_node_cache, kn);
+ err_out1:
+ kfree(dup_name);
+ return NULL;
+}
+
+struct kernfs_node *kernfs_new_node(struct kernfs_node *parent,
+ const char *name, umode_t mode,
+ unsigned flags)
+{
+ struct kernfs_node *kn;
+
+ kn = __kernfs_new_node(kernfs_root(parent), name, mode, flags);
+ if (kn) {
+ kernfs_get(parent);
+ kn->parent = parent;
+ }
+ return kn;
+}
+
+/**
+ * kernfs_addrm_start - prepare for kernfs_node add/remove
+ * @acxt: pointer to kernfs_addrm_cxt to be used
+ *
+ * This function is called when the caller is about to add or remove
+ * kernfs_node. This function acquires kernfs_mutex. @acxt is used
+ * to keep and pass context to other addrm functions.
+ *
+ * LOCKING:
+ * Kernel thread context (may sleep). kernfs_mutex is locked on
+ * return.
+ */
+void kernfs_addrm_start(struct kernfs_addrm_cxt *acxt)
+ __acquires(kernfs_mutex)
+{
+ memset(acxt, 0, sizeof(*acxt));
+
+ mutex_lock(&kernfs_mutex);
+}
+
+/**
+ * kernfs_add_one - add kernfs_node to parent without warning
+ * @acxt: addrm context to use
+ * @kn: kernfs_node to be added
+ *
+ * The caller must already have initialized @kn->parent. This
+ * function increments nlink of the parent's inode if @kn is a
+ * directory and link into the children list of the parent.
+ *
+ * This function should be called between calls to
+ * kernfs_addrm_start() and kernfs_addrm_finish() and should be passed
+ * the same @acxt as passed to kernfs_addrm_start().
+ *
+ * LOCKING:
+ * Determined by kernfs_addrm_start().
+ *
+ * RETURNS:
+ * 0 on success, -EEXIST if entry with the given name already
+ * exists.
+ */
+int kernfs_add_one(struct kernfs_addrm_cxt *acxt, struct kernfs_node *kn)
+{
+ struct kernfs_node *parent = kn->parent;
+ bool has_ns = kernfs_ns_enabled(parent);
+ struct kernfs_iattrs *ps_iattr;
+ int ret;
+
+ if (has_ns != (bool)kn->ns) {
+ WARN(1, KERN_WARNING "kernfs: ns %s in '%s' for '%s'\n",
+ has_ns ? "required" : "invalid", parent->name, kn->name);
+ return -EINVAL;
+ }
+
+ if (kernfs_type(parent) != KERNFS_DIR)
+ return -EINVAL;
+
+ if (parent->flags & KERNFS_REMOVED)
+ return -ENOENT;
+
+ kn->hash = kernfs_name_hash(kn->name, kn->ns);
+
+ ret = kernfs_link_sibling(kn);
+ if (ret)
+ return ret;
+
+ /* Update timestamps on the parent */
+ ps_iattr = parent->iattr;
+ if (ps_iattr) {
+ struct iattr *ps_iattrs = &ps_iattr->ia_iattr;
+ ps_iattrs->ia_ctime = ps_iattrs->ia_mtime = CURRENT_TIME;
+ }
+
+ /* Mark the entry added into directory tree */
+ kn->flags &= ~KERNFS_REMOVED;
+
+ return 0;
+}
+
+/**
+ * kernfs_remove_one - remove kernfs_node from parent
+ * @acxt: addrm context to use
+ * @kn: kernfs_node to be removed
+ *
+ * Mark @kn removed and drop nlink of parent inode if @kn is a
+ * directory. @kn is unlinked from the children list.
+ *
+ * This function should be called between calls to
+ * kernfs_addrm_start() and kernfs_addrm_finish() and should be
+ * passed the same @acxt as passed to kernfs_addrm_start().
+ *
+ * LOCKING:
+ * Determined by kernfs_addrm_start().
+ */
+static void kernfs_remove_one(struct kernfs_addrm_cxt *acxt,
+ struct kernfs_node *kn)
+{
+ struct kernfs_iattrs *ps_iattr;
+
+ /*
+ * Removal can be called multiple times on the same node. Only the
+ * first invocation is effective and puts the base ref.
+ */
+ if (kn->flags & KERNFS_REMOVED)
+ return;
+
+ if (kn->parent) {
+ kernfs_unlink_sibling(kn);
+
+ /* Update timestamps on the parent */
+ ps_iattr = kn->parent->iattr;
+ if (ps_iattr) {
+ ps_iattr->ia_iattr.ia_ctime = CURRENT_TIME;
+ ps_iattr->ia_iattr.ia_mtime = CURRENT_TIME;
+ }
+ }
+
+ kn->flags |= KERNFS_REMOVED;
+ kn->u.removed_list = acxt->removed;
+ acxt->removed = kn;
+}
+
+/**
+ * kernfs_addrm_finish - finish up kernfs_node add/remove
+ * @acxt: addrm context to finish up
+ *
+ * Finish up kernfs_node add/remove. Resources acquired by
+ * kernfs_addrm_start() are released and removed kernfs_nodes are
+ * cleaned up.
+ *
+ * LOCKING:
+ * kernfs_mutex is released.
+ */
+void kernfs_addrm_finish(struct kernfs_addrm_cxt *acxt)
+ __releases(kernfs_mutex)
+{
+ /* release resources acquired by kernfs_addrm_start() */
+ mutex_unlock(&kernfs_mutex);
+
+ /* kill removed kernfs_nodes */
+ while (acxt->removed) {
+ struct kernfs_node *kn = acxt->removed;
+
+ acxt->removed = kn->u.removed_list;
+
+ kernfs_deactivate(kn);
+ kernfs_unmap_bin_file(kn);
+ kernfs_put(kn);
+ }
+}
+
+/**
+ * kernfs_find_ns - find kernfs_node with the given name
+ * @parent: kernfs_node to search under
+ * @name: name to look for
+ * @ns: the namespace tag to use
+ *
+ * Look for kernfs_node with name @name under @parent. Returns pointer to
+ * the found kernfs_node on success, %NULL on failure.
+ */
+static struct kernfs_node *kernfs_find_ns(struct kernfs_node *parent,
+ const unsigned char *name,
+ const void *ns)
+{
+ struct rb_node *node = parent->dir.children.rb_node;
+ bool has_ns = kernfs_ns_enabled(parent);
+ unsigned int hash;
+
+ lockdep_assert_held(&kernfs_mutex);
+
+ if (has_ns != (bool)ns) {
+ WARN(1, KERN_WARNING "kernfs: ns %s in '%s' for '%s'\n",
+ has_ns ? "required" : "invalid", parent->name, name);
+ return NULL;
+ }
+
+ hash = kernfs_name_hash(name, ns);
+ while (node) {
+ struct kernfs_node *kn;
+ int result;
+
+ kn = rb_to_kn(node);
+ result = kernfs_name_compare(hash, name, ns, kn);
+ if (result < 0)
+ node = node->rb_left;
+ else if (result > 0)
+ node = node->rb_right;
+ else
+ return kn;
+ }
+ return NULL;
+}
+
+/**
+ * kernfs_find_and_get_ns - find and get kernfs_node with the given name
+ * @parent: kernfs_node to search under
+ * @name: name to look for
+ * @ns: the namespace tag to use
+ *
+ * Look for kernfs_node with name @name under @parent and get a reference
+ * if found. This function may sleep and returns pointer to the found
+ * kernfs_node on success, %NULL on failure.
+ */
+struct kernfs_node *kernfs_find_and_get_ns(struct kernfs_node *parent,
+ const char *name, const void *ns)
+{
+ struct kernfs_node *kn;
+
+ mutex_lock(&kernfs_mutex);
+ kn = kernfs_find_ns(parent, name, ns);
+ kernfs_get(kn);
+ mutex_unlock(&kernfs_mutex);
+
+ return kn;
+}
+EXPORT_SYMBOL_GPL(kernfs_find_and_get_ns);
+
+/**
+ * kernfs_create_root - create a new kernfs hierarchy
+ * @kdops: optional directory syscall operations for the hierarchy
+ * @priv: opaque data associated with the new directory
+ *
+ * Returns the root of the new hierarchy on success, ERR_PTR() value on
+ * failure.
+ */
+struct kernfs_root *kernfs_create_root(struct kernfs_dir_ops *kdops, void *priv)
+{
+ struct kernfs_root *root;
+ struct kernfs_node *kn;
+
+ root = kzalloc(sizeof(*root), GFP_KERNEL);
+ if (!root)
+ return ERR_PTR(-ENOMEM);
+
+ ida_init(&root->ino_ida);
+
+ kn = __kernfs_new_node(root, "", S_IFDIR | S_IRUGO | S_IXUGO,
+ KERNFS_DIR);
+ if (!kn) {
+ ida_destroy(&root->ino_ida);
+ kfree(root);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ kn->flags &= ~KERNFS_REMOVED;
+ kn->priv = priv;
+ kn->dir.root = root;
+
+ root->dir_ops = kdops;
+ root->kn = kn;
+
+ return root;
+}
+
+/**
+ * kernfs_destroy_root - destroy a kernfs hierarchy
+ * @root: root of the hierarchy to destroy
+ *
+ * Destroy the hierarchy anchored at @root by removing all existing
+ * directories and destroying @root.
+ */
+void kernfs_destroy_root(struct kernfs_root *root)
+{
+ kernfs_remove(root->kn); /* will also free @root */
+}
+
+/**
+ * kernfs_create_dir_ns - create a directory
+ * @parent: parent in which to create a new directory
+ * @name: name of the new directory
+ * @mode: mode of the new directory
+ * @priv: opaque data associated with the new directory
+ * @ns: optional namespace tag of the directory
+ *
+ * Returns the created node on success, ERR_PTR() value on failure.
+ */
+struct kernfs_node *kernfs_create_dir_ns(struct kernfs_node *parent,
+ const char *name, umode_t mode,
+ void *priv, const void *ns)
+{
+ struct kernfs_addrm_cxt acxt;
+ struct kernfs_node *kn;
+ int rc;
+
+ /* allocate */
+ kn = kernfs_new_node(parent, name, mode | S_IFDIR, KERNFS_DIR);
+ if (!kn)
+ return ERR_PTR(-ENOMEM);
+
+ kn->dir.root = parent->dir.root;
+ kn->ns = ns;
+ kn->priv = priv;
+
+ /* link in */
+ kernfs_addrm_start(&acxt);
+ rc = kernfs_add_one(&acxt, kn);
+ kernfs_addrm_finish(&acxt);
+
+ if (!rc)
+ return kn;
+
+ kernfs_put(kn);
+ return ERR_PTR(rc);
+}
+
+static struct dentry *kernfs_iop_lookup(struct inode *dir,
+ struct dentry *dentry,
+ unsigned int flags)
+{
+ struct dentry *ret;
+ struct kernfs_node *parent = dentry->d_parent->d_fsdata;
+ struct kernfs_node *kn;
+ struct inode *inode;
+ const void *ns = NULL;
+
+ mutex_lock(&kernfs_mutex);
+
+ if (kernfs_ns_enabled(parent))
+ ns = kernfs_info(dir->i_sb)->ns;
+
+ kn = kernfs_find_ns(parent, dentry->d_name.name, ns);
+
+ /* no such entry */
+ if (!kn) {
+ ret = NULL;
+ goto out_unlock;
+ }
+ kernfs_get(kn);
+ dentry->d_fsdata = kn;
+
+ /* attach dentry and inode */
+ inode = kernfs_get_inode(dir->i_sb, kn);
+ if (!inode) {
+ ret = ERR_PTR(-ENOMEM);
+ goto out_unlock;
+ }
+
+ /* instantiate and hash dentry */
+ ret = d_materialise_unique(dentry, inode);
+ out_unlock:
+ mutex_unlock(&kernfs_mutex);
+ return ret;
+}
+
+static int kernfs_iop_mkdir(struct inode *dir, struct dentry *dentry,
+ umode_t mode)
+{
+ struct kernfs_node *parent = dir->i_private;
+ struct kernfs_dir_ops *kdops = kernfs_root(parent)->dir_ops;
+
+ if (!kdops || !kdops->mkdir)
+ return -EPERM;
+
+ return kdops->mkdir(parent, dentry->d_name.name, mode);
+}
+
+static int kernfs_iop_rmdir(struct inode *dir, struct dentry *dentry)
+{
+ struct kernfs_node *kn = dentry->d_fsdata;
+ struct kernfs_dir_ops *kdops = kernfs_root(kn)->dir_ops;
+
+ if (!kdops || !kdops->rmdir)
+ return -EPERM;
+
+ return kdops->rmdir(kn);
+}
+
+static int kernfs_iop_rename(struct inode *old_dir, struct dentry *old_dentry,
+ struct inode *new_dir, struct dentry *new_dentry)
+{
+ struct kernfs_node *kn = old_dentry->d_fsdata;
+ struct kernfs_node *new_parent = new_dir->i_private;
+ struct kernfs_dir_ops *kdops = kernfs_root(kn)->dir_ops;
+
+ if (!kdops || !kdops->rename)
+ return -EPERM;
+
+ return kdops->rename(kn, new_parent, new_dentry->d_name.name);
+}
+
+const struct inode_operations kernfs_dir_iops = {
+ .lookup = kernfs_iop_lookup,
+ .permission = kernfs_iop_permission,
+ .setattr = kernfs_iop_setattr,
+ .getattr = kernfs_iop_getattr,
+ .setxattr = kernfs_iop_setxattr,
+ .removexattr = kernfs_iop_removexattr,
+ .getxattr = kernfs_iop_getxattr,
+ .listxattr = kernfs_iop_listxattr,
+
+ .mkdir = kernfs_iop_mkdir,
+ .rmdir = kernfs_iop_rmdir,
+ .rename = kernfs_iop_rename,
+};
+
+static struct kernfs_node *kernfs_leftmost_descendant(struct kernfs_node *pos)
+{
+ struct kernfs_node *last;
+
+ while (true) {
+ struct rb_node *rbn;
+
+ last = pos;
+
+ if (kernfs_type(pos) != KERNFS_DIR)
+ break;
+
+ rbn = rb_first(&pos->dir.children);
+ if (!rbn)
+ break;
+
+ pos = rb_to_kn(rbn);
+ }
+
+ return last;
+}
+
+/**
+ * kernfs_next_descendant_post - find the next descendant for post-order walk
+ * @pos: the current position (%NULL to initiate traversal)
+ * @root: kernfs_node whose descendants to walk
+ *
+ * Find the next descendant to visit for post-order traversal of @root's
+ * descendants. @root is included in the iteration and the last node to be
+ * visited.
+ */
+static struct kernfs_node *kernfs_next_descendant_post(struct kernfs_node *pos,
+ struct kernfs_node *root)
+{
+ struct rb_node *rbn;
+
+ lockdep_assert_held(&kernfs_mutex);
+
+ /* if first iteration, visit leftmost descendant which may be root */
+ if (!pos)
+ return kernfs_leftmost_descendant(root);
+
+ /* if we visited @root, we're done */
+ if (pos == root)
+ return NULL;
+
+ /* if there's an unvisited sibling, visit its leftmost descendant */
+ rbn = rb_next(&pos->rb);
+ if (rbn)
+ return kernfs_leftmost_descendant(rb_to_kn(rbn));
+
+ /* no sibling left, visit parent */
+ return pos->parent;
+}
+
+static void __kernfs_remove(struct kernfs_addrm_cxt *acxt,
+ struct kernfs_node *kn)
+{
+ struct kernfs_node *pos, *next;
+
+ if (!kn)
+ return;
+
+ pr_debug("kernfs %s: removing\n", kn->name);
+
+ next = NULL;
+ do {
+ pos = next;
+ next = kernfs_next_descendant_post(pos, kn);
+ if (pos)
+ kernfs_remove_one(acxt, pos);
+ } while (next);
+}
+
+/**
+ * kernfs_remove - remove a kernfs_node recursively
+ * @kn: the kernfs_node to remove
+ *
+ * Remove @kn along with all its subdirectories and files.
+ */
+void kernfs_remove(struct kernfs_node *kn)
+{
+ struct kernfs_addrm_cxt acxt;
+
+ kernfs_addrm_start(&acxt);
+ __kernfs_remove(&acxt, kn);
+ kernfs_addrm_finish(&acxt);
+}
+
+/**
+ * kernfs_remove_by_name_ns - find a kernfs_node by name and remove it
+ * @parent: parent of the target
+ * @name: name of the kernfs_node to remove
+ * @ns: namespace tag of the kernfs_node to remove
+ *
+ * Look for the kernfs_node with @name and @ns under @parent and remove it.
+ * Returns 0 on success, -ENOENT if such entry doesn't exist.
+ */
+int kernfs_remove_by_name_ns(struct kernfs_node *parent, const char *name,
+ const void *ns)
+{
+ struct kernfs_addrm_cxt acxt;
+ struct kernfs_node *kn;
+
+ if (!parent) {
+ WARN(1, KERN_WARNING "kernfs: can not remove '%s', no directory\n",
+ name);
+ return -ENOENT;
+ }
+
+ kernfs_addrm_start(&acxt);
+
+ kn = kernfs_find_ns(parent, name, ns);
+ if (kn)
+ __kernfs_remove(&acxt, kn);
+
+ kernfs_addrm_finish(&acxt);
+
+ if (kn)
+ return 0;
+ else
+ return -ENOENT;
+}
+
+/**
+ * kernfs_rename_ns - move and rename a kernfs_node
+ * @kn: target node
+ * @new_parent: new parent to put @sd under
+ * @new_name: new name
+ * @new_ns: new namespace tag
+ */
+int kernfs_rename_ns(struct kernfs_node *kn, struct kernfs_node *new_parent,
+ const char *new_name, const void *new_ns)
+{
+ int error;
+
+ mutex_lock(&kernfs_mutex);
+
+ error = -ENOENT;
+ if ((kn->flags | new_parent->flags) & KERNFS_REMOVED)
+ goto out;
+
+ error = 0;
+ if ((kn->parent == new_parent) && (kn->ns == new_ns) &&
+ (strcmp(kn->name, new_name) == 0))
+ goto out; /* nothing to rename */
+
+ error = -EEXIST;
+ if (kernfs_find_ns(new_parent, new_name, new_ns))
+ goto out;
+
+ /* rename kernfs_node */
+ if (strcmp(kn->name, new_name) != 0) {
+ error = -ENOMEM;
+ new_name = kstrdup(new_name, GFP_KERNEL);
+ if (!new_name)
+ goto out;
+
+ if (kn->flags & KERNFS_STATIC_NAME)
+ kn->flags &= ~KERNFS_STATIC_NAME;
+ else
+ kfree(kn->name);
+
+ kn->name = new_name;
+ }
+
+ /*
+ * Move to the appropriate place in the appropriate directories rbtree.
+ */
+ kernfs_unlink_sibling(kn);
+ kernfs_get(new_parent);
+ kernfs_put(kn->parent);
+ kn->ns = new_ns;
+ kn->hash = kernfs_name_hash(kn->name, kn->ns);
+ kn->parent = new_parent;
+ kernfs_link_sibling(kn);
+
+ error = 0;
+ out:
+ mutex_unlock(&kernfs_mutex);
+ return error;
+}
+
+/* Relationship between s_mode and the DT_xxx types */
+static inline unsigned char dt_type(struct kernfs_node *kn)
+{
+ return (kn->mode >> 12) & 15;
+}
+
+static int kernfs_dir_fop_release(struct inode *inode, struct file *filp)
+{
+ kernfs_put(filp->private_data);
+ return 0;
+}
+
+static struct kernfs_node *kernfs_dir_pos(const void *ns,
+ struct kernfs_node *parent, loff_t hash, struct kernfs_node *pos)
+{
+ if (pos) {
+ int valid = !(pos->flags & KERNFS_REMOVED) &&
+ pos->parent == parent && hash == pos->hash;
+ kernfs_put(pos);
+ if (!valid)
+ pos = NULL;
+ }
+ if (!pos && (hash > 1) && (hash < INT_MAX)) {
+ struct rb_node *node = parent->dir.children.rb_node;
+ while (node) {
+ pos = rb_to_kn(node);
+
+ if (hash < pos->hash)
+ node = node->rb_left;
+ else if (hash > pos->hash)
+ node = node->rb_right;
+ else
+ break;
+ }
+ }
+ /* Skip over entries in the wrong namespace */
+ while (pos && pos->ns != ns) {
+ struct rb_node *node = rb_next(&pos->rb);
+ if (!node)
+ pos = NULL;
+ else
+ pos = rb_to_kn(node);
+ }
+ return pos;
+}
+
+static struct kernfs_node *kernfs_dir_next_pos(const void *ns,
+ struct kernfs_node *parent, ino_t ino, struct kernfs_node *pos)
+{
+ pos = kernfs_dir_pos(ns, parent, ino, pos);
+ if (pos)
+ do {
+ struct rb_node *node = rb_next(&pos->rb);
+ if (!node)
+ pos = NULL;
+ else
+ pos = rb_to_kn(node);
+ } while (pos && pos->ns != ns);
+ return pos;
+}
+
+static int kernfs_fop_readdir(struct file *file, struct dir_context *ctx)
+{
+ struct dentry *dentry = file->f_path.dentry;
+ struct kernfs_node *parent = dentry->d_fsdata;
+ struct kernfs_node *pos = file->private_data;
+ const void *ns = NULL;
+
+ if (!dir_emit_dots(file, ctx))
+ return 0;
+ mutex_lock(&kernfs_mutex);
+
+ if (kernfs_ns_enabled(parent))
+ ns = kernfs_info(dentry->d_sb)->ns;
+
+ for (pos = kernfs_dir_pos(ns, parent, ctx->pos, pos);
+ pos;
+ pos = kernfs_dir_next_pos(ns, parent, ctx->pos, pos)) {
+ const char *name = pos->name;
+ unsigned int type = dt_type(pos);
+ int len = strlen(name);
+ ino_t ino = pos->ino;
+
+ ctx->pos = pos->hash;
+ file->private_data = pos;
+ kernfs_get(pos);
+
+ mutex_unlock(&kernfs_mutex);
+ if (!dir_emit(ctx, name, len, ino, type))
+ return 0;
+ mutex_lock(&kernfs_mutex);
+ }
+ mutex_unlock(&kernfs_mutex);
+ file->private_data = NULL;
+ ctx->pos = INT_MAX;
+ return 0;
+}
+
+static loff_t kernfs_dir_fop_llseek(struct file *file, loff_t offset,
+ int whence)
+{
+ struct inode *inode = file_inode(file);
+ loff_t ret;
+
+ mutex_lock(&inode->i_mutex);
+ ret = generic_file_llseek(file, offset, whence);
+ mutex_unlock(&inode->i_mutex);
+
+ return ret;
+}
+
+const struct file_operations kernfs_dir_fops = {
+ .read = generic_read_dir,
+ .iterate = kernfs_fop_readdir,
+ .release = kernfs_dir_fop_release,
+ .llseek = kernfs_dir_fop_llseek,
+};
diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c
new file mode 100644
index 0000000..dbf397b
--- /dev/null
+++ b/fs/kernfs/file.c
@@ -0,0 +1,867 @@
+/*
+ * fs/kernfs/file.c - kernfs file implementation
+ *
+ * Copyright (c) 2001-3 Patrick Mochel
+ * Copyright (c) 2007 SUSE Linux Products GmbH
+ * Copyright (c) 2007, 2013 Tejun Heo <tj@kernel.org>
+ *
+ * This file is released under the GPLv2.
+ */
+
+#include <linux/fs.h>
+#include <linux/seq_file.h>
+#include <linux/slab.h>
+#include <linux/poll.h>
+#include <linux/pagemap.h>
+#include <linux/sched.h>
+
+#include "kernfs-internal.h"
+
+/*
+ * There's one kernfs_open_file for each open file and one kernfs_open_node
+ * for each kernfs_node with one or more open files.
+ *
+ * kernfs_node->attr.open points to kernfs_open_node. attr.open is
+ * protected by kernfs_open_node_lock.
+ *
+ * filp->private_data points to seq_file whose ->private points to
+ * kernfs_open_file. kernfs_open_files are chained at
+ * kernfs_open_node->files, which is protected by kernfs_open_file_mutex.
+ */
+static DEFINE_SPINLOCK(kernfs_open_node_lock);
+static DEFINE_MUTEX(kernfs_open_file_mutex);
+
+struct kernfs_open_node {
+ atomic_t refcnt;
+ atomic_t event;
+ wait_queue_head_t poll;
+ struct list_head files; /* goes through kernfs_open_file.list */
+};
+
+static struct kernfs_open_file *kernfs_of(struct file *file)
+{
+ return ((struct seq_file *)file->private_data)->private;
+}
+
+/*
+ * Determine the kernfs_ops for the given kernfs_node. This function must
+ * be called while holding an active reference.
+ */
+static const struct kernfs_ops *kernfs_ops(struct kernfs_node *kn)
+{
+ if (kn->flags & KERNFS_LOCKDEP)
+ lockdep_assert_held(kn);
+ return kn->attr.ops;
+}
+
+/*
+ * As kernfs_seq_stop() is also called after kernfs_seq_start() or
+ * kernfs_seq_next() failure, it needs to distinguish whether it's stopping
+ * a seq_file iteration which is fully initialized with an active reference
+ * or an aborted kernfs_seq_start() due to get_active failure. The
+ * position pointer is the only context for each seq_file iteration and
+ * thus the stop condition should be encoded in it. As the return value is
+ * directly visible to userland, ERR_PTR(-ENODEV) is the only acceptable
+ * choice to indicate get_active failure.
+ *
+ * Unfortunately, this is complicated due to the optional custom seq_file
+ * operations which may return ERR_PTR(-ENODEV) too. kernfs_seq_stop()
+ * can't distinguish whether ERR_PTR(-ENODEV) is from get_active failure or
+ * custom seq_file operations and thus can't decide whether put_active
+ * should be performed or not only on ERR_PTR(-ENODEV).
+ *
+ * This is worked around by factoring out the custom seq_stop() and
+ * put_active part into kernfs_seq_stop_active(), skipping it from
+ * kernfs_seq_stop() if ERR_PTR(-ENODEV) while invoking it directly after
+ * custom seq_file operations fail with ERR_PTR(-ENODEV) - this ensures
+ * that kernfs_seq_stop_active() is skipped only after get_active failure.
+ */
+static void kernfs_seq_stop_active(struct seq_file *sf, void *v)
+{
+ struct kernfs_open_file *of = sf->private;
+ const struct kernfs_ops *ops = kernfs_ops(of->kn);
+
+ if (ops->seq_stop)
+ ops->seq_stop(sf, v);
+ kernfs_put_active(of->kn);
+}
+
+static void *kernfs_seq_start(struct seq_file *sf, loff_t *ppos)
+{
+ struct kernfs_open_file *of = sf->private;
+ const struct kernfs_ops *ops;
+
+ /*
+ * @of->mutex nests outside active ref and is just to ensure that
+ * the ops aren't called concurrently for the same open file.
+ */
+ mutex_lock(&of->mutex);
+ if (!kernfs_get_active(of->kn))
+ return ERR_PTR(-ENODEV);
+
+ ops = kernfs_ops(of->kn);
+ if (ops->seq_start) {
+ void *next = ops->seq_start(sf, ppos);
+ /* see the comment above kernfs_seq_stop_active() */
+ if (next == ERR_PTR(-ENODEV))
+ kernfs_seq_stop_active(sf, next);
+ return next;
+ } else {
+ /*
+ * The same behavior and code as single_open(). Returns
+ * !NULL if pos is at the beginning; otherwise, NULL.
+ */
+ return NULL + !*ppos;
+ }
+}
+
+static void *kernfs_seq_next(struct seq_file *sf, void *v, loff_t *ppos)
+{
+ struct kernfs_open_file *of = sf->private;
+ const struct kernfs_ops *ops = kernfs_ops(of->kn);
+
+ if (ops->seq_next) {
+ void *next = ops->seq_next(sf, v, ppos);
+ /* see the comment above kernfs_seq_stop_active() */
+ if (next == ERR_PTR(-ENODEV))
+ kernfs_seq_stop_active(sf, next);
+ return next;
+ } else {
+ /*
+ * The same behavior and code as single_open(), always
+ * terminate after the initial read.
+ */
+ ++*ppos;
+ return NULL;
+ }
+}
+
+static void kernfs_seq_stop(struct seq_file *sf, void *v)
+{
+ struct kernfs_open_file *of = sf->private;
+
+ if (v != ERR_PTR(-ENODEV))
+ kernfs_seq_stop_active(sf, v);
+ mutex_unlock(&of->mutex);
+}
+
+static int kernfs_seq_show(struct seq_file *sf, void *v)
+{
+ struct kernfs_open_file *of = sf->private;
+
+ of->event = atomic_read(&of->kn->attr.open->event);
+
+ return of->kn->attr.ops->seq_show(sf, v);
+}
+
+static const struct seq_operations kernfs_seq_ops = {
+ .start = kernfs_seq_start,
+ .next = kernfs_seq_next,
+ .stop = kernfs_seq_stop,
+ .show = kernfs_seq_show,
+};
+
+/*
+ * As reading a bin file can have side-effects, the exact offset and bytes
+ * specified in read(2) call should be passed to the read callback making
+ * it difficult to use seq_file. Implement simplistic custom buffering for
+ * bin files.
+ */
+static ssize_t kernfs_file_direct_read(struct kernfs_open_file *of,
+ char __user *user_buf, size_t count,
+ loff_t *ppos)
+{
+ ssize_t len = min_t(size_t, count, PAGE_SIZE);
+ const struct kernfs_ops *ops;
+ char *buf;
+
+ buf = kmalloc(len, GFP_KERNEL);
+ if (!buf)
+ return -ENOMEM;
+
+ /*
+ * @of->mutex nests outside active ref and is just to ensure that
+ * the ops aren't called concurrently for the same open file.
+ */
+ mutex_lock(&of->mutex);
+ if (!kernfs_get_active(of->kn)) {
+ len = -ENODEV;
+ mutex_unlock(&of->mutex);
+ goto out_free;
+ }
+
+ ops = kernfs_ops(of->kn);
+ if (ops->read)
+ len = ops->read(of, buf, len, *ppos);
+ else
+ len = -EINVAL;
+
+ kernfs_put_active(of->kn);
+ mutex_unlock(&of->mutex);
+
+ if (len < 0)
+ goto out_free;
+
+ if (copy_to_user(user_buf, buf, len)) {
+ len = -EFAULT;
+ goto out_free;
+ }
+
+ *ppos += len;
+
+ out_free:
+ kfree(buf);
+ return len;
+}
+
+/**
+ * kernfs_fop_read - kernfs vfs read callback
+ * @file: file pointer
+ * @user_buf: data to write
+ * @count: number of bytes
+ * @ppos: starting offset
+ */
+static ssize_t kernfs_fop_read(struct file *file, char __user *user_buf,
+ size_t count, loff_t *ppos)
+{
+ struct kernfs_open_file *of = kernfs_of(file);
+
+ if (of->kn->flags & KERNFS_HAS_SEQ_SHOW)
+ return seq_read(file, user_buf, count, ppos);
+ else
+ return kernfs_file_direct_read(of, user_buf, count, ppos);
+}
+
+/**
+ * kernfs_fop_write - kernfs vfs write callback
+ * @file: file pointer
+ * @user_buf: data to write
+ * @count: number of bytes
+ * @ppos: starting offset
+ *
+ * Copy data in from userland and pass it to the matching kernfs write
+ * operation.
+ *
+ * There is no easy way for us to know if userspace is only doing a partial
+ * write, so we don't support them. We expect the entire buffer to come on
+ * the first write. Hint: if you're writing a value, first read the file,
+ * modify only the the value you're changing, then write entire buffer
+ * back.
+ */
+static ssize_t kernfs_fop_write(struct file *file, const char __user *user_buf,
+ size_t count, loff_t *ppos)
+{
+ struct kernfs_open_file *of = kernfs_of(file);
+ ssize_t len = min_t(size_t, count, PAGE_SIZE);
+ const struct kernfs_ops *ops;
+ char *buf;
+
+ buf = kmalloc(len + 1, GFP_KERNEL);
+ if (!buf)
+ return -ENOMEM;
+
+ if (copy_from_user(buf, user_buf, len)) {
+ len = -EFAULT;
+ goto out_free;
+ }
+ buf[len] = '\0'; /* guarantee string termination */
+
+ /*
+ * @of->mutex nests outside active ref and is just to ensure that
+ * the ops aren't called concurrently for the same open file.
+ */
+ mutex_lock(&of->mutex);
+ if (!kernfs_get_active(of->kn)) {
+ mutex_unlock(&of->mutex);
+ len = -ENODEV;
+ goto out_free;
+ }
+
+ ops = kernfs_ops(of->kn);
+ if (ops->write)
+ len = ops->write(of, buf, len, *ppos);
+ else
+ len = -EINVAL;
+
+ kernfs_put_active(of->kn);
+ mutex_unlock(&of->mutex);
+
+ if (len > 0)
+ *ppos += len;
+out_free:
+ kfree(buf);
+ return len;
+}
+
+static void kernfs_vma_open(struct vm_area_struct *vma)
+{
+ struct file *file = vma->vm_file;
+ struct kernfs_open_file *of = kernfs_of(file);
+
+ if (!of->vm_ops)
+ return;
+
+ if (!kernfs_get_active(of->kn))
+ return;
+
+ if (of->vm_ops->open)
+ of->vm_ops->open(vma);
+
+ kernfs_put_active(of->kn);
+}
+
+static int kernfs_vma_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+ struct file *file = vma->vm_file;
+ struct kernfs_open_file *of = kernfs_of(file);
+ int ret;
+
+ if (!of->vm_ops)
+ return VM_FAULT_SIGBUS;
+
+ if (!kernfs_get_active(of->kn))
+ return VM_FAULT_SIGBUS;
+
+ ret = VM_FAULT_SIGBUS;
+ if (of->vm_ops->fault)
+ ret = of->vm_ops->fault(vma, vmf);
+
+ kernfs_put_active(of->kn);
+ return ret;
+}
+
+static int kernfs_vma_page_mkwrite(struct vm_area_struct *vma,
+ struct vm_fault *vmf)
+{
+ struct file *file = vma->vm_file;
+ struct kernfs_open_file *of = kernfs_of(file);
+ int ret;
+
+ if (!of->vm_ops)
+ return VM_FAULT_SIGBUS;
+
+ if (!kernfs_get_active(of->kn))
+ return VM_FAULT_SIGBUS;
+
+ ret = 0;
+ if (of->vm_ops->page_mkwrite)
+ ret = of->vm_ops->page_mkwrite(vma, vmf);
+ else
+ file_update_time(file);
+
+ kernfs_put_active(of->kn);
+ return ret;
+}
+
+static int kernfs_vma_access(struct vm_area_struct *vma, unsigned long addr,
+ void *buf, int len, int write)
+{
+ struct file *file = vma->vm_file;
+ struct kernfs_open_file *of = kernfs_of(file);
+ int ret;
+
+ if (!of->vm_ops)
+ return -EINVAL;
+
+ if (!kernfs_get_active(of->kn))
+ return -EINVAL;
+
+ ret = -EINVAL;
+ if (of->vm_ops->access)
+ ret = of->vm_ops->access(vma, addr, buf, len, write);
+
+ kernfs_put_active(of->kn);
+ return ret;
+}
+
+#ifdef CONFIG_NUMA
+static int kernfs_vma_set_policy(struct vm_area_struct *vma,
+ struct mempolicy *new)
+{
+ struct file *file = vma->vm_file;
+ struct kernfs_open_file *of = kernfs_of(file);
+ int ret;
+
+ if (!of->vm_ops)
+ return 0;
+
+ if (!kernfs_get_active(of->kn))
+ return -EINVAL;
+
+ ret = 0;
+ if (of->vm_ops->set_policy)
+ ret = of->vm_ops->set_policy(vma, new);
+
+ kernfs_put_active(of->kn);
+ return ret;
+}
+
+static struct mempolicy *kernfs_vma_get_policy(struct vm_area_struct *vma,
+ unsigned long addr)
+{
+ struct file *file = vma->vm_file;
+ struct kernfs_open_file *of = kernfs_of(file);
+ struct mempolicy *pol;
+
+ if (!of->vm_ops)
+ return vma->vm_policy;
+
+ if (!kernfs_get_active(of->kn))
+ return vma->vm_policy;
+
+ pol = vma->vm_policy;
+ if (of->vm_ops->get_policy)
+ pol = of->vm_ops->get_policy(vma, addr);
+
+ kernfs_put_active(of->kn);
+ return pol;
+}
+
+static int kernfs_vma_migrate(struct vm_area_struct *vma,
+ const nodemask_t *from, const nodemask_t *to,
+ unsigned long flags)
+{
+ struct file *file = vma->vm_file;
+ struct kernfs_open_file *of = kernfs_of(file);
+ int ret;
+
+ if (!of->vm_ops)
+ return 0;
+
+ if (!kernfs_get_active(of->kn))
+ return 0;
+
+ ret = 0;
+ if (of->vm_ops->migrate)
+ ret = of->vm_ops->migrate(vma, from, to, flags);
+
+ kernfs_put_active(of->kn);
+ return ret;
+}
+#endif
+
+static const struct vm_operations_struct kernfs_vm_ops = {
+ .open = kernfs_vma_open,
+ .fault = kernfs_vma_fault,
+ .page_mkwrite = kernfs_vma_page_mkwrite,
+ .access = kernfs_vma_access,
+#ifdef CONFIG_NUMA
+ .set_policy = kernfs_vma_set_policy,
+ .get_policy = kernfs_vma_get_policy,
+ .migrate = kernfs_vma_migrate,
+#endif
+};
+
+static int kernfs_fop_mmap(struct file *file, struct vm_area_struct *vma)
+{
+ struct kernfs_open_file *of = kernfs_of(file);
+ const struct kernfs_ops *ops;
+ int rc;
+
+ /*
+ * mmap path and of->mutex are prone to triggering spurious lockdep
+ * warnings and we don't want to add spurious locking dependency
+ * between the two. Check whether mmap is actually implemented
+ * without grabbing @of->mutex by testing HAS_MMAP flag. See the
+ * comment in kernfs_file_open() for more details.
+ */
+ if (!(of->kn->flags & KERNFS_HAS_MMAP))
+ return -ENODEV;
+
+ mutex_lock(&of->mutex);
+
+ rc = -ENODEV;
+ if (!kernfs_get_active(of->kn))
+ goto out_unlock;
+
+ ops = kernfs_ops(of->kn);
+ rc = ops->mmap(of, vma);
+
+ /*
+ * PowerPC's pci_mmap of legacy_mem uses shmem_zero_setup()
+ * to satisfy versions of X which crash if the mmap fails: that
+ * substitutes a new vm_file, and we don't then want bin_vm_ops.
+ */
+ if (vma->vm_file != file)
+ goto out_put;
+
+ rc = -EINVAL;
+ if (of->mmapped && of->vm_ops != vma->vm_ops)
+ goto out_put;
+
+ /*
+ * It is not possible to successfully wrap close.
+ * So error if someone is trying to use close.
+ */
+ rc = -EINVAL;
+ if (vma->vm_ops && vma->vm_ops->close)
+ goto out_put;
+
+ rc = 0;
+ of->mmapped = 1;
+ of->vm_ops = vma->vm_ops;
+ vma->vm_ops = &kernfs_vm_ops;
+out_put:
+ kernfs_put_active(of->kn);
+out_unlock:
+ mutex_unlock(&of->mutex);
+
+ return rc;
+}
+
+/**
+ * kernfs_get_open_node - get or create kernfs_open_node
+ * @kn: target kernfs_node
+ * @of: kernfs_open_file for this instance of open
+ *
+ * If @kn->attr.open exists, increment its reference count; otherwise,
+ * create one. @of is chained to the files list.
+ *
+ * LOCKING:
+ * Kernel thread context (may sleep).
+ *
+ * RETURNS:
+ * 0 on success, -errno on failure.
+ */
+static int kernfs_get_open_node(struct kernfs_node *kn,
+ struct kernfs_open_file *of)
+{
+ struct kernfs_open_node *on, *new_on = NULL;
+
+ retry:
+ mutex_lock(&kernfs_open_file_mutex);
+ spin_lock_irq(&kernfs_open_node_lock);
+
+ if (!kn->attr.open && new_on) {
+ kn->attr.open = new_on;
+ new_on = NULL;
+ }
+
+ on = kn->attr.open;
+ if (on) {
+ atomic_inc(&on->refcnt);
+ list_add_tail(&of->list, &on->files);
+ }
+
+ spin_unlock_irq(&kernfs_open_node_lock);
+ mutex_unlock(&kernfs_open_file_mutex);
+
+ if (on) {
+ kfree(new_on);
+ return 0;
+ }
+
+ /* not there, initialize a new one and retry */
+ new_on = kmalloc(sizeof(*new_on), GFP_KERNEL);
+ if (!new_on)
+ return -ENOMEM;
+
+ atomic_set(&new_on->refcnt, 0);
+ atomic_set(&new_on->event, 1);
+ init_waitqueue_head(&new_on->poll);
+ INIT_LIST_HEAD(&new_on->files);
+ goto retry;
+}
+
+/**
+ * kernfs_put_open_node - put kernfs_open_node
+ * @kn: target kernfs_nodet
+ * @of: associated kernfs_open_file
+ *
+ * Put @kn->attr.open and unlink @of from the files list. If
+ * reference count reaches zero, disassociate and free it.
+ *
+ * LOCKING:
+ * None.
+ */
+static void kernfs_put_open_node(struct kernfs_node *kn,
+ struct kernfs_open_file *of)
+{
+ struct kernfs_open_node *on = kn->attr.open;
+ unsigned long flags;
+
+ mutex_lock(&kernfs_open_file_mutex);
+ spin_lock_irqsave(&kernfs_open_node_lock, flags);
+
+ if (of)
+ list_del(&of->list);
+
+ if (atomic_dec_and_test(&on->refcnt))
+ kn->attr.open = NULL;
+ else
+ on = NULL;
+
+ spin_unlock_irqrestore(&kernfs_open_node_lock, flags);
+ mutex_unlock(&kernfs_open_file_mutex);
+
+ kfree(on);
+}
+
+static int kernfs_fop_open(struct inode *inode, struct file *file)
+{
+ struct kernfs_node *kn = file->f_path.dentry->d_fsdata;
+ const struct kernfs_ops *ops;
+ struct kernfs_open_file *of;
+ bool has_read, has_write, has_mmap;
+ int error = -EACCES;
+
+ if (!kernfs_get_active(kn))
+ return -ENODEV;
+
+ ops = kernfs_ops(kn);
+
+ has_read = ops->seq_show || ops->read || ops->mmap;
+ has_write = ops->write || ops->mmap;
+ has_mmap = ops->mmap;
+
+ /* check perms and supported operations */
+ if ((file->f_mode & FMODE_WRITE) &&
+ (!(inode->i_mode & S_IWUGO) || !has_write))
+ goto err_out;
+
+ if ((file->f_mode & FMODE_READ) &&
+ (!(inode->i_mode & S_IRUGO) || !has_read))
+ goto err_out;
+
+ /* allocate a kernfs_open_file for the file */
+ error = -ENOMEM;
+ of = kzalloc(sizeof(struct kernfs_open_file), GFP_KERNEL);
+ if (!of)
+ goto err_out;
+
+ /*
+ * The following is done to give a different lockdep key to
+ * @of->mutex for files which implement mmap. This is a rather
+ * crude way to avoid false positive lockdep warning around
+ * mm->mmap_sem - mmap nests @of->mutex under mm->mmap_sem and
+ * reading /sys/block/sda/trace/act_mask grabs sr_mutex, under
+ * which mm->mmap_sem nests, while holding @of->mutex. As each
+ * open file has a separate mutex, it's okay as long as those don't
+ * happen on the same file. At this point, we can't easily give
+ * each file a separate locking class. Let's differentiate on
+ * whether the file has mmap or not for now.
+ *
+ * Both paths of the branch look the same. They're supposed to
+ * look that way and give @of->mutex different static lockdep keys.
+ */
+ if (has_mmap)
+ mutex_init(&of->mutex);
+ else
+ mutex_init(&of->mutex);
+
+ of->kn = kn;
+ of->file = file;
+
+ /*
+ * Always instantiate seq_file even if read access doesn't use
+ * seq_file or is not requested. This unifies private data access
+ * and readable regular files are the vast majority anyway.
+ */
+ if (ops->seq_show)
+ error = seq_open(file, &kernfs_seq_ops);
+ else
+ error = seq_open(file, NULL);
+ if (error)
+ goto err_free;
+
+ ((struct seq_file *)file->private_data)->private = of;
+
+ /* seq_file clears PWRITE unconditionally, restore it if WRITE */
+ if (file->f_mode & FMODE_WRITE)
+ file->f_mode |= FMODE_PWRITE;
+
+ /* make sure we have open node struct */
+ error = kernfs_get_open_node(kn, of);
+ if (error)
+ goto err_close;
+
+ /* open succeeded, put active references */
+ kernfs_put_active(kn);
+ return 0;
+
+err_close:
+ seq_release(inode, file);
+err_free:
+ kfree(of);
+err_out:
+ kernfs_put_active(kn);
+ return error;
+}
+
+static int kernfs_fop_release(struct inode *inode, struct file *filp)
+{
+ struct kernfs_node *kn = filp->f_path.dentry->d_fsdata;
+ struct kernfs_open_file *of = kernfs_of(filp);
+
+ kernfs_put_open_node(kn, of);
+ seq_release(inode, filp);
+ kfree(of);
+
+ return 0;
+}
+
+void kernfs_unmap_bin_file(struct kernfs_node *kn)
+{
+ struct kernfs_open_node *on;
+ struct kernfs_open_file *of;
+
+ if (!(kn->flags & KERNFS_HAS_MMAP))
+ return;
+
+ spin_lock_irq(&kernfs_open_node_lock);
+ on = kn->attr.open;
+ if (on)
+ atomic_inc(&on->refcnt);
+ spin_unlock_irq(&kernfs_open_node_lock);
+ if (!on)
+ return;
+
+ mutex_lock(&kernfs_open_file_mutex);
+ list_for_each_entry(of, &on->files, list) {
+ struct inode *inode = file_inode(of->file);
+ unmap_mapping_range(inode->i_mapping, 0, 0, 1);
+ }
+ mutex_unlock(&kernfs_open_file_mutex);
+
+ kernfs_put_open_node(kn, NULL);
+}
+
+/*
+ * Kernfs attribute files are pollable. The idea is that you read
+ * the content and then you use 'poll' or 'select' to wait for
+ * the content to change. When the content changes (assuming the
+ * manager for the kobject supports notification), poll will
+ * return POLLERR|POLLPRI, and select will return the fd whether
+ * it is waiting for read, write, or exceptions.
+ * Once poll/select indicates that the value has changed, you
+ * need to close and re-open the file, or seek to 0 and read again.
+ * Reminder: this only works for attributes which actively support
+ * it, and it is not possible to test an attribute from userspace
+ * to see if it supports poll (Neither 'poll' nor 'select' return
+ * an appropriate error code). When in doubt, set a suitable timeout value.
+ */
+static unsigned int kernfs_fop_poll(struct file *filp, poll_table *wait)
+{
+ struct kernfs_open_file *of = kernfs_of(filp);
+ struct kernfs_node *kn = filp->f_path.dentry->d_fsdata;
+ struct kernfs_open_node *on = kn->attr.open;
+
+ /* need parent for the kobj, grab both */
+ if (!kernfs_get_active(kn))
+ goto trigger;
+
+ poll_wait(filp, &on->poll, wait);
+
+ kernfs_put_active(kn);
+
+ if (of->event != atomic_read(&on->event))
+ goto trigger;
+
+ return DEFAULT_POLLMASK;
+
+ trigger:
+ return DEFAULT_POLLMASK|POLLERR|POLLPRI;
+}
+
+/**
+ * kernfs_notify - notify a kernfs file
+ * @kn: file to notify
+ *
+ * Notify @kn such that poll(2) on @kn wakes up.
+ */
+void kernfs_notify(struct kernfs_node *kn)
+{
+ struct kernfs_open_node *on;
+ unsigned long flags;
+
+ spin_lock_irqsave(&kernfs_open_node_lock, flags);
+
+ if (!WARN_ON(kernfs_type(kn) != KERNFS_FILE)) {
+ on = kn->attr.open;
+ if (on) {
+ atomic_inc(&on->event);
+ wake_up_interruptible(&on->poll);
+ }
+ }
+
+ spin_unlock_irqrestore(&kernfs_open_node_lock, flags);
+}
+EXPORT_SYMBOL_GPL(kernfs_notify);
+
+const struct file_operations kernfs_file_fops = {
+ .read = kernfs_fop_read,
+ .write = kernfs_fop_write,
+ .llseek = generic_file_llseek,
+ .mmap = kernfs_fop_mmap,
+ .open = kernfs_fop_open,
+ .release = kernfs_fop_release,
+ .poll = kernfs_fop_poll,
+};
+
+/**
+ * __kernfs_create_file - kernfs internal function to create a file
+ * @parent: directory to create the file in
+ * @name: name of the file
+ * @mode: mode of the file
+ * @size: size of the file
+ * @ops: kernfs operations for the file
+ * @priv: private data for the file
+ * @ns: optional namespace tag of the file
+ * @static_name: don't copy file name
+ * @key: lockdep key for the file's active_ref, %NULL to disable lockdep
+ *
+ * Returns the created node on success, ERR_PTR() value on error.
+ */
+struct kernfs_node *__kernfs_create_file(struct kernfs_node *parent,
+ const char *name,
+ umode_t mode, loff_t size,
+ const struct kernfs_ops *ops,
+ void *priv, const void *ns,
+ bool name_is_static,
+ struct lock_class_key *key)
+{
+ struct kernfs_addrm_cxt acxt;
+ struct kernfs_node *kn;
+ unsigned flags;
+ int rc;
+
+ flags = KERNFS_FILE;
+ if (name_is_static)
+ flags |= KERNFS_STATIC_NAME;
+
+ kn = kernfs_new_node(parent, name, (mode & S_IALLUGO) | S_IFREG, flags);
+ if (!kn)
+ return ERR_PTR(-ENOMEM);
+
+ kn->attr.ops = ops;
+ kn->attr.size = size;
+ kn->ns = ns;
+ kn->priv = priv;
+
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+ if (key) {
+ lockdep_init_map(&kn->dep_map, "s_active", key, 0);
+ kn->flags |= KERNFS_LOCKDEP;
+ }
+#endif
+
+ /*
+ * kn->attr.ops is accesible only while holding active ref. We
+ * need to know whether some ops are implemented outside active
+ * ref. Cache their existence in flags.
+ */
+ if (ops->seq_show)
+ kn->flags |= KERNFS_HAS_SEQ_SHOW;
+ if (ops->mmap)
+ kn->flags |= KERNFS_HAS_MMAP;
+
+ kernfs_addrm_start(&acxt);
+ rc = kernfs_add_one(&acxt, kn);
+ kernfs_addrm_finish(&acxt);
+
+ if (rc) {
+ kernfs_put(kn);
+ return ERR_PTR(rc);
+ }
+ return kn;
+}
diff --git a/fs/kernfs/inode.c b/fs/kernfs/inode.c
new file mode 100644
index 0000000..e55126f
--- /dev/null
+++ b/fs/kernfs/inode.c
@@ -0,0 +1,377 @@
+/*
+ * fs/kernfs/inode.c - kernfs inode implementation
+ *
+ * Copyright (c) 2001-3 Patrick Mochel
+ * Copyright (c) 2007 SUSE Linux Products GmbH
+ * Copyright (c) 2007, 2013 Tejun Heo <tj@kernel.org>
+ *
+ * This file is released under the GPLv2.
+ */
+
+#include <linux/pagemap.h>
+#include <linux/backing-dev.h>
+#include <linux/capability.h>
+#include <linux/errno.h>
+#include <linux/slab.h>
+#include <linux/xattr.h>
+#include <linux/security.h>
+
+#include "kernfs-internal.h"
+
+static const struct address_space_operations kernfs_aops = {
+ .readpage = simple_readpage,
+ .write_begin = simple_write_begin,
+ .write_end = simple_write_end,
+};
+
+static struct backing_dev_info kernfs_bdi = {
+ .name = "kernfs",
+ .ra_pages = 0, /* No readahead */
+ .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK,
+};
+
+static const struct inode_operations kernfs_iops = {
+ .permission = kernfs_iop_permission,
+ .setattr = kernfs_iop_setattr,
+ .getattr = kernfs_iop_getattr,
+ .setxattr = kernfs_iop_setxattr,
+ .removexattr = kernfs_iop_removexattr,
+ .getxattr = kernfs_iop_getxattr,
+ .listxattr = kernfs_iop_listxattr,
+};
+
+void __init kernfs_inode_init(void)
+{
+ if (bdi_init(&kernfs_bdi))
+ panic("failed to init kernfs_bdi");
+}
+
+static struct kernfs_iattrs *kernfs_iattrs(struct kernfs_node *kn)
+{
+ struct iattr *iattrs;
+
+ if (kn->iattr)
+ return kn->iattr;
+
+ kn->iattr = kzalloc(sizeof(struct kernfs_iattrs), GFP_KERNEL);
+ if (!kn->iattr)
+ return NULL;
+ iattrs = &kn->iattr->ia_iattr;
+
+ /* assign default attributes */
+ iattrs->ia_mode = kn->mode;
+ iattrs->ia_uid = GLOBAL_ROOT_UID;
+ iattrs->ia_gid = GLOBAL_ROOT_GID;
+ iattrs->ia_atime = iattrs->ia_mtime = iattrs->ia_ctime = CURRENT_TIME;
+
+ simple_xattrs_init(&kn->iattr->xattrs);
+
+ return kn->iattr;
+}
+
+static int __kernfs_setattr(struct kernfs_node *kn, const struct iattr *iattr)
+{
+ struct kernfs_iattrs *attrs;
+ struct iattr *iattrs;
+ unsigned int ia_valid = iattr->ia_valid;
+
+ attrs = kernfs_iattrs(kn);
+ if (!attrs)
+ return -ENOMEM;
+
+ iattrs = &attrs->ia_iattr;
+
+ if (ia_valid & ATTR_UID)
+ iattrs->ia_uid = iattr->ia_uid;
+ if (ia_valid & ATTR_GID)
+ iattrs->ia_gid = iattr->ia_gid;
+ if (ia_valid & ATTR_ATIME)
+ iattrs->ia_atime = iattr->ia_atime;
+ if (ia_valid & ATTR_MTIME)
+ iattrs->ia_mtime = iattr->ia_mtime;
+ if (ia_valid & ATTR_CTIME)
+ iattrs->ia_ctime = iattr->ia_ctime;
+ if (ia_valid & ATTR_MODE) {
+ umode_t mode = iattr->ia_mode;
+ iattrs->ia_mode = kn->mode = mode;
+ }
+ return 0;
+}
+
+/**
+ * kernfs_setattr - set iattr on a node
+ * @kn: target node
+ * @iattr: iattr to set
+ *
+ * Returns 0 on success, -errno on failure.
+ */
+int kernfs_setattr(struct kernfs_node *kn, const struct iattr *iattr)
+{
+ int ret;
+
+ mutex_lock(&kernfs_mutex);
+ ret = __kernfs_setattr(kn, iattr);
+ mutex_unlock(&kernfs_mutex);
+ return ret;
+}
+
+int kernfs_iop_setattr(struct dentry *dentry, struct iattr *iattr)
+{
+ struct inode *inode = dentry->d_inode;
+ struct kernfs_node *kn = dentry->d_fsdata;
+ int error;
+
+ if (!kn)
+ return -EINVAL;
+
+ mutex_lock(&kernfs_mutex);
+ error = inode_change_ok(inode, iattr);
+ if (error)
+ goto out;
+
+ error = __kernfs_setattr(kn, iattr);
+ if (error)
+ goto out;
+
+ /* this ignores size changes */
+ setattr_copy(inode, iattr);
+
+out:
+ mutex_unlock(&kernfs_mutex);
+ return error;
+}
+
+static int kernfs_node_setsecdata(struct kernfs_node *kn, void **secdata,
+ u32 *secdata_len)
+{
+ struct kernfs_iattrs *attrs;
+ void *old_secdata;
+ size_t old_secdata_len;
+
+ attrs = kernfs_iattrs(kn);
+ if (!attrs)
+ return -ENOMEM;
+
+ old_secdata = attrs->ia_secdata;
+ old_secdata_len = attrs->ia_secdata_len;
+
+ attrs->ia_secdata = *secdata;
+ attrs->ia_secdata_len = *secdata_len;
+
+ *secdata = old_secdata;
+ *secdata_len = old_secdata_len;
+ return 0;
+}
+
+int kernfs_iop_setxattr(struct dentry *dentry, const char *name,
+ const void *value, size_t size, int flags)
+{
+ struct kernfs_node *kn = dentry->d_fsdata;
+ struct kernfs_iattrs *attrs;
+ void *secdata;
+ int error;
+ u32 secdata_len = 0;
+
+ attrs = kernfs_iattrs(kn);
+ if (!attrs)
+ return -ENOMEM;
+
+ if (!strncmp(name, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN)) {
+ const char *suffix = name + XATTR_SECURITY_PREFIX_LEN;
+ error = security_inode_setsecurity(dentry->d_inode, suffix,
+ value, size, flags);
+ if (error)
+ return error;
+ error = security_inode_getsecctx(dentry->d_inode,
+ &secdata, &secdata_len);
+ if (error)
+ return error;
+
+ mutex_lock(&kernfs_mutex);
+ error = kernfs_node_setsecdata(kn, &secdata, &secdata_len);
+ mutex_unlock(&kernfs_mutex);
+
+ if (secdata)
+ security_release_secctx(secdata, secdata_len);
+ return error;
+ } else if (!strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN)) {
+ return simple_xattr_set(&attrs->xattrs, name, value, size,
+ flags);
+ }
+
+ return -EINVAL;
+}
+
+int kernfs_iop_removexattr(struct dentry *dentry, const char *name)
+{
+ struct kernfs_node *kn = dentry->d_fsdata;
+ struct kernfs_iattrs *attrs;
+
+ attrs = kernfs_iattrs(kn);
+ if (!attrs)
+ return -ENOMEM;
+
+ return simple_xattr_remove(&attrs->xattrs, name);
+}
+
+ssize_t kernfs_iop_getxattr(struct dentry *dentry, const char *name, void *buf,
+ size_t size)
+{
+ struct kernfs_node *kn = dentry->d_fsdata;
+ struct kernfs_iattrs *attrs;
+
+ attrs = kernfs_iattrs(kn);
+ if (!attrs)
+ return -ENOMEM;
+
+ return simple_xattr_get(&attrs->xattrs, name, buf, size);
+}
+
+ssize_t kernfs_iop_listxattr(struct dentry *dentry, char *buf, size_t size)
+{
+ struct kernfs_node *kn = dentry->d_fsdata;
+ struct kernfs_iattrs *attrs;
+
+ attrs = kernfs_iattrs(kn);
+ if (!attrs)
+ return -ENOMEM;
+
+ return simple_xattr_list(&attrs->xattrs, buf, size);
+}
+
+static inline void set_default_inode_attr(struct inode *inode, umode_t mode)
+{
+ inode->i_mode = mode;
+ inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
+}
+
+static inline void set_inode_attr(struct inode *inode, struct iattr *iattr)
+{
+ inode->i_uid = iattr->ia_uid;
+ inode->i_gid = iattr->ia_gid;
+ inode->i_atime = iattr->ia_atime;
+ inode->i_mtime = iattr->ia_mtime;
+ inode->i_ctime = iattr->ia_ctime;
+}
+
+static void kernfs_refresh_inode(struct kernfs_node *kn, struct inode *inode)
+{
+ struct kernfs_iattrs *attrs = kn->iattr;
+
+ inode->i_mode = kn->mode;
+ if (attrs) {
+ /*
+ * kernfs_node has non-default attributes get them from
+ * persistent copy in kernfs_node.
+ */
+ set_inode_attr(inode, &attrs->ia_iattr);
+ security_inode_notifysecctx(inode, attrs->ia_secdata,
+ attrs->ia_secdata_len);
+ }
+
+ if (kernfs_type(kn) == KERNFS_DIR)
+ set_nlink(inode, kn->dir.subdirs + 2);
+}
+
+int kernfs_iop_getattr(struct vfsmount *mnt, struct dentry *dentry,
+ struct kstat *stat)
+{
+ struct kernfs_node *kn = dentry->d_fsdata;
+ struct inode *inode = dentry->d_inode;
+
+ mutex_lock(&kernfs_mutex);
+ kernfs_refresh_inode(kn, inode);
+ mutex_unlock(&kernfs_mutex);
+
+ generic_fillattr(inode, stat);
+ return 0;
+}
+
+static void kernfs_init_inode(struct kernfs_node *kn, struct inode *inode)
+{
+ kernfs_get(kn);
+ inode->i_private = kn;
+ inode->i_mapping->a_ops = &kernfs_aops;
+ inode->i_mapping->backing_dev_info = &kernfs_bdi;
+ inode->i_op = &kernfs_iops;
+
+ set_default_inode_attr(inode, kn->mode);
+ kernfs_refresh_inode(kn, inode);
+
+ /* initialize inode according to type */
+ switch (kernfs_type(kn)) {
+ case KERNFS_DIR:
+ inode->i_op = &kernfs_dir_iops;
+ inode->i_fop = &kernfs_dir_fops;
+ break;
+ case KERNFS_FILE:
+ inode->i_size = kn->attr.size;
+ inode->i_fop = &kernfs_file_fops;
+ break;
+ case KERNFS_LINK:
+ inode->i_op = &kernfs_symlink_iops;
+ break;
+ default:
+ BUG();
+ }
+
+ unlock_new_inode(inode);
+}
+
+/**
+ * kernfs_get_inode - get inode for kernfs_node
+ * @sb: super block
+ * @kn: kernfs_node to allocate inode for
+ *
+ * Get inode for @kn. If such inode doesn't exist, a new inode is
+ * allocated and basics are initialized. New inode is returned
+ * locked.
+ *
+ * LOCKING:
+ * Kernel thread context (may sleep).
+ *
+ * RETURNS:
+ * Pointer to allocated inode on success, NULL on failure.
+ */
+struct inode *kernfs_get_inode(struct super_block *sb, struct kernfs_node *kn)
+{
+ struct inode *inode;
+
+ inode = iget_locked(sb, kn->ino);
+ if (inode && (inode->i_state & I_NEW))
+ kernfs_init_inode(kn, inode);
+
+ return inode;
+}
+
+/*
+ * The kernfs_node serves as both an inode and a directory entry for
+ * kernfs. To prevent the kernfs inode numbers from being freed
+ * prematurely we take a reference to kernfs_node from the kernfs inode. A
+ * super_operations.evict_inode() implementation is needed to drop that
+ * reference upon inode destruction.
+ */
+void kernfs_evict_inode(struct inode *inode)
+{
+ struct kernfs_node *kn = inode->i_private;
+
+ truncate_inode_pages(&inode->i_data, 0);
+ clear_inode(inode);
+ kernfs_put(kn);
+}
+
+int kernfs_iop_permission(struct inode *inode, int mask)
+{
+ struct kernfs_node *kn;
+
+ if (mask & MAY_NOT_BLOCK)
+ return -ECHILD;
+
+ kn = inode->i_private;
+
+ mutex_lock(&kernfs_mutex);
+ kernfs_refresh_inode(kn, inode);
+ mutex_unlock(&kernfs_mutex);
+
+ return generic_permission(inode, mask);
+}
diff --git a/fs/kernfs/kernfs-internal.h b/fs/kernfs/kernfs-internal.h
new file mode 100644
index 0000000..eb536b7
--- /dev/null
+++ b/fs/kernfs/kernfs-internal.h
@@ -0,0 +1,122 @@
+/*
+ * fs/kernfs/kernfs-internal.h - kernfs internal header file
+ *
+ * Copyright (c) 2001-3 Patrick Mochel
+ * Copyright (c) 2007 SUSE Linux Products GmbH
+ * Copyright (c) 2007, 2013 Tejun Heo <teheo@suse.de>
+ *
+ * This file is released under the GPLv2.
+ */
+
+#ifndef __KERNFS_INTERNAL_H
+#define __KERNFS_INTERNAL_H
+
+#include <linux/lockdep.h>
+#include <linux/fs.h>
+#include <linux/mutex.h>
+#include <linux/xattr.h>
+
+#include <linux/kernfs.h>
+
+struct kernfs_iattrs {
+ struct iattr ia_iattr;
+ void *ia_secdata;
+ u32 ia_secdata_len;
+
+ struct simple_xattrs xattrs;
+};
+
+#define KN_DEACTIVATED_BIAS INT_MIN
+
+/* KERNFS_TYPE_MASK and types are defined in include/linux/kernfs.h */
+
+/**
+ * kernfs_root - find out the kernfs_root a kernfs_node belongs to
+ * @kn: kernfs_node of interest
+ *
+ * Return the kernfs_root @kn belongs to.
+ */
+static inline struct kernfs_root *kernfs_root(struct kernfs_node *kn)
+{
+ /* if parent exists, it's always a dir; otherwise, @sd is a dir */
+ if (kn->parent)
+ kn = kn->parent;
+ return kn->dir.root;
+}
+
+/*
+ * Context structure to be used while adding/removing nodes.
+ */
+struct kernfs_addrm_cxt {
+ struct kernfs_node *removed;
+};
+
+/*
+ * mount.c
+ */
+struct kernfs_super_info {
+ /*
+ * The root associated with this super_block. Each super_block is
+ * identified by the root and ns it's associated with.
+ */
+ struct kernfs_root *root;
+
+ /*
+ * Each sb is associated with one namespace tag, currently the
+ * network namespace of the task which mounted this kernfs
+ * instance. If multiple tags become necessary, make the following
+ * an array and compare kernfs_node tag against every entry.
+ */
+ const void *ns;
+};
+#define kernfs_info(SB) ((struct kernfs_super_info *)(SB->s_fs_info))
+
+extern struct kmem_cache *kernfs_node_cache;
+
+/*
+ * inode.c
+ */
+struct inode *kernfs_get_inode(struct super_block *sb, struct kernfs_node *kn);
+void kernfs_evict_inode(struct inode *inode);
+int kernfs_iop_permission(struct inode *inode, int mask);
+int kernfs_iop_setattr(struct dentry *dentry, struct iattr *iattr);
+int kernfs_iop_getattr(struct vfsmount *mnt, struct dentry *dentry,
+ struct kstat *stat);
+int kernfs_iop_setxattr(struct dentry *dentry, const char *name, const void *value,
+ size_t size, int flags);
+int kernfs_iop_removexattr(struct dentry *dentry, const char *name);
+ssize_t kernfs_iop_getxattr(struct dentry *dentry, const char *name, void *buf,
+ size_t size);
+ssize_t kernfs_iop_listxattr(struct dentry *dentry, char *buf, size_t size);
+void kernfs_inode_init(void);
+
+/*
+ * dir.c
+ */
+extern struct mutex kernfs_mutex;
+extern const struct dentry_operations kernfs_dops;
+extern const struct file_operations kernfs_dir_fops;
+extern const struct inode_operations kernfs_dir_iops;
+
+struct kernfs_node *kernfs_get_active(struct kernfs_node *kn);
+void kernfs_put_active(struct kernfs_node *kn);
+void kernfs_addrm_start(struct kernfs_addrm_cxt *acxt);
+int kernfs_add_one(struct kernfs_addrm_cxt *acxt, struct kernfs_node *kn);
+void kernfs_addrm_finish(struct kernfs_addrm_cxt *acxt);
+struct kernfs_node *kernfs_new_node(struct kernfs_node *parent,
+ const char *name, umode_t mode,
+ unsigned flags);
+
+/*
+ * file.c
+ */
+extern const struct file_operations kernfs_file_fops;
+
+void kernfs_unmap_bin_file(struct kernfs_node *kn);
+
+/*
+ * symlink.c
+ */
+extern const struct inode_operations kernfs_symlink_iops;
+
+#endif /* __KERNFS_INTERNAL_H */
diff --git a/fs/kernfs/mount.c b/fs/kernfs/mount.c
new file mode 100644
index 0000000..0d6ce89
--- /dev/null
+++ b/fs/kernfs/mount.c
@@ -0,0 +1,165 @@
+/*
+ * fs/kernfs/mount.c - kernfs mount implementation
+ *
+ * Copyright (c) 2001-3 Patrick Mochel
+ * Copyright (c) 2007 SUSE Linux Products GmbH
+ * Copyright (c) 2007, 2013 Tejun Heo <tj@kernel.org>
+ *
+ * This file is released under the GPLv2.
+ */
+
+#include <linux/fs.h>
+#include <linux/mount.h>
+#include <linux/init.h>
+#include <linux/magic.h>
+#include <linux/slab.h>
+#include <linux/pagemap.h>
+
+#include "kernfs-internal.h"
+
+struct kmem_cache *kernfs_node_cache;
+
+static const struct super_operations kernfs_sops = {
+ .statfs = simple_statfs,
+ .drop_inode = generic_delete_inode,
+ .evict_inode = kernfs_evict_inode,
+};
+
+static int kernfs_fill_super(struct super_block *sb)
+{
+ struct kernfs_super_info *info = kernfs_info(sb);
+ struct inode *inode;
+ struct dentry *root;
+
+ sb->s_blocksize = PAGE_CACHE_SIZE;
+ sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
+ sb->s_magic = SYSFS_MAGIC;
+ sb->s_op = &kernfs_sops;
+ sb->s_time_gran = 1;
+
+ /* get root inode, initialize and unlock it */
+ mutex_lock(&kernfs_mutex);
+ inode = kernfs_get_inode(sb, info->root->kn);
+ mutex_unlock(&kernfs_mutex);
+ if (!inode) {
+ pr_debug("kernfs: could not get root inode\n");
+ return -ENOMEM;
+ }
+
+ /* instantiate and link root dentry */
+ root = d_make_root(inode);
+ if (!root) {
+ pr_debug("%s: could not get root dentry!\n", __func__);
+ return -ENOMEM;
+ }
+ kernfs_get(info->root->kn);
+ root->d_fsdata = info->root->kn;
+ sb->s_root = root;
+ sb->s_d_op = &kernfs_dops;
+ return 0;
+}
+
+static int kernfs_test_super(struct super_block *sb, void *data)
+{
+ struct kernfs_super_info *sb_info = kernfs_info(sb);
+ struct kernfs_super_info *info = data;
+
+ return sb_info->root == info->root && sb_info->ns == info->ns;
+}
+
+static int kernfs_set_super(struct super_block *sb, void *data)
+{
+ int error;
+ error = set_anon_super(sb, data);
+ if (!error)
+ sb->s_fs_info = data;
+ return error;
+}
+
+/**
+ * kernfs_super_ns - determine the namespace tag of a kernfs super_block
+ * @sb: super_block of interest
+ *
+ * Return the namespace tag associated with kernfs super_block @sb.
+ */
+const void *kernfs_super_ns(struct super_block *sb)
+{
+ struct kernfs_super_info *info = kernfs_info(sb);
+
+ return info->ns;
+}
+
+/**
+ * kernfs_mount_ns - kernfs mount helper
+ * @fs_type: file_system_type of the fs being mounted
+ * @flags: mount flags specified for the mount
+ * @root: kernfs_root of the hierarchy being mounted
+ * @ns: optional namespace tag of the mount
+ *
+ * This is to be called from each kernfs user's file_system_type->mount()
+ * implementation, which should pass through the specified @fs_type and
+ * @flags, and specify the hierarchy and namespace tag to mount via @root
+ * and @ns, respectively.
+ *
+ * The return value can be passed to the vfs layer verbatim.
+ */
+struct dentry *kernfs_mount_ns(struct file_system_type *fs_type, int flags,
+ struct kernfs_root *root, const void *ns)
+{
+ struct super_block *sb;
+ struct kernfs_super_info *info;
+ int error;
+
+ info = kzalloc(sizeof(*info), GFP_KERNEL);
+ if (!info)
+ return ERR_PTR(-ENOMEM);
+
+ info->root = root;
+ info->ns = ns;
+
+ sb = sget(fs_type, kernfs_test_super, kernfs_set_super, flags, info);
+ if (IS_ERR(sb) || sb->s_fs_info != info)
+ kfree(info);
+ if (IS_ERR(sb))
+ return ERR_CAST(sb);
+ if (!sb->s_root) {
+ error = kernfs_fill_super(sb);
+ if (error) {
+ deactivate_locked_super(sb);
+ return ERR_PTR(error);
+ }
+ sb->s_flags |= MS_ACTIVE;
+ }
+
+ return dget(sb->s_root);
+}
+
+/**
+ * kernfs_kill_sb - kill_sb for kernfs
+ * @sb: super_block being killed
+ *
+ * This can be used directly for file_system_type->kill_sb(). If a kernfs
+ * user needs extra cleanup, it can implement its own kill_sb() and call
+ * this function at the end.
+ */
+void kernfs_kill_sb(struct super_block *sb)
+{
+ struct kernfs_super_info *info = kernfs_info(sb);
+ struct kernfs_node *root_kn = sb->s_root->d_fsdata;
+
+ /*
+ * Remove the superblock from fs_supers/s_instances
+ * so we can't find it, before freeing kernfs_super_info.
+ */
+ kill_anon_super(sb);
+ kfree(info);
+ kernfs_put(root_kn);
+}
+
+void __init kernfs_init(void)
+{
+ kernfs_node_cache = kmem_cache_create("kernfs_node_cache",
+ sizeof(struct kernfs_node),
+ 0, SLAB_PANIC, NULL);
+ kernfs_inode_init();
+}
diff --git a/fs/kernfs/symlink.c b/fs/kernfs/symlink.c
new file mode 100644
index 0000000..4d45705
--- /dev/null
+++ b/fs/kernfs/symlink.c
@@ -0,0 +1,151 @@
+/*
+ * fs/kernfs/symlink.c - kernfs symlink implementation
+ *
+ * Copyright (c) 2001-3 Patrick Mochel
+ * Copyright (c) 2007 SUSE Linux Products GmbH
+ * Copyright (c) 2007, 2013 Tejun Heo <tj@kernel.org>
+ *
+ * This file is released under the GPLv2.
+ */
+
+#include <linux/fs.h>
+#include <linux/gfp.h>
+#include <linux/namei.h>
+
+#include "kernfs-internal.h"
+
+/**
+ * kernfs_create_link - create a symlink
+ * @parent: directory to create the symlink in
+ * @name: name of the symlink
+ * @target: target node for the symlink to point to
+ *
+ * Returns the created node on success, ERR_PTR() value on error.
+ */
+struct kernfs_node *kernfs_create_link(struct kernfs_node *parent,
+ const char *name,
+ struct kernfs_node *target)
+{
+ struct kernfs_node *kn;
+ struct kernfs_addrm_cxt acxt;
+ int error;
+
+ kn = kernfs_new_node(parent, name, S_IFLNK|S_IRWXUGO, KERNFS_LINK);
+ if (!kn)
+ return ERR_PTR(-ENOMEM);
+
+ if (kernfs_ns_enabled(parent))
+ kn->ns = target->ns;
+ kn->symlink.target_kn = target;
+ kernfs_get(target); /* ref owned by symlink */
+
+ kernfs_addrm_start(&acxt);
+ error = kernfs_add_one(&acxt, kn);
+ kernfs_addrm_finish(&acxt);
+
+ if (!error)
+ return kn;
+
+ kernfs_put(kn);
+ return ERR_PTR(error);
+}
+
+static int kernfs_get_target_path(struct kernfs_node *parent,
+ struct kernfs_node *target, char *path)
+{
+ struct kernfs_node *base, *kn;
+ char *s = path;
+ int len = 0;
+
+ /* go up to the root, stop at the base */
+ base = parent;
+ while (base->parent) {
+ kn = target->parent;
+ while (kn->parent && base != kn)
+ kn = kn->parent;
+
+ if (base == kn)
+ break;
+
+ strcpy(s, "../");
+ s += 3;
+ base = base->parent;
+ }
+
+ /* determine end of target string for reverse fillup */
+ kn = target;
+ while (kn->parent && kn != base) {
+ len += strlen(kn->name) + 1;
+ kn = kn->parent;
+ }
+
+ /* check limits */
+ if (len < 2)
+ return -EINVAL;
+ len--;
+ if ((s - path) + len > PATH_MAX)
+ return -ENAMETOOLONG;
+
+ /* reverse fillup of target string from target to base */
+ kn = target;
+ while (kn->parent && kn != base) {
+ int slen = strlen(kn->name);
+
+ len -= slen;
+ strncpy(s + len, kn->name, slen);
+ if (len)
+ s[--len] = '/';
+
+ kn = kn->parent;
+ }
+
+ return 0;
+}
+
+static int kernfs_getlink(struct dentry *dentry, char *path)
+{
+ struct kernfs_node *kn = dentry->d_fsdata;
+ struct kernfs_node *parent = kn->parent;
+ struct kernfs_node *target = kn->symlink.target_kn;
+ int error;
+
+ mutex_lock(&kernfs_mutex);
+ error = kernfs_get_target_path(parent, target, path);
+ mutex_unlock(&kernfs_mutex);
+
+ return error;
+}
+
+static void *kernfs_iop_follow_link(struct dentry *dentry, struct nameidata *nd)
+{
+ int error = -ENOMEM;
+ unsigned long page = get_zeroed_page(GFP_KERNEL);
+ if (page) {
+ error = kernfs_getlink(dentry, (char *) page);
+ if (error < 0)
+ free_page((unsigned long)page);
+ }
+ nd_set_link(nd, error ? ERR_PTR(error) : (char *)page);
+ return NULL;
+}
+
+static void kernfs_iop_put_link(struct dentry *dentry, struct nameidata *nd,
+ void *cookie)
+{
+ char *page = nd_get_link(nd);
+ if (!IS_ERR(page))
+ free_page((unsigned long)page);
+}
+
+const struct inode_operations kernfs_symlink_iops = {
+ .setxattr = kernfs_iop_setxattr,
+ .removexattr = kernfs_iop_removexattr,
+ .getxattr = kernfs_iop_getxattr,
+ .listxattr = kernfs_iop_listxattr,
+ .readlink = generic_readlink,
+ .follow_link = kernfs_iop_follow_link,
+ .put_link = kernfs_iop_put_link,
+ .setattr = kernfs_iop_setattr,
+ .getattr = kernfs_iop_getattr,
+ .permission = kernfs_iop_permission,
+};
diff --git a/fs/namespace.c b/fs/namespace.c
index ac2ce8a..22e5367 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -2790,6 +2790,8 @@ void __init mnt_init(void)
for (u = 0; u < HASH_SIZE; u++)
INIT_LIST_HEAD(&mountpoint_hashtable[u]);
+ kernfs_init();
+
err = sysfs_init();
if (err)
printk(KERN_WARNING "%s: sysfs_init error: %d\n",
@@ -2886,7 +2888,7 @@ bool fs_fully_visible(struct file_system_type *type)
struct inode *inode = child->mnt_mountpoint->d_inode;
if (!S_ISDIR(inode->i_mode))
goto next;
- if (inode->i_nlink != 2)
+ if (inode->i_nlink > 2)
goto next;
}
visible = true;
diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c
index 9f6b486..a1a1916 100644
--- a/fs/nilfs2/segment.c
+++ b/fs/nilfs2/segment.c
@@ -1440,17 +1440,19 @@ static int nilfs_segctor_collect(struct nilfs_sc_info *sci,
nilfs_clear_logs(&sci->sc_segbufs);
- err = nilfs_segctor_extend_segments(sci, nilfs, nadd);
- if (unlikely(err))
- return err;
-
if (sci->sc_stage.flags & NILFS_CF_SUFREED) {
err = nilfs_sufile_cancel_freev(nilfs->ns_sufile,
sci->sc_freesegs,
sci->sc_nfreesegs,
NULL);
WARN_ON(err); /* do not happen */
+ sci->sc_stage.flags &= ~NILFS_CF_SUFREED;
}
+
+ err = nilfs_segctor_extend_segments(sci, nilfs, nadd);
+ if (unlikely(err))
+ return err;
+
nadd = min_t(int, nadd << 1, SC_MAX_SEGDELTA);
sci->sc_stage = prev_stage;
}
diff --git a/fs/sysfs/Makefile b/fs/sysfs/Makefile
index 8876ac1..6eff6e1 100644
--- a/fs/sysfs/Makefile
+++ b/fs/sysfs/Makefile
@@ -2,4 +2,4 @@
# Makefile for the sysfs virtual filesystem
#
-obj-y := inode.o file.o dir.o symlink.o mount.o group.o
+obj-y := file.o dir.o symlink.o mount.o group.o
diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index 5e73d66..ee0d761 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -13,465 +13,31 @@
#undef DEBUG
#include <linux/fs.h>
-#include <linux/mount.h>
-#include <linux/module.h>
#include <linux/kobject.h>
-#include <linux/namei.h>
-#include <linux/idr.h>
-#include <linux/completion.h>
-#include <linux/mutex.h>
#include <linux/slab.h>
-#include <linux/security.h>
-#include <linux/hash.h>
#include "sysfs.h"
-DEFINE_MUTEX(sysfs_mutex);
DEFINE_SPINLOCK(sysfs_symlink_target_lock);
-#define to_sysfs_dirent(X) rb_entry((X), struct sysfs_dirent, s_rb)
-
-static DEFINE_SPINLOCK(sysfs_ino_lock);
-static DEFINE_IDA(sysfs_ino_ida);
-
-/**
- * sysfs_name_hash
- * @name: Null terminated string to hash
- * @ns: Namespace tag to hash
- *
- * Returns 31 bit hash of ns + name (so it fits in an off_t )
- */
-static unsigned int sysfs_name_hash(const char *name, const void *ns)
-{
- unsigned long hash = init_name_hash();
- unsigned int len = strlen(name);
- while (len--)
- hash = partial_name_hash(*name++, hash);
- hash = (end_name_hash(hash) ^ hash_ptr((void *)ns, 31));
- hash &= 0x7fffffffU;
- /* Reserve hash numbers 0, 1 and INT_MAX for magic directory entries */
- if (hash < 1)
- hash += 2;
- if (hash >= INT_MAX)
- hash = INT_MAX - 1;
- return hash;
-}
-
-static int sysfs_name_compare(unsigned int hash, const char *name,
- const void *ns, const struct sysfs_dirent *sd)
-{
- if (hash != sd->s_hash)
- return hash - sd->s_hash;
- if (ns != sd->s_ns)
- return ns - sd->s_ns;
- return strcmp(name, sd->s_name);
-}
-
-static int sysfs_sd_compare(const struct sysfs_dirent *left,
- const struct sysfs_dirent *right)
-{
- return sysfs_name_compare(left->s_hash, left->s_name, left->s_ns,
- right);
-}
-
-/**
- * sysfs_link_sibling - link sysfs_dirent into sibling rbtree
- * @sd: sysfs_dirent of interest
- *
- * Link @sd into its sibling rbtree which starts from
- * sd->s_parent->s_dir.children.
- *
- * Locking:
- * mutex_lock(sysfs_mutex)
- *
- * RETURNS:
- * 0 on susccess -EEXIST on failure.
- */
-static int sysfs_link_sibling(struct sysfs_dirent *sd)
-{
- struct rb_node **node = &sd->s_parent->s_dir.children.rb_node;
- struct rb_node *parent = NULL;
-
- if (sysfs_type(sd) == SYSFS_DIR)
- sd->s_parent->s_dir.subdirs++;
-
- while (*node) {
- struct sysfs_dirent *pos;
- int result;
-
- pos = to_sysfs_dirent(*node);
- parent = *node;
- result = sysfs_sd_compare(sd, pos);
- if (result < 0)
- node = &pos->s_rb.rb_left;
- else if (result > 0)
- node = &pos->s_rb.rb_right;
- else
- return -EEXIST;
- }
- /* add new node and rebalance the tree */
- rb_link_node(&sd->s_rb, parent, node);
- rb_insert_color(&sd->s_rb, &sd->s_parent->s_dir.children);
- return 0;
-}
-
-/**
- * sysfs_unlink_sibling - unlink sysfs_dirent from sibling rbtree
- * @sd: sysfs_dirent of interest
- *
- * Unlink @sd from its sibling rbtree which starts from
- * sd->s_parent->s_dir.children.
- *
- * Locking:
- * mutex_lock(sysfs_mutex)
- */
-static void sysfs_unlink_sibling(struct sysfs_dirent *sd)
-{
- if (sysfs_type(sd) == SYSFS_DIR)
- sd->s_parent->s_dir.subdirs--;
-
- rb_erase(&sd->s_rb, &sd->s_parent->s_dir.children);
-}
-
-/**
- * sysfs_get_active - get an active reference to sysfs_dirent
- * @sd: sysfs_dirent to get an active reference to
- *
- * Get an active reference of @sd. This function is noop if @sd
- * is NULL.
- *
- * RETURNS:
- * Pointer to @sd on success, NULL on failure.
- */
-struct sysfs_dirent *sysfs_get_active(struct sysfs_dirent *sd)
-{
- if (unlikely(!sd))
- return NULL;
-
- if (!atomic_inc_unless_negative(&sd->s_active))
- return NULL;
-
- if (likely(!sysfs_ignore_lockdep(sd)))
- rwsem_acquire_read(&sd->dep_map, 0, 1, _RET_IP_);
- return sd;
-}
-
-/**
- * sysfs_put_active - put an active reference to sysfs_dirent
- * @sd: sysfs_dirent to put an active reference to
- *
- * Put an active reference to @sd. This function is noop if @sd
- * is NULL.
- */
-void sysfs_put_active(struct sysfs_dirent *sd)
-{
- int v;
-
- if (unlikely(!sd))
- return;
-
- if (likely(!sysfs_ignore_lockdep(sd)))
- rwsem_release(&sd->dep_map, 1, _RET_IP_);
- v = atomic_dec_return(&sd->s_active);
- if (likely(v != SD_DEACTIVATED_BIAS))
- return;
-
- /* atomic_dec_return() is a mb(), we'll always see the updated
- * sd->u.completion.
- */
- complete(sd->u.completion);
-}
-
-/**
- * sysfs_deactivate - deactivate sysfs_dirent
- * @sd: sysfs_dirent to deactivate
- *
- * Deny new active references and drain existing ones.
- */
-static void sysfs_deactivate(struct sysfs_dirent *sd)
-{
- DECLARE_COMPLETION_ONSTACK(wait);
- int v;
-
- BUG_ON(!(sd->s_flags & SYSFS_FLAG_REMOVED));
-
- if (!(sysfs_type(sd) & SYSFS_ACTIVE_REF))
- return;
-
- sd->u.completion = (void *)&wait;
-
- rwsem_acquire(&sd->dep_map, 0, 0, _RET_IP_);
- /* atomic_add_return() is a mb(), put_active() will always see
- * the updated sd->u.completion.
- */
- v = atomic_add_return(SD_DEACTIVATED_BIAS, &sd->s_active);
-
- if (v != SD_DEACTIVATED_BIAS) {
- lock_contended(&sd->dep_map, _RET_IP_);
- wait_for_completion(&wait);
- }
-
- lock_acquired(&sd->dep_map, _RET_IP_);
- rwsem_release(&sd->dep_map, 1, _RET_IP_);
-}
-
-static int sysfs_alloc_ino(unsigned int *pino)
-{
- int ino, rc;
-
- retry:
- spin_lock(&sysfs_ino_lock);
- rc = ida_get_new_above(&sysfs_ino_ida, 2, &ino);
- spin_unlock(&sysfs_ino_lock);
-
- if (rc == -EAGAIN) {
- if (ida_pre_get(&sysfs_ino_ida, GFP_KERNEL))
- goto retry;
- rc = -ENOMEM;
- }
-
- *pino = ino;
- return rc;
-}
-
-static void sysfs_free_ino(unsigned int ino)
-{
- spin_lock(&sysfs_ino_lock);
- ida_remove(&sysfs_ino_ida, ino);
- spin_unlock(&sysfs_ino_lock);
-}
-
-void release_sysfs_dirent(struct sysfs_dirent *sd)
-{
- struct sysfs_dirent *parent_sd;
-
- repeat:
- /* Moving/renaming is always done while holding reference.
- * sd->s_parent won't change beneath us.
- */
- parent_sd = sd->s_parent;
-
- WARN(!(sd->s_flags & SYSFS_FLAG_REMOVED),
- "sysfs: free using entry: %s/%s\n",
- parent_sd ? parent_sd->s_name : "", sd->s_name);
-
- if (sysfs_type(sd) == SYSFS_KOBJ_LINK)
- sysfs_put(sd->s_symlink.target_sd);
- if (sysfs_type(sd) & SYSFS_COPY_NAME)
- kfree(sd->s_name);
- if (sd->s_iattr && sd->s_iattr->ia_secdata)
- security_release_secctx(sd->s_iattr->ia_secdata,
- sd->s_iattr->ia_secdata_len);
- kfree(sd->s_iattr);
- sysfs_free_ino(sd->s_ino);
- kmem_cache_free(sysfs_dir_cachep, sd);
-
- sd = parent_sd;
- if (sd && atomic_dec_and_test(&sd->s_count))
- goto repeat;
-}
-
-static int sysfs_dentry_delete(const struct dentry *dentry)
-{
- struct sysfs_dirent *sd = dentry->d_fsdata;
- return !(sd && !(sd->s_flags & SYSFS_FLAG_REMOVED));
-}
-
-static int sysfs_dentry_revalidate(struct dentry *dentry, unsigned int flags)
-{
- struct sysfs_dirent *sd;
- int type;
-
- if (flags & LOOKUP_RCU)
- return -ECHILD;
-
- sd = dentry->d_fsdata;
- mutex_lock(&sysfs_mutex);
-
- /* The sysfs dirent has been deleted */
- if (sd->s_flags & SYSFS_FLAG_REMOVED)
- goto out_bad;
-
- /* The sysfs dirent has been moved? */
- if (dentry->d_parent->d_fsdata != sd->s_parent)
- goto out_bad;
-
- /* The sysfs dirent has been renamed */
- if (strcmp(dentry->d_name.name, sd->s_name) != 0)
- goto out_bad;
-
- /* The sysfs dirent has been moved to a different namespace */
- type = KOBJ_NS_TYPE_NONE;
- if (sd->s_parent) {
- type = sysfs_ns_type(sd->s_parent);
- if (type != KOBJ_NS_TYPE_NONE &&
- sysfs_info(dentry->d_sb)->ns[type] != sd->s_ns)
- goto out_bad;
- }
-
- mutex_unlock(&sysfs_mutex);
-out_valid:
- return 1;
-out_bad:
- /* Remove the dentry from the dcache hashes.
- * If this is a deleted dentry we use d_drop instead of d_delete
- * so sysfs doesn't need to cope with negative dentries.
- *
- * If this is a dentry that has simply been renamed we
- * use d_drop to remove it from the dcache lookup on its
- * old parent. If this dentry persists later when a lookup
- * is performed at its new name the dentry will be readded
- * to the dcache hashes.
- */
- mutex_unlock(&sysfs_mutex);
-
- /* If we have submounts we must allow the vfs caches
- * to lie about the state of the filesystem to prevent
- * leaks and other nasty things.
- */
- if (check_submounts_and_drop(dentry) != 0)
- goto out_valid;
-
- return 0;
-}
-
-static void sysfs_dentry_release(struct dentry *dentry)
-{
- sysfs_put(dentry->d_fsdata);
-}
-
-const struct dentry_operations sysfs_dentry_ops = {
- .d_revalidate = sysfs_dentry_revalidate,
- .d_delete = sysfs_dentry_delete,
- .d_release = sysfs_dentry_release,
-};
-
-struct sysfs_dirent *sysfs_new_dirent(const char *name, umode_t mode, int type)
-{
- char *dup_name = NULL;
- struct sysfs_dirent *sd;
-
- if (type & SYSFS_COPY_NAME) {
- name = dup_name = kstrdup(name, GFP_KERNEL);
- if (!name)
- return NULL;
- }
-
- sd = kmem_cache_zalloc(sysfs_dir_cachep, GFP_KERNEL);
- if (!sd)
- goto err_out1;
-
- if (sysfs_alloc_ino(&sd->s_ino))
- goto err_out2;
-
- atomic_set(&sd->s_count, 1);
- atomic_set(&sd->s_active, 0);
-
- sd->s_name = name;
- sd->s_mode = mode;
- sd->s_flags = type | SYSFS_FLAG_REMOVED;
-
- return sd;
-
- err_out2:
- kmem_cache_free(sysfs_dir_cachep, sd);
- err_out1:
- kfree(dup_name);
- return NULL;
-}
-
-/**
- * sysfs_addrm_start - prepare for sysfs_dirent add/remove
- * @acxt: pointer to sysfs_addrm_cxt to be used
- *
- * This function is called when the caller is about to add or remove
- * sysfs_dirent. This function acquires sysfs_mutex. @acxt is used
- * to keep and pass context to other addrm functions.
- *
- * LOCKING:
- * Kernel thread context (may sleep). sysfs_mutex is locked on
- * return.
- */
-void sysfs_addrm_start(struct sysfs_addrm_cxt *acxt)
- __acquires(sysfs_mutex)
-{
- memset(acxt, 0, sizeof(*acxt));
-
- mutex_lock(&sysfs_mutex);
-}
-
-/**
- * __sysfs_add_one - add sysfs_dirent to parent without warning
- * @acxt: addrm context to use
- * @sd: sysfs_dirent to be added
- * @parent_sd: the parent sysfs_dirent to add @sd to
- *
- * Get @parent_sd and set @sd->s_parent to it and increment nlink of
- * the parent inode if @sd is a directory and link into the children
- * list of the parent.
- *
- * This function should be called between calls to
- * sysfs_addrm_start() and sysfs_addrm_finish() and should be
- * passed the same @acxt as passed to sysfs_addrm_start().
- *
- * LOCKING:
- * Determined by sysfs_addrm_start().
- *
- * RETURNS:
- * 0 on success, -EEXIST if entry with the given name already
- * exists.
- */
-int __sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd,
- struct sysfs_dirent *parent_sd)
-{
- struct sysfs_inode_attrs *ps_iattr;
- int ret;
-
- if (!!sysfs_ns_type(parent_sd) != !!sd->s_ns) {
- WARN(1, KERN_WARNING "sysfs: ns %s in '%s' for '%s'\n",
- sysfs_ns_type(parent_sd) ? "required" : "invalid",
- parent_sd->s_name, sd->s_name);
- return -EINVAL;
- }
-
- sd->s_hash = sysfs_name_hash(sd->s_name, sd->s_ns);
- sd->s_parent = sysfs_get(parent_sd);
-
- ret = sysfs_link_sibling(sd);
- if (ret)
- return ret;
-
- /* Update timestamps on the parent */
- ps_iattr = parent_sd->s_iattr;
- if (ps_iattr) {
- struct iattr *ps_iattrs = &ps_iattr->ia_iattr;
- ps_iattrs->ia_ctime = ps_iattrs->ia_mtime = CURRENT_TIME;
- }
-
- /* Mark the entry added into directory tree */
- sd->s_flags &= ~SYSFS_FLAG_REMOVED;
-
- return 0;
-}
-
/**
* sysfs_pathname - return full path to sysfs dirent
- * @sd: sysfs_dirent whose path we want
+ * @kn: kernfs_node whose path we want
* @path: caller allocated buffer of size PATH_MAX
*
* Gives the name "/" to the sysfs_root entry; any path returned
* is relative to wherever sysfs is mounted.
*/
-static char *sysfs_pathname(struct sysfs_dirent *sd, char *path)
+static char *sysfs_pathname(struct kernfs_node *kn, char *path)
{
- if (sd->s_parent) {
- sysfs_pathname(sd->s_parent, path);
+ if (kn->parent) {
+ sysfs_pathname(kn->parent, path);
strlcat(path, "/", PATH_MAX);
}
- strlcat(path, sd->s_name, PATH_MAX);
+ strlcat(path, kn->name, PATH_MAX);
return path;
}
-void sysfs_warn_dup(struct sysfs_dirent *parent, const char *name)
+void sysfs_warn_dup(struct kernfs_node *parent, const char *name)
{
char *path;
@@ -489,445 +55,34 @@ void sysfs_warn_dup(struct sysfs_dirent *parent, const char *name)
}
/**
- * sysfs_add_one - add sysfs_dirent to parent
- * @acxt: addrm context to use
- * @sd: sysfs_dirent to be added
- * @parent_sd: the parent sysfs_dirent to add @sd to
- *
- * Get @parent_sd and set @sd->s_parent to it and increment nlink of
- * the parent inode if @sd is a directory and link into the children
- * list of the parent.
- *
- * This function should be called between calls to
- * sysfs_addrm_start() and sysfs_addrm_finish() and should be
- * passed the same @acxt as passed to sysfs_addrm_start().
- *
- * LOCKING:
- * Determined by sysfs_addrm_start().
- *
- * RETURNS:
- * 0 on success, -EEXIST if entry with the given name already
- * exists.
- */
-int sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd,
- struct sysfs_dirent *parent_sd)
-{
- int ret;
-
- ret = __sysfs_add_one(acxt, sd, parent_sd);
-
- if (ret == -EEXIST)
- sysfs_warn_dup(parent_sd, sd->s_name);
- return ret;
-}
-
-/**
- * sysfs_remove_one - remove sysfs_dirent from parent
- * @acxt: addrm context to use
- * @sd: sysfs_dirent to be removed
- *
- * Mark @sd removed and drop nlink of parent inode if @sd is a
- * directory. @sd is unlinked from the children list.
- *
- * This function should be called between calls to
- * sysfs_addrm_start() and sysfs_addrm_finish() and should be
- * passed the same @acxt as passed to sysfs_addrm_start().
- *
- * LOCKING:
- * Determined by sysfs_addrm_start().
- */
-static void sysfs_remove_one(struct sysfs_addrm_cxt *acxt,
- struct sysfs_dirent *sd)
-{
- struct sysfs_inode_attrs *ps_iattr;
-
- /*
- * Removal can be called multiple times on the same node. Only the
- * first invocation is effective and puts the base ref.
- */
- if (sd->s_flags & SYSFS_FLAG_REMOVED)
- return;
-
- sysfs_unlink_sibling(sd);
-
- /* Update timestamps on the parent */
- ps_iattr = sd->s_parent->s_iattr;
- if (ps_iattr) {
- struct iattr *ps_iattrs = &ps_iattr->ia_iattr;
- ps_iattrs->ia_ctime = ps_iattrs->ia_mtime = CURRENT_TIME;
- }
-
- sd->s_flags |= SYSFS_FLAG_REMOVED;
- sd->u.removed_list = acxt->removed;
- acxt->removed = sd;
-}
-
-/**
- * sysfs_addrm_finish - finish up sysfs_dirent add/remove
- * @acxt: addrm context to finish up
- *
- * Finish up sysfs_dirent add/remove. Resources acquired by
- * sysfs_addrm_start() are released and removed sysfs_dirents are
- * cleaned up.
- *
- * LOCKING:
- * sysfs_mutex is released.
- */
-void sysfs_addrm_finish(struct sysfs_addrm_cxt *acxt)
- __releases(sysfs_mutex)
-{
- /* release resources acquired by sysfs_addrm_start() */
- mutex_unlock(&sysfs_mutex);
-
- /* kill removed sysfs_dirents */
- while (acxt->removed) {
- struct sysfs_dirent *sd = acxt->removed;
-
- acxt->removed = sd->u.removed_list;
-
- sysfs_deactivate(sd);
- sysfs_unmap_bin_file(sd);
- sysfs_put(sd);
- }
-}
-
-/**
- * sysfs_find_dirent - find sysfs_dirent with the given name
- * @parent_sd: sysfs_dirent to search under
- * @name: name to look for
- * @ns: the namespace tag to use
- *
- * Look for sysfs_dirent with name @name under @parent_sd.
- *
- * LOCKING:
- * mutex_lock(sysfs_mutex)
- *
- * RETURNS:
- * Pointer to sysfs_dirent if found, NULL if not.
- */
-struct sysfs_dirent *sysfs_find_dirent(struct sysfs_dirent *parent_sd,
- const unsigned char *name,
- const void *ns)
-{
- struct rb_node *node = parent_sd->s_dir.children.rb_node;
- unsigned int hash;
-
- if (!!sysfs_ns_type(parent_sd) != !!ns) {
- WARN(1, KERN_WARNING "sysfs: ns %s in '%s' for '%s'\n",
- sysfs_ns_type(parent_sd) ? "required" : "invalid",
- parent_sd->s_name, name);
- return NULL;
- }
-
- hash = sysfs_name_hash(name, ns);
- while (node) {
- struct sysfs_dirent *sd;
- int result;
-
- sd = to_sysfs_dirent(node);
- result = sysfs_name_compare(hash, name, ns, sd);
- if (result < 0)
- node = node->rb_left;
- else if (result > 0)
- node = node->rb_right;
- else
- return sd;
- }
- return NULL;
-}
-
-/**
- * sysfs_get_dirent_ns - find and get sysfs_dirent with the given name
- * @parent_sd: sysfs_dirent to search under
- * @name: name to look for
- * @ns: the namespace tag to use
- *
- * Look for sysfs_dirent with name @name under @parent_sd and get
- * it if found.
- *
- * LOCKING:
- * Kernel thread context (may sleep). Grabs sysfs_mutex.
- *
- * RETURNS:
- * Pointer to sysfs_dirent if found, NULL if not.
- */
-struct sysfs_dirent *sysfs_get_dirent_ns(struct sysfs_dirent *parent_sd,
- const unsigned char *name,
- const void *ns)
-{
- struct sysfs_dirent *sd;
-
- mutex_lock(&sysfs_mutex);
- sd = sysfs_find_dirent(parent_sd, name, ns);
- sysfs_get(sd);
- mutex_unlock(&sysfs_mutex);
-
- return sd;
-}
-EXPORT_SYMBOL_GPL(sysfs_get_dirent_ns);
-
-static int create_dir(struct kobject *kobj, struct sysfs_dirent *parent_sd,
- enum kobj_ns_type type,
- const char *name, const void *ns,
- struct sysfs_dirent **p_sd)
-{
- umode_t mode = S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO;
- struct sysfs_addrm_cxt acxt;
- struct sysfs_dirent *sd;
- int rc;
-
- /* allocate */
- sd = sysfs_new_dirent(name, mode, SYSFS_DIR);
- if (!sd)
- return -ENOMEM;
-
- sd->s_flags |= (type << SYSFS_NS_TYPE_SHIFT);
- sd->s_ns = ns;
- sd->s_dir.kobj = kobj;
-
- /* link in */
- sysfs_addrm_start(&acxt);
- rc = sysfs_add_one(&acxt, sd, parent_sd);
- sysfs_addrm_finish(&acxt);
-
- if (rc == 0)
- *p_sd = sd;
- else
- sysfs_put(sd);
-
- return rc;
-}
-
-int sysfs_create_subdir(struct kobject *kobj, const char *name,
- struct sysfs_dirent **p_sd)
-{
- return create_dir(kobj, kobj->sd,
- KOBJ_NS_TYPE_NONE, name, NULL, p_sd);
-}
-
-/**
- * sysfs_read_ns_type: return associated ns_type
- * @kobj: the kobject being queried
- *
- * Each kobject can be tagged with exactly one namespace type
- * (i.e. network or user). Return the ns_type associated with
- * this object if any
- */
-static enum kobj_ns_type sysfs_read_ns_type(struct kobject *kobj)
-{
- const struct kobj_ns_type_operations *ops;
- enum kobj_ns_type type;
-
- ops = kobj_child_ns_ops(kobj);
- if (!ops)
- return KOBJ_NS_TYPE_NONE;
-
- type = ops->type;
- BUG_ON(type <= KOBJ_NS_TYPE_NONE);
- BUG_ON(type >= KOBJ_NS_TYPES);
- BUG_ON(!kobj_ns_type_registered(type));
-
- return type;
-}
-
-/**
* sysfs_create_dir_ns - create a directory for an object with a namespace tag
* @kobj: object we're creating directory for
* @ns: the namespace tag to use
*/
int sysfs_create_dir_ns(struct kobject *kobj, const void *ns)
{
- enum kobj_ns_type type;
- struct sysfs_dirent *parent_sd, *sd;
- int error = 0;
+ struct kernfs_node *parent, *kn;
BUG_ON(!kobj);
if (kobj->parent)
- parent_sd = kobj->parent->sd;
+ parent = kobj->parent->sd;
else
- parent_sd = &sysfs_root;
+ parent = sysfs_root_kn;
- if (!parent_sd)
+ if (!parent)
return -ENOENT;
- type = sysfs_read_ns_type(kobj);
-
- error = create_dir(kobj, parent_sd, type, kobject_name(kobj), ns, &sd);
- if (!error)
- kobj->sd = sd;
- return error;
-}
-
-static struct dentry *sysfs_lookup(struct inode *dir, struct dentry *dentry,
- unsigned int flags)
-{
- struct dentry *ret = NULL;
- struct dentry *parent = dentry->d_parent;
- struct sysfs_dirent *parent_sd = parent->d_fsdata;
- struct sysfs_dirent *sd;
- struct inode *inode;
- enum kobj_ns_type type;
- const void *ns;
-
- mutex_lock(&sysfs_mutex);
-
- type = sysfs_ns_type(parent_sd);
- ns = sysfs_info(dir->i_sb)->ns[type];
-
- sd = sysfs_find_dirent(parent_sd, dentry->d_name.name, ns);
-
- /* no such entry */
- if (!sd) {
- ret = ERR_PTR(-ENOENT);
- goto out_unlock;
- }
- dentry->d_fsdata = sysfs_get(sd);
-
- /* attach dentry and inode */
- inode = sysfs_get_inode(dir->i_sb, sd);
- if (!inode) {
- ret = ERR_PTR(-ENOMEM);
- goto out_unlock;
- }
-
- /* instantiate and hash dentry */
- ret = d_materialise_unique(dentry, inode);
- out_unlock:
- mutex_unlock(&sysfs_mutex);
- return ret;
-}
-
-const struct inode_operations sysfs_dir_inode_operations = {
- .lookup = sysfs_lookup,
- .permission = sysfs_permission,
- .setattr = sysfs_setattr,
- .getattr = sysfs_getattr,
- .setxattr = sysfs_setxattr,
-};
-
-static struct sysfs_dirent *sysfs_leftmost_descendant(struct sysfs_dirent *pos)
-{
- struct sysfs_dirent *last;
-
- while (true) {
- struct rb_node *rbn;
-
- last = pos;
-
- if (sysfs_type(pos) != SYSFS_DIR)
- break;
-
- rbn = rb_first(&pos->s_dir.children);
- if (!rbn)
- break;
-
- pos = to_sysfs_dirent(rbn);
- }
-
- return last;
-}
-
-/**
- * sysfs_next_descendant_post - find the next descendant for post-order walk
- * @pos: the current position (%NULL to initiate traversal)
- * @root: sysfs_dirent whose descendants to walk
- *
- * Find the next descendant to visit for post-order traversal of @root's
- * descendants. @root is included in the iteration and the last node to be
- * visited.
- */
-static struct sysfs_dirent *sysfs_next_descendant_post(struct sysfs_dirent *pos,
- struct sysfs_dirent *root)
-{
- struct rb_node *rbn;
-
- lockdep_assert_held(&sysfs_mutex);
-
- /* if first iteration, visit leftmost descendant which may be root */
- if (!pos)
- return sysfs_leftmost_descendant(root);
-
- /* if we visited @root, we're done */
- if (pos == root)
- return NULL;
-
- /* if there's an unvisited sibling, visit its leftmost descendant */
- rbn = rb_next(&pos->s_rb);
- if (rbn)
- return sysfs_leftmost_descendant(to_sysfs_dirent(rbn));
-
- /* no sibling left, visit parent */
- return pos->s_parent;
-}
-
-static void __sysfs_remove(struct sysfs_addrm_cxt *acxt,
- struct sysfs_dirent *sd)
-{
- struct sysfs_dirent *pos, *next;
-
- if (!sd)
- return;
-
- pr_debug("sysfs %s: removing\n", sd->s_name);
-
- next = NULL;
- do {
- pos = next;
- next = sysfs_next_descendant_post(pos, sd);
- if (pos)
- sysfs_remove_one(acxt, pos);
- } while (next);
-}
-
-/**
- * sysfs_remove - remove a sysfs_dirent recursively
- * @sd: the sysfs_dirent to remove
- *
- * Remove @sd along with all its subdirectories and files.
- */
-void sysfs_remove(struct sysfs_dirent *sd)
-{
- struct sysfs_addrm_cxt acxt;
-
- sysfs_addrm_start(&acxt);
- __sysfs_remove(&acxt, sd);
- sysfs_addrm_finish(&acxt);
-}
-
-/**
- * sysfs_hash_and_remove - find a sysfs_dirent by name and remove it
- * @dir_sd: parent of the target
- * @name: name of the sysfs_dirent to remove
- * @ns: namespace tag of the sysfs_dirent to remove
- *
- * Look for the sysfs_dirent with @name and @ns under @dir_sd and remove
- * it. Returns 0 on success, -ENOENT if such entry doesn't exist.
- */
-int sysfs_hash_and_remove(struct sysfs_dirent *dir_sd, const char *name,
- const void *ns)
-{
- struct sysfs_addrm_cxt acxt;
- struct sysfs_dirent *sd;
-
- if (!dir_sd) {
- WARN(1, KERN_WARNING "sysfs: can not remove '%s', no directory\n",
- name);
- return -ENOENT;
+ kn = kernfs_create_dir_ns(parent, kobject_name(kobj),
+ S_IRWXU | S_IRUGO | S_IXUGO, kobj, ns);
+ if (IS_ERR(kn)) {
+ if (PTR_ERR(kn) == -EEXIST)
+ sysfs_warn_dup(parent, kobject_name(kobj));
+ return PTR_ERR(kn);
}
- sysfs_addrm_start(&acxt);
-
- sd = sysfs_find_dirent(dir_sd, name, ns);
- if (sd)
- __sysfs_remove(&acxt, sd);
-
- sysfs_addrm_finish(&acxt);
-
- if (sd)
- return 0;
- else
- return -ENOENT;
+ kobj->sd = kn;
+ return 0;
}
/**
@@ -940,207 +95,47 @@ int sysfs_hash_and_remove(struct sysfs_dirent *dir_sd, const char *name,
*/
void sysfs_remove_dir(struct kobject *kobj)
{
- struct sysfs_dirent *sd = kobj->sd;
+ struct kernfs_node *kn = kobj->sd;
/*
* In general, kboject owner is responsible for ensuring removal
* doesn't race with other operations and sysfs doesn't provide any
* protection; however, when @kobj is used as a symlink target, the
* symlinking entity usually doesn't own @kobj and thus has no
- * control over removal. @kobj->sd may be removed anytime and
- * symlink code may end up dereferencing an already freed sd.
+ * control over removal. @kobj->sd may be removed anytime
+ * and symlink code may end up dereferencing an already freed node.
*
- * sysfs_symlink_target_lock synchronizes @kobj->sd disassociation
- * against symlink operations so that symlink code can safely
- * dereference @kobj->sd.
+ * sysfs_symlink_target_lock synchronizes @kobj->sd
+ * disassociation against symlink operations so that symlink code
+ * can safely dereference @kobj->sd.
*/
spin_lock(&sysfs_symlink_target_lock);
kobj->sd = NULL;
spin_unlock(&sysfs_symlink_target_lock);
- if (sd) {
- WARN_ON_ONCE(sysfs_type(sd) != SYSFS_DIR);
- sysfs_remove(sd);
+ if (kn) {
+ WARN_ON_ONCE(kernfs_type(kn) != KERNFS_DIR);
+ kernfs_remove(kn);
}
}
-int sysfs_rename(struct sysfs_dirent *sd, struct sysfs_dirent *new_parent_sd,
- const char *new_name, const void *new_ns)
-{
- int error;
-
- mutex_lock(&sysfs_mutex);
-
- error = 0;
- if ((sd->s_parent == new_parent_sd) && (sd->s_ns == new_ns) &&
- (strcmp(sd->s_name, new_name) == 0))
- goto out; /* nothing to rename */
-
- error = -EEXIST;
- if (sysfs_find_dirent(new_parent_sd, new_name, new_ns))
- goto out;
-
- /* rename sysfs_dirent */
- if (strcmp(sd->s_name, new_name) != 0) {
- error = -ENOMEM;
- new_name = kstrdup(new_name, GFP_KERNEL);
- if (!new_name)
- goto out;
-
- kfree(sd->s_name);
- sd->s_name = new_name;
- }
-
- /*
- * Move to the appropriate place in the appropriate directories rbtree.
- */
- sysfs_unlink_sibling(sd);
- sysfs_get(new_parent_sd);
- sysfs_put(sd->s_parent);
- sd->s_ns = new_ns;
- sd->s_hash = sysfs_name_hash(sd->s_name, sd->s_ns);
- sd->s_parent = new_parent_sd;
- sysfs_link_sibling(sd);
-
- error = 0;
- out:
- mutex_unlock(&sysfs_mutex);
- return error;
-}
-
int sysfs_rename_dir_ns(struct kobject *kobj, const char *new_name,
const void *new_ns)
{
- struct sysfs_dirent *parent_sd = kobj->sd->s_parent;
+ struct kernfs_node *parent = kobj->sd->parent;
- return sysfs_rename(kobj->sd, parent_sd, new_name, new_ns);
+ return kernfs_rename_ns(kobj->sd, parent, new_name, new_ns);
}
int sysfs_move_dir_ns(struct kobject *kobj, struct kobject *new_parent_kobj,
const void *new_ns)
{
- struct sysfs_dirent *sd = kobj->sd;
- struct sysfs_dirent *new_parent_sd;
+ struct kernfs_node *kn = kobj->sd;
+ struct kernfs_node *new_parent;
- BUG_ON(!sd->s_parent);
- new_parent_sd = new_parent_kobj && new_parent_kobj->sd ?
- new_parent_kobj->sd : &sysfs_root;
+ BUG_ON(!kn->parent);
+ new_parent = new_parent_kobj && new_parent_kobj->sd ?
+ new_parent_kobj->sd : sysfs_root_kn;
- return sysfs_rename(sd, new_parent_sd, sd->s_name, new_ns);
+ return kernfs_rename_ns(kn, new_parent, kn->name, new_ns);
}
-
-/* Relationship between s_mode and the DT_xxx types */
-static inline unsigned char dt_type(struct sysfs_dirent *sd)
-{
- return (sd->s_mode >> 12) & 15;
-}
-
-static int sysfs_dir_release(struct inode *inode, struct file *filp)
-{
- sysfs_put(filp->private_data);
- return 0;
-}
-
-static struct sysfs_dirent *sysfs_dir_pos(const void *ns,
- struct sysfs_dirent *parent_sd, loff_t hash, struct sysfs_dirent *pos)
-{
- if (pos) {
- int valid = !(pos->s_flags & SYSFS_FLAG_REMOVED) &&
- pos->s_parent == parent_sd &&
- hash == pos->s_hash;
- sysfs_put(pos);
- if (!valid)
- pos = NULL;
- }
- if (!pos && (hash > 1) && (hash < INT_MAX)) {
- struct rb_node *node = parent_sd->s_dir.children.rb_node;
- while (node) {
- pos = to_sysfs_dirent(node);
-
- if (hash < pos->s_hash)
- node = node->rb_left;
- else if (hash > pos->s_hash)
- node = node->rb_right;
- else
- break;
- }
- }
- /* Skip over entries in the wrong namespace */
- while (pos && pos->s_ns != ns) {
- struct rb_node *node = rb_next(&pos->s_rb);
- if (!node)
- pos = NULL;
- else
- pos = to_sysfs_dirent(node);
- }
- return pos;
-}
-
-static struct sysfs_dirent *sysfs_dir_next_pos(const void *ns,
- struct sysfs_dirent *parent_sd, ino_t ino, struct sysfs_dirent *pos)
-{
- pos = sysfs_dir_pos(ns, parent_sd, ino, pos);
- if (pos)
- do {
- struct rb_node *node = rb_next(&pos->s_rb);
- if (!node)
- pos = NULL;
- else
- pos = to_sysfs_dirent(node);
- } while (pos && pos->s_ns != ns);
- return pos;
-}
-
-static int sysfs_readdir(struct file *file, struct dir_context *ctx)
-{
- struct dentry *dentry = file->f_path.dentry;
- struct sysfs_dirent *parent_sd = dentry->d_fsdata;
- struct sysfs_dirent *pos = file->private_data;
- enum kobj_ns_type type;
- const void *ns;
-
- type = sysfs_ns_type(parent_sd);
- ns = sysfs_info(dentry->d_sb)->ns[type];
-
- if (!dir_emit_dots(file, ctx))
- return 0;
- mutex_lock(&sysfs_mutex);
- for (pos = sysfs_dir_pos(ns, parent_sd, ctx->pos, pos);
- pos;
- pos = sysfs_dir_next_pos(ns, parent_sd, ctx->pos, pos)) {
- const char *name = pos->s_name;
- unsigned int type = dt_type(pos);
- int len = strlen(name);
- ino_t ino = pos->s_ino;
- ctx->pos = pos->s_hash;
- file->private_data = sysfs_get(pos);
-
- mutex_unlock(&sysfs_mutex);
- if (!dir_emit(ctx, name, len, ino, type))
- return 0;
- mutex_lock(&sysfs_mutex);
- }
- mutex_unlock(&sysfs_mutex);
- file->private_data = NULL;
- ctx->pos = INT_MAX;
- return 0;
-}
-
-static loff_t sysfs_dir_llseek(struct file *file, loff_t offset, int whence)
-{
- struct inode *inode = file_inode(file);
- loff_t ret;
-
- mutex_lock(&inode->i_mutex);
- ret = generic_file_llseek(file, offset, whence);
- mutex_unlock(&inode->i_mutex);
-
- return ret;
-}
-
-const struct file_operations sysfs_dir_operations = {
- .read = generic_read_dir,
- .iterate = sysfs_readdir,
- .release = sysfs_dir_release,
- .llseek = sysfs_dir_llseek,
-};
diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c
index 35e7d08..810cf6e 100644
--- a/fs/sysfs/file.c
+++ b/fs/sysfs/file.c
@@ -14,70 +14,23 @@
#include <linux/kobject.h>
#include <linux/kallsyms.h>
#include <linux/slab.h>
-#include <linux/fsnotify.h>
-#include <linux/namei.h>
-#include <linux/poll.h>
#include <linux/list.h>
#include <linux/mutex.h>
-#include <linux/limits.h>
-#include <linux/uaccess.h>
#include <linux/seq_file.h>
-#include <linux/mm.h>
#include "sysfs.h"
+#include "../kernfs/kernfs-internal.h"
/*
- * There's one sysfs_open_file for each open file and one sysfs_open_dirent
- * for each sysfs_dirent with one or more open files.
- *
- * sysfs_dirent->s_attr.open points to sysfs_open_dirent. s_attr.open is
- * protected by sysfs_open_dirent_lock.
- *
- * filp->private_data points to seq_file whose ->private points to
- * sysfs_open_file. sysfs_open_files are chained at
- * sysfs_open_dirent->files, which is protected by sysfs_open_file_mutex.
- */
-static DEFINE_SPINLOCK(sysfs_open_dirent_lock);
-static DEFINE_MUTEX(sysfs_open_file_mutex);
-
-struct sysfs_open_dirent {
- atomic_t refcnt;
- atomic_t event;
- wait_queue_head_t poll;
- struct list_head files; /* goes through sysfs_open_file.list */
-};
-
-struct sysfs_open_file {
- struct sysfs_dirent *sd;
- struct file *file;
- struct mutex mutex;
- int event;
- struct list_head list;
-
- bool mmapped;
- const struct vm_operations_struct *vm_ops;
-};
-
-static bool sysfs_is_bin(struct sysfs_dirent *sd)
-{
- return sysfs_type(sd) == SYSFS_KOBJ_BIN_ATTR;
-}
-
-static struct sysfs_open_file *sysfs_of(struct file *file)
-{
- return ((struct seq_file *)file->private_data)->private;
-}
-
-/*
- * Determine ktype->sysfs_ops for the given sysfs_dirent. This function
+ * Determine ktype->sysfs_ops for the given kernfs_node. This function
* must be called while holding an active reference.
*/
-static const struct sysfs_ops *sysfs_file_ops(struct sysfs_dirent *sd)
+static const struct sysfs_ops *sysfs_file_ops(struct kernfs_node *kn)
{
- struct kobject *kobj = sd->s_parent->s_dir.kobj;
+ struct kobject *kobj = kn->parent->priv;
- if (!sysfs_ignore_lockdep(sd))
- lockdep_assert_held(sd);
+ if (kn->flags & KERNFS_LOCKDEP)
+ lockdep_assert_held(kn);
return kobj->ktype ? kobj->ktype->sysfs_ops : NULL;
}
@@ -86,13 +39,13 @@ static const struct sysfs_ops *sysfs_file_ops(struct sysfs_dirent *sd)
* details like buffering and seeking. The following function pipes
* sysfs_ops->show() result through seq_file.
*/
-static int sysfs_seq_show(struct seq_file *sf, void *v)
+static int sysfs_kf_seq_show(struct seq_file *sf, void *v)
{
- struct sysfs_open_file *of = sf->private;
- struct kobject *kobj = of->sd->s_parent->s_dir.kobj;
- const struct sysfs_ops *ops;
- char *buf;
+ struct kernfs_open_file *of = sf->private;
+ struct kobject *kobj = of->kn->parent->priv;
+ const struct sysfs_ops *ops = sysfs_file_ops(of->kn);
ssize_t count;
+ char *buf;
/* acquire buffer and ensure that it's >= PAGE_SIZE */
count = seq_get_buf(sf, &buf);
@@ -102,34 +55,15 @@ static int sysfs_seq_show(struct seq_file *sf, void *v)
}
/*
- * Need @of->sd for attr and ops, its parent for kobj. @of->mutex
- * nests outside active ref and is just to ensure that the ops
- * aren't called concurrently for the same open file.
+ * Invoke show(). Control may reach here via seq file lseek even
+ * if @ops->show() isn't implemented.
*/
- mutex_lock(&of->mutex);
- if (!sysfs_get_active(of->sd)) {
- mutex_unlock(&of->mutex);
- return -ENODEV;
+ if (ops->show) {
+ count = ops->show(kobj, of->kn->priv, buf);
+ if (count < 0)
+ return count;
}
- of->event = atomic_read(&of->sd->s_attr.open->event);
-
- /*
- * Lookup @ops and invoke show(). Control may reach here via seq
- * file lseek even if @ops->show() isn't implemented.
- */
- ops = sysfs_file_ops(of->sd);
- if (ops->show)
- count = ops->show(kobj, of->sd->s_attr.attr, buf);
- else
- count = 0;
-
- sysfs_put_active(of->sd);
- mutex_unlock(&of->mutex);
-
- if (count < 0)
- return count;
-
/*
* The code works fine with PAGE_SIZE return but it's likely to
* indicate truncated result or overflow in normal use cases.
@@ -144,726 +78,194 @@ static int sysfs_seq_show(struct seq_file *sf, void *v)
return 0;
}
-/*
- * Read method for bin files. As reading a bin file can have side-effects,
- * the exact offset and bytes specified in read(2) call should be passed to
- * the read callback making it difficult to use seq_file. Implement
- * simplistic custom buffering for bin files.
- */
-static ssize_t sysfs_bin_read(struct file *file, char __user *userbuf,
- size_t bytes, loff_t *off)
+static ssize_t sysfs_kf_bin_read(struct kernfs_open_file *of, char *buf,
+ size_t count, loff_t pos)
{
- struct sysfs_open_file *of = sysfs_of(file);
- struct bin_attribute *battr = of->sd->s_attr.bin_attr;
- struct kobject *kobj = of->sd->s_parent->s_dir.kobj;
- loff_t size = file_inode(file)->i_size;
- int count = min_t(size_t, bytes, PAGE_SIZE);
- loff_t offs = *off;
- char *buf;
+ struct bin_attribute *battr = of->kn->priv;
+ struct kobject *kobj = of->kn->parent->priv;
+ loff_t size = file_inode(of->file)->i_size;
- if (!bytes)
+ if (!count)
return 0;
if (size) {
- if (offs > size)
+ if (pos > size)
return 0;
- if (offs + count > size)
- count = size - offs;
- }
-
- buf = kmalloc(count, GFP_KERNEL);
- if (!buf)
- return -ENOMEM;
-
- /* need of->sd for battr, its parent for kobj */
- mutex_lock(&of->mutex);
- if (!sysfs_get_active(of->sd)) {
- count = -ENODEV;
- mutex_unlock(&of->mutex);
- goto out_free;
- }
-
- if (battr->read)
- count = battr->read(file, kobj, battr, buf, offs, count);
- else
- count = -EIO;
-
- sysfs_put_active(of->sd);
- mutex_unlock(&of->mutex);
-
- if (count < 0)
- goto out_free;
-
- if (copy_to_user(userbuf, buf, count)) {
- count = -EFAULT;
- goto out_free;
+ if (pos + count > size)
+ count = size - pos;
}
- pr_debug("offs = %lld, *off = %lld, count = %d\n", offs, *off, count);
-
- *off = offs + count;
+ if (!battr->read)
+ return -EIO;
- out_free:
- kfree(buf);
- return count;
+ return battr->read(of->file, kobj, battr, buf, pos, count);
}
-/**
- * flush_write_buffer - push buffer to kobject
- * @of: open file
- * @buf: data buffer for file
- * @off: file offset to write to
- * @count: number of bytes
- *
- * Get the correct pointers for the kobject and the attribute we're dealing
- * with, then call the store() method for it with @buf.
- */
-static int flush_write_buffer(struct sysfs_open_file *of, char *buf, loff_t off,
- size_t count)
+/* kernfs write callback for regular sysfs files */
+static ssize_t sysfs_kf_write(struct kernfs_open_file *of, char *buf,
+ size_t count, loff_t pos)
{
- struct kobject *kobj = of->sd->s_parent->s_dir.kobj;
- int rc = 0;
-
- /*
- * Need @of->sd for attr and ops, its parent for kobj. @of->mutex
- * nests outside active ref and is just to ensure that the ops
- * aren't called concurrently for the same open file.
- */
- mutex_lock(&of->mutex);
- if (!sysfs_get_active(of->sd)) {
- mutex_unlock(&of->mutex);
- return -ENODEV;
- }
+ const struct sysfs_ops *ops = sysfs_file_ops(of->kn);
+ struct kobject *kobj = of->kn->parent->priv;
- if (sysfs_is_bin(of->sd)) {
- struct bin_attribute *battr = of->sd->s_attr.bin_attr;
-
- rc = -EIO;
- if (battr->write)
- rc = battr->write(of->file, kobj, battr, buf, off,
- count);
- } else {
- const struct sysfs_ops *ops = sysfs_file_ops(of->sd);
-
- rc = ops->store(kobj, of->sd->s_attr.attr, buf, count);
- }
-
- sysfs_put_active(of->sd);
- mutex_unlock(&of->mutex);
+ if (!count)
+ return 0;
- return rc;
+ return ops->store(kobj, of->kn->priv, buf, count);
}
-/**
- * sysfs_write_file - write an attribute
- * @file: file pointer
- * @user_buf: data to write
- * @count: number of bytes
- * @ppos: starting offset
- *
- * Copy data in from userland and pass it to the matching
- * sysfs_ops->store() by invoking flush_write_buffer().
- *
- * There is no easy way for us to know if userspace is only doing a partial
- * write, so we don't support them. We expect the entire buffer to come on
- * the first write. Hint: if you're writing a value, first read the file,
- * modify only the the value you're changing, then write entire buffer
- * back.
- */
-static ssize_t sysfs_write_file(struct file *file, const char __user *user_buf,
- size_t count, loff_t *ppos)
+/* kernfs write callback for bin sysfs files */
+static ssize_t sysfs_kf_bin_write(struct kernfs_open_file *of, char *buf,
+ size_t count, loff_t pos)
{
- struct sysfs_open_file *of = sysfs_of(file);
- ssize_t len = min_t(size_t, count, PAGE_SIZE);
- loff_t size = file_inode(file)->i_size;
- char *buf;
+ struct bin_attribute *battr = of->kn->priv;
+ struct kobject *kobj = of->kn->parent->priv;
+ loff_t size = file_inode(of->file)->i_size;
- if (sysfs_is_bin(of->sd) && size) {
- if (size <= *ppos)
+ if (size) {
+ if (size <= pos)
return 0;
- len = min_t(ssize_t, len, size - *ppos);
+ count = min_t(ssize_t, count, size - pos);
}
-
- if (!len)
+ if (!count)
return 0;
- buf = kmalloc(len + 1, GFP_KERNEL);
- if (!buf)
- return -ENOMEM;
+ if (!battr->write)
+ return -EIO;
- if (copy_from_user(buf, user_buf, len)) {
- len = -EFAULT;
- goto out_free;
- }
- buf[len] = '\0'; /* guarantee string termination */
-
- len = flush_write_buffer(of, buf, *ppos, len);
- if (len > 0)
- *ppos += len;
-out_free:
- kfree(buf);
- return len;
-}
-
-static void sysfs_bin_vma_open(struct vm_area_struct *vma)
-{
- struct file *file = vma->vm_file;
- struct sysfs_open_file *of = sysfs_of(file);
-
- if (!of->vm_ops)
- return;
-
- if (!sysfs_get_active(of->sd))
- return;
-
- if (of->vm_ops->open)
- of->vm_ops->open(vma);
-
- sysfs_put_active(of->sd);
+ return battr->write(of->file, kobj, battr, buf, pos, count);
}
-static int sysfs_bin_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+static int sysfs_kf_bin_mmap(struct kernfs_open_file *of,
+ struct vm_area_struct *vma)
{
- struct file *file = vma->vm_file;
- struct sysfs_open_file *of = sysfs_of(file);
- int ret;
+ struct bin_attribute *battr = of->kn->priv;
+ struct kobject *kobj = of->kn->parent->priv;
- if (!of->vm_ops)
- return VM_FAULT_SIGBUS;
-
- if (!sysfs_get_active(of->sd))
- return VM_FAULT_SIGBUS;
-
- ret = VM_FAULT_SIGBUS;
- if (of->vm_ops->fault)
- ret = of->vm_ops->fault(vma, vmf);
-
- sysfs_put_active(of->sd);
- return ret;
+ return battr->mmap(of->file, kobj, battr, vma);
}
-static int sysfs_bin_page_mkwrite(struct vm_area_struct *vma,
- struct vm_fault *vmf)
+void sysfs_notify(struct kobject *kobj, const char *dir, const char *attr)
{
- struct file *file = vma->vm_file;
- struct sysfs_open_file *of = sysfs_of(file);
- int ret;
-
- if (!of->vm_ops)
- return VM_FAULT_SIGBUS;
+ struct kernfs_node *kn = kobj->sd, *tmp;
- if (!sysfs_get_active(of->sd))
- return VM_FAULT_SIGBUS;
-
- ret = 0;
- if (of->vm_ops->page_mkwrite)
- ret = of->vm_ops->page_mkwrite(vma, vmf);
+ if (kn && dir)
+ kn = kernfs_find_and_get(kn, dir);
else
- file_update_time(file);
-
- sysfs_put_active(of->sd);
- return ret;
-}
-
-static int sysfs_bin_access(struct vm_area_struct *vma, unsigned long addr,
- void *buf, int len, int write)
-{
- struct file *file = vma->vm_file;
- struct sysfs_open_file *of = sysfs_of(file);
- int ret;
-
- if (!of->vm_ops)
- return -EINVAL;
-
- if (!sysfs_get_active(of->sd))
- return -EINVAL;
-
- ret = -EINVAL;
- if (of->vm_ops->access)
- ret = of->vm_ops->access(vma, addr, buf, len, write);
-
- sysfs_put_active(of->sd);
- return ret;
-}
-
-#ifdef CONFIG_NUMA
-static int sysfs_bin_set_policy(struct vm_area_struct *vma,
- struct mempolicy *new)
-{
- struct file *file = vma->vm_file;
- struct sysfs_open_file *of = sysfs_of(file);
- int ret;
-
- if (!of->vm_ops)
- return 0;
-
- if (!sysfs_get_active(of->sd))
- return -EINVAL;
-
- ret = 0;
- if (of->vm_ops->set_policy)
- ret = of->vm_ops->set_policy(vma, new);
-
- sysfs_put_active(of->sd);
- return ret;
-}
-
-static struct mempolicy *sysfs_bin_get_policy(struct vm_area_struct *vma,
- unsigned long addr)
-{
- struct file *file = vma->vm_file;
- struct sysfs_open_file *of = sysfs_of(file);
- struct mempolicy *pol;
-
- if (!of->vm_ops)
- return vma->vm_policy;
-
- if (!sysfs_get_active(of->sd))
- return vma->vm_policy;
-
- pol = vma->vm_policy;
- if (of->vm_ops->get_policy)
- pol = of->vm_ops->get_policy(vma, addr);
-
- sysfs_put_active(of->sd);
- return pol;
-}
-
-static int sysfs_bin_migrate(struct vm_area_struct *vma, const nodemask_t *from,
- const nodemask_t *to, unsigned long flags)
-{
- struct file *file = vma->vm_file;
- struct sysfs_open_file *of = sysfs_of(file);
- int ret;
-
- if (!of->vm_ops)
- return 0;
-
- if (!sysfs_get_active(of->sd))
- return 0;
-
- ret = 0;
- if (of->vm_ops->migrate)
- ret = of->vm_ops->migrate(vma, from, to, flags);
-
- sysfs_put_active(of->sd);
- return ret;
-}
-#endif
-
-static const struct vm_operations_struct sysfs_bin_vm_ops = {
- .open = sysfs_bin_vma_open,
- .fault = sysfs_bin_fault,
- .page_mkwrite = sysfs_bin_page_mkwrite,
- .access = sysfs_bin_access,
-#ifdef CONFIG_NUMA
- .set_policy = sysfs_bin_set_policy,
- .get_policy = sysfs_bin_get_policy,
- .migrate = sysfs_bin_migrate,
-#endif
-};
-
-static int sysfs_bin_mmap(struct file *file, struct vm_area_struct *vma)
-{
- struct sysfs_open_file *of = sysfs_of(file);
- struct bin_attribute *battr = of->sd->s_attr.bin_attr;
- struct kobject *kobj = of->sd->s_parent->s_dir.kobj;
- int rc;
-
- mutex_lock(&of->mutex);
-
- /* need of->sd for battr, its parent for kobj */
- rc = -ENODEV;
- if (!sysfs_get_active(of->sd))
- goto out_unlock;
-
- if (!battr->mmap)
- goto out_put;
-
- rc = battr->mmap(file, kobj, battr, vma);
- if (rc)
- goto out_put;
-
- /*
- * PowerPC's pci_mmap of legacy_mem uses shmem_zero_setup()
- * to satisfy versions of X which crash if the mmap fails: that
- * substitutes a new vm_file, and we don't then want bin_vm_ops.
- */
- if (vma->vm_file != file)
- goto out_put;
-
- rc = -EINVAL;
- if (of->mmapped && of->vm_ops != vma->vm_ops)
- goto out_put;
+ kernfs_get(kn);
- /*
- * It is not possible to successfully wrap close.
- * So error if someone is trying to use close.
- */
- rc = -EINVAL;
- if (vma->vm_ops && vma->vm_ops->close)
- goto out_put;
-
- rc = 0;
- of->mmapped = 1;
- of->vm_ops = vma->vm_ops;
- vma->vm_ops = &sysfs_bin_vm_ops;
-out_put:
- sysfs_put_active(of->sd);
-out_unlock:
- mutex_unlock(&of->mutex);
-
- return rc;
-}
-
-/**
- * sysfs_get_open_dirent - get or create sysfs_open_dirent
- * @sd: target sysfs_dirent
- * @of: sysfs_open_file for this instance of open
- *
- * If @sd->s_attr.open exists, increment its reference count;
- * otherwise, create one. @of is chained to the files list.
- *
- * LOCKING:
- * Kernel thread context (may sleep).
- *
- * RETURNS:
- * 0 on success, -errno on failure.
- */
-static int sysfs_get_open_dirent(struct sysfs_dirent *sd,
- struct sysfs_open_file *of)
-{
- struct sysfs_open_dirent *od, *new_od = NULL;
-
- retry:
- mutex_lock(&sysfs_open_file_mutex);
- spin_lock_irq(&sysfs_open_dirent_lock);
-
- if (!sd->s_attr.open && new_od) {
- sd->s_attr.open = new_od;
- new_od = NULL;
+ if (kn && attr) {
+ tmp = kernfs_find_and_get(kn, attr);
+ kernfs_put(kn);
+ kn = tmp;
}
- od = sd->s_attr.open;
- if (od) {
- atomic_inc(&od->refcnt);
- list_add_tail(&of->list, &od->files);
- }
-
- spin_unlock_irq(&sysfs_open_dirent_lock);
- mutex_unlock(&sysfs_open_file_mutex);
-
- if (od) {
- kfree(new_od);
- return 0;
+ if (kn) {
+ kernfs_notify(kn);
+ kernfs_put(kn);
}
+}
+EXPORT_SYMBOL_GPL(sysfs_notify);
- /* not there, initialize a new one and retry */
- new_od = kmalloc(sizeof(*new_od), GFP_KERNEL);
- if (!new_od)
- return -ENOMEM;
+static const struct kernfs_ops sysfs_file_kfops_empty = {
+};
- atomic_set(&new_od->refcnt, 0);
- atomic_set(&new_od->event, 1);
- init_waitqueue_head(&new_od->poll);
- INIT_LIST_HEAD(&new_od->files);
- goto retry;
-}
+static const struct kernfs_ops sysfs_file_kfops_ro = {
+ .seq_show = sysfs_kf_seq_show,
+};
-/**
- * sysfs_put_open_dirent - put sysfs_open_dirent
- * @sd: target sysfs_dirent
- * @of: associated sysfs_open_file
- *
- * Put @sd->s_attr.open and unlink @of from the files list. If
- * reference count reaches zero, disassociate and free it.
- *
- * LOCKING:
- * None.
- */
-static void sysfs_put_open_dirent(struct sysfs_dirent *sd,
- struct sysfs_open_file *of)
-{
- struct sysfs_open_dirent *od = sd->s_attr.open;
- unsigned long flags;
+static const struct kernfs_ops sysfs_file_kfops_wo = {
+ .write = sysfs_kf_write,
+};
- mutex_lock(&sysfs_open_file_mutex);
- spin_lock_irqsave(&sysfs_open_dirent_lock, flags);
+static const struct kernfs_ops sysfs_file_kfops_rw = {
+ .seq_show = sysfs_kf_seq_show,
+ .write = sysfs_kf_write,
+};
- if (of)
- list_del(&of->list);
+static const struct kernfs_ops sysfs_bin_kfops_ro = {
+ .read = sysfs_kf_bin_read,
+};
- if (atomic_dec_and_test(&od->refcnt))
- sd->s_attr.open = NULL;
- else
- od = NULL;
+static const struct kernfs_ops sysfs_bin_kfops_wo = {
+ .write = sysfs_kf_bin_write,
+};
- spin_unlock_irqrestore(&sysfs_open_dirent_lock, flags);
- mutex_unlock(&sysfs_open_file_mutex);
+static const struct kernfs_ops sysfs_bin_kfops_rw = {
+ .read = sysfs_kf_bin_read,
+ .write = sysfs_kf_bin_write,
+};
- kfree(od);
-}
+static const struct kernfs_ops sysfs_bin_kfops_mmap = {
+ .read = sysfs_kf_bin_read,
+ .write = sysfs_kf_bin_write,
+ .mmap = sysfs_kf_bin_mmap,
+};
-static int sysfs_open_file(struct inode *inode, struct file *file)
+int sysfs_add_file_mode_ns(struct kernfs_node *parent,
+ const struct attribute *attr, bool is_bin,
+ umode_t mode, const void *ns)
{
- struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata;
- struct kobject *kobj = attr_sd->s_parent->s_dir.kobj;
- struct sysfs_open_file *of;
- bool has_read, has_write;
- int error = -EACCES;
-
- /* need attr_sd for attr and ops, its parent for kobj */
- if (!sysfs_get_active(attr_sd))
- return -ENODEV;
+ struct lock_class_key *key = NULL;
+ const struct kernfs_ops *ops;
+ struct kernfs_node *kn;
+ loff_t size;
- if (sysfs_is_bin(attr_sd)) {
- struct bin_attribute *battr = attr_sd->s_attr.bin_attr;
-
- has_read = battr->read || battr->mmap;
- has_write = battr->write || battr->mmap;
- } else {
- const struct sysfs_ops *ops = sysfs_file_ops(attr_sd);
+ if (!is_bin) {
+ struct kobject *kobj = parent->priv;
+ const struct sysfs_ops *sysfs_ops = kobj->ktype->sysfs_ops;
/* every kobject with an attribute needs a ktype assigned */
- if (WARN(!ops, KERN_ERR
+ if (WARN(!sysfs_ops, KERN_ERR
"missing sysfs attribute operations for kobject: %s\n",
kobject_name(kobj)))
- goto err_out;
-
- has_read = ops->show;
- has_write = ops->store;
- }
-
- /* check perms and supported operations */
- if ((file->f_mode & FMODE_WRITE) &&
- (!(inode->i_mode & S_IWUGO) || !has_write))
- goto err_out;
-
- if ((file->f_mode & FMODE_READ) &&
- (!(inode->i_mode & S_IRUGO) || !has_read))
- goto err_out;
-
- /* allocate a sysfs_open_file for the file */
- error = -ENOMEM;
- of = kzalloc(sizeof(struct sysfs_open_file), GFP_KERNEL);
- if (!of)
- goto err_out;
-
- /*
- * The following is done to give a different lockdep key to
- * @of->mutex for files which implement mmap. This is a rather
- * crude way to avoid false positive lockdep warning around
- * mm->mmap_sem - mmap nests @of->mutex under mm->mmap_sem and
- * reading /sys/block/sda/trace/act_mask grabs sr_mutex, under
- * which mm->mmap_sem nests, while holding @of->mutex. As each
- * open file has a separate mutex, it's okay as long as those don't
- * happen on the same file. At this point, we can't easily give
- * each file a separate locking class. Let's differentiate on
- * whether the file is bin or not for now.
- */
- if (sysfs_is_bin(attr_sd))
- mutex_init(&of->mutex);
- else
- mutex_init(&of->mutex);
-
- of->sd = attr_sd;
- of->file = file;
-
- /*
- * Always instantiate seq_file even if read access doesn't use
- * seq_file or is not requested. This unifies private data access
- * and readable regular files are the vast majority anyway.
- */
- if (sysfs_is_bin(attr_sd))
- error = single_open(file, NULL, of);
- else
- error = single_open(file, sysfs_seq_show, of);
- if (error)
- goto err_free;
-
- /* seq_file clears PWRITE unconditionally, restore it if WRITE */
- if (file->f_mode & FMODE_WRITE)
- file->f_mode |= FMODE_PWRITE;
-
- /* make sure we have open dirent struct */
- error = sysfs_get_open_dirent(attr_sd, of);
- if (error)
- goto err_close;
-
- /* open succeeded, put active references */
- sysfs_put_active(attr_sd);
- return 0;
-
-err_close:
- single_release(inode, file);
-err_free:
- kfree(of);
-err_out:
- sysfs_put_active(attr_sd);
- return error;
-}
-
-static int sysfs_release(struct inode *inode, struct file *filp)
-{
- struct sysfs_dirent *sd = filp->f_path.dentry->d_fsdata;
- struct sysfs_open_file *of = sysfs_of(filp);
-
- sysfs_put_open_dirent(sd, of);
- single_release(inode, filp);
- kfree(of);
-
- return 0;
-}
-
-void sysfs_unmap_bin_file(struct sysfs_dirent *sd)
-{
- struct sysfs_open_dirent *od;
- struct sysfs_open_file *of;
-
- if (!sysfs_is_bin(sd))
- return;
-
- spin_lock_irq(&sysfs_open_dirent_lock);
- od = sd->s_attr.open;
- if (od)
- atomic_inc(&od->refcnt);
- spin_unlock_irq(&sysfs_open_dirent_lock);
- if (!od)
- return;
-
- mutex_lock(&sysfs_open_file_mutex);
- list_for_each_entry(of, &od->files, list) {
- struct inode *inode = file_inode(of->file);
- unmap_mapping_range(inode->i_mapping, 0, 0, 1);
+ return -EINVAL;
+
+ if (sysfs_ops->show && sysfs_ops->store)
+ ops = &sysfs_file_kfops_rw;
+ else if (sysfs_ops->show)
+ ops = &sysfs_file_kfops_ro;
+ else if (sysfs_ops->store)
+ ops = &sysfs_file_kfops_wo;
+ else
+ ops = &sysfs_file_kfops_empty;
+
+ size = PAGE_SIZE;
+ } else {
+ struct bin_attribute *battr = (void *)attr;
+
+ if (battr->mmap)
+ ops = &sysfs_bin_kfops_mmap;
+ else if (battr->read && battr->write)
+ ops = &sysfs_bin_kfops_rw;
+ else if (battr->read)
+ ops = &sysfs_bin_kfops_ro;
+ else if (battr->write)
+ ops = &sysfs_bin_kfops_wo;
+ else
+ ops = &sysfs_file_kfops_empty;
+
+ size = battr->size;
}
- mutex_unlock(&sysfs_open_file_mutex);
-
- sysfs_put_open_dirent(sd, NULL);
-}
-
-/* Sysfs attribute files are pollable. The idea is that you read
- * the content and then you use 'poll' or 'select' to wait for
- * the content to change. When the content changes (assuming the
- * manager for the kobject supports notification), poll will
- * return POLLERR|POLLPRI, and select will return the fd whether
- * it is waiting for read, write, or exceptions.
- * Once poll/select indicates that the value has changed, you
- * need to close and re-open the file, or seek to 0 and read again.
- * Reminder: this only works for attributes which actively support
- * it, and it is not possible to test an attribute from userspace
- * to see if it supports poll (Neither 'poll' nor 'select' return
- * an appropriate error code). When in doubt, set a suitable timeout value.
- */
-static unsigned int sysfs_poll(struct file *filp, poll_table *wait)
-{
- struct sysfs_open_file *of = sysfs_of(filp);
- struct sysfs_dirent *attr_sd = filp->f_path.dentry->d_fsdata;
- struct sysfs_open_dirent *od = attr_sd->s_attr.open;
-
- /* need parent for the kobj, grab both */
- if (!sysfs_get_active(attr_sd))
- goto trigger;
-
- poll_wait(filp, &od->poll, wait);
- sysfs_put_active(attr_sd);
-
- if (of->event != atomic_read(&od->event))
- goto trigger;
-
- return DEFAULT_POLLMASK;
-
- trigger:
- return DEFAULT_POLLMASK|POLLERR|POLLPRI;
-}
-
-void sysfs_notify_dirent(struct sysfs_dirent *sd)
-{
- struct sysfs_open_dirent *od;
- unsigned long flags;
-
- spin_lock_irqsave(&sysfs_open_dirent_lock, flags);
-
- if (!WARN_ON(sysfs_type(sd) != SYSFS_KOBJ_ATTR)) {
- od = sd->s_attr.open;
- if (od) {
- atomic_inc(&od->event);
- wake_up_interruptible(&od->poll);
- }
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+ if (!attr->ignore_lockdep)
+ key = attr->key ?: (struct lock_class_key *)&attr->skey;
+#endif
+ kn = __kernfs_create_file(parent, attr->name, mode, size, ops,
+ (void *)attr, ns, true, key);
+ if (IS_ERR(kn)) {
+ if (PTR_ERR(kn) == -EEXIST)
+ sysfs_warn_dup(parent, attr->name);
+ return PTR_ERR(kn);
}
-
- spin_unlock_irqrestore(&sysfs_open_dirent_lock, flags);
-}
-EXPORT_SYMBOL_GPL(sysfs_notify_dirent);
-
-void sysfs_notify(struct kobject *k, const char *dir, const char *attr)
-{
- struct sysfs_dirent *sd = k->sd;
-
- mutex_lock(&sysfs_mutex);
-
- if (sd && dir)
- sd = sysfs_find_dirent(sd, dir, NULL);
- if (sd && attr)
- sd = sysfs_find_dirent(sd, attr, NULL);
- if (sd)
- sysfs_notify_dirent(sd);
-
- mutex_unlock(&sysfs_mutex);
-}
-EXPORT_SYMBOL_GPL(sysfs_notify);
-
-const struct file_operations sysfs_file_operations = {
- .read = seq_read,
- .write = sysfs_write_file,
- .llseek = generic_file_llseek,
- .open = sysfs_open_file,
- .release = sysfs_release,
- .poll = sysfs_poll,
-};
-
-const struct file_operations sysfs_bin_operations = {
- .read = sysfs_bin_read,
- .write = sysfs_write_file,
- .llseek = generic_file_llseek,
- .mmap = sysfs_bin_mmap,
- .open = sysfs_open_file,
- .release = sysfs_release,
- .poll = sysfs_poll,
-};
-
-int sysfs_add_file_mode_ns(struct sysfs_dirent *dir_sd,
- const struct attribute *attr, int type,
- umode_t amode, const void *ns)
-{
- umode_t mode = (amode & S_IALLUGO) | S_IFREG;
- struct sysfs_addrm_cxt acxt;
- struct sysfs_dirent *sd;
- int rc;
-
- sd = sysfs_new_dirent(attr->name, mode, type);
- if (!sd)
- return -ENOMEM;
-
- sd->s_ns = ns;
- sd->s_attr.attr = (void *)attr;
- sysfs_dirent_init_lockdep(sd);
-
- sysfs_addrm_start(&acxt);
- rc = sysfs_add_one(&acxt, sd, dir_sd);
- sysfs_addrm_finish(&acxt);
-
- if (rc)
- sysfs_put(sd);
-
- return rc;
+ return 0;
}
-
-int sysfs_add_file(struct sysfs_dirent *dir_sd, const struct attribute *attr,
- int type)
+int sysfs_add_file(struct kernfs_node *parent, const struct attribute *attr,
+ bool is_bin)
{
- return sysfs_add_file_mode_ns(dir_sd, attr, type, attr->mode, NULL);
+ return sysfs_add_file_mode_ns(parent, attr, is_bin, attr->mode, NULL);
}
/**
@@ -877,8 +279,7 @@ int sysfs_create_file_ns(struct kobject *kobj, const struct attribute *attr,
{
BUG_ON(!kobj || !kobj->sd || !attr);
- return sysfs_add_file_mode_ns(kobj->sd, attr, SYSFS_KOBJ_ATTR,
- attr->mode, ns);
+ return sysfs_add_file_mode_ns(kobj->sd, attr, false, attr->mode, ns);
}
EXPORT_SYMBOL_GPL(sysfs_create_file_ns);
@@ -906,19 +307,21 @@ EXPORT_SYMBOL_GPL(sysfs_create_files);
int sysfs_add_file_to_group(struct kobject *kobj,
const struct attribute *attr, const char *group)
{
- struct sysfs_dirent *dir_sd;
+ struct kernfs_node *parent;
int error;
- if (group)
- dir_sd = sysfs_get_dirent(kobj->sd, group);
- else
- dir_sd = sysfs_get(kobj->sd);
+ if (group) {
+ parent = kernfs_find_and_get(kobj->sd, group);
+ } else {
+ parent = kobj->sd;
+ kernfs_get(parent);
+ }
- if (!dir_sd)
+ if (!parent)
return -ENOENT;
- error = sysfs_add_file(dir_sd, attr, SYSFS_KOBJ_ATTR);
- sysfs_put(dir_sd);
+ error = sysfs_add_file(parent, attr, false);
+ kernfs_put(parent);
return error;
}
@@ -934,23 +337,20 @@ EXPORT_SYMBOL_GPL(sysfs_add_file_to_group);
int sysfs_chmod_file(struct kobject *kobj, const struct attribute *attr,
umode_t mode)
{
- struct sysfs_dirent *sd;
+ struct kernfs_node *kn;
struct iattr newattrs;
int rc;
- mutex_lock(&sysfs_mutex);
-
- rc = -ENOENT;
- sd = sysfs_find_dirent(kobj->sd, attr->name, NULL);
- if (!sd)
- goto out;
+ kn = kernfs_find_and_get(kobj->sd, attr->name);
+ if (!kn)
+ return -ENOENT;
- newattrs.ia_mode = (mode & S_IALLUGO) | (sd->s_mode & ~S_IALLUGO);
+ newattrs.ia_mode = (mode & S_IALLUGO) | (kn->mode & ~S_IALLUGO);
newattrs.ia_valid = ATTR_MODE;
- rc = sysfs_sd_setattr(sd, &newattrs);
- out:
- mutex_unlock(&sysfs_mutex);
+ rc = kernfs_setattr(kn, &newattrs);
+
+ kernfs_put(kn);
return rc;
}
EXPORT_SYMBOL_GPL(sysfs_chmod_file);
@@ -966,9 +366,9 @@ EXPORT_SYMBOL_GPL(sysfs_chmod_file);
void sysfs_remove_file_ns(struct kobject *kobj, const struct attribute *attr,
const void *ns)
{
- struct sysfs_dirent *dir_sd = kobj->sd;
+ struct kernfs_node *parent = kobj->sd;
- sysfs_hash_and_remove(dir_sd, attr->name, ns);
+ kernfs_remove_by_name_ns(parent, attr->name, ns);
}
EXPORT_SYMBOL_GPL(sysfs_remove_file_ns);
@@ -989,15 +389,18 @@ EXPORT_SYMBOL_GPL(sysfs_remove_files);
void sysfs_remove_file_from_group(struct kobject *kobj,
const struct attribute *attr, const char *group)
{
- struct sysfs_dirent *dir_sd;
+ struct kernfs_node *parent;
- if (group)
- dir_sd = sysfs_get_dirent(kobj->sd, group);
- else
- dir_sd = sysfs_get(kobj->sd);
- if (dir_sd) {
- sysfs_hash_and_remove(dir_sd, attr->name, NULL);
- sysfs_put(dir_sd);
+ if (group) {
+ parent = kernfs_find_and_get(kobj->sd, group);
+ } else {
+ parent = kobj->sd;
+ kernfs_get(parent);
+ }
+
+ if (parent) {
+ kernfs_remove_by_name(parent, attr->name);
+ kernfs_put(parent);
}
}
EXPORT_SYMBOL_GPL(sysfs_remove_file_from_group);
@@ -1012,7 +415,7 @@ int sysfs_create_bin_file(struct kobject *kobj,
{
BUG_ON(!kobj || !kobj->sd || !attr);
- return sysfs_add_file(kobj->sd, &attr->attr, SYSFS_KOBJ_BIN_ATTR);
+ return sysfs_add_file(kobj->sd, &attr->attr, true);
}
EXPORT_SYMBOL_GPL(sysfs_create_bin_file);
@@ -1024,7 +427,7 @@ EXPORT_SYMBOL_GPL(sysfs_create_bin_file);
void sysfs_remove_bin_file(struct kobject *kobj,
const struct bin_attribute *attr)
{
- sysfs_hash_and_remove(kobj->sd, attr->attr.name, NULL);
+ kernfs_remove_by_name(kobj->sd, attr->attr.name);
}
EXPORT_SYMBOL_GPL(sysfs_remove_bin_file);
diff --git a/fs/sysfs/group.c b/fs/sysfs/group.c
index 1898a10..6b57938 100644
--- a/fs/sysfs/group.c
+++ b/fs/sysfs/group.c
@@ -18,7 +18,7 @@
#include "sysfs.h"
-static void remove_files(struct sysfs_dirent *dir_sd, struct kobject *kobj,
+static void remove_files(struct kernfs_node *parent, struct kobject *kobj,
const struct attribute_group *grp)
{
struct attribute *const *attr;
@@ -26,13 +26,13 @@ static void remove_files(struct sysfs_dirent *dir_sd, struct kobject *kobj,
if (grp->attrs)
for (attr = grp->attrs; *attr; attr++)
- sysfs_hash_and_remove(dir_sd, (*attr)->name, NULL);
+ kernfs_remove_by_name(parent, (*attr)->name);
if (grp->bin_attrs)
for (bin_attr = grp->bin_attrs; *bin_attr; bin_attr++)
sysfs_remove_bin_file(kobj, *bin_attr);
}
-static int create_files(struct sysfs_dirent *dir_sd, struct kobject *kobj,
+static int create_files(struct kernfs_node *parent, struct kobject *kobj,
const struct attribute_group *grp, int update)
{
struct attribute *const *attr;
@@ -49,22 +49,20 @@ static int create_files(struct sysfs_dirent *dir_sd, struct kobject *kobj,
* re-adding (if required) the file.
*/
if (update)
- sysfs_hash_and_remove(dir_sd, (*attr)->name,
- NULL);
+ kernfs_remove_by_name(parent, (*attr)->name);
if (grp->is_visible) {
mode = grp->is_visible(kobj, *attr, i);
if (!mode)
continue;
}
- error = sysfs_add_file_mode_ns(dir_sd, *attr,
- SYSFS_KOBJ_ATTR,
+ error = sysfs_add_file_mode_ns(parent, *attr, false,
(*attr)->mode | mode,
NULL);
if (unlikely(error))
break;
}
if (error) {
- remove_files(dir_sd, kobj, grp);
+ remove_files(parent, kobj, grp);
goto exit;
}
}
@@ -78,7 +76,7 @@ static int create_files(struct sysfs_dirent *dir_sd, struct kobject *kobj,
break;
}
if (error)
- remove_files(dir_sd, kobj, grp);
+ remove_files(parent, kobj, grp);
}
exit:
return error;
@@ -88,7 +86,7 @@ exit:
static int internal_create_group(struct kobject *kobj, int update,
const struct attribute_group *grp)
{
- struct sysfs_dirent *sd;
+ struct kernfs_node *kn;
int error;
BUG_ON(!kobj || (!update && !kobj->sd));
@@ -102,18 +100,22 @@ static int internal_create_group(struct kobject *kobj, int update,
return -EINVAL;
}
if (grp->name) {
- error = sysfs_create_subdir(kobj, grp->name, &sd);
- if (error)
- return error;
+ kn = kernfs_create_dir(kobj->sd, grp->name,
+ S_IRWXU | S_IRUGO | S_IXUGO, kobj);
+ if (IS_ERR(kn)) {
+ if (PTR_ERR(kn) == -EEXIST)
+ sysfs_warn_dup(kobj->sd, grp->name);
+ return PTR_ERR(kn);
+ }
} else
- sd = kobj->sd;
- sysfs_get(sd);
- error = create_files(sd, kobj, grp, update);
+ kn = kobj->sd;
+ kernfs_get(kn);
+ error = create_files(kn, kobj, grp, update);
if (error) {
if (grp->name)
- sysfs_remove(sd);
+ kernfs_remove(kn);
}
- sysfs_put(sd);
+ kernfs_put(kn);
return error;
}
@@ -203,25 +205,27 @@ EXPORT_SYMBOL_GPL(sysfs_update_group);
void sysfs_remove_group(struct kobject *kobj,
const struct attribute_group *grp)
{
- struct sysfs_dirent *dir_sd = kobj->sd;
- struct sysfs_dirent *sd;
+ struct kernfs_node *parent = kobj->sd;
+ struct kernfs_node *kn;
if (grp->name) {
- sd = sysfs_get_dirent(dir_sd, grp->name);
- if (!sd) {
- WARN(!sd, KERN_WARNING
+ kn = kernfs_find_and_get(parent, grp->name);
+ if (!kn) {
+ WARN(!kn, KERN_WARNING
"sysfs group %p not found for kobject '%s'\n",
grp, kobject_name(kobj));
return;
}
- } else
- sd = sysfs_get(dir_sd);
+ } else {
+ kn = parent;
+ kernfs_get(kn);
+ }
- remove_files(sd, kobj, grp);
+ remove_files(kn, kobj, grp);
if (grp->name)
- sysfs_remove(sd);
+ kernfs_remove(kn);
- sysfs_put(sd);
+ kernfs_put(kn);
}
EXPORT_SYMBOL_GPL(sysfs_remove_group);
@@ -257,22 +261,22 @@ EXPORT_SYMBOL_GPL(sysfs_remove_groups);
int sysfs_merge_group(struct kobject *kobj,
const struct attribute_group *grp)
{
- struct sysfs_dirent *dir_sd;
+ struct kernfs_node *parent;
int error = 0;
struct attribute *const *attr;
int i;
- dir_sd = sysfs_get_dirent(kobj->sd, grp->name);
- if (!dir_sd)
+ parent = kernfs_find_and_get(kobj->sd, grp->name);
+ if (!parent)
return -ENOENT;
for ((i = 0, attr = grp->attrs); *attr && !error; (++i, ++attr))
- error = sysfs_add_file(dir_sd, *attr, SYSFS_KOBJ_ATTR);
+ error = sysfs_add_file(parent, *attr, false);
if (error) {
while (--i >= 0)
- sysfs_hash_and_remove(dir_sd, (*--attr)->name, NULL);
+ kernfs_remove_by_name(parent, (*--attr)->name);
}
- sysfs_put(dir_sd);
+ kernfs_put(parent);
return error;
}
@@ -286,14 +290,14 @@ EXPORT_SYMBOL_GPL(sysfs_merge_group);
void sysfs_unmerge_group(struct kobject *kobj,
const struct attribute_group *grp)
{
- struct sysfs_dirent *dir_sd;
+ struct kernfs_node *parent;
struct attribute *const *attr;
- dir_sd = sysfs_get_dirent(kobj->sd, grp->name);
- if (dir_sd) {
+ parent = kernfs_find_and_get(kobj->sd, grp->name);
+ if (parent) {
for (attr = grp->attrs; *attr; ++attr)
- sysfs_hash_and_remove(dir_sd, (*attr)->name, NULL);
- sysfs_put(dir_sd);
+ kernfs_remove_by_name(parent, (*attr)->name);
+ kernfs_put(parent);
}
}
EXPORT_SYMBOL_GPL(sysfs_unmerge_group);
@@ -308,15 +312,15 @@ EXPORT_SYMBOL_GPL(sysfs_unmerge_group);
int sysfs_add_link_to_group(struct kobject *kobj, const char *group_name,
struct kobject *target, const char *link_name)
{
- struct sysfs_dirent *dir_sd;
+ struct kernfs_node *parent;
int error = 0;
- dir_sd = sysfs_get_dirent(kobj->sd, group_name);
- if (!dir_sd)
+ parent = kernfs_find_and_get(kobj->sd, group_name);
+ if (!parent)
return -ENOENT;
- error = sysfs_create_link_sd(dir_sd, target, link_name);
- sysfs_put(dir_sd);
+ error = sysfs_create_link_sd(parent, target, link_name);
+ kernfs_put(parent);
return error;
}
@@ -331,12 +335,12 @@ EXPORT_SYMBOL_GPL(sysfs_add_link_to_group);
void sysfs_remove_link_from_group(struct kobject *kobj, const char *group_name,
const char *link_name)
{
- struct sysfs_dirent *dir_sd;
+ struct kernfs_node *parent;
- dir_sd = sysfs_get_dirent(kobj->sd, group_name);
- if (dir_sd) {
- sysfs_hash_and_remove(dir_sd, link_name, NULL);
- sysfs_put(dir_sd);
+ parent = kernfs_find_and_get(kobj->sd, group_name);
+ if (parent) {
+ kernfs_remove_by_name(parent, link_name);
+ kernfs_put(parent);
}
}
EXPORT_SYMBOL_GPL(sysfs_remove_link_from_group);
diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c
deleted file mode 100644
index 1750f79..0000000
--- a/fs/sysfs/inode.c
+++ /dev/null
@@ -1,331 +0,0 @@
-/*
- * fs/sysfs/inode.c - basic sysfs inode and dentry operations
- *
- * Copyright (c) 2001-3 Patrick Mochel
- * Copyright (c) 2007 SUSE Linux Products GmbH
- * Copyright (c) 2007 Tejun Heo <teheo@suse.de>
- *
- * This file is released under the GPLv2.
- *
- * Please see Documentation/filesystems/sysfs.txt for more information.
- */
-
-#undef DEBUG
-
-#include <linux/pagemap.h>
-#include <linux/namei.h>
-#include <linux/backing-dev.h>
-#include <linux/capability.h>
-#include <linux/errno.h>
-#include <linux/sched.h>
-#include <linux/slab.h>
-#include <linux/sysfs.h>
-#include <linux/xattr.h>
-#include <linux/security.h>
-#include "sysfs.h"
-
-static const struct address_space_operations sysfs_aops = {
- .readpage = simple_readpage,
- .write_begin = simple_write_begin,
- .write_end = simple_write_end,
-};
-
-static struct backing_dev_info sysfs_backing_dev_info = {
- .name = "sysfs",
- .ra_pages = 0, /* No readahead */
- .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK,
-};
-
-static const struct inode_operations sysfs_inode_operations = {
- .permission = sysfs_permission,
- .setattr = sysfs_setattr,
- .getattr = sysfs_getattr,
- .setxattr = sysfs_setxattr,
-};
-
-int __init sysfs_inode_init(void)
-{
- return bdi_init(&sysfs_backing_dev_info);
-}
-
-static struct sysfs_inode_attrs *sysfs_init_inode_attrs(struct sysfs_dirent *sd)
-{
- struct sysfs_inode_attrs *attrs;
- struct iattr *iattrs;
-
- attrs = kzalloc(sizeof(struct sysfs_inode_attrs), GFP_KERNEL);
- if (!attrs)
- return NULL;
- iattrs = &attrs->ia_iattr;
-
- /* assign default attributes */
- iattrs->ia_mode = sd->s_mode;
- iattrs->ia_uid = GLOBAL_ROOT_UID;
- iattrs->ia_gid = GLOBAL_ROOT_GID;
- iattrs->ia_atime = iattrs->ia_mtime = iattrs->ia_ctime = CURRENT_TIME;
-
- return attrs;
-}
-
-int sysfs_sd_setattr(struct sysfs_dirent *sd, struct iattr *iattr)
-{
- struct sysfs_inode_attrs *sd_attrs;
- struct iattr *iattrs;
- unsigned int ia_valid = iattr->ia_valid;
-
- sd_attrs = sd->s_iattr;
-
- if (!sd_attrs) {
- /* setting attributes for the first time, allocate now */
- sd_attrs = sysfs_init_inode_attrs(sd);
- if (!sd_attrs)
- return -ENOMEM;
- sd->s_iattr = sd_attrs;
- }
- /* attributes were changed at least once in past */
- iattrs = &sd_attrs->ia_iattr;
-
- if (ia_valid & ATTR_UID)
- iattrs->ia_uid = iattr->ia_uid;
- if (ia_valid & ATTR_GID)
- iattrs->ia_gid = iattr->ia_gid;
- if (ia_valid & ATTR_ATIME)
- iattrs->ia_atime = iattr->ia_atime;
- if (ia_valid & ATTR_MTIME)
- iattrs->ia_mtime = iattr->ia_mtime;
- if (ia_valid & ATTR_CTIME)
- iattrs->ia_ctime = iattr->ia_ctime;
- if (ia_valid & ATTR_MODE) {
- umode_t mode = iattr->ia_mode;
- iattrs->ia_mode = sd->s_mode = mode;
- }
- return 0;
-}
-
-int sysfs_setattr(struct dentry *dentry, struct iattr *iattr)
-{
- struct inode *inode = dentry->d_inode;
- struct sysfs_dirent *sd = dentry->d_fsdata;
- int error;
-
- if (!sd)
- return -EINVAL;
-
- mutex_lock(&sysfs_mutex);
- error = inode_change_ok(inode, iattr);
- if (error)
- goto out;
-
- error = sysfs_sd_setattr(sd, iattr);
- if (error)
- goto out;
-
- /* this ignores size changes */
- setattr_copy(inode, iattr);
-
-out:
- mutex_unlock(&sysfs_mutex);
- return error;
-}
-
-static int sysfs_sd_setsecdata(struct sysfs_dirent *sd, void **secdata,
- u32 *secdata_len)
-{
- struct sysfs_inode_attrs *iattrs;
- void *old_secdata;
- size_t old_secdata_len;
-
- if (!sd->s_iattr) {
- sd->s_iattr = sysfs_init_inode_attrs(sd);
- if (!sd->s_iattr)
- return -ENOMEM;
- }
-
- iattrs = sd->s_iattr;
- old_secdata = iattrs->ia_secdata;
- old_secdata_len = iattrs->ia_secdata_len;
-
- iattrs->ia_secdata = *secdata;
- iattrs->ia_secdata_len = *secdata_len;
-
- *secdata = old_secdata;
- *secdata_len = old_secdata_len;
- return 0;
-}
-
-int sysfs_setxattr(struct dentry *dentry, const char *name, const void *value,
- size_t size, int flags)
-{
- struct sysfs_dirent *sd = dentry->d_fsdata;
- void *secdata;
- int error;
- u32 secdata_len = 0;
-
- if (!sd)
- return -EINVAL;
-
- if (!strncmp(name, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN)) {
- const char *suffix = name + XATTR_SECURITY_PREFIX_LEN;
- error = security_inode_setsecurity(dentry->d_inode, suffix,
- value, size, flags);
- if (error)
- goto out;
- error = security_inode_getsecctx(dentry->d_inode,
- &secdata, &secdata_len);
- if (error)
- goto out;
-
- mutex_lock(&sysfs_mutex);
- error = sysfs_sd_setsecdata(sd, &secdata, &secdata_len);
- mutex_unlock(&sysfs_mutex);
-
- if (secdata)
- security_release_secctx(secdata, secdata_len);
- } else
- return -EINVAL;
-out:
- return error;
-}
-
-static inline void set_default_inode_attr(struct inode *inode, umode_t mode)
-{
- inode->i_mode = mode;
- inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
-}
-
-static inline void set_inode_attr(struct inode *inode, struct iattr *iattr)
-{
- inode->i_uid = iattr->ia_uid;
- inode->i_gid = iattr->ia_gid;
- inode->i_atime = iattr->ia_atime;
- inode->i_mtime = iattr->ia_mtime;
- inode->i_ctime = iattr->ia_ctime;
-}
-
-static void sysfs_refresh_inode(struct sysfs_dirent *sd, struct inode *inode)
-{
- struct sysfs_inode_attrs *iattrs = sd->s_iattr;
-
- inode->i_mode = sd->s_mode;
- if (iattrs) {
- /* sysfs_dirent has non-default attributes
- * get them from persistent copy in sysfs_dirent
- */
- set_inode_attr(inode, &iattrs->ia_iattr);
- security_inode_notifysecctx(inode,
- iattrs->ia_secdata,
- iattrs->ia_secdata_len);
- }
-
- if (sysfs_type(sd) == SYSFS_DIR)
- set_nlink(inode, sd->s_dir.subdirs + 2);
-}
-
-int sysfs_getattr(struct vfsmount *mnt, struct dentry *dentry,
- struct kstat *stat)
-{
- struct sysfs_dirent *sd = dentry->d_fsdata;
- struct inode *inode = dentry->d_inode;
-
- mutex_lock(&sysfs_mutex);
- sysfs_refresh_inode(sd, inode);
- mutex_unlock(&sysfs_mutex);
-
- generic_fillattr(inode, stat);
- return 0;
-}
-
-static void sysfs_init_inode(struct sysfs_dirent *sd, struct inode *inode)
-{
- struct bin_attribute *bin_attr;
-
- inode->i_private = sysfs_get(sd);
- inode->i_mapping->a_ops = &sysfs_aops;
- inode->i_mapping->backing_dev_info = &sysfs_backing_dev_info;
- inode->i_op = &sysfs_inode_operations;
-
- set_default_inode_attr(inode, sd->s_mode);
- sysfs_refresh_inode(sd, inode);
-
- /* initialize inode according to type */
- switch (sysfs_type(sd)) {
- case SYSFS_DIR:
- inode->i_op = &sysfs_dir_inode_operations;
- inode->i_fop = &sysfs_dir_operations;
- break;
- case SYSFS_KOBJ_ATTR:
- inode->i_size = PAGE_SIZE;
- inode->i_fop = &sysfs_file_operations;
- break;
- case SYSFS_KOBJ_BIN_ATTR:
- bin_attr = sd->s_attr.bin_attr;
- inode->i_size = bin_attr->size;
- inode->i_fop = &sysfs_bin_operations;
- break;
- case SYSFS_KOBJ_LINK:
- inode->i_op = &sysfs_symlink_inode_operations;
- break;
- default:
- BUG();
- }
-
- unlock_new_inode(inode);
-}
-
-/**
- * sysfs_get_inode - get inode for sysfs_dirent
- * @sb: super block
- * @sd: sysfs_dirent to allocate inode for
- *
- * Get inode for @sd. If such inode doesn't exist, a new inode
- * is allocated and basics are initialized. New inode is
- * returned locked.
- *
- * LOCKING:
- * Kernel thread context (may sleep).
- *
- * RETURNS:
- * Pointer to allocated inode on success, NULL on failure.
- */
-struct inode *sysfs_get_inode(struct super_block *sb, struct sysfs_dirent *sd)
-{
- struct inode *inode;
-
- inode = iget_locked(sb, sd->s_ino);
- if (inode && (inode->i_state & I_NEW))
- sysfs_init_inode(sd, inode);
-
- return inode;
-}
-
-/*
- * The sysfs_dirent serves as both an inode and a directory entry for sysfs.
- * To prevent the sysfs inode numbers from being freed prematurely we take a
- * reference to sysfs_dirent from the sysfs inode. A
- * super_operations.evict_inode() implementation is needed to drop that
- * reference upon inode destruction.
- */
-void sysfs_evict_inode(struct inode *inode)
-{
- struct sysfs_dirent *sd = inode->i_private;
-
- truncate_inode_pages(&inode->i_data, 0);
- clear_inode(inode);
- sysfs_put(sd);
-}
-
-int sysfs_permission(struct inode *inode, int mask)
-{
- struct sysfs_dirent *sd;
-
- if (mask & MAY_NOT_BLOCK)
- return -ECHILD;
-
- sd = inode->i_private;
-
- mutex_lock(&sysfs_mutex);
- sysfs_refresh_inode(sd, inode);
- mutex_unlock(&sysfs_mutex);
-
- return generic_permission(inode, mask);
-}
diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c
index 834ec2c..6211230 100644
--- a/fs/sysfs/mount.c
+++ b/fs/sysfs/mount.c
@@ -14,146 +14,41 @@
#include <linux/fs.h>
#include <linux/mount.h>
-#include <linux/pagemap.h>
#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/magic.h>
-#include <linux/slab.h>
#include <linux/user_namespace.h>
#include "sysfs.h"
-
-static struct vfsmount *sysfs_mnt;
-struct kmem_cache *sysfs_dir_cachep;
-
-static const struct super_operations sysfs_ops = {
- .statfs = simple_statfs,
- .drop_inode = generic_delete_inode,
- .evict_inode = sysfs_evict_inode,
-};
-
-struct sysfs_dirent sysfs_root = {
- .s_name = "",
- .s_count = ATOMIC_INIT(1),
- .s_flags = SYSFS_DIR | (KOBJ_NS_TYPE_NONE << SYSFS_NS_TYPE_SHIFT),
- .s_mode = S_IFDIR | S_IRUGO | S_IXUGO,
- .s_ino = 1,
-};
-
-static int sysfs_fill_super(struct super_block *sb, void *data, int silent)
-{
- struct inode *inode;
- struct dentry *root;
-
- sb->s_blocksize = PAGE_CACHE_SIZE;
- sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
- sb->s_magic = SYSFS_MAGIC;
- sb->s_op = &sysfs_ops;
- sb->s_time_gran = 1;
-
- /* get root inode, initialize and unlock it */
- mutex_lock(&sysfs_mutex);
- inode = sysfs_get_inode(sb, &sysfs_root);
- mutex_unlock(&sysfs_mutex);
- if (!inode) {
- pr_debug("sysfs: could not get root inode\n");
- return -ENOMEM;
- }
-
- /* instantiate and link root dentry */
- root = d_make_root(inode);
- if (!root) {
- pr_debug("%s: could not get root dentry!\n", __func__);
- return -ENOMEM;
- }
- root->d_fsdata = &sysfs_root;
- sb->s_root = root;
- sb->s_d_op = &sysfs_dentry_ops;
- return 0;
-}
-
-static int sysfs_test_super(struct super_block *sb, void *data)
-{
- struct sysfs_super_info *sb_info = sysfs_info(sb);
- struct sysfs_super_info *info = data;
- enum kobj_ns_type type;
- int found = 1;
-
- for (type = KOBJ_NS_TYPE_NONE; type < KOBJ_NS_TYPES; type++) {
- if (sb_info->ns[type] != info->ns[type])
- found = 0;
- }
- return found;
-}
-
-static int sysfs_set_super(struct super_block *sb, void *data)
-{
- int error;
- error = set_anon_super(sb, data);
- if (!error)
- sb->s_fs_info = data;
- return error;
-}
-
-static void free_sysfs_super_info(struct sysfs_super_info *info)
-{
- int type;
- for (type = KOBJ_NS_TYPE_NONE; type < KOBJ_NS_TYPES; type++)
- kobj_ns_drop(type, info->ns[type]);
- kfree(info);
-}
+static struct kernfs_root *sysfs_root;
+struct kernfs_node *sysfs_root_kn;
static struct dentry *sysfs_mount(struct file_system_type *fs_type,
int flags, const char *dev_name, void *data)
{
- struct sysfs_super_info *info;
- enum kobj_ns_type type;
- struct super_block *sb;
- int error;
+ struct dentry *root;
+ void *ns;
if (!(flags & MS_KERNMOUNT)) {
if (!capable(CAP_SYS_ADMIN) && !fs_fully_visible(fs_type))
return ERR_PTR(-EPERM);
- for (type = KOBJ_NS_TYPE_NONE; type < KOBJ_NS_TYPES; type++) {
- if (!kobj_ns_current_may_mount(type))
- return ERR_PTR(-EPERM);
- }
- }
-
- info = kzalloc(sizeof(*info), GFP_KERNEL);
- if (!info)
- return ERR_PTR(-ENOMEM);
-
- for (type = KOBJ_NS_TYPE_NONE; type < KOBJ_NS_TYPES; type++)
- info->ns[type] = kobj_ns_grab_current(type);
-
- sb = sget(fs_type, sysfs_test_super, sysfs_set_super, flags, info);
- if (IS_ERR(sb) || sb->s_fs_info != info)
- free_sysfs_super_info(info);
- if (IS_ERR(sb))
- return ERR_CAST(sb);
- if (!sb->s_root) {
- error = sysfs_fill_super(sb, data, flags & MS_SILENT ? 1 : 0);
- if (error) {
- deactivate_locked_super(sb);
- return ERR_PTR(error);
- }
- sb->s_flags |= MS_ACTIVE;
+ if (!kobj_ns_current_may_mount(KOBJ_NS_TYPE_NET))
+ return ERR_PTR(-EPERM);
}
- return dget(sb->s_root);
+ ns = kobj_ns_grab_current(KOBJ_NS_TYPE_NET);
+ root = kernfs_mount_ns(fs_type, flags, sysfs_root, ns);
+ if (IS_ERR(root))
+ kobj_ns_drop(KOBJ_NS_TYPE_NET, ns);
+ return root;
}
static void sysfs_kill_sb(struct super_block *sb)
{
- struct sysfs_super_info *info = sysfs_info(sb);
- /* Remove the superblock from fs_supers/s_instances
- * so we can't find it, before freeing sysfs_super_info.
- */
- kill_anon_super(sb);
- free_sysfs_super_info(info);
+ void *ns = (void *)kernfs_super_ns(sb);
+
+ kernfs_kill_sb(sb);
+ kobj_ns_drop(KOBJ_NS_TYPE_NET, ns);
}
static struct file_system_type sysfs_fs_type = {
@@ -165,48 +60,19 @@ static struct file_system_type sysfs_fs_type = {
int __init sysfs_init(void)
{
- int err = -ENOMEM;
+ int err;
- sysfs_dir_cachep = kmem_cache_create("sysfs_dir_cache",
- sizeof(struct sysfs_dirent),
- 0, 0, NULL);
- if (!sysfs_dir_cachep)
- goto out;
+ sysfs_root = kernfs_create_root(NULL, NULL);
+ if (IS_ERR(sysfs_root))
+ return PTR_ERR(sysfs_root);
- err = sysfs_inode_init();
- if (err)
- goto out_err;
+ sysfs_root_kn = sysfs_root->kn;
err = register_filesystem(&sysfs_fs_type);
- if (!err) {
- sysfs_mnt = kern_mount(&sysfs_fs_type);
- if (IS_ERR(sysfs_mnt)) {
- printk(KERN_ERR "sysfs: could not mount!\n");
- err = PTR_ERR(sysfs_mnt);
- sysfs_mnt = NULL;
- unregister_filesystem(&sysfs_fs_type);
- goto out_err;
- }
- } else
- goto out_err;
-out:
- return err;
-out_err:
- kmem_cache_destroy(sysfs_dir_cachep);
- sysfs_dir_cachep = NULL;
- goto out;
-}
-
-#undef sysfs_get
-struct sysfs_dirent *sysfs_get(struct sysfs_dirent *sd)
-{
- return __sysfs_get(sd);
-}
-EXPORT_SYMBOL_GPL(sysfs_get);
+ if (err) {
+ kernfs_destroy_root(sysfs_root);
+ return err;
+ }
-#undef sysfs_put
-void sysfs_put(struct sysfs_dirent *sd)
-{
- __sysfs_put(sd);
+ return 0;
}
-EXPORT_SYMBOL_GPL(sysfs_put);
diff --git a/fs/sysfs/symlink.c b/fs/sysfs/symlink.c
index 3ae3f1b..aecb15f 100644
--- a/fs/sysfs/symlink.c
+++ b/fs/sysfs/symlink.c
@@ -11,109 +11,73 @@
*/
#include <linux/fs.h>
-#include <linux/gfp.h>
-#include <linux/mount.h>
#include <linux/module.h>
#include <linux/kobject.h>
-#include <linux/namei.h>
#include <linux/mutex.h>
#include <linux/security.h>
#include "sysfs.h"
-static int sysfs_do_create_link_sd(struct sysfs_dirent *parent_sd,
- struct kobject *target,
+static int sysfs_do_create_link_sd(struct kernfs_node *parent,
+ struct kobject *target_kobj,
const char *name, int warn)
{
- struct sysfs_dirent *target_sd = NULL;
- struct sysfs_dirent *sd = NULL;
- struct sysfs_addrm_cxt acxt;
- enum kobj_ns_type ns_type;
- int error;
+ struct kernfs_node *kn, *target = NULL;
- BUG_ON(!name || !parent_sd);
+ BUG_ON(!name || !parent);
/*
- * We don't own @target and it may be removed at any time.
+ * We don't own @target_kobj and it may be removed at any time.
* Synchronize using sysfs_symlink_target_lock. See
* sysfs_remove_dir() for details.
*/
spin_lock(&sysfs_symlink_target_lock);
- if (target->sd)
- target_sd = sysfs_get(target->sd);
+ if (target_kobj->sd) {
+ target = target_kobj->sd;
+ kernfs_get(target);
+ }
spin_unlock(&sysfs_symlink_target_lock);
- error = -ENOENT;
- if (!target_sd)
- goto out_put;
-
- error = -ENOMEM;
- sd = sysfs_new_dirent(name, S_IFLNK|S_IRWXUGO, SYSFS_KOBJ_LINK);
- if (!sd)
- goto out_put;
+ if (!target)
+ return -ENOENT;
- ns_type = sysfs_ns_type(parent_sd);
- if (ns_type)
- sd->s_ns = target_sd->s_ns;
- sd->s_symlink.target_sd = target_sd;
- target_sd = NULL; /* reference is now owned by the symlink */
-
- sysfs_addrm_start(&acxt);
- /* Symlinks must be between directories with the same ns_type */
- if (!ns_type ||
- (ns_type == sysfs_ns_type(sd->s_symlink.target_sd->s_parent))) {
- if (warn)
- error = sysfs_add_one(&acxt, sd, parent_sd);
- else
- error = __sysfs_add_one(&acxt, sd, parent_sd);
- } else {
- error = -EINVAL;
- WARN(1, KERN_WARNING
- "sysfs: symlink across ns_types %s/%s -> %s/%s\n",
- parent_sd->s_name,
- sd->s_name,
- sd->s_symlink.target_sd->s_parent->s_name,
- sd->s_symlink.target_sd->s_name);
- }
- sysfs_addrm_finish(&acxt);
+ kn = kernfs_create_link(parent, name, target);
+ kernfs_put(target);
- if (error)
- goto out_put;
+ if (!IS_ERR(kn))
+ return 0;
- return 0;
-
- out_put:
- sysfs_put(target_sd);
- sysfs_put(sd);
- return error;
+ if (warn && PTR_ERR(kn) == -EEXIST)
+ sysfs_warn_dup(parent, name);
+ return PTR_ERR(kn);
}
/**
* sysfs_create_link_sd - create symlink to a given object.
- * @sd: directory we're creating the link in.
+ * @kn: directory we're creating the link in.
* @target: object we're pointing to.
* @name: name of the symlink.
*/
-int sysfs_create_link_sd(struct sysfs_dirent *sd, struct kobject *target,
+int sysfs_create_link_sd(struct kernfs_node *kn, struct kobject *target,
const char *name)
{
- return sysfs_do_create_link_sd(sd, target, name, 1);
+ return sysfs_do_create_link_sd(kn, target, name, 1);
}
static int sysfs_do_create_link(struct kobject *kobj, struct kobject *target,
const char *name, int warn)
{
- struct sysfs_dirent *parent_sd = NULL;
+ struct kernfs_node *parent = NULL;
if (!kobj)
- parent_sd = &sysfs_root;
+ parent = sysfs_root_kn;
else
- parent_sd = kobj->sd;
+ parent = kobj->sd;
- if (!parent_sd)
+ if (!parent)
return -EFAULT;
- return sysfs_do_create_link_sd(parent_sd, target, name, warn);
+ return sysfs_do_create_link_sd(parent, target, name, warn);
}
/**
@@ -164,10 +128,10 @@ void sysfs_delete_link(struct kobject *kobj, struct kobject *targ,
* sysfs_remove_dir() for details.
*/
spin_lock(&sysfs_symlink_target_lock);
- if (targ->sd && sysfs_ns_type(kobj->sd))
- ns = targ->sd->s_ns;
+ if (targ->sd && kernfs_ns_enabled(kobj->sd))
+ ns = targ->sd->ns;
spin_unlock(&sysfs_symlink_target_lock);
- sysfs_hash_and_remove(kobj->sd, name, ns);
+ kernfs_remove_by_name_ns(kobj->sd, name, ns);
}
/**
@@ -177,14 +141,14 @@ void sysfs_delete_link(struct kobject *kobj, struct kobject *targ,
*/
void sysfs_remove_link(struct kobject *kobj, const char *name)
{
- struct sysfs_dirent *parent_sd = NULL;
+ struct kernfs_node *parent = NULL;
if (!kobj)
- parent_sd = &sysfs_root;
+ parent = sysfs_root_kn;
else
- parent_sd = kobj->sd;
+ parent = kobj->sd;
- sysfs_hash_and_remove(parent_sd, name, NULL);
+ kernfs_remove_by_name(parent, name);
}
EXPORT_SYMBOL_GPL(sysfs_remove_link);
@@ -201,130 +165,33 @@ EXPORT_SYMBOL_GPL(sysfs_remove_link);
int sysfs_rename_link_ns(struct kobject *kobj, struct kobject *targ,
const char *old, const char *new, const void *new_ns)
{
- struct sysfs_dirent *parent_sd, *sd = NULL;
+ struct kernfs_node *parent, *kn = NULL;
const void *old_ns = NULL;
int result;
if (!kobj)
- parent_sd = &sysfs_root;
+ parent = sysfs_root_kn;
else
- parent_sd = kobj->sd;
+ parent = kobj->sd;
if (targ->sd)
- old_ns = targ->sd->s_ns;
+ old_ns = targ->sd->ns;
result = -ENOENT;
- sd = sysfs_get_dirent_ns(parent_sd, old, old_ns);
- if (!sd)
+ kn = kernfs_find_and_get_ns(parent, old, old_ns);
+ if (!kn)
goto out;
result = -EINVAL;
- if (sysfs_type(sd) != SYSFS_KOBJ_LINK)
+ if (kernfs_type(kn) != KERNFS_LINK)
goto out;
- if (sd->s_symlink.target_sd->s_dir.kobj != targ)
+ if (kn->symlink.target_kn->priv != targ)
goto out;
- result = sysfs_rename(sd, parent_sd, new, new_ns);
+ result = kernfs_rename_ns(kn, parent, new, new_ns);
out:
- sysfs_put(sd);
+ kernfs_put(kn);
return result;
}
EXPORT_SYMBOL_GPL(sysfs_rename_link_ns);
-
-static int sysfs_get_target_path(struct sysfs_dirent *parent_sd,
- struct sysfs_dirent *target_sd, char *path)
-{
- struct sysfs_dirent *base, *sd;
- char *s = path;
- int len = 0;
-
- /* go up to the root, stop at the base */
- base = parent_sd;
- while (base->s_parent) {
- sd = target_sd->s_parent;
- while (sd->s_parent && base != sd)
- sd = sd->s_parent;
-
- if (base == sd)
- break;
-
- strcpy(s, "../");
- s += 3;
- base = base->s_parent;
- }
-
- /* determine end of target string for reverse fillup */
- sd = target_sd;
- while (sd->s_parent && sd != base) {
- len += strlen(sd->s_name) + 1;
- sd = sd->s_parent;
- }
-
- /* check limits */
- if (len < 2)
- return -EINVAL;
- len--;
- if ((s - path) + len > PATH_MAX)
- return -ENAMETOOLONG;
-
- /* reverse fillup of target string from target to base */
- sd = target_sd;
- while (sd->s_parent && sd != base) {
- int slen = strlen(sd->s_name);
-
- len -= slen;
- strncpy(s + len, sd->s_name, slen);
- if (len)
- s[--len] = '/';
-
- sd = sd->s_parent;
- }
-
- return 0;
-}
-
-static int sysfs_getlink(struct dentry *dentry, char *path)
-{
- struct sysfs_dirent *sd = dentry->d_fsdata;
- struct sysfs_dirent *parent_sd = sd->s_parent;
- struct sysfs_dirent *target_sd = sd->s_symlink.target_sd;
- int error;
-
- mutex_lock(&sysfs_mutex);
- error = sysfs_get_target_path(parent_sd, target_sd, path);
- mutex_unlock(&sysfs_mutex);
-
- return error;
-}
-
-static void *sysfs_follow_link(struct dentry *dentry, struct nameidata *nd)
-{
- int error = -ENOMEM;
- unsigned long page = get_zeroed_page(GFP_KERNEL);
- if (page) {
- error = sysfs_getlink(dentry, (char *) page);
- if (error < 0)
- free_page((unsigned long)page);
- }
- nd_set_link(nd, error ? ERR_PTR(error) : (char *)page);
- return NULL;
-}
-
-static void sysfs_put_link(struct dentry *dentry, struct nameidata *nd,
- void *cookie)
-{
- char *page = nd_get_link(nd);
- if (!IS_ERR(page))
- free_page((unsigned long)page);
-}
-
-const struct inode_operations sysfs_symlink_inode_operations = {
- .setxattr = sysfs_setxattr,
- .readlink = generic_readlink,
- .follow_link = sysfs_follow_link,
- .put_link = sysfs_put_link,
- .setattr = sysfs_setattr,
- .getattr = sysfs_getattr,
- .permission = sysfs_permission,
-};
diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h
index 0af09fb..0e2f1cc 100644
--- a/fs/sysfs/sysfs.h
+++ b/fs/sysfs/sysfs.h
@@ -8,248 +8,36 @@
* This file is released under the GPLv2.
*/
-#include <linux/lockdep.h>
-#include <linux/kobject_ns.h>
-#include <linux/fs.h>
-#include <linux/rbtree.h>
+#ifndef __SYSFS_INTERNAL_H
+#define __SYSFS_INTERNAL_H
-struct sysfs_open_dirent;
-
-/* type-specific structures for sysfs_dirent->s_* union members */
-struct sysfs_elem_dir {
- struct kobject *kobj;
-
- unsigned long subdirs;
- /* children rbtree starts here and goes through sd->s_rb */
- struct rb_root children;
-};
-
-struct sysfs_elem_symlink {
- struct sysfs_dirent *target_sd;
-};
-
-struct sysfs_elem_attr {
- union {
- struct attribute *attr;
- struct bin_attribute *bin_attr;
- };
- struct sysfs_open_dirent *open;
-};
-
-struct sysfs_inode_attrs {
- struct iattr ia_iattr;
- void *ia_secdata;
- u32 ia_secdata_len;
-};
-
-/*
- * sysfs_dirent - the building block of sysfs hierarchy. Each and
- * every sysfs node is represented by single sysfs_dirent.
- *
- * As long as s_count reference is held, the sysfs_dirent itself is
- * accessible. Dereferencing s_elem or any other outer entity
- * requires s_active reference.
- */
-struct sysfs_dirent {
- atomic_t s_count;
- atomic_t s_active;
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
- struct lockdep_map dep_map;
-#endif
- struct sysfs_dirent *s_parent;
- const char *s_name;
-
- struct rb_node s_rb;
-
- union {
- struct completion *completion;
- struct sysfs_dirent *removed_list;
- } u;
-
- const void *s_ns; /* namespace tag */
- unsigned int s_hash; /* ns + name hash */
- union {
- struct sysfs_elem_dir s_dir;
- struct sysfs_elem_symlink s_symlink;
- struct sysfs_elem_attr s_attr;
- };
-
- unsigned short s_flags;
- umode_t s_mode;
- unsigned int s_ino;
- struct sysfs_inode_attrs *s_iattr;
-};
-
-#define SD_DEACTIVATED_BIAS INT_MIN
-
-#define SYSFS_TYPE_MASK 0x00ff
-#define SYSFS_DIR 0x0001
-#define SYSFS_KOBJ_ATTR 0x0002
-#define SYSFS_KOBJ_BIN_ATTR 0x0004
-#define SYSFS_KOBJ_LINK 0x0008
-#define SYSFS_COPY_NAME (SYSFS_DIR | SYSFS_KOBJ_LINK)
-#define SYSFS_ACTIVE_REF (SYSFS_KOBJ_ATTR | SYSFS_KOBJ_BIN_ATTR)
-
-/* identify any namespace tag on sysfs_dirents */
-#define SYSFS_NS_TYPE_MASK 0xf00
-#define SYSFS_NS_TYPE_SHIFT 8
-
-#define SYSFS_FLAG_MASK ~(SYSFS_NS_TYPE_MASK|SYSFS_TYPE_MASK)
-#define SYSFS_FLAG_REMOVED 0x02000
-
-static inline unsigned int sysfs_type(struct sysfs_dirent *sd)
-{
- return sd->s_flags & SYSFS_TYPE_MASK;
-}
-
-/*
- * Return any namespace tags on this dirent.
- * enum kobj_ns_type is defined in linux/kobject.h
- */
-static inline enum kobj_ns_type sysfs_ns_type(struct sysfs_dirent *sd)
-{
- return (sd->s_flags & SYSFS_NS_TYPE_MASK) >> SYSFS_NS_TYPE_SHIFT;
-}
-
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
-
-#define sysfs_dirent_init_lockdep(sd) \
-do { \
- struct attribute *attr = sd->s_attr.attr; \
- struct lock_class_key *key = attr->key; \
- if (!key) \
- key = &attr->skey; \
- \
- lockdep_init_map(&sd->dep_map, "s_active", key, 0); \
-} while (0)
-
-/* Test for attributes that want to ignore lockdep for read-locking */
-static inline bool sysfs_ignore_lockdep(struct sysfs_dirent *sd)
-{
- int type = sysfs_type(sd);
-
- return (type == SYSFS_KOBJ_ATTR || type == SYSFS_KOBJ_BIN_ATTR) &&
- sd->s_attr.attr->ignore_lockdep;
-}
-
-#else
-
-#define sysfs_dirent_init_lockdep(sd) do {} while (0)
-
-static inline bool sysfs_ignore_lockdep(struct sysfs_dirent *sd)
-{
- return true;
-}
-
-#endif
-
-/*
- * Context structure to be used while adding/removing nodes.
- */
-struct sysfs_addrm_cxt {
- struct sysfs_dirent *removed;
-};
+#include <linux/sysfs.h>
/*
* mount.c
*/
-
-/*
- * Each sb is associated with a set of namespace tags (i.e.
- * the network namespace of the task which mounted this sysfs
- * instance).
- */
-struct sysfs_super_info {
- void *ns[KOBJ_NS_TYPES];
-};
-#define sysfs_info(SB) ((struct sysfs_super_info *)(SB->s_fs_info))
-extern struct sysfs_dirent sysfs_root;
-extern struct kmem_cache *sysfs_dir_cachep;
+extern struct kernfs_node *sysfs_root_kn;
/*
* dir.c
*/
-extern struct mutex sysfs_mutex;
extern spinlock_t sysfs_symlink_target_lock;
-extern const struct dentry_operations sysfs_dentry_ops;
-
-extern const struct file_operations sysfs_dir_operations;
-extern const struct inode_operations sysfs_dir_inode_operations;
-struct sysfs_dirent *sysfs_get_active(struct sysfs_dirent *sd);
-void sysfs_put_active(struct sysfs_dirent *sd);
-void sysfs_addrm_start(struct sysfs_addrm_cxt *acxt);
-void sysfs_warn_dup(struct sysfs_dirent *parent, const char *name);
-int __sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd,
- struct sysfs_dirent *parent_sd);
-int sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd,
- struct sysfs_dirent *parent_sd);
-void sysfs_remove(struct sysfs_dirent *sd);
-int sysfs_hash_and_remove(struct sysfs_dirent *dir_sd, const char *name,
- const void *ns);
-void sysfs_addrm_finish(struct sysfs_addrm_cxt *acxt);
-
-struct sysfs_dirent *sysfs_find_dirent(struct sysfs_dirent *parent_sd,
- const unsigned char *name,
- const void *ns);
-struct sysfs_dirent *sysfs_new_dirent(const char *name, umode_t mode, int type);
-
-void release_sysfs_dirent(struct sysfs_dirent *sd);
-
-int sysfs_create_subdir(struct kobject *kobj, const char *name,
- struct sysfs_dirent **p_sd);
-
-int sysfs_rename(struct sysfs_dirent *sd, struct sysfs_dirent *new_parent_sd,
- const char *new_name, const void *new_ns);
-
-static inline struct sysfs_dirent *__sysfs_get(struct sysfs_dirent *sd)
-{
- if (sd) {
- WARN_ON(!atomic_read(&sd->s_count));
- atomic_inc(&sd->s_count);
- }
- return sd;
-}
-#define sysfs_get(sd) __sysfs_get(sd)
-
-static inline void __sysfs_put(struct sysfs_dirent *sd)
-{
- if (sd && atomic_dec_and_test(&sd->s_count))
- release_sysfs_dirent(sd);
-}
-#define sysfs_put(sd) __sysfs_put(sd)
-
-/*
- * inode.c
- */
-struct inode *sysfs_get_inode(struct super_block *sb, struct sysfs_dirent *sd);
-void sysfs_evict_inode(struct inode *inode);
-int sysfs_sd_setattr(struct sysfs_dirent *sd, struct iattr *iattr);
-int sysfs_permission(struct inode *inode, int mask);
-int sysfs_setattr(struct dentry *dentry, struct iattr *iattr);
-int sysfs_getattr(struct vfsmount *mnt, struct dentry *dentry,
- struct kstat *stat);
-int sysfs_setxattr(struct dentry *dentry, const char *name, const void *value,
- size_t size, int flags);
-int sysfs_inode_init(void);
+void sysfs_warn_dup(struct kernfs_node *parent, const char *name);
/*
* file.c
*/
-extern const struct file_operations sysfs_file_operations;
-extern const struct file_operations sysfs_bin_operations;
-
-int sysfs_add_file(struct sysfs_dirent *dir_sd,
- const struct attribute *attr, int type);
-
-int sysfs_add_file_mode_ns(struct sysfs_dirent *dir_sd,
- const struct attribute *attr, int type,
+int sysfs_add_file(struct kernfs_node *parent,
+ const struct attribute *attr, bool is_bin);
+int sysfs_add_file_mode_ns(struct kernfs_node *parent,
+ const struct attribute *attr, bool is_bin,
umode_t amode, const void *ns);
-void sysfs_unmap_bin_file(struct sysfs_dirent *sd);
/*
* symlink.c
*/
-extern const struct inode_operations sysfs_symlink_inode_operations;
-int sysfs_create_link_sd(struct sysfs_dirent *sd, struct kobject *target,
+int sysfs_create_link_sd(struct kernfs_node *kn, struct kobject *target,
const char *name);
+
+#endif /* __SYSFS_INTERNAL_H */
diff --git a/fs/xfs/xfs_attr_remote.c b/fs/xfs/xfs_attr_remote.c
index 739e0a52..5549d69 100644
--- a/fs/xfs/xfs_attr_remote.c
+++ b/fs/xfs/xfs_attr_remote.c
@@ -110,7 +110,7 @@ xfs_attr3_rmt_verify(
if (be32_to_cpu(rmt->rm_bytes) > fsbsize - sizeof(*rmt))
return false;
if (be32_to_cpu(rmt->rm_offset) +
- be32_to_cpu(rmt->rm_bytes) >= XATTR_SIZE_MAX)
+ be32_to_cpu(rmt->rm_bytes) > XATTR_SIZE_MAX)
return false;
if (rmt->rm_owner == 0)
return false;
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index 1394106..82e0dab 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -287,6 +287,7 @@ xfs_bmapi_allocate(
INIT_WORK_ONSTACK(&args->work, xfs_bmapi_allocate_worker);
queue_work(xfs_alloc_wq, &args->work);
wait_for_completion(&done);
+ destroy_work_on_stack(&args->work);
return args->result;
}