summaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/block_dev.c6
-rw-r--r--fs/cifs/cifsencrypt.c32
-rw-r--r--fs/cifs/smbencrypt.c26
-rw-r--r--fs/compat_ioctl.c22
-rw-r--r--fs/configfs/dir.c44
-rw-r--r--fs/configfs/inode.c8
-rw-r--r--fs/configfs/item.c1
-rw-r--r--fs/dlm/config.c38
-rw-r--r--fs/dlm/lowcomms.c74
-rw-r--r--fs/ecryptfs/crypto.c107
-rw-r--r--fs/ecryptfs/ecryptfs_kernel.h12
-rw-r--r--fs/ecryptfs/inode.c1
-rw-r--r--fs/ecryptfs/keystore.c218
-rw-r--r--fs/ecryptfs/main.c1
-rw-r--r--fs/ecryptfs/mmap.c1
-rw-r--r--fs/ecryptfs/super.c1
-rw-r--r--fs/exec.c92
-rw-r--r--fs/ext2/ext2.h3
-rw-r--r--fs/ext2/super.c25
-rw-r--r--fs/ext2/xattr.c139
-rw-r--r--fs/ext2/xattr.h21
-rw-r--r--fs/ext4/crypto.c24
-rw-r--r--fs/ext4/crypto_fname.c32
-rw-r--r--fs/ext4/crypto_key.c42
-rw-r--r--fs/ext4/ext4.h45
-rw-r--r--fs/ext4/ext4_crypto.h2
-rw-r--r--fs/ext4/ext4_extents.h2
-rw-r--r--fs/ext4/extents.c128
-rw-r--r--fs/ext4/extents_status.c4
-rw-r--r--fs/ext4/file.c129
-rw-r--r--fs/ext4/ialloc.c2
-rw-r--r--fs/ext4/indirect.c29
-rw-r--r--fs/ext4/inline.c8
-rw-r--r--fs/ext4/inode.c388
-rw-r--r--fs/ext4/mballoc.c81
-rw-r--r--fs/ext4/mballoc.h12
-rw-r--r--fs/ext4/migrate.c2
-rw-r--r--fs/ext4/mmp.c34
-rw-r--r--fs/ext4/page-io.c4
-rw-r--r--fs/ext4/super.c35
-rw-r--r--fs/ext4/xattr.c166
-rw-r--r--fs/ext4/xattr.h3
-rw-r--r--fs/f2fs/crypto.c24
-rw-r--r--fs/f2fs/crypto_fname.c32
-rw-r--r--fs/f2fs/crypto_key.c40
-rw-r--r--fs/f2fs/f2fs_crypto.h2
-rw-r--r--fs/gfs2/aops.c2
-rw-r--r--fs/gfs2/dir.c6
-rw-r--r--fs/gfs2/export.c2
-rw-r--r--fs/gfs2/glock.c10
-rw-r--r--fs/gfs2/incore.h1
-rw-r--r--fs/gfs2/inode.c71
-rw-r--r--fs/gfs2/inode.h5
-rw-r--r--fs/gfs2/ops_fstype.c2
-rw-r--r--fs/gfs2/super.c26
-rw-r--r--fs/jbd2/commit.c49
-rw-r--r--fs/jbd2/journal.c43
-rw-r--r--fs/jbd2/recovery.c31
-rw-r--r--fs/jbd2/revoke.c60
-rw-r--r--fs/jbd2/transaction.c22
-rw-r--r--fs/kernfs/dir.c19
-rw-r--r--fs/mbcache.c1093
-rw-r--r--fs/nfsd/nfs4recover.c28
-rw-r--r--fs/ocfs2/cluster/nodemanager.c22
-rw-r--r--fs/pstore/ram.c4
65 files changed, 1687 insertions, 1951 deletions
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 826b164..3172c4e 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -575,7 +575,11 @@ static const struct super_operations bdev_sops = {
static struct dentry *bd_mount(struct file_system_type *fs_type,
int flags, const char *dev_name, void *data)
{
- return mount_pseudo(fs_type, "bdev:", &bdev_sops, NULL, BDEVFS_MAGIC);
+ struct dentry *dent;
+ dent = mount_pseudo(fs_type, "bdev:", &bdev_sops, NULL, BDEVFS_MAGIC);
+ if (dent)
+ dent->d_sb->s_iflags |= SB_I_CGROUPWB;
+ return dent;
}
static struct file_system_type bd_type = {
diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c
index e682b36..4897dac 100644
--- a/fs/cifs/cifsencrypt.c
+++ b/fs/cifs/cifsencrypt.c
@@ -33,6 +33,7 @@
#include <linux/ctype.h>
#include <linux/random.h>
#include <linux/highmem.h>
+#include <crypto/skcipher.h>
static int
cifs_crypto_shash_md5_allocate(struct TCP_Server_Info *server)
@@ -789,38 +790,46 @@ int
calc_seckey(struct cifs_ses *ses)
{
int rc;
- struct crypto_blkcipher *tfm_arc4;
+ struct crypto_skcipher *tfm_arc4;
struct scatterlist sgin, sgout;
- struct blkcipher_desc desc;
+ struct skcipher_request *req;
unsigned char sec_key[CIFS_SESS_KEY_SIZE]; /* a nonce */
get_random_bytes(sec_key, CIFS_SESS_KEY_SIZE);
- tfm_arc4 = crypto_alloc_blkcipher("ecb(arc4)", 0, CRYPTO_ALG_ASYNC);
+ tfm_arc4 = crypto_alloc_skcipher("ecb(arc4)", 0, CRYPTO_ALG_ASYNC);
if (IS_ERR(tfm_arc4)) {
rc = PTR_ERR(tfm_arc4);
cifs_dbg(VFS, "could not allocate crypto API arc4\n");
return rc;
}
- desc.tfm = tfm_arc4;
-
- rc = crypto_blkcipher_setkey(tfm_arc4, ses->auth_key.response,
+ rc = crypto_skcipher_setkey(tfm_arc4, ses->auth_key.response,
CIFS_SESS_KEY_SIZE);
if (rc) {
cifs_dbg(VFS, "%s: Could not set response as a key\n",
__func__);
- return rc;
+ goto out_free_cipher;
+ }
+
+ req = skcipher_request_alloc(tfm_arc4, GFP_KERNEL);
+ if (!req) {
+ rc = -ENOMEM;
+ cifs_dbg(VFS, "could not allocate crypto API arc4 request\n");
+ goto out_free_cipher;
}
sg_init_one(&sgin, sec_key, CIFS_SESS_KEY_SIZE);
sg_init_one(&sgout, ses->ntlmssp->ciphertext, CIFS_CPHTXT_SIZE);
- rc = crypto_blkcipher_encrypt(&desc, &sgout, &sgin, CIFS_CPHTXT_SIZE);
+ skcipher_request_set_callback(req, 0, NULL, NULL);
+ skcipher_request_set_crypt(req, &sgin, &sgout, CIFS_CPHTXT_SIZE, NULL);
+
+ rc = crypto_skcipher_encrypt(req);
+ skcipher_request_free(req);
if (rc) {
cifs_dbg(VFS, "could not encrypt session key rc: %d\n", rc);
- crypto_free_blkcipher(tfm_arc4);
- return rc;
+ goto out_free_cipher;
}
/* make secondary_key/nonce as session key */
@@ -828,7 +837,8 @@ calc_seckey(struct cifs_ses *ses)
/* and make len as that of session key only */
ses->auth_key.len = CIFS_SESS_KEY_SIZE;
- crypto_free_blkcipher(tfm_arc4);
+out_free_cipher:
+ crypto_free_skcipher(tfm_arc4);
return rc;
}
diff --git a/fs/cifs/smbencrypt.c b/fs/cifs/smbencrypt.c
index a4232ec..699b786 100644
--- a/fs/cifs/smbencrypt.c
+++ b/fs/cifs/smbencrypt.c
@@ -23,6 +23,7 @@
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
+#include <crypto/skcipher.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/fs.h>
@@ -70,31 +71,42 @@ smbhash(unsigned char *out, const unsigned char *in, unsigned char *key)
{
int rc;
unsigned char key2[8];
- struct crypto_blkcipher *tfm_des;
+ struct crypto_skcipher *tfm_des;
struct scatterlist sgin, sgout;
- struct blkcipher_desc desc;
+ struct skcipher_request *req;
str_to_key(key, key2);
- tfm_des = crypto_alloc_blkcipher("ecb(des)", 0, CRYPTO_ALG_ASYNC);
+ tfm_des = crypto_alloc_skcipher("ecb(des)", 0, CRYPTO_ALG_ASYNC);
if (IS_ERR(tfm_des)) {
rc = PTR_ERR(tfm_des);
cifs_dbg(VFS, "could not allocate des crypto API\n");
goto smbhash_err;
}
- desc.tfm = tfm_des;
+ req = skcipher_request_alloc(tfm_des, GFP_KERNEL);
+ if (!req) {
+ rc = -ENOMEM;
+ cifs_dbg(VFS, "could not allocate des crypto API\n");
+ goto smbhash_free_skcipher;
+ }
- crypto_blkcipher_setkey(tfm_des, key2, 8);
+ crypto_skcipher_setkey(tfm_des, key2, 8);
sg_init_one(&sgin, in, 8);
sg_init_one(&sgout, out, 8);
- rc = crypto_blkcipher_encrypt(&desc, &sgout, &sgin, 8);
+ skcipher_request_set_callback(req, 0, NULL, NULL);
+ skcipher_request_set_crypt(req, &sgin, &sgout, 8, NULL);
+
+ rc = crypto_skcipher_encrypt(req);
if (rc)
cifs_dbg(VFS, "could not encrypt crypt key rc: %d\n", rc);
- crypto_free_blkcipher(tfm_des);
+ skcipher_request_free(req);
+
+smbhash_free_skcipher:
+ crypto_free_skcipher(tfm_des);
smbhash_err:
return rc;
}
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index 6402eaf..bd01b92 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -1040,28 +1040,6 @@ COMPATIBLE_IOCTL(PPPIOCGL2TPSTATS)
/* PPPOX */
COMPATIBLE_IOCTL(PPPOEIOCSFWD)
COMPATIBLE_IOCTL(PPPOEIOCDFWD)
-/* ppdev */
-COMPATIBLE_IOCTL(PPSETMODE)
-COMPATIBLE_IOCTL(PPRSTATUS)
-COMPATIBLE_IOCTL(PPRCONTROL)
-COMPATIBLE_IOCTL(PPWCONTROL)
-COMPATIBLE_IOCTL(PPFCONTROL)
-COMPATIBLE_IOCTL(PPRDATA)
-COMPATIBLE_IOCTL(PPWDATA)
-COMPATIBLE_IOCTL(PPCLAIM)
-COMPATIBLE_IOCTL(PPRELEASE)
-COMPATIBLE_IOCTL(PPYIELD)
-COMPATIBLE_IOCTL(PPEXCL)
-COMPATIBLE_IOCTL(PPDATADIR)
-COMPATIBLE_IOCTL(PPNEGOT)
-COMPATIBLE_IOCTL(PPWCTLONIRQ)
-COMPATIBLE_IOCTL(PPCLRIRQ)
-COMPATIBLE_IOCTL(PPSETPHASE)
-COMPATIBLE_IOCTL(PPGETMODES)
-COMPATIBLE_IOCTL(PPGETMODE)
-COMPATIBLE_IOCTL(PPGETPHASE)
-COMPATIBLE_IOCTL(PPGETFLAGS)
-COMPATIBLE_IOCTL(PPSETFLAGS)
/* Big A */
/* sparc only */
/* Big Q for sound/OSS */
diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c
index f419519..b51ce67 100644
--- a/fs/configfs/dir.c
+++ b/fs/configfs/dir.c
@@ -701,23 +701,29 @@ static int populate_groups(struct config_group *group)
{
struct config_group *new_group;
int ret = 0;
- int i;
-
- if (group->default_groups) {
- for (i = 0; group->default_groups[i]; i++) {
- new_group = group->default_groups[i];
- ret = create_default_group(group, new_group);
- if (ret) {
- detach_groups(group);
- break;
- }
+ list_for_each_entry(new_group, &group->default_groups, group_entry) {
+ ret = create_default_group(group, new_group);
+ if (ret) {
+ detach_groups(group);
+ break;
}
}
return ret;
}
+void configfs_remove_default_groups(struct config_group *group)
+{
+ struct config_group *g, *n;
+
+ list_for_each_entry_safe(g, n, &group->default_groups, group_entry) {
+ list_del(&g->group_entry);
+ config_item_put(&g->cg_item);
+ }
+}
+EXPORT_SYMBOL(configfs_remove_default_groups);
+
/*
* All of link_obj/unlink_obj/link_group/unlink_group require that
* subsys->su_mutex is held.
@@ -766,15 +772,10 @@ static void link_obj(struct config_item *parent_item, struct config_item *item)
static void unlink_group(struct config_group *group)
{
- int i;
struct config_group *new_group;
- if (group->default_groups) {
- for (i = 0; group->default_groups[i]; i++) {
- new_group = group->default_groups[i];
- unlink_group(new_group);
- }
- }
+ list_for_each_entry(new_group, &group->default_groups, group_entry)
+ unlink_group(new_group);
group->cg_subsys = NULL;
unlink_obj(&group->cg_item);
@@ -782,7 +783,6 @@ static void unlink_group(struct config_group *group)
static void link_group(struct config_group *parent_group, struct config_group *group)
{
- int i;
struct config_group *new_group;
struct configfs_subsystem *subsys = NULL; /* gcc is a turd */
@@ -796,12 +796,8 @@ static void link_group(struct config_group *parent_group, struct config_group *g
BUG();
group->cg_subsys = subsys;
- if (group->default_groups) {
- for (i = 0; group->default_groups[i]; i++) {
- new_group = group->default_groups[i];
- link_group(group, new_group);
- }
- }
+ list_for_each_entry(new_group, &group->default_groups, group_entry)
+ link_group(group, new_group);
}
/*
diff --git a/fs/configfs/inode.c b/fs/configfs/inode.c
index cee087d..5f24ad3 100644
--- a/fs/configfs/inode.c
+++ b/fs/configfs/inode.c
@@ -75,7 +75,8 @@ int configfs_setattr(struct dentry * dentry, struct iattr * iattr)
sd_iattr->ia_mode = sd->s_mode;
sd_iattr->ia_uid = GLOBAL_ROOT_UID;
sd_iattr->ia_gid = GLOBAL_ROOT_GID;
- sd_iattr->ia_atime = sd_iattr->ia_mtime = sd_iattr->ia_ctime = CURRENT_TIME;
+ sd_iattr->ia_atime = sd_iattr->ia_mtime =
+ sd_iattr->ia_ctime = current_fs_time(inode->i_sb);
sd->s_iattr = sd_iattr;
}
/* attributes were changed atleast once in past */
@@ -111,7 +112,8 @@ int configfs_setattr(struct dentry * dentry, struct iattr * iattr)
static inline void set_default_inode_attr(struct inode * inode, umode_t mode)
{
inode->i_mode = mode;
- inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
+ inode->i_atime = inode->i_mtime =
+ inode->i_ctime = current_fs_time(inode->i_sb);
}
static inline void set_inode_attr(struct inode * inode, struct iattr * iattr)
@@ -195,7 +197,7 @@ int configfs_create(struct dentry * dentry, umode_t mode, void (*init)(struct in
return -ENOMEM;
p_inode = d_inode(dentry->d_parent);
- p_inode->i_mtime = p_inode->i_ctime = CURRENT_TIME;
+ p_inode->i_mtime = p_inode->i_ctime = current_fs_time(p_inode->i_sb);
configfs_set_inode_lock_class(sd, inode);
init(inode);
diff --git a/fs/configfs/item.c b/fs/configfs/item.c
index b863a09..8b2a994 100644
--- a/fs/configfs/item.c
+++ b/fs/configfs/item.c
@@ -182,6 +182,7 @@ void config_group_init(struct config_group *group)
{
config_item_init(&group->cg_item);
INIT_LIST_HEAD(&group->cg_children);
+ INIT_LIST_HEAD(&group->default_groups);
}
EXPORT_SYMBOL(config_group_init);
diff --git a/fs/dlm/config.c b/fs/dlm/config.c
index 8e294fb..5191121 100644
--- a/fs/dlm/config.c
+++ b/fs/dlm/config.c
@@ -346,7 +346,6 @@ static struct config_group *make_cluster(struct config_group *g,
void *gps = NULL;
cl = kzalloc(sizeof(struct dlm_cluster), GFP_NOFS);
- gps = kcalloc(3, sizeof(struct config_group *), GFP_NOFS);
sps = kzalloc(sizeof(struct dlm_spaces), GFP_NOFS);
cms = kzalloc(sizeof(struct dlm_comms), GFP_NOFS);
@@ -357,10 +356,8 @@ static struct config_group *make_cluster(struct config_group *g,
config_group_init_type_name(&sps->ss_group, "spaces", &spaces_type);
config_group_init_type_name(&cms->cs_group, "comms", &comms_type);
- cl->group.default_groups = gps;
- cl->group.default_groups[0] = &sps->ss_group;
- cl->group.default_groups[1] = &cms->cs_group;
- cl->group.default_groups[2] = NULL;
+ configfs_add_default_group(&sps->ss_group, &cl->group);
+ configfs_add_default_group(&cms->cs_group, &cl->group);
cl->cl_tcp_port = dlm_config.ci_tcp_port;
cl->cl_buffer_size = dlm_config.ci_buffer_size;
@@ -383,7 +380,6 @@ static struct config_group *make_cluster(struct config_group *g,
fail:
kfree(cl);
- kfree(gps);
kfree(sps);
kfree(cms);
return ERR_PTR(-ENOMEM);
@@ -392,14 +388,8 @@ static struct config_group *make_cluster(struct config_group *g,
static void drop_cluster(struct config_group *g, struct config_item *i)
{
struct dlm_cluster *cl = config_item_to_cluster(i);
- struct config_item *tmp;
- int j;
- for (j = 0; cl->group.default_groups[j]; j++) {
- tmp = &cl->group.default_groups[j]->cg_item;
- cl->group.default_groups[j] = NULL;
- config_item_put(tmp);
- }
+ configfs_remove_default_groups(&cl->group);
space_list = NULL;
comm_list = NULL;
@@ -410,7 +400,6 @@ static void drop_cluster(struct config_group *g, struct config_item *i)
static void release_cluster(struct config_item *i)
{
struct dlm_cluster *cl = config_item_to_cluster(i);
- kfree(cl->group.default_groups);
kfree(cl);
}
@@ -418,21 +407,17 @@ static struct config_group *make_space(struct config_group *g, const char *name)
{
struct dlm_space *sp = NULL;
struct dlm_nodes *nds = NULL;
- void *gps = NULL;
sp = kzalloc(sizeof(struct dlm_space), GFP_NOFS);
- gps = kcalloc(2, sizeof(struct config_group *), GFP_NOFS);
nds = kzalloc(sizeof(struct dlm_nodes), GFP_NOFS);
- if (!sp || !gps || !nds)
+ if (!sp || !nds)
goto fail;
config_group_init_type_name(&sp->group, name, &space_type);
- config_group_init_type_name(&nds->ns_group, "nodes", &nodes_type);
- sp->group.default_groups = gps;
- sp->group.default_groups[0] = &nds->ns_group;
- sp->group.default_groups[1] = NULL;
+ config_group_init_type_name(&nds->ns_group, "nodes", &nodes_type);
+ configfs_add_default_group(&nds->ns_group, &sp->group);
INIT_LIST_HEAD(&sp->members);
mutex_init(&sp->members_lock);
@@ -441,7 +426,6 @@ static struct config_group *make_space(struct config_group *g, const char *name)
fail:
kfree(sp);
- kfree(gps);
kfree(nds);
return ERR_PTR(-ENOMEM);
}
@@ -449,24 +433,16 @@ static struct config_group *make_space(struct config_group *g, const char *name)
static void drop_space(struct config_group *g, struct config_item *i)
{
struct dlm_space *sp = config_item_to_space(i);
- struct config_item *tmp;
- int j;
/* assert list_empty(&sp->members) */
- for (j = 0; sp->group.default_groups[j]; j++) {
- tmp = &sp->group.default_groups[j]->cg_item;
- sp->group.default_groups[j] = NULL;
- config_item_put(tmp);
- }
-
+ configfs_remove_default_groups(&sp->group);
config_item_put(i);
}
static void release_space(struct config_item *i)
{
struct dlm_space *sp = config_item_to_space(i);
- kfree(sp->group.default_groups);
kfree(sp);
}
diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index 3a37bd3..00640e7 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -124,7 +124,10 @@ struct connection {
struct connection *othercon;
struct work_struct rwork; /* Receive workqueue */
struct work_struct swork; /* Send workqueue */
- void (*orig_error_report)(struct sock *sk);
+ void (*orig_error_report)(struct sock *);
+ void (*orig_data_ready)(struct sock *);
+ void (*orig_state_change)(struct sock *);
+ void (*orig_write_space)(struct sock *);
};
#define sock2con(x) ((struct connection *)(x)->sk_user_data)
@@ -467,16 +470,24 @@ int dlm_lowcomms_connect_node(int nodeid)
static void lowcomms_error_report(struct sock *sk)
{
- struct connection *con = sock2con(sk);
+ struct connection *con;
struct sockaddr_storage saddr;
+ int buflen;
+ void (*orig_report)(struct sock *) = NULL;
- if (nodeid_to_addr(con->nodeid, &saddr, NULL, false)) {
+ read_lock_bh(&sk->sk_callback_lock);
+ con = sock2con(sk);
+ if (con == NULL)
+ goto out;
+
+ orig_report = con->orig_error_report;
+ if (con->sock == NULL ||
+ kernel_getpeername(con->sock, (struct sockaddr *)&saddr, &buflen)) {
printk_ratelimited(KERN_ERR "dlm: node %d: socket error "
"sending to node %d, port %d, "
"sk_err=%d/%d\n", dlm_our_nodeid(),
con->nodeid, dlm_config.ci_tcp_port,
sk->sk_err, sk->sk_err_soft);
- return;
} else if (saddr.ss_family == AF_INET) {
struct sockaddr_in *sin4 = (struct sockaddr_in *)&saddr;
@@ -499,22 +510,54 @@ static void lowcomms_error_report(struct sock *sk)
dlm_config.ci_tcp_port, sk->sk_err,
sk->sk_err_soft);
}
- con->orig_error_report(sk);
+out:
+ read_unlock_bh(&sk->sk_callback_lock);
+ if (orig_report)
+ orig_report(sk);
+}
+
+/* Note: sk_callback_lock must be locked before calling this function. */
+static void save_callbacks(struct connection *con, struct sock *sk)
+{
+ lock_sock(sk);
+ con->orig_data_ready = sk->sk_data_ready;
+ con->orig_state_change = sk->sk_state_change;
+ con->orig_write_space = sk->sk_write_space;
+ con->orig_error_report = sk->sk_error_report;
+ release_sock(sk);
+}
+
+static void restore_callbacks(struct connection *con, struct sock *sk)
+{
+ write_lock_bh(&sk->sk_callback_lock);
+ lock_sock(sk);
+ sk->sk_user_data = NULL;
+ sk->sk_data_ready = con->orig_data_ready;
+ sk->sk_state_change = con->orig_state_change;
+ sk->sk_write_space = con->orig_write_space;
+ sk->sk_error_report = con->orig_error_report;
+ release_sock(sk);
+ write_unlock_bh(&sk->sk_callback_lock);
}
/* Make a socket active */
static void add_sock(struct socket *sock, struct connection *con)
{
+ struct sock *sk = sock->sk;
+
+ write_lock_bh(&sk->sk_callback_lock);
con->sock = sock;
+ sk->sk_user_data = con;
+ if (!test_bit(CF_IS_OTHERCON, &con->flags))
+ save_callbacks(con, sk);
/* Install a data_ready callback */
- con->sock->sk->sk_data_ready = lowcomms_data_ready;
- con->sock->sk->sk_write_space = lowcomms_write_space;
- con->sock->sk->sk_state_change = lowcomms_state_change;
- con->sock->sk->sk_user_data = con;
- con->sock->sk->sk_allocation = GFP_NOFS;
- con->orig_error_report = con->sock->sk->sk_error_report;
- con->sock->sk->sk_error_report = lowcomms_error_report;
+ sk->sk_data_ready = lowcomms_data_ready;
+ sk->sk_write_space = lowcomms_write_space;
+ sk->sk_state_change = lowcomms_state_change;
+ sk->sk_allocation = GFP_NOFS;
+ sk->sk_error_report = lowcomms_error_report;
+ write_unlock_bh(&sk->sk_callback_lock);
}
/* Add the port number to an IPv6 or 4 sockaddr and return the address
@@ -549,6 +592,8 @@ static void close_connection(struct connection *con, bool and_other,
mutex_lock(&con->sock_mutex);
if (con->sock) {
+ if (!test_bit(CF_IS_OTHERCON, &con->flags))
+ restore_callbacks(con, con->sock->sk);
sock_release(con->sock);
con->sock = NULL;
}
@@ -1190,6 +1235,8 @@ static struct socket *tcp_create_listen_sock(struct connection *con,
if (result < 0) {
log_print("Failed to set SO_REUSEADDR on socket: %d", result);
}
+ sock->sk->sk_user_data = con;
+
con->rx_action = tcp_accept_from_sock;
con->connect_action = tcp_connect_to_sock;
@@ -1271,6 +1318,7 @@ static int sctp_listen_for_all(void)
if (result < 0)
log_print("Could not set SCTP NODELAY error %d\n", result);
+ write_lock_bh(&sock->sk->sk_callback_lock);
/* Init con struct */
sock->sk->sk_user_data = con;
con->sock = sock;
@@ -1278,6 +1326,8 @@ static int sctp_listen_for_all(void)
con->rx_action = sctp_accept_from_sock;
con->connect_action = sctp_connect_to_sock;
+ write_unlock_bh(&sock->sk->sk_callback_lock);
+
/* Bind to all addresses. */
if (sctp_bind_addrs(con, dlm_config.ci_tcp_port))
goto create_delsock;
diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c
index 80d6901..11255cb 100644
--- a/fs/ecryptfs/crypto.c
+++ b/fs/ecryptfs/crypto.c
@@ -23,6 +23,8 @@
* 02111-1307, USA.
*/
+#include <crypto/hash.h>
+#include <crypto/skcipher.h>
#include <linux/fs.h>
#include <linux/mount.h>
#include <linux/pagemap.h>
@@ -30,7 +32,6 @@
#include <linux/compiler.h>
#include <linux/key.h>
#include <linux/namei.h>
-#include <linux/crypto.h>
#include <linux/file.h>
#include <linux/scatterlist.h>
#include <linux/slab.h>
@@ -74,6 +75,19 @@ void ecryptfs_from_hex(char *dst, char *src, int dst_size)
}
}
+static int ecryptfs_hash_digest(struct crypto_shash *tfm,
+ char *src, int len, char *dst)
+{
+ SHASH_DESC_ON_STACK(desc, tfm);
+ int err;
+
+ desc->tfm = tfm;
+ desc->flags = CRYPTO_TFM_REQ_MAY_SLEEP;
+ err = crypto_shash_digest(desc, src, len, dst);
+ shash_desc_zero(desc);
+ return err;
+}
+
/**
* ecryptfs_calculate_md5 - calculates the md5 of @src
* @dst: Pointer to 16 bytes of allocated memory
@@ -88,45 +102,26 @@ static int ecryptfs_calculate_md5(char *dst,
struct ecryptfs_crypt_stat *crypt_stat,
char *src, int len)
{
- struct scatterlist sg;
- struct hash_desc desc = {
- .tfm = crypt_stat->hash_tfm,
- .flags = CRYPTO_TFM_REQ_MAY_SLEEP
- };
+ struct crypto_shash *tfm;
int rc = 0;
mutex_lock(&crypt_stat->cs_hash_tfm_mutex);
- sg_init_one(&sg, (u8 *)src, len);
- if (!desc.tfm) {
- desc.tfm = crypto_alloc_hash(ECRYPTFS_DEFAULT_HASH, 0,
- CRYPTO_ALG_ASYNC);
- if (IS_ERR(desc.tfm)) {
- rc = PTR_ERR(desc.tfm);
+ tfm = crypt_stat->hash_tfm;
+ if (!tfm) {
+ tfm = crypto_alloc_shash(ECRYPTFS_DEFAULT_HASH, 0, 0);
+ if (IS_ERR(tfm)) {
+ rc = PTR_ERR(tfm);
ecryptfs_printk(KERN_ERR, "Error attempting to "
"allocate crypto context; rc = [%d]\n",
rc);
goto out;
}
- crypt_stat->hash_tfm = desc.tfm;
- }
- rc = crypto_hash_init(&desc);
- if (rc) {
- printk(KERN_ERR
- "%s: Error initializing crypto hash; rc = [%d]\n",
- __func__, rc);
- goto out;
+ crypt_stat->hash_tfm = tfm;
}
- rc = crypto_hash_update(&desc, &sg, len);
+ rc = ecryptfs_hash_digest(tfm, src, len, dst);
if (rc) {
printk(KERN_ERR
- "%s: Error updating crypto hash; rc = [%d]\n",
- __func__, rc);
- goto out;
- }
- rc = crypto_hash_final(&desc, dst);
- if (rc) {
- printk(KERN_ERR
- "%s: Error finalizing crypto hash; rc = [%d]\n",
+ "%s: Error computing crypto hash; rc = [%d]\n",
__func__, rc);
goto out;
}
@@ -234,10 +229,8 @@ void ecryptfs_destroy_crypt_stat(struct ecryptfs_crypt_stat *crypt_stat)
{
struct ecryptfs_key_sig *key_sig, *key_sig_tmp;
- if (crypt_stat->tfm)
- crypto_free_ablkcipher(crypt_stat->tfm);
- if (crypt_stat->hash_tfm)
- crypto_free_hash(crypt_stat->hash_tfm);
+ crypto_free_skcipher(crypt_stat->tfm);
+ crypto_free_shash(crypt_stat->hash_tfm);
list_for_each_entry_safe(key_sig, key_sig_tmp,
&crypt_stat->keysig_list, crypt_stat_list) {
list_del(&key_sig->crypt_stat_list);
@@ -342,7 +335,7 @@ static int crypt_scatterlist(struct ecryptfs_crypt_stat *crypt_stat,
struct scatterlist *src_sg, int size,
unsigned char *iv, int op)
{
- struct ablkcipher_request *req = NULL;
+ struct skcipher_request *req = NULL;
struct extent_crypt_result ecr;
int rc = 0;
@@ -358,20 +351,20 @@ static int crypt_scatterlist(struct ecryptfs_crypt_stat *crypt_stat,
init_completion(&ecr.completion);
mutex_lock(&crypt_stat->cs_tfm_mutex);
- req = ablkcipher_request_alloc(crypt_stat->tfm, GFP_NOFS);
+ req = skcipher_request_alloc(crypt_stat->tfm, GFP_NOFS);
if (!req) {
mutex_unlock(&crypt_stat->cs_tfm_mutex);
rc = -ENOMEM;
goto out;
}
- ablkcipher_request_set_callback(req,
+ skcipher_request_set_callback(req,
CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP,
extent_crypt_complete, &ecr);
/* Consider doing this once, when the file is opened */
if (!(crypt_stat->flags & ECRYPTFS_KEY_SET)) {
- rc = crypto_ablkcipher_setkey(crypt_stat->tfm, crypt_stat->key,
- crypt_stat->key_size);
+ rc = crypto_skcipher_setkey(crypt_stat->tfm, crypt_stat->key,
+ crypt_stat->key_size);
if (rc) {
ecryptfs_printk(KERN_ERR,
"Error setting key; rc = [%d]\n",
@@ -383,9 +376,9 @@ static int crypt_scatterlist(struct ecryptfs_crypt_stat *crypt_stat,
crypt_stat->flags |= ECRYPTFS_KEY_SET;
}
mutex_unlock(&crypt_stat->cs_tfm_mutex);
- ablkcipher_request_set_crypt(req, src_sg, dst_sg, size, iv);
- rc = op == ENCRYPT ? crypto_ablkcipher_encrypt(req) :
- crypto_ablkcipher_decrypt(req);
+ skcipher_request_set_crypt(req, src_sg, dst_sg, size, iv);
+ rc = op == ENCRYPT ? crypto_skcipher_encrypt(req) :
+ crypto_skcipher_decrypt(req);
if (rc == -EINPROGRESS || rc == -EBUSY) {
struct extent_crypt_result *ecr = req->base.data;
@@ -394,7 +387,7 @@ static int crypt_scatterlist(struct ecryptfs_crypt_stat *crypt_stat,
reinit_completion(&ecr->completion);
}
out:
- ablkcipher_request_free(req);
+ skcipher_request_free(req);
return rc;
}
@@ -622,7 +615,7 @@ int ecryptfs_init_crypt_ctx(struct ecryptfs_crypt_stat *crypt_stat)
crypt_stat->cipher, "cbc");
if (rc)
goto out_unlock;
- crypt_stat->tfm = crypto_alloc_ablkcipher(full_alg_name, 0, 0);
+ crypt_stat->tfm = crypto_alloc_skcipher(full_alg_name, 0, 0);
if (IS_ERR(crypt_stat->tfm)) {
rc = PTR_ERR(crypt_stat->tfm);
crypt_stat->tfm = NULL;
@@ -631,7 +624,7 @@ int ecryptfs_init_crypt_ctx(struct ecryptfs_crypt_stat *crypt_stat)
full_alg_name);
goto out_free;
}
- crypto_ablkcipher_set_flags(crypt_stat->tfm, CRYPTO_TFM_REQ_WEAK_KEY);
+ crypto_skcipher_set_flags(crypt_stat->tfm, CRYPTO_TFM_REQ_WEAK_KEY);
rc = 0;
out_free:
kfree(full_alg_name);
@@ -1591,7 +1584,7 @@ out:
* event, regardless of whether this function succeeds for fails.
*/
static int
-ecryptfs_process_key_cipher(struct crypto_blkcipher **key_tfm,
+ecryptfs_process_key_cipher(struct crypto_skcipher **key_tfm,
char *cipher_name, size_t *key_size)
{
char dummy_key[ECRYPTFS_MAX_KEY_BYTES];
@@ -1609,21 +1602,18 @@ ecryptfs_process_key_cipher(struct crypto_blkcipher **key_tfm,
"ecb");
if (rc)
goto out;
- *key_tfm = crypto_alloc_blkcipher(full_alg_name, 0, CRYPTO_ALG_ASYNC);
+ *key_tfm = crypto_alloc_skcipher(full_alg_name, 0, CRYPTO_ALG_ASYNC);
if (IS_ERR(*key_tfm)) {
rc = PTR_ERR(*key_tfm);
printk(KERN_ERR "Unable to allocate crypto cipher with name "
"[%s]; rc = [%d]\n", full_alg_name, rc);
goto out;
}
- crypto_blkcipher_set_flags(*key_tfm, CRYPTO_TFM_REQ_WEAK_KEY);
- if (*key_size == 0) {
- struct blkcipher_alg *alg = crypto_blkcipher_alg(*key_tfm);
-
- *key_size = alg->max_keysize;
- }
+ crypto_skcipher_set_flags(*key_tfm, CRYPTO_TFM_REQ_WEAK_KEY);
+ if (*key_size == 0)
+ *key_size = crypto_skcipher_default_keysize(*key_tfm);
get_random_bytes(dummy_key, *key_size);
- rc = crypto_blkcipher_setkey(*key_tfm, dummy_key, *key_size);
+ rc = crypto_skcipher_setkey(*key_tfm, dummy_key, *key_size);
if (rc) {
printk(KERN_ERR "Error attempting to set key of size [%zd] for "
"cipher [%s]; rc = [%d]\n", *key_size, full_alg_name,
@@ -1660,8 +1650,7 @@ int ecryptfs_destroy_crypto(void)
list_for_each_entry_safe(key_tfm, key_tfm_tmp, &key_tfm_list,
key_tfm_list) {
list_del(&key_tfm->key_tfm_list);
- if (key_tfm->key_tfm)
- crypto_free_blkcipher(key_tfm->key_tfm);
+ crypto_free_skcipher(key_tfm->key_tfm);
kmem_cache_free(ecryptfs_key_tfm_cache, key_tfm);
}
mutex_unlock(&key_tfm_list_mutex);
@@ -1747,7 +1736,7 @@ int ecryptfs_tfm_exists(char *cipher_name, struct ecryptfs_key_tfm **key_tfm)
* Searches for cached item first, and creates new if not found.
* Returns 0 on success, non-zero if adding new cipher failed
*/
-int ecryptfs_get_tfm_and_mutex_for_cipher_name(struct crypto_blkcipher **tfm,
+int ecryptfs_get_tfm_and_mutex_for_cipher_name(struct crypto_skcipher **tfm,
struct mutex **tfm_mutex,
char *cipher_name)
{
@@ -2120,7 +2109,7 @@ out:
int ecryptfs_set_f_namelen(long *namelen, long lower_namelen,
struct ecryptfs_mount_crypt_stat *mount_crypt_stat)
{
- struct blkcipher_desc desc;
+ struct crypto_skcipher *tfm;
struct mutex *tfm_mutex;
size_t cipher_blocksize;
int rc;
@@ -2130,7 +2119,7 @@ int ecryptfs_set_f_namelen(long *namelen, long lower_namelen,
return 0;
}
- rc = ecryptfs_get_tfm_and_mutex_for_cipher_name(&desc.tfm, &tfm_mutex,
+ rc = ecryptfs_get_tfm_and_mutex_for_cipher_name(&tfm, &tfm_mutex,
mount_crypt_stat->global_default_fn_cipher_name);
if (unlikely(rc)) {
(*namelen) = 0;
@@ -2138,7 +2127,7 @@ int ecryptfs_set_f_namelen(long *namelen, long lower_namelen,
}
mutex_lock(tfm_mutex);
- cipher_blocksize = crypto_blkcipher_blocksize(desc.tfm);
+ cipher_blocksize = crypto_skcipher_blocksize(tfm);
mutex_unlock(tfm_mutex);
/* Return an exact amount for the common cases */
diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h
index 7b39260..b7f8128 100644
--- a/fs/ecryptfs/ecryptfs_kernel.h
+++ b/fs/ecryptfs/ecryptfs_kernel.h
@@ -28,6 +28,7 @@
#ifndef ECRYPTFS_KERNEL_H
#define ECRYPTFS_KERNEL_H
+#include <crypto/skcipher.h>
#include <keys/user-type.h>
#include <keys/encrypted-type.h>
#include <linux/fs.h>
@@ -38,7 +39,6 @@
#include <linux/nsproxy.h>
#include <linux/backing-dev.h>
#include <linux/ecryptfs.h>
-#include <linux/crypto.h>
#define ECRYPTFS_DEFAULT_IV_BYTES 16
#define ECRYPTFS_DEFAULT_EXTENT_SIZE 4096
@@ -233,9 +233,9 @@ struct ecryptfs_crypt_stat {
size_t extent_shift;
unsigned int extent_mask;
struct ecryptfs_mount_crypt_stat *mount_crypt_stat;
- struct crypto_ablkcipher *tfm;
- struct crypto_hash *hash_tfm; /* Crypto context for generating
- * the initialization vectors */
+ struct crypto_skcipher *tfm;
+ struct crypto_shash *hash_tfm; /* Crypto context for generating
+ * the initialization vectors */
unsigned char cipher[ECRYPTFS_MAX_CIPHER_NAME_SIZE + 1];
unsigned char key[ECRYPTFS_MAX_KEY_BYTES];
unsigned char root_iv[ECRYPTFS_MAX_IV_BYTES];
@@ -309,7 +309,7 @@ struct ecryptfs_global_auth_tok {
* keeps a list of crypto API contexts around to use when needed.
*/
struct ecryptfs_key_tfm {
- struct crypto_blkcipher *key_tfm;
+ struct crypto_skcipher *key_tfm;
size_t key_size;
struct mutex key_tfm_mutex;
struct list_head key_tfm_list;
@@ -659,7 +659,7 @@ ecryptfs_add_new_key_tfm(struct ecryptfs_key_tfm **key_tfm, char *cipher_name,
int ecryptfs_init_crypto(void);
int ecryptfs_destroy_crypto(void);
int ecryptfs_tfm_exists(char *cipher_name, struct ecryptfs_key_tfm **key_tfm);
-int ecryptfs_get_tfm_and_mutex_for_cipher_name(struct crypto_blkcipher **tfm,
+int ecryptfs_get_tfm_and_mutex_for_cipher_name(struct crypto_skcipher **tfm,
struct mutex **tfm_mutex,
char *cipher_name);
int ecryptfs_keyring_auth_tok_for_sig(struct key **auth_tok_key,
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index 4e685ac..0a8f1b4 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -29,7 +29,6 @@
#include <linux/dcache.h>
#include <linux/namei.h>
#include <linux/mount.h>
-#include <linux/crypto.h>
#include <linux/fs_stack.h>
#include <linux/slab.h>
#include <linux/xattr.h>
diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c
index 6bd67e2..c5c84df 100644
--- a/fs/ecryptfs/keystore.c
+++ b/fs/ecryptfs/keystore.c
@@ -25,11 +25,12 @@
* 02111-1307, USA.
*/
+#include <crypto/hash.h>
+#include <crypto/skcipher.h>
#include <linux/string.h>
#include <linux/pagemap.h>
#include <linux/key.h>
#include <linux/random.h>
-#include <linux/crypto.h>
#include <linux/scatterlist.h>
#include <linux/slab.h>
#include "ecryptfs_kernel.h"
@@ -601,12 +602,13 @@ struct ecryptfs_write_tag_70_packet_silly_stack {
struct ecryptfs_auth_tok *auth_tok;
struct scatterlist src_sg[2];
struct scatterlist dst_sg[2];
- struct blkcipher_desc desc;
+ struct crypto_skcipher *skcipher_tfm;
+ struct skcipher_request *skcipher_req;
char iv[ECRYPTFS_MAX_IV_BYTES];
char hash[ECRYPTFS_TAG_70_DIGEST_SIZE];
char tmp_hash[ECRYPTFS_TAG_70_DIGEST_SIZE];
- struct hash_desc hash_desc;
- struct scatterlist hash_sg;
+ struct crypto_shash *hash_tfm;
+ struct shash_desc *hash_desc;
};
/**
@@ -629,14 +631,13 @@ ecryptfs_write_tag_70_packet(char *dest, size_t *remaining_bytes,
struct key *auth_tok_key = NULL;
int rc = 0;
- s = kmalloc(sizeof(*s), GFP_KERNEL);
+ s = kzalloc(sizeof(*s), GFP_KERNEL);
if (!s) {
printk(KERN_ERR "%s: Out of memory whilst trying to kmalloc "
"[%zd] bytes of kernel memory\n", __func__, sizeof(*s));
rc = -ENOMEM;
goto out;
}
- s->desc.flags = CRYPTO_TFM_REQ_MAY_SLEEP;
(*packet_size) = 0;
rc = ecryptfs_find_auth_tok_for_sig(
&auth_tok_key,
@@ -649,7 +650,7 @@ ecryptfs_write_tag_70_packet(char *dest, size_t *remaining_bytes,
goto out;
}
rc = ecryptfs_get_tfm_and_mutex_for_cipher_name(
- &s->desc.tfm,
+ &s->skcipher_tfm,
&s->tfm_mutex, mount_crypt_stat->global_default_fn_cipher_name);
if (unlikely(rc)) {
printk(KERN_ERR "Internal error whilst attempting to get "
@@ -658,7 +659,7 @@ ecryptfs_write_tag_70_packet(char *dest, size_t *remaining_bytes,
goto out;
}
mutex_lock(s->tfm_mutex);
- s->block_size = crypto_blkcipher_blocksize(s->desc.tfm);
+ s->block_size = crypto_skcipher_blocksize(s->skcipher_tfm);
/* Plus one for the \0 separator between the random prefix
* and the plaintext filename */
s->num_rand_bytes = (ECRYPTFS_FILENAME_MIN_RANDOM_PREPEND_BYTES + 1);
@@ -691,6 +692,19 @@ ecryptfs_write_tag_70_packet(char *dest, size_t *remaining_bytes,
rc = -EINVAL;
goto out_unlock;
}
+
+ s->skcipher_req = skcipher_request_alloc(s->skcipher_tfm, GFP_KERNEL);
+ if (!s->skcipher_req) {
+ printk(KERN_ERR "%s: Out of kernel memory whilst attempting to "
+ "skcipher_request_alloc for %s\n", __func__,
+ crypto_skcipher_driver_name(s->skcipher_tfm));
+ rc = -ENOMEM;
+ goto out_unlock;
+ }
+
+ skcipher_request_set_callback(s->skcipher_req,
+ CRYPTO_TFM_REQ_MAY_SLEEP, NULL, NULL);
+
s->block_aligned_filename = kzalloc(s->block_aligned_filename_size,
GFP_KERNEL);
if (!s->block_aligned_filename) {
@@ -700,7 +714,6 @@ ecryptfs_write_tag_70_packet(char *dest, size_t *remaining_bytes,
rc = -ENOMEM;
goto out_unlock;
}
- s->i = 0;
dest[s->i++] = ECRYPTFS_TAG_70_PACKET_TYPE;
rc = ecryptfs_write_packet_length(&dest[s->i],
(ECRYPTFS_SIG_SIZE
@@ -738,40 +751,36 @@ ecryptfs_write_tag_70_packet(char *dest, size_t *remaining_bytes,
"password tokens\n", __func__);
goto out_free_unlock;
}
- sg_init_one(
- &s->hash_sg,
- (u8 *)s->auth_tok->token.password.session_key_encryption_key,
- s->auth_tok->token.password.session_key_encryption_key_bytes);
- s->hash_desc.flags = CRYPTO_TFM_REQ_MAY_SLEEP;
- s->hash_desc.tfm = crypto_alloc_hash(ECRYPTFS_TAG_70_DIGEST, 0,
- CRYPTO_ALG_ASYNC);
- if (IS_ERR(s->hash_desc.tfm)) {
- rc = PTR_ERR(s->hash_desc.tfm);
+ s->hash_tfm = crypto_alloc_shash(ECRYPTFS_TAG_70_DIGEST, 0, 0);
+ if (IS_ERR(s->hash_tfm)) {
+ rc = PTR_ERR(s->hash_tfm);
printk(KERN_ERR "%s: Error attempting to "
"allocate hash crypto context; rc = [%d]\n",
__func__, rc);
goto out_free_unlock;
}
- rc = crypto_hash_init(&s->hash_desc);
- if (rc) {
- printk(KERN_ERR
- "%s: Error initializing crypto hash; rc = [%d]\n",
- __func__, rc);
- goto out_release_free_unlock;
- }
- rc = crypto_hash_update(
- &s->hash_desc, &s->hash_sg,
- s->auth_tok->token.password.session_key_encryption_key_bytes);
- if (rc) {
- printk(KERN_ERR
- "%s: Error updating crypto hash; rc = [%d]\n",
- __func__, rc);
+
+ s->hash_desc = kmalloc(sizeof(*s->hash_desc) +
+ crypto_shash_descsize(s->hash_tfm), GFP_KERNEL);
+ if (!s->hash_desc) {
+ printk(KERN_ERR "%s: Out of kernel memory whilst attempting to "
+ "kmalloc [%zd] bytes\n", __func__,
+ sizeof(*s->hash_desc) +
+ crypto_shash_descsize(s->hash_tfm));
+ rc = -ENOMEM;
goto out_release_free_unlock;
}
- rc = crypto_hash_final(&s->hash_desc, s->hash);
+
+ s->hash_desc->tfm = s->hash_tfm;
+ s->hash_desc->flags = CRYPTO_TFM_REQ_MAY_SLEEP;
+
+ rc = crypto_shash_digest(s->hash_desc,
+ (u8 *)s->auth_tok->token.password.session_key_encryption_key,
+ s->auth_tok->token.password.session_key_encryption_key_bytes,
+ s->hash);
if (rc) {
printk(KERN_ERR
- "%s: Error finalizing crypto hash; rc = [%d]\n",
+ "%s: Error computing crypto hash; rc = [%d]\n",
__func__, rc);
goto out_release_free_unlock;
}
@@ -780,27 +789,12 @@ ecryptfs_write_tag_70_packet(char *dest, size_t *remaining_bytes,
s->hash[(s->j % ECRYPTFS_TAG_70_DIGEST_SIZE)];
if ((s->j % ECRYPTFS_TAG_70_DIGEST_SIZE)
== (ECRYPTFS_TAG_70_DIGEST_SIZE - 1)) {
- sg_init_one(&s->hash_sg, (u8 *)s->hash,
- ECRYPTFS_TAG_70_DIGEST_SIZE);
- rc = crypto_hash_init(&s->hash_desc);
- if (rc) {
- printk(KERN_ERR
- "%s: Error initializing crypto hash; "
- "rc = [%d]\n", __func__, rc);
- goto out_release_free_unlock;
- }
- rc = crypto_hash_update(&s->hash_desc, &s->hash_sg,
- ECRYPTFS_TAG_70_DIGEST_SIZE);
+ rc = crypto_shash_digest(s->hash_desc, (u8 *)s->hash,
+ ECRYPTFS_TAG_70_DIGEST_SIZE,
+ s->tmp_hash);
if (rc) {
printk(KERN_ERR
- "%s: Error updating crypto hash; "
- "rc = [%d]\n", __func__, rc);
- goto out_release_free_unlock;
- }
- rc = crypto_hash_final(&s->hash_desc, s->tmp_hash);
- if (rc) {
- printk(KERN_ERR
- "%s: Error finalizing crypto hash; "
+ "%s: Error computing crypto hash; "
"rc = [%d]\n", __func__, rc);
goto out_release_free_unlock;
}
@@ -834,10 +828,8 @@ ecryptfs_write_tag_70_packet(char *dest, size_t *remaining_bytes,
* of the IV here, so we just use 0's for the IV. Note the
* constraint that ECRYPTFS_FILENAME_MIN_RANDOM_PREPEND_BYTES
* >= ECRYPTFS_MAX_IV_BYTES. */
- memset(s->iv, 0, ECRYPTFS_MAX_IV_BYTES);
- s->desc.info = s->iv;
- rc = crypto_blkcipher_setkey(
- s->desc.tfm,
+ rc = crypto_skcipher_setkey(
+ s->skcipher_tfm,
s->auth_tok->token.password.session_key_encryption_key,
mount_crypt_stat->global_default_fn_cipher_key_bytes);
if (rc < 0) {
@@ -850,8 +842,9 @@ ecryptfs_write_tag_70_packet(char *dest, size_t *remaining_bytes,
mount_crypt_stat->global_default_fn_cipher_key_bytes);
goto out_release_free_unlock;
}
- rc = crypto_blkcipher_encrypt_iv(&s->desc, s->dst_sg, s->src_sg,
- s->block_aligned_filename_size);
+ skcipher_request_set_crypt(s->skcipher_req, s->src_sg, s->dst_sg,
+ s->block_aligned_filename_size, s->iv);
+ rc = crypto_skcipher_encrypt(s->skcipher_req);
if (rc) {
printk(KERN_ERR "%s: Error attempting to encrypt filename; "
"rc = [%d]\n", __func__, rc);
@@ -861,7 +854,7 @@ ecryptfs_write_tag_70_packet(char *dest, size_t *remaining_bytes,
(*packet_size) = s->i;
(*remaining_bytes) -= (*packet_size);
out_release_free_unlock:
- crypto_free_hash(s->hash_desc.tfm);
+ crypto_free_shash(s->hash_tfm);
out_free_unlock:
kzfree(s->block_aligned_filename);
out_unlock:
@@ -871,6 +864,8 @@ out:
up_write(&(auth_tok_key->sem));
key_put(auth_tok_key);
}
+ skcipher_request_free(s->skcipher_req);
+ kzfree(s->hash_desc);
kfree(s);
return rc;
}
@@ -888,7 +883,8 @@ struct ecryptfs_parse_tag_70_packet_silly_stack {
struct ecryptfs_auth_tok *auth_tok;
struct scatterlist src_sg[2];
struct scatterlist dst_sg[2];
- struct blkcipher_desc desc;
+ struct crypto_skcipher *skcipher_tfm;
+ struct skcipher_request *skcipher_req;
char fnek_sig_hex[ECRYPTFS_SIG_SIZE_HEX + 1];
char iv[ECRYPTFS_MAX_IV_BYTES];
char cipher_string[ECRYPTFS_MAX_CIPHER_NAME_SIZE + 1];
@@ -922,14 +918,13 @@ ecryptfs_parse_tag_70_packet(char **filename, size_t *filename_size,
(*packet_size) = 0;
(*filename_size) = 0;
(*filename) = NULL;
- s = kmalloc(sizeof(*s), GFP_KERNEL);
+ s = kzalloc(sizeof(*s), GFP_KERNEL);
if (!s) {
printk(KERN_ERR "%s: Out of memory whilst trying to kmalloc "
"[%zd] bytes of kernel memory\n", __func__, sizeof(*s));
rc = -ENOMEM;
goto out;
}
- s->desc.flags = CRYPTO_TFM_REQ_MAY_SLEEP;
if (max_packet_size < ECRYPTFS_TAG_70_MIN_METADATA_SIZE) {
printk(KERN_WARNING "%s: max_packet_size is [%zd]; it must be "
"at least [%d]\n", __func__, max_packet_size,
@@ -992,7 +987,7 @@ ecryptfs_parse_tag_70_packet(char **filename, size_t *filename_size,
rc);
goto out;
}
- rc = ecryptfs_get_tfm_and_mutex_for_cipher_name(&s->desc.tfm,
+ rc = ecryptfs_get_tfm_and_mutex_for_cipher_name(&s->skcipher_tfm,
&s->tfm_mutex,
s->cipher_string);
if (unlikely(rc)) {
@@ -1030,12 +1025,23 @@ ecryptfs_parse_tag_70_packet(char **filename, size_t *filename_size,
__func__, rc, s->block_aligned_filename_size);
goto out_free_unlock;
}
+
+ s->skcipher_req = skcipher_request_alloc(s->skcipher_tfm, GFP_KERNEL);
+ if (!s->skcipher_req) {
+ printk(KERN_ERR "%s: Out of kernel memory whilst attempting to "
+ "skcipher_request_alloc for %s\n", __func__,
+ crypto_skcipher_driver_name(s->skcipher_tfm));
+ rc = -ENOMEM;
+ goto out_free_unlock;
+ }
+
+ skcipher_request_set_callback(s->skcipher_req,
+ CRYPTO_TFM_REQ_MAY_SLEEP, NULL, NULL);
+
/* The characters in the first block effectively do the job of
* the IV here, so we just use 0's for the IV. Note the
* constraint that ECRYPTFS_FILENAME_MIN_RANDOM_PREPEND_BYTES
* >= ECRYPTFS_MAX_IV_BYTES. */
- memset(s->iv, 0, ECRYPTFS_MAX_IV_BYTES);
- s->desc.info = s->iv;
/* TODO: Support other key modules than passphrase for
* filename encryption */
if (s->auth_tok->token_type != ECRYPTFS_PASSWORD) {
@@ -1044,8 +1050,8 @@ ecryptfs_parse_tag_70_packet(char **filename, size_t *filename_size,
"password tokens\n", __func__);
goto out_free_unlock;
}
- rc = crypto_blkcipher_setkey(
- s->desc.tfm,
+ rc = crypto_skcipher_setkey(
+ s->skcipher_tfm,
s->auth_tok->token.password.session_key_encryption_key,
mount_crypt_stat->global_default_fn_cipher_key_bytes);
if (rc < 0) {
@@ -1058,14 +1064,14 @@ ecryptfs_parse_tag_70_packet(char **filename, size_t *filename_size,
mount_crypt_stat->global_default_fn_cipher_key_bytes);
goto out_free_unlock;
}
- rc = crypto_blkcipher_decrypt_iv(&s->desc, s->dst_sg, s->src_sg,
- s->block_aligned_filename_size);
+ skcipher_request_set_crypt(s->skcipher_req, s->src_sg, s->dst_sg,
+ s->block_aligned_filename_size, s->iv);
+ rc = crypto_skcipher_decrypt(s->skcipher_req);
if (rc) {
printk(KERN_ERR "%s: Error attempting to decrypt filename; "
"rc = [%d]\n", __func__, rc);
goto out_free_unlock;
}
- s->i = 0;
while (s->decrypted_filename[s->i] != '\0'
&& s->i < s->block_aligned_filename_size)
s->i++;
@@ -1108,6 +1114,7 @@ out:
up_write(&(auth_tok_key->sem));
key_put(auth_tok_key);
}
+ skcipher_request_free(s->skcipher_req);
kfree(s);
return rc;
}
@@ -1667,9 +1674,8 @@ decrypt_passphrase_encrypted_session_key(struct ecryptfs_auth_tok *auth_tok,
struct scatterlist dst_sg[2];
struct scatterlist src_sg[2];
struct mutex *tfm_mutex;
- struct blkcipher_desc desc = {
- .flags = CRYPTO_TFM_REQ_MAY_SLEEP
- };
+ struct crypto_skcipher *tfm;
+ struct skcipher_request *req = NULL;
int rc = 0;
if (unlikely(ecryptfs_verbosity > 0)) {
@@ -1680,7 +1686,7 @@ decrypt_passphrase_encrypted_session_key(struct ecryptfs_auth_tok *auth_tok,
auth_tok->token.password.session_key_encryption_key,
auth_tok->token.password.session_key_encryption_key_bytes);
}
- rc = ecryptfs_get_tfm_and_mutex_for_cipher_name(&desc.tfm, &tfm_mutex,
+ rc = ecryptfs_get_tfm_and_mutex_for_cipher_name(&tfm, &tfm_mutex,
crypt_stat->cipher);
if (unlikely(rc)) {
printk(KERN_ERR "Internal error whilst attempting to get "
@@ -1711,8 +1717,20 @@ decrypt_passphrase_encrypted_session_key(struct ecryptfs_auth_tok *auth_tok,
goto out;
}
mutex_lock(tfm_mutex);
- rc = crypto_blkcipher_setkey(
- desc.tfm, auth_tok->token.password.session_key_encryption_key,
+ req = skcipher_request_alloc(tfm, GFP_KERNEL);
+ if (!req) {
+ mutex_unlock(tfm_mutex);
+ printk(KERN_ERR "%s: Out of kernel memory whilst attempting to "
+ "skcipher_request_alloc for %s\n", __func__,
+ crypto_skcipher_driver_name(tfm));
+ rc = -ENOMEM;
+ goto out;
+ }
+
+ skcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP,
+ NULL, NULL);
+ rc = crypto_skcipher_setkey(
+ tfm, auth_tok->token.password.session_key_encryption_key,
crypt_stat->key_size);
if (unlikely(rc < 0)) {
mutex_unlock(tfm_mutex);
@@ -1720,8 +1738,10 @@ decrypt_passphrase_encrypted_session_key(struct ecryptfs_auth_tok *auth_tok,
rc = -EINVAL;
goto out;
}
- rc = crypto_blkcipher_decrypt(&desc, dst_sg, src_sg,
- auth_tok->session_key.encrypted_key_size);
+ skcipher_request_set_crypt(req, src_sg, dst_sg,
+ auth_tok->session_key.encrypted_key_size,
+ NULL);
+ rc = crypto_skcipher_decrypt(req);
mutex_unlock(tfm_mutex);
if (unlikely(rc)) {
printk(KERN_ERR "Error decrypting; rc = [%d]\n", rc);
@@ -1738,6 +1758,7 @@ decrypt_passphrase_encrypted_session_key(struct ecryptfs_auth_tok *auth_tok,
crypt_stat->key_size);
}
out:
+ skcipher_request_free(req);
return rc;
}
@@ -2191,16 +2212,14 @@ write_tag_3_packet(char *dest, size_t *remaining_bytes,
size_t max_packet_size;
struct ecryptfs_mount_crypt_stat *mount_crypt_stat =
crypt_stat->mount_crypt_stat;
- struct blkcipher_desc desc = {
- .tfm = NULL,
- .flags = CRYPTO_TFM_REQ_MAY_SLEEP
- };
+ struct crypto_skcipher *tfm;
+ struct skcipher_request *req;
int rc = 0;
(*packet_size) = 0;
ecryptfs_from_hex(key_rec->sig, auth_tok->token.password.signature,
ECRYPTFS_SIG_SIZE);
- rc = ecryptfs_get_tfm_and_mutex_for_cipher_name(&desc.tfm, &tfm_mutex,
+ rc = ecryptfs_get_tfm_and_mutex_for_cipher_name(&tfm, &tfm_mutex,
crypt_stat->cipher);
if (unlikely(rc)) {
printk(KERN_ERR "Internal error whilst attempting to get "
@@ -2209,12 +2228,11 @@ write_tag_3_packet(char *dest, size_t *remaining_bytes,
goto out;
}
if (mount_crypt_stat->global_default_cipher_key_size == 0) {
- struct blkcipher_alg *alg = crypto_blkcipher_alg(desc.tfm);
-
printk(KERN_WARNING "No key size specified at mount; "
- "defaulting to [%d]\n", alg->max_keysize);
+ "defaulting to [%d]\n",
+ crypto_skcipher_default_keysize(tfm));
mount_crypt_stat->global_default_cipher_key_size =
- alg->max_keysize;
+ crypto_skcipher_default_keysize(tfm);
}
if (crypt_stat->key_size == 0)
crypt_stat->key_size =
@@ -2284,20 +2302,36 @@ write_tag_3_packet(char *dest, size_t *remaining_bytes,
goto out;
}
mutex_lock(tfm_mutex);
- rc = crypto_blkcipher_setkey(desc.tfm, session_key_encryption_key,
- crypt_stat->key_size);
+ rc = crypto_skcipher_setkey(tfm, session_key_encryption_key,
+ crypt_stat->key_size);
if (rc < 0) {
mutex_unlock(tfm_mutex);
ecryptfs_printk(KERN_ERR, "Error setting key for crypto "
"context; rc = [%d]\n", rc);
goto out;
}
+
+ req = skcipher_request_alloc(tfm, GFP_KERNEL);
+ if (!req) {
+ mutex_unlock(tfm_mutex);
+ ecryptfs_printk(KERN_ERR, "Out of kernel memory whilst "
+ "attempting to skcipher_request_alloc for "
+ "%s\n", crypto_skcipher_driver_name(tfm));
+ rc = -ENOMEM;
+ goto out;
+ }
+
+ skcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP,
+ NULL, NULL);
+
rc = 0;
ecryptfs_printk(KERN_DEBUG, "Encrypting [%zd] bytes of the key\n",
crypt_stat->key_size);
- rc = crypto_blkcipher_encrypt(&desc, dst_sg, src_sg,
- (*key_rec).enc_key_size);
+ skcipher_request_set_crypt(req, src_sg, dst_sg,
+ (*key_rec).enc_key_size, NULL);
+ rc = crypto_skcipher_encrypt(req);
mutex_unlock(tfm_mutex);
+ skcipher_request_free(req);
if (rc) {
printk(KERN_ERR "Error encrypting; rc = [%d]\n", rc);
goto out;
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index e25b6b0..8b0b4a7 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -29,7 +29,6 @@
#include <linux/module.h>
#include <linux/namei.h>
#include <linux/skbuff.h>
-#include <linux/crypto.h>
#include <linux/mount.h>
#include <linux/pagemap.h>
#include <linux/key.h>
diff --git a/fs/ecryptfs/mmap.c b/fs/ecryptfs/mmap.c
index c6ced4c..1f58652 100644
--- a/fs/ecryptfs/mmap.c
+++ b/fs/ecryptfs/mmap.c
@@ -30,7 +30,6 @@
#include <linux/page-flags.h>
#include <linux/mount.h>
#include <linux/file.h>
-#include <linux/crypto.h>
#include <linux/scatterlist.h>
#include <linux/slab.h>
#include <asm/unaligned.h>
diff --git a/fs/ecryptfs/super.c b/fs/ecryptfs/super.c
index afa1b81..77a486d 100644
--- a/fs/ecryptfs/super.c
+++ b/fs/ecryptfs/super.c
@@ -29,7 +29,6 @@
#include <linux/slab.h>
#include <linux/seq_file.h>
#include <linux/file.h>
-#include <linux/crypto.h>
#include <linux/statfs.h>
#include <linux/magic.h>
#include "ecryptfs_kernel.h"
diff --git a/fs/exec.c b/fs/exec.c
index dcd4ac7..9bdf0ed 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -56,6 +56,7 @@
#include <linux/pipe_fs_i.h>
#include <linux/oom.h>
#include <linux/compat.h>
+#include <linux/vmalloc.h>
#include <asm/uaccess.h>
#include <asm/mmu_context.h>
@@ -831,6 +832,97 @@ int kernel_read(struct file *file, loff_t offset,
EXPORT_SYMBOL(kernel_read);
+int kernel_read_file(struct file *file, void **buf, loff_t *size,
+ loff_t max_size, enum kernel_read_file_id id)
+{
+ loff_t i_size, pos;
+ ssize_t bytes = 0;
+ int ret;
+
+ if (!S_ISREG(file_inode(file)->i_mode) || max_size < 0)
+ return -EINVAL;
+
+ ret = security_kernel_read_file(file, id);
+ if (ret)
+ return ret;
+
+ i_size = i_size_read(file_inode(file));
+ if (max_size > 0 && i_size > max_size)
+ return -EFBIG;
+ if (i_size <= 0)
+ return -EINVAL;
+
+ *buf = vmalloc(i_size);
+ if (!*buf)
+ return -ENOMEM;
+
+ pos = 0;
+ while (pos < i_size) {
+ bytes = kernel_read(file, pos, (char *)(*buf) + pos,
+ i_size - pos);
+ if (bytes < 0) {
+ ret = bytes;
+ goto out;
+ }
+
+ if (bytes == 0)
+ break;
+ pos += bytes;
+ }
+
+ if (pos != i_size) {
+ ret = -EIO;
+ goto out;
+ }
+
+ ret = security_kernel_post_read_file(file, *buf, i_size, id);
+ if (!ret)
+ *size = pos;
+
+out:
+ if (ret < 0) {
+ vfree(*buf);
+ *buf = NULL;
+ }
+ return ret;
+}
+EXPORT_SYMBOL_GPL(kernel_read_file);
+
+int kernel_read_file_from_path(char *path, void **buf, loff_t *size,
+ loff_t max_size, enum kernel_read_file_id id)
+{
+ struct file *file;
+ int ret;
+
+ if (!path || !*path)
+ return -EINVAL;
+
+ file = filp_open(path, O_RDONLY, 0);
+ if (IS_ERR(file))
+ return PTR_ERR(file);
+
+ ret = kernel_read_file(file, buf, size, max_size, id);
+ fput(file);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(kernel_read_file_from_path);
+
+int kernel_read_file_from_fd(int fd, void **buf, loff_t *size, loff_t max_size,
+ enum kernel_read_file_id id)
+{
+ struct fd f = fdget(fd);
+ int ret = -EBADF;
+
+ if (!f.file)
+ goto out;
+
+ ret = kernel_read_file(f.file, buf, size, max_size, id);
+out:
+ fdput(f);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(kernel_read_file_from_fd);
+
ssize_t read_code(struct file *file, unsigned long addr, loff_t pos, size_t len)
{
ssize_t res = vfs_read(file, (void __user *)addr, len, &pos);
diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h
index 4c69c94..170939f 100644
--- a/fs/ext2/ext2.h
+++ b/fs/ext2/ext2.h
@@ -61,6 +61,8 @@ struct ext2_block_alloc_info {
#define rsv_start rsv_window._rsv_start
#define rsv_end rsv_window._rsv_end
+struct mb_cache;
+
/*
* second extended-fs super-block data in memory
*/
@@ -111,6 +113,7 @@ struct ext2_sb_info {
* of the mount options.
*/
spinlock_t s_lock;
+ struct mb_cache *s_mb_cache;
};
static inline spinlock_t *
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 2a18841..b78caf2 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -131,7 +131,10 @@ static void ext2_put_super (struct super_block * sb)
dquot_disable(sb, -1, DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED);
- ext2_xattr_put_super(sb);
+ if (sbi->s_mb_cache) {
+ ext2_xattr_destroy_cache(sbi->s_mb_cache);
+ sbi->s_mb_cache = NULL;
+ }
if (!(sb->s_flags & MS_RDONLY)) {
struct ext2_super_block *es = sbi->s_es;
@@ -1104,6 +1107,14 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
ext2_msg(sb, KERN_ERR, "error: insufficient memory");
goto failed_mount3;
}
+
+#ifdef CONFIG_EXT2_FS_XATTR
+ sbi->s_mb_cache = ext2_xattr_create_cache();
+ if (!sbi->s_mb_cache) {
+ ext2_msg(sb, KERN_ERR, "Failed to create an mb_cache");
+ goto failed_mount3;
+ }
+#endif
/*
* set up enough so that it can read an inode
*/
@@ -1149,6 +1160,8 @@ cantfind_ext2:
sb->s_id);
goto failed_mount;
failed_mount3:
+ if (sbi->s_mb_cache)
+ ext2_xattr_destroy_cache(sbi->s_mb_cache);
percpu_counter_destroy(&sbi->s_freeblocks_counter);
percpu_counter_destroy(&sbi->s_freeinodes_counter);
percpu_counter_destroy(&sbi->s_dirs_counter);
@@ -1555,20 +1568,17 @@ MODULE_ALIAS_FS("ext2");
static int __init init_ext2_fs(void)
{
- int err = init_ext2_xattr();
- if (err)
- return err;
+ int err;
+
err = init_inodecache();
if (err)
- goto out1;
+ return err;
err = register_filesystem(&ext2_fs_type);
if (err)
goto out;
return 0;
out:
destroy_inodecache();
-out1:
- exit_ext2_xattr();
return err;
}
@@ -1576,7 +1586,6 @@ static void __exit exit_ext2_fs(void)
{
unregister_filesystem(&ext2_fs_type);
destroy_inodecache();
- exit_ext2_xattr();
}
MODULE_AUTHOR("Remy Card and others");
diff --git a/fs/ext2/xattr.c b/fs/ext2/xattr.c
index f57a7ab..1a5e3bf 100644
--- a/fs/ext2/xattr.c
+++ b/fs/ext2/xattr.c
@@ -90,14 +90,12 @@
static int ext2_xattr_set2(struct inode *, struct buffer_head *,
struct ext2_xattr_header *);
-static int ext2_xattr_cache_insert(struct buffer_head *);
+static int ext2_xattr_cache_insert(struct mb_cache *, struct buffer_head *);
static struct buffer_head *ext2_xattr_cache_find(struct inode *,
struct ext2_xattr_header *);
static void ext2_xattr_rehash(struct ext2_xattr_header *,
struct ext2_xattr_entry *);
-static struct mb_cache *ext2_xattr_cache;
-
static const struct xattr_handler *ext2_xattr_handler_map[] = {
[EXT2_XATTR_INDEX_USER] = &ext2_xattr_user_handler,
#ifdef CONFIG_EXT2_FS_POSIX_ACL
@@ -152,6 +150,7 @@ ext2_xattr_get(struct inode *inode, int name_index, const char *name,
size_t name_len, size;
char *end;
int error;
+ struct mb_cache *ext2_mb_cache = EXT2_SB(inode->i_sb)->s_mb_cache;
ea_idebug(inode, "name=%d.%s, buffer=%p, buffer_size=%ld",
name_index, name, buffer, (long)buffer_size);
@@ -196,7 +195,7 @@ bad_block: ext2_error(inode->i_sb, "ext2_xattr_get",
goto found;
entry = next;
}
- if (ext2_xattr_cache_insert(bh))
+ if (ext2_xattr_cache_insert(ext2_mb_cache, bh))
ea_idebug(inode, "cache insert failed");
error = -ENODATA;
goto cleanup;
@@ -209,7 +208,7 @@ found:
le16_to_cpu(entry->e_value_offs) + size > inode->i_sb->s_blocksize)
goto bad_block;
- if (ext2_xattr_cache_insert(bh))
+ if (ext2_xattr_cache_insert(ext2_mb_cache, bh))
ea_idebug(inode, "cache insert failed");
if (buffer) {
error = -ERANGE;
@@ -247,6 +246,7 @@ ext2_xattr_list(struct dentry *dentry, char *buffer, size_t buffer_size)
char *end;
size_t rest = buffer_size;
int error;
+ struct mb_cache *ext2_mb_cache = EXT2_SB(inode->i_sb)->s_mb_cache;
ea_idebug(inode, "buffer=%p, buffer_size=%ld",
buffer, (long)buffer_size);
@@ -281,7 +281,7 @@ bad_block: ext2_error(inode->i_sb, "ext2_xattr_list",
goto bad_block;
entry = next;
}
- if (ext2_xattr_cache_insert(bh))
+ if (ext2_xattr_cache_insert(ext2_mb_cache, bh))
ea_idebug(inode, "cache insert failed");
/* list the attribute names */
@@ -483,22 +483,23 @@ bad_block: ext2_error(sb, "ext2_xattr_set",
/* Here we know that we can set the new attribute. */
if (header) {
- struct mb_cache_entry *ce;
-
/* assert(header == HDR(bh)); */
- ce = mb_cache_entry_get(ext2_xattr_cache, bh->b_bdev,
- bh->b_blocknr);
lock_buffer(bh);
if (header->h_refcount == cpu_to_le32(1)) {
+ __u32 hash = le32_to_cpu(header->h_hash);
+
ea_bdebug(bh, "modifying in-place");
- if (ce)
- mb_cache_entry_free(ce);
+ /*
+ * This must happen under buffer lock for
+ * ext2_xattr_set2() to reliably detect modified block
+ */
+ mb_cache_entry_delete_block(EXT2_SB(sb)->s_mb_cache,
+ hash, bh->b_blocknr);
+
/* keep the buffer locked while modifying it. */
} else {
int offset;
- if (ce)
- mb_cache_entry_release(ce);
unlock_buffer(bh);
ea_bdebug(bh, "cloning");
header = kmalloc(bh->b_size, GFP_KERNEL);
@@ -626,6 +627,7 @@ ext2_xattr_set2(struct inode *inode, struct buffer_head *old_bh,
struct super_block *sb = inode->i_sb;
struct buffer_head *new_bh = NULL;
int error;
+ struct mb_cache *ext2_mb_cache = EXT2_SB(sb)->s_mb_cache;
if (header) {
new_bh = ext2_xattr_cache_find(inode, header);
@@ -653,7 +655,7 @@ ext2_xattr_set2(struct inode *inode, struct buffer_head *old_bh,
don't need to change the reference count. */
new_bh = old_bh;
get_bh(new_bh);
- ext2_xattr_cache_insert(new_bh);
+ ext2_xattr_cache_insert(ext2_mb_cache, new_bh);
} else {
/* We need to allocate a new block */
ext2_fsblk_t goal = ext2_group_first_block_no(sb,
@@ -674,7 +676,7 @@ ext2_xattr_set2(struct inode *inode, struct buffer_head *old_bh,
memcpy(new_bh->b_data, header, new_bh->b_size);
set_buffer_uptodate(new_bh);
unlock_buffer(new_bh);
- ext2_xattr_cache_insert(new_bh);
+ ext2_xattr_cache_insert(ext2_mb_cache, new_bh);
ext2_xattr_update_super_block(sb);
}
@@ -707,19 +709,21 @@ ext2_xattr_set2(struct inode *inode, struct buffer_head *old_bh,
error = 0;
if (old_bh && old_bh != new_bh) {
- struct mb_cache_entry *ce;
-
/*
* If there was an old block and we are no longer using it,
* release the old block.
*/
- ce = mb_cache_entry_get(ext2_xattr_cache, old_bh->b_bdev,
- old_bh->b_blocknr);
lock_buffer(old_bh);
if (HDR(old_bh)->h_refcount == cpu_to_le32(1)) {
+ __u32 hash = le32_to_cpu(HDR(old_bh)->h_hash);
+
+ /*
+ * This must happen under buffer lock for
+ * ext2_xattr_set2() to reliably detect freed block
+ */
+ mb_cache_entry_delete_block(ext2_mb_cache,
+ hash, old_bh->b_blocknr);
/* Free the old block. */
- if (ce)
- mb_cache_entry_free(ce);
ea_bdebug(old_bh, "freeing");
ext2_free_blocks(inode, old_bh->b_blocknr, 1);
mark_inode_dirty(inode);
@@ -730,8 +734,6 @@ ext2_xattr_set2(struct inode *inode, struct buffer_head *old_bh,
} else {
/* Decrement the refcount only. */
le32_add_cpu(&HDR(old_bh)->h_refcount, -1);
- if (ce)
- mb_cache_entry_release(ce);
dquot_free_block_nodirty(inode, 1);
mark_inode_dirty(inode);
mark_buffer_dirty(old_bh);
@@ -757,7 +759,6 @@ void
ext2_xattr_delete_inode(struct inode *inode)
{
struct buffer_head *bh = NULL;
- struct mb_cache_entry *ce;
down_write(&EXT2_I(inode)->xattr_sem);
if (!EXT2_I(inode)->i_file_acl)
@@ -777,19 +778,22 @@ ext2_xattr_delete_inode(struct inode *inode)
EXT2_I(inode)->i_file_acl);
goto cleanup;
}
- ce = mb_cache_entry_get(ext2_xattr_cache, bh->b_bdev, bh->b_blocknr);
lock_buffer(bh);
if (HDR(bh)->h_refcount == cpu_to_le32(1)) {
- if (ce)
- mb_cache_entry_free(ce);
+ __u32 hash = le32_to_cpu(HDR(bh)->h_hash);
+
+ /*
+ * This must happen under buffer lock for ext2_xattr_set2() to
+ * reliably detect freed block
+ */
+ mb_cache_entry_delete_block(EXT2_SB(inode->i_sb)->s_mb_cache,
+ hash, bh->b_blocknr);
ext2_free_blocks(inode, EXT2_I(inode)->i_file_acl, 1);
get_bh(bh);
bforget(bh);
unlock_buffer(bh);
} else {
le32_add_cpu(&HDR(bh)->h_refcount, -1);
- if (ce)
- mb_cache_entry_release(ce);
ea_bdebug(bh, "refcount now=%d",
le32_to_cpu(HDR(bh)->h_refcount));
unlock_buffer(bh);
@@ -806,18 +810,6 @@ cleanup:
}
/*
- * ext2_xattr_put_super()
- *
- * This is called when a file system is unmounted.
- */
-void
-ext2_xattr_put_super(struct super_block *sb)
-{
- mb_cache_shrink(sb->s_bdev);
-}
-
-
-/*
* ext2_xattr_cache_insert()
*
* Create a new entry in the extended attribute cache, and insert
@@ -826,28 +818,20 @@ ext2_xattr_put_super(struct super_block *sb)
* Returns 0, or a negative error number on failure.
*/
static int
-ext2_xattr_cache_insert(struct buffer_head *bh)
+ext2_xattr_cache_insert(struct mb_cache *cache, struct buffer_head *bh)
{
__u32 hash = le32_to_cpu(HDR(bh)->h_hash);
- struct mb_cache_entry *ce;
int error;
- ce = mb_cache_entry_alloc(ext2_xattr_cache, GFP_NOFS);
- if (!ce)
- return -ENOMEM;
- error = mb_cache_entry_insert(ce, bh->b_bdev, bh->b_blocknr, hash);
+ error = mb_cache_entry_create(cache, GFP_NOFS, hash, bh->b_blocknr, 1);
if (error) {
- mb_cache_entry_free(ce);
if (error == -EBUSY) {
ea_bdebug(bh, "already in cache (%d cache entries)",
atomic_read(&ext2_xattr_cache->c_entry_count));
error = 0;
}
- } else {
- ea_bdebug(bh, "inserting [%x] (%d cache entries)", (int)hash,
- atomic_read(&ext2_xattr_cache->c_entry_count));
- mb_cache_entry_release(ce);
- }
+ } else
+ ea_bdebug(bh, "inserting [%x]", (int)hash);
return error;
}
@@ -904,22 +888,16 @@ ext2_xattr_cache_find(struct inode *inode, struct ext2_xattr_header *header)
{
__u32 hash = le32_to_cpu(header->h_hash);
struct mb_cache_entry *ce;
+ struct mb_cache *ext2_mb_cache = EXT2_SB(inode->i_sb)->s_mb_cache;
if (!header->h_hash)
return NULL; /* never share */
ea_idebug(inode, "looking for cached blocks [%x]", (int)hash);
again:
- ce = mb_cache_entry_find_first(ext2_xattr_cache, inode->i_sb->s_bdev,
- hash);
+ ce = mb_cache_entry_find_first(ext2_mb_cache, hash);
while (ce) {
struct buffer_head *bh;
- if (IS_ERR(ce)) {
- if (PTR_ERR(ce) == -EAGAIN)
- goto again;
- break;
- }
-
bh = sb_bread(inode->i_sb, ce->e_block);
if (!bh) {
ext2_error(inode->i_sb, "ext2_xattr_cache_find",
@@ -927,7 +905,21 @@ again:
inode->i_ino, (unsigned long) ce->e_block);
} else {
lock_buffer(bh);
- if (le32_to_cpu(HDR(bh)->h_refcount) >
+ /*
+ * We have to be careful about races with freeing or
+ * rehashing of xattr block. Once we hold buffer lock
+ * xattr block's state is stable so we can check
+ * whether the block got freed / rehashed or not.
+ * Since we unhash mbcache entry under buffer lock when
+ * freeing / rehashing xattr block, checking whether
+ * entry is still hashed is reliable.
+ */
+ if (hlist_bl_unhashed(&ce->e_hash_list)) {
+ mb_cache_entry_put(ext2_mb_cache, ce);
+ unlock_buffer(bh);
+ brelse(bh);
+ goto again;
+ } else if (le32_to_cpu(HDR(bh)->h_refcount) >
EXT2_XATTR_REFCOUNT_MAX) {
ea_idebug(inode, "block %ld refcount %d>%d",
(unsigned long) ce->e_block,
@@ -936,13 +928,14 @@ again:
} else if (!ext2_xattr_cmp(header, HDR(bh))) {
ea_bdebug(bh, "b_count=%d",
atomic_read(&(bh->b_count)));
- mb_cache_entry_release(ce);
+ mb_cache_entry_touch(ext2_mb_cache, ce);
+ mb_cache_entry_put(ext2_mb_cache, ce);
return bh;
}
unlock_buffer(bh);
brelse(bh);
}
- ce = mb_cache_entry_find_next(ce, inode->i_sb->s_bdev, hash);
+ ce = mb_cache_entry_find_next(ext2_mb_cache, ce);
}
return NULL;
}
@@ -1015,17 +1008,15 @@ static void ext2_xattr_rehash(struct ext2_xattr_header *header,
#undef BLOCK_HASH_SHIFT
-int __init
-init_ext2_xattr(void)
+#define HASH_BUCKET_BITS 10
+
+struct mb_cache *ext2_xattr_create_cache(void)
{
- ext2_xattr_cache = mb_cache_create("ext2_xattr", 6);
- if (!ext2_xattr_cache)
- return -ENOMEM;
- return 0;
+ return mb_cache_create(HASH_BUCKET_BITS);
}
-void
-exit_ext2_xattr(void)
+void ext2_xattr_destroy_cache(struct mb_cache *cache)
{
- mb_cache_destroy(ext2_xattr_cache);
+ if (cache)
+ mb_cache_destroy(cache);
}
diff --git a/fs/ext2/xattr.h b/fs/ext2/xattr.h
index 60edf29..6f82ab1 100644
--- a/fs/ext2/xattr.h
+++ b/fs/ext2/xattr.h
@@ -53,6 +53,8 @@ struct ext2_xattr_entry {
#define EXT2_XATTR_SIZE(size) \
(((size) + EXT2_XATTR_ROUND) & ~EXT2_XATTR_ROUND)
+struct mb_cache;
+
# ifdef CONFIG_EXT2_FS_XATTR
extern const struct xattr_handler ext2_xattr_user_handler;
@@ -65,10 +67,9 @@ extern int ext2_xattr_get(struct inode *, int, const char *, void *, size_t);
extern int ext2_xattr_set(struct inode *, int, const char *, const void *, size_t, int);
extern void ext2_xattr_delete_inode(struct inode *);
-extern void ext2_xattr_put_super(struct super_block *);
-extern int init_ext2_xattr(void);
-extern void exit_ext2_xattr(void);
+extern struct mb_cache *ext2_xattr_create_cache(void);
+extern void ext2_xattr_destroy_cache(struct mb_cache *cache);
extern const struct xattr_handler *ext2_xattr_handlers[];
@@ -93,19 +94,7 @@ ext2_xattr_delete_inode(struct inode *inode)
{
}
-static inline void
-ext2_xattr_put_super(struct super_block *sb)
-{
-}
-
-static inline int
-init_ext2_xattr(void)
-{
- return 0;
-}
-
-static inline void
-exit_ext2_xattr(void)
+static inline void ext2_xattr_destroy_cache(struct mb_cache *cache)
{
}
diff --git a/fs/ext4/crypto.c b/fs/ext4/crypto.c
index 38f7562..edc053a 100644
--- a/fs/ext4/crypto.c
+++ b/fs/ext4/crypto.c
@@ -18,11 +18,9 @@
* Special Publication 800-38E and IEEE P1619/D16.
*/
-#include <crypto/hash.h>
-#include <crypto/sha.h>
+#include <crypto/skcipher.h>
#include <keys/user-type.h>
#include <keys/encrypted-type.h>
-#include <linux/crypto.h>
#include <linux/ecryptfs.h>
#include <linux/gfp.h>
#include <linux/kernel.h>
@@ -261,21 +259,21 @@ static int ext4_page_crypto(struct inode *inode,
{
u8 xts_tweak[EXT4_XTS_TWEAK_SIZE];
- struct ablkcipher_request *req = NULL;
+ struct skcipher_request *req = NULL;
DECLARE_EXT4_COMPLETION_RESULT(ecr);
struct scatterlist dst, src;
struct ext4_crypt_info *ci = EXT4_I(inode)->i_crypt_info;
- struct crypto_ablkcipher *tfm = ci->ci_ctfm;
+ struct crypto_skcipher *tfm = ci->ci_ctfm;
int res = 0;
- req = ablkcipher_request_alloc(tfm, GFP_NOFS);
+ req = skcipher_request_alloc(tfm, GFP_NOFS);
if (!req) {
printk_ratelimited(KERN_ERR
"%s: crypto_request_alloc() failed\n",
__func__);
return -ENOMEM;
}
- ablkcipher_request_set_callback(
+ skcipher_request_set_callback(
req, CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP,
ext4_crypt_complete, &ecr);
@@ -288,21 +286,21 @@ static int ext4_page_crypto(struct inode *inode,
sg_set_page(&dst, dest_page, PAGE_CACHE_SIZE, 0);
sg_init_table(&src, 1);
sg_set_page(&src, src_page, PAGE_CACHE_SIZE, 0);
- ablkcipher_request_set_crypt(req, &src, &dst, PAGE_CACHE_SIZE,
- xts_tweak);
+ skcipher_request_set_crypt(req, &src, &dst, PAGE_CACHE_SIZE,
+ xts_tweak);
if (rw == EXT4_DECRYPT)
- res = crypto_ablkcipher_decrypt(req);
+ res = crypto_skcipher_decrypt(req);
else
- res = crypto_ablkcipher_encrypt(req);
+ res = crypto_skcipher_encrypt(req);
if (res == -EINPROGRESS || res == -EBUSY) {
wait_for_completion(&ecr.completion);
res = ecr.res;
}
- ablkcipher_request_free(req);
+ skcipher_request_free(req);
if (res) {
printk_ratelimited(
KERN_ERR
- "%s: crypto_ablkcipher_encrypt() returned %d\n",
+ "%s: crypto_skcipher_encrypt() returned %d\n",
__func__, res);
return res;
}
diff --git a/fs/ext4/crypto_fname.c b/fs/ext4/crypto_fname.c
index 2fbef8a..1a2f360 100644
--- a/fs/ext4/crypto_fname.c
+++ b/fs/ext4/crypto_fname.c
@@ -11,11 +11,9 @@
*
*/
-#include <crypto/hash.h>
-#include <crypto/sha.h>
+#include <crypto/skcipher.h>
#include <keys/encrypted-type.h>
#include <keys/user-type.h>
-#include <linux/crypto.h>
#include <linux/gfp.h>
#include <linux/kernel.h>
#include <linux/key.h>
@@ -65,10 +63,10 @@ static int ext4_fname_encrypt(struct inode *inode,
struct ext4_str *oname)
{
u32 ciphertext_len;
- struct ablkcipher_request *req = NULL;
+ struct skcipher_request *req = NULL;
DECLARE_EXT4_COMPLETION_RESULT(ecr);
struct ext4_crypt_info *ci = EXT4_I(inode)->i_crypt_info;
- struct crypto_ablkcipher *tfm = ci->ci_ctfm;
+ struct crypto_skcipher *tfm = ci->ci_ctfm;
int res = 0;
char iv[EXT4_CRYPTO_BLOCK_SIZE];
struct scatterlist src_sg, dst_sg;
@@ -95,14 +93,14 @@ static int ext4_fname_encrypt(struct inode *inode,
}
/* Allocate request */
- req = ablkcipher_request_alloc(tfm, GFP_NOFS);
+ req = skcipher_request_alloc(tfm, GFP_NOFS);
if (!req) {
printk_ratelimited(
KERN_ERR "%s: crypto_request_alloc() failed\n", __func__);
kfree(alloc_buf);
return -ENOMEM;
}
- ablkcipher_request_set_callback(req,
+ skcipher_request_set_callback(req,
CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP,
ext4_dir_crypt_complete, &ecr);
@@ -117,14 +115,14 @@ static int ext4_fname_encrypt(struct inode *inode,
/* Create encryption request */
sg_init_one(&src_sg, workbuf, ciphertext_len);
sg_init_one(&dst_sg, oname->name, ciphertext_len);
- ablkcipher_request_set_crypt(req, &src_sg, &dst_sg, ciphertext_len, iv);
- res = crypto_ablkcipher_encrypt(req);
+ skcipher_request_set_crypt(req, &src_sg, &dst_sg, ciphertext_len, iv);
+ res = crypto_skcipher_encrypt(req);
if (res == -EINPROGRESS || res == -EBUSY) {
wait_for_completion(&ecr.completion);
res = ecr.res;
}
kfree(alloc_buf);
- ablkcipher_request_free(req);
+ skcipher_request_free(req);
if (res < 0) {
printk_ratelimited(
KERN_ERR "%s: Error (error code %d)\n", __func__, res);
@@ -145,11 +143,11 @@ static int ext4_fname_decrypt(struct inode *inode,
struct ext4_str *oname)
{
struct ext4_str tmp_in[2], tmp_out[1];
- struct ablkcipher_request *req = NULL;
+ struct skcipher_request *req = NULL;
DECLARE_EXT4_COMPLETION_RESULT(ecr);
struct scatterlist src_sg, dst_sg;
struct ext4_crypt_info *ci = EXT4_I(inode)->i_crypt_info;
- struct crypto_ablkcipher *tfm = ci->ci_ctfm;
+ struct crypto_skcipher *tfm = ci->ci_ctfm;
int res = 0;
char iv[EXT4_CRYPTO_BLOCK_SIZE];
unsigned lim = max_name_len(inode);
@@ -162,13 +160,13 @@ static int ext4_fname_decrypt(struct inode *inode,
tmp_out[0].name = oname->name;
/* Allocate request */
- req = ablkcipher_request_alloc(tfm, GFP_NOFS);
+ req = skcipher_request_alloc(tfm, GFP_NOFS);
if (!req) {
printk_ratelimited(
KERN_ERR "%s: crypto_request_alloc() failed\n", __func__);
return -ENOMEM;
}
- ablkcipher_request_set_callback(req,
+ skcipher_request_set_callback(req,
CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP,
ext4_dir_crypt_complete, &ecr);
@@ -178,13 +176,13 @@ static int ext4_fname_decrypt(struct inode *inode,
/* Create encryption request */
sg_init_one(&src_sg, iname->name, iname->len);
sg_init_one(&dst_sg, oname->name, oname->len);
- ablkcipher_request_set_crypt(req, &src_sg, &dst_sg, iname->len, iv);
- res = crypto_ablkcipher_decrypt(req);
+ skcipher_request_set_crypt(req, &src_sg, &dst_sg, iname->len, iv);
+ res = crypto_skcipher_decrypt(req);
if (res == -EINPROGRESS || res == -EBUSY) {
wait_for_completion(&ecr.completion);
res = ecr.res;
}
- ablkcipher_request_free(req);
+ skcipher_request_free(req);
if (res < 0) {
printk_ratelimited(
KERN_ERR "%s: Error in ext4_fname_encrypt (error code %d)\n",
diff --git a/fs/ext4/crypto_key.c b/fs/ext4/crypto_key.c
index 9a16d1e..0129d68 100644
--- a/fs/ext4/crypto_key.c
+++ b/fs/ext4/crypto_key.c
@@ -8,6 +8,7 @@
* Written by Michael Halcrow, Ildar Muslukhov, and Uday Savagaonkar, 2015.
*/
+#include <crypto/skcipher.h>
#include <keys/encrypted-type.h>
#include <keys/user-type.h>
#include <linux/random.h>
@@ -41,45 +42,42 @@ static int ext4_derive_key_aes(char deriving_key[EXT4_AES_128_ECB_KEY_SIZE],
char derived_key[EXT4_AES_256_XTS_KEY_SIZE])
{
int res = 0;
- struct ablkcipher_request *req = NULL;
+ struct skcipher_request *req = NULL;
DECLARE_EXT4_COMPLETION_RESULT(ecr);
struct scatterlist src_sg, dst_sg;
- struct crypto_ablkcipher *tfm = crypto_alloc_ablkcipher("ecb(aes)", 0,
- 0);
+ struct crypto_skcipher *tfm = crypto_alloc_skcipher("ecb(aes)", 0, 0);
if (IS_ERR(tfm)) {
res = PTR_ERR(tfm);
tfm = NULL;
goto out;
}
- crypto_ablkcipher_set_flags(tfm, CRYPTO_TFM_REQ_WEAK_KEY);
- req = ablkcipher_request_alloc(tfm, GFP_NOFS);
+ crypto_skcipher_set_flags(tfm, CRYPTO_TFM_REQ_WEAK_KEY);
+ req = skcipher_request_alloc(tfm, GFP_NOFS);
if (!req) {
res = -ENOMEM;
goto out;
}
- ablkcipher_request_set_callback(req,
+ skcipher_request_set_callback(req,
CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP,
derive_crypt_complete, &ecr);
- res = crypto_ablkcipher_setkey(tfm, deriving_key,
- EXT4_AES_128_ECB_KEY_SIZE);
+ res = crypto_skcipher_setkey(tfm, deriving_key,
+ EXT4_AES_128_ECB_KEY_SIZE);
if (res < 0)
goto out;
sg_init_one(&src_sg, source_key, EXT4_AES_256_XTS_KEY_SIZE);
sg_init_one(&dst_sg, derived_key, EXT4_AES_256_XTS_KEY_SIZE);
- ablkcipher_request_set_crypt(req, &src_sg, &dst_sg,
- EXT4_AES_256_XTS_KEY_SIZE, NULL);
- res = crypto_ablkcipher_encrypt(req);
+ skcipher_request_set_crypt(req, &src_sg, &dst_sg,
+ EXT4_AES_256_XTS_KEY_SIZE, NULL);
+ res = crypto_skcipher_encrypt(req);
if (res == -EINPROGRESS || res == -EBUSY) {
wait_for_completion(&ecr.completion);
res = ecr.res;
}
out:
- if (req)
- ablkcipher_request_free(req);
- if (tfm)
- crypto_free_ablkcipher(tfm);
+ skcipher_request_free(req);
+ crypto_free_skcipher(tfm);
return res;
}
@@ -90,7 +88,7 @@ void ext4_free_crypt_info(struct ext4_crypt_info *ci)
if (ci->ci_keyring_key)
key_put(ci->ci_keyring_key);
- crypto_free_ablkcipher(ci->ci_ctfm);
+ crypto_free_skcipher(ci->ci_ctfm);
kmem_cache_free(ext4_crypt_info_cachep, ci);
}
@@ -122,7 +120,7 @@ int _ext4_get_encryption_info(struct inode *inode)
struct ext4_encryption_context ctx;
const struct user_key_payload *ukp;
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
- struct crypto_ablkcipher *ctfm;
+ struct crypto_skcipher *ctfm;
const char *cipher_str;
char raw_key[EXT4_MAX_KEY_SIZE];
char mode;
@@ -237,7 +235,7 @@ retry:
if (res)
goto out;
got_key:
- ctfm = crypto_alloc_ablkcipher(cipher_str, 0, 0);
+ ctfm = crypto_alloc_skcipher(cipher_str, 0, 0);
if (!ctfm || IS_ERR(ctfm)) {
res = ctfm ? PTR_ERR(ctfm) : -ENOMEM;
printk(KERN_DEBUG
@@ -246,11 +244,11 @@ got_key:
goto out;
}
crypt_info->ci_ctfm = ctfm;
- crypto_ablkcipher_clear_flags(ctfm, ~0);
- crypto_tfm_set_flags(crypto_ablkcipher_tfm(ctfm),
+ crypto_skcipher_clear_flags(ctfm, ~0);
+ crypto_tfm_set_flags(crypto_skcipher_tfm(ctfm),
CRYPTO_TFM_REQ_WEAK_KEY);
- res = crypto_ablkcipher_setkey(ctfm, raw_key,
- ext4_encryption_key_size(mode));
+ res = crypto_skcipher_setkey(ctfm, raw_key,
+ ext4_encryption_key_size(mode));
if (res)
goto out;
memzero_explicit(raw_key, sizeof(raw_key));
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 157b458..393689d 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -42,6 +42,18 @@
*/
/*
+ * with AGGRESSIVE_CHECK allocator runs consistency checks over
+ * structures. these checks slow things down a lot
+ */
+#define AGGRESSIVE_CHECK__
+
+/*
+ * with DOUBLE_CHECK defined mballoc creates persistent in-core
+ * bitmaps, maintains and uses them to check for double allocations
+ */
+#define DOUBLE_CHECK__
+
+/*
* Define EXT4FS_DEBUG to produce debug messages
*/
#undef EXT4FS_DEBUG
@@ -182,9 +194,9 @@ typedef struct ext4_io_end {
struct bio *bio; /* Linked list of completed
* bios covering the extent */
unsigned int flag; /* unwritten or not */
+ atomic_t count; /* reference counter */
loff_t offset; /* offset in the file */
ssize_t size; /* size of the extent */
- atomic_t count; /* reference counter */
} ext4_io_end_t;
struct ext4_io_submit {
@@ -1024,13 +1036,8 @@ struct ext4_inode_info {
* transaction reserved
*/
struct list_head i_rsv_conversion_list;
- /*
- * Completed IOs that need unwritten extents handling and don't have
- * transaction reserved
- */
- atomic_t i_ioend_count; /* Number of outstanding io_end structs */
- atomic_t i_unwritten; /* Nr. of inflight conversions pending */
struct work_struct i_rsv_conversion_work;
+ atomic_t i_unwritten; /* Nr. of inflight conversions pending */
spinlock_t i_block_reservation_lock;
@@ -1513,16 +1520,6 @@ static inline void ext4_set_io_unwritten_flag(struct inode *inode,
}
}
-static inline ext4_io_end_t *ext4_inode_aio(struct inode *inode)
-{
- return inode->i_private;
-}
-
-static inline void ext4_inode_aio_set(struct inode *inode, ext4_io_end_t *io)
-{
- inode->i_private = io;
-}
-
/*
* Inode dynamic state flags
*/
@@ -2506,12 +2503,14 @@ extern int ext4_trim_fs(struct super_block *, struct fstrim_range *);
int ext4_inode_is_fast_symlink(struct inode *inode);
struct buffer_head *ext4_getblk(handle_t *, struct inode *, ext4_lblk_t, int);
struct buffer_head *ext4_bread(handle_t *, struct inode *, ext4_lblk_t, int);
-int ext4_get_block_write(struct inode *inode, sector_t iblock,
- struct buffer_head *bh_result, int create);
+int ext4_get_block_unwritten(struct inode *inode, sector_t iblock,
+ struct buffer_head *bh_result, int create);
int ext4_dax_mmap_get_block(struct inode *inode, sector_t iblock,
struct buffer_head *bh_result, int create);
int ext4_get_block(struct inode *inode, sector_t iblock,
- struct buffer_head *bh_result, int create);
+ struct buffer_head *bh_result, int create);
+int ext4_dio_get_block(struct inode *inode, sector_t iblock,
+ struct buffer_head *bh_result, int create);
int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
struct buffer_head *bh, int create);
int ext4_walk_page_buffers(handle_t *handle,
@@ -2559,6 +2558,9 @@ extern void ext4_da_update_reserve_space(struct inode *inode,
int used, int quota_claim);
extern int ext4_issue_zeroout(struct inode *inode, ext4_lblk_t lblk,
ext4_fsblk_t pblk, ext4_lblk_t len);
+extern int ext4_get_next_extent(struct inode *inode, ext4_lblk_t lblk,
+ unsigned int map_len,
+ struct extent_status *result);
/* indirect.c */
extern int ext4_ind_map_blocks(handle_t *handle, struct inode *inode,
@@ -3285,10 +3287,7 @@ static inline void ext4_inode_resume_unlocked_dio(struct inode *inode)
#define EXT4_WQ_HASH_SZ 37
#define ext4_ioend_wq(v) (&ext4__ioend_wq[((unsigned long)(v)) %\
EXT4_WQ_HASH_SZ])
-#define ext4_aio_mutex(v) (&ext4__aio_mutex[((unsigned long)(v)) %\
- EXT4_WQ_HASH_SZ])
extern wait_queue_head_t ext4__ioend_wq[EXT4_WQ_HASH_SZ];
-extern struct mutex ext4__aio_mutex[EXT4_WQ_HASH_SZ];
#define EXT4_RESIZING 0
extern int ext4_resize_begin(struct super_block *sb);
diff --git a/fs/ext4/ext4_crypto.h b/fs/ext4/ext4_crypto.h
index ac7d4e8..1f73c29 100644
--- a/fs/ext4/ext4_crypto.h
+++ b/fs/ext4/ext4_crypto.h
@@ -77,7 +77,7 @@ struct ext4_crypt_info {
char ci_data_mode;
char ci_filename_mode;
char ci_flags;
- struct crypto_ablkcipher *ci_ctfm;
+ struct crypto_skcipher *ci_ctfm;
struct key *ci_keyring_key;
char ci_master_key[EXT4_KEY_DESCRIPTOR_SIZE];
};
diff --git a/fs/ext4/ext4_extents.h b/fs/ext4/ext4_extents.h
index 3c93815..8ecf84b 100644
--- a/fs/ext4/ext4_extents.h
+++ b/fs/ext4/ext4_extents.h
@@ -11,7 +11,7 @@
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
- * You should have received a copy of the GNU General Public Licens
+ * You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-
*/
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 3753ceb..95bf467 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -15,7 +15,7 @@
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
- * You should have received a copy of the GNU General Public Licens
+ * You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-
*/
@@ -1736,6 +1736,12 @@ ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1,
*/
if (ext1_ee_len + ext2_ee_len > EXT_INIT_MAX_LEN)
return 0;
+ /*
+ * The check for IO to unwritten extent is somewhat racy as we
+ * increment i_unwritten / set EXT4_STATE_DIO_UNWRITTEN only after
+ * dropping i_data_sem. But reserved blocks should save us in that
+ * case.
+ */
if (ext4_ext_is_unwritten(ex1) &&
(ext4_test_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN) ||
atomic_read(&EXT4_I(inode)->i_unwritten) ||
@@ -2293,59 +2299,69 @@ static int ext4_fill_fiemap_extents(struct inode *inode,
}
/*
- * ext4_ext_put_gap_in_cache:
- * calculate boundaries of the gap that the requested block fits into
- * and cache this gap
+ * ext4_ext_determine_hole - determine hole around given block
+ * @inode: inode we lookup in
+ * @path: path in extent tree to @lblk
+ * @lblk: pointer to logical block around which we want to determine hole
+ *
+ * Determine hole length (and start if easily possible) around given logical
+ * block. We don't try too hard to find the beginning of the hole but @path
+ * actually points to extent before @lblk, we provide it.
+ *
+ * The function returns the length of a hole starting at @lblk. We update @lblk
+ * to the beginning of the hole if we managed to find it.
*/
-static void
-ext4_ext_put_gap_in_cache(struct inode *inode, struct ext4_ext_path *path,
- ext4_lblk_t block)
+static ext4_lblk_t ext4_ext_determine_hole(struct inode *inode,
+ struct ext4_ext_path *path,
+ ext4_lblk_t *lblk)
{
int depth = ext_depth(inode);
- ext4_lblk_t len;
- ext4_lblk_t lblock;
struct ext4_extent *ex;
- struct extent_status es;
+ ext4_lblk_t len;
ex = path[depth].p_ext;
if (ex == NULL) {
/* there is no extent yet, so gap is [0;-] */
- lblock = 0;
+ *lblk = 0;
len = EXT_MAX_BLOCKS;
- ext_debug("cache gap(whole file):");
- } else if (block < le32_to_cpu(ex->ee_block)) {
- lblock = block;
- len = le32_to_cpu(ex->ee_block) - block;
- ext_debug("cache gap(before): %u [%u:%u]",
- block,
- le32_to_cpu(ex->ee_block),
- ext4_ext_get_actual_len(ex));
- } else if (block >= le32_to_cpu(ex->ee_block)
+ } else if (*lblk < le32_to_cpu(ex->ee_block)) {
+ len = le32_to_cpu(ex->ee_block) - *lblk;
+ } else if (*lblk >= le32_to_cpu(ex->ee_block)
+ ext4_ext_get_actual_len(ex)) {
ext4_lblk_t next;
- lblock = le32_to_cpu(ex->ee_block)
- + ext4_ext_get_actual_len(ex);
+ *lblk = le32_to_cpu(ex->ee_block) + ext4_ext_get_actual_len(ex);
next = ext4_ext_next_allocated_block(path);
- ext_debug("cache gap(after): [%u:%u] %u",
- le32_to_cpu(ex->ee_block),
- ext4_ext_get_actual_len(ex),
- block);
- BUG_ON(next == lblock);
- len = next - lblock;
+ BUG_ON(next == *lblk);
+ len = next - *lblk;
} else {
BUG();
}
+ return len;
+}
- ext4_es_find_delayed_extent_range(inode, lblock, lblock + len - 1, &es);
+/*
+ * ext4_ext_put_gap_in_cache:
+ * calculate boundaries of the gap that the requested block fits into
+ * and cache this gap
+ */
+static void
+ext4_ext_put_gap_in_cache(struct inode *inode, ext4_lblk_t hole_start,
+ ext4_lblk_t hole_len)
+{
+ struct extent_status es;
+
+ ext4_es_find_delayed_extent_range(inode, hole_start,
+ hole_start + hole_len - 1, &es);
if (es.es_len) {
/* There's delayed extent containing lblock? */
- if (es.es_lblk <= lblock)
+ if (es.es_lblk <= hole_start)
return;
- len = min(es.es_lblk - lblock, len);
+ hole_len = min(es.es_lblk - hole_start, hole_len);
}
- ext_debug(" -> %u:%u\n", lblock, len);
- ext4_es_insert_extent(inode, lblock, len, ~0, EXTENT_STATUS_HOLE);
+ ext_debug(" -> %u:%u\n", hole_start, hole_len);
+ ext4_es_insert_extent(inode, hole_start, hole_len, ~0,
+ EXTENT_STATUS_HOLE);
}
/*
@@ -3927,7 +3943,7 @@ get_reserved_cluster_alloc(struct inode *inode, ext4_lblk_t lblk_start,
static int
convert_initialized_extent(handle_t *handle, struct inode *inode,
struct ext4_map_blocks *map,
- struct ext4_ext_path **ppath, int flags,
+ struct ext4_ext_path **ppath,
unsigned int allocated)
{
struct ext4_ext_path *path = *ppath;
@@ -4007,7 +4023,6 @@ ext4_ext_handle_unwritten_extents(handle_t *handle, struct inode *inode,
struct ext4_ext_path *path = *ppath;
int ret = 0;
int err = 0;
- ext4_io_end_t *io = ext4_inode_aio(inode);
ext_debug("ext4_ext_handle_unwritten_extents: inode %lu, logical "
"block %llu, max_blocks %u, flags %x, allocated %u\n",
@@ -4030,15 +4045,6 @@ ext4_ext_handle_unwritten_extents(handle_t *handle, struct inode *inode,
flags | EXT4_GET_BLOCKS_CONVERT);
if (ret <= 0)
goto out;
- /*
- * Flag the inode(non aio case) or end_io struct (aio case)
- * that this IO needs to conversion to written when IO is
- * completed
- */
- if (io)
- ext4_set_io_unwritten_flag(inode, io);
- else
- ext4_set_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN);
map->m_flags |= EXT4_MAP_UNWRITTEN;
goto out;
}
@@ -4283,9 +4289,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
unsigned int allocated = 0, offset = 0;
unsigned int allocated_clusters = 0;
struct ext4_allocation_request ar;
- ext4_io_end_t *io = ext4_inode_aio(inode);
ext4_lblk_t cluster_offset;
- int set_unwritten = 0;
bool map_from_cluster = false;
ext_debug("blocks %u/%u requested for inode %lu\n",
@@ -4347,7 +4351,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
(flags & EXT4_GET_BLOCKS_CONVERT_UNWRITTEN)) {
allocated = convert_initialized_extent(
handle, inode, map, &path,
- flags, allocated);
+ allocated);
goto out2;
} else if (!ext4_ext_is_unwritten(ex))
goto out;
@@ -4368,11 +4372,22 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
* we couldn't try to create block if create flag is zero
*/
if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) {
+ ext4_lblk_t hole_start, hole_len;
+
+ hole_start = map->m_lblk;
+ hole_len = ext4_ext_determine_hole(inode, path, &hole_start);
/*
* put just found gap into cache to speed up
* subsequent requests
*/
- ext4_ext_put_gap_in_cache(inode, path, map->m_lblk);
+ ext4_ext_put_gap_in_cache(inode, hole_start, hole_len);
+
+ /* Update hole_len to reflect hole size after map->m_lblk */
+ if (hole_start != map->m_lblk)
+ hole_len -= map->m_lblk - hole_start;
+ map->m_pblk = 0;
+ map->m_len = min_t(unsigned int, map->m_len, hole_len);
+
goto out2;
}
@@ -4482,15 +4497,6 @@ got_allocated_blocks:
if (flags & EXT4_GET_BLOCKS_UNWRIT_EXT){
ext4_ext_mark_unwritten(&newex);
map->m_flags |= EXT4_MAP_UNWRITTEN;
- /*
- * io_end structure was created for every IO write to an
- * unwritten extent. To avoid unnecessary conversion,
- * here we flag the IO that really needs the conversion.
- * For non asycn direct IO case, flag the inode state
- * that we need to perform conversion when IO is done.
- */
- if (flags & EXT4_GET_BLOCKS_PRE_IO)
- set_unwritten = 1;
}
err = 0;
@@ -4501,14 +4507,6 @@ got_allocated_blocks:
err = ext4_ext_insert_extent(handle, inode, &path,
&newex, flags);
- if (!err && set_unwritten) {
- if (io)
- ext4_set_io_unwritten_flag(inode, io);
- else
- ext4_set_inode_state(inode,
- EXT4_STATE_DIO_UNWRITTEN);
- }
-
if (err && free_on_err) {
int fb_flags = flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE ?
EXT4_FREE_BLOCKS_NO_QUOT_UPDATE : 0;
diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c
index ac748b3..e38b987a 100644
--- a/fs/ext4/extents_status.c
+++ b/fs/ext4/extents_status.c
@@ -823,8 +823,8 @@ out:
es->es_lblk = es1->es_lblk;
es->es_len = es1->es_len;
es->es_pblk = es1->es_pblk;
- if (!ext4_es_is_referenced(es))
- ext4_es_set_referenced(es);
+ if (!ext4_es_is_referenced(es1))
+ ext4_es_set_referenced(es1);
stats->es_stats_cache_hits++;
} else {
stats->es_stats_cache_misses++;
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 4cd318f..6659e21 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -93,31 +93,29 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
struct file *file = iocb->ki_filp;
struct inode *inode = file_inode(iocb->ki_filp);
- struct mutex *aio_mutex = NULL;
struct blk_plug plug;
int o_direct = iocb->ki_flags & IOCB_DIRECT;
+ int unaligned_aio = 0;
int overwrite = 0;
ssize_t ret;
+ inode_lock(inode);
+ ret = generic_write_checks(iocb, from);
+ if (ret <= 0)
+ goto out;
+
/*
- * Unaligned direct AIO must be serialized; see comment above
- * In the case of O_APPEND, assume that we must always serialize
+ * Unaligned direct AIO must be serialized among each other as zeroing
+ * of partial blocks of two competing unaligned AIOs can result in data
+ * corruption.
*/
- if (o_direct &&
- ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS) &&
+ if (o_direct && ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS) &&
!is_sync_kiocb(iocb) &&
- (iocb->ki_flags & IOCB_APPEND ||
- ext4_unaligned_aio(inode, from, iocb->ki_pos))) {
- aio_mutex = ext4_aio_mutex(inode);
- mutex_lock(aio_mutex);
+ ext4_unaligned_aio(inode, from, iocb->ki_pos)) {
+ unaligned_aio = 1;
ext4_unwritten_wait(inode);
}
- inode_lock(inode);
- ret = generic_write_checks(iocb, from);
- if (ret <= 0)
- goto out;
-
/*
* If we have encountered a bitmap-format file, the size limit
* is smaller than s_maxbytes, which is for extent-mapped files.
@@ -139,7 +137,7 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
blk_start_plug(&plug);
/* check whether we do a DIO overwrite or not */
- if (ext4_should_dioread_nolock(inode) && !aio_mutex &&
+ if (ext4_should_dioread_nolock(inode) && !unaligned_aio &&
!file->f_mapping->nrpages && pos + length <= i_size_read(inode)) {
struct ext4_map_blocks map;
unsigned int blkbits = inode->i_blkbits;
@@ -181,14 +179,10 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
if (o_direct)
blk_finish_plug(&plug);
- if (aio_mutex)
- mutex_unlock(aio_mutex);
return ret;
out:
inode_unlock(inode);
- if (aio_mutex)
- mutex_unlock(aio_mutex);
return ret;
}
@@ -417,7 +411,7 @@ static int ext4_file_open(struct inode * inode, struct file * filp)
*/
static int ext4_find_unwritten_pgoff(struct inode *inode,
int whence,
- struct ext4_map_blocks *map,
+ ext4_lblk_t end_blk,
loff_t *offset)
{
struct pagevec pvec;
@@ -432,7 +426,7 @@ static int ext4_find_unwritten_pgoff(struct inode *inode,
blkbits = inode->i_sb->s_blocksize_bits;
startoff = *offset;
lastoff = startoff;
- endoff = (loff_t)(map->m_lblk + map->m_len) << blkbits;
+ endoff = (loff_t)end_blk << blkbits;
index = startoff >> PAGE_CACHE_SHIFT;
end = endoff >> PAGE_CACHE_SHIFT;
@@ -550,12 +544,11 @@ out:
static loff_t ext4_seek_data(struct file *file, loff_t offset, loff_t maxsize)
{
struct inode *inode = file->f_mapping->host;
- struct ext4_map_blocks map;
struct extent_status es;
ext4_lblk_t start, last, end;
loff_t dataoff, isize;
int blkbits;
- int ret = 0;
+ int ret;
inode_lock(inode);
@@ -572,41 +565,32 @@ static loff_t ext4_seek_data(struct file *file, loff_t offset, loff_t maxsize)
dataoff = offset;
do {
- map.m_lblk = last;
- map.m_len = end - last + 1;
- ret = ext4_map_blocks(NULL, inode, &map, 0);
- if (ret > 0 && !(map.m_flags & EXT4_MAP_UNWRITTEN)) {
- if (last != start)
- dataoff = (loff_t)last << blkbits;
- break;
+ ret = ext4_get_next_extent(inode, last, end - last + 1, &es);
+ if (ret <= 0) {
+ /* No extent found -> no data */
+ if (ret == 0)
+ ret = -ENXIO;
+ inode_unlock(inode);
+ return ret;
}
- /*
- * If there is a delay extent at this offset,
- * it will be as a data.
- */
- ext4_es_find_delayed_extent_range(inode, last, last, &es);
- if (es.es_len != 0 && in_range(last, es.es_lblk, es.es_len)) {
- if (last != start)
- dataoff = (loff_t)last << blkbits;
+ last = es.es_lblk;
+ if (last != start)
+ dataoff = (loff_t)last << blkbits;
+ if (!ext4_es_is_unwritten(&es))
break;
- }
/*
* If there is a unwritten extent at this offset,
* it will be as a data or a hole according to page
* cache that has data or not.
*/
- if (map.m_flags & EXT4_MAP_UNWRITTEN) {
- int unwritten;
- unwritten = ext4_find_unwritten_pgoff(inode, SEEK_DATA,
- &map, &dataoff);
- if (unwritten)
- break;
- }
-
- last++;
+ if (ext4_find_unwritten_pgoff(inode, SEEK_DATA,
+ es.es_lblk + es.es_len, &dataoff))
+ break;
+ last += es.es_len;
dataoff = (loff_t)last << blkbits;
+ cond_resched();
} while (last <= end);
inode_unlock(inode);
@@ -623,12 +607,11 @@ static loff_t ext4_seek_data(struct file *file, loff_t offset, loff_t maxsize)
static loff_t ext4_seek_hole(struct file *file, loff_t offset, loff_t maxsize)
{
struct inode *inode = file->f_mapping->host;
- struct ext4_map_blocks map;
struct extent_status es;
ext4_lblk_t start, last, end;
loff_t holeoff, isize;
int blkbits;
- int ret = 0;
+ int ret;
inode_lock(inode);
@@ -645,44 +628,30 @@ static loff_t ext4_seek_hole(struct file *file, loff_t offset, loff_t maxsize)
holeoff = offset;
do {
- map.m_lblk = last;
- map.m_len = end - last + 1;
- ret = ext4_map_blocks(NULL, inode, &map, 0);
- if (ret > 0 && !(map.m_flags & EXT4_MAP_UNWRITTEN)) {
- last += ret;
- holeoff = (loff_t)last << blkbits;
- continue;
+ ret = ext4_get_next_extent(inode, last, end - last + 1, &es);
+ if (ret < 0) {
+ inode_unlock(inode);
+ return ret;
}
-
- /*
- * If there is a delay extent at this offset,
- * we will skip this extent.
- */
- ext4_es_find_delayed_extent_range(inode, last, last, &es);
- if (es.es_len != 0 && in_range(last, es.es_lblk, es.es_len)) {
- last = es.es_lblk + es.es_len;
- holeoff = (loff_t)last << blkbits;
- continue;
+ /* Found a hole? */
+ if (ret == 0 || es.es_lblk > last) {
+ if (last != start)
+ holeoff = (loff_t)last << blkbits;
+ break;
}
-
/*
* If there is a unwritten extent at this offset,
* it will be as a data or a hole according to page
* cache that has data or not.
*/
- if (map.m_flags & EXT4_MAP_UNWRITTEN) {
- int unwritten;
- unwritten = ext4_find_unwritten_pgoff(inode, SEEK_HOLE,
- &map, &holeoff);
- if (!unwritten) {
- last += ret;
- holeoff = (loff_t)last << blkbits;
- continue;
- }
- }
+ if (ext4_es_is_unwritten(&es) &&
+ ext4_find_unwritten_pgoff(inode, SEEK_HOLE,
+ last + es.es_len, &holeoff))
+ break;
- /* find a hole */
- break;
+ last += es.es_len;
+ holeoff = (loff_t)last << blkbits;
+ cond_resched();
} while (last <= end);
inode_unlock(inode);
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index acc0ad5..237b877 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -787,7 +787,7 @@ struct inode *__ext4_new_inode(handle_t *handle, struct inode *dir,
sbi = EXT4_SB(sb);
/*
- * Initalize owners and quota early so that we don't have to account
+ * Initialize owners and quota early so that we don't have to account
* for quota initialization worst case in standard inode creating
* transaction
*/
diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c
index 355ef9c..3027fa6 100644
--- a/fs/ext4/indirect.c
+++ b/fs/ext4/indirect.c
@@ -555,8 +555,23 @@ int ext4_ind_map_blocks(handle_t *handle, struct inode *inode,
goto got_it;
}
- /* Next simple case - plain lookup or failed read of indirect block */
- if ((flags & EXT4_GET_BLOCKS_CREATE) == 0 || err == -EIO)
+ /* Next simple case - plain lookup failed */
+ if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) {
+ unsigned epb = inode->i_sb->s_blocksize / sizeof(u32);
+ int i;
+
+ /* Count number blocks in a subtree under 'partial' */
+ count = 1;
+ for (i = 0; partial + i != chain + depth - 1; i++)
+ count *= epb;
+ /* Fill in size of a hole we found */
+ map->m_pblk = 0;
+ map->m_len = min_t(unsigned int, map->m_len, count);
+ goto cleanup;
+ }
+
+ /* Failed read of indirect block */
+ if (err == -EIO)
goto cleanup;
/*
@@ -693,21 +708,21 @@ retry:
}
if (IS_DAX(inode))
ret = dax_do_io(iocb, inode, iter, offset,
- ext4_get_block, NULL, 0);
+ ext4_dio_get_block, NULL, 0);
else
ret = __blockdev_direct_IO(iocb, inode,
inode->i_sb->s_bdev, iter,
- offset, ext4_get_block, NULL,
- NULL, 0);
+ offset, ext4_dio_get_block,
+ NULL, NULL, 0);
inode_dio_end(inode);
} else {
locked:
if (IS_DAX(inode))
ret = dax_do_io(iocb, inode, iter, offset,
- ext4_get_block, NULL, DIO_LOCKING);
+ ext4_dio_get_block, NULL, DIO_LOCKING);
else
ret = blockdev_direct_IO(iocb, inode, iter, offset,
- ext4_get_block);
+ ext4_dio_get_block);
if (unlikely(iov_iter_rw(iter) == WRITE && ret < 0)) {
loff_t isize = i_size_read(inode);
diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c
index dfe3b9b..7cbdd375 100644
--- a/fs/ext4/inline.c
+++ b/fs/ext4/inline.c
@@ -581,9 +581,10 @@ retry:
if (ret)
goto out;
- if (ext4_should_dioread_nolock(inode))
- ret = __block_write_begin(page, from, to, ext4_get_block_write);
- else
+ if (ext4_should_dioread_nolock(inode)) {
+ ret = __block_write_begin(page, from, to,
+ ext4_get_block_unwritten);
+ } else
ret = __block_write_begin(page, from, to, ext4_get_block);
if (!ret && ext4_should_journal_data(inode)) {
@@ -1696,7 +1697,6 @@ int ext4_delete_inline_entry(handle_t *handle,
if (err)
goto out;
- BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
err = ext4_mark_inode_dirty(handle, dir);
if (unlikely(err))
goto out;
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index aee960b..b2e9576 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -216,7 +216,6 @@ void ext4_evict_inode(struct inode *inode)
}
truncate_inode_pages_final(&inode->i_data);
- WARN_ON(atomic_read(&EXT4_I(inode)->i_ioend_count));
goto no_delete;
}
@@ -228,8 +227,6 @@ void ext4_evict_inode(struct inode *inode)
ext4_begin_ordered_truncate(inode, 0);
truncate_inode_pages_final(&inode->i_data);
- WARN_ON(atomic_read(&EXT4_I(inode)->i_ioend_count));
-
/*
* Protect us against freezing - iput() caller didn't have to have any
* protection against it
@@ -458,13 +455,13 @@ static void ext4_map_blocks_es_recheck(handle_t *handle,
* Otherwise, call with ext4_ind_map_blocks() to handle indirect mapping
* based files
*
- * On success, it returns the number of blocks being mapped or allocated.
- * if create==0 and the blocks are pre-allocated and unwritten block,
- * the result buffer head is unmapped. If the create ==1, it will make sure
- * the buffer head is mapped.
+ * On success, it returns the number of blocks being mapped or allocated. if
+ * create==0 and the blocks are pre-allocated and unwritten, the resulting @map
+ * is marked as unwritten. If the create == 1, it will mark @map as mapped.
*
* It returns 0 if plain look up failed (blocks have not been allocated), in
- * that case, buffer head is unmapped
+ * that case, @map is returned as unmapped but we still do fill map->m_len to
+ * indicate the length of a hole starting at map->m_lblk.
*
* It returns the error in case of allocation failure.
*/
@@ -507,6 +504,11 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
retval = map->m_len;
map->m_len = retval;
} else if (ext4_es_is_delayed(&es) || ext4_es_is_hole(&es)) {
+ map->m_pblk = 0;
+ retval = es.es_len - (map->m_lblk - es.es_lblk);
+ if (retval > map->m_len)
+ retval = map->m_len;
+ map->m_len = retval;
retval = 0;
} else {
BUG_ON(1);
@@ -714,16 +716,11 @@ static void ext4_update_bh_state(struct buffer_head *bh, unsigned long flags)
cmpxchg(&bh->b_state, old_state, new_state) != old_state));
}
-/* Maximum number of blocks we map for direct IO at once. */
-#define DIO_MAX_BLOCKS 4096
-
static int _ext4_get_block(struct inode *inode, sector_t iblock,
struct buffer_head *bh, int flags)
{
- handle_t *handle = ext4_journal_current_handle();
struct ext4_map_blocks map;
- int ret = 0, started = 0;
- int dio_credits;
+ int ret = 0;
if (ext4_has_inline_data(inode))
return -ERANGE;
@@ -731,33 +728,14 @@ static int _ext4_get_block(struct inode *inode, sector_t iblock,
map.m_lblk = iblock;
map.m_len = bh->b_size >> inode->i_blkbits;
- if (flags && !handle) {
- /* Direct IO write... */
- if (map.m_len > DIO_MAX_BLOCKS)
- map.m_len = DIO_MAX_BLOCKS;
- dio_credits = ext4_chunk_trans_blocks(inode, map.m_len);
- handle = ext4_journal_start(inode, EXT4_HT_MAP_BLOCKS,
- dio_credits);
- if (IS_ERR(handle)) {
- ret = PTR_ERR(handle);
- return ret;
- }
- started = 1;
- }
-
- ret = ext4_map_blocks(handle, inode, &map, flags);
+ ret = ext4_map_blocks(ext4_journal_current_handle(), inode, &map,
+ flags);
if (ret > 0) {
- ext4_io_end_t *io_end = ext4_inode_aio(inode);
-
map_bh(bh, inode->i_sb, map.m_pblk);
ext4_update_bh_state(bh, map.m_flags);
- if (io_end && io_end->flag & EXT4_IO_END_UNWRITTEN)
- set_buffer_defer_completion(bh);
bh->b_size = inode->i_sb->s_blocksize * map.m_len;
ret = 0;
}
- if (started)
- ext4_journal_stop(handle);
return ret;
}
@@ -769,6 +747,155 @@ int ext4_get_block(struct inode *inode, sector_t iblock,
}
/*
+ * Get block function used when preparing for buffered write if we require
+ * creating an unwritten extent if blocks haven't been allocated. The extent
+ * will be converted to written after the IO is complete.
+ */
+int ext4_get_block_unwritten(struct inode *inode, sector_t iblock,
+ struct buffer_head *bh_result, int create)
+{
+ ext4_debug("ext4_get_block_unwritten: inode %lu, create flag %d\n",
+ inode->i_ino, create);
+ return _ext4_get_block(inode, iblock, bh_result,
+ EXT4_GET_BLOCKS_IO_CREATE_EXT);
+}
+
+/* Maximum number of blocks we map for direct IO at once. */
+#define DIO_MAX_BLOCKS 4096
+
+static handle_t *start_dio_trans(struct inode *inode,
+ struct buffer_head *bh_result)
+{
+ int dio_credits;
+
+ /* Trim mapping request to maximum we can map at once for DIO */
+ if (bh_result->b_size >> inode->i_blkbits > DIO_MAX_BLOCKS)
+ bh_result->b_size = DIO_MAX_BLOCKS << inode->i_blkbits;
+ dio_credits = ext4_chunk_trans_blocks(inode,
+ bh_result->b_size >> inode->i_blkbits);
+ return ext4_journal_start(inode, EXT4_HT_MAP_BLOCKS, dio_credits);
+}
+
+/* Get block function for DIO reads and writes to inodes without extents */
+int ext4_dio_get_block(struct inode *inode, sector_t iblock,
+ struct buffer_head *bh, int create)
+{
+ handle_t *handle;
+ int ret;
+
+ /* We don't expect handle for direct IO */
+ WARN_ON_ONCE(ext4_journal_current_handle());
+
+ if (create) {
+ handle = start_dio_trans(inode, bh);
+ if (IS_ERR(handle))
+ return PTR_ERR(handle);
+ }
+ ret = _ext4_get_block(inode, iblock, bh,
+ create ? EXT4_GET_BLOCKS_CREATE : 0);
+ if (create)
+ ext4_journal_stop(handle);
+ return ret;
+}
+
+/*
+ * Get block function for AIO DIO writes when we create unwritten extent if
+ * blocks are not allocated yet. The extent will be converted to written
+ * after IO is complete.
+ */
+static int ext4_dio_get_block_unwritten_async(struct inode *inode,
+ sector_t iblock, struct buffer_head *bh_result, int create)
+{
+ handle_t *handle;
+ int ret;
+
+ /* We don't expect handle for direct IO */
+ WARN_ON_ONCE(ext4_journal_current_handle());
+
+ handle = start_dio_trans(inode, bh_result);
+ if (IS_ERR(handle))
+ return PTR_ERR(handle);
+ ret = _ext4_get_block(inode, iblock, bh_result,
+ EXT4_GET_BLOCKS_IO_CREATE_EXT);
+ ext4_journal_stop(handle);
+
+ /*
+ * When doing DIO using unwritten extents, we need io_end to convert
+ * unwritten extents to written on IO completion. We allocate io_end
+ * once we spot unwritten extent and store it in b_private. Generic
+ * DIO code keeps b_private set and furthermore passes the value to
+ * our completion callback in 'private' argument.
+ */
+ if (!ret && buffer_unwritten(bh_result)) {
+ if (!bh_result->b_private) {
+ ext4_io_end_t *io_end;
+
+ io_end = ext4_init_io_end(inode, GFP_KERNEL);
+ if (!io_end)
+ return -ENOMEM;
+ bh_result->b_private = io_end;
+ ext4_set_io_unwritten_flag(inode, io_end);
+ }
+ set_buffer_defer_completion(bh_result);
+ }
+
+ return ret;
+}
+
+/*
+ * Get block function for non-AIO DIO writes when we create unwritten extent if
+ * blocks are not allocated yet. The extent will be converted to written
+ * after IO is complete from ext4_ext_direct_IO() function.
+ */
+static int ext4_dio_get_block_unwritten_sync(struct inode *inode,
+ sector_t iblock, struct buffer_head *bh_result, int create)
+{
+ handle_t *handle;
+ int ret;
+
+ /* We don't expect handle for direct IO */
+ WARN_ON_ONCE(ext4_journal_current_handle());
+
+ handle = start_dio_trans(inode, bh_result);
+ if (IS_ERR(handle))
+ return PTR_ERR(handle);
+ ret = _ext4_get_block(inode, iblock, bh_result,
+ EXT4_GET_BLOCKS_IO_CREATE_EXT);
+ ext4_journal_stop(handle);
+
+ /*
+ * Mark inode as having pending DIO writes to unwritten extents.
+ * ext4_ext_direct_IO() checks this flag and converts extents to
+ * written.
+ */
+ if (!ret && buffer_unwritten(bh_result))
+ ext4_set_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN);
+
+ return ret;
+}
+
+static int ext4_dio_get_block_overwrite(struct inode *inode, sector_t iblock,
+ struct buffer_head *bh_result, int create)
+{
+ int ret;
+
+ ext4_debug("ext4_dio_get_block_overwrite: inode %lu, create flag %d\n",
+ inode->i_ino, create);
+ /* We don't expect handle for direct IO */
+ WARN_ON_ONCE(ext4_journal_current_handle());
+
+ ret = _ext4_get_block(inode, iblock, bh_result, 0);
+ /*
+ * Blocks should have been preallocated! ext4_file_write_iter() checks
+ * that.
+ */
+ WARN_ON_ONCE(!buffer_mapped(bh_result) || buffer_unwritten(bh_result));
+
+ return ret;
+}
+
+
+/*
* `handle' can be NULL if create is zero
*/
struct buffer_head *ext4_getblk(handle_t *handle, struct inode *inode,
@@ -1079,13 +1206,14 @@ retry_journal:
#ifdef CONFIG_EXT4_FS_ENCRYPTION
if (ext4_should_dioread_nolock(inode))
ret = ext4_block_write_begin(page, pos, len,
- ext4_get_block_write);
+ ext4_get_block_unwritten);
else
ret = ext4_block_write_begin(page, pos, len,
ext4_get_block);
#else
if (ext4_should_dioread_nolock(inode))
- ret = __block_write_begin(page, pos, len, ext4_get_block_write);
+ ret = __block_write_begin(page, pos, len,
+ ext4_get_block_unwritten);
else
ret = __block_write_begin(page, pos, len, ext4_get_block);
#endif
@@ -3088,37 +3216,6 @@ static int ext4_releasepage(struct page *page, gfp_t wait)
return try_to_free_buffers(page);
}
-/*
- * ext4_get_block used when preparing for a DIO write or buffer write.
- * We allocate an uinitialized extent if blocks haven't been allocated.
- * The extent will be converted to initialized after the IO is complete.
- */
-int ext4_get_block_write(struct inode *inode, sector_t iblock,
- struct buffer_head *bh_result, int create)
-{
- ext4_debug("ext4_get_block_write: inode %lu, create flag %d\n",
- inode->i_ino, create);
- return _ext4_get_block(inode, iblock, bh_result,
- EXT4_GET_BLOCKS_IO_CREATE_EXT);
-}
-
-static int ext4_get_block_overwrite(struct inode *inode, sector_t iblock,
- struct buffer_head *bh_result, int create)
-{
- int ret;
-
- ext4_debug("ext4_get_block_overwrite: inode %lu, create flag %d\n",
- inode->i_ino, create);
- ret = _ext4_get_block(inode, iblock, bh_result, 0);
- /*
- * Blocks should have been preallocated! ext4_file_write_iter() checks
- * that.
- */
- WARN_ON_ONCE(!buffer_mapped(bh_result));
-
- return ret;
-}
-
#ifdef CONFIG_FS_DAX
int ext4_dax_mmap_get_block(struct inode *inode, sector_t iblock,
struct buffer_head *bh_result, int create)
@@ -3179,13 +3276,12 @@ out:
WARN_ON_ONCE(ret == 0 && create);
if (ret > 0) {
map_bh(bh_result, inode->i_sb, map.m_pblk);
- bh_result->b_state = (bh_result->b_state & ~EXT4_MAP_FLAGS) |
- map.m_flags;
/*
* At least for now we have to clear BH_New so that DAX code
* doesn't attempt to zero blocks again in a racy way.
*/
- bh_result->b_state &= ~(1 << BH_New);
+ map.m_flags &= ~EXT4_MAP_NEW;
+ ext4_update_bh_state(bh_result, map.m_flags);
bh_result->b_size = map.m_len << inode->i_blkbits;
ret = 0;
}
@@ -3196,7 +3292,7 @@ out:
static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
ssize_t size, void *private)
{
- ext4_io_end_t *io_end = iocb->private;
+ ext4_io_end_t *io_end = private;
/* if not async direct IO just return */
if (!io_end)
@@ -3204,10 +3300,8 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
ext_debug("ext4_end_io_dio(): io_end 0x%p "
"for inode %lu, iocb 0x%p, offset %llu, size %zd\n",
- iocb->private, io_end->inode->i_ino, iocb, offset,
- size);
+ io_end, io_end->inode->i_ino, iocb, offset, size);
- iocb->private = NULL;
io_end->offset = offset;
io_end->size = size;
ext4_put_io_end(io_end);
@@ -3243,7 +3337,6 @@ static ssize_t ext4_ext_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
get_block_t *get_block_func = NULL;
int dio_flags = 0;
loff_t final_size = offset + count;
- ext4_io_end_t *io_end = NULL;
/* Use the old path for reads and writes beyond i_size. */
if (iov_iter_rw(iter) != WRITE || final_size > inode->i_size)
@@ -3268,16 +3361,17 @@ static ssize_t ext4_ext_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
/*
* We could direct write to holes and fallocate.
*
- * Allocated blocks to fill the hole are marked as
- * unwritten to prevent parallel buffered read to expose
- * the stale data before DIO complete the data IO.
+ * Allocated blocks to fill the hole are marked as unwritten to prevent
+ * parallel buffered read to expose the stale data before DIO complete
+ * the data IO.
*
- * As to previously fallocated extents, ext4 get_block will
- * just simply mark the buffer mapped but still keep the
- * extents unwritten.
+ * As to previously fallocated extents, ext4 get_block will just simply
+ * mark the buffer mapped but still keep the extents unwritten.
*
- * For non AIO case, we will convert those unwritten extents
- * to written after return back from blockdev_direct_IO.
+ * For non AIO case, we will convert those unwritten extents to written
+ * after return back from blockdev_direct_IO. That way we save us from
+ * allocating io_end structure and also the overhead of offloading
+ * the extent convertion to a workqueue.
*
* For async DIO, the conversion needs to be deferred when the
* IO is completed. The ext4 end_io callback function will be
@@ -3285,30 +3379,13 @@ static ssize_t ext4_ext_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
* case, we allocate an io_end structure to hook to the iocb.
*/
iocb->private = NULL;
- if (overwrite) {
- get_block_func = ext4_get_block_overwrite;
+ if (overwrite)
+ get_block_func = ext4_dio_get_block_overwrite;
+ else if (is_sync_kiocb(iocb)) {
+ get_block_func = ext4_dio_get_block_unwritten_sync;
+ dio_flags = DIO_LOCKING;
} else {
- ext4_inode_aio_set(inode, NULL);
- if (!is_sync_kiocb(iocb)) {
- io_end = ext4_init_io_end(inode, GFP_NOFS);
- if (!io_end) {
- ret = -ENOMEM;
- goto retake_lock;
- }
- /*
- * Grab reference for DIO. Will be dropped in
- * ext4_end_io_dio()
- */
- iocb->private = ext4_get_io_end(io_end);
- /*
- * we save the io structure for current async direct
- * IO, so that later ext4_map_blocks() could flag the
- * io structure whether there is a unwritten extents
- * needs to be converted when IO is completed.
- */
- ext4_inode_aio_set(inode, io_end);
- }
- get_block_func = ext4_get_block_write;
+ get_block_func = ext4_dio_get_block_unwritten_async;
dio_flags = DIO_LOCKING;
}
#ifdef CONFIG_EXT4_FS_ENCRYPTION
@@ -3323,27 +3400,6 @@ static ssize_t ext4_ext_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
get_block_func,
ext4_end_io_dio, NULL, dio_flags);
- /*
- * Put our reference to io_end. This can free the io_end structure e.g.
- * in sync IO case or in case of error. It can even perform extent
- * conversion if all bios we submitted finished before we got here.
- * Note that in that case iocb->private can be already set to NULL
- * here.
- */
- if (io_end) {
- ext4_inode_aio_set(inode, NULL);
- ext4_put_io_end(io_end);
- /*
- * When no IO was submitted ext4_end_io_dio() was not
- * called so we have to put iocb's reference.
- */
- if (ret <= 0 && ret != -EIOCBQUEUED && iocb->private) {
- WARN_ON(iocb->private != io_end);
- WARN_ON(io_end->flag & EXT4_IO_END_UNWRITTEN);
- ext4_put_io_end(io_end);
- iocb->private = NULL;
- }
- }
if (ret > 0 && !overwrite && ext4_test_inode_state(inode,
EXT4_STATE_DIO_UNWRITTEN)) {
int err;
@@ -3358,7 +3414,6 @@ static ssize_t ext4_ext_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
ext4_clear_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN);
}
-retake_lock:
if (iov_iter_rw(iter) == WRITE)
inode_dio_end(inode);
/* take i_mutex locking again if we do a ovewrite dio */
@@ -5261,6 +5316,8 @@ int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode)
might_sleep();
trace_ext4_mark_inode_dirty(inode, _RET_IP_);
err = ext4_reserve_inode_write(handle, inode, &iloc);
+ if (err)
+ return err;
if (ext4_handle_valid(handle) &&
EXT4_I(inode)->i_extra_isize < sbi->s_want_extra_isize &&
!ext4_test_inode_state(inode, EXT4_STATE_NO_EXPAND)) {
@@ -5291,9 +5348,7 @@ int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode)
}
}
}
- if (!err)
- err = ext4_mark_iloc_dirty(handle, inode, &iloc);
- return err;
+ return ext4_mark_iloc_dirty(handle, inode, &iloc);
}
/*
@@ -5502,7 +5557,7 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
unlock_page(page);
/* OK, we need to fill the hole... */
if (ext4_should_dioread_nolock(inode))
- get_block = ext4_get_block_write;
+ get_block = ext4_get_block_unwritten;
else
get_block = ext4_get_block;
retry_alloc:
@@ -5545,3 +5600,70 @@ int ext4_filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
return err;
}
+
+/*
+ * Find the first extent at or after @lblk in an inode that is not a hole.
+ * Search for @map_len blocks at most. The extent is returned in @result.
+ *
+ * The function returns 1 if we found an extent. The function returns 0 in
+ * case there is no extent at or after @lblk and in that case also sets
+ * @result->es_len to 0. In case of error, the error code is returned.
+ */
+int ext4_get_next_extent(struct inode *inode, ext4_lblk_t lblk,
+ unsigned int map_len, struct extent_status *result)
+{
+ struct ext4_map_blocks map;
+ struct extent_status es = {};
+ int ret;
+
+ map.m_lblk = lblk;
+ map.m_len = map_len;
+
+ /*
+ * For non-extent based files this loop may iterate several times since
+ * we do not determine full hole size.
+ */
+ while (map.m_len > 0) {
+ ret = ext4_map_blocks(NULL, inode, &map, 0);
+ if (ret < 0)
+ return ret;
+ /* There's extent covering m_lblk? Just return it. */
+ if (ret > 0) {
+ int status;
+
+ ext4_es_store_pblock(result, map.m_pblk);
+ result->es_lblk = map.m_lblk;
+ result->es_len = map.m_len;
+ if (map.m_flags & EXT4_MAP_UNWRITTEN)
+ status = EXTENT_STATUS_UNWRITTEN;
+ else
+ status = EXTENT_STATUS_WRITTEN;
+ ext4_es_store_status(result, status);
+ return 1;
+ }
+ ext4_es_find_delayed_extent_range(inode, map.m_lblk,
+ map.m_lblk + map.m_len - 1,
+ &es);
+ /* Is delalloc data before next block in extent tree? */
+ if (es.es_len && es.es_lblk < map.m_lblk + map.m_len) {
+ ext4_lblk_t offset = 0;
+
+ if (es.es_lblk < lblk)
+ offset = lblk - es.es_lblk;
+ result->es_lblk = es.es_lblk + offset;
+ ext4_es_store_pblock(result,
+ ext4_es_pblock(&es) + offset);
+ result->es_len = es.es_len - offset;
+ ext4_es_store_status(result, ext4_es_status(&es));
+
+ return 1;
+ }
+ /* There's a hole at m_lblk, advance us after it */
+ map.m_lblk += map.m_len;
+ map_len -= map.m_len;
+ map.m_len = map_len;
+ cond_resched();
+ }
+ result->es_len = 0;
+ return 0;
+}
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 4424b7b..50e05df 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -11,7 +11,7 @@
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
- * You should have received a copy of the GNU General Public Licens
+ * You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-
*/
@@ -815,7 +815,7 @@ static void mb_regenerate_buddy(struct ext4_buddy *e4b)
* for this page; do not hold this lock when calling this routine!
*/
-static int ext4_mb_init_cache(struct page *page, char *incore)
+static int ext4_mb_init_cache(struct page *page, char *incore, gfp_t gfp)
{
ext4_group_t ngroups;
int blocksize;
@@ -848,7 +848,7 @@ static int ext4_mb_init_cache(struct page *page, char *incore)
/* allocate buffer_heads to read bitmaps */
if (groups_per_page > 1) {
i = sizeof(struct buffer_head *) * groups_per_page;
- bh = kzalloc(i, GFP_NOFS);
+ bh = kzalloc(i, gfp);
if (bh == NULL) {
err = -ENOMEM;
goto out;
@@ -983,7 +983,7 @@ out:
* are on the same page e4b->bd_buddy_page is NULL and return value is 0.
*/
static int ext4_mb_get_buddy_page_lock(struct super_block *sb,
- ext4_group_t group, struct ext4_buddy *e4b)
+ ext4_group_t group, struct ext4_buddy *e4b, gfp_t gfp)
{
struct inode *inode = EXT4_SB(sb)->s_buddy_cache;
int block, pnum, poff;
@@ -1002,7 +1002,7 @@ static int ext4_mb_get_buddy_page_lock(struct super_block *sb,
block = group * 2;
pnum = block / blocks_per_page;
poff = block % blocks_per_page;
- page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS);
+ page = find_or_create_page(inode->i_mapping, pnum, gfp);
if (!page)
return -ENOMEM;
BUG_ON(page->mapping != inode->i_mapping);
@@ -1016,7 +1016,7 @@ static int ext4_mb_get_buddy_page_lock(struct super_block *sb,
block++;
pnum = block / blocks_per_page;
- page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS);
+ page = find_or_create_page(inode->i_mapping, pnum, gfp);
if (!page)
return -ENOMEM;
BUG_ON(page->mapping != inode->i_mapping);
@@ -1042,7 +1042,7 @@ static void ext4_mb_put_buddy_page_lock(struct ext4_buddy *e4b)
* calling this routine!
*/
static noinline_for_stack
-int ext4_mb_init_group(struct super_block *sb, ext4_group_t group)
+int ext4_mb_init_group(struct super_block *sb, ext4_group_t group, gfp_t gfp)
{
struct ext4_group_info *this_grp;
@@ -1062,7 +1062,7 @@ int ext4_mb_init_group(struct super_block *sb, ext4_group_t group)
* The call to ext4_mb_get_buddy_page_lock will mark the
* page accessed.
*/
- ret = ext4_mb_get_buddy_page_lock(sb, group, &e4b);
+ ret = ext4_mb_get_buddy_page_lock(sb, group, &e4b, gfp);
if (ret || !EXT4_MB_GRP_NEED_INIT(this_grp)) {
/*
* somebody initialized the group
@@ -1072,7 +1072,7 @@ int ext4_mb_init_group(struct super_block *sb, ext4_group_t group)
}
page = e4b.bd_bitmap_page;
- ret = ext4_mb_init_cache(page, NULL);
+ ret = ext4_mb_init_cache(page, NULL, gfp);
if (ret)
goto err;
if (!PageUptodate(page)) {
@@ -1091,7 +1091,7 @@ int ext4_mb_init_group(struct super_block *sb, ext4_group_t group)
}
/* init buddy cache */
page = e4b.bd_buddy_page;
- ret = ext4_mb_init_cache(page, e4b.bd_bitmap);
+ ret = ext4_mb_init_cache(page, e4b.bd_bitmap, gfp);
if (ret)
goto err;
if (!PageUptodate(page)) {
@@ -1109,8 +1109,8 @@ err:
* calling this routine!
*/
static noinline_for_stack int
-ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
- struct ext4_buddy *e4b)
+ext4_mb_load_buddy_gfp(struct super_block *sb, ext4_group_t group,
+ struct ext4_buddy *e4b, gfp_t gfp)
{
int blocks_per_page;
int block;
@@ -1140,7 +1140,7 @@ ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
* we need full data about the group
* to make a good selection
*/
- ret = ext4_mb_init_group(sb, group);
+ ret = ext4_mb_init_group(sb, group, gfp);
if (ret)
return ret;
}
@@ -1168,11 +1168,11 @@ ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
* wait for it to initialize.
*/
page_cache_release(page);
- page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS);
+ page = find_or_create_page(inode->i_mapping, pnum, gfp);
if (page) {
BUG_ON(page->mapping != inode->i_mapping);
if (!PageUptodate(page)) {
- ret = ext4_mb_init_cache(page, NULL);
+ ret = ext4_mb_init_cache(page, NULL, gfp);
if (ret) {
unlock_page(page);
goto err;
@@ -1204,11 +1204,12 @@ ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
if (page == NULL || !PageUptodate(page)) {
if (page)
page_cache_release(page);
- page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS);
+ page = find_or_create_page(inode->i_mapping, pnum, gfp);
if (page) {
BUG_ON(page->mapping != inode->i_mapping);
if (!PageUptodate(page)) {
- ret = ext4_mb_init_cache(page, e4b->bd_bitmap);
+ ret = ext4_mb_init_cache(page, e4b->bd_bitmap,
+ gfp);
if (ret) {
unlock_page(page);
goto err;
@@ -1247,6 +1248,12 @@ err:
return ret;
}
+static int ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
+ struct ext4_buddy *e4b)
+{
+ return ext4_mb_load_buddy_gfp(sb, group, e4b, GFP_NOFS);
+}
+
static void ext4_mb_unload_buddy(struct ext4_buddy *e4b)
{
if (e4b->bd_bitmap_page)
@@ -2045,7 +2052,7 @@ static int ext4_mb_good_group(struct ext4_allocation_context *ac,
/* We only do this if the grp has never been initialized */
if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) {
- int ret = ext4_mb_init_group(ac->ac_sb, group);
+ int ret = ext4_mb_init_group(ac->ac_sb, group, GFP_NOFS);
if (ret)
return ret;
}
@@ -4695,16 +4702,6 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode,
}
/*
- * We need to make sure we don't reuse the freed block until
- * after the transaction is committed, which we can do by
- * treating the block as metadata, below. We make an
- * exception if the inode is to be written in writeback mode
- * since writeback mode has weak data consistency guarantees.
- */
- if (!ext4_should_writeback_data(inode))
- flags |= EXT4_FREE_BLOCKS_METADATA;
-
- /*
* If the extent to be freed does not begin on a cluster
* boundary, we need to deal with partial clusters at the
* beginning and end of the extent. Normally we will free
@@ -4738,14 +4735,13 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode,
if (!bh && (flags & EXT4_FREE_BLOCKS_FORGET)) {
int i;
+ int is_metadata = flags & EXT4_FREE_BLOCKS_METADATA;
for (i = 0; i < count; i++) {
cond_resched();
- bh = sb_find_get_block(inode->i_sb, block + i);
- if (!bh)
- continue;
- ext4_forget(handle, flags & EXT4_FREE_BLOCKS_METADATA,
- inode, bh, block + i);
+ if (is_metadata)
+ bh = sb_find_get_block(inode->i_sb, block + i);
+ ext4_forget(handle, is_metadata, inode, bh, block + i);
}
}
@@ -4815,16 +4811,23 @@ do_more:
#endif
trace_ext4_mballoc_free(sb, inode, block_group, bit, count_clusters);
- err = ext4_mb_load_buddy(sb, block_group, &e4b);
+ /* __GFP_NOFAIL: retry infinitely, ignore TIF_MEMDIE and memcg limit. */
+ err = ext4_mb_load_buddy_gfp(sb, block_group, &e4b,
+ GFP_NOFS|__GFP_NOFAIL);
if (err)
goto error_return;
- if ((flags & EXT4_FREE_BLOCKS_METADATA) && ext4_handle_valid(handle)) {
+ /*
+ * We need to make sure we don't reuse the freed block until after the
+ * transaction is committed. We make an exception if the inode is to be
+ * written in writeback mode since writeback mode has weak data
+ * consistency guarantees.
+ */
+ if (ext4_handle_valid(handle) &&
+ ((flags & EXT4_FREE_BLOCKS_METADATA) ||
+ !ext4_should_writeback_data(inode))) {
struct ext4_free_data *new_entry;
/*
- * blocks being freed are metadata. these blocks shouldn't
- * be used until this transaction is committed
- *
* We use __GFP_NOFAIL because ext4_free_blocks() is not allowed
* to fail.
*/
@@ -5217,7 +5220,7 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range)
grp = ext4_get_group_info(sb, group);
/* We only do this if the grp has never been initialized */
if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) {
- ret = ext4_mb_init_group(sb, group);
+ ret = ext4_mb_init_group(sb, group, GFP_NOFS);
if (ret)
break;
}
diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h
index d634e18..3ef1df6 100644
--- a/fs/ext4/mballoc.h
+++ b/fs/ext4/mballoc.h
@@ -23,18 +23,6 @@
#include "ext4.h"
/*
- * with AGGRESSIVE_CHECK allocator runs consistency checks over
- * structures. these checks slow things down a lot
- */
-#define AGGRESSIVE_CHECK__
-
-/*
- * with DOUBLE_CHECK defined mballoc creates persistent in-core
- * bitmaps, maintains and uses them to check for double allocations
- */
-#define DOUBLE_CHECK__
-
-/*
*/
#ifdef CONFIG_EXT4_DEBUG
extern ushort ext4_mballoc_debug;
diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c
index a465189..364ea4d 100644
--- a/fs/ext4/migrate.c
+++ b/fs/ext4/migrate.c
@@ -361,7 +361,7 @@ static int ext4_ext_swap_inode_data(handle_t *handle, struct inode *inode,
* blocks.
*
* While converting to extents we need not
- * update the orignal inode i_blocks for extent blocks
+ * update the original inode i_blocks for extent blocks
* via quota APIs. The quota update happened via tmp_inode already.
*/
spin_lock(&inode->i_lock);
diff --git a/fs/ext4/mmp.c b/fs/ext4/mmp.c
index 0a512aa..2444527 100644
--- a/fs/ext4/mmp.c
+++ b/fs/ext4/mmp.c
@@ -91,21 +91,22 @@ static int read_mmp_block(struct super_block *sb, struct buffer_head **bh,
submit_bh(READ_SYNC | REQ_META | REQ_PRIO, *bh);
wait_on_buffer(*bh);
if (!buffer_uptodate(*bh)) {
- brelse(*bh);
- *bh = NULL;
ret = -EIO;
goto warn_exit;
}
-
mmp = (struct mmp_struct *)((*bh)->b_data);
- if (le32_to_cpu(mmp->mmp_magic) != EXT4_MMP_MAGIC)
+ if (le32_to_cpu(mmp->mmp_magic) != EXT4_MMP_MAGIC) {
ret = -EFSCORRUPTED;
- else if (!ext4_mmp_csum_verify(sb, mmp))
+ goto warn_exit;
+ }
+ if (!ext4_mmp_csum_verify(sb, mmp)) {
ret = -EFSBADCRC;
- else
- return 0;
-
+ goto warn_exit;
+ }
+ return 0;
warn_exit:
+ brelse(*bh);
+ *bh = NULL;
ext4_warning(sb, "Error %d while reading MMP block %llu",
ret, mmp_block);
return ret;
@@ -181,15 +182,13 @@ static int kmmpd(void *data)
EXT4_FEATURE_INCOMPAT_MMP)) {
ext4_warning(sb, "kmmpd being stopped since MMP feature"
" has been disabled.");
- EXT4_SB(sb)->s_mmp_tsk = NULL;
- goto failed;
+ goto exit_thread;
}
if (sb->s_flags & MS_RDONLY) {
ext4_warning(sb, "kmmpd being stopped since filesystem "
"has been remounted as readonly.");
- EXT4_SB(sb)->s_mmp_tsk = NULL;
- goto failed;
+ goto exit_thread;
}
diff = jiffies - last_update_time;
@@ -211,9 +210,7 @@ static int kmmpd(void *data)
if (retval) {
ext4_error(sb, "error reading MMP data: %d",
retval);
-
- EXT4_SB(sb)->s_mmp_tsk = NULL;
- goto failed;
+ goto exit_thread;
}
mmp_check = (struct mmp_struct *)(bh_check->b_data);
@@ -225,7 +222,9 @@ static int kmmpd(void *data)
"The filesystem seems to have been"
" multiply mounted.");
ext4_error(sb, "abort");
- goto failed;
+ put_bh(bh_check);
+ retval = -EBUSY;
+ goto exit_thread;
}
put_bh(bh_check);
}
@@ -248,7 +247,8 @@ static int kmmpd(void *data)
retval = write_mmp_block(sb, bh);
-failed:
+exit_thread:
+ EXT4_SB(sb)->s_mmp_tsk = NULL;
kfree(data);
brelse(bh);
return retval;
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index 090b349..349d7aa 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -128,9 +128,6 @@ static void ext4_release_io_end(ext4_io_end_t *io_end)
BUG_ON(io_end->flag & EXT4_IO_END_UNWRITTEN);
WARN_ON(io_end->handle);
- if (atomic_dec_and_test(&EXT4_I(io_end->inode)->i_ioend_count))
- wake_up_all(ext4_ioend_wq(io_end->inode));
-
for (bio = io_end->bio; bio; bio = next_bio) {
next_bio = bio->bi_private;
ext4_finish_bio(bio);
@@ -265,7 +262,6 @@ ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags)
{
ext4_io_end_t *io = kmem_cache_zalloc(io_end_cachep, flags);
if (io) {
- atomic_inc(&EXT4_I(inode)->i_ioend_count);
io->inode = inode;
INIT_LIST_HEAD(&io->list);
atomic_set(&io->count, 1);
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 3ed01ec..99996e9 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -55,7 +55,6 @@
static struct ext4_lazy_init *ext4_li_info;
static struct mutex ext4_li_mtx;
-static int ext4_mballoc_ready;
static struct ratelimit_state ext4_mount_msg_ratelimit;
static int ext4_load_journal(struct super_block *, struct ext4_super_block *,
@@ -844,7 +843,6 @@ static void ext4_put_super(struct super_block *sb)
ext4_release_system_zone(sb);
ext4_mb_release(sb);
ext4_ext_release(sb);
- ext4_xattr_put_super(sb);
if (!(sb->s_flags & MS_RDONLY)) {
ext4_clear_feature_journal_needs_recovery(sb);
@@ -944,7 +942,6 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
spin_lock_init(&ei->i_completed_io_lock);
ei->i_sync_tid = 0;
ei->i_datasync_tid = 0;
- atomic_set(&ei->i_ioend_count, 0);
atomic_set(&ei->i_unwritten, 0);
INIT_WORK(&ei->i_rsv_conversion_work, ext4_end_io_rsv_work);
#ifdef CONFIG_EXT4_FS_ENCRYPTION
@@ -1425,9 +1422,9 @@ static const struct mount_opts {
{Opt_err_ro, EXT4_MOUNT_ERRORS_RO, MOPT_SET | MOPT_CLEAR_ERR},
{Opt_err_cont, EXT4_MOUNT_ERRORS_CONT, MOPT_SET | MOPT_CLEAR_ERR},
{Opt_data_err_abort, EXT4_MOUNT_DATA_ERR_ABORT,
- MOPT_NO_EXT2 | MOPT_SET},
+ MOPT_NO_EXT2},
{Opt_data_err_ignore, EXT4_MOUNT_DATA_ERR_ABORT,
- MOPT_NO_EXT2 | MOPT_CLEAR},
+ MOPT_NO_EXT2},
{Opt_barrier, EXT4_MOUNT_BARRIER, MOPT_SET},
{Opt_nobarrier, EXT4_MOUNT_BARRIER, MOPT_CLEAR},
{Opt_noauto_da_alloc, EXT4_MOUNT_NO_AUTO_DA_ALLOC, MOPT_SET},
@@ -1705,6 +1702,10 @@ static int handle_mount_opt(struct super_block *sb, char *opt, int token,
ext4_msg(sb, KERN_INFO, "dax option not supported");
return -1;
#endif
+ } else if (token == Opt_data_err_abort) {
+ sbi->s_mount_opt |= m->mount_opt;
+ } else if (token == Opt_data_err_ignore) {
+ sbi->s_mount_opt &= ~m->mount_opt;
} else {
if (!args->from)
arg = 1;
@@ -1914,6 +1915,8 @@ static int _ext4_show_options(struct seq_file *seq, struct super_block *sb,
SEQ_OPTS_PRINT("init_itable=%u", sbi->s_li_wait_mult);
if (nodefs || sbi->s_max_dir_size_kb)
SEQ_OPTS_PRINT("max_dir_size_kb=%u", sbi->s_max_dir_size_kb);
+ if (test_opt(sb, DATA_ERR_ABORT))
+ SEQ_OPTS_PUTS("data_err=abort");
ext4_show_quota_options(seq, sb);
return 0;
@@ -3796,12 +3799,10 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
sbi->s_journal->j_commit_callback = ext4_journal_commit_callback;
no_journal:
- if (ext4_mballoc_ready) {
- sbi->s_mb_cache = ext4_xattr_create_cache(sb->s_id);
- if (!sbi->s_mb_cache) {
- ext4_msg(sb, KERN_ERR, "Failed to create an mb_cache");
- goto failed_mount_wq;
- }
+ sbi->s_mb_cache = ext4_xattr_create_cache();
+ if (!sbi->s_mb_cache) {
+ ext4_msg(sb, KERN_ERR, "Failed to create an mb_cache");
+ goto failed_mount_wq;
}
if ((DUMMY_ENCRYPTION_ENABLED(sbi) || ext4_has_feature_encrypt(sb)) &&
@@ -4027,6 +4028,10 @@ failed_mount4:
if (EXT4_SB(sb)->rsv_conversion_wq)
destroy_workqueue(EXT4_SB(sb)->rsv_conversion_wq);
failed_mount_wq:
+ if (sbi->s_mb_cache) {
+ ext4_xattr_destroy_cache(sbi->s_mb_cache);
+ sbi->s_mb_cache = NULL;
+ }
if (sbi->s_journal) {
jbd2_journal_destroy(sbi->s_journal);
sbi->s_journal = NULL;
@@ -5321,7 +5326,6 @@ MODULE_ALIAS_FS("ext4");
/* Shared across all ext4 file systems */
wait_queue_head_t ext4__ioend_wq[EXT4_WQ_HASH_SZ];
-struct mutex ext4__aio_mutex[EXT4_WQ_HASH_SZ];
static int __init ext4_init_fs(void)
{
@@ -5334,10 +5338,8 @@ static int __init ext4_init_fs(void)
/* Build-time check for flags consistency */
ext4_check_flag_values();
- for (i = 0; i < EXT4_WQ_HASH_SZ; i++) {
- mutex_init(&ext4__aio_mutex[i]);
+ for (i = 0; i < EXT4_WQ_HASH_SZ; i++)
init_waitqueue_head(&ext4__ioend_wq[i]);
- }
err = ext4_init_es();
if (err)
@@ -5358,8 +5360,6 @@ static int __init ext4_init_fs(void)
err = ext4_init_mballoc();
if (err)
goto out2;
- else
- ext4_mballoc_ready = 1;
err = init_inodecache();
if (err)
goto out1;
@@ -5375,7 +5375,6 @@ out:
unregister_as_ext3();
destroy_inodecache();
out1:
- ext4_mballoc_ready = 0;
ext4_exit_mballoc();
out2:
ext4_exit_sysfs();
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index a95151e..0441e05 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -545,30 +545,44 @@ static void
ext4_xattr_release_block(handle_t *handle, struct inode *inode,
struct buffer_head *bh)
{
- struct mb_cache_entry *ce = NULL;
- int error = 0;
struct mb_cache *ext4_mb_cache = EXT4_GET_MB_CACHE(inode);
+ u32 hash, ref;
+ int error = 0;
- ce = mb_cache_entry_get(ext4_mb_cache, bh->b_bdev, bh->b_blocknr);
BUFFER_TRACE(bh, "get_write_access");
error = ext4_journal_get_write_access(handle, bh);
if (error)
goto out;
lock_buffer(bh);
- if (BHDR(bh)->h_refcount == cpu_to_le32(1)) {
+ hash = le32_to_cpu(BHDR(bh)->h_hash);
+ ref = le32_to_cpu(BHDR(bh)->h_refcount);
+ if (ref == 1) {
ea_bdebug(bh, "refcount now=0; freeing");
- if (ce)
- mb_cache_entry_free(ce);
+ /*
+ * This must happen under buffer lock for
+ * ext4_xattr_block_set() to reliably detect freed block
+ */
+ mb_cache_entry_delete_block(ext4_mb_cache, hash, bh->b_blocknr);
get_bh(bh);
unlock_buffer(bh);
ext4_free_blocks(handle, inode, bh, 0, 1,
EXT4_FREE_BLOCKS_METADATA |
EXT4_FREE_BLOCKS_FORGET);
} else {
- le32_add_cpu(&BHDR(bh)->h_refcount, -1);
- if (ce)
- mb_cache_entry_release(ce);
+ ref--;
+ BHDR(bh)->h_refcount = cpu_to_le32(ref);
+ if (ref == EXT4_XATTR_REFCOUNT_MAX - 1) {
+ struct mb_cache_entry *ce;
+
+ ce = mb_cache_entry_get(ext4_mb_cache, hash,
+ bh->b_blocknr);
+ if (ce) {
+ ce->e_reusable = 1;
+ mb_cache_entry_put(ext4_mb_cache, ce);
+ }
+ }
+
/*
* Beware of this ugliness: Releasing of xattr block references
* from different inodes can race and so we have to protect
@@ -790,8 +804,6 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
if (i->value && i->value_len > sb->s_blocksize)
return -ENOSPC;
if (s->base) {
- ce = mb_cache_entry_get(ext4_mb_cache, bs->bh->b_bdev,
- bs->bh->b_blocknr);
BUFFER_TRACE(bs->bh, "get_write_access");
error = ext4_journal_get_write_access(handle, bs->bh);
if (error)
@@ -799,10 +811,15 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
lock_buffer(bs->bh);
if (header(s->base)->h_refcount == cpu_to_le32(1)) {
- if (ce) {
- mb_cache_entry_free(ce);
- ce = NULL;
- }
+ __u32 hash = le32_to_cpu(BHDR(bs->bh)->h_hash);
+
+ /*
+ * This must happen under buffer lock for
+ * ext4_xattr_block_set() to reliably detect modified
+ * block
+ */
+ mb_cache_entry_delete_block(ext4_mb_cache, hash,
+ bs->bh->b_blocknr);
ea_bdebug(bs->bh, "modifying in-place");
error = ext4_xattr_set_entry(i, s);
if (!error) {
@@ -826,10 +843,6 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
int offset = (char *)s->here - bs->bh->b_data;
unlock_buffer(bs->bh);
- if (ce) {
- mb_cache_entry_release(ce);
- ce = NULL;
- }
ea_bdebug(bs->bh, "cloning");
s->base = kmalloc(bs->bh->b_size, GFP_NOFS);
error = -ENOMEM;
@@ -872,6 +885,8 @@ inserted:
if (new_bh == bs->bh)
ea_bdebug(new_bh, "keeping");
else {
+ u32 ref;
+
/* The old block is released after updating
the inode. */
error = dquot_alloc_block(inode,
@@ -884,9 +899,40 @@ inserted:
if (error)
goto cleanup_dquot;
lock_buffer(new_bh);
- le32_add_cpu(&BHDR(new_bh)->h_refcount, 1);
+ /*
+ * We have to be careful about races with
+ * freeing, rehashing or adding references to
+ * xattr block. Once we hold buffer lock xattr
+ * block's state is stable so we can check
+ * whether the block got freed / rehashed or
+ * not. Since we unhash mbcache entry under
+ * buffer lock when freeing / rehashing xattr
+ * block, checking whether entry is still
+ * hashed is reliable. Same rules hold for
+ * e_reusable handling.
+ */
+ if (hlist_bl_unhashed(&ce->e_hash_list) ||
+ !ce->e_reusable) {
+ /*
+ * Undo everything and check mbcache
+ * again.
+ */
+ unlock_buffer(new_bh);
+ dquot_free_block(inode,
+ EXT4_C2B(EXT4_SB(sb),
+ 1));
+ brelse(new_bh);
+ mb_cache_entry_put(ext4_mb_cache, ce);
+ ce = NULL;
+ new_bh = NULL;
+ goto inserted;
+ }
+ ref = le32_to_cpu(BHDR(new_bh)->h_refcount) + 1;
+ BHDR(new_bh)->h_refcount = cpu_to_le32(ref);
+ if (ref >= EXT4_XATTR_REFCOUNT_MAX)
+ ce->e_reusable = 0;
ea_bdebug(new_bh, "reusing; refcount now=%d",
- le32_to_cpu(BHDR(new_bh)->h_refcount));
+ ref);
unlock_buffer(new_bh);
error = ext4_handle_dirty_xattr_block(handle,
inode,
@@ -894,7 +940,8 @@ inserted:
if (error)
goto cleanup_dquot;
}
- mb_cache_entry_release(ce);
+ mb_cache_entry_touch(ext4_mb_cache, ce);
+ mb_cache_entry_put(ext4_mb_cache, ce);
ce = NULL;
} else if (bs->bh && s->base == bs->bh->b_data) {
/* We were modifying this block in-place. */
@@ -959,7 +1006,7 @@ getblk_failed:
cleanup:
if (ce)
- mb_cache_entry_release(ce);
+ mb_cache_entry_put(ext4_mb_cache, ce);
brelse(new_bh);
if (!(bs->bh && s->base == bs->bh->b_data))
kfree(s->base);
@@ -1070,6 +1117,17 @@ static int ext4_xattr_ibody_set(handle_t *handle, struct inode *inode,
return 0;
}
+static int ext4_xattr_value_same(struct ext4_xattr_search *s,
+ struct ext4_xattr_info *i)
+{
+ void *value;
+
+ if (le32_to_cpu(s->here->e_value_size) != i->value_len)
+ return 0;
+ value = ((void *)s->base) + le16_to_cpu(s->here->e_value_offs);
+ return !memcmp(value, i->value, i->value_len);
+}
+
/*
* ext4_xattr_set_handle()
*
@@ -1146,6 +1204,13 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index,
else if (!bs.s.not_found)
error = ext4_xattr_block_set(handle, inode, &i, &bs);
} else {
+ error = 0;
+ /* Xattr value did not change? Save us some work and bail out */
+ if (!is.s.not_found && ext4_xattr_value_same(&is.s, &i))
+ goto cleanup;
+ if (!bs.s.not_found && ext4_xattr_value_same(&bs.s, &i))
+ goto cleanup;
+
error = ext4_xattr_ibody_set(handle, inode, &i, &is);
if (!error && !bs.s.not_found) {
i.value = NULL;
@@ -1512,17 +1577,6 @@ cleanup:
}
/*
- * ext4_xattr_put_super()
- *
- * This is called when a file system is unmounted.
- */
-void
-ext4_xattr_put_super(struct super_block *sb)
-{
- mb_cache_shrink(sb->s_bdev);
-}
-
-/*
* ext4_xattr_cache_insert()
*
* Create a new entry in the extended attribute cache, and insert
@@ -1533,26 +1587,19 @@ ext4_xattr_put_super(struct super_block *sb)
static void
ext4_xattr_cache_insert(struct mb_cache *ext4_mb_cache, struct buffer_head *bh)
{
- __u32 hash = le32_to_cpu(BHDR(bh)->h_hash);
- struct mb_cache_entry *ce;
+ struct ext4_xattr_header *header = BHDR(bh);
+ __u32 hash = le32_to_cpu(header->h_hash);
+ int reusable = le32_to_cpu(header->h_refcount) <
+ EXT4_XATTR_REFCOUNT_MAX;
int error;
- ce = mb_cache_entry_alloc(ext4_mb_cache, GFP_NOFS);
- if (!ce) {
- ea_bdebug(bh, "out of memory");
- return;
- }
- error = mb_cache_entry_insert(ce, bh->b_bdev, bh->b_blocknr, hash);
+ error = mb_cache_entry_create(ext4_mb_cache, GFP_NOFS, hash,
+ bh->b_blocknr, reusable);
if (error) {
- mb_cache_entry_free(ce);
- if (error == -EBUSY) {
+ if (error == -EBUSY)
ea_bdebug(bh, "already in cache");
- error = 0;
- }
- } else {
+ } else
ea_bdebug(bh, "inserting [%x]", (int)hash);
- mb_cache_entry_release(ce);
- }
}
/*
@@ -1614,33 +1661,20 @@ ext4_xattr_cache_find(struct inode *inode, struct ext4_xattr_header *header,
if (!header->h_hash)
return NULL; /* never share */
ea_idebug(inode, "looking for cached blocks [%x]", (int)hash);
-again:
- ce = mb_cache_entry_find_first(ext4_mb_cache, inode->i_sb->s_bdev,
- hash);
+ ce = mb_cache_entry_find_first(ext4_mb_cache, hash);
while (ce) {
struct buffer_head *bh;
- if (IS_ERR(ce)) {
- if (PTR_ERR(ce) == -EAGAIN)
- goto again;
- break;
- }
bh = sb_bread(inode->i_sb, ce->e_block);
if (!bh) {
EXT4_ERROR_INODE(inode, "block %lu read error",
(unsigned long) ce->e_block);
- } else if (le32_to_cpu(BHDR(bh)->h_refcount) >=
- EXT4_XATTR_REFCOUNT_MAX) {
- ea_idebug(inode, "block %lu refcount %d>=%d",
- (unsigned long) ce->e_block,
- le32_to_cpu(BHDR(bh)->h_refcount),
- EXT4_XATTR_REFCOUNT_MAX);
} else if (ext4_xattr_cmp(header, BHDR(bh)) == 0) {
*pce = ce;
return bh;
}
brelse(bh);
- ce = mb_cache_entry_find_next(ce, inode->i_sb->s_bdev, hash);
+ ce = mb_cache_entry_find_next(ext4_mb_cache, ce);
}
return NULL;
}
@@ -1716,9 +1750,9 @@ static void ext4_xattr_rehash(struct ext4_xattr_header *header,
#define HASH_BUCKET_BITS 10
struct mb_cache *
-ext4_xattr_create_cache(char *name)
+ext4_xattr_create_cache(void)
{
- return mb_cache_create(name, HASH_BUCKET_BITS);
+ return mb_cache_create(HASH_BUCKET_BITS);
}
void ext4_xattr_destroy_cache(struct mb_cache *cache)
diff --git a/fs/ext4/xattr.h b/fs/ext4/xattr.h
index ddc0957..69dd3e6 100644
--- a/fs/ext4/xattr.h
+++ b/fs/ext4/xattr.h
@@ -108,7 +108,6 @@ extern int ext4_xattr_set(struct inode *, int, const char *, const void *, size_
extern int ext4_xattr_set_handle(handle_t *, struct inode *, int, const char *, const void *, size_t, int);
extern void ext4_xattr_delete_inode(handle_t *, struct inode *);
-extern void ext4_xattr_put_super(struct super_block *);
extern int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize,
struct ext4_inode *raw_inode, handle_t *handle);
@@ -124,7 +123,7 @@ extern int ext4_xattr_ibody_inline_set(handle_t *handle, struct inode *inode,
struct ext4_xattr_info *i,
struct ext4_xattr_ibody_find *is);
-extern struct mb_cache *ext4_xattr_create_cache(char *name);
+extern struct mb_cache *ext4_xattr_create_cache(void);
extern void ext4_xattr_destroy_cache(struct mb_cache *);
#ifdef CONFIG_EXT4_FS_SECURITY
diff --git a/fs/f2fs/crypto.c b/fs/f2fs/crypto.c
index 4a62ef1..95c5cf0 100644
--- a/fs/f2fs/crypto.c
+++ b/fs/f2fs/crypto.c
@@ -23,11 +23,9 @@
* The usage of AES-XTS should conform to recommendations in NIST
* Special Publication 800-38E and IEEE P1619/D16.
*/
-#include <crypto/hash.h>
-#include <crypto/sha.h>
+#include <crypto/skcipher.h>
#include <keys/user-type.h>
#include <keys/encrypted-type.h>
-#include <linux/crypto.h>
#include <linux/ecryptfs.h>
#include <linux/gfp.h>
#include <linux/kernel.h>
@@ -328,21 +326,21 @@ static int f2fs_page_crypto(struct f2fs_crypto_ctx *ctx,
struct page *dest_page)
{
u8 xts_tweak[F2FS_XTS_TWEAK_SIZE];
- struct ablkcipher_request *req = NULL;
+ struct skcipher_request *req = NULL;
DECLARE_F2FS_COMPLETION_RESULT(ecr);
struct scatterlist dst, src;
struct f2fs_crypt_info *ci = F2FS_I(inode)->i_crypt_info;
- struct crypto_ablkcipher *tfm = ci->ci_ctfm;
+ struct crypto_skcipher *tfm = ci->ci_ctfm;
int res = 0;
- req = ablkcipher_request_alloc(tfm, GFP_NOFS);
+ req = skcipher_request_alloc(tfm, GFP_NOFS);
if (!req) {
printk_ratelimited(KERN_ERR
"%s: crypto_request_alloc() failed\n",
__func__);
return -ENOMEM;
}
- ablkcipher_request_set_callback(
+ skcipher_request_set_callback(
req, CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP,
f2fs_crypt_complete, &ecr);
@@ -355,21 +353,21 @@ static int f2fs_page_crypto(struct f2fs_crypto_ctx *ctx,
sg_set_page(&dst, dest_page, PAGE_CACHE_SIZE, 0);
sg_init_table(&src, 1);
sg_set_page(&src, src_page, PAGE_CACHE_SIZE, 0);
- ablkcipher_request_set_crypt(req, &src, &dst, PAGE_CACHE_SIZE,
- xts_tweak);
+ skcipher_request_set_crypt(req, &src, &dst, PAGE_CACHE_SIZE,
+ xts_tweak);
if (rw == F2FS_DECRYPT)
- res = crypto_ablkcipher_decrypt(req);
+ res = crypto_skcipher_decrypt(req);
else
- res = crypto_ablkcipher_encrypt(req);
+ res = crypto_skcipher_encrypt(req);
if (res == -EINPROGRESS || res == -EBUSY) {
BUG_ON(req->base.data != &ecr);
wait_for_completion(&ecr.completion);
res = ecr.res;
}
- ablkcipher_request_free(req);
+ skcipher_request_free(req);
if (res) {
printk_ratelimited(KERN_ERR
- "%s: crypto_ablkcipher_encrypt() returned %d\n",
+ "%s: crypto_skcipher_encrypt() returned %d\n",
__func__, res);
return res;
}
diff --git a/fs/f2fs/crypto_fname.c b/fs/f2fs/crypto_fname.c
index ab377d4..16aec66 100644
--- a/fs/f2fs/crypto_fname.c
+++ b/fs/f2fs/crypto_fname.c
@@ -15,11 +15,9 @@
*
* This has not yet undergone a rigorous security audit.
*/
-#include <crypto/hash.h>
-#include <crypto/sha.h>
+#include <crypto/skcipher.h>
#include <keys/encrypted-type.h>
#include <keys/user-type.h>
-#include <linux/crypto.h>
#include <linux/gfp.h>
#include <linux/kernel.h>
#include <linux/key.h>
@@ -70,10 +68,10 @@ static int f2fs_fname_encrypt(struct inode *inode,
const struct qstr *iname, struct f2fs_str *oname)
{
u32 ciphertext_len;
- struct ablkcipher_request *req = NULL;
+ struct skcipher_request *req = NULL;
DECLARE_F2FS_COMPLETION_RESULT(ecr);
struct f2fs_crypt_info *ci = F2FS_I(inode)->i_crypt_info;
- struct crypto_ablkcipher *tfm = ci->ci_ctfm;
+ struct crypto_skcipher *tfm = ci->ci_ctfm;
int res = 0;
char iv[F2FS_CRYPTO_BLOCK_SIZE];
struct scatterlist src_sg, dst_sg;
@@ -99,14 +97,14 @@ static int f2fs_fname_encrypt(struct inode *inode,
}
/* Allocate request */
- req = ablkcipher_request_alloc(tfm, GFP_NOFS);
+ req = skcipher_request_alloc(tfm, GFP_NOFS);
if (!req) {
printk_ratelimited(KERN_ERR
"%s: crypto_request_alloc() failed\n", __func__);
kfree(alloc_buf);
return -ENOMEM;
}
- ablkcipher_request_set_callback(req,
+ skcipher_request_set_callback(req,
CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP,
f2fs_dir_crypt_complete, &ecr);
@@ -121,15 +119,15 @@ static int f2fs_fname_encrypt(struct inode *inode,
/* Create encryption request */
sg_init_one(&src_sg, workbuf, ciphertext_len);
sg_init_one(&dst_sg, oname->name, ciphertext_len);
- ablkcipher_request_set_crypt(req, &src_sg, &dst_sg, ciphertext_len, iv);
- res = crypto_ablkcipher_encrypt(req);
+ skcipher_request_set_crypt(req, &src_sg, &dst_sg, ciphertext_len, iv);
+ res = crypto_skcipher_encrypt(req);
if (res == -EINPROGRESS || res == -EBUSY) {
BUG_ON(req->base.data != &ecr);
wait_for_completion(&ecr.completion);
res = ecr.res;
}
kfree(alloc_buf);
- ablkcipher_request_free(req);
+ skcipher_request_free(req);
if (res < 0) {
printk_ratelimited(KERN_ERR
"%s: Error (error code %d)\n", __func__, res);
@@ -148,11 +146,11 @@ static int f2fs_fname_encrypt(struct inode *inode,
static int f2fs_fname_decrypt(struct inode *inode,
const struct f2fs_str *iname, struct f2fs_str *oname)
{
- struct ablkcipher_request *req = NULL;
+ struct skcipher_request *req = NULL;
DECLARE_F2FS_COMPLETION_RESULT(ecr);
struct scatterlist src_sg, dst_sg;
struct f2fs_crypt_info *ci = F2FS_I(inode)->i_crypt_info;
- struct crypto_ablkcipher *tfm = ci->ci_ctfm;
+ struct crypto_skcipher *tfm = ci->ci_ctfm;
int res = 0;
char iv[F2FS_CRYPTO_BLOCK_SIZE];
unsigned lim = max_name_len(inode);
@@ -161,13 +159,13 @@ static int f2fs_fname_decrypt(struct inode *inode,
return -EIO;
/* Allocate request */
- req = ablkcipher_request_alloc(tfm, GFP_NOFS);
+ req = skcipher_request_alloc(tfm, GFP_NOFS);
if (!req) {
printk_ratelimited(KERN_ERR
"%s: crypto_request_alloc() failed\n", __func__);
return -ENOMEM;
}
- ablkcipher_request_set_callback(req,
+ skcipher_request_set_callback(req,
CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP,
f2fs_dir_crypt_complete, &ecr);
@@ -177,14 +175,14 @@ static int f2fs_fname_decrypt(struct inode *inode,
/* Create decryption request */
sg_init_one(&src_sg, iname->name, iname->len);
sg_init_one(&dst_sg, oname->name, oname->len);
- ablkcipher_request_set_crypt(req, &src_sg, &dst_sg, iname->len, iv);
- res = crypto_ablkcipher_decrypt(req);
+ skcipher_request_set_crypt(req, &src_sg, &dst_sg, iname->len, iv);
+ res = crypto_skcipher_decrypt(req);
if (res == -EINPROGRESS || res == -EBUSY) {
BUG_ON(req->base.data != &ecr);
wait_for_completion(&ecr.completion);
res = ecr.res;
}
- ablkcipher_request_free(req);
+ skcipher_request_free(req);
if (res < 0) {
printk_ratelimited(KERN_ERR
"%s: Error in f2fs_fname_decrypt (error code %d)\n",
diff --git a/fs/f2fs/crypto_key.c b/fs/f2fs/crypto_key.c
index 5de2d86..2aeb627 100644
--- a/fs/f2fs/crypto_key.c
+++ b/fs/f2fs/crypto_key.c
@@ -14,7 +14,7 @@
#include <linux/random.h>
#include <linux/scatterlist.h>
#include <uapi/linux/keyctl.h>
-#include <crypto/hash.h>
+#include <crypto/skcipher.h>
#include <linux/f2fs_fs.h>
#include "f2fs.h"
@@ -44,46 +44,43 @@ static int f2fs_derive_key_aes(char deriving_key[F2FS_AES_128_ECB_KEY_SIZE],
char derived_key[F2FS_AES_256_XTS_KEY_SIZE])
{
int res = 0;
- struct ablkcipher_request *req = NULL;
+ struct skcipher_request *req = NULL;
DECLARE_F2FS_COMPLETION_RESULT(ecr);
struct scatterlist src_sg, dst_sg;
- struct crypto_ablkcipher *tfm = crypto_alloc_ablkcipher("ecb(aes)", 0,
- 0);
+ struct crypto_skcipher *tfm = crypto_alloc_skcipher("ecb(aes)", 0, 0);
if (IS_ERR(tfm)) {
res = PTR_ERR(tfm);
tfm = NULL;
goto out;
}
- crypto_ablkcipher_set_flags(tfm, CRYPTO_TFM_REQ_WEAK_KEY);
- req = ablkcipher_request_alloc(tfm, GFP_NOFS);
+ crypto_skcipher_set_flags(tfm, CRYPTO_TFM_REQ_WEAK_KEY);
+ req = skcipher_request_alloc(tfm, GFP_NOFS);
if (!req) {
res = -ENOMEM;
goto out;
}
- ablkcipher_request_set_callback(req,
+ skcipher_request_set_callback(req,
CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP,
derive_crypt_complete, &ecr);
- res = crypto_ablkcipher_setkey(tfm, deriving_key,
+ res = crypto_skcipher_setkey(tfm, deriving_key,
F2FS_AES_128_ECB_KEY_SIZE);
if (res < 0)
goto out;
sg_init_one(&src_sg, source_key, F2FS_AES_256_XTS_KEY_SIZE);
sg_init_one(&dst_sg, derived_key, F2FS_AES_256_XTS_KEY_SIZE);
- ablkcipher_request_set_crypt(req, &src_sg, &dst_sg,
+ skcipher_request_set_crypt(req, &src_sg, &dst_sg,
F2FS_AES_256_XTS_KEY_SIZE, NULL);
- res = crypto_ablkcipher_encrypt(req);
+ res = crypto_skcipher_encrypt(req);
if (res == -EINPROGRESS || res == -EBUSY) {
BUG_ON(req->base.data != &ecr);
wait_for_completion(&ecr.completion);
res = ecr.res;
}
out:
- if (req)
- ablkcipher_request_free(req);
- if (tfm)
- crypto_free_ablkcipher(tfm);
+ skcipher_request_free(req);
+ crypto_free_skcipher(tfm);
return res;
}
@@ -93,7 +90,7 @@ static void f2fs_free_crypt_info(struct f2fs_crypt_info *ci)
return;
key_put(ci->ci_keyring_key);
- crypto_free_ablkcipher(ci->ci_ctfm);
+ crypto_free_skcipher(ci->ci_ctfm);
kmem_cache_free(f2fs_crypt_info_cachep, ci);
}
@@ -123,7 +120,7 @@ int _f2fs_get_encryption_info(struct inode *inode)
struct f2fs_encryption_key *master_key;
struct f2fs_encryption_context ctx;
const struct user_key_payload *ukp;
- struct crypto_ablkcipher *ctfm;
+ struct crypto_skcipher *ctfm;
const char *cipher_str;
char raw_key[F2FS_MAX_KEY_SIZE];
char mode;
@@ -213,7 +210,7 @@ retry:
if (res)
goto out;
- ctfm = crypto_alloc_ablkcipher(cipher_str, 0, 0);
+ ctfm = crypto_alloc_skcipher(cipher_str, 0, 0);
if (!ctfm || IS_ERR(ctfm)) {
res = ctfm ? PTR_ERR(ctfm) : -ENOMEM;
printk(KERN_DEBUG
@@ -222,11 +219,10 @@ retry:
goto out;
}
crypt_info->ci_ctfm = ctfm;
- crypto_ablkcipher_clear_flags(ctfm, ~0);
- crypto_tfm_set_flags(crypto_ablkcipher_tfm(ctfm),
- CRYPTO_TFM_REQ_WEAK_KEY);
- res = crypto_ablkcipher_setkey(ctfm, raw_key,
- f2fs_encryption_key_size(mode));
+ crypto_skcipher_clear_flags(ctfm, ~0);
+ crypto_skcipher_set_flags(ctfm, CRYPTO_TFM_REQ_WEAK_KEY);
+ res = crypto_skcipher_setkey(ctfm, raw_key,
+ f2fs_encryption_key_size(mode));
if (res)
goto out;
diff --git a/fs/f2fs/f2fs_crypto.h b/fs/f2fs/f2fs_crypto.h
index c2c1c2b..ea3d1d7 100644
--- a/fs/f2fs/f2fs_crypto.h
+++ b/fs/f2fs/f2fs_crypto.h
@@ -78,7 +78,7 @@ struct f2fs_crypt_info {
char ci_data_mode;
char ci_filename_mode;
char ci_flags;
- struct crypto_ablkcipher *ci_ctfm;
+ struct crypto_skcipher *ci_ctfm;
struct key *ci_keyring_key;
char ci_master_key[F2FS_KEY_DESCRIPTOR_SIZE];
};
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index 93f0746..aa016e4 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -1082,7 +1082,7 @@ static ssize_t gfs2_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
* the first place, mapping->nr_pages will always be zero.
*/
if (mapping->nrpages) {
- loff_t lstart = offset & (PAGE_CACHE_SIZE - 1);
+ loff_t lstart = offset & ~(PAGE_CACHE_SIZE - 1);
loff_t len = iov_iter_count(iter);
loff_t end = PAGE_ALIGN(offset + len) - 1;
diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c
index 6a92592..4a01f30 100644
--- a/fs/gfs2/dir.c
+++ b/fs/gfs2/dir.c
@@ -798,7 +798,7 @@ static int get_first_leaf(struct gfs2_inode *dip, u32 index,
int error;
error = get_leaf_nr(dip, index, &leaf_no);
- if (!error)
+ if (!IS_ERR_VALUE(error))
error = get_leaf(dip, leaf_no, bh_out);
return error;
@@ -1014,7 +1014,7 @@ static int dir_split_leaf(struct inode *inode, const struct qstr *name)
index = name->hash >> (32 - dip->i_depth);
error = get_leaf_nr(dip, index, &leaf_no);
- if (error)
+ if (IS_ERR_VALUE(error))
return error;
/* Get the old leaf block */
@@ -1660,7 +1660,7 @@ struct inode *gfs2_dir_search(struct inode *dir, const struct qstr *name,
brelse(bh);
if (fail_on_exist)
return ERR_PTR(-EEXIST);
- inode = gfs2_inode_lookup(dir->i_sb, dtype, addr, formal_ino, 0);
+ inode = gfs2_inode_lookup(dir->i_sb, dtype, addr, formal_ino);
if (!IS_ERR(inode))
GFS2_I(inode)->i_rahead = rahead;
return inode;
diff --git a/fs/gfs2/export.c b/fs/gfs2/export.c
index 5d15e94..d5bda85 100644
--- a/fs/gfs2/export.c
+++ b/fs/gfs2/export.c
@@ -137,7 +137,7 @@ static struct dentry *gfs2_get_dentry(struct super_block *sb,
struct gfs2_sbd *sdp = sb->s_fs_info;
struct inode *inode;
- inode = gfs2_ilookup(sb, inum->no_addr, 0);
+ inode = gfs2_ilookup(sb, inum->no_addr);
if (inode) {
if (GFS2_I(inode)->i_no_formal_ino != inum->no_formal_ino) {
iput(inode);
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index a4ff7b5..6539131 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -572,17 +572,24 @@ static void delete_work_func(struct work_struct *work)
struct inode *inode;
u64 no_addr = gl->gl_name.ln_number;
+ /* If someone's using this glock to create a new dinode, the block must
+ have been freed by another node, then re-used, in which case our
+ iopen callback is too late after the fact. Ignore it. */
+ if (test_bit(GLF_INODE_CREATING, &gl->gl_flags))
+ goto out;
+
ip = gl->gl_object;
/* Note: Unsafe to dereference ip as we don't hold right refs/locks */
if (ip)
- inode = gfs2_ilookup(sdp->sd_vfs, no_addr, 1);
+ inode = gfs2_ilookup(sdp->sd_vfs, no_addr);
else
inode = gfs2_lookup_by_inum(sdp, no_addr, NULL, GFS2_BLKST_UNLINKED);
if (inode && !IS_ERR(inode)) {
d_prune_aliases(inode);
iput(inode);
}
+out:
gfs2_glock_put(gl);
}
@@ -1015,6 +1022,7 @@ void gfs2_glock_dq(struct gfs2_holder *gh)
handle_callback(gl, LM_ST_UNLOCKED, 0, false);
list_del_init(&gh->gh_list);
+ clear_bit(HIF_HOLDER, &gh->gh_iflags);
if (find_first_holder(gl) == NULL) {
if (glops->go_unlock) {
GLOCK_BUG_ON(gl, test_and_set_bit(GLF_LOCK, &gl->gl_flags));
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 845fb09..a6a3389 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -328,6 +328,7 @@ enum {
GLF_LRU = 13,
GLF_OBJECT = 14, /* Used only for tracing */
GLF_BLOCKING = 15,
+ GLF_INODE_CREATING = 16, /* Inode creation occurring */
};
struct gfs2_glock {
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 352f958..bb30f9a 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -37,61 +37,9 @@
#include "super.h"
#include "glops.h"
-struct gfs2_skip_data {
- u64 no_addr;
- int skipped;
- int non_block;
-};
-
-static int iget_test(struct inode *inode, void *opaque)
-{
- struct gfs2_inode *ip = GFS2_I(inode);
- struct gfs2_skip_data *data = opaque;
-
- if (ip->i_no_addr == data->no_addr) {
- if (data->non_block &&
- inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE)) {
- data->skipped = 1;
- return 0;
- }
- return 1;
- }
- return 0;
-}
-
-static int iget_set(struct inode *inode, void *opaque)
-{
- struct gfs2_inode *ip = GFS2_I(inode);
- struct gfs2_skip_data *data = opaque;
-
- if (data->skipped)
- return -ENOENT;
- inode->i_ino = (unsigned long)(data->no_addr);
- ip->i_no_addr = data->no_addr;
- return 0;
-}
-
-struct inode *gfs2_ilookup(struct super_block *sb, u64 no_addr, int non_block)
+struct inode *gfs2_ilookup(struct super_block *sb, u64 no_addr)
{
- unsigned long hash = (unsigned long)no_addr;
- struct gfs2_skip_data data;
-
- data.no_addr = no_addr;
- data.skipped = 0;
- data.non_block = non_block;
- return ilookup5(sb, hash, iget_test, &data);
-}
-
-static struct inode *gfs2_iget(struct super_block *sb, u64 no_addr,
- int non_block)
-{
- struct gfs2_skip_data data;
- unsigned long hash = (unsigned long)no_addr;
-
- data.no_addr = no_addr;
- data.skipped = 0;
- data.non_block = non_block;
- return iget5_locked(sb, hash, iget_test, iget_set, &data);
+ return ilookup(sb, (unsigned long)no_addr);
}
/**
@@ -132,21 +80,21 @@ static void gfs2_set_iop(struct inode *inode)
* @sb: The super block
* @no_addr: The inode number
* @type: The type of the inode
- * non_block: Can we block on inodes that are being freed?
*
* Returns: A VFS inode, or an error
*/
struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned int type,
- u64 no_addr, u64 no_formal_ino, int non_block)
+ u64 no_addr, u64 no_formal_ino)
{
struct inode *inode;
struct gfs2_inode *ip;
struct gfs2_glock *io_gl = NULL;
int error;
- inode = gfs2_iget(sb, no_addr, non_block);
+ inode = iget_locked(sb, (unsigned long)no_addr);
ip = GFS2_I(inode);
+ ip->i_no_addr = no_addr;
if (!inode)
return ERR_PTR(-ENOMEM);
@@ -221,7 +169,7 @@ struct inode *gfs2_lookup_by_inum(struct gfs2_sbd *sdp, u64 no_addr,
if (error)
goto fail;
- inode = gfs2_inode_lookup(sb, DT_UNKNOWN, no_addr, 0, 1);
+ inode = gfs2_inode_lookup(sb, DT_UNKNOWN, no_addr, 0);
if (IS_ERR(inode))
goto fail;
@@ -592,7 +540,7 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
struct inode *inode = NULL;
struct gfs2_inode *dip = GFS2_I(dir), *ip;
struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
- struct gfs2_glock *io_gl;
+ struct gfs2_glock *io_gl = NULL;
int error, free_vfs_inode = 1;
u32 aflags = 0;
unsigned blocks = 1;
@@ -729,6 +677,8 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
if (error)
goto fail_gunlock2;
+ BUG_ON(test_and_set_bit(GLF_INODE_CREATING, &io_gl->gl_flags));
+
error = gfs2_glock_nq_init(io_gl, LM_ST_SHARED, GL_EXACT, &ip->i_iopen_gh);
if (error)
goto fail_gunlock2;
@@ -771,12 +721,15 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
}
gfs2_glock_dq_uninit(ghs);
gfs2_glock_dq_uninit(ghs + 1);
+ clear_bit(GLF_INODE_CREATING, &io_gl->gl_flags);
return error;
fail_gunlock3:
gfs2_glock_dq_uninit(&ip->i_iopen_gh);
gfs2_glock_put(io_gl);
fail_gunlock2:
+ if (io_gl)
+ clear_bit(GLF_INODE_CREATING, &io_gl->gl_flags);
gfs2_glock_dq_uninit(ghs + 1);
fail_free_inode:
if (ip->i_gl)
diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h
index ba4d949..e1af0d4 100644
--- a/fs/gfs2/inode.h
+++ b/fs/gfs2/inode.h
@@ -94,12 +94,11 @@ err:
}
extern struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned type,
- u64 no_addr, u64 no_formal_ino,
- int non_block);
+ u64 no_addr, u64 no_formal_ino);
extern struct inode *gfs2_lookup_by_inum(struct gfs2_sbd *sdp, u64 no_addr,
u64 *no_formal_ino,
unsigned int blktype);
-extern struct inode *gfs2_ilookup(struct super_block *sb, u64 no_addr, int nonblock);
+extern struct inode *gfs2_ilookup(struct super_block *sb, u64 no_addr);
extern int gfs2_inode_refresh(struct gfs2_inode *ip);
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index dbed9e2..49b0bff 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -454,7 +454,7 @@ static int gfs2_lookup_root(struct super_block *sb, struct dentry **dptr,
struct dentry *dentry;
struct inode *inode;
- inode = gfs2_inode_lookup(sb, DT_DIR, no_addr, 0, 0);
+ inode = gfs2_inode_lookup(sb, DT_DIR, no_addr, 0);
if (IS_ERR(inode)) {
fs_err(sdp, "can't read in %s inode: %ld\n", name, PTR_ERR(inode));
return PTR_ERR(inode);
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index 8f960a5..f8a0cd8 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -1551,12 +1551,16 @@ static void gfs2_evict_inode(struct inode *inode)
goto out_truncate;
}
- ip->i_iopen_gh.gh_flags |= GL_NOCACHE;
- gfs2_glock_dq_wait(&ip->i_iopen_gh);
- gfs2_holder_reinit(LM_ST_EXCLUSIVE, LM_FLAG_TRY_1CB | GL_NOCACHE, &ip->i_iopen_gh);
- error = gfs2_glock_nq(&ip->i_iopen_gh);
- if (error)
- goto out_truncate;
+ if (ip->i_iopen_gh.gh_gl &&
+ test_bit(HIF_HOLDER, &ip->i_iopen_gh.gh_iflags)) {
+ ip->i_iopen_gh.gh_flags |= GL_NOCACHE;
+ gfs2_glock_dq_wait(&ip->i_iopen_gh);
+ gfs2_holder_reinit(LM_ST_EXCLUSIVE, LM_FLAG_TRY_1CB | GL_NOCACHE,
+ &ip->i_iopen_gh);
+ error = gfs2_glock_nq(&ip->i_iopen_gh);
+ if (error)
+ goto out_truncate;
+ }
/* Case 1 starts here */
@@ -1606,11 +1610,13 @@ out_unlock:
if (gfs2_rs_active(&ip->i_res))
gfs2_rs_deltree(&ip->i_res);
- if (test_bit(HIF_HOLDER, &ip->i_iopen_gh.gh_iflags)) {
- ip->i_iopen_gh.gh_flags |= GL_NOCACHE;
- gfs2_glock_dq_wait(&ip->i_iopen_gh);
+ if (ip->i_iopen_gh.gh_gl) {
+ if (test_bit(HIF_HOLDER, &ip->i_iopen_gh.gh_iflags)) {
+ ip->i_iopen_gh.gh_flags |= GL_NOCACHE;
+ gfs2_glock_dq_wait(&ip->i_iopen_gh);
+ }
+ gfs2_holder_uninit(&ip->i_iopen_gh);
}
- gfs2_holder_uninit(&ip->i_iopen_gh);
gfs2_glock_dq_uninit(&gh);
if (error && error != GLR_TRYFAILED && error != -EROFS)
fs_warn(sdp, "gfs2_evict_inode: %d\n", error);
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index 36345fe..517f2de 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -131,14 +131,12 @@ static int journal_submit_commit_record(journal_t *journal,
if (is_journal_aborted(journal))
return 0;
- bh = jbd2_journal_get_descriptor_buffer(journal);
+ bh = jbd2_journal_get_descriptor_buffer(commit_transaction,
+ JBD2_COMMIT_BLOCK);
if (!bh)
return 1;
tmp = (struct commit_header *)bh->b_data;
- tmp->h_magic = cpu_to_be32(JBD2_MAGIC_NUMBER);
- tmp->h_blocktype = cpu_to_be32(JBD2_COMMIT_BLOCK);
- tmp->h_sequence = cpu_to_be32(commit_transaction->t_tid);
tmp->h_commit_sec = cpu_to_be64(now.tv_sec);
tmp->h_commit_nsec = cpu_to_be32(now.tv_nsec);
@@ -222,7 +220,7 @@ static int journal_submit_data_buffers(journal_t *journal,
spin_lock(&journal->j_list_lock);
list_for_each_entry(jinode, &commit_transaction->t_inode_list, i_list) {
mapping = jinode->i_vfs_inode->i_mapping;
- set_bit(__JI_COMMIT_RUNNING, &jinode->i_flags);
+ jinode->i_flags |= JI_COMMIT_RUNNING;
spin_unlock(&journal->j_list_lock);
/*
* submit the inode data buffers. We use writepage
@@ -236,8 +234,8 @@ static int journal_submit_data_buffers(journal_t *journal,
ret = err;
spin_lock(&journal->j_list_lock);
J_ASSERT(jinode->i_transaction == commit_transaction);
- clear_bit(__JI_COMMIT_RUNNING, &jinode->i_flags);
- smp_mb__after_atomic();
+ jinode->i_flags &= ~JI_COMMIT_RUNNING;
+ smp_mb();
wake_up_bit(&jinode->i_flags, __JI_COMMIT_RUNNING);
}
spin_unlock(&journal->j_list_lock);
@@ -258,7 +256,7 @@ static int journal_finish_inode_data_buffers(journal_t *journal,
/* For locking, see the comment in journal_submit_data_buffers() */
spin_lock(&journal->j_list_lock);
list_for_each_entry(jinode, &commit_transaction->t_inode_list, i_list) {
- set_bit(__JI_COMMIT_RUNNING, &jinode->i_flags);
+ jinode->i_flags |= JI_COMMIT_RUNNING;
spin_unlock(&journal->j_list_lock);
err = filemap_fdatawait(jinode->i_vfs_inode->i_mapping);
if (err) {
@@ -274,8 +272,8 @@ static int journal_finish_inode_data_buffers(journal_t *journal,
ret = err;
}
spin_lock(&journal->j_list_lock);
- clear_bit(__JI_COMMIT_RUNNING, &jinode->i_flags);
- smp_mb__after_atomic();
+ jinode->i_flags &= ~JI_COMMIT_RUNNING;
+ smp_mb();
wake_up_bit(&jinode->i_flags, __JI_COMMIT_RUNNING);
}
@@ -319,22 +317,6 @@ static void write_tag_block(journal_t *j, journal_block_tag_t *tag,
tag->t_blocknr_high = cpu_to_be32((block >> 31) >> 1);
}
-static void jbd2_descr_block_csum_set(journal_t *j,
- struct buffer_head *bh)
-{
- struct jbd2_journal_block_tail *tail;
- __u32 csum;
-
- if (!jbd2_journal_has_csum_v2or3(j))
- return;
-
- tail = (struct jbd2_journal_block_tail *)(bh->b_data + j->j_blocksize -
- sizeof(struct jbd2_journal_block_tail));
- tail->t_checksum = 0;
- csum = jbd2_chksum(j, j->j_csum_seed, bh->b_data, j->j_blocksize);
- tail->t_checksum = cpu_to_be32(csum);
-}
-
static void jbd2_block_tag_csum_set(journal_t *j, journal_block_tag_t *tag,
struct buffer_head *bh, __u32 sequence)
{
@@ -379,7 +361,6 @@ void jbd2_journal_commit_transaction(journal_t *journal)
ktime_t start_time;
u64 commit_time;
char *tagp = NULL;
- journal_header_t *header;
journal_block_tag_t *tag = NULL;
int space_left = 0;
int first_tag = 0;
@@ -554,8 +535,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
jbd2_journal_abort(journal, err);
blk_start_plug(&plug);
- jbd2_journal_write_revoke_records(journal, commit_transaction,
- &log_bufs, WRITE_SYNC);
+ jbd2_journal_write_revoke_records(commit_transaction, &log_bufs);
jbd_debug(3, "JBD2: commit phase 2b\n");
@@ -616,7 +596,9 @@ void jbd2_journal_commit_transaction(journal_t *journal)
jbd_debug(4, "JBD2: get descriptor\n");
- descriptor = jbd2_journal_get_descriptor_buffer(journal);
+ descriptor = jbd2_journal_get_descriptor_buffer(
+ commit_transaction,
+ JBD2_DESCRIPTOR_BLOCK);
if (!descriptor) {
jbd2_journal_abort(journal, -EIO);
continue;
@@ -625,11 +607,6 @@ void jbd2_journal_commit_transaction(journal_t *journal)
jbd_debug(4, "JBD2: got buffer %llu (%p)\n",
(unsigned long long)descriptor->b_blocknr,
descriptor->b_data);
- header = (journal_header_t *)descriptor->b_data;
- header->h_magic = cpu_to_be32(JBD2_MAGIC_NUMBER);
- header->h_blocktype = cpu_to_be32(JBD2_DESCRIPTOR_BLOCK);
- header->h_sequence = cpu_to_be32(commit_transaction->t_tid);
-
tagp = &descriptor->b_data[sizeof(journal_header_t)];
space_left = descriptor->b_size -
sizeof(journal_header_t);
@@ -721,7 +698,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
tag->t_flags |= cpu_to_be16(JBD2_FLAG_LAST_TAG);
- jbd2_descr_block_csum_set(journal, descriptor);
+ jbd2_descriptor_block_csum_set(journal, descriptor);
start_journal_io:
for (i = 0; i < bufs; i++) {
struct buffer_head *bh = wbuf[i];
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 81e6226..de73a95 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -805,10 +805,13 @@ int jbd2_journal_bmap(journal_t *journal, unsigned long blocknr,
* But we don't bother doing that, so there will be coherency problems with
* mmaps of blockdevs which hold live JBD-controlled filesystems.
*/
-struct buffer_head *jbd2_journal_get_descriptor_buffer(journal_t *journal)
+struct buffer_head *
+jbd2_journal_get_descriptor_buffer(transaction_t *transaction, int type)
{
+ journal_t *journal = transaction->t_journal;
struct buffer_head *bh;
unsigned long long blocknr;
+ journal_header_t *header;
int err;
err = jbd2_journal_next_log_block(journal, &blocknr);
@@ -821,12 +824,31 @@ struct buffer_head *jbd2_journal_get_descriptor_buffer(journal_t *journal)
return NULL;
lock_buffer(bh);
memset(bh->b_data, 0, journal->j_blocksize);
+ header = (journal_header_t *)bh->b_data;
+ header->h_magic = cpu_to_be32(JBD2_MAGIC_NUMBER);
+ header->h_blocktype = cpu_to_be32(type);
+ header->h_sequence = cpu_to_be32(transaction->t_tid);
set_buffer_uptodate(bh);
unlock_buffer(bh);
BUFFER_TRACE(bh, "return this buffer");
return bh;
}
+void jbd2_descriptor_block_csum_set(journal_t *j, struct buffer_head *bh)
+{
+ struct jbd2_journal_block_tail *tail;
+ __u32 csum;
+
+ if (!jbd2_journal_has_csum_v2or3(j))
+ return;
+
+ tail = (struct jbd2_journal_block_tail *)(bh->b_data + j->j_blocksize -
+ sizeof(struct jbd2_journal_block_tail));
+ tail->t_checksum = 0;
+ csum = jbd2_chksum(j, j->j_csum_seed, bh->b_data, j->j_blocksize);
+ tail->t_checksum = cpu_to_be32(csum);
+}
+
/*
* Return tid of the oldest transaction in the journal and block in the journal
* where the transaction starts.
@@ -1408,11 +1430,12 @@ out:
/**
* jbd2_mark_journal_empty() - Mark on disk journal as empty.
* @journal: The journal to update.
+ * @write_op: With which operation should we write the journal sb
*
* Update a journal's dynamic superblock fields to show that journal is empty.
* Write updated superblock to disk waiting for IO to complete.
*/
-static void jbd2_mark_journal_empty(journal_t *journal)
+static void jbd2_mark_journal_empty(journal_t *journal, int write_op)
{
journal_superblock_t *sb = journal->j_superblock;
@@ -1430,7 +1453,7 @@ static void jbd2_mark_journal_empty(journal_t *journal)
sb->s_start = cpu_to_be32(0);
read_unlock(&journal->j_state_lock);
- jbd2_write_superblock(journal, WRITE_FUA);
+ jbd2_write_superblock(journal, write_op);
/* Log is no longer empty */
write_lock(&journal->j_state_lock);
@@ -1716,7 +1739,13 @@ int jbd2_journal_destroy(journal_t *journal)
if (journal->j_sb_buffer) {
if (!is_journal_aborted(journal)) {
mutex_lock(&journal->j_checkpoint_mutex);
- jbd2_mark_journal_empty(journal);
+
+ write_lock(&journal->j_state_lock);
+ journal->j_tail_sequence =
+ ++journal->j_transaction_sequence;
+ write_unlock(&journal->j_state_lock);
+
+ jbd2_mark_journal_empty(journal, WRITE_FLUSH_FUA);
mutex_unlock(&journal->j_checkpoint_mutex);
} else
err = -EIO;
@@ -1975,7 +2004,7 @@ int jbd2_journal_flush(journal_t *journal)
* the magic code for a fully-recovered superblock. Any future
* commits of data to the journal will restore the current
* s_start value. */
- jbd2_mark_journal_empty(journal);
+ jbd2_mark_journal_empty(journal, WRITE_FUA);
mutex_unlock(&journal->j_checkpoint_mutex);
write_lock(&journal->j_state_lock);
J_ASSERT(!journal->j_running_transaction);
@@ -2021,7 +2050,7 @@ int jbd2_journal_wipe(journal_t *journal, int write)
if (write) {
/* Lock to make assertions happy... */
mutex_lock(&journal->j_checkpoint_mutex);
- jbd2_mark_journal_empty(journal);
+ jbd2_mark_journal_empty(journal, WRITE_FUA);
mutex_unlock(&journal->j_checkpoint_mutex);
}
@@ -2565,7 +2594,7 @@ void jbd2_journal_release_jbd_inode(journal_t *journal,
restart:
spin_lock(&journal->j_list_lock);
/* Is commit writing out inode - we have to wait */
- if (test_bit(__JI_COMMIT_RUNNING, &jinode->i_flags)) {
+ if (jinode->i_flags & JI_COMMIT_RUNNING) {
wait_queue_head_t *wq;
DEFINE_WAIT_BIT(wait, &jinode->i_flags, __JI_COMMIT_RUNNING);
wq = bit_waitqueue(&jinode->i_flags, __JI_COMMIT_RUNNING);
diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c
index 7f277e4..08a456b 100644
--- a/fs/jbd2/recovery.c
+++ b/fs/jbd2/recovery.c
@@ -174,8 +174,7 @@ static int jread(struct buffer_head **bhp, journal_t *journal,
return 0;
}
-static int jbd2_descr_block_csum_verify(journal_t *j,
- void *buf)
+static int jbd2_descriptor_block_csum_verify(journal_t *j, void *buf)
{
struct jbd2_journal_block_tail *tail;
__be32 provided;
@@ -522,8 +521,8 @@ static int do_one_pass(journal_t *journal,
descr_csum_size =
sizeof(struct jbd2_journal_block_tail);
if (descr_csum_size > 0 &&
- !jbd2_descr_block_csum_verify(journal,
- bh->b_data)) {
+ !jbd2_descriptor_block_csum_verify(journal,
+ bh->b_data)) {
printk(KERN_ERR "JBD2: Invalid checksum "
"recovering block %lu in log\n",
next_log_block);
@@ -811,26 +810,6 @@ static int do_one_pass(journal_t *journal,
return err;
}
-static int jbd2_revoke_block_csum_verify(journal_t *j,
- void *buf)
-{
- struct jbd2_journal_revoke_tail *tail;
- __be32 provided;
- __u32 calculated;
-
- if (!jbd2_journal_has_csum_v2or3(j))
- return 1;
-
- tail = (struct jbd2_journal_revoke_tail *)(buf + j->j_blocksize -
- sizeof(struct jbd2_journal_revoke_tail));
- provided = tail->r_checksum;
- tail->r_checksum = 0;
- calculated = jbd2_chksum(j, j->j_csum_seed, buf, j->j_blocksize);
- tail->r_checksum = provided;
-
- return provided == cpu_to_be32(calculated);
-}
-
/* Scan a revoke record, marking all blocks mentioned as revoked. */
static int scan_revoke_records(journal_t *journal, struct buffer_head *bh,
@@ -846,11 +825,11 @@ static int scan_revoke_records(journal_t *journal, struct buffer_head *bh,
offset = sizeof(jbd2_journal_revoke_header_t);
rcount = be32_to_cpu(header->r_count);
- if (!jbd2_revoke_block_csum_verify(journal, header))
+ if (!jbd2_descriptor_block_csum_verify(journal, header))
return -EFSBADCRC;
if (jbd2_journal_has_csum_v2or3(journal))
- csum_size = sizeof(struct jbd2_journal_revoke_tail);
+ csum_size = sizeof(struct jbd2_journal_block_tail);
if (rcount > journal->j_blocksize - csum_size)
return -EINVAL;
max = rcount;
diff --git a/fs/jbd2/revoke.c b/fs/jbd2/revoke.c
index 705ae57..91171dc 100644
--- a/fs/jbd2/revoke.c
+++ b/fs/jbd2/revoke.c
@@ -122,11 +122,11 @@ struct jbd2_revoke_table_s
#ifdef __KERNEL__
-static void write_one_revoke_record(journal_t *, transaction_t *,
+static void write_one_revoke_record(transaction_t *,
struct list_head *,
struct buffer_head **, int *,
- struct jbd2_revoke_record_s *, int);
-static void flush_descriptor(journal_t *, struct buffer_head *, int, int);
+ struct jbd2_revoke_record_s *);
+static void flush_descriptor(journal_t *, struct buffer_head *, int);
#endif
/* Utility functions to maintain the revoke table */
@@ -519,11 +519,10 @@ void jbd2_journal_switch_revoke_table(journal_t *journal)
* Write revoke records to the journal for all entries in the current
* revoke hash, deleting the entries as we go.
*/
-void jbd2_journal_write_revoke_records(journal_t *journal,
- transaction_t *transaction,
- struct list_head *log_bufs,
- int write_op)
+void jbd2_journal_write_revoke_records(transaction_t *transaction,
+ struct list_head *log_bufs)
{
+ journal_t *journal = transaction->t_journal;
struct buffer_head *descriptor;
struct jbd2_revoke_record_s *record;
struct jbd2_revoke_table_s *revoke;
@@ -544,16 +543,15 @@ void jbd2_journal_write_revoke_records(journal_t *journal,
while (!list_empty(hash_list)) {
record = (struct jbd2_revoke_record_s *)
hash_list->next;
- write_one_revoke_record(journal, transaction, log_bufs,
- &descriptor, &offset,
- record, write_op);
+ write_one_revoke_record(transaction, log_bufs,
+ &descriptor, &offset, record);
count++;
list_del(&record->hash);
kmem_cache_free(jbd2_revoke_record_cache, record);
}
}
if (descriptor)
- flush_descriptor(journal, descriptor, offset, write_op);
+ flush_descriptor(journal, descriptor, offset);
jbd_debug(1, "Wrote %d revoke records\n", count);
}
@@ -562,18 +560,16 @@ void jbd2_journal_write_revoke_records(journal_t *journal,
* block if the old one is full or if we have not already created one.
*/
-static void write_one_revoke_record(journal_t *journal,
- transaction_t *transaction,
+static void write_one_revoke_record(transaction_t *transaction,
struct list_head *log_bufs,
struct buffer_head **descriptorp,
int *offsetp,
- struct jbd2_revoke_record_s *record,
- int write_op)
+ struct jbd2_revoke_record_s *record)
{
+ journal_t *journal = transaction->t_journal;
int csum_size = 0;
struct buffer_head *descriptor;
int sz, offset;
- journal_header_t *header;
/* If we are already aborting, this all becomes a noop. We
still need to go round the loop in
@@ -587,7 +583,7 @@ static void write_one_revoke_record(journal_t *journal,
/* Do we need to leave space at the end for a checksum? */
if (jbd2_journal_has_csum_v2or3(journal))
- csum_size = sizeof(struct jbd2_journal_revoke_tail);
+ csum_size = sizeof(struct jbd2_journal_block_tail);
if (jbd2_has_feature_64bit(journal))
sz = 8;
@@ -597,19 +593,16 @@ static void write_one_revoke_record(journal_t *journal,
/* Make sure we have a descriptor with space left for the record */
if (descriptor) {
if (offset + sz > journal->j_blocksize - csum_size) {
- flush_descriptor(journal, descriptor, offset, write_op);
+ flush_descriptor(journal, descriptor, offset);
descriptor = NULL;
}
}
if (!descriptor) {
- descriptor = jbd2_journal_get_descriptor_buffer(journal);
+ descriptor = jbd2_journal_get_descriptor_buffer(transaction,
+ JBD2_REVOKE_BLOCK);
if (!descriptor)
return;
- header = (journal_header_t *)descriptor->b_data;
- header->h_magic = cpu_to_be32(JBD2_MAGIC_NUMBER);
- header->h_blocktype = cpu_to_be32(JBD2_REVOKE_BLOCK);
- header->h_sequence = cpu_to_be32(transaction->t_tid);
/* Record it so that we can wait for IO completion later */
BUFFER_TRACE(descriptor, "file in log_bufs");
@@ -630,21 +623,6 @@ static void write_one_revoke_record(journal_t *journal,
*offsetp = offset;
}
-static void jbd2_revoke_csum_set(journal_t *j, struct buffer_head *bh)
-{
- struct jbd2_journal_revoke_tail *tail;
- __u32 csum;
-
- if (!jbd2_journal_has_csum_v2or3(j))
- return;
-
- tail = (struct jbd2_journal_revoke_tail *)(bh->b_data + j->j_blocksize -
- sizeof(struct jbd2_journal_revoke_tail));
- tail->r_checksum = 0;
- csum = jbd2_chksum(j, j->j_csum_seed, bh->b_data, j->j_blocksize);
- tail->r_checksum = cpu_to_be32(csum);
-}
-
/*
* Flush a revoke descriptor out to the journal. If we are aborting,
* this is a noop; otherwise we are generating a buffer which needs to
@@ -654,7 +632,7 @@ static void jbd2_revoke_csum_set(journal_t *j, struct buffer_head *bh)
static void flush_descriptor(journal_t *journal,
struct buffer_head *descriptor,
- int offset, int write_op)
+ int offset)
{
jbd2_journal_revoke_header_t *header;
@@ -665,12 +643,12 @@ static void flush_descriptor(journal_t *journal,
header = (jbd2_journal_revoke_header_t *)descriptor->b_data;
header->r_count = cpu_to_be32(offset);
- jbd2_revoke_csum_set(journal, descriptor);
+ jbd2_descriptor_block_csum_set(journal, descriptor);
set_buffer_jwrite(descriptor);
BUFFER_TRACE(descriptor, "write");
set_buffer_dirty(descriptor);
- write_dirty_buffer(descriptor, write_op);
+ write_dirty_buffer(descriptor, WRITE_SYNC);
}
#endif
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index 081dff0..01e4652d 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -966,14 +966,8 @@ repeat:
if (!frozen_buffer) {
JBUFFER_TRACE(jh, "allocate memory for buffer");
jbd_unlock_bh_state(bh);
- frozen_buffer = jbd2_alloc(jh2bh(jh)->b_size, GFP_NOFS);
- if (!frozen_buffer) {
- printk(KERN_ERR "%s: OOM for frozen_buffer\n",
- __func__);
- JBUFFER_TRACE(jh, "oom!");
- error = -ENOMEM;
- goto out;
- }
+ frozen_buffer = jbd2_alloc(jh2bh(jh)->b_size,
+ GFP_NOFS | __GFP_NOFAIL);
goto repeat;
}
jh->b_frozen_data = frozen_buffer;
@@ -1226,15 +1220,9 @@ int jbd2_journal_get_undo_access(handle_t *handle, struct buffer_head *bh)
goto out;
repeat:
- if (!jh->b_committed_data) {
- committed_data = jbd2_alloc(jh2bh(jh)->b_size, GFP_NOFS);
- if (!committed_data) {
- printk(KERN_ERR "%s: No memory for committed data\n",
- __func__);
- err = -ENOMEM;
- goto out;
- }
- }
+ if (!jh->b_committed_data)
+ committed_data = jbd2_alloc(jh2bh(jh)->b_size,
+ GFP_NOFS|__GFP_NOFAIL);
jbd_lock_bh_state(bh);
if (!jh->b_committed_data) {
diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c
index 996b774..118d033 100644
--- a/fs/kernfs/dir.c
+++ b/fs/kernfs/dir.c
@@ -691,15 +691,22 @@ static struct kernfs_node *kernfs_walk_ns(struct kernfs_node *parent,
const unsigned char *path,
const void *ns)
{
- static char path_buf[PATH_MAX]; /* protected by kernfs_mutex */
- size_t len = strlcpy(path_buf, path, PATH_MAX);
- char *p = path_buf;
- char *name;
+ size_t len;
+ char *p, *name;
lockdep_assert_held(&kernfs_mutex);
- if (len >= PATH_MAX)
+ /* grab kernfs_rename_lock to piggy back on kernfs_pr_cont_buf */
+ spin_lock_irq(&kernfs_rename_lock);
+
+ len = strlcpy(kernfs_pr_cont_buf, path, sizeof(kernfs_pr_cont_buf));
+
+ if (len >= sizeof(kernfs_pr_cont_buf)) {
+ spin_unlock_irq(&kernfs_rename_lock);
return NULL;
+ }
+
+ p = kernfs_pr_cont_buf;
while ((name = strsep(&p, "/")) && parent) {
if (*name == '\0')
@@ -707,6 +714,8 @@ static struct kernfs_node *kernfs_walk_ns(struct kernfs_node *parent,
parent = kernfs_find_ns(parent, name, ns);
}
+ spin_unlock_irq(&kernfs_rename_lock);
+
return parent;
}
diff --git a/fs/mbcache.c b/fs/mbcache.c
index 187477d..eccda3a 100644
--- a/fs/mbcache.c
+++ b/fs/mbcache.c
@@ -1,858 +1,433 @@
-/*
- * linux/fs/mbcache.c
- * (C) 2001-2002 Andreas Gruenbacher, <a.gruenbacher@computer.org>
- */
-
-/*
- * Filesystem Meta Information Block Cache (mbcache)
- *
- * The mbcache caches blocks of block devices that need to be located
- * by their device/block number, as well as by other criteria (such
- * as the block's contents).
- *
- * There can only be one cache entry in a cache per device and block number.
- * Additional indexes need not be unique in this sense. The number of
- * additional indexes (=other criteria) can be hardwired at compile time
- * or specified at cache create time.
- *
- * Each cache entry is of fixed size. An entry may be `valid' or `invalid'
- * in the cache. A valid entry is in the main hash tables of the cache,
- * and may also be in the lru list. An invalid entry is not in any hashes
- * or lists.
- *
- * A valid cache entry is only in the lru list if no handles refer to it.
- * Invalid cache entries will be freed when the last handle to the cache
- * entry is released. Entries that cannot be freed immediately are put
- * back on the lru list.
- */
-
-/*
- * Lock descriptions and usage:
- *
- * Each hash chain of both the block and index hash tables now contains
- * a built-in lock used to serialize accesses to the hash chain.
- *
- * Accesses to global data structures mb_cache_list and mb_cache_lru_list
- * are serialized via the global spinlock mb_cache_spinlock.
- *
- * Each mb_cache_entry contains a spinlock, e_entry_lock, to serialize
- * accesses to its local data, such as e_used and e_queued.
- *
- * Lock ordering:
- *
- * Each block hash chain's lock has the highest lock order, followed by an
- * index hash chain's lock, mb_cache_bg_lock (used to implement mb_cache_entry's
- * lock), and mb_cach_spinlock, with the lowest order. While holding
- * either a block or index hash chain lock, a thread can acquire an
- * mc_cache_bg_lock, which in turn can also acquire mb_cache_spinlock.
- *
- * Synchronization:
- *
- * Since both mb_cache_entry_get and mb_cache_entry_find scan the block and
- * index hash chian, it needs to lock the corresponding hash chain. For each
- * mb_cache_entry within the chain, it needs to lock the mb_cache_entry to
- * prevent either any simultaneous release or free on the entry and also
- * to serialize accesses to either the e_used or e_queued member of the entry.
- *
- * To avoid having a dangling reference to an already freed
- * mb_cache_entry, an mb_cache_entry is only freed when it is not on a
- * block hash chain and also no longer being referenced, both e_used,
- * and e_queued are 0's. When an mb_cache_entry is explicitly freed it is
- * first removed from a block hash chain.
- */
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-
-#include <linux/hash.h>
-#include <linux/fs.h>
-#include <linux/mm.h>
+#include <linux/spinlock.h>
#include <linux/slab.h>
-#include <linux/sched.h>
+#include <linux/list.h>
#include <linux/list_bl.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/workqueue.h>
#include <linux/mbcache.h>
-#include <linux/init.h>
-#include <linux/blockgroup_lock.h>
-#include <linux/log2.h>
-
-#ifdef MB_CACHE_DEBUG
-# define mb_debug(f...) do { \
- printk(KERN_DEBUG f); \
- printk("\n"); \
- } while (0)
-#define mb_assert(c) do { if (!(c)) \
- printk(KERN_ERR "assertion " #c " failed\n"); \
- } while(0)
-#else
-# define mb_debug(f...) do { } while(0)
-# define mb_assert(c) do { } while(0)
-#endif
-#define mb_error(f...) do { \
- printk(KERN_ERR f); \
- printk("\n"); \
- } while(0)
-
-#define MB_CACHE_WRITER ((unsigned short)~0U >> 1)
-
-#define MB_CACHE_ENTRY_LOCK_BITS ilog2(NR_BG_LOCKS)
-#define MB_CACHE_ENTRY_LOCK_INDEX(ce) \
- (hash_long((unsigned long)ce, MB_CACHE_ENTRY_LOCK_BITS))
-
-static DECLARE_WAIT_QUEUE_HEAD(mb_cache_queue);
-static struct blockgroup_lock *mb_cache_bg_lock;
-static struct kmem_cache *mb_cache_kmem_cache;
-
-MODULE_AUTHOR("Andreas Gruenbacher <a.gruenbacher@computer.org>");
-MODULE_DESCRIPTION("Meta block cache (for extended attributes)");
-MODULE_LICENSE("GPL");
-
-EXPORT_SYMBOL(mb_cache_create);
-EXPORT_SYMBOL(mb_cache_shrink);
-EXPORT_SYMBOL(mb_cache_destroy);
-EXPORT_SYMBOL(mb_cache_entry_alloc);
-EXPORT_SYMBOL(mb_cache_entry_insert);
-EXPORT_SYMBOL(mb_cache_entry_release);
-EXPORT_SYMBOL(mb_cache_entry_free);
-EXPORT_SYMBOL(mb_cache_entry_get);
-#if !defined(MB_CACHE_INDEXES_COUNT) || (MB_CACHE_INDEXES_COUNT > 0)
-EXPORT_SYMBOL(mb_cache_entry_find_first);
-EXPORT_SYMBOL(mb_cache_entry_find_next);
-#endif
/*
- * Global data: list of all mbcache's, lru list, and a spinlock for
- * accessing cache data structures on SMP machines. The lru list is
- * global across all mbcaches.
+ * Mbcache is a simple key-value store. Keys need not be unique, however
+ * key-value pairs are expected to be unique (we use this fact in
+ * mb_cache_entry_delete_block()).
+ *
+ * Ext2 and ext4 use this cache for deduplication of extended attribute blocks.
+ * They use hash of a block contents as a key and block number as a value.
+ * That's why keys need not be unique (different xattr blocks may end up having
+ * the same hash). However block number always uniquely identifies a cache
+ * entry.
+ *
+ * We provide functions for creation and removal of entries, search by key,
+ * and a special "delete entry with given key-value pair" operation. Fixed
+ * size hash table is used for fast key lookups.
*/
-static LIST_HEAD(mb_cache_list);
-static LIST_HEAD(mb_cache_lru_list);
-static DEFINE_SPINLOCK(mb_cache_spinlock);
-
-static inline void
-__spin_lock_mb_cache_entry(struct mb_cache_entry *ce)
-{
- spin_lock(bgl_lock_ptr(mb_cache_bg_lock,
- MB_CACHE_ENTRY_LOCK_INDEX(ce)));
-}
-
-static inline void
-__spin_unlock_mb_cache_entry(struct mb_cache_entry *ce)
-{
- spin_unlock(bgl_lock_ptr(mb_cache_bg_lock,
- MB_CACHE_ENTRY_LOCK_INDEX(ce)));
-}
-
-static inline int
-__mb_cache_entry_is_block_hashed(struct mb_cache_entry *ce)
-{
- return !hlist_bl_unhashed(&ce->e_block_list);
-}
+struct mb_cache {
+ /* Hash table of entries */
+ struct hlist_bl_head *c_hash;
+ /* log2 of hash table size */
+ int c_bucket_bits;
+ /* Maximum entries in cache to avoid degrading hash too much */
+ int c_max_entries;
+ /* Protects c_list, c_entry_count */
+ spinlock_t c_list_lock;
+ struct list_head c_list;
+ /* Number of entries in cache */
+ unsigned long c_entry_count;
+ struct shrinker c_shrink;
+ /* Work for shrinking when the cache has too many entries */
+ struct work_struct c_shrink_work;
+};
+static struct kmem_cache *mb_entry_cache;
-static inline void
-__mb_cache_entry_unhash_block(struct mb_cache_entry *ce)
-{
- if (__mb_cache_entry_is_block_hashed(ce))
- hlist_bl_del_init(&ce->e_block_list);
-}
+static unsigned long mb_cache_shrink(struct mb_cache *cache,
+ unsigned int nr_to_scan);
-static inline int
-__mb_cache_entry_is_index_hashed(struct mb_cache_entry *ce)
+static inline struct hlist_bl_head *mb_cache_entry_head(struct mb_cache *cache,
+ u32 key)
{
- return !hlist_bl_unhashed(&ce->e_index.o_list);
+ return &cache->c_hash[hash_32(key, cache->c_bucket_bits)];
}
-static inline void
-__mb_cache_entry_unhash_index(struct mb_cache_entry *ce)
-{
- if (__mb_cache_entry_is_index_hashed(ce))
- hlist_bl_del_init(&ce->e_index.o_list);
-}
+/*
+ * Number of entries to reclaim synchronously when there are too many entries
+ * in cache
+ */
+#define SYNC_SHRINK_BATCH 64
/*
- * __mb_cache_entry_unhash_unlock()
- *
- * This function is called to unhash both the block and index hash
- * chain.
- * It assumes both the block and index hash chain is locked upon entry.
- * It also unlock both hash chains both exit
+ * mb_cache_entry_create - create entry in cache
+ * @cache - cache where the entry should be created
+ * @mask - gfp mask with which the entry should be allocated
+ * @key - key of the entry
+ * @block - block that contains data
+ * @reusable - is the block reusable by other inodes?
+ *
+ * Creates entry in @cache with key @key and records that data is stored in
+ * block @block. The function returns -EBUSY if entry with the same key
+ * and for the same block already exists in cache. Otherwise 0 is returned.
*/
-static inline void
-__mb_cache_entry_unhash_unlock(struct mb_cache_entry *ce)
+int mb_cache_entry_create(struct mb_cache *cache, gfp_t mask, u32 key,
+ sector_t block, bool reusable)
{
- __mb_cache_entry_unhash_index(ce);
- hlist_bl_unlock(ce->e_index_hash_p);
- __mb_cache_entry_unhash_block(ce);
- hlist_bl_unlock(ce->e_block_hash_p);
+ struct mb_cache_entry *entry, *dup;
+ struct hlist_bl_node *dup_node;
+ struct hlist_bl_head *head;
+
+ /* Schedule background reclaim if there are too many entries */
+ if (cache->c_entry_count >= cache->c_max_entries)
+ schedule_work(&cache->c_shrink_work);
+ /* Do some sync reclaim if background reclaim cannot keep up */
+ if (cache->c_entry_count >= 2*cache->c_max_entries)
+ mb_cache_shrink(cache, SYNC_SHRINK_BATCH);
+
+ entry = kmem_cache_alloc(mb_entry_cache, mask);
+ if (!entry)
+ return -ENOMEM;
+
+ INIT_LIST_HEAD(&entry->e_list);
+ /* One ref for hash, one ref returned */
+ atomic_set(&entry->e_refcnt, 1);
+ entry->e_key = key;
+ entry->e_block = block;
+ entry->e_reusable = reusable;
+ head = mb_cache_entry_head(cache, key);
+ hlist_bl_lock(head);
+ hlist_bl_for_each_entry(dup, dup_node, head, e_hash_list) {
+ if (dup->e_key == key && dup->e_block == block) {
+ hlist_bl_unlock(head);
+ kmem_cache_free(mb_entry_cache, entry);
+ return -EBUSY;
+ }
+ }
+ hlist_bl_add_head(&entry->e_hash_list, head);
+ hlist_bl_unlock(head);
+
+ spin_lock(&cache->c_list_lock);
+ list_add_tail(&entry->e_list, &cache->c_list);
+ /* Grab ref for LRU list */
+ atomic_inc(&entry->e_refcnt);
+ cache->c_entry_count++;
+ spin_unlock(&cache->c_list_lock);
+
+ return 0;
}
+EXPORT_SYMBOL(mb_cache_entry_create);
-static void
-__mb_cache_entry_forget(struct mb_cache_entry *ce, gfp_t gfp_mask)
+void __mb_cache_entry_free(struct mb_cache_entry *entry)
{
- struct mb_cache *cache = ce->e_cache;
-
- mb_assert(!(ce->e_used || ce->e_queued || atomic_read(&ce->e_refcnt)));
- kmem_cache_free(cache->c_entry_cache, ce);
- atomic_dec(&cache->c_entry_count);
+ kmem_cache_free(mb_entry_cache, entry);
}
+EXPORT_SYMBOL(__mb_cache_entry_free);
-static void
-__mb_cache_entry_release(struct mb_cache_entry *ce)
+static struct mb_cache_entry *__entry_find(struct mb_cache *cache,
+ struct mb_cache_entry *entry,
+ u32 key)
{
- /* First lock the entry to serialize access to its local data. */
- __spin_lock_mb_cache_entry(ce);
- /* Wake up all processes queuing for this cache entry. */
- if (ce->e_queued)
- wake_up_all(&mb_cache_queue);
- if (ce->e_used >= MB_CACHE_WRITER)
- ce->e_used -= MB_CACHE_WRITER;
- /*
- * Make sure that all cache entries on lru_list have
- * both e_used and e_qued of 0s.
- */
- ce->e_used--;
- if (!(ce->e_used || ce->e_queued || atomic_read(&ce->e_refcnt))) {
- if (!__mb_cache_entry_is_block_hashed(ce)) {
- __spin_unlock_mb_cache_entry(ce);
- goto forget;
+ struct mb_cache_entry *old_entry = entry;
+ struct hlist_bl_node *node;
+ struct hlist_bl_head *head;
+
+ head = mb_cache_entry_head(cache, key);
+ hlist_bl_lock(head);
+ if (entry && !hlist_bl_unhashed(&entry->e_hash_list))
+ node = entry->e_hash_list.next;
+ else
+ node = hlist_bl_first(head);
+ while (node) {
+ entry = hlist_bl_entry(node, struct mb_cache_entry,
+ e_hash_list);
+ if (entry->e_key == key && entry->e_reusable) {
+ atomic_inc(&entry->e_refcnt);
+ goto out;
}
- /*
- * Need access to lru list, first drop entry lock,
- * then reacquire the lock in the proper order.
- */
- spin_lock(&mb_cache_spinlock);
- if (list_empty(&ce->e_lru_list))
- list_add_tail(&ce->e_lru_list, &mb_cache_lru_list);
- spin_unlock(&mb_cache_spinlock);
+ node = node->next;
}
- __spin_unlock_mb_cache_entry(ce);
- return;
-forget:
- mb_assert(list_empty(&ce->e_lru_list));
- __mb_cache_entry_forget(ce, GFP_KERNEL);
+ entry = NULL;
+out:
+ hlist_bl_unlock(head);
+ if (old_entry)
+ mb_cache_entry_put(cache, old_entry);
+
+ return entry;
}
/*
- * mb_cache_shrink_scan() memory pressure callback
- *
- * This function is called by the kernel memory management when memory
- * gets low.
+ * mb_cache_entry_find_first - find the first entry in cache with given key
+ * @cache: cache where we should search
+ * @key: key to look for
*
- * @shrink: (ignored)
- * @sc: shrink_control passed from reclaim
- *
- * Returns the number of objects freed.
+ * Search in @cache for entry with key @key. Grabs reference to the first
+ * entry found and returns the entry.
*/
-static unsigned long
-mb_cache_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
+struct mb_cache_entry *mb_cache_entry_find_first(struct mb_cache *cache,
+ u32 key)
{
- LIST_HEAD(free_list);
- struct mb_cache_entry *entry, *tmp;
- int nr_to_scan = sc->nr_to_scan;
- gfp_t gfp_mask = sc->gfp_mask;
- unsigned long freed = 0;
-
- mb_debug("trying to free %d entries", nr_to_scan);
- spin_lock(&mb_cache_spinlock);
- while ((nr_to_scan-- > 0) && !list_empty(&mb_cache_lru_list)) {
- struct mb_cache_entry *ce =
- list_entry(mb_cache_lru_list.next,
- struct mb_cache_entry, e_lru_list);
- list_del_init(&ce->e_lru_list);
- if (ce->e_used || ce->e_queued || atomic_read(&ce->e_refcnt))
- continue;
- spin_unlock(&mb_cache_spinlock);
- /* Prevent any find or get operation on the entry */
- hlist_bl_lock(ce->e_block_hash_p);
- hlist_bl_lock(ce->e_index_hash_p);
- /* Ignore if it is touched by a find/get */
- if (ce->e_used || ce->e_queued || atomic_read(&ce->e_refcnt) ||
- !list_empty(&ce->e_lru_list)) {
- hlist_bl_unlock(ce->e_index_hash_p);
- hlist_bl_unlock(ce->e_block_hash_p);
- spin_lock(&mb_cache_spinlock);
- continue;
- }
- __mb_cache_entry_unhash_unlock(ce);
- list_add_tail(&ce->e_lru_list, &free_list);
- spin_lock(&mb_cache_spinlock);
- }
- spin_unlock(&mb_cache_spinlock);
-
- list_for_each_entry_safe(entry, tmp, &free_list, e_lru_list) {
- __mb_cache_entry_forget(entry, gfp_mask);
- freed++;
- }
- return freed;
+ return __entry_find(cache, NULL, key);
}
+EXPORT_SYMBOL(mb_cache_entry_find_first);
-static unsigned long
-mb_cache_shrink_count(struct shrinker *shrink, struct shrink_control *sc)
+/*
+ * mb_cache_entry_find_next - find next entry in cache with the same
+ * @cache: cache where we should search
+ * @entry: entry to start search from
+ *
+ * Finds next entry in the hash chain which has the same key as @entry.
+ * If @entry is unhashed (which can happen when deletion of entry races
+ * with the search), finds the first entry in the hash chain. The function
+ * drops reference to @entry and returns with a reference to the found entry.
+ */
+struct mb_cache_entry *mb_cache_entry_find_next(struct mb_cache *cache,
+ struct mb_cache_entry *entry)
{
- struct mb_cache *cache;
- unsigned long count = 0;
-
- spin_lock(&mb_cache_spinlock);
- list_for_each_entry(cache, &mb_cache_list, c_cache_list) {
- mb_debug("cache %s (%d)", cache->c_name,
- atomic_read(&cache->c_entry_count));
- count += atomic_read(&cache->c_entry_count);
- }
- spin_unlock(&mb_cache_spinlock);
-
- return vfs_pressure_ratio(count);
+ return __entry_find(cache, entry, entry->e_key);
}
-
-static struct shrinker mb_cache_shrinker = {
- .count_objects = mb_cache_shrink_count,
- .scan_objects = mb_cache_shrink_scan,
- .seeks = DEFAULT_SEEKS,
-};
+EXPORT_SYMBOL(mb_cache_entry_find_next);
/*
- * mb_cache_create() create a new cache
- *
- * All entries in one cache are equal size. Cache entries may be from
- * multiple devices. If this is the first mbcache created, registers
- * the cache with kernel memory management. Returns NULL if no more
- * memory was available.
- *
- * @name: name of the cache (informal)
- * @bucket_bits: log2(number of hash buckets)
+ * mb_cache_entry_get - get a cache entry by block number (and key)
+ * @cache - cache we work with
+ * @key - key of block number @block
+ * @block - block number
*/
-struct mb_cache *
-mb_cache_create(const char *name, int bucket_bits)
+struct mb_cache_entry *mb_cache_entry_get(struct mb_cache *cache, u32 key,
+ sector_t block)
{
- int n, bucket_count = 1 << bucket_bits;
- struct mb_cache *cache = NULL;
-
- if (!mb_cache_bg_lock) {
- mb_cache_bg_lock = kmalloc(sizeof(struct blockgroup_lock),
- GFP_KERNEL);
- if (!mb_cache_bg_lock)
- return NULL;
- bgl_lock_init(mb_cache_bg_lock);
- }
-
- cache = kmalloc(sizeof(struct mb_cache), GFP_KERNEL);
- if (!cache)
- return NULL;
- cache->c_name = name;
- atomic_set(&cache->c_entry_count, 0);
- cache->c_bucket_bits = bucket_bits;
- cache->c_block_hash = kmalloc(bucket_count *
- sizeof(struct hlist_bl_head), GFP_KERNEL);
- if (!cache->c_block_hash)
- goto fail;
- for (n=0; n<bucket_count; n++)
- INIT_HLIST_BL_HEAD(&cache->c_block_hash[n]);
- cache->c_index_hash = kmalloc(bucket_count *
- sizeof(struct hlist_bl_head), GFP_KERNEL);
- if (!cache->c_index_hash)
- goto fail;
- for (n=0; n<bucket_count; n++)
- INIT_HLIST_BL_HEAD(&cache->c_index_hash[n]);
- if (!mb_cache_kmem_cache) {
- mb_cache_kmem_cache = kmem_cache_create(name,
- sizeof(struct mb_cache_entry), 0,
- SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, NULL);
- if (!mb_cache_kmem_cache)
- goto fail2;
+ struct hlist_bl_node *node;
+ struct hlist_bl_head *head;
+ struct mb_cache_entry *entry;
+
+ head = mb_cache_entry_head(cache, key);
+ hlist_bl_lock(head);
+ hlist_bl_for_each_entry(entry, node, head, e_hash_list) {
+ if (entry->e_key == key && entry->e_block == block) {
+ atomic_inc(&entry->e_refcnt);
+ goto out;
+ }
}
- cache->c_entry_cache = mb_cache_kmem_cache;
-
- /*
- * Set an upper limit on the number of cache entries so that the hash
- * chains won't grow too long.
- */
- cache->c_max_entries = bucket_count << 4;
-
- spin_lock(&mb_cache_spinlock);
- list_add(&cache->c_cache_list, &mb_cache_list);
- spin_unlock(&mb_cache_spinlock);
- return cache;
-
-fail2:
- kfree(cache->c_index_hash);
-
-fail:
- kfree(cache->c_block_hash);
- kfree(cache);
- return NULL;
+ entry = NULL;
+out:
+ hlist_bl_unlock(head);
+ return entry;
}
+EXPORT_SYMBOL(mb_cache_entry_get);
-
-/*
- * mb_cache_shrink()
- *
- * Removes all cache entries of a device from the cache. All cache entries
- * currently in use cannot be freed, and thus remain in the cache. All others
- * are freed.
+/* mb_cache_entry_delete_block - remove information about block from cache
+ * @cache - cache we work with
+ * @key - key of block @block
+ * @block - block number
*
- * @bdev: which device's cache entries to shrink
+ * Remove entry from cache @cache with key @key with data stored in @block.
*/
-void
-mb_cache_shrink(struct block_device *bdev)
+void mb_cache_entry_delete_block(struct mb_cache *cache, u32 key,
+ sector_t block)
{
- LIST_HEAD(free_list);
- struct list_head *l;
- struct mb_cache_entry *ce, *tmp;
-
- l = &mb_cache_lru_list;
- spin_lock(&mb_cache_spinlock);
- while (!list_is_last(l, &mb_cache_lru_list)) {
- l = l->next;
- ce = list_entry(l, struct mb_cache_entry, e_lru_list);
- if (ce->e_bdev == bdev) {
- list_del_init(&ce->e_lru_list);
- if (ce->e_used || ce->e_queued ||
- atomic_read(&ce->e_refcnt))
- continue;
- spin_unlock(&mb_cache_spinlock);
- /*
- * Prevent any find or get operation on the entry.
- */
- hlist_bl_lock(ce->e_block_hash_p);
- hlist_bl_lock(ce->e_index_hash_p);
- /* Ignore if it is touched by a find/get */
- if (ce->e_used || ce->e_queued ||
- atomic_read(&ce->e_refcnt) ||
- !list_empty(&ce->e_lru_list)) {
- hlist_bl_unlock(ce->e_index_hash_p);
- hlist_bl_unlock(ce->e_block_hash_p);
- l = &mb_cache_lru_list;
- spin_lock(&mb_cache_spinlock);
- continue;
+ struct hlist_bl_node *node;
+ struct hlist_bl_head *head;
+ struct mb_cache_entry *entry;
+
+ head = mb_cache_entry_head(cache, key);
+ hlist_bl_lock(head);
+ hlist_bl_for_each_entry(entry, node, head, e_hash_list) {
+ if (entry->e_key == key && entry->e_block == block) {
+ /* We keep hash list reference to keep entry alive */
+ hlist_bl_del_init(&entry->e_hash_list);
+ hlist_bl_unlock(head);
+ spin_lock(&cache->c_list_lock);
+ if (!list_empty(&entry->e_list)) {
+ list_del_init(&entry->e_list);
+ cache->c_entry_count--;
+ atomic_dec(&entry->e_refcnt);
}
- __mb_cache_entry_unhash_unlock(ce);
- mb_assert(!(ce->e_used || ce->e_queued ||
- atomic_read(&ce->e_refcnt)));
- list_add_tail(&ce->e_lru_list, &free_list);
- l = &mb_cache_lru_list;
- spin_lock(&mb_cache_spinlock);
+ spin_unlock(&cache->c_list_lock);
+ mb_cache_entry_put(cache, entry);
+ return;
}
}
- spin_unlock(&mb_cache_spinlock);
-
- list_for_each_entry_safe(ce, tmp, &free_list, e_lru_list) {
- __mb_cache_entry_forget(ce, GFP_KERNEL);
- }
+ hlist_bl_unlock(head);
}
+EXPORT_SYMBOL(mb_cache_entry_delete_block);
-
-/*
- * mb_cache_destroy()
+/* mb_cache_entry_touch - cache entry got used
+ * @cache - cache the entry belongs to
+ * @entry - entry that got used
*
- * Shrinks the cache to its minimum possible size (hopefully 0 entries),
- * and then destroys it. If this was the last mbcache, un-registers the
- * mbcache from kernel memory management.
+ * Marks entry as used to give hit higher chances of surviving in cache.
*/
-void
-mb_cache_destroy(struct mb_cache *cache)
+void mb_cache_entry_touch(struct mb_cache *cache,
+ struct mb_cache_entry *entry)
{
- LIST_HEAD(free_list);
- struct mb_cache_entry *ce, *tmp;
-
- spin_lock(&mb_cache_spinlock);
- list_for_each_entry_safe(ce, tmp, &mb_cache_lru_list, e_lru_list) {
- if (ce->e_cache == cache)
- list_move_tail(&ce->e_lru_list, &free_list);
- }
- list_del(&cache->c_cache_list);
- spin_unlock(&mb_cache_spinlock);
-
- list_for_each_entry_safe(ce, tmp, &free_list, e_lru_list) {
- list_del_init(&ce->e_lru_list);
- /*
- * Prevent any find or get operation on the entry.
- */
- hlist_bl_lock(ce->e_block_hash_p);
- hlist_bl_lock(ce->e_index_hash_p);
- mb_assert(!(ce->e_used || ce->e_queued ||
- atomic_read(&ce->e_refcnt)));
- __mb_cache_entry_unhash_unlock(ce);
- __mb_cache_entry_forget(ce, GFP_KERNEL);
- }
-
- if (atomic_read(&cache->c_entry_count) > 0) {
- mb_error("cache %s: %d orphaned entries",
- cache->c_name,
- atomic_read(&cache->c_entry_count));
- }
-
- if (list_empty(&mb_cache_list)) {
- kmem_cache_destroy(mb_cache_kmem_cache);
- mb_cache_kmem_cache = NULL;
- }
- kfree(cache->c_index_hash);
- kfree(cache->c_block_hash);
- kfree(cache);
+ entry->e_referenced = 1;
}
+EXPORT_SYMBOL(mb_cache_entry_touch);
-/*
- * mb_cache_entry_alloc()
- *
- * Allocates a new cache entry. The new entry will not be valid initially,
- * and thus cannot be looked up yet. It should be filled with data, and
- * then inserted into the cache using mb_cache_entry_insert(). Returns NULL
- * if no more memory was available.
- */
-struct mb_cache_entry *
-mb_cache_entry_alloc(struct mb_cache *cache, gfp_t gfp_flags)
+static unsigned long mb_cache_count(struct shrinker *shrink,
+ struct shrink_control *sc)
{
- struct mb_cache_entry *ce;
-
- if (atomic_read(&cache->c_entry_count) >= cache->c_max_entries) {
- struct list_head *l;
-
- l = &mb_cache_lru_list;
- spin_lock(&mb_cache_spinlock);
- while (!list_is_last(l, &mb_cache_lru_list)) {
- l = l->next;
- ce = list_entry(l, struct mb_cache_entry, e_lru_list);
- if (ce->e_cache == cache) {
- list_del_init(&ce->e_lru_list);
- if (ce->e_used || ce->e_queued ||
- atomic_read(&ce->e_refcnt))
- continue;
- spin_unlock(&mb_cache_spinlock);
- /*
- * Prevent any find or get operation on the
- * entry.
- */
- hlist_bl_lock(ce->e_block_hash_p);
- hlist_bl_lock(ce->e_index_hash_p);
- /* Ignore if it is touched by a find/get */
- if (ce->e_used || ce->e_queued ||
- atomic_read(&ce->e_refcnt) ||
- !list_empty(&ce->e_lru_list)) {
- hlist_bl_unlock(ce->e_index_hash_p);
- hlist_bl_unlock(ce->e_block_hash_p);
- l = &mb_cache_lru_list;
- spin_lock(&mb_cache_spinlock);
- continue;
- }
- mb_assert(list_empty(&ce->e_lru_list));
- mb_assert(!(ce->e_used || ce->e_queued ||
- atomic_read(&ce->e_refcnt)));
- __mb_cache_entry_unhash_unlock(ce);
- goto found;
- }
- }
- spin_unlock(&mb_cache_spinlock);
- }
+ struct mb_cache *cache = container_of(shrink, struct mb_cache,
+ c_shrink);
- ce = kmem_cache_alloc(cache->c_entry_cache, gfp_flags);
- if (!ce)
- return NULL;
- atomic_inc(&cache->c_entry_count);
- INIT_LIST_HEAD(&ce->e_lru_list);
- INIT_HLIST_BL_NODE(&ce->e_block_list);
- INIT_HLIST_BL_NODE(&ce->e_index.o_list);
- ce->e_cache = cache;
- ce->e_queued = 0;
- atomic_set(&ce->e_refcnt, 0);
-found:
- ce->e_block_hash_p = &cache->c_block_hash[0];
- ce->e_index_hash_p = &cache->c_index_hash[0];
- ce->e_used = 1 + MB_CACHE_WRITER;
- return ce;
+ return cache->c_entry_count;
}
-
-/*
- * mb_cache_entry_insert()
- *
- * Inserts an entry that was allocated using mb_cache_entry_alloc() into
- * the cache. After this, the cache entry can be looked up, but is not yet
- * in the lru list as the caller still holds a handle to it. Returns 0 on
- * success, or -EBUSY if a cache entry for that device + inode exists
- * already (this may happen after a failed lookup, but when another process
- * has inserted the same cache entry in the meantime).
- *
- * @bdev: device the cache entry belongs to
- * @block: block number
- * @key: lookup key
- */
-int
-mb_cache_entry_insert(struct mb_cache_entry *ce, struct block_device *bdev,
- sector_t block, unsigned int key)
+/* Shrink number of entries in cache */
+static unsigned long mb_cache_shrink(struct mb_cache *cache,
+ unsigned int nr_to_scan)
{
- struct mb_cache *cache = ce->e_cache;
- unsigned int bucket;
- struct hlist_bl_node *l;
- struct hlist_bl_head *block_hash_p;
- struct hlist_bl_head *index_hash_p;
- struct mb_cache_entry *lce;
-
- mb_assert(ce);
- bucket = hash_long((unsigned long)bdev + (block & 0xffffffff),
- cache->c_bucket_bits);
- block_hash_p = &cache->c_block_hash[bucket];
- hlist_bl_lock(block_hash_p);
- hlist_bl_for_each_entry(lce, l, block_hash_p, e_block_list) {
- if (lce->e_bdev == bdev && lce->e_block == block) {
- hlist_bl_unlock(block_hash_p);
- return -EBUSY;
+ struct mb_cache_entry *entry;
+ struct hlist_bl_head *head;
+ unsigned int shrunk = 0;
+
+ spin_lock(&cache->c_list_lock);
+ while (nr_to_scan-- && !list_empty(&cache->c_list)) {
+ entry = list_first_entry(&cache->c_list,
+ struct mb_cache_entry, e_list);
+ if (entry->e_referenced) {
+ entry->e_referenced = 0;
+ list_move_tail(&cache->c_list, &entry->e_list);
+ continue;
}
+ list_del_init(&entry->e_list);
+ cache->c_entry_count--;
+ /*
+ * We keep LRU list reference so that entry doesn't go away
+ * from under us.
+ */
+ spin_unlock(&cache->c_list_lock);
+ head = mb_cache_entry_head(cache, entry->e_key);
+ hlist_bl_lock(head);
+ if (!hlist_bl_unhashed(&entry->e_hash_list)) {
+ hlist_bl_del_init(&entry->e_hash_list);
+ atomic_dec(&entry->e_refcnt);
+ }
+ hlist_bl_unlock(head);
+ if (mb_cache_entry_put(cache, entry))
+ shrunk++;
+ cond_resched();
+ spin_lock(&cache->c_list_lock);
}
- mb_assert(!__mb_cache_entry_is_block_hashed(ce));
- __mb_cache_entry_unhash_block(ce);
- __mb_cache_entry_unhash_index(ce);
- ce->e_bdev = bdev;
- ce->e_block = block;
- ce->e_block_hash_p = block_hash_p;
- ce->e_index.o_key = key;
- hlist_bl_add_head(&ce->e_block_list, block_hash_p);
- hlist_bl_unlock(block_hash_p);
- bucket = hash_long(key, cache->c_bucket_bits);
- index_hash_p = &cache->c_index_hash[bucket];
- hlist_bl_lock(index_hash_p);
- ce->e_index_hash_p = index_hash_p;
- hlist_bl_add_head(&ce->e_index.o_list, index_hash_p);
- hlist_bl_unlock(index_hash_p);
- return 0;
-}
+ spin_unlock(&cache->c_list_lock);
+ return shrunk;
+}
-/*
- * mb_cache_entry_release()
- *
- * Release a handle to a cache entry. When the last handle to a cache entry
- * is released it is either freed (if it is invalid) or otherwise inserted
- * in to the lru list.
- */
-void
-mb_cache_entry_release(struct mb_cache_entry *ce)
+static unsigned long mb_cache_scan(struct shrinker *shrink,
+ struct shrink_control *sc)
{
- __mb_cache_entry_release(ce);
+ int nr_to_scan = sc->nr_to_scan;
+ struct mb_cache *cache = container_of(shrink, struct mb_cache,
+ c_shrink);
+ return mb_cache_shrink(cache, nr_to_scan);
}
+/* We shrink 1/X of the cache when we have too many entries in it */
+#define SHRINK_DIVISOR 16
-/*
- * mb_cache_entry_free()
- *
- */
-void
-mb_cache_entry_free(struct mb_cache_entry *ce)
+static void mb_cache_shrink_worker(struct work_struct *work)
{
- mb_assert(ce);
- mb_assert(list_empty(&ce->e_lru_list));
- hlist_bl_lock(ce->e_index_hash_p);
- __mb_cache_entry_unhash_index(ce);
- hlist_bl_unlock(ce->e_index_hash_p);
- hlist_bl_lock(ce->e_block_hash_p);
- __mb_cache_entry_unhash_block(ce);
- hlist_bl_unlock(ce->e_block_hash_p);
- __mb_cache_entry_release(ce);
+ struct mb_cache *cache = container_of(work, struct mb_cache,
+ c_shrink_work);
+ mb_cache_shrink(cache, cache->c_max_entries / SHRINK_DIVISOR);
}
-
/*
- * mb_cache_entry_get()
+ * mb_cache_create - create cache
+ * @bucket_bits: log2 of the hash table size
*
- * Get a cache entry by device / block number. (There can only be one entry
- * in the cache per device and block.) Returns NULL if no such cache entry
- * exists. The returned cache entry is locked for exclusive access ("single
- * writer").
+ * Create cache for keys with 2^bucket_bits hash entries.
*/
-struct mb_cache_entry *
-mb_cache_entry_get(struct mb_cache *cache, struct block_device *bdev,
- sector_t block)
+struct mb_cache *mb_cache_create(int bucket_bits)
{
- unsigned int bucket;
- struct hlist_bl_node *l;
- struct mb_cache_entry *ce;
- struct hlist_bl_head *block_hash_p;
-
- bucket = hash_long((unsigned long)bdev + (block & 0xffffffff),
- cache->c_bucket_bits);
- block_hash_p = &cache->c_block_hash[bucket];
- /* First serialize access to the block corresponding hash chain. */
- hlist_bl_lock(block_hash_p);
- hlist_bl_for_each_entry(ce, l, block_hash_p, e_block_list) {
- mb_assert(ce->e_block_hash_p == block_hash_p);
- if (ce->e_bdev == bdev && ce->e_block == block) {
- /*
- * Prevent a free from removing the entry.
- */
- atomic_inc(&ce->e_refcnt);
- hlist_bl_unlock(block_hash_p);
- __spin_lock_mb_cache_entry(ce);
- atomic_dec(&ce->e_refcnt);
- if (ce->e_used > 0) {
- DEFINE_WAIT(wait);
- while (ce->e_used > 0) {
- ce->e_queued++;
- prepare_to_wait(&mb_cache_queue, &wait,
- TASK_UNINTERRUPTIBLE);
- __spin_unlock_mb_cache_entry(ce);
- schedule();
- __spin_lock_mb_cache_entry(ce);
- ce->e_queued--;
- }
- finish_wait(&mb_cache_queue, &wait);
- }
- ce->e_used += 1 + MB_CACHE_WRITER;
- __spin_unlock_mb_cache_entry(ce);
+ struct mb_cache *cache;
+ int bucket_count = 1 << bucket_bits;
+ int i;
- if (!list_empty(&ce->e_lru_list)) {
- spin_lock(&mb_cache_spinlock);
- list_del_init(&ce->e_lru_list);
- spin_unlock(&mb_cache_spinlock);
- }
- if (!__mb_cache_entry_is_block_hashed(ce)) {
- __mb_cache_entry_release(ce);
- return NULL;
- }
- return ce;
- }
+ if (!try_module_get(THIS_MODULE))
+ return NULL;
+
+ cache = kzalloc(sizeof(struct mb_cache), GFP_KERNEL);
+ if (!cache)
+ goto err_out;
+ cache->c_bucket_bits = bucket_bits;
+ cache->c_max_entries = bucket_count << 4;
+ INIT_LIST_HEAD(&cache->c_list);
+ spin_lock_init(&cache->c_list_lock);
+ cache->c_hash = kmalloc(bucket_count * sizeof(struct hlist_bl_head),
+ GFP_KERNEL);
+ if (!cache->c_hash) {
+ kfree(cache);
+ goto err_out;
}
- hlist_bl_unlock(block_hash_p);
- return NULL;
-}
+ for (i = 0; i < bucket_count; i++)
+ INIT_HLIST_BL_HEAD(&cache->c_hash[i]);
-#if !defined(MB_CACHE_INDEXES_COUNT) || (MB_CACHE_INDEXES_COUNT > 0)
+ cache->c_shrink.count_objects = mb_cache_count;
+ cache->c_shrink.scan_objects = mb_cache_scan;
+ cache->c_shrink.seeks = DEFAULT_SEEKS;
+ register_shrinker(&cache->c_shrink);
-static struct mb_cache_entry *
-__mb_cache_entry_find(struct hlist_bl_node *l, struct hlist_bl_head *head,
- struct block_device *bdev, unsigned int key)
-{
+ INIT_WORK(&cache->c_shrink_work, mb_cache_shrink_worker);
- /* The index hash chain is alredy acquire by caller. */
- while (l != NULL) {
- struct mb_cache_entry *ce =
- hlist_bl_entry(l, struct mb_cache_entry,
- e_index.o_list);
- mb_assert(ce->e_index_hash_p == head);
- if (ce->e_bdev == bdev && ce->e_index.o_key == key) {
- /*
- * Prevent a free from removing the entry.
- */
- atomic_inc(&ce->e_refcnt);
- hlist_bl_unlock(head);
- __spin_lock_mb_cache_entry(ce);
- atomic_dec(&ce->e_refcnt);
- ce->e_used++;
- /* Incrementing before holding the lock gives readers
- priority over writers. */
- if (ce->e_used >= MB_CACHE_WRITER) {
- DEFINE_WAIT(wait);
-
- while (ce->e_used >= MB_CACHE_WRITER) {
- ce->e_queued++;
- prepare_to_wait(&mb_cache_queue, &wait,
- TASK_UNINTERRUPTIBLE);
- __spin_unlock_mb_cache_entry(ce);
- schedule();
- __spin_lock_mb_cache_entry(ce);
- ce->e_queued--;
- }
- finish_wait(&mb_cache_queue, &wait);
- }
- __spin_unlock_mb_cache_entry(ce);
- if (!list_empty(&ce->e_lru_list)) {
- spin_lock(&mb_cache_spinlock);
- list_del_init(&ce->e_lru_list);
- spin_unlock(&mb_cache_spinlock);
- }
- if (!__mb_cache_entry_is_block_hashed(ce)) {
- __mb_cache_entry_release(ce);
- return ERR_PTR(-EAGAIN);
- }
- return ce;
- }
- l = l->next;
- }
- hlist_bl_unlock(head);
+ return cache;
+
+err_out:
+ module_put(THIS_MODULE);
return NULL;
}
-
+EXPORT_SYMBOL(mb_cache_create);
/*
- * mb_cache_entry_find_first()
- *
- * Find the first cache entry on a given device with a certain key in
- * an additional index. Additional matches can be found with
- * mb_cache_entry_find_next(). Returns NULL if no match was found. The
- * returned cache entry is locked for shared access ("multiple readers").
+ * mb_cache_destroy - destroy cache
+ * @cache: the cache to destroy
*
- * @cache: the cache to search
- * @bdev: the device the cache entry should belong to
- * @key: the key in the index
+ * Free all entries in cache and cache itself. Caller must make sure nobody
+ * (except shrinker) can reach @cache when calling this.
*/
-struct mb_cache_entry *
-mb_cache_entry_find_first(struct mb_cache *cache, struct block_device *bdev,
- unsigned int key)
+void mb_cache_destroy(struct mb_cache *cache)
{
- unsigned int bucket = hash_long(key, cache->c_bucket_bits);
- struct hlist_bl_node *l;
- struct mb_cache_entry *ce = NULL;
- struct hlist_bl_head *index_hash_p;
-
- index_hash_p = &cache->c_index_hash[bucket];
- hlist_bl_lock(index_hash_p);
- if (!hlist_bl_empty(index_hash_p)) {
- l = hlist_bl_first(index_hash_p);
- ce = __mb_cache_entry_find(l, index_hash_p, bdev, key);
- } else
- hlist_bl_unlock(index_hash_p);
- return ce;
-}
+ struct mb_cache_entry *entry, *next;
+ unregister_shrinker(&cache->c_shrink);
-/*
- * mb_cache_entry_find_next()
- *
- * Find the next cache entry on a given device with a certain key in an
- * additional index. Returns NULL if no match could be found. The previous
- * entry is atomatically released, so that mb_cache_entry_find_next() can
- * be called like this:
- *
- * entry = mb_cache_entry_find_first();
- * while (entry) {
- * ...
- * entry = mb_cache_entry_find_next(entry, ...);
- * }
- *
- * @prev: The previous match
- * @bdev: the device the cache entry should belong to
- * @key: the key in the index
- */
-struct mb_cache_entry *
-mb_cache_entry_find_next(struct mb_cache_entry *prev,
- struct block_device *bdev, unsigned int key)
-{
- struct mb_cache *cache = prev->e_cache;
- unsigned int bucket = hash_long(key, cache->c_bucket_bits);
- struct hlist_bl_node *l;
- struct mb_cache_entry *ce;
- struct hlist_bl_head *index_hash_p;
-
- index_hash_p = &cache->c_index_hash[bucket];
- mb_assert(prev->e_index_hash_p == index_hash_p);
- hlist_bl_lock(index_hash_p);
- mb_assert(!hlist_bl_empty(index_hash_p));
- l = prev->e_index.o_list.next;
- ce = __mb_cache_entry_find(l, index_hash_p, bdev, key);
- __mb_cache_entry_release(prev);
- return ce;
+ /*
+ * We don't bother with any locking. Cache must not be used at this
+ * point.
+ */
+ list_for_each_entry_safe(entry, next, &cache->c_list, e_list) {
+ if (!hlist_bl_unhashed(&entry->e_hash_list)) {
+ hlist_bl_del_init(&entry->e_hash_list);
+ atomic_dec(&entry->e_refcnt);
+ } else
+ WARN_ON(1);
+ list_del(&entry->e_list);
+ WARN_ON(atomic_read(&entry->e_refcnt) != 1);
+ mb_cache_entry_put(cache, entry);
+ }
+ kfree(cache->c_hash);
+ kfree(cache);
+ module_put(THIS_MODULE);
}
+EXPORT_SYMBOL(mb_cache_destroy);
-#endif /* !defined(MB_CACHE_INDEXES_COUNT) || (MB_CACHE_INDEXES_COUNT > 0) */
-
-static int __init init_mbcache(void)
+static int __init mbcache_init(void)
{
- register_shrinker(&mb_cache_shrinker);
+ mb_entry_cache = kmem_cache_create("mbcache",
+ sizeof(struct mb_cache_entry), 0,
+ SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, NULL);
+ BUG_ON(!mb_entry_cache);
return 0;
}
-static void __exit exit_mbcache(void)
+static void __exit mbcache_exit(void)
{
- unregister_shrinker(&mb_cache_shrinker);
+ kmem_cache_destroy(mb_entry_cache);
}
-module_init(init_mbcache)
-module_exit(exit_mbcache)
+module_init(mbcache_init)
+module_exit(mbcache_exit)
+MODULE_AUTHOR("Jan Kara <jack@suse.cz>");
+MODULE_DESCRIPTION("Meta block cache (for extended attributes)");
+MODULE_LICENSE("GPL");
diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
index dc8ebec..195fe26 100644
--- a/fs/nfsd/nfs4recover.c
+++ b/fs/nfsd/nfs4recover.c
@@ -32,10 +32,10 @@
*
*/
+#include <crypto/hash.h>
#include <linux/file.h>
#include <linux/slab.h>
#include <linux/namei.h>
-#include <linux/crypto.h>
#include <linux/sched.h>
#include <linux/fs.h>
#include <linux/module.h>
@@ -104,29 +104,35 @@ static int
nfs4_make_rec_clidname(char *dname, const struct xdr_netobj *clname)
{
struct xdr_netobj cksum;
- struct hash_desc desc;
- struct scatterlist sg;
+ struct crypto_shash *tfm;
int status;
dprintk("NFSD: nfs4_make_rec_clidname for %.*s\n",
clname->len, clname->data);
- desc.flags = CRYPTO_TFM_REQ_MAY_SLEEP;
- desc.tfm = crypto_alloc_hash("md5", 0, CRYPTO_ALG_ASYNC);
- if (IS_ERR(desc.tfm)) {
- status = PTR_ERR(desc.tfm);
+ tfm = crypto_alloc_shash("md5", 0, 0);
+ if (IS_ERR(tfm)) {
+ status = PTR_ERR(tfm);
goto out_no_tfm;
}
- cksum.len = crypto_hash_digestsize(desc.tfm);
+ cksum.len = crypto_shash_digestsize(tfm);
cksum.data = kmalloc(cksum.len, GFP_KERNEL);
if (cksum.data == NULL) {
status = -ENOMEM;
goto out;
}
- sg_init_one(&sg, clname->data, clname->len);
+ {
+ SHASH_DESC_ON_STACK(desc, tfm);
+
+ desc->tfm = tfm;
+ desc->flags = CRYPTO_TFM_REQ_MAY_SLEEP;
+
+ status = crypto_shash_digest(desc, clname->data, clname->len,
+ cksum.data);
+ shash_desc_zero(desc);
+ }
- status = crypto_hash_digest(&desc, &sg, sg.length, cksum.data);
if (status)
goto out;
@@ -135,7 +141,7 @@ nfs4_make_rec_clidname(char *dname, const struct xdr_netobj *clname)
status = 0;
out:
kfree(cksum.data);
- crypto_free_hash(desc.tfm);
+ crypto_free_shash(tfm);
out_no_tfm:
return status;
}
diff --git a/fs/ocfs2/cluster/nodemanager.c b/fs/ocfs2/cluster/nodemanager.c
index ebe5438..b17d180 100644
--- a/fs/ocfs2/cluster/nodemanager.c
+++ b/fs/ocfs2/cluster/nodemanager.c
@@ -630,7 +630,6 @@ static void o2nm_cluster_release(struct config_item *item)
{
struct o2nm_cluster *cluster = to_o2nm_cluster(item);
- kfree(cluster->cl_group.default_groups);
kfree(cluster);
}
@@ -666,7 +665,6 @@ static struct config_group *o2nm_cluster_group_make_group(struct config_group *g
struct o2nm_cluster *cluster = NULL;
struct o2nm_node_group *ns = NULL;
struct config_group *o2hb_group = NULL, *ret = NULL;
- void *defs = NULL;
/* this runs under the parent dir's i_mutex; there can be only
* one caller in here at a time */
@@ -675,20 +673,18 @@ static struct config_group *o2nm_cluster_group_make_group(struct config_group *g
cluster = kzalloc(sizeof(struct o2nm_cluster), GFP_KERNEL);
ns = kzalloc(sizeof(struct o2nm_node_group), GFP_KERNEL);
- defs = kcalloc(3, sizeof(struct config_group *), GFP_KERNEL);
o2hb_group = o2hb_alloc_hb_set();
- if (cluster == NULL || ns == NULL || o2hb_group == NULL || defs == NULL)
+ if (cluster == NULL || ns == NULL || o2hb_group == NULL)
goto out;
config_group_init_type_name(&cluster->cl_group, name,
&o2nm_cluster_type);
+ configfs_add_default_group(&ns->ns_group, &cluster->cl_group);
+
config_group_init_type_name(&ns->ns_group, "node",
&o2nm_node_group_type);
+ configfs_add_default_group(o2hb_group, &cluster->cl_group);
- cluster->cl_group.default_groups = defs;
- cluster->cl_group.default_groups[0] = &ns->ns_group;
- cluster->cl_group.default_groups[1] = o2hb_group;
- cluster->cl_group.default_groups[2] = NULL;
rwlock_init(&cluster->cl_nodes_lock);
cluster->cl_node_ip_tree = RB_ROOT;
cluster->cl_reconnect_delay_ms = O2NET_RECONNECT_DELAY_MS_DEFAULT;
@@ -704,7 +700,6 @@ out:
kfree(cluster);
kfree(ns);
o2hb_free_hb_set(o2hb_group);
- kfree(defs);
ret = ERR_PTR(-ENOMEM);
}
@@ -714,18 +709,11 @@ out:
static void o2nm_cluster_group_drop_item(struct config_group *group, struct config_item *item)
{
struct o2nm_cluster *cluster = to_o2nm_cluster(item);
- int i;
- struct config_item *killme;
BUG_ON(o2nm_single_cluster != cluster);
o2nm_single_cluster = NULL;
- for (i = 0; cluster->cl_group.default_groups[i]; i++) {
- killme = &cluster->cl_group.default_groups[i]->cg_item;
- cluster->cl_group.default_groups[i] = NULL;
- config_item_put(killme);
- }
-
+ configfs_remove_default_groups(&cluster->cl_group);
config_item_put(item);
}
diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c
index 319c3a6..bd9812e 100644
--- a/fs/pstore/ram.c
+++ b/fs/pstore/ram.c
@@ -55,8 +55,8 @@ static ulong ramoops_pmsg_size = MIN_MEM_SIZE;
module_param_named(pmsg_size, ramoops_pmsg_size, ulong, 0400);
MODULE_PARM_DESC(pmsg_size, "size of user space message log");
-static ulong mem_address;
-module_param(mem_address, ulong, 0400);
+static unsigned long long mem_address;
+module_param(mem_address, ullong, 0400);
MODULE_PARM_DESC(mem_address,
"start of reserved RAM used to store oops/panic logs");